Permalink
Browse files

...

  • Loading branch information...
1 parent 5cdd1d1 commit dd512ce1b41d8c4f7e63a25c81c65af08c9fa568 @sramsay sramsay committed May 20, 2013
Showing with 35 additions and 43 deletions.
  1. +1 −0 .gitignore
  2. +1 −1 project.clj
  3. +24 −33 src/edu/unl/abbot/core.clj
  4. +9 −9 src/edu/unl/abbot/stylesheets.clj
View
@@ -1,2 +1,3 @@
.lein-deps-sum
.DS_Store
+abbot.log
View
@@ -1,4 +1,4 @@
-(defproject edu.unl/abbot "0.8.0"
+(defproject edu.unl/abbot "0.8.1"
:description "Abbot: A Conversion Tool for Text Interoperability"
:dependencies [[org.clojure/clojure "1.5.1"]
[org.clojure/data.xml "0.0.7"]
View
@@ -8,7 +8,7 @@
;;; for the Center for Digital Research in the Humanities at the
;;; University of Nebraska-Lincoln.
;;;
-;;; Last Modified: Sat Mar 16 21:08:24 CDT 2013
+;;; Last Modified: Mon May 20 18:33:02 CDT 2013
;;;
;;; Copyright © 2011-2013 Board of Regents of the University of Nebraska-
;;; Lincoln (and others). See COPYING for details.
@@ -20,19 +20,21 @@
(ns edu.unl.abbot.core
(:import
- (java.io File))
+ (java.io File)
+ (java.nio.file Files Paths))
(:use edu.unl.abbot.stylesheets)
(:use edu.unl.abbot.utils)
+ (:use clojure.xml)
+ (:use clojure.java.io)
+ (:use [clojure.java.shell :only (sh)])
(:use [clojure.tools.logging :only (info error fatal)])
- (:use clojure.java.io)
- (:gen-class))
+ (:gen-class))
(require '[clojure.xml :as xml])
+(require '[clojure.java.io :as io])
(declare converter)
(declare input-files)
-(declare validate)
-(declare isvalid?)
(defn convert-files [{input-dir :inputdir
output-dir :outputdir
@@ -47,7 +49,6 @@
stylesheet (conversion-stylesheet target params)
converter (converter output-dir stylesheet)
input (input-files input-dir)]
- (validate input)
(try
(info "Starting job")
(if single
@@ -56,39 +57,29 @@
(info "Job ended -- shutting down")
(catch Exception ex
(error ex "There was an error during file processing"))))
+ ; else
(do
(println "No input/output directories specified (-h for details)")
(fatal "No input/output directories specified (-h for details)"))))
(defn converter [output-dir stylesheet]
- "Returns a function that runs the conversion and writes out the file"
+ "Return a function that runs the conversion and writes out the file"
; Written as a closure to keep the main convert-files function
; uncluttered.
(fn [x] (spit (str output-dir (.getName x)) (apply-master stylesheet x))))
(defn input-files [input-dir]
- "Read inputs file and do some basic sanity checking."
- ; Apparently, the only truly reliable way to check that a text file
- ; is indeed an XML file is to parse it. The XML declaration is
- ; optional, and different legal unicode encodings may or may not
- ; have a byte order mark. The .xml extension is everywhere used
- ; in the specification, but nowhere mandated.
- ;
- ; So we check that the file is, in fact, a file, and demand an .xml
- ; extension. Notification of more insidious file errors will have
- ; to be left to Saxon.
- (let [has-xml? [#(.isFile %) #(has-xml-extension? %)]
- files (file-seq (File. input-dir))]
- (filter (fn [x] (every? #(% x) has-xml?)) files)))
-
-(defn validate [input-files]
- (pmap isvalid? input-files))
-
-(defn isvalid? [file]
- (if (xml/parse file)
- true
- (if (.isDirectory (File. "quarantine"))
- (spit file "quarantine" (.getName file))
- (do
- (.mkdir (java.io.File. "quarantine"))
- (spit file "quarantine" (.getName file))))))
+ "Read input files and do some basic sanity checking."
+ ;; Apparently, the only truly reliable way to check that a text file
+ ;; is indeed an XML file is to parse it. The XML declaration is
+ ;; optional, and different legal unicode encodings may or may not
+ ;; have a byte order mark. The .xml extension is everywhere used
+ ;; in the specification, but nowhere mandated.
+ ;;
+ ;; So we check that the file is, in fact, a file and demand an .xml
+ ;; extension. If the user has passed the quarantine switch, the file
+ ;; is also meant to be parsed to see if it validates (that step is not
+ ;; done in this function); otherwise, errors are left to Saxon.
+ (let [has-xml? [#(.isFile %) #(has-xml-extension? %)]
+ files (file-seq (File. input-dir))]
+ (filter (fn [x] (every? #(% x) has-xml?)) files)))
@@ -9,7 +9,7 @@
;;; for the Center for Digital Research in the Humanities, University
;;; of Nebraska-Lincoln.
;;;
-;;; Last Modified: Wed Mar 13 17:25:45 CDT 2013
+;;; Last Modified: Sun Mar 17 15:44:31 CDT 2013
;;;
;;; Copyright © 2011-2013 Board of Regents of the University of Nebraska-
;;; Lincoln (and others). See COPYING for details.
@@ -20,14 +20,14 @@
;;; for more details.
(ns edu.unl.abbot.stylesheets
- (:import
- (java.io InputStreamReader
- FileInputStream))
- (:use edu.unl.abbot.utils)
- (:use clojure.data.xml)
- (:use [clojure.tools.logging :only (error)])
- (:require [saxon :as sax])
- (:require [clojure.java.io :as io]))
+ (:import
+ (java.io InputStreamReader
+ FileInputStream))
+ (:use edu.unl.abbot.utils)
+ (:use clojure.data.xml)
+ (:use [clojure.tools.logging :only (error)])
+ (:require [saxon :as sax])
+ (:require [clojure.java.io :as io]))
(defn create-meta-stylesheet [params]
(try

0 comments on commit dd512ce

Please sign in to comment.