Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow clients code to know about unreadable files and files with broken namespaces #22

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
183 changes: 148 additions & 35 deletions src/bultitude/core.clj
Expand Up @@ -33,36 +33,127 @@
(when-not (= ::done form)
(recur rdr)))))

(defn ns-form-for-file [file]
(with-open [r (PushbackReader. (io/reader file))] (read-ns-form r)))

;;; Classifying files

(defn- starting-classification-for-standalone-file [file]
{:file file
:source-type :standalone-file
:reader-maker #(io/reader file)})

(defn- starting-classification-for-jar-entry [^JarFile jarfile jar-entry]
{:file jar-entry
:jarfile jarfile
:source-type :jar-entry
:reader-maker #(-> jarfile
(.getInputStream jar-entry)
InputStreamReader.
BufferedReader.)})

(defn- has-valid-namespace? [classification]
(= (:status classification) :contains-namespace))

(defn- standalone-file? [classification]
(= (:source-type classification) :standalone-file))
(def jar-entry? (complement standalone-file?))

(defn- readable? [classification]
(or (jar-entry? classification)
(boolean (.canRead (:file classification)))))

(defn- describe-namespace-status
"Produces a map describing whether a file is
* a clojure file with a namespace
* a clojure file that doesn't try to have a namespace
* a file that can't be parsed as Clojure."
[rdr]
(letfn [(plausible-ns-form? [form]
(if (and (list? form) (= 'ns (first form)))
(if (symbol? (second form))
true
(throw (Exception.))) ; not just implausible: flat out impossible/invalid.
false))

(next-form []
(try
(let [form (read rdr false ::done)]
(cond (= ::done form)
::done

(plausible-ns-form? form)
(do
(str form) ;; force the read to read the whole form, throwing on error
(second form))

:else
::boring-form))
(catch Exception _
::broken-namespace)))]
(loop [form (next-form)]
(condp = form
::done {:status :no-attempt-at-namespace}
::boring-form (recur (next-form))
::broken-namespace {:status :invalid-clojure-file}
{:status :contains-namespace, :namespace-symbol form}))))

(defn- extend-starting-classification [classification]
(letfn [(grovel-through-bytes []
(with-open [r (PushbackReader. ((:reader-maker classification)))]
(describe-namespace-status r)))]
(if (not (readable? classification))
(assoc classification :status :unreadable)
(merge classification (grovel-through-bytes)))))

(defn classify-dir-entries
"Looks for all Clojure (.clj) files in the directory tree rooted at `dir`, a string.
Returns a seq of maps.
Each map will contain one of four values for the `:status` key:
:contains-namespace (The namespace is the value of key `:namespace-symbol`.)
:unreadable
:no-namespace (There is no `ns` form.)
:broken-namespace (An `ns` entry in the file is malformed.)
The original java.io.File object is under key `:file`."
[dir]
(->> (file-seq (io/file dir))
(filter clj?)
(map starting-classification-for-standalone-file)
(map extend-starting-classification)))

(defn classify-jar-entries [^File jar]
"Looks for all Clojure (.clj) files in the given jarfile.
Returns a seq of maps.
Each map will contain one of three values for the `:status` key:
:contains-namespace (The namespace is the value of key `:namespace-symbol`.)
:no-namespace (There is no `ns` form.)
:broken-namespace (An `ns` entry in the file is malformed.)
The original JarEntry object is under key `:file` (sic), and the original
jar is under :jar-file."
(try
(let [as-jar-file (JarFile. jar)]
(->> (enumeration-seq (.entries as-jar-file))
(filter clj-jar-entry?)
(map (partial starting-classification-for-jar-entry as-jar-file))
(map extend-starting-classification)))
(catch ZipException e
(throw (Exception. (str "jar file corrupt: " jar) e)))))



(defn- namespaces-in-x [x classifier]
(->> (classifier x)
(filter has-valid-namespace?)
(map :namespace-symbol)))

(defn namespaces-in-dir
"Return a seq of all namespaces found in Clojure source files in dir."
[dir]
(for [^File f (file-seq (io/file dir))
:when (and (clj? f) (.canRead f))
:let [ns-form (ns-form-for-file f)]
:when ns-form]
ns-form))

(defn- ns-in-jar-entry [^JarFile jarfile ^JarEntry entry]
(with-open [rdr (-> jarfile
(.getInputStream entry)
InputStreamReader.
BufferedReader.
PushbackReader.)]
(read-ns-form rdr)))
(namespaces-in-x dir classify-dir-entries))

(defn- namespaces-in-jar
"Return a seq of all valid namespaces found in Clojure source files in the given jar."
[^File jar]
(namespaces-in-x jar classify-jar-entries))

(defn- namespaces-in-jar [^File jar]
(try
(let [jarfile (JarFile. jar)]
(for [entry (enumeration-seq (.entries jarfile))
:when (clj-jar-entry? entry)
:let [ns-form (ns-in-jar-entry jarfile entry)]
:when ns-form]
ns-form))
(catch ZipException e
(throw (Exception. (str "jar file corrupt: " jar) e)))))

(defn- split-classpath [^String classpath]
(.split classpath (System/getProperty "path.separator")))
Expand All @@ -86,22 +177,44 @@
(defn- classpath->files [classpath]
(map io/file classpath))

(defn select-subdirectory
"`dir` is the root of a directory hierarchy. Branches (subdirectories)
of that hierarchy are described in Clojure namespace notation. The branch
so described is returned as a ^File.

A `nil` namespace or empty string means the entire hierarchy.

Example:
(extend-directory-with-namespace (io/file \".\") \"a.b-test\")
=> (io/file \"./a/b_test\")"
[^File dir namespace]
(if namespace
(io/file dir (-> namespace
(.replaceAll "\\." "/")
(.replaceAll "-" "_")))
dir))

(defn filter-by-prefix
"Given a list of namespaces, retain only those whose names
begin with the given prefix. A `nil` prefix means everything
is to be retained."

;; Not describing what `and` does below. As far as I can guess,
;; it's just to make the function produce a nil result from a
;; nil input (instead of the empty sequence you'd otherwise get.
[namespaces prefix]
(if prefix
(filter #(and % (.startsWith (name %) prefix)) namespaces)
namespaces))

(defn file->namespaces
"Map a classpath file to the namespaces it contains. `prefix` allows for
reducing the namespace search space. For large directories on the classpath,
passing a `prefix` can provide significant efficiency gains."
[^String prefix ^File f]
(cond
(.isDirectory f) (namespaces-in-dir
(if prefix
(io/file f (-> prefix
(.replaceAll "\\." "/")
(.replaceAll "-" "_")))
f))
(jar? f) (let [ns-list (namespaces-in-jar f)]
(if prefix
(filter #(and % (.startsWith (name %) prefix)) ns-list)
ns-list))))
(.isDirectory f) (namespaces-in-dir (select-subdirectory f prefix))
(jar? f) (filter-by-prefix (namespaces-in-jar f) prefix)))

(defn namespaces-on-classpath
"Return symbols of all namespaces matching the given prefix both on disk and
Expand Down
2 changes: 2 additions & 0 deletions test/bultitude/clojure-file-without-a-namespace.clj
@@ -0,0 +1,2 @@
(defn fact [n]
(apply * (range 1 (inc n))))
120 changes: 119 additions & 1 deletion test/bultitude/core_test.clj
@@ -1,6 +1,12 @@
(ns bultitude.core-test
(:use clojure.test
bultitude.core))
bultitude.core
[clojure.pprint :only [pprint]])
(require [clojure.java.io :as io])
(:import (java.io File BufferedReader PushbackReader InputStreamReader)
(java.util.jar JarFile JarEntry)))

;;; Top-level tests

(deftest namespaces-on-classpath-test
(testing "find clojure.core"
Expand All @@ -22,3 +28,115 @@
(is (=
#{'bulti-tude.test}
(set (namespaces-on-classpath :prefix "bulti-tude"))))))

(deftest namespaces-in-dir-test
(is (= '#{bulti-tude.test bultitude.core-test}
(set (namespaces-in-dir "test")))))

;; Don't know why `namespaces-in-jar` is private while `namespaces-in-dir` isn't.
;; Since it is, it's tested below, in "utilities".


;; The above functions are the main ones, but the following
;; are useful for tools that want to work with the namespace
;; classifications rather than just namespaces.

(deftest select-subdirectory-test
(is (= (io/file "./test/bulti_tude")
(select-subdirectory (io/file ".") "test.bulti-tude")))

(is (= (io/file "test")
(select-subdirectory (io/file "test") nil)))

(is (= (io/file "test")
(select-subdirectory (io/file "test") ""))))

(deftest filter-by-prefix-test
(is (= '[bultitude.core]
(filter-by-prefix '[bultitude.core clojure.test] "bultitude")))
(is (= '[bultitude.core clojure.test]
(filter-by-prefix '[bultitude.core clojure.test] nil))))

(defn formatted-actual [result-maps]
(set (map #(assoc % :file (.getName (:file %))) result-maps)))

(defn selected-actual [formatted-actual]
(set (map #(select-keys % [:namespace-symbol :status :file])
formatted-actual)))

(deftest classify-dir-entries-test
(let [result (formatted-actual (classify-dir-entries "test"))
expected #{ {:status :contains-namespace
:file "test.clj"
:namespace-symbol 'bulti-tude.test}

{:status :no-attempt-at-namespace
:file "clojure-file-without-a-namespace.clj"}

{:status :contains-namespace
:file "core_test.clj"
:namespace-symbol 'bultitude.core-test}

{:status :invalid-clojure-file
:file "invalid.clj"}}]

(is (= expected (selected-actual result)))
(is (= #{:standalone-file} (set (map :source-type result))))))

(deftest classify-jar-entries-test
(let [result (formatted-actual (classify-jar-entries (io/file "test/test.jar")))
expected #{ {:status :contains-namespace
:file "bulti_tude/test.clj"
:namespace-symbol 'bulti-tude.test}

{:status :no-attempt-at-namespace
:file "bultitude/clojure-file-without-a-namespace.clj"}

{:status :contains-namespace
:file "bultitude/core_test.clj"
:namespace-symbol 'bultitude.core-test}

{:status :invalid-clojure-file
:file "bultitude/invalid.clj"}}]
(is (= expected (selected-actual result)))
(is (= #{:jar-entry} (set (map :source-type result))))
(is (= #{"test/test.jar"} (set (map #(.getName (:jarfile %)) result))))))


;;; Utilities

(deftest describe-namespace-status-test
(let [subject #'bultitude.core/describe-namespace-status
as-reader #(PushbackReader. (java.io.StringReader. %))]
;; success cases
(is (= {:status :contains-namespace
:namespace-symbol 'foo}
(subject (as-reader "(ns foo)"))))
(is (= {:status :contains-namespace
:namespace-symbol 'foo}
(subject (as-reader "1 (ns foo)"))))
;; Note: it doesn't matter if the file is broken
;; after the namespace is recognized
(is (= {:status :contains-namespace
:namespace-symbol 'foo}
(subject (as-reader "1 (ns foo) ("))))

;; No attempt at namespaces
(is (= {:status :no-attempt-at-namespace}
(subject (as-reader ""))))
(is (= {:status :no-attempt-at-namespace}
(subject (as-reader "1"))))
(is (= {:status :no-attempt-at-namespace}
(subject (as-reader "(defn fact [n] (inc n))"))))

;; Broken Clojure files
(is (= {:status :invalid-clojure-file}
(subject (as-reader "(ns foo"))))
(is (= {:status :invalid-clojure-file}
(subject (as-reader "(ns foo]"))))))


(deftest namespaces-in-jar-test
(is (= (set (#'bultitude.core/namespaces-in-jar (io/file "test/test.jar")))
(set '[bulti-tude.test bultitude.core-test]))))

Empty file.