Permalink
Browse files

primitive analysis wip

  • Loading branch information...
1 parent 3cf117b commit 5729bce9396050b9b4baf67d1d9d22d590f92f8c @richhickey richhickey committed Oct 7, 2012
Showing with 342 additions and 32 deletions.
  1. +3 −1 project.clj
  2. +68 −0 src/datomic/codeq/analyzer.clj
  3. +68 −0 src/datomic/codeq/analyzers/clj.clj
  4. +167 −31 src/datomic/codeq/core.clj
  5. +36 −0 src/datomic/codeq/util.clj
View
@@ -5,4 +5,6 @@
:url "http://www.eclipse.org/legal/epl-v10.html"}
:main datomic.codeq.core
:dependencies [[com.datomic/datomic-free "0.8.3538"]
- [org.clojure/clojure "1.5.0-alpha5"]])
+ [commons-codec "1.7"]
+ [local.repo/clojure "1.5.0-alpha6"]
+ #_[org.clojure/clojure "1.5.0-alpha6"]])
@@ -0,0 +1,68 @@
+;; Copyright (c) Metadata Partners, LLC. All rights reserved.
+;; The use and distribution terms for this software are covered by the
+;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this distribution.
+;; By using this software in any fashion, you are agreeing to be bound by
+;; the terms of this license.
+;; You must not remove this notice, or any other, from this software.
+
+(ns datomic.codeq.analyzer
+ (:import [java.io StringReader]
+ [org.apache.commons.codec.digest DigestUtils]))
+
+(set! *warn-on-reflection* true)
+
+(defprotocol Analyzer
+ (keyname [a] "keyword name for analyzer")
+ (revision [a] "long")
+ (extensions [a] "[string ...], including '.'")
+ (schemas [a] "map of revisions to (incremental) schema data")
+ (analyze [a db f src] "f is file entityid, src is string, returns tx-data"))
+
+(defn sha
+ "Returns the hex string of the sha1 of s"
+ [^String s]
+ (org.apache.commons.codec.digest.DigestUtils/shaHex s))
+
+(defn ws-minify
+ "Consecutive ws becomes a single space, then trim"
+ [s]
+ (let [r (java.io.StringReader. s)
+ sb (StringBuilder.)]
+ (loop [c (.read r) skip true]
+ (when-not (= c -1)
+ (let [ws (Character/isWhitespace c)]
+ (when (or (not ws) (not skip))
+ (.append sb (if ws " " (char c))))
+ (recur (.read r) ws))))
+ (-> sb str .trim)))
+
+(defn loc
+ "Returns zero-based [line col endline endcol] given one-based
+ \"line col endline endcol\" string"
+ [loc-string]
+ (mapv dec (read-string (str "[" loc-string "]"))))
+
+(defn line-offsets
+ "Returns a vector of zero-based offsets of lines. Note the offsets
+ are where the line would be, the last offset is not necessarily
+ within the string. i.e. if the last character is a newline, the last
+ index is the length of the string."
+ [^String s]
+ (let [nl (long \newline)]
+ (persistent!
+ (loop [ret (transient [0]), i 0]
+ (if (= i (.length s))
+ ret
+ (recur (if (= (.codePointAt s i) nl)
+ (conj! ret (inc i))
+ ret)
+ (inc i)))))))
+
+(defn segment
+ "Given a string and line offsets, returns text from (zero-based)
+ line and col to endline/endcol (exclusive)"
+ [^String s line-offsets line col endline endcol]
+ (subs s
+ (+ (nth line-offsets line) col)
+ (+ (nth line-offsets endline) endcol)))
@@ -0,0 +1,68 @@
+;; Copyright (c) Metadata Partners, LLC. All rights reserved.
+;; The use and distribution terms for this software are covered by the
+;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this distribution.
+;; By using this software in any fashion, you are agreeing to be bound by
+;; the terms of this license.
+;; You must not remove this notice, or any other, from this software.
+
+(ns datomic.codeq.analyzers.clj
+ (:require [datomic.api :as d]
+ [datomic.codeq.util :refer [cond-> index->id-fn tempid?]]
+ [datomic.codeq.analyzer :as az]))
+
+(defn analyze-1
+ "returns [tx-data ctx]"
+ [db f x loc seg ret {:keys [sha->id added ns] :as ctx}]
+ (if loc
+ (let [sha (-> seg az/ws-minify az/sha)
+ cid (sha->id sha)
+ newcid (and (tempid? cid) (not (added cid)))
+ ret (cond-> ret newcid (conj {:db/id cid :code/sha sha :code/text seg}))
+ added (cond-> added newcid (conj cid))]
+ [ret (assoc ctx :added added)])
+ [ret ctx]))
+
+(defn analyze
+ [a db f src]
+ (with-open [r (clojure.lang.LineNumberingPushbackReader. (java.io.StringReader. src))]
+ (let [loffs (az/line-offsets src)
+ eof (Object.)
+ ctx {:sha->id (index->id-fn db :code/sha)
+ :codename->id (index->id-fn db :code/name)
+ :added #{}}]
+ (loop [ret [], ctx ctx, x (read r false eof)]
+ (if (= eof x)
+ ret
+ (let [{:keys [line column]} (meta x)
+ ctx (if (and (coll? x) (= (first x) 'ns))
+ (assoc ctx :ns (second x))
+ ctx)
+ endline (.getLineNumber r)
+ endcol (.getColumnNumber r)
+ [loc seg] (when (and line column)
+ [(str line " " column " " endline " " endcol)
+ (az/segment src loffs (dec line) (dec column) (dec endline) (dec endcol))])
+ [ret ctx] (analyze-1 db f x loc seg ret ctx)]
+ (recur ret ctx (read r false eof))))))))
+
+(deftype CljAnalyzer []
+ az/Analyzer
+ (keyname [a] :clj)
+ (revision [a] 1)
+ (extensions [a] [".clj"])
+ (schemas [a] {1 [{:db/id #db/id[:db.part/db]
+ :db/ident :clj/ns
+ :db/valueType :db.type/ref
+ :db/cardinality :db.cardinality/one
+ :db/doc "codename of ns defined by expression"
+ :db.install/_attribute :db.part/db}
+ {:db/id #db/id[:db.part/db]
+ :db/ident :clj/def
+ :db/valueType :db.type/ref
+ :db/cardinality :db.cardinality/one
+ :db/doc "codename defined by expression"
+ :db.install/_attribute :db.part/db}]})
+ (analyze [a db f src] (analyze a db f src)))
+
+(defn impl [] (CljAnalyzer.))
Oops, something went wrong.

0 comments on commit 5729bce

Please sign in to comment.