Taken from https://github.com/CodeFarmer/decision-tree/blob/master/src/decision_tree/core.clj

In [1]:
(defn log2 [x]
    "Return the base 2 logarithm of x"
    (/ (Math/log x) (Math/log 2)))

#'user/log2

Calculates the entropy, where alist is (input, output), then:
$$S = -k_{b}\sum{p\log(p)}$$

In [2]:
(defn entropy [alist]
    "Given a list of pairs if (input, output), calculate the entropy of the list with respect to output"
    (let [n (count alist)
          counts (vals (frequencies (map #(nth % 1) alist)))] (comment "Returns an array of frequencies in alist")

        (reduce + (map #(* (- %) (log2 %)) (map #(/ % n) counts)))))

#'user/entropy

In [3]:
(defn partition-map
  
   "Given function and a seq, return a map of values the function can take when using members of the seq as an argument, to sub-seqs containing the parts of seq corresponding to those outputs"

  ([afn aseq]     

     (partition-map {} afn aseq))

  ([acc afn aseq]

     (if (empty? aseq)
       acc
       (let [i (first aseq) o (afn i) inputs (get acc o)]
         (if inputs
           (recur
            (assoc acc o (conj inputs i)) afn (rest aseq))
           (recur
            (assoc acc o [i]) afn (rest aseq)))))))


#'user/partition-map

In [4]:
(defn all-keys [aseq]
    "Given a list of maps, return a set containing all the keys from those maps"
    (into #{} (flatten (map keys aseq))))

#'user/all-keys

In [5]:
(defn gain

  ([k alist]

     "Given a key and a list of pairs of (map, output), return the net reduction in entropy caused by partitioning the list according to the values associated with key in the input maps"
               
     (gain (entropy alist) k alist))

  ([current-entropy k alist]
     (let [parts (vals (partition-map #(k (first %)) alist))]
       (- current-entropy
          (reduce +
            (map #(* (entropy %) (/ (count %) (count alist))) parts))))))


#'user/gain

In [6]:
(defn most-informative-key [aseq]
    (apply max-key #(gain % aseq) (all-keys (map first aseq))))

#'user/most-informative-key

In [7]:
(defn map-vals
    ([amap fn]
     (map-vals {} amap fn))
    
    ([acc amap fn]
     (if (empty? amap)
         acc
         (let [[k v] (first amap)]
            (recur (assoc acc k (fn v)) (rest amap) fn)))))

#'user/map-vals

In [8]:
(defn build-decision-tree [aseq]
    (if (zero? (entropy aseq))
        (nth (first aseq) 1)
        (let [k (most-informative-key aseq)]
            [k (map-vals (partition-map #(k (first %)) aseq) build-decision-tree)])))

#'user/build-decision-tree

In [9]:
(defn tree-decide [tree input-map]

  "Given a decision tree and an input map, make decisions based on the contents of input-map and return the correct output (leaf node)."

  (if (vector? tree)
    (let [[k parts-map] tree]
      (tree-decide (parts-map (k input-map)) input-map))
tree))

#'user/tree-decide

In [10]:
(def sample-data 
    
  [[{:outlook "Sunny",    :temperature "Hot",   :humidity  "High",   :wind  "Weak"},    "No"],
   [{:outlook "Sunny",    :temperature "Hot",   :humidity  "High",   :wind  "Strong"},  "No"],
   [{:outlook "Overcast", :temperature  "Hot",  :humidity  "High",   :wind  "Weak"},   "Yes"],
   [{:outlook "Rain",     :temperature  "Mild", :humidity  "High",   :wind  "Weak"},   "Yes"],
   [{:outlook "Rain",     :temperature  "Cool", :humidity  "Normal", :wind  "Weak"},   "Yes"],
   [{:outlook "Rain",     :temperature  "Cool", :humidity  "Normal", :wind  "Strong"}, "No"],
   [{:outlook "Overcast", :temperature  "Cool", :humidity  "Normal", :wind  "Strong"}, "Yes"],
   [{:outlook "Sunny",    :temperature  "Mild", :humidity  "High",   :wind  "Weak"},   "No"],
   [{:outlook "Sunny",    :temperature  "Cool", :humidity  "Normal", :wind  "Weak"},   "Yes"],
   [{:outlook "Rain",     :temperature  "Mild", :humidity  "Normal", :wind  "Weak"},   "Yes"],
   [{:outlook "Sunny",    :temperature  "Mild", :humidity  "Normal", :wind  "Strong"}, "Yes"],
   [{:outlook "Overcast", :temperature  "Mild", :humidity  "High",   :wind  "Strong"}, "Yes"],
   [{:outlook "Overcast", :temperature  "Hot",  :humidity  "Normal", :wind  "Weak"},   "Yes"],
   [{:outlook "Rain",     :temperature  "Mild", :humidity  "High",   :wind  "Strong"}, "No"]]
)

#'user/sample-data

In [11]:
(def tree (build-decision-tree sample-data))

#'user/tree

In [12]:
(tree-decide tree 
             {:outlook "Sunny"
              :temperature "Cold"
              :humidity "Normal"
              :wind "Weak"
              })

"Yes"

In [46]:
(defn take-csv
  "Takes file name and reads data."
  [fname]
  (with-open [file (reader fname)]
    (csv/parse-csv (slurp file))))

CompilerException java.lang.RuntimeException: Unable to resolve symbol: reader in this context, compiling:(null:4:20) 


class clojure.lang.Compiler$CompilerException: 