public
Description: Clustering algorithms for Clojure
Homepage: http://github.com/tyler/clojure-cluster
Clone URL: git://github.com/tyler/clojure-cluster.git
clojure-cluster / cluster.clj
100644 68 lines (60 sloc) 2.263 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
;; Clustering algorithms in Clojure
;;
;; Perform a hierarchical cluster with: hcluster
;; (hcluster vectors)
;; vectors - a sequence of vectors
;;
;; Perform k-means clustering with: kcluster
;; (kcluster vectors number-of-clusters range-start range-end)
;; vectors - a sequence of vectors
;;
 
(ns cluster
    (:use cluster.internal))
 
(defn kcluster
  "Performs k-means clustering.
:vectors - a sequence of vectors
:how-many - how many clusters to find
:start - lower limit of numbers in vectors
:end - upper limit of numbers in vectors
 
(kcluster [[1 2] [3 4] [5 6]] 2 0 6)"
  ([vectors how-many start end]
     (kcluster vectors (random-vectors how-many (count (nth vectors 0)) start end)))
  ([vectors nodes]
     (let [clusters
           (loop [index 0
                  clusters (vec (replicate (count nodes) []))]
             (if (= (count vectors) index)
               clusters
               (let [vector (nth vectors index)
                     [sim closest-node] (closest-vector vector nodes)]
                 (recur
                  (inc index)
                  (assoc clusters
                    closest-node
                    (conj (nth clusters closest-node) index))))))
           new-nodes (map (fn [cluster] (average-vectors (map #(nth vectors %) cluster))) clusters)]
       (if (= new-nodes nodes)
         [clusters new-nodes]
         (kcluster vectors new-nodes)))))
 
 
(defn hcluster
  "Performs hierarchical clustering.
:nodes - a sequence of maps of the form:
{ :vec [1 2 3] }
The return value will be a tree of Maps of the form:
{ :vec [] :left { :vec ... } :right { :vec ... } }"
  [nodes]
  (if (< (count nodes) 2)
    nodes
    (let [vectors (map #(get % :vec) nodes)
          [closest-pair cls] (closest-vectors vectors)
          [left-idx right-idx] closest-pair
          new-nodes (without nodes left-idx right-idx)
          left-node (nth nodes left-idx)
          right-node (nth nodes right-idx)]
      (hcluster (conj
                 new-nodes
                 {:left left-node
                  :right right-node
                  :vec (average-vectors
                        [(get left-node :vec)
                         (get right-node :vec)])})))))