<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -6,12 +6,12 @@ def dig_web():
     twits = getTwits()
     photos = getFlickers()
     words = []
-#    for twit in twits:
-#        twit.content_tags = extractKeyWords(twit)
-#        words.append(twit)
-    for photo in photos:
-        photo.content_tags = photo.tags
-        words.append(photo)
+    for twit in twits:
+        twit.content_tags = extractKeyWords(twit)
+        words.append(twit)
+    #for photo in photos:
+    #    photo.content_tags = photo.tags
+    #    words.append(photo)
     return words
 
 if __name__ == &quot;__main__&quot;:</diff>
      <filename>analyser.py</filename>
    </modified>
    <modified>
      <diff>@@ -1,13 +1,23 @@
+CLUSTERS = {}
+
 class Cluster:
     def __init__(self, tags=None):
         if tags is None:
-            tags = set()
-        self.tags = tags
+            tags = frozenset()
+        elif not isinstance(tags, frozenset):
+            tags = frozenset(tags)
+        self.tags = frozenset(tags)
         self.contextables = set()
+        CLUSTERS[self.tags] = self
 
     def add(self, *args):
         self.contextables.add(*args)
 
+    def populate(self, contextables):
+        for contextable in contextables:
+            if contextable.belongs_to(self):
+                self.add(contextable)
+
     def __repr__(self):
         return &quot;%s =&gt; %s&quot; %(self.tags, self.contextables)
 
@@ -24,26 +34,42 @@ class Cluster:
         return len(self.contextables)
 
 
-
 def create_clusters(contextables, clusters, tags):
-    new_clusters = [ Cluster(cluster.tags.union(set([tag]))) for cluster in clusters 
-                           for tag in tags if tag not in cluster ]
-
-    for contextable in contextables:
-        for cluster in new_clusters:
-            if contextable.belongs_to(cluster):
-                cluster.add(contextable)
+    print &quot;Start creating new clusters&quot;
+    new_clusters = []
+    for cluster in clusters:
+        for tag in tags:
+            if tag not in cluster:
+                tag_cluster = CLUSTERS[frozenset([tag])]
+                new_cluster = Cluster(cluster.tags.union(tag_cluster.tags))
+                new_cluster.populate(cluster.contextables.union(tag_cluster.contextables))
+                new_clusters.append(new_cluster)
+    return list(sorted(new_clusters, key=lambda i: len(i)))
 
+def merge_clusters(contextables, number=5):
+    print &quot;Merging&quot;
+    tags = order_tags(contextables)[:160]
+    new_clusters = []
+    print &quot;First clusters&quot;
+    for tag in tags:
+        cluster = Cluster(frozenset([tag]))
+        cluster.populate(contextables)
+        new_clusters.append(cluster)
+    for i in range(number-1):
+        new_clusters = create_clusters(contextables, new_clusters, tags)[:160/(i+2)]
     return new_clusters
 
-def merge_clusters(contextables, number=5):
-    tags = order_tags(contextables)
-    clusters = [ Cluster(set([tag])) for tag in tags ]
-    new_clusters = create_clusters(contextables, clusters, tags)
+def order_clusters(clusters):
+    clusters = {}
+    for cluster in clusters:
+        for tag in contextable.content_tags:
+            tag = tag.strip()
+            if tags.has_key(tag):
+                tags[tag] += 1
+            else:
+                tags[tag] = 1
+    return sort_dict_by_value(tags)
 
-    for i in range(number-2):
-        new_clusters = create_clusters(contextables, new_clusters, tags)
-    return new_clusters
 
 def order_tags(contextables):
     tags = {}</diff>
      <filename>clusters.py</filename>
    </modified>
    <modified>
      <diff>@@ -1,4 +1,4 @@
 from analyser import dig_web
 from clusters import merge_clusters
 
-mc = merge_clusters(dig_web(), 2)
+mc = merge_clusters(dig_web(), 5)</diff>
      <filename>main.py</filename>
    </modified>
    <modified>
      <diff>@@ -87,6 +87,17 @@ class TestCluster(unittest.TestCase):
 
         self.assertEquals(len(c), 27, c.contextables)
 
+    def test_php_clustering(self):
+        clusters = merge_clusters(ctxs, 1)
+        cluster = Cluster(set([&quot;php&quot;]))
+        for c in clusters:
+            if c == cluster:
+                break
+        else:
+            self.fail(&quot;Cluster not found!&quot;)
+
+        self.assertEquals(len(c), 2, c.contextables)
+
     def test_php_ruby_clustering(self):
         clusters = merge_clusters(ctxs, 2)
         cluster = Cluster(set([&quot;php&quot;, &quot;ruby&quot;]))</diff>
      <filename>test_clusters.py</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>677b9068afae4ac29d6933c3c7e3015665c5c14b</id>
    </parent>
  </parents>
  <author>
    <name>Bruno Gola</name>
    <email>bgola@pedal.(none)</email>
  </author>
  <url>http://github.com/Fabs/eventomeeter/commit/2309475438f5f7c171a13402451abc3a39c08a5c</url>
  <id>2309475438f5f7c171a13402451abc3a39c08a5c</id>
  <committed-date>2008-11-09T01:46:28-08:00</committed-date>
  <authored-date>2008-11-09T01:46:28-08:00</authored-date>
  <message>estou com sono para colocar qualquer comentario que tenha algum sentido...</message>
  <tree>ce07529e1329166b0b1ef6694d5aeca80c8520ed</tree>
  <committer>
    <name>Bruno Gola</name>
    <email>bgola@pedal.(none)</email>
  </committer>
</commit>
