public
Description: Yahoo Hack Day Project
Homepage:
Clone URL: git://github.com/Fabs/eventomeeter.git
eventomeeter / clusters.py
100644 91 lines (76 sloc) 2.692 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
class Cluster:
    """A set of tags together with the contextables collected for it.

    Every instance registers itself in the class-level ``CLUSTERS`` mapping
    under its tag set, so creating a second cluster with the same tags
    replaces the registry entry of the first one.
    """

    # Registry shared by all instances: frozenset of tags -> Cluster.
    CLUSTERS = {}

    def __init__(self, tags=None):
        """Create a cluster for *tags* and register it in ``CLUSTERS``.

        tags -- any iterable of hashable tags; ``None`` means no tags.
        """
        # frozenset() accepts any iterable (including another frozenset),
        # so a single conversion covers every input.
        self.tags = frozenset() if tags is None else frozenset(tags)
        self.contextables = set()
        self.CLUSTERS[self.tags] = self

    @classmethod
    def get(cls, tags):
        """Return the registered cluster whose tag set equals *tags*.

        Raises KeyError when no cluster with these tags has been created.
        """
        return cls.CLUSTERS[frozenset(tags)]

    def add(self, *args):
        """Add every positional argument to this cluster's contextables."""
        # set.add() takes exactly one element, so forwarding *args to it
        # fails as soon as zero or several contextables are passed.
        self.contextables.update(args)

    def populate(self, contextables):
        """Add each contextable for which ``belongs_to(self)`` is true."""
        for contextable in contextables:
            if contextable.belongs_to(self):
                self.add(contextable)

    def __repr__(self):
        return "%s => %s" % (self.tags, self.contextables)

    def __contains__(self, item):
        """Report whether *item* is one of this cluster's tags."""
        return item in self.tags

    def __eq__(self, other):
        """Compare by tag set; the collected contextables are ignored."""
        try:
            other_tags = other.tags
        except AttributeError:
            # Not cluster-like: let Python fall back to its default
            # comparison instead of raising.
            return NotImplemented
        return self.tags == other_tags

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        # Keeps hashing consistent with __eq__; defining __eq__ alone
        # leaves instances unhashable on Python 3.
        return hash(self.tags)

    def __len__(self):
        """Return the number of collected contextables (not of tags)."""
        return len(self.contextables)
 
 
def create_clusters(contextables, clusters, tags):
    """Grow every cluster by one additional tag.

    For each cluster and each tag it does not already contain, a new
    Cluster is built from the union of the cluster's tags and the tags of
    the registered single-tag cluster, then populated from the union of
    both contextable sets. Only clusters that end up with at least one
    contextable are kept.

    contextables -- accepted for signature compatibility; not used here.
    clusters     -- iterable of existing clusters to extend.
    tags         -- iterable of candidate tags.

    Returns a new list of the kept clusters, smallest first.
    Raises KeyError if a tag has no registered single-tag cluster.
    """
    grown = []
    for base in clusters:
        for tag in tags:
            if tag in base:
                continue
            single = Cluster.get([tag])
            candidate = Cluster(base.tags.union(single.tags))
            candidate.populate(base.contextables.union(single.contextables))
            if candidate.contextables:
                grown.append(candidate)
    # Stable sort by number of contextables, ascending.
    grown.sort(key=len)
    return grown
 
def merge_clusters(contextables, number=5):
    """Build clusters of up to *number* tags from *contextables*.

    Starts with one populated single-tag cluster per tag returned by
    order_tags(), then calls create_clusters() up to ``number - 1`` times
    to add one tag per round. Stops early, keeping the previous round's
    clusters, as soon as a round produces nothing.

    Returns the list of clusters from the last productive round.
    """
    ranked_tags = order_tags(contextables)

    current = []
    for tag in ranked_tags:
        seed = Cluster(frozenset([tag]))
        seed.populate(contextables)
        current.append(seed)

    for _ in range(number - 1):
        grown = create_clusters(contextables, current, ranked_tags)
        if not grown:
            # No larger cluster has any contextable; keep what we have.
            break
        current = grown
    return current
 
def order_clusters(clusters):
    """Return the content tags found in *clusters*, most frequent first.

    Counts each tag in the ``content_tags`` of every contextable held by
    the given clusters and returns the tags ordered by
    sort_dict_by_value(). A contextable that sits in several clusters is
    counted once per cluster.

    NOTE(review): the previous body referenced the undefined names
    ``tags`` and ``contextable`` and raised NameError on every call.
    Iterating each cluster's ``contextables`` is the inferred intent --
    confirm against the callers.
    """
    tags = {}
    for cluster in clusters:
        for contextable in cluster.contextables:
            for tag in contextable.content_tags:
                # dict.get works on Python 2 and 3, unlike has_key.
                tags[tag] = tags.get(tag, 0) + 1
    return sort_dict_by_value(tags)
 
def order_tags(contextables):
    """Return every content tag in *contextables*, most frequent first.

    Counts how many times each tag occurs across the ``content_tags`` of
    all contextables and returns the tags ordered by sort_dict_by_value().
    """
    tags = {}
    for contextable in contextables:
        for tag in contextable.content_tags:
            # dict.has_key() no longer exists on Python 3; get() with a
            # default works on both Python 2 and 3.
            tags[tag] = tags.get(tag, 0) + 1
    return sort_dict_by_value(tags)
 
def sort_dict_by_value(d):
    """Return the keys of *d* ordered by descending value.

    Keys with equal values are ordered by descending key.
    """
    # Sort (value, key) pairs ascending, then flip the whole sequence.
    ranked = sorted((value, key) for key, value in d.items())
    ranked.reverse()
    return [key for _, key in ranked]