In [1]:
import numpy as np
import pandas as pd
import math
import random

In [2]:

class AssociationRule(object):
    """Level-wise (Apriori-style) miner for frequent itemsets.

    Itemsets are represented as lists of items.  After ``run()``,
    ``kitemset[n]`` holds every itemset of size ``n`` (for ``1 <= n <= k``)
    whose support is at least ``minsup``.

    Attributes:
        trans: list of transactions, each a collection of hashable items.
            May be ``None`` until ``getdata()`` (or the caller) fills it in.
        minsup: minimum support, as a fraction of the transactions.
        k: largest itemset size to mine.
        kitemset: dict mapping itemset size to the list of frequent itemsets.
    """

    def __init__(self, trans, minsup, k):
        self.trans = trans
        self.minsup = minsup
        self.kitemset = {}
        self.k = k

    def count(self, itemset):
        """Return how many transactions contain every item of ``itemset``."""
        return sum(
            1 for transaction in self.trans
            if all(item in transaction for item in itemset)
        )

    def support(self, itemset):
        """Return the fraction of transactions that contain ``itemset``.

        Returns 0.0 when there are no transactions instead of dividing by
        zero.
        """
        total = len(self.trans)
        if total == 0:
            return 0.0
        return float(self.count(itemset)) / total

    def generate(self, itemset):
        """Build size n+1 candidates from a list of size-n itemsets.

        Two distinct itemsets that agree on everything but their last item
        are joined: the result is the earlier one (by position in
        ``itemset``) extended with the last item of the later one.

        Args:
            itemset: list of itemsets (each a list), all of the same size.

        Returns:
            List of candidate itemsets, in the order the pairs are visited.
        """
        candidates = []
        for position, left in enumerate(itemset):
            # Only look at later entries: an itemset is never joined with
            # itself, and each unordered pair is visited exactly once.
            for right in itemset[position + 1:]:
                if left != right and left[:-1] == right[:-1]:
                    candidates.append(left + [right[-1]])
        return candidates

    def _frequent(self, candidates):
        """Return the candidates whose support reaches ``self.minsup``."""
        return [c for c in candidates if self.support(c) >= self.minsup]

    def run(self):
        """Fill ``self.kitemset`` for every size from 1 up to ``self.k``."""
        # The iteration order of this set fixes the item order used inside
        # every larger itemset built from it.
        items = set(item for transaction in self.trans for item in transaction)
        self.kitemset[1] = self._frequent([item] for item in items)

        # Use the instance's own k, not whatever global ``k`` the calling
        # script happens to define.
        for size in range(2, self.k + 1):
            candidates = self.generate(self.kitemset[size - 1])
            self.kitemset[size] = self._frequent(candidates)

    def getdata(self):
        """Replace ``self.trans`` with a reproducible random data set.

        Generates 100 transactions; each is the de-duplicated result of
        1 to 100 draws of an integer item in the range 1..30.
        """
        # A private generator seeded with 1 yields the same sequence as
        # seeding the module-level one, without disturbing global state.
        rng = random.Random(1)
        row = 100
        trans = []
        for _ in range(row):
            col = rng.randint(1, 100)
            trans.append(list(set(rng.randint(1, 30) for _ in range(col))))
        self.trans = trans

    def printset(self, k):
        """Print a header line followed by every frequent itemset of size ``k``.

        Raises:
            KeyError: if no itemsets of size ``k`` have been computed.
        """
        # Parenthesised single-argument print behaves the same as a Python 2
        # print statement and as the Python 3 print function.
        print(str(k) + "-itemset contains:")
        for itemset in self.kitemset[k]:
            print(itemset)
    
if __name__ == "__main__":

    minsup = 0.5

    # Data set 1: a small hand-written list of shopping baskets.
    baskets = [
        ["Bread", "Milk"],
        ["Bread", "Diaper", "Beer", "Eggs", "Milk"],
        ["Milk", "Diaper", "Beer", "Coke"],
        ["Bread", "Milk", "Diaper", "Beer"],
        ["Bread", "Milk", "Diaper", "Coke"],
    ]
    k = 3
    miner = AssociationRule(baskets, minsup, k)
    miner.run()
    # Show the frequent itemsets of every size from 1 up to k.
    for size in range(1, k + 1):
        miner.printset(size)

    # Data set 2: transactions produced by the model's own getdata();
    # only the largest itemset size is printed.
    k = 5
    miner = AssociationRule(None, minsup, k)
    miner.getdata()
    miner.run()
    miner.printset(k)
    
    

1-itemset contains:
['Beer']
['Diaper']
['Bread']
['Milk']
2-itemset contains:
['Beer', 'Diaper']
['Beer', 'Milk']
['Diaper', 'Bread']
['Diaper', 'Milk']
['Bread', 'Milk']
3-itemset contains:
['Beer', 'Diaper', 'Milk']
['Diaper', 'Bread', 'Milk']
5-itemset contains:
[2, 3, 8, 10, 19]
[2, 3, 10, 11, 19]
[2, 3, 12, 19, 26]
[2, 3, 17, 19, 26]
[2, 3, 19, 28, 30]
[2, 6, 8, 9, 10]
[2, 6, 11, 28, 29]
[2, 8, 9, 10, 12]
[2, 8, 9, 10, 19]
[2, 8, 9, 10, 22]
[2, 8, 9, 10, 26]
[2, 8, 10, 11, 19]
[2, 8, 10, 12, 26]
[2, 9, 10, 12, 26]
[2, 10, 11, 28, 29]
[6, 8, 9, 10, 29]
[6, 9, 10, 11, 29]
[6, 9, 10, 12, 26]
[6, 9, 10, 12, 29]
[6, 9, 10, 26, 29]
[6, 9, 10, 29, 30]
[6, 9, 11, 28, 29]
[6, 10, 11, 28, 29]
[8, 9, 10, 12, 26]
[10, 11, 28, 29, 30]
