In [1]:
import sys
from collections import defaultdict
import random

In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

In [21]:
def tidlists(transactions):
    """Convert row-wise transactions into a vertical (item -> tid-set) layout.

    Each transaction is identified by its position in ``transactions``
    (0-based). Items repeated inside one transaction are recorded once,
    because the ids are collected in a set.

    Parameters
    ----------
    transactions : iterable of iterables
        One inner iterable of hashable items per transaction.

    Returns
    -------
    list of (item, set of int)
        One pair per distinct item, in order of first appearance.
    """
    index = {}
    for tid, basket in enumerate(transactions):
        for item in basket:
            # Create the tid-set the first time an item is seen, then record the id.
            index.setdefault(item, set()).add(tid)
    return list(index.items())

class IntersectAll:
    """Identity element for set intersection.

    Intersecting this object with any value returns that value unchanged,
    whichever side of ``&`` it appears on. It stands for "the set of all
    transaction ids" without having to materialise such a set.
    """

    def __and__(self, other):
        # IntersectAll & other -> other
        return other

    def __rand__(self, other):
        # other & IntersectAll -> other. Built-in sets return NotImplemented for
        # a non-set right operand, so Python falls back to this reflected method.
        return other

    def __repr__(self):
        return 'IntersectAll'


# The class name is deliberately rebound to its single instance: the rest of the
# notebook uses ``IntersectAll`` as a value, never as a type.
IntersectAll = IntersectAll()

def eclat(items, minsup=0, minlen=1):
    """Mine frequent itemsets from a vertical (item -> tid-set) database.

    The search is depth-first: an item is taken from the end of the candidate
    list, its tid-set is intersected with the tid-sets of the remaining
    candidates, and the survivors form the conditional database that is
    explored with the extended prefix.

    Parameters
    ----------
    items : iterable of (item, set)
        Pairs of an item and the set of transaction ids containing it, e.g.
        the list returned by ``tidlists``. The iterable is copied into a new
        list, so the caller's object is not modified.
    minsup : int, optional
        Minimum absolute support, i.e. the minimum number of transactions an
        itemset must occur in. With the default of 0 every combination is
        reported (including combinations with support 0), which grows
        exponentially with the number of items.
    minlen : int, optional
        Minimum number of items an itemset must have to be reported. The
        empty itemset is never reported, so values below 1 behave like 1.

    Returns
    -------
    dict
        Maps each frequent itemset (a tuple of items, in the order they were
        added to the prefix) to its support count. Keys appear in depth-first
        discovery order.
    """
    supports = {}

    def recurse(candidates, prefix):
        # Invariant: every tid-set in `candidates` is already restricted to the
        # transactions that contain all items of `prefix`, so its size is the
        # support of `prefix + (item,)` and no further intersection is needed.
        while candidates:
            item, item_tidlist = candidates.pop()
            if len(item_tidlist) < minsup:
                # An infrequent itemset cannot have a frequent superset.
                continue

            itemset = prefix + (item,)
            if len(itemset) >= minlen:
                supports[itemset] = len(item_tidlist)

            # Build the conditional database for `itemset` from the candidates
            # that have not been used as an extension at this level yet.
            conditional = []
            for other_item, other_tidlist in candidates:
                shared = other_tidlist & item_tidlist
                if len(shared) >= minsup:
                    conditional.append((other_item, shared))

            recurse(conditional, itemset)

    # list(...) takes a shallow copy and also accepts tuples, generators or
    # dict.items() views; the tid-sets themselves are never mutated.
    recurse(list(items), ())
    return supports

In [4]:
# Load the raw basket data. header=None tells pandas the file has no header row,
# so the first line is read as data.
store_data = pd.read_csv('store_data.csv', header=None)


In [5]:
# Preview the first rows of the loaded frame.
store_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [6]:
# One list of strings per row of the frame. str() is applied to every cell, so
# a missing (NaN) cell ends up as the literal string 'nan' in its row.
records = [[str(cell) for cell in row] for row in store_data.values]

In [7]:
# Inspect the first converted transaction.
records[0]

['shrimp',
 'almonds',
 'avocado',
 'vegetables mix',
 'green grapes',
 'whole weat flour',
 'yams',
 'cottage cheese',
 'energy drink',
 'tomato juice',
 'low fat yogurt',
 'green tea',
 'honey',
 'salad',
 'mineral water',
 'salmon',
 'antioxydant juice',
 'frozen smoothie',
 'spinach',
 'olive oil']

In [8]:
# Build the (item, tid-set) pairs from the row-wise records, then show how many
# distinct items were found.
tl = tidlists(records)
len(tl)


121

In [9]:
# Remove the placeholder item 'nan' (presumably produced by str() on empty cells
# when the records were built) so it is not mined as a real product.
# The position is printed first, then the list is rebuilt without the entry.
# Unlike deleting while looping over range(len(tl)-1), this also checks the last
# entry, never skips the entry that follows a removed one, and is safe to re-run.
for i, entry in enumerate(tl):
    if entry[0] == 'nan':
        print(i)
tl = [entry for entry in tl if entry[0] != 'nan']
    


23


In [22]:
# Mine the itemsets. minsup is compared against the size of a tid-set, so 60 is
# an absolute number of transactions (not a fraction); minlen=3 keeps only
# itemsets with at least three items.
result = eclat(tl, minsup=60, minlen=3)


In [23]:
# Sanity check: eclat returns a dict keyed by itemset tuples.
type(result)

dict

In [24]:
# Peek at the first ten (itemset, support) pairs in the order they were stored.
list(result.items())[:10]

[(('ground beef', 'chocolate', 'spaghetti'), 69),
 (('ground beef', 'chocolate', 'mineral water'), 82),
 (('ground beef', 'spaghetti', 'frozen vegetables'), 65),
 (('ground beef', 'spaghetti', 'milk'), 73),
 (('ground beef', 'spaghetti', 'eggs'), 67),
 (('ground beef', 'spaghetti', 'mineral water'), 128),
 (('ground beef', 'frozen vegetables', 'mineral water'), 69),
 (('ground beef', 'milk', 'mineral water'), 83),
 (('ground beef', 'eggs', 'mineral water'), 76),
 (('pancakes', 'chocolate', 'mineral water'), 70)]

In [25]:
# Pair every support count with its itemset and sort ascending, so the itemsets
# closest to the minsup threshold come first (ties are ordered by the itemset tuple).
sorted_d = sorted(zip(result.values(), result.keys()))
sorted_d[:10]


[(60, ('chocolate', 'frozen vegetables', 'milk')),
 (60, ('chocolate', 'spaghetti', 'french fries')),
 (60, ('spaghetti', 'french fries', 'eggs')),
 (62, ('chocolate', 'olive oil', 'mineral water')),
 (62, ('french fries', 'milk', 'mineral water')),
 (62, ('spaghetti', 'frozen vegetables', 'milk')),
 (62, ('spaghetti', 'mineral water', 'green tea')),
 (63, ('chocolate', 'french fries', 'eggs')),
 (64, ('chocolate', 'french fries', 'mineral water')),
 (64, ('milk', 'olive oil', 'mineral water'))]

In [14]:
# NOTE(review): this cell carries execution count 14, i.e. it was last run before
# `result` was recomputed in In [22]; the recorded output is therefore stale.
# Re-run after "Restart & Run All" or remove the cell.
result

{}