In [7]:
import sys
from collections import defaultdict 
import random
import numpy as np
import pandas as pd

In [3]:
def tidlists(transactions):
    """Build the vertical (item -> transaction ids) view of a transaction list.

    Parameters
    ----------
    transactions : iterable of iterables
        Each element is one transaction, i.e. an iterable of hashable items.

    Returns
    -------
    list of (item, set of int)
        One pair per distinct item, in order of first appearance. The set
        holds the positions (0-based) of the transactions containing the item.
    """
    index = {}
    for tid, transaction in enumerate(transactions):
        for item in transaction:
            # Create the id set on first sight of the item, then record this transaction
            index.setdefault(item, set()).add(tid)
    return [(item, tids) for item, tids in index.items()]
    
class IntersectAll:
    """Neutral element for ``&``: behaves like "the set of every transaction id"."""

    def __and__(self, rhs):
        # Intersecting "everything" with rhs yields rhs itself, unchanged
        return rhs


# From here on the name refers to one shared instance rather than to the class.
IntersectAll = IntersectAll()

def eclat(items, minsup=0, minlen=1):
    """Enumerate frequent itemsets depth-first by intersecting tidlists.

    Parameters
    ----------
    items : iterable of (item, set of transaction ids)
        Vertical database, one pair per item. The input is copied and is
        not modified.
    minsup : int, default 0
        Minimum absolute support (number of transactions) for an itemset
        to be reported.
    minlen : int, default 1
        Minimum number of items in a reported itemset. Values below 1
        behave like 1: the empty itemset is never reported.

    Returns
    -------
    dict
        Maps each frequent itemset (a tuple of items) to its absolute
        support count. Within a tuple, items that sit later in ``items``
        come first, because candidates are taken from the end of the list.
    """
    frequent_itemsets = {}

    def recurse(candidates, prefix):
        # Every tidlist in `candidates` is already restricted to the
        # transactions containing `prefix`, so its size is the support of
        # prefix + (item,) and no further intersection with the prefix is needed.
        while candidates:
            item, item_tidlist = candidates.pop()
            if len(item_tidlist) < minsup:
                # No superset of an infrequent itemset can be frequent
                continue
            itemset = prefix + (item,)
            frequent_itemsets[itemset] = item_tidlist

            # define the new itemset-conditional database
            extensions = []
            for other_item, other_tidlist in candidates:
                shared = other_tidlist & item_tidlist
                if len(shared) >= minsup:
                    extensions.append((other_item, shared))

            recurse(extensions, itemset)

    # list() makes a private copy and also accepts views such as dict.items()
    recurse(list(items), ())
    return {k: len(v) for k, v in frequent_itemsets.items() if len(k) >= minlen}

In [4]:
# Toy market-basket data: each inner list is one transaction.
dataset = [
    ['Eggs', 'Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Orange', 'Corn', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Orange', 'Eggs'],
    ['Corn', 'Coke', 'Kidney Beans', 'Ice cream'],
    ['Dill', 'Onion', 'Nutmeg'],
    ['Coke', 'Apple', 'Ice cream'],
]

In [5]:
# Vertical view of the dataset: one (item, set of transaction ids) pair per item.
tl = tidlists(dataset) 
tl

[('Eggs', {0, 1, 2, 4, 5, 6}),
 ('Milk', {0, 2, 3, 6}),
 ('Onion', {0, 1, 4, 8}),
 ('Nutmeg', {0, 1, 8}),
 ('Kidney Beans', {0, 2, 3, 4, 7}),
 ('Yogurt', {0, 1, 3, 5}),
 ('Dill', {1, 8}),
 ('Apple', {2, 6, 9}),
 ('Unicorn', {3}),
 ('Corn', {3, 4, 5, 7}),
 ('Ice cream', {4, 7, 9}),
 ('Orange', {5, 6}),
 ('Coke', {7, 9})]

In [63]:
def linking_support(tl, len_data, minsup):
    """Tabulate frequent item pairs and their relative support, in both directions.

    Parameters
    ----------
    tl : list of (item, set of transaction ids)
        Vertical database, passed unchanged to ``eclat``.
    len_data : int
        Number of transactions; absolute counts are divided by this value.
    minsup : int
        Minimum absolute support count handed to ``eclat``.

    Returns
    -------
    pandas.DataFrame
        Columns ``antecedents``, ``consequents`` and ``support``. Each
        frequent pair appears twice: the first half of the frame holds the
        pairs in the orientation produced by ``eclat``, sorted by antecedent
        and then consequent; the second half repeats them in the same row
        order with the two roles swapped. The frame is empty (but has the
        three columns) when no pair reaches ``minsup``.
    """
    columns = ['antecedents', 'consequents', 'support']
    itemsets = eclat(tl, minsup=minsup, minlen=2)

    # Keep 2-itemsets only and build the rows explicitly. Longer itemsets
    # returned by eclat are ignored here instead of being collapsed through
    # a group-by over padded index levels.
    pairs = sorted(
        (itemset[0], itemset[1], count / len_data)
        for itemset, count in itemsets.items()
        if len(itemset) == 2
    )
    # astype keeps `support` numeric even when there are no rows at all
    forward = pd.DataFrame(pairs, columns=columns).astype({'support': float})

    # Same pairs with antecedent and consequent exchanged
    swapped = forward.rename(
        columns={'antecedents': 'consequents', 'consequents': 'antecedents'}
    )[columns]
    return pd.concat([forward, swapped], axis=0, ignore_index=True)
# Number of transactions; used to turn absolute counts into relative support.
len_data = len(dataset) 
# Pair table for item pairs found together in at least 3 transactions.
ante_conse_support = linking_support(tl, len_data, minsup=3)
ante_conse_support

Unnamed: 0,antecedents,consequents,support
0,Corn,Kidney Beans,0.3
1,Kidney Beans,Eggs,0.3
2,Kidney Beans,Milk,0.3
3,Milk,Eggs,0.3
4,Nutmeg,Onion,0.3
5,Onion,Eggs,0.3
6,Yogurt,Eggs,0.3
7,Kidney Beans,Corn,0.3
8,Eggs,Kidney Beans,0.3
9,Milk,Kidney Beans,0.3


In [64]:
def single_support(tl, len_data):
    """Relative support of every individual item.

    Parameters
    ----------
    tl : list of (item, set of transaction ids)
        Vertical database; an item's absolute support is the size of its set.
    len_data : int
        Number of transactions; absolute counts are divided by this value.

    Returns
    -------
    pandas.DataFrame
        Columns ``single`` (the item) and ``single_support`` (fraction of
        transactions containing it), sorted by item. Items whose
        transaction-id set is empty are left out. The frame is empty (but
        has both columns) when ``tl`` is empty.
    """
    # A single item's support is just the size of its tidlist, so read it
    # directly instead of enumerating every itemset with support >= 1.
    rows = sorted(
        ((item, len(tids) / len_data) for item, tids in tl if len(tids) >= 1),
        key=lambda row: row[0],
    )
    # astype keeps `single_support` numeric even when there are no rows at all
    return pd.DataFrame(rows, columns=['single', 'single_support']).astype(
        {'single_support': float}
    )
# Per-item relative support for the demo dataset.
single = single_support(tl, len_data)
single

Unnamed: 0,single,single_support
0,Apple,0.3
1,Coke,0.2
2,Corn,0.4
3,Dill,0.2
4,Eggs,0.6
5,Ice cream,0.3
6,Kidney Beans,0.5
7,Milk,0.4
8,Nutmeg,0.3
9,Onion,0.4


In [78]:
def calculate_confidence_lift_etc(tl, len_data, minsup, sort_by=['confidence', 'lift']):
    """Derive rule metrics for every frequent item pair.

    Parameters
    ----------
    tl : list of (item, set of transaction ids)
        Vertical database, forwarded to ``linking_support`` and ``single_support``.
    len_data : int
        Number of transactions, forwarded to both helpers.
    minsup : int
        Minimum absolute support count, forwarded to ``linking_support``.
    sort_by : list of str, default ['confidence', 'lift']
        Columns the result is sorted by, all in descending order.

    Returns
    -------
    pandas.DataFrame
        One row per (antecedent, consequent) pair with the columns
        ``antecedent support``, ``consequents support``, ``support`` and:

        * ``confidence`` = support / antecedent support
        * ``lift``       = confidence / consequents support
        * ``leverage``   = support - antecedent support * consequents support
        * ``conviction`` = (1 - consequents support) / (1 - confidence)
    """
    pair_frame = linking_support(tl, len_data, minsup=minsup)
    item_frame = single_support(tl, len_data)

    def attach_item_support(frame, key, label):
        # Left-join the per-item support onto `key` and expose it under `label`
        joined = pd.merge(frame, item_frame, left_on=key, right_on='single', how='left')
        return joined.drop(columns=['single']).rename(columns={'single_support': label})

    rules = attach_item_support(pair_frame, 'antecedents', 'antecedent support')
    rules = attach_item_support(rules, 'consequents', 'consequents support')

    confidence = rules['support'] / rules['antecedent support']
    rules = rules.assign(
        confidence=confidence,
        lift=confidence / rules['consequents support'],
        leverage=rules['support'] - rules['antecedent support'] * rules['consequents support'],
        conviction=(1 - rules['consequents support']) / (1 - confidence),
    )

    column_order = [
        'antecedents', 'consequents',
        'antecedent support', 'consequents support', 'support',
        'confidence', 'lift', 'leverage', 'conviction',
    ]
    return rules[column_order].sort_values(by=sort_by, ascending=False)

# Recompute the transaction count, then show the rule metrics for item pairs
# found together in at least 3 transactions.
len_data = len(dataset)
calculate_confidence_lift_etc(tl, len_data, minsup=3)

Unnamed: 0,antecedents,consequents,antecedent support,consequents support,support,confidence,lift,leverage,conviction
4,Nutmeg,Onion,0.3,0.4,0.3,1.0,2.5,0.18,inf
11,Onion,Nutmeg,0.4,0.3,0.3,0.75,2.5,0.18,2.8
0,Corn,Kidney Beans,0.4,0.5,0.3,0.75,1.5,0.1,2.0
9,Milk,Kidney Beans,0.4,0.5,0.3,0.75,1.5,0.1,2.0
3,Milk,Eggs,0.4,0.6,0.3,0.75,1.25,0.06,1.6
5,Onion,Eggs,0.4,0.6,0.3,0.75,1.25,0.06,1.6
6,Yogurt,Eggs,0.4,0.6,0.3,0.75,1.25,0.06,1.6
2,Kidney Beans,Milk,0.5,0.4,0.3,0.6,1.5,0.1,1.5
7,Kidney Beans,Corn,0.5,0.4,0.3,0.6,1.5,0.1,1.5
1,Kidney Beans,Eggs,0.5,0.6,0.3,0.6,1.0,0.0,1.0
