In [1]:
import pandas as pd
df = pd.read_csv('Groceries_dataset.csv')

# Build one summary row per calendar date. Every purchase made on the same
# date is pooled, so each date acts as a single "transaction" downstream.
purchases_by_date = df.groupby('Date')
item_names_by_date = purchases_by_date['itemDescription']
groceries_time = pd.DataFrame({
    'members_count': purchases_by_date['Member_number'].nunique(),
    'items_count': item_names_by_date.nunique(),
    'items': item_names_by_date.unique(),
})

# One array of distinct item names per date, in date-sorted order.
transactions = groceries_time['items'].tolist()

transactions_length = len(transactions)
# Shared tally of itemset -> occurrence count, filled in by the mining helpers.
global_itemset = {}

In [2]:
def create_l1_itemset(transactions):
    """Count how many times each single item occurs across all transactions.

    Every item is wrapped in a one-element ``frozenset`` so it can share a
    dictionary with larger itemsets. Each occurrence is also added to the
    module-level ``global_itemset`` tally.

    Returns a dict mapping ``frozenset({item})`` to its occurrence count.
    """
    singles = {}
    for basket in transactions:
        for element in basket:
            singleton = frozenset((element,))
            singles[singleton] = singles.get(singleton, 0) + 1
            global_itemset[singleton] = global_itemset.get(singleton, 0) + 1
    return singles

In [3]:
def prune(itemset, transactions, min_support):
    """Keep itemsets that meet ``min_support`` and strip dead items from rows.

    Parameters
    ----------
    itemset : dict
        Maps each candidate itemset (an iterable of hashable items, e.g. a
        ``frozenset``) to the number of rows it was counted in.
    transactions : list
        Rows of items. Support is measured against ``len(transactions)``.
    min_support : float
        Minimum fraction of rows a candidate must reach to be kept.

    Returns
    -------
    tuple
        ``(frequent, transactions)``: ``frequent`` is the sub-dict of
        candidates meeting the threshold. ``transactions`` is the input
        itself when nothing was removed, otherwise a new list of lists with
        the removable items filtered out. Rows are never dropped, only
        shortened.
    """
    n_rows = len(transactions)
    frequent = {}
    items_in_frequent = set()
    items_in_infrequent = set()

    for candidate, count in itemset.items():
        # With no rows at all nothing can have support; avoid dividing by zero.
        support = count / n_rows if n_rows else 0.0
        if support >= min_support:
            frequent[candidate] = count
            items_in_frequent.update(candidate)
        else:
            items_in_infrequent.update(candidate)

    # Only drop items that appear in no surviving itemset. For single-item
    # candidates this is exactly "the infrequent items"; for larger candidates
    # it avoids discarding an item that still belongs to a frequent itemset.
    removable = items_in_infrequent - items_in_frequent
    if removable:
        # One pass over the rows, regardless of how many candidates failed.
        transactions = [
            [item for item in row if item not in removable]
            for row in transactions
        ]
    return frequent, transactions

In [4]:
from itertools import chain, combinations
def get_union(transactions, k):
    """Count every ``k``-item combination that occurs within a transaction.

    Each transaction contributes one count to every size-``k`` combination of
    its items. Combinations are stored as ``frozenset`` keys, and each count
    is also added to the module-level ``global_itemset`` tally.

    Returns a dict mapping each observed ``k``-itemset to its count; it is
    empty when no transaction holds at least ``k`` items.
    """
    tallies = {}
    for basket in transactions:
        for combo in combinations(basket, k):
            candidate = frozenset(combo)
            tallies[candidate] = tallies.get(candidate, 0) + 1
            global_itemset[candidate] = global_itemset.get(candidate, 0) + 1
    return tallies

In [5]:
def powerset(s):
    """Lazily yield every non-empty proper subset of ``s`` as a tuple.

    Subsets are produced smallest first, from size 1 up to ``len(s) - 1``;
    the empty set and ``s`` itself are excluded.
    """
    subset_sizes = range(1, len(s))
    per_size = (combinations(s, size) for size in subset_sizes)
    return chain.from_iterable(per_size)

In [6]:
def get_association_rules(itemset, min_confidence, min_lift):
    """Derive ``lhs -> rhs`` rules from each itemset and filter them.

    Each itemset is split every possible way into a non-empty antecedent
    (``lhs``) and the remaining items (``rhs``). Counts come from the
    module-level ``global_itemset`` and are normalised by the module-level
    ``transactions_length``.

    A rule is kept only when its confidence is at least ``min_confidence``
    and its lift is at least ``min_lift``.

    Returns a list of dicts with the keys ``lhs``, ``rhs``, ``support``,
    ``confidence`` and ``lift``, in generation order.
    """
    found = []
    for full_set in itemset:
        support = global_itemset[full_set] / transactions_length
        for antecedent_items in powerset(full_set):
            lhs = frozenset(antecedent_items)
            rhs = frozenset(
                member for member in full_set if member not in antecedent_items
            )
            joint_support = global_itemset[lhs | rhs] / transactions_length
            lhs_support = global_itemset[lhs] / transactions_length
            confidence = joint_support / lhs_support
            if not (confidence >= min_confidence):
                continue
            # Lift is only needed once the confidence threshold has been met.
            rhs_support = global_itemset[rhs] / transactions_length
            lift = confidence / rhs_support
            if not (lift >= min_lift):
                continue
            found.append({
                'lhs': lhs,
                'rhs': rhs,
                'support': support,
                'confidence': confidence,
                'lift': lift,
            })
    return found

In [7]:
def apriori(transactions, min_support, max_length, min_confidence, min_lift):
    """Mine association rules level by level and return them sorted by lift.

    Parameters
    ----------
    transactions : list
        Rows of items; each row is one transaction.
    min_support : float
        Minimum fraction of transactions an itemset must appear in.
    max_length : int
        Largest itemset size to build. Growth stops earlier if a level
        yields no candidates.
    min_confidence : float
        Minimum confidence for a rule to be reported.
    min_lift : float
        Minimum lift for a rule to be reported.

    Returns
    -------
    list
        Rule dicts (``lhs``, ``rhs``, ``support``, ``confidence``, ``lift``)
        ordered by descending lift. Rules are generated only from the last
        itemset level that was reached.
    """
    # The helpers accumulate counts into the module-level ``global_itemset``.
    # Reset it so that running this function again in the same kernel does not
    # add a second round of counts and distort support and lift.
    global_itemset.clear()

    # prune() shortens rows but never removes them, so this row count remains
    # the right denominator for the final support check below.
    n_transactions = len(transactions)

    itemset = create_l1_itemset(transactions)
    k = 2
    while k <= max_length:
        itemset, transactions = prune(itemset, transactions, min_support)
        candidates = get_union(transactions, k)
        if not candidates:
            break
        itemset = candidates
        k += 1

    # The level produced last by get_union() has not been through prune(), so
    # enforce min_support here before any rules are derived from it.
    itemset = {
        members: count
        for members, count in itemset.items()
        if count / n_transactions >= min_support
    }

    rules = get_association_rules(itemset, min_confidence, min_lift)
    return sorted(rules, key=lambda rule: rule['lift'], reverse=True)

In [8]:
import time
# Time a single mining run. perf_counter() is a monotonic, high-resolution
# clock, so the measurement is not affected by system clock adjustments.
start_time = time.perf_counter()
result = apriori(transactions=transactions, min_support=0.00030, max_length=2, min_confidence=0.01, min_lift=3)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print("Elapsed time:", elapsed_time, "seconds")

Elapsed time: 0.4699087142944336 seconds


In [9]:
# Tabulate the mined rules, one rule per row, with a fixed column order.
rule_columns = ['lhs', 'rhs', 'support', 'confidence', 'lift']
resultsinDataFrame = pd.DataFrame(result, columns=rule_columns)
resultsinDataFrame

Unnamed: 0,lhs,rhs,support,confidence,lift
0,(preservation products),(liqueur),0.001374,1.000000,80.888889
1,(liqueur),(preservation products),0.001374,0.111111,80.888889
2,(prosecco),(kitchen utensil),0.001374,0.052632,38.315789
3,(kitchen utensil),(prosecco),0.001374,1.000000,38.315789
4,(male cosmetics),(kitchen utensil),0.001374,0.028571,20.800000
...,...,...,...,...,...
459,(house keeping products),(potato products),0.005495,0.090909,3.008264
460,(house keeping products),(frozen fruits),0.002747,0.045455,3.008264
461,(frozen fruits),(house keeping products),0.002747,0.181818,3.008264
462,(tea),(artif. sweetener),0.004121,0.115385,3.000000
