In [None]:
import itertools as it
from collections import defaultdict

In [None]:
class PrettyFrozenset(frozenset):
    """A frozenset that displays itself as a plain list of its elements."""

    def __repr__(self):
        # Shown as e.g. "[1, 2]" instead of "PrettyFrozenset({1, 2})".
        return repr([*self])

In [None]:
# loading data
# Every line of the file is parsed as one basket of whitespace-separated integer item ids.
with open("./retail.csv", 'r') as file:
    baskets = file.readlines()

# split() without an argument tolerates tabs, repeated spaces and trailing
# whitespace; blank lines are skipped rather than crashing on int('').
transactions = [[int(item) for item in basket.split()] for basket in baskets if basket.strip()]
# Toy dataset for quick manual checks:
# transactions = [[1, 3, 4],
#                 [2, 3, 5],
#                 [1, 2, 3, 5],
#                 [2, 5],
#                 [1, 3, 5]]

# Maps each frequent itemset to its support count; filled in level by level.
frequent_sets = {}
# NOTE: a threshold of 0 disables support pruning entirely, because the pruning
# cells only delete entries whose count is strictly below the threshold.
support_threshold = 0 #round(0.01*len(transactions))
# support_threshold = 2
confidence_threshold = 0.60
print(f"support threshold = {support_threshold}")
print(f"confidence threshold = {confidence_threshold}")

In [None]:
# Pass 1: tally every occurrence of every item across all baskets.
# The resulting map (singleton itemset -> count) is the candidate set C1.
C1 = defaultdict(int)
for item in it.chain.from_iterable(transactions):
    singleton = PrettyFrozenset([item])
    C1[singleton] += 1

print(C1)
print(f"There are {len(C1)} candidate singletons.")

In [None]:
# Pruning C1 in place: singletons counted fewer times than the support
# threshold are dropped; whatever survives is L1.

# Snapshot the keys first, because entries are removed while walking them.
items = tuple(C1)

for candidate in items:
    if C1[candidate] >= support_threshold:
        continue
    C1.pop(candidate)

# L1 is the very same dictionary object as the (now pruned) C1.
L1 = C1
print(L1)
print(f"There are {len(L1)} frequent singletons.")
# Record the frequent singletons together with their counts.
frequent_sets.update(L1)

In [None]:
# Candidate generation for C2: every unordered pair of items taken from L1.
frequent_singletons = [member for itemset in L1 for member in itemset]
pairs = it.combinations(frequent_singletons, 2)

# Each candidate pair starts with a support count of zero.
C2 = dict.fromkeys(map(PrettyFrozenset, pairs), 0)

print(C2)
print(f"There are {len(C2)} candidate item pairs.")

In [None]:
# Pass 2: for every basket, bump the count of each 2-item combination
# that is a candidate in C2; combinations that are not candidates are ignored.

for basket in transactions:
    for pair in map(PrettyFrozenset, it.combinations(basket, 2)):
        if pair not in C2:
            continue
        C2[pair] += 1
print(C2)

In [None]:
# Pruning C2 in place: pairs counted fewer times than the support threshold
# are dropped; whatever survives is L2.
pairs = tuple(C2)  # snapshot, since C2 shrinks during the sweep
infrequent_pairs = [pair for pair in pairs if C2[pair] < support_threshold]
for pair in infrequent_pairs:
    del C2[pair]

# L2 is the very same dictionary object as the (now pruned) C2.
L2 = C2
print(L2)
print(f"There are {len(L2)} frequent item pairs.")
frequent_sets.update(L2)

In [None]:
# Candidate generation for C3: every 3-item combination of the distinct items
# that appear in at least one pair of L2.
frequent_singletons = set(it.chain.from_iterable(L2))
triples = it.combinations(frequent_singletons, 3)

# Each candidate triple starts with a support count of zero.
C3 = dict.fromkeys(map(PrettyFrozenset, triples), 0)

print(C3)
print(f"There are {len(C3)} candidate item triples.")

In [None]:
# Initial Prune of C3: will remove all triples containing infrequent pairs (pairs not in L2)

# Walk a snapshot of C3's own keys. The `triples` iterator created during
# candidate generation has already been consumed by the time this cell runs
# (so looping over it would prune nothing), and it yields plain tuples, which
# are not keys of C3 anyway.
for t in tuple(C3):
    # A triple can only be frequent if each of its three 2-item subsets is in L2.
    if any(PrettyFrozenset(p) not in L2 for p in it.combinations(t, 2)):
        del C3[t]

print(C3)
print(f"There are now {len(C3)} candidate item triples after initial pruning.")
    

In [None]:
# Pass 3: for every basket, bump the count of each 3-item combination
# that is a candidate in C3; combinations that are not candidates are ignored.

for basket in transactions:
    for t in map(PrettyFrozenset, it.combinations(basket, 3)):
        if t not in C3:
            continue
        C3[t] += 1

print(C3)

In [None]:
# Pruning C3 in place: triples counted fewer times than the support threshold
# are dropped; whatever survives is L3.
triples = tuple(C3)  # snapshot, since C3 shrinks during the sweep
infrequent_triples = [t for t in triples if C3[t] < support_threshold]
for t in infrequent_triples:
    del C3[t]
# L3 is the very same dictionary object as the (now pruned) C3.
L3 = C3

print(L3)
print(f"There are now {len(L3)} candidate item triples after support threshold pruning.")
frequent_sets.update(L3)

In [None]:
# Dump the accumulated map of frequent itemsets (of every size) to their support counts.
print(frequent_sets)

In [None]:
# generating association rules based on frequent pairs and triples
# Each rule is stored as a tuple (antecedent items, consequent items).
rules = []

for itmset in frequent_sets.keys():

    # Singletons produce no rules: range(1, 1) is empty.
    for r in range(1, len(itmset)):

        # Every non-empty proper subset of the itemset is tried as antecedent.
        for A in it.combinations(itmset, r):

            antecedent = PrettyFrozenset(A)
            consequent = itmset - antecedent
            antecedent_support = frequent_sets.get(antecedent, 0)

            # An antecedent with no recorded occurrences (possible when
            # support_threshold is 0, which keeps zero-count candidates) has
            # no defined confidence; skip it instead of dividing by zero.
            if antecedent_support == 0:
                continue

            # confidence(A -> B) = support(A u B) / support(A)
            confidence = frequent_sets[itmset] / antecedent_support

            if confidence >= confidence_threshold:
                rules.append((list(antecedent), list(consequent)))

print("\nAssociation Rules:")
for rule in rules:
    print(rule)

In [None]:
# List the frequent itemsets one per line (keys only, without their counts).
for item in frequent_sets.keys():
    print(item)