In [24]:
# Toy market-basket data: one inner list per transaction, holding the items
# that were bought together.
dataset = [
    ["Coffee", "Donut", "Sandwich"],
    ["Coffee", "Donut"],
    ["Coffee", "Sandwich"],
    ["Coffee", "Muffin"],
    ["Donut", "Muffin"],
]


In [25]:
# List every basket with a 1-based transaction number.
for number, basket in enumerate(dataset, start=1):
    print(f"Transaction {number}: {basket}")


Transaction 1: ['Coffee', 'Donut', 'Sandwich']
Transaction 2: ['Coffee', 'Donut']
Transaction 3: ['Coffee', 'Sandwich']
Transaction 4: ['Coffee', 'Muffin']
Transaction 5: ['Donut', 'Muffin']


In [26]:
import pandas as pd

# Vocabulary: every distinct item across all transactions, in sorted order.
items = sorted(set().union(*dataset))

# One-hot table: one row per transaction, one boolean column per item
# (True when the item occurs in that transaction).
df = pd.DataFrame(
    [[item in trans for item in items] for trans in dataset],
    columns=items,
)
print(df)


   Coffee  Donut  Muffin  Sandwich
0    True   True   False      True
1    True   True   False     False
2    True  False   False      True
3    True  False    True     False
4   False   True    True     False


In [27]:
from itertools import combinations

def support(itemset, transactions=None):
    """Return the fraction of transactions that contain every item in ``itemset``.

    Parameters
    ----------
    itemset : iterable
        Items that must all be present in a transaction for it to count.
        An empty itemset matches every transaction.
    transactions : iterable of collections, optional
        Transactions to scan. When omitted, the notebook-level ``dataset``
        is used, which keeps existing one-argument calls working unchanged.

    Returns
    -------
    float
        Number of matching transactions divided by the total number of
        transactions, or ``0.0`` when there are no transactions at all.
    """
    if transactions is None:
        transactions = dataset
    transactions = list(transactions)
    if not transactions:
        # Nothing to count against; avoid a ZeroDivisionError.
        return 0.0
    # Materialise once so a one-shot iterable is not exhausted after the
    # first transaction has been checked.
    wanted = list(itemset)
    count = sum(all(i in trans for i in wanted) for trans in transactions)
    return count / len(transactions)

# Minimum fraction of transactions an itemset must appear in to be kept.
min_support = 0.4
frequent_itemsets = []

# Single items first, then pairs; both sizes are enumerated in the order of
# `items`, and each kept entry is stored as (frozenset, support).
for size in (1, 2):
    for candidate in combinations(items, size):
        sup = support(candidate)
        if sup >= min_support:
            frequent_itemsets.append((frozenset(candidate), sup))

# The header is built from min_support so it cannot drift away from the
# threshold that was actually applied above.
print(f"Frequent Itemsets (support ≥ {min_support}):")
for entry in frequent_itemsets:
    print(entry)


Frequent Itemsets (support ≥ 0.4):
(frozenset({'Coffee'}), 0.8)
(frozenset({'Donut'}), 0.6)
(frozenset({'Muffin'}), 0.4)
(frozenset({'Sandwich'}), 0.4)
(frozenset({'Donut', 'Coffee'}), 0.4)
(frozenset({'Coffee', 'Sandwich'}), 0.4)


In [28]:
# Derive every rule A → B from each frequent itemset that has at least two
# items. Each rule is stored as (A, B, support, confidence, lift), where A is
# a set and B is the frozenset of the remaining items.
rules = []
for itemset, sup_AB in frequent_itemsets:
    # Sort the members so antecedents are produced in the same order on every
    # run; iterating the frozenset directly follows string-hash order, which
    # can differ between interpreter sessions.
    members = sorted(itemset)
    # range(1, len(members)) is empty for single-item sets, so they yield no rules.
    for A_size in range(1, len(members)):
        for antecedent in combinations(members, A_size):
            A = set(antecedent)
            B = itemset - A
            # sup_AB is the support already stored with the itemset, so it is
            # not recomputed for every split.
            sup_A = support(A)
            sup_B = support(B)
            conf = sup_AB / sup_A   # how often B accompanies A
            lift = conf / sup_B     # confidence relative to B's overall frequency
            rules.append((A, B, sup_AB, conf, lift))

print("Association Rules:")
for A, B, sup_AB, conf, lift in rules:
    print(f"{set(A)} → {set(B)}, support={sup_AB:.2f}, confidence={conf:.2f}, lift={lift:.2f}")


Association Rules:
{'Donut'} → {'Coffee'}, support=0.40, confidence=0.67, lift=0.83
{'Coffee'} → {'Donut'}, support=0.40, confidence=0.50, lift=0.83
{'Coffee'} → {'Sandwich'}, support=0.40, confidence=0.50, lift=1.25
{'Sandwich'} → {'Coffee'}, support=0.40, confidence=1.00, lift=1.25


In [29]:
# A rule is "strong" when it meets both thresholds. The support cut-off reuses
# min_support (the value used to mine the frequent itemsets) and the confidence
# cut-off is named, so neither is a magic number that can silently diverge.
min_conf = 0.6
strong_rules = [r for r in rules if r[2] >= min_support and r[3] >= min_conf]
print("Strong Rules:")
for A, B, sup, conf, lift in strong_rules:
    print(f"{set(A)} → {set(B)}, support={sup:.2f}, confidence={conf:.2f}, lift={lift:.2f}")


Strong Rules:
{'Donut'} → {'Coffee'}, support=0.40, confidence=0.67, lift=0.83
{'Sandwich'} → {'Coffee'}, support=0.40, confidence=1.00, lift=1.25


In [30]:
# Restate each strong rule as a plain-English sentence followed by its metrics.
for A, B, sup, conf, lift in strong_rules:
    sentence = f"If a customer buys {list(A)}, they are likely to buy {list(B)}."
    metrics = f"Support={sup:.2f}, Confidence={conf:.2f}, Lift={lift:.2f}\n"
    print(sentence)
    print(metrics)


If a customer buys ['Donut'], they are likely to buy ['Coffee'].
Support=0.40, Confidence=0.67, Lift=0.83

If a customer buys ['Sandwich'], they are likely to buy ['Coffee'].
Support=0.40, Confidence=1.00, Lift=1.25



In [31]:
def generate_rules(min_support=0.4, min_conf=0.6, max_len=2):
    """Mine association rules from the notebook-level ``items`` via ``support``.

    Parameters
    ----------
    min_support : float
        Minimum support an itemset needs to be considered frequent.
    min_conf : float
        Minimum confidence a rule needs to be returned.
    max_len : int
        Largest itemset size to enumerate. The default of 2 (single items
        and pairs) reproduces the original behaviour.

    Returns
    -------
    list of tuple
        One ``(A, B, support, confidence, lift)`` tuple per rule A → B, where
        ``A`` is a ``set`` and ``B`` is the ``frozenset`` of the remaining
        items of the frequent itemset.
    """
    # Frequent itemsets of size 1..max_len, enumerated in the order of `items`.
    frequent_itemsets = []
    for size in range(1, max_len + 1):
        for candidate in combinations(items, size):
            sup = support(candidate)
            if sup >= min_support:
                frequent_itemsets.append((frozenset(candidate), sup))

    rules = []
    for itemset, sup_AB in frequent_itemsets:
        # Sorted members give the same rule order on every run; iterating the
        # frozenset directly follows string-hash order, which can differ
        # between interpreter sessions.
        members = sorted(itemset)
        # range(1, len(members)) is empty for single-item sets: no rules from them.
        for A_size in range(1, len(members)):
            for antecedent in combinations(members, A_size):
                A = set(antecedent)
                B = itemset - A
                sup_A = support(A)
                sup_B = support(B)
                if sup_A == 0 or sup_B == 0:
                    # Only reachable with a non-positive min_support and
                    # itemsets larger than pairs; confidence/lift are undefined.
                    continue
                conf = sup_AB / sup_A
                lift = conf / sup_B
                # sup_AB already passed min_support when the itemset was kept,
                # so only the confidence threshold remains to be checked.
                if conf >= min_conf:
                    rules.append((A, B, sup_AB, conf, lift))
    return rules

# Compare a stricter and a more relaxed pair of thresholds.
print("Rules with min_support=0.4, min_conf=0.6:")
strict_rules = generate_rules(min_support=0.4, min_conf=0.6)
print(strict_rules)

print("\nRules with min_support=0.2, min_conf=0.5:")
relaxed_rules = generate_rules(min_support=0.2, min_conf=0.5)
print(relaxed_rules)


Rules with min_support=0.4, min_conf=0.6:
[({'Donut'}, frozenset({'Coffee'}), 0.4, 0.6666666666666667, 0.8333333333333334), ({'Sandwich'}, frozenset({'Coffee'}), 0.4, 1.0, 1.25)]

Rules with min_support=0.2, min_conf=0.5:
[({'Donut'}, frozenset({'Coffee'}), 0.4, 0.6666666666666667, 0.8333333333333334), ({'Coffee'}, frozenset({'Donut'}), 0.4, 0.5, 0.8333333333333334), ({'Muffin'}, frozenset({'Coffee'}), 0.2, 0.5, 0.625), ({'Coffee'}, frozenset({'Sandwich'}), 0.4, 0.5, 1.25), ({'Sandwich'}, frozenset({'Coffee'}), 0.4, 1.0, 1.25), ({'Muffin'}, frozenset({'Donut'}), 0.2, 0.5, 0.8333333333333334), ({'Sandwich'}, frozenset({'Donut'}), 0.2, 0.5, 0.8333333333333334)]


In [32]:
# Keep only rules whose lift exceeds 1 (the fifth field of each rule tuple).
print("Rules with Lift > 1 (good associations):")
for antecedent, consequent, _rule_sup, _rule_conf, rule_lift in rules:
    if not rule_lift > 1:
        continue
    print(f"{set(antecedent)} → {set(consequent)}, lift={rule_lift:.2f}")


Rules with Lift > 1 (good associations):
{'Coffee'} → {'Sandwich'}, lift=1.25
{'Sandwich'} → {'Coffee'}, lift=1.25
