In [None]:
#Apriori Association

In [16]:
# Step 1: Import libraries
import pandas as pd
import itertools

In [18]:
# Q1: Transactions

dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]
transactions = [set(t) for t in dataset]
n = len(transactions)
print("Q1 - Transactions (each transaction is a set):")
for i, t in enumerate(transactions, 1):
    print(f" T{i} = {sorted(list(t))}")
print()

Q1 - Transactions (each transaction is a set):
 T1 = ['Coffee', 'Donut', 'Sandwich']
 T2 = ['Coffee', 'Donut']
 T3 = ['Coffee', 'Sandwich']
 T4 = ['Coffee', 'Muffin']
 T5 = ['Donut', 'Muffin']



In [20]:
# Q2: One-hot encode and show table
items = sorted({it for t in dataset for it in t})
print("Q2 - One-hot encoded table (rows=T1..Tn, cols=items):")
header = ["T\\Item"] + items
rows = []
print("{:<4}".format(header[0]) + "".join(f"{it:>10}" for it in header[1:]))
for idx, tr in enumerate(transactions, 1):
    row = [("1" if it in tr else "0") for it in items]
    print(f"T{idx:<3}" + "".join(f"{val:>10}" for val in row))
print()

def support(itemset):
    """Return support as fraction for a tuple/list/set itemset"""
    s = set(itemset)
    count = sum(1 for tr in transactions if s.issubset(tr))
    return count / n

Q2 - One-hot encoded table (rows=T1..Tn, cols=items):
T\Item    Coffee     Donut    Muffin  Sandwich
T1           1         1         0         1
T2           1         1         0         0
T3           1         0         0         1
T4           1         0         1         0
T5           0         1         1         0



In [22]:
# Q3: Compute support for ALL itemsets and list frequent itemsets
print("Q3 - Support for all non-empty itemsets:")
all_supports = {}
for k in range(1, len(items)+1):
    for comb in itertools.combinations(items, k):
        sup = support(comb)
        all_supports[comb] = sup
        print(f" {comb} : support = {sup:.4f}")

min_support = 0.4
frequent_itemsets = [comb for comb, sup in all_supports.items() if sup >= min_support]
print()
print(f"Frequent itemsets (support >= {min_support}):")
for comb in sorted(frequent_itemsets, key=lambda x:(len(x), x)):
    print(f" {comb} -> support={all_supports[comb]:.4f}")
print()

Q3 - Support for all non-empty itemsets:
 ('Coffee',) : support = 0.8000
 ('Donut',) : support = 0.6000
 ('Muffin',) : support = 0.4000
 ('Sandwich',) : support = 0.4000
 ('Coffee', 'Donut') : support = 0.4000
 ('Coffee', 'Muffin') : support = 0.2000
 ('Coffee', 'Sandwich') : support = 0.4000
 ('Donut', 'Muffin') : support = 0.2000
 ('Donut', 'Sandwich') : support = 0.2000
 ('Muffin', 'Sandwich') : support = 0.0000
 ('Coffee', 'Donut', 'Muffin') : support = 0.0000
 ('Coffee', 'Donut', 'Sandwich') : support = 0.2000
 ('Coffee', 'Muffin', 'Sandwich') : support = 0.0000
 ('Donut', 'Muffin', 'Sandwich') : support = 0.0000
 ('Coffee', 'Donut', 'Muffin', 'Sandwich') : support = 0.0000

Frequent itemsets (support >= 0.4):
 ('Coffee',) -> support=0.8000
 ('Donut',) -> support=0.6000
 ('Muffin',) -> support=0.4000
 ('Sandwich',) -> support=0.4000
 ('Coffee', 'Donut') -> support=0.4000
 ('Coffee', 'Sandwich') -> support=0.4000



In [24]:
# Q4: Generate ALL possible association rules from frequent itemsets
print("Q4 - All association rules from frequent itemsets (support, confidence, lift):")
rules = []
for itemset in frequent_itemsets:
    if len(itemset) < 2:
        continue
    itemset_support = all_supports[itemset]
    # consider all non-empty proper subsets as antecedents
    for r in range(1, len(itemset)):
        for antecedent in itertools.combinations(itemset, r):
            antecedent = tuple(sorted(antecedent))
            consequent = tuple(sorted(set(itemset) - set(antecedent)))
            ant_sup = all_supports[antecedent]
            cons_sup = all_supports[consequent]
            conf = itemset_support / ant_sup if ant_sup > 0 else 0
            lift = conf / cons_sup if cons_sup > 0 else float('nan')
            rule = {
                "antecedent": antecedent,
                "consequent": consequent,
                "support": round(itemset_support, 4),
                "confidence": round(conf, 4),
                "lift": round(lift, 4)
            }
            rules.append(rule)
for r in rules:
    print(f" {r['antecedent']} -> {r['consequent']}: support={r['support']}, conf={r['confidence']}, lift={r['lift']}")
print()

Q4 - All association rules from frequent itemsets (support, confidence, lift):
 ('Coffee',) -> ('Donut',): support=0.4, conf=0.5, lift=0.8333
 ('Donut',) -> ('Coffee',): support=0.4, conf=0.6667, lift=0.8333
 ('Coffee',) -> ('Sandwich',): support=0.4, conf=0.5, lift=1.25
 ('Sandwich',) -> ('Coffee',): support=0.4, conf=1.0, lift=1.25



In [26]:
# Q5: Which satisfy support>=0.4 and confidence>=0.6
min_confidence = 0.6
strong_rules = [r for r in rules if r['support'] >= min_support and r['confidence'] >= min_confidence]
print(f"Q5 - Rules with support>={min_support} and confidence>={min_confidence}:")
if strong_rules:
    for r in strong_rules:
        print(f" {r['antecedent']} -> {r['consequent']}: support={r['support']}, conf={r['confidence']}, lift={r['lift']}")
else:
    print(" No rules meet the thresholds.")
print()

Q5 - Rules with support>=0.4 and confidence>=0.6:
 ('Donut',) -> ('Coffee',): support=0.4, conf=0.6667, lift=0.8333
 ('Sandwich',) -> ('Coffee',): support=0.4, conf=1.0, lift=1.25



In [28]:
# Q6: Interpret one strong rule (choose the best)
if strong_rules:
    chosen = strong_rules[0]
    a = ", ".join(chosen['antecedent'])
    c = ", ".join(chosen['consequent'])
    print("Q6 - Interpretation of a strong rule:")
    print(f" If a customer buys {a}, they are likely to also buy {c} (confidence={chosen['confidence']}, lift={chosen['lift']}).")
else:
    print("Q6 - No strong rules to interpret under given thresholds.")
print()

Q6 - Interpretation of a strong rule:
 If a customer buys Donut, they are likely to also buy Coffee (confidence=0.6667, lift=0.8333).



In [30]:
# Q7: Experiment with different min_support and show counts
print("Q7 - Experiment: effect of changing min_support")
for ms in [0.5, 0.4, 0.3, 0.2]:
    freq = [comb for comb, s in all_supports.items() if s >= ms]
    # count possible rules from freq
    rule_count = 0
    for itemset in freq:
        if len(itemset) < 2:
            continue
        # each itemset of size k can generate (2^k - 2) directed rules total; but we'll count them explicitly:
        for r in range(1, len(itemset)):
            for _ in itertools.combinations(itemset, r):
                rule_count += 1
    print(f" min_support={ms:.2f} => frequent_itemsets={len(freq)}, possible_rules={rule_count}")
print()

Q7 - Experiment: effect of changing min_support
 min_support=0.50 => frequent_itemsets=2, possible_rules=0
 min_support=0.40 => frequent_itemsets=6, possible_rules=4
 min_support=0.30 => frequent_itemsets=6, possible_rules=4
 min_support=0.20 => frequent_itemsets=10, possible_rules=16



In [32]:
# Q8: Why Lift > 1 indicates a good rule (explanation)
print("Q8 - Why lift > 1 indicates a useful rule:")
print(" Lift(A->B) = P(B|A) / P(B). If >1 then P(B|A) > P(B): seeing A increases the chance of B => positive association.")

Q8 - Why lift > 1 indicates a useful rule:
 Lift(A->B) = P(B|A) / P(B). If >1 then P(B|A) > P(B): seeing A increases the chance of B => positive association.
