Practical 10: Apriori Algorithm for Market Basket Analysis

In [1]:
import pandas as pd
import seaborn as sns
from itertools import combinations
from collections import defaultdict



In [2]:
# Load seaborn's "titanic" example dataset into a DataFrame named `df`.
# NOTE(review): per seaborn's documentation, load_dataset fetches the data from
# seaborn's online example repository and caches it locally, so a fresh
# environment presumably needs network access on first run — confirm.
df = sns.load_dataset('titanic')



In [3]:
# Passenger class, sex and port of embarkation play the role of the "items"
# in a shopping basket. Keep only those columns and discard any row in which
# one of them is missing.
df = df.loc[:, ['pclass', 'sex', 'embarked']].dropna()

# Cast every value to a string so that each row becomes one transaction:
# a plain list holding that passenger's three categorical labels.
transactions = df.astype(str).to_numpy().tolist()



In [4]:
# Step 1: Enumerate candidate itemsets, count them, and keep the frequent ones
def get_frequent_itemsets(transactions, min_support=0.1, max_size=3):
    """Return all itemsets of up to ``max_size`` items with support >= ``min_support``.

    Candidates are enumerated by brute force: every combination of 1 to
    ``max_size`` items of every transaction is counted (no level-wise
    candidate pruning is performed).

    Parameters
    ----------
    transactions : iterable of iterables
        Each inner iterable is one basket of hashable items. Repeated items
        inside a basket are counted once.
    min_support : float, default 0.1
        Minimum fraction of transactions that must contain an itemset.
    max_size : int, default 3
        Largest itemset size to enumerate.

    Returns
    -------
    list of (tuple, float)
        ``(itemset, support)`` pairs in order of first occurrence. Each
        itemset tuple is sorted, so a given set of items always has exactly
        one representation. An empty ``transactions`` yields ``[]``.
    """
    transaction_list = [frozenset(tx) for tx in transactions]
    total_tx = len(transaction_list)

    # Count on frozenset keys. The tuples produced by combinations() follow
    # the iteration order of the underlying set, which is not canonical: two
    # equal sets can iterate in different orders, so tuple keys would split
    # one itemset's count across several keys and understate its support.
    item_counts = defaultdict(int)
    for size in range(1, max_size + 1):
        for transaction in transaction_list:
            for combo in combinations(transaction, size):
                item_counts[frozenset(combo)] += 1

    # Filter based on min support
    freq_itemsets = []
    for itemset, count in item_counts.items():
        support = count / total_tx
        if support < min_support:
            continue
        try:
            members = tuple(sorted(itemset))
        except TypeError:
            # Items of mutually non-comparable types: order by repr instead.
            members = tuple(sorted(itemset, key=repr))
        freq_itemsets.append((members, support))

    return freq_itemsets



In [5]:
# Step 2: Generate association rules from frequent itemsets
def generate_rules(freq_itemsets, min_confidence=0.6):
    """Derive association rules ``antecedent => consequent`` from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset
    is tried as the antecedent and the remaining items form the consequent.
    Confidence is ``support(itemset) / support(antecedent)``.

    Parameters
    ----------
    freq_itemsets : iterable of (iterable, float)
        ``(itemset, support)`` pairs. Item order inside an itemset is
        irrelevant; if the same itemset occurs more than once, the last
        support value is used.
    min_confidence : float, default 0.6
        Minimum confidence a rule needs in order to be returned.

    Returns
    -------
    list of (set, set, float)
        ``(antecedent, consequent, confidence)`` triples.

    Notes
    -----
    A candidate rule is skipped when the antecedent's support is missing
    from ``freq_itemsets`` or is zero: its confidence cannot be computed,
    and assuming a support of 1.0 would report the itemset's own support
    as if it were a confidence.
    """
    rules = []
    itemset_dict = {frozenset(items): support for items, support in freq_itemsets}

    for itemset, itemset_support in itemset_dict.items():
        # range(1, len(itemset)) is empty for single-item sets and keeps both
        # the antecedent and the consequent non-empty for larger ones.
        for size in range(1, len(itemset)):
            for antecedent in map(frozenset, combinations(itemset, size)):
                antecedent_support = itemset_dict.get(antecedent)
                if not antecedent_support:
                    # Unknown (None) or zero support: confidence is undefined.
                    continue
                confidence = itemset_support / antecedent_support
                if confidence >= min_confidence:
                    consequent = itemset - antecedent
                    rules.append((set(antecedent), set(consequent), confidence))

    return rules



In [6]:
# Mine the itemsets that reach 10% support, then derive from them every rule
# whose confidence is at least 60%.
frequent_itemsets = get_frequent_itemsets(transactions=transactions, min_support=0.1)
rules = generate_rules(freq_itemsets=frequent_itemsets, min_confidence=0.6)



In [7]:
# Print one line per frequent itemset: its members followed by the support
# rounded to two decimals.
print("Frequent Itemsets:")
for entry in frequent_itemsets:
    members, support_value = entry
    print(f"{set(members)}: support = {support_value:.2f}")



Frequent Itemsets:
{'S'}: support = 0.72
{'3'}: support = 0.55
{'male'}: support = 0.65
{'C'}: support = 0.19
{'1'}: support = 0.24
{'female'}: support = 0.35
{'2'}: support = 0.21
{'S', '3'}: support = 0.40
{'S', 'male'}: support = 0.50
{'3', 'male'}: support = 0.39
{'1', 'female'}: support = 0.10
{'S', 'female'}: support = 0.23
{'3', 'female'}: support = 0.16
{'S', '1'}: support = 0.14
{'1', 'male'}: support = 0.14
{'2', 'S'}: support = 0.11
{'2', 'male'}: support = 0.12
{'C', 'male'}: support = 0.11
{'S', 'male', '3'}: support = 0.30
{'2', 'S', 'male'}: support = 0.11


In [8]:
# Print one line per rule as "antecedent => consequent", with the confidence
# rounded to two decimals.
print("\nAssociation Rules:")
for rule in rules:
    lhs, rhs, conf = rule
    print(f"{lhs} => {rhs} (confidence = {conf:.2f})")



Association Rules:
{'3'} => {'S'} (confidence = 0.72)
{'S'} => {'male'} (confidence = 0.68)
{'male'} => {'S'} (confidence = 0.76)
{'3'} => {'male'} (confidence = 0.71)
{'male'} => {'3'} (confidence = 0.60)
{'female'} => {'S'} (confidence = 0.65)
{'S', 'male'} => {'3'} (confidence = 0.60)
{'S', '3'} => {'male'} (confidence = 0.75)
{'male', '3'} => {'S'} (confidence = 0.76)
{'2', 'S'} => {'male'} (confidence = 1.00)
{'2', 'male'} => {'S'} (confidence = 0.90)
