In [5]:
# Q1: What does the dataset represent?
print("Q1: The dataset represents 5 shopping transactions:")
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]

for i, transaction in enumerate(dataset, 1):
    print(f"Transaction {i}: {transaction}")

Q1: The dataset represents 5 shopping transactions:
Transaction 1: ['Coffee', 'Donut', 'Sandwich']
Transaction 2: ['Coffee', 'Donut']
Transaction 3: ['Coffee', 'Sandwich']
Transaction 4: ['Coffee', 'Muffin']
Transaction 5: ['Donut', 'Muffin']


In [6]:
# Q2: One-hot encoded DataFrame (manual implementation)
print("\nQ2: One-hot encoded DataFrame:")

# Get all unique items
all_items = sorted(list(set(item for transaction in dataset for item in transaction)))
print("Columns:", all_items)
print("Rows: 5 transactions")

# Create one-hot encoding manually
one_hot_data = []
for transaction in dataset:
    row = [1 if item in transaction else 0 for item in all_items]
    one_hot_data.append(row)

# Create DataFrame
import pandas as pd
df = pd.DataFrame(one_hot_data, columns=all_items)
print(df)
print("\nRows = Transactions, Columns = Items, 1 = Purchased, 0 = Not Purchased")


Q2: One-hot encoded DataFrame:
Columns: ['Coffee', 'Donut', 'Muffin', 'Sandwich']
Rows: 5 transactions
   Coffee  Donut  Muffin  Sandwich
0       1      1       0         1
1       1      1       0         0
2       1      0       0         1
3       1      0       1         0
4       0      1       1         0

Rows = Transactions, Columns = Items, 1 = Purchased, 0 = Not Purchased


In [7]:
# Q3: Frequent itemsets with min_support=0.4 (manual calculation)
print("\nQ3: Frequent itemsets with support ≥ 0.4:")

# Calculate support for single items
total_transactions = len(dataset)
min_support_count = 0.4 * total_transactions

# Single items
single_items = {}
for item in all_items:
    count = sum(1 for transaction in dataset if item in transaction)
    support = count / total_transactions
    if support >= 0.4:
        single_items[item] = support

# Double items (itemsets of size 2)
double_items = {}
for i in range(len(all_items)):
    for j in range(i+1, len(all_items)):
        item1, item2 = all_items[i], all_items[j]
        count = sum(1 for transaction in dataset if item1 in transaction and item2 in transaction)
        support = count / total_transactions
        if support >= 0.4:
            double_items[frozenset([item1, item2])] = support

# Display results
print("Single items:")
for item, support in single_items.items():
    print(f"  {item}: support = {support:.1f}")

print("\nDouble items:")
for itemset, support in double_items.items():
    print(f"  {set(itemset)}: support = {support:.1f}")


Q3: Frequent itemsets with support ≥ 0.4:
Single items:
  Coffee: support = 0.8
  Donut: support = 0.6
  Muffin: support = 0.4
  Sandwich: support = 0.4

Double items:
  {'Donut', 'Coffee'}: support = 0.4
  {'Coffee', 'Sandwich'}: support = 0.4


In [8]:
# Q4: Association rules with support, confidence, lift (manual calculation)
print("\nQ4: All possible association rules:")

# Calculate all possible rules from frequent itemsets
rules = []

# For each double itemset, generate both possible rules
for itemset in double_items.keys():
    items = list(itemset)
    
    # Rule: A -> B
    support_AB = double_items[itemset]
    support_A = single_items[items[0]]
    support_B = single_items[items[1]]
    
    confidence_A_B = support_AB / support_A
    lift_A_B = support_AB / (support_A * support_B)
    
    rules.append({
        'rule': f"{items[0]} → {items[1]}",
        'support': support_AB,
        'confidence': confidence_A_B,
        'lift': lift_A_B
    })
    
    # Rule: B -> A
    confidence_B_A = support_AB / support_B
    lift_B_A = support_AB / (support_A * support_B)
    
    rules.append({
        'rule': f"{items[1]} → {items[0]}",
        'support': support_AB,
        'confidence': confidence_B_A,
        'lift': lift_B_A
    })

# Display rules
for rule in rules:
    print(f"{rule['rule']}: support={rule['support']:.1f}, confidence={rule['confidence']:.3f}, lift={rule['lift']:.3f}")


Q4: All possible association rules:
Donut → Coffee: support=0.4, confidence=0.667, lift=0.833
Coffee → Donut: support=0.4, confidence=0.500, lift=0.833
Coffee → Sandwich: support=0.4, confidence=0.500, lift=1.250
Sandwich → Coffee: support=0.4, confidence=1.000, lift=1.250


In [9]:
# Q5: Rules with support≥0.4 and confidence≥0.6
print("\nQ5: Rules with support≥0.4 and confidence≥0.6:")

good_rules = [rule for rule in rules if rule['support'] >= 0.4 and rule['confidence'] >= 0.6]

if good_rules:
    for rule in good_rules:
        print(f"{rule['rule']}: support={rule['support']:.1f}, confidence={rule['confidence']:.3f}")
else:
    print("No rules meet both criteria")


Q5: Rules with support≥0.4 and confidence≥0.6:
Donut → Coffee: support=0.4, confidence=0.667
Sandwich → Coffee: support=0.4, confidence=1.000


In [10]:
# Q6: Interpret one strong rule
print("\nQ6: Strong rule interpretation:")

if good_rules:
    strong_rule = good_rules[0]
    antecedent, consequent = strong_rule['rule'].split(' → ')
    confidence_pct = strong_rule['confidence'] * 100
    
    print(f"If a customer buys {antecedent}, they are {confidence_pct:.0f}% likely to also buy {consequent}.")
    print("This suggests a strong association between these two items.")
else:
    print("No strong rules found meeting the criteria")


Q6: Strong rule interpretation:
If a customer buys Donut, they are 67% likely to also buy Coffee.
This suggests a strong association between these two items.


In [11]:
# Q7: Effect of changing parameters
print("\nQ7: How changing parameters affects rule generation:")

print("Higher min_support = Fewer rules (only very common itemsets)")
print("Lower min_support = More rules (includes less common itemsets)")
print("Higher min_confidence = Fewer rules (only very strong associations)")
print("Lower min_confidence = More rules (includes weaker associations)")
print("Example: min_support=0.5 would give fewer rules than min_support=0.4")


Q7: How changing parameters affects rule generation:
Higher min_support = Fewer rules (only very common itemsets)
Lower min_support = More rules (includes less common itemsets)
Higher min_confidence = Fewer rules (only very strong associations)
Lower min_confidence = More rules (includes weaker associations)
Example: min_support=0.5 would give fewer rules than min_support=0.4


In [12]:
# Q8: Why Lift > 1 indicates a good rule
print("\nQ8: Why Lift > 1 indicates a good association rule:")

print("Lift > 1 means:")
print("- Items are positively correlated")
print("- The rule is better than random chance")
print("- Buying the antecedent increases the probability of buying the consequent")
print("- It's a meaningful association worth considering for business decisions")
print("Lift = 1: Items are independent (no association)")
print("Lift < 1: Items are negatively correlated (avoid each other)")


Q8: Why Lift > 1 indicates a good association rule:
Lift > 1 means:
- Items are positively correlated
- The rule is better than random chance
- Buying the antecedent increases the probability of buying the consequent
- It's a meaningful association worth considering for business decisions
Lift = 1: Items are independent (no association)
Lift < 1: Items are negatively correlated (avoid each other)
