In [2]:
import pandas as pd
import numpy as np
from itertools import combinations
from collections import defaultdict

# Notebook title: heading text followed by a 60-character rule.
print("QUESTION 3: ASSOCIATION RULE MINING", "=" * 60, sep="\n")

QUESTION 3: ASSOCIATION RULE MINING


In [3]:
# Section banner for Part A.
print("\nPART A: DATA PREPARATION", "-" * 40, sep="\n")

# 1. Load dataset: ten market baskets, each a list of item names.
transactions = [
    ["Bread", "Milk", "Eggs"],
    ["Bread", "Butter"],
    ["Milk", "Diapers", "Beer"],
    ["Bread", "Milk", "Butter"],
    ["Milk", "Diapers", "Bread"],
    ["Beer", "Diapers"],
    ["Bread", "Milk", "Eggs", "Butter"],
    ["Eggs", "Milk"],
    ["Bread", "Diapers", "Beer"],
    ["Milk", "Butter"],
]

# Echo every basket with a 1-based transaction label (T1, T2, ...).
print("1. Transaction Dataset:")
for number, basket in enumerate(transactions, start=1):
    print(f"   T{number}: {basket}")



PART A: DATA PREPARATION
----------------------------------------
1. Transaction Dataset:
   T1: ['Bread', 'Milk', 'Eggs']
   T2: ['Bread', 'Butter']
   T3: ['Milk', 'Diapers', 'Beer']
   T4: ['Bread', 'Milk', 'Butter']
   T5: ['Milk', 'Diapers', 'Bread']
   T6: ['Beer', 'Diapers']
   T7: ['Bread', 'Milk', 'Eggs', 'Butter']
   T8: ['Eggs', 'Milk']
   T9: ['Bread', 'Diapers', 'Beer']
   T10: ['Milk', 'Butter']


In [4]:
# 2. One-hot encoding manually
# Distinct item names across all baskets, in sorted order; these become the columns.
all_items = sorted({item for basket in transactions for item in basket})
print(f"\n2. Unique Items: {all_items}")

# Create one-hot encoded DataFrame: one row per transaction, one column per item,
# holding 1 where the basket contains the item and 0 otherwise.
one_hot_data = [
    [int(item in basket) for item in all_items]
    for basket in transactions
]

encoded_df = pd.DataFrame(one_hot_data, columns=all_items)
print("\nOne-hot Encoded DataFrame:")
print(encoded_df.astype(int))


2. Unique Items: ['Beer', 'Bread', 'Butter', 'Diapers', 'Eggs', 'Milk']

One-hot Encoded DataFrame:
   Beer  Bread  Butter  Diapers  Eggs  Milk
0     0      1       0        0     1     1
1     0      1       1        0     0     0
2     1      0       0        1     0     1
3     0      1       1        0     0     1
4     0      1       0        1     0     1
5     1      0       0        1     0     0
6     0      1       1        0     1     1
7     0      0       0        0     1     1
8     1      1       0        1     0     0
9     0      0       1        0     0     1


In [5]:
# ==============================================
# PART B: MANUAL APRIORI IMPLEMENTATION
# ==============================================

# Part B banner: blank line, rule, title, parameter summary, rule.
print(
    "\n" + "=" * 60,
    "PART B: APRIORI ALGORITHM",
    "Parameters: min_support=0.2, min_confidence=0.5",
    "=" * 60,
    sep="\n",
)

# Helper function to calculate support
def calculate_support(itemset, transactions):
    """Return the fraction of transactions that contain every item of ``itemset``.

    Args:
        itemset: Iterable of items to look for (list, set, frozenset, or a
            one-shot iterator).
        transactions: Sized collection of transactions; each transaction must
            support the ``in`` operator.

    Returns:
        Matching transactions divided by the total number of transactions,
        as a float. Returns 0.0 when ``transactions`` is empty instead of
        raising ``ZeroDivisionError``.
    """
    total = len(transactions)
    if total == 0:
        return 0.0

    # Materialise once: a one-shot iterator would otherwise be exhausted after
    # the first transaction and every later transaction would match vacuously.
    required = tuple(itemset)

    matches = sum(
        1
        for transaction in transactions
        if all(item in transaction for item in required)
    )
    return matches / total

# Find all frequent itemsets
# Minimum support threshold: the itemset search keeps an itemset as "frequent"
# when its support is >= this value.
min_support = 0.2
# Number of transactions in the dataset.
# NOTE(review): not referenced again in the visible cells — confirm it is still needed.
n_transactions = len(transactions)


PART B: APRIORI ALGORITHM
Parameters: min_support=0.2, min_confidence=0.5


In [6]:
print("\n1. FINDING FREQUENT ITEMSETS:")


def frequent_itemsets_of_size(size, previous_level=None):
    """Return the frequent itemsets containing exactly ``size`` items.

    Candidates are the ``size``-item combinations of ``all_items`` (in
    ``all_items`` order). When ``previous_level`` is given, a candidate is
    scored only if every one of its (size - 1)-item subsets is in it; a
    superset can never have higher support than a subset, so skipped
    candidates cannot be frequent and the result is unchanged.

    Args:
        size: Number of items per itemset.
        previous_level: Optional set of frozensets holding the frequent
            itemsets of ``size - 1`` items.

    Returns:
        List of ``(frozenset, support)`` pairs with support >= ``min_support``.
    """
    found = []
    for candidate in combinations(all_items, size):
        candidate_set = frozenset(candidate)
        if previous_level is not None and any(
            candidate_set - {item} not in previous_level for item in candidate
        ):
            continue
        candidate_support = calculate_support(candidate_set, transactions)
        if candidate_support >= min_support:
            found.append((candidate_set, candidate_support))
    return found


def print_frequent_itemsets(size, found):
    """Print one level of frequent itemsets, highest support first."""
    print(f"\nFrequent {size}-itemsets (min_support={min_support}):")
    for itemset, support in sorted(found, key=lambda x: x[1], reverse=True):
        print(f"   {set(itemset)}: Support = {support:.3f}")


# Sizes 1-3 are always computed and reported (even when empty). Larger sizes
# are explored only while the search keeps finding frequent itemsets, so the
# search is no longer capped at 3-itemsets.
frequent_by_size = {}
previous_level = None
size = 1
while True:
    level = frequent_itemsets_of_size(size, previous_level)
    if size > 3 and not level:
        break
    frequent_by_size[size] = level
    print_frequent_itemsets(size, level)
    previous_level = {entry[0] for entry in level}
    size += 1

# Per-size lists kept under their original names.
frequent_1 = frequent_by_size[1]
frequent_2 = frequent_by_size[2]
frequent_3 = frequent_by_size[3]

# Combine all frequent itemsets (ascending itemset size)
all_frequent = [
    entry
    for level_size in sorted(frequent_by_size)
    for entry in frequent_by_size[level_size]
]


1. FINDING FREQUENT ITEMSETS:

Frequent 1-itemsets (min_support=0.2):
   {'Milk'}: Support = 0.700
   {'Bread'}: Support = 0.600
   {'Butter'}: Support = 0.400
   {'Diapers'}: Support = 0.400
   {'Beer'}: Support = 0.300
   {'Eggs'}: Support = 0.300

Frequent 2-itemsets (min_support=0.2):
   {'Milk', 'Bread'}: Support = 0.400
   {'Beer', 'Diapers'}: Support = 0.300
   {'Bread', 'Butter'}: Support = 0.300
   {'Milk', 'Butter'}: Support = 0.300
   {'Milk', 'Eggs'}: Support = 0.300
   {'Bread', 'Diapers'}: Support = 0.200
   {'Bread', 'Eggs'}: Support = 0.200
   {'Milk', 'Diapers'}: Support = 0.200

Frequent 3-itemsets (min_support=0.2):
   {'Milk', 'Bread', 'Butter'}: Support = 0.200
   {'Milk', 'Bread', 'Eggs'}: Support = 0.200


In [7]:
# ==============================================
# GENERATE ASSOCIATION RULES
# ==============================================

# Sub-section heading for rule generation, underlined with a 40-character rule.
print("\n2. GENERATING ASSOCIATION RULES:", "-" * 40, sep="\n")

def generate_rules(frequent_itemsets, min_confidence=0.5):
    """Generate association rules from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset is
    tried as the antecedent and the remaining items form the consequent.

    Args:
        frequent_itemsets: Iterable of ``(itemset, support)`` pairs. Supports
            of antecedents and consequents are taken from this collection when
            present; anything missing is computed with ``calculate_support``
            against the module-level ``transactions``.
        min_confidence: Rules with confidence below this value are dropped.

    Returns:
        List of dicts with keys ``'antecedent'`` and ``'consequent'``
        (frozensets), ``'support'`` (support of the full itemset),
        ``'confidence'`` (itemset support / antecedent support) and ``'lift'``
        (confidence / consequent support). Rules appear in a deterministic
        order: input order of the itemsets, then antecedent size, then sorted
        item order.
    """
    frequent_itemsets = list(frequent_itemsets)

    # Supports already known from the frequent-itemset search; avoids
    # rescanning the transactions for every candidate rule.
    known_support = {
        frozenset(itemset): support for itemset, support in frequent_itemsets
    }

    def support_of(subset):
        """Look up a subset's support, computing and caching it on a miss."""
        if subset not in known_support:
            known_support[subset] = calculate_support(subset, transactions)
        return known_support[subset]

    rules = []

    for itemset, support in frequent_itemsets:
        itemset = frozenset(itemset)
        if len(itemset) < 2:
            continue

        # Sorted so the enumeration order does not depend on set iteration
        # order, which varies between runs for strings (hash randomization).
        items = sorted(itemset)

        # Generate all possible antecedents and consequents
        for size in range(1, len(items)):
            for antecedent in combinations(items, size):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent

                # Guard the confidence division.
                ant_support = support_of(antecedent)
                if ant_support == 0:
                    continue
                confidence = support / ant_support

                # Guard the lift division.
                cons_support = support_of(consequent)
                if cons_support == 0:
                    continue
                lift = confidence / cons_support

                if confidence >= min_confidence:
                    rules.append({
                        'antecedent': antecedent,
                        'consequent': consequent,
                        'support': support,
                        'confidence': confidence,
                        'lift': lift
                    })

    return rules

# Mine rules from every frequent itemset, keeping those at or above this confidence.
min_confidence = 0.5
rules = generate_rules(all_frequent, min_confidence)

# Strongest lift first; the in-place sort is stable, so ties keep generation order.
rules.sort(key=lambda entry: entry["lift"], reverse=True)

# Table heading: title, heavy rule, left-aligned column names, light rule.
print(f"\nAssociation Rules (min_confidence={min_confidence}):")
print("=" * 80)
print(" ".join((
    "Antecedent".ljust(30),
    "Consequent".ljust(20),
    "Support".ljust(10),
    "Confidence".ljust(12),
    "Lift".ljust(10),
)))
print("-" * 80)

# One row per rule; item names are sorted so each side prints consistently.
for entry in rules:
    lhs = ", ".join(sorted(entry["antecedent"]))
    rhs = ", ".join(sorted(entry["consequent"]))
    columns = (
        f"{lhs:<30}",
        f"{rhs:<20}",
        f"{entry['support']:<10.3f}",
        f"{entry['confidence']:<12.3f}",
        f"{entry['lift']:<10.3f}",
    )
    print(" ".join(columns))


2. GENERATING ASSOCIATION RULES:
----------------------------------------

Association Rules (min_confidence=0.5):
Antecedent                     Consequent           Support    Confidence   Lift      
--------------------------------------------------------------------------------
Beer                           Diapers              0.300      1.000        2.500     
Diapers                        Beer                 0.300      0.750        2.500     
Eggs                           Bread, Milk          0.200      0.667        1.667     
Bread, Milk                    Eggs                 0.200      0.500        1.667     
Eggs                           Milk                 0.300      1.000        1.429     
Bread, Eggs                    Milk                 0.200      1.000        1.429     
Bread                          Butter               0.300      0.500        1.250     
Butter                         Bread, Milk          0.200      0.500        1.250     
Bread, Milk         

In [10]:

# ==============================================
# PART C: INTERPRETATION
# ==============================================

# Part C banner.
print("\n" + "=" * 60, "PART C: INTERPRETATION", "=" * 60, sep="\n")

# Top 3 rules by lift: takes the first three entries of ``rules``
# (assumes ``rules`` was sorted by descending lift in an earlier cell).
top_3_rules = rules[:3]

print("\n1. THREE STRONGEST RULES (by Lift):")
for rank, top_rule in enumerate(top_3_rules, start=1):
    lhs = ", ".join(sorted(top_rule["antecedent"]))
    rhs = ", ".join(sorted(top_rule["consequent"]))
    confidence_pct = top_rule["confidence"] * 100

    print(f"\nRule {rank}: IF {lhs} THEN {rhs}")
    print(f"   Support: {top_rule['support']:.3f} ({top_rule['support']*100:.1f}% of transactions)")
    print(f"   Confidence: {top_rule['confidence']:.3f} ({confidence_pct:.1f}% certainty)")
    print(f"   Lift: {top_rule['lift']:.3f}")

    # Interpretation: the two-line insight is selected purely by rank (1, 2 or 3).
    insight_by_rank = {
        1: (
            f"   Insight: This is the strongest association. Customers buying {lhs}",
            f"   are {top_rule['lift']:.1f} times more likely to buy {rhs} than average.",
        ),
        2: (
            f"   Insight: Very reliable rule with {confidence_pct:.1f}% confidence.",
            f"   {lhs} strongly predicts purchase of {rhs}.",
        ),
        3: (
            f"   Insight: Good predictive rule. When {lhs} is purchased,",
            f"   there's {confidence_pct:.1f}% chance {rhs} will also be bought.",
        ),
    }
    for insight_line in insight_by_rank.get(rank, ()):
        print(insight_line)


PART C: INTERPRETATION

1. THREE STRONGEST RULES (by Lift):

Rule 1: IF Beer THEN Diapers
   Support: 0.300 (30.0% of transactions)
   Confidence: 1.000 (100.0% certainty)
   Lift: 2.500
   Insight: This is the strongest association. Customers buying Beer
   are 2.5 times more likely to buy Diapers than average.

Rule 2: IF Diapers THEN Beer
   Support: 0.300 (30.0% of transactions)
   Confidence: 0.750 (75.0% certainty)
   Lift: 2.500
   Insight: Very reliable rule with 75.0% confidence.
   Diapers strongly predicts purchase of Beer.

Rule 3: IF Eggs THEN Bread, Milk
   Support: 0.200 (20.0% of transactions)
   Confidence: 0.667 (66.7% certainty)
   Lift: 1.667
   Insight: Good predictive rule. When Eggs is purchased,
   there's 66.7% chance Bread, Milk will also be bought.


In [11]:
# Business recommendations
# The text below is static; none of it is computed from ``rules``.
# NOTE(review): from the supports printed by the frequent-itemset cell
# (Bread 0.6, Milk 0.7, Bread+Milk 0.4) the Bread/Milk lift is
# 0.4 / (0.6 * 0.7) ~= 0.95, i.e. below 1 — confirm the 'Bread & Milk'
# bundle suggestion is intended.
print("\n".join((
    "\n" + "=" * 60,
    "2. BUSINESS RECOMMENDATIONS",
    "=" * 60,
    # Recommendation 1
    "\nRecommendation 1: BUNDLED PRODUCT OFFERS",
    "-" * 40,
    "Create special offers for frequently purchased together items:",
    "• 'Bread & Milk' combo discount",
    "• 'Butter & Bread' package deal",
    "• 'Diapers & Beer' weekend special",
    "This increases average transaction value through cross-selling.",
    # Recommendation 2
    "\nRecommendation 2: STRATEGIC STORE LAYOUT",
    "-" * 40,
    "Reorganize store layout based on association rules:",
    "• Place Butter near the Bread section",
    "• Position Milk close to both Bread and Eggs",
    "• Create a 'Baby & Relax' aisle with Diapers and Beer",
    "• Use end-caps for complementary item displays",
)))


2. BUSINESS RECOMMENDATIONS

Recommendation 1: BUNDLED PRODUCT OFFERS
----------------------------------------
Create special offers for frequently purchased together items:
• 'Bread & Milk' combo discount
• 'Butter & Bread' package deal
• 'Diapers & Beer' weekend special
This increases average transaction value through cross-selling.

Recommendation 2: STRATEGIC STORE LAYOUT
----------------------------------------
Reorganize store layout based on association rules:
• Place Butter near the Bread section
• Position Milk close to both Bread and Eggs
• Create a 'Baby & Relax' aisle with Diapers and Beer
• Use end-caps for complementary item displays


In [13]:
# ==============================================
# SUMMARY STATISTICS
# ==============================================

# Summary banner.
print("\n" + "=" * 60, "SUMMARY STATISTICS", "=" * 60, sep="\n")

# Calculate basic statistics
total_transactions = len(transactions)
total_items = len(all_items)

# Item frequencies: occurrences of each item across all baskets. Keys are
# inserted in first-seen order, which decides ties in the ranking below.
item_counts = {}
for basket in transactions:
    for product in basket:
        if product in item_counts:
            item_counts[product] += 1
        else:
            item_counts[product] = 1

# Overview of the dataset and of the mining parameters and results.
print("\n".join((
    "\nDataset Overview:",
    f"• Total Transactions: {total_transactions}",
    f"• Unique Items: {total_items}",
    f"• Minimum Support: {min_support}",
    f"• Minimum Confidence: {min_confidence}",
    f"• Frequent Itemsets Found: {len(all_frequent)}",
    f"• Association Rules Generated: {len(rules)}",
)))

# Five most frequent items, highest count first (stable sort keeps first-seen order on ties).
print("\nMost Frequent Items:")
ranked_items = sorted(item_counts.items(), key=lambda pair: pair[1], reverse=True)
for name, purchases in ranked_items[:5]:
    share_pct = purchases / total_transactions * 100
    print(f"  {name}: {purchases} purchases ({share_pct:.1f}%)")



SUMMARY STATISTICS

Dataset Overview:
• Total Transactions: 10
• Unique Items: 6
• Minimum Support: 0.2
• Minimum Confidence: 0.5
• Frequent Itemsets Found: 16
• Association Rules Generated: 19

Most Frequent Items:
  Milk: 7 purchases (70.0%)
  Bread: 6 purchases (60.0%)
  Butter: 4 purchases (40.0%)
  Diapers: 4 purchases (40.0%)
  Eggs: 3 purchases (30.0%)
