In [1]:
# 📦 Step 1: Import libraries

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# 📄 Step 2: Define transactions

# Toy market-basket dataset: one basket per line, items separated by spaces.
# Splitting each line yields the list-of-lists layout (one inner list of item
# names per transaction) that the encoding step consumes.
transactions = [
    basket.split()
    for basket in (
        "milk bread butter",
        "bread diapers beer eggs",
        "milk diapers beer cola",
        "bread milk diapers beer",
        "bread milk diapers cola",
        "milk bread butter",
        "diapers cola",
        "milk bread butter",
        "bread butter eggs",
        "milk diapers bread butter",
    )
]



In [3]:
# 🧼 Step 3: Transform data

# One-hot encode the baskets: the encoder first learns the set of distinct
# items, then maps every transaction onto a boolean row with one column per
# item (True = the item is in that basket).
te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)

# Label the boolean matrix with the item names the encoder discovered.
df = pd.DataFrame(data=te_array, columns=te.columns_)

print("🔍 One-hot encoded data:", df, sep="\n")

🔍 One-hot encoded data:
    beer  bread  butter   cola  diapers   eggs   milk
0  False   True    True  False    False  False   True
1   True   True   False  False     True   True  False
2   True  False   False   True     True  False   True
3   True   True   False  False     True  False   True
4  False   True   False   True     True  False   True
5  False   True    True  False    False  False   True
6  False  False   False   True     True  False  False
7  False   True    True  False    False  False   True
8  False   True    True  False    False   True  False
9  False   True    True  False     True  False   True


In [4]:
# 📊 Step 4: Generate frequent itemsets
# ----------------------------

# Minimum support threshold: the fraction of transactions an itemset must
# appear in to be kept (0.4 = 40% of transactions). Defined once so the
# Apriori call and the printed label below can never disagree.
MIN_SUPPORT = 0.4

# use_colnames=True makes the result list item names instead of column indices.
frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)

print(f"\n📈 Frequent itemsets (support ≥ {MIN_SUPPORT}):")
print(frequent_itemsets)


📈 Frequent itemsets (support ≥ 0.4):
   support               itemsets
0      0.8                (bread)
1      0.5               (butter)
2      0.6              (diapers)
3      0.7                 (milk)
4      0.5        (bread, butter)
5      0.4       (bread, diapers)
6      0.6          (bread, milk)
7      0.4         (butter, milk)
8      0.4        (milk, diapers)
9      0.4  (bread, butter, milk)


In [5]:
# 🔗 Step 5: Generate association rules

# Thresholds are defined once so the filtering code and the printed label
# below stay in sync when either value is tuned.
MIN_CONFIDENCE = 0.6  # keep rules whose confidence is at least this value
MIN_LIFT = 1.0        # keep rules whose lift is at least this value

# First pass: let mlxtend derive rules from the frequent itemsets, keeping
# only those that meet the confidence threshold.
candidate_rules = association_rules(
    frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE
)

# Second pass: drop rules below the lift threshold. The boolean mask keeps
# the original row labels, so the printed index may contain gaps.
rules = candidate_rules[candidate_rules['lift'] >= MIN_LIFT]

print(f"\n📋 Association Rules (confidence ≥ {MIN_CONFIDENCE} and lift ≥ {MIN_LIFT}):")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


📋 Association Rules (confidence ≥ 0.6 and lift ≥ 1.0):
        antecedents    consequents  support  confidence      lift
0           (bread)       (butter)      0.5    0.625000  1.250000
1          (butter)        (bread)      0.5    1.000000  1.250000
3           (bread)         (milk)      0.6    0.750000  1.071429
4            (milk)        (bread)      0.6    0.857143  1.071429
5          (butter)         (milk)      0.4    0.800000  1.142857
7   (bread, butter)         (milk)      0.4    0.800000  1.142857
8     (bread, milk)       (butter)      0.4    0.666667  1.333333
9    (butter, milk)        (bread)      0.4    1.000000  1.250000
10         (butter)  (bread, milk)      0.4    0.800000  1.333333


In [6]:
# 📊 Optional: Sort by lift
# ----------------------------

# Rank the filtered rules from highest to lowest lift; `rules` itself is left
# untouched because sort_values returns a new frame.
rules_sorted = rules.sort_values('lift', ascending=False)

print("\n🏆 Top rules by lift:")
# Show only the columns needed to read a rule and judge its strength.
print(
    rules_sorted.loc[
        :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
    ]
)


🏆 Top rules by lift:
        antecedents    consequents  support  confidence      lift
10         (butter)  (bread, milk)      0.4    0.800000  1.333333
8     (bread, milk)       (butter)      0.4    0.666667  1.333333
0           (bread)       (butter)      0.5    0.625000  1.250000
1          (butter)        (bread)      0.5    1.000000  1.250000
9    (butter, milk)        (bread)      0.4    1.000000  1.250000
5          (butter)         (milk)      0.4    0.800000  1.142857
7   (bread, butter)         (milk)      0.4    0.800000  1.142857
4            (milk)        (bread)      0.6    0.857143  1.071429
3           (bread)         (milk)      0.6    0.750000  1.071429


# ✅ Summary
 - We prepared transactional data
 - Applied the Apriori algorithm to extract frequent itemsets
 - Generated association rules using confidence and lift thresholds
 - Interpreted meaningful patterns for decision-making (e.g., product bundling: every transaction containing butter also contains bread — confidence 1.0, lift 1.25)
