# 1. Imports

In [1]:
import random
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# 2. Simulating transactions

In [2]:
# Catalogue of unique items that can appear in a transaction
items = ['Bread','Milk','Eggs','Cheese','Apples','Bananas','Chicken','Cereal']

# Fixed seed so that Restart Kernel -> Run All regenerates the same baskets
# (and therefore the same frequent itemsets and rules in the later cells).
SEED = 42
# Number of simulated transactions and the smallest/largest basket size
N_TRANSACTIONS = 20
MIN_ITEMS, MAX_ITEMS = 2, 6

# Dedicated generator: seeding it leaves the global `random` state untouched
rng = random.Random(SEED)

# Each transaction is a sample without replacement, so items within one
# basket are unique; the basket size is drawn uniformly from 2..6 inclusive.
transactions = [
    rng.sample(items, rng.randint(MIN_ITEMS, MAX_ITEMS))
    for _ in range(N_TRANSACTIONS)
]

# Display each transaction with its index (starting at 1)
for i, txn in enumerate(transactions, 1):
    print(f"Txn {i}: {txn}")

Txn 1: ['Milk', 'Bananas', 'Bread', 'Apples']
Txn 2: ['Milk', 'Chicken', 'Cheese']
Txn 3: ['Eggs', 'Bananas', 'Bread', 'Cheese', 'Milk']
Txn 4: ['Cheese', 'Bread', 'Cereal', 'Apples']
Txn 5: ['Cheese', 'Chicken', 'Bread', 'Milk', 'Eggs', 'Bananas']
Txn 6: ['Bananas', 'Milk', 'Eggs']
Txn 7: ['Chicken', 'Bread', 'Cheese', 'Bananas', 'Eggs', 'Cereal']
Txn 8: ['Chicken', 'Cheese', 'Cereal', 'Eggs']
Txn 9: ['Chicken', 'Eggs', 'Bread']
Txn 10: ['Chicken', 'Cereal', 'Bananas']
Txn 11: ['Milk', 'Apples']
Txn 12: ['Bananas', 'Bread']
Txn 13: ['Cereal', 'Milk', 'Apples']
Txn 14: ['Chicken', 'Bread', 'Apples']
Txn 15: ['Cereal', 'Cheese', 'Bread', 'Chicken', 'Apples', 'Bananas']
Txn 16: ['Apples', 'Milk']
Txn 17: ['Bananas', 'Apples', 'Chicken']
Txn 18: ['Apples', 'Milk', 'Chicken']
Txn 19: ['Apples', 'Chicken', 'Bread', 'Eggs', 'Bananas', 'Cereal']
Txn 20: ['Cheese', 'Cereal', 'Chicken', 'Bread', 'Bananas']


# 3. One-Hot Encoding

In [3]:
# One-hot encode the baskets: one row per transaction, one boolean column per
# item, True when that item is in the transaction.
# Build one {item: bool} record per transaction, then create the DataFrame once.
records = []
for basket in transactions:
    basket_items = set(basket)  # set membership instead of scanning the list
    records.append({name: (name in basket_items) for name in items})

oht = pd.DataFrame(records)

oht.head()

Unnamed: 0,Bread,Milk,Eggs,Cheese,Apples,Bananas,Chicken,Cereal
0,True,True,False,False,True,True,False,False
1,False,True,False,True,False,False,True,False
2,True,True,True,True,False,True,False,False
3,True,False,False,True,True,False,False,True
4,True,True,True,True,False,True,True,False


# 4. Apriori: Frequent Itemsets (Support ≥ 0.3)

In [4]:
# Minimum share of transactions an itemset must appear in to count as frequent
MIN_SUPPORT = 0.3

# Mine frequent itemsets from the one-hot table; use_colnames=True makes the
# result list item names rather than column indices.
freq_itemsets = apriori(oht, min_support=MIN_SUPPORT, use_colnames=True)
# Bare last expression: rendered as a rich table instead of plain-text print
freq_itemsets

    support            itemsets
0      0.55             (Bread)
1      0.45              (Milk)
2      0.35              (Eggs)
3      0.40            (Cheese)
4      0.50            (Apples)
5      0.55           (Bananas)
6      0.60           (Chicken)
7      0.40            (Cereal)
8      0.30     (Cheese, Bread)
9      0.40    (Bananas, Bread)
10     0.35    (Chicken, Bread)
11     0.30   (Chicken, Cheese)
12     0.35  (Bananas, Chicken)
13     0.30   (Cereal, Chicken)


# 5. Generate Association Rules (confidence ≥ 0.7)

In [9]:
# Derive rules from the frequent itemsets, keeping those with confidence >= 0.7,
# and order them from most to least confident in a single chained expression.
rules = (
    association_rules(freq_itemsets, metric="confidence", min_threshold=0.7)
    .sort_values(by='confidence', ascending=False)
)
# Show only the key columns for the two most confident rules
key_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules.loc[:, key_columns].head(2)



Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Cheese),(Bread),0.3,0.75,1.363636
4,(Cereal),(Chicken),0.3,0.75,1.25


# 6. Explain One Rule in Plain Language

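In my data, 75% of the shopping baskets that include Cheese also include Bread (the rule's confidence of 0.75). So if a customer grabs Cheese, there’s a high chance they’ll pick up Bread in the same trip. The lift of about 1.36 means Bread shows up roughly 36% more often in baskets with Cheese than in baskets overall.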