# Market Basket Analysis using Apriori
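This notebook demonstrates Market Basket Analysis with the Apriori algorithm on synthetic, AdventureWorks-like sales transactions. It loads the order lines, groups them into one basket per order, mines frequent itemsets, and derives association rules from those itemsets.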

In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder


In [3]:
# Read the order lines from the CSV file and preview the first five rows
# (the preview shows an OrderID and a Product column).
df = pd.read_csv(filepath_or_buffer="market_basket.csv")
df.head(5)

Unnamed: 0,OrderID,Product
0,SO1001,Shoes
1,SO1001,Bottle
2,SO1001,Bike
3,SO1002,Helmet
4,SO1002,Bottle


In [4]:
# Build one basket per order: walk the rows grouped by OrderID and gather
# each order's Product values into a plain Python list.
transactions = [
    list(products)
    for order_id, products in df.groupby('OrderID')['Product']
]
transactions[:5]

[['Shoes', 'Bottle', 'Bike'],
 ['Helmet', 'Bottle', 'Bike'],
 ['Gloves'],
 ['Shoes', 'Jersey', 'Socks', 'Helmet'],
 ['Helmet', 'Shoes', 'Jersey']]

In [5]:
# Turn the list of baskets into a binary matrix, labelling the columns with
# the item names the encoder learned.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

# Run Apriori with a minimum support of 0.02, reporting itemsets by item
# name, then preview the itemsets with the highest support.
frequent_itemsets = apriori(df_encoded, min_support=0.02, use_colnames=True)
frequent_itemsets.sort_values('support', ascending=False).head()

Unnamed: 0,support,itemsets
3,0.341,(Helmet)
1,0.336,(Bottle)
7,0.335,(Socks)
2,0.319,(Gloves)
6,0.304,(Shorts)


In [6]:
# Derive association rules from the frequent itemsets, using lift as the
# selection metric with a threshold of 1.0, and preview the five rules with
# the highest confidence.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.0,
)
rules.sort_values('confidence', ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
32,"(Bottle, Shorts)",(Socks),0.098,0.335,0.038,0.387755,1.157478,1.0,0.00517,1.086167,0.150834,0.096203,0.079331,0.250594
18,"(Bike, Gloves)",(Socks),0.075,0.335,0.029,0.386667,1.154229,1.0,0.003875,1.084239,0.144455,0.076115,0.077694,0.236617
20,"(Bike, Helmet)",(Socks),0.09,0.335,0.033,0.366667,1.094527,1.0,0.00285,1.05,0.094905,0.084184,0.047619,0.232587
31,"(Socks, Shorts)",(Bottle),0.104,0.336,0.038,0.365385,1.087454,1.0,0.003056,1.046303,0.089756,0.094527,0.044254,0.23924
25,"(Gloves, Shorts)",(Bottle),0.093,0.336,0.033,0.354839,1.056068,1.0,0.001752,1.0292,0.058535,0.083333,0.028372,0.226526


In [11]:
# Narrow the rules down by confidence and lift.
# The stricter cut-offs (confidence >= 0.4 and lift >= 1.2) are left disabled:
# the highest confidence in the sorted preview of `rules` is about 0.39, so
# they would select nothing. The looser values below are used instead.
# NOTE(review): `rules` was generated with a lift threshold of 1.0, so the 0.2
# lift floor here should not remove anything -- confirm the intended value.
is_strong = rules['confidence'].ge(0.1) & rules['lift'].ge(0.2)
strong_rules = rules[is_strong]
strong_rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Bike),(Socks),0.097,0.342756,1.023153
1,(Socks),(Bike),0.097,0.289552,1.023153
2,(Socks),(Bottle),0.113,0.337313,1.003909
3,(Bottle),(Socks),0.113,0.33631,1.003909
4,(Helmet),(Shorts),0.107,0.313783,1.032181
5,(Shorts),(Helmet),0.107,0.351974,1.032181
6,(Socks),(Shorts),0.104,0.310448,1.02121
7,(Shorts),(Socks),0.104,0.342105,1.02121
8,"(Helmet, Bottle)",(Bike),0.027,0.290323,1.025875
10,"(Bike, Jersey)",(Bottle),0.025,0.347222,1.033399
