# 🛍️ Market Basket Analysis – Instacart Dataset

This notebook performs Market Basket Analysis using the Apriori algorithm to find product combinations frequently bought together. The results can inform product bundling and promotional strategies.

In [None]:
# 📦 Install required packages
!pip install pandas mlxtend matplotlib seaborn

In [None]:
# 📚 Imports
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# 📂 Load sample data (replace with your dataset path)
# Read the CSV into `df`; the preprocessing cell expects it to provide
# `order_id` and `product_name` columns.
df = pd.read_csv(filepath_or_buffer='data/instacart_sample.csv')
# Show the first five rows as a quick sanity check of the load.
df.head(n=5)

In [None]:
# 🧹 Preprocessing – create basket matrix
# Assumes df has columns: order_id, product_name
#
# Build a one-hot order × product matrix: one row per order_id, one column per
# product_name, and True where the product occurs in that order at least once.
# The matrix is boolean because mlxtend's apriori warns on (and is slower for)
# non-bool input, and the comparison is vectorized instead of going through the
# deprecated, element-wise DataFrame.applymap.
basket = (
    df.groupby(['order_id', 'product_name'])
    .size()                  # number of rows per (order, product) pair
    .unstack(fill_value=0)   # products become columns; absent pairs count as 0
    .gt(0)                   # collapse counts to presence / absence
)
basket.head()

In [None]:
# ✅ Apriori - Get frequent itemsets
# Mine itemsets whose support in `basket` is at least 0.02, reported with the
# basket's column names rather than column positions.
frequent_itemsets = apriori(
    basket,
    min_support=0.02,
    use_colnames=True,
)
# Preview the itemsets with the highest support first.
(
    frequent_itemsets
    .sort_values('support', ascending=False)
    .head()
)

In [None]:
# 🔗 Generate Association Rules
# Derive rules from the frequent itemsets, keeping those with lift >= 1.0,
# and order them from the most to the least confident.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.0,
).sort_values(by='confidence', ascending=False)
# Preview only the columns needed to read a rule and judge its strength.
rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']].head()

In [None]:
# 📈 Visualization – Top rules by lift
# Take the ten rules with the highest lift and draw one bar per rule.
top_rules = rules.nlargest(10, 'lift').copy()

# Label every rule as "antecedents → consequents". Plotting the consequents
# column on its own would merge rules that share a consequent into a single
# averaged bar and would print raw frozenset reprs as tick labels.
top_rules['rule'] = [
    f"{', '.join(sorted(map(str, lhs)))} → {', '.join(sorted(map(str, rhs)))}"
    for lhs, rhs in zip(top_rules['antecedents'], top_rules['consequents'])
]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='lift', y='rule', data=top_rules, ax=ax)
ax.set_title('Top 10 Association Rules by Lift')
ax.set_xlabel('Lift')
ax.set_ylabel('Rule (antecedents → consequents)')
fig.tight_layout()
plt.show()