In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt

# Read the four raw tables from the Dataset/ directory.
orders = pd.read_csv('Dataset/orders.csv')
prior_products = pd.read_csv('Dataset/order_products__prior.csv')
products = pd.read_csv('Dataset/products.csv')
aisles = pd.read_csv('Dataset/aisles.csv')

# Report the (rows, columns) shape of the three main tables as a quick
# sanity check that the files were read in full.
for label, frame in (
    ("Orders:", orders),
    ("Prior items:", prior_products),
    ("Products:", products),
):
    print(label, frame.shape)

# Last expression of the cell: preview the first rows of the orders table.
orders.head()


Orders: (3421083, 7)
Prior items: (32434489, 4)
Products: (49688, 4)


Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2539329,1,prior,1,2,8,
1,2398795,1,prior,2,3,7,15.0
2,473747,1,prior,3,3,12,21.0
3,2254736,1,prior,4,4,7,29.0
4,431534,1,prior,5,4,15,28.0


In [2]:
# Restrict the orders table to rows whose eval_set is 'prior'.
is_prior = orders['eval_set'] == 'prior'
orders = orders.loc[is_prior].copy()

# Drop line items with no product id, then keep only line items belonging to
# the 1500 most frequently occurring product ids.
prior_products = prior_products.dropna(subset=['product_id'])
product_counts = prior_products['product_id'].value_counts()
top_prods = product_counts.nlargest(1500).index
in_top_prods = prior_products['product_id'].isin(top_prods)
prior_products = prior_products.loc[in_top_prods]

print("Filtered prior_products:", prior_products.shape)


Filtered prior_products: (19776345, 4)


In [3]:
# Attach aisle information to every line item in two left joins:
#   1. product_id -> aisle_id (only those two columns of `products` are used)
#   2. aisle_id   -> the remaining columns of `aisles`
# Left joins keep every row of prior_products even when no match is found.
product_to_aisle = products[['product_id', 'aisle_id']]
items_with_aisle_id = prior_products.merge(
    product_to_aisle, on='product_id', how='left'
)
prior_items_aisle = items_with_aisle_id.merge(aisles, on='aisle_id', how='left')

# Last expression of the cell: preview the joined table.
prior_items_aisle.head()


Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,aisle_id,aisle
0,2,33120,1,1,86,eggs
1,2,28985,2,1,83,fresh vegetables
2,2,9327,3,0,104,spices seasonings
3,2,17794,6,1,83,fresh vegetables
4,3,33754,1,1,120,yogurt


In [None]:
# Count the line items per (order_id, aisle) pair.
items_per_order_aisle = (
    prior_items_aisle.groupby(['order_id', 'aisle'])['product_id'].count()
)

# Pivot aisles into columns (pairs that never occur become 0) and convert the
# counts to booleans: True when the order contains at least one item from the
# aisle. This one-row-per-order boolean table is what gets passed to apriori.
aisle_count_matrix = items_per_order_aisle.unstack(fill_value=0)
basket_aisle = aisle_count_matrix.astype(bool)

print("Aisle-basket shape:", basket_aisle.shape)
basket_aisle.head()


In [None]:
# Draw a reproducible random subset of baskets (fixed random_state) to keep
# the frequent-itemset mining tractable.
# DataFrame.sample raises ValueError when asked for more rows than exist
# (sampling without replacement), so cap the request at the number of
# available baskets; with fewer than 100,000 baskets all of them are used.
n_baskets = min(100_000, len(basket_aisle))
sampled = basket_aisle.sample(n=n_baskets, random_state=42)
print("Sampled basket shape:", sampled.shape)


In [None]:
# Mine frequent aisle combinations from the sampled baskets (minimum support
# 0.01), keeping aisle names rather than column indices in the itemsets.
freq_aisles = apriori(sampled, min_support=0.01, use_colnames=True)

# Derive association rules, filtered on confidence with a 0.3 threshold.
aisle_rules = association_rules(
    freq_aisles,
    metric="confidence",
    min_threshold=0.3,
)

# Keep the ten rules with the largest lift and only the columns of interest.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
strongest_rules = aisle_rules.nlargest(10, 'lift')
top_10_aisle = strongest_rules[rule_columns]

# Last expression of the cell: display the resulting table.
top_10_aisle


In [None]:
# Build one readable tick label per rule, e.g. "a, b → c".
# Items are sorted before joining because the iteration order of a set of
# strings can differ between kernel sessions, which would make the labels
# change from run to run; joining also avoids the braces/quotes of set repr.
labels = [
    f"{', '.join(sorted(map(str, a)))} → {', '.join(sorted(map(str, c)))}"
    for a, c in zip(top_10_aisle.antecedents, top_10_aisle.consequents)
]

fig, ax = plt.subplots(figsize=(8, 5))
ax.barh(labels, top_10_aisle.lift)
# barh places the first entry at the bottom of the axis; invert it so the
# first row of top_10_aisle is drawn at the top of the chart.
ax.invert_yaxis()
ax.set_xlabel('Lift')
ax.set_title('Top 10 Aisle-Level Rules (Sampled)')
fig.tight_layout()
plt.show()
