# 03. Frequent Itemsets (Apriori)

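Generate frequent itemsets from the encoded grocery transactions (`data/processed/grocery_encoded.csv`) using the Apriori algorithm, and save them to `results/frequent_itemsets.csv`.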

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
import os

## 1. Load Processed Data

In [2]:
# Path to the encoded grocery table, relative to this notebook's directory.
data_path = os.path.join("..", "data", "processed", "grocery_encoded.csv")

# read_csv usually infers int/float columns, so cast every column to bool in
# the same step; boolean columns are more memory efficient.
df_encoded = pd.read_csv(data_path).astype(bool)

# Preview the first rows of the boolean table.
df_encoded.head()

Unnamed: 0,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,beverages,bottled beer,...,uht-milk,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## 2. Apriori Algorithm

In [3]:
# Support threshold passed to apriori. Lowered to capture more itemsets.
min_support = 0.001

# use_colnames=True reports itemsets by column (item) name.
frequent_itemsets = apriori(
    df_encoded,
    min_support=min_support,
    use_colnames=True,
)

# Record how many items each itemset contains.
frequent_itemsets["length"] = frequent_itemsets["itemsets"].apply(len)

frequent_itemsets.head()

Unnamed: 0,support,itemsets,length
0,0.00147,(abrasive cleaner),1
1,0.001938,(artif. sweetener),1
2,0.008087,(baking powder),1
3,0.001136,(bathroom cleaner),1
4,0.03395,(beef),1


In [4]:
# Report how many rows (one per frequent itemset) the Apriori step produced.
print(f"Found {len(frequent_itemsets)} frequent itemsets.")

Found 750 frequent itemsets.


## 3. Save Results

In [5]:
# Destination CSV for the frequent itemsets table, relative to this notebook.
output_path = os.path.join("..", "results", "frequent_itemsets.csv")

# Create the results directory if it does not exist yet: to_csv does not
# create missing parent directories and would otherwise raise OSError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# index=False drops the positional row index, which carries no information.
# NOTE(review): the `itemsets` column holds collection objects (frozensets,
# per mlxtend) and is written as text; confirm that downstream readers parse
# it back as intended.
frequent_itemsets.to_csv(output_path, index=False)
print(f"Saved to {output_path}")

Saved to ..\results\frequent_itemsets.csv
