# 03. Frequent Itemsets (Apriori)

Generate frequent itemsets using the Apriori algorithm.

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
import os

## 1. Load Processed Data

In [2]:
# Location of the processed, encoded grocery data relative to this notebook.
data_path = os.path.join("..", "data", "processed", "grocery_encoded.csv")
# read_csv usually parses the values as int/float, so cast every column to
# bool right after loading to keep the frame compact in memory.
df_encoded = pd.read_csv(data_path).astype(bool)
# Preview the first rows.
df_encoded.head()

## 2. Apriori Algorithm

In [3]:
# Minimum fraction of transactions an itemset must appear in to be kept.
# Lowered to capture more itemsets.
min_support = 0.001
# use_colnames=True reports itemsets by column name rather than column index.
frequent_itemsets = apriori(df_encoded, min_support=min_support, use_colnames=True)

# Record how many items each itemset contains; `len` is passed directly
# rather than wrapped in a lambda.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Preview the first rows.
frequent_itemsets.head()

In [4]:
# Report the number of rows (one per frequent itemset) in the result table.
print(f"Found {frequent_itemsets.shape[0]} frequent itemsets.")

## 3. Save Results

In [5]:
output_path = os.path.join("..", "results", "frequent_itemsets.csv")
# to_csv does not create missing directories, so make sure the results
# folder exists before writing (a no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# index=False leaves the DataFrame's row index out of the file.
frequent_itemsets.to_csv(output_path, index=False)
print(f"Saved to {output_path}")