### Imports

In [None]:
import pandas as pd
import random
import os
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori


# 1. Simulating Transaction Data

In [6]:
# Pool of 30 Supermarket Items
items = [
    "milk", "bread", "butter", "eggs", "cheese", "apples", "bananas", "chicken", "beef", "pasta",
    "rice", "flour", "sugar", "salt", "pepper", "onions", "tomatoes", "carrots", "potatoes", "cereal",
    "oil", "juice", "yogurt", "tea", "coffee", "chocolate", "cookies", "soap", "shampoo", "toothpaste"
]

### Generate Transaction Data, 3k transactions, 30 with 2-7 items each
setting seed as 69 for reproducibility


In [7]:
random.seed(69)  
transactions = []
for _ in range(3000):
    num_items = random.randint(2, 7)
    transaction = random.sample(items, num_items)
    transactions.append(transaction)


### Save raw Transactions to CSV

In [8]:
transactions_df = pd.DataFrame(transactions)
transactions_df.to_csv('data/supermarket_transactions.csv', index=False)


# 2. Preprocessing Data: Using One-Hot Encoding

In [9]:
# Preparing transaction data
transaction_list = transactions_df['Transaction'].apply(eval).tolist() 

# One-hot encode transactions
encoder = TransactionEncoder()
encoded_array = encoder.fit_transform(transaction_list)
onehot_df = pd.DataFrame(encoded_array, columns=encoder.columns_)


KeyError: 'Transaction'

# 3. Genreating Frequent Itemsets with Apriori from the previous Week

In [None]:

# We'll set min_len=2 to only get itemsets with 2+ items
frequent_itemsets = apriori(one_hot_df, min_support=0.05, use_colnames=True, min_len=2)

# [Student: Bob] Filter to only include itemsets with 2+ items
# (This is redundant with min_len=2 but good for demonstration)
frequent_itemsets = frequent_itemsets[frequent_itemsets['itemsets'].apply(len) >= 2]

# [Student: Carol] Sort by support and get top 10
frequent_itemsets = frequent_itemsets.sort_values('support', ascending=False)
top_10_itemsets = frequent_itemsets.head(10)

# [Student: Alice] Save frequent itemsets to CSV
frequent_itemsets.to_csv('data/frequent_itemsets.csv', index=False)

print("Top 10 Frequent Itemsets (2+ items):")
print(top_10_itemsets)

TypeError: apriori() got an unexpected keyword argument 'min_len'