### Imports

In [19]:
import pandas as pd
import random
import os
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori


# 1. Simulating Transaction Data

In [20]:
# Catalogue of the 30 supermarket products that transactions are drawn from.
# Keep the order stable: any seeded sampling from this list depends on
# element positions.
items = [
    "milk", "bread", "butter", "eggs", "cheese",
    "apples", "bananas", "chicken", "beef", "pasta",
    "rice", "flour", "sugar", "salt", "pepper",
    "onions", "tomatoes", "carrots", "potatoes", "cereal",
    "oil", "juice", "yogurt", "tea", "coffee",
    "chocolate", "cookies", "soap", "shampoo", "toothpaste",
]

### Generate Transaction Data: 3,000 transactions drawn from the 30 items, with 2–7 items each
The random seed is set to 69 for reproducibility.


In [21]:
# Seed Python's RNG so the same 3,000 baskets are generated on every run.
random.seed(69)

transactions = []
for _ in range(3000):
    # Basket size: randint is inclusive on both ends, so 2..7 items.
    num_items = random.randint(2, 7)
    # Sample without replacement from the `items` pool defined above, so a
    # basket never contains the same product twice. (The previous name,
    # `items_pool`, is not defined anywhere in this notebook.)
    transaction = random.sample(items, num_items)
    transactions.append(transaction)



### Save raw Transactions to CSV

In [22]:
# Make sure the output folder exists; to_csv does not create missing
# directories and would otherwise fail on a clean checkout.
os.makedirs('data', exist_ok=True)

# One row per transaction. Baskets hold different numbers of items, so the
# shorter rows are padded with missing values by the DataFrame constructor.
transactions_df = pd.DataFrame(transactions)
transactions_df.to_csv('data/supermarket_transactions.csv', index=False)


# 2. Preprocessing Data: Using One-Hot Encoding

In [23]:
# Learn the item vocabulary from the baskets, then encode every basket as a
# row with one column per item.
encoder = TransactionEncoder()
encoded_array = encoder.fit(transactions).transform(transactions)

# Label the encoded matrix with the item names discovered during fitting.
onehot_df = pd.DataFrame(data=encoded_array, columns=encoder.columns_)

# 3. Generating Frequent Itemsets with Apriori from the previous Week

In [24]:
# Mine all itemsets whose support is at least 0.05; use_colnames=True reports
# itemsets by column (item) name rather than by column position.
frequent_itemsets = apriori(
    onehot_df,
    min_support=0.05,
    use_colnames=True,
)

# Store each itemset's size so later cells can filter on it.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

#### Saving the top 10 frequent itemsets to CSV

In [None]:
# Keep only itemsets with two or more items. The closed / maximal cells below
# read this filtered frame, so the name is deliberately reassigned.
frequent_itemsets = frequent_itemsets[frequent_itemsets['length'] >= 2]

# Show the ten highest-support itemsets under the heading; previously the
# heading was printed with nothing after it.
print("Top 10 Frequent Itemsets:")
print(frequent_itemsets.sort_values('support', ascending=False).head(10))

# NOTE(review): the CSV receives every remaining itemset, not only the ten
# printed above -- confirm whether the file is meant to be limited to ten rows.
frequent_itemsets.to_csv('data/frequent_itemsets.csv', index=False)


Top 10 Frequent Itemsets:


#### Closed Itemsets


In [30]:
# Closed itemsets: an itemset is closed when no proper superset among the
# frequent itemsets has exactly the same support.
closed_itemsets = frequent_itemsets.copy()

# Pair every itemset with its support once, instead of re-materialising rows
# with iterrows() inside a nested loop.
_candidates = list(zip(frequent_itemsets['itemsets'], frequent_itemsets['support']))

closed_mask = [
    # `<` on the itemsets is the proper-subset test; any() stops at the first
    # equal-support superset, like the original `break`.
    not any(
        itemset < other_itemset and support == other_support
        for other_itemset, other_support in _candidates
    )
    for itemset, support in _candidates
]

# Index with an explicit boolean Series rather than the raw list: pandas
# treats an *empty* list as "select no columns", which would drop every column
# (and the CSV header) whenever there are no frequent itemsets to examine.
closed_itemsets = closed_itemsets[
    pd.Series(closed_mask, index=closed_itemsets.index, dtype=bool)
]
closed_itemsets.to_csv("data/closed_itemsets.csv", index=False)

#### Maximal Itemsets

In [31]:
# Maximal itemsets: an itemset is maximal when none of the other frequent
# itemsets is a proper superset of it.
maximal_itemsets = frequent_itemsets.copy()

# Materialise the itemsets once so the nested scan does not rebuild rows with
# iterrows() on every comparison.
_all_itemsets = list(frequent_itemsets['itemsets'])

maximal_mask = [
    # `<` on the itemsets is the proper-subset test; any() stops at the first
    # superset found, like the original `break`.
    not any(itemset < other_itemset for other_itemset in _all_itemsets)
    for itemset in _all_itemsets
]

# Index with an explicit boolean Series rather than the raw list: pandas
# treats an *empty* list as "select no columns", which would drop every column
# (and the CSV header) whenever there are no frequent itemsets to examine.
maximal_itemsets = maximal_itemsets[
    pd.Series(maximal_mask, index=maximal_itemsets.index, dtype=bool)
]
maximal_itemsets.to_csv("data/maximal_itemsets.csv", index=False)