In [1]:
import random
import csv

# Pool of 30 unique supermarket items that simulated baskets are drawn from.
ITEM_POOL = [
    "Milk", "Bread", "Eggs", "Cheese", "Butter", "Chicken Breast", "Ground Beef", "Apples",
    "Bananas", "Oranges", "Tomatoes", "Potatoes", "Onions", "Carrots", "Lettuce", "Spinach",
    "Yogurt", "Cereal", "Rice", "Pasta", "Coffee", "Tea", "Juice", "Soda", "Water",
    "Chocolate", "Cookies", "Ice Cream", "Laundry Detergent", "Toothpaste"
]

def simulate_transactions(num_transactions=3000, item_pool=ITEM_POOL, min_items=2, max_items=7, seed=None):
    """Generate synthetic supermarket transactions.

    Each transaction holds a random number of distinct entries sampled without
    replacement from ``item_pool``.

    Args:
        num_transactions: Number of transactions to generate. IDs run from 1
            to ``num_transactions`` inclusive.
        item_pool: Iterable of item names to sample from. It is copied into a
            list, so tuples and sets are accepted as well.
        min_items: Smallest basket size (inclusive).
        max_items: Largest basket size (inclusive). If it exceeds the pool
            size it is clamped to ``len(item_pool)``, because a basket cannot
            contain more distinct entries than the pool offers.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is reproducible and the global RNG state is
            left untouched. When ``None`` (default) the module-level ``random``
            functions are used, exactly as before.

    Returns:
        A list of dicts with keys ``"TransactionID"`` (int) and ``"Items"``
        (the sampled names joined with ``", "``).

    Raises:
        ValueError: If ``min_items`` is negative or larger than the effective
            maximum basket size.
    """
    pool = list(item_pool)

    # Without this clamp random.sample() fails only on the iterations where
    # the drawn size happens to exceed the pool, i.e. sporadically.
    largest_basket = min(max_items, len(pool))
    if min_items < 0 or min_items > largest_basket:
        raise ValueError(
            f"min_items={min_items} must be between 0 and {largest_basket} "
            f"(max_items={max_items}, pool size={len(pool)})"
        )

    # The random module exposes the same randint/sample API as a Random
    # instance, so the unseeded path keeps consuming the global RNG.
    rng = random if seed is None else random.Random(seed)

    transactions = []
    for txn_id in range(1, num_transactions + 1):
        num_items = rng.randint(min_items, largest_basket)
        items = rng.sample(pool, num_items)
        transactions.append({
            "TransactionID": txn_id,
            "Items": ", ".join(items)
        })
    return transactions

def save_transactions_csv(transactions, filename="supermarket_transactions.csv"):
    """Write transaction records to a CSV file with a header row.

    Args:
        transactions: Iterable of dicts keyed by ``"TransactionID"`` and
            ``"Items"``; each dict becomes one CSV row.
        filename: Destination path. The file is created or overwritten and
            encoded as UTF-8.
    """
    columns = ["TransactionID", "Items"]
    # newline='' lets the csv module control line endings itself.
    with open(filename, "w", newline='', encoding="utf-8") as out_file:
        dict_writer = csv.DictWriter(out_file, fieldnames=columns)
        dict_writer.writeheader()
        dict_writer.writerows(transactions)

# Entry point: build the synthetic data set and persist it to disk.
if __name__ == "__main__":
    # Uses the defaults of simulate_transactions (3000 transactions, 2-7 items each).
    # NOTE(review): no random seed is set in the visible code, so every run
    # produces a different data set — confirm whether that is intended.
    transactions = simulate_transactions()
    # Written to the default path of save_transactions_csv.
    save_transactions_csv(transactions)
    # The file name in this message is hard-coded; it matches the default
    # filename of save_transactions_csv and must be kept in sync with it.
    print(f"Simulated {len(transactions)} supermarket transactions and saved to 'supermarket_transactions.csv'.")

Simulated 3000 supermarket transactions and saved to 'supermarket_transactions.csv'.


In [None]:
# Generate frequent itemsets with min_support=0.05 and save the top 10 to 'frequent_itemsets.csv'.
import pandas as pd
from mlxtend.frequent_patterns import apriori

# Load the simulated transactions and split each comma-separated basket
# string into a list of stripped item names.
df = pd.read_csv("supermarket_transactions.csv")
df['ItemList'] = df['Items'].apply(lambda x: [item.strip() for item in x.split(",")])

# One-hot encode the baskets: one column per ITEM_POOL entry, one row per
# transaction. Booleans are used instead of 0/1 integers because mlxtend's
# apriori warns on non-bool frames and falls back to a slower code path;
# the resulting support values are the same.
encoded_rows = []
for items in df['ItemList']:
    row = {item: item in items for item in ITEM_POOL}
    encoded_rows.append(row)

df_basket = pd.DataFrame(encoded_rows)

# Mine itemsets present in at least 5% of transactions, most frequent first.
frequent_itemsets = apriori(df_basket, min_support=0.05, use_colnames=True)
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

print(frequent_itemsets.head(10))
# Flatten each frozenset to a readable string for the CSV export. sorted()
# makes the order of names inside multi-item sets deterministic.
# NOTE(review): this overwrites the frozenset column in place, so
# `frequent_itemsets` can no longer be passed to mlxtend's association_rules
# afterwards — confirm no later cell needs the original frozensets.
frequent_itemsets['itemsets'] = frequent_itemsets['itemsets'].apply(lambda x: ', '.join(sorted(x)))
frequent_itemsets.head(10).to_csv("frequent_itemsets.csv", index=False)

     support       itemsets
17  0.156333       (Cereal)
10  0.155333     (Tomatoes)
7   0.154667       (Apples)
9   0.153000      (Oranges)
11  0.152333     (Potatoes)
6   0.152333  (Ground Beef)
18  0.152333         (Rice)
15  0.152000      (Spinach)
2   0.152000         (Eggs)
21  0.151667          (Tea)


