# Importing Necessary Libraries and Packages

In [2]:
# [Student:Nathan] Import necessary libraries
import numpy as np #[Student:Nathan] For random number generation(our simulated records) and array operations
import pandas as pd #[Student:Nathan] For data manipulation and analysis (Working with CSV files)

from mlxtend.preprocessing import TransactionEncoder  # [Student:Nathan] For encoding (one hot encode) transaction data into a format we can use for association rule mining
from mlxtend.frequent_patterns import apriori, association_rules #[Student:Nathan] For generating frequent itemsets and association rules

# [Student:Nathan] For generating frequent itemsets and association rules

# [Student:Nathan] Reproducibility: fix NumPy's global random seed so a fresh top-to-bottom run draws the same random values
RANDOM_SEED = 42  # [Student:Nathan] Single named place to change the seed
np.random.seed(RANDOM_SEED)

# Simulating Transaction Data

In [3]:
# [Zakariya] Pool of distinct supermarket items to sample from (30 items, the required minimum).
# [Zakariya] The order of this list matters: changing it changes which items a given seed draws.
item_pool = [
    "Milk", "Bread", "Eggs", "Butter", "Cheese", "Yogurt",
    "Apples", "Bananas", "Oranges", "Grapes", "Tomatoes", "Onions",
    "Potatoes", "Carrots", "Chicken", "Beef", "Fish", "Rice",
    "Pasta", "Sugar", "Salt", "Flour", "Oil", "Cereal",
    "Juice", "Soda", "Coffee", "Tea", "Biscuits", "Chocolate",
]

# [Zakariya] Number of random supermarket transactions to simulate (3000, the required minimum)
n_transactions = 3000

# [Zakariya] Build every transaction in one pass. Each one holds 2 to 7 distinct items:
# [Zakariya]   - randint's upper bound (8) is exclusive, so sizes range over 2..7
# [Zakariya]   - replace=False prevents an item from repeating inside one transaction
# [Zakariya] The size is drawn first and the items second, once per transaction.
transactions = [
    list(np.random.choice(item_pool, size=np.random.randint(2, 8), replace=False))
    for _ in range(n_transactions)
]

# [Zakariya] Flatten each transaction into a single ", "-separated string for the raw export
transaction_strings = list(map(", ".join, transactions))

# [Zakariya] One row per transaction: a sequential id starting at 1 plus its item string
transactions_df = pd.DataFrame(
    {
        "transaction_id": range(1, n_transactions + 1),
        "items": transaction_strings,
    }
)

# [Zakariya] Write the raw simulated transactions to CSV, leaving out the DataFrame row index
transactions_df.to_csv("supermarket_transactions.csv", index=False)

# One-Hot Encoding

In [None]:
# [Student:Nathan] One-hot encode the transactions: fit_transform learns the set of items and
# [Student:Nathan] returns the encoded array (True/False per item for each transaction) in one call
te = TransactionEncoder()
te_array = te.fit_transform(transactions)

# [Student:Nathan] Wrap the encoded array in a DataFrame: rows are transactions, columns are the items the encoder learned
one_hot_df = pd.DataFrame(data=te_array, columns=te.columns_)

# [Student:Nathan] Preview the first few rows as a quick sanity check on the encoding
one_hot_df.head()


Unnamed: 0,Apples,Bananas,Beef,Biscuits,Bread,Butter,Carrots,Cereal,Cheese,Chicken,...,Oranges,Pasta,Potatoes,Rice,Salt,Soda,Sugar,Tea,Tomatoes,Yogurt
0,False,False,True,False,False,False,False,False,False,False,...,True,False,False,True,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,True
2,False,False,False,False,False,False,False,False,True,False,...,True,False,False,False,False,False,False,False,True,False
3,False,False,False,False,True,True,False,False,True,False,...,False,True,False,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False


# Generating Frequent Itemsets using Apriori

# Identifying Closed Frequent Itemsets

# Identifying Maximal Frequent Itemsets

# Summary Prints