In [5]:
# [Student: Selmah Tzindori] Simulation of 3,000 supermarket transactions and export to CSV

# Import the random module to help us randomly select items for each transaction
import random

# Import pandas for working with structured data like tables and CSV files
import pandas as pd

# Step 1: Define a list (pool) of 30 unique supermarket items
# These will be randomly picked to form each transaction
item_pool = [
    'milk', 'bread', 'eggs', 'cheese', 'butter', 'juice', 'apples', 'bananas', 'oranges', 'grapes',
    'cereal', 'chocolate', 'yogurt', 'chicken', 'beef', 'pasta', 'rice', 'tomatoes', 'onions', 'potatoes',
    'carrots', 'lettuce', 'beans', 'soda', 'water', 'coffee', 'tea', 'cookies', 'ice cream', 'toilet paper'
]

# Step 2: Set the number of transactions to simulate
num_transactions = 3000  # Total number of customers or baskets

# Fixed seed so that "Restart Kernel -> Run All" regenerates exactly the same
# baskets (and therefore the same CSV file and the same mining results later on).
SEED = 42

# Use a dedicated generator instead of the global `random` state, so the
# simulation neither depends on nor disturbs any other use of `random`.
rng = random.Random(SEED)

# Create an empty list that will hold each simulated transaction
transactions = []

# Step 3: Loop 3,000 times to create each transaction
for _ in range(num_transactions):
    # Randomly choose a number between 2 and 7 (both inclusive) to determine
    # how many items are in this transaction
    transaction_length = rng.randint(2, 7)

    # Randomly select 'transaction_length' distinct items from the item pool
    # (sampling is without replacement, so an item appears at most once per basket)
    transaction = rng.sample(item_pool, transaction_length)

    # Add the generated transaction (a list of items) to our list of all transactions
    transactions.append(transaction)

# Sanity checks on the simulated data before it is written to disk
assert len(transactions) == num_transactions
assert all(2 <= len(t) <= 7 for t in transactions)
assert all(len(set(t)) == len(t) for t in transactions), "duplicate item inside a basket"

# Step 4: Convert the list of transactions into a format suitable for saving to CSV
# Each transaction becomes one string, with items separated by ", " (comma + space),
# so anything reading the file back must split on ', ' to recover the item names
transaction_strings = [', '.join(t) for t in transactions]

# Create a pandas DataFrame with one column called 'Transaction'
# Each row in the DataFrame represents a customer transaction
transactions_df = pd.DataFrame({'Transaction': transaction_strings})

# Step 5: Save the DataFrame to a CSV file (without the row index)
# This file will be used in the next steps of the project (frequent itemset mining)
transactions_df.to_csv('supermarket_transactions.csv', index=False)

# Step 6: Show the first 5 transactions to check the output looks correct
transactions_df.head()

Unnamed: 0,Transaction
0,"potatoes, chocolate, cheese, soda, grapes, apples"
1,"potatoes, tomatoes"
2,"grapes, cheese, coffee, water"
3,"oranges, toilet paper"
4,"tea, chocolate, lettuce, pasta"


In [None]:
# Levin: Turn the list-of-lists `transactions` into a one-hot encoded DataFrame
from mlxtend.preprocessing import TransactionEncoder

# Create the encoder object
te = TransactionEncoder()

# First pass: fit the encoder on the transaction data
te.fit(transactions)

# Second pass: transform the transactions into a 2D boolean array.
# Each row is one transaction and each column is one item; a cell is True
# when that item is in that transaction, otherwise False.
te_ary = te.transform(transactions)

# Wrap the array in a DataFrame, using the encoder's column names as headers
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Display the one-hot encoded DataFrame
df_encoded


Unnamed: 0,apples,bananas,beans,beef,bread,butter,carrots,cereal,cheese,chicken,...,oranges,pasta,potatoes,rice,soda,tea,toilet paper,tomatoes,water,yogurt
0,True,False,False,False,False,False,False,False,True,False,...,False,False,True,False,True,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,True,False
3,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,False,False,False,False,True,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,True
2996,False,False,False,False,False,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False
2997,False,False,False,False,False,False,True,False,False,False,...,False,False,False,True,False,False,False,False,False,False
2998,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
