# Notion of Maximality and Closedness

## Preamble

### Modules

`Student: Misati`

`Editor: Samuel`

In [131]:
# [Student: Misati] Importing data mining libraries
# [Editor: Samuel] Import necessary libraries
import pandas as pd
import itertools
import random
from mlxtend.frequent_patterns import apriori, association_rules
from collections import defaultdict

### Configuration

`Student: Misati`

`Editor: Samuel`

In [132]:
# [Student: Misati] Setting global configuration variables
# [Editor: Samuel] Set number of transactions to simulate
MIN_SUP = 0.05
SEED = 123
NUM_TRANSACTIONS = 3000

In [133]:
# [Editor: Samuel] Set a seed
random.seed(SEED)

### Utility Functions

`Student: Samuel`

`Editor: Misati`

In [134]:
# [Student: Samuel] Generate list of transactions
# [Editor: Misati] Conversion to function
def create_transactions(item_pool):
    transactions = []
    for _ in range(NUM_TRANSACTIONS):
        # Randomly choose 2 to 7 items per transaction
        num_items = random.randint(2, 7)
        transaction = random.sample(item_pool, num_items)
        transactions.append(transaction)
    return transactions

## Simulate Transaction Data

`Student: Samuel`

`Editor: Misati`

In [135]:
# [Student: Samuel] Define a pool of 30 unique supermarket items
item_pool = [
    'milk', 'bread', 'eggs', 'butter', 'cheese', 'apples', 'bananas', 'chicken', 'beef', 'rice',
    'pasta', 'sugar', 'salt', 'pepper', 'onions', 'tomatoes', 'lettuce', 'carrots', 'potatoes', 'cereal',
    'yogurt', 'juice', 'soda', 'coffee', 'tea', 'biscuits', 'chocolate', 'detergent', 'soap', 'toothpaste'
]

In [136]:
# [Student: Samuel] Generate a list of transactions
# [Editor: Misati] Using a utility function to generate transactions
transactions = create_transactions(item_pool)

In [137]:
# [Student: Samuel] Convert list of transactions into a DataFrame for saving
df_transactions = pd.DataFrame({'Transaction': transactions})

In [138]:
# [Student: Samuel] Save simulated transactions to CSV
df_transactions.to_csv('supermarket_transactions.csv', index=False)

# [Student: Samuel] preview
df_transactions.head()

Unnamed: 0,Transaction
0,"[beef, eggs]"
1,"[beef, butter, chocolate, soap, detergent]"
2,"[salt, carrots]"
3,"[pasta, toothpaste, detergent, bread, apples, ..."
4,"[carrots, pasta, soda, chicken]"


## Preprocessing: One-Hot Encoding

`Student: Ambachow`

`Editor: Misati`

In [139]:
# [Student: Ambachow] Load the dataset
df_transactions = pd.read_csv('supermarket_transactions.csv')

In [140]:
# [Student: Ambachow] Create a set of all unique items across all transactions
unique_items = list(set(item for transaction in df_transactions['Transaction'] for item in eval(transaction)))

# [Student: Ambachow] Create a DataFrame for one-hot encoding
# [Editor: Misati] Using booleans for faster inference
df_one_hot = pd.DataFrame(False, index=range(len(df_transactions)), columns=unique_items)

# [Student: Ambachow] Populate the one-hot DataFrame
# [Editor: Misati] Using booleans for faster inference
for idx, transaction in enumerate(df_transactions['Transaction']):
    items = eval(transaction)  # Convert string representation of list to actual list
    for item in items:
        df_one_hot.loc[idx, item] = True

In [141]:
# [Student: Ambachow] Preview the one-hot encoded DataFrame
df_one_hot.head()

Unnamed: 0,pasta,sugar,soda,bananas,cheese,eggs,beef,onions,potatoes,juice,...,pepper,tomatoes,tea,apples,carrots,biscuits,rice,chicken,butter,cereal
0,False,False,False,False,False,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
3,True,False,False,False,True,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False
4,True,False,True,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,True,False,False


### Save One-Hot Encoded Dataframe

In [142]:
df_one_hot.to_csv('supermarket_one_hot_encoded.csv', index=False)

## Generate Frequent Itemsets

`Student: Paul`


In [147]:
from mlxtend.frequent_patterns import apriori

# Generate frequent itemsets
frequent_itemsets = apriori(df_one_hot, min_support=MIN_SUP, use_colnames=True)

# Sort by support
frequent_itemsets.sort_values(by='support', ascending=False, inplace=True)

# Save to CSV
frequent_itemsets.to_csv('frequent_itemsets.csv', index=False)
print(frequent_itemsets.head())



     support      itemsets
10  0.172667  (toothpaste)
11  0.161333        (milk)
21  0.157000    (tomatoes)
2   0.155667        (soda)
7   0.154667      (onions)


## Identify Closed Frequent Itemsets

`Student: Misati`

In [148]:
# [Student: Misati] Compute the support count
support_count = defaultdict(int)
for t in transactions:
    for item in t:
        support_count[frozenset([item])] += 1
    for i1 in t:
        for i2 in t:
            if i1 < i2:
                support_count[frozenset([i1, i2])] += 1
    if len(t) == 3:
        support_count[frozenset(t)] += 1

In [149]:
# [Student: Misati] Find closed itemsets
closed_itemsets = []
for itemset in support_count:
    is_closed = True
    for other in support_count:
        if itemset < other and support_count[itemset] == support_count[other]:
            is_closed = False
            break
    if is_closed:
        closed_itemsets.append((set(itemset), support_count[itemset]))

closed_itemsets = sorted(closed_itemsets, key=lambda x: x[1], reverse = True)

In [150]:
# [Student: Misati] Converting to a Pandas dataframe
column_names = ['Itemset', 'Support']
df_closed_frequent_itemsets = pd.DataFrame(closed_itemsets, columns=column_names)

In [151]:
# [Student: Misati] Display the top 5 closed itemsets
df_closed_frequent_itemsets.head()

Unnamed: 0,Itemset,Support
0,{toothpaste},518
1,{milk},484
2,{tomatoes},471
3,{soda},467
4,{onions},464


## Identify Maximal Frequent Itemsets

`Student: Chawanda`

## Submission Files

`Student: Gathogo`