In [1]:
import pandas as pd

In [2]:
import os

In [3]:

print(os.getcwd())


C:\NJIT\sem2\Data_Mining


In [4]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from itertools import combinations
import time

# Define the path to the CSV file (adjust this path as needed)
file_path = 'prajwal_mid/grocery_transactions.csv'

# Function to read integrated transactions
def read_integrated_transactions(file):
    with open(file, 'r') as f:
        content = f.read().splitlines()

    unique_items = []
    datasets = {}
    current_store = None

    for line in content:
        line = line.strip()

        if line == "Unique Items":
            continue  # Skip the header for unique items
        elif "Transactions" in line:
            if current_store is not None:
                current_store = line.replace(" Transactions", "").strip()
                datasets[current_store] = []
            else:
                current_store = line.replace(" Transactions", "").strip()
                datasets[current_store] = []
        elif current_store and line:
            cleaned_line = line.replace('"', '').strip()
            datasets[current_store].append(cleaned_line.split(','))

    return unique_items, datasets

# Brute force frequent itemsets
def brute_force_frequent_itemsets(data, min_support):
    itemsets = []
    total_transactions = len(data)
    for i in range(1, len(data.columns) + 1):
        for combo in combinations(data.columns, i):
            support_count = data[list(combo)].all(axis=1).sum()
            support = support_count / total_transactions
            if support >= min_support:
                itemsets.append((list(combo), support))
    return pd.DataFrame(itemsets, columns=['itemsets', 'support'])

# Process function
def process_store(transactions, min_support, min_confidence):
    encoder = TransactionEncoder()
    onehot = encoder.fit(transactions).transform(transactions)
    df = pd.DataFrame(onehot, columns=encoder.columns_)

    min_support_fraction = min_support / 100
    min_confidence_fraction = min_confidence / 100

    start_time = time.time()
    brute_force_freq_itemsets = brute_force_frequent_itemsets(df, min_support_fraction)
    print("\nBrute Force Frequent Itemsets:")
    display(brute_force_freq_itemsets)

    if not brute_force_freq_itemsets.empty:
        brute_force_rules = association_rules(brute_force_freq_itemsets, metric="confidence", min_threshold=min_confidence_fraction)
        print("\nBrute Force Association Rules:")
        display(brute_force_rules)

    start_time_apriori = time.time()
    frequent_itemsets_apriori = apriori(df, min_support=min_support_fraction, use_colnames=True)
    print("\nApriori Frequent Itemsets:")
    display(frequent_itemsets_apriori)

    if not frequent_itemsets_apriori.empty:
        rules_apriori = association_rules(frequent_itemsets_apriori, metric="confidence", min_threshold=min_confidence_fraction)
        print("\nApriori Association Rules:")
        display(rules_apriori)

    print("\nExecution Time:")
    print("Brute Force: {:.4f} seconds".format(time.time() - start_time))
    print("Apriori: {:.4f} seconds".format(time.time() - start_time_apriori))

# Load datasets
unique_items, datasets = read_integrated_transactions(file_path)

# List available stores
print("Available Stores:")
for i, name in enumerate(datasets.keys()):
    print(f"{i + 1}: {name}")

# In JupyterLab, you can now select the store and parameters in another cell:
# Example values:
store_index = 0  # Change to 1, 2, etc. based on the printed list above
min_support = 20  # As a percentage
min_confidence = 50  # As a percentage

store_name = list(datasets.keys())[store_index]
print(f"\nSelected store: {store_name}")
print(f"\nTransactions for selected store:")
display(pd.DataFrame(datasets[store_name]))

# Process the store
process_store(datasets[store_name], min_support, min_confidence)


Available Stores:
1: Walmart
2: Kroger
3: Safeway
4: Whole Foods Market
5: Trader Joe's

Selected store: Walmart

Transactions for selected store:


Unnamed: 0,0,1,2,3
0,milk,bread,butter,
1,bread,diaper,beer,egg
2,milk,diaper,beer,coke
3,milk,bread,diaper,butter
4,bread,milk,coke,
5,milk,bread,egg,butter
6,diaper,beer,milk,
7,bread,butter,egg,coke
8,milk,bread,butter,diaper
9,coke,bread,milk,beer



Brute Force Frequent Itemsets:


Unnamed: 0,itemsets,support
0,[beer],0.375
1,[bread],0.8125
2,[butter],0.4375
3,[coke],0.3125
4,[diaper],0.5
5,[egg],0.25
6,[milk],0.6875
7,"[beer, milk]",0.3125
8,"[bread, butter]",0.375
9,"[bread, coke]",0.25



Brute Force Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(beer),(milk),0.375,0.6875,0.3125,0.833333,1.212121,1.0,0.054688,1.875,0.28,0.416667,0.466667,0.643939
1,(butter),(bread),0.4375,0.8125,0.375,0.857143,1.054945,1.0,0.019531,1.3125,0.092593,0.428571,0.238095,0.659341
2,(coke),(bread),0.3125,0.8125,0.25,0.8,0.984615,1.0,-0.003906,0.9375,-0.022222,0.285714,-0.066667,0.553846
3,(diaper),(bread),0.5,0.8125,0.375,0.75,0.923077,1.0,-0.03125,0.75,-0.142857,0.4,-0.333333,0.605769
4,(egg),(bread),0.25,0.8125,0.25,1.0,1.230769,1.0,0.046875,inf,0.25,0.307692,1.0,0.653846
5,(bread),(milk),0.8125,0.6875,0.5,0.615385,0.895105,1.0,-0.058594,0.8125,-0.384615,0.5,-0.230769,0.671329
6,(milk),(bread),0.6875,0.8125,0.5,0.727273,0.895105,1.0,-0.058594,0.6875,-0.272727,0.5,-0.454545,0.671329
7,(butter),(milk),0.4375,0.6875,0.3125,0.714286,1.038961,1.0,0.011719,1.09375,0.066667,0.384615,0.085714,0.584416
8,(diaper),(milk),0.5,0.6875,0.3125,0.625,0.909091,1.0,-0.03125,0.833333,-0.166667,0.357143,-0.2,0.539773
9,"(bread, butter)",(milk),0.375,0.6875,0.25,0.666667,0.969697,1.0,-0.007812,0.9375,-0.047619,0.307692,-0.066667,0.515152



Apriori Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.375,(beer)
1,0.8125,(bread)
2,0.4375,(butter)
3,0.3125,(coke)
4,0.5,(diaper)
5,0.25,(egg)
6,0.6875,(milk)
7,0.3125,"(beer, milk)"
8,0.375,"(bread, butter)"
9,0.25,"(bread, coke)"



Apriori Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(beer),(milk),0.375,0.6875,0.3125,0.833333,1.212121,1.0,0.054688,1.875,0.28,0.416667,0.466667,0.643939
1,(butter),(bread),0.4375,0.8125,0.375,0.857143,1.054945,1.0,0.019531,1.3125,0.092593,0.428571,0.238095,0.659341
2,(coke),(bread),0.3125,0.8125,0.25,0.8,0.984615,1.0,-0.003906,0.9375,-0.022222,0.285714,-0.066667,0.553846
3,(diaper),(bread),0.5,0.8125,0.375,0.75,0.923077,1.0,-0.03125,0.75,-0.142857,0.4,-0.333333,0.605769
4,(egg),(bread),0.25,0.8125,0.25,1.0,1.230769,1.0,0.046875,inf,0.25,0.307692,1.0,0.653846
5,(bread),(milk),0.8125,0.6875,0.5,0.615385,0.895105,1.0,-0.058594,0.8125,-0.384615,0.5,-0.230769,0.671329
6,(milk),(bread),0.6875,0.8125,0.5,0.727273,0.895105,1.0,-0.058594,0.6875,-0.272727,0.5,-0.454545,0.671329
7,(butter),(milk),0.4375,0.6875,0.3125,0.714286,1.038961,1.0,0.011719,1.09375,0.066667,0.384615,0.085714,0.584416
8,(diaper),(milk),0.5,0.6875,0.3125,0.625,0.909091,1.0,-0.03125,0.833333,-0.166667,0.357143,-0.2,0.539773
9,"(bread, butter)",(milk),0.375,0.6875,0.25,0.666667,0.969697,1.0,-0.007812,0.9375,-0.047619,0.307692,-0.066667,0.515152



Execution Time:
Brute Force: 0.0959 seconds
Apriori: 0.0230 seconds
