In [1]:
import pandas as pd
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder
import time

In [None]:
import os
import csv
import pandas as pd
import time
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Maps each dataset's menu name to its CSV file. The paths carry no directory
# component, so they are resolved against the current working directory.
file_paths = {
    store: f"{store}.csv"
    for store in ("AMAZON", "COSTCO", "DMART", "WALMART", "KMART")
}

# Extract transactions from CSV files
def load_transactions(file_path):
    """Read a CSV file where each row lists the items of one transaction.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of transactions, each a list of item strings in row order.
        Surrounding whitespace is stripped from every item, empty cells are
        discarded, and rows that end up with no items (blank lines or rows
        of empty cells) are skipped so they do not inflate the transaction
        count used as the support denominator.

    Raises:
        OSError: If the file cannot be opened.
    """
    transactions = []
    # newline='' lets the csv module handle line endings itself.
    with open(file_path, newline='') as csvfile:
        for row in csv.reader(csvfile):
            stripped_cells = (cell.strip() for cell in row)
            items = [cell for cell in stripped_cells if cell]
            if items:
                transactions.append(items)
    return transactions

# Applying Brute Force method to generate frequent itemsets
def generate_frequent_itemsets(transactions, support_threshold):
    """Enumerate frequent itemsets of every size by brute-force counting.

    Level 1 counts individual items. Each further level k builds every
    k-item combination of the items that still occur in a frequent
    (k-1)-itemset and counts how many transactions contain it. The search
    stops at the first level that yields no frequent itemset.

    Args:
        transactions: Iterable-of-lists of hashable items, one list per
            transaction.
        support_threshold: Minimum support as a fraction (e.g. 0.2 for 20%);
            an itemset is kept when count / len(transactions) >= threshold.

    Returns:
        A dict mapping itemset size k to a dict of ``{itemset: count}``.
        Level 1 keys are the bare items; keys at level k >= 2 are k-tuples
        of items. Level 1 is always present (possibly empty); higher levels
        appear only when non-empty.
    """
    # Deduplicate each transaction once; membership tests below reuse these.
    transaction_sets = [set(transaction) for transaction in transactions]
    total = len(transaction_sets)

    item_count = {}
    for transaction in transactions:
        # dict.fromkeys keeps first-seen order while counting each item at
        # most once per transaction (support is per transaction, not per
        # occurrence).
        for item in dict.fromkeys(transaction):
            item_count[item] = item_count.get(item, 0) + 1

    frequent_itemsets = {
        1: {
            item: count
            for item, count in item_count.items()
            if count / total >= support_threshold
        }
    }

    # Items in a stable order so candidate tuples are built consistently.
    frequent_items = list(frequent_itemsets[1])
    surviving_items = set(frequent_items)

    k = 2
    while True:
        # Candidates must be combinations of *items*, not of the previous
        # level's tuples; otherwise nothing beyond pairs can ever match.
        items = [item for item in frequent_items if item in surviving_items]
        candidates = [
            (candidate, frozenset(candidate))
            for candidate in combinations(items, k)
        ]

        item_count = {}
        for transaction_set in transaction_sets:
            for candidate, candidate_set in candidates:
                if candidate_set <= transaction_set:
                    item_count[candidate] = item_count.get(candidate, 0) + 1

        level = {
            itemset: count
            for itemset, count in item_count.items()
            if count / total >= support_threshold
        }
        if not level:
            break

        frequent_itemsets[k] = level
        # Only items that appear in some frequent k-itemset can be part of
        # a frequent (k+1)-itemset.
        surviving_items = {item for itemset in level for item in itemset}
        k += 1
    return frequent_itemsets

# Applying Apriori Algorithm
def apriori_algorithm(transactions, support_threshold, confidence_threshold):
    """Mine frequent itemsets and association rules with mlxtend's Apriori.

    Args:
        transactions: List of transactions, each a list of item labels.
        support_threshold: Minimum support as a fraction, passed to
            ``apriori`` as ``min_support``.
        confidence_threshold: Minimum confidence as a fraction, passed to
            ``association_rules`` as ``min_threshold``.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple of pandas DataFrames. When no
        itemset reaches the support threshold, ``rules`` is an empty
        DataFrame instead of an exception being raised.
    """
    # Encode the transactions as a boolean item-presence table.
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = apriori(df, min_support=support_threshold, use_colnames=True)

    if frequent_itemsets.empty:
        # association_rules rejects an empty itemset frame, so return an
        # empty rules table. The columns are a subset of what
        # association_rules normally produces.
        rules = pd.DataFrame(
            columns=["antecedents", "consequents", "support", "confidence", "lift"]
        )
        return frequent_itemsets, rules

    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_threshold)

    return frequent_itemsets, rules

# Applying FP-Growth Algorithm
def fpgrowth_algorithm(transactions, support_threshold, confidence_threshold):
    """Mine frequent itemsets and association rules with mlxtend's FP-Growth.

    Args:
        transactions: List of transactions, each a list of item labels.
        support_threshold: Minimum support as a fraction, passed to
            ``fpgrowth`` as ``min_support``.
        confidence_threshold: Minimum confidence as a fraction, passed to
            ``association_rules`` as ``min_threshold``.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple of pandas DataFrames. When no
        itemset reaches the support threshold, ``rules`` is an empty
        DataFrame instead of an exception being raised.
    """
    # Encode the transactions as a boolean item-presence table.
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = fpgrowth(df, min_support=support_threshold, use_colnames=True)

    if frequent_itemsets.empty:
        # association_rules rejects an empty itemset frame, so return an
        # empty rules table. The columns are a subset of what
        # association_rules normally produces.
        rules = pd.DataFrame(
            columns=["antecedents", "consequents", "support", "confidence", "lift"]
        )
        return frequent_itemsets, rules

    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_threshold)

    return frequent_itemsets, rules

# Comparing by timing function
def measure_execution_time(algorithm_func, *args, **kwargs):
    """Call ``algorithm_func`` and report how long the call took.

    Args:
        algorithm_func: Callable to time.
        *args: Positional arguments forwarded to ``algorithm_func``.
        **kwargs: Keyword arguments forwarded to ``algorithm_func``.

    Returns:
        A ``(result, elapsed_seconds)`` tuple, where ``result`` is whatever
        ``algorithm_func`` returned and ``elapsed_seconds`` is a float.
    """
    # perf_counter is monotonic and high-resolution, unlike the wall clock,
    # so very short runs are not reported as 0 or as negative durations.
    start_time = time.perf_counter()
    result = algorithm_func(*args, **kwargs)
    elapsed = time.perf_counter() - start_time
    return result, elapsed

# Interactive driver: repeatedly lets the user pick a dataset and thresholds,
# then runs and times the brute-force, Apriori and FP-Growth miners on it.
db_names = list(file_paths)
while True:
    # user defined entry or exit
    print("\nAvailable databases:")
    for i, name in enumerate(db_names, 1):
        print(f"{i}. {name}")
    print("0. Exit")

    raw_choice = input("Enter the number corresponding to the database you'd like to choose (or 0 to exit): ")
    try:
        choice = int(raw_choice)
    except ValueError:
        print("Invalid input. Please enter one of the numbers shown in the menu.")
        continue

    # Exit the loop if the user chooses 0
    if choice == 0:
        print("Exiting the program.")
        break

    # Reject out-of-range values explicitly; a negative number would
    # otherwise index from the end of the list and pick the wrong dataset.
    if not 1 <= choice <= len(db_names):
        print(f"Invalid choice. Please enter a number between 0 and {len(db_names)}.")
        continue

    # selected database
    db_name = db_names[choice - 1]

    # Load the selected transactions
    try:
        transactions = load_transactions(file_paths[db_name])
    except OSError as err:
        print(f"Could not read {file_paths[db_name]}: {err}")
        continue
    print(f"Loaded {len(transactions)} transactions from {db_name}.")

    if not transactions:
        print("The selected dataset contains no transactions.")
        continue

    # user-defined for support and confidence thresholds
    try:
        support_pct = float(input("Enter support threshold in % (e.g., 10 for 10%): "))
        confidence_pct = float(input("Enter confidence threshold in % (e.g., 20 for 20%): "))
    except ValueError:
        print("Invalid threshold. Please enter numeric percentages.")
        continue

    # Support must be strictly positive for the library miners; both values
    # are percentages, so anything above 100 is meaningless.
    if not (0 < support_pct <= 100 and 0 <= confidence_pct <= 100):
        print("Thresholds out of range. Support must be in (0, 100] and confidence in [0, 100].")
        continue

    support_threshold = support_pct / 100
    confidence_threshold = confidence_pct / 100

    print(f"\nProcessing {db_name} with support {support_threshold * 100}% and confidence {confidence_threshold * 100}%...")

    # Brute Force
    bf_result, bf_time = measure_execution_time(generate_frequent_itemsets, transactions, support_threshold)
    print(f"\nBrute Force Frequent Itemsets:\n{bf_result}")
    print(f"Brute Force Time: {bf_time:.4f}s")

    # Apriori
    apriori_result, apriori_time = measure_execution_time(apriori_algorithm, transactions, support_threshold, confidence_threshold)
    print(f"\nApriori Frequent Itemsets:\n{apriori_result[0]}")
    print(f"Apriori Rules:\n{apriori_result[1]}")
    print(f"Apriori Time: {apriori_time:.4f}s")

    # FP-Growth
    fp_result, fp_time = measure_execution_time(fpgrowth_algorithm, transactions, support_threshold, confidence_threshold)
    print(f"\nFP-Growth Frequent Itemsets:\n{fp_result[0]}")
    print(f"FP-Growth Rules:\n{fp_result[1]}")
    print(f"FP-Growth Time: {fp_time:.4f}s")

    # If user wants to analyze different dataset
    continue_choice = input("\nDo you want to analyze another dataset? (yes/no): ").strip().lower()
    if continue_choice not in ('yes', 'y'):
        print("Exiting the program.")
        break


  and should_run_async(code)



Available databases:
1. AMAZON
2. COSTCO
3. DMART
4. WALMART
5. KMART
0. Exit
Enter the number corresponding to the database you'd like to choose (or 0 to exit): 1
Loaded 20 transactions from AMAZON.
Enter support threshold in % (e.g., 10 for 10%): 20
Enter confidence threshold in % (e.g., 20 for 20%): 30

Processing AMAZON with support 20.0% and confidence 30.0%...

Brute Force Frequent Itemsets:
{1: {'Cereal': 11, 'Detergent': 11, 'Shampoo': 10, 'Coffee': 10, 'Bread': 6, 'Milk': 8, 'Soap': 7, 'Toothpaste': 4, 'Diapers': 6}, 2: {('Cereal', 'Detergent'): 5, ('Cereal', 'Shampoo'): 6, ('Detergent', 'Shampoo'): 7, ('Cereal', 'Coffee'): 4, ('Cereal', 'Bread'): 4, ('Cereal', 'Soap'): 5, ('Shampoo', 'Soap'): 4, ('Shampoo', 'Coffee'): 4, ('Detergent', 'Coffee'): 6, ('Detergent', 'Milk'): 5, ('Coffee', 'Milk'): 6}}
Brute Force Time: 0.0070s

Apriori Frequent Itemsets:
    support                   itemsets
0      0.30                    (Bread)
1      0.55                   (Cereal)
2      0.