In [None]:
import os
import csv
import pandas as pd
import time
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Map each dataset name to its CSV file; every file is named "<NAME>.csv"
# and is given as a relative path.
dataset_files = {
    dataset_name: f"{dataset_name}.csv"
    for dataset_name in ("AMAZON", "COSTCO", "DMART", "WALMART", "KMART")
}

class TransactionAnalyzer:
    """Load transactions from a CSV file and mine frequent itemsets from them.

    Each CSV row is treated as one transaction and each non-empty cell as one
    item. The loaded transactions are stored in ``self.transactions`` as a
    list of lists of strings.
    """

    def __init__(self, filepath):
        """Remember *filepath* and immediately load its transactions."""
        self.filepath = filepath
        self.transactions = self.extract_transactions()

    def extract_transactions(self):
        """Load transaction data from CSV.

        Returns:
            A list with one entry per CSV row; each entry is the list of
            non-empty cells of that row.
        """
        with open(self.filepath, newline='') as file:
            reader = csv.reader(file)
            return [[cell for cell in row if cell] for row in reader]

    def compute_frequent_itemsets(self, support_min):
        """Brute force method for generating frequent itemsets.

        Args:
            support_min: Minimum support as a fraction of all transactions
                (e.g. ``0.2`` for 20%).

        Returns:
            A dict keyed by itemset size ``k``. ``result[1]`` maps each
            frequent item to its transaction count; ``result[k]`` for
            ``k >= 2`` maps each frequent k-tuple of items to its count.
            Sizes with no frequent itemsets (other than 1) are omitted.
        """
        total = len(self.transactions)

        # Count every item at most once per transaction so that a repeated
        # item inside a single row cannot inflate its support.
        # dict.fromkeys de-duplicates while keeping first-seen order.
        item_frequency = {}
        for transaction in self.transactions:
            for item in dict.fromkeys(transaction):
                item_frequency[item] = item_frequency.get(item, 0) + 1

        frequent_sets = {
            1: {
                item: count
                for item, count in item_frequency.items()
                if count / total >= support_min
            }
        }

        # Candidates of every size are drawn from the frequent single items:
        # an itemset can only be frequent if each of its items is frequent.
        frequent_items = list(frequent_sets[1])
        baskets = [set(transaction) for transaction in self.transactions]

        k = 2
        while True:
            # Each candidate is a flat k-tuple of items (never a tuple of
            # smaller tuples), paired with a frozenset for subset testing.
            candidates = [
                (combo, frozenset(combo))
                for combo in combinations(frequent_items, k)
            ]
            current_count = {}
            for basket in baskets:
                for combo, members in candidates:
                    if members <= basket:
                        current_count[combo] = current_count.get(combo, 0) + 1

            level = {
                combo: count
                for combo, count in current_count.items()
                if count / total >= support_min
            }
            # No frequent k-itemsets means no larger ones can exist either.
            if not level:
                break
            frequent_sets[k] = level
            k += 1

        return frequent_sets

    def _encode_transactions(self):
        """Return the transactions one-hot encoded as a boolean DataFrame."""
        encoder = TransactionEncoder()
        transformed_data = encoder.fit(self.transactions).transform(self.transactions)
        return pd.DataFrame(transformed_data, columns=encoder.columns_)

    def run_apriori(self, support_min, confidence_min):
        """Run the Apriori algorithm.

        Args:
            support_min: Minimum support passed to ``apriori``.
            confidence_min: Minimum confidence used to filter the rules.

        Returns:
            A ``(frequent_itemsets, rules)`` tuple as produced by mlxtend's
            ``apriori`` and ``association_rules``.
        """
        df_transactions = self._encode_transactions()

        frequent_itemsets = apriori(df_transactions, min_support=support_min, use_colnames=True)
        rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_min)

        return frequent_itemsets, rules

    def run_fpgrowth(self, support_min, confidence_min):
        """Run the FP-Growth algorithm.

        Args:
            support_min: Minimum support passed to ``fpgrowth``.
            confidence_min: Minimum confidence used to filter the rules.

        Returns:
            A ``(frequent_itemsets, rules)`` tuple as produced by mlxtend's
            ``fpgrowth`` and ``association_rules``.
        """
        df_transactions = self._encode_transactions()

        frequent_itemsets = fpgrowth(df_transactions, min_support=support_min, use_colnames=True)
        rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_min)

        return frequent_itemsets, rules

def time_algorithm_execution(algorithm_func, *params):
    """Measure execution time of an algorithm.

    Args:
        algorithm_func: Callable to run.
        *params: Positional arguments forwarded to ``algorithm_func``.

    Returns:
        A ``(output, elapsed_time)`` tuple: whatever ``algorithm_func``
        returned, and the elapsed time in seconds as a float.
    """
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump with system clock
    # adjustments and may be too coarse for sub-millisecond runs.
    start_time = time.perf_counter()
    output = algorithm_func(*params)
    elapsed_time = time.perf_counter() - start_time
    return output, elapsed_time

def main_menu():
    """User interaction interface.

    Repeatedly shows the dataset menu, loads the chosen dataset, asks for
    support and confidence thresholds (entered as percentages), then runs and
    times the brute-force, Apriori and FP-Growth analyses, printing each
    result. The loop ends when the user enters 0 or answers anything other
    than "yes" to the retry prompt. Invalid numeric input and unreadable
    dataset files are reported and the menu is shown again.
    """
    while True:
        print("\nWelcome to the Data Transaction Analysis Tool!")
        print("Please select a dataset to analyze or exit the program:")

        # List available datasets
        dataset_names = list(dataset_files)
        for index, name in enumerate(dataset_names, start=1):
            print(f"{index}. {name}")
        print("0. Exit")

        try:
            user_input = int(input("Enter the dataset number for analyzing (0 to exit): "))
            if user_input < 0 or user_input > len(dataset_names):
                # No trailing period: the handler below appends its own.
                raise ValueError("Invalid choice. Please select a valid option")

            if user_input == 0:
                print("Thank you for using the program! Goodbye!")
                break

            selected_dataset = dataset_names[user_input - 1]
            try:
                analyzer = TransactionAnalyzer(dataset_files[selected_dataset])
            except OSError as e:
                # A missing or unreadable CSV should not kill the session.
                print(f"Error: could not load dataset {selected_dataset}: {e}. Please try again.")
                continue
            print(f"Successfully loaded {len(analyzer.transactions)} transactions from {selected_dataset}.")

            # Get user-defined thresholds (entered as percentages, stored as fractions)
            support_input = float(input("Please enter the minimum support threshold (Example: 10 for 10%): ")) / 100
            confidence_input = float(input("Please enter the minimum confidence threshold (Example: 10 for 10%): ")) / 100

            print(f"\nAnalyzing dataset: {selected_dataset} with support threshold: {support_input * 100:.2f}% and confidence threshold: {confidence_input * 100:.2f}%...")

            # Brute Force Frequent Itemsets
            brute_force_result, bf_execution_time = time_algorithm_execution(analyzer.compute_frequent_itemsets, support_input)
            print(f"\nBrute Force Frequent Itemsets:\n{brute_force_result}")
            print(f"Execution Time (Brute Force): {bf_execution_time:.4f} seconds")

            # Apriori Algorithm
            apriori_result, apriori_time = time_algorithm_execution(analyzer.run_apriori, support_input, confidence_input)
            print(f"\nFrequent Itemsets (Apriori):\n{apriori_result}")
            print(f"Execution Time (Apriori): {apriori_time:.4f} seconds")

            # FP-Growth Algorithm
            fpgrowth_result, fp_execution_time = time_algorithm_execution(analyzer.run_fpgrowth, support_input, confidence_input)
            print(f"\nFrequent Itemsets (FP-Growth):\n{fpgrowth_result}")
            print(f"Execution Time (FP-Growth): {fp_execution_time:.4f} seconds")

            # Ask user if they want to analyze another dataset
            retry = input("\nWould you like to analyze a different dataset? (yes/no): ").strip().lower()
            if retry != 'yes':
                print("Thank you for using the program! Goodbye!")
                break

        except ValueError as e:
            # Covers non-numeric input, an out-of-range menu choice, and any
            # ValueError raised while running the analyses.
            print(f"Error: {e}. Please try again.")

# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_menu()



Welcome to the Data Transaction Analysis Tool!
Please select a dataset to analyze or exit the program:
1. AMAZON
2. COSTCO
3. DMART
4. WALMART
5. KMART
0. Exit
Enter the dataset number for analyzing (0 to exit): 1
Successfully loaded 20 transactions from AMAZON.
Please enter the minimum support threshold (Example: 10 for 10%): 20
Please enter the minimum confidence threshold (Example: 10 for 10%): 20

Analyzing dataset: AMAZON with support threshold: 20.00% and confidence threshold: 20.00%...

Brute Force Frequent Itemsets:
{1: {'Cereal': 11, 'Detergent': 11, 'Shampoo': 10, 'Coffee': 10, 'Bread': 6, 'Milk': 8, 'Soap': 7, 'Toothpaste': 4, 'Diapers': 6}, 2: {('Cereal', 'Detergent'): 5, ('Cereal', 'Shampoo'): 6, ('Detergent', 'Shampoo'): 7, ('Cereal', 'Coffee'): 4, ('Cereal', 'Bread'): 4, ('Cereal', 'Soap'): 5, ('Shampoo', 'Soap'): 4, ('Shampoo', 'Coffee'): 4, ('Detergent', 'Coffee'): 6, ('Detergent', 'Milk'): 5, ('Coffee', 'Milk'): 6}}
Execution Time (Brute Force): 0.0050 seconds

Frequ