In [5]:
import pandas as pd
import itertools
import time

def load_transactions(file_path):
    """Read the second CSV column and return one set of item names per row.

    The first line of the file is skipped and the column at index 1 is read
    under the name 'items'. Each cell is split on ", " (comma followed by a
    space) and the pieces are collected into a set.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        list[set[str]]: One set per row, in file order.
    """
    frame = pd.read_csv(file_path, names=['items'], usecols=[1], skiprows=1)

    baskets = []
    for raw_items in frame['items']:
        baskets.append(set(raw_items.split(", ")))
    return baskets

def calculate_support(itemset, transactions):
    """Count the transactions that contain every element of ``itemset``.

    Args:
        itemset: A set; its ``issubset`` method is used for the containment test.
        transactions: Iterable of transactions (each an iterable of items).

    Returns:
        int: Number of transactions of which ``itemset`` is a subset.
    """
    matches = 0
    for basket in transactions:
        if itemset.issubset(basket):
            matches += 1
    return matches

def find_frequent_itemsets(transactions, min_support, max_length=3):
    """Return every itemset of up to ``max_length`` items that meets ``min_support``.

    Candidates larger than one item are built only from items that are
    frequent on their own. Adding items to a set can never raise its support,
    so an itemset containing an infrequent item cannot be frequent; skipping
    those candidates changes neither the result nor its ordering, it only
    avoids counting combinations that are guaranteed to fail.

    Args:
        transactions: Re-iterable collection of transactions (sets of items).
        min_support: Minimum absolute support count an itemset must reach.
        max_length: Largest itemset size to consider.

    Returns:
        list[tuple[set, int]]: ``(itemset, support_count)`` pairs, grouped by
        increasing itemset size.
    """
    frequent_itemsets = []
    if max_length < 1:
        return frequent_itemsets

    items_pool = {item for transaction in transactions for item in transaction}

    # Level 1: count single items and remember which ones survive, keeping the
    # pool's iteration order so larger candidates come out in the same order
    # as an exhaustive enumeration would produce them.
    frequent_items = []
    for item in items_pool:
        single = {item}
        support_count = calculate_support(single, transactions)
        if support_count >= min_support:
            frequent_items.append(item)
            frequent_itemsets.append((single, support_count))

    # Levels 2..max_length: combine surviving items only.
    for size in range(2, max_length + 1):
        found_at_this_size = False
        for combination in itertools.combinations(frequent_items, size):
            candidate = set(combination)
            support_count = calculate_support(candidate, transactions)
            if support_count >= min_support:
                frequent_itemsets.append((candidate, support_count))
                found_at_this_size = True
        # Every larger itemset contains an itemset of this size, so an empty
        # level means no bigger itemset can qualify either.
        if not found_at_this_size:
            break

    return frequent_itemsets

def generate_rules(frequent_itemsets, transactions, min_confidence):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every frequent itemset, each non-empty proper subset is tried as the
    antecedent and the remaining items form the consequent. Confidence is the
    itemset's support count divided by the antecedent's support count (0 when
    the antecedent never occurs).

    Args:
        frequent_itemsets: Iterable of ``(itemset, support_count)`` pairs.
        transactions: Transactions used to count antecedent support.
        min_confidence: Rules with confidence below this value are dropped.

    Returns:
        list[tuple[set, set, float]]: ``(antecedent, consequent, confidence)``.
    """
    rules = []

    for itemset, support_count in frequent_itemsets:
        antecedent_sizes = range(1, len(itemset))
        for subset_size in antecedent_sizes:
            for members in itertools.combinations(itemset, subset_size):
                antecedent = set(members)
                consequent = itemset - antecedent
                if not consequent:
                    continue

                antecedent_support = calculate_support(antecedent, transactions)
                if antecedent_support > 0:
                    confidence = support_count / antecedent_support
                else:
                    confidence = 0

                if confidence >= min_confidence:
                    rules.append((antecedent, consequent, confidence))

    return rules

def process_transactions(file_path, min_support, min_confidence):
    """Load one CSV, mine its frequent itemsets and derive association rules.

    Args:
        file_path: CSV path handed to ``load_transactions``.
        min_support: Minimum support as a fraction of the transaction count.
        min_confidence: Minimum confidence a rule must reach to be kept.

    Returns:
        tuple: ``(frequent_itemsets, association_rules)`` as returned by
        ``find_frequent_itemsets`` and ``generate_rules``.
    """
    import math  # local import keeps this block independent of the cell's import list

    transactions = load_transactions(file_path)

    # Convert the fractional threshold to the smallest absolute count that
    # still satisfies count / n >= min_support. Truncating with int() rounds
    # the threshold down (0.05 of 30 transactions became 1, i.e. 3.3%), which
    # admits itemsets below the requested support. The small tolerance stops
    # float noise such as 0.07 * 100 == 7.000000000000001 from rounding up to 8.
    min_support_count = math.ceil(min_support * len(transactions) - 1e-9)

    frequent_itemsets = find_frequent_itemsets(transactions, min_support_count)
    association_rules = generate_rules(frequent_itemsets, transactions, min_confidence)

    return frequent_itemsets, association_rules

# Start execution timer. It is read once, after the loop below, so the reported
# figure covers loading, mining and printing for all datasets combined.
start_time = time.time()

# Provided datasets: display name -> CSV path. These are absolute Windows paths,
# so they have to be edited before the cell can run on another machine.
datasets = {
    'Target': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\target_data_new.csv',
    'Costco': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\costco_data_new.csv',
    '7-Eleven': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\seven_eleven_data.csv',
    'ShopRite': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\shoprite_data.csv',
    'K-Mart': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\kmart_data.csv'
}

# Minimum support and confidence. min_support is a fraction of the transaction
# count (process_transactions converts it to an absolute count); min_confidence
# is compared directly against each rule's confidence ratio.
min_support = 0.05
min_confidence = 0.3

# Analyze each dataset and print its complete itemset and rule lists.
for store, file_path in datasets.items():
    itemsets, rules = process_transactions(file_path, min_support, min_confidence)
    print(f"Results for {store}:")
    print("Frequent Itemsets:", itemsets)
    print("Association Rules:", rules)
    print("\n")

# Total execution time (wall-clock seconds since start_time, two decimals)
print(f"Total Execution Time: {time.time() - start_time:.2f} seconds")


Results for Target:
Frequent Itemsets: [({'Toys'}, 278), ({'Gardening Supplies'}, 287), ({'Clothes'}, 287), ({'Bedding'}, 291), ({'Furniture'}, 329), ({'Beauty Products'}, 292), ({'Stationery'}, 320), ({'Groceries'}, 286), ({'Electronics'}, 309), ({'Cleaning Supplies'}, 295), ({'Toys', 'Gardening Supplies'}, 71), ({'Toys', 'Clothes'}, 77), ({'Toys', 'Bedding'}, 73), ({'Furniture', 'Toys'}, 82), ({'Beauty Products', 'Toys'}, 77), ({'Stationery', 'Toys'}, 87), ({'Groceries', 'Toys'}, 82), ({'Toys', 'Electronics'}, 91), ({'Toys', 'Cleaning Supplies'}, 74), ({'Clothes', 'Gardening Supplies'}, 89), ({'Gardening Supplies', 'Bedding'}, 85), ({'Furniture', 'Gardening Supplies'}, 97), ({'Beauty Products', 'Gardening Supplies'}, 82), ({'Stationery', 'Gardening Supplies'}, 95), ({'Groceries', 'Gardening Supplies'}, 83), ({'Electronics', 'Gardening Supplies'}, 87), ({'Gardening Supplies', 'Cleaning Supplies'}, 89), ({'Clothes', 'Bedding'}, 80), ({'Furniture', 'Clothes'}, 89), ({'Beauty Products', 

In [22]:
import pandas as pd
from time import time
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

def load_and_filter_transactions(file_path, min_items=2):
    """Load transactions from a CSV and keep those with at least ``min_items`` items.

    Each cell of the 'Filtered Transaction' column is split on ", " into a
    list of item names; lists shorter than ``min_items`` are discarded.

    Args:
        file_path: CSV path passed to ``pd.read_csv``.
        min_items: Minimum number of items a transaction needs to be kept.

    Returns:
        list[list[str]]: The surviving transactions, in file order.
    """
    frame = pd.read_csv(file_path)

    kept = []
    for raw_transaction in frame['Filtered Transaction']:
        items = raw_transaction.split(', ')
        if len(items) >= min_items:
            kept.append(items)
    return kept

def perform_transaction_analysis(file_path, min_support, min_confidence):
    """Mine one dataset with Apriori and print its transactions, rules and timing.

    Args:
        file_path: CSV path handed to ``load_and_filter_transactions``.
        min_support: Passed to ``apriori`` as ``min_support``.
        min_confidence: Passed to ``association_rules`` as ``min_threshold``
            for the "confidence" metric.

    Returns:
        None. All results are printed.
    """
    # Local import so the block does not depend on another cell's imports.
    from pathlib import PureWindowsPath

    start_time = time()  # Start timer for each analysis

    # Load and filter transactions
    transactions = load_and_filter_transactions(file_path)

    # Encode transactions as a one-hot boolean table, one column per item
    encoder = TransactionEncoder()
    transaction_array = encoder.fit(transactions).transform(transactions)
    transaction_df = pd.DataFrame(transaction_array, columns=encoder.columns_)

    # Find frequent itemsets using the Apriori algorithm
    frequent_itemsets = apriori(transaction_df, min_support=min_support, use_colnames=True)

    # Generate association rules from the frequent itemsets
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

    # Show only the file name in the heading. Splitting on '/' left
    # backslash-separated Windows paths untouched, so the whole path was
    # printed; PureWindowsPath accepts both separators on any OS.
    file_name = PureWindowsPath(file_path).name
    print(f"Filtered Transactions from {file_name}:")
    for transaction in transactions:
        print(transaction)

    # Print the generated association rules
    print("\nGenerated Association Rules:")
    print(rules[['antecedents', 'consequents', 'support', 'confidence']])

    # Print the time taken for this dataset's analysis
    print(f"\nTotal Execution Time: {time() - start_time:.2f} seconds\n")

def execute_analysis():
    """Prompt for datasets and thresholds, then run the Apriori analysis on each.

    Dataset names are matched case-insensitively, so "Shoprite" selects
    'ShopRite'. Blank entries produced by stray commas are ignored. Unknown
    names are reported and skipped.

    Returns:
        None. Results are printed by ``perform_transaction_analysis``.
    """
    # Local import so the block does not depend on another cell's imports.
    from pathlib import PureWindowsPath

    datasets = {
        'Target': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\target_data_new.csv',
        'Costco': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\costco_data_new.csv',
        '7-Eleven': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\seven_eleven_data.csv',
        'ShopRite': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\shoprite_data.csv',
        'K-Mart': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\kmart_data.csv'
    }

    # Display the available datasets to the user. PureWindowsPath understands
    # both '\\' and '/' separators, so only the file name is shown; splitting
    # on '/' printed the full backslash-separated path.
    print("Please select dataset(s):")
    for key, value in datasets.items():
        print(f"{key} - {PureWindowsPath(value).name}")

    # Accept user input for selected datasets and analysis parameters
    selected_datasets = input("Enter your choices (e.g., Target, 7-Eleven): ").split(',')
    min_support = float(input("Enter minimum support value (e.g., 0.05): "))
    min_confidence = float(input("Enter minimum confidence value (e.g., 0.5): "))

    # Case-folded name -> canonical key, for forgiving lookups.
    canonical_names = {name.casefold(): name for name in datasets}

    # Perform analysis on selected datasets
    for dataset in selected_datasets:
        dataset = dataset.strip()  # Remove any leading/trailing spaces
        if not dataset:
            continue  # e.g. trailing comma in "Target,"
        canonical = canonical_names.get(dataset.casefold())
        if canonical is not None:
            perform_transaction_analysis(datasets[canonical], min_support, min_confidence)
        else:
            print(f"Dataset {dataset} not found. Please check your input.")

# Entry point: start the interactive analysis only when this code executes as
# the top-level module (__name__ == "__main__"), not when it is imported.
if __name__ == "__main__":
    execute_analysis()


Please select dataset(s):
Target - target_data_new.csv
Costco - costco_data_new.csv
7-Eleven - seven_eleven_data.csv
ShopRite - shoprite_data.csv
K-Mart - kmart_data.csv
Enter your choices (e.g., Target, 7-Eleven): Target,Shoprite
Enter minimum support value (e.g., 0.05): 0.05
Enter minimum confidence value (e.g., 0.5): 0.3
Filtered Transactions from target_data_new.csv:
['Beauty Products', 'Bedding', 'Gardening Supplies', 'Electronics']
['Groceries', 'Gardening Supplies', 'Cleaning Supplies', 'Bedding']
['Furniture', 'Electronics', 'Groceries', 'Gardening Supplies', 'Bedding']
['Furniture', 'Cleaning Supplies', 'Clothes']
['Furniture', 'Clothes', 'Bedding', 'Groceries', 'Electronics']
['Clothes', 'Gardening Supplies', 'Stationery', 'Toys']
['Cleaning Supplies', 'Toys']
['Stationery', 'Cleaning Supplies', 'Electronics', 'Gardening Supplies', 'Clothes']
['Stationery', 'Groceries']
['Groceries', 'Electronics', 'Gardening Supplies', 'Furniture', 'Beauty Products']
['Gardening Supplies', '

In [21]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules
import time

def load_transactions(file_path):
    """Load the 'Filtered Transaction' column as a list of item lists.

    Args:
        file_path: CSV path passed to ``pd.read_csv``.

    Returns:
        list: One entry per row, each the row's text split on ", ".
    """
    frame = pd.read_csv(file_path)
    raw_column = frame['Filtered Transaction']
    # Split every cell on comma-plus-space, then hand back plain Python objects
    split_column = raw_column.str.split(', ')
    return split_column.tolist()

def perform_analysis(file_path, min_support, min_confidence):
    """Mine one dataset with FP-Growth and print its itemsets, rules and timing.

    Args:
        file_path: CSV path handed to ``load_transactions``.
        min_support: Passed to ``fpgrowth`` as ``min_support``.
        min_confidence: Passed to ``association_rules`` as ``min_threshold``
            for the "confidence" metric.

    Returns:
        None. All results are printed.
    """
    # Local import so the block does not depend on another cell's imports.
    from pathlib import PureWindowsPath

    start_time = time.time()  # Start timing the analysis for this dataset

    # Load the transaction data
    transactions = load_transactions(file_path)

    # Encode the transactions as a one-hot boolean table, one column per item
    encoder = TransactionEncoder()
    transformed_data = encoder.fit(transactions).transform(transactions)
    transactions_df = pd.DataFrame(transformed_data, columns=encoder.columns_)

    # Apply the fpgrowth algorithm to find frequent itemsets
    frequent_itemsets = fpgrowth(transactions_df, min_support=min_support, use_colnames=True)

    # Derive association rules from the frequent itemsets
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

    # Show only the file name in the heading. Splitting on '/' left
    # backslash-separated Windows paths untouched, so the whole path was
    # printed; PureWindowsPath accepts both separators on any OS.
    file_name = PureWindowsPath(file_path).name
    print(f"\nFrequent Itemsets from {file_name}:")
    print(frequent_itemsets)
    print("\nGenerated Association Rules:")
    print(rules[['antecedents', 'consequents', 'support', 'confidence']])
    print("\n")

    # Print execution time for this dataset in the required format
    print(f"Total Execution Time: {time.time() - start_time:.2f} seconds\n")

def execute_analysis():
    """Prompt for datasets and thresholds, then run the FP-Growth analysis on each.

    The menu numbers datasets 1..N in the order they appear in ``datasets``.
    A choice may be one of those numbers or a dataset name exactly as listed;
    anything else is reported as invalid and skipped.

    Returns:
        None. Results are printed by ``perform_analysis``.
    """
    # Dictionary mapping dataset names to file paths
    datasets = {
        'Target': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\target_data_new.csv',
        'Costco': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\costco_data_new.csv',
        '7-Eleven': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\seven_eleven_data.csv',
        'ShopRite': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\shoprite_data.csv',
        'K-Mart': 'C:\\Users\\anuve\\Downloads\\dataset2\\dataset2\\kmart_data.csv'
    }

    # Menu number (as typed, i.e. a string) -> dataset name. Built from the
    # dict itself so the menu and the lookup cannot drift apart.
    names_by_number = {
        str(index): name for index, name in enumerate(datasets, start=1)
    }

    # Display options to the user
    print("Choose dataset(s) for analysis:")
    for number, name in names_by_number.items():
        print(f"{number} - {name}")

    # Accept user input for datasets and parameters
    selected_datasets = input("Enter dataset numbers (space-separated, e.g., 1 3): ").split()
    min_support_value = float(input("Enter minimum support (e.g., 0.05): "))
    min_confidence_value = float(input("Enter minimum confidence (e.g., 0.5): "))

    # Perform analysis for each selected dataset
    for dataset_key in selected_datasets:
        # Translate a menu number to its name; pass anything else through so
        # a dataset name typed directly keeps working.
        dataset_name = names_by_number.get(dataset_key, dataset_key)
        if dataset_name in datasets:
            perform_analysis(datasets[dataset_name], min_support_value, min_confidence_value)
        else:
            print(f"Invalid choice: {dataset_key}")

# Entry point: start the interactive analysis only when this code executes as
# the top-level module (__name__ == "__main__"), not when it is imported.
if __name__ == "__main__":
    execute_analysis()


Choose dataset(s) for analysis:
1 - Target
2 - Costco
3 - 7-Eleven
4 - ShopRite
5 - K-Mart
Enter dataset numbers (space-separated, e.g., 1 3): 1 3
Enter minimum support (e.g., 0.05): 0.05
Enter minimum confidence (e.g., 0.5): 0.3

Frequent Itemsets from target_data_new.csv:
    support                                 itemsets
0     0.309                            (Electronics)
1     0.292                        (Beauty Products)
2     0.291                                (Bedding)
3     0.287                     (Gardening Supplies)
4     0.295                      (Cleaning Supplies)
5     0.286                              (Groceries)
6     0.329                              (Furniture)
7     0.287                                (Clothes)
8     0.320                             (Stationery)
9     0.278                                   (Toys)
10    0.098                 (Furniture, Electronics)
11    0.099                (Stationery, Electronics)
12    0.092           (Beauty Produc