In [24]:
# Imports & setup
import pandas as pd
import itertools
import time
import os
import warnings

from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
# Install a process-wide filter that hides every RuntimeWarning for the rest of the session.
# NOTE(review): presumably this silences numeric warnings raised inside pandas/mlxtend
# during mining — confirm which call triggers them before relying on the blanket ignore.
warnings.filterwarnings("ignore", category=RuntimeWarning)


In [25]:
# Dataset selection and input for support/confidence

# Candidate CSV files; the chosen one must exist in the notebook's working directory.
datasets = ["Amazon.csv", "Nike.csv", "Generic.csv", "BestBuy.csv", "Kmart.csv"]

print("Available Datasets:")
for i, name in enumerate(datasets, start=1):
    print(f"{i}. {name}")

# Keep prompting until the user enters an integer that indexes into `datasets`.
# The accepted range shown to the user is derived from the list length, so the
# prompt and the error message stay correct when datasets are added or removed.
while True:
    try:
        choice = int(input(f"\nEnter the number of the dataset you want to use (1-{len(datasets)}): ").strip())
    except ValueError:
        print("Invalid input. Please enter a valid number.")
        continue
    if 1 <= choice <= len(datasets):
        dataset_name = datasets[choice - 1]
        break
    print(f"Please enter a number between 1 and {len(datasets)}.")

# Fail fast with a clear message before any cell tries to read the file.
if not os.path.exists(dataset_name):
    raise FileNotFoundError(f"File '{dataset_name}' not found in the notebook working directory.")

def get_float_input(prompt):
    """Prompt repeatedly until the user enters a number in the interval (0, 1].

    Args:
        prompt: Text shown to the user on every attempt.

    Returns:
        The first entered value, as a float, that satisfies 0 < value <= 1.
    """
    while True:
        try:
            value = float(input(prompt).strip())
        except ValueError:
            # Not parseable as a float at all: explain and ask again.
            print("Please enter a numeric value between 0 and 1.")
            continue
        if not (0 < value <= 1):
            # Parsed fine but outside the accepted interval (this also rejects NaN).
            print("Value must be between 0 and 1.")
            continue
        return value

# Ask for both mining thresholds (each validated to lie in (0, 1]),
# then echo the full run configuration back to the user.
min_support = get_float_input(
    "\nEnter minimum support (0–1), e.g. 0.3: "
)
min_conf = get_float_input(
    "Enter minimum confidence (0–1), e.g. 0.6: "
)

print(
    f"\nSelected: {dataset_name}  |  min_support={min_support}  |  min_confidence={min_conf}\n"
)


Available Datasets:
1. Amazon.csv
2. Nike.csv
3. Generic.csv
4. BestBuy.csv
5. Kmart.csv



Enter the number of the dataset you want to use (1-5):  1

Enter minimum support (0–1), e.g. 0.3:  0.3
Enter minimum confidence (0–1), e.g. 0.6:  0.6



Selected: Amazon.csv  |  min_support=0.3  |  min_confidence=0.6



In [26]:
# Load dataset and preprocess transactions

df = pd.read_csv(dataset_name)

# The transaction column is located by header text: the first column whose name
# contains "item" or "trans" (case-insensitive) is used.
possible_cols = [c for c in df.columns if ('item' in c.lower()) or ('trans' in c.lower())]
if not possible_cols:
    raise KeyError(f"Could not find a column containing 'Item' or 'Transaction'. Columns: {list(df.columns)}")
col_name = possible_cols[0]

# Each cell holds one comma-separated basket.
# - Missing cells are dropped before the str cast; otherwise NaN becomes the
#   literal item "nan".
# - Blank tokens (trailing or doubled commas) are discarded; otherwise "" would
#   be treated as an item.
# - Rows left with no items are not counted as transactions.
raw_baskets = df[col_name].dropna().astype(str)
transactions = [
    [token.strip() for token in basket.split(',') if token.strip()]
    for basket in raw_baskets
]
transactions = [t for t in transactions if t]
num_transactions = len(transactions)
if num_transactions == 0:
    raise ValueError(f"Column '{col_name}' in '{dataset_name}' contains no transactions.")

print(f"Loaded dataset: {dataset_name}")
print(f"Total Transactions: {num_transactions}")
print("Sample transactions (up to 5):")
for i in range(min(5, num_transactions)):
    print(f" T{i+1}: {transactions[i]}")

# One-hot encode: one row per transaction, one boolean column per distinct item.
# This frame is the input passed to mlxtend's apriori / fpgrowth in later cells.
all_items = sorted(set(itertools.chain.from_iterable(transactions)))
encoded_vals = [
    {item: (item in basket) for item in all_items}
    for basket in map(set, transactions)
]
df_encoded = pd.DataFrame(encoded_vals)


Loaded dataset: Amazon.csv
Total Transactions: 20
Sample transactions (up to 5):
 T1: ["A Beginner's Guide", 'Java: The Complete Reference', 'Java For Dummies', 'Android Programming: The Big Nerd Ranch']
 T2: ["A Beginner's Guide", 'Java: The Complete Reference', 'Java For Dummies']
 T3: ["A Beginner's Guide", 'Java: The Complete Reference', 'Java For Dummies', 'Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition']
 T4: ['Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition', 'Beginning Programming with Java']
 T5: ['Android Programming: The Big Nerd Ranch', 'Beginning Programming with Java', 'Java 8 Pocket Guide']


In [27]:
# Brute-force
# Level-wise exhaustive search: count every candidate k-itemset against all
# transactions, keep those meeting min_support, and extend each survivor by one
# item to form the (k+1)-candidates. `time` and `itertools` come from the
# notebook's import cell.

print("\nRunning Brute Force Algorithm")
start_time_bf = time.perf_counter()

# Build the transaction sets once instead of once per candidate itemset.
transaction_sets = [frozenset(t) for t in transactions]
items = sorted(set(itertools.chain.from_iterable(transactions)))
frequent_itemsets = []   # [(sorted item list, support fraction), ...]
support_counts = {}      # tuple(sorted items) -> number of containing transactions
levels_report = []       # [(k, [(itemset, support), ...]), ...] printed after timing

k = 1
current_itemsets = [[item] for item in items]

while current_itemsets:
    level_frequent = []
    next_candidates = set()  # a set of tuples de-duplicates candidates reached from different parents
    for itemset in current_itemsets:
        members = frozenset(itemset)
        count = sum(1 for t in transaction_sets if members <= t)
        support = count / num_transactions
        if support >= min_support:
            level_frequent.append((itemset, support))
            support_counts[tuple(itemset)] = count
            next_candidates.update(
                tuple(sorted(members | {new_item}))
                for new_item in items if new_item not in members
            )

    frequent_itemsets.extend(level_frequent)
    levels_report.append((k, level_frequent))
    # Sorting makes candidate (and therefore output) order reproducible across
    # kernels; plain set iteration order depends on string hashing.
    current_itemsets = [list(candidate) for candidate in sorted(next_candidates)]
    k += 1

end_time_bf = time.perf_counter()
execution_time_bf = end_time_bf - start_time_bf

# Progress is printed only after the clock stops, so the measured time covers
# mining alone, like the Apriori / FP-Growth measurements.
for level, level_frequent in levels_report:
    print(f"\n🔹 Checking {level}-itemsets ...")
    for itemset, support in level_frequent:
        print(f"{set(itemset)} (support={support:.2f})")

print("\n=== Association Rules (Brute Force) ===")
rules_bf = []
for itemset, support in frequent_itemsets:
    if len(itemset) < 2:
        continue
    itemset_count = support_counts[tuple(itemset)]
    for i in range(1, len(itemset)):
        for antecedent in itertools.combinations(itemset, i):
            consequent = tuple(sorted(set(itemset) - set(antecedent)))
            # Every subset of a frequent itemset is itself frequent, so its count
            # is normally cached; fall back to a scan if it is not.
            antecedent_count = support_counts.get(antecedent)
            if antecedent_count is None:
                antecedent_members = frozenset(antecedent)
                antecedent_count = sum(1 for t in transaction_sets if antecedent_members <= t)
            if antecedent_count == 0:
                continue
            # Divide the integer counts directly: a quotient of two already-rounded
            # support fractions can differ in the last bit and flip a comparison
            # that sits exactly on the threshold.
            confidence = itemset_count / antecedent_count
            if confidence >= min_conf:
                print(f"{set(antecedent)} → {set(consequent)} (support={support:.2f}, confidence={confidence:.2f})")
                rules_bf.append((antecedent, consequent, support, confidence))

print("\n=== Brute Force Summary ===")
print(f"Dataset used: {dataset_name}")
print(f"Total transactions: {num_transactions}")
print(f"Frequent itemsets found: {len(frequent_itemsets)}")
print(f"Association rules found: {len(rules_bf)}")
print(f"Execution time: {execution_time_bf:.6f} s")



Running Brute Force Algorithm

🔹 Checking 1-itemsets ...
{"A Beginner's Guide"} (support=0.55)
{'Android Programming: The Big Nerd Ranch'} (support=0.65)
{'Beginning Programming with Java'} (support=0.30)
{'Head First Java 2nd Edition'} (support=0.40)
{'Java For Dummies'} (support=0.65)
{'Java: The Complete Reference'} (support=0.50)

🔹 Checking 2-itemsets ...
{'Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition'} (support=0.30)
{'Java For Dummies', 'Java: The Complete Reference'} (support=0.50)
{'Android Programming: The Big Nerd Ranch', 'Java: The Complete Reference'} (support=0.30)
{'Java For Dummies', 'Android Programming: The Big Nerd Ranch'} (support=0.45)
{"A Beginner's Guide", 'Java: The Complete Reference'} (support=0.45)
{"A Beginner's Guide", 'Android Programming: The Big Nerd Ranch'} (support=0.30)
{"A Beginner's Guide", 'Java For Dummies'} (support=0.45)

🔹 Checking 3-itemsets ...
{"A Beginner's Guide", 'Java For Dummies', 'Java: The Complete Reference

In [14]:
# Apriori
# Mine frequent itemsets with mlxtend's apriori on the one-hot frame, derive
# rules filtered by confidence, then print itemsets, rules and a short summary.

print("\nRunning Apriori Algorithm...")
# perf_counter is a monotonic, high-resolution clock and is the one the
# brute-force cell uses; time.time() can be too coarse for millisecond runs.
start_time_ap = time.perf_counter()

frequent_itemsets_ap = apriori(df_encoded, min_support=min_support, use_colnames=True)

end_time_ap = time.perf_counter()
execution_time_ap = end_time_ap - start_time_ap

# Rules are only derived when there is something to derive them from.
rules_ap = pd.DataFrame()
if frequent_itemsets_ap.empty:
    print(f"\nApriori found NO frequent itemsets with min_support = {min_support:.4f}.")
else:
    rules_ap = association_rules(frequent_itemsets_ap, metric="confidence", min_threshold=min_conf)

print("\n=== Frequent Itemsets (Apriori) ===")
if not frequent_itemsets_ap.empty:
    for _, row in frequent_itemsets_ap.iterrows():
        # Use a dedicated name for the display string so the global `items`
        # list built by the brute-force cell is not overwritten.
        itemset_label = ', '.join(sorted(row['itemsets']))
        print(f"{{{itemset_label}}} (support={row['support']:.2f})")
else:
    print("No frequent itemsets found for Apriori at the given support.")

print("\n=== Association Rules (Apriori) ===")
if not rules_ap.empty:
    for _, row in rules_ap.iterrows():
        antecedent_label = ', '.join(sorted(row['antecedents']))
        consequent_label = ', '.join(sorted(row['consequents']))
        print(f"{antecedent_label} → {consequent_label} (support={row['support']:.2f}, confidence={row['confidence']:.2f})")
else:
    print("No rules found for Apriori with the given confidence.")

print("\n=== Apriori Summary ===")
print(f"Dataset used: {dataset_name}")
print(f"Total transactions: {num_transactions}")
print(f"Frequent itemsets found: {len(frequent_itemsets_ap)}")
# len() of an empty DataFrame is already 0, so no special case is needed.
print(f"Association rules found: {len(rules_ap)}")
print(f"Execution time: {execution_time_ap:.6f} s")



Running Apriori Algorithm...

=== Frequent Itemsets (Apriori) ===
{A Beginner's Guide} (support=0.55)
{Android Programming: The Big Nerd Ranch} (support=0.65)
{Beginning Programming with Java} (support=0.30)
{Head First Java 2nd Edition} (support=0.40)
{Java For Dummies} (support=0.65)
{Java: The Complete Reference} (support=0.50)
{A Beginner's Guide, Android Programming: The Big Nerd Ranch} (support=0.30)
{A Beginner's Guide, Java For Dummies} (support=0.45)
{A Beginner's Guide, Java: The Complete Reference} (support=0.45)
{Android Programming: The Big Nerd Ranch, Head First Java 2nd Edition} (support=0.30)
{Android Programming: The Big Nerd Ranch, Java For Dummies} (support=0.45)
{Android Programming: The Big Nerd Ranch, Java: The Complete Reference} (support=0.30)
{Java For Dummies, Java: The Complete Reference} (support=0.50)
{A Beginner's Guide, Java For Dummies, Java: The Complete Reference} (support=0.45)
{Android Programming: The Big Nerd Ranch, Java For Dummies, Java: The Com

In [28]:
# FP-Growth
# Mine frequent itemsets with mlxtend's fpgrowth on the one-hot frame, derive
# rules filtered by confidence, then print itemsets, rules and a short summary.

print("\nRunning FP-Growth Algorithm...")
# perf_counter is a monotonic, high-resolution clock and is the one the
# brute-force cell uses; time.time() can be too coarse for millisecond runs.
start_time_fp = time.perf_counter()

frequent_itemsets_fp = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)

end_time_fp = time.perf_counter()
execution_time_fp = end_time_fp - start_time_fp

# Rules are only derived when there is something to derive them from.
rules_fp = pd.DataFrame()
if frequent_itemsets_fp.empty:
    print(f"\nFP-Growth found NO frequent itemsets with min_support = {min_support:.4f}.")
else:
    rules_fp = association_rules(frequent_itemsets_fp, metric="confidence", min_threshold=min_conf)

print("\n=== Frequent Itemsets (FP-Growth) ===")
if not frequent_itemsets_fp.empty:
    for _, row in frequent_itemsets_fp.iterrows():
        # Use a dedicated name for the display string so the global `items`
        # list built by the brute-force cell is not overwritten.
        itemset_label = ', '.join(sorted(row['itemsets']))
        print(f"{{{itemset_label}}} (support={row['support']:.2f})")
else:
    print("No frequent itemsets found for FP-Growth at the given support.")

print("\n=== Association Rules (FP-Growth) ===")
if not rules_fp.empty:
    for _, row in rules_fp.iterrows():
        antecedent_label = ', '.join(sorted(row['antecedents']))
        consequent_label = ', '.join(sorted(row['consequents']))
        print(f"{antecedent_label} → {consequent_label} (support={row['support']:.2f}, confidence={row['confidence']:.2f})")
else:
    print("No rules found for FP-Growth with the given confidence.")

print("\n=== FP-Growth Summary ===")
print(f"Dataset used: {dataset_name}")
print(f"Total transactions: {num_transactions}")
print(f"Frequent itemsets found: {len(frequent_itemsets_fp)}")
# len() of an empty DataFrame is already 0, so no special case is needed.
print(f"Association rules found: {len(rules_fp)}")
print(f"Execution time: {execution_time_fp:.6f} s")



Running FP-Growth Algorithm...

=== Frequent Itemsets (FP-Growth) ===
{Java For Dummies} (support=0.65)
{Android Programming: The Big Nerd Ranch} (support=0.65)
{A Beginner's Guide} (support=0.55)
{Java: The Complete Reference} (support=0.50)
{Head First Java 2nd Edition} (support=0.40)
{Beginning Programming with Java} (support=0.30)
{Android Programming: The Big Nerd Ranch, Java For Dummies} (support=0.45)
{A Beginner's Guide, Java For Dummies} (support=0.45)
{A Beginner's Guide, Android Programming: The Big Nerd Ranch} (support=0.30)
{Java For Dummies, Java: The Complete Reference} (support=0.50)
{A Beginner's Guide, Java: The Complete Reference} (support=0.45)
{Android Programming: The Big Nerd Ranch, Java: The Complete Reference} (support=0.30)
{A Beginner's Guide, Java For Dummies, Java: The Complete Reference} (support=0.45)
{Android Programming: The Big Nerd Ranch, Java For Dummies, Java: The Complete Reference} (support=0.30)
{Android Programming: The Big Nerd Ranch, Head Fir

In [29]:
# Cell 7: side-by-side timing table for the three mining approaches.
# Built column-wise; each row pairs an algorithm label with the elapsed
# seconds measured in its own cell.

summary = pd.DataFrame(
    {
        "Algorithm": ["Brute Force", "Apriori", "FP-Growth"],
        "Time(s)": [execution_time_bf, execution_time_ap, execution_time_fp],
    }
)

print("\n=== Final Comparison Summary ===")
display(summary)



=== Final Comparison Summary ===


Unnamed: 0,Algorithm,Time(s)
0,Brute Force,0.004987
1,Apriori,0.003
2,FP-Growth,0.050552
