# In [None]:
# Question 4: Optimizing Association Rule Learning Performance
# Description:
# Discuss and demonstrate methods to improve the performance of association rule learning in large datasets.

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules
import time

# Sample large-ish dataset (simulated): every transaction is a random basket
# of 2-6 distinct items drawn from a 10-item catalogue.
import random

# Fixed seed so the itemset counts, rule counts and timing comparisons that
# are derived from this data are reproducible from run to run.
RANDOM_SEED = 42
NUM_TRANSACTIONS = 5000

items = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

# A private generator keeps the seeding local instead of resetting the
# module-wide `random` state that other code may rely on.
_rng = random.Random(RANDOM_SEED)

# The basket size is drawn first, then that many distinct items are sampled
# (random.sample never repeats an element within one basket).
transactions = [
    _rng.sample(items, _rng.randint(2, 6))
    for _ in range(NUM_TRANSACTIONS)
]

print(f"Number of transactions: {len(transactions)}")

# Data preparation
# Learn the item vocabulary from the transactions, then encode every
# transaction against it and wrap the encoded array in a DataFrame whose
# columns are the learned item labels (this DataFrame is what gets mined).
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

print("\n--- FP-Growth with different min_support values ---")

# Experiment 1: Higher min_support (more aggressive pruning)
# Only itemsets whose support reaches the threshold are kept, so a higher
# threshold can never return more itemsets than a lower one on the same data.
min_support_high = 0.1
print(f"\nRunning FP-Growth with min_support = {min_support_high}")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock, which can be adjusted
# mid-run and may have coarse resolution.
start_time = time.perf_counter()
frequent_itemsets_high = fpgrowth(df, min_support=min_support_high, use_colnames=True)
end_time = time.perf_counter()
print(f"Time taken: {end_time - start_time:.4f} seconds")
print(f"Number of frequent itemsets found: {len(frequent_itemsets_high)}")
# print(frequent_itemsets_high.head()) # Uncomment to see some itemsets

# Generate rules for high support, keeping only rules with confidence >= 0.7.
# NOTE(review): some mlxtend releases appear to require an extra
# `num_itemsets` argument to association_rules — confirm against the
# installed version.
rules_high = association_rules(frequent_itemsets_high, metric="confidence", min_threshold=0.7)
print(f"Number of association rules found (min_support={min_support_high}): {len(rules_high)}")
# print(rules_high.head()) # Uncomment to see some rules


# Experiment 2: Lower min_support (less aggressive pruning)
# A lower threshold keeps every itemset the higher threshold would keep plus
# any rarer ones, so this run has at least as much work to do.
min_support_low = 0.05
print(f"\nRunning FP-Growth with min_support = {min_support_low}")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock, which can be adjusted
# mid-run and may have coarse resolution.
start_time = time.perf_counter()
frequent_itemsets_low = fpgrowth(df, min_support=min_support_low, use_colnames=True)
end_time = time.perf_counter()
print(f"Time taken: {end_time - start_time:.4f} seconds")
print(f"Number of frequent itemsets found: {len(frequent_itemsets_low)}")
# print(frequent_itemsets_low.head()) # Uncomment to see some itemsets

# Generate rules for low support, keeping only rules with confidence >= 0.7.
# NOTE(review): some mlxtend releases appear to require an extra
# `num_itemsets` argument to association_rules — confirm against the
# installed version.
rules_low = association_rules(frequent_itemsets_low, metric="confidence", min_threshold=0.7)
print(f"Number of association rules found (min_support={min_support_low}): {len(rules_low)}")
# print(rules_low.head()) # Uncomment to see some rules