In [None]:
# Question 4: Optimizing Association Rule Learning Performance
# Description:
# Discuss methods to improve the performance of association rule learning in large datasets.

from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd
import numpy as np
import warnings

# Suppress every RuntimeWarning from here on so the printed tables stay clean.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Toy market-basket data: each inner list is a single transaction.
dataset = [
    ['milk', 'bread', 'butter'],
    ['bread', 'butter'],
    ['milk', 'bread'],
    ['milk', 'bread', 'butter', 'eggs'],
    ['bread', 'butter']
]

# Mining thresholds and the rule columns shown in the final report.
MIN_SUPPORT = 0.6
MIN_CONFIDENCE = 0.7
RULE_COLUMNS = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# One-hot encode the transactions: one row per transaction, one column per item.
te = TransactionEncoder()
te_array = te.fit_transform(dataset)
df = pd.DataFrame(te_array, columns=te.columns_)

# Keep only itemsets whose support reaches MIN_SUPPORT, labelled by item name
# rather than by column index.
frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
print("Frequent Itemsets:", frequent_itemsets, sep="\n")

# Derive rules from the frequent itemsets, filtered on confidence.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE,
)

# Turn infinite metric values into NaN, then discard any rule that lacks a
# usable lift, confidence or support value.
rules = rules.replace([np.inf, -np.inf], np.nan)
rules = rules.dropna(subset=['lift', 'confidence', 'support'])

print("\nOptimized Association Rules:")
print(rules[RULE_COLUMNS])



Frequent Itemsets:
   support         itemsets
0      1.0          (bread)
1      0.8         (butter)
2      0.6           (milk)
3      0.8  (butter, bread)
4      0.6    (milk, bread)

Optimized Association Rules:
  antecedents consequents  support  confidence  lift
0    (butter)     (bread)      0.8         1.0   1.0
1     (bread)    (butter)      0.8         0.8   1.0
2      (milk)     (bread)      0.6         1.0   1.0
