In [6]:
!pip install mlxtend




In [8]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import random

# Generate synthetic transactional data
def generate_transactions(num_transactions=40, item_pool_size=20, seed=None):
    """Generate synthetic market-basket transactions with planted item pairs.

    Every basket starts as a random sample of 3-8 distinct items drawn from
    the item pool. With probability of about one half, one of the planted
    pairs (milk/bread, beer/diapers, eggs/coffee) is then merged into the
    basket so that these pairs co-occur more often than chance.

    Parameters
    ----------
    num_transactions : int, default 40
        Number of baskets to generate.
    item_pool_size : int, default 20
        Number of items, taken from the start of the built-in 20-item
        catalogue, that baskets may draw from. Values above the catalogue
        size use the whole catalogue; values below zero are treated as zero.
        Planted pairs are only used when both of their items are in the pool.
    seed : int or None, default None
        When given, private generators seeded with this value are used, so
        the result is reproducible and the global RNG state is untouched.
        When None, the global ``random`` / ``np.random`` state is used.

    Returns
    -------
    list[list[str]]
        One list of distinct item names per transaction.
    """
    item_pool = ['milk', 'bread', 'beer', 'diapers', 'eggs', 'coffee', 'juice', 'cereal', 'yogurt', 'cheese', 'soda', 'chips', 'pasta', 'rice', 'apples', 'bananas', 'onions', 'carrots', 'chicken', 'beef']
    frequent_patterns = [
        ['milk', 'bread'],
        ['beer', 'diapers'],
        ['eggs', 'coffee']
    ]

    # Restrict the catalogue to the requested size (slicing already caps the
    # upper bound at the catalogue length).
    pool = item_pool[:max(0, int(item_pool_size))]
    pool_items = set(pool)
    # Never inject an item that lies outside the requested pool.
    usable_patterns = [p for p in frequent_patterns if pool_items.issuperset(p)]

    # Seeded call -> isolated generators; unseeded call -> the module-level
    # generators. Both expose the same sample/randint/rand methods.
    if seed is None:
        py_rng, np_rng = random, np.random
    else:
        py_rng, np_rng = random.Random(seed), np.random.RandomState(seed)

    transactions = []
    for _ in range(num_transactions):
        # randint's upper bound is exclusive: sizes are 3..8, capped by the
        # pool so that sample() cannot be asked for more items than exist.
        basket_size = min(int(np_rng.randint(3, 9)), len(pool))
        basket = py_rng.sample(pool, basket_size)

        if usable_patterns and np_rng.rand() > 0.5:
            pattern = usable_patterns[np_rng.randint(0, len(usable_patterns))]
            # Keep items unique within a basket.
            basket.extend([item for item in pattern if item not in basket])

        transactions.append(basket)
    return transactions

# Seed both random sources that generate_transactions() draws from (the stdlib
# `random` module and NumPy's global generator) so that a fresh
# Restart-and-Run-All produces the same baskets, and therefore the same rules.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Generate the transactions
transactions = generate_transactions()

# Convert transactions to a one-hot encoded DataFrame for Apriori:
# one row per transaction, one boolean column per distinct item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_transactions = pd.DataFrame(te_ary, columns=te.columns_)

# Apply the Apriori algorithm; keep itemsets present in at least 20% of baskets.
frequent_itemsets = apriori(df_transactions, min_support=0.2, use_colnames=True)

# Derive association rules. The cutoff is expressed as a (metric, min_threshold)
# pair: here, rules whose confidence is at least 0.5.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)
# Strongest associations first.
rules = rules.sort_values(by='lift', ascending=False)

# Display the top 5 rules
print("--- Top 5 Association Rules (Sorted by Lift) ---")
print(rules.head(5))

--- Top 5 Association Rules (Sorted by Lift) ---
  antecedents consequents  antecedent support  consequent support  support  \
4      (eggs)    (coffee)               0.350               0.375    0.250   
3    (coffee)      (eggs)               0.375               0.350    0.250   
0     (bread)      (soda)               0.400               0.350    0.200   
1      (soda)     (bread)               0.350               0.400    0.200   
5   (diapers)     (juice)               0.425               0.375    0.225   

   confidence      lift  representativity  leverage  conviction  \
4    0.714286  1.904762               1.0  0.118750    2.187500   
3    0.666667  1.904762               1.0  0.118750    1.950000   
0    0.500000  1.428571               1.0  0.060000    1.300000   
1    0.571429  1.428571               1.0  0.060000    1.400000   
5    0.529412  1.411765               1.0  0.065625    1.328125   

   zhangs_metric   jaccard  certainty  kulczynski  
4       0.730769  0.526316 