In [1]:
import random
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Define item list
item_pool = [
    'cooking_oil', 'wheat_flour', 'rice', 'tomato_paste',
    'plantain_chips', 'oranges', 'bananas', 'apples',
    'palm_oil', 'indomie', 'groundnut', 'pure_water',
    'detergent', 'body_lotion', 'matches', 'mangoes',
    'pineapples', 'watermelon', 'coca_cola', 'fanta'
]

# Generate transaction data
random.seed(42)
transactions = []
for _ in range(50):
    basket = random.sample(item_pool, k=random.randint(3, 8))
    
    # Natural item pairings
    if 'rice' in basket and random.random() > 0.3:
        basket.append('tomato_paste')
    if 'indomie' in basket and random.random() > 0.4:
        basket.append('eggs')
    
    transactions.append(list(set(basket)))

# Convert to one-hot encoding
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine association rules
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

# Display results
print("Sample transactions:")
print(transactions[:3])
print("\nTop association rules (by lift):")
print(rules.sort_values('lift', ascending=False).head(5))

Sample transactions:
[['rice', 'palm_oil', 'tomato_paste', 'pure_water', 'wheat_flour', 'pineapples', 'cooking_oil', 'apples'], ['rice', 'body_lotion', 'tomato_paste', 'wheat_flour', 'pineapples', 'cooking_oil', 'coca_cola'], ['body_lotion', 'indomie', 'mangoes', 'cooking_oil', 'watermelon', 'apples', 'bananas']]

Top association rules (by lift):
    antecedents     consequents  antecedent support  consequent support  \
0      (apples)          (rice)                0.34                0.42   
2        (rice)  (tomato_paste)                0.42                0.54   
1    (palm_oil)  (tomato_paste)                0.38                0.54   
3  (watermelon)  (tomato_paste)                0.38                0.54   

   support  confidence      lift  representativity  leverage  conviction  \
0     0.20    0.588235  1.400560               1.0    0.0572    1.408571   
2     0.26    0.619048  1.146384               1.0    0.0332    1.207500   
1     0.20    0.526316  0.974659               