In [15]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd

In [16]:
df = pd.read_csv('Groceries_dataset.csv')

In [17]:
# Transactions
transactions_series = df.groupby(['Member_number', 'Date'])['itemDescription'].apply(list)
transactions = transactions_series.tolist()

In [18]:
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_array, columns=te.columns_)

In [19]:
frequent_itemsets = apriori(df_encoded, min_support=0.01, use_colnames=True)

In [20]:
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
print(rules)

          antecedents   consequents  antecedent support  consequent support  \
0  (other vegetables)  (whole milk)            0.122101            0.157923   
1        (rolls/buns)  (whole milk)            0.110005            0.157923   
2              (soda)  (whole milk)            0.097106            0.157923   
3            (yogurt)  (whole milk)            0.085879            0.157923   

    support  confidence      lift  representativity  leverage  conviction  \
0  0.014837    0.121511  0.769430               1.0 -0.004446    0.958551   
1  0.013968    0.126974  0.804028               1.0 -0.003404    0.964550   
2  0.011629    0.119752  0.758296               1.0 -0.003707    0.956636   
3  0.011161    0.129961  0.822940               1.0 -0.002401    0.967861   

   zhangs_metric   jaccard  certainty  kulczynski  
0      -0.254477  0.055948  -0.043241    0.107730  
1      -0.214986  0.055000  -0.036752    0.107711  
2      -0.260917  0.047776  -0.045329    0.096694  
3      -0.

In [None]:
# Filter for simple (1→1) rules
simple_rules = rules[
    (rules['antecedents'].apply(lambda x: len(x) == 1)) &
    (rules['consequents'].apply(lambda x: len(x) == 1))
]


top_3_rules = simple_rules.sort_values(by='lift', ascending=False).head(3)


j = 1
for i, row in top_3_rules.iterrows():
    
    antecedent = list(row['antecedents'])[0]
    consequent = list(row['consequents'])[0]
    
    print(f"Rule #{j}: If a customer buys **{antecedent}**, they are likely to also buy **{consequent}**.")
    print(f"  - Support: {row['support']:.2f}")
    print(f"  - Confidence: {row['confidence']:.2f}")
    print(f"  - Lift: {row['lift']:.2f}\n")
    
    j+=1

Rule #1: If a customer buys **yogurt**, they are likely to also buy **whole milk**.
  - Support: 0.01
  - Confidence: 0.13
  - Lift: 0.82

Rule #2: If a customer buys **rolls/buns**, they are likely to also buy **whole milk**.
  - Support: 0.01
  - Confidence: 0.13
  - Lift: 0.80

Rule #3: If a customer buys **other vegetables**, they are likely to also buy **whole milk**.
  - Support: 0.01
  - Confidence: 0.12
  - Lift: 0.77

