In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Market-basket analysis: mine association rules from a file of transactions
# and report/export the strongest ones.

# --- Configuration ---------------------------------------------------------
# Tunable values live here so they are not buried inside the calls below.
file_path = "Online retail.csv"
MIN_SUPPORT = 0.01     # passed to apriori as min_support
MIN_CONFIDENCE = 0.2   # passed to association_rules as the confidence threshold
MIN_LIFT = 1.0         # rules are kept only when lift is strictly greater than this
TOP_N = 10             # number of rules printed and exported
output_path = f'top_{TOP_N}_association_rules.csv'

# --- Load ------------------------------------------------------------------
# header=None: the first line is treated as data, not as column names.
df = pd.read_csv(file_path, header=None)

# --- Build the transaction list ---------------------------------------------
# Column 0 is read as one comma-separated string of items per row.
# pandas turns empty fields (and tokens such as "NA") into NaN, and a purely
# numeric cell into a number; neither has .split(), so drop missing rows and
# coerce the rest to str before splitting.
raw_baskets = df[0].dropna().astype(str)

# Single pass: split each basket, trim whitespace, discard empty items, and
# discard baskets that end up with no items at all.
transactions = [
    items
    for items in (
        [item.strip() for item in basket.split(',') if item.strip()]
        for basket in raw_baskets
    )
    if items
]

# --- One-hot encode ----------------------------------------------------------
# One boolean column per distinct item, one row per transaction.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_ohe = pd.DataFrame(te_ary, columns=te.columns_)

# --- Frequent itemsets and rules ---------------------------------------------
frequent_itemsets = apriori(df_ohe, min_support=MIN_SUPPORT, use_colnames=True)

rules = association_rules(
    frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE
)
# Keep only rules with lift above MIN_LIFT, then rank strongest first
# (by confidence, ties broken by lift).
rules = rules[rules['lift'] > MIN_LIFT]
rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])

# --- Report ------------------------------------------------------------------
# Slice once and reuse for both the printout and the export so they cannot
# drift apart.
top_rules = rules.head(TOP_N)

print(f"Top {TOP_N} Association Rules:")
print(top_rules)
print("\nInterpretation of a rule (e.g., 'If {A} -> {B}'):")
print("- Support: The proportion of transactions that contain both A and B.")
print("- Confidence: The probability that a customer who buys A will also buy B.")
print("- Lift: The measure of how much more likely a customer is to buy B given they have already bought A, compared to the overall probability of buying B. A value > 1 indicates a positive correlation.")

# --- Export ------------------------------------------------------------------
top_rules.to_csv(output_path, index=False)

Top 10 Association Rules:
                   antecedents      consequents  antecedent support  \
134        (eggs, ground beef)  (mineral water)            0.019997   
151        (ground beef, milk)  (mineral water)            0.021997   
123   (chocolate, ground beef)  (mineral water)            0.023064   
144  (frozen vegetables, milk)  (mineral water)            0.023597   
100                     (soup)  (mineral water)            0.050527   
161      (spaghetti, pancakes)  (mineral water)            0.025197   
158     (spaghetti, olive oil)  (mineral water)            0.022930   
156          (spaghetti, milk)  (mineral water)            0.035462   
125          (chocolate, milk)  (mineral water)            0.032129   
154   (spaghetti, ground beef)  (mineral water)            0.039195   

     consequent support   support  confidence      lift  representativity  \
134            0.238368  0.010132    0.506667  2.125563               1.0   
151            0.238368  0.011065    0