Load and Process the Data

In [9]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

df = pd.read_excel("/content/sample_data/Online retail.xlsx", header=None)

transactions = df[0].astype(str).apply(lambda x: x.split(','))
transactions.head()


Unnamed: 0,0
0,"[shrimp, almonds, avocado, vegetables mix, gre..."
1,"[burgers, meatballs, eggs]"
2,[chutney]
3,"[turkey, avocado]"
4,"[mineral water, milk, energy bar, whole wheat ..."


In [11]:
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)

basket = pd.DataFrame(te_array, columns=te.columns_)
basket.head()


Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


Apriori Algorithm

In [12]:
frequent_itemsets = apriori(basket, min_support=0.02, use_colnames=True)
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.020397,(almonds)
1,0.033329,(avocado)
2,0.033729,(brownies)
3,0.087188,(burgers)
4,0.030129,(butter)


In [13]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
rules.sort_values('lift', ascending=False).head(10)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
64,(spaghetti),(ground beef),0.17411,0.098254,0.039195,0.225115,2.291162,1.0,0.022088,1.163716,0.682343,0.168096,0.140684,0.312015
65,(ground beef),(spaghetti),0.098254,0.17411,0.039195,0.398915,2.291162,1.0,0.022088,1.373997,0.624943,0.168096,0.272197,0.312015
87,(olive oil),(spaghetti),0.065858,0.17411,0.02293,0.348178,1.999758,1.0,0.011464,1.267048,0.535186,0.105651,0.210764,0.239939
86,(spaghetti),(olive oil),0.17411,0.065858,0.02293,0.1317,1.999758,1.0,0.011464,1.075829,0.605334,0.105651,0.070484,0.239939
79,(mineral water),(soup),0.238368,0.050527,0.023064,0.096756,1.914955,1.0,0.01102,1.051182,0.62733,0.08676,0.04869,0.27661
78,(soup),(mineral water),0.050527,0.238368,0.023064,0.456464,1.914955,1.0,0.01102,1.401255,0.503221,0.08676,0.286354,0.27661
53,(frozen vegetables),(milk),0.095321,0.129583,0.023597,0.247552,1.910382,1.0,0.011245,1.156781,0.526755,0.117219,0.135532,0.214826
52,(milk),(frozen vegetables),0.129583,0.095321,0.023597,0.182099,1.910382,1.0,0.011245,1.106099,0.54749,0.117219,0.095921,0.214826
0,(eggs),(burgers),0.179709,0.087188,0.028796,0.160237,1.83783,1.0,0.013128,1.086988,0.555754,0.120941,0.080026,0.245256
1,(burgers),(eggs),0.087188,0.179709,0.028796,0.330275,1.83783,1.0,0.013128,1.224818,0.499424,0.120941,0.183552,0.245256


Analyze & Interpret Results

In [15]:
strong_rules = rules[(rules['confidence'] > 0.4) & (rules['lift'] > 1.0)]
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


      antecedents      consequents   support  confidence      lift
63  (ground beef)  (mineral water)  0.040928    0.416554  1.747522
73    (olive oil)  (mineral water)  0.027596    0.419028  1.757904
78         (soup)  (mineral water)  0.023064    0.456464  1.914955


The association rules reveal key insights into customer purchasing behavior. Customers who buy ground beef, olive oil, or soup are significantly more likely to purchase mineral water, with confidence levels of 41.66%, 41.90%, and 45.64%, respectively. The high lift values (above 1.7) indicate that these products are frequently bought together, suggesting strong associations. Retailers can leverage these insights by offering bundle deals, optimizing store layouts to place these items near each other, and using targeted marketing campaigns promoting meal kits or health-conscious product combinations. These strategies can enhance sales and improve the shopping experience.