## Data Preprocessing:

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [22]:
import warnings
warnings.filterwarnings("ignore")

In [23]:
df = pd.read_excel('/content/sample_data/Online retail.xlsx',header =None,names = ['Transaction'])

In [24]:
df.head()

Unnamed: 0,Transaction
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."


In [26]:
transactions = df['Transaction'].apply(lambda x:x.split(','))

In [27]:
#df.head()

In [28]:
# Create a list of unique items
items = sorted(set(item for transaction in transactions for item in transaction))


In [29]:
basket = pd.DataFrame(0, index=range(len(transactions)), columns=items)
for i, transaction in enumerate(transactions):
    for item in transaction:
        basket.at[i, item] = 1


In [30]:
basket.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,0,1,1,0,1,0,0,0,0,0,...,0,1,0,0,1,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [31]:
basket.shape

(7501, 120)

## Association Rule Mining:

In [35]:
from mlxtend.frequent_patterns import apriori, association_rules

In [36]:
frequent_itemsets = apriori(basket,min_support = 0.005, use_colnames=True)
rules = association_rules(frequent_itemsets,metric='lift',min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(almonds),(burgers),0.020397,0.087188,0.005199,0.254902,2.923577,0.003421,1.225089,0.671653
1,(burgers),(almonds),0.087188,0.020397,0.005199,0.059633,2.923577,0.003421,1.041724,0.720799
2,(almonds),(chocolate),0.020397,0.163845,0.005999,0.294118,1.795099,0.002657,1.184553,0.452150
3,(chocolate),(almonds),0.163845,0.020397,0.005999,0.036615,1.795099,0.002657,1.016834,0.529719
4,(almonds),(eggs),0.020397,0.179709,0.006532,0.320261,1.782108,0.002867,1.206774,0.448005
...,...,...,...,...,...,...,...,...,...,...
1827,"(olive oil, spaghetti)",(pancakes),0.022930,0.095054,0.005066,0.220930,2.324260,0.002886,1.161572,0.583127
1828,"(pancakes, spaghetti)",(olive oil),0.025197,0.065858,0.005066,0.201058,3.052910,0.003407,1.169224,0.689825
1829,(olive oil),"(pancakes, spaghetti)",0.065858,0.025197,0.005066,0.076923,3.052910,0.003407,1.056037,0.719852
1830,(pancakes),"(olive oil, spaghetti)",0.095054,0.022930,0.005066,0.053296,2.324260,0.002886,1.032075,0.629602


In [37]:
frequent_itemsets = apriori(basket,min_support = 0.01, use_colnames=True)
rules = association_rules(frequent_itemsets,metric='lift',min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(avocado),(mineral water),0.033329,0.238368,0.011598,0.348000,1.459926,0.003654,1.168147,0.325896
1,(mineral water),(avocado),0.238368,0.033329,0.011598,0.048658,1.459926,0.003654,1.016113,0.413630
2,(cake),(burgers),0.081056,0.087188,0.011465,0.141447,1.622319,0.004398,1.063198,0.417434
3,(burgers),(cake),0.087188,0.081056,0.011465,0.131498,1.622319,0.004398,1.058080,0.420238
4,(burgers),(chocolate),0.087188,0.163845,0.017064,0.195719,1.194537,0.002779,1.039630,0.178411
...,...,...,...,...,...,...,...,...,...,...
401,"(mineral water, spaghetti)",(pancakes),0.059725,0.095054,0.011465,0.191964,2.019529,0.005788,1.119933,0.536902
402,"(pancakes, spaghetti)",(mineral water),0.025197,0.238368,0.011465,0.455026,1.908923,0.005459,1.397557,0.488452
403,(mineral water),"(pancakes, spaghetti)",0.238368,0.025197,0.011465,0.048098,1.908923,0.005459,1.024059,0.625163
404,(pancakes),"(mineral water, spaghetti)",0.095054,0.059725,0.011465,0.120617,2.019529,0.005788,1.069244,0.557862


In [51]:
rules['antecedents'].value_counts()

Unnamed: 0_level_0,count
antecedents,Unnamed: 1_level_1
(mineral water),42
(spaghetti),36
(chocolate),31
(milk),27
(eggs),24
(ground beef),18
(frozen vegetables),17
(green tea),16
(french fries),15
(pancakes),14


In [49]:
rules[ (rules['lift'] >= 3) &
      (rules['confidence'] >= 0.3) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
214,(herb & pepper),(ground beef),0.04946,0.098254,0.015998,0.32345,3.291994,0.011138,1.33286,0.73246


## Analysis and Interpretation:

Customers who buy **herb & pepper** are **3.29 times** more likely to also buy **ground beef** (32.3% of the time). This suggests these items are commonly used together in recipes.

 Insights:
- **Product Placement**: Place these items near each other or bundle them in promotions.
- **Targeted Marketing**: Offer deals on ground beef to customers purchasing herb & pepper.

This strong association can help optimize sales strategies and product layout.