In [31]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [32]:
df = pd.read_excel("/content/Online retail.xlsx")

In [33]:
df

Unnamed: 0,"shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil"
0,"burgers,meatballs,eggs"
1,chutney
2,"turkey,avocado"
3,"mineral water,milk,energy bar,whole wheat rice..."
4,low fat yogurt
...,...
7495,"butter,light mayo,fresh bread"
7496,"burgers,frozen vegetables,eggs,french fries,ma..."
7497,chicken
7498,"escalope,green tea"


In [34]:
transactions = df.iloc[:, 0].dropna().apply(lambda x: x.split(','))

In [35]:
te = TransactionEncoder()

In [36]:
te_array = te.fit_transform(transactions)

In [37]:
te_array

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False,  True, False]])

In [38]:
df_encoded = pd.DataFrame(te_array, columns=te.columns_)


In [39]:
df_encoded.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [40]:
frequent_itemsets = apriori(df_encoded, min_support=0.02, use_colnames=True)

In [41]:
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.020267,(almonds)
1,0.0332,(avocado)
2,0.033733,(brownies)
3,0.0872,(burgers)
4,0.030133,(butter)


In [42]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

In [43]:
rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(eggs),(burgers),0.028800,0.160237,1.837585
1,(burgers),(eggs),0.028800,0.330275,1.837585
2,(french fries),(burgers),0.022000,0.128705,1.475976
3,(burgers),(french fries),0.022000,0.252294,1.475976
4,(mineral water),(burgers),0.024400,0.102406,1.174384
...,...,...,...,...,...
91,(pancakes),(spaghetti),0.025200,0.265077,1.522265
92,(spaghetti),(shrimp),0.021200,0.121746,1.706717
93,(shrimp),(spaghetti),0.021200,0.297196,1.706717
94,(spaghetti),(tomatoes),0.020933,0.120214,1.757520


In [44]:
top_rules = rules.sort_values(by='lift', ascending=False).head(10)

In [45]:
for index, row in top_rules.iterrows():
    print(f"Rule: {set(row['antecedents'])} -> {set(row['consequents'])}")
    print(f"Support: {row['support']:.2f}")
    print(f"Confidence: {row['confidence']:.2f}")
    print(f"Lift: {row['lift']:.2f}\n")


Rule: {'ground beef'} -> {'spaghetti'}
Support: 0.04
Confidence: 0.40
Lift: 2.29

Rule: {'spaghetti'} -> {'ground beef'}
Support: 0.04
Confidence: 0.23
Lift: 2.29

Rule: {'spaghetti'} -> {'olive oil'}
Support: 0.02
Confidence: 0.13
Lift: 2.00

Rule: {'olive oil'} -> {'spaghetti'}
Support: 0.02
Confidence: 0.35
Lift: 2.00

Rule: {'soup'} -> {'mineral water'}
Support: 0.02
Confidence: 0.46
Lift: 1.92

Rule: {'mineral water'} -> {'soup'}
Support: 0.02
Confidence: 0.10
Lift: 1.92

Rule: {'milk'} -> {'frozen vegetables'}
Support: 0.02
Confidence: 0.18
Lift: 1.91

Rule: {'frozen vegetables'} -> {'milk'}
Support: 0.02
Confidence: 0.25
Lift: 1.91

Rule: {'burgers'} -> {'eggs'}
Support: 0.03
Confidence: 0.33
Lift: 1.84

Rule: {'eggs'} -> {'burgers'}
Support: 0.03
Confidence: 0.16
Lift: 1.84



1.What is lift and why is it important?
➤ Lift = (Confidence) / (Expected Confidence)
It measures how much more likely the consequent is given the antecedent than it would be by chance. A lift > 1 indicates a positive association.

2.What is support and confidence?
➤ Support = Fraction of transactions containing the itemset.
➤ Confidence = Likelihood that the rule's consequent is purchased when the antecedent is purchased.
Formula:

Support(A → B) = P(A ∩ B)

Confidence(A → B) = P(B | A) = Support(A ∩ B) / Support(A)
3.Limitations of Association Rules:

Generates too many trivial rules if thresholds are low.

Doesn’t account for sequence or time of purchases.

Not effective for rare items.

Scalability can be an issue with large datasets.

