In [34]:
!pip install pandas mlxtend



In [35]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from IPython.display import display

In [36]:
df = pd.read_csv('MBA.csv',low_memory=False)

In [37]:
df.drop_duplicates(inplace=True)
df.dropna(subset=['BillNo', 'Itemname'], inplace=True)

In [38]:
basket = df.groupby(['BillNo', 'Itemname'])['Quantity'].sum().unstack().fillna(0)
basket = basket.astype(bool)

In [39]:
frequent_itemsets = apriori(basket, min_support=0.02, use_colnames=True)

In [40]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

In [41]:
strong_rules = rules[(rules['confidence'] >= 0.5) & (rules['lift'] >= 1.2)]

In [42]:
for idx, row in strong_rules.iterrows():
    print(f"{set(row['antecedents'])} => {set(row['consequents'])} | "
          f"Support: {row['support']:.2f}, Confidence: {row['confidence']:.2f}, Lift: {row['lift']:.2f}")

{'60 TEATIME FAIRY CAKE CASES'} => {'PACK OF 72 RETROSPOT CAKE CASES'} | Support: 0.02, Confidence: 0.55, Lift: 8.61
{'ALARM CLOCK BAKELIKE PINK'} => {'ALARM CLOCK BAKELIKE GREEN'} | Support: 0.02, Confidence: 0.54, Lift: 11.32
{'ALARM CLOCK BAKELIKE GREEN'} => {'ALARM CLOCK BAKELIKE RED'} | Support: 0.03, Confidence: 0.65, Lift: 12.88
{'ALARM CLOCK BAKELIKE RED'} => {'ALARM CLOCK BAKELIKE GREEN'} | Support: 0.03, Confidence: 0.62, Lift: 12.88
{'ALARM CLOCK BAKELIKE PINK'} => {'ALARM CLOCK BAKELIKE RED'} | Support: 0.02, Confidence: 0.60, Lift: 11.77
{'CHARLOTTE BAG PINK POLKADOT'} => {'CHARLOTTE BAG SUKI DESIGN'} | Support: 0.02, Confidence: 0.55, Lift: 12.77
{'RED RETROSPOT CHARLOTTE BAG'} => {'CHARLOTTE BAG PINK POLKADOT'} | Support: 0.03, Confidence: 0.51, Lift: 13.85
{'CHARLOTTE BAG PINK POLKADOT'} => {'RED RETROSPOT CHARLOTTE BAG'} | Support: 0.03, Confidence: 0.70, Lift: 13.85
{'CHARLOTTE BAG SUKI DESIGN'} => {'RED RETROSPOT CHARLOTTE BAG'} | Support: 0.02, Confidence: 0.58, Lif

In [43]:
table = strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].copy()
table['antecedents'] = table['antecedents'].apply(lambda x: ', '.join(list(x)))
table['consequents'] = table['consequents'].apply(lambda x: ', '.join(list(x)))
display(table)
table.to_csv('mba_strong_rules_table.csv', index=False)

Unnamed: 0,antecedents,consequents,support,confidence,lift
1,60 TEATIME FAIRY CAKE CASES,PACK OF 72 RETROSPOT CAKE CASES,0.021526,0.545113,8.612697
3,ALARM CLOCK BAKELIKE PINK,ALARM CLOCK BAKELIKE GREEN,0.020338,0.541502,11.316103
4,ALARM CLOCK BAKELIKE GREEN,ALARM CLOCK BAKELIKE RED,0.031324,0.654602,12.880423
5,ALARM CLOCK BAKELIKE RED,ALARM CLOCK BAKELIKE GREEN,0.031324,0.616358,12.880423
7,ALARM CLOCK BAKELIKE PINK,ALARM CLOCK BAKELIKE RED,0.022466,0.598155,11.769743
9,CHARLOTTE BAG PINK POLKADOT,CHARLOTTE BAG SUKI DESIGN,0.02019,0.552846,12.767889
10,RED RETROSPOT CHARLOTTE BAG,CHARLOTTE BAG PINK POLKADOT,0.025732,0.505837,13.850875
11,CHARLOTTE BAG PINK POLKADOT,RED RETROSPOT CHARLOTTE BAG,0.025732,0.704607,13.850875
12,CHARLOTTE BAG SUKI DESIGN,RED RETROSPOT CHARLOTTE BAG,0.02499,0.577143,11.345236
15,STRAWBERRY CHARLOTTE BAG,CHARLOTTE BAG SUKI DESIGN,0.020388,0.575419,13.289219
