In [1]:
!pip install mlxtend



In [3]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Read the retail transactions workbook from the current working directory.
# pd.read_excel loads only the first sheet unless sheet_name is given.
# NOTE(review): confirm the workbook has no further sheets that should be included.
df = pd.read_excel(io="online_retail_II.xlsx")

# Preview the first five rows
df.head(n=5)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


In [4]:
# Discard every row that has a missing value in any column.
# The result is rebound instead of using inplace=True: the in-place form has
# no performance benefit and mutates the loaded frame behind the reader's back.
# NOTE(review): this also removes rows whose only gap is in a column the cells
# visible here never read (e.g. 'Customer ID') — confirm that is intended.
df = df.dropna()

In [5]:
# Remove cancelled transactions: an invoice is treated as cancelled when its
# number, rendered as text, begins with the letter 'C'.
is_cancelled = df['Invoice'].astype(str).str.startswith('C')
df = df[~is_cancelled]

In [6]:
# Restrict the analysis to rows whose Country is exactly 'United Kingdom'
df = df.loc[df['Country'].eq('United Kingdom')]

In [7]:
# Total quantity of each item on each invoice, as a Series indexed by
# (Invoice, Description).
quantity_per_item = df.groupby(['Invoice', 'Description'])['Quantity'].sum()

# Pivot to one row per invoice and one column per item description;
# items absent from an invoice get a quantity of 0.
basket = quantity_per_item.unstack(level='Description').fillna(value=0)

In [8]:
# Flag each (invoice, item) cell as purchased when its summed quantity is
# positive; zero or negative totals become False.
# A vectorised comparison replaces the per-cell applymap(lambda ...), which is
# deprecated since pandas 2.1 and slow on a wide invoice-by-item matrix. The
# resulting boolean frame is the input type mlxtend's apriori recommends
# (True/False is interchangeable with the former 1/0 encoding).
basket = basket > 0

In [9]:
from mlxtend.frequent_patterns import apriori, association_rules

In [10]:
# Minimum share of basket rows (invoices) an itemset must appear in to be kept.
support_floor = 0.03

# Mine frequent itemsets; use_colnames=True reports item descriptions instead
# of column positions in the 'itemsets' column.
frequent_itemsets = apriori(basket, use_colnames=True, min_support=support_floor)
frequent_itemsets.head(n=5)



Unnamed: 0,support,itemsets
0,0.032928,(6 RIBBONS RUSTIC CHARM)
1,0.058022,(60 TEATIME FAIRY CAKE CASES)
2,0.035767,(72 SWEETHEART FAIRY CAKE CASES)
3,0.03713,(ANTIQUE SILVER TEA GLASS ETCHED)
4,0.071534,(ASSORTED COLOUR BIRD ORNAMENT)


In [11]:
# Derive association rules whose lift is at least 1 from the frequent
# itemsets, then order them from strongest to weakest lift.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    .sort_values(by='lift', ascending=False)
)
rules.head(n=5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
4,(WOODEN PICTURE FRAME WHITE FINISH),(WOODEN FRAME ANTIQUE WHITE ),0.045021,0.055638,0.030828,0.684741,12.30718,1.0,0.028323,2.995518,0.96206,0.441463,0.666168,0.619412
5,(WOODEN FRAME ANTIQUE WHITE ),(WOODEN PICTURE FRAME WHITE FINISH),0.055638,0.045021,0.030828,0.554082,12.30718,1.0,0.028323,2.1416,0.972875,0.441463,0.53306,0.619412
2,(SWEETHEART CERAMIC TRINKET BOX),(STRAWBERRY CERAMIC TRINKET BOX),0.043375,0.070285,0.033099,0.763089,10.857068,1.0,0.03005,3.924322,0.949059,0.410853,0.745179,0.617005
3,(STRAWBERRY CERAMIC TRINKET BOX),(SWEETHEART CERAMIC TRINKET BOX),0.070285,0.043375,0.033099,0.470921,10.857068,1.0,0.03005,1.808095,0.976529,0.410853,0.446932,0.617005
0,(WHITE HANGING HEART T-LIGHT HOLDER),(RED HANGING HEART T-LIGHT HOLDER),0.165777,0.053707,0.039173,0.236301,4.399802,1.0,0.03027,1.239092,0.926272,0.217254,0.192957,0.482844
