# Implementing market basket analysis

In [21]:
#Loading necessary packages
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [22]:
#Reading data from a local Excel workbook
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file location exists.
retail_xlsx = "/Users/superman/Desktop/Education/MBA/Semester-V/BUSI 650/course_files_export/OnlineRetail.xlsx"
retaildata = pd.read_excel(retail_xlsx)
retaildata.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


# Data Preparation

In [23]:
#Data Cleaning
retaildata = (
    retaildata
    # trim whitespace from the beginning and end of each description
    .assign(Description=lambda df: df['Description'].str.strip())
    # drop rows that have no invoice number
    .dropna(axis=0, subset=['InvoiceNo'])
    # store invoice numbers as strings so they can be searched as text
    .assign(InvoiceNo=lambda df: df['InvoiceNo'].astype('str'))
    # remove the credit transactions (invoice numbers containing 'C')
    .loc[lambda df: ~df['InvoiceNo'].str.contains('C')]
)
retaildata.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [24]:
# Only the last expression of a cell is rendered, so the per-country row counts
# computed on the next line are discarded; the displayed output is the
# (rows, columns) shape of the cleaned data.
retaildata['Country'].value_counts()
retaildata.shape

(532621, 8)

In [25]:
# Show the first rows of the cleaned transactions before building the basket
retaildata.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [26]:
#Separating transactions for Austria
# Result: one row per invoice, one column per item description, and the summed
# quantity as the value (0 where the invoice does not contain the item).
mybskt = (
    retaildata.loc[retaildata['Country'].eq("Austria")]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [27]:
#viewing transaction basket (rows: InvoiceNo, columns: Description, values: summed Quantity)
mybskt.head()

Description,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,3 PIECE SPACEBOY COOKIE CUTTER SET,36 FOIL HEART CAKE CASES,36 FOIL STAR CAKE CASES,36 PENCILS TUBE SKULLS,3D TRADITIONAL CHRISTMAS STICKERS,3D VINTAGE CHRISTMAS STICKERS,6 GIFT TAGS 50'S CHRISTMAS,6 GIFT TAGS VINTAGE CHRISTMAS,...,VINTAGE RED TRIM ENAMEL BOWL,WATERING CAN BLUE ELEPHANT,WATERING CAN GREEN DINOSAUR,WHITE ROCKING HORSE HAND PAINTED,WOOD 2 DRAWER CABINET WHITE FINISH,WOOD STOCKING CHRISTMAS SCANDISPOT,WOODLAND CHARLOTTE BAG,WOODLAND DESIGN COTTON TOTE BAG,WRAP RED DOILEY,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
539330,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
542887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0
543027,48.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
545570,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
547493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
#converting all positive values to 1 and everything else to 0
def my_encode_units(x):
    """Binarise a quantity: 1 if it is positive, 0 otherwise.

    Parameters
    ----------
    x : int or float
        A single numeric cell value (here, the summed quantity of one item
        on one invoice).

    Returns
    -------
    int
        1 when ``x`` is strictly greater than 0; 0 for zero, negative values
        and NaN.
    """
    # A single comparison covers every numeric input. The earlier two-branch
    # version (x <= 0 / x >= 1) fell through and returned None for values
    # strictly between 0 and 1 and for NaN.
    return 1 if x > 0 else 0

# Binarise the quantities with a vectorised comparison: positive -> 1, otherwise 0.
# For zero, negative and >= 1 quantities this is the same result as applying
# my_encode_units cell by cell, but it avoids DataFrame.applymap, which is
# deprecated in recent pandas releases.
# "POSTAGE" is then removed so it is not treated as an item; errors='ignore'
# keeps the cell working when the selected invoices have no POSTAGE column.
my_basket = (
    mybskt.gt(0)
    .astype(int)
    .drop(columns='POSTAGE', errors='ignore')
)

# Training Model

In [29]:
#Generating frequent itemsets
# min_support=0.07 keeps itemsets that occur in at least 7% of the basket rows;
# use_colnames=True reports items by column name instead of column index.
# NOTE(review): the describe() output in this notebook reports 17 invoices, so
# this threshold presumably amounts to "appears on at least 2 invoices" - confirm
# that such a low bar (and the very large rule table it yields) is intended.
my_frequent_itemsets = apriori(my_basket, min_support=0.07, use_colnames=True)

# http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/

In [30]:
#generating rules
# Rules are selected on the "lift" metric with a minimum threshold of 1.
my_rules = association_rules(my_frequent_itemsets, metric="lift", min_threshold=1)

In [44]:
#viewing the first 100 rules (head() returns rows in table order; it does not rank them by any metric)
my_rules.head(100)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(6 RIBBONS RUSTIC CHARM),(DOLLY GIRL LUNCH BOX),0.117647,0.176471,0.117647,1.000000,5.666667,0.096886,inf
1,(DOLLY GIRL LUNCH BOX),(6 RIBBONS RUSTIC CHARM),0.176471,0.117647,0.117647,0.666667,5.666667,0.096886,2.647059
2,(JAM MAKING SET PRINTED),(6 RIBBONS RUSTIC CHARM),0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
3,(6 RIBBONS RUSTIC CHARM),(JAM MAKING SET PRINTED),0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
4,(6 RIBBONS RUSTIC CHARM),(RED RETROSPOT SHOPPER BAG),0.117647,0.176471,0.117647,1.000000,5.666667,0.096886,inf
...,...,...,...,...,...,...,...,...,...
95,(CARD CHRISTMAS VILLAGE),(KIDS RAIN MAC PINK),0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
96,(SPACEBOY BIRTHDAY CARD),(CARD PARTY GAMES),0.176471,0.117647,0.117647,0.666667,5.666667,0.096886,2.647059
97,(CARD PARTY GAMES),(SPACEBOY BIRTHDAY CARD),0.117647,0.176471,0.117647,1.000000,5.666667,0.096886,inf
98,(EDWARDIAN PARASOL BLACK),(CERAMIC STRAWBERRY DESIGN MUG),0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf


# Making recommendations

In [34]:
# Preview the encoded basket (1 = item present on the invoice, 0 = absent)
my_basket.head()


Description,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,3 PIECE SPACEBOY COOKIE CUTTER SET,36 FOIL HEART CAKE CASES,36 FOIL STAR CAKE CASES,36 PENCILS TUBE SKULLS,3D TRADITIONAL CHRISTMAS STICKERS,3D VINTAGE CHRISTMAS STICKERS,6 GIFT TAGS 50'S CHRISTMAS,6 GIFT TAGS VINTAGE CHRISTMAS,...,VINTAGE RED TRIM ENAMEL BOWL,WATERING CAN BLUE ELEPHANT,WATERING CAN GREEN DINOSAUR,WHITE ROCKING HORSE HAND PAINTED,WOOD 2 DRAWER CABINET WHITE FINISH,WOOD STOCKING CHRISTMAS SCANDISPOT,WOODLAND CHARLOTTE BAG,WOODLAND DESIGN COTTON TOTE BAG,WRAP RED DOILEY,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
539330,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
542887,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
543027,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
545570,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
547493,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
# Summary statistics per item, transposed so that each item is a row
my_basket.describe().transpose()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12 PENCILS SMALL TUBE RED RETROSPOT,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
12 PENCILS SMALL TUBE SKULL,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
3 PIECE SPACEBOY COOKIE CUTTER SET,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
36 FOIL HEART CAKE CASES,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
36 FOIL STAR CAKE CASES,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...
WOOD STOCKING CHRISTMAS SCANDISPOT,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
WOODLAND CHARLOTTE BAG,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0
WOODLAND DESIGN COTTON TOTE BAG,17.0,0.117647,0.332106,0.0,0.0,0.0,0.0,1.0
WRAP RED DOILEY,17.0,0.058824,0.242536,0.0,0.0,0.0,0.0,1.0


In [37]:
# Number of invoices in the basket that contain this item (column holds 0/1 flags)
my_basket['SPACEBOY LUNCH BOX'].sum()

2

In [38]:
# Number of invoices in the basket that contain this item (column holds 0/1 flags)
my_basket['ROUND SNACK BOXES SET OF4 WOODLAND'].sum()

4

In [39]:
#Filtering rules based on condition
# Keep rules with lift of at least 3 and confidence of at least 0.3.
my_rules.loc[
    lambda rules: rules['lift'].ge(3) & rules['confidence'].ge(0.3)
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(6 RIBBONS RUSTIC CHARM),(DOLLY GIRL LUNCH BOX),0.117647,0.176471,0.117647,1.000000,5.666667,0.096886,inf
1,(DOLLY GIRL LUNCH BOX),(6 RIBBONS RUSTIC CHARM),0.176471,0.117647,0.117647,0.666667,5.666667,0.096886,2.647059
2,(JAM MAKING SET PRINTED),(6 RIBBONS RUSTIC CHARM),0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
3,(6 RIBBONS RUSTIC CHARM),(JAM MAKING SET PRINTED),0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
4,(6 RIBBONS RUSTIC CHARM),(RED RETROSPOT SHOPPER BAG),0.117647,0.176471,0.117647,1.000000,5.666667,0.096886,inf
...,...,...,...,...,...,...,...,...,...
366539,(STRAWBERRY LUNCH BOX WITH CUTLERY),"(LUNCH BOX WITH CUTLERY RETROSPOT, RED RETROSP...",0.176471,0.117647,0.117647,0.666667,5.666667,0.096886,2.647059
366540,(DINOSAUR LUNCH BOX WITH CUTLERY),"(LUNCH BOX WITH CUTLERY RETROSPOT, RED RETROSP...",0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
366541,(RED RETROSPOT PLATE),"(LUNCH BOX WITH CUTLERY RETROSPOT, RED RETROSP...",0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
366542,(PINK POLKADOT BOWL),"(LUNCH BOX WITH CUTLERY RETROSPOT, RED RETROSP...",0.117647,0.117647,0.117647,1.000000,8.500000,0.103806,inf
