In [1]:
#IMPORTING REQUIRED LIBRARIES
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#FOR MARKET BASKET ANALYSIS
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [29]:
import warnings
warnings.simplefilter('ignore')

In [28]:
#IMPORTING DATASET
dataset = pd.read_excel('Online Retail.xlsx')

In [6]:
dataset.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


# First Observations

In [114]:
# Shape of the data
print(dataset.shape)

(541909, 8)


In [115]:
#Info of the data
print(dataset.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   InvoiceNo    541909 non-null  object 
 1   StockCode    541909 non-null  object 
 2   Description  540455 non-null  object 
 3   Quantity     541909 non-null  int64  
 4   InvoiceDate  541909 non-null  int64  
 5   UnitPrice    541909 non-null  float64
 6   CustomerID   406829 non-null  float64
 7   Country      541909 non-null  object 
dtypes: float64(2), int64(2), object(4)
memory usage: 33.1+ MB
None


In [116]:
#Missing values in the data
print(dataset.isnull().sum())

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64


In [117]:
#Number of unique values in each variable
print(dataset.nunique())

InvoiceNo      25900
StockCode       4070
Description     4223
Quantity         722
InvoiceDate        2
UnitPrice       1630
CustomerID      4372
Country           38
dtype: int64


# Data Preprocesssing

In the data there are some invoice that are 'CREDITS' rather of 'DEBITS'.
So we want to remove them. They are identified with 'C' in the Inovice field.

In [18]:
dataset['InvocieNo'] = dataset['InvoiceNo'].astype('str')

In [23]:
data = dataset[~dataset['InvoiceNo'].str.contains('C',na=False)]

In [30]:
#Filling the missing values
data['CustomerID'] = data['CustomerID'].fillna('000000')
data['Description'] = data['Description'].fillna('Unknown')

In [118]:
print(dataset['InvoiceDate'])

0         2010
1         2010
2         2010
3         2010
4         2010
          ... 
541904    2011
541905    2011
541906    2011
541907    2011
541908    2011
Name: InvoiceDate, Length: 541909, dtype: int64


# Feature Engineering

We add column with an Year, Month, Hour, Month_Year of each transaction

In [43]:
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'],errors='coerce')
data['Year'] = data['InvoiceDate'].dt.year
data['Month'] = data['InvoiceDate'].dt.month
data['Hour'] = data['InvoiceDate'].dt.hour
data['Month_Year'] = pd.to_datetime(data['InvoiceDate']).dt.to_period('M')

In [46]:
# Creating Income Field by using existing fields
data['Income'] = round(data['Quantity'] * data['UnitPrice'],2)

# Exploratory Data Analysis (EDA)

In [119]:
#Stock code

Products = data['StockCode'].nunique()
print(Products)

4059


In [120]:
#Number of unique products ordered by customers

Product_group = data[['CustomerID','StockCode']].groupby('CustomerID').count().sort_values('StockCode',ascending=False,axis=0)
print(Product_group)

            StockCode
CustomerID           
000000         134697
17841.0          7847
14911.0          5677
14096.0          5111
12748.0          4596
...               ...
17925.0             1
15823.0             1
15802.0             1
13302.0             1
12346.0             1

[4340 rows x 1 columns]


In [52]:
#Country
#List of unqiue countries

data['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Finland',
       'Austria', 'Bahrain', 'Israel', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [121]:
#Number of Unique orders by country
country_group = data[['Country','InvoiceNo']].groupby('Country').count().sort_values('InvoiceNo',axis=0,ascending=False)
print(country_group)

                      InvoiceNo
Country                        
United Kingdom           487622
Germany                    9042
France                     8408
EIRE                       7894
Spain                      2485
Netherlands                2363
Belgium                    2031
Switzerland                1967
Portugal                   1501
Australia                  1185
Norway                     1072
Italy                       758
Channel Islands             748
Finland                     685
Cyprus                      614
Sweden                      451
Unspecified                 446
Austria                     398
Denmark                     380
Poland                      330
Japan                       321
Israel                      295
Hong Kong                   284
Singapore                   222
Iceland                     182
USA                         179
Canada                      151
Greece                      145
Malta                       112
United A

In [57]:
#Description of the products

data['Description'].unique()

array(['WHITE HANGING HEART T-LIGHT HOLDER', 'WHITE METAL LANTERN',
       'CREAM CUPID HEARTS COAT HANGER', ..., 'lost',
       'CREAM HANGING HEART T-LIGHT HOLDER',
       'PAPER CRAFT , LITTLE BIRDIE'], dtype=object)

In [122]:
Desc = data['Description'].value_counts()
print(Desc)

WHITE HANGING HEART T-LIGHT HOLDER    2327
JUMBO BAG RED RETROSPOT               2115
REGENCY CAKESTAND 3 TIER              2019
PARTY BUNTING                         1707
LUNCH BAG RED RETROSPOT               1594
                                      ... 
PINK PAINTED KASHMIRI CHAIR              1
WHITE FRANGIPANI HAIR CLIP               1
ENAMEL MUG PANTRY                        1
WHITE DOVE HONEYCOMB PAPER GARLAND       1
DOORKNOB CERAMIC IVORY                   1
Name: Description, Length: 4208, dtype: int64


In [123]:
#The customers who generated most orders

Customer_group = data[['CustomerID','InvoiceNo']].groupby('CustomerID').count().sort_values('InvoiceNo',axis=0,ascending=False)
print(Customer_group)

            InvoiceNo
CustomerID           
000000         134697
17841.0          7847
14911.0          5677
14096.0          5111
12748.0          4596
...               ...
17925.0             1
15823.0             1
15802.0             1
13302.0             1
12346.0             1

[4340 rows x 1 columns]


# Analyzing KPIs

In [72]:
Country_Income = data[['Country','Income']].groupby('Country').sum().sort_values('Income',axis=0,ascending=False)
Country_Income

Unnamed: 0_level_0,Income
Country,Unnamed: 1_level_1
United Kingdom,9003098.0
Netherlands,285446.3
EIRE,283454.0
Germany,228867.1
France,209715.1
Australia,138521.3
Spain,61577.11
Switzerland,57089.9
Belgium,41196.34
Sweden,38378.33


In [124]:
# Which product generates the most revenue

Product_Income = data.groupby('Description')['Income'].sum().sort_values(ascending=False)
print(Product_Income)

Description
DOTCOM POSTAGE                         206248.77
REGENCY CAKESTAND 3 TIER               174484.74
PAPER CRAFT , LITTLE BIRDIE            168469.60
WHITE HANGING HEART T-LIGHT HOLDER     106292.77
PARTY BUNTING                           99504.33
                                         ...    
Wrongly mrked had 85123a in box             0.00
add stock to allocate online orders         0.00
adjust                                      0.00
wrongly sold sets                           0.00
Adjust bad debt                        -11062.06
Name: Income, Length: 4208, dtype: float64


In [125]:
#Annual Income

Annual_Income = data[['Year','Income']].groupby('Year').sum().sort_values('Income',axis=0,ascending=False)
print(Annual_Income)

            Income
Year              
2011  9.820814e+06
2010  8.237461e+05


In [126]:
#Monthly Income
Monthly_Income = data[['Month_Year','Income']].groupby('Month_Year').sum()
print(Monthly_Income)

                Income
Month_Year            
2010-12      823746.14
2011-01      691364.56
2011-02      523631.89
2011-03      717639.36
2011-04      537808.62
2011-05      770536.02
2011-06      761739.90
2011-07      719221.19
2011-08      737014.26
2011-09     1058590.17
2011-10     1154979.30
2011-11     1509496.33
2011-12      638792.68


# Basket Analysis using Aprior Algorithm:

In [87]:
#Further Analysis, We choose transactions from only one country, we choose Netherlands

Data = data[data['Country'] == 'Netherlands']

In [96]:
#We modify the table with data in such way that each transaction is written in one line

basket = Data.groupby(['InvoiceNo','Description'])['Quantity'].sum().unstack().fillna(0)
basket

Description,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,I LOVE LONDON MINI RUCKSACK,SPACEBOY BABY GIFT SET,10 COLOUR SPACEBOY PEN,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE WOODLAND,16 PIECE CUTLERY SET PANTRY DESIGN,20 DOLLY PEGS RETROSPOT,200 RED + WHITE BENDY STRAWS,3 HEARTS HANGING DECORATION RUSTIC,3 HOOK HANGER MAGIC GARDEN,3 PIECE SPACEBOY COOKIE CUTTER SET,3 RAFFIA RIBBONS 50'S CHRISTMAS,3 RAFFIA RIBBONS VINTAGE CHRISTMAS,3 STRIPEY MICE FELTCRAFT,36 FOIL HEART CAKE CASES,36 FOIL STAR CAKE CASES,36 PENCILS TUBE RED RETROSPOT,36 PENCILS TUBE WOODLAND,3D CHRISTMAS STAMPS STICKERS,3D HEARTS HONEYCOMB PAPER GARLAND,5 HOOK HANGER MAGIC TOADSTOOL,5 HOOK HANGER RED MAGIC TOADSTOOL,6 RIBBONS RUSTIC CHARM,60 CAKE CASES DOLLY GIRL DESIGN,60 CAKE CASES VINTAGE CHRISTMAS,60 TEATIME FAIRY CAKE CASES,72 SWEETHEART FAIRY CAKE CASES,ABC TREASURE BOOK BOX,AIRLINE BAG VINTAGE JET SET RED,AIRLINE BAG VINTAGE JET SET WHITE,ALARM CLOCK BAKELIKE CHOCOLATE,ALARM CLOCK BAKELIKE GREEN,ALARM CLOCK BAKELIKE IVORY,ALARM CLOCK BAKELIKE PINK,ALARM CLOCK BAKELIKE RED,ALUMINIUM STAMPED HEART,ANGEL DECORATION PAINTED ZINC,APOTHECARY MEASURING JAR,APRON APPLE DELIGHT,ASS FLORAL PRINT MULTI SCREWDRIVER,ASSORTED COLOUR BIRD ORNAMENT,ASSORTED COLOURS SILK FAN,ASSORTED EASTER DECORATIONS BELLS,"ASSORTED FLOWER COLOUR ""LEIS""",ASSORTED TUTTI FRUTTI FOB NOTEBOOK,ASSTD FRUIT+FLOWERS FRIDGE MAGNETS,BAKING MOULD HEART WHITE CHOCOLATE,BAKING MOULD ROSE MILK CHOCOLATE,BAKING SET 9 PIECE RETROSPOT,BAKING SET SPACEBOY DESIGN,BALLOON PUMP WITH 10 BALLOONS,BANQUET BIRTHDAY CARD,BASKET OF TOADSTOOLS,BEADED CHANDELIER T-LIGHT HOLDER,BEADED CRYSTAL HEART BLUE ON STICK,BICYCLE PUNCTURE REPAIR KIT,...,TRIPLE HOOK ANTIQUE IVORY ROSE,TROPICAL PASSPORT COVER,TV DINNER TRAY AIR HOSTESS,TV DINNER TRAY DOLLY GIRL,VICTORIAN GLASS HANGING T-LIGHT,VICTORIAN SEWING KIT,VINTAGE CARAVAN GREETING CARD,VINTAGE DOILY DELUXE SEWING KIT,VINTAGE DOILY JUMBO BAG RED,VINTAGE DOILY TRAVEL SEWING KIT,VINTAGE DONKEY TAIL GAME,VINTAGE EMBOSSED HEART,VINTAGE HEADS AND TAILS CARD GAME,VINTAGE KID DOLLY CARD,WALL ART DOLLY GIRL,WALL ART SPACEBOY,WALL TIDY RETROSPOT,WATERING CAN PINK BUNNY,WHITE BROCANTE SOAP DISH,WHITE HANGING HEART T-LIGHT HOLDER,WHITE SPOT BLUE CERAMIC DRAWER KNOB,WHITE SPOT RED CERAMIC DRAWER KNOB,WHITE WOOD GARDEN PLANT LADDER,WOOD STAMP SET BEST WISHES,WOODEN ADVENT CALENDAR CREAM,WOODEN ADVENT CALENDAR RED,WOODEN CROQUET GARDEN SET,WOODEN OWLS LIGHT GARLAND,WOODEN SCHOOL COLOURING SET,WOODLAND HEIGHT CHART STICKERS,WOODLAND BUNNIES LOLLY MAKERS,WOODLAND CHARLOTTE BAG,WOODLAND DESIGN COTTON TOTE BAG,WOODLAND LARGE BLUE FELT HEART,WOODLAND LARGE PINK FELT HEART,WOODLAND LARGE RED FELT HEART,WOODLAND MINI BACKPACK,WOODLAND MINI RUCKSACK,WOODLAND PARTY BAG + STICKER SET,WOODLAND SMALL BLUE FELT HEART,WOODLAND SMALL PINK FELT HEART,WOODLAND SMALL RED FELT HEART,WOODLAND STORAGE BOX LARGE,WOODLAND STORAGE BOX SMALL,WRAP VINTAGE DOILEY,WRAP ENGLISH ROSE,WRAP GINGHAM ROSE,WRAP GREEN PEARS,WRAP I LOVE LONDON,WRAP PAISLEY PARK,WRAP RED APPLES,WRAP WEDDING DAY,YELLOW METAL CHICKEN HEART,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC FOLKART SLEIGH BELLS,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1
536403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
539491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
539731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,288.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,40.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
541206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,144.0,48.0,0.0,0.0,0.0,0.0,0.0,288.0,0.0,120.0,0.0,0.0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,144.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
541570,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,120.0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,256.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578143,0.0,0.0,0.0,0.0,0.0,384.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
579528,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,...,0.0,0.0,32.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,0.0,72.0,0.0,0.0,0.0,0.0,48.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [97]:
#Model

basket_model = basket.applymap(lambda x:1 if x > 0 else 0)

In [127]:
#In this case we set min_support = 0.08

popular_sets = apriori(basket_model,min_support=0.08,use_colnames=True)
print(popular_sets)

      support                                           itemsets
0    0.105263                   (72 SWEETHEART FAIRY CAKE CASES)
1    0.094737                             (CARD BIRTHDAY COWBOY)
2    0.126316                                 (CARD DOLLY GIRL )
3    0.094737                               (CARD GINGHAM ROSE )
4    0.084211                      (CHARLOTTE BAG APPLES DESIGN)
..        ...                                                ...
199  0.084211  (DOLLY GIRL LUNCH BOX, SPACEBOY LUNCH BOX , SP...
200  0.084211  (DOLLY GIRL LUNCH BOX, SPACEBOY LUNCH BOX , ST...
201  0.084211  (DOLLY GIRL LUNCH BOX, SPACEBOY LUNCH BOX , WO...
202  0.084211  (DOLLY GIRL LUNCH BOX, SPACEBOY LUNCH BOX , ST...
203  0.084211  (SPACEBOY BIRTHDAY CARD, PLASTERS IN TIN SPACE...

[204 rows x 2 columns]


In [101]:
rules = association_rules(popular_sets,metric='lift',min_threshold=1)

In [107]:
rules[(rules['lift'] >= 5) & (rules['confidence'] >= 1)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
108,(PACK OF 60 MUSHROOM CAKE CASES),(SPACEBOY BIRTHDAY CARD),0.084211,0.178947,0.084211,1.0,5.588235,0.069141,inf
330,"(PLASTERS IN TIN SPACEBOY, LUNCH BAG RED RETRO...",(SPACEBOY BIRTHDAY CARD),0.084211,0.178947,0.084211,1.0,5.588235,0.069141,inf
332,"(LUNCH BAG RED RETROSPOT, SPACEBOY BIRTHDAY CARD)",(PLASTERS IN TIN SPACEBOY),0.084211,0.126316,0.084211,1.0,7.916667,0.073573,inf
473,"(DOLLY GIRL LUNCH BOX, PLASTERS IN TIN SPACEBOY)","(SPACEBOY LUNCH BOX , ROUND SNACK BOXES SET OF...",0.084211,0.189474,0.084211,1.0,5.277778,0.068255,inf
487,"(DOLLY GIRL LUNCH BOX, RED RETROSPOT CHARLOTTE...","(SPACEBOY LUNCH BOX , ROUND SNACK BOXES SET OF...",0.094737,0.189474,0.094737,1.0,5.277778,0.076787,inf
489,"(SPACEBOY LUNCH BOX , RED RETROSPOT CHARLOTTE ...","(DOLLY GIRL LUNCH BOX, ROUND SNACK BOXES SET O...",0.094737,0.168421,0.094737,1.0,5.9375,0.078781,inf
515,"(DOLLY GIRL LUNCH BOX, SPACEBOY BIRTHDAY CARD)","(SPACEBOY LUNCH BOX , ROUND SNACK BOXES SET OF...",0.084211,0.189474,0.084211,1.0,5.277778,0.068255,inf
568,"(LUNCH BAG RED RETROSPOT, ROUND SNACK BOXES SE...",(PLASTERS IN TIN SPACEBOY),0.084211,0.126316,0.084211,1.0,7.916667,0.073573,inf
569,"(PLASTERS IN TIN SPACEBOY, LUNCH BAG RED RETRO...",(SPACEBOY BIRTHDAY CARD),0.084211,0.178947,0.084211,1.0,5.588235,0.069141,inf
571,"(LUNCH BAG RED RETROSPOT, SPACEBOY BIRTHDAY CARD)","(PLASTERS IN TIN SPACEBOY, ROUND SNACK BOXES S...",0.084211,0.126316,0.084211,1.0,7.916667,0.073573,inf


# Recommendations

In [128]:
print(rules['antecedents'][332])

frozenset({'LUNCH BAG RED RETROSPOT', 'SPACEBOY BIRTHDAY CARD'})


In [129]:
print(basket_model['SPACEBOY BIRTHDAY CARD'].sum())

17


In [130]:
print(basket_model['LUNCH BAG RED RETROSPOT'].sum())

10


In [131]:
print(basket_model['PLASTERS IN TIN SPACEBOY'].sum())

12
