# Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Data preprocessing

In [3]:
# Read the raw transaction spreadsheet from the Kaggle input directory.
data_path = '/kaggle/input/market-basket-analysis/Assignment-1_Data.xlsx'
dataset = pd.read_excel(data_path)

In [4]:
# Preview the first 200 rows of the loaded data.
dataset.head(200)

Unnamed: 0,BillNo,Itemname,Quantity,Date,Price,CustomerID,Country
0,536365,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...
195,536389,CHRISTMAS LIGHTS 10 REINDEER,6,2010-12-01 10:03:00,8.50,12431.0,Australia
196,536389,VINTAGE UNION JACK CUSHION COVER,8,2010-12-01 10:03:00,4.95,12431.0,Australia
197,536389,VINTAGE HEADS AND TAILS CARD GAME,12,2010-12-01 10:03:00,1.25,12431.0,Australia
198,536389,SET OF 3 COLOURED FLYING DUCKS,6,2010-12-01 10:03:00,5.45,12431.0,Australia


In [5]:
# Count the missing values in each column.
dataset.isnull().sum()

BillNo             0
Itemname        1455
Quantity           0
Date               0
Price              0
CustomerID    134041
Country            0
dtype: int64

In [6]:
# Trim leading/trailing whitespace from the item names so that otherwise
# identical names are not treated as different items.
stripped_names = dataset['Itemname'].str.strip()
dataset['Itemname'] = stripped_names

In [7]:
# Remove rows that have no item name and drop the CustomerID column.
# This is one reassignment (no inplace=True) with errors='ignore' on the
# column drop, so re-running the cell does not raise a KeyError once
# CustomerID has already been removed.
dataset = (
    dataset
    .dropna(axis=0, subset=['Itemname'])
    .drop(columns=['CustomerID'], errors='ignore')
)
# Confirm that no missing values remain.
dataset.isnull().sum()

BillNo      0
Itemname    0
Quantity    0
Date        0
Price       0
Country     0
dtype: int64

In [8]:
# Inspect the dtype of each remaining column.
dataset.dtypes

BillNo              object
Itemname            object
Quantity             int64
Date        datetime64[ns]
Price              float64
Country             object
dtype: object

In [9]:
# Cast the invoice numbers to strings, then keep only the rows whose invoice
# number does not contain the letter 'C'.
# NOTE(review): 'C' presumably marks cancelled invoices -- confirm against the data source.
bill_numbers = dataset['BillNo'].astype('str')
dataset = dataset.assign(BillNo=bill_numbers)
dataset = dataset[~dataset['BillNo'].str.contains('C')]

In [10]:
# Number of rows per country.
dataset['Country'].value_counts()

United Kingdom          486167
Germany                   9042
France                    8408
Spain                     2485
Netherlands               2363
Belgium                   2031
Switzerland               1967
Portugal                  1501
Australia                 1185
Norway                    1072
Italy                      758
Sweden                     451
Unspecified                446
Austria                    398
Poland                     330
Japan                      321
Israel                     295
Hong Kong                  284
Singapore                  222
Iceland                    182
USA                        179
Greece                     145
Malta                      112
United Arab Emirates        68
RSA                         58
Lebanon                     45
Lithuania                   35
Brazil                      32
Bahrain                     18
Saudi Arabia                 9
Name: Country, dtype: int64

In [11]:
# (rows, columns) of the data at this point.
dataset.shape

(520609, 6)

In [12]:
# Build the invoice-by-item matrix for Germany: one row per BillNo, one column
# per Itemname, each cell holding the summed Quantity (0 where the item does
# not appear on the invoice).
germany_orders = dataset[dataset['Country'] == 'Germany']
item_quantities = germany_orders.groupby(['BillNo', 'Itemname'])['Quantity'].sum()
basket = item_quantities.unstack().fillna(0)

In [13]:
# Display the invoice-by-item quantity matrix.
basket

Itemname,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE SKULLS,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
BillNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536527,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536840,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536861,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536967,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536983,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581266,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581570,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581574,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
def encode(x):
    """Convert a quantity into a 0/1 purchase flag.

    Args:
        x: Numeric quantity of one item on one invoice.

    Returns:
        1 if ``x`` is strictly positive, otherwise 0.
    """
    # The earlier pair of checks (`x <= 0`, `x >= 1`) fell through and returned
    # None for values between 0 and 1 and for NaN; a single comparison always
    # yields 0 or 1.
    return 1 if x > 0 else 0
# Turn the quantities into 0/1 flags and remove the POSTAGE column.
# The drop is a reassignment with errors='ignore' rather than an in-place drop,
# so the cell can be re-run (or pointed at a basket that has no POSTAGE column)
# without raising a KeyError.
basket = basket.applymap(encode).drop(columns='POSTAGE', errors='ignore')
basket

Itemname,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE SKULLS,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
BillNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536527,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536840,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536861,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536967,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536983,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581266,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
581494,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
581570,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
581574,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Mine frequent itemsets from the basket matrix; itemsets are reported with
# the item (column) names rather than column indices.
min_support_threshold = 0.007
frequent_items = apriori(
    basket,
    min_support=min_support_threshold,
    use_colnames=True,
)




In [16]:
# Derive association rules from the frequent itemsets, using lift as the
# selection metric with a minimum threshold of 1.
rules = association_rules(
    frequent_items,
    metric='lift',
    min_threshold=1,
)


In [17]:
# Preview the first 100 rules.
rules.head(100)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(10 COLOUR SPACEBOY PEN),(LUNCH BAG APPLE DESIGN),0.024070,0.061269,0.010941,0.454545,7.418831,0.009466,1.721007,0.886547
1,(LUNCH BAG APPLE DESIGN),(10 COLOUR SPACEBOY PEN),0.061269,0.024070,0.010941,0.178571,7.418831,0.009466,1.188089,0.921678
2,(PLASTERS IN TIN SPACEBOY),(10 COLOUR SPACEBOY PEN),0.107221,0.024070,0.008753,0.081633,3.391466,0.006172,1.062679,0.789828
3,(10 COLOUR SPACEBOY PEN),(PLASTERS IN TIN SPACEBOY),0.024070,0.107221,0.008753,0.363636,3.391466,0.006172,1.402938,0.722534
4,(10 COLOUR SPACEBOY PEN),(ROUND SNACK BOXES SET OF4 WOODLAND),0.024070,0.245077,0.008753,0.363636,1.483766,0.002854,1.186308,0.334081
...,...,...,...,...,...,...,...,...,...,...
95,(36 PENCILS TUBE WOODLAND),(JUMBO BAG APPLES),0.026258,0.061269,0.008753,0.333333,5.440476,0.007144,1.408096,0.838202
96,(36 PENCILS TUBE WOODLAND),(JUMBO BAG PINK POLKADOT),0.026258,0.035011,0.008753,0.333333,9.520833,0.007833,1.447484,0.919101
97,(JUMBO BAG PINK POLKADOT),(36 PENCILS TUBE WOODLAND),0.035011,0.026258,0.008753,0.250000,9.520833,0.007833,1.298322,0.927438
98,(36 PENCILS TUBE WOODLAND),(JUMBO BAG RED RETROSPOT),0.026258,0.078775,0.008753,0.333333,4.231481,0.006684,1.381838,0.784270


In [18]:
# Order the rules from highest to lowest lift.
rules = rules.sort_values('lift', ascending=False)

In [19]:
# Display the rules table.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
58982,"(SPACEBOY BIRTHDAY CARD, PINK VINTAGE SPOT BEA...","(SPACEBOY CHILDRENS CUP, RED VINTAGE SPOT BEAK...",0.008753,0.008753,0.008753,1.000000,114.250000,0.008676,inf,1.000000
57294,"(PLASTERS IN TIN WOODLAND ANIMALS, JUMBO BAG P...","(JUMBO BAG WOODLAND ANIMALS, PLASTERS IN TIN V...",0.008753,0.008753,0.008753,1.000000,114.250000,0.008676,inf,1.000000
2611,(MONSTERS STENCIL CRAFT),(HAPPY STENCIL CRAFT),0.008753,0.008753,0.008753,1.000000,114.250000,0.008676,inf,1.000000
56376,"(CHILDRENS CUTLERY SPACEBOY, LUNCH BAG PINK PO...","(SKULL LUNCH BOX WITH CUTLERY, LUNCH BAG SPACE...",0.008753,0.008753,0.008753,1.000000,114.250000,0.008676,inf,1.000000
66328,"(ROUND SNACK BOXES SET OF4 WOODLAND, CARD PSYC...","(REGENCY CAKESTAND 3 TIER, RED HARMONICA IN B...",0.008753,0.008753,0.008753,1.000000,114.250000,0.008676,inf,1.000000
...,...,...,...,...,...,...,...,...,...,...
3769,(MEMO BOARD COTTAGE DESIGN),(ROUND SNACK BOXES SET OF 4 FRUITS),0.054705,0.157549,0.008753,0.160000,1.015556,0.000134,1.002918,0.016204
5552,(REGENCY CAKESTAND 3 TIER),(STRAWBERRY LUNCH BOX WITH CUTLERY),0.137856,0.078775,0.010941,0.079365,1.007496,0.000081,1.000641,0.008629
5553,(STRAWBERRY LUNCH BOX WITH CUTLERY),(REGENCY CAKESTAND 3 TIER),0.078775,0.137856,0.010941,0.138889,1.007496,0.000081,1.001200,0.008076
3594,(REGENCY CAKESTAND 3 TIER),(LUNCH BAG WOODLAND),0.137856,0.078775,0.010941,0.079365,1.007496,0.000081,1.000641,0.008629
