In [1]:
import numpy as np 
import pandas as pd 
from mlxtend.frequent_patterns import apriori ,  association_rules

In [2]:
# Load the retail transactions from the local CSV (decoded with the Latin-1 codec)
# and preview the first rows.
data = pd.read_csv(
    'Online_Retail.csv',
    encoding='latin1',
)
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,01-12-2010 08:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,01-12-2010 08:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,01-12-2010 08:26,3.39,17850.0,United Kingdom


# Data preparation

In [3]:
# Clean the transactions in one non-mutating chain so the cell can be re-run safely.
data = (
    data
    # Drop rows whose invoice number is missing (this does NOT de-duplicate invoices).
    # It has to run before the cast below: astype('str') would turn NaN into the
    # literal string 'nan' and the missing rows could no longer be detected.
    .dropna(axis=0, subset=['InvoiceNo'])
    .assign(
        # Trim leading/trailing whitespace so identical products share one label.
        Description=lambda df: df["Description"].str.strip(),
        # Invoice numbers are handled as strings so they can be pattern-matched.
        InvoiceNo=lambda df: df["InvoiceNo"].astype('str'),
    )
    # Remove the credited transactions: invoice numbers containing the letter 'C'.
    .loc[lambda df: ~df['InvoiceNo'].str.contains('C', regex=False)]
)
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,01-12-2010 08:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,01-12-2010 08:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,01-12-2010 08:26,3.39,17850.0,United Kingdom


In [4]:
# Dimensions of `data` as (rows, columns).
data.shape

(532621, 8)

In [5]:
# Number of transaction rows per country, most frequent first.
data.loc[:, "Country"].value_counts()

Country
United Kingdom          487622
Germany                   9042
France                    8408
EIRE                      7894
Spain                     2485
Netherlands               2363
Belgium                   2031
Switzerland               1967
Portugal                  1501
Australia                 1185
Norway                    1072
Italy                      758
Channel Islands            748
Finland                    685
Cyprus                     614
Sweden                     451
Unspecified                446
Austria                    398
Denmark                    380
Poland                     330
Japan                      321
Israel                     295
Hong Kong                  284
Singapore                  222
Iceland                    182
USA                        179
Canada                     151
Greece                     145
Malta                      112
United Arab Emirates        68
European Community          60
RSA                         58


In [6]:
# Build the invoice-by-product matrix for German orders: one row per invoice,
# one column per product description, each cell holding the total quantity of
# that product on that invoice (0 where the product does not appear).
basket = (
    data[data['Country'] == 'Germany']
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    # unstack() pivots Description into columns and keeps InvoiceNo as the index,
    # so no reset_index()/set_index('InvoiceNo') round trip is required.
    .unstack()
    .fillna(0)
)

In [7]:
# Preview the first rows of the invoice-by-product quantity matrix.
basket.head()

Description,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE SKULLS,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536527,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536840,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536861,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536967,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536983,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Convert every positive value to 1 and everything else to 0.
def encode_units(x):
    """Binarise a basket quantity: 1 if the item was bought, otherwise 0.

    Parameters
    ----------
    x : int or float
        Quantity value taken from one cell of the basket matrix.

    Returns
    -------
    int
        1 for any strictly positive value; 0 for zero, negative or NaN values.
    """
    # A single comparison guarantees that every input maps to 0 or 1, including
    # fractional values between 0 and 1 and NaN (NaN > 0 is False).
    return 1 if x > 0 else 0
# Apply the 0/1 encoding to every cell. DataFrame.map is the replacement for the
# deprecated DataFrame.applymap (requires pandas >= 2.1).
basket = basket.map(encode_units)
# Remove the 'POSTAGE' column (presumably a shipping charge rather than a product —
# confirm against the data). errors='ignore' keeps the cell re-runnable and lets it
# work on a basket that has no such column.
basket = basket.drop(columns='POSTAGE', errors='ignore')
basket.head()

  basket = basket.applymap(encode_units)


Description,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE SKULLS,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536527,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536840,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536861,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536967,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536983,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Generating frequent itemsets
# Keep every itemset that appears in at least 3% of the invoices (min_support=0.03).
# The 0/1 matrix is cast to bool because mlxtend expects boolean input and warns
# about other dtypes; the resulting itemsets and supports are the same.
frequent_itemsets = apriori(basket.astype(bool), min_support=0.03, use_colnames=True)
frequent_itemsets.head()



Unnamed: 0,support,itemsets
0,0.032823,(3 HOOK HANGER MAGIC GARDEN)
1,0.041575,(3 PIECE SPACEBOY COOKIE CUTTER SET)
2,0.037199,(5 HOOK HANGER RED MAGIC TOADSTOOL)
3,0.102845,(6 RIBBONS RUSTIC CHARM)
4,0.032823,(ALARM CLOCK BAKELIKE GREEN)


In [10]:
# Derive association rules from the frequent itemsets, using lift as the
# selection metric with a minimum threshold of 1, then show the first six.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)
rules.head(6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(6 RIBBONS RUSTIC CHARM),(REGENCY CAKESTAND 3 TIER),0.102845,0.137856,0.041575,0.404255,2.932455,1.0,0.027398,1.447171,0.734531,0.208791,0.308997,0.352921
1,(REGENCY CAKESTAND 3 TIER),(6 RIBBONS RUSTIC CHARM),0.137856,0.102845,0.041575,0.301587,2.932455,1.0,0.027398,1.284563,0.76436,0.208791,0.221525,0.352921
2,(ROUND SNACK BOXES SET OF4 WOODLAND),(6 RIBBONS RUSTIC CHARM),0.245077,0.102845,0.035011,0.142857,1.389058,1.0,0.009806,1.046681,0.371014,0.111888,0.044599,0.241641
3,(6 RIBBONS RUSTIC CHARM),(ROUND SNACK BOXES SET OF4 WOODLAND),0.102845,0.245077,0.035011,0.340426,1.389058,1.0,0.009806,1.144561,0.312195,0.111888,0.126303,0.241641
4,(ROUND SNACK BOXES SET OF4 WOODLAND),(BLUE HARMONICA IN BOX),0.245077,0.04814,0.032823,0.133929,2.782062,1.0,0.021025,1.099055,0.848502,0.12605,0.090127,0.407873
5,(BLUE HARMONICA IN BOX),(ROUND SNACK BOXES SET OF4 WOODLAND),0.04814,0.245077,0.032823,0.681818,2.782062,1.0,0.021025,2.372616,0.67295,0.12605,0.578524,0.407873


In [11]:
# Column total for this item in `basket` (a count of invoices when cells are 0/1 flags).
basket.loc[:, "ROUND SNACK BOXES SET OF4 WOODLAND"].sum()

np.int64(112)

In [12]:
# Column total for this item in `basket` (a count of invoices when cells are 0/1 flags).
basket.loc[:, "BLUE HARMONICA IN BOX"].sum()

np.int64(22)

In [13]:
# Filter the rules: keep those with support >= 0.03 and confidence >= 0.5.
rules.loc[rules['support'].ge(0.03) & rules['confidence'].ge(0.5)]


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
5,(BLUE HARMONICA IN BOX),(ROUND SNACK BOXES SET OF4 WOODLAND),0.048140,0.245077,0.032823,0.681818,2.782062,1.0,0.021025,2.372616,0.672950,0.126050,0.578524,0.407873
6,(RED RETROSPOT CUP),(BLUE POLKADOT CUP),0.070022,0.048140,0.035011,0.500000,10.386364,1.0,0.031640,1.903720,0.971765,0.421053,0.474713,0.613636
7,(BLUE POLKADOT CUP),(RED RETROSPOT CUP),0.048140,0.070022,0.035011,0.727273,10.386364,1.0,0.031640,3.409920,0.949425,0.421053,0.706738,0.613636
8,(JUMBO BAG APPLES),(CHARLOTTE BAG APPLES DESIGN),0.061269,0.065646,0.030635,0.500000,7.616667,1.0,0.026613,1.868709,0.925408,0.318182,0.464871,0.483333
11,(CHARLOTTE BAG APPLES DESIGN),(ROUND SNACK BOXES SET OF 4 FRUITS),0.065646,0.157549,0.032823,0.500000,3.173611,1.0,0.022480,1.684902,0.733021,0.172414,0.406494,0.354167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,"(RED RETROSPOT CHARLOTTE BAG, WOODLAND CHARLOT...",(ROUND SNACK BOXES SET OF4 WOODLAND),0.059081,0.245077,0.030635,0.518519,2.115741,1.0,0.016155,1.567918,0.560465,0.112000,0.362211,0.321759
185,"(ROUND SNACK BOXES SET OF4 WOODLAND, SPACEBOY ...",(ROUND SNACK BOXES SET OF 4 FRUITS),0.070022,0.157549,0.037199,0.531250,3.371962,1.0,0.026167,1.797228,0.756401,0.195402,0.443588,0.383681
186,"(ROUND SNACK BOXES SET OF 4 FRUITS, SPACEBOY L...",(ROUND SNACK BOXES SET OF4 WOODLAND),0.039387,0.245077,0.037199,0.944444,3.853671,1.0,0.027546,13.588621,0.770870,0.150442,0.926409,0.548115
191,"(ROUND SNACK BOXES SET OF4 WOODLAND, WOODLAND ...",(ROUND SNACK BOXES SET OF 4 FRUITS),0.063457,0.157549,0.039387,0.620690,3.939655,1.0,0.029390,2.221007,0.796729,0.216867,0.549754,0.435345
