In [56]:
import warnings
warnings.filterwarnings('ignore')

In [57]:
import mlxtend
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

In [58]:
# Read the purchase table from the CSV next to the notebook and preview it
df = pd.read_csv('book.csv')
df.head(n=5)

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence
0,0,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,1,1,1,0,1,0,1,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0


In [59]:
# Summarise column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   ChildBks   2000 non-null   int64
 1   YouthBks   2000 non-null   int64
 2   CookBks    2000 non-null   int64
 3   DoItYBks   2000 non-null   int64
 4   RefBks     2000 non-null   int64
 5   ArtBks     2000 non-null   int64
 6   GeogBks    2000 non-null   int64
 7   ItalCook   2000 non-null   int64
 8   ItalAtlas  2000 non-null   int64
 9   ItalArt    2000 non-null   int64
 10  Florence   2000 non-null   int64
dtypes: int64(11)
memory usage: 172.0 KB


In [60]:
# df.info() reports no null values and every column as int64,
# so the column totals can be plotted directly.

# Pie chart of each column's total across all rows
book_totals = df.sum()
fig = go.Figure()
fig.add_trace(
    go.Pie(labels=book_totals.index, values=book_totals.values)
)
fig.update_layout(autosize=False, title_text='Distribution of books')



In [61]:
# 'CookBks' is the most sold book in the store

# Bar chart of the five columns with the largest totals
top_5_df = df.sum().sort_values(ascending=False).head(5)
fig = go.Figure(
    go.Bar(
        x=top_5_df.index,
        y=top_5_df.values,
        text=top_5_df.values,
        textposition='auto',
    )
)
fig.update_layout(
    autosize=False,
    title_text='Top 5 Selling Books',
    xaxis=dict(title='Name of Book'),
    yaxis=dict(title='No of Books sold'),
)

In [62]:
# Frequent itemsets with support of at least 0.1, labelled by column name
apriori_df = apriori(
    df.astype('bool'),
    min_support=0.1,
    use_colnames=True,
)

In [63]:
# Record how many items each frequent itemset contains
apriori_df['length'] = apriori_df['itemsets'].apply(len)
apriori_df

Unnamed: 0,support,itemsets,length
0,0.423,(ChildBks),1
1,0.2475,(YouthBks),1
2,0.431,(CookBks),1
3,0.282,(DoItYBks),1
4,0.2145,(RefBks),1
5,0.241,(ArtBks),1
6,0.276,(GeogBks),1
7,0.1135,(ItalCook),1
8,0.1085,(Florence),1
9,0.165,"(YouthBks, ChildBks)",2


In [64]:
# Raise the minimum support threshold to 0.2
apriori(
    df.astype('bool'),
    min_support=0.2,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.423,(ChildBks)
1,0.2475,(YouthBks)
2,0.431,(CookBks)
3,0.282,(DoItYBks)
4,0.2145,(RefBks)
5,0.241,(ArtBks)
6,0.276,(GeogBks)
7,0.256,"(CookBks, ChildBks)"


In [65]:
# min_support=0.2 kept 8 frequent itemsets (7 single books and 1 pair)
# Now raise the threshold to 0.3
apriori(
    df.astype('bool'),
    min_support=0.3,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.423,(ChildBks)
1,0.431,(CookBks)


In [66]:
# Only 2 frequent itemsets remain at min_support=0.3

In [67]:
# Frequent itemsets containing a single book, highest support first
is_single = apriori_df['length'] == 1
apriori_df[is_single].sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets,length
2,0.431,(CookBks),1
0,0.423,(ChildBks),1
3,0.282,(DoItYBks),1
6,0.276,(GeogBks),1
1,0.2475,(YouthBks),1
5,0.241,(ArtBks),1
4,0.2145,(RefBks),1
7,0.1135,(ItalCook),1
8,0.1085,(Florence),1


'CookBks' (0.431) and 'ChildBks' (0.423) have the highest support among single books.

In [68]:
# Frequent itemsets containing two books, highest support first
is_pair = apriori_df['length'] == 2
apriori_df[is_pair].sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets,length
10,0.256,"(CookBks, ChildBks)",2
14,0.195,"(GeogBks, ChildBks)",2
22,0.1925,"(GeogBks, CookBks)",2
19,0.1875,"(DoItYBks, CookBks)",2
11,0.184,"(DoItYBks, ChildBks)",2
21,0.167,"(ArtBks, CookBks)",2
9,0.165,"(YouthBks, ChildBks)",2
13,0.1625,"(ArtBks, ChildBks)",2
15,0.162,"(YouthBks, CookBks)",2
20,0.1525,"(RefBks, CookBks)",2


The pair ('CookBks', 'ChildBks') has the highest support among two-book itemsets (0.256).

In [69]:
# Frequent itemsets containing three books, highest support first
is_triple = apriori_df['length'] == 3
apriori_df[is_triple].sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets,length
33,0.1495,"(GeogBks, CookBks, ChildBks)",3
30,0.146,"(DoItYBks, CookBks, ChildBks)",3
29,0.129,"(YouthBks, CookBks, ChildBks)",3
32,0.1265,"(ArtBks, CookBks, ChildBks)",3
31,0.1225,"(RefBks, CookBks, ChildBks)",3
37,0.1085,"(GeogBks, DoItYBks, CookBks)",3
34,0.1045,"(GeogBks, DoItYBks, ChildBks)",3
38,0.1035,"(ArtBks, GeogBks, CookBks)",3
35,0.102,"(ArtBks, GeogBks, ChildBks)",3
36,0.1015,"(ArtBks, DoItYBks, CookBks)",3


'CookBks' and 'ChildBks' appear together in each of the five most frequent three-book itemsets; the most frequent combination adds 'GeogBks' (support 0.1495).

In [70]:
# Derive association rules with lift >= 1 and order them by descending lift
rules_df = association_rules(apriori_df, metric='lift', min_threshold=1)
rules_df = rules_df.sort_values(by='lift', ascending=False)
rules_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
29,(CookBks),(ItalCook),0.4310,0.1135,0.1135,0.263341,2.320186,0.064582,1.203406
28,(ItalCook),(CookBks),0.1135,0.4310,0.1135,1.000000,2.320186,0.064582,inf
77,"(ArtBks, ChildBks)",(GeogBks),0.1625,0.2760,0.1020,0.627692,2.274247,0.057150,1.944628
80,(GeogBks),"(ArtBks, ChildBks)",0.2760,0.1625,0.1020,0.369565,2.274247,0.057150,1.328448
85,(ArtBks),"(DoItYBks, CookBks)",0.2410,0.1875,0.1015,0.421162,2.246196,0.056313,1.403674
...,...,...,...,...,...,...,...,...,...
4,(DoItYBks),(ChildBks),0.2820,0.4230,0.1840,0.652482,1.542511,0.064714,1.660347
12,(YouthBks),(CookBks),0.2475,0.4310,0.1620,0.654545,1.518667,0.055328,1.647105
13,(CookBks),(YouthBks),0.4310,0.2475,0.1620,0.375870,1.518667,0.055328,1.205678
3,(ChildBks),(CookBks),0.4230,0.4310,0.2560,0.605201,1.404179,0.073687,1.441240


In [71]:
def scatter_plot(df,x,y):
    """Display a marker-only Plotly scatter of column ``x`` against column ``y``.

    Parameters
    ----------
    df : object indexable by column name (``df[x]``, ``df[y]``)
        Source of the plotted values.
    x, y : str
        Column names; also used as the axis titles and in the figure title.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    axis_line_colour = '#373645'
    layout_options = dict(
        xaxis=dict(linecolor=axis_line_colour, title=x),
        yaxis=dict(linecolor=axis_line_colour, title=y),
        plot_bgcolor='white',
        autosize=False,
        title_text=f'<b>{x} vs {y}</b>',
    )

    chart = go.Figure()
    chart.add_scatter(x=df[x], y=df[y], mode='markers')
    chart.update_layout(**layout_options)
    chart.show()

In [72]:
# Support vs confidence for the mined rules
scatter_plot(rules_df, x='support', y='confidence')


In [73]:
# Lift vs confidence for the mined rules
scatter_plot(rules_df, x='lift', y='confidence')

In [74]:
# Antecedent x consequent matrix of lift values, shaded green by magnitude;
# antecedent/consequent pairs with no rule (NaN) are highlighted in black.
# pivot() takes keyword-only arguments in pandas >= 2.0, and
# Styler.set_precision() was removed there in favour of format(precision=...).
(
    rules_df
    .pivot(index='antecedents', columns='consequents', values='lift')
    .style
    .background_gradient(cmap='Greens')
    .format(precision=2)
    .highlight_null('#000000')
)

consequents,frozenset({'ItalCook'}),"frozenset({'YouthBks', 'CookBks'})",frozenset({'GeogBks'}),"frozenset({'ArtBks', 'GeogBks'})",frozenset({'ChildBks'}),"frozenset({'RefBks', 'CookBks'})","frozenset({'GeogBks', 'DoItYBks'})","frozenset({'ArtBks', 'DoItYBks'})",frozenset({'YouthBks'}),frozenset({'DoItYBks'}),"frozenset({'DoItYBks', 'ChildBks'})","frozenset({'GeogBks', 'ChildBks'})","frozenset({'CookBks', 'ChildBks'})",frozenset({'RefBks'}),"frozenset({'GeogBks', 'CookBks'})",frozenset({'ArtBks'}),"frozenset({'ArtBks', 'CookBks'})","frozenset({'DoItYBks', 'CookBks'})","frozenset({'ArtBks', 'ChildBks'})",frozenset({'CookBks'}),"frozenset({'RefBks', 'ChildBks'})","frozenset({'YouthBks', 'ChildBks'})"
antecedents,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
frozenset({'CookBks'}),2.32,,1.62,1.88,1.4,,1.9,1.91,1.52,1.54,1.84,1.78,,1.65,,1.61,,,1.81,,1.88,1.81
"frozenset({'YouthBks', 'CookBks'})",,,,,1.88,,,,,,,,,,,,,,,,,
frozenset({'GeogBks'}),,,,,1.67,,,,1.76,1.7,2.06,,2.12,1.87,,1.92,2.25,2.1,2.27,1.62,,
"frozenset({'ArtBks', 'GeogBks'})",,,,,1.89,,,,,,,,,,,,,,,1.88,,
"frozenset({'RefBks', 'CookBks'})",,,,,1.9,,,,,,,,,,,,,,,,,
frozenset({'ChildBks'}),,1.88,1.67,1.89,,1.9,1.86,,1.58,1.54,,,,1.67,1.84,1.59,1.79,1.84,,1.4,,
"frozenset({'GeogBks', 'DoItYBks'})",,,,,1.86,,,,,,,,,,,,,,,1.9,,
"frozenset({'ArtBks', 'DoItYBks'})",,,,,,,,,,,,,,,,,,,,1.91,,
frozenset({'YouthBks'}),,,1.76,,1.58,,,,,1.65,,,2.04,,,1.69,,,,1.52,,
frozenset({'DoItYBks'}),,,1.7,,1.54,,,,1.65,,,1.9,2.02,1.74,2.0,1.82,2.16,,,1.54,,


In [75]:
# To check with different support and confidence, will make a function for it

def give_my_rules(df,support,confidence):
    """Mine frequent itemsets and association rules, print and plot them.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that is cast to boolean with ``astype('bool')`` before mining.
    support : float
        Passed to ``apriori`` as ``min_support``.
    confidence : float
        Passed to ``association_rules`` as ``min_threshold`` for the
        ``'confidence'`` metric.

    Returns
    -------
    pandas Styler
        Antecedent x consequent matrix of lift values with a green gradient,
        two-decimal formatting and missing cells highlighted in black.

    Side effects: prints both tables and shows two scatter plots through
    ``scatter_plot``.
    """
    apriori_df = apriori(df.astype('bool'), min_support=support, use_colnames=True)

    print('-'*20,'\nApriori df\n','-'*20)
    print(apriori_df.to_string())

    # 'confidence' is spelled out so the meaning of the threshold does not
    # depend on the library default.
    rules_df = association_rules(
        apriori_df, metric='confidence', min_threshold=confidence
    ).sort_values('lift', ascending=False)

    print('-'*20,'\nAssociation df\n','-'*20)
    print(rules_df.to_string())

    scatter_plot(rules_df,'support','confidence')
    scatter_plot(rules_df,'lift','confidence')

    # pivot() takes keyword-only arguments in pandas >= 2.0, and
    # Styler.set_precision() was removed there in favour of format(precision=...).
    lift_matrix = rules_df.pivot(index='antecedents', columns='consequents', values='lift')
    return (
        lift_matrix.style
        .background_gradient(cmap='Greens')
        .format(precision=2)
        .highlight_null('#000000')
    )

In [76]:
# Minimum support 10%, minimum confidence 10%
give_my_rules(df, support=0.1, confidence=0.1)

-------------------- 
Apriori df
 --------------------
    support                       itemsets
0    0.4230                     (ChildBks)
1    0.2475                     (YouthBks)
2    0.4310                      (CookBks)
3    0.2820                     (DoItYBks)
4    0.2145                       (RefBks)
5    0.2410                       (ArtBks)
6    0.2760                      (GeogBks)
7    0.1135                     (ItalCook)
8    0.1085                     (Florence)
9    0.1650           (YouthBks, ChildBks)
10   0.2560            (CookBks, ChildBks)
11   0.1840           (DoItYBks, ChildBks)
12   0.1515             (RefBks, ChildBks)
13   0.1625             (ArtBks, ChildBks)
14   0.1950            (GeogBks, ChildBks)
15   0.1620            (YouthBks, CookBks)
16   0.1155           (YouthBks, DoItYBks)
17   0.1010             (YouthBks, ArtBks)
18   0.1205            (YouthBks, GeogBks)
19   0.1875            (DoItYBks, CookBks)
20   0.1525              (RefBks, CookBks)

consequents,frozenset({'ItalCook'}),"frozenset({'YouthBks', 'CookBks'})",frozenset({'GeogBks'}),"frozenset({'ArtBks', 'GeogBks'})",frozenset({'ChildBks'}),"frozenset({'RefBks', 'CookBks'})","frozenset({'GeogBks', 'DoItYBks'})","frozenset({'ArtBks', 'DoItYBks'})",frozenset({'YouthBks'}),frozenset({'DoItYBks'}),"frozenset({'DoItYBks', 'ChildBks'})","frozenset({'GeogBks', 'ChildBks'})","frozenset({'CookBks', 'ChildBks'})",frozenset({'RefBks'}),"frozenset({'GeogBks', 'CookBks'})",frozenset({'ArtBks'}),"frozenset({'ArtBks', 'CookBks'})","frozenset({'DoItYBks', 'CookBks'})","frozenset({'ArtBks', 'ChildBks'})",frozenset({'CookBks'}),"frozenset({'RefBks', 'ChildBks'})","frozenset({'YouthBks', 'ChildBks'})"
antecedents,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
frozenset({'CookBks'}),2.32,,1.62,1.88,1.4,,1.9,1.91,1.52,1.54,1.84,1.78,,1.65,,1.61,,,1.81,,1.88,1.81
"frozenset({'YouthBks', 'CookBks'})",,,,,1.88,,,,,,,,,,,,,,,,,
frozenset({'GeogBks'}),,,,,1.67,,,,1.76,1.7,2.06,,2.12,1.87,,1.92,2.25,2.1,2.27,1.62,,
"frozenset({'ArtBks', 'GeogBks'})",,,,,1.89,,,,,,,,,,,,,,,1.88,,
"frozenset({'RefBks', 'CookBks'})",,,,,1.9,,,,,,,,,,,,,,,,,
frozenset({'ChildBks'}),,1.88,1.67,1.89,,1.9,1.86,,1.58,1.54,,,,1.67,1.84,1.59,1.79,1.84,,1.4,,
"frozenset({'GeogBks', 'DoItYBks'})",,,,,1.86,,,,,,,,,,,,,,,1.9,,
"frozenset({'ArtBks', 'DoItYBks'})",,,,,,,,,,,,,,,,,,,,1.91,,
frozenset({'YouthBks'}),,,1.76,,1.58,,,,,1.65,,,2.04,,,1.69,,,,1.52,,
frozenset({'DoItYBks'}),,,1.7,,1.54,,,,1.65,,,1.9,2.02,1.74,2.0,1.82,2.16,,,1.54,,


In [77]:
# Minimum support 20%, minimum confidence 30%
give_my_rules(df, support=0.2, confidence=0.3)

-------------------- 
Apriori df
 --------------------
   support             itemsets
0   0.4230           (ChildBks)
1   0.2475           (YouthBks)
2   0.4310            (CookBks)
3   0.2820           (DoItYBks)
4   0.2145             (RefBks)
5   0.2410             (ArtBks)
6   0.2760            (GeogBks)
7   0.2560  (CookBks, ChildBks)
-------------------- 
Association df
 --------------------
  antecedents consequents  antecedent support  consequent support  support  confidence      lift  leverage  conviction
0   (CookBks)  (ChildBks)               0.431               0.423    0.256    0.593968  1.404179  0.073687    1.421069
1  (ChildBks)   (CookBks)               0.423               0.431    0.256    0.605201  1.404179  0.073687    1.441240


consequents,frozenset({'ChildBks'}),frozenset({'CookBks'})
antecedents,Unnamed: 1_level_1,Unnamed: 2_level_1
frozenset({'CookBks'}),1.4,
frozenset({'ChildBks'}),,1.4
