# Association rules

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

# 1) books data

In [6]:
book_data=pd.read_csv("G:\\Assignments\\Association\\book.csv")
book_data.head()

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence
0,0,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,1,1,1,0,1,0,1,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0


In [7]:
book_data.shape

(2000, 11)

In [8]:
book_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   ChildBks   2000 non-null   int64
 1   YouthBks   2000 non-null   int64
 2   CookBks    2000 non-null   int64
 3   DoItYBks   2000 non-null   int64
 4   RefBks     2000 non-null   int64
 5   ArtBks     2000 non-null   int64
 6   GeogBks    2000 non-null   int64
 7   ItalCook   2000 non-null   int64
 8   ItalAtlas  2000 non-null   int64
 9   ItalArt    2000 non-null   int64
 10  Florence   2000 non-null   int64
dtypes: int64(11)
memory usage: 172.0 KB


In [9]:
book_data.dtypes

ChildBks     int64
YouthBks     int64
CookBks      int64
DoItYBks     int64
RefBks       int64
ArtBks       int64
GeogBks      int64
ItalCook     int64
ItalAtlas    int64
ItalArt      int64
Florence     int64
dtype: object

In [10]:
book_data.columns

Index(['ChildBks', 'YouthBks', 'CookBks', 'DoItYBks', 'RefBks', 'ArtBks',
       'GeogBks', 'ItalCook', 'ItalAtlas', 'ItalArt', 'Florence'],
      dtype='object')

In [11]:
book_data.isnull().sum()

ChildBks     0
YouthBks     0
CookBks      0
DoItYBks     0
RefBks       0
ArtBks       0
GeogBks      0
ItalCook     0
ItalAtlas    0
ItalArt      0
Florence     0
dtype: int64

# Apriori Algorithm

In [12]:
frequent_itemsets = apriori(book_data, min_support=0.1, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.423,(ChildBks)
1,0.2475,(YouthBks)
2,0.431,(CookBks)
3,0.282,(DoItYBks)
4,0.2145,(RefBks)
5,0.241,(ArtBks)
6,0.276,(GeogBks)
7,0.1135,(ItalCook)
8,0.1085,(Florence)
9,0.165,"(ChildBks, YouthBks)"


In [13]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ChildBks),(YouthBks),0.4230,0.2475,0.1650,0.390071,1.576044,0.060308,1.233750
1,(YouthBks),(ChildBks),0.2475,0.4230,0.1650,0.666667,1.576044,0.060308,1.731000
2,(ChildBks),(CookBks),0.4230,0.4310,0.2560,0.605201,1.404179,0.073687,1.441240
3,(CookBks),(ChildBks),0.4310,0.4230,0.2560,0.593968,1.404179,0.073687,1.421069
4,(ChildBks),(DoItYBks),0.4230,0.2820,0.1840,0.434988,1.542511,0.064714,1.270770
...,...,...,...,...,...,...,...,...,...
95,"(CookBks, GeogBks)",(ArtBks),0.1925,0.2410,0.1035,0.537662,2.230964,0.057107,1.641657
96,"(ArtBks, GeogBks)",(CookBks),0.1275,0.4310,0.1035,0.811765,1.883445,0.048547,3.022812
97,(CookBks),"(ArtBks, GeogBks)",0.4310,0.1275,0.1035,0.240139,1.883445,0.048547,1.148237
98,(ArtBks),"(CookBks, GeogBks)",0.2410,0.1925,0.1035,0.429461,2.230964,0.057107,1.415327


An leverage value of 0 indicates independence. Range will be [-1 1]

A high conviction value means that the consequent is highly depending on the antecedent and range [0 inf]

In [14]:
rules.sort_values('lift',ascending = False)[0:20]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
28,(CookBks),(ItalCook),0.431,0.1135,0.1135,0.263341,2.320186,0.064582,1.203406
29,(ItalCook),(CookBks),0.1135,0.431,0.1135,1.0,2.320186,0.064582,inf
76,"(ChildBks, ArtBks)",(GeogBks),0.1625,0.276,0.102,0.627692,2.274247,0.05715,1.944628
81,(GeogBks),"(ChildBks, ArtBks)",0.276,0.1625,0.102,0.369565,2.274247,0.05715,1.328448
86,(ArtBks),"(CookBks, DoItYBks)",0.241,0.1875,0.1015,0.421162,2.246196,0.056313,1.403674
83,"(CookBks, DoItYBks)",(ArtBks),0.1875,0.241,0.1015,0.541333,2.246196,0.056313,1.654797
99,(GeogBks),"(CookBks, ArtBks)",0.276,0.167,0.1035,0.375,2.245509,0.057408,1.3328
94,"(CookBks, ArtBks)",(GeogBks),0.167,0.276,0.1035,0.61976,2.245509,0.057408,1.904063
98,(ArtBks),"(CookBks, GeogBks)",0.241,0.1925,0.1035,0.429461,2.230964,0.057107,1.415327
95,"(CookBks, GeogBks)",(ArtBks),0.1925,0.241,0.1035,0.537662,2.230964,0.057107,1.641657


In [17]:
rules[rules.lift>1]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ChildBks),(YouthBks),0.4230,0.2475,0.1650,0.390071,1.576044,0.060308,1.233750
1,(YouthBks),(ChildBks),0.2475,0.4230,0.1650,0.666667,1.576044,0.060308,1.731000
2,(ChildBks),(CookBks),0.4230,0.4310,0.2560,0.605201,1.404179,0.073687,1.441240
3,(CookBks),(ChildBks),0.4310,0.4230,0.2560,0.593968,1.404179,0.073687,1.421069
4,(ChildBks),(DoItYBks),0.4230,0.2820,0.1840,0.434988,1.542511,0.064714,1.270770
...,...,...,...,...,...,...,...,...,...
95,"(CookBks, GeogBks)",(ArtBks),0.1925,0.2410,0.1035,0.537662,2.230964,0.057107,1.641657
96,"(ArtBks, GeogBks)",(CookBks),0.1275,0.4310,0.1035,0.811765,1.883445,0.048547,3.022812
97,(CookBks),"(ArtBks, GeogBks)",0.4310,0.1275,0.1035,0.240139,1.883445,0.048547,1.148237
98,(ArtBks),"(CookBks, GeogBks)",0.2410,0.1925,0.1035,0.429461,2.230964,0.057107,1.415327


In [31]:
rule1=association_rules(frequent_itemsets, metric= "confidence", min_threshold=0.7)
rule1

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(RefBks),(ChildBks),0.2145,0.423,0.1515,0.706294,1.669725,0.060767,1.964548
1,(GeogBks),(ChildBks),0.276,0.423,0.195,0.706522,1.670264,0.078252,1.966074
2,(RefBks),(CookBks),0.2145,0.431,0.1525,0.710956,1.649549,0.06005,1.968556
3,(ItalCook),(CookBks),0.1135,0.431,0.1135,1.0,2.320186,0.064582,inf
4,"(ChildBks, YouthBks)",(CookBks),0.165,0.431,0.129,0.781818,1.813963,0.057885,2.607917
5,"(YouthBks, CookBks)",(ChildBks),0.162,0.423,0.129,0.796296,1.882497,0.060474,2.832545
6,"(ChildBks, DoItYBks)",(CookBks),0.184,0.431,0.146,0.793478,1.841017,0.066696,2.755158
7,"(CookBks, DoItYBks)",(ChildBks),0.1875,0.423,0.146,0.778667,1.84082,0.066687,2.606928
8,"(ChildBks, RefBks)",(CookBks),0.1515,0.431,0.1225,0.808581,1.876058,0.057204,2.972534
9,"(RefBks, CookBks)",(ChildBks),0.1525,0.423,0.1225,0.803279,1.899004,0.057993,2.933083


# -----------------------------------------------------------------------------------------------------------

# 2) Movies Data

In [33]:
movies_data = pd.read_csv("G:\\Assignments\\Association\\my_movies.csv")
movies_data.head()

Unnamed: 0,V1,V2,V3,V4,V5,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile
0,Sixth Sense,LOTR1,Harry Potter1,Green Mile,LOTR2,1,0,1,1,0,1,0,0,0,1
1,Gladiator,Patriot,Braveheart,,,0,1,0,0,1,0,0,0,1,0
2,LOTR1,LOTR2,,,,0,0,1,0,0,1,0,0,0,0
3,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
4,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0


In [34]:
movies_data.shape

(10, 15)

In [35]:
movies_data.dtypes

V1               object
V2               object
V3               object
V4               object
V5               object
Sixth Sense       int64
Gladiator         int64
LOTR1             int64
Harry Potter1     int64
Patriot           int64
LOTR2             int64
Harry Potter2     int64
LOTR              int64
Braveheart        int64
Green Mile        int64
dtype: object

In [36]:
movies_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   V1             10 non-null     object
 1   V2             10 non-null     object
 2   V3             7 non-null      object
 3   V4             2 non-null      object
 4   V5             1 non-null      object
 5   Sixth Sense    10 non-null     int64 
 6   Gladiator      10 non-null     int64 
 7   LOTR1          10 non-null     int64 
 8   Harry Potter1  10 non-null     int64 
 9   Patriot        10 non-null     int64 
 10  LOTR2          10 non-null     int64 
 11  Harry Potter2  10 non-null     int64 
 12  LOTR           10 non-null     int64 
 13  Braveheart     10 non-null     int64 
 14  Green Mile     10 non-null     int64 
dtypes: int64(10), object(5)
memory usage: 1.3+ KB


# Data Preprocessing

In [37]:
data=pd.get_dummies(movies_data)
data

Unnamed: 0,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile,...,V2_LOTR,V2_LOTR1,V2_LOTR2,V2_Patriot,V3_Braveheart,V3_Gladiator,V3_Harry Potter1,V3_Sixth Sense,V4_Green Mile,V5_LOTR2
0,1,0,1,1,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,1,1
1,0,1,0,0,1,0,0,0,1,0,...,0,0,0,1,1,0,0,0,0,0
2,0,0,1,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
4,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
5,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
6,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
8,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
9,1,1,0,0,0,0,0,1,0,1,...,1,0,0,0,0,1,0,0,1,0


# Apriori Algorithm

In [38]:
frequent_itemsets = apriori(data, min_support=0.1, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.6,(Sixth Sense)
1,0.7,(Gladiator)
2,0.2,(LOTR1)
3,0.2,(Harry Potter1)
4,0.6,(Patriot)
...,...,...
1392,0.1,"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,..."
1393,0.1,"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,..."
1394,0.1,"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,..."
1395,0.1,"(V4_Green Mile, V3_Harry Potter1, Green Mile, ..."


In [39]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gladiator),(Sixth Sense),0.7,0.6,0.5,0.714286,1.190476,0.08,1.40
1,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.80
2,(LOTR1),(Sixth Sense),0.2,0.6,0.1,0.500000,0.833333,-0.02,0.80
3,(Sixth Sense),(LOTR1),0.6,0.2,0.1,0.166667,0.833333,-0.02,0.96
4,(Harry Potter1),(Sixth Sense),0.2,0.6,0.1,0.500000,0.833333,-0.02,0.80
...,...,...,...,...,...,...,...,...,...
64247,(V1_Sixth Sense),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64248,(Harry Potter1),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64249,(V5_LOTR2),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
64250,(V2_LOTR1),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf


In [40]:
rules.sort_values('lift',ascending = False)[0:20]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
32126,"(LOTR1, Harry Potter1, Sixth Sense, V5_LOTR2)","(LOTR2, Green Mile, V3_Harry Potter1)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33436,"(V2_LOTR1, Harry Potter1, V5_LOTR2)","(LOTR1, V4_Green Mile, LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33414,"(V4_Green Mile, Harry Potter1, V5_LOTR2)","(LOTR1, V2_LOTR1, LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33415,"(V2_LOTR1, V4_Green Mile, Harry Potter1)","(LOTR1, LOTR2, Sixth Sense, V5_LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33416,"(V4_Green Mile, Harry Potter1, LOTR2)","(LOTR1, V2_LOTR1, Sixth Sense, V5_LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33417,"(V2_LOTR1, V4_Green Mile, V5_LOTR2)","(LOTR1, Harry Potter1, LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33418,"(V4_Green Mile, LOTR2, V5_LOTR2)","(LOTR1, V2_LOTR1, Harry Potter1, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33419,"(V2_LOTR1, V4_Green Mile, LOTR2)","(LOTR1, Harry Potter1, Sixth Sense, V5_LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33420,"(LOTR1, Harry Potter1, Sixth Sense)","(V2_LOTR1, V4_Green Mile, LOTR2, V5_LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33421,"(LOTR1, Sixth Sense, V5_LOTR2)","(V2_LOTR1, V4_Green Mile, Harry Potter1, LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf


In [41]:
rules[rules.lift>1]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gladiator),(Sixth Sense),0.7,0.6,0.5,0.714286,1.190476,0.08,1.40
1,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.80
6,(Sixth Sense),(Patriot),0.6,0.6,0.4,0.666667,1.111111,0.04,1.20
7,(Patriot),(Sixth Sense),0.6,0.6,0.4,0.666667,1.111111,0.04,1.20
10,(Sixth Sense),(LOTR),0.6,0.1,0.1,0.166667,1.666667,0.04,1.08
...,...,...,...,...,...,...,...,...,...
64247,(V1_Sixth Sense),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64248,(Harry Potter1),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64249,(V5_LOTR2),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
64250,(V2_LOTR1),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf


In [42]:
rule1=association_rules(frequent_itemsets, metric= "confidence", min_threshold=0.7)
rule1

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gladiator),(Sixth Sense),0.7,0.6,0.5,0.714286,1.190476,0.08,1.4
1,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.8
2,(LOTR),(Sixth Sense),0.1,0.6,0.1,1.000000,1.666667,0.04,inf
3,(Green Mile),(Sixth Sense),0.2,0.6,0.2,1.000000,1.666667,0.08,inf
4,(V1_Sixth Sense),(Sixth Sense),0.2,0.6,0.2,1.000000,1.666667,0.08,inf
...,...,...,...,...,...,...,...,...,...
56755,"(LOTR2, V5_LOTR2)","(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
56756,"(V2_LOTR1, LOTR2)","(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
56757,(V3_Harry Potter1),"(V4_Green Mile, Sixth Sense, Green Mile, LOTR1...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
56758,(V5_LOTR2),"(V4_Green Mile, Sixth Sense, V3_Harry Potter1,...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf


# -----------------------------------------------------------------------------------------------------------