In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder
# conda install -c conda-forge mlxtend

In [2]:
data = pd.read_csv("my_movies.csv")
data.head()

Unnamed: 0,V1,V2,V3,V4,V5,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile
0,Sixth Sense,LOTR1,Harry Potter1,Green Mile,LOTR2,1,0,1,1,0,1,0,0,0,1
1,Gladiator,Patriot,Braveheart,,,0,1,0,0,1,0,0,0,1,0
2,LOTR1,LOTR2,,,,0,0,1,0,0,1,0,0,0,0
3,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
4,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0


# Pre-processing

In [3]:
df=pd.get_dummies(data)
df.head()

Unnamed: 0,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile,...,V2_LOTR,V2_LOTR1,V2_LOTR2,V2_Patriot,V3_Braveheart,V3_Gladiator,V3_Harry Potter1,V3_Sixth Sense,V4_Green Mile,V5_LOTR2
0,1,0,1,1,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,1,1
1,0,1,0,0,1,0,0,0,1,0,...,0,0,0,1,1,0,0,0,0,0
2,0,0,1,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
4,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0


In [4]:
df=pd.get_dummies(data)
df.head()
df.mean()

Sixth Sense         0.6
Gladiator           0.7
LOTR1               0.2
Harry Potter1       0.2
Patriot             0.6
LOTR2               0.2
Harry Potter2       0.1
LOTR                0.1
Braveheart          0.1
Green Mile          0.2
V1_Gladiator        0.6
V1_Harry Potter1    0.1
V1_LOTR1            0.1
V1_Sixth Sense      0.2
V2_Harry Potter2    0.1
V2_LOTR             0.1
V2_LOTR1            0.1
V2_LOTR2            0.1
V2_Patriot          0.6
V3_Braveheart       0.1
V3_Gladiator        0.1
V3_Harry Potter1    0.1
V3_Sixth Sense      0.4
V4_Green Mile       0.2
V5_LOTR2            0.1
dtype: float64

# Apriori Algorithm

In [5]:
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.6,(Sixth Sense)
1,0.7,(Gladiator)
2,0.2,(LOTR1)
3,0.2,(Harry Potter1)
4,0.6,(Patriot)
...,...,...
1392,0.1,"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P..."
1393,0.1,"(V2_LOTR1, V5_LOTR2, V3_Harry Potter1, LOTR2, ..."
1394,0.1,"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P..."
1395,0.1,"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P..."


In [6]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.80
1,(Gladiator),(Sixth Sense),0.7,0.6,0.5,0.714286,1.190476,0.08,1.40
2,(Sixth Sense),(LOTR1),0.6,0.2,0.1,0.166667,0.833333,-0.02,0.96
3,(LOTR1),(Sixth Sense),0.2,0.6,0.1,0.500000,0.833333,-0.02,0.80
4,(Sixth Sense),(Harry Potter1),0.6,0.2,0.1,0.166667,0.833333,-0.02,0.96
...,...,...,...,...,...,...,...,...,...
64247,(Green Mile),"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64248,(LOTR1),"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64249,(V1_Sixth Sense),"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64250,(V4_Green Mile),"(V2_LOTR1, V5_LOTR2, Harry Potter1, V3_Harry P...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80


In [7]:
rules.sort_values('lift',ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
32126,"(Green Mile, LOTR2, LOTR1, V5_LOTR2)","(Harry Potter1, V3_Harry Potter1, Sixth Sense)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
40478,"(V3_Harry Potter1, V4_Green Mile, Harry Potter1)","(Green Mile, V1_Sixth Sense, V5_LOTR2, Sixth S...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
40450,"(V3_Harry Potter1, Sixth Sense, V1_Sixth Sense...","(Green Mile, V4_Green Mile, V5_LOTR2)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
40451,"(V3_Harry Potter1, Sixth Sense, V4_Green Mile,...","(Green Mile, V1_Sixth Sense, V5_LOTR2)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
40452,"(V3_Harry Potter1, V4_Green Mile, Green Mile, ...","(Harry Potter1, V5_LOTR2, Sixth Sense)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
...,...,...,...,...,...,...,...,...,...
40,(Green Mile),(Gladiator),0.2,0.7,0.1,0.500000,0.714286,-0.04,0.600000
1652,"(Sixth Sense, Green Mile, V4_Green Mile)",(Gladiator),0.2,0.7,0.1,0.500000,0.714286,-0.04,0.600000
1665,(Gladiator),"(Sixth Sense, Green Mile, V4_Green Mile)",0.7,0.2,0.1,0.142857,0.714286,-0.04,0.933333
6289,"(Green Mile, V4_Green Mile, V1_Sixth Sense, Si...",(Gladiator),0.2,0.7,0.1,0.500000,0.714286,-0.04,0.600000


In [8]:
rules[rules.lift>1]
len(rules)

64252