In [2]:
###Problem Definition

#Perform Apriori Algorithm on a simple dataset and mine association rules that may exist betweeen itemsets with metric set to confidence & lift.

###Tasks to be performed

#Importing Required Libraries
#Creating a simple dataset
#Transaction Encoding
#Understanding Apriori Algorithm
#Applying Apriori Algorithm
#Mining Association Rules


In [3]:
!pip install mlxtend



In [4]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import warnings
warnings.filterwarnings("ignore")

# creating a simple data set

In [5]:
dataset = [['l2','l2','l3'],
           ['l2','l4'],
           ['l2','l3'],
           ['l1','l2','l4'],
           ['l1','l3'],
           ['l2','l3'],
           ['l1','l3'],
           ['l1','l2','l3','l5'],
           ['l1','l2','l3']]

# transaction encoded

In [6]:
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df=pd.DataFrame(te_ary,columns=te.columns_)
df.head()

Unnamed: 0,l1,l2,l3,l4,l5
0,False,True,True,False,False
1,False,True,False,True,False
2,False,True,True,False,False
3,True,True,False,True,False
4,True,False,True,False,False


In [7]:
df.shape

(9, 5)

Understanding Apriori Algorithm

Apriori algorithm is a classical algorithm used in data mining. Used for finding frequent itemsets and mining association rules that may exist between different itemsets. It operates very well on a dataset containing large number of transactions. It is not only easy to understand but also to implement. The frequent itemsets generated by the Apriori Algorithm can be used to determine the association rules that may exist between different items.

There are three major components of Apriori Algorithm.They are:

1)Support : It can be defined as the popularity of a particular item.It can be calculated as the number of transactions invloving that particular item divided by total number of transactions.
2)Confidence :It is the likelihood of an item Y being purchased when X was purchased.
2)Lift :It is the likelihood of an item Y being purchased when item X is purchased while considering the popularity of Y.


# 3. apriori algorithm

In [17]:
frequent_itemsets = apriori(df,min_support=0.1,use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.555556,(l1)
1,0.777778,(l2)
2,0.777778,(l3)
3,0.222222,(l4)
4,0.111111,(l5)
5,0.333333,"(l2, l1)"
6,0.444444,"(l3, l1)"
7,0.111111,"(l4, l1)"
8,0.111111,"(l5, l1)"
9,0.555556,"(l2, l3)"


### from above, you can see that the result is a dataframe with support for each itemsets

# mining association rules

In [18]:
association_rules(frequent_itemsets, metric = "confidence", min_threshold=0.8)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(l5),(l1),0.111111,0.555556,0.111111,1.0,1.8,0.049383,inf
1,(l4),(l2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf
2,(l5),(l2),0.111111,0.777778,0.111111,1.0,1.285714,0.024691,inf
3,(l5),(l3),0.111111,0.777778,0.111111,1.0,1.285714,0.024691,inf
4,"(l4, l1)",(l2),0.111111,0.777778,0.111111,1.0,1.285714,0.024691,inf
5,"(l2, l5)",(l1),0.111111,0.555556,0.111111,1.0,1.8,0.049383,inf
6,"(l1, l5)",(l2),0.111111,0.777778,0.111111,1.0,1.285714,0.024691,inf
7,(l5),"(l2, l1)",0.111111,0.333333,0.111111,1.0,3.0,0.074074,inf
8,"(l3, l5)",(l1),0.111111,0.555556,0.111111,1.0,1.8,0.049383,inf
9,"(l1, l5)",(l3),0.111111,0.777778,0.111111,1.0,1.285714,0.024691,inf


### from above , you can see the result of association analysis showing which item is frequently purchased with other items

In [19]:
rules = association_rules(frequent_itemsets, metric="lift",min_threshold=2)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(l2, l1)",(l5),0.333333,0.111111,0.111111,0.333333,3.0,0.074074,1.333333
1,(l5),"(l2, l1)",0.111111,0.333333,0.111111,1.0,3.0,0.074074,inf
2,"(l3, l1)",(l5),0.444444,0.111111,0.111111,0.25,2.25,0.061728,1.185185
3,(l5),"(l3, l1)",0.111111,0.444444,0.111111,1.0,2.25,0.061728,inf
4,"(l2, l3, l1)",(l5),0.222222,0.111111,0.111111,0.5,4.5,0.08642,1.777778
5,"(l2, l5)","(l3, l1)",0.111111,0.444444,0.111111,1.0,2.25,0.061728,inf
6,"(l3, l5)","(l2, l1)",0.111111,0.333333,0.111111,1.0,3.0,0.074074,inf
7,"(l2, l1)","(l3, l5)",0.333333,0.111111,0.111111,0.333333,3.0,0.074074,1.333333
8,"(l3, l1)","(l2, l5)",0.444444,0.111111,0.111111,0.25,2.25,0.061728,1.185185
9,(l5),"(l2, l3, l1)",0.111111,0.222222,0.111111,1.0,4.5,0.08642,inf


In [20]:
rules[ (rules['confidence'] > 0.3) &
       (rules['lift'] > 2)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(l2, l1)",(l5),0.333333,0.111111,0.111111,0.333333,3.0,0.074074,1.333333
1,(l5),"(l2, l1)",0.111111,0.333333,0.111111,1.0,3.0,0.074074,inf
3,(l5),"(l3, l1)",0.111111,0.444444,0.111111,1.0,2.25,0.061728,inf
4,"(l2, l3, l1)",(l5),0.222222,0.111111,0.111111,0.5,4.5,0.08642,1.777778
5,"(l2, l5)","(l3, l1)",0.111111,0.444444,0.111111,1.0,2.25,0.061728,inf
6,"(l3, l5)","(l2, l1)",0.111111,0.333333,0.111111,1.0,3.0,0.074074,inf
7,"(l2, l1)","(l3, l5)",0.333333,0.111111,0.111111,0.333333,3.0,0.074074,1.333333
9,(l5),"(l2, l3, l1)",0.111111,0.222222,0.111111,1.0,4.5,0.08642,inf


In [21]:
rules[rules['antecedents'] == {'A', 'D',}]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


### from above , we can say that when a customer buys milk and bread together,he/she is likely to banana as well, so to increase the sales we can put these items together