In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import transactionencoder

In [2]:
# Sample transactions: each inner list holds the items that occur together
# in one transaction. The trailing index is the row the transaction occupies
# once the data is encoded into a DataFrame.
data_raw = [
    ['cat', 'dog', 'fish', 'pigeons', 'geese', 'chickens'],       # 0
    ['soursops', 'dog', 'fish', 'pigeons', 'geese', 'chickens'],  # 1
    ['cat', 'pillows', 'pigeons', 'geese'],                       # 2
    ['cat', 'unisnakes', 'snakes', 'pigeons', 'chickens'],        # 3
    ['snakes', 'dog', 'pigeons', 'rats', 'geese'],                # 4
]

In [5]:
# fix data formating before convert into dataframe
te = transactionencoder.TransactionEncoder()
te_ary = te.fit(data_raw).transform(data_raw)

In [4]:
# Wrap the encoded array in a DataFrame whose columns are labelled with the
# item names learned by the encoder, then preview the first rows.
dataset = pd.DataFrame(data=te_ary, columns=te.columns_)
dataset.head()

Unnamed: 0,cat,chickens,dog,fish,geese,pigeons,pillows,rats,snakes,soursops,unisnakes
0,True,True,True,True,True,True,False,False,False,False,False
1,False,True,True,True,True,True,False,False,False,True,False
2,True,False,False,False,True,True,True,False,False,False,False
3,True,True,False,False,False,True,False,False,True,False,True
4,False,False,True,False,True,True,False,True,True,False,False


In [6]:
# Mine the frequent itemsets: keep every itemset whose support is >= 0.6.
# use_colnames=True reports the item names rather than column indices.
frequent_itemset = apriori(dataset, use_colnames=True, min_support=0.6)
frequent_itemset

Unnamed: 0,support,itemsets
0,0.6,(cat)
1,0.6,(chickens)
2,0.6,(dog)
3,0.8,(geese)
4,1.0,(pigeons)
5,0.6,"(pigeons, cat)"
6,0.6,"(pigeons, chickens)"
7,0.6,"(dog, geese)"
8,0.6,"(dog, pigeons)"
9,0.8,"(pigeons, geese)"


In [7]:
# Derive association rules from the frequent itemsets, keeping only the
# rules whose confidence is >= 0.5, and display the resulting DataFrame.
result = association_rules(
    frequent_itemset,
    min_threshold=0.5,
    metric="confidence",
)
result

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(pigeons),(cat),1.0,0.6,0.6,0.6,1.0,0.0,1.0
1,(cat),(pigeons),0.6,1.0,0.6,1.0,1.0,0.0,inf
2,(pigeons),(chickens),1.0,0.6,0.6,0.6,1.0,0.0,1.0
3,(chickens),(pigeons),0.6,1.0,0.6,1.0,1.0,0.0,inf
4,(dog),(geese),0.6,0.8,0.6,1.0,1.25,0.12,inf
5,(geese),(dog),0.8,0.6,0.6,0.75,1.25,0.12,1.6
6,(dog),(pigeons),0.6,1.0,0.6,1.0,1.0,0.0,inf
7,(pigeons),(dog),1.0,0.6,0.6,0.6,1.0,0.0,1.0
8,(pigeons),(geese),1.0,0.8,0.8,0.8,1.0,0.0,1.0
9,(geese),(pigeons),0.8,1.0,0.8,1.0,1.0,0.0,inf


In [8]:
# Narrow the rules table to the columns of interest: the rule itself
# (antecedents -> consequents) plus its support, confidence and lift.
result_simplify = result.loc[
    :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
]
result_simplify

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(pigeons),(cat),0.6,0.6,1.0
1,(cat),(pigeons),0.6,1.0,1.0
2,(pigeons),(chickens),0.6,0.6,1.0
3,(chickens),(pigeons),0.6,1.0,1.0
4,(dog),(geese),0.6,1.0,1.25
5,(geese),(dog),0.6,0.75,1.25
6,(dog),(pigeons),0.6,1.0,1.0
7,(pigeons),(dog),0.6,0.6,1.0
8,(pigeons),(geese),0.8,0.8,1.0
9,(geese),(pigeons),0.8,1.0,1.0
