In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Toy market-basket data: one string per transaction, items separated by
# spaces. Splitting yields the list-of-lists layout the encoder consumes.
dataset = [
    basket.split()
    for basket in (
        'a c d f g i m p',
        'a b c f l m o',
        'b f h j o',
        'b c k s p',
        'a c e f l m n p',
    )
]

In [3]:
# Echo the raw transactions so the hand-entered baskets can be checked.
dataset

[['a', 'c', 'd', 'f', 'g', 'i', 'm', 'p'],
 ['a', 'b', 'c', 'f', 'l', 'm', 'o'],
 ['b', 'f', 'h', 'j', 'o'],
 ['b', 'c', 'k', 's', 'p'],
 ['a', 'c', 'e', 'f', 'l', 'm', 'n', 'p']]

In [4]:
# Fit the encoder on the transactions and one-hot encode them in a single
# step: the result has one row per transaction and one boolean column per item.
te = TransactionEncoder()
te_array = te.fit_transform(dataset)
te_array

array([[ True, False,  True,  True, False,  True,  True, False,  True,
        False, False, False,  True, False, False,  True, False],
       [ True,  True,  True, False, False,  True, False, False, False,
        False, False,  True,  True, False,  True, False, False],
       [False,  True, False, False, False,  True, False,  True, False,
         True, False, False, False, False,  True, False, False],
       [False,  True,  True, False, False, False, False, False, False,
        False,  True, False, False, False, False,  True,  True],
       [ True, False,  True, False,  True,  True, False, False, False,
        False, False,  True,  True,  True, False,  True, False]])

In [5]:
# Item labels stored on the fitted encoder; they are used as the DataFrame
# column names when te_array is wrapped in a frame.
te.columns_

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 's']

In [6]:
# Wrap the encoded boolean array in a DataFrame, labelling each column with
# its item name so downstream results can refer to items by name.
df = pd.DataFrame(data=te_array, columns=te.columns_)

In [7]:
# Show the one-hot table: one row per transaction, one column per item.
df

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,s
0,True,False,True,True,False,True,True,False,True,False,False,False,True,False,False,True,False
1,True,True,True,False,False,True,False,False,False,False,False,True,True,False,True,False,False
2,False,True,False,False,False,True,False,True,False,True,False,False,False,False,True,False,False
3,False,True,True,False,False,False,False,False,False,False,True,False,False,False,False,True,True
4,True,False,True,False,True,True,False,False,False,False,False,True,True,True,False,True,False


In [8]:
# Mine itemsets with support >= 0.6 and record how many items each one
# contains in a `length` column.
frequent_itemset = (
    apriori(df, min_support=0.6, use_colnames=True)
    .assign(length=lambda found: found['itemsets'].apply(len))
)
frequent_itemset

Unnamed: 0,support,itemsets,length
0,0.6,(a),1
1,0.6,(b),1
2,0.8,(c),1
3,0.8,(f),1
4,0.6,(m),1
5,0.6,(p),1
6,0.6,"(c, a)",2
7,0.6,"(a, f)",2
8,0.6,"(a, m)",2
9,0.6,"(c, f)",2


In [9]:
# Derive association rules from the frequent itemsets, keeping only those
# whose confidence is at least 0.8, then print a structural summary.
rules = association_rules(
    frequent_itemset,
    metric='confidence',
    min_threshold=0.8,
)
rules.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   antecedents         37 non-null     object 
 1   consequents         37 non-null     object 
 2   antecedent support  37 non-null     float64
 3   consequent support  37 non-null     float64
 4   support             37 non-null     float64
 5   confidence          37 non-null     float64
 6   lift                37 non-null     float64
 7   leverage            37 non-null     float64
 8   conviction          37 non-null     float64
 9   zhangs_metric       37 non-null     float64
dtypes: float64(8), object(2)
memory usage: 3.0+ KB


In [10]:
# Show the generated rules together with their metric columns.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(a),(c),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
1,(a),(f),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
2,(a),(m),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0
3,(m),(a),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0
4,(m),(c),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
5,(p),(c),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
6,(m),(f),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
7,"(c, a)",(f),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
8,"(c, f)",(a),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0
9,"(a, f)",(c),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
