# 연관규칙 분석

In [1]:
!pip install mlxtend





In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Build the transactions as a list of lists.
# Every item label is a single character, so each basket is written as a
# compact string and expanded into a list of its characters.
dataset = [
    list(basket)
    for basket in (
        'ac',
        'abc',
        'abd',
        'bdf',
        'ade',
        'abcd',
        'bd',
        'ce',
        'abdf',
        'acef',
    )
]

In [4]:
# TransactionEncoder encodes transaction data given as a list of lists
# into a NumPy array.
te = TransactionEncoder()
te.fit(dataset)                 # fit the encoder on the transactions
te_ary = te.transform(dataset)  # encode the same transactions as an array
te_ary

array([[ True, False,  True, False, False, False],
       [ True,  True,  True, False, False, False],
       [ True,  True, False,  True, False, False],
       [False,  True, False,  True, False,  True],
       [ True, False, False,  True,  True, False],
       [ True,  True,  True,  True, False, False],
       [False,  True, False,  True, False, False],
       [False, False,  True, False,  True, False],
       [ True,  True, False,  True, False,  True],
       [ True, False,  True, False,  True,  True]])

In [5]:
# Show the encoder's column labels; they are used below as the DataFrame header.
te.columns_

['a', 'b', 'c', 'd', 'e', 'f']

In [6]:
# Wrap the encoded array in a DataFrame, labelling columns with the encoder's
# column names.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,a,b,c,d,e,f
0,True,False,True,False,False,False
1,True,True,True,False,False,False
2,True,True,False,True,False,False
3,False,True,False,True,False,True
4,True,False,False,True,True,False
5,True,True,True,True,False,False
6,False,True,False,True,False,False
7,False,False,True,False,True,False
8,True,True,False,True,False,True
9,True,False,True,False,True,True


In [7]:
# Check support: mine itemsets with a minimum support of 0.3, reporting
# itemsets by column name (use_colnames=True).
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.3,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.7,(a)
1,0.6,(b)
2,0.5,(c)
3,0.6,(d)
4,0.3,(e)
5,0.3,(f)
6,0.4,"(a, b)"
7,0.4,"(c, a)"
8,0.4,"(a, d)"
9,0.5,"(b, d)"


In [8]:
# Sort by support in descending order (returns a sorted copy for display;
# frequent_itemsets itself is not reassigned).
frequent_itemsets.sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets
0,0.7,(a)
1,0.6,(b)
3,0.6,(d)
2,0.5,(c)
9,0.5,"(b, d)"
6,0.4,"(a, b)"
7,0.4,"(c, a)"
8,0.4,"(a, d)"
4,0.3,(e)
5,0.3,(f)


In [9]:
# Check confidence: derive association rules from the frequent itemsets,
# filtering on the "confidence" metric with a threshold of 0.3.
association_rules(
    frequent_itemsets,
    min_threshold=0.3,
    metric="confidence",
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(a),(b),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333,-0.142857
1,(b),(a),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
2,(c),(a),0.5,0.7,0.4,0.8,1.142857,0.05,1.5,0.25
3,(a),(c),0.7,0.5,0.4,0.571429,1.142857,0.05,1.166667,0.416667
4,(a),(d),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333,-0.142857
5,(d),(a),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
6,(b),(d),0.6,0.6,0.5,0.833333,1.388889,0.14,2.4,0.7
7,(d),(b),0.6,0.6,0.5,0.833333,1.388889,0.14,2.4,0.7
8,"(a, b)",(d),0.4,0.6,0.3,0.75,1.25,0.06,1.6,0.333333
9,"(a, d)",(b),0.4,0.6,0.3,0.75,1.25,0.06,1.6,0.333333


In [13]:
# Check lift: derive association rules from the frequent itemsets,
# filtering on the "lift" metric with a threshold of 1.
# NOTE(review): the output recorded under this cell lists Korean item names
# that do not occur in `dataset` (items 'a'..'f'), and the cell counter jumps
# from 9 to 13 -- it looks like output left over from a different run.
# Re-execute the notebook to refresh it.
association_rules(
    frequent_itemsets,
    min_threshold=1,
    metric="lift",
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(기저귀),(맥주),0.545455,0.545455,0.454545,0.833333,1.527778,0.157025,2.727273,0.76
1,(맥주),(기저귀),0.545455,0.545455,0.454545,0.833333,1.527778,0.157025,2.727273,0.76
2,(기저귀),(빵),0.545455,0.545455,0.363636,0.666667,1.222222,0.066116,1.363636,0.4
3,(빵),(기저귀),0.545455,0.545455,0.363636,0.666667,1.222222,0.066116,1.363636,0.4
4,(기저귀),(우유),0.545455,0.818182,0.454545,0.833333,1.018519,0.008264,1.090909,0.04
5,(우유),(기저귀),0.818182,0.545455,0.454545,0.555556,1.018519,0.008264,1.022727,0.1
6,(빵),(우유),0.545455,0.818182,0.454545,0.833333,1.018519,0.008264,1.090909,0.04
7,(우유),(빵),0.818182,0.545455,0.454545,0.555556,1.018519,0.008264,1.022727,0.1
8,"(기저귀, 우유)",(맥주),0.454545,0.545455,0.363636,0.8,1.466667,0.115702,2.272727,0.583333
9,"(맥주, 우유)",(기저귀),0.363636,0.545455,0.363636,1.0,1.833333,0.165289,inf,0.714286


In [14]:
# Derive rules filtered on the "lift" metric with a threshold of 1.2.
# NOTE(review): the output recorded under this cell shows Korean item names
# that are not in `dataset`; presumably it comes from a different run --
# re-execute to confirm.
df_assoc = association_rules(
    frequent_itemsets,
    min_threshold=1.2,
    metric="lift",
)
# Sort by lift in descending order.
df_assoc.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
5,"(맥주, 우유)",(기저귀),0.363636,0.545455,0.363636,1.0,1.833333,0.165289,inf,0.714286
6,(기저귀),"(맥주, 우유)",0.545455,0.363636,0.363636,0.666667,1.833333,0.165289,1.909091,1.0
0,(기저귀),(맥주),0.545455,0.545455,0.454545,0.833333,1.527778,0.157025,2.727273,0.76
1,(맥주),(기저귀),0.545455,0.545455,0.454545,0.833333,1.527778,0.157025,2.727273,0.76
4,"(기저귀, 우유)",(맥주),0.454545,0.545455,0.363636,0.8,1.466667,0.115702,2.272727,0.583333
7,(맥주),"(기저귀, 우유)",0.545455,0.454545,0.363636,0.666667,1.466667,0.115702,1.636364,0.7
2,(기저귀),(빵),0.545455,0.545455,0.363636,0.666667,1.222222,0.066116,1.363636,0.4
3,(빵),(기저귀),0.545455,0.545455,0.363636,0.666667,1.222222,0.066116,1.363636,0.4
