# Quiz2
## 1. 라이브러리 등록

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

## 2. 함수 정의

In [2]:
def get_dataframe(dataset):
    """Encode a list of transactions as a one-hot DataFrame.

    Args:
        dataset: Iterable of transactions, each a list of item names.

    Returns:
        A pandas DataFrame with one row per transaction and one column per
        distinct item; a cell marks whether the item occurs in that
        transaction.
    """
    encoder = TransactionEncoder()
    # Learn the item vocabulary first, then encode the same transactions.
    encoded = encoder.fit(dataset).transform(dataset)
    return pd.DataFrame(encoded, columns=encoder.columns_)

def threshold_object(frequent_itemsets, cnt):
    """Return the itemsets that contain at least ``cnt`` items.

    Args:
        frequent_itemsets: DataFrame with an ``itemsets`` column whose
            values are sized collections (e.g. frozensets of item names).
        cnt: Minimum number of items an itemset must contain to be kept.

    Returns:
        A new DataFrame holding the qualifying rows, with an extra
        ``length`` column giving the size of each itemset. The input
        frame is left unmodified.
    """
    lengths = frequent_itemsets['itemsets'].apply(len)
    # ``assign`` builds a new frame, so the caller's DataFrame does not
    # silently gain a 'length' column as a side effect of filtering.
    return frequent_itemsets.assign(length=lengths)[lengths >= cnt]

## 3. 데이터 입력

In [3]:
# Sample transactions: each inner list is one basket of purchased item names.
dataset = [
    ['기저귀','버터','맥주'],
    ['기저귀','맥주'],
    ['기저귀','빵'],
    ['떡','땅콩','사이다']
]

In [4]:
# One-hot encode the transactions (one row per basket, one column per item)
# and display the resulting DataFrame.
df = get_dataframe(dataset)
df

Unnamed: 0,기저귀,땅콩,떡,맥주,버터,빵,사이다
0,True,False,False,True,True,False,False
1,True,False,False,True,False,False,False
2,True,False,False,False,False,True,False
3,False,True,True,False,False,False,True


## 4. 지지도 구하기
> 최저 지지도는 0.1로 설정한다

In [6]:
# Mine frequent itemsets with a minimum support of 0.1; use_colnames=True
# reports itemsets by item name rather than by column index.
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
print(frequent_itemsets)

    support       itemsets
0      0.75          (기저귀)
1      0.25           (땅콩)
2      0.25            (떡)
3      0.50           (맥주)
4      0.25           (버터)
5      0.25            (빵)
6      0.25          (사이다)
7      0.50      (기저귀, 맥주)
8      0.25      (버터, 기저귀)
9      0.25       (빵, 기저귀)
10     0.25        (떡, 땅콩)
11     0.25      (사이다, 땅콩)
12     0.25       (사이다, 떡)
13     0.25       (버터, 맥주)
14     0.25  (버터, 기저귀, 맥주)
15     0.25   (사이다, 떡, 땅콩)


In [7]:
# Keep itemsets with at least 1 item. With this threshold no itemset is
# dropped; the result additionally carries a 'length' column.
remove_frequent_itemsets = threshold_object(frequent_itemsets, 1)
print(remove_frequent_itemsets)

    support       itemsets  length
0      0.75          (기저귀)       1
1      0.25           (땅콩)       1
2      0.25            (떡)       1
3      0.50           (맥주)       1
4      0.25           (버터)       1
5      0.25            (빵)       1
6      0.25          (사이다)       1
7      0.50      (기저귀, 맥주)       2
8      0.25      (버터, 기저귀)       2
9      0.25       (빵, 기저귀)       2
10     0.25        (떡, 땅콩)       2
11     0.25      (사이다, 땅콩)       2
12     0.25       (사이다, 떡)       2
13     0.25       (버터, 맥주)       2
14     0.25  (버터, 기저귀, 맥주)       3
15     0.25   (사이다, 떡, 땅콩)       3


## 5. 신뢰도 및 향상도 구하기
> "기저귀 → 맥주"와 "맥주 → 기저귀" 규칙은 향상도(lift)가 1.33으로 같으나, 신뢰도(confidence)는 "맥주 → 기저귀"가 1로 더 높다. 따라서 "맥주를 사면 기저귀를 산다"가 발생할 확률이 더 높다고 예측할 수 있다.

In [8]:
# Derive association rules whose confidence is at least 0.3, then display
# them ordered by lift, highest first.
result = association_rules(remove_frequent_itemsets, metric='confidence', min_threshold=0.3)
result.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
25,(땅콩),"(사이다, 떡)",0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
24,(떡),"(사이다, 땅콩)",0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
23,(사이다),"(떡, 땅콩)",0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
22,"(떡, 땅콩)",(사이다),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
21,"(사이다, 땅콩)",(떡),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
6,(떡),(땅콩),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
7,(땅콩),(떡),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
8,(사이다),(땅콩),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
9,(땅콩),(사이다),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
10,(사이다),(떡),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
