In [1]:
# mlxtend 모듈을 이용해 분석

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
# Sample transactions: each inner list holds the items of one transaction.
dataset = [
    ['사과', '치즈', '생수'],
    ['생수', '호두', '치즈', '고등어'],
    ['수박', '사과', '생수'],
    ['생수', '호두', '치즈', '옥수수'],
]

In [3]:
# Fit the encoder on the transactions to learn the item labels, then encode
# the same transactions into an array (one row per transaction).
tren = TransactionEncoder()
tren.fit(dataset)
tren_ary = tren.transform(dataset)
# Item labels discovered by the encoder, in column order.
print(tren.columns_)

['고등어', '사과', '생수', '수박', '옥수수', '치즈', '호두']


In [4]:
# Show the encoded array returned by TransactionEncoder.transform.
print(tren_ary)

[[False  True  True False False  True False]
 [ True False  True False False  True  True]
 [False  True  True  True False False False]
 [False False  True False  True  True  True]]


In [5]:
# Wrap the encoded array in a DataFrame whose columns are the item labels.
df = pd.DataFrame(data=tren_ary, columns=tren.columns_)
df

Unnamed: 0,고등어,사과,생수,수박,옥수수,치즈,호두
0,False,True,True,False,False,True,False
1,True,False,True,False,False,True,True
2,False,True,True,True,False,False,False
3,False,False,True,False,True,True,True


### apriori: 지지도 계산

In [6]:
# Frequent itemsets with support >= 0.01. Column names are not requested,
# so itemsets are reported as column indices; print the labels for reference.
print(tren.columns_)
fre_items = apriori(df, min_support=0.01, use_colnames=False)
fre_items

['고등어', '사과', '생수', '수박', '옥수수', '치즈', '호두']


Unnamed: 0,support,itemsets
0,0.25,(0)
1,0.5,(1)
2,1.0,(2)
3,0.25,(3)
4,0.25,(4)
5,0.75,(5)
6,0.5,(6)
7,0.25,"(0, 2)"
8,0.25,"(0, 5)"
9,0.25,"(0, 6)"


In [7]:
# Same support threshold as before, but report itemsets by item label
# instead of by column index.
fre_items = apriori(
    df,
    min_support=0.01,
    use_colnames=True,
)
fre_items

Unnamed: 0,support,itemsets
0,0.25,(고등어)
1,0.5,(사과)
2,1.0,(생수)
3,0.25,(수박)
4,0.25,(옥수수)
5,0.75,(치즈)
6,0.5,(호두)
7,0.25,"(생수, 고등어)"
8,0.25,"(치즈, 고등어)"
9,0.25,"(호두, 고등어)"


#### 람다식(lambda)
- lambda 매개변수 : 표현식
- apply() 함수와 같이 사용
- map(함수, 리스트)

In [8]:
def ss(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total

In [9]:
# Call the named function ss with 5 and 2.
ss(5, 2)

7

In [10]:
# Lambda expression: the same addition written as an anonymous function
# and called immediately with 5 and 2.
(lambda a, b:a + b)(5, 2)

7

In [12]:
# map: apply the squaring lambda to every value of range(5) and collect
# the results in a list.
list(map(lambda x: x **2, range(5)))

[0, 1, 4, 9, 16]

In [17]:
# Add a "length" column to fre_items with Series.apply: the number of items
# in each itemset.
fre_items["length"] = fre_items["itemsets"].apply(lambda x: len(x))
# Only the last expression of a cell is rendered, so a single preview is
# kept here; use display(...) to show more than one frame from one cell.
fre_items.tail()

Unnamed: 0,support,itemsets,length
26,0.25,"(옥수수, 호두, 생수)",3
27,0.5,"(호두, 생수, 치즈)",3
28,0.25,"(옥수수, 호두, 치즈)",3
29,0.25,"(호두, 생수, 치즈, 고등어)",4
30,0.25,"(옥수수, 호두, 생수, 치즈)",4


In [None]:
# Preview the per-itemset item counts without storing them.
fre_items["itemsets"].apply(lambda x: len(x))

In [18]:
# Keep only itemsets with support >= 0.3 that contain at least 2 items,
# then show the result sorted by support in descending order.

fre_items = fre_items.loc[
    (fre_items["support"] >= 0.3) & (fre_items["length"] >= 2)
]
fre_items.sort_values("support", ascending=False)

Unnamed: 0,support,itemsets,length
15,0.75,"(생수, 치즈)",2
10,0.5,"(생수, 사과)",2
16,0.5,"(호두, 생수)",2
19,0.5,"(호두, 치즈)",2
27,0.5,"(호두, 생수, 치즈)",3


In [22]:
# Use the apriori output to inspect support / confidence / lift of the rules.
# Recompute the frequent itemsets with a minimum support of 0.3.
fre_items = apriori(df, min_support=0.3, use_colnames=True)

# Confidence is association_rules' default metric; it is spelled out so the
# threshold's meaning is explicit: keep rules with confidence >= 0.3.
association_rules(fre_items, metric="confidence", min_threshold=0.3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(생수),(사과),1.0,0.5,0.5,0.5,1.0,0.0,1.0
1,(사과),(생수),0.5,1.0,0.5,1.0,1.0,0.0,inf
2,(생수),(치즈),1.0,0.75,0.75,0.75,1.0,0.0,1.0
3,(치즈),(생수),0.75,1.0,0.75,1.0,1.0,0.0,inf
4,(호두),(생수),0.5,1.0,0.5,1.0,1.0,0.0,inf
5,(생수),(호두),1.0,0.5,0.5,0.5,1.0,0.0,1.0
6,(호두),(치즈),0.5,0.75,0.5,1.0,1.333333,0.125,inf
7,(치즈),(호두),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
8,"(호두, 생수)",(치즈),0.5,0.75,0.5,1.0,1.333333,0.125,inf
9,"(호두, 치즈)",(생수),0.5,1.0,0.5,1.0,1.0,0.0,inf


In [23]:
# Keep only the rules whose lift is at least 1.3.
association_rules(fre_items, metric="lift", min_threshold=1.3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(호두),(치즈),0.5,0.75,0.5,1.0,1.333333,0.125,inf
1,(치즈),(호두),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
2,"(호두, 생수)",(치즈),0.5,0.75,0.5,1.0,1.333333,0.125,inf
3,"(생수, 치즈)",(호두),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
4,(호두),"(생수, 치즈)",0.5,0.75,0.5,1.0,1.333333,0.125,inf
5,(치즈),"(호두, 생수)",0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
