Upgrade mlxtend to use fpgrowth

In [1]:
pip install mlxtend --upgrade --no-deps

Requirement already up-to-date: mlxtend in /usr/local/lib/python3.7/dist-packages (0.18.0)


In [2]:
import mlxtend
import numpy as np
import pandas as pd

In [3]:
# Build the sample dataset: four shopping baskets (transactions).
# Items: milk (우유), diapers (기저귀), juice (쥬스), beer (맥주), lettuce (양상추).
#
# The baskets have different lengths, so the rows are ragged. dtype=object is
# required to store them as a 1-D array of Python lists: without it, older
# NumPy emits a VisibleDeprecationWarning and NumPy >= 1.24 raises ValueError.
data = np.array(
    [
        ['우유', '기저귀', '쥬스'],
        ['양상추', '기저귀', '맥주'],
        ['우유', '양상추', '기저귀', '맥주'],
        ['양상추', '맥주'],
    ],
    dtype=object,
)

  import sys


# Apriori 알고리즘

In [4]:
from mlxtend.preprocessing import TransactionEncoder

# Encode the baskets as a boolean matrix: one row per transaction and one
# column per item label learned by the encoder.
te = TransactionEncoder()
te_ary = te.fit_transform(data)  # fit and transform on the same baskets

# Wrap the matrix in a DataFrame labelled with the encoder's item columns,
# then display it as the cell output.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,기저귀,맥주,양상추,우유,쥬스
0,True,False,False,True,True
1,True,True,True,False,False
2,True,True,True,True,False
3,False,True,True,False,False


In [5]:
# Display the raw encoded array produced by the encoder's transform step.
te_ary

array([[ True, False, False,  True,  True],
       [ True,  True,  True, False, False],
       [ True,  True,  True,  True, False],
       [False,  True,  True, False, False]])

In [6]:
# Display the item labels held by the fitted encoder; these are the labels
# used as the DataFrame's column names.
te.columns_

['기저귀', '맥주', '양상추', '우유', '쥬스']

In [7]:
%%time
from mlxtend.frequent_patterns import apriori

print(apriori(df, min_support=0.5, use_colnames=True))

   support        itemsets
0     0.75           (기저귀)
1     0.75            (맥주)
2     0.75           (양상추)
3     0.50            (우유)
4     0.50       (맥주, 기저귀)
5     0.50      (양상추, 기저귀)
6     0.50       (기저귀, 우유)
7     0.75       (맥주, 양상추)
8     0.50  (맥주, 양상추, 기저귀)
CPU times: user 25.8 ms, sys: 427 µs, total: 26.2 ms
Wall time: 29.8 ms


# FP-Growth 알고리즘

In [8]:
from mlxtend.preprocessing import TransactionEncoder

# Rebuild the boolean transaction matrix from `data` so this section can be
# run without relying on the frame created in the Apriori section.
te = TransactionEncoder()
te.fit(data)                 # learn the set of item labels
te_ary = te.transform(data)  # one boolean row per basket

# Label the columns with the encoder's item names and display the frame.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,기저귀,맥주,양상추,우유,쥬스
0,True,False,False,True,True
1,True,True,True,False,False
2,True,True,True,True,False
3,False,True,True,False,False


In [9]:
%%time
from mlxtend.frequent_patterns import fpgrowth

print(fpgrowth(df, min_support=0.5, use_colnames=True))

   support        itemsets
0     0.75           (기저귀)
1     0.50            (우유)
2     0.75           (양상추)
3     0.75            (맥주)
4     0.50       (맥주, 기저귀)
5     0.50      (양상추, 기저귀)
6     0.50  (맥주, 양상추, 기저귀)
7     0.50       (기저귀, 우유)
8     0.75       (맥주, 양상추)
CPU times: user 12.1 ms, sys: 14 µs, total: 12.1 ms
Wall time: 11.7 ms


### fpgrowth로 구한 빈발 항목집합에서 신뢰도와 향상도를 데이터프레임으로 확인

In [10]:
from mlxtend.frequent_patterns import association_rules

# Mine frequent itemsets with FP-Growth (imported in an earlier cell) at a
# minimum support of 0.1, keeping item names in the result.
association = fpgrowth(df, min_support=0.1, use_colnames=True)

# Derive association rules from those itemsets, filtering on the confidence
# metric with a threshold of 0.5. The resulting DataFrame is the cell output.
association_rules(
    association,
    metric='confidence',
    min_threshold=0.5,
    support_only=False,
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(맥주),(기저귀),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
1,(기저귀),(맥주),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
2,(양상추),(기저귀),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
3,(기저귀),(양상추),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
4,"(맥주, 양상추)",(기저귀),0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
5,"(맥주, 기저귀)",(양상추),0.5,0.75,0.5,1.0,1.333333,0.125,inf
6,"(양상추, 기저귀)",(맥주),0.5,0.75,0.5,1.0,1.333333,0.125,inf
7,(맥주),"(양상추, 기저귀)",0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
8,(양상추),"(맥주, 기저귀)",0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
9,(기저귀),"(맥주, 양상추)",0.75,0.75,0.5,0.666667,0.888889,-0.0625,0.75
