In [9]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
df_raw=[['소주','콜라','맥주'],
       ['소주','콜라','와인'],
       ['소주','주스'],
       ['콜라','맥주'],
       ['소주','콜라','맥주','와인'],
       ['주스']]
df_raw

[['소주', '콜라', '맥주'],
 ['소주', '콜라', '와인'],
 ['소주', '주스'],
 ['콜라', '맥주'],
 ['소주', '콜라', '맥주', '와인'],
 ['주스']]

In [11]:
#Item 중 index 0번을 갖고 있으면, True, 아니면 False
enc = TransactionEncoder()
df_raw_enc = enc.fit_transform(X=df_raw)


df_asso = pd.DataFrame(df_raw_enc, columns=enc.columns_)
df_asso.head()

Unnamed: 0,맥주,소주,와인,주스,콜라
0,True,True,False,False,True
1,False,True,True,False,True
2,False,True,False,True,False
3,True,False,False,False,True
4,True,True,True,False,True


In [18]:
# 최소 지지도
min_support = 0.5

# 지지도 계산
df_freq = apriori(df_asso, min_support = min_support, use_colnames=True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.5,(맥주)
1,0.667,(소주)
2,0.667,(콜라)
3,0.5,"(콜라, 맥주)"
4,0.5,"(콜라, 소주)"


In [19]:
# 기준 = 신뢰도, 최소 신뢰도(threshold) =0.5
df_asso_rule = association_rules(df_freq, metric="confidence",min_threshold=0.5)
df_asso_rule.round(3).sort_values(by='confidence', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1,(맥주),(콜라),0.5,0.667,0.5,1.0,1.5,0.167,inf
0,(콜라),(맥주),0.667,0.5,0.5,0.75,1.5,0.167,2.0
2,(콜라),(소주),0.667,0.667,0.5,0.75,1.125,0.056,1.333
3,(소주),(콜라),0.667,0.667,0.5,0.75,1.125,0.056,1.333


    [맥주, 콜라] confidence =1 : 맥주를 구매한 고객은 콜라도 반드시 구매 => 맥주를 구매한 고객에게 콜라를 추천
    [콜라, 맥주] confidence = 0.75 : 콜라를 구매한 고객 중 맥주를 고른 고객은 0.75
    [콜라, 소주] lift = 1.125, [콜라, 맥주] lift = 1.5 : 콜라를 구매한 고객들에게는 소주보다 맥주를 추천

In [20]:
purch= pd.read_csv("../data/BigData/상품구매.csv")
purch.head()

Unnamed: 0,ID,PRODUCT
0,C-11,BAGUETTE
1,C-11,HERRING
2,C-11,AVOCADO
3,C-11,ARTICHOKE
4,C-11,HEINEKEN


In [38]:
list_association=[['우유','라면'],['라면'],['아이스크림','라면'],['과자','라면'],['아이스크림']]

In [22]:
ID = list(set(purch['ID']))
ID.sort()

list_association = []

for i in ID:
    temp_list = list(purch[purch['ID'] == i]['PRODUCT'])
    temp_list.sort()
    
    list_association.append(temp_list)

for row in list_association:
    print(row)

['APPLES', 'ARTICHOKE', 'AVOCADO', 'BAGUETTE', 'CORNED BEEF', 'HEINEKEN', 'HERRING']
['APPLES', 'CORNED BEEF', 'HEINEKEN', 'HERRING', 'OLIVES', 'SARDINES', 'STEAK']
['APPLES', 'AVOCADO', 'BAGUETTE', 'ICE CREAM', 'PEPPERS', 'SARDINES', 'STEAK']
['APPLES', 'COKE', 'CORNED BEEF', 'HAM', 'HERRING', 'OLIVES', 'TURKEY']
['ARTICHOKE', 'BOURBON', 'COKE', 'HAM', 'ICE CREAM', 'OLIVES', 'TURKEY']
['ARTICHOKE', 'AVOCADO', 'BAGUETTE', 'COKE', 'HEINEKEN', 'HERRING', 'TURKEY']
['APPLES', 'CHICKEN', 'COKE', 'CORNED BEEF', 'HEINEKEN', 'ICE CREAM', 'SARDINES']
['BAGUETTE', 'BOURBON', 'CRACKERS', 'HEINEKEN', 'OLIVES', 'PEPPERS', 'SODA']
['BOURBON', 'CRACKERS', 'HEINEKEN', 'HERRING', 'OLIVES', 'SODA', 'STEAK']
['APPLES', 'BAGUETTE', 'CORNED BEEF', 'HAM', 'HERRING', 'OLIVES', 'TURKEY']
['ARTICHOKE', 'AVOCADO', 'BAGUETTE', 'BOURBON', 'CORNED BEEF', 'HEINEKEN', 'HERRING']
['ARTICHOKE', 'BOURBON', 'CRACKERS', 'HEINEKEN', 'OLIVES', 'SODA', 'STEAK']
['BOURBON', 'CORNED BEEF', 'CRACKERS', 'HEINEKEN', 'HERRING', 

In [39]:
#Item 중 index 0번을 갖고 있으면, True, 아니면 False
enc = TransactionEncoder()
df_raw_enc = enc.fit_transform(X=list_association)


df_asso = pd.DataFrame(df_raw_enc, columns=enc.columns_)
df_asso.head()

Unnamed: 0,과자,라면,아이스크림,우유
0,False,True,False,True
1,False,True,False,False
2,False,True,True,False
3,True,True,False,False
4,False,False,True,False


In [42]:
# 최소 지지도
min_support = 0

# 지지도 계산
df_freq = apriori(df_asso,min_support = min_support, use_colnames=True)
df_freq.round(3).sort_values(by='support',ascending=False)

Unnamed: 0,support,itemsets
1,0.8,(라면)
2,0.4,(아이스크림)
0,0.2,(과자)
3,0.2,(우유)
4,0.2,"(과자, 라면)"
7,0.2,"(아이스크림, 라면)"
8,0.2,"(우유, 라면)"
5,0.0,"(아이스크림, 과자)"
6,0.0,"(우유, 과자)"
9,0.0,"(우유, 아이스크림)"


In [35]:
# 기준 = 신뢰도, 최소 신뢰도(threshold) =0.5
df_asso_rule = association_rules(df_freq, metric="lift",min_threshold=1.8)
df_asso_rule.round(3).sort_values(by='confidence', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1,(SODA),(BOURBON),0.4,0.55,0.4,1.0,1.818,0.18,inf
5,"(OLIVES, SODA)",(BOURBON),0.4,0.55,0.4,1.0,1.818,0.18,inf
7,(SODA),"(OLIVES, BOURBON)",0.4,0.5,0.4,1.0,2.0,0.2,inf
8,"(OLIVES, COKE)",(TURKEY),0.3,0.4,0.3,1.0,2.5,0.18,inf
4,"(OLIVES, BOURBON)",(SODA),0.5,0.4,0.4,0.8,2.0,0.2,3.0
2,(HERRING),(CORNED BEEF),0.4,0.4,0.3,0.75,1.875,0.14,2.4
3,(CORNED BEEF),(HERRING),0.4,0.4,0.3,0.75,1.875,0.14,2.4
9,(TURKEY),"(OLIVES, COKE)",0.4,0.3,0.3,0.75,2.5,0.18,2.8
0,(BOURBON),(SODA),0.55,0.4,0.4,0.727,1.818,0.18,2.2
6,(BOURBON),"(OLIVES, SODA)",0.55,0.4,0.4,0.727,1.818,0.18,2.2


    [SODA, BOURBON] confidence =1 : SODA를 구매한 고객은 BOURBON과 OLIVES도 반드시 구매 => SODA만 구매한 고객에게 BOURBON, OLIVES를 추천
