In [39]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [4]:
# Lookup table mapping a numeric code to an item name.
drink = dict(zip(range(1, 6), ['소주', '콜라', '맥주', '와인', '주스']))

# Six example baskets; each tuple of codes is translated into a list of
# item names through the lookup table above.
ds_raw = [[drink[code] for code in basket]
          for basket in ((1, 2, 3), (1, 2, 4), (1, 5), (2, 3), (1, 2, 3, 4), (5,))]
ds_raw

[['소주', '콜라', '맥주'],
 ['소주', '콜라', '와인'],
 ['소주', '주스'],
 ['콜라', '맥주'],
 ['소주', '콜라', '맥주', '와인'],
 ['주스']]

In [5]:
# Learn the set of distinct items first, then encode every basket as a
# boolean row (one column per item).
enc = TransactionEncoder()
enc.fit(ds_raw)
ds_raw_enc = enc.transform(ds_raw)

# Wrap the boolean array in a DataFrame labelled with the learned item names.
df_asso = pd.DataFrame(data = ds_raw_enc, columns = enc.columns_)
df_asso.head(n = 5)

Unnamed: 0,맥주,소주,와인,주스,콜라
0,True,True,False,False,True
1,False,True,True,False,True
2,False,True,False,True,False
3,True,False,False,False,True
4,True,True,True,False,True


In [6]:
# Minimum share of transactions an itemset must appear in to be kept.
# The threshold is defined once and passed by name so the variable and the
# value actually used by apriori cannot drift apart.
min_support = 0.5
df_freq = apriori(df_asso, min_support = min_support, use_colnames = True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.5,(맥주)
1,0.667,(소주)
2,0.667,(콜라)
3,0.5,"(맥주, 콜라)"
4,0.5,"(소주, 콜라)"


In [8]:
# Derive rules from the frequent itemsets, keeping those whose confidence
# reaches the 0.5 threshold.
df_asso_rule = association_rules(
    df_freq,
    metric = 'confidence',
    min_threshold = 0.5,
)
df_asso_rule.round(decimals = 3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(맥주),(콜라),0.5,0.667,0.5,1.0,1.5,0.167,inf
1,(콜라),(맥주),0.667,0.5,0.5,0.75,1.5,0.167,2.0
2,(소주),(콜라),0.667,0.667,0.5,0.75,1.125,0.056,1.333
3,(콜라),(소주),0.667,0.667,0.5,0.75,1.125,0.056,1.333


In [30]:
# Load the purchase records (the preview below shows ID and PRODUCT columns).
df = pd.read_csv(filepath_or_buffer = '상품구매.csv')
df.head(n = 5)

Unnamed: 0,ID,PRODUCT
0,C-11,BAGUETTE
1,C-11,HERRING
2,C-11,AVOCADO
3,C-11,ARTICHOKE
4,C-11,HEINEKEN


In [20]:
# Peek at the second record: unpack its first two cells into
# (customer id, product) and show them separated by a space.
member, product = df.iloc[1, 0:2]
print(f"{member} {product}")

C-11 HERRING


In [23]:
# Collect every customer's purchased products, keyed by customer id.
# The first column holds the id and the second the product (positional
# access, matching how the frame is read elsewhere in this notebook).
# Walking the two columns directly avoids a per-row df.iloc lookup.
shopping_list = dict()
for member, product in zip(df.iloc[:, 0], df.iloc[:, 1]):
    shopping_list.setdefault(member, []).append(product)

# One row per customer, one column per purchase position (0, 1, 2, ...).
# Building the frame in a single call also copes with customers who bought
# different numbers of products: shorter baskets are padded with missing
# values instead of raising a length-mismatch error, so anything consuming
# the rows as transactions should drop missing cells first.
df_now = pd.DataFrame.from_dict(shopping_list, orient = 'index')

In [24]:
# Preview the first five customers and their baskets.
df_now.head(n = 5)

Unnamed: 0,0,1,2,3,4,5,6
C-11,BAGUETTE,HERRING,AVOCADO,ARTICHOKE,HEINEKEN,APPLES,CORNED BEEF
C-12,HERRING,CORNED BEEF,APPLES,OLIVES,STEAK,SARDINES,HEINEKEN
C-13,BAGUETTE,SARDINES,APPLES,PEPPERS,AVOCADO,STEAK,ICE CREAM
C-14,HERRING,CORNED BEEF,OLIVES,HAM,TURKEY,COKE,APPLES
C-15,OLIVES,BOURBON,COKE,TURKEY,ICE CREAM,ARTICHOKE,HAM


In [33]:
# Turn each customer's row into a plain Python list of purchased items.
# Missing cells are dropped so that a padding value can never be counted
# as a product when the baskets are encoded.
df_qq = [row.dropna().tolist() for _, row in df_now.iterrows()]

In [36]:
# Fresh encoder for the shopping data: learn the product vocabulary and
# encode each customer's basket as a boolean row in one chained call.
enc = TransactionEncoder()
ds_raw_enc = enc.fit(df_qq).transform(df_qq)

# Label the boolean columns with the product names the encoder learned.
df_asso = pd.DataFrame(data = ds_raw_enc, columns = enc.columns_)
df_asso.head(n = 5)

Unnamed: 0,APPLES,ARTICHOKE,AVOCADO,BAGUETTE,BOURBON,CHICKEN,COKE,CORNED BEEF,CRACKERS,HAM,HEINEKEN,HERRING,ICE CREAM,OLIVES,PEPPERS,SARDINES,SODA,STEAK,TURKEY
0,True,True,True,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False
1,True,False,False,False,False,False,False,True,False,False,True,True,False,True,False,True,False,True,False
2,True,False,True,True,False,False,False,False,False,False,False,False,True,False,True,True,False,True,False
3,True,False,False,False,False,False,True,True,False,True,False,True,False,True,False,False,False,False,True
4,False,True,False,False,True,False,True,False,False,True,False,False,True,True,False,False,False,False,True


In [37]:
# Keep only itemsets bought by at least half of the customers.
# The named threshold is passed to apriori directly so the variable is not
# a dead assignment sitting next to a duplicated literal.
min_support = 0.5
df_freq = apriori(df_asso, min_support = min_support, use_colnames = True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.55,(BOURBON)
1,0.5,(COKE)
2,0.6,(HEINEKEN)
3,0.65,(OLIVES)
4,0.5,"(OLIVES, BOURBON)"


In [38]:
# Generate rules from the frequent product sets, filtered on a minimum
# confidence of 0.5.
df_asso_rule = association_rules(
    df_freq,
    metric = 'confidence',
    min_threshold = 0.5,
)
df_asso_rule.round(decimals = 3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(OLIVES),(BOURBON),0.65,0.55,0.5,0.769,1.399,0.142,1.95
1,(BOURBON),(OLIVES),0.55,0.65,0.5,0.909,1.399,0.142,3.85


In [47]:
# Second toy example: item codes are numbered from 1 in the listed order.
drink = dict(enumerate(['우유', '라면', '아이스크림', '과자'], start = 1))

# Five baskets, written as tuples of item codes and translated to names.
ds_raw = [[drink[code] for code in basket]
          for basket in ((1, 2), (2,), (3, 1), (4, 2), (3,))]

In [48]:
# Encode the second toy data set: fit the item vocabulary, then produce
# one boolean row per basket.
enc = TransactionEncoder()
enc.fit(ds_raw)
ds_raw_enc = enc.transform(ds_raw)

# DataFrame view of the encoding, with item names as column labels.
df_asso = pd.DataFrame(data = ds_raw_enc, columns = enc.columns_)
df_asso.head(n = 5)

Unnamed: 0,과자,라면,아이스크림,우유
0,False,True,False,True
1,False,True,False,False
2,False,False,True,True
3,True,True,False,False
4,False,False,True,False


In [57]:
# Support threshold for this small example. The value actually applied is
# 0.1 (the result below keeps itemsets with support 0.2), so the variable is
# set to that value and passed by name instead of holding a stale 0.5 that
# contradicts the call.
min_support = 0.1
df_freq = apriori(df_asso, min_support = min_support, use_colnames = True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.2,(과자)
1,0.6,(라면)
2,0.4,(아이스크림)
3,0.4,(우유)
4,0.2,"(라면, 과자)"
5,0.2,"(라면, 우유)"
6,0.2,"(우유, 아이스크림)"


In [54]:
# Build rules from the frequent itemsets of the second toy example,
# keeping those with confidence of at least 0.5.
df_asso_rule = association_rules(
    df_freq,
    metric = 'confidence',
    min_threshold = 0.5,
)
df_asso_rule.round(decimals = 3)

  "confidence": lambda sAC, sA, _: sAC/sA,


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(과자),(라면),0.2,0.6,0.2,1.0,1.667,0.08,inf
1,(우유),(라면),0.4,0.6,0.2,0.5,0.833,-0.04,0.8
2,(우유),(아이스크림),0.4,0.4,0.2,0.5,1.25,0.04,1.2
3,(아이스크림),(우유),0.4,0.4,0.2,0.5,1.25,0.04,1.2
