# 載入模組

In [1]:
#!pip install apyori

In [2]:
# import module
import pandas as pd
import numpy as np
from apyori import apriori

In [3]:
# import retailer data
data_df = pd.read_csv('retailer_ratings.csv', encoding='utf-8')
data_df

Unnamed: 0,CustomerID,StockCode,ProductName,quantity_Quartile,price_Quartile,buy_index_Quartile,rating
0,1,22664,龍口埔里水粉400g,3,1,2,2.0
1,1,22294,康乃馨夜用特長超薄蝶型(14片x4包),3,4,4,3.9
2,1,21385,家樂福哥斯大黎加濾泡式咖啡粉,2,2,3,2.8
3,1,21107,木柄菜匙,2,1,2,1.9
4,1,22379,美琪天然T3抗菌洗手乳,2,2,4,3.6
...,...,...,...,...,...,...,...
397222,18287,22866,啵樂樂乳酸飲料-蘋果味-235ml,4,1,5,4.5
397223,18287,23264,防蹣抗菌記憶床墊平面10公分-單人,5,5,4,4.2
397224,18287,21819,狗家族繡絨毛巾-藍色,5,2,4,3.9
397225,18287,23223,Adidas Training加重訓練護腕/護踝0.5kg,5,5,3,3.4


# 做出Apriori需要的雙list資料型態，一個顧客購買的商品，包在一個list裡面

In [4]:
# 做顧客購物清單的雙層List 例如:[[A,C],[B,D,E],[A,F]...]
user_product_df = data_df.groupby('CustomerID').agg({'ProductName': lambda x : ','.join(x).split(',')})
record = user_product_df['ProductName'].tolist()
record

[['龍口埔里水粉400g',
  '康乃馨夜用特長超薄蝶型(14片x4包)',
  '家樂福哥斯大黎加濾泡式咖啡粉',
  '木柄菜匙',
  '美琪天然T3抗菌洗手乳',
  '康寶全新鮮味炒手素食500g',
  'Paldo金炸醬麵(包)203gx4',
  '宜而爽男童羅紋短袖圓領衫-28',
  '歐邁福麵包餅乾香蒜奶油口味-300g',
  'Jordan清新水果味兒童牙膏(6-12歲)75ml',
  '員山農會養生豆奶250ml',
  '奶油小酥餅6入',
  '安安辣豆瓣醬 130g',
  'Caravan捲式髮捲(小)',
  '天然防蹣防蚊保潔墊-雙人加大',
  '舒適超鋒3刮鬍刀片 4入',
  '台東關山好米(圓二)9kg',
  '克潮靈 環保除濕桶補充包-檜木香',
  '蘇菲極淨肌超薄潔翼日用25cm-16PCx2包',
  '聲寶YK-W1252B HDMI 1.8m',
  '馬玉山紫山藥黑豆漿 30克x12',
  'YS香烤軟雞肉卷 200g',
  '韓國不倒翁Q拉麵(純粗麵條)110g*5',
  '天仁茗賞-高山烏龍茶',
  'MG 專利安全飛盤-螢黃',
  'Fun Bath 3D沐浴海棉-小馬',
  '宗家府食堂-韓國芝麻葉',
  'Binggrae螃蟹餅乾(火辣炒碼口味)',
  'No Brand黃豆粉韓式年糕風味餅乾-130g',
  'OP生物分解抗菌立體密封袋 L',
  'blacklabel BL-9168 16吋立扇',
  '宗家府泡菜-白菜切塊300g',
  '蕾妮亞私密肌濕式衛生紙巾-15PC',
  '【烤肉用品】自然風特選燒烤專用炭精1.8kg',
  '3M新防蹣水洗枕幼兒型(附枕套)',
  '靠得住安全瞬吸護墊無香標準型14.5cm-30PCx2',
  '中祥自然之顏蔬菜蘇打',
  '【狗食】寶路乾糧雞肉及蔬菜口味3kg',
  '天生好米嚴選月之米6kg',
  '原味巡禮野菜餅',
  '履歷金目鱸魚片(每包約280克)',
  'keyway名廚標準量水杯600cc',
  '正安白菜切塊 罐裝',
  '馬玉山榛果杏仁茶 30克x12',
  'DL安全棒球',
  '20公升折疊水箱(台灣製)',
  '好麗友好多魚餅乾(海苔口味)',
  '金頂金霸王

### 帶入模組並設定參數:降低運算量並篩選更有效的規則(最小支持度, 最小信賴度, 最小提升度, 規則最大產品數)

In [5]:
association_rules = apriori(record, min_support=0.03, min_confidence = 0.6, min_lift=4,max_length = 2)

# 將產出的規則，整理成我們要看的表格

In [6]:
#做出規則的List
association_results = list(association_rules)

#轉成 Dataframe
association_DF = pd.DataFrame(association_results)
association_DF

Unnamed: 0,items,support,ordered_statistics
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,"[((100純棉緞檔浴巾), (Tea Zen 韓國玉米鬚茶), 0.70571428571..."
1,"(1片/包), 家樂福大比目魚切片(每包淨重約160g)",0.032060,"[((1片/包)), (家樂福大比目魚切片(每包淨重約160g), 1.0, 31.1912..."
2,"(家樂福大比目魚切片(每包淨重約400g, 2片/包))",0.030308,"[((2片/包)), (家樂福大比目魚切片(每包淨重約400g), 1.0, 32.9942..."
3,"(3點1刻 奶油玉米濃湯-18gx12, UCC 114 即溶咖啡)",0.030308,"[((UCC 114 即溶咖啡), (3點1刻 奶油玉米濃湯-18gx12), 0.6553..."
4,"(3點1刻經典原味奶茶-20gx15, 讚岐冷凍烏龍麵)",0.074982,"[((3點1刻經典原味奶茶-20gx15), (讚岐冷凍烏龍麵), 0.7217537942..."
...,...,...,...
159,"(蘇菲超熟睡夜用細緻棉柔28cm(16片x3包), 舒味思葡萄柚口味汽水-330mlx4)",0.055711,"[((舒味思葡萄柚口味汽水-330mlx4), (蘇菲超熟睡夜用細緻棉柔28cm(16片x3..."
160,"(韓國不倒翁Q拉麵(純粗麵條)110g*5, 蘇菲極淨肌超薄潔翼日用25cm-16PCx2包)",0.052558,"[((蘇菲極淨肌超薄潔翼日用25cm-16PCx2包), (韓國不倒翁Q拉麵(純粗麵條)11..."
161,"(韓國水協-魚板辣炒年糕, 西莎100g-牛肉及肝 100g)",0.035739,"[((西莎100g-牛肉及肝 100g), (韓國水協-魚板辣炒年糕), 0.6580645..."
162,"(西莎100g-牛肉及肝 100g, 鹿野農會產地米1.5Kg)",0.032586,"[((西莎100g-牛肉及肝 100g), (鹿野農會產地米1.5Kg), 0.6, 5.4..."


In [7]:
#展開
association_DF = association_DF.explode('ordered_statistics')
association_DF

Unnamed: 0,items,support,ordered_statistics
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,"((100純棉緞檔浴巾), (Tea Zen 韓國玉米鬚茶), 0.705714285714..."
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,"((Tea Zen 韓國玉米鬚茶), (100純棉緞檔浴巾), 0.789137380191..."
1,"(1片/包), 家樂福大比目魚切片(每包淨重約160g)",0.032060,"((1片/包)), (家樂福大比目魚切片(每包淨重約160g), 1.0, 31.19125..."
1,"(1片/包), 家樂福大比目魚切片(每包淨重約160g)",0.032060,"((家樂福大比目魚切片(每包淨重約160g), (1片/包)), 1.0, 31.19125..."
2,"(家樂福大比目魚切片(每包淨重約400g, 2片/包))",0.030308,"((2片/包)), (家樂福大比目魚切片(每包淨重約400g), 1.0, 32.99421..."
...,...,...,...
159,"(蘇菲超熟睡夜用細緻棉柔28cm(16片x3包), 舒味思葡萄柚口味汽水-330mlx4)",0.055711,"((舒味思葡萄柚口味汽水-330mlx4), (蘇菲超熟睡夜用細緻棉柔28cm(16片x3包..."
160,"(韓國不倒翁Q拉麵(純粗麵條)110g*5, 蘇菲極淨肌超薄潔翼日用25cm-16PCx2包)",0.052558,"((蘇菲極淨肌超薄潔翼日用25cm-16PCx2包), (韓國不倒翁Q拉麵(純粗麵條)110..."
161,"(韓國水協-魚板辣炒年糕, 西莎100g-牛肉及肝 100g)",0.035739,"((西莎100g-牛肉及肝 100g), (韓國水協-魚板辣炒年糕), 0.65806451..."
162,"(西莎100g-牛肉及肝 100g, 鹿野農會產地米1.5Kg)",0.032586,"((西莎100g-牛肉及肝 100g), (鹿野農會產地米1.5Kg), 0.6, 5.46..."


In [8]:
#再次展開成四個 row 為一個規則
association_DF = association_DF.explode('ordered_statistics')
association_DF

Unnamed: 0,items,support,ordered_statistics
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(100純棉緞檔浴巾)
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(Tea Zen 韓國玉米鬚茶)
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,0.705714
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,12.8697
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(Tea Zen 韓國玉米鬚茶)
...,...,...,...
162,"(西莎100g-牛肉及肝 100g, 鹿野農會產地米1.5Kg)",0.032586,5.4622
163,"(韓國水協-魚板辣炒年糕, 飛利浦HD4924智慧變頻電磁爐)",0.035039,(飛利浦HD4924智慧變頻電磁爐)
163,"(韓國水協-魚板辣炒年糕, 飛利浦HD4924智慧變頻電磁爐)",0.035039,(韓國水協-魚板辣炒年糕)
163,"(韓國水協-魚板辣炒年糕, 飛利浦HD4924智慧變頻電磁爐)",0.035039,0.829876


In [9]:
# 重製 index 
association_DF.reset_index(drop=True,inplace=True)
association_DF

Unnamed: 0,items,support,ordered_statistics
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(100純棉緞檔浴巾)
1,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(Tea Zen 韓國玉米鬚茶)
2,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,0.705714
3,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,12.8697
4,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(Tea Zen 韓國玉米鬚茶)
...,...,...,...
799,"(西莎100g-牛肉及肝 100g, 鹿野農會產地米1.5Kg)",0.032586,5.4622
800,"(韓國水協-魚板辣炒年糕, 飛利浦HD4924智慧變頻電磁爐)",0.035039,(飛利浦HD4924智慧變頻電磁爐)
801,"(韓國水協-魚板辣炒年糕, 飛利浦HD4924智慧變頻電磁爐)",0.035039,(韓國水協-魚板辣炒年糕)
802,"(韓國水協-魚板辣炒年糕, 飛利浦HD4924智慧變頻電磁爐)",0.035039,0.829876


In [10]:
#創造每條規則的support dataframe
association_DF_copy = association_DF.copy()
for i in range(len(association_DF)):
    if i%4 != 0:
        association_DF_copy.drop(index=i,inplace=True)
association_DF_copy.reset_index(drop=True,inplace=True)
association_DF_copy

Unnamed: 0,items,support,ordered_statistics
0,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(100純棉緞檔浴巾)
1,"(100純棉緞檔浴巾, Tea Zen 韓國玉米鬚茶)",0.043273,(Tea Zen 韓國玉米鬚茶)
2,"(1片/包), 家樂福大比目魚切片(每包淨重約160g)",0.032060,(1片/包))
3,"(1片/包), 家樂福大比目魚切片(每包淨重約160g)",0.032060,(家樂福大比目魚切片(每包淨重約160g)
4,"(家樂福大比目魚切片(每包淨重約400g, 2片/包))",0.030308,(2片/包))
...,...,...,...
196,"(蘇菲超熟睡夜用細緻棉柔28cm(16片x3包), 舒味思葡萄柚口味汽水-330mlx4)",0.055711,(舒味思葡萄柚口味汽水-330mlx4)
197,"(韓國不倒翁Q拉麵(純粗麵條)110g*5, 蘇菲極淨肌超薄潔翼日用25cm-16PCx2包)",0.052558,(蘇菲極淨肌超薄潔翼日用25cm-16PCx2包)
198,"(韓國水協-魚板辣炒年糕, 西莎100g-牛肉及肝 100g)",0.035739,(西莎100g-牛肉及肝 100g)
199,"(西莎100g-牛肉及肝 100g, 鹿野農會產地米1.5Kg)",0.032586,(西莎100g-牛肉及肝 100g)


In [11]:
#創造每條規則的 Dataframe
rule_list = []
association_list = association_DF['ordered_statistics'].tolist()
n = 0
while True:
    if n < len(association_DF)-3:
        tmp_list = association_list[n:n+4]
        rule_list.append(tmp_list)
        n +=4
    else:
        break
associate_df = pd.DataFrame(data = rule_list, columns=['item1','item2','confidence','lift'])
associate_df

Unnamed: 0,item1,item2,confidence,lift
0,(100純棉緞檔浴巾),(Tea Zen 韓國玉米鬚茶),0.705714,12.869703
1,(Tea Zen 韓國玉米鬚茶),(100純棉緞檔浴巾),0.789137,12.869703
2,(1片/包)),(家樂福大比目魚切片(每包淨重約160g),1.000000,31.191257
3,(家樂福大比目魚切片(每包淨重約160g),(1片/包)),1.000000,31.191257
4,(2片/包)),(家樂福大比目魚切片(每包淨重約400g),1.000000,32.994220
...,...,...,...,...
196,(舒味思葡萄柚口味汽水-330mlx4),(蘇菲超熟睡夜用細緻棉柔28cm(16片x3包)),0.669474,7.076585
197,(蘇菲極淨肌超薄潔翼日用25cm-16PCx2包),(韓國不倒翁Q拉麵(純粗麵條)110g*5),0.650759,6.609490
198,(西莎100g-牛肉及肝 100g),(韓國水協-魚板辣炒年糕),0.658065,5.297930
199,(西莎100g-牛肉及肝 100g),(鹿野農會產地米1.5Kg),0.600000,5.462201


In [12]:
# 將規則與Support合併
association_rule_df = pd.concat([associate_df,association_DF_copy['support']],axis=1)
association_rule_df

Unnamed: 0,item1,item2,confidence,lift,support
0,(100純棉緞檔浴巾),(Tea Zen 韓國玉米鬚茶),0.705714,12.869703,0.043273
1,(Tea Zen 韓國玉米鬚茶),(100純棉緞檔浴巾),0.789137,12.869703,0.043273
2,(1片/包)),(家樂福大比目魚切片(每包淨重約160g),1.000000,31.191257,0.032060
3,(家樂福大比目魚切片(每包淨重約160g),(1片/包)),1.000000,31.191257,0.032060
4,(2片/包)),(家樂福大比目魚切片(每包淨重約400g),1.000000,32.994220,0.030308
...,...,...,...,...,...
196,(舒味思葡萄柚口味汽水-330mlx4),(蘇菲超熟睡夜用細緻棉柔28cm(16片x3包)),0.669474,7.076585,0.055711
197,(蘇菲極淨肌超薄潔翼日用25cm-16PCx2包),(韓國不倒翁Q拉麵(純粗麵條)110g*5),0.650759,6.609490,0.052558
198,(西莎100g-牛肉及肝 100g),(韓國水協-魚板辣炒年糕),0.658065,5.297930,0.035739
199,(西莎100g-牛肉及肝 100g),(鹿野農會產地米1.5Kg),0.600000,5.462201,0.032586


In [13]:
# 把信心度為100%的規則去掉
association_rule_df.drop(association_rule_df[association_rule_df['confidence']==1].index,axis=0, inplace=True)

In [14]:
# 新增一欄 item2原本的購買率
association_rule_df['item2_original_buy_rate'] = association_rule_df['confidence'] / association_rule_df['lift']

In [15]:
# show 結果 (以提升度為大小為準則)
association_rule_df.sort_values(by = 'lift', ascending=False).head(30)

Unnamed: 0,item1,item2,confidence,lift,support,item2_original_buy_rate
136,(現烤月皇酥禮盒8入),(家樂福超值狗乾糧(牛肉風味)3.5K),0.836066,18.145484,0.035739,0.046076
135,(家樂福超值狗乾糧(牛肉風味)3.5K),(現烤月皇酥禮盒8入),0.775665,18.145484,0.035739,0.042747
87,(呂青麥草本精萃洗髮精500ml 韓國進口),(天命海苔酥(蔬菜口味)),0.757709,17.581319,0.030133,0.043097
88,(天命海苔酥(蔬菜口味)),(呂青麥草本精萃洗髮精500ml 韓國進口),0.699187,17.581319,0.030133,0.039769
170,(桂格鮮榖王堅果多穀飲),(飛利浦HD4924智慧變頻電磁爐),0.735294,17.415182,0.039418,0.042221
171,(飛利浦HD4924智慧變頻電磁爐),(桂格鮮榖王堅果多穀飲),0.93361,17.415182,0.039418,0.053609
156,(金車日式綠茶-580mlX24),(康朵 紓壓甜睡香氛擴香膏-120g),0.663043,16.672477,0.03206,0.039769
155,(康朵 紓壓甜睡香氛擴香膏-120g),(金車日式綠茶-580mlX24),0.806167,16.672477,0.03206,0.048353
75,(伯朗曼特寧咖啡3合1),(舒潔迪士尼盒裝面紙-140PCx5),0.718147,16.595876,0.032586,0.043273
76,(舒潔迪士尼盒裝面紙-140PCx5),(伯朗曼特寧咖啡3合1),0.753036,16.595876,0.032586,0.045375


In [16]:
# show 結果 (以信賴度為準則 購買 item1 的條件下, 購買item2的機率)
association_rule_df.sort_values(by = 'confidence', ascending=False).head(30)

Unnamed: 0,item1,item2,confidence,lift,support,item2_original_buy_rate
171,(飛利浦HD4924智慧變頻電磁爐),(桂格鮮榖王堅果多穀飲),0.93361,17.415182,0.039418,0.053609
192,(飛利浦HD4924智慧變頻電磁爐),(立頓茗閒情100％台灣茶-活綠茶-90g(2.5gx36)),0.892116,14.933135,0.037666,0.059741
160,(朵蔓頭皮淨化噴霧),(韓國水協-魚板辣炒年糕),0.886364,7.135915,0.08199,0.124212
96,(天仁茉香綠茶),(美祿巧克力麥芽飲品鐵罐裝 1.5Kg),0.858065,12.855203,0.046601,0.066748
167,(桂格鮮榖王堅果多穀飲),(立頓茗閒情100％台灣茶-活綠茶-90g(2.5gx36)),0.839869,14.058574,0.045025,0.059741
72,(家樂福素食熟水餃),(五月花竹纖維廚房紙巾-60PCx4捲),0.836806,11.621621,0.042221,0.072004
169,(桂格鮮榖王堅果多穀飲),(韓國水協-魚板辣炒年糕),0.836601,6.73529,0.044849,0.124212
136,(現烤月皇酥禮盒8入),(家樂福超值狗乾糧(牛肉風味)3.5K),0.836066,18.145484,0.035739,0.046076
200,(飛利浦HD4924智慧變頻電磁爐),(韓國水協-魚板辣炒年糕),0.829876,6.681142,0.035039,0.124212
99,(天生好米履歷鷺巡一等糙米2.2kg),(農心辛拉麵超值包120g x5),0.829653,4.40117,0.046076,0.188507


# 輸出結果到本地端

In [17]:
# 結果存入csv檔 (用提升度來存)
# association_rule_df.to_csv('association_rules.csv',encoding='utf_8_sig',index=False)

# 搜尋我們要的產品(範例:咖啡)

In [18]:
association_rule_df[association_rule_df['item1'].apply(lambda x : str(x)).str.contains('咖啡')]

Unnamed: 0,item1,item2,confidence,lift,support,item2_original_buy_rate
6,(UCC 114 即溶咖啡),(3點1刻 奶油玉米濃湯-18gx12),0.655303,12.594174,0.030308,0.052032
44,(UCC 114 即溶咖啡),(【海倫仙度絲】去屑薄荷舒爽洗髮乳),0.651515,15.11727,0.030133,0.043097
75,(伯朗曼特寧咖啡3合1),(舒潔迪士尼盒裝面紙-140PCx5),0.718147,16.595876,0.032586,0.043273
79,(星巴克黃金烘焙義式濃縮咖啡膠囊(12顆)),(光泉冷泡茶茶王烏龍-1235mlx12),0.671233,8.66832,0.042922,0.077435
130,(家樂福曼特寧風情咖啡豆),(海太加鈣營養餅-起士口味),0.665198,5.110298,0.052908,0.130168
157,(星巴克特選系列-卡布奇諾咖啡(每盒內含4杯)),(沙威隆抗菌潔淨沐浴乳茶樹精油),0.762115,9.498144,0.030308,0.080238
