購物籃分析 - Python實戰：如何找出商品搭配的總體策略？

1. 引入套件與資料

In [1]:
from apyori import apriori
import pandas as pd
import numpy as np
import os


In [3]:
# Load the raw sales records. pd.read_csv already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapping is needed. Later cells read the columns
# '系列', '單價', '成本', '產品' and '訂單編號' from this table.
alldata = pd.read_csv('sales_data.csv')


2. 篩選具分析價值系列

In [4]:
# Keep the three series ('系列') with the largest summed '單價' values.
# The result is the index of series labels, ordered from highest total down.
top3_series = (
    alldata.groupby('系列')['單價']
    .sum()
    .sort_values(ascending=False)
    .index[0:3]
)


3. 製作產品利潤dataframe

In [5]:
# Per-product profit table: one row per distinct '產品' (first occurrence in
# alldata wins), with '利潤' = '單價' - '成本'.
#
# Columns are selected before de-duplicating so the full sales table is never
# copied, and the profit column is added with assign() on a fresh frame so no
# SettingWithCopyWarning is raised by writing into a column subset.
profit_df = (
    alldata[['產品', '單價', '成本']]
    .drop_duplicates('產品')
    .assign(利潤=lambda df: df['單價'] - df['成本'])
)


In [6]:
# Display the per-product profit table (columns: 產品, 單價, 成本, 利潤).
profit_df

Unnamed: 0,產品,單價,成本,利潤
0,產品4-1,391,240,151
1,產品4-2,238,137,101
2,產品4-3,434,253,181
3,產品4-4,339,205,134
6,產品4-5,646,410,236
...,...,...,...,...
353074,產品496-12,645,233,412
353076,產品496-16,419,182,237
353082,產品496-29,645,263,382
353084,產品496-3,690,233,457


4. 產出各系列下產品之間搭配關係

In [8]:
# For each of the top series: build one basket per order, mine association
# rules with apriori, and write one CSV per "when buying X" product listing the
# products bought together with X, the rule confidence and lift, and the
# combined / expected profit of the pair.

# Product -> unit profit lookup, built once instead of filtering profit_df for
# every rule row.
unit_profit = profit_df.set_index('產品')['利潤']

for s in top3_series:
    series_data = alldata[alldata['系列'] == s]

    # Output folder for this series; exist_ok lets the cell be re-run without
    # failing on a folder created by a previous run.
    out_dir = os.path.join(os.getcwd(), s + '商品搭配分析')
    os.makedirs(out_dir, exist_ok=True)

    # Baskets must be rebuilt for every series; sharing one list across
    # series would mix earlier series' orders into this series' support counts.
    record = []
    # groupby visits each order once (in sorted order-number order) instead of
    # scanning the whole series table once per order.
    for _, products in series_data.groupby('訂單編號')['產品']:
        cart = products.values
        record.append(cart)
        print(cart)

    # min_lift is set just above 1 so only positively associated item sets are
    # kept. NOTE(review): this presumably also drops the empty-antecedent
    # statistic (lift == 1) that apyori generates - confirm against apyori.
    association_rules = apriori(record, min_support=0.003, min_lift=1.000000001)

    # Flatten to one row per directed rule "base -> add". Antecedent and
    # consequent are read from the same ordered statistic that supplies the
    # confidence and lift, so the numbers always describe the named products.
    # Only single-product -> single-product rules fit the output layout
    # (one '當購買時' product, one '購買產品' product); others are skipped.
    rows = []
    for relation in association_rules:
        for stat in relation.ordered_statistics:
            if len(stat.items_base) != 1 or len(stat.items_add) != 1:
                continue
            (base,) = stat.items_base
            (added,) = stat.items_add
            pair_profit = unit_profit[base] + unit_profit[added]
            rows.append({
                '當購買時': base,
                '購買產品': added,
                '機率': stat.confidence,
                '提升度': stat.lift,
                '產品組合利潤': pair_profit,
                # Expected profit when promoting several pairings at once.
                '潛在利潤': stat.confidence * pair_profit,
            })

    if not rows:
        # No rule met the support/lift thresholds for this series.
        continue

    rules = pd.DataFrame(rows)

    # One CSV per antecedent product, most probable companion product first.
    for i, data in rules.groupby('當購買時'):
        sortval = data.sort_values(by=['機率'], ascending=False)
        sortval.to_csv(
            os.path.join(out_dir, s + "_當購買 " + i + " 時購買以下商品機率.csv"),
            encoding='utf-8-sig',
        )


9;]
['產品3-35']
['產品3-97']
['產品3-35']
['產品3-6']
['產品3-11']
['產品3-97' '產品3-97']
['產品3-95' '產品3-97']
['產品3-29']
['產品3-98' '產品3-97']
['產品3-36' '產品3-12' '產品3-35']
['產品3-2']
['產品3-35']
['產品3-35' '產品3-96']
['產品3-1']
['產品3-12']
['產品3-35']
['產品3-31']
['產品3-97']
['產品3-36']
['產品3-9']
['產品3-97']
['產品3-12' '產品3-35' '產品3-12']
['產品3-35']
['產品3-35']
['產品3-14']
['產品3-11' '產品3-14' '產品3-36']
['產品3-35']
['產品3-2']
['產品3-94']
['產品3-35' '產品3-96']
['產品3-98']
['產品3-35']
['產品3-12' '產品3-3' '產品3-12' '產品3-3' '產品3-12' '產品3-35']
['產品3-29']
['產品3-36']
['產品3-2' '產品3-36']
['產品3-35']
[&#39;產品3-93