<a href="https://colab.research.google.com/github/Re14m/training/blob/master/2022-0607_recipie376.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [pandasでマーケティングシミュレーションをするレシピ](https://axross-recipe.com/recipes/376)

## 環境準備

In [None]:
# Mount the Colab drive at /content/drive/ (needs the google.colab package).
from google.colab import drive
drive.mount("/content/drive/")

## シミュレーションの準備をしよう

In [None]:
# Simulation settings

# Number of simulated users
simulation_num = 50_000

# Number of uses per user within the simulated period
using_num = 20

# Initial share of each product (ratio by count), in the order A, B, C
SampleA_first, SampleB_first, SampleC_first = 0.2, 0.3, 0.5

# Transition probabilities from one product to the next, one list per
# current product. SampleA_to holds, in order, the probabilities of
# A->A (repeat), A->B (trial of B) and A->C (trial of C).
SampleA_to, SampleB_to, SampleC_to = (
    [0.9, 0.03, 0.07],
    [0.1, 0.7, 0.2],
    [0.1, 0.1, 0.8],
)

In [None]:
# 各商品への遷移確率の合計（トライアル率）
sample_name_list = ["SampleA", "SampleB", "SampleC"]

# For each product, gather the probabilities of moving *to* it from A, B and C.
# The entry coming from the product itself is its repeat rate; the remaining
# entries added together are reported as its trial rate.
for position, sample_name in enumerate(sample_name_list):
    trial_sum = [SampleA_to[position], SampleB_to[position], SampleC_to[position]]
    repeat_rate = trial_sum[position]
    trial_rate = sum(trial_sum) - repeat_rate
    print(sample_name, "のトライアル率は", "{:.3f}\n".format(trial_rate), "\t\tリピート率は", repeat_rate)

## 表を生成しよう

In [None]:
# 与えられた確率を元にシミュレーションする
import pandas as pd
import random

# 与えられた引数を元に、使用回数ごとにどの銘柄を使ったかをtableにする関数
def make_simulation_table(simulation_num, using_num, SampleA_first, SampleB_first, SampleC_first):
    """Create the user-by-use table and fill in every user's first product.

    Rows are labelled ``user_1`` .. ``user_<simulation_num>`` and columns
    ``use_1`` .. ``use_<using_num>``. Only ``use_1`` is filled here: users are
    assigned "SampleA", "SampleB" and "SampleC" from the top of the table
    down, in proportion to the initial shares. All other cells stay empty.

    Args:
        simulation_num: Number of simulated users (rows).
        using_num: Number of uses per user (columns); must be at least 1.
        SampleA_first: Initial share of SampleA, as a fraction of the users.
        SampleB_first: Initial share of SampleB.
        SampleC_first: Initial share of SampleC.

    Returns:
        The pandas DataFrame described above. If the shares add up to less
        than 1, the remaining users at the bottom keep an empty ``use_1``.

    Raises:
        ValueError: If the shares add up to more users than exist.
    """
    user_val = [f"user_{num}" for num in range(1, simulation_num + 1)]
    cols = [f"use_{num}" for num in range(1, using_num + 1)]
    simulation_table = pd.DataFrame(index=user_val, columns=cols)

    # Round the *cumulative* shares rather than each share on its own, so that
    # rounding never leaves a gap or an overlap between the three groups
    # (e.g. 10 users at 0.25 / 0.25 / 0.5 would otherwise fill only 9 rows).
    a_end = round(simulation_num * SampleA_first)
    b_end = round(simulation_num * (SampleA_first + SampleB_first))
    c_end = round(simulation_num * (SampleA_first + SampleB_first + SampleC_first))
    if c_end > simulation_num:
        raise ValueError("initial shares must not add up to more than 1")

    # Positional slice assignment writes straight into the frame. The previous
    # chained form, table["use_1"][i] = ..., depends on integer keys being
    # treated as positions and on the column being a view of the frame.
    simulation_table.iloc[:a_end, 0] = "SampleA"
    simulation_table.iloc[a_end:b_end, 0] = "SampleB"
    simulation_table.iloc[b_end:c_end, 0] = "SampleC"

    return simulation_table

In [None]:
# Build the table and preview its first and last rows.
table = make_simulation_table(
    simulation_num=simulation_num,
    using_num=using_num,
    SampleA_first=SampleA_first,
    SampleB_first=SampleB_first,
    SampleC_first=SampleC_first,
)

print(table.head(), table.tail(), sep="\n")

## 2回目以降の使用商品をシミュレーションしよう

In [None]:
# use2以降は、一つ前のカラムの値に応じて、それぞれの遷移確率でランダムに代入する。
# 同じ行の一つ前の列の値を参考して、確率に応じて代入する文字列を出力する関数を作る
def select_object(simulation_table, index_num, col_num, SampleA_to, SampleB_to, SampleC_to):
    """Draw the product for one cell from the product in the cell to its left.

    The product found at ``(index_num, col_num - 1)`` selects which transition
    list applies. One random number in [0, 1) is then compared against the
    cumulative probabilities of that list.

    Args:
        simulation_table: DataFrame whose cells hold "SampleA", "SampleB" or
            "SampleC".
        index_num: Row position of the cell being decided.
        col_num: Column position of the cell being decided; the previous
            product is read from ``col_num - 1``.
        SampleA_to: Probabilities [to A, to B, to C] when the previous product
            is SampleA.
        SampleB_to: Same, when the previous product is SampleB.
        SampleC_to: Same, when the previous product is SampleC.

    Returns:
        "SampleA", "SampleB" or "SampleC".

    Raises:
        ValueError: If the previous cell holds none of the three products
            (for example because it is still empty).
    """
    transition_by_product = {
        "SampleA": SampleA_to,
        "SampleB": SampleB_to,
        "SampleC": SampleC_to,
    }
    previous = simulation_table.iloc[index_num, col_num - 1]
    try:
        probabilities = transition_by_product[previous]
    except KeyError:
        raise ValueError(
            "cell ({}, {}) holds {!r}, not a known product".format(index_num, col_num - 1, previous)
        ) from None

    # Exactly one draw per cell, compared against the cumulative probabilities.
    threshold_num = random.random()
    if threshold_num < probabilities[0]:
        return "SampleA"
    if threshold_num < sum(probabilities[0:2]):
        return "SampleB"
    # Everything above the second cut is SampleC. Using a plain fall-through
    # keeps a result defined even when the three probabilities add up to
    # slightly less than 1 because of floating-point round-off.
    return "SampleC"


def fill_simulation_table(simulation_table, SampleA_to, SampleB_to, SampleC_to):
    """Fill every column after the first using the transition probabilities.

    Cells are filled row by row and, within a row, left to right, because each
    cell depends on the one before it. The table is modified in place and also
    returned for convenience.
    """
    row_count, col_count = simulation_table.shape
    for index_val in range(row_count):
        for col_val in range(1, col_count):
            # Single-step .iloc[row, col] assignment writes into the frame itself;
            # the chained .iloc[row][col] form may write to a temporary copy.
            simulation_table.iloc[index_val, col_val] = select_object(
                simulation_table, index_val, col_val, SampleA_to, SampleB_to, SampleC_to
            )
    return simulation_table

In [None]:
# 与えられた確率を元にシミュレーションする
import pandas as pd
import random

# 与えられた引数を元に、使用回数ごとにどの銘柄を使ったかをtableにする関数
# 内部のselect_objectで遷移確率を引数にとる。
def make_simulation_table(simulation_num, using_num, SampleA_first, SampleB_first, SampleC_first, transitions=None):
    """Simulate which product each user picks at every use.

    The first use is assigned from the top of the table down, in proportion to
    the initial shares. Every later use is drawn at random from the transition
    probabilities of the product the same user picked one use earlier.

    Args:
        simulation_num: Number of simulated users (rows).
        using_num: Number of uses per user (columns); must be at least 1.
        SampleA_first: Initial share of SampleA, as a fraction of the users.
        SampleB_first: Initial share of SampleB.
        SampleC_first: Initial share of SampleC.
        transitions: Optional sequence of three lists, one per current product
            in the order A, B, C, each holding the probabilities
            [to A, to B, to C]. When omitted, the module-level SampleA_to,
            SampleB_to and SampleC_to are used.

    Returns:
        A pandas DataFrame with rows ``user_1`` .. ``user_<simulation_num>``
        and columns ``use_1`` .. ``use_<using_num>`` whose cells are
        "SampleA", "SampleB" or "SampleC".

    Raises:
        ValueError: If ``using_num`` is below 1, or if the initial shares do
            not give every user exactly one starting product.
    """
    if using_num < 1:
        raise ValueError("using_num must be at least 1")
    if transitions is None:
        # Backward-compatible default: read the lists defined at module level.
        transitions = (SampleA_to, SampleB_to, SampleC_to)

    # Cumulative cut points per current product, computed once up front
    # instead of once per cell.
    cut_points = {
        product: (to_probs[0], sum(to_probs[0:2]))
        for product, to_probs in zip(("SampleA", "SampleB", "SampleC"), transitions)
    }

    user_val = [f"user_{num}" for num in range(1, simulation_num + 1)]
    cols = [f"use_{num}" for num in range(1, using_num + 1)]

    # Round the *cumulative* shares so the three groups always tile the user
    # list without gaps or overlaps caused by rounding each share separately.
    a_end = round(simulation_num * SampleA_first)
    b_end = round(simulation_num * (SampleA_first + SampleB_first))
    c_end = round(simulation_num * (SampleA_first + SampleB_first + SampleC_first))
    first_uses = (
        ["SampleA"] * a_end
        + ["SampleB"] * (b_end - a_end)
        + ["SampleC"] * (c_end - b_end)
    )
    if len(first_uses) != simulation_num:
        # Later uses are derived from the previous one, so every user needs a
        # starting product.
        raise ValueError("initial shares must be non-negative and add up to 1")

    def select_object(previous):
        """Draw the next product given the previously used one."""
        first_cut, second_cut = cut_points[previous]
        threshold_num = random.random()
        if threshold_num < first_cut:
            return "SampleA"
        if threshold_num < second_cut:
            return "SampleB"
        # Fall-through keeps a result defined even if the probabilities add up
        # to slightly less than 1 because of floating-point round-off.
        return "SampleC"

    # Build the rows as plain lists and create the frame once at the end.
    # Users are processed top to bottom and uses left to right, one random
    # draw per cell, so a seeded run consumes random numbers in that order.
    rows = []
    for first_use in first_uses:
        history = [first_use]
        for _ in range(1, using_num):
            history.append(select_object(history[-1]))
        rows.append(history)

    return pd.DataFrame(rows, index=user_val, columns=cols, dtype=object)

# Run the simulation with the current settings and display the result.
table = make_simulation_table(
    simulation_num=simulation_num,
    using_num=using_num,
    SampleA_first=SampleA_first,
    SampleB_first=SampleB_first,
    SampleC_first=SampleC_first,
)
table

## 作った表を解析しよう

In [None]:
# 最終的な個数シェアを算出
# Boolean masks marking the cells in which each product was used
table_A = table.eq("SampleA")
table_B = table.eq("SampleB")
table_C = table.eq("SampleC")

# Number of uses of each product over the whole table
for product_mask in (table_A, table_B, table_C):
    print(product_mask.values.sum())

total_share = simulation_num * using_num  # number of cells

# Each product's share of all uses
labelled_masks = (
    ("SampleA_share", table_A),
    ("SampleB_share", table_B),
    ("SampleC_share", table_C),
)
for share_label, product_mask in labelled_masks:
    print(share_label, product_mask.values.sum() / total_share)

In [None]:
# 個数シェア推移

import matplotlib.pyplot as plt
%matplotlib inline

# Share of each product at every use, i.e. one value per column of `table`.
# Matches are counted with eq().sum(), which gives 0 for a product that does
# not occur in a column; value_counts()["SampleX"] raises KeyError there.
used_columns = table.iloc[:, 0:using_num]

SampleA_share_list = (used_columns.eq("SampleA").sum() / simulation_num).tolist()
SampleB_share_list = (used_columns.eq("SampleB").sum() / simulation_num).tolist()
SampleC_share_list = (used_columns.eq("SampleC").sum() / simulation_num).tolist()

# One line per product; the x axis is the 0-based position of the use.
plt.plot(SampleA_share_list, label = "SampleA")
plt.plot(SampleB_share_list, label = "SampleB")
plt.plot(SampleC_share_list, label = "SampleC")
# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05,1))
plt.xlabel("using_num")
plt.ylabel("number_share")

plt.title("number_share")

In [None]:
# Values used by the simulation

# Number of simulated users
simulation_num = 50_000

# Number of uses per user within the simulated period
using_num = 20

# Initial share of each product (ratio by count), in the order A, B, C
SampleA_first, SampleB_first, SampleC_first = 0.2, 0.3, 0.5

# Transition probabilities from one product to the next, one list per
# current product. SampleA_to holds, in order, the probabilities of
# A->A (repeat), A->B (trial of B) and A->C (trial of C).
SampleA_to, SampleB_to, SampleC_to = (
    [0.85, 0.05, 0.1],
    [0.05, 0.83, 0.12],
    [0.08, 0.08, 0.84],
)