In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import seaborn as sns
import statsmodels.api as sm

# Load data

In [2]:
# Order tables read from CSV, one file per (group, period):
#   T_e2e_share   - treatment group, pre-experiment
#   T_e2e_post    - treatment group, post-experiment
#   T_other_share - control group, pre-experiment
T_e2e_share, T_e2e_post, T_other_share = (
    pd.read_csv(csv_name)
    for csv_name in (
        'order_e2e_pre_500.csv',
        'order_e2e_post_500.csv',
        'order_jd_pre_500.csv',
    )
)

In [3]:
# Control group, post-experiment order table.
T_matched_comp = pd.read_csv(filepath_or_buffer='order_jd_post_500.csv')

# Calculate Inventory Management Performance Metric

In [5]:
# Unit cost weights used by the metric cells:
#   h - charged per unit of non-negative inventory (holding cost)
#   b - charged per unit of negative inventory (stockout cost)
h, b = 1, 9

## Treatment Group, Post Exp

In [6]:
# Per-row inventory metrics for the treatment group, post-experiment.
# For every row this records: holding cost, stockout cost, their sum,
# turnover (ave_inv / ave_demand) and the share of stockout days.
T_e2e_comp = T_e2e_post
N_e2e = len(T_e2e_comp)
e2e_holding_cost, e2e_stockout_cost, e2e_total_cost = [], [], []
e2e_turnover, e2e_stockout_ratio = [], []
for row_pos in range(N_e2e):
    record = T_e2e_comp.iloc[row_pos]
    # test_inv is a string: strip its first and last character, then split on commas.
    levels = [float(tok) for tok in record.test_inv[1:-1].split(',')]
    n_days = len(levels)
    # NOTE(review): the last inventory entry is left out of the tallies below,
    # while the stockout ratio still divides by the full length - confirm intent.
    counted = levels[:-1]
    shortages = [lvl for lvl in counted if not (lvl >= 0)]

    on_hand_cost = sum(h * lvl for lvl in counted if lvl >= 0)
    shortage_cost = sum(-b * lvl for lvl in shortages)

    e2e_holding_cost.append(on_hand_cost)
    e2e_stockout_cost.append(shortage_cost)
    e2e_total_cost.append(on_hand_cost + shortage_cost)
    e2e_turnover.append(record.ave_inv / record.ave_demand)
    e2e_stockout_ratio.append(len(shortages) / n_days)

In [7]:
# Turnover values below zero are floored at zero before averaging.
e2e_turnover1 = [max(0, item) for item in e2e_turnover]

# Mean of each treatment-group metric, printed in a fixed order.
for label, series in (
    ('Algorithm A Average Holding cost: ', e2e_holding_cost),
    ('Algorithm A Average Stockout cost: ', e2e_stockout_cost),
    ('Algorithm A Average Total cost: ', e2e_total_cost),
    ('Algorithm A Average Turnover rate: ', e2e_turnover1),
    ('Algorithm A Average Stockout rate: ', e2e_stockout_ratio),
):
    print(label, np.mean(series))

Algorithm A Average Holding cost:  497.99
Algorithm A Average Stockout cost:  386.1
Algorithm A Average Total cost:  884.09
Algorithm A Average Turnover rate:  12.268704620140678
Algorithm A Average Stockout rate:  0.10827010628260629


## Control Group, Post Exp

In [91]:
# Per-row inventory metrics for the control group, post-experiment.
# For every row this records: holding cost, stockout cost, their sum,
# turnover (ave_inv / ave_demand) and the share of stockout days.
N_match = len(T_matched_comp)
jd_holding_cost, jd_stockout_cost, jd_total_cost = [], [], []
jd_turnover, jd_stockout_ratio = [], []
for row_pos in range(N_match):
    record = T_matched_comp.iloc[row_pos]
    # test_inv is a string: strip its first and last character, then split on commas.
    levels = [float(tok) for tok in record.test_inv[1:-1].split(',')]
    n_days = len(levels)
    # NOTE(review): the last inventory entry is left out of the tallies below,
    # while the stockout ratio still divides by the full length - confirm intent.
    counted = levels[:-1]
    shortages = [lvl for lvl in counted if not (lvl >= 0)]

    on_hand_cost = sum(h * lvl for lvl in counted if lvl >= 0)
    shortage_cost = sum(-b * lvl for lvl in shortages)

    jd_holding_cost.append(on_hand_cost)
    jd_stockout_cost.append(shortage_cost)
    jd_total_cost.append(on_hand_cost + shortage_cost)
    jd_turnover.append(record.ave_inv / record.ave_demand)
    jd_stockout_ratio.append(len(shortages) / n_days)

In [92]:
# Mean of each control-group metric.
print('Algorithm B Average Holding cost: ', np.mean(jd_holding_cost))
# Label fixed: this line reports the stockout cost, not a "holding stockout" cost.
print('Algorithm B Average Stockout cost: ', np.mean(jd_stockout_cost))
print('Algorithm B Average Total cost: ', np.mean(jd_total_cost))
# Turnover values below zero are floored at zero before averaging.
jd_turnover1 = [max(0, item) for item in jd_turnover]
print('Algorithm B Average Turnover rate: ', np.mean(jd_turnover1))
print('Algorithm B Average Stockout rate: ', np.mean(jd_stockout_ratio))

Algorithm B Average Holding cost:  841.428
Algorithm B Average Holding Stockout cost:  1143.72
Algorithm B Average Total cost:  1985.148
Algorithm B Average Turnover rate:  18.47105619138942
Algorithm B Average Stockout rate:  0.23652916666666668


## T-test

In [93]:
# t-test holding cost:
# Independent two-sample t-test (scipy defaults) of treatment vs. control.
# The samples get descriptive names so that `b`, the unit stockout cost used
# by the metric cells, is not rebound to an array here.
e2e_sample = np.array(e2e_holding_cost)
jd_sample = np.array(jd_holding_cost)
t, p = stats.ttest_ind(e2e_sample, jd_sample)
print("t = " , str(t))
print("p = " , str(p))

t =  -7.57492193507139
p =  8.176636956450516e-14


In [94]:
# t-test stockout cost:
# Independent two-sample t-test (scipy defaults) of treatment vs. control.
# The samples get descriptive names so that `b`, the unit stockout cost used
# by the metric cells, is not rebound to an array here.
e2e_sample = np.array(e2e_stockout_cost)
jd_sample = np.array(jd_stockout_cost)
t, p = stats.ttest_ind(e2e_sample, jd_sample)
print("t = " , str(t))
print("p = " , str(p))

t =  -7.765646288944399
p =  2.0079594211986885e-14


In [95]:
# t-test total cost:
# Independent two-sample t-test (scipy defaults) of treatment vs. control.
# The samples get descriptive names so that `b`, the unit stockout cost used
# by the metric cells, is not rebound to an array here.
e2e_sample = np.array(e2e_total_cost)
jd_sample = np.array(jd_total_cost)
t, p = stats.ttest_ind(e2e_sample, jd_sample)
print("t = " , str(t))
print("p = " , str(p))

t =  -11.914765084142427
p =  1.0703809200101026e-30


In [96]:
# t-test turnover:
# Independent two-sample t-test (scipy defaults) on the zero-floored turnover
# values. The samples get descriptive names so that `b`, the unit stockout
# cost used by the metric cells, is not rebound to an array here.
e2e_sample = np.array(e2e_turnover1)
jd_sample = np.array(jd_turnover1)
t, p = stats.ttest_ind(e2e_sample, jd_sample)
print("t = " , str(t))
print("p = " , str(p))

t =  -5.477551569311841
p =  5.4587417669170734e-08


In [97]:
# t-test stockout:
# Independent two-sample t-test (scipy defaults) on the per-row stockout ratio.
# The samples get descriptive names so that `b`, the unit stockout cost used
# by the metric cells, is not rebound to an array here.
e2e_sample = np.array(e2e_stockout_ratio)
jd_sample = np.array(jd_stockout_ratio)
t, p = stats.ttest_ind(e2e_sample, jd_sample)
print("t = " , str(t))
print("p = " , str(p))

t =  -8.427020034813369
p =  1.2259502974682909e-16


## Linear Regression

In [98]:
# Average-demand regressor: treatment rows first, control rows after.
x_demand = np.concatenate(
    [T_e2e_comp["ave_demand"], T_matched_comp["ave_demand"]]
)

In [99]:

# Integer `vlt` value for every treatment row.
# An entry is either already an integer, or a string whose first
# space-separated token is the integer (presumably a day count - TODO confirm).
vlt_e2e = []
for raw_vlt in T_e2e_comp.vlt.values:
    # isinstance also accepts numpy integer scalars, which `type(x) == int`
    # rejects; iterating .values keeps the access positional throughout.
    if isinstance(raw_vlt, (int, np.integer)):
        vlt_e2e.append(int(raw_vlt))
    else:
        vlt_e2e.append(int(raw_vlt.split(' ')[0]))
        

In [100]:
# Integer `vlt` value for every control row.
# Iterates the control table itself rather than range(N_e2e), so the result
# always has one entry per control row even if the group sizes differ.
# An entry is either already an integer, or a string whose first
# space-separated token is the integer (presumably a day count - TODO confirm).
vlt_match = []
for raw_vlt in T_matched_comp.vlt.values:
    if isinstance(raw_vlt, (int, np.integer)):
        vlt_match.append(int(raw_vlt))
    else:
        vlt_match.append(int(raw_vlt.split(' ')[0]))

In [101]:
# vlt regressor: treatment rows first, control rows after.
x_vlt = np.concatenate((vlt_e2e, vlt_match), axis = 0)

# Treatment indicator: 1 for each treatment row, 0 for each control row.
# Each half is sized from its own table (the zeros were previously sized with
# the treatment row count), matching how x_demand is assembled.
x_e2e = np.concatenate(
    (np.ones(len(T_e2e_comp)), np.zeros(len(T_matched_comp))), axis = 0
)

In [102]:
#holding cost
# OLS of the metric on the treatment indicator, vlt and average demand.
# NOTE(review): no constant column is added, so the fit has no intercept
# (the summary reports an uncentered R-squared) - confirm this is intended.
# The stacked response is built without rebinding `b`, the unit stockout cost.
y = np.concatenate((np.array(e2e_holding_cost), np.array(jd_holding_cost)), axis = 0)
df_lr = pd.DataFrame({'y': y, 'e2e':x_e2e, 'vlt':x_vlt, 'd':x_demand})
# Keyword form: the positional axis argument of DataFrame.drop is no longer accepted by current pandas.
X = df_lr.drop(columns='y')
model = sm.OLS(y, X)
lr =model.fit()
print(lr.summary())


                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.468
Model:                            OLS   Adj. R-squared (uncentered):              0.467
Method:                 Least Squares   F-statistic:                              292.8
Date:                Tue, 12 Jan 2021   Prob (F-statistic):                   2.89e-136
Time:                        13:18:58   Log-Likelihood:                         -8006.2
No. Observations:                1000   AIC:                                  1.602e+04
Df Residuals:                     997   BIC:                                  1.603e+04
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [103]:
#stockout cost
# OLS of the metric on the treatment indicator, vlt and average demand.
# NOTE(review): no constant column is added, so the fit has no intercept
# (the summary reports an uncentered R-squared) - confirm this is intended.
# The stacked response is built without rebinding `b`, the unit stockout cost.
y = np.concatenate((np.array(e2e_stockout_cost), np.array(jd_stockout_cost)), axis = 0)
df_lr = pd.DataFrame({'y': y, 'e2e':x_e2e, 'vlt':x_vlt, 'd':x_demand})
# Keyword form: the positional axis argument of DataFrame.drop is no longer accepted by current pandas.
X = df_lr.drop(columns='y')
model = sm.OLS(y, X)
lr =model.fit()
print(lr.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.289
Model:                            OLS   Adj. R-squared (uncentered):              0.287
Method:                 Least Squares   F-statistic:                              135.1
Date:                Tue, 12 Jan 2021   Prob (F-statistic):                    1.88e-73
Time:                        13:18:58   Log-Likelihood:                         -8722.4
No. Observations:                1000   AIC:                                  1.745e+04
Df Residuals:                     997   BIC:                                  1.747e+04
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [104]:
#total cost
# OLS of the metric on the treatment indicator, vlt and average demand.
# NOTE(review): no constant column is added, so the fit has no intercept
# (the summary reports an uncentered R-squared) - confirm this is intended.
# The stacked response is built without rebinding `b`, the unit stockout cost.
y = np.concatenate((np.array(e2e_total_cost), np.array(jd_total_cost)), axis = 0)
df_lr = pd.DataFrame({'y': y, 'e2e':x_e2e, 'vlt':x_vlt, 'd':x_demand})
# Keyword form: the positional axis argument of DataFrame.drop is no longer accepted by current pandas.
X = df_lr.drop(columns='y')
model = sm.OLS(y, X)
lr =model.fit()
print(lr.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.581
Model:                            OLS   Adj. R-squared (uncentered):              0.580
Method:                 Least Squares   F-statistic:                              461.3
Date:                Tue, 12 Jan 2021   Prob (F-statistic):                   6.88e-188
Time:                        13:18:59   Log-Likelihood:                         -8642.6
No. Observations:                1000   AIC:                                  1.729e+04
Df Residuals:                     997   BIC:                                  1.731e+04
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [105]:
#turnover rate
# OLS of the zero-floored turnover on the treatment indicator, vlt and average demand.
# NOTE(review): no constant column is added, so the fit has no intercept
# (the summary reports an uncentered R-squared) - confirm this is intended.
# The stacked response is built without rebinding `b`, the unit stockout cost.
y = np.concatenate((np.array(e2e_turnover1), np.array(jd_turnover1)), axis = 0)
df_lr = pd.DataFrame({'y': y, 'e2e':x_e2e, 'vlt':x_vlt, 'd':x_demand})
# Keyword form: the positional axis argument of DataFrame.drop is no longer accepted by current pandas.
X = df_lr.drop(columns='y')
model = sm.OLS(y, X)
lr =model.fit()
print(lr.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.309
Model:                            OLS   Adj. R-squared (uncentered):              0.307
Method:                 Least Squares   F-statistic:                              148.5
Date:                Tue, 12 Jan 2021   Prob (F-statistic):                    1.52e-79
Time:                        13:18:59   Log-Likelihood:                         -4403.3
No. Observations:                1000   AIC:                                      8813.
Df Residuals:                     997   BIC:                                      8827.
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [106]:
#stockout rate
# OLS of the stockout ratio on the treatment indicator, vlt and average demand.
# NOTE(review): no constant column is added, so the fit has no intercept
# (the summary reports an uncentered R-squared) - confirm this is intended.
# The stacked response is built without rebinding `b`, the unit stockout cost.
y = np.concatenate((np.array(e2e_stockout_ratio), np.array(jd_stockout_ratio)), axis = 0)
df_lr = pd.DataFrame({'y': y, 'e2e':x_e2e, 'vlt':x_vlt, 'd':x_demand})
# Keyword form: the positional axis argument of DataFrame.drop is no longer accepted by current pandas.
X = df_lr.drop(columns='y')
model = sm.OLS(y, X)
lr =model.fit()
print(lr.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.380
Model:                            OLS   Adj. R-squared (uncentered):              0.378
Method:                 Least Squares   F-statistic:                              203.5
Date:                Tue, 12 Jan 2021   Prob (F-statistic):                   5.93e-103
Time:                        13:18:59   Log-Likelihood:                          14.906
No. Observations:                1000   AIC:                                     -23.81
Df Residuals:                     997   BIC:                                     -9.089
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

# Difference-in-Difference Estimation

In [107]:
# Snapshot the post-experiment metric lists before the same names are reused
# for the pre-experiment period.
# Order in both lists: holding cost, stockout cost, total cost, turnover, stockout ratio.
post_exp_e2e = [e2e_holding_cost, e2e_stockout_cost, e2e_total_cost, e2e_turnover1, e2e_stockout_ratio]
# The control list stores the zero-floored turnover (jd_turnover1), matching
# the treatment list and the pre-period turnover used in the DID comparison.
post_exp_jd = [jd_holding_cost, jd_stockout_cost, jd_total_cost, jd_turnover1, jd_stockout_ratio]

In [108]:
# Re-establish the unit cost weights before the pre-experiment metrics are
# computed, in case `h` or `b` was rebound by an intermediate cell.
#   h - charged per unit of non-negative inventory (holding cost)
#   b - charged per unit of negative inventory (stockout cost)
h, b = 1, 9

## Treatment Group, Pre Exp

In [109]:
# Per-row inventory metrics for the treatment group, pre-experiment.
# NOTE: this rebinds T_e2e_comp and the e2e_* lists to the pre-period data.
T_e2e_comp = T_e2e_share
e2e_holding_cost, e2e_stockout_cost, e2e_total_cost = [], [], []
e2e_turnover, e2e_stockout_ratio = [], []
for row_pos in range(len(T_e2e_comp)):
    record = T_e2e_comp.iloc[row_pos]
    # test_inv is a string: strip its first and last character, then split on commas.
    levels = [float(tok) for tok in record.test_inv[1:-1].split(',')]
    n_days = len(levels)
    # NOTE(review): the last inventory entry is left out of the tallies below,
    # while the stockout ratio still divides by the full length - confirm intent.
    counted = levels[:-1]
    shortages = [lvl for lvl in counted if not (lvl >= 0)]

    on_hand_cost = sum(h * lvl for lvl in counted if lvl >= 0)
    shortage_cost = sum(-b * lvl for lvl in shortages)

    e2e_holding_cost.append(on_hand_cost)
    e2e_stockout_cost.append(shortage_cost)
    e2e_total_cost.append(on_hand_cost + shortage_cost)
    e2e_turnover.append(record.ave_inv / record.ave_demand)
    e2e_stockout_ratio.append(len(shortages) / n_days)

In [110]:
# Turnover values below zero are floored at zero before averaging.
e2e_turnover1 = [max(0, item) for item in e2e_turnover]

# Mean of each treatment-group metric, printed in a fixed order.
for label, series in (
    ('Algorithm A Average Holding cost: ', e2e_holding_cost),
    ('Algorithm A Average Stockout cost: ', e2e_stockout_cost),
    ('Algorithm A Average Total cost: ', e2e_total_cost),
    ('Algorithm A Average Turnover rate: ', e2e_turnover1),
    ('Algorithm A Average Stockout rate: ', e2e_stockout_ratio),
):
    print(label, np.mean(series))

Algorithm A Average Holding cost:  666.394
Algorithm A Average Stockout cost:  815.868
Algorithm A Average Total cost:  1482.262
Algorithm A Average Turnover rate:  15.487856815194194
Algorithm A Average Stockout rate:  0.23613333333333333


## Control Group, Pre Exp

In [111]:
# Per-row inventory metrics for the control group, pre-experiment.
# NOTE: this rebinds T_matched_comp, N_match and the jd_* lists to the pre-period data.
T_matched_comp = T_other_share
N_match = len(T_matched_comp)
jd_holding_cost, jd_stockout_cost, jd_total_cost = [], [], []
jd_turnover, jd_stockout_ratio = [], []
for row_pos in range(N_match):
    record = T_matched_comp.iloc[row_pos]
    # test_inv is a string: strip its first and last character, then split on commas.
    levels = [float(tok) for tok in record.test_inv[1:-1].split(',')]
    n_days = len(levels)
    # NOTE(review): the last inventory entry is left out of the tallies below,
    # while the stockout ratio still divides by the full length - confirm intent.
    counted = levels[:-1]
    shortages = [lvl for lvl in counted if not (lvl >= 0)]

    on_hand_cost = sum(h * lvl for lvl in counted if lvl >= 0)
    shortage_cost = sum(-b * lvl for lvl in shortages)

    jd_holding_cost.append(on_hand_cost)
    jd_stockout_cost.append(shortage_cost)
    jd_total_cost.append(on_hand_cost + shortage_cost)
    jd_turnover.append(record.ave_inv / record.ave_demand)
    jd_stockout_ratio.append(len(shortages) / n_days)

In [112]:
# Mean of each control-group metric.
print('Algorithm B Average Holding cost: ', np.mean(jd_holding_cost))
# Label fixed: this line reports the stockout cost, not a "holding stockout" cost.
print('Algorithm B Average Stockout cost: ', np.mean(jd_stockout_cost))
print('Algorithm B Average Total cost: ', np.mean(jd_total_cost))
# Turnover values below zero are floored at zero before averaging.
jd_turnover1 = [max(0, item) for item in jd_turnover]
print('Algorithm B Average Turnover rate: ', np.mean(jd_turnover1))
print('Algorithm B Average Stockout rate: ', np.mean(jd_stockout_ratio))

Algorithm B Average Holding cost:  506.46
Algorithm B Average Holding Stockout cost:  473.13
Algorithm B Average Total cost:  979.59
Algorithm B Average Turnover rate:  15.152613035317902
Algorithm B Average Stockout rate:  0.19752527930902117


In [113]:
# DID holding cost
# Element-wise treatment-minus-control gap for each period.
# Assumes row i of the treatment table pairs with row i of the control table - TODO confirm.
post = np.subtract(post_exp_e2e[0], post_exp_jd[0])
pre = np.subtract(e2e_holding_cost, jd_holding_cost)

In [114]:
# Independent two-sample t-test (scipy defaults) of the post-period gaps
# against the pre-period gaps.
# post/pre are passed directly instead of through `a`/`b` aliases, so `b`,
# the unit stockout cost used by the metric cells, is not rebound.
t, p = stats.ttest_ind(post, pre)
print("t = " , str(t))
print("p = " , str(p))


t =  -8.628115507392884
p =  2.426748791349065e-17


In [115]:
# DID stockout cost
# Element-wise treatment-minus-control gap for each period.
# Assumes row i of the treatment table pairs with row i of the control table - TODO confirm.
post = np.subtract(post_exp_e2e[1], post_exp_jd[1])
pre = np.subtract(e2e_stockout_cost, jd_stockout_cost)

In [116]:
# Independent two-sample t-test (scipy defaults) of the post-period gaps
# against the pre-period gaps.
# post/pre are passed directly instead of through `a`/`b` aliases, so `b`,
# the unit stockout cost used by the metric cells, is not rebound.
t, p = stats.ttest_ind(post, pre)
print("t = " , str(t))
print("p = " , str(p))



t =  -10.024732956234065
p =  1.3342089234992861e-22


In [117]:
# DID total cost
# Element-wise treatment-minus-control gap for each period.
# Assumes row i of the treatment table pairs with row i of the control table - TODO confirm.
post = np.subtract(post_exp_e2e[2], post_exp_jd[2])
pre = np.subtract(e2e_total_cost, jd_total_cost)

In [118]:
# Independent two-sample t-test (scipy defaults) of the post-period gaps
# against the pre-period gaps.
# post/pre are passed directly instead of through `a`/`b` aliases, so `b`,
# the unit stockout cost used by the metric cells, is not rebound.
t, p = stats.ttest_ind(post, pre)
print("t = " , str(t))
print("p = " , str(p))



t =  -15.63288508210019
p =  1.9298811915945072e-49


In [119]:
# DID turnover rate
# Element-wise treatment-minus-control gap for each period.
# Assumes row i of the treatment table pairs with row i of the control table - TODO confirm.
post = np.subtract(post_exp_e2e[3], post_exp_jd[3])
pre = np.subtract(e2e_turnover1, jd_turnover1)

In [120]:
# Independent two-sample t-test (scipy defaults) of the post-period gaps
# against the pre-period gaps.
# post/pre are passed directly instead of through `a`/`b` aliases, so `b`,
# the unit stockout cost used by the metric cells, is not rebound.
t, p = stats.ttest_ind(post, pre)
print("t = " , str(t))
print("p = " , str(p))

t =  -2.9015848716410195
p =  0.0037943829761408732


In [121]:
# DID stockout rate
# Element-wise treatment-minus-control gap for each period.
# Assumes row i of the treatment table pairs with row i of the control table - TODO confirm.
post = np.subtract(post_exp_e2e[4], post_exp_jd[4])
pre = np.subtract(e2e_stockout_ratio, jd_stockout_ratio)

In [122]:
# Independent two-sample t-test (scipy defaults) of the post-period gaps
# against the pre-period gaps.
# post/pre are passed directly instead of through `a`/`b` aliases, so `b`,
# the unit stockout cost used by the metric cells, is not rebound.
t, p = stats.ttest_ind(post, pre)
print("t = " , str(t))
print("p = " , str(p))

t =  -7.674689405907815
p =  3.9373204680470405e-14
