In [12]:
from tqdm import tqdm
import numpy as np
import pandas as pd
%matplotlib inline

In [13]:
def generateSimpleData(delta, alpha, scale, season, init_oh, disc_len, weekMax):

    price = np.append(np.ones( weekMax - disc_len), (1 - delta)*np.ones(disc_len))
    
    slsu = np.zeros(weekMax)
    bst_est = np.zeros(weekMax)
    on_hand = np.zeros(weekMax)
    on_hand[0]=init_oh
    rew = np.zeros(weekMax)
    
    for weekNo in range(weekMax):
        if weekNo:
            on_hand[weekNo] = on_hand[weekNo-1] - slsu[weekNo-1]   
            
        curr_mean =scale*weekMax*season[weekNo]*(price[ weekNo]**(-alpha))
        demand = np.random.poisson(curr_mean)
        slsu[weekNo] = min(demand, on_hand[weekNo])
        rew[weekNo] = slsu[weekNo]*price[weekNo]
        bst_est[weekNo] = curr_mean
            # slsu[ShopNo, weekNo] = effictivityScale[ShopNo]*season[weekNo]*(priceMatrix[ShopNo, weekNo]**(-alpha))

    return price,  slsu, on_hand, rew, bst_est

In [14]:
weekMax = 12
init_oh = 800

season = np.array([1]*weekMax)
season = season/sum(season)

### Точная оценка Q и V

In [15]:
state_cols = [ 'oh_after', 'days_left']
next_state_cols = [col + '_next' for col in state_cols]

def gen_rl_df(delta, alpha, scale, season, init_oh, unit_price =1, weekMax = 12, disc_len = None):
    if disc_len !=None:
        dics =disc_len
    else:
        dics = np.random.randint(0, weekMax+1)
    price,  slsu, on_hand, rew, best_est = generateSimpleData(delta, alpha, scale, \
                                                    season, init_oh, disc_len = dics, weekMax = weekMax)

    res_ser = rew*unit_price
    days_left = np.arange(weekMax, 0, -1)-1


    df = pd.concat([pd.Series(price), 
               pd.Series(season),
               pd.Series(on_hand),
               pd.Series(days_left), 
               pd.Series(slsu), 
               pd.Series(res_ser).shift(-1), 
               pd.Series(best_est)], 1)

    
    df.columns = ['price','season', 'on_hand','days_left', 'slsu' , 'rev_ser', 'bst_est']
    df['d_r_sh'] = df.on_hand / df.slsu
    df['oh_after'] = df.on_hand - df.slsu


    df['action'] = (df.price -1. ).fillna(0)

    df_shifted = df[state_cols].shift(-1)
    df_shifted.columns = next_state_cols

    return pd.concat([df[state_cols], df_shifted, df[['price','action', 'rev_ser', 'slsu', 'bst_est']]], 1)
    


def V(oh_left, reg_sales,disc_sales,delta, days_left  ):
    res_list = [1*min(reg_sales*days_left,oh_left)]
    
    for i in range(days_left):
        reg_price_sales = min(i*reg_sales, oh_left)
        cor_proce_sales = min(disc_sales*(days_left - i),oh_left - reg_price_sales)
        res_list.append(reg_price_sales*1. + cor_proce_sales*(1- delta))

    return max(res_list)

def Q(oh_left, reg_sales,disc_sales,delta, days_left, action):
    if not days_left:
        return 0
    
    if action >= 0:
        res_list = [1*min(reg_sales*days_left,oh_left)]
        
        for i in range(1, days_left+1):
            reg_price_sales = min(i*reg_sales, oh_left)
            cor_proce_sales = min(disc_sales*(days_left - i),oh_left - reg_price_sales)
            res_list.append(reg_price_sales*1. + cor_proce_sales*(1- delta))

        return  max(res_list)
    
    return (1- delta)*min(disc_sales*days_left,oh_left)


## Обучение RF на точное значение Q

In [16]:

samples_list = []
for i in (range(1000)):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df = data_samples[state_cols + ['rev_ser', 'slsu', 'bst_est']]
df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
from sklearn.ensemble import RandomForestRegressor
r = RandomForestRegressor(n_jobs=-1, n_estimators=1000)
_ = r.fit(df[['oh_after', 'days_left']].values, df.Q_keep.values)

In [18]:

samples_list = []
for i in range(100):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df = data_samples[state_cols + ['rev_ser', 'slsu', 'bst_est']]
#df['oh_after'] = df.on_hand - df.slsu
df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)
df['predicted'] = r.predict(df[['oh_after', 'days_left']].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [19]:
r_dir = r

# Q-learning training

In [20]:

from sklearn.ensemble import RandomForestRegressor
r = RandomForestRegressor(n_jobs=-1, n_estimators=1000)
#r.fit(df[['oh_after', 'days_left']].values, df.Q_keep.values)

In [21]:
samples_list = []
for i in range(1000):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df =data_samples.fillna(0)
df['dumm_0'] = 0.
df['dumm_m35'] = -.35

df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

In [22]:
df

Unnamed: 0,oh_after,days_left,oh_after_next,days_left_next,price,action,rev_ser,slsu,bst_est,dumm_0,dumm_m35,V,Q_keep,Q_disc
0,764.0,11,728.0,10.0,1.00,0.00,36.00,36.0,40.000000,0.0,-0.35,566.6000,566.6000,496.6000
1,728.0,10,694.0,9.0,1.00,0.00,34.00,36.0,40.000000,0.0,-0.35,529.2000,529.2000,473.2000
2,694.0,9,651.0,8.0,1.00,0.00,43.00,34.0,40.000000,0.0,-0.35,489.2130,489.2130,451.1000
3,651.0,8,614.0,7.0,1.00,0.00,37.00,43.0,40.000000,0.0,-0.35,449.2130,449.2130,423.1500
4,614.0,7,576.0,6.0,1.00,0.00,38.00,37.0,40.000000,0.0,-0.35,409.2130,409.2130,399.1000
5,576.0,6,536.0,5.0,1.00,0.00,40.00,38.0,40.000000,0.0,-0.35,369.2130,347.6775,369.2130
6,536.0,5,435.0,4.0,1.00,0.00,65.65,40.0,40.000000,0.0,-0.35,307.6775,286.1420,307.6775
7,435.0,4,344.0,3.0,0.65,-0.35,59.15,101.0,94.674556,0.0,-0.35,246.1420,224.6065,246.1420
8,344.0,3,250.0,2.0,0.65,-0.35,61.10,91.0,94.674556,0.0,-0.35,184.6065,163.0710,184.6065
9,250.0,2,165.0,1.0,0.65,-0.35,55.25,94.0,94.674556,0.0,-0.35,123.0710,101.5355,123.0710


In [23]:
for i in tqdm(range(40)):
    if i:
        
        df['pred_0']   = r.predict(df[['days_left_next','oh_after_next', 'dumm_0']])
        df['pred_m35'] = r.predict(df[['days_left_next','oh_after_next', 'dumm_m35']])
        
        df.loc[df.days_left == 0, ['pred_0','pred_m35']] = 0.
        
        df['V_next']   = np.maximum(df['pred_0'], df['pred_m35'])
        df['target']   = df.rev_ser + df['V_next']
    else:
        df['target'] = df.rev_ser
        
    r.fit(df[[ 'days_left','oh_after', 'action']].values, df.target.values)

100%|██████████| 40/40 [02:49<00:00,  4.23s/it]


In [30]:
tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh, disc_len=0)


tmp_df['V'] = tmp_df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
tmp_df['Q_keep'] = tmp_df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
tmp_df['Q_disc'] = tmp_df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

tmp_df['dumm_0'] = 0.
tmp_df['dumm_m35'] = -.35
tmp_df['Q_keep_pred'] = r.predict(tmp_df[['days_left','oh_after', 'dumm_0']].values)
tmp_df['Q_disc_pred'] = r.predict(tmp_df[['days_left','oh_after', 'dumm_m35']].values)

In [31]:
df['dumm_0'] = 0.
df['dumm_m35'] = -.35
df['Q_keep_pred'] = r.predict(df[['days_left','oh_after', 'dumm_0']].values)
df['Q_disc_pred'] = r.predict(df[['days_left','oh_after', 'dumm_m35']].values)

In [32]:
df[ ['days_left', 'oh_after','action','rev_ser' ,'Q_keep', 'Q_disc', 'Q_keep_pred', 'Q_disc_pred'] ]

Unnamed: 0,days_left,oh_after,action,rev_ser,Q_keep,Q_disc,Q_keep_pred,Q_disc_pred
0,11,762.0,0.00,40.00,565.3000,495.3000,550.071678,550.071678
1,10,722.0,0.00,71.50,525.3000,469.3000,509.120448,508.592304
2,9,612.0,-0.35,53.95,453.8000,397.8000,444.430218,399.525815
3,8,529.0,-0.35,52.00,399.8500,343.8500,345.786657,343.825764
4,7,449.0,-0.35,53.30,344.6065,291.8500,291.841429,291.841429
5,6,367.0,-0.35,67.60,283.0710,238.5500,238.537998,238.537998
6,5,263.0,-0.35,56.55,221.5355,170.9500,170.906424,170.906424
7,4,176.0,-0.35,68.90,160.0000,114.4000,114.428540,114.428540
8,3,70.0,-0.35,45.50,70.0000,45.5000,45.442150,45.442150
9,2,0.0,-0.35,0.00,0.0000,0.0000,0.000000,0.000000


In [35]:
r.predict(np.array([[2, 300, 0.]]))

array([98.3262983])

In [36]:
r.predict(np.array([[2, 300, -.35]]))

array([117.29264048])

In [35]:
tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh, disc_len=0)


tmp_df['V'] = tmp_df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
tmp_df['Q_keep'] = tmp_df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
tmp_df['Q_disc'] = tmp_df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

tmp_df['dumm_0'] = 0.
tmp_df['dumm_m35'] = -.35
tmp_df['Q_keep_pred'] = r.predict(tmp_df[['days_left','oh_after', 'dumm_0']].values)
tmp_df['Q_disc_pred'] = r.predict(tmp_df[['days_left','oh_after', 'dumm_m35']].values)

In [36]:
tmp_df

Unnamed: 0,oh_after,days_left,oh_after_next,days_left_next,price,action,rev_ser,slsu,bst_est,V,Q_keep,Q_disc,dumm_0,dumm_m35,Q_keep_pred,Q_disc_pred
0,758.0,11,715.0,10.0,1.0,0.0,43.0,42.0,40.0,562.7,562.7,492.7,0.0,-0.35,547.919688,547.919688
1,715.0,10,680.0,9.0,1.0,0.0,35.0,43.0,40.0,520.75,520.75,464.75,0.0,-0.35,507.810231,506.754712
2,680.0,9,633.0,8.0,1.0,0.0,47.0,35.0,40.0,484.0,484.0,442.0,0.0,-0.35,469.608024,465.16339
3,633.0,8,585.0,7.0,1.0,0.0,48.0,47.0,40.0,439.45,439.45,411.45,0.0,-0.35,428.982421,424.71701
4,585.0,7,542.0,6.0,1.0,0.0,43.0,48.0,40.0,394.25,394.25,380.25,0.0,-0.35,397.247776,387.157614
5,542.0,6,504.0,5.0,1.0,0.0,38.0,43.0,40.0,352.3,347.6775,352.3,0.0,-0.35,353.168528,353.155873
6,504.0,5,464.0,4.0,1.0,0.0,40.0,38.0,40.0,307.6775,286.142,307.6775,0.0,-0.35,301.065868,320.406612
7,464.0,4,431.0,3.0,1.0,0.0,33.0,40.0,40.0,246.142,224.6065,246.142,0.0,-0.35,243.138742,254.323137
8,431.0,3,384.0,2.0,1.0,0.0,47.0,33.0,40.0,184.6065,163.071,184.6065,0.0,-0.35,175.997514,194.269129
9,384.0,2,352.0,1.0,1.0,0.0,32.0,47.0,40.0,123.071,101.5355,123.071,0.0,-0.35,121.335186,122.38476
