In [1]:
from tqdm import tqdm
import numpy as np
import pandas as pd
%matplotlib inline

In [2]:
def generateSimpleData(delta, alpha, scale, season, init_oh, disc_len, weekMax):

    price = np.append(np.ones( weekMax - disc_len), (1 - delta)*np.ones(disc_len))
    
    slsu = np.zeros(weekMax)
    bst_est = np.zeros(weekMax)
    on_hand = np.zeros(weekMax)
    on_hand[0]=init_oh
    rew = np.zeros(weekMax)
    
    for weekNo in range(weekMax):
        if weekNo:
            on_hand[weekNo] = on_hand[weekNo-1] - slsu[weekNo-1]   
            
        curr_mean =scale*weekMax*season[weekNo]*(price[ weekNo]**(-alpha))
        demand = np.random.poisson(curr_mean)
        slsu[weekNo] = min(demand, on_hand[weekNo])
        rew[weekNo] = slsu[weekNo]*price[weekNo]
        bst_est[weekNo] = curr_mean
            # slsu[ShopNo, weekNo] = effictivityScale[ShopNo]*season[weekNo]*(priceMatrix[ShopNo, weekNo]**(-alpha))

    return price,  slsu, on_hand, rew, bst_est

In [3]:
weekMax = 12
init_oh = 800

season = np.array([1]*weekMax)
season = season/sum(season)

### Точная оценка Q и V

In [5]:
state_cols = [ 'oh_after', 'days_left']
next_state_cols = [col + '_next' for col in state_cols]

def gen_rl_df(delta, alpha, scale, season, init_oh, unit_price =1, weekMax = 12, disc_len = None):
    if disc_len !=None:
        dics =disc_len
    else:
        dics = np.random.randint(0, weekMax+1)
    price,  slsu, on_hand, rew, best_est = generateSimpleData(delta, alpha, scale, \
                                                    season, init_oh, disc_len = dics, weekMax = weekMax)

    res_ser = rew*unit_price
    days_left = np.arange(weekMax, 0, -1)-1


    df = pd.concat([pd.Series(price), 
               pd.Series(season),
               pd.Series(on_hand),
               pd.Series(days_left), 
               pd.Series(slsu), 
               pd.Series(res_ser).shift(-1), 
               pd.Series(best_est)], 1)

    
    df.columns = ['price','season', 'on_hand','days_left', 'slsu' , 'rev_ser', 'bst_est']
    df['d_r_sh'] = df.on_hand / df.slsu
    df['oh_after'] = df.on_hand - df.slsu


    df['action'] = (df.price -1. ).fillna(0)

    df_shifted = df[state_cols].shift(-1)
    df_shifted.columns = next_state_cols

    return pd.concat([df[state_cols], df_shifted, df[['price','action', 'rev_ser', 'slsu', 'bst_est']]], 1)
    


def V(oh_left, reg_sales,disc_sales,delta, days_left  ):
    res_list = [1*min(reg_sales*days_left,oh_left)]
    
    for i in range(days_left):
        reg_price_sales = min(i*reg_sales, oh_left)
        cor_proce_sales = min(disc_sales*(days_left - i),oh_left - reg_price_sales)
        res_list.append(reg_price_sales*1. + cor_proce_sales*(1- delta))

    return max(res_list)

def Q(oh_left, reg_sales,disc_sales,delta, days_left, action):
    if not days_left:
        return 0
    
    if action >= 0:
        res_list = [1*min(reg_sales*days_left,oh_left)]
        
        for i in range(1, days_left+1):
            reg_price_sales = min(i*reg_sales, oh_left)
            cor_proce_sales = min(disc_sales*(days_left - i),oh_left - reg_price_sales)
            res_list.append(reg_price_sales*1. + cor_proce_sales*(1- delta))

        return  max(res_list)
    
    return (1- delta)*min(disc_sales*days_left,oh_left)


## Обучение RF на точное значение Q

In [6]:

samples_list = []
for i in (range(1000)):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df = data_samples[state_cols + ['rev_ser', 'slsu', 'bst_est']]
df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [9]:
from sklearn.ensemble import RandomForestRegressor
r = RandomForestRegressor(n_jobs=-1, n_estimators=1000)
_ = r.fit(df[['oh_after', 'days_left']].values, df.Q_keep.values)

In [11]:

samples_list = []
for i in range(100):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df = data_samples[state_cols + ['rev_ser', 'slsu', 'bst_est']]
#df['oh_after'] = df.on_hand - df.slsu
df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)
df['predicted'] = r.predict(df[['oh_after', 'days_left']].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [12]:
r_dir = r

# Q-learning training

In [7]:

from sklearn.ensemble import RandomForestRegressor
r = RandomForestRegressor(n_jobs=-1, n_estimators=1000)
#r.fit(df[['oh_after', 'days_left']].values, df.Q_keep.values)

In [10]:
samples_list = []
for i in range(1000):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df =data_samples.fillna(0)
df['dumm_0'] = 0.
df['dumm_m35'] = -.35

df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

In [11]:
df

Unnamed: 0,oh_after,days_left,oh_after_next,days_left_next,price,action,rev_ser,slsu,bst_est,dumm_0,dumm_m35,V,Q_keep,Q_disc
0,757.0,11,724.0,10.0,1.00,0.00,33.00,43.0,40.000000,0.0,-0.35,562.0500,562.0500,492.0500
1,724.0,10,676.0,9.0,1.00,0.00,48.00,33.0,40.000000,0.0,-0.35,526.6000,526.6000,470.6000
2,676.0,9,632.0,8.0,1.00,0.00,44.00,48.0,40.000000,0.0,-0.35,481.4000,481.4000,439.4000
3,632.0,8,600.0,7.0,1.00,0.00,32.00,44.0,40.000000,0.0,-0.35,438.8000,438.8000,410.8000
4,600.0,7,566.0,6.0,1.00,0.00,34.00,32.0,40.000000,0.0,-0.35,404.0000,404.0000,390.0000
5,566.0,6,529.0,5.0,1.00,0.00,37.00,34.0,40.000000,0.0,-0.35,367.9000,347.6775,367.9000
6,529.0,5,483.0,4.0,1.00,0.00,46.00,37.0,40.000000,0.0,-0.35,307.6775,286.1420,307.6775
7,483.0,4,396.0,3.0,1.00,0.00,56.55,46.0,40.000000,0.0,-0.35,246.1420,224.6065,246.1420
8,396.0,3,313.0,2.0,0.65,-0.35,53.95,87.0,94.674556,0.0,-0.35,184.6065,163.0710,184.6065
9,313.0,2,223.0,1.0,0.65,-0.35,58.50,83.0,94.674556,0.0,-0.35,123.0710,101.5355,123.0710


In [15]:
for i in tqdm(range(40)):
    if i:
        
        df['pred_0']   = r.predict(df[['days_left_next','oh_after_next', 'dumm_0']])
        df['pred_m35'] = r.predict(df[['days_left_next','oh_after_next', 'dumm_m35']])
        
        df.loc[df.days_left == 0, ['pred_0','pred_m35']] = 0.
        
        df['V_next']   = np.maximum(df['pred_0'], df['pred_m35'])
        df['target']   = df.rev_ser + df['V_next']
    else:
        df['target'] = df.rev_ser
        
    r.fit(df[[ 'days_left','oh_after', 'action']].values, df.target.values)

100%|██████████| 120/120 [07:58<00:00,  3.99s/it]


In [50]:
df

Unnamed: 0,oh_after,days_left,oh_after_next,days_left_next,price,action,rev_ser,slsu,bst_est,V,Q_keep,Q_disc,dumm_0,dumm_m35,Q_keep_pred,Q_disc_pred
0,707.0,11,625.0,10.0,0.65,-0.35,53.30,93.0,94.674556,543.5500,543.5500,459.5500,0.0,-0.35,504.465729,497.022835
1,625.0,10,537.0,9.0,0.65,-0.35,57.20,82.0,94.674556,486.1420,486.1420,406.2500,0.0,-0.35,445.325183,410.455541
2,537.0,9,434.0,8.0,0.65,-0.35,66.95,88.0,94.674556,424.6065,424.6065,349.0500,0.0,-0.35,353.439830,348.982773
3,434.0,8,354.0,7.0,0.65,-0.35,52.00,103.0,94.674556,363.0710,363.0710,282.1000,0.0,-0.35,282.055650,282.055650
4,354.0,7,260.0,6.0,0.65,-0.35,61.10,80.0,94.674556,301.5355,301.5355,230.1000,0.0,-0.35,229.937079,229.937079
5,260.0,6,153.0,5.0,0.65,-0.35,69.55,94.0,94.674556,240.0000,240.0000,169.0000,0.0,-0.35,168.958991,168.958991
6,153.0,5,73.0,4.0,0.65,-0.35,52.00,107.0,94.674556,153.0000,153.0000,99.4500,0.0,-0.35,99.368250,99.368250
7,73.0,4,0.0,3.0,0.65,-0.35,47.45,80.0,94.674556,73.0000,73.0000,47.4500,0.0,-0.35,47.228350,47.228350
8,0.0,3,0.0,2.0,0.65,-0.35,0.00,73.0,94.674556,0.0000,0.0000,0.0000,0.0,-0.35,0.000000,0.000000
9,0.0,2,0.0,1.0,0.65,-0.35,0.00,0.0,94.674556,0.0000,0.0000,0.0000,0.0,-0.35,0.000000,0.000000


In [None]:
df.price == .65 & 

In [29]:
samples_list = []
for i in range(10):
    tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh)
    samples_list.append(tmp_df)
    
data_samples = pd.concat(samples_list)



df = data_samples.fillna(0)
df['V'] = df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
df['Q_keep'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
df['Q_disc'] = df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)


In [30]:
df['dumm_0'] = 0.
df['dumm_m35'] = -.35
df['Q_keep_pred'] = r.predict(df[['days_left','oh_after', 'dumm_0']].values)
df['Q_disc_pred'] = r.predict(df[['days_left','oh_after', 'dumm_m35']].values)

In [31]:
df[ ['days_left', 'oh_after','action','rev_ser' ,'Q_keep', 'Q_disc', 'Q_keep_pred', 'Q_disc_pred'] ]

Unnamed: 0,days_left,oh_after,action,rev_ser,Q_keep,Q_disc,Q_keep_pred,Q_disc_pred
0,11,707.0,-0.35,53.30,543.5500,459.5500,504.465729,497.022835
1,10,625.0,-0.35,57.20,486.1420,406.2500,445.325183,410.455541
2,9,537.0,-0.35,66.95,424.6065,349.0500,353.439830,348.982773
3,8,434.0,-0.35,52.00,363.0710,282.1000,282.055650,282.055650
4,7,354.0,-0.35,61.10,301.5355,230.1000,229.937079,229.937079
5,6,260.0,-0.35,69.55,240.0000,169.0000,168.958991,168.958991
6,5,153.0,-0.35,52.00,153.0000,99.4500,99.368250,99.368250
7,4,73.0,-0.35,47.45,73.0000,47.4500,47.228350,47.228350
8,3,0.0,-0.35,0.00,0.0000,0.0000,0.000000,0.000000
9,2,0.0,-0.35,0.00,0.0000,0.0000,0.000000,0.000000


In [35]:
r.predict(np.array([[2, 300, 0.]]))

array([98.3262983])

In [36]:
r.predict(np.array([[2, 300, -.35]]))

array([117.29264048])

In [48]:
tmp_df = gen_rl_df(delta = .35, alpha = 2., scale = 40, season = season, init_oh = init_oh, disc_len=0)


tmp_df['V'] = tmp_df.apply(lambda x:V(x['oh_after'], 40, 94.67, .35, int(x['days_left'])), 1)
tmp_df['Q_keep'] = tmp_df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), 0), 1)
tmp_df['Q_disc'] = tmp_df.apply(lambda x:Q(x['oh_after'], 40, 94.67, .35, int(x['days_left']), -.35), 1)

tmp_df['dumm_0'] = 0.
tmp_df['dumm_m35'] = -.35
tmp_df['Q_keep_pred'] = r.predict(tmp_df[['days_left','oh_after', 'dumm_0']].values)
tmp_df['Q_disc_pred'] = r.predict(tmp_df[['days_left','oh_after', 'dumm_m35']].values)

In [49]:
tmp_df

Unnamed: 0,oh_after,days_left,oh_after_next,days_left_next,price,action,rev_ser,slsu,bst_est,V,Q_keep,Q_disc,dumm_0,dumm_m35,Q_keep_pred,Q_disc_pred
0,752.0,11,720.0,10.0,1.0,0.0,32.0,48.0,40.0,558.8,558.8,488.8,0.0,-0.35,545.803462,545.803462
1,720.0,10,680.0,9.0,1.0,0.0,40.0,32.0,40.0,524.0,524.0,468.0,0.0,-0.35,510.008798,508.353737
2,680.0,9,645.0,8.0,1.0,0.0,35.0,40.0,40.0,484.0,484.0,442.0,0.0,-0.35,471.740311,470.020823
3,645.0,8,610.0,7.0,1.0,0.0,35.0,35.0,40.0,447.25,447.25,419.25,0.0,-0.35,432.097575,428.876599
4,610.0,7,569.0,6.0,1.0,0.0,41.0,35.0,40.0,409.213,409.213,396.5,0.0,-0.35,387.689917,387.858281
5,569.0,6,524.0,5.0,1.0,0.0,45.0,41.0,40.0,369.213,347.6775,369.213,0.0,-0.35,350.082372,350.085837
6,524.0,5,487.0,4.0,1.0,0.0,37.0,45.0,40.0,307.6775,286.142,307.6775,0.0,-0.35,285.122137,315.317934
7,487.0,4,434.0,3.0,1.0,0.0,53.0,37.0,40.0,246.142,224.6065,246.142,0.0,-0.35,236.897709,240.308331
8,434.0,3,391.0,2.0,1.0,0.0,43.0,53.0,40.0,184.6065,163.071,184.6065,0.0,-0.35,169.814073,193.832484
9,391.0,2,362.0,1.0,1.0,0.0,29.0,43.0,40.0,123.071,101.5355,123.071,0.0,-0.35,122.217498,115.559623
