In [1]:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import lognorm, norm, poisson
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
import category_encoders as ce
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
import gplearn as gpl        
from gplearn.genetic import SymbolicRegressor
import zipfile

In [14]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples={},
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    samples = {'sample '+str(i): s for i, s in enumerate(samples)} if not isinstance(samples, dict) else samples
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines', 
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def confidence_interval(mu, cov):
    std = np.sqrt(np.diag(cov)) #compute std
    uncertainty = 1.96 * std
    return mu, std, mu-uncertainty, mu+uncertainty

def order_quantity(mu, std, cu, co):
    cf = cu/(cu+co)
    return scipy.stats.norm.ppf(cf, loc=mu, scale=std)

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if features:
        features = [col for col in list(df.columns) if col not in [index, obs]]
        for col in features:
            fig.add_trace(go.Scatter(x=decompose.index, y=df[col].values, name=col, mode='lines'))
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

In [26]:
df = pd.read_csv('/home/skyolia/JupyterProjects/data/time_series/nestle.csv', sep=';')
df.rename(columns={"PERIOD_TAG": "Date", 'numeric_distribution_selling_promotion': 'promo',
                  'numeric_distribution_selling_promotion_hyperparmarkets': 'hyp_promo'}, inplace=True)
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df = df.loc[(df['product_group'].isin(['Product_04', 'Product_11', 'Product_17', 'Product_32']))]
df = df.drop(columns=[col for col in df.columns if len(df[col].unique()) == 1])
df

Unnamed: 0,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2
2,2016-08-07,Product_04,3976,3976,0,0.0,0.000000,0.0,0.0
8,2016-08-07,Product_11,363,363,0,0.0,0.000000,0.0,0.0
11,2016-08-07,Product_17,485,485,0,0.0,0.000000,0.0,0.0
20,2016-08-07,Product_32,224,224,0,0.0,0.000000,0.0,0.0
30,2016-08-14,Product_04,8845,8845,3752,71.9,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...
6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0
6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0
6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0
6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0


In [27]:
'''df['dayofweek'] = df['Date'].dt.dayofweek 
df['sin_dayofweek'] = np.sin(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df['cos_dayofweek'] = np.cos(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df.drop(columns=['dayofweek'], inplace=True)'''

df['dayofyear'] = df['Date'].dt.dayofyear
df['sin_dayofyear'] = np.sin(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df['cos_dayofyear'] = np.cos(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df.drop(columns=['dayofyear'], inplace=True)
df

Unnamed: 0,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,sin_dayofyear,cos_dayofyear
2,2016-08-07,Product_04,3976,3976,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
8,2016-08-07,Product_11,363,363,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
11,2016-08-07,Product_17,485,485,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
20,2016-08-07,Product_32,224,224,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
30,2016-08-14,Product_04,8845,8845,3752,71.9,0.000000,0.0,0.0,-0.693281,-0.720667
...,...,...,...,...,...,...,...,...,...,...,...
6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0,-0.154309,0.988023
6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0,-0.034422,0.999407
6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0,-0.034422,0.999407
6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0,-0.034422,0.999407


In [28]:
output_col = ['SellOut']
time_col = 'Date'
to_remove = ['dispatches_SellIn', 'orders_SellIn']
categorical = ['product_group']
binary = ['type_promo_1', 'type_promo_2']
numerical = [col for col in df.columns if col not in categorical + binary + to_remove + output_col + [time_col]]
df[numerical] = df[numerical].apply(pd.to_numeric,1)

In [17]:
fig = go.Figure(data=go.Heatmap(z=df[numerical+output_col].corr(),x=numerical+output_col,y=numerical+output_col))
fig.show()

In [29]:
train = df[df['Date'] < '2019-07-01']
test = df[df['Date']>='2019-07-01']
features = categorical + numerical + binary

X_train, X_test = train[features], test[features]
Y_train, Y_test = train[output_col] + 1e-15, test[output_col] + 1e-15
T_train, T_test = train[time_col], test[time_col]

y_scaler = MinMaxScaler(feature_range=(0, 1))
Y_train, Y_test = y_scaler.fit_transform(Y_train).ravel() + 1e-15, Y_test.values.ravel() + 1e-15

MS = MinMaxScaler(feature_range=(-1, 1))
scaled_train = MS.fit_transform(X_train[numerical])
scaled_test = MS.transform(X_test[numerical])
X_train[numerical], X_test[numerical] = scaled_train, scaled_test

encoder = ce.CatBoostEncoder(verbose=1, cols=categorical)
encoder.fit(X_train[categorical], Y_train)
X_train[categorical] = encoder.transform(X_train[categorical])
X_test[categorical] = encoder.transform(X_test[categorical])

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



((608, 7), (104, 7), (608,), (104,))

In [30]:
Y_train

array([1.00000000e-15, 1.00000000e-15, 1.00000000e-15, 1.00000000e-15,
       2.88460060e-01, 2.31413854e-02, 3.02144999e-02, 3.57499808e-02,
       2.75082648e-01, 2.22956869e-02, 2.65241793e-02, 4.91273929e-02,
       2.94149304e-01, 2.32182671e-02, 2.88306297e-02, 3.55193357e-02,
       7.35834551e-01, 2.53709541e-02, 4.58214807e-02, 3.45967556e-02,
       2.80925655e-01, 2.39102022e-02, 3.17521335e-02, 3.69032060e-02,
       2.62474052e-01, 2.14499885e-02, 2.96763281e-02, 3.58268625e-02,
       2.77004690e-01, 2.30645037e-02, 2.95994465e-02, 3.72107327e-02,
       2.83847159e-01, 2.45252556e-02, 2.99069732e-02, 9.29499500e-02,
       3.01914354e-01, 2.10655801e-02, 2.99838548e-02, 3.65187976e-02,
       3.29514877e-01, 2.29876220e-02, 3.14446068e-02, 3.78257861e-02,
       6.70869532e-01, 2.16806335e-02, 3.25978319e-02, 3.74413777e-02,
       3.32359499e-01, 2.58322442e-02, 3.47505190e-02, 1.02867687e-01,
       2.95225648e-01, 2.95225648e-02, 2.82924579e-02, 3.49811640e-02,
      

In [9]:
sub = train.loc[train['product_group'] == 'Product_04']
fig, trend, seasonal, residual = plot_ts_decomposition(sub, time_col, output_col[0], features=True)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

5354.75125 86176.00810771994 293.55750391996446
11.052422337278086 2104765.6168138 1450.7810368259572
-2.390162721893307 1273144.0113489272 1128.3368341718385


In [32]:
def power_f(x, d):
    d = np.round(np.clip(d, 1, 2)).astype(int)
    return np.power(x, d)

def _tanh(x, a, b):
    return np.tanh(a*x+b)

def _mle_poisson(true, pred, w=None):
    pred = np.where(pred>0, pred, np.exp(pred) - 1) + 1 + 1e-15
    ll = -np.mean(poisson.logpmf(true, pred))
    ll = np.nan_to_num(ll, nan=1e5)
    return ll

power = gpl.functions.make_function(power_f, 'pow', 2, wrap=True)
tanh = gpl.functions.make_function(_tanh, 'tanh', 3, wrap=True)
function_set = ['add', 'sub', 'mul', 'div', 'sqrt', power, 'log', 'min', 'max', 'sin', 'cos', 'abs', tanh]
mle_poisson = gpl.fitness.make_fitness(_mle_poisson, greater_is_better=False)

model = SymbolicRegressor(population_size = 2000, tournament_size=500, const_range=(-5,5),
                          generations = 500, stopping_criteria=-1000,
                          function_set = function_set, metric='mean absolute error',
                          p_crossover=0.65, p_subtree_mutation=0.15,
                          p_hoist_mutation=0.05, p_point_mutation=0.1, low_memory=True,
                          verbose = 1, random_state = None, n_jobs = -1)

model.fit(X_train, Y_train)
print(model._program)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    17.92            4.319       27        0.0344264              N/A      8.60m
   1     2.57          0.22953        3        0.0339857              N/A     11.66m
   2     1.62         0.263879        2        0.0344264              N/A      9.40m
   3     1.56         0.146191        2        0.0344264              N/A     11.15m
   4     1.68         0.188238        2        0.0344264              N/A     11.06m
   5     1.56         0.232818        3        0.0344264              N/A     11.67m
   6     1.63         0.176783        3        0.0341134              N/A     11.65m
   7     1.60         0.220957        1        0.0352046              N/A     11.81m
   8     1.77         0.273971        2        0.0344264              N/A  

  94     7.00         0.103645        7        0.0252935              N/A     38.32m
  95     7.17         0.119509        7        0.0252935              N/A     40.57m
  96     7.05         0.116996        7        0.0252935              N/A     40.20m
  97     7.03         0.123553        7        0.0252935              N/A     39.76m
  98     7.06         0.103491        7        0.0252935              N/A     38.57m
  99     6.96         0.124951        7        0.0252935              N/A     39.55m
 100     7.03         0.113107        7        0.0252935              N/A     39.73m
 101     7.09          0.10438        7        0.0252935              N/A     38.96m
 102     7.09         0.140985       11        0.0252854              N/A     38.80m
 103     6.94         0.498916        7        0.0252935              N/A     38.06m
 104     7.01         0.113393        7        0.0252935              N/A     38.69m
 105     6.99          0.10344        7        0.0252935         

 191     6.89         0.131013        9        0.0252781              N/A     29.70m
 192     7.13         0.104392        7        0.0252935              N/A     29.51m
 193     6.91          0.11383        9        0.0252479              N/A     30.43m
 194     6.92         0.111529        7        0.0252935              N/A     29.37m
 195     6.82         0.136357        7        0.0252935              N/A     29.48m
 196     7.08         0.112297       13        0.0252854              N/A     28.11m
 197     7.04          0.13928        7        0.0252935              N/A     28.41m
 198     6.79         0.102457        7        0.0252935              N/A     28.49m
 199     6.96         0.125609        7        0.0252935              N/A     28.09m
 200     7.02         0.228829        9        0.0252479              N/A     28.26m
 201     6.95         0.104797       14        0.0250263              N/A     27.57m
 202     7.06         0.132776        8        0.0250263         

 288     7.17         0.152245       11        0.0250263              N/A     19.95m
 289     7.04         0.122152        8        0.0250263              N/A     19.51m
 290     7.12         0.159249        7        0.0252935              N/A     20.10m
 291     6.99         0.563148        7        0.0252935              N/A     19.86m
 292     7.05         0.110261        7        0.0252935              N/A     19.46m
 293     6.94         0.111635        7        0.0252935              N/A     19.65m
 294     6.93         0.118582        7        0.0252935              N/A     19.26m
 295     7.04         0.173267        8        0.0250263              N/A     19.23m
 296     6.96         0.117643        7        0.0252935              N/A     19.72m
 297     7.05         0.108892        7        0.0252935              N/A     19.56m
 298     7.05         0.119578        7        0.0252935              N/A     19.12m
 299     7.04         0.162799        7        0.0252935         

 385     6.96         0.108385        7        0.0252935              N/A     10.94m
 386     6.96        0.0977806        7        0.0252935              N/A     10.74m
 387     7.06          0.11619        7        0.0252935              N/A     10.59m
 388     7.10         0.109956        7        0.0252935              N/A     10.65m
 389     7.13         0.134976        9        0.0250233              N/A     10.54m
 390     7.00         0.147827        8        0.0250263              N/A     10.41m
 391     6.93         0.128797        7        0.0252935              N/A     10.19m
 392     7.07         0.163288        7        0.0252935              N/A     10.12m
 393     6.93         0.125331        7        0.0252935              N/A     10.13m
 394     6.98         0.106713        7        0.0252935              N/A     10.13m
 395     7.10         0.104295        7        0.0252935              N/A      9.82m
 396     6.88         0.126192        8        0.0250263         

 482     7.04        0.0976178        7        0.0252935              N/A      1.61m
 483     7.08        0.0997972        7        0.0252935              N/A      1.55m
 484     7.07         0.104669        9        0.0251927              N/A      1.46m
 485     7.09         0.160131        7        0.0252935              N/A      1.33m
 486     6.98         0.108862        8        0.0250263              N/A      1.25m
 487     6.77         0.114832        7        0.0252935              N/A      1.15m
 488     7.14         0.106508        7        0.0252935              N/A      1.03m
 489     7.15         0.121951        7        0.0252935              N/A     57.00s
 490     7.04         0.109199        7        0.0252935              N/A     50.53s
 491     6.99         0.123277        7        0.0252935              N/A     46.39s
 492     7.13         0.102374        7        0.0252935              N/A     40.82s
 493     7.12         0.112067        7        0.0252935         

In [75]:
mus = []
for i in kernel_d:
    k = kernel_d[i]
    gpr = GPR(kernel=k, optimizer=None, alpha=1e-5).fit(X_train, Y_train)
    mus.append(gpr.predict(X_test, return_std=False))
mus = y_scaler.inverse_transform(np.stack(mus))
mus = dict(zip(kernel_d.keys(), mus))
kernel_d

{'promo': ArcCosine(variance=47.1, weight_variances=0.958, bias_variance=0.958) + WhiteKernel(noise_level=0.000981),
 'hyp_promo': ArcCosine(variance=148, weight_variances=1.14e-05, bias_variance=1.14e-05) + WhiteKernel(noise_level=1e-05),
 'product_group': ArcCosine(variance=339, weight_variances=0.014, bias_variance=0.014) + WhiteKernel(noise_level=6.54e-05),
 'binary': RBF(length_scale=255) * 11.1**2,
 'year': ExpSineSquared(length_scale=84.6, periodicity=364) * 27.7**2,
 'trend': ArcCosine(variance=46.3, weight_variances=1.38, bias_variance=1.38)}

In [76]:
gpr = GPR(kernel=kernel, optimizer=None).fit(X_train, Y_train)
mu_test, std_test = gpr.predict(X_test, return_std=True)
lb, ub = norm.ppf(0.025, mu_test, std_test), norm.ppf(0.975, mu_test, std_test)
oq = order_quantity(mu_test, std_test, 100, 400)

pred = y_scaler.inverse_transform(np.stack((mu_test, lb, ub)).T)
Y_train = y_scaler.inverse_transform(Y_train[:,np.newaxis]).ravel()
oq = y_scaler.inverse_transform(oq[:,np.newaxis]).ravel()

pred = pd.DataFrame({"mu": pred[:,0], "lb": pred[:,1], "ub": pred[:,2]})
mus = pd.DataFrame(mus)
pred = pd.concat((test.reset_index(), pred, mus), 1)

#fig = plot_gp(pred[:,0], pred[:,1], pred[:,2], T_test, Y_test, T_train, Y_train, samples=mus, layout='h')
#fig.show()
pred


divide by zero encountered in log


In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only



Unnamed: 0,index,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,delta_t,mu,lb,ub,promo.1,hyp_promo.1,product_group.1,binary,year,trend
0,5784,2019-07-07,Product_04,9955,9955,5923,0.5,0.000000,0.0,0.0,1064.0,5274.547906,-11511.555657,22060.651469,1382.201854,1629.670759,5386.770306,5385.538251,1919.221443,1992.146962
1,5791,2019-07-07,Product_11,0,0,286,0.1,0.000000,0.0,0.0,1064.0,600.354341,-16144.785841,17345.494523,1161.148207,1629.670759,270.441944,270.978267,1919.221443,1992.146962
2,5797,2019-07-07,Product_17,659,763,825,0.0,0.000000,0.0,0.0,1064.0,878.077580,-15864.539813,17620.694973,1102.869814,1629.670759,577.157278,577.589179,1919.221443,1992.146962
3,5812,2019-07-07,Product_32,1045,1045,1336,0.9,0.000000,0.0,0.0,1064.0,1330.470867,-15409.237850,18070.179583,1583.973958,1629.670759,1064.829676,1065.098129,1919.221443,1992.146962
4,5826,2019-07-14,Product_04,8940,8940,5912,0.4,0.000000,0.0,0.0,1071.0,5271.092449,-11556.132697,22098.317596,1328.746753,1629.670759,5386.770306,5385.538251,1870.003719,1992.127968
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0,1232.0,1174.458349,-16274.730418,18623.647115,1274.086386,1629.670759,1064.829676,1065.098129,1867.380179,1991.775411
100,6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0,1239.0,5122.602839,-12379.761885,22624.967563,1274.086386,1629.670759,5386.770306,5385.538251,1848.271137,1991.760680
101,6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0,1239.0,776.613422,-16754.973006,18308.199850,2543.392895,2968.382931,270.441944,270.978267,1848.271137,1991.760680
102,6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0,1239.0,754.198456,-16701.860046,18210.256957,2382.473173,1629.670759,577.157278,577.589179,1848.271137,1991.760680


In [77]:
store = 'Product_17'
sub_train = train.loc[train['product_group'] == store] 
sub_pred = pred.loc[pred['product_group'] == store] 
sub_mus = pred.loc[pred['product_group'] == store, list(kernel_d.keys())].to_dict('series')
err = mean_absolute_error(sub_pred[output_col[0]], sub_pred['mu'])
fig = plot_gp(sub_pred['mu'], sub_pred['lb'], sub_pred['ub'], sub_pred['Date'], sub_pred[output_col[0]], sub_train['Date'], sub_train[output_col[0]], name=err, samples=sub_mus, layout='h')
fig.show()

ValueError: Cannot index with multidimensional key

In [78]:
def mase(test_y, pred, train_y):
    e_t = test_y - pred
    scale = mean_absolute_error(train_y[1:], train_y[:-1])
    return np.mean(np.abs(e_t / scale))

def mape(test_y, pred):
    return np.round(np.mean(np.abs(100*(test_y-pred)/(test_y + 1e-9))), 0)

def rmspe(test_y, pred):
    return (np.sqrt(np.mean(np.square((test_y - pred) / (test_y + 1e-9))))) * 100

errors = {'MAE':[mean_absolute_error(Y_test, pred['mu'])], 
        'RMSE':[mean_squared_error(Y_test, pred['mu'], squared=False)], 
        'RMSPE': [rmspe(Y_test, pred['mu'])],
        'MAPE':[mape(Y_test, pred['mu'])],
        'R2':[r2_score(Y_test, pred['mu'])],
        'MASE':[mase(Y_test, pred['mu'], Y_train)]} 
errors = pd.DataFrame(errors, index =['THIS']) 
errors

Unnamed: 0,MAE,RMSE,RMSPE,MAPE,R2,MASE
THIS,310.672272,526.418838,47.79813,32.0,0.951615,0.120271


In [112]:
-1 * gpr.log_marginal_likelihood_value_

-256.6863537910383

In [79]:
from sklearn.inspection import permutation_importance
from sklearn.metrics import fbeta_score, make_scorer


result = permutation_importance(gpr, X_test, y_scaler.transform(Y_test[:,np.newaxis]).ravel(), n_repeats=10, random_state=42, n_jobs=2, scoring='r2')
fig = go.Figure()
# Use x instead of y argument for horizontal plot
for i in range(X_test.shape[1]):
    fig.add_trace(go.Box(x=result['importances'][i], name=X_test.columns[i]))

fig.show()


X does not have valid feature names, but MinMaxScaler was fitted with feature names

