In [9]:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import lognorm, norm, poisson
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
import category_encoders as ce
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
import gplearn as gpl        
from gplearn.genetic import SymbolicRegressor
import zipfile

In [4]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples={},
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    samples = {'sample '+str(i): s for i, s in enumerate(samples)} if not isinstance(samples, dict) else samples
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines', 
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def confidence_interval(mu, cov):
    std = np.sqrt(np.diag(cov)) #compute std
    uncertainty = 1.96 * std
    return mu, std, mu-uncertainty, mu+uncertainty

def order_quantity(mu, std, cu, co):
    cf = cu/(cu+co)
    return scipy.stats.norm.ppf(cf, loc=mu, scale=std)

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if features:
        features = [col for col in list(df.columns) if col not in [index, obs]]
        for col in features:
            fig.add_trace(go.Scatter(x=decompose.index, y=df[col].values, name=col, mode='lines'))
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

In [17]:
df = pd.read_csv('/home/skyolia/JupyterProjects/data/time_series/nestle.csv', sep=';')
df.rename(columns={"PERIOD_TAG": "Date", 'numeric_distribution_selling_promotion': 'promo',
                  'numeric_distribution_selling_promotion_hyperparmarkets': 'hyp_promo'}, inplace=True)
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df = df.loc[(df['product_group'].isin(['Product_04', 'Product_11', 'Product_17', 'Product_32']))]
df = df.drop(columns=[col for col in df.columns if len(df[col].unique()) == 1])
df

Unnamed: 0,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2
2,2016-08-07,Product_04,3976,3976,0,0.0,0.000000,0.0,0.0
8,2016-08-07,Product_11,363,363,0,0.0,0.000000,0.0,0.0
11,2016-08-07,Product_17,485,485,0,0.0,0.000000,0.0,0.0
20,2016-08-07,Product_32,224,224,0,0.0,0.000000,0.0,0.0
30,2016-08-14,Product_04,8845,8845,3752,71.9,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...
6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0
6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0
6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0
6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0


In [18]:
'''df['dayofweek'] = df['Date'].dt.dayofweek 
df['sin_dayofweek'] = np.sin(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df['cos_dayofweek'] = np.cos(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df.drop(columns=['dayofweek'], inplace=True)'''

df['dayofyear'] = df['Date'].dt.dayofyear
df['sin_dayofyear'] = np.sin(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df['cos_dayofyear'] = np.cos(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df.drop(columns=['dayofyear'], inplace=True)
df

Unnamed: 0,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,sin_dayofyear,cos_dayofyear
2,2016-08-07,Product_04,3976,3976,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
8,2016-08-07,Product_11,363,363,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
11,2016-08-07,Product_17,485,485,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
20,2016-08-07,Product_32,224,224,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
30,2016-08-14,Product_04,8845,8845,3752,71.9,0.000000,0.0,0.0,-0.693281,-0.720667
...,...,...,...,...,...,...,...,...,...,...,...
6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0,-0.154309,0.988023
6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0,-0.034422,0.999407
6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0,-0.034422,0.999407
6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0,-0.034422,0.999407


In [19]:
output_col = ['SellOut']
time_col = 'Date'
to_remove = ['dispatches_SellIn', 'orders_SellIn']
categorical = ['product_group']
binary = ['type_promo_1', 'type_promo_2']
numerical = [col for col in df.columns if col not in categorical + binary + to_remove + output_col + [time_col]]
df[numerical] = df[numerical].apply(pd.to_numeric,1)

In [48]:
fig = go.Figure(data=go.Heatmap(z=df[numerical+output_col].corr(),x=numerical+output_col,y=numerical+output_col))
fig.show()

In [20]:
train = df[df['Date'] < '2019-07-01']
test = df[df['Date']>='2019-07-01']
features = categorical + numerical + binary

X_train, X_test = train[features], test[features]
Y_train, Y_test = train[output_col], test[output_col]
T_train, T_test = train[time_col], test[time_col]

y_scaler = MinMaxScaler(feature_range=(0, 1))
Y_train, Y_test = y_scaler.fit_transform(Y_train).ravel() + 1e-15, Y_test.values.ravel() + 1e-15

MS = MinMaxScaler(feature_range=(-1, 1))
scaled_train = MS.fit_transform(X_train[numerical])
scaled_test = MS.transform(X_test[numerical])
X_train[numerical], X_test[numerical] = scaled_train, scaled_test

encoder = ce.CatBoostEncoder(verbose=1, cols=categorical)
encoder.fit(X_train[categorical], Y_train)
X_train[categorical] = encoder.transform(X_train[categorical])
X_test[categorical] = encoder.transform(X_test[categorical])

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



((608, 7), (104, 7), (608,), (104,))

In [21]:
X_train

Unnamed: 0,product_group,promo,hyp_promo,sin_dayofyear,cos_dayofyear,type_promo_1,type_promo_2
2,0.412354,-1.000000,-1.0,-0.601630,-0.798813,0.0,0.0
8,0.021573,-1.000000,-1.0,-0.601630,-0.798813,0.0,0.0
11,0.045000,-1.000000,-1.0,-0.601630,-0.798813,0.0,0.0
20,0.082248,-1.000000,-1.0,-0.601630,-0.798813,0.0,0.0
30,0.412354,0.528162,-1.0,-0.693288,-0.720699,0.0,0.0
...,...,...,...,...,...,...,...
5728,0.082248,-0.987248,-1.0,0.145801,-0.989351,0.0,0.0
5742,0.412354,-0.985122,-1.0,0.025819,-0.999704,0.0,0.0
5749,0.021573,-0.997875,-1.0,0.025819,-0.999704,0.0,0.0
5755,0.045000,-0.995749,-1.0,0.025819,-0.999704,0.0,0.0


In [33]:
sub = train.loc[train['product_group'] == 'Product_04']
fig, trend, seasonal, residual = plot_ts_decomposition(sub, time_col, output_col[0], features=True)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

5354.75125 86176.00810771994 293.55750391996446
11.052422337278086 2104765.6168138 1450.7810368259572
-2.390162721893307 1273144.0113489272 1128.3368341718385


In [23]:
def power_f(x, d):
    d = np.round(np.clip(d, 1, 2)).astype(int)
    return np.power(x, d)

def _tanh(x, a, b):
    return np.tanh(a*x+b)

def _mle_poisson(true, pred, w=None):
    pred = np.where(pred>0, pred, np.exp(pred) - 1) + 1 + 1e-15
    ll = -np.mean(poisson.logpmf(true, pred))
    ll = np.nan_to_num(ll, nan=1e5)
    return ll

power = gpl.functions.make_function(power_f, 'pow', 2, wrap=True)
tanh = gpl.functions.make_function(_tanh, 'tanh', 3, wrap=True)
function_set = ['add', 'sub', 'mul', 'div', 'sqrt', power, 'log', 'min', 'max', 'sin', 'cos', 'abs', tanh]
mle_poisson = gpl.fitness.make_fitness(_mle_poisson, greater_is_better=False)

model = SymbolicRegressor(population_size = 2000, tournament_size=500, const_range=(-5,5),
                          generations = 500, stopping_criteria=-1000,
                          function_set = function_set, metric='mean absolute error',
                          p_crossover=0.65, p_subtree_mutation=0.15,
                          p_hoist_mutation=0.05, p_point_mutation=0.1, low_memory=True,
                          verbose = 1, random_state = None, n_jobs = -1)

model.fit(X_train, Y_train)
print(model._program)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    17.61          2.03679       13        0.0341085              N/A      7.58m
   1     6.98         0.321451       11        0.0285143              N/A      8.07m
   2     3.00         0.343931        6         0.027986              N/A      9.59m
   3     4.45         0.568086        4        0.0294371              N/A      8.71m
   4     4.21         0.444017        4        0.0294371              N/A      8.85m
   5     4.14         0.443726        4        0.0294371              N/A      8.90m
   6     4.36         0.417887        6        0.0293228              N/A      9.67m
   7     4.29         0.407933        7        0.0285143              N/A      8.93m
   8     4.37         0.443223        4        0.0294371              N/A  

  94     4.88         0.145019        7        0.0252935              N/A     24.25m
  95     7.16         0.137769        7        0.0252935              N/A     19.72m
  96     7.02         0.105673        7        0.0252935              N/A      8.47m
  97     6.94         0.112412        7        0.0252935              N/A     10.49m
  98     7.05         0.118507        7        0.0252935              N/A     15.64m
  99     7.01         0.120239       11        0.0252062              N/A     26.24m
 100     6.95         0.156367        9        0.0251791              N/A     33.35m
 101     7.02         0.110995        7        0.0252935              N/A     35.36m
 102     6.91         0.115507        7        0.0252935              N/A     36.81m
 103     7.11         0.111413        7        0.0252935              N/A     37.31m
 104     7.10         0.107093        7        0.0252935              N/A     36.89m
 105     7.09         0.154959        7        0.0252935         

 191     7.07         0.110099        7        0.0252935              N/A     28.97m
 192     6.93         0.134356        7        0.0252935              N/A     28.34m
 193     7.08         0.111191        7        0.0252935              N/A     28.37m
 194     7.08        0.0980654        7        0.0252935              N/A     28.52m
 195     7.10         0.116336        7        0.0252935              N/A     28.49m
 196     6.94          0.10845        9        0.0252479              N/A     28.09m
 197     7.00        0.0967623        7        0.0252935              N/A     27.90m
 198     7.00        0.0993937        7        0.0252935              N/A     26.80m
 199     7.10         0.118945        7        0.0252935              N/A     28.00m
 200     7.01         0.106797        7        0.0252935              N/A     27.76m
 201     7.06         0.118245        7        0.0252935              N/A     27.92m
 202     7.02         0.116531        7        0.0252935         

 288     7.04         0.113201        7        0.0252935              N/A     19.52m
 289     6.87         0.126512        8        0.0252854              N/A     19.39m
 290     6.93         0.107133        7        0.0252935              N/A     19.10m
 291     6.86         0.125211        7        0.0252935              N/A     18.81m
 292     7.08          0.11848        7        0.0252935              N/A     18.66m
 293     7.09         0.112971        7        0.0252935              N/A     18.78m
 294     7.03         0.106983        9        0.0252479              N/A     18.51m
 295     7.08         0.123909        7        0.0252935              N/A     18.63m
 296     7.10         0.117937        7        0.0252935              N/A     18.38m
 297     7.08         0.108923        7        0.0252935              N/A     19.18m
 298     7.06         0.121727        7        0.0252935              N/A     18.67m
 299     7.09         0.139467        8        0.0252854         

 385     7.02         0.125617        7        0.0252935              N/A     10.32m
 386     7.02         0.113751        7        0.0252935              N/A     10.21m
 387     7.01         0.116011        7        0.0252935              N/A     10.08m
 388     7.04         0.105586       11        0.0251927              N/A      9.91m
 389     6.96         0.129848        7        0.0252935              N/A      9.92m
 390     6.95         0.105792       13        0.0249215              N/A      9.67m
 391     7.17        0.0943589        8        0.0252854              N/A      9.75m
 392     6.89         0.107866        8        0.0252854              N/A      9.52m
 393     7.03         0.110897        7        0.0252935              N/A      9.46m
 394     6.94         0.136898        8        0.0250263              N/A      9.35m
 395     6.98         0.108281        7        0.0252935              N/A      9.63m
 396     7.17          0.12752        7        0.0252935         

 482     6.99         0.101416        7        0.0252935              N/A      1.51m
 483     6.97         0.116601        7        0.0252935              N/A      1.45m
 484     7.15         0.100991        7        0.0252935              N/A      1.31m
 485     7.11         0.165734        8        0.0250263              N/A      1.29m
 486     7.16         0.111411        7        0.0252935              N/A      1.16m
 487     7.14         0.120625        9        0.0252854              N/A      1.07m
 488     7.09         0.100708        7        0.0252935              N/A     58.26s
 489     6.97         0.111723        7        0.0252935              N/A     55.19s
 490     7.17         0.506522        7        0.0252935              N/A     47.56s
 491     7.12         0.138159        8        0.0250263              N/A     42.18s
 492     6.90          0.11297        7        0.0252935              N/A     38.00s
 493     6.98         0.128879       12        0.0252854         

In [75]:
mus = []
for i in kernel_d:
    k = kernel_d[i]
    gpr = GPR(kernel=k, optimizer=None, alpha=1e-5).fit(X_train, Y_train)
    mus.append(gpr.predict(X_test, return_std=False))
mus = y_scaler.inverse_transform(np.stack(mus))
mus = dict(zip(kernel_d.keys(), mus))
kernel_d

{'promo': ArcCosine(variance=47.1, weight_variances=0.958, bias_variance=0.958) + WhiteKernel(noise_level=0.000981),
 'hyp_promo': ArcCosine(variance=148, weight_variances=1.14e-05, bias_variance=1.14e-05) + WhiteKernel(noise_level=1e-05),
 'product_group': ArcCosine(variance=339, weight_variances=0.014, bias_variance=0.014) + WhiteKernel(noise_level=6.54e-05),
 'binary': RBF(length_scale=255) * 11.1**2,
 'year': ExpSineSquared(length_scale=84.6, periodicity=364) * 27.7**2,
 'trend': ArcCosine(variance=46.3, weight_variances=1.38, bias_variance=1.38)}

In [76]:
gpr = GPR(kernel=kernel, optimizer=None).fit(X_train, Y_train)
mu_test, std_test = gpr.predict(X_test, return_std=True)
lb, ub = norm.ppf(0.025, mu_test, std_test), norm.ppf(0.975, mu_test, std_test)
oq = order_quantity(mu_test, std_test, 100, 400)

pred = y_scaler.inverse_transform(np.stack((mu_test, lb, ub)).T)
Y_train = y_scaler.inverse_transform(Y_train[:,np.newaxis]).ravel()
oq = y_scaler.inverse_transform(oq[:,np.newaxis]).ravel()

pred = pd.DataFrame({"mu": pred[:,0], "lb": pred[:,1], "ub": pred[:,2]})
mus = pd.DataFrame(mus)
pred = pd.concat((test.reset_index(), pred, mus), 1)

#fig = plot_gp(pred[:,0], pred[:,1], pred[:,2], T_test, Y_test, T_train, Y_train, samples=mus, layout='h')
#fig.show()
pred


divide by zero encountered in log


In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only



Unnamed: 0,index,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,delta_t,mu,lb,ub,promo.1,hyp_promo.1,product_group.1,binary,year,trend
0,5784,2019-07-07,Product_04,9955,9955,5923,0.5,0.000000,0.0,0.0,1064.0,5274.547906,-11511.555657,22060.651469,1382.201854,1629.670759,5386.770306,5385.538251,1919.221443,1992.146962
1,5791,2019-07-07,Product_11,0,0,286,0.1,0.000000,0.0,0.0,1064.0,600.354341,-16144.785841,17345.494523,1161.148207,1629.670759,270.441944,270.978267,1919.221443,1992.146962
2,5797,2019-07-07,Product_17,659,763,825,0.0,0.000000,0.0,0.0,1064.0,878.077580,-15864.539813,17620.694973,1102.869814,1629.670759,577.157278,577.589179,1919.221443,1992.146962
3,5812,2019-07-07,Product_32,1045,1045,1336,0.9,0.000000,0.0,0.0,1064.0,1330.470867,-15409.237850,18070.179583,1583.973958,1629.670759,1064.829676,1065.098129,1919.221443,1992.146962
4,5826,2019-07-14,Product_04,8940,8940,5912,0.4,0.000000,0.0,0.0,1071.0,5271.092449,-11556.132697,22098.317596,1328.746753,1629.670759,5386.770306,5385.538251,1870.003719,1992.127968
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0,1232.0,1174.458349,-16274.730418,18623.647115,1274.086386,1629.670759,1064.829676,1065.098129,1867.380179,1991.775411
100,6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0,1239.0,5122.602839,-12379.761885,22624.967563,1274.086386,1629.670759,5386.770306,5385.538251,1848.271137,1991.760680
101,6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0,1239.0,776.613422,-16754.973006,18308.199850,2543.392895,2968.382931,270.441944,270.978267,1848.271137,1991.760680
102,6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0,1239.0,754.198456,-16701.860046,18210.256957,2382.473173,1629.670759,577.157278,577.589179,1848.271137,1991.760680


In [77]:
store = 'Product_17'
sub_train = train.loc[train['product_group'] == store] 
sub_pred = pred.loc[pred['product_group'] == store] 
sub_mus = pred.loc[pred['product_group'] == store, list(kernel_d.keys())].to_dict('series')
err = mean_absolute_error(sub_pred[output_col[0]], sub_pred['mu'])
fig = plot_gp(sub_pred['mu'], sub_pred['lb'], sub_pred['ub'], sub_pred['Date'], sub_pred[output_col[0]], sub_train['Date'], sub_train[output_col[0]], name=err, samples=sub_mus, layout='h')
fig.show()

ValueError: Cannot index with multidimensional key

In [78]:
def mase(test_y, pred, train_y):
    e_t = test_y - pred
    scale = mean_absolute_error(train_y[1:], train_y[:-1])
    return np.mean(np.abs(e_t / scale))

def mape(test_y, pred):
    return np.round(np.mean(np.abs(100*(test_y-pred)/(test_y + 1e-9))), 0)

def rmspe(test_y, pred):
    return (np.sqrt(np.mean(np.square((test_y - pred) / (test_y + 1e-9))))) * 100

errors = {'MAE':[mean_absolute_error(Y_test, pred['mu'])], 
        'RMSE':[mean_squared_error(Y_test, pred['mu'], squared=False)], 
        'RMSPE': [rmspe(Y_test, pred['mu'])],
        'MAPE':[mape(Y_test, pred['mu'])],
        'R2':[r2_score(Y_test, pred['mu'])],
        'MASE':[mase(Y_test, pred['mu'], Y_train)]} 
errors = pd.DataFrame(errors, index =['THIS']) 
errors

Unnamed: 0,MAE,RMSE,RMSPE,MAPE,R2,MASE
THIS,310.672272,526.418838,47.79813,32.0,0.951615,0.120271


In [112]:
-1 * gpr.log_marginal_likelihood_value_

-256.6863537910383

In [79]:
from sklearn.inspection import permutation_importance
from sklearn.metrics import fbeta_score, make_scorer


result = permutation_importance(gpr, X_test, y_scaler.transform(Y_test[:,np.newaxis]).ravel(), n_repeats=10, random_state=42, n_jobs=2, scoring='r2')
fig = go.Figure()
# Use x instead of y argument for horizontal plot
for i in range(X_test.shape[1]):
    fig.add_trace(go.Box(x=result['importances'][i], name=X_test.columns[i]))

fig.show()


X does not have valid feature names, but MinMaxScaler was fitted with feature names

