In [None]:
https://towardsdatascience.com/improving-marketing-mix-modeling-using-machine-learning-approaches-25ea4cd6994b
https://towardsdatascience.com/modeling-marketing-mix-using-pymc3-ba18dd9e6e68
https://marketingmixmodeling.medium.com/marketing-mix-modeling-automatic-hyperparametrization-for-adstock-and-diminishing-returns-using-3db0bc722ccd
https://towardsdatascience.com/automated-marketing-mix-modeling-with-facebooks-robyn-fd79e60b489d
https://towardsdatascience.com/carryover-and-shape-effects-in-media-mix-modeling-paper-review-fd699b509e2d
https://towardsdatascience.com/an-upgraded-marketing-mix-modeling-in-python-5ebb3bddc1b6

In [1]:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import lognorm, norm
from sklearn.gaussian_process import GaussianProcessRegressor as GPR, kernels as gpk
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
import gplearn as gpl        
from gplearn.genetic import SymbolicRegressor
import zipfile

In [2]:
class SpectralMixture(gpk.Kernel):
    def __init__(self, q, w, m, v, d, active_dim=None):
        self.q, self.w, self.m, self.v, self.d = q, w, m, v, d
        self.active_dim = active_dim

    @property
    def anisotropic(self):
        return False

    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("v", "numeric", self.v.ravel(), len(self.v.ravel()))

    @property
    def hyperparameter_mean(self):
        return gpk.Hyperparameter("m", "numeric", self.m.ravel(), len(self.m.ravel()))

    @property
    def hyperparameter_weight(self):
        return gpk.Hyperparameter("w", "numeric", self.w.ravel(), len(self.w.ravel()))

    def __call__(self, X, Y=None, eval_gradient=False):
        w, m, v = self.w[:, np.newaxis], np.reshape(self.m, (self.d, self.q)), np.reshape(self.v, (self.d, self.q))
        assert w.shape == (q, 1), 'Weights must be [q x 1]'
        assert m.shape[1] == q
        assert v.shape[1] == q
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
        tau = X[:, np.newaxis, :] - Y

        # tau(m,n,p) tensordot means(p,q) -> dot_prod(m,n,q)
        # where dot_prod[i,j,k] = tau[i,j]'*means[:,k]
        K = np.cos(2 * np.pi * np.tensordot(tau, m, axes=1)) * \
            np.exp(-2 * np.pi ** 2 * np.tensordot(tau ** 2, v, axes=1))

        # return the weighted sum of the individual
        # Gaussian kernels, dropping the third index
        return np.tensordot(K, w, axes=1).squeeze(axis=(2,))

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        return "{0}(weight=[{1}], mean=[{2}], variance=[{3}])".format(
            self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.w)),
            ", ".join(map("{0:.3g}".format, self.m)), ", ".join(map("{0:.3g}".format, self.v)))

class Polynomial(gpk.Kernel):

    def __init__(self, variance=1.0, offset=0.0, degree=1.0, active_dim=None):
        self.degree = degree
        self.variance = variance
        self.offset = offset
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(self.variance), 'variance and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.variance) and len(self.variance) > 1

    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("degree", "numeric", self.degree)

    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("offset", "numeric", self.offset)

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return gpk.Hyperparameter("variance", "numeric", self.variance, len(self.variance))
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            return (np.matmul(X * self.variance, X.T) + self.offset) ** self.degree
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            return (np.tensordot(X * self.variance, Y, [[-1], [-1]]) + self.offset) ** self.degree

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance=[{1}], offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.variance)), self.offset, self.degree)
        else:  # isotropic
            return "{0}(variance={1:.3g}, offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, self.variance, self.offset, self.degree)


class Brownian(gpk.Kernel):

    def __init__(self, variance=1.0, active_dim=None):
        if len(active_dim) != 1:
            raise ValueError("Input dimensional for Brownian kernel must be 1.")
        self.variance = variance
        self.active_dim = active_dim

    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y

        return np.where(np.sign(X) == np.sign(Y.T), self.variance * np.fmin(np.abs(X), np.abs(Y.T)), 0.)

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return "{0}(variance={1:.3g})".format(self.__class__.__name__, self.variance)


class ArcCosine(gpk.Kernel):
    implemented_orders = {0, 1, 2}

    def __init__(self, order=0, variance=1.0, weight_variances=1.0, bias_variance=1.0, active_dim=None):
        if order not in self.implemented_orders:
            raise ValueError("Requested kernel order is not implemented.")
        self.order = order
        self.variance = variance
        self.bias_variance = bias_variance
        self.weight_variances = weight_variances
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.weight_variances), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.weight_variances) and len(self.weight_variances) > 1

    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    @property
    def hyperparameter_weight_variances(self):
        if self.anisotropic:
            return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances, len(self.weight_variances))
        return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances)

    @property
    def hyperparameter_bias_variance(self):
        return gpk.Hyperparameter("bias_variance", "numeric", self.bias_variance)

    def _weighted_product(self, X, X2=None):
        if X2 is None:
            return np.sum(self.weight_variances * X ** 2, axis=1) + self.bias_variance
        return np.matmul((self.weight_variances * X), X2.T) + self.bias_variance

    def _J(self, theta):
        """
        Implements the order dependent family of functions defined in equations
        4 to 7 in the reference paper.
        """
        if self.order == 0:
            return np.pi - theta
        elif self.order == 1:
            return np.sin(theta) + (np.pi - theta) * np.cos(theta)
        elif self.order == 2:
            return 3.0 * np.sin(theta) * np.cos(theta) + (np.pi - theta) * (
                    1.0 + 2.0 * np.cos(theta) ** 2)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        X_denominator = np.sqrt(self._weighted_product(X))
        if Y is None:
            Y = X
            Y_denominator = X_denominator
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            Y_denominator = np.sqrt(self._weighted_product(Y))

        numerator = self._weighted_product(X, Y)
        cos_theta = numerator / X_denominator[:, None] / Y_denominator[None, :]
        jitter = 1e-15
        theta = np.arccos(jitter + (1 - 2 * jitter) * cos_theta)

        return self.variance * (1.0 / np.pi) * self._J(theta) * X_denominator[:, None] ** self.order * Y_denominator[
                                                                                                       None,
                                                                                                       :] ** self.order

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance={1:.3g}, weight_variances=[{2}], bias_variance={3:.3g})".format(
                self.__class__.__name__, self.variance, ", ".join(map("{0:.3g}".format, self.weight_variances)),
                self.bias_variance)
        else:  # isotropic
            return "{0}(variance={1:.3g}, weight_variances={2:.3g}, bias_variance={2:.3g})".format(
                self.__class__.__name__, self.variance, self.weight_variances, self.bias_variance)


class Gibbs(gpk.Kernel):

    def __init__(self, lfunc, args, active_dim=None):
        self.lfunc = lfunc
        self.args = args
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        rx = self.lfunc(X, **self.args)
        if Y is None:
            rz = self.lfunc(X, **self.args)
            dists = squareform(pdist(X, metric='sqeuclidean'))
            np.fill_diagonal(dists, 1)
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            rz = self.lfunc(Y, **self.args)
            dists = cdist(X, Y, metric='sqeuclidean')

        rx2, rz2 = np.reshape(rx ** 2, (-1, 1)), np.reshape(rz ** 2, (1, -1))
        return np.sqrt((2.0 * np.outer(rx, rz)) / (rx2 + rz2)) * np.exp(-1.0 * dists / (rx2 + rz2))

    def diag(self, X):
        return np.alloc(1.0, X.shape[0])

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}".format(self.__class__.__name__)


class WarpedInput(gpk.Kernel):

    def __init__(self, stationary, func, args, active_dim=None):
        self.stationary = stationary
        self.func = func
        self.args = args
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        X = self.func(X, **self.args)
        if Y is not None:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            Y = self.func(Y, **self.args)

        return self.stationary(X, Y, eval_gradient)

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return ''


class Gabor(gpk.Kernel):

    def __init__(self, stationary, length_scale=1.0, periodicity=1.0, active_dim=None):
        self.stationary = stationary
        self.length_scale = length_scale
        self.periodicity = periodicity
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("periodicity", "numeric", self.periodicity)

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return gpk.Hyperparameter("length_scale", "numeric", self.length_scale, len(self.length_scale))
        return gpk.Hyperparameter("length_scale", "numeric", self.length_scale)

    def __call__(self, X, Y=None, eval_gradient=False):
        stationary = self.stationary(length_scale=self.length_scale)
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            dists = squareform(pdist(X / self.length_scale, metric='sqeuclidean'))
            np.fill_diagonal(dists, 1)
            tmp1 = stationary(X, Y, eval_gradient)
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            dists = cdist(X / self.length_scale, Y / self.length_scale, metric='sqeuclidean')
            tmp1 = stationary(X, Y, eval_gradient)

        tmp2 = 2 * np.pi * np.sqrt(dists) * self.length_scale / self.periodicity
        return tmp1 * np.cos(tmp2)

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        if self.anisotropic:
            return "{0}(length_scale=[{1}], periodicity={2:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.length_scale)), self.periodicity)
        else:  # isotropic
            return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
                self.__class__.__name__, self.length_scale, self.periodicity)


class ConstantKernel(gpk.ConstantKernel):
    def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(constant_value=constant_value, constant_value_bounds=constant_value_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class Matern(gpk.Matern):
    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5, active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, nu=nu)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class RationalQuadratic(gpk.RationalQuadratic):
    def __init__(self, length_scale=1.0, alpha=1.0, length_scale_bounds=(1e-05, 100000.0), alpha_bounds=(1e-05, 100000.0),
                 active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, alpha=alpha, alpha_bounds=alpha_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)
        

class RBF(gpk.RBF):
    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class ExpSineSquared(gpk.ExpSineSquared):
    def __init__(self, length_scale=1.0, periodicity=1.0, length_scale_bounds=(1e-5, 1e5),
                 periodicity_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, periodicity=periodicity, length_scale_bounds=length_scale_bounds,
                         periodicity_bounds=periodicity_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class WhiteKernel(gpk.WhiteKernel):
    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-05, 100000.0), active_dim=None):
        super(WhiteKernel, self).__init__(noise_level=noise_level, noise_level_bounds=noise_level_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)

In [3]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples={},
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    samples = {'sample '+str(i): s for i, s in enumerate(samples)} if not isinstance(samples, dict) else samples
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines', 
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def date_encoding(train, test, time_col):
    start_date = train[time_col].iloc[0]
    train['delta_t'] = (train[time_col] - start_date) / np.timedelta64(1, 'D')
    test['delta_t'] = (test[time_col] - start_date) / np.timedelta64(1, 'D')
    train['norm_delta_t'] = train['delta_t']
    test['norm_delta_t'] = test['delta_t']
    return train, test, start_date


def categorical_encoding(train, test, categorical):
    new_cat, OHEncoders = [], {}
    for cat in categorical:
        OE = OneHotEncoder(sparse=False, drop='if_binary')
        train_ohe = OE.fit_transform(train[[cat]])
        test_ohe = OE.transform(test[[cat]].astype(str))
        for i in range(train_ohe.shape[1]):
            c = OE.categories_[0][i]
            train[cat + '_' + str(c)] = train_ohe[:, i]
            test[cat + '_' + str(c)] = test_ohe[:, i]
            new_cat.append(cat + '_' + str(c))
        OHEncoders[cat] = OE
    return train, test, OHEncoders, new_cat


def numerical_scaling(train, test, numerical):
    MS = MinMaxScaler(feature_range=(0, 1))
    scaled_train = MS.fit_transform(train[numerical])
    scaled_test = MS.transform(test[numerical])
    train[numerical] = scaled_train
    test[numerical] = scaled_test
    return train, test, MS


def output_scaling(train, test, output_col):
    YScaler = MinMaxScaler(feature_range=(0, 1))
    Y_train = YScaler.fit_transform(train[[output_col]]).ravel() + 1e-15
    Y_test = test[output_col]
    return Y_train, Y_test, YScaler


def final_features(X_train, new_cat, numerical, binary):
    features = new_cat + numerical + binary + ['norm_delta_t', 'delta_t']
    unique_col = [col for col in features if len(pd.unique(X_train[col])) == 1]
    return [col for col in features if col not in unique_col]

def shift_df(df, shift, dropna=True):
    origin = df.copy()
    for i in range(1, shift+1):
        shifted_df = origin.shift(i)
        shifted_df = shifted_df.rename(columns=dict(zip(shifted_df.columns, [str(c)+'_'+str(i) for c in shifted_df.columns])))
        df = pd.concat([shifted_df, df], axis=1)
    return df.dropna() if dropna else df

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if features:
        features = [col for col in list(df.columns) if col not in [index, obs]]
        for col in features:
            fig.add_trace(go.Scatter(x=decompose.index, y=df[col].values, name=col, mode='lines'))
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

In [19]:
df = pd.read_csv('/home/skyolia/JupyterProjects/data/time_series/mmm.csv')
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df.index=df['Date']
df['Store'] = 1
df.to_csv('history.csv', index=False)
df

Unnamed: 0_level_0,Date,TV,Radio,Banners,Sales,Store
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-07,2018-01-07,13528.10,0.00,0.00,9779.80,1
2018-01-14,2018-01-14,0.00,5349.65,2218.93,13245.19,1
2018-01-21,2018-01-21,0.00,4235.86,2046.96,12022.66,1
2018-01-28,2018-01-28,0.00,3562.21,0.00,8846.95,1
2018-02-04,2018-02-04,0.00,0.00,2187.29,9797.07,1
...,...,...,...,...,...,...
2021-10-03,2021-10-03,0.00,0.00,1691.68,9030.17,1
2021-10-10,2021-10-10,11543.58,4615.35,2518.88,15904.11,1
2021-10-17,2021-10-17,0.00,4556.16,1919.19,12839.29,1
2021-10-24,2021-10-24,0.00,0.00,1707.65,9063.45,1


In [20]:
output_col = 'Sales'
time_col = 'Date'
sku_col = 'Store'
to_remove = []
categorical = []
binary = []
numerical = [col for col in df.columns if col not in categorical + binary + to_remove + [output_col,time_col,sku_col]]
df[numerical] = df[numerical].apply(pd.to_numeric,1)
fig = go.Figure(data=go.Heatmap(z=df[numerical+[output_col]].corr(),x=numerical+[output_col],y=numerical+[output_col]))
fig.show()

In [21]:
#sub = df.loc[(df['Store'] == 1) & (df['Dept'] == 23)] 
fig, trend, seasonal, residual = plot_ts_decomposition(df, time_col, output_col, features=True)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

10510.727849532224 197742.85170137536 444.6828664355929
46.87002696005918 3021208.0598327694 1738.1622651043745
55.85002896942842 4366445.504932243 2089.6041502955154


In [22]:
shift = 3
shifted_df = shift_df(df[numerical], shift, False)
df = pd.concat((df[[time_col, sku_col] + to_remove + categorical + binary + [output_col]], shifted_df),1).dropna()
numerical = list(shifted_df.columns)

df


In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only



Unnamed: 0_level_0,Date,Store,Sales,TV_3,Radio_3,Banners_3,TV_2,Radio_2,Banners_2,TV_1,Radio_1,Banners_1,TV,Radio,Banners
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-01-28,2018-01-28,1,8846.95,13528.10,0.00,0.00,0.00,5349.65,2218.93,0.00,4235.86,2046.96,0.00,3562.21,0.00
2018-02-04,2018-02-04,1,9797.07,0.00,5349.65,2218.93,0.00,4235.86,2046.96,0.00,3562.21,0.00,0.00,0.00,2187.29
2018-02-11,2018-02-11,1,13527.65,0.00,4235.86,2046.96,0.00,3562.21,0.00,0.00,0.00,2187.29,8045.44,4310.55,1992.98
2018-02-18,2018-02-18,1,9635.43,0.00,3562.21,0.00,0.00,0.00,2187.29,8045.44,4310.55,1992.98,0.00,0.00,2253.02
2018-02-25,2018-02-25,1,15355.11,0.00,0.00,2187.29,8045.44,4310.55,1992.98,0.00,0.00,2253.02,9697.29,4478.81,2042.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-03,2021-10-03,1,9030.17,0.00,5213.48,1715.62,7663.81,0.00,2399.59,0.00,4757.98,0.00,0.00,0.00,1691.68
2021-10-10,2021-10-10,1,15904.11,7663.81,0.00,2399.59,0.00,4757.98,0.00,0.00,0.00,1691.68,11543.58,4615.35,2518.88
2021-10-17,2021-10-17,1,12839.29,0.00,4757.98,0.00,0.00,0.00,1691.68,11543.58,4615.35,2518.88,0.00,4556.16,1919.19
2021-10-24,2021-10-24,1,9063.45,0.00,0.00,1691.68,11543.58,4615.35,2518.88,0.00,4556.16,1919.19,0.00,0.00,1707.65


In [12]:
train = df[df['Date'] < '2021-04-30']
test = df[df['Date']>='2021-04-30']

train, test, start_date = date_encoding(train, test, time_col)  # DATE ENCODING
train, test, OHEncoders, new_cat = categorical_encoding(train, test, categorical)  # CATEGORICAL ENCODING
if len(numerical) > 0:
    train, test, MS = numerical_scaling(train, test, numerical+['norm_delta_t'])  # NUMERCIAL SCALING
features = final_features(train, new_cat, numerical, binary)  # REMOVE COLUMNS WITH UNIQUE VALUE
X_train, T_train = train[features], train[time_col]
X_test, T_test = test[features], test[time_col]
Y_train, Y_test, YScaler = output_scaling(train, test, output_col)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

((170, 14), (27, 14), (170,), (27,))

In [69]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]

hp = [1.00000000e-05, 5.00000000e-01, 2.63797436e-03, 1.00000000e-05,
       7.26690988e-02, 5.00000000e-01, 1.00000000e-05, 3.08017386e-03,
       6.39502692e+02, 5.00000000e-01, 1.16618894e-02, 1.00000000e-05,
       1.05391477e-03, 2.50000000e+00, 1.00000000e-05, 1.00094649e-05,
       1.00000000e-05, 2.50000000e+00, 4.00239676e-03, 1.96849760e-03,
       1.82519802e+00, 2.50000000e+00, 1.00000000e-05, 1.00000000e-05,
       4.77804606e+02, 1.00000000e-05, 2.32076860e-01, 1.60210676e-01,
       1.01998470e+02, 3.55837534e+02, 1.00000000e-05, 2.94066571e-03,
       1.00000000e-05, 1.00000000e-05]

def create_kernel(hp):
    kernel, kernel_d, k, nus = 0, {}, 0, [0.5, 1.5, 2.5]
    for i, c in enumerate(nidx):
        #hp[i*4+1] = nus[np.argmin(np.abs(nus - hp[i*4+1]))]
        comp = Matern(hp[i*4], active_dim=[c], nu=hp[i*4+1]) * ConstantKernel(hp[i*4+2]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        kernel += comp
        kernel_d[X_train.columns[c]] = comp
    k = 4*len(nidx)
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)
    year = ExpSineSquared(hp[k+1], periodicity=364, active_dim=t) * ConstantKernel(hp[k+2], active_dim=t)
    trend = ArcCosine(0, hp[k+3], hp[k+4], hp[k+5], active_dim=t)
    kernel = kernel + trend + binary + year + gpk.WhiteKernel(hp[k+6])
    kernel_d['binary'], kernel_d['year'], kernel_d['trend'] = binary, year, trend
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [9]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [6.06387879e-01, 1.54480769e-01, 9.43084583e-02, 1.59215826e-05,
       1.00000000e-05, 7.62648306e-03, 8.22332932e-02, 1.31355919e-05,
       2.16375326e-03, 4.95512963e-01, 6.67703413e-01, 3.93819962e-05,
       1.21040758e-02, 1.55855562e-02, 3.70006412e-01, 2.75154993e-03,
       1.90281664e-02, 6.62707237e-03, 6.45344219e-01, 4.82218123e-03,
       1.49028544e-01, 1.00000000e-05, 2.33967065e-05, 1.00000000e-05,
       2.40335206e+00, 1.04188985e-02, 1.00000000e-05, 8.11096446e-02,
       4.30387760e+00, 6.69028876e-01, 1.00000000e-05, 9.48206540e-05,
       7.20150572e-05, 1.00000000e-05]

def create_kernel(hp):
    kernel, kernel_d, k, nus = 0, {}, 0, [0.5, 1.5, 2.5]
    for i, c in enumerate(nidx):
        comp = ArcCosine(2, hp[i*4], hp[i*4+1], hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        kernel += comp
        kernel_d[X_train.columns[c]] = comp
    k = 4*len(nidx)
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)
    year = ExpSineSquared(hp[k+1], periodicity=364, active_dim=t) * ConstantKernel(hp[k+2], active_dim=t)
    trend = ArcCosine(0, hp[k+3], hp[k+4], hp[k+5], active_dim=t)
    kernel = kernel + trend + binary + year + gpk.WhiteKernel(hp[k+6])
    kernel_d['binary'], kernel_d['year'], kernel_d['trend'] = binary, year, trend
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [98]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [1.25029298e-05, 1.45293024e-05, 9.50817455e+00, 1.00000000e-05,
       1.00000000e+03, 8.45895599e-01, 5.32503748e-01, 1.38574478e-05,
       1.00000000e-05, 2.06245488e-01, 1.00000000e-05, 4.43992809e-03,
       1.22068709e-05, 9.13940861e-02, 1.00000000e-05, 2.34681378e-05,
       1.00000000e-05, 1.45476782e-05, 6.49337582e+00, 1.00000000e-05,
       1.00000000e+03, 4.67910717e-05, 2.25243119e-05, 1.01912561e-05,
       1.41081169e-05, 1.00000000e-05, 1.95372084e-03, 1.18116317e-01,
       5.13480611e+00, 1.13932721e+00, 3.31173873e-04, 1.00000000e-05,
       1.00000000e-05, 1.10046604e-05]

def create_kernel(hp):
    kernel, kernel_d, k, nus = 0, {}, 0, [0.5, 1.5, 2.5]
    for i, c in enumerate(nidx):
        comp = RationalQuadratic(hp[i*4], active_dim=[c], alpha=hp[i*4+1]) * ConstantKernel(hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        kernel += comp
        kernel_d[X_train.columns[c]] = comp
    k = 4*len(nidx)
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)
    year = ExpSineSquared(hp[k+1], periodicity=364, active_dim=t) * ConstantKernel(hp[k+2], active_dim=t)
    trend = ArcCosine(0, hp[k+3], hp[k+4], hp[k+5], active_dim=t)
    kernel = kernel + trend + binary + year + gpk.WhiteKernel(hp[k+6])
    kernel_d['binary'], kernel_d['year'], kernel_d['trend'] = binary, year, trend
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [None]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)+10

hp = [2.03671428e+01, 3.30738394e+02, 1.26182214e+02, 1.30264484e+01,
       3.66844986e+02, 5.44219620e+02, 1.60092189e+01, 4.11471712e+01,
       1.31718110e+02, 1.03811419e+01, 1.41154273e+01, 1.81776595e+01,
       1.50000000e+00, 1.99905648e+00, 6.98612816e+01, 2.52164400e-01,
       1.61263298e-01, 6.37176471e+01, 2.72121790e+00, 1.02748925e-03,
       5.21633972e+00]

def create_kernel(hp):
    #hp[len(nidx)] = nus[np.argmin(np.abs(nus - hp[len(nidx)]))]
    poly = Matern(hp[:len(nidx)], nu=hp[len(nidx)], active_dim=nidx) * ConstantKernel(hp[len(nidx)+1], active_dim=nidx)
    k = len(nidx)+2
    year = ExpSineSquared(hp[k], periodicity=364, active_dim=t) * ConstantKernel(hp[k+1], active_dim=t)
    trend = ArcCosine(0, hp[k+2], hp[k+3], hp[k+4], active_dim=t)
    kernel = poly + year + trend + gpk.WhiteKernel(hp[k+5])
    kernel_d = {'poly': poly, 'year': year, 'trend':trend}
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, std = np.full(len(X_train), Y_train[-1]), np.sqrt(np.diag(cov))
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [13]:
nidx = [X_train.columns.get_loc(i) for i in X_train.columns if i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in X_train.columns if i in new_cat+binary and i not in ['Open']] #categorical columns index
t, nt = [X_train.columns.get_loc("delta_t")], [X_train.columns.get_loc("norm_delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [1.06928146e+00, 9.67178577e-04, 2.99214613e-02, 2.72980537e+00,
       1.00000000e-05, 5.08152945e-03, 8.13566030e+00, 3.90391648e-01,
       2.17504774e-02, 1.92812921e+01, 8.27716284e+00, 3.54760847e+00,
       1.00000000e-05, 2.13121150e+02, 2.49274252e+01, 3.11320998e-02,
       1.00000000e-05, 8.17206431e-03, 2.60539773e+00, 1.11789196e-03]

def create_kernel(hp):
    poly = ArcCosine(2, hp[len(nidx)], hp[:len(nidx)], hp[len(nidx) + 1], active_dim=nidx)
    k = len(nidx) + 2
    year = ExpSineSquared(hp[k], periodicity=364, active_dim=t) * ConstantKernel(hp[k + 1], active_dim=t)
    trend = ArcCosine(2, hp[k + 2], hp[k + 3], hp[k + 4], active_dim=nt)
    kernel = poly + year + trend + gpk.WhiteKernel(hp[k + 5])
    kernel_d = {'Kernel': kernel, 'poly': poly, 'year': year, 'trend':trend}
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test, std = np.full(len(X_train), Y_train[-1]), np.sqrt(np.diag(cov))
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

lb, ub = norm.ppf(0.025, mu_test, std), norm.ppf(0.975, mu_test, std)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col, samples=samples)
fig.show()

In [14]:
mus = []
for i in kernel_d:
    k = kernel_d[i]
    gpr = GPR(kernel=k, optimizer=None, alpha=1e-5).fit(X_train, Y_train)
    mus.append(gpr.predict(X_test, return_std=False))
mus = YScaler.inverse_transform(np.stack(mus))
mus = dict(zip(kernel_d.keys(), mus))
kernel_d

{'Kernel': ArcCosine(variance=1e-05, weight_variances=[1.07, 0.000967, 0.0299, 2.73, 1e-05, 0.00508, 8.14, 0.39, 0.0218, 19.3, 8.28, 3.55], bias_variance=213) + ExpSineSquared(length_scale=24.9, periodicity=364) * 0.176**2 + ArcCosine(variance=1e-05, weight_variances=0.00817, bias_variance=0.00817) + WhiteKernel(noise_level=0.00112),
 'poly': ArcCosine(variance=1e-05, weight_variances=[1.07, 0.000967, 0.0299, 2.73, 1e-05, 0.00508, 8.14, 0.39, 0.0218, 19.3, 8.28, 3.55], bias_variance=213),
 'year': ExpSineSquared(length_scale=24.9, periodicity=364) * 0.176**2,
 'trend': ArcCosine(variance=1e-05, weight_variances=0.00817, bias_variance=0.00817)}

In [15]:
gpr = GPR(kernel=kernel, optimizer=None).fit(X_train, Y_train)
mu_test, std_test = gpr.predict(X_test, return_std=True)
lb, ub = norm.ppf(0.025, mu_test, std_test), norm.ppf(0.975, mu_test, std_test)

pred = YScaler.inverse_transform(np.stack((mu_test, lb, ub)).T)
Y_train = YScaler.inverse_transform(Y_train[:,np.newaxis]).ravel()

#samples = np.random.multivariate_normal(mu_test, cov_test, 3)

fig = plot_gp(pred[:,0], pred[:,1], pred[:,2], T_test, Y_test, T_train, Y_train, samples=mus, layout='h')
fig.show()

In [16]:
def mase(test_y, pred, train_y):
    e_t = test_y - pred
    scale = mean_absolute_error(train_y[1:], train_y[:-1])
    return np.mean(np.abs(e_t / scale))

def mape(test_y, pred):
    return np.round(np.mean(np.abs(100*(test_y-pred)/(test_y + 1e-9))), 0)

def rmspe(test_y, pred):
    return (np.sqrt(np.mean(np.square((test_y - pred) / (test_y + 1e-9))))) * 100

errors = {'MAE':[mean_absolute_error(Y_test, pred[:,0])], 
        'RMSE':[mean_squared_error(Y_test, pred[:,0], squared=False)], 
        'RMSPE': [rmspe(Y_test, pred[:,0])],
        'MAPE':[mape(Y_test, pred[:,0])],
        'R2':[r2_score(Y_test, pred[:,0])],
        'MASE':[mase(Y_test, pred[:,0], Y_train)]} 
errors = pd.DataFrame(errors, index =['THIS']) 
errors

Unnamed: 0,MAE,RMSE,RMSPE,MAPE,R2,MASE
THIS,404.018764,500.426365,4.690267,4.0,0.954707,0.161404


In [17]:
-1 * gpr.log_marginal_likelihood_value_

-287.3236052168993

In [18]:
from sklearn.inspection import permutation_importance
from sklearn.metrics import fbeta_score, make_scorer


result = permutation_importance(gpr, X_test, YScaler.transform(Y_test[:,np.newaxis]).ravel(), n_repeats=10, random_state=42, n_jobs=2, scoring='r2')
fig = go.Figure()
# Use x instead of y argument for horizontal plot
for i in range(X_test.shape[1]):
    fig.add_trace(go.Box(x=result['importances'][i], name=X_test.columns[i]))

fig.show()


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.


X does not have valid feature names, but MinMaxScaler was fitted with feature names



In [57]:
from joblib import dump, load
dump(kernel_d, '1.joblib') 

['1.joblib']