In [1]:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import lognorm, norm
from sklearn.gaussian_process import GaussianProcessRegressor as GPR, kernels as gpk
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
import gplearn as gpl        
from gplearn.genetic import SymbolicRegressor
import zipfile

In [2]:
class SpectralMixture(gpk.Kernel):
    def __init__(self, q, w, m, v, d, active_dim=None):
        self.q, self.w, self.m, self.v, self.d = q, w, m, v, d
        self.active_dim = active_dim

    @property
    def anisotropic(self):
        return False

    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("v", "numeric", self.v.ravel(), len(self.v.ravel()))

    @property
    def hyperparameter_mean(self):
        return gpk.Hyperparameter("m", "numeric", self.m.ravel(), len(self.m.ravel()))

    @property
    def hyperparameter_weight(self):
        return gpk.Hyperparameter("w", "numeric", self.w.ravel(), len(self.w.ravel()))

    def __call__(self, X, Y=None, eval_gradient=False):
        w, m, v = self.w[:, np.newaxis], np.reshape(self.m, (self.d, self.q)), np.reshape(self.v, (self.d, self.q))
        assert w.shape == (q, 1), 'Weights must be [q x 1]'
        assert m.shape[1] == q
        assert v.shape[1] == q
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
        tau = X[:, np.newaxis, :] - Y

        # tau(m,n,p) tensordot means(p,q) -> dot_prod(m,n,q)
        # where dot_prod[i,j,k] = tau[i,j]'*means[:,k]
        K = np.cos(2 * np.pi * np.tensordot(tau, m, axes=1)) * \
            np.exp(-2 * np.pi ** 2 * np.tensordot(tau ** 2, v, axes=1))

        # return the weighted sum of the individual
        # Gaussian kernels, dropping the third index
        return np.tensordot(K, w, axes=1).squeeze(axis=(2,))

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        return "{0}(weight=[{1}], mean=[{2}], variance=[{3}])".format(
            self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.w)),
            ", ".join(map("{0:.3g}".format, self.m)), ", ".join(map("{0:.3g}".format, self.v)))

class Polynomial(gpk.Kernel):

    def __init__(self, variance=1.0, offset=0.0, degree=1.0, active_dim=None):
        self.degree = degree
        self.variance = variance
        self.offset = offset
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(self.variance), 'variance and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.variance) and len(self.variance) > 1

    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("degree", "numeric", self.degree)

    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("offset", "numeric", self.offset)

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return gpk.Hyperparameter("variance", "numeric", self.variance, len(self.variance))
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            return (np.matmul(X * self.variance, X.T) + self.offset) ** self.degree
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            return (np.tensordot(X * self.variance, Y, [[-1], [-1]]) + self.offset) ** self.degree

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance=[{1}], offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.variance)), self.offset, self.degree)
        else:  # isotropic
            return "{0}(variance={1:.3g}, offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, self.variance, self.offset, self.degree)


class Brownian(gpk.Kernel):

    def __init__(self, variance=1.0, active_dim=None):
        if len(active_dim) != 1:
            raise ValueError("Input dimensional for Brownian kernel must be 1.")
        self.variance = variance
        self.active_dim = active_dim

    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y

        return np.where(np.sign(X) == np.sign(Y.T), self.variance * np.fmin(np.abs(X), np.abs(Y.T)), 0.)

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return "{0}(variance={1:.3g})".format(self.__class__.__name__, self.variance)


class ArcCosine(gpk.Kernel):
    implemented_orders = {0, 1, 2}

    def __init__(self, order=0, variance=1.0, weight_variances=1.0, bias_variance=1.0, active_dim=None):
        if order not in self.implemented_orders:
            raise ValueError("Requested kernel order is not implemented.")
        self.order = order
        self.variance = variance
        self.bias_variance = bias_variance
        self.weight_variances = weight_variances
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.weight_variances), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.weight_variances) and len(self.weight_variances) > 1

    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    @property
    def hyperparameter_weight_variances(self):
        if self.anisotropic:
            return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances, len(self.weight_variances))
        return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances)

    @property
    def hyperparameter_bias_variance(self):
        return gpk.Hyperparameter("bias_variance", "numeric", self.bias_variance)

    def _weighted_product(self, X, X2=None):
        if X2 is None:
            return np.sum(self.weight_variances * X ** 2, axis=1) + self.bias_variance
        return np.matmul((self.weight_variances * X), X2.T) + self.bias_variance

    def _J(self, theta):
        """
        Implements the order dependent family of functions defined in equations
        4 to 7 in the reference paper.
        """
        if self.order == 0:
            return np.pi - theta
        elif self.order == 1:
            return np.sin(theta) + (np.pi - theta) * np.cos(theta)
        elif self.order == 2:
            return 3.0 * np.sin(theta) * np.cos(theta) + (np.pi - theta) * (
                    1.0 + 2.0 * np.cos(theta) ** 2)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        X_denominator = np.sqrt(self._weighted_product(X))
        if Y is None:
            Y = X
            Y_denominator = X_denominator
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            Y_denominator = np.sqrt(self._weighted_product(Y))

        numerator = self._weighted_product(X, Y)
        cos_theta = numerator / X_denominator[:, None] / Y_denominator[None, :]
        jitter = 1e-15
        theta = np.arccos(jitter + (1 - 2 * jitter) * cos_theta)

        return self.variance * (1.0 / np.pi) * self._J(theta) * X_denominator[:, None] ** self.order * Y_denominator[
                                                                                                       None,
                                                                                                       :] ** self.order

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance={1:.3g}, weight_variances=[{2}], bias_variance={3:.3g})".format(
                self.__class__.__name__, self.variance, ", ".join(map("{0:.3g}".format, self.weight_variances)),
                self.bias_variance)
        else:  # isotropic
            return "{0}(variance={1:.3g}, weight_variances={2:.3g}, bias_variance={2:.3g})".format(
                self.__class__.__name__, self.variance, self.weight_variances, self.bias_variance)


class Gibbs(gpk.Kernel):

    def __init__(self, lfunc, args, active_dim=None):
        self.lfunc = lfunc
        self.args = args
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        rx = self.lfunc(X, **self.args)
        if Y is None:
            rz = self.lfunc(X, **self.args)
            dists = squareform(pdist(X, metric='sqeuclidean'))
            np.fill_diagonal(dists, 1)
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            rz = self.lfunc(Y, **self.args)
            dists = cdist(X, Y, metric='sqeuclidean')

        rx2, rz2 = np.reshape(rx ** 2, (-1, 1)), np.reshape(rz ** 2, (1, -1))
        return np.sqrt((2.0 * np.outer(rx, rz)) / (rx2 + rz2)) * np.exp(-1.0 * dists / (rx2 + rz2))

    def diag(self, X):
        return np.alloc(1.0, X.shape[0])

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}".format(self.__class__.__name__)


class WarpedInput(gpk.Kernel):

    def __init__(self, stationary, func, args, active_dim=None):
        self.stationary = stationary
        self.func = func
        self.args = args
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        X = self.func(X, **self.args)
        if Y is not None:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            Y = self.func(Y, **self.args)

        return self.stationary(X, Y, eval_gradient)

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return ''


class Gabor(gpk.Kernel):

    def __init__(self, stationary, length_scale=1.0, periodicity=1.0, active_dim=None):
        self.stationary = stationary
        self.length_scale = length_scale
        self.periodicity = periodicity
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("periodicity", "numeric", self.periodicity)

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return gpk.Hyperparameter("length_scale", "numeric", self.length_scale, len(self.length_scale))
        return gpk.Hyperparameter("length_scale", "numeric", self.length_scale)

    def __call__(self, X, Y=None, eval_gradient=False):
        stationary = self.stationary(length_scale=self.length_scale)
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            dists = squareform(pdist(X / self.length_scale, metric='sqeuclidean'))
            np.fill_diagonal(dists, 1)
            tmp1 = stationary(X, Y, eval_gradient)
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            dists = cdist(X / self.length_scale, Y / self.length_scale, metric='sqeuclidean')
            tmp1 = stationary(X, Y, eval_gradient)

        tmp2 = 2 * np.pi * np.sqrt(dists) * self.length_scale / self.periodicity
        return tmp1 * np.cos(tmp2)

    def diag(self, X):
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        if self.anisotropic:
            return "{0}(length_scale=[{1}], periodicity={2:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.length_scale)), self.periodicity)
        else:  # isotropic
            return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
                self.__class__.__name__, self.length_scale, self.periodicity)


class ConstantKernel(gpk.ConstantKernel):
    def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(constant_value=constant_value, constant_value_bounds=constant_value_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class Matern(gpk.Matern):
    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5, active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, nu=nu)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class RationalQuadratic(gpk.RationalQuadratic):
    def __init__(self, length_scale=1.0, alpha=1.0, length_scale_bounds=(1e-05, 100000.0), alpha_bounds=(1e-05, 100000.0),
                 active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, alpha=alpha, alpha_bounds=alpha_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)
        

class RBF(gpk.RBF):
    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class ExpSineSquared(gpk.ExpSineSquared):
    def __init__(self, length_scale=1.0, periodicity=1.0, length_scale_bounds=(1e-5, 1e5),
                 periodicity_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, periodicity=periodicity, length_scale_bounds=length_scale_bounds,
                         periodicity_bounds=periodicity_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)


class WhiteKernel(gpk.WhiteKernel):
    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-05, 100000.0), active_dim=None):
        super(WhiteKernel, self).__init__(noise_level=noise_level, noise_level_bounds=noise_level_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        if self.active_dim == None:
            return super().__call__(X, Y, eval_gradient)
        else:
            X = np.atleast_2d(X)
            X = X[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)
                Y = Y[:, self.active_dim]
            return super().__call__(X, Y, eval_gradient)

In [3]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples={},
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    samples = {'sample '+str(i): s for i, s in enumerate(samples)} if not isinstance(samples, dict) else samples
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines', 
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def confidence_interval(mu, cov):
    std = np.sqrt(np.diag(cov)) #compute std
    uncertainty = 1.96 * std
    return mu, std, mu-uncertainty, mu+uncertainty

def order_quantity(mu, std, cu, co):
    cf = cu/(cu+co)
    return scipy.stats.norm.ppf(cf, loc=mu, scale=std)

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if features:
        features = [col for col in list(df.columns) if col not in [index, obs]]
        for col in features:
            fig.add_trace(go.Scatter(x=decompose.index, y=df[col].values, name=col, mode='lines'))
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

In [81]:
df = pd.read_csv('/home/skyolia/JupyterProjects/data/time_series/nestle.csv', sep=';')
df.rename(columns={"PERIOD_TAG": "Date", 'numeric_distribution_selling_promotion': 'promo',
                  'numeric_distribution_selling_promotion_hyperparmarkets': 'hyp_promo'}, inplace=True)
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df = df.loc[(df['product_group'] == 'Product_20') & (df['customer_name'] == 'Customer_01')]
df = df.drop(columns=[col for col in df.columns if len(df[col].unique()) == 1])
df

Unnamed: 0,Date,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2
14,2016-08-07,2766,2766,0,0.0,61.271676,1.0,1.0
43,2016-08-14,2125,2125,3076,1.5,0.000000,0.0,0.0
74,2016-08-21,4841,5186,2773,1.1,0.000000,0.0,0.0
105,2016-08-28,4501,4501,2865,1.0,0.000000,0.0,0.0
137,2016-09-04,5289,5461,3012,2.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...
6682,2019-12-01,2437,2437,12195,95.6,94.219653,0.0,1.0
6724,2019-12-08,2454,2454,4762,1.1,0.000000,0.0,0.0
6766,2019-12-15,6024,6024,4503,1.1,0.000000,0.0,0.0
6808,2019-12-22,8136,8136,5352,1.0,0.000000,0.0,0.0


In [82]:
output_col = ['SellOut']
time_col = 'Date'
to_remove = ['dispatches_SellIn', 'orders_SellIn']
categorical = []
binary = ['type_promo_1', 'type_promo_2']
numerical = [col for col in df.columns if col not in categorical + binary + to_remove + output_col + [time_col]]
df[numerical] = df[numerical].apply(pd.to_numeric,1)

In [17]:
fig = go.Figure(data=go.Heatmap(z=df[numerical+output_col].corr(),x=numerical+output_col,y=numerical+output_col))
fig.show()

In [14]:
#sub = df.loc[(df['Store'] == 1) & (df['Dept'] == 23)] 
fig, trend, seasonal, residual = plot_ts_decomposition(df, time_col, output_col[0], features=True)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

5444.460317460317 101670.47594537222 318.8580811981597
-19.79890382953265 2798079.5002545696 1672.7460955729562
56.292529585798896 3507151.752534947 1872.7391042360778


In [83]:
df['delta_t'] = (df['Date'] - df['Date'].iloc[0])/np.timedelta64(1,'D')

train = df[df['Date'] < '2019-07-01']
test = df[df['Date']>='2019-07-01']
features = categorical + numerical + binary + ['delta_t']

X_train, X_test = train[features], test[features]
Y_train, Y_test = train[output_col], test[output_col]
T_train, T_test = train[time_col], test[time_col]

y_scaler = MinMaxScaler(feature_range=(0, 1))
Y_train, Y_test = y_scaler.fit_transform(Y_train).ravel() + 1e-15, Y_test.values.ravel() + 1e-15

MS = MinMaxScaler(feature_range=(0, 1))
scaled_train = MS.fit_transform(X_train[numerical+ ['delta_t']])
scaled_test = MS.transform(X_test[numerical+ ['delta_t']])
X_train[numerical+ ['delta_t']], X_test[numerical+ ['delta_t']] = scaled_train, scaled_test

cat_enc_d = {}
for cat in categorical:
    LE = LabelEncoder()
    X_train[cat] = LE.fit_transform(X_train[cat])
    X_test[cat] = LE.transform(X_test[cat])
    cat_enc_d[cat] = LE

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



((152, 5), (26, 5), (152,), (26,))

In [37]:
nidx = [X_train.columns.get_loc(i) for i in numerical+ ['delta_t']] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary+ ['delta_t']] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [2.24595251e+02, 2.50000000e+00, 1.83353844e+01, 5.37463810e-04,
       4.86211524e+01, 5.00000000e-01, 4.44348045e+01, 1.00000000e-05,
       5.76165304e+02, 5.00000000e-01, 8.19590794e-01, 1.58641902e-05,
       1.00000000e-05, 1.00000000e-05, 3.43143611e-01, 9.73579960e-03,
       2.28909039e+02, 2.43844494e+01, 1.00000000e-05, 1.52589563e+02,
       2.28898584e+00, 1.87276391e-05]

def create_kernel(hp):
    kernel, kernel_d, k, nus = 0, {}, 0, [0.5, 1.5, 2.5]
    for i, c in enumerate(nidx):
        #hp[i*4+1] = nus[np.argmin(np.abs(nus - hp[i*4+1]))]
        comp = Matern(hp[i*4], active_dim=[c], nu=hp[i*4+1]) * ConstantKernel(hp[i*4+2]) + gpk.WhiteKernel(hp[i*4+3])
        kernel += comp
        kernel_d[X_train.columns[c]] = comp
    k = 4*len(nidx)
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)
    year = ExpSineSquared(hp[k+1], periodicity=364, active_dim=t) * ConstantKernel(hp[k+2], active_dim=t)
    trend = ArcCosine(0, hp[k+3], hp[k+4], hp[k+5], active_dim=t)
    kernel = kernel + trend + binary + year + gpk.WhiteKernel(hp[k+6])
    kernel_d['binary'], kernel_d['year'], kernel_d['trend'] = binary, year, trend
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [69]:
nidx = [X_train.columns.get_loc(i) for i in numerical+ ['delta_t']] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary+ ['delta_t']] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [1.78460228e+02, 1.00000000e-05, 1.78589859e+02, 1.00000000e-05,
       7.40134981e-02, 1.00000000e-05, 1.86741871e+02, 1.64420752e-04,
       4.63393918e+01, 1.00000000e-05, 1.31125517e+02, 1.00000000e-05,
       1.00000000e-05, 2.45110651e+02, 2.77318618e+01, 8.98159949e+01,
       1.91917025e+02, 4.50851575e+02, 1.73276625e-02, 8.59777466e+00,
       2.11568093e+02, 4.65824734e-04]

def create_kernel(hp):
    kernel, kernel_d, k, nus = 0, {}, 0, [0.5, 1.5, 2.5]
    for i, c in enumerate(nidx):
        comp = RationalQuadratic(hp[i*4], active_dim=[c], alpha=hp[i*4+1]) * ConstantKernel(hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        kernel += comp
        kernel_d[X_train.columns[c]] = comp
    k = 4*len(nidx)
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)
    year = ExpSineSquared(hp[k+1], periodicity=364, active_dim=t) * ConstantKernel(hp[k+2], active_dim=t)
    trend = ArcCosine(0, hp[k+3], hp[k+4], hp[k+5], active_dim=t)
    kernel = kernel + trend + binary + year + gpk.WhiteKernel(hp[k+6])
    kernel_d['binary'], kernel_d['year'], kernel_d['trend'] = binary, year, trend
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [84]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [1.00000000e-05, 1.72144126e+01, 6.19683987e-03, 1.00000000e-05,
       2.10449724e+00, 6.50073767e+00, 1.07953544e+00, 1.00000000e-05,
       4.16686053e-02, 3.44823675e-01, 2.74289793e-01, 1.00000000e-05,
       3.99973384e-03, 3.59239788e+00, 7.89713581e-02, 6.63345551e-03,
       2.23031134e-02, 1.00000000e-05, 1.05103498e-02, 3.85932496e-01,
       1.00000000e-05, 7.89663499e-04]

def create_kernel(hp):
    kernel, kernel_d, k, nus = 0, {}, 0, [0.5, 1.5, 2.5]
    for i, c in enumerate(nidx):
        comp = ArcCosine(2, hp[i*4], hp[i*4+1], hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        kernel += comp
        kernel_d[X_train.columns[c]] = comp
    k = 4*len(nidx)
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)
    year = ExpSineSquared(hp[k+1], periodicity=364, active_dim=t) * ConstantKernel(hp[k+2], active_dim=t)
    trend = ArcCosine(0, hp[k+3], hp[k+4], hp[k+5], active_dim=t)
    kernel = kernel + trend + binary + year + gpk.WhiteKernel(hp[k+6])
    kernel_d['binary'], kernel_d['year'], kernel_d['trend'] = binary, year, trend
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [62]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)+10

hp = [2.70785740e+02, 8.71190838e-02, 5.00000000e-01, 1.66028755e-01,
       4.79174811e+02, 1.00000000e-05, 1.00000000e-05, 1.00000000e-05,
       2.45657470e-04, 4.14184222e-02, 1.00000000e-05, 6.20707196e+01,
       5.58939497e-04]

def create_kernel(hp):
    #hp[len(nidx)] = nus[np.argmin(np.abs(nus - hp[len(nidx)]))]
    poly = Matern(hp[:len(nidx)], nu=hp[len(nidx)], active_dim=nidx) * ConstantKernel(hp[len(nidx)+1], active_dim=nidx)
    k = len(nidx)+2
    binary = RBF(hp[k:k+len(cidx)], active_dim=cidx) * ConstantKernel(hp[k+len(cidx)], active_dim=cidx)
    k = k+len(cidx)+1
    year = ExpSineSquared(hp[k], periodicity=364, active_dim=t) * ConstantKernel(hp[k+1], active_dim=t)
    trend = ArcCosine(0, hp[k+2], hp[k+3], hp[k+4], active_dim=t)
    kernel = poly + binary + year + trend + gpk.WhiteKernel(hp[k+5])
    kernel_d = {'poly': poly, 'binary': binary, 'year': year, 'trend':trend}
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [70]:
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
cidx = [X_train.columns.get_loc(i) for i in categorical+binary] #categorical columns index
t = [X_train.columns.get_loc("delta_t")]
d = len(nidx)*4 + len(cidx)+1 + 6

hp = [3.91849245e+02, 8.74720000e+02, 1.00000000e-05, 5.20462618e+00,
       1.00000000e-05, 2.90768871e+01, 1.00000000e-05, 1.51600659e+01,
       1.00000000e-05, 2.28809825e-01, 8.75593862e+00, 2.81872618e+02,
       1.52696715e-03]

def create_kernel(hp):
    poly = ArcCosine(2, hp[len(nidx)], hp[:len(nidx)], hp[len(nidx) + 1], active_dim=nidx)
    k = len(nidx) + 2
    binary = RBF(hp[k:k + len(cidx)], active_dim=cidx) * ConstantKernel(hp[k + len(cidx)], active_dim=cidx)
    k = k + len(cidx) + 1
    year = ExpSineSquared(hp[k], periodicity=364, active_dim=t) * ConstantKernel(hp[k + 1], active_dim=t)
    trend = ArcCosine(0, hp[k + 2], hp[k + 3], hp[k + 4], active_dim=t)
    kernel = poly + binary + year + trend + gpk.WhiteKernel(hp[k + 5])
    kernel_d = {'poly': poly, 'binary': binary, 'year': year, 'trend':trend}
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
mu_test = np.full(len(X_train), Y_train[-1])
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

mu_test, unc, lb, ub = confidence_interval(mu_test, cov)
fig = plot_gp(mu_test, lb, ub, T_test, Y_test, T_train, Y_train, samples=samples, layout='h')
fig.show()

plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

fig, _, _, _ = plot_ts_decomposition(pd.concat([train, test]), time_col, output_col[0], samples=samples)
fig.show()

In [85]:
mus = []
for i in kernel_d:
    k = kernel_d[i]
    gpr = GPR(kernel=k, optimizer=None, alpha=1e-5).fit(X_train, Y_train)
    mus.append(gpr.predict(X_test, return_std=False))
mus = y_scaler.inverse_transform(np.stack(mus))
mus = dict(zip(kernel_d.keys(), mus))
kernel_d

{'promo': ArcCosine(variance=1e-05, weight_variances=17.2, bias_variance=17.2) + WhiteKernel(noise_level=1e-05),
 'hyp_promo': ArcCosine(variance=2.1, weight_variances=6.5, bias_variance=6.5) + WhiteKernel(noise_level=1e-05),
 'binary': RBF(length_scale=[0.0417, 0.345]) * 0.524**2,
 'year': ExpSineSquared(length_scale=1e-05, periodicity=364) * 0.0632**2,
 'trend': ArcCosine(variance=3.59, weight_variances=0.079, bias_variance=0.079)}

In [86]:
gpr = GPR(kernel=kernel, optimizer=None).fit(X_train, Y_train)
mu_test, std_test = gpr.predict(X_test, return_std=True)
lb, ub = norm.ppf(0.025, mu_test, std_test), norm.ppf(0.975, mu_test, std_test)
oq = order_quantity(mu_test, std_test, 100, 400)

pred = y_scaler.inverse_transform(np.stack((mu_test, lb, ub)).T)
Y_train = y_scaler.inverse_transform(Y_train[:,np.newaxis]).ravel()
oq = y_scaler.inverse_transform(oq[:,np.newaxis]).ravel()

#samples = np.random.multivariate_normal(mu_test, cov_test, 3)

fig = plot_gp(pred[:,0], pred[:,1], pred[:,2], T_test, Y_test, T_train, Y_train, samples=mus, layout='h')
fig.show()


divide by zero encountered in log



In [63]:
_, cov_test = gpr.predict(X_test, return_cov=True)
plot_cov([cov_test], 1, '').show()

In [64]:
test

Unnamed: 0,Date,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,delta_t
5800,2019-07-07,9704,10568,5717,23.3,0.0,0.0,0.0,1064.0
5842,2019-07-14,11424,11424,5550,23.3,0.0,0.0,0.0,1071.0
5884,2019-07-21,19548,19548,6169,2.7,0.0,0.0,0.0,1078.0
5926,2019-07-28,7603,7603,10948,95.3,88.439306,1.0,1.0,1085.0
5968,2019-08-04,1729,1729,4570,5.7,0.0,0.0,0.0,1092.0
6010,2019-08-11,5729,5729,4840,1.2,0.0,0.0,0.0,1099.0
6052,2019-08-18,1214,1214,14581,96.1,92.485549,1.0,1.0,1106.0
6094,2019-08-25,1062,1062,4392,0.9,0.0,0.0,0.0,1113.0
6136,2019-09-01,5373,5373,5108,3.2,0.0,0.0,0.0,1120.0
6178,2019-09-08,1923,1923,4479,2.7,0.0,0.0,0.0,1127.0


In [87]:
def mase(test_y, pred, train_y):
    e_t = test_y - pred
    scale = mean_absolute_error(train_y[1:], train_y[:-1])
    return np.mean(np.abs(e_t / scale))

def mape(test_y, pred):
    return np.round(np.mean(np.abs(100*(test_y-pred)/(test_y + 1e-9))), 0)

def rmspe(test_y, pred):
    return (np.sqrt(np.mean(np.square((test_y - pred) / (test_y + 1e-9))))) * 100

errors = {'MAE':[mean_absolute_error(Y_test, pred[:,0])], 
        'RMSE':[mean_squared_error(Y_test, pred[:,0], squared=False)], 
        'RMSPE': [rmspe(Y_test, pred[:,0])],
        'MAPE':[mape(Y_test, pred[:,0])],
        'R2':[r2_score(Y_test, pred[:,0])],
        'MASE':[mase(Y_test, pred[:,0], Y_train)]} 
errors = pd.DataFrame(errors, index =['THIS']) 
errors

Unnamed: 0,MAE,RMSE,RMSPE,MAPE,R2,MASE
THIS,467.724453,787.800478,9.543084,7.0,0.926524,0.226058


In [88]:
-1 * gpr.log_marginal_likelihood_value_

-59.298402958781196

In [33]:
from sklearn.inspection import permutation_importance
from sklearn.metrics import fbeta_score, make_scorer


result = permutation_importance(gpr, X_test, y_scaler.transform(Y_test[:,np.newaxis]).ravel(), n_repeats=10, random_state=42, n_jobs=2, scoring='r2')
fig = go.Figure()
# Use x instead of y argument for horizontal plot
for i in range(X_test.shape[1]):
    fig.add_trace(go.Box(x=result['importances'][i], name=X_test.columns[i]))

fig.show()


X does not have valid feature names, but MinMaxScaler was fitted with feature names

