In [None]:
!pip install einops
!pip install statsmodels --upgrade
!pip install -U "ray[air]"

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.optim as opt
from torch.distributions.normal import Normal
from torch.distributions.log_normal import LogNormal
from torch.distributions.poisson import Poisson
from torch.distributions.negative_binomial import NegativeBinomial
from torch.distributions.categorical import Categorical
from torch.distributions.mixture_same_family import MixtureSameFamily
from einops import rearrange
import pandas as pd
import numpy as np
from scipy.optimize import dual_annealing, minimize, fmin_bfgs, fmin_cg, least_squares
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
'''torch.cuda.set_device(0)
torch.backends.cudnn.benchmark = True'''

Mounted at /content/drive


'torch.cuda.set_device(0)\ntorch.backends.cudnn.benchmark = True'

In [None]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples=None,
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    """Plot a forecast mean with its uncertainty band, the observations and optional samples.

    Parameters
    ----------
    mu, lb, ub : forecast mean and lower / upper band, plotted against ``test_x``.
    test_x, test_y : x-axis values and observed values of the forecast window.
    train_x, train_y : optional history; drawn only when ``train_x`` is given.
    name : figure title.
    samples : optional dict ``{trace name: values}`` or any iterable of value
        sequences (named ``'sample 0'``, ``'sample 1'``, ...).
    xaxis_title, yaxis_title : axis labels.
    w, f : line width and font size.
    layout, fig_size : accepted for compatibility; not used by this function.

    Returns
    -------
    The plotly figure.
    """
    # ("Samples") is just a string; the trailing comma makes it a one-element tuple
    # so the whole word is used as the subplot title.
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples",))
    if samples is None:
        samples = {}
    elif not isinstance(samples, dict):
        samples = {'sample '+str(i): s for i, s in enumerate(samples)}
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    # Upper bound first, then the lower bound filled up to it ('tonexty').
    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines',
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def confidence_interval(mu, cov):
    """Return ``(mu, std, lower, upper)`` for a +/- 1.96 standard deviation band.

    ``std`` is the square root of the diagonal of ``cov``; the band is
    ``mu -/+ 1.96 * std``.
    """
    sigma = np.sqrt(np.diag(cov))  # per-point standard deviation
    half_width = sigma * 1.96
    lower = mu - half_width
    upper = mu + half_width
    return mu, sigma, lower, upper

def order_quantity(mu, std, cu, co):
    """Return the ``cu / (cu + co)`` quantile of a Normal(mu, std) distribution.

    ``cu`` and ``co`` are presumably the unit underage and overage costs of a
    newsvendor-style problem (TODO confirm with the caller); only their ratio
    ``cu / (cu + co)`` is used.
    """
    # Imported locally: the notebook only imports names from scipy.optimize,
    # so the bare `scipy.stats` reference used before raised NameError.
    from scipy.stats import norm

    critical_fractile = cu / (cu + co)
    return norm.ppf(critical_fractile, loc=mu, scale=std)

def plot_cov(covs, cols, subplot_titles, labels=None):
    """Draw each matrix in ``covs`` as a greyscale heatmap on a grid with ``cols`` columns.

    Parameters
    ----------
    covs : sequence of 2-D arrays, one heatmap each.
    cols : number of subplot columns.
    subplot_titles : titles forwarded to ``make_subplots``.
    labels : optional tick labels used for both axes of every heatmap.

    Returns
    -------
    The plotly figure.
    """
    # Ceiling division: the previous int(len/cols) + 1 allocated an empty extra row
    # whenever len(covs) was an exact multiple of cols.
    n_rows = max(1, -(-len(covs) // cols))
    fig = make_subplots(rows=n_rows, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = (i // cols) + 1, (i % cols) + 1  # plotly grid positions are 1-based
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    """Decompose ``df[obs]`` with ``seasonal_decompose`` and plot every component.

    Note that ``df`` is modified: its index is replaced by the ``index`` column.

    Parameters
    ----------
    df : frame holding the ``index`` and ``obs`` columns.
    index, obs : names of the time column and of the observed series.
    model, period : forwarded to ``seasonal_decompose``.
    features : when truthy, every other column of ``df`` is plotted as well.
    samples : optional iterable of extra series to overlay.

    Returns
    -------
    ``(fig, trend, seasonal, residual)`` where the three arrays have their NaN
    entries dropped.
    """
    df.index = df[index]
    decompose = df[[obs]]  # inherits the index that was just set on df

    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid

    time_axis = decompose.index
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=time_axis, y=decompose.iloc[:,0], mode='lines', name='observed'))
    for label, component in (('trend', trend), ('seasonal', seasonal), ('residual', residual)):
        fig.add_trace(go.Scatter(x=time_axis, y=component.tolist(), mode='lines', name=label))

    if features:
        extra_columns = [col for col in list(df.columns) if col not in [index, obs]]
        for col in extra_columns:
            fig.add_trace(go.Scatter(x=time_axis, y=df[col].values, name=col, mode='lines'))

    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=time_axis, y=s, name='sample '+str(i), mode='lines'))

    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    """Decompose ``df[obs]`` with STL and plot the observed series and its components.

    Note that ``df`` is modified: its index is replaced by the ``index`` column.

    Parameters
    ----------
    df : frame holding the ``index`` and ``obs`` columns.
    index, obs : names of the time column and of the observed series.
    model : accepted for signature parity with ``plot_ts_decomposition``; not used.
    period, seasonal : forwarded to ``STL``.
    samples : optional iterable of extra series to overlay.

    Returns
    -------
    ``(fig, trend, seasonal, residual)`` where the three arrays have their NaN
    entries dropped.
    """
    # STL is not part of the notebook's import cell, so it is imported here;
    # without it the call below raised NameError.
    from statsmodels.tsa.seasonal import STL

    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]

    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    # Distinct local names so the `seasonal` argument is not shadowed by the result.
    trend, seasonal_part, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal_part.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal_part.dropna().values, residual.dropna().values

In [None]:
def date_encoding(df, dt_column, daily=True, weekly=True, yearly=True):
    """Add sine / cosine encodings of the calendar cycles found in ``df[dt_column]``.

    ``df`` is modified in place (the datetime column is cast to
    ``datetime64[ns]`` and the new columns are appended) and also returned.

    Parameters
    ----------
    df : frame containing ``dt_column``.
    dt_column : name of the datetime column.
    daily, weekly, yearly : enable the hour-of-day, day-of-week and
        day-of-year encodings respectively.

    Returns
    -------
    ``(df, periodic_column)`` where ``periodic_column`` lists the added column
    names in creation order.
    """
    df[dt_column], periodic_column = df[dt_column].astype('datetime64[ns]'), []
    stamps = df[dt_column].dt

    # (column suffix, raw value, cycle length). The cycle length is the true period,
    # not the largest observed value: dividing by the max (6 for weekdays, 23 for
    # hours) put the first and last phase on the same angle, making e.g. Monday and
    # Sunday indistinguishable, and divided by zero when every value was 0.
    cycles = []
    if daily:
        cycles.append(('hourofday', stamps.hour, 24.0))
    if weekly:
        cycles.append(('dayofweek', stamps.dayofweek, 7.0))
    if yearly:
        cycles.append(('dayofyear', stamps.dayofyear, 365.25))

    for suffix, values, period in cycles:
        angle = 2 * np.pi * values / period
        sin_name, cos_name = 'sin_' + suffix, 'cos_' + suffix
        df[sin_name] = np.sin(angle)
        df[cos_name] = np.cos(angle)
        periodic_column.extend([sin_name, cos_name])
    return df, periodic_column


def categorical_encoding(train, valid, test, categorical):
    """Label-encode each column in ``categorical``, fitting on ``train`` only.

    The three frames are modified in place. ``valid`` and ``test`` are encoded
    with the encoder fitted on ``train``.

    Returns
    -------
    ``(train, valid, test, LEncoders)`` where ``LEncoders`` maps each column
    name to its fitted ``LabelEncoder``.
    """
    LEncoders = {}
    for cat in categorical:
        encoder = LabelEncoder().fit(train[cat])
        # Same encoder for every split, applied in train -> valid -> test order.
        for frame in (train, valid, test):
            frame[cat] = encoder.transform(frame[cat]).astype(int)
        LEncoders[cat] = encoder
    return train, valid, test, LEncoders


def numerical_scaling(train, valid, test, numerical):
    """Min-max scale the ``numerical`` columns to [-1, 1] using statistics of ``train``.

    The three frames are modified in place.

    Returns
    -------
    ``(train, valid, test, MS)`` where ``MS`` is the fitted ``MinMaxScaler``.
    """
    MS = MinMaxScaler(feature_range=(-1, 1)).fit(train[numerical])
    frames = (train, valid, test)
    # Transform every split before writing anything back.
    scaled = [MS.transform(frame[numerical]) for frame in frames]
    for frame, values in zip(frames, scaled):
        frame[numerical] = values
    return train, valid, test, MS


def nn_ts_pipeline(train, valid, test, categorical, numerical, periodic, eps=0):
    """Encode and scale copies of the three splits and separate features, time and target.

    Relies on the notebook-level globals ``time_column`` and ``out_column``.
    The inputs are copied, so the caller's frames are left untouched. The
    fitted encoders and scaler are not returned.

    Parameters
    ----------
    train, valid, test : frames holding the features, the time column and the target.
    categorical, numerical, periodic : lists of feature column names; their
        concatenation (in that order) defines the feature matrix.
    eps : constant added to every target value.

    Returns
    -------
    ``X_train, T_train, Y_train, X_valid, T_valid, Y_valid, X_test, T_test, Y_test``
    """
    frames = [train.copy(), valid.copy(), test.copy()]
    *frames, _ = categorical_encoding(*frames, categorical)  # label-encode categoricals
    if len(numerical) > 0:
        *frames, _ = numerical_scaling(*frames, numerical)  # scale numericals to [-1, 1]

    features = categorical + numerical + periodic
    outputs = []
    for frame in frames:
        outputs.append(frame[features])
        outputs.append(frame[time_column])
        outputs.append(frame[out_column].values + eps)
    return tuple(outputs)


def output_scaling(train, valid, test, output_col):
    """Min-max scale the target of ``train`` and ``valid`` to [0, 1] and add 1e-5.

    The scaler is fitted on ``train``. The target of ``test`` is returned
    unscaled, as the original column.

    Returns
    -------
    ``(Y_train, Y_valid, Y_test, YScaler)``
    """
    YScaler = MinMaxScaler(feature_range=(0, 1))
    YScaler.fit(train[[output_col]])

    def _scaled(frame):
        # Flatten the (n, 1) scaler output and shift it slightly away from zero.
        return YScaler.transform(frame[[output_col]]).ravel()+1e-5

    return _scaled(train), _scaled(valid), test[output_col], YScaler


def shift_df(df, shift, dropna=True):
    """Prepend lagged copies of every column of ``df``.

    For each lag ``k`` in ``1..shift`` the columns are shifted down by ``k``
    rows and renamed ``<name>_<k>``. The largest lag ends up leftmost and the
    original columns rightmost.

    Parameters
    ----------
    df : input frame.
    shift : number of lags to add.
    dropna : when True, rows containing NaN are dropped from the result.
    """
    origin = df.copy()
    lagged = [origin.shift(lag).add_suffix('_' + str(lag)) for lag in range(shift, 0, -1)]
    combined = pd.concat(lagged + [df], axis=1) if lagged else df
    if dropna:
        return combined.dropna()
    return combined

def mixture_quantile(pi, mu, std, q, maxiter=1000):
    """Numerically invert the CDF of per-row log-normal mixtures at level ``q``.

    For each row the ``q``-quantile of the mixture lies between the smallest and
    largest ``q``-quantile of its components; ``dual_annealing`` then minimises
    ``|CDF(x) - q|`` on that interval.

    Parameters
    ----------
    pi, mu, std : numpy arrays indexed ``[row, component]`` with the mixture
        weights and the ``LogNormal`` location / scale parameters.
    q : probability level.
    maxiter : iteration budget handed to ``dual_annealing``.

    Returns
    -------
    ``(solutions, evaluations)``: the quantile estimate for every row
    (flattened) and the remaining ``|CDF(x) - q|`` for each of them.
    """
    pi, mu, std = torch.from_numpy(pi), torch.from_numpy(mu), torch.from_numpy(std)
    n, solutions, evaluations = pi.shape[0], [], []
    for i in range(n):
        mix = Categorical(pi[i])
        comp = LogNormal(mu[i], std[i], validate_args=None)
        pdf = MixtureSameFamily(mix, comp)

        def objf(x):
            x = torch.from_numpy(x)
            return torch.abs(pdf.cdf(x) - q).data.numpy()

        # Component quantiles of row i. The previous version paired mu[i] with
        # std[0], so every row after the first searched the wrong interval.
        component_q = comp.icdf(torch.tensor(q, dtype=mu.dtype)).data.numpy()
        lb, ub = np.min(component_q), np.max(component_q)

        if lb < ub:
            result = dual_annealing(objf, [(lb, ub)], maxiter=maxiter)
            solution = result['x']
        else:
            # Single component (or identical component quantiles): the interval is
            # a point, which dual_annealing rejects, and that point is the answer.
            solution = np.asarray([lb], dtype=np.float64)
        evaluation = objf(solution)
        solutions.append(solution), evaluations.append(evaluation)
    return np.asarray(solutions).ravel(), np.asarray(evaluations)

In [None]:
# Load the Rossmann sales subset from the mounted Google Drive and display it.
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/time series/rossmann sales/sub_rossmann.csv')
df

Unnamed: 0,Store,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,Promo2,Competition
0,45,2015-07-31,6301,442,1,1,0,1,d,a,9710.0,0,1.0
1,45,2015-07-30,6063,445,1,1,0,1,d,a,9710.0,0,1.0
2,45,2015-07-29,5341,387,1,1,0,1,d,a,9710.0,0,1.0
3,45,2015-07-28,5504,365,1,1,0,1,d,a,9710.0,0,1.0
4,45,2015-07-27,7450,500,1,1,0,1,d,a,9710.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4705,1045,2013-01-05,4854,622,1,0,0,0,a,c,26990.0,0,0.0
4706,1045,2013-01-04,6351,769,1,0,0,1,a,c,26990.0,0,0.0
4707,1045,2013-01-03,7582,923,1,0,0,1,a,c,26990.0,0,0.0
4708,1045,2013-01-02,8282,975,1,0,0,1,a,c,26990.0,0,0.0


In [None]:
# Per-column overview: dtype, number of missing values and number of distinct values.
pd.DataFrame(data={'Dtypes': df.dtypes, 'Isnull': df.isnull().sum(), 'Nunique': df.nunique()}, index=df.columns)

Unnamed: 0,Dtypes,Isnull,Nunique
Store,int64,0,5
Date,object,0,942
Sales,int64,0,2918
Customers,int64,0,918
Open,int64,0,2
Promo,int64,0,2
StateHoliday,object,0,4
SchoolHoliday,int64,0,2
StoreType,object,0,3
Assortment,object,0,2


In [None]:
# Target and timestamp columns; both names are read as globals by nn_ts_pipeline.
out_column, time_column = 'Sales', 'Date'
df[time_column] = pd.to_datetime(df[time_column])

# 'Customers' and every constant column are excluded from the feature set.
to_remove = ['Customers'] + [c for c in df.columns if df[c].nunique()==1]
features = [c for c in df.columns if (c not in [out_column]) and (c not in to_remove)]
# Object-typed columns with at least two levels, plus the integer store id.
categorical = [c for c in features if (df[c].dtype=='object') and (df[c].nunique() >= 2)] + ['Store']
# The dtype test is parenthesised so the categorical exclusion applies to float and
# int columns alike; `A or B and C` previously bound it to the int branch only.
numerical = [c for c in features if ((df[c].dtype=='float') or (df[c].dtype=='int')) and (c not in categorical)]

# Left: histogram of the target. Right: correlation of the numerical features and the target.
fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.03, subplot_titles=('Label Distribution', "Features Correlation"))
fig.append_trace(go.Histogram(x=df[out_column]), row=1, col=1)
fig.append_trace(go.Heatmap(z=df[numerical+[out_column]].corr(),x=numerical+[out_column],y=numerical+[out_column]), row=1, col=2)
fig.show()

In [None]:
# Seasonal decomposition (period of 7 rows) of the sales of store 1045, with the
# remaining columns overlaid on the same figure.
sub = df.loc[(df['Store'] == 1045)]
fig, trend, seasonal, residual = plot_ts_decomposition(sub, time_column, out_column, features=True, period=7)
fig.show()
# Mean, variance and standard deviation of each component: trend, seasonal, residual.
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

7056.175518925518 2127566.948610514 1458.618164089051
3.6627465284194023 4387582.072148839 2094.6555975025676
-0.20498168967954772 2400761.7794677755 1549.4391822423283


In [None]:
# Add the weekly and yearly sine / cosine features (date_encoding also modifies df in place).
df, periodic = date_encoding(df, time_column, daily=False, weekly=True, yearly=True)
# Chronological split: train before 2015-06-01, validation from 2015-06-01 up to
# (not including) 2015-06-14, test from 2015-06-14 onwards.
train = df[df[time_column] < '2015-06-01']
valid = df[(df[time_column] >= '2015-06-01') & (df[time_column] < '2015-06-14')]
test = df[df[time_column]>='2015-06-14']

# Encode / scale the features; eps is added to every target value.
X_train, T_train, Y_train, X_valid, T_valid, Y_valid, X_test, T_test, Y_test = nn_ts_pipeline(train, valid, test, categorical, numerical, periodic, eps=1e-5)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((4405, 14), (240, 14), (4405,), (240,))

In [None]:
# Display the encoded and scaled training features.
X_train

Unnamed: 0,StateHoliday,StoreType,Assortment,Store,Open,Promo,SchoolHoliday,CompetitionDistance,Promo2,Competition,sin_dayofweek,cos_dayofweek,sin_dayofyear,cos_dayofyear
61,0,2,0,0,-1.0,-1.0,-1.0,-0.511811,-1.0,1.0,-2.449294e-16,1.0,0.516062,-0.856551
62,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,-8.660254e-01,0.5,0.530730,-0.847541
63,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,-8.660254e-01,-0.5,0.545240,-0.838280
64,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,1.224647e-16,-1.0,0.559589,-0.828770
65,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,8.660254e-01,-0.5,0.573772,-0.819015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4705,0,0,1,4,1.0,-1.0,-1.0,1.000000,-1.0,-1.0,-8.660254e-01,0.5,0.085965,0.996298
4706,0,0,1,4,1.0,-1.0,1.0,1.000000,-1.0,-1.0,-8.660254e-01,-0.5,0.068802,0.997630
4707,0,0,1,4,1.0,-1.0,1.0,1.000000,-1.0,-1.0,1.224647e-16,-1.0,0.051620,0.998667
4708,0,0,1,4,1.0,-1.0,1.0,1.000000,-1.0,-1.0,8.660254e-01,-0.5,0.034422,0.999407


In [None]:
# Use the GPU when CUDA is available, otherwise the CPU; read as a global by the models below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from torch import autograd

def sinc(x):
    """Normalised sinc of a tensor: ``sin(pi * x) / (pi * x)``, equal to 1 at ``x == 0``.

    Delegates to ``torch.sinc``. The previous ``torch.where`` guard produced
    the right forward value at zero but still evaluated ``sin(0) / 0`` in the
    unselected branch, which turns the gradient at ``x == 0`` into NaN.
    """
    return torch.sinc(x)


class RegrDataLoader(Dataset):
    """Dataset yielding ``(numerical features, categorical features, target)`` per row.

    Despite its name this is a ``Dataset``; it is meant to be wrapped in a
    ``DataLoader``.
    """

    def __init__(self, X, Y, numerical_col, categorical_col):
        # Numerical features and targets are stored as float32; categorical
        # codes keep whatever dtype the frame holds.
        self.X1 = X[numerical_col].values.astype(np.float32)
        self.X2 = X[categorical_col].values
        self.Y = Y.astype(np.float32)

    def __len__(self):
        return len(self.Y)

    def __getitem__(self, idx):
        numeric_row = self.X1[idx]
        category_row = self.X2[idx]
        target = self.Y[idx]
        return numeric_row, category_row, target

class Embedder(nn.Module):
    """One embedding table per categorical column, concatenated along the feature axis."""

    def __init__(self, embedding_sizes):
        """``embedding_sizes`` is an iterable of ``(number of categories, embedding size)`` pairs."""
        super().__init__()
        tables = []
        for n_categories, dim in embedding_sizes:
            tables.append(nn.Embedding(n_categories, dim))
        self.embeddings = nn.ModuleList(tables)

    def forward(self, x):
        # Column i of x is looked up in table i.
        embedded = []
        for column, table in enumerate(self.embeddings):
            embedded.append(table(x[:, column]))
        return torch.cat(embedded, 1)

class NNModel(nn.Module):
    """Feed-forward stack of Linear (+ LayerNorm) (+ ELU) (+ Dropout) blocks.

    Parameters
    ----------
    input_shape : width of the input.
    units : width of each block; entries below 1 are skipped.
    factors : when given, ``units`` is replaced by
        ``round(input_shape * factors)``.
    activ : append an ELU after each linear layer.
    norm : append a LayerNorm after each linear layer; the linear layer then
        has no bias.
    dropout : per-block dropout probabilities; any falsy value means no dropout.
    slops : per-block ELU ``alpha``; defaults to 1 for every block.

    ``output_shape`` holds the width of the last block that was built (or
    ``input_shape`` when none was).
    """

    def __init__(self, input_shape, units=None, factors=None, activ=True, norm=False, dropout=False, slops=None):
        super().__init__()
        self.input_shape = input_shape
        self.units = units
        self.factors = factors
        self.activ, self.norm = activ, norm
        self.network = nn.ModuleList()

        if self.factors:
            self.units = np.round(self.input_shape * np.asarray(self.factors)).astype(int)

        width = input_shape
        if self.units is not None:
            self.dropout = dropout if dropout else np.zeros_like(self.units)
            self.slops = slops if slops is not None else np.full(len(self.units), 1)
            for n_out, p_drop, alpha in zip(self.units, self.dropout, self.slops):
                if n_out < 1:
                    continue
                self.network.extend(self.__build_block__(width, n_out, p=p_drop, slop=alpha))
                width = n_out

        self.output_shape = width
        self.reset_parameters()

    def __build_block__(self, input_shape, units, p, slop):
        """Return the list of layers making up one block."""
        layers = [nn.Linear(input_shape, units, bias=not self.norm)]
        if self.norm:
            layers.append(nn.LayerNorm(units, eps=1e-5))
        if self.activ:
            layers.append(nn.ELU(slop))
        if p > 0:
            layers.append(nn.Dropout(p))
        return layers

    def forward(self, x):
        out = x
        for layer in self.network:
            out = layer(out)
        return out

    def reset_parameters(self):
        """Xavier-normal weights for every linear layer; biases, when present, are set to 0.1."""
        linear_layers = (layer for layer in self.network if isinstance(layer, nn.Linear))
        for layer in linear_layers:
            nn.init.xavier_normal_(layer.weight)
            if not self.norm:
                layer.bias.data.fill_(0.1)


class TabMLP(nn.Module):
    """Embeds categorical columns, batch-normalises numerical ones and feeds both to an MLP.

    Parameters
    ----------
    vocab_size : number of distinct values of each categorical column.
    embed_size : embedding width of each categorical column.
    numerical : number of numerical input columns.
    mlp_units : widths of the MLP blocks; the last one is exposed as
        ``output_shape``.
    mlp_dropout : dropout probability applied to every MLP block.
    """

    def __init__(self, vocab_size, embed_size, numerical, mlp_units, mlp_dropout=0.00001):
        super().__init__()
        self.cat_embedder = Embedder(list(zip(vocab_size, embed_size)))
        self.norm = nn.BatchNorm1d(numerical, eps=1e-5)
        mlp_input = int(numerical + sum(embed_size))
        per_block_dropout = [mlp_dropout]*len(mlp_units)
        self.mlp = NNModel(mlp_input, units=mlp_units, factors=None, dropout=per_block_dropout)
        self.output_shape = mlp_units[-1]

    def forward(self, x_cont, x_cat):
        # Without categorical input the numerical features go to the MLP as they are
        # (no batch norm is applied on this path).
        if x_cat.nelement() == 0:
            return self.mlp(x_cont)
        embedded = self.cat_embedder(x_cat)
        normalised = self.norm(x_cont)
        return self.mlp(torch.cat([embedded, normalised], 1))

class MDN(nn.Module):
    """Base of the mixture-density heads: holds the shared trunk and the mixture-weight branch.

    ``pi`` (the linear layer producing one logit per component) only exists
    when ``n_comp > 1``; otherwise it is ``None``.
    """

    def __init__(self, shared, clf_nn, n_comp):
        super().__init__()
        self.shared = shared
        self.clf_nn = clf_nn
        self.n_comp = n_comp
        if self.n_comp > 1:
            self.pi = nn.Linear(self.clf_nn.output_shape, self.n_comp)
        else:
            self.pi = None

    def proba_model(self, x):
        """Mixture weights: softmax over the last axis of ``pi(clf_nn(x))``."""
        logits = self.pi(self.clf_nn(x))
        return torch.softmax(logits, dim=-1)

class OneParamMDN(MDN):
    """Mixture-density head for component distributions with a single parameter."""

    def __init__(self, shared, clf_nn, n_comp, alpha_nn):
        super().__init__(shared, clf_nn, n_comp)
        self.alpha_nn = alpha_nn
        self.ai = nn.Linear(self.alpha_nn.output_shape, self.n_comp)

    def forward(self, x_cont, x_cat):
        """Return ``(mixture weights, alpha)``; the weights are all ones when ``n_comp == 1``."""
        hidden = self.shared(x_cont, x_cat)
        if self.n_comp > 1:
            proba = self.proba_model(hidden)
        else:
            proba = torch.ones((len(hidden),1)).to(device)
        alpha = self.alpha_model(hidden)
        return proba, alpha

    def alpha_model(self, x):
        """Raw (unconstrained) parameter, one value per component."""
        return self.ai(self.alpha_nn(x))

class TwoParamMDN(MDN):
    """Mixture-density head for component distributions with two parameters."""

    def __init__(self, shared, clf_nn, n_comp, alpha_nn, beta_nn):
        super().__init__(shared, clf_nn, n_comp)
        self.alpha_nn = alpha_nn
        self.beta_nn = beta_nn
        self.ai = nn.Linear(self.alpha_nn.output_shape, self.n_comp)
        self.bi = nn.Linear(self.beta_nn.output_shape, self.n_comp)

    def forward(self, x_cont, x_cat):
        """Return ``(mixture weights, alpha, beta)``; the weights are all ones when ``n_comp == 1``."""
        hidden = self.shared(x_cont, x_cat)
        if self.n_comp > 1:
            proba = self.proba_model(hidden)
        else:
            proba = torch.ones((len(hidden),1)).to(device)
        alpha = self.alpha_model(hidden)
        beta = self.beta_model(hidden)
        return proba, alpha, beta

    def alpha_model(self, x):
        """Raw (unconstrained) first parameter, one value per component."""
        return self.ai(self.alpha_nn(x))

    def beta_model(self, x):
        """Raw (unconstrained) second parameter, one value per component."""
        return self.bi(self.beta_nn(x))

class BaseParametric:
    """Training, evaluation and permutation-importance loop shared by the parametric models.

    Subclasses must provide ``loss_function(X1, X2, Y, l2_reg)`` returning a
    scalar loss tensor; it is called by ``train_model`` and ``eval_model`` but
    not defined here. The notebook-level ``device`` global is used for all
    tensors.

    ``losses`` collects the per-epoch history ('Epoch', 'Train', 'Test', 'LR')
    together with the latest ('LState') and best-validation ('BState') model
    state dicts.
    """

    def __init__(self, model, numerical_col, categorical_col, resume=None):
        self.model = model.to(device)
        self.losses = {'Epoch': [], 'Train': [], 'Test': [], 'BState': [], 'LState': [], 'LR': []}
        self.numerical_col, self.categorical_col = numerical_col, categorical_col
        self.times = None
        # Note: fit() always replaces this with a freshly built SGD optimiser.
        self.optim = resume

    def train_model(self, optim, train_loader, grad_clip, l2_reg):
        """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

        ``optim`` is accepted for compatibility; the optimiser actually stepped
        is ``self.optim``. Gradients are clipped to a total norm of ``grad_clip``.

        Raises ``ValueError`` when the loader yields no batch.
        """
        total_loss, n_batches = 0, 0
        self.model = self.model.train()
        for X1, X2, Y in train_loader:
            X1, X2, Y = X1.to(device), X2.to(device), Y.to(device)
            self.optim.zero_grad()
            loss = self.loss_function(X1, X2, Y, l2_reg)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), grad_clip)
            self.optim.step()
            total_loss += loss.item()
            n_batches += 1
        if n_batches == 0:
            raise ValueError('train_loader produced no batches')
        return total_loss / n_batches

    def eval_model(self, test_loader):
        """Return the mean batch loss over ``test_loader`` with the model in eval mode.

        Raises ``ValueError`` when the loader yields no batch.
        """
        self.model = self.model.eval()
        total_loss, n_batches = 0, 0
        # No gradients are needed for evaluation; skipping them avoids building graphs.
        with torch.no_grad():
            for X1, X2, Y in test_loader:
                X1, X2, Y = X1.to(device), X2.to(device), Y.to(device)
                loss = self.loss_function(X1, X2, Y, l2_reg=0)
                total_loss += loss.item()
                n_batches += 1
        if n_batches == 0:
            raise ValueError('test_loader produced no batches')
        return total_loss / n_batches

    def fit(self, X_train, Y_train, epoch, lr, opt_kwarg, batch_size=None,  grad_clip=100, momentum=0.9, X_test=None, Y_test=None, l2_reg=0, eval=True, verbose=True, save=True):
        """Train with Nesterov SGD and a ``CyclicLR`` schedule stepped once per epoch.

        Parameters
        ----------
        X_train, Y_train : training features (frame) and targets (array).
        epoch : number of epochs.
        lr, momentum : SGD learning rate and momentum.
        opt_kwarg : keyword arguments forwarded to ``CyclicLR``.
        batch_size : defaults to the full training set.
        grad_clip : maximum gradient norm.
        X_test, Y_test : optional validation data; when given, the state dict
            with the lowest validation loss is kept in ``losses['BState']``.
        l2_reg : forwarded to ``loss_function``.
        eval, save : accepted for compatibility; not used.
        verbose : print per-epoch progress.
        """
        batch_size = len(X_train) if batch_size is None else batch_size
        train_load = DataLoader(RegrDataLoader(X_train, Y_train, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)
        test_load = None
        if X_test is not None:
            test_load = DataLoader(RegrDataLoader(X_test, Y_test, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)

        best_loss = float('inf')
        self.optim = opt.SGD(self.model.parameters(), lr=lr, momentum=momentum, nesterov=True)
        scheduler = opt.lr_scheduler.CyclicLR(self.optim, **opt_kwarg)

        for i in range(epoch):
            if verbose:
                print('##### EPOCH ' + str(i) + ' #####')

            train_loss = self.train_model(self.optim, train_load, grad_clip, l2_reg)
            self.losses['LState'] = deepcopy(self.model.state_dict())

            if verbose:
                print('train loss : ', train_loss)
            self.losses['Epoch'].append(i)
            self.losses['Train'].append(train_loss)

            valid_loss = None
            if test_load is not None:
                valid_loss = self.eval_model(test_load)
                if verbose:
                    print('test loss : ', valid_loss)
                self.losses['Test'].append(valid_loss)

            # Step the schedule every epoch; it used to advance only when validation
            # data was supplied, freezing the learning rate otherwise.
            scheduler.step()
            self.losses['LR'].append(scheduler.get_last_lr()[0])

            if valid_loss is not None and valid_loss < best_loss:
                self.losses['BState'] = deepcopy(self.model.state_dict())
                best_loss = valid_loss
                if verbose:
                    print('===========SAVE===========')

    def feature_importance(self, rep, X_test, Y_test, batch_size=None):
        """Permutation importance: absolute loss change after shuffling one column at a time.

        Each column of ``X_test`` is permuted ``rep`` times on a private copy, so
        the caller's frame is left untouched and every permutation starts from
        the original data.

        Returns
        -------
        dict with 'importances' (``rep`` x number of columns), 'importances_mean'
        and 'importances_std' (both per column).
        """
        # Default to the evaluated frame, not the notebook-level X_train.
        batch_size = len(X_test) if batch_size is None else batch_size

        def _loss(frame):
            # shuffle=False keeps the batch composition, and hence the loss, deterministic.
            loader = DataLoader(RegrDataLoader(frame, Y_test, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)
            return self.eval_model(loader)

        res = np.zeros((rep, X_test.shape[1]))
        base_loss = _loss(X_test)
        work = X_test.copy()
        for i, col in enumerate(X_test.columns):
            pristine = X_test[col].values
            for j in range(rep):
                work[col] = np.random.permutation(pristine)
                res[j, i] = base_loss - _loss(work)
            work[col] = pristine  # restore before moving on to the next column
        res = np.abs(res)
        return {'importances': res, 'importances_mean': np.mean(res, 0), 'importances_std': np.std(res, 0)}


class PoissonModel(BaseParametric):
    """Mixture-of-Poissons likelihood on top of a one-parameter MDN.

    The wrapped model must return ``(pi, rate)``; the raw rate is mapped to a
    positive value with ``ELU(rate) + 1 + 1e-15``.
    """

    def __init__(self, model, numerical_col, categorical_col,resume=None):
        # `resume` used to be accepted and then silently dropped.
        super(PoissonModel, self).__init__(model, numerical_col, categorical_col, resume)

    def _mixture(self, X1, X2):
        """Run the network and build the mixture; returns ``(pi, rate, mixture)``."""
        pi, rate = self.model(X1, X2)
        rate = nn.ELU()(rate) + 1 + 1e-15 #POSITIVE
        mix = Categorical(pi)
        comp = Poisson(rate, validate_args=None)
        return pi, rate, MixtureSameFamily(mix, comp)

    def loss_function(self, X1, X2, Y, l2_reg):
        """Mean negative log-likelihood of ``Y``; ``l2_reg`` is accepted but not used."""
        _, _, mixture = self._mixture(X1, X2)
        return -torch.mean(mixture.log_prob(Y))

    def prdict(self, X, batch_size):
        """Predict for frame ``X`` in batches.

        Returns a dict of per-row lists: 'pi' (mixture weights), 'rate'
        (component rates) and 'pred' (mixture mean).
        """
        self.model.eval()
        outputs = {'pi': [], 'rate': [], 'pred': []}
        X1 = torch.tensor(X[self.numerical_col].values.astype(np.float32))
        X2 = torch.tensor(X[self.categorical_col].values)
        data_load = DataLoader(TensorDataset(X1, X2), batch_size=batch_size)
        with torch.no_grad():  # inference only
            for X1, X2 in data_load:
                X1, X2 = X1.to(device), X2.to(device)
                pi, rate, mixture = self._mixture(X1, X2)
                outputs['pi'].extend(pi.cpu().numpy())
                outputs['rate'].extend(rate.cpu().numpy())
                outputs['pred'].extend(mixture.mean.cpu().numpy())
        return outputs

    # Correctly spelt alias; `prdict` is kept for existing callers.
    predict = prdict

class NegativeBinomialModel(BaseParametric):
    """Mixture-of-negative-binomials likelihood on top of a two-parameter MDN.

    The wrapped model must return ``(pi, count, probs)``. ``count`` is made
    positive with ``ELU(count) + 1 + 1e-15``; ``probs`` is squashed with
    ``sigmoid(tanh(probs))``, which keeps it strictly between
    ``sigmoid(-1)`` and ``sigmoid(1)``.
    """

    def __init__(self, model, numerical_col, categorical_col,resume=None):
        # `resume` used to be accepted and then silently dropped.
        super(NegativeBinomialModel, self).__init__(model, numerical_col, categorical_col, resume)

    def _mixture(self, X1, X2):
        """Run the network and build the mixture; returns ``(pi, count, probs, mixture)``."""
        pi, count, probs = self.model(X1, X2)
        count = nn.ELU()(count) + 1 + 1e-15 #POSITIVE
        probs = nn.Sigmoid()(torch.tanh(probs))
        mix = Categorical(pi)
        comp = NegativeBinomial(count, probs, validate_args=None)
        return pi, count, probs, MixtureSameFamily(mix, comp)

    def loss_function(self, X1, X2, Y, l2_reg):
        """Mean negative log-likelihood of ``Y``; ``l2_reg`` is accepted but not used."""
        _, _, _, mixture = self._mixture(X1, X2)
        return -torch.mean(mixture.log_prob(Y))

    def prdict(self, X, batch_size):
        """Predict for frame ``X`` in batches.

        Returns a dict of per-row lists: 'pi', 'count', 'probs' and 'pred'
        (mixture mean).
        """
        self.model.eval()
        outputs = {'pi': [], 'count': [], 'probs': [], 'pred': []}
        X1 = torch.tensor(X[self.numerical_col].values.astype(np.float32))
        X2 = torch.tensor(X[self.categorical_col].values)
        data_load = DataLoader(TensorDataset(X1, X2), batch_size=batch_size)
        with torch.no_grad():  # inference only
            for X1, X2 in data_load:
                X1, X2 = X1.to(device), X2.to(device)
                pi, count, probs, mixture = self._mixture(X1, X2)
                outputs['pi'].extend(pi.cpu().numpy())
                outputs['count'].extend(count.cpu().numpy())
                outputs['probs'].extend(probs.cpu().numpy())
                outputs['pred'].extend(mixture.mean.cpu().numpy())
        return outputs

    # Correctly spelt alias; `prdict` is kept for existing callers.
    predict = prdict

class NormalModel(BaseParametric):
    """Mixture-of-Gaussians likelihood on top of a two-parameter MDN.

    The wrapped model must return ``(pi, mu, std)``; the raw scale is made
    positive with ``ELU(std) + 1 + 1e-15``.
    """

    def __init__(self, model, numerical_col, categorical_col,resume=None):
        # `resume` used to be accepted and then silently dropped.
        super(NormalModel, self).__init__(model, numerical_col, categorical_col, resume)

    def _mixture(self, X1, X2):
        """Run the network and build the mixture; returns ``(pi, mu, std, mixture)``."""
        pi, mu, std = self.model(X1, X2)
        std = nn.ELU()(std) + 1 + 1e-15 #POSITIVE
        mix = Categorical(pi)
        comp = Normal(mu, std, validate_args=None)
        return pi, mu, std, MixtureSameFamily(mix, comp)

    def loss_function(self, X1, X2, Y, l2_reg):
        """Mean negative log-likelihood of ``Y``; ``l2_reg`` is accepted but not used."""
        _, _, _, mixture = self._mixture(X1, X2)
        return -torch.mean(mixture.log_prob(Y))

    def prdict(self, X, batch_size):
        """Predict for frame ``X`` in batches.

        Returns a dict of per-row lists: 'pi', 'mu', 'std' and 'pred'
        (mixture mean).
        """
        self.model.eval()
        outputs = {'pi': [], 'mu': [], 'std': [], 'pred': []}
        X1 = torch.tensor(X[self.numerical_col].values.astype(np.float32))
        X2 = torch.tensor(X[self.categorical_col].values)
        data_load = DataLoader(TensorDataset(X1, X2), batch_size=batch_size)
        with torch.no_grad():  # inference only
            for X1, X2 in data_load:
                X1, X2 = X1.to(device), X2.to(device)
                pi, mu, std, mixture = self._mixture(X1, X2)
                outputs['pi'].extend(pi.cpu().numpy())
                outputs['mu'].extend(mu.cpu().numpy())
                outputs['std'].extend(std.cpu().numpy())
                outputs['pred'].extend(mixture.mean.cpu().numpy())
        return outputs

    # Correctly spelt alias; `prdict` is kept for existing callers.
    predict = prdict

class LogNormalModel(BaseParametric):
    """Mixture-of-log-normals likelihood on top of a two-parameter MDN.

    The wrapped model must return ``(pi, mu, std)``; the raw scale is made
    positive with ``ELU(std) + 1 + 1e-15``.
    """

    def __init__(self, model, numerical_col, categorical_col,resume=None):
        # `resume` used to be accepted and then silently dropped.
        super(LogNormalModel, self).__init__(model, numerical_col, categorical_col, resume)

    def _mixture(self, X1, X2):
        """Run the network and build the mixture; returns ``(pi, mu, std, mixture)``."""
        pi, mu, std = self.model(X1, X2)
        std = nn.ELU()(std) + 1 + 1e-15 #LOGNORMAL
        mix = Categorical(pi)
        comp = LogNormal(mu, std, validate_args=None)
        return pi, mu, std, MixtureSameFamily(mix, comp)

    def loss_function(self, X1, X2, Y, l2_reg):
        """Mean negative log-likelihood of ``Y``; ``l2_reg`` is accepted but not used."""
        _, _, _, mixture = self._mixture(X1, X2)
        return -torch.mean(mixture.log_prob(Y))

    def prdict(self, X, batch_size):
        """Predict for frame ``X`` in batches.

        Returns a dict of per-row lists: 'pi', 'mu', 'std' and 'pred'
        (mixture mean).
        """
        self.model.eval()
        outputs = {'pi': [], 'mu': [], 'std': [], 'pred': []}
        X1 = torch.tensor(X[self.numerical_col].values.astype(np.float32))
        X2 = torch.tensor(X[self.categorical_col].values)
        data_load = DataLoader(TensorDataset(X1, X2), batch_size=batch_size)
        with torch.no_grad():  # inference only
            for X1, X2 in data_load:
                X1, X2 = X1.to(device), X2.to(device)
                pi, mu, std, mixture = self._mixture(X1, X2)
                outputs['pi'].extend(pi.cpu().numpy())
                outputs['mu'].extend(mu.cpu().numpy())
                outputs['std'].extend(std.cpu().numpy())
                outputs['pred'].extend(mixture.mean.cpu().numpy())
        return outputs

    # Correctly spelt alias; `prdict` is kept for existing callers.
    predict = prdict

    def mixture_quantile(self, pi, mu, std, q, maxiter=50):
        """Numerically invert the mixture CDF at level ``q`` for every row.

        The search interval of a row is spanned by the smallest and largest
        ``q``-quantile of its components; ``dual_annealing`` minimises
        ``|CDF(x) - q|`` on it.

        Parameters
        ----------
        pi, mu, std : numpy arrays indexed ``[row, component]``.
        q : probability level.
        maxiter : iteration budget handed to ``dual_annealing``.

        Returns
        -------
        ``(solutions, evaluations)``: one quantile estimate and one remaining
        ``|CDF(x) - q|`` per row, both as 1-D arrays.
        """
        pi, mu, std, q = torch.from_numpy(pi), torch.from_numpy(mu), torch.from_numpy(std), torch.tensor(q)
        n, solutions, evaluations = pi.shape[0], [], []
        icdf = LogNormal(mu, std, validate_args=None).icdf(q) #SHAPE = [n, n_comp]
        lb, ub = torch.min(icdf, dim=1)[0].numpy(), torch.max(icdf, dim=1)[0].numpy()
        for i in range(n):
            mix = Categorical(pi[i])
            comp = LogNormal(mu[i], std[i], validate_args=None)
            pdf = MixtureSameFamily(mix, comp)

            def objf(x):
                x = torch.from_numpy(x)
                return torch.abs(pdf.cdf(x) - q).data.numpy()

            if lb[i] < ub[i]:
                result = dual_annealing(objf, [(lb[i], ub[i])], maxiter=maxiter)
                xopt, fopt = result['x'], result['fun']
            else:
                # Single component (or identical component quantiles): the interval
                # collapses to a point, which dual_annealing rejects.
                xopt = np.asarray([lb[i]], dtype=np.float64)
                fopt = objf(xopt)
            solutions.append(xopt)
            # Normalise to a plain float so every row contributes the same shape.
            evaluations.append(float(np.ravel(fopt)[0]))
        return np.asarray(solutions).ravel(), np.asarray(evaluations)


def gradient_clipper(model: nn.Module, val: float) -> nn.Module:
    """Register gradient hooks that sanitise and clamp every trainable parameter's gradient.

    Each hook replaces NaN entries of the incoming gradient with ``1e-10`` and
    clamps the result to ``[-val, val]``.

    Args:
        model: module whose parameters receive the hook.
        val: absolute clamp bound applied element-wise.

    Returns:
        The same ``model`` instance, with the hooks registered.
    """
    def process_grad(grad):
        # A tensor hook must not modify its argument in place, so build a new
        # tensor instead of assigning into ``grad``.
        grad = torch.where(torch.isnan(grad), torch.full_like(grad, 1e-10), grad)
        return torch.clamp(grad, -val, val)

    for parameter in model.parameters():
        # register_hook raises on tensors that do not require grad (frozen layers).
        if parameter.requires_grad:
            parameter.register_hook(process_grad)

    return model

In [None]:
# --- Hyper-parameters -------------------------------------------------------
n_comp = 3           # number of mixture components
epoch = 50000
lr = 1e-5
batch_size = 1024
d = 0.000001         # only referenced by the commented-out dropout variant below
mlp_d = 0.1          # dropout rate handed to TabMLP and to the head networks

# Keyword sets for the learning-rate schedulers (only cyclic_kwarg is passed to fit here).
cyclic_kwarg = dict(base_lr=lr, max_lr=1e-2, step_size_up=250, step_size_down=250)
plateau_kwarg = dict(factor=0.5, patience=200, verbose=True, min_lr=1e-7, mode='min')

# One embedding width per categorical column: ceil(sqrt(number of distinct values)).
embed_size = np.ceil(np.sqrt(list(X_train[categorical].nunique()))).astype(int)

# --- Networks: a shared trunk plus one small head per mixture quantity -------
shared_nn = TabMLP(
    df[categorical].nunique(),
    embed_size,
    len(numerical+periodic),
    [128, 512, 768, 256],
    mlp_dropout=mlp_d,
)
clf_nn = NNModel(shared_nn.output_shape , units=[64], factors=None, dropout=[mlp_d])
alpha_nn = NNModel(shared_nn.output_shape, units=[64], factors=None, dropout=[mlp_d])
beta_nn = NNModel(shared_nn.output_shape, units=[64], factors=None, dropout=[mlp_d])#, dropout=[d,d,d,d]

nn_model = gradient_clipper(TwoParamMDN(shared_nn, clf_nn, n_comp, alpha_nn, beta_nn), 10)
#nn_model = gradient_clipper(OneParamMDN(shared_nn, clf_nn, n_comp, alpha_nn), 10)
#nn_model.load_state_dict(best_state)
print(nn_model)
# Count of trainable parameters.
print(sum(p.numel() for p in nn_model.parameters() if p.requires_grad))

# --- Training ----------------------------------------------------------------
mdn = LogNormalModel(nn_model, numerical+periodic, categorical)
mdn.fit(
    X_train, Y_train, epoch, lr, cyclic_kwarg,
    batch_size=batch_size,
    grad_clip=10,
    momentum=0.9,
    X_test=X_valid,
    Y_test=Y_valid,
    l2_reg=0,
    eval=False,
    verbose=True,
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
test loss :  6.230269432067871
##### EPOCH 909 #####
train loss :  5.700837898254394
test loss :  6.185525417327881
##### EPOCH 910 #####
train loss :  5.660970687866211
test loss :  6.173270225524902
##### EPOCH 911 #####
train loss :  5.759149265289307
test loss :  6.308735370635986
##### EPOCH 912 #####
train loss :  5.747994422912598
test loss :  6.183632850646973
##### EPOCH 913 #####
train loss :  5.538286399841309
test loss :  6.344192028045654
##### EPOCH 914 #####
train loss :  5.661609649658203
test loss :  6.312393665313721
##### EPOCH 915 #####
train loss :  5.6325146675109865
test loss :  6.218766689300537
##### EPOCH 916 #####
train loss :  5.639958667755127
test loss :  6.253206729888916
##### EPOCH 917 #####
train loss :  5.585346317291259
test loss :  6.177027225494385
##### EPOCH 918 #####
train loss :  5.708072090148926
test loss :  6.370718002319336
##### EPOCH 919 #####
train loss :  5.690060806274414

KeyboardInterrupt: ignored

In [None]:
# Reload the state dict stored under mdn.losses['BState']
# (presumably the best checkpoint seen during fit -- confirm in LogNormalModel.fit).
best_state = deepcopy(mdn.losses['BState'])
mdn.model.load_state_dict(best_state)
print(np.min(mdn.losses['Test']))

s = 0  # index of the first logged entry to show in the curves
fig = make_subplots(rows=2, cols=1)
# add_trace(..., row=, col=) replaces append_trace, which plotly marks as deprecated.
fig.add_trace(go.Scatter(x=mdn.losses['Epoch'][s:], y=mdn.losses['Train'][s:], mode='lines', name='Train'), row=1, col=1)
fig.add_trace(go.Scatter(x=mdn.losses['Epoch'][s:], y=mdn.losses['Test'][s:], mode='lines', name='Test'), row=1, col=1)
fig.add_trace(go.Scatter(x=mdn.losses['Epoch'][s:], y=mdn.losses['LR'][s:], mode='lines', name='LR'), row=2, col=1)
fig.update_layout(height=1000, width=1500, title_text="Stacked Subplots")
fig.show()

6.109090328216553


In [None]:
outputs = mdn.prdict(X_test, len(X_test))
# prdict returns the keys 'pi', 'mu', 'std' and 'pred'; there is no 'rate'
# entry, so reading it raised the KeyError recorded under this cell.
pi, mu, std, mdn_mu = (np.asarray(outputs[key]) for key in ('pi', 'mu', 'std', 'pred'))
#nn_output = pd.DataFrame(torch.cat((pi, mu, std), 1).data.cpu().numpy())
# Interval bounds are disabled in this cell; the mean is used as a placeholder
# for both 'lb' and 'ub' below.
# mdn_lb, evaluations_lb = mdn.mixture_quantile(pi, mu, std, 0.025)
# mdn_ub, evaluations_ub = mdn.mixture_quantile(pi, mu, std, 0.975)
# mdn_lb = y_scaler.inverse_transform(mdn_lb).ravel()
# mdn_ub = y_scaler.inverse_transform(mdn_ub).ravel()
pred = pd.DataFrame({"mu": mdn_mu, "lb": mdn_mu, "ub": mdn_mu})
# axis must be passed by keyword: positional use is deprecated in pandas.
pred = pd.concat((test.reset_index(drop=True), pred), axis=1)
pred

KeyError: ignored

In [None]:
import ray

# ignore_reinit_error=True lets this cell be re-run while Ray is already
# initialised: Ray logs a notice instead of raising.
ray.init(ignore_reinit_error=True)

def mixture_quantile_1(pi, mu, std, q):
    """Bracket the mixture quantile by the extreme component quantiles.

    Evaluates the LogNormal inverse CDF of every component at ``q`` and, per
    row, returns the smallest value, the largest value and their midpoint as
    numpy arrays. ``pi`` is accepted for signature symmetry but is not used.
    """
    component_quantiles = LogNormal(mu, std, validate_args=None).icdf(q)  # [n, n_comp]
    lower = component_quantiles.min(dim=1).values.numpy()
    upper = component_quantiles.max(dim=1).values.numpy()
    midpoint = (upper + lower) / 2
    return lower, upper, midpoint

@ray.remote
def objf(p, m, s, lb, ub, q, maxiter):
    """Ray task: find the ``q``-quantile of one LogNormal mixture.

    Args:
        p: mixture weights of a single row.
        m: LogNormal ``loc`` parameters of that row's components.
        s: LogNormal ``scale`` parameters of that row's components.
        lb: lower search bound.
        ub: upper search bound.
        q: target quantile level.
        maxiter: ``maxiter`` forwarded to ``dual_annealing``.

    Returns:
        The ``dual_annealing`` result object; callers read ``['x']`` and ``['fun']``.
    """
    # Only the arguments are used here. The previous unused bookkeeping line read
    # the notebook-level ``pi`` tensor, tying the task to global state.
    mix = Categorical(p)
    comp = LogNormal(m, s, validate_args=None)
    pdf = MixtureSameFamily(mix, comp)

    def _objf_(x):
        # Distance between the mixture CDF at x and the requested level.
        x = torch.from_numpy(x)
        return torch.abs(pdf.cdf(x) - q).data.numpy()

    result = dual_annealing(_objf_, list(zip([lb], [ub])), maxiter=maxiter)
    return result

outputs = mdn.prdict(X_test, len(X_test))
pi, mu, std, mdn_mu = (np.asarray(outputs[key]) for key in ('pi', 'mu', 'std', 'pred'))
pi, mu, std = torch.from_numpy(pi), torch.from_numpy(mu), torch.from_numpy(std)
q1, q2 = torch.tensor(0.025), torch.tensor(0.975)


def _parallel_mixture_quantile(q, maxiter=50):
    """Solve the ``q``-quantile of every predicted mixture with one Ray task per row.

    Returns the flattened array of solutions and the list of objective values.
    """
    lb, ub, _ = mixture_quantile_1(pi, mu, std, q)
    # Iterate over the predicted rows themselves so the task count always
    # matches the length of ``mdn_mu``.
    futures = [objf.remote(pi[i], mu[i], std[i], lb[i], ub[i], q, maxiter) for i in range(pi.shape[0])]
    res = ray.get(futures)  # the original run noted a duration of ~25.9 s per quantile
    return np.asarray([r['x'] for r in res]).ravel(), [r['fun'] for r in res]


mdn_lb, evaluations_lb = _parallel_mixture_quantile(q1)
mdn_ub, evaluations_ub = _parallel_mixture_quantile(q2)

pred = pd.DataFrame({"mu": mdn_mu, "lb": mdn_lb, "ub": mdn_ub})
# axis must be passed by keyword: positional use triggers the pandas
# deprecation warning shown under this cell.
pred = pd.concat((test.reset_index(drop=True), pred), axis=1)
pred

2023-07-15 13:46:52,541	INFO worker.py:1474 -- Calling ray.init() again after it has already been called.

In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only.



Unnamed: 0,Store,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,Promo2,Competition,sin_dayofweek,cos_dayofweek,sin_dayofyear,cos_dayofyear,mu,lb,ub
0,45,2015-07-31,6301,442,1,1,0,1,d,a,9710.0,0,1.0,-8.660254e-01,-0.5,-0.486273,-0.873807,6418.011230,5279.841866,7728.360845
1,45,2015-07-30,6063,445,1,1,0,1,d,a,9710.0,0,1.0,1.224647e-16,-1.0,-0.471160,-0.882048,6118.711426,4993.689223,7421.252251
2,45,2015-07-29,5341,387,1,1,0,1,d,a,9710.0,0,1.0,8.660254e-01,-0.5,-0.455907,-0.890028,5960.903809,4817.520192,7293.914965
3,45,2015-07-28,5504,365,1,1,0,1,d,a,9710.0,0,1.0,8.660254e-01,0.5,-0.440519,-0.897743,6213.591797,5028.958934,7593.709448
4,45,2015-07-27,7450,500,1,1,0,1,d,a,9710.0,0,1.0,0.000000e+00,1.0,-0.425000,-0.905193,7590.643555,6254.686776,9127.846236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,1045,2015-06-18,8701,1068,1,1,0,0,a,c,26990.0,0,1.0,1.224647e-16,-1.0,0.230306,-0.973118,9938.613281,7555.671817,12836.271349
236,1045,2015-06-17,8305,973,1,1,0,0,a,c,26990.0,0,1.0,8.660254e-01,-0.5,0.247022,-0.969010,9490.137695,7199.156485,12280.073493
237,1045,2015-06-16,9158,1094,1,1,0,0,a,c,26990.0,0,1.0,8.660254e-01,0.5,0.263665,-0.964614,10347.331055,7853.185617,13383.601472
238,1045,2015-06-15,11109,1224,1,1,0,0,a,c,26990.0,0,1.0,0.000000e+00,1.0,0.280231,-0.959933,11572.178711,8384.479245,15539.018232


In [None]:
def mase(train_y, test_y, pred):
    """Mean absolute scaled error.

    The mean absolute forecast error on the test set is divided by the mean
    absolute one-step difference of the training series.
    """
    n_train = train_y.shape[0]
    one_step_changes = np.abs(np.diff(train_y))
    scale = one_step_changes.sum()/(n_train-1)
    abs_errors = np.abs(test_y - pred)
    return abs_errors.mean()/scale

def mape(test_y, pred):
    """Mean absolute percentage error in percent, rounded to zero decimals.

    A small constant (1e-9) is added to the denominator to avoid division by zero.
    """
    pct_error = 100*(test_y-pred)/(test_y + 1e-9)
    mean_abs_pct = np.mean(np.abs(pct_error))
    return np.round(mean_abs_pct, 0)

def rmspe(test_y, pred):
    """Root mean squared percentage error, expressed in percent.

    A small constant (1e-9) is added to the denominator to avoid division by zero.
    """
    relative_error = (test_y - pred) / (test_y + 1e-9)
    mean_sq = np.mean(np.square(relative_error))
    return (np.sqrt(mean_sq)) * 100

def persistence(train_y, test_y):
    """Naive last-value forecast.

    The first prediction is the last training value; every later prediction is
    the previous test observation. Returns one prediction per element of ``test_y``.
    """
    observed = list(test_y)
    if not observed:
        return np.asarray([])
    last_train_value = list(np.copy(train_y))[-1]
    return np.asarray([last_train_value] + observed[:-1])

def mda(actual, predicted):
    """Mean Directional Accuracy.

    Fraction of consecutive steps where the sign of the change in ``predicted``
    equals the sign of the change in ``actual``.

    Inputs are converted to numpy arrays first: slicing and subtracting pandas
    Series would align on the index rather than on position and give a wrong
    score.
    """
    actual, predicted = np.asarray(actual), np.asarray(predicted)
    actual_direction = np.sign(actual[1:] - actual[:-1])
    predicted_direction = np.sign(predicted[1:] - predicted[:-1])
    return np.mean((actual_direction == predicted_direction).astype(int))

def wape(true, pred):
    """Weighted absolute percentage error: total absolute error divided by the total of ``true``."""
    total_abs_error = np.sum(np.abs(true - pred))
    total_actual = np.sum(true)
    return total_abs_error/total_actual

naive = persistence(Y_train, Y_test)


def _forecast_scores(y_hat):
    """Score one forecast vector against Y_test with every metric of the table."""
    return {'MAE': mean_absolute_error(Y_test, y_hat),
            # mean_squared_error returns the MSE; take the root so the value
            # matches the 'RMSE' label.
            'RMSE': np.sqrt(mean_squared_error(Y_test, y_hat)),
            'RMSPE': rmspe(Y_test, y_hat),
            'MAPE': mean_absolute_percentage_error(Y_test, y_hat),
            'R2': r2_score(Y_test, y_hat),
            'MASE': mase(Y_train, Y_test, y_hat),
            'MDA': mda(Y_test, y_hat),
            'WAPE': wape(Y_test, y_hat)}


# One row per forecaster, so the 'NAIVE' row is actually computed from the
# persistence baseline.
errors = pd.DataFrame([_forecast_scores(pred['mu'].values), _forecast_scores(naive)],
                      index=['THIS', 'NAIVE'])
errors

Unnamed: 0,MAE,RMSE,RMSPE,MAPE,R2,MASE,MDA,WAPE
THIS,598.453195,874403.208616,28.557361,0.173312,0.90429,0.235319,0.849372,0.099173
NAIVE,598.453195,874403.208616,28.557361,0.173312,0.90429,0.235319,0.849372,0.099173


In [None]:
store_id = 337  # store to inspect; change only this value to plot another store

sub_pred = pred.loc[pred['Store']==store_id]
sub_ytest, sub_ttest = sub_pred[out_column].values, sub_pred[time_column].values
sub_train = train.loc[train['Store']==store_id]
sub_ytrain, sub_ttrain = sub_train[out_column].values, sub_train[time_column].values

sub_mu = sub_pred['mu'].values
# Join the metrics with a separator; plain concatenation ran each value into
# the next label.
metric_text = ', '.join([
    'MAE: '+str(mean_absolute_error(sub_ytest, sub_mu)),
    'MAPE: '+str(mean_absolute_percentage_error(sub_ytest, sub_mu)),
    'R2: '+str(r2_score(sub_ytest, sub_mu)),
    'MDA: '+str(mda(sub_ytest, sub_mu)),
    'WAPE: '+str(wape(sub_ytest, sub_mu)),
])
fig = plot_gp(sub_mu, sub_pred['lb'].values, sub_pred['ub'].values, sub_ttest, sub_ytest, sub_ttrain, sub_ytrain,
              samples=[], layout='h', name=metric_text)

fig.show()

In [None]:
# Per-store evaluation metrics, entered by hand: one row per store, one
# column per metric.
eval_d = pd.DataFrame(
    [
        [349.2379377968677, 0.16175636970597324, 0.9539694213024554, 0.8723404255319149, 0.07476127217294001],
        [460.5344468241799, 0.19263966825056897, 0.9410898335463883, 0.8085106382978723, 0.08657984829904565],
        [762.6511459641752, 0.19676547974799782, 0.8646351400322383, 0.8085106382978723, 0.1146269257461185],
        [321.63778935573845, 0.16138884608417464, 0.9443069421928927, 0.8723404255319149, 0.06885967194642133],
        [1098.2046508789062, 0.1540076293835695, 0.7037609571651804, 0.8723404255319149, 0.12398672312154686],
    ],
    index=['Store 45', 'Store 175', 'Store 224', 'Store 337', 'Store 1045'],
    columns=['MAE', 'MAPE', 'R2', 'MDA', 'WAPE'],
)
eval_d

Unnamed: 0,MAE,MAPE,R2,MDA,WAPE
Store 45,349.237938,0.161756,0.953969,0.87234,0.074761
Store 175,460.534447,0.19264,0.94109,0.808511,0.08658
Store 224,762.651146,0.196765,0.864635,0.808511,0.114627
Store 337,321.637789,0.161389,0.944307,0.87234,0.06886
Store 1045,1098.204651,0.154008,0.703761,0.87234,0.123987


In [None]:
# One box plot per input column, drawn from the matching column of
# imp['importances'].
# NOTE(review): the first argument (10) is presumably the number of repeats -- confirm
# against LogNormalModel.feature_importance.
imp = mdn.feature_importance(10, X_test, Y_test, batch_size=4096)
fig = go.Figure()
for col_idx, col_name in enumerate(X_test.columns):
    fig.add_trace(go.Box(x=imp['importances'][:, col_idx], name=col_name))
fig.show()