In [None]:
!pip install einops

Collecting einops
  Downloading einops-0.4.1-py3-none-any.whl (28 kB)
Installing collected packages: einops
Successfully installed einops-0.4.1


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as opt
from torch.distributions.normal import Normal
from torch.distributions.log_normal import LogNormal
from torch.distributions.categorical import Categorical
from torch.distributions.mixture_same_family import MixtureSameFamily
from torch.distributions import constraints
from torch.distributions.exp_family import ExponentialFamily
from torch.distributions.utils import _standard_normal, broadcast_all
import math
from numbers import Real
from numbers import Number
from einops import rearrange
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, pairwise, r2_score
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
torch.cuda.set_device(0)
torch.backends.cudnn.benchmark = True

  import pandas.util.testing as tm


Mounted at /content/drive


In [None]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples=[], layout='v', 
            xaxis_title='Time', yaxis_title='', fig_size=[1000,500]):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', marker={'size':10})
                      , row=1, col=1) #plot training data
    
    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(171,141,52,0.3)',
                  fillcolor='rgba(171,141,52,0.3)', showlegend=True, name='96% uncertainty interval'), row=1, col=1)
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty',mode='lines', line_color='rgba(171,141,52,0.3)',
                  fillcolor='rgba(171,141,52,0.3)', showlegend=True, name='96% uncertainty interval'), row=1, col=1)
    
    fig.add_trace(go.Scatter(x=test_x, y=mu, line_color='rgba(171,141,52,1)', mode='lines', name='Prediction'), row=1, col=1) #plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line_color='rgb(29, 181, 22)',mode='lines', name='Observed'), row=1, col=1)
    for i, s in enumerate(samples):
        fig.add_trace(go.Scatter(x=test_x, y=s, name='sample '+str(i), mode='lines'), row=1, col=1) #plot samples
    fig.update_layout(title_text=name,paper_bgcolor='rgba(23,23,23,255)', plot_bgcolor='rgba(23,23,23,255)',
                      xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                      autosize=False, width=fig_size[0], height=fig_size[1], font=dict(family="Raleway", color="#ffffff"))
    fig.update_xaxes(showgrid=False, showline=True, linewidth=2, linecolor='rgba(23,23,23,255)')
    fig.update_yaxes(showgrid=False, showline=True, linewidth=2, linecolor='rgba(23,23,23,255)')
    return fig

def confidence_interval(transform, mu, cov, scaler):
    uncertainty = 1.96 * np.sqrt(np.diag(cov)) #compute std
    if transform == 'exp':
        mu, uncertainty = np.exp(mu), np.exp(uncertainty)
        return mu, mu - uncertainty, mu + uncertainty
    elif transform == 'linear':
        lb, ub = mu-uncertainty, mu+uncertainty
        return scaler.inverse_transform(mu[:,np.newaxis]).ravel(), scaler.inverse_transform(lb[:,np.newaxis]).ravel(), scaler.inverse_transform(ub[:,np.newaxis]).ravel()
    else:
        return mu, mu-uncertainty, mu+uncertainty

def add_time_delta(df, lag=1):
    index, gen = 0, df.iterrows()
    new_df = df.copy()
    new_df['delta_t'] = 0
    while index+lag+1 < len(df):
        index, row = gen.__next__()
        new_df.loc[index + lag, 'delta_t'] = (new_df.loc[index + lag, 'Date'] - df.loc[0, 'Date']).days
    return new_df

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", freq=None, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = seasonal_decompose(decompose, model=model, freq=freq)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.iloc[:,0], mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.iloc[:,0], mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.iloc[:,0], mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.iloc[:,0], seasonal.iloc[:,0], residual.iloc[:,0]

In [None]:
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/time series/rossmann/rossmann.csv')
#df = df.loc[(df['Store']<400) | (df['Store']==1045)]
df = df.loc[(df['Store'].isin([45,175,1045, 224, 337]))]
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df = df.drop(columns=[col for col in df.columns if len(df[col].unique()) == 1])
df

Unnamed: 0,Store,Date,Sales,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,Promo2,PromoInterval,Competition
40344,45,2013-01-01,0,0,0,a,1,d,a,9710.0,0,No promo2,0.0
40345,45,2013-01-02,4270,1,0,0,1,d,a,9710.0,0,No promo2,0.0
40346,45,2013-01-03,4368,1,0,0,1,d,a,9710.0,0,No promo2,0.0
40347,45,2013-01-04,4988,1,0,0,1,d,a,9710.0,0,No promo2,0.0
40348,45,2013-01-05,5566,1,0,0,0,d,a,9710.0,0,No promo2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
953288,1045,2015-07-27,14919,1,1,0,1,a,c,26990.0,0,No promo2,1.0
953289,1045,2015-07-28,10901,1,1,0,1,a,c,26990.0,0,No promo2,1.0
953290,1045,2015-07-29,11185,1,1,0,1,a,c,26990.0,0,No promo2,1.0
953291,1045,2015-07-30,12042,1,1,0,1,a,c,26990.0,0,No promo2,1.0


In [None]:
df['dayofweek'] = df['Date'].dt.dayofweek 
df['sin_dayofweek'] = np.sin(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df['cos_dayofweek'] = np.cos(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df.drop(columns=['dayofweek'], inplace=True)

df['dayofyear'] = df['Date'].dt.dayofyear
df['sin_dayofyear'] = np.sin(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df['cos_dayofyear'] = np.cos(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df.drop(columns=['dayofyear'], inplace=True)
df

Unnamed: 0,Store,Date,Sales,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,Promo2,PromoInterval,Competition,sin_dayofweek,cos_dayofweek,sin_dayofyear,cos_dayofyear
40344,45,2013-01-01,0,0,0,a,1,d,a,9710.0,0,No promo2,0.0,8.660254e-01,0.5,0.017213,0.999852
40345,45,2013-01-02,4270,1,0,0,1,d,a,9710.0,0,No promo2,0.0,8.660254e-01,-0.5,0.034422,0.999407
40346,45,2013-01-03,4368,1,0,0,1,d,a,9710.0,0,No promo2,0.0,1.224647e-16,-1.0,0.051620,0.998667
40347,45,2013-01-04,4988,1,0,0,1,d,a,9710.0,0,No promo2,0.0,-8.660254e-01,-0.5,0.068802,0.997630
40348,45,2013-01-05,5566,1,0,0,0,d,a,9710.0,0,No promo2,0.0,-8.660254e-01,0.5,0.085965,0.996298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953288,1045,2015-07-27,14919,1,1,0,1,a,c,26990.0,0,No promo2,1.0,0.000000e+00,1.0,-0.425000,-0.905193
953289,1045,2015-07-28,10901,1,1,0,1,a,c,26990.0,0,No promo2,1.0,8.660254e-01,0.5,-0.440519,-0.897743
953290,1045,2015-07-29,11185,1,1,0,1,a,c,26990.0,0,No promo2,1.0,8.660254e-01,-0.5,-0.455907,-0.890028
953291,1045,2015-07-30,12042,1,1,0,1,a,c,26990.0,0,No promo2,1.0,1.224647e-16,-1.0,-0.471160,-0.882048


In [None]:
sub = df.loc[df['Store'] == 1045]
fig, trend, seasonal, residual = plot_ts_decomposition(sub, 'Date', 'Sales', freq=365)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

7086.742541593593 4110.71711676925 64.11487438004734
-69.72263072026297 7425585.390762507 2724.9927322403096
-11.414058922874668 2700782.914377841 1643.4058885065008


In [None]:
numerical = ['CompetitionDistance']
#categorical = ['Store', 'StateHoliday', 'StoreType', 'Assortment', 'PromoInterval']
categorical = ['Store', 'StateHoliday', 'StoreType', 'Assortment', 'PromoInterval', 'Open', 'Promo', 'SchoolHoliday', 'Promo2', 'Competition']
output_col = 'Sales'
cat_enc_d = {}

for cat in categorical:
  LE = LabelEncoder()
  df[cat] = LE.fit_transform(df[cat])
  cat_enc_d[cat] = LE

train = df[df['Date'] < '2015-03-11']
valid = df[(df['Date'] >= '2015-03-11') & (df['Date'] < '2015-06-14')]
test = df[df['Date']>='2015-06-14']

scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_train = scaler.fit_transform(train[numerical])
scaled_valid = scaler.transform(valid[numerical])
scaled_test = scaler.transform(test[numerical])
train[numerical], valid[numerical], test[numerical] = scaled_train, scaled_valid, scaled_test
numerical += ['sin_dayofweek', 'cos_dayofweek', 'sin_dayofyear', 'cos_dayofyear']

y_scaler = MinMaxScaler(feature_range=(0, 1))
train_y, test_y = y_scaler.fit_transform(train[[output_col]]).ravel() + 1e-5, test[output_col].values + 1e-5
valid_y = y_scaler.transform(valid[[output_col]]).ravel() + 1e-5

'''train_y, test_y = train[output_col].values + 1e-10, test[output_col].values + 1e-10
valid_y = valid[output_col].values + 1e-10'''
train_idx, valid_idx, test_idx = train['Date'], valid['Date'], test['Date']
train_x, valid_x, test_x = train.drop(columns=['Date',output_col]), valid.drop(columns=['Date',output_col]), test.drop(columns=['Date',output_col])


train_x.shape, train_y.shape, valid_x.shape, valid_y.shape, test_x.shape, test_y.shape



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



((3995, 15), (3995,), (475, 15), (475,), (240, 15), (240,))

In [None]:
fig = make_subplots(rows=1, cols=1)
sub = train.loc[train['Store'] == 0]
fig.append_trace(go.Scatter(x=sub['Date'].tolist(), y=sub['sin_dayofweek'],mode='lines',name='Train'), row=1, col=1)
fig.append_trace(go.Scatter(x=sub['Date'].tolist(), y=sub['cos_dayofweek'],mode='lines',name='Train'), row=1, col=1)
fig.update_layout(height=1000, width=1500, title_text="Stacked Subplots")
fig.show()

In [None]:
test_idx

41237    2015-06-13
41238    2015-06-14
41239    2015-06-15
41240    2015-06-16
41241    2015-06-17
            ...    
953288   2015-07-27
953289   2015-07-28
953290   2015-07-29
953291   2015-07-30
953292   2015-07-31
Name: Date, Length: 245, dtype: datetime64[ns]

In [None]:
def gradient_clipper(model: nn.Module, val: float) -> nn.Module:
    def process_grad(grad):
        grad[grad != grad] = 1e-10
        return torch.clamp(grad, -val, val)
    for parameter in model.parameters():
        parameter.register_hook(lambda grad: process_grad(grad))
    
    return model

class RossmannDataLoader(Dataset):
    def __init__(self, X, Y, numerical_col, categorical_col):
        self.X1, self.X2, self.Y = X[numerical_col].values.astype(np.float32), X[categorical_col].values, Y.astype(np.float32)
 
    def __len__(self):
        return len(self.Y)
 
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.Y[idx] #numerical, categorical, label

class Embedder(nn.Module):
    def __init__(self, vocab_size, dim):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, dim)

    def forward(self, x):
        return self.embeddings(x)

class NNModel(nn.Module):
    def __init__(self, input_shape, units=None, factors=None, activ=True, norm=True, dropout=False, slops=None):
        super().__init__()
        self.input_shape = input_shape
        self.units = units
        self.factors = factors
        self.activ, self.norm = activ, norm
        self.network = nn.ModuleList()
        if self.factors:
            self.units = np.round(self.input_shape * np.asarray(self.factors)).astype(int)
        if self.units is not None:
            self.dropout = np.zeros_like(self.units) if not dropout else dropout
            self.slops = np.full(len(self.units), 1) if slops is None else slops
            for i, j, k in zip(self.units, self.dropout, self.slops):
                if i >= 1:
                    block = self.__build_block__(input_shape, i, p=j, slop=k)
                    self.network.extend(block)
                    input_shape = i
        self.output_shape = input_shape
        self.reset_parameters()
    
    def __build_block__(self, input_shape, units, p, slop):
        block = []
        block.append(nn.Linear(input_shape, units, bias=not self.norm))
        if self.norm:
            #block.append(nn.BatchNorm1d(units))
            block.append(nn.LayerNorm(units, eps=1e-5))
        if self.activ:
            #block.append(nn.ELU(slop))
            block.append(nn.GELU())
        if p > 0:
            block.append(nn.Dropout(p))
        return block
 
    def forward(self, x):
        for layer in self.network:
          tmp = layer(x)
          x = tmp
        return x
 
    def reset_parameters(self):
        for layer in self.network:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight)
                #layer.bias.data.fill_(0.1)

class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        self.pe = torch.zeros(1, max_len, d_model)
        self.pe[0,:, 0::2] = torch.sin(position * div_term)
        self.pe[0,:, 1::2] = torch.cos(position * div_term)
    
    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return x

class MultiHeadSelfAttention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=None):
        """
        Implementation of multi-head attention layer of the original transformer model.
        einsum and einops.rearrange is used whenever possible
        Args:
            dim: token's dimension, i.e. word embedding vector size
            heads: the number of distinct representations to learn
            dim_head: the dim of the head. In general dim_head<dim.
            However, it may not necessary be (dim/heads)
        """
        super().__init__()
        self.dim_head = (int(dim / heads)) if dim_head is None else dim_head
        _dim = self.dim_head * heads
        self.heads = heads
        self.to_qvk = nn.Linear(dim, _dim * 3, bias=False)
        self.W_0 = nn.Linear( _dim, dim, bias=False)
        self.scale_factor = self.dim_head ** -0.5
    def forward(self, x, mask=None):
        assert x.dim() == 3
        # Step 1
        qkv = self.to_qvk(x)  # [batch, tokens, dim*3*heads ]
        # Step 2
        # decomposition to q,v,k and cast to tuple
        # the resulted shape before casting to tuple will be:
        # [3, batch, heads, tokens, dim_head]
        q, k, v = tuple(rearrange(qkv, 'b t (d k h) -> k b h t d ', k=3, h=self.heads))
        # Step 3
        # resulted shape will be: [batch, heads, tokens, tokens]
        scaled_dot_prod = torch.einsum('b h i d , b h j d -> b h i j', q, k) * self.scale_factor
        if mask is not None:
            assert mask.shape == scaled_dot_prod.shape[2:]
            scaled_dot_prod = scaled_dot_prod.masked_fill(mask, -np.inf)
        attention = torch.softmax(scaled_dot_prod, dim=-1)
        # Step 4. Calc result per batch and per head h
        out = torch.einsum('b h i j , b h j d -> b h i d', attention, v)
        # Step 5. Re-compose: merge heads with dim_head d
        out = rearrange(out, "b h t d -> b t (h d)")
        # Step 6. Apply final linear transformation layer
        return self.W_0(out)

class TransformerBlock(nn.Module):
   """
   Vanilla transformer block from the original paper "Attention is all you need"
   Detailed analysis: https://theaisummer.com/transformer/
   """
   def __init__(self, dim, heads=8, dim_head=None, dim_linear_block=1024, dropout=0.1):
       """
       Args:
           dim: token's vector length
           heads: number of heads
           dim_head: if none dim/heads is used
           dim_linear_block: the inner projection dim
           dropout: probability of droppping values
       """
       super().__init__()
       self.mhsa = MultiHeadSelfAttention(dim=dim, heads=heads, dim_head=dim_head)
       self.drop = nn.Dropout(dropout)
       self.norm_1 = nn.LayerNorm(dim)
       self.norm_2 = nn.LayerNorm(dim)
       #self.linear = NNModel(dim, units=[dim_linear_block, dim], factors=None, dropout=[dropout, dropout], norm=False)
       self.linear = nn.Sequential(
           nn.Linear(dim, dim_linear_block), nn.ReLU(), nn.Dropout(dropout),
           nn.Linear(dim_linear_block, dim), nn.Dropout(dropout)
       )
   def forward(self, x, mask=None):
       y = self.norm_1(self.drop(self.mhsa(x, mask)) + x)
       return self.norm_2(self.linear(y) + y)

class Transformer(nn.Module):
    def __init__(self, blocks, dim, heads=8, dim_head=None, dim_linear_block=1024, dropout=0.1):
       super().__init__()
       self.block_list = [TransformerBlock(dim, heads, dim_head) for _ in range(blocks)]
       self.layers = nn.ModuleList(self.block_list)

    def forward(self, x, mask=None):
       for layer in self.layers:
           x = layer(x, mask)
       return x

class TabTransformer(nn.Module):
    def __init__(self, categories, numerical_nb, blocks, dim, mlp_units, heads=8, dim_head=None, dim_linear_block=1024, dropout=0.1, mlp_dropout=0.00001):
        """
        categories: tuple containing the number of unique values within each category
        """
        super().__init__()
        self.embed = Embedder(sum(categories), dim)
        #self.cont_embed = nn.ModuleList([NNModel(1, units=[dim], factors=None, norm=False) for i in range(numerical_nb)])
        self.cont_embed = NNModel(numerical_nb, units=[dim], factors=None, norm=False, activ=False)
        self.pe = PositionalEncoder(dim)
        self.transformer = Transformer(blocks, dim, heads, dim_head, dim_linear_block, dropout)
        #self.input_size = dim * (len(categories) + numerical_nb) + numerical_nb
        self.input_size = dim * (len(categories) + 1) + dim
        self.mlp = NNModel(self.input_size, units=mlp_units, factors=None, dropout=[mlp_dropout]*len(mlp_units))
        self.norm = nn.LayerNorm(dim)
        self.output_shape = mlp_units[-1]

    def forward(self, x_cont, x_cat):
        #x1 = torch.stack([e(x_cont[:,[i]]) for i,e in enumerate(self.cont_embed)], 1)#[batch_shape, num_cont, dim]
        x1 = self.cont_embed(x_cont)#[batch_shape, dim]
        x2 = self.embed(x_cat)#[batch_shape, num_cat, dim]
        x = torch.cat((torch.unsqueeze(x1, 1), x2), 1)#[batch_shape, num_cat+num_cont, dim]
        #x = self.pe(x)
        x = self.transformer(x)
        x = x.flatten(1)
        x = torch.cat((x, self.norm(x1)), dim = -1)
        return self.mlp(x)

    '''def forward(self, x_cont, x_cat):
        x = self.embed(x_cat) #[batch_shape, num_cat, dim]
        x = self.transformer(x)
        x = x.flatten(1)
        x = torch.cat((x, self.norm(x_cont)), dim = -1)
        return self.mlp(x)'''


class MDNModel(nn.Module):
    def __init__(self, shared, clf_nn, mu_nn, std_nn, n_comp):
        super(MDNModel, self).__init__()
        self.shared = shared
        self.clf_nn = clf_nn
        self.mu_nn = mu_nn
        self.std_nn = std_nn
        self.n_comp = n_comp
        self.pi = nn.Linear(self.clf_nn.output_shape, self.n_comp) if self.n_comp > 1 else None
        self.ai = nn.Linear(self.mu_nn.output_shape, self.n_comp)
        self.bi = nn.Linear(self.std_nn.output_shape, self.n_comp)
 
    def forward(self, x_cont, x_cat):
        x = self.shared(x_cont, x_cat)
        proba = self.proba_model(x) if self.n_comp > 1 else torch.ones((len(x),1))
        mu = self.mu_model(x)
        std = self.std_model(x)
        return proba, mu, std
 
    def proba_model(self, x):
        model = self.clf_nn(x)
        model = self.pi(model)
        model = nn.Softmax(dim=-1)(model)
        return model
 
    def mu_model(self, x):
        model = self.mu_nn(x)
        model = self.ai(model)
        #model = nn.ELU()(model) + 1 + 1e-10
        return model
 
    def std_model(self, x):
        model = self.std_nn(x)
        model = self.bi(model)
        model = nn.ELU()(model) + 1 + 1e-10
        return model

class MDN:
    def __init__(self, model, numerical_col, categorical_col, lognormal=True):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = model.to(self.device)
        self.lognormal = lognormal
        self.numerical_col, self.categorical_col = numerical_col, categorical_col
        self.losses = {'Epoch': [], 'Train': [], 'Test': [], 'State': [], 'LR': []}
        self.optim = None

    def loss_function(self, X1, X2, Y):
        pi, mu, std = self.model(X1, X2)
        mix = Categorical(pi)
        comp = LogNormal(mu, std, validate_args=None) if self.lognormal else Normal(mu, std, validate_args=None)
        pdf = MixtureSameFamily(mix, comp).log_prob(Y)
        ll1 = -torch.mean(pdf)
        return ll1

    def train_model(self, optim, train_load, grad_clip, l2_reg):
          total_loss = 0
          self.model = self.model.train()
        #with autograd.detect_anomaly():
          for i, (X1, X2, Y) in enumerate(train_load):
              #self.model.get_weight()
              X1, X2, Y = X1.to(self.device), X2.to(self.device), Y.to(self.device)
              self.optim.zero_grad()
              loss = self.loss_function(X1, X2, Y) #numerical, categorical, label
              loss.backward()
              torch.nn.utils.clip_grad_norm_(self.model.parameters(), grad_clip)
              self.optim.step()
              total_loss += loss.item()
          return total_loss/(i+1)
        
 
    def eval_model(self, test_load):
        self.model.eval()
        total_loss = 0
        for i, (X1, X2, Y) in enumerate(test_load):
            X1, X2, Y = X1.to(self.device), X2.to(self.device), Y.to(self.device)
            loss = self.loss_function(X1, X2, Y)
            total_loss += loss.item()
        return total_loss/(i+1)

    def fit(self, X_train, Y_train, epoch, lr, opt_kwarg, batch_size=None,  grad_clip=100, momentum=0.9, X_test=None, Y_test=None, l2_reg=0, eval=True, verbose=True):
        
        batch_size = len(X_train) if batch_size is None else batch_size
        train_load = DataLoader(RossmannDataLoader(X_train, Y_train, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)  # DATALOADER obj
        if X_test is not None:
            test_load = DataLoader(RossmannDataLoader(X_test, Y_test, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)

        best_loss = 1e100
        #self.optim = opt.Adam(self.model.parameters(), lr=lr)
        self.optim = opt.SGD(self.model.parameters(), lr=lr, momentum=momentum, nesterov=True)
        scheduler = None

        scheduler = opt.lr_scheduler.CyclicLR(self.optim, **opt_kwarg)
        #scheduler = opt.lr_scheduler.ReduceLROnPlateau(self.optim, **opt_kwarg)
        #scheduler = opt.lr_scheduler.MultiStepLR(self.optim, milestones=[28, 120], gamma=0.1)
 
        eval_score = ''
        for i in range(epoch):
            if verbose:
                print('##### EPOCH ' + str(i) + ' #####')
               
            train_loss = self.train_model(self.optim, train_load, grad_clip, l2_reg)
    
            if verbose:
                print('train loss : ', train_loss)
            self.losses['Epoch'].append(i), self.losses['Train'].append(train_loss)
    
            if X_test is not None:
                valid_loss = self.eval_model(test_load)

                if verbose:
                    print('test loss : ', valid_loss)
                self.losses['Test'].append(valid_loss)
    
                if scheduler is not None:
                    '''scheduler.step(valid_loss)
                    self.losses['LR'].append(self.optim.param_groups[0]['lr'])'''
                    scheduler.step()
                    self.losses['LR'].append(scheduler.get_last_lr()[0])
    
                if valid_loss < best_loss:
                    self.losses['State'] = deepcopy(self.model.state_dict())
                    best_loss = valid_loss
                    print('===========SAVE===========')

    def predict(self, X):
        self.model.eval()
        X1 = torch.tensor(X[self.numerical_col].values.astype(np.float32)).to(self.device)
        X2 = torch.tensor(X[self.categorical_col].values).to(self.device)
        pi, mu, std = self.model(X1, X2)
        mix = Categorical(pi)
        comp = LogNormal(mu, std, validate_args=None) if self.lognormal else Normal(mu, std, validate_args=None)
        msf = MixtureSameFamily(mix, comp)
        pred = msf.mean
        return pred, pi, mu, std

    def mixture_quantile(self, pi, mu, std, q):
        n, solutions, evaluations = pi.shape[0], [], []
        for i in range(n):
            mix = Categorical(pi[i])
            comp = LogNormal(mu[i], std[i], validate_args=None) if self.lognormal else Normal(mu[i], std[i], validate_args=None)
            pdf = MixtureSameFamily(mix, comp)

            def objf(x):
                x = torch.from_numpy(x)
                return torch.abs(pdf.cdf(x) - q).data.numpy()
            def bounds():
                dists = list(zip(pdf.component_distribution.mean, pdf.component_distribution.stddev))
                if self.lognormal:
                    res = [LogNormal(d[0], d[1], validate_args=None).icdf(torch.tensor(q)).data.numpy() for d in dists]
                else:
                    res = [Normal(d[0], d[1], validate_args=None).icdf(torch.tensor(q)).data.numpy() for d in dists]
                return np.min(res), np.max(res)

            lb, ub = bounds()
            result = dual_annealing(objf, list(zip([lb], [ub])),maxiter=1000)
            solution = result['x']
            evaluation = objf(solution)
            solutions.append(solution), evaluations.append(evaluation)
        return np.asarray(solutions), np.asarray(evaluations)

In [None]:
n_comp, epoch, lr, batch_size, d = 2, 50000, 1e-4, 4096, 0.000001
cyclic_kwarg = {'base_lr': lr, 'max_lr': 5e-3, 'step_size_up':300, 'step_size_down':300}
plateau_kwarg = {'factor':0.5, 'patience':200, 'verbose':True, 'min_lr':1e-7, 'mode':'min'}

categories = list(train_x[categorical].nunique())
shared_nn = TabTransformer(categories, len(numerical), blocks=3, dim=32, mlp_units=[768, 1024, 512], heads=4, dim_head=None, dim_linear_block=1024, dropout=0, mlp_dropout=d)
clf_nn = NNModel(shared_nn.output_shape , units=[256], factors=None, dropout=[d])
mu_nn = NNModel(shared_nn.output_shape, units=[256], factors=None, dropout=[d])
std_nn = NNModel(shared_nn.output_shape, units=[256], factors=None, dropout=[d])#, dropout=[d,d,d,d]

nn_model = gradient_clipper(MDNModel(shared_nn, clf_nn, mu_nn, std_nn, n_comp), 10)
#nn_model.load_state_dict(best_state)
print(nn_model)
mdn = MDN(nn_model, numerical, categorical, lognormal=True)
print(mdn.device)
print(sum(p.numel() for p in mdn.model.parameters() if p.requires_grad))
mdn.fit(train_x, train_y, epoch, lr, cyclic_kwarg, batch_size=batch_size,  grad_clip=10, momentum=0.9, X_test=valid_x, Y_test=valid_y, l2_reg=0, eval=False, verbose=True)

[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
test loss :  -3.0440104007720947
##### EPOCH 1142 #####
train loss :  -3.301452159881592
test loss :  -3.0605509281158447
##### EPOCH 1143 #####
train loss :  -3.302513837814331
test loss :  -3.0444259643554688
##### EPOCH 1144 #####
train loss :  -3.308117628097534
test loss :  -3.059988498687744
##### EPOCH 1145 #####
train loss :  -3.306676149368286
test loss :  -3.048494577407837
##### EPOCH 1146 #####
train loss :  -3.30906343460083
test loss :  -3.0674824714660645
##### EPOCH 1147 #####
train loss :  -3.3116278648376465
test loss :  -3.0549428462982178
##### EPOCH 1148 #####
train loss :  -3.3086390495300293
test loss :  -3.0742006301879883
##### EPOCH 1149 #####
train loss :  -3.312023401260376
test loss :  -3.0643012523651123
##### EPOCH 1150 #####
train loss :  -3.3181633949279785
test loss :  -3.0752007961273193
##### EPOCH 1151 #####
train loss :  -3.3232412338256836
test loss :  -3.0

KeyboardInterrupt: ignored

In [None]:
best_state = deepcopy(mdn.losses['State'])
mdn.model.load_state_dict(best_state), np.min(mdn.losses['Test'])
print(np.min(mdn.losses['Test']))
fig = make_subplots(rows=3, cols=1)
s = 0
fig.append_trace(go.Scatter(x=mdn.losses['Epoch'][s:], y=mdn.losses['Train'][s:],mode='lines',name='Train'), row=1, col=1)
fig.append_trace(go.Scatter(x=mdn.losses['Epoch'][s:], y=mdn.losses['Test'][s:],mode='lines',name='Test'), row=2, col=1)
fig.append_trace(go.Scatter(x=mdn.losses['Epoch'][s:], y=mdn.losses['LR'][s:],mode='lines',name='LR'), row=3, col=1)
fig.update_layout(height=1000, width=1500, title_text="Stacked Subplots")
fig.show()

-3.2208664417266846


In [None]:
from scipy.optimize import dual_annealing, minimize, fmin_bfgs
mdn_mu, pi, mu, std = mdn.predict(test_x)
nn_output = pd.DataFrame(torch.cat((pi, mu, std), 1).cpu().data.numpy())
'''mdn_lb, evaluations_lb = mdn.mixture_quantile(pi, mu, std, 0.025)
mdn_ub, evaluations_ub = mdn.mixture_quantile(pi, mu, std, 0.975)'''
mdn_mu = y_scaler.inverse_transform(mdn_mu.data.cpu().numpy()[:,np.newaxis]).ravel()
'''mdn_lb = y_scaler.inverse_transform(mdn_lb).ravel()
mdn_ub = y_scaler.inverse_transform(mdn_ub).ravel()'''
pred = pd.DataFrame({"mu": mdn_mu, "lb": mdn_mu, "ub": mdn_mu})
pred = pd.concat((test.reset_index(), pred), 1)
pred


In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only



Unnamed: 0,index,Store,Date,Sales,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,...,Promo2,PromoInterval,Competition,sin_dayofweek,cos_dayofweek,sin_dayofyear,cos_dayofyear,mu,lb,ub
0,41238,0,2015-06-14,0,0,0,0,0,2,0,...,0,2,1,-2.449294e-16,1.0,0.296713,-0.954967,6.681173,6.681173,6.681173
1,41239,0,2015-06-15,8196,1,1,0,0,2,0,...,0,2,1,0.000000e+00,1.0,0.280231,-0.959933,7548.723633,7548.723633,7548.723633
2,41240,0,2015-06-16,6190,1,1,0,0,2,0,...,0,2,1,8.660254e-01,0.5,0.263665,-0.964614,6441.561035,6441.561035,6441.561035
3,41241,0,2015-06-17,6129,1,1,0,0,2,0,...,0,2,1,8.660254e-01,-0.5,0.247022,-0.969010,6305.367676,6305.367676,6305.367676
4,41242,0,2015-06-18,5481,1,1,0,0,2,0,...,0,2,1,1.224647e-16,-1.0,0.230306,-0.973118,6404.746094,6404.746094,6404.746094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,953288,4,2015-07-27,14919,1,1,0,1,0,1,...,0,2,1,0.000000e+00,1.0,-0.425000,-0.905193,14018.703125,14018.703125,14018.703125
236,953289,4,2015-07-28,10901,1,1,0,1,0,1,...,0,2,1,8.660254e-01,0.5,-0.440519,-0.897743,12143.133789,12143.133789,12143.133789
237,953290,4,2015-07-29,11185,1,1,0,1,0,1,...,0,2,1,8.660254e-01,-0.5,-0.455907,-0.890028,12148.465820,12148.465820,12148.465820
238,953291,4,2015-07-30,12042,1,1,0,1,0,1,...,0,2,1,1.224647e-16,-1.0,-0.471160,-0.882048,12941.344727,12941.344727,12941.344727


In [None]:
def mase(train_y, test_y, pred):
    n = train_y.shape[0]
    d = np.abs(np.diff(train_y)).sum()/(n-1)
    errors = np.abs(test_y - pred)
    return errors.mean()/d

def mape(test_y, pred):
    return np.round(np.mean(np.abs(100*(test_y-pred)/(test_y + 1e-9))), 0)

def rmspe(test_y, pred):
    return (np.sqrt(np.mean(np.square((test_y - pred) / (test_y + 1e-9))))) * 100

def persistence(train_y, test_y):
    predictions, history = [], list(np.copy(train_y))
    for i in test_y:
        predictions.append(history[-1])
        history.append(i)
    return np.asarray(predictions)

naive = persistence(train_y, test_y)
errors = {'MAE':[mean_absolute_error(test_y, pred['mu']), mean_absolute_error(test_y, naive)], 
        'RMSE':[mean_squared_error(test_y, pred['mu']), mean_squared_error(test_y, naive)], 
        'R2':[r2_score(test_y, pred['mu']), r2_score(test_y, naive)], 
        'RMSPE': [rmspe(test_y, pred['mu']), rmspe(test_y, naive)],
        'MAPE':[mape(test_y, pred['mu']), mape(test_y, naive)],
        'MASE':[mase(train_y, test_y, pred['mu']), mase(train_y, test_y, naive)]} 
errors = pd.DataFrame(errors, index =['THIS', 'NAIVE']) 
errors

Unnamed: 0,MAE,RMSE,R2,RMSPE,MAPE,MASE
THIS,647.256413,996726.5,0.890901,29842140.0,10020410.0,4011.631434
NAIVE,2476.110056,14306230.0,-0.565922,21700830000.0,7202089000.0,15346.686013


In [None]:
store = 4
sub_train = train.loc[train['Store'] == store] 
sub_pred = pred.loc[pred['Store'] == store]
err = mean_absolute_error(sub_pred['Sales'], sub_pred['mu'])
fig = plot_gp(sub_pred['mu'], sub_pred['lb'], sub_pred['ub'], sub_pred['Date'], sub_pred['Sales'], sub_train['Date'], sub_train['Sales'], name=err, samples=[], layout='h')
fig.show()

In [None]:
pe = np.rot90(PositionalEncoder(512, 512).pe[0].data.numpy(), 2)
fig = go.Figure(data=go.Heatmap(z=pe))
fig.show()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
nn_output.to_csv('/content/drive/My Drive/Colab Notebooks/time series/rossmann/nor_nn_output.csv', index=False)
pred.to_csv('/content/drive/My Drive/Colab Notebooks/time series/rossmann/nor_pred.csv', index=False)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as opt
from torch.distributions.normal import Normal
from torch.distributions.log_normal import LogNormal
from torch.distributions.categorical import Categorical
from torch.distributions.mixture_same_family import MixtureSameFamily
import pandas as pd
import numpy as np
from scipy.optimize import dual_annealing, minimize, fmin_bfgs

In [None]:
'''pi = torch.from_numpy(np.asarray([[0.5, 0.2, 0.3], [0.1, 0.6, 0.3]]))
mu = torch.from_numpy(np.asarray([[1400, 966, 1005], [5900, 7500, 3800]]))
std = torch.from_numpy(np.asarray([[5900, 7500, 3800], [123, 788, 912]]))'''
q = 0.975
for i in range(pi.shape[0]):
    mix = Categorical(pi[i])
    comp = LogNormal(mu[i], std[i], validate_args=None)
    pdf = MixtureSameFamily(mix, comp)

    def objf(x):
        x = torch.from_numpy(x)
        return torch.abs(pdf.cdf(x) - q).data.numpy()
    
    def bounds():
        dists, res = list(zip(pdf.component_distribution.mean, pdf.component_distribution.stddev)), []
        res = [LogNormal(d[0], d[1], validate_args=None).icdf(torch.tensor(q)).data.numpy() for d in dists]
        print(res)
        return np.min(res), np.max(res)

    lb, ub = bounds()
    print(pi[i], mu[i], std[i])
    print(list(zip([lb], [ub])))
    result = dual_annealing(objf, list(zip([lb], [ub])),maxiter=1000)

    print('Status : %s' % result['message'])
    print('Total Evaluations: %d' % result['nit'])
    # evaluate solution
    solution = result['x']
    evaluation = objf(solution)
    print('Solution: f(%s) = %.5f' % (solution, evaluation))

[array(inf, dtype=float32), array(1.5680429, dtype=float32)]
tensor([3.9413e-04, 9.9961e-01], grad_fn=<SelectBackward0>) tensor([-1.1320, -1.0237], grad_fn=<SelectBackward0>) tensor([3.0558, 0.1233], grad_fn=<SelectBackward0>)
[(1.5680429, inf)]


ValueError: ignored

In [None]:
dists = list(zip(pdf.component_distribution.mean, pdf.component_distribution.stddev))
for d in dists:
  Normal(d[0], d[1]).icdf(torch.tensor(q))

In [None]:
lower, upper = torch.tensor(0.0002), torch.tensor(0.4802)
if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(np.isnan(lower)) or np.any(np.isnan(upper))):
  print('tttttttttttt')

TypeError: ignored