In [None]:
# default_exp models

## models

In [None]:
#hide
from torchtools.core import *

In [None]:
#export
import torch.nn as nn
import torch as torch

In [None]:
torch.__version__

'1.3.1'

In [None]:
# manual check of the range-scaling formula sigmoid(x) * (high - low) + low with low=-1, high=1
torch.sigmoid(torch.tensor([-2.])) * (1 - -1) + -1

tensor([-0.7616])

In [None]:
#export
# This is an unofficial PyTorch implementation by Ignacio Oguiza - oguiza@gmail.com based on:

# Fawaz, H. I., Lucas, B., Forestier, G., Pelletier, C., Schmidt, D. F., Weber, J., ... & Petitjean, F. (2019). InceptionTime: Finding AlexNet for Time Series Classification. arXiv preprint arXiv:1909.04939.
# Official InceptionTime tensorflow implementation: https://github.com/hfawaz/InceptionTime


def noop(x):
    '''
    pass-through: returns its argument unchanged
    '''
    return x

def shortcut(c_in, c_out):
    '''
    residual projection: a kernel-size-1 Conv1d from `c_in` to `c_out` channels
    followed by BatchNorm1d over `c_out` channels
    '''
    projection = nn.Conv1d(c_in, c_out, kernel_size=1)
    norm = nn.BatchNorm1d(c_out)
    return nn.Sequential(projection, norm)
    
class Inception(nn.Module):
    '''
    single inception module: optional 1x1 bottleneck, three parallel convolutions
    with kernel sizes derived from `ks`, a max-pool + 1x1 convolution branch, then
    batch norm and ReLU over the channel-wise concatenation of the four branches

    c_in: number of input channels
    bottleneck: output channels of the 1x1 bottleneck conv; a falsy value disables it.
                The bottleneck is also skipped when `c_in` is 1.
    ks: base kernel size; the branches use ks, ks // 2 and ks // 4, each forced odd
    nb_filters: output channels of each branch (the module outputs nb_filters * 4)
    '''
    def __init__(self, c_in, bottleneck=32, ks=40, nb_filters=32):

        super().__init__()
        # One flag drives both the bottleneck layer and the channel count fed to the
        # branches; deriving them separately made c_in == 1 with a truthy bottleneck
        # build branches for `bottleneck` channels while passing the raw input through.
        use_bottleneck = bool(bottleneck) and c_in > 1
        # nn.Identity is a parameter-free pass-through (state_dict keys are unaffected)
        self.bottleneck = nn.Conv1d(c_in, bottleneck, 1) if use_bottleneck else nn.Identity()
        mts_feat = bottleneck if use_bottleneck else c_in
        kss = [ks // (2**i) for i in range(3)]
        # odd kernel + padding k // 2 keeps the sequence length unchanged (stand-in for
        # padding='same'); clamp at 1 so a small `ks` cannot yield a kernel of 0 or -1
        kss = [max(ksi if ksi % 2 != 0 else ksi - 1, 1) for ksi in kss]
        self.conv_layers = nn.ModuleList(
            [nn.Conv1d(mts_feat, nb_filters, kernel_size=ksi, padding=ksi // 2) for ksi in kss])
        self.maxpool = nn.MaxPool1d(3, stride=1, padding=1)
        # the max-pool branch works on the raw input, hence c_in rather than mts_feat
        self.conv = nn.Conv1d(c_in, nb_filters, kernel_size=1)
        self.bn = nn.BatchNorm1d(nb_filters * 4)
        self.act = nn.ReLU()

    def forward(self, x):
        input_tensor = x.to(torch.float)
        x = self.bottleneck(input_tensor)
        # three conv branches on the (possibly bottlenecked) input ...
        branches = [conv(x) for conv in self.conv_layers]
        # ... plus the max-pool branch on the raw input, concatenated along channels
        branches.append(self.conv(self.maxpool(input_tensor)))
        return self.act(self.bn(torch.cat(branches, 1)))


class InceptionBlock(nn.Module):
    '''
    stack of `depth` Inception modules with an optional residual connection
    after every third module

    c_in: number of input channels
    bottleneck, ks, nb_filters: forwarded to each Inception module (the first
                                module is always built with bottleneck=0)
    residual: add a shortcut (1x1 conv + batch norm) after modules 3, 6, ...
    depth: number of Inception modules
    '''
    def __init__(self,c_in,bottleneck=32,ks=40,nb_filters=32,residual=True,depth=6):

        super().__init__()

        self.residual = residual
        self.depth = depth

        #inception & residual layers
        inc_mods = []
        res_layers = []
        for d in range(depth):
            # the first module sees the raw input; later ones see nb_filters * 4 channels
            inc_mods.append(
                Inception(c_in if d == 0 else nb_filters * 4, bottleneck=bottleneck if d > 0 else 0,ks=ks,
                          nb_filters=nb_filters))
            if self.residual and d % 3 == 2:
                # the first shortcut (d == 2) projects the block input, later ones
                # project the previous residual sum
                res_layers.append(shortcut(c_in if d == 2 else nb_filters * 4, nb_filters * 4))
            else:
                # None placeholder keeps res_layers indexable by module position
                res_layers.append(None)
        self.inc_mods = nn.ModuleList(inc_mods)
        self.res_layers = nn.ModuleList(res_layers)
        self.act = nn.ReLU()

    def forward(self, x):
        res = x
        for d in range(self.depth):
            x = self.inc_mods[d](x)
            if self.residual and d % 3 == 2:
                # out-of-place add: x is the ReLU output of the Inception module, and
                # modifying it in place can invalidate what autograd saved for backward
                x = x + self.res_layers[d](res)
                # the residual carried forward is the sum before the ReLU below
                res = x
                x = self.act(x)
        return x

In [None]:
#export    
class InceptionTime(nn.Module):
    '''
    InceptionBlock followed by adaptive average pooling to length 1 and a linear
    layer mapping nb_filters * 4 features to c_out outputs
    '''
    def __init__(self,c_in,c_out,bottleneck=32,ks=40,nb_filters=32,residual=True,depth=6):
        super().__init__()
        block_kwargs = dict(bottleneck=bottleneck, ks=ks, nb_filters=nb_filters,
                            residual=residual, depth=depth)
        self.block = InceptionBlock(c_in, **block_kwargs)
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(nb_filters * 4, c_out)

    def forward(self, x):
        features = self.block(x)
        # pool to length 1, then drop that trailing dimension before the linear head
        pooled = self.gap(features).squeeze(-1)
        return self.fc(pooled)

In [None]:
#export
class Sigmoid(nn.Module):
    '''
    sigmoid layer rescaled to a range: sigmoid(x) * (high - low) + low
    '''
    def __init__(self, low, high):
        super().__init__()
        self.low = low
        self.high = high

    def forward(self, x):
        span = self.high - self.low
        return torch.sigmoid(x) * span + self.low

In [None]:
#export
class InceptionTimeSgmOld(nn.Module):
    '''
    InceptionTime whose output is passed through a sigmoid rescaled to the
    fixed range (low, high) = (-1., 1.)
    '''

    def __init__(self, n_in, n_out):
        super().__init__()
        self.inc = InceptionTime(n_in, n_out)
        self.low = -1.
        self.high = 1.

    def forward(self, x):
        span = self.high - self.low
        return torch.sigmoid(self.inc(x)) * span + self.low
        

In [None]:
#export
class InceptionTimeSgm(nn.Module):
    '''
    InceptionTime followed by a Sigmoid layer so the output lies in `range`
    (given as (low, high)); the input is cast to float first
    '''

    def __init__(self, n_in, n_out, range=(-1,1)):
        super().__init__()
        backbone = InceptionTime(n_in, n_out)
        squash = Sigmoid(*range)
        self.mod = nn.Sequential(backbone, squash)

    def forward(self, x):
        return self.mod(x.float())
        

In [None]:
#export
class InceptionTimeD(nn.Module):
    '''
    two-input variant: both inputs are cast to float, concatenated along dim -2
    and passed through InceptionTime followed by a Sigmoid(-1., 1.) layer
    '''

    def __init__(self, n_in, n_out):
        super().__init__()
        self.mod = nn.Sequential(InceptionTime(n_in, n_out), Sigmoid(-1., 1.))

    def forward(self, xc, xd):
        # presumably dim -2 is the channel axis of (batch, channels, length) inputs, so
        # n_in would be the combined channel count of xc and xd; confirm against the caller
        parts = (xc.float(), xd.float())
        merged = torch.cat(parts, dim=-2)
        return self.mod(merged)

### Development

In [None]:
#export
class InceptionTimeVar(nn.Module):
    '''
    regression model that outputs mean and variance

    The backbone is InceptionTime(n_in, n_out + 1): the last output column is mapped
    to a strictly positive variance, the remaining columns are the mean.

    n_in: number of input channels passed to InceptionTime
    n_out: number of mean outputs
    meanrange: optional (low, high); when given, the mean columns are passed
               through a Sigmoid layer scaled to that range
    '''

    def __init__(self, n_in, n_out, meanrange=None):
        super().__init__()
        if meanrange:
            self.sigmoid = Sigmoid(*meanrange)
        self.mod = nn.Sequential(InceptionTime(n_in, n_out+1))

    def forward(self, x):
        output = self.mod(x.float())
        mean, raw_var = output[:, :-1], output[:, -1:]
        ## enforce positivity of sigma^2
        # softplus(z) = log(1 + exp(z)), evaluated without overflowing for large z
        # (the explicit exp() turned large logits into inf)
        var = nn.functional.softplus(raw_var) + 1e-06
        sigmoid = getattr(self, 'sigmoid', None)
        if sigmoid is not None: mean = sigmoid(mean)
        # assemble a new tensor instead of writing into slices of the backbone output
        return torch.cat((mean, var), dim=1)
        

In [None]:
# manual check of the log(1 + exp(x)) + 1e-6 transform on uniform random inputs
(torch.rand((1,10)).exp()+1).log() + 1e-6

tensor([[1.2036, 0.9338, 0.8957, 0.8432, 0.8481, 1.2246, 0.9543, 1.1465, 0.7756,
         0.9615]])

In [None]:
model_var = InceptionTimeVar(10,1, meanrange=(-1,1))

In [None]:
model_var

InceptionTimeVar(
  (sigmoid): Sigmoid()
  (mod): Sequential(
    (0): InceptionTime(
      (block): InceptionBlock(
        (inc_mods): ModuleList(
          (0): Inception(
            (conv_layers): ModuleList(
              (0): Conv1d(10, 32, kernel_size=(39,), stride=(1,), padding=(19,))
              (1): Conv1d(10, 32, kernel_size=(19,), stride=(1,), padding=(9,))
              (2): Conv1d(10, 32, kernel_size=(9,), stride=(1,), padding=(4,))
            )
            (maxpool): MaxPool1d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
            (conv): Conv1d(10, 32, kernel_size=(1,), stride=(1,))
            (bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act): ReLU()
          )
          (1): Inception(
            (bottleneck): Conv1d(128, 32, kernel_size=(1,), stride=(1,))
            (conv_layers): ModuleList(
              (0): Conv1d(32, 32, kernel_size=(39,), stride=(1,), padding=(19,))
            

In [None]:
# NOTE(review): `xb` is first assigned in the "model sanity checks" cell further down;
# unless the star import provides it, this cell fails on a fresh top-to-bottom run
model_var(xb)

tensor([[0.4105, 0.2505],
        [0.4318, 0.2657],
        [0.3777, 0.2642],
        [0.3953, 0.2852],
        [0.4394, 0.2912],
        [0.3648, 0.2685],
        [0.3878, 0.2818],
        [0.4113, 0.2373],
        [0.4401, 0.2637],
        [0.4423, 0.2611],
        [0.3725, 0.2485],
        [0.4087, 0.2721],
        [0.3843, 0.2865],
        [0.4171, 0.2702],
        [0.4280, 0.2767],
        [0.4198, 0.2661],
        [0.4392, 0.2845],
        [0.4135, 0.2801],
        [0.4134, 0.2512],
        [0.4217, 0.2726],
        [0.3907, 0.2770],
        [0.4313, 0.2884],
        [0.4159, 0.2700],
        [0.4348, 0.2645],
        [0.4055, 0.2762],
        [0.4049, 0.2636],
        [0.3728, 0.2552],
        [0.4231, 0.2745],
        [0.4095, 0.2549],
        [0.4459, 0.2475],
        [0.4107, 0.2761],
        [0.4225, 0.2802],
        [0.4316, 0.2669],
        [0.4285, 0.2749],
        [0.3991, 0.2729],
        [0.4219, 0.2815],
        [0.4021, 0.2642],
        [0.4031, 0.2788],
        [0.3

In [None]:
# model sanity checks
xb = torch.randn((128,10,100))
yb = torch.rand(128,1)
# check both variants (the first model used to be overwritten before it was ever run);
# the loop ends on InceptionTime, so `model` and `y_m` end up bound as before
for model in (InceptionTimeSgm(10,1), InceptionTime(10,1)):
    y_m = model(xb)
    assert y_m.shape == (128,1)


In [None]:
def nll_regression(preds, y_true, c=5):
    '''
    negative log likelihood loss for regression, both mean and variance are predicted

    preds: 2d tensor; column 0 is used as the predicted mean, column 1 as the
           predicted variance (must be positive, its log is taken)
    y_true: targets, one per row of `preds`; flattened to 1d before use
    c: constant added to the mean loss
    returns: scalar tensor, mean of 0.5 * log(var) + 0.5 * (y - mean)^2 / var, plus c

    Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles
    Balaji Lakshminarayanan, Alexander Pritzel, Charles Blundell, DeepMind

    '''

    mean, var = preds[:,0], preds[:,1]
    # use the targets passed in (not a notebook-level global); flatten (n, 1) -> (n,)
    # so the difference stays elementwise instead of broadcasting to (n, n)
    target = y_true.reshape(-1)
    s1 = 0.5*var.log()
    s2 = 0.5*(target-mean).pow(2).div(var)
    loss = (s1+s2).mean() + c
    return loss

In [None]:
# NOTE(review): `preds` is first assigned in the training-loop cell further down;
# on a fresh top-to-bottom run it is not defined yet at this point
loss = nll_regression(preds, yb)

In [None]:
print(loss)

tensor(32.0136, grad_fn=<AddBackward0>)


In [None]:
model_var = InceptionTimeVar(10,1)

In [None]:
from torch.optim import Adam

In [None]:
lr = 0.01
epochs = 10
loss_fn = nll_regression
m = model_var


# hand-written gradient descent (no optimizer object): forward, loss, backward,
# then an in-place parameter update outside of autograd
# NOTE: in the recorded run the loss jumps on the last two iterations
for i in range(epochs):
    preds = m(xb)
    loss = loss_fn(preds, yb)
    print(loss)
    loss.backward()
    with torch.no_grad():
        for p in m.parameters():
            p -= lr*p.grad
    m.zero_grad()

tensor(4.3641, grad_fn=<AddBackward0>)
tensor(4.3060, grad_fn=<AddBackward0>)
tensor(4.2522, grad_fn=<AddBackward0>)
tensor(4.2023, grad_fn=<AddBackward0>)
tensor(4.1553, grad_fn=<AddBackward0>)
tensor(4.1099, grad_fn=<AddBackward0>)
tensor(4.0648, grad_fn=<AddBackward0>)
tensor(4.0278, grad_fn=<AddBackward0>)
tensor(4.5346, grad_fn=<AddBackward0>)
tensor(33.2470, grad_fn=<AddBackward0>)


In [None]:
# fresh model plus an Adam optimizer over its parameters for the next training cell
model_var = InceptionTimeVar(10,1)
opt = Adam(model_var.parameters(), lr=0.001)

In [None]:
m = model_var
# same loop as the manual version, with the parameter update delegated to Adam
for i in range(epochs):
    preds = m(xb)
    loss = nll_regression(preds, yb)
    print(loss)
    loss.backward()
    # the optimizer update is not tracked by autograd, so no no_grad() block is needed
    opt.step()
    opt.zero_grad()
        

tensor(5.0208, grad_fn=<AddBackward0>)
tensor(4.8659, grad_fn=<AddBackward0>)
tensor(4.7756, grad_fn=<AddBackward0>)
tensor(4.7244, grad_fn=<AddBackward0>)
tensor(4.6740, grad_fn=<AddBackward0>)
tensor(4.6212, grad_fn=<AddBackward0>)
tensor(4.5612, grad_fn=<AddBackward0>)
tensor(4.4978, grad_fn=<AddBackward0>)
tensor(4.4351, grad_fn=<AddBackward0>)
tensor(4.3768, grad_fn=<AddBackward0>)


In [None]:
m(xb)

tensor([[ 0.6883,  0.3206],
        [ 0.1993,  0.2503],
        [ 0.7823,  0.3220],
        [ 0.8343,  0.2718],
        [ 0.7920,  0.2804],
        [ 0.4914,  0.2229],
        [ 0.4477,  0.2060],
        [ 0.7541,  0.2245],
        [ 0.2930,  0.2679],
        [ 0.3321,  0.1942],
        [ 0.7895,  0.2710],
        [ 0.7996,  0.2916],
        [ 0.6036,  0.2444],
        [ 0.2867,  0.1834],
        [ 0.8796,  0.3102],
        [ 0.0324,  0.1723],
        [ 0.0474,  0.1506],
        [ 0.3758,  0.2717],
        [ 0.0773,  0.1942],
        [ 0.2873,  0.2005],
        [ 0.8112,  0.2479],
        [ 0.3492,  0.2456],
        [ 0.1308,  0.2137],
        [-0.0081,  0.1893],
        [ 0.3030,  0.2092],
        [ 0.8836,  0.3118],
        [ 0.2829,  0.1725],
        [ 0.8589,  0.3252],
        [ 0.7722,  0.2483],
        [ 0.3383,  0.2190],
        [ 0.8427,  0.3556],
        [ 0.2270,  0.2487],
        [ 0.5968,  0.3048],
        [ 0.6996,  0.2803],
        [ 0.2520,  0.2477],
        [ 0.3296,  0

In [None]:
import numpy as np

In [None]:
np.round(m(xb).detach().numpy(), 2)

array([[-13.67,  14.95],
       [-13.4 ,  14.7 ],
       [-13.59,  14.68],
       [-13.08,  14.41],
       [-13.2 ,  14.35],
       [-12.29,  13.37],
       [-12.93,  14.05],
       [-13.16,  14.2 ],
       [-12.83,  13.93],
       [-12.51,  13.71],
       [-12.9 ,  14.04],
       [-12.89,  13.99],
       [-12.57,  13.74],
       [-13.13,  14.35],
       [-13.13,  14.29],
       [-12.74,  13.99],
       [-13.03,  14.2 ],
       [-13.1 ,  14.23],
       [-13.45,  14.53],
       [-12.36,  13.47],
       [-13.03,  14.09],
       [-13.41,  14.55],
       [-13.65,  14.73],
       [-13.02,  14.3 ],
       [-13.26,  14.28],
       [-14.06,  15.14],
       [-12.45,  13.59],
       [-13.2 ,  14.22],
       [-13.46,  14.66],
       [-12.46,  13.56],
       [-13.14,  14.35],
       [-13.24,  14.36],
       [-12.95,  14.16],
       [-12.57,  13.64],
       [-13.48,  14.6 ],
       [-12.49,  13.52],
       [-13.26,  14.29],
       [-13.59,  14.66],
       [-13.16,  14.27],
       [-12.68,  13.94],


In [None]:
yb

tensor([[0.7950],
        [0.7399],
        [0.0699],
        [0.9656],
        [0.4764],
        [0.4740],
        [0.6303],
        [0.2540],
        [0.4151],
        [0.6722],
        [0.4814],
        [0.4797],
        [0.9125],
        [0.5585],
        [0.6816],
        [0.9544],
        [0.7428],
        [0.2958],
        [0.5067],
        [0.2693],
        [0.3197],
        [0.5284],
        [0.0573],
        [0.6996],
        [0.2437],
        [0.3375],
        [0.4211],
        [0.0990],
        [0.5340],
        [0.9474],
        [0.4812],
        [0.2803],
        [0.6376],
        [0.6415],
        [0.3024],
        [0.0556],
        [0.2722],
        [0.2621],
        [0.4258],
        [0.6229],
        [0.8274],
        [0.5367],
        [0.7236],
        [0.9387],
        [0.0994],
        [0.5417],
        [0.0348],
        [0.9079],
        [0.4295],
        [0.1127],
        [0.9783],
        [0.7960],
        [0.4782],
        [0.7561],
        [0.4944],
        [0

In [None]:
x1 = torch.zeros(128,1)
x2 = torch.ones(128,1)

In [None]:
x_p = torch.cat([x1,x2], dim=1)

In [None]:
nll_regression(x_p, yb)

tensor(5.4584)

In [None]:
yb

tensor([[ 0.4870],
        [ 1.2089],
        [ 2.9409],
        [-0.8186],
        [ 0.2653],
        [-0.7720],
        [ 0.0791],
        [ 0.3814],
        [ 0.9626],
        [ 0.3247],
        [-1.1993],
        [ 0.3751],
        [-0.6192],
        [-0.3383],
        [ 0.5900],
        [ 0.0607],
        [-0.4568],
        [ 0.3758],
        [-0.7539],
        [ 0.4403],
        [ 0.6936],
        [ 1.2951],
        [-0.2941],
        [-0.7952],
        [-0.3005],
        [-1.0045],
        [-2.1125],
        [-2.4088],
        [ 0.8342],
        [-1.8054],
        [-1.3376],
        [ 0.6540],
        [-0.1123],
        [ 1.0436],
        [-0.2022],
        [-1.0255],
        [-0.1646],
        [ 0.5818],
        [-0.5151],
        [-0.1190],
        [-1.3705],
        [ 0.7422],
        [ 1.8771],
        [ 0.0707],
        [-1.5921],
        [ 0.7774],
        [-1.4089],
        [ 0.2089],
        [-0.4930],
        [ 0.4699],
        [-0.0482],
        [-0.2985],
        [ 1.

In [None]:
xb.mean()

tensor(0.0012)

In [None]:


# TODO: unfinished loop stub; `pass` keeps the cell syntactically valid until a body is written
for i in range(10):
    pass

In [None]:
# NOTE(review): `model_v` is not assigned in any visible cell; presumably `model_var` was meant — confirm
lr = 0.01
with torch.no_grad():
    for p in model_v.parameters():
        # manual SGD step; relies on p.grad having been populated by an earlier backward()
        p -= p.grad * lr

In [None]:
preds_2 = model_v(xb)

In [None]:
loss_2 = nll_regression(preds_2, yb)

In [None]:
loss_2

tensor(6.6749, grad_fn=<AddBackward0>)

In [None]:
# variant of the nll_regression formula that divides the squared error by preds[:,1].pow(2)
# instead of preds[:,1] (i.e. it treats column 1 differently in the two terms)
(0.5*preds[:,1].log() + 0.5*(yb.squeeze()-preds[:,0]).pow(2).div(preds[:,1].pow(2))).mean() + 5

tensor(9.1327, grad_fn=<AddBackward0>)

In [None]:
p = list(model.parameters())[1]

In [None]:
p.grad