# Modeling

In [1]:
import math

import torch
from torch import nn
import numpy as np
import pandas as pd


In [2]:
data = pd.read_csv('DSL-StrongPasswordData.csv')
time_stamp = data.iloc[:, 3:]
typist = np.zeros(data.shape[0])
typist_list = list(set(data.subject))
for i, s in data.subject.iteritems():
    typist[i] = typist_list.index(s)

np_time_stamp = np.array(time_stamp)

In [3]:

num_classes = 10
# For the weights, we set requires_grad after the initialization,
# since we don’t want that step included in the gradient. 
# Note that a trailling _ in PyTorch signifies that the operation is performed in-place.
weights = torch.randn(784, 10)
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)

In [4]:
def softmax(x):
    return x.exp() / x.exp().sum(-1).unsqueeze(-1)

def model(xb):
    # @ means matrix multiplication
    return softmax(xb @ weights + bias)

In [5]:
fake = np.random.random(784) * 10
fake_tensor = torch.tensor(fake, dtype=torch.int8).float()
with torch.no_grad():
    # output = model(fake)
    # TypeError: unsupported operand type(s) for @: 'numpy.ndarray' and 'Tensor'
    output = model(fake_tensor)
output

tensor([nan, 0., 0., nan, 0., 0., 0., 0., nan, nan])

In [6]:
fake_tensor @ weights + bias, weights

(tensor([ 91.3079, -87.6783,  -9.5313, 145.9026, -39.3080,  69.0123, -13.5331,
          87.0673, 145.2812, 135.5267], grad_fn=<AddBackward0>),
 tensor([[ 0.7174,  0.9600,  0.8354,  ...,  0.6415,  0.1312,  0.2727],
         [ 1.5002, -0.7876, -0.8425,  ...,  0.8735,  0.5796, -0.3114],
         [-1.2711, -1.5224,  0.9312,  ...,  1.1718,  0.7821, -0.0384],
         ...,
         [ 0.9191,  0.3357,  0.3700,  ...,  1.7623,  0.8601,  0.2099],
         [ 0.0635, -0.3399,  0.8631,  ..., -1.5442, -0.2272, -0.8889],
         [-0.7783,  0.4008, -1.1185,  ...,  1.0344, -0.1838,  1.0786]],
        requires_grad=True))

## nn.Sequential

In [7]:
model = nn.Sequential(
            nn.Conv2d(1,20,5),
            nn.ReLU(),
            nn.Conv2d(20,64,5),
            nn.ReLU()
        )

In [8]:
model = nn.Sequential(
            nn.LSTMCell(1, 5),
            nn.LSTMCell(1, 5)
        )

## nn.Module

In [9]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias

In [10]:
class Model(nn.Module):
    def __init__(self, num_in, num_out):
        super().__init__()
        hidden_layer = nn.Linear(num_in, num_out, bias=True)
    
    def forward(self, input_vector):
        return hidden_layer(input_vector)

In [11]:
class LSTM(nn.Module):
    def __init__(self, num_channels, hidden_dim, num_layers=5, target_size=512):
        super(LSTM, self).__init__()
        self.num_channels = num_channels
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(num_channels, hidden_dim, num_layers=num_layers, batch_first=False)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2target = nn.Linear(hidden_dim, target_size)

    def forward(self, time_stamp):
        """
        Args:
            time_stamp: (num_feature, batch_size, num_channels)
        Output:
            metric: (target_size, batch_size)
        """
        # h.shape = c.shape = (num_layers, num_feature, hidden_dim)
        #                                     num_feature, batch_size, dim_vector
        _, (h, c) = self.lstm(time_stamp.view(-1, len(time_stamp), self.num_channels).double())

        # input the last h and c, whose shape = (num_feature, hidden_dim)
        # shape = (2 * num_feature, hidden_dim) after concatenation
        state_vector = torch.cat((h[-1, :, :], c[-1, :, :]), 0)
        state_metric = self.hidden2target(state_vector)
        metric = torch.sum(state_metric, dim=0)
        return metric

In [12]:
lstm = LSTM(1, 10)

In [13]:
for mod in lstm.modules():
    print(mod)

LSTM(
  (lstm): LSTM(1, 10, num_layers=5)
  (hidden2target): Linear(in_features=10, out_features=512, bias=True)
)
LSTM(1, 10, num_layers=5)
Linear(in_features=10, out_features=512, bias=True)


In [14]:
for chd in lstm.children():
    print(chd)

LSTM(1, 10, num_layers=5)
Linear(in_features=10, out_features=512, bias=True)


In [15]:
m = nn.Sequential(nn.Linear(2,2), 
                  nn.ReLU(),
                  nn.Sequential(nn.Sigmoid(), nn.ReLU()))

In [16]:
for chd in m.children():
    print(chd)

Linear(in_features=2, out_features=2, bias=True)
ReLU()
Sequential(
  (0): Sigmoid()
  (1): ReLU()
)


In [17]:
for mod in m.modules():
    print(mod)

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): ReLU()
  (2): Sequential(
    (0): Sigmoid()
    (1): ReLU()
  )
)
Linear(in_features=2, out_features=2, bias=True)
ReLU()
Sequential(
  (0): Sigmoid()
  (1): ReLU()
)
Sigmoid()
ReLU()


In [18]:
lstm

LSTM(
  (lstm): LSTM(1, 10, num_layers=5)
  (hidden2target): Linear(in_features=10, out_features=512, bias=True)
)

In [19]:
lstm.lstm, lstm.hidden2target

(LSTM(1, 10, num_layers=5),
 Linear(in_features=10, out_features=512, bias=True))

In [20]:
lstm.state_dict().keys()

odict_keys(['lstm.weight_ih_l0', 'lstm.weight_hh_l0', 'lstm.bias_ih_l0', 'lstm.bias_hh_l0', 'lstm.weight_ih_l1', 'lstm.weight_hh_l1', 'lstm.bias_ih_l1', 'lstm.bias_hh_l1', 'lstm.weight_ih_l2', 'lstm.weight_hh_l2', 'lstm.bias_ih_l2', 'lstm.bias_hh_l2', 'lstm.weight_ih_l3', 'lstm.weight_hh_l3', 'lstm.bias_ih_l3', 'lstm.bias_hh_l3', 'lstm.weight_ih_l4', 'lstm.weight_hh_l4', 'lstm.bias_ih_l4', 'lstm.bias_hh_l4', 'hidden2target.weight', 'hidden2target.bias'])

In [21]:
lstm.lstm.weight_ih_l0.data

tensor([[ 0.0990],
        [-0.2571],
        [-0.0347],
        [-0.0636],
        [ 0.2415],
        [-0.0642],
        [ 0.2453],
        [-0.0210],
        [ 0.2082],
        [-0.2716],
        [ 0.1293],
        [-0.2409],
        [-0.2310],
        [ 0.0069],
        [ 0.1756],
        [ 0.0419],
        [ 0.2629],
        [-0.2821],
        [-0.0298],
        [ 0.2047],
        [-0.1811],
        [ 0.2435],
        [-0.0887],
        [ 0.0451],
        [ 0.0273],
        [-0.2344],
        [ 0.2546],
        [-0.1128],
        [ 0.0912],
        [-0.2527],
        [-0.0728],
        [-0.3118],
        [ 0.2609],
        [-0.0022],
        [-0.2772],
        [ 0.2821],
        [ 0.2772],
        [-0.2560],
        [-0.1240],
        [-0.2942]])

In [22]:
pytorch_total_params = 0
for p in lstm.parameters():
    print(p.numel())
    pytorch_total_params += p.numel() if p.requires_grad else 0
pytorch_total_params

40
400
40
40
400
400
40
40
400
400
40
40
400
400
40
40
400
400
40
40
5120
512


9672

In [23]:
_lstm = nn.LSTM(input_size=1, hidden_size=5, num_layers=3).double()
t = torch.tensor(np_time_stamp[0, :]).view(-1, 1, 1)
pred = _lstm(t)

In [24]:
pytorch_total_params = 0
for p in _lstm.parameters():
    print(p.numel())

20
100
20
20
100
100
20
20
100
100
20
20


In [25]:
class Cell2Lstm(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Cell2Lstm, self).__init__()
        self.seq_layer = torch.nn.Sequential()
        self.seq_layer.add_module("cell1", nn.LSTMCell(input_size=input_size, hidden_size=hidden_size))
        self.seq_layer.add_module("cell2", nn.LSTMCell(input_size=input_size * 5, hidden_size=hidden_size))
        self.seq_layer.add_module("cell3", nn.LSTMCell(input_size=input_size * 5, hidden_size=hidden_size))

In [26]:
cell2lstm = Cell2Lstm(1, 5)
for p in cell2lstm.parameters():
    print(p.numel())

20
100
20
20
100
100
20
20
100
100
20
20


In [27]:
class Cell2Lstm(nn.Module):
    def __init__(self, input_size, hidden_size, num_layer):
        super(Cell2Lstm, self).__init__()
        self.seq_layer = torch.nn.Sequential()
        for i in range(num_layer):
            i_s = input_size * hidden_size if i > 0 else input_size
            self.seq_layer.add_module("cell{}".format(i),
                                      nn.LSTMCell(input_size=i_s, hidden_size=hidden_size, bias=False))

In [28]:
cell2lstm = Cell2Lstm(1, 5, 1)
for p in cell2lstm.parameters():
    print(p, p.numel())

Parameter containing:
tensor([[-0.2071],
        [ 0.2067],
        [ 0.3792],
        [-0.2371],
        [ 0.4100],
        [ 0.2919],
        [-0.2507],
        [-0.3056],
        [ 0.1491],
        [ 0.4270],
        [ 0.2330],
        [-0.0349],
        [-0.3124],
        [ 0.0942],
        [ 0.0467],
        [-0.4152],
        [-0.2260],
        [ 0.1430],
        [ 0.3304],
        [ 0.0527]], requires_grad=True) 20
Parameter containing:
tensor([[-0.1925,  0.1680,  0.3851,  0.1182, -0.1571],
        [-0.2092, -0.2862,  0.0501, -0.0069, -0.1397],
        [-0.3955,  0.3161,  0.2232, -0.2489,  0.0062],
        [-0.4464,  0.0494,  0.1768,  0.4391,  0.2524],
        [-0.1627,  0.2647, -0.1970, -0.3628,  0.1867],
        [-0.3138, -0.4116, -0.0631, -0.1843,  0.3224],
        [ 0.2693, -0.0153, -0.0316,  0.3335, -0.2768],
        [ 0.2459,  0.0139,  0.2980, -0.2892, -0.1297],
        [ 0.1608, -0.0476, -0.4238,  0.3639, -0.0797],
        [ 0.3929,  0.2959,  0.2273,  0.1504,  0.2143],
  

In [29]:
class NaiveCustomLSTM(nn.Module):
    def __init__(self, input_sz: int, hidden_sz: int):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        
        #i_t
        self.U_i = nn.Parameter(torch.Tensor(input_sz, hidden_sz))
        self.V_i = nn.Parameter(torch.Tensor(hidden_sz, hidden_sz))
        self.b_i = nn.Parameter(torch.Tensor(hidden_sz))
        
        #f_t
        self.U_f = nn.Parameter(torch.Tensor(input_sz, hidden_sz))
        self.V_f = nn.Parameter(torch.Tensor(hidden_sz, hidden_sz))
        self.b_f = nn.Parameter(torch.Tensor(hidden_sz))
        
        #c_t
        self.U_c = nn.Parameter(torch.Tensor(input_sz, hidden_sz))
        self.V_c = nn.Parameter(torch.Tensor(hidden_sz, hidden_sz))
        self.b_c = nn.Parameter(torch.Tensor(hidden_sz))
        
        #o_t
        self.U_o = nn.Parameter(torch.Tensor(input_sz, hidden_sz))
        self.V_o = nn.Parameter(torch.Tensor(hidden_sz, hidden_sz))
        self.b_o = nn.Parameter(torch.Tensor(hidden_sz))
        
    def forward(self, x, init_states=None):
            bs, seq_sz, _ = x.size()
            hidden_seq = []

            if init_states is None:
                h_t, c_t = (
                    torch.zeros(bs, self.hidden_size).to(x.device),
                    torch.zeros(bs, self.hidden_size).to(x.device),
                )
            else:
                h_t, c_t = init_states

            for t in range(seq_sz):
                x_t = x[:, t, :]

                i_t = torch.sigmoid(x_t @ self.U_i + h_t @ self.V_i + self.b_i)
                f_t = torch.sigmoid(x_t @ self.U_f + h_t @ self.V_f + self.b_f)
                g_t = torch.tanh(x_t @ self.U_c + h_t @ self.V_c + self.b_c)
                o_t = torch.sigmoid(x_t @ self.U_o + h_t @ self.V_o + self.b_o)
                c_t = f_t * c_t + i_t * g_t
                h_t = o_t * torch.tanh(c_t)

                hidden_seq.append(h_t.unsqueeze(0))

            #reshape hidden_seq p/ retornar
            hidden_seq = torch.cat(hidden_seq, dim=0)
            hidden_seq = hidden_seq.transpose(0, 1).contiguous()
            return hidden_seq, (h_t, c_t)

In [30]:
nc_lstm = NaiveCustomLSTM(1, 5)
for p in nc_lstm.parameters():
    print(p.numel())

5
25
5
5
25
5
5
25
5
5
25
5


### Modulelist and ModuleDict

In [31]:
class LinearNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()

        self.linears = nn.ModuleList([nn.Linear(input_size, layers_size)])
        self.linears.extend([nn.Linear(layers_size, layers_size) for i in range(1, self.num_layers-1)])
        # self.linears.append(nn.Linear(layers_size, output_size)

In [32]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features =  nn.ModuleDict({
            "Conv2d_1":nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            "BN_1":nn.BatchNorm2d(64),
            "Act_1":nn.ReLU(inplace=True),
            "Max_1":nn.MaxPool2d(kernel_size=3, stride=2),

            "Conv2d_2":nn.Conv2d(64, 192, kernel_size=5, padding=2),
            "BN_2":nn.BatchNorm2d(192),
            "Act_2":nn.ReLU(inplace=True),
            "Max_2":nn.MaxPool2d(kernel_size=3, stride=2),

            "Conv2d_3":nn.Conv2d(192, 384, kernel_size=3, padding=1),
            "BN_3":nn.BatchNorm2d(384),
            "Act_3":nn.ReLU(inplace=True),
            
            "Conv2d_4":nn.Conv2d(384, 256, kernel_size=3, padding=1),
            "BN_4":nn.BatchNorm2d(64),
            "Act_4":nn.ReLU(inplace=True),
            
            "Conv2d_5":nn.Conv2d(256, 256, kernel_size=3, padding=2),
            "BN_5":nn.BatchNorm2d(64),
            "Act_5":nn.ReLU(inplace=True),
            "Max_5":nn.MaxPool2d(kernel_size=3, stride=2)
        })

        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.fullyconnected = nn.ModuleDict({
            "Pool":nn.AdaptiveAvgPool2d((6, 6)),
            "drop_6":nn.Dropout(),
            "Linear_6":nn.Linear(256 * 6 * 6, 4096),
            #"BN_6":nn.BatchNorm1d(4096),
            "Act_6":nn.ReLU(inplace=True),
            "drop_7":nn.Dropout(),
            "Linear_7":nn.Linear(4096, 4096),
            #"BN_7":nn.BatchNorm1d(4096),
            "Act_7":nn.ReLU(inplace=True),
            "Linear_8":nn.Linear(4096, num_classes),
            #"BN_8":nn.BatchNorm1d(num_classes),
            #"Softmax":nn.LogSoftmax()
        })

    def forward(self, x):
        x = self.features['Conv2d_1'](x)
        x = self.features['Act_1'](x)
        x = self.features['Max_1'](x)
        x = self.features['Conv2d_2'](x)
        x = self.features['Act_2'](x)
        x = self.features['Max_2'](x)
        x = self.features['Conv2d_3'](x)
        x = self.features['Act_3'](x)
        x = self.features['Conv2d_4'](x)
        x = self.features['Act_4'](x)
        x = self.features['Conv2d_5'](x)
        x = self.features['Act_5'](x)
        x = self.features['Max_5'](x)
        x = self.avgpool(x)
        x = x.view(-1, 256 * 6 * 6)
        x = self.fullyconnected['Linear_6'](x)
        x = self.fullyconnected['Act_6'](x)
        x = self.fullyconnected['Linear_7'](x)
        x = self.fullyconnected['Act_7'](x)
        x = self.fullyconnected['Linear_8'](x)
        return x