In [1]:
import torch
from torch import nn

In [16]:
class Res_LSTM(nn.Module):
    def __init__(self, dimention, factor_num, sequence, fully_connect_layer_neural, layer_num=2,transformer = False):
        super(Res_LSTM, self).__init__()
        self.factor_num = factor_num  # 108
        self.sequence = sequence  #
        self.dimention = dimention  #
        self.fc2_neuron = fully_connect_layer_neural  # 32
        self.transformer = transformer

        # Layer
        self.bn1 = torch.nn.BatchNorm1d(self.dimention * self.factor_num * self.sequence)
        self.bn2 = torch.nn.BatchNorm1d(self.fc2_neuron * 2 * self.sequence)
        self.bn3 = torch.nn.BatchNorm1d(self.fc2_neuron * 2)
        if self.transformer == True:
            self.q_metrix = nn.Linear(self.factor_num, self.factor_num)
            self.k_metrix = nn.Linear(self.factor_num, self.factor_num)
            self.v_metrix = nn.Linear(self.factor_num, self.factor_num)
            self.MultiheadAttention = nn.MultiheadAttention(self.factor_num, layer_num, batch_first=True)
        
        self.lstm = nn.LSTM(self.factor_num, self.fc2_neuron, layer_num, batch_first=True, bidirectional=True,
                            dropout=0.2)
        self.dropout = nn.Dropout(0.2)
        self.LeakyReLU = nn.LeakyReLU()
        self.out = nn.Linear(self.fc2_neuron * 2, 1)

    def forward(self, x):
        x = self.norm_1(x)
        x = torch.transpose(x, 0, 1)  # x.shape: torch.Size([3, 512, 20, 108])

        final, (hn, cn) = self.lstm_layer(x[0], 2)  # torch.Size([512, 20, 128])
        for i in range(1, x.shape[0]):
            add, _ = self.lstm_layer(x[i], 2)
            final = self.skip_connection(final, add)
        # start = torch.Size([512, 20, 128])

        x, _ = self.attention_net(final, hn)
        x = self.bn3(x)
        x = self.LeakyReLU(x)
        x = self.dropout(x)
        y_pred = self.out(x)
        return y_pred

    def norm_1(self, x):
        batch_num, original_shape = x.shape[0], x.shape
        x = x.reshape(batch_num, -1)
        x = self.bn1(x)
        x = x.reshape(original_shape)
        return x

    def norm_2(self, x):
        batch_num, original_shape = x.shape[0], x.shape
        x = x.reshape(batch_num, -1)
        x = self.bn2(x)
        x = x.reshape(original_shape)
        return x

    def lstm_layer(self, x, layer_num):
        if self.transformer == True:
            q = self.q_metrix(x)
            k = self.k_metrix(x)
            v = self.v_metrix(x)
            x, x_weight = self.MultiheadAttention(q, k, v)  # attn_output = torch.Size([512, 20, 128])
        else:
            pass
        # out = torch.Size([512, 20, 128])
        out, (hn, cn) = self.lstm(x)
        return out,(hn, cn)

    def skip_connection(self, origin, add):
        return self.norm_2(origin + add)

    def attention_net(self, lstm_output, final_state):
        # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix
        # final_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        hidden = torch.cat((final_state[0], final_state[1]), dim=1).unsqueeze(
            2)  # hidden : [batch_size, n_hidden * num_directions(=2), n_layer(=1)]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2)  # [batch_size,sequence]
        attn_weights = torch.nn.functional.softmax(attn_weights, 1)  # [batch_size,sequence]   # torch.Size([512, 20])
        # context: [batch_size, n_hidden * num_directions(=2)]
        output = torch.bmm(lstm_output.transpose(1, 2), attn_weights.unsqueeze(2)).squeeze(
            2)  # [batch_size, n_hidden * num_directions(=2)]
        return output, attn_weights

In [22]:
class AlphaNet_LSTM_V1(nn.Module):
    def __init__(self, factor_num, sequence, fully_connect_layer_neural, attention=False,transformer = False):
        super(AlphaNet_LSTM_V1, self).__init__()
        self.factor_num = factor_num  # 108
        self.sequence = sequence
        self.fc2_neuron = fully_connect_layer_neural  # 32
        self.attention = attention
        self.transformer = transformer
        # Layer
        self.batch = torch.nn.BatchNorm1d(self.sequence * self.factor_num)
        self.lstm = nn.LSTM(self.factor_num, self.fc2_neuron, 3, batch_first=True, bidirectional=True, dropout=0.2)
        self.lstm2 = nn.LSTM(int(self.fc2_neuron * 2), int(self.fc2_neuron / 2), 3, batch_first=True,
                             bidirectional=True, dropout=0.2)
        self.batch2 = torch.nn.BatchNorm1d(int(self.fc2_neuron * 2))
        self.batch3 = torch.nn.BatchNorm1d(self.fc2_neuron)
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()
        self.LeakyReLU = nn.LeakyReLU()
        self.out = nn.Linear(self.fc2_neuron, 1)
        if self.transformer == True:
            self.TransformerLayer = nn.TransformerEncoderLayer(d_model=self.fc2_neuron * 2, nhead=2,batch_first=True)
    def forward(self, x):
        x = x.reshape(x.shape[0], -1).float()
        x = self.batch(x)
        x = x.reshape(x.shape[0], self.sequence, self.factor_num)
        
        
        x, _ = self.lstm(x)  # x.shape: torch.Size([6182, 10, 128])
        if self.transformer == True:
            x, _ = self.TransformerLayer(x)  # attn_output = torch.Size([512, 20, 128])
        else:
            x = self.LeakyReLU(x)
        
        x = torch.transpose(x, 1, 2)  # x.shape: torch.Size([6182, 128, 10])
        x = self.batch2(x)
        x = torch.transpose(x, 1, 2)

        x, (hn, cn) = self.lstm2(x)  # torch.Size([6182, 10, 64])
        if self.attention == True:
            x, _ = self.attention_net(x, hn)
        else:
            x = x[:, -1]  # torch.Size([6182, 64])
        x = self.batch3(x)  # torch.Size([6182, 64])
        x = self.relu(x)
        x = self.dropout(x)
        y_pred = self.out(x)
        return y_pred

    def attention_net(self, lstm_output, final_state):
        # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix
        # final_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        hidden = torch.cat((final_state[0], final_state[1]), dim=1).unsqueeze(
            2)  # hidden : [batch_size, n_hidden * num_directions(=2), n_layer(=1)]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2)  # [batch_size,sequence]
        attn_weights = torch.nn.functional.softmax(attn_weights, 1)  # [batch_size,sequence]   # torch.Size([512, 20])
        # context: [batch_size, n_hidden * num_directions(=2)]
        output = torch.bmm(lstm_output.transpose(1, 2), attn_weights.unsqueeze(2)).squeeze(
            2)  # [batch_size, n_hidden * num_directions(=2)]
        return output, attn_weights

In [25]:
import os
from os import walk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data as Data
import torch.optim as optim
from torch.autograd import Variable
from progressbar import ProgressBar
from tqdm import tqdm
import time
import multiprocessing as mp
# trainx = torch.randn(10000,3,20,108)
trainx = torch.randn(10000,20,108)
trainy = torch.randn(10000,1)
print("trainx.shape: " , trainx.shape)

trainx.shape:  torch.Size([10000, 20, 108])


In [26]:
train_dataset = Data.TensorDataset(trainx, trainy)
batch_size = 1024
train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=16,
    pin_memory=True
)

In [None]:
alphanet = AlphaNet_LSTM_V1(108,20,64,attention=True)
# alphanet = Transformer_LSTM(3,108,20,64)
alphanet = alphanet.cuda()
# alphanet = torch.nn.parallel.DataParallel(alphanet)
print(alphanet)
total_length = trainx.shape[0]
LR = 0.01
loss_function = nn.MSELoss().cuda()
optimizer = optim.Adam(alphanet.parameters(), lr=LR)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=2,gamma = 0.5)
epoch_num = 20
loss_list = []

min_loss = float("inf")
for epoch in tqdm(range(epoch_num)):
    total_loss = 0
    for _, (inputs, outputs) in enumerate(train_loader):
        inputs = Variable(inputs).float().cuda()
        outputs = Variable(outputs).float().cuda()
        optimizer.zero_grad() # noticed:  the grad return to zero before starting the loop
        
        # forward + backward +update
        pred = alphanet(inputs.cuda())
        pred = pred.cuda()
        loss = loss_function(pred, outputs)
        loss.backward()
        optimizer.step()
        
#         lr_list.append(optimizer.state_dict()['param_groups'][0]['lr'])
        total_loss += loss.item()
    total_loss = total_loss * batch_size / total_length
    print('Epoch: ', epoch + 1, ' loss: ', total_loss)
    loss_list.append(total_loss)

  0%|          | 0/20 [00:00<?, ?it/s]

AlphaNet_LSTM_V1(
  (batch): BatchNorm1d(2160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(108, 64, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  (lstm2): LSTM(128, 32, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  (batch2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (relu): ReLU()
  (LeakyReLU): LeakyReLU(negative_slope=0.01)
  (out): Linear(in_features=64, out_features=1, bias=True)
)


  5%|▌         | 1/20 [00:02<00:38,  2.05s/it]

Epoch:  1  loss:  1.1134240112304687


 10%|█         | 2/20 [00:04<00:37,  2.11s/it]

Epoch:  2  loss:  1.049112548828125


 15%|█▌        | 3/20 [00:06<00:35,  2.07s/it]

Epoch:  3  loss:  1.03987265625


 20%|██        | 4/20 [00:08<00:32,  2.02s/it]

Epoch:  4  loss:  1.010880987548828


 25%|██▌       | 5/20 [00:10<00:30,  2.01s/it]

Epoch:  5  loss:  0.9534799926757812


 30%|███       | 6/20 [00:12<00:28,  2.04s/it]

Epoch:  6  loss:  0.8197265625


 35%|███▌      | 7/20 [00:14<00:26,  2.03s/it]

Epoch:  7  loss:  0.6417380981445312


 40%|████      | 8/20 [00:16<00:24,  2.06s/it]

Epoch:  8  loss:  0.466131982421875


 45%|████▌     | 9/20 [00:18<00:22,  2.09s/it]

Epoch:  9  loss:  0.3212902099609375


 50%|█████     | 10/20 [00:20<00:20,  2.07s/it]

Epoch:  10  loss:  0.23362308044433594


 55%|█████▌    | 11/20 [00:22<00:19,  2.18s/it]

Epoch:  11  loss:  0.1714110870361328


 60%|██████    | 12/20 [00:25<00:17,  2.14s/it]

Epoch:  12  loss:  0.13443065032958984


 65%|██████▌   | 13/20 [00:27<00:14,  2.12s/it]

Epoch:  13  loss:  0.11365663146972656


 70%|███████   | 14/20 [00:29<00:12,  2.15s/it]

Epoch:  14  loss:  0.09025854949951172


 75%|███████▌  | 15/20 [00:31<00:10,  2.11s/it]

Epoch:  15  loss:  0.08059934539794922


In [18]:
alphanet = AlphaNet_LSTM_V1(108,20,64,attention=True,transformer = True)
# alphanet = Transformer_LSTM(3,108,20,64)
alphanet = alphanet.cuda()
# alphanet = torch.nn.parallel.DataParallel(alphanet)
print(alphanet)
total_length = trainx.shape[0]
LR = 0.01
loss_function = nn.MSELoss().cuda()
optimizer = optim.Adam(alphanet.parameters(), lr=LR)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=2,gamma = 0.5)
epoch_num = 20
loss_list = []

min_loss = float("inf")
for epoch in tqdm(range(epoch_num)):
    total_loss = 0
    for _, (inputs, outputs) in enumerate(train_loader):
        inputs = Variable(inputs).float().cuda()
        outputs = Variable(outputs).float().cuda()
        optimizer.zero_grad() # noticed:  the grad return to zero before starting the loop
        
        # forward + backward +update
        pred = alphanet(inputs.cuda())
        pred = pred.cuda()
        loss = loss_function(pred, outputs)
        loss.backward()
        optimizer.step()
        
#         lr_list.append(optimizer.state_dict()['param_groups'][0]['lr'])
        total_loss += loss.item()
    total_loss = total_loss * batch_size / total_length
    print('Epoch: ', epoch + 1, ' loss: ', total_loss)
    loss_list.append(total_loss)

  0%|          | 0/20 [00:00<?, ?it/s]

Transformer_LSTM(
  (bn1): BatchNorm1d(6480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(2560, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (q_metrix): Linear(in_features=108, out_features=108, bias=True)
  (k_metrix): Linear(in_features=108, out_features=108, bias=True)
  (v_metrix): Linear(in_features=108, out_features=108, bias=True)
  (MultiheadAttention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=108, out_features=108, bias=True)
  )
  (lstm): LSTM(108, 64, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (LeakyReLU): LeakyReLU(negative_slope=0.01)
  (out): Linear(in_features=128, out_features=1, bias=True)
)


  5%|▌         | 1/20 [00:02<00:43,  2.29s/it]

Epoch:  1  loss:  1.1639129760742188


 10%|█         | 2/20 [00:04<00:44,  2.50s/it]

Epoch:  2  loss:  1.0448883239746094


 15%|█▌        | 3/20 [00:07<00:44,  2.64s/it]

Epoch:  3  loss:  1.035843603515625


 20%|██        | 4/20 [00:10<00:43,  2.70s/it]

Epoch:  4  loss:  1.0386446533203124


 25%|██▌       | 5/20 [00:12<00:38,  2.58s/it]

Epoch:  5  loss:  1.0318886352539063


 30%|███       | 6/20 [00:15<00:34,  2.48s/it]

Epoch:  6  loss:  1.0264286254882813


 35%|███▌      | 7/20 [00:17<00:32,  2.54s/it]

Epoch:  7  loss:  1.0366453796386719


 40%|████      | 8/20 [00:20<00:30,  2.55s/it]

Epoch:  8  loss:  1.0355045043945312


 45%|████▌     | 9/20 [00:22<00:27,  2.53s/it]

Epoch:  9  loss:  1.0371146057128906


 50%|█████     | 10/20 [00:25<00:24,  2.47s/it]

Epoch:  10  loss:  1.0378154479980468


 55%|█████▌    | 11/20 [00:27<00:21,  2.41s/it]

Epoch:  11  loss:  1.037383349609375


 60%|██████    | 12/20 [00:29<00:19,  2.38s/it]

Epoch:  12  loss:  1.0386478637695313


Exception ignored in: <function _releaseLock at 0x7f80ea88e680>
Traceback (most recent call last):
  File "/usr/local/python3/lib/python3.7/logging/__init__.py", line 221, in _releaseLock
    def _releaseLock():
KeyboardInterrupt
 60%|██████    | 12/20 [00:36<00:24,  3.05s/it]


RuntimeError: DataLoader worker (pid(s) 266612, 266613, 266614, 266615, 266616, 266617, 266618) exited unexpectedly

In [20]:
encoder_layer = nn.TransformerEncoderLayer(d_model=108, nhead=2,batch_first=True)
src = torch.rand(512, 20, 108)
out = encoder_layer(src)

In [21]:
out.shape

torch.Size([512, 20, 108])