In [226]:
import torch
import torchvision
from torch import nn
from torchsummary import summary
import pandas as pd
import numpy as np

from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Setting

In [7]:
plt.style.use("dark_background")

# Model

In [356]:
class BiLSTM(nn.Module):
    def __init__(self, feature_size, hidden_size, lstm_layer, dropout = 0.2):
        super(BiLSTM, self).__init__()
        self.input_size = feature_size
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(p = dropout)
        self.lstm = nn.LSTM(input_size = self.input_size, 
                            hidden_size = hidden_size,
                            num_layers = lstm_layer,
                            dropout = dropout,
                            bidirectional = True)
        self.hidden2label = nn.Linear(hidden_size*2, 8)
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        label_space = self.hidden2label(lstm_out)
        # return label_space
        # label_scores = F.log_softmax(label_space, dim = 1)
        label_scores = nn.Softmax(dim = 1)(label_space)

        return label_scores

In [357]:
sample = torch.rand([1, 1000, 32])
net = BiLSTM(feature_size = 32, hidden_size = 128, lstm_layer = 4)

In [358]:
pred = net(sample)
pred.shape

torch.Size([1, 1000, 8])

In [360]:
# t = list(pred.flatten().detach().numpy())
# plt.hist(t)

In [361]:
# predicted label example
(pred.squeeze().detach().numpy().argmax(axis = 1) + 1)[:100]

array([1, 5, 5, 1, 5, 8, 1, 2, 2, 7, 7, 5, 5, 5, 5, 7, 2, 8, 2, 4, 1, 1,
       3, 4, 8, 3, 8, 6, 2, 3, 7, 3, 8, 7, 4, 2, 1, 6, 1, 1, 5, 4, 6, 4,
       5, 6, 6, 6, 4, 7, 4, 4, 6, 7, 4, 2, 5, 2, 2, 3, 6, 4, 7, 7, 3, 7,
       3, 1, 7, 5, 2, 4, 8, 5, 4, 5, 8, 1, 2, 2, 5, 2, 8, 8, 7, 2, 6, 7,
       1, 1, 8, 6, 1, 4, 5, 7, 1, 8, 5, 1])

# Data

In [2]:
data_df = pd.read_csv('data/mid_res/20210526_data_df.csv')
val_df = pd.read_csv('data/mid_res/20210526_val_df.csv')

In [5]:
col_drop_x = ['time', 'label']
col_label = 'label'
col_drop = ['latitude',
 'longitude',
 'altitude',
 'time_dlt',
 'valid_dlt',
 'east',
 'north',
 'east_dlt',
 'north_dlt',
 'east_speed',
 'north_speed',
 'cells_ctype_mode',
 'speed_dif',
 'speed_dlt',
 'speed'
 ]
X_train, y_train = data_df.drop(col_drop_x + col_drop, axis = 1).fillna(0), data_df[col_label]
X_val, y_val = val_df.drop(col_drop_x + col_drop, axis = 1).fillna(0), val_df[col_label]

In [19]:
X_train.shape

(980527, 32)

In [400]:
class TMR_Dataset(Dataset):
    def __init__(self, x_array, label):
        super().__init__()
        self.len, self.feature_size = x_array.shape
        self.data = x_array
        self.data = torch.tensor(self.data).float()
        self.label = label
    
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return (self.data[idx,:], self.label[idx])

In [401]:
BATCH_SIZE = 256
EPOCHS = 1
LR = 0.001

In [402]:
data = TMR_Dataset(np.array(X_train), y_train)
train_loader = DataLoader(data, batch_size = BATCH_SIZE, shuffle = False)

In [405]:
X, y = next(iter(train_loader))

In [412]:
net(X.unsqueeze(0))

tensor([[[0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
         [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
         [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
         ...,
         [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
         [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
         [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039]]],
       grad_fn=<SoftmaxBackward>)

In [420]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optimizer = torch.optim.Adam(net.parameters(), lr = LR)
criterion = nn.CrossEntropyLoss()

In [433]:
for epoch in range(EPOCHS):
    for i, (X, y) in enumerate(train_loader):
        output = net(X.unsqueeze(0))
        output = output.squeeze().argmax(dim = 1) + 1
        print(output)
        print(y)
        loss = criterion(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if i > 10:
            break

tensor([2, 5, 4, 8, 4, 2, 4, 3, 2, 8, 4, 2, 5, 5, 7, 2, 2, 7, 3, 3, 6, 6, 8, 8,
        6, 5, 6, 4, 7, 4, 7, 6, 6, 8, 5, 8, 4, 4, 5, 6, 2, 7, 7, 7, 4, 4, 3, 3,
        8, 6, 6, 4, 4, 2, 1, 1, 3, 8, 3, 2, 2, 5, 7, 5, 4, 1, 7, 3, 4, 6, 3, 5,
        7, 2, 5, 3, 7, 3, 6, 5, 7, 1, 7, 2, 2, 7, 7, 2, 8, 3, 7, 1, 3, 6, 4, 3,
        3, 8, 6, 2, 8, 1, 8, 7, 8, 5, 6, 6, 4, 1, 8, 5, 3, 5, 7, 6, 4, 3, 6, 4,
        1, 5, 8, 2, 6, 2, 3, 7, 4, 4, 4, 2, 7, 3, 3, 6, 8, 1, 7, 4, 2, 5, 1, 8,
        6, 7, 1, 6, 5, 4, 2, 7, 7, 7, 1, 3, 4, 1, 1, 1, 1, 4, 2, 7, 3, 8, 1, 4,
        5, 5, 6, 8, 7, 5, 7, 1, 1, 7, 3, 2, 8, 5, 7, 5, 7, 4, 1, 7, 3, 4, 1, 8,
        4, 5, 2, 3, 3, 7, 3, 1, 4, 2, 6, 8, 5, 6, 1, 8, 1, 1, 5, 4, 2, 7, 8, 5,
        1, 2, 7, 2, 8, 7, 7, 7, 7, 4, 8, 4, 4, 8, 8, 6, 4, 5, 2, 3, 5, 5, 3, 3,
        5, 3, 6, 2, 5, 1, 7, 4, 2, 6, 8, 4, 2, 4, 8, 1])
tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)