In [122]:
import torch
import torchvision
from torch import nn
from torchsummary import summary
import pandas as pd
import numpy as np
import datetime

from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, confusion_matrix, f1_score, accuracy_score, recall_score, classification_report
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Setting

In [2]:
plt.style.use("dark_background")

# Model

In [13]:
class BiLSTM(nn.Module):
    def __init__(self, feature_size, hidden_size, lstm_layer, dropout = 0.2):
        super(BiLSTM, self).__init__()
        self.input_size = feature_size
        self.hidden_dim = hidden_size
        self.dropout = nn.Dropout(p = dropout)
        self.lstm = nn.LSTM(input_size = self.input_size, 
                            hidden_size = hidden_size,
                            num_layers = lstm_layer,
                            dropout = dropout,
                            bidirectional = True)
        self.hidden2label = nn.Linear(hidden_size*2, 8)
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        label_space = self.hidden2label(lstm_out)
        label_scores = nn.Softmax(dim = 1)(label_space)

        return label_scores

In [357]:
sample = torch.rand([1, 1000, 32])
net = BiLSTM(feature_size = 32, hidden_size = 128, lstm_layer = 4)

In [358]:
pred = net(sample)
pred.shape

torch.Size([1, 1000, 8])

In [360]:
# t = list(pred.flatten().detach().numpy())
# plt.hist(t)

In [361]:
# predicted label example
(pred.squeeze().detach().numpy().argmax(axis = 1) + 1)[:100]

array([1, 5, 5, 1, 5, 8, 1, 2, 2, 7, 7, 5, 5, 5, 5, 7, 2, 8, 2, 4, 1, 1,
       3, 4, 8, 3, 8, 6, 2, 3, 7, 3, 8, 7, 4, 2, 1, 6, 1, 1, 5, 4, 6, 4,
       5, 6, 6, 6, 4, 7, 4, 4, 6, 7, 4, 2, 5, 2, 2, 3, 6, 4, 7, 7, 3, 7,
       3, 1, 7, 5, 2, 4, 8, 5, 4, 5, 8, 1, 2, 2, 5, 2, 8, 8, 7, 2, 6, 7,
       1, 1, 8, 6, 1, 4, 5, 7, 1, 8, 5, 1])

# Data

In [4]:
data_df = pd.read_csv('data/mid_res/20210526_data_df.csv')
val_df = pd.read_csv('data/mid_res/20210526_val_df.csv')

In [5]:
col_drop_x = ['time', 'label']
col_label = 'label'
col_drop = ['latitude',
 'longitude',
 'altitude',
 'time_dlt',
 'valid_dlt',
 'east',
 'north',
 'east_dlt',
 'north_dlt',
 'east_speed',
 'north_speed',
 'cells_ctype_mode',
 'speed_dif',
 'speed_dlt',
 'speed'
 ]
X_train, y_train = data_df.drop(col_drop_x + col_drop, axis = 1).fillna(0), data_df[col_label]
X_val, y_val = val_df.drop(col_drop_x + col_drop, axis = 1).fillna(0), val_df[col_label]

In [6]:
X_train.shape

(980527, 32)

In [7]:
class TMR_Dataset(Dataset):
    def __init__(self, x_array, label):
        super().__init__()
        self.len, self.feature_size = x_array.shape
        self.data = x_array
        self.data = torch.tensor(self.data).float()
        self.label = label
    
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return (self.data[idx,:], self.label[idx])

In [131]:
BATCH_SIZE = 256
EPOCHS = 10
LR = 0.01

In [132]:
data = TMR_Dataset(np.array(X_train), y_train - 1)
train_loader = DataLoader(data, batch_size = BATCH_SIZE, shuffle = False)
X_val = torch.tensor(np.array(X_val), dtype = torch.float32)
y_val = y_val - 1

In [133]:
# net = BiLSTM(feature_size = 32, hidden_size = 4, lstm_layer = 4)
# X, y = next(iter(train_loader))
# net(X.unsqueeze(0)).squeeze().argmax(dim = 1)

In [134]:
writer = SummaryWriter(log_dir = 'log', comment = "")

In [135]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = BiLSTM(feature_size = 32, hidden_size = 64, lstm_layer = 4).to(device)
optimizer = torch.optim.Adam(net.parameters(), lr = LR)
criterion = nn.CrossEntropyLoss()

In [138]:
global_step = 0
for epoch in range(EPOCHS):
    for i, (X, y) in enumerate(train_loader):
        output = net(X.unsqueeze(0)).squeeze()
        loss = criterion(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1

        if i % 100 == 0:
            net.eval()
            test_pred = net(X_val.unsqueeze(0)).squeeze().argmax(dim = 1)
            accuracy = accuracy_score(y_val, test_pred)
            print('epoch:{:<2d} | iteration:{:<4d} | loss:{:<6.4f} | accuracy:{:<4.2f}'.format(epoch, i, loss, accuracy))
            writer.add_scalar('loss_train', loss, global_step)
            writer.add_scalar('accuracy_val', accuracy, global_step)
            writer.add_scalar('lr', optimizer.state_dict()['param_groups'][0]['lr'], global_step)
            net.train()

cy:0.05
epoch:0  | iteration:3300 | loss:2.0794 | accuracy:0.05
epoch:0  | iteration:3400 | loss:2.0794 | accuracy:0.05
epoch:0  | iteration:3500 | loss:2.0794 | accuracy:0.09
epoch:0  | iteration:3600 | loss:2.0794 | accuracy:0.16
epoch:0  | iteration:3700 | loss:2.0794 | accuracy:0.05
epoch:0  | iteration:3800 | loss:2.0794 | accuracy:0.06
epoch:1  | iteration:0    | loss:2.0794 | accuracy:0.06
epoch:1  | iteration:100  | loss:2.0794 | accuracy:0.00
epoch:1  | iteration:200  | loss:2.0794 | accuracy:0.01
epoch:1  | iteration:300  | loss:2.0794 | accuracy:0.00
epoch:1  | iteration:400  | loss:2.0794 | accuracy:0.07
epoch:1  | iteration:500  | loss:2.0794 | accuracy:0.04
epoch:1  | iteration:600  | loss:2.0794 | accuracy:0.04
epoch:1  | iteration:700  | loss:2.0794 | accuracy:0.04
epoch:1  | iteration:800  | loss:2.0794 | accuracy:0.04
epoch:1  | iteration:900  | loss:2.0794 | accuracy:0.07
epoch:1  | iteration:1000 | loss:2.0794 | accuracy:0.13
epoch:1  | iteration:1100 | loss:2.0794 