In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from sklearn import metrics
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [2]:
class LSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden_layer_size = 30
        self.lstm = nn.LSTM(input_size=20, hidden_size=self.hidden_layer_size, num_layers=2, dropout=0.5)   
        self.linear = nn.Linear(self.hidden_layer_size, 11)  # equivalent to Dense in keras
        self.hidden_cell = (torch.zeros(2, 1, self.hidden_layer_size),
                            torch.zeros(2, 1, self.hidden_layer_size))
        self.softmax = nn.Softmax()

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        classifications = self.softmax(predictions)
        return classifications

In [3]:
model = LSTM()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

In [4]:
data = np.genfromtxt("ML_Datasets/processed_data_classification20.csv", delimiter=",")
X = data[:, :-1]
y = data[:, -1]

# normalize the data
X = MinMaxScaler().fit_transform(X)
# oversample the data to reduce imbalance
X, y = SMOTE().fit_resample(X, y)

In [5]:
# train, validation, test split
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.25)

In [6]:
X_train = torch.Tensor(X_train)
y_train = torch.Tensor(y_train).type(torch.LongTensor)
X_val = torch.Tensor(X_val)
y_val = torch.Tensor(y_val).type(torch.LongTensor)
X_test = torch.Tensor(X_test)
y_test = torch.Tensor(y_test).type(torch.LongTensor)

In [7]:
accuracy_stats = {
    "train": [],
    "val": []
}
loss_stats = {
    "train": [],
    "val": []
}

In [8]:
epochs = 100

for i in range(epochs):
    model.train()

    optimizer.zero_grad()
    model.hidden_cell = (torch.zeros(2, 1, model.hidden_layer_size),
                         torch.zeros(2, 1, model.hidden_layer_size))

    y_preds = model(X_train)
    _, y_pred_tags = torch.max(y_preds, dim=1)

    train_loss = loss_function(y_preds, y_train)
    train_loss.backward()
    optimizer.step()

    correct_pred = (y_pred_tags == y_train).float()
    train_acc = correct_pred.sum() * 100 / len(correct_pred)

    # VALIDATION
    model.eval()
    with torch.no_grad():

        y_val_preds = model(X_val)
        _, y_val_pred_tags = torch.max(y_val_preds, dim = 1)

        correct_val_pred = (y_val_pred_tags == y_val).float()

        val_loss = loss_function(y_val_preds, y_val)
        val_acc = correct_val_pred.sum() * 100 / len(correct_val_pred)
    
    loss_stats['train'].append(train_loss)
    loss_stats['val'].append(val_loss)
    accuracy_stats['train'].append(train_acc)
    accuracy_stats['val'].append(val_acc)

    print(f'Epoch {i}: | Train Loss: {train_loss:.5f} | Val Loss: {val_loss:.5f} | Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f}')

  classifications = self.softmax(predictions)
Epoch 0: | Train Loss: 2.39762 | Val Loss: 2.39787 | Train Acc: 9.122 | Val Acc: 8.954
Epoch 1: | Train Loss: 2.39781 | Val Loss: 2.39793 | Train Acc: 9.188 | Val Acc: 8.954
Epoch 2: | Train Loss: 2.39835 | Val Loss: 2.39780 | Train Acc: 9.188 | Val Acc: 8.954
Epoch 3: | Train Loss: 2.39807 | Val Loss: 2.39789 | Train Acc: 9.188 | Val Acc: 8.954
Epoch 4: | Train Loss: 2.39766 | Val Loss: 2.39787 | Train Acc: 9.187 | Val Acc: 8.954
Epoch 5: | Train Loss: 2.39792 | Val Loss: 2.39787 | Train Acc: 9.180 | Val Acc: 8.847
Epoch 6: | Train Loss: 2.39778 | Val Loss: 2.39776 | Train Acc: 9.109 | Val Acc: 8.870
Epoch 7: | Train Loss: 2.39762 | Val Loss: 2.39763 | Train Acc: 9.816 | Val Acc: 13.123
Epoch 8: | Train Loss: 2.39754 | Val Loss: 2.39759 | Train Acc: 11.034 | Val Acc: 9.648
Epoch 9: | Train Loss: 2.39728 | Val Loss: 2.39730 | Train Acc: 10.987 | Val Acc: 9.139
Epoch 10: | Train Loss: 2.39703 | Val Loss: 2.39696 | Train Acc: 10.289 | Val Acc

In [9]:
model.eval()
    
with torch.no_grad():

    y_test_preds = model(X_test)
    _, y_test_pred_tags = torch.max(y_test_preds, dim = 1)
    
    correct_test_pred = (y_test_pred_tags == y_test).float()
    test_loss = loss_function(y_test_preds, y_test)
    test_acc = correct_test_pred.sum() * 100 / len(correct_test_pred)
        
print(f'Validation Loss: {test_loss.item():10.10f}')
print(f'Accuracy: {metrics.accuracy_score(y_test_pred_tags, y_test)*100:10.10f}')
print(f'Precision: {metrics.precision_score(y_test_pred_tags, y_test, average="macro")*100:10.10f}')
print(f'Recall: {metrics.recall_score(y_test_pred_tags, y_test, average="macro")*100:10.10f}')
print(f'F1 Score: {metrics.f1_score(y_test_pred_tags, y_test, average="macro")*100:10.10f}')

Validation Loss: 2.3103382587
Accuracy: 19.9258928001
Precision: 19.9258928001
Recall: 13.9103545761
F1 Score: 14.3619214968
  classifications = self.softmax(predictions)
  _warn_prf(average, modifier, msg_start, len(result))
