In [31]:
# importing necessary dependencies
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import import_ipynb
from models import LSTMNet, CustomNet
from helpers import train_and_validate, count_parameters, plot_loss_comparison, plot_test_predictions, compare_mse_loss
import time

In [32]:
# Run on the GPU whenever CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [33]:
# hyperparameters
batch_size = 100        # samples per mini-batch for both data loaders
input_size = 2          # features per time step: (random value, mask flag)
hidden_size = 100       # hidden width handed to both model constructors
num_classes = 1         # one regression target per sequence
learning_rate = 0.001   # step size for the optimizers
# NOTE(review): momentum, use_nesterov and max_norm are not referenced by any
# cell visible in this notebook -- confirm whether they are still needed.
momentum = 0.8
use_nesterov = True
max_norm = 1.0
epochs = 1000           # number of training epochs per model

# Seed the global NumPy and PyTorch generators up front so that data
# generation, weight initialisation and batch shuffling are repeatable after
# "Restart Kernel -> Run All" (GPU kernels may still add small nondeterminism).
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

In [34]:
# function to create the adding problem dataset
def adding_problem_generator(N, seq_len=8, high=1, seed=None):
    """ A data generator for the adding problem.

    A single entry is a sequence of `seq_len` time steps with two features
    per step: a random number and a binary mask flag. Exactly two distinct
    positions of every sequence have the mask set to one. The label for the
    entry is the sum of the random numbers at those two positions.

     input          label
     -----          -----
    1 4 5 3  ----->   9 (4 + 5)     (values)
    0 1 1 0                         (mask)

    N: the number of entries.
    seq_len: the length of a single sequence (must be at least 2).
    high: the random data is sampled from a uniform distribution on [0, high).
    seed: optional seed for a private random generator. When None (the
        default) the global `np.random` state is used, so behaviour is
        unchanged for existing callers.
    return: (X, Y), X the data with shape (N, seq_len, 2) where X[..., 0]
        holds the values and X[..., 1] the mask, Y the labels with shape (N, 1).
    raises: ValueError if seq_len is smaller than 2.
    """
    if seq_len < 2:
        # Two distinct mask positions are required; fail with a clear message
        # instead of the generic sampling error raised by NumPy.
        raise ValueError(f"seq_len must be at least 2, got {seq_len}")

    # The np.random module and a Generator both expose uniform() and choice(),
    # so the rest of the function is agnostic to which one is in use.
    rng = np.random if seed is None else np.random.default_rng(seed)

    X_num = rng.uniform(low=0, high=high, size=(N, seq_len, 1))
    X_mask = np.zeros((N, seq_len, 1))
    Y = np.zeros((N, 1))
    for i in range(N):
        # Uniformly pick two distinct positions to mark in this sequence.
        positions = rng.choice(seq_len, size=2, replace=False)
        X_mask[i, positions] = 1
        Y[i, 0] = np.sum(X_num[i, positions])
    # Stack values and mask along the feature axis.
    X = np.concatenate((X_num, X_mask), axis=2)
    return X, Y

In [35]:
# Build the train / validation / test splits (10000 / 1000 / 100 sequences of
# length 8) and convert every generated array straight to a float tensor.
X_train, Y_train = (torch.tensor(arr).float() for arr in adding_problem_generator(10000, 8, 1))
X_val, Y_val = (torch.tensor(arr).float() for arr in adding_problem_generator(1000, 8, 1))
X_test, Y_test = (torch.tensor(arr).float() for arr in adding_problem_generator(100, 8, 1))

In [36]:
# Sanity check: show the (inputs, labels) shapes of each split, one per line.
for inputs, labels in ((X_train, Y_train), (X_val, Y_val), (X_test, Y_test)):
    print(inputs.shape, labels.shape)

torch.Size([10000, 8, 2]) torch.Size([10000, 1])
torch.Size([1000, 8, 2]) torch.Size([1000, 1])
torch.Size([100, 8, 2]) torch.Size([100, 1])


In [37]:
class AddingProblemDataset(Dataset):
    """Map-style dataset that pairs each input sequence with its label.

    X: indexable collection of inputs; X[i] is the i-th sequence.
    Y: indexable collection of labels with the same length as X.
    raises: ValueError if X and Y differ in length.
    """

    def __init__(self, X, Y):
        # Catch mismatched inputs/labels here rather than as an IndexError
        # (or silently ignored samples) in the middle of training.
        if len(X) != len(Y):
            raise ValueError(
                f"X and Y must have the same length, got {len(X)} and {len(Y)}"
            )
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of (input, label) pairs."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the i-th (input, label) pair."""
        return self.X[i], self.Y[i]

# Wrap the training and validation tensors so a DataLoader can batch them.
train_dataset = AddingProblemDataset(X=X_train, Y=Y_train)
val_dataset = AddingProblemDataset(X=X_val, Y=Y_val)

In [38]:
# Training batches are reshuffled every epoch; validation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

In [39]:
# Both networks are built with identical sizes and moved to the chosen device.
model_args = (input_size, hidden_size, num_classes)
lstmnet = LSTMNet(*model_args).to(device)
customnet = CustomNet(*model_args).to(device)

In [40]:
# Parameter count of each model, reported in the final summary.
total_lstmnet_parameters, total_customnet_parameters = map(
    count_parameters, (lstmnet, customnet)
)

In [41]:
# Mean-squared-error loss shared by both models; each model gets its own Adam
# optimizer using the same learning rate.
criterion = nn.MSELoss()
lstmnet_optimizer = torch.optim.Adam(params=lstmnet.parameters(), lr=learning_rate)
customnet_optimizer = torch.optim.Adam(params=customnet.parameters(), lr=learning_rate)

In [42]:
# Fresh, independent lists that are handed to train_and_validate for each model.
lstmnet_train_loss_array, lstmnet_val_loss_array = [], []
customnet_train_loss_array, customnet_val_loss_array = [], []

In [43]:
# Train LSTMNet and time the whole run. perf_counter() is a monotonic,
# high-resolution clock, so the measured duration cannot be distorted by
# system clock adjustments the way time.time() can.
# NOTE(review): the timing covers everything train_and_validate does,
# including its per-epoch validation pass; the two lists are presumably filled
# with per-epoch losses -- confirm in helpers.
lstmnet_start_time = time.perf_counter()
train_and_validate(
    epochs, device, lstmnet, train_loader, val_loader, criterion,
    lstmnet_optimizer, lstmnet_train_loss_array, lstmnet_val_loss_array,
)
lstmnet_end_time = time.perf_counter()
lstmnet_total_training_time = lstmnet_end_time - lstmnet_start_time
lstmnet_avg_training_time_per_epoch = lstmnet_total_training_time / epochs

Epoch no.: 1 | Training Loss: 0.7077418802678586 | Validation Loss: 0.2397512137889862
Epoch no.: 2 | Training Loss: 0.20147685214877128 | Validation Loss: 0.1750206708908081
Epoch no.: 3 | Training Loss: 0.1787444432079792 | Validation Loss: 0.16289855986833573
Epoch no.: 4 | Training Loss: 0.17031691469252108 | Validation Loss: 0.15555420219898225
Epoch no.: 5 | Training Loss: 0.16510420970618725 | Validation Loss: 0.15112572610378266
Epoch no.: 6 | Training Loss: 0.16207718662917614 | Validation Loss: 0.14807993173599243
Epoch no.: 7 | Training Loss: 0.15970669627189638 | Validation Loss: 0.14597581699490547
Epoch no.: 8 | Training Loss: 0.1574725940078497 | Validation Loss: 0.1439147762954235
Epoch no.: 9 | Training Loss: 0.15485644340515137 | Validation Loss: 0.14304085001349448
Epoch no.: 10 | Training Loss: 0.15226540878415107 | Validation Loss: 0.14041440486907958
Epoch no.: 11 | Training Loss: 0.1503749131411314 | Validation Loss: 0.1390171617269516
Epoch no.: 12 | Training Lo

In [None]:
# Train CustomNet and time the whole run. perf_counter() is a monotonic,
# high-resolution clock, so the measured duration cannot be distorted by
# system clock adjustments the way time.time() can.
# NOTE(review): the timing covers everything train_and_validate does,
# including its per-epoch validation pass; the two lists are presumably filled
# with per-epoch losses -- confirm in helpers.
customnet_start_time = time.perf_counter()
train_and_validate(
    epochs, device, customnet, train_loader, val_loader, criterion,
    customnet_optimizer, customnet_train_loss_array, customnet_val_loss_array,
)
customnet_end_time = time.perf_counter()
customnet_total_training_time = customnet_end_time - customnet_start_time
customnet_avg_training_time_per_epoch = customnet_total_training_time / epochs

Epoch no.: 1 | Training Loss: 0.5832641530036926 | Validation Loss: 0.3358241647481918
Epoch no.: 2 | Training Loss: 0.3178152504563332 | Validation Loss: 0.30462291538715364
Epoch no.: 3 | Training Loss: 0.28964419081807136 | Validation Loss: 0.2714419111609459
Epoch no.: 4 | Training Loss: 0.25578171372413633 | Validation Loss: 0.2324933871626854
Epoch no.: 5 | Training Loss: 0.222101067006588 | Validation Loss: 0.20115545839071275
Epoch no.: 6 | Training Loss: 0.19876268535852432 | Validation Loss: 0.18321079015731812
Epoch no.: 7 | Training Loss: 0.18663244307041169 | Validation Loss: 0.1737976387143135
Epoch no.: 8 | Training Loss: 0.17889287784695626 | Validation Loss: 0.16673085689544678
Epoch no.: 9 | Training Loss: 0.17249054580926895 | Validation Loss: 0.16090157330036164
Epoch no.: 10 | Training Loss: 0.1675539018213749 | Validation Loss: 0.15585823208093644
Epoch no.: 11 | Training Loss: 0.16364275246858598 | Validation Loss: 0.15246070325374603
Epoch no.: 12 | Training Los

In [None]:
# Predict on the held-out test set. Both models are put in eval mode so that
# any train-only layers behave deterministically, and autograd is disabled so
# no computation graph is built for these forward passes.
lstmnet.eval()
customnet.eval()
X_test_device = X_test.to(device)  # move the test inputs to the device once
with torch.no_grad():
    lstmnet_predictions = lstmnet(X_test_device).cpu().numpy()
    customnet_predictions = customnet(X_test_device).cpu().numpy()

In [None]:
# FINAL REPORT PRINT
# Assemble the summary as a list of lines and emit it with a single print.
separator = "------------------------------------------------------------"
report_lines = [
    "",
    "",
    separator,
    f"Total Parameters:     LSTMNet: {total_lstmnet_parameters}  |  CustomNet: {total_customnet_parameters}",
    "",
    f"Total Training Time:     LSTMNet: {lstmnet_total_training_time}s  |  CustomNet: {customnet_total_training_time}s",
    "",
    f"Average Training Time Per Epoch:     LSTMNet: {lstmnet_avg_training_time_per_epoch}s  |  CustomNet: {customnet_avg_training_time_per_epoch}s",
    separator,
]
print("\n".join(report_lines))

In [None]:
# Training-loss histories of the two models, passed to the project plotting helper.
plot_loss_comparison(
    epochs,
    lstmnet_train_loss_array,
    customnet_train_loss_array,
    "LSTM Training Loss",
    "CustomNet Training Loss",
    "Training Loss Comparison",
)

In [None]:
# Validation-loss histories of the two models, passed to the project plotting helper.
plot_loss_comparison(
    epochs,
    lstmnet_val_loss_array,
    customnet_val_loss_array,
    "LSTM Validation Loss",
    "CustomNet Validation Loss",
    "Validation Loss Comparison",
)

In [None]:
# Test labels alongside both models' predictions, passed to the project plotting helper.
plot_test_predictions(
    Y_test,
    lstmnet_predictions,
    customnet_predictions,
)

In [None]:
# Test labels and both models' predictions, passed to the project MSE comparison helper.
compare_mse_loss(
    Y_test,
    lstmnet_predictions,
    customnet_predictions,
)