# Exercise C: Text Classification with RNNs
#### Tzanis Nikolaos mtn2217

In [3]:
# -*- coding: utf-8 -*-
"""

A RNN classifier applied to AG_NEWS dataset

Download dataset:
https://www.kaggle.com/datasets/amananandrai/ag-news-classification-dataset

"""

import torch

from torch.utils.data import DataLoader
from torchtext.data import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data.dataset import random_split
from torch import nn
from torch.nn import functional as F
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from time import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# HYPER-PARAMETERS
MAX_WORDS = 25
EPOCHS = 15
LEARNING_RATE = 1e-3
BATCH_SIZE = 1024
EMBEDDING_DIM = 100
HIDDEN_DIM = 64

In [5]:
######################################################################
# Read dataset files 
# ------------------

train_data = pd.read_csv('../data/train.csv')
test_data = pd.read_csv('../data/test.csv')

In [6]:
######################################################################
# Data processing 
# -----------------------------


tokenizer = get_tokenizer("basic_english")

In [7]:
# All texts are truncated and padded to MAX_WORDS tokens
def collate_batch(batch):
    Y, X = list(zip(*batch))
    Y = torch.tensor(Y) - 1 # Target names in range [0,1,2,3] instead of [1,2,3,4]
    X = [vocab(tokenizer(text)) for text in X]
    # Bringing all samples to MAX_WORDS length. Shorter texts are padded with <PAD> sequences, longer texts are truncated.
    X = [tokens+([vocab['<PAD>']]* (MAX_WORDS-len(tokens))) if len(tokens)<MAX_WORDS else tokens[:MAX_WORDS] for tokens in X]
    return torch.tensor(X, dtype=torch.int32).to(device), Y.to(device) 

In [8]:
train_dataset = [(label,train_data['Title'][i] + ' ' + train_data['Description'][i]) for i,label in enumerate(train_data['Class Index'])]
test_dataset = [(label,test_data['Title'][i] + ' ' + test_data['Description'][i]) for i,label in enumerate(test_data['Class Index'])]

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, collate_fn=collate_batch)

target_classes = ["World", "Sports", "Business", "Sci/Tech"]

In [9]:
def build_vocabulary(datasets):
    for dataset in datasets:
        for _, text in dataset:
            yield tokenizer(text)

In [10]:
# Vocabulary includes all tokens with at least 10 occurrences in the texts
# Special tokens <PAD> and <UNK> are used for padding sequences and unknown words respectively
vocab = build_vocab_from_iterator(build_vocabulary([train_dataset, test_dataset]), min_freq=10, specials=["<PAD>","<UNK>"])
vocab.set_default_index(vocab["<UNK>"])

In [11]:
######################################################################
# Define the model
# ----------------


class model(nn.Module):
    def __init__(self,input_dim, embedding_dim, hidden_dim, output_dim):
        super(model, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        output, hidden = self.rnn(embeddings)
        logits = self.linear(output[:,-1])  # The last output of RNN is used for sequence classification
        probs = F.softmax(logits, dim=1)
        return probs

In [12]:
######################################################################
# Initiate an instance of the model
# ---------------------------------


classifier = model(len(vocab), EMBEDDING_DIM, HIDDEN_DIM, len(target_classes)).to(device)
# Define loss function and opimization algorithm
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam([param for param in classifier.parameters() if param.requires_grad == True],lr=LEARNING_RATE)

# Count model parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print('\nModel:')
print(classifier)
print('Total parameters: ',count_parameters(classifier))
print('\n\n')


Model:
model(
  (embedding_layer): Embedding(21254, 100)
  (rnn): RNN(100, 64, batch_first=True)
  (linear): Linear(in_features=64, out_features=4, bias=True)
)
Total parameters:  2136284





In [13]:
######################################################################
# Define functions to train and evaluate the model
# ------------------------------------------------


def EvaluateModel(model, loss_fn, val_loader):
    model.eval()
    with torch.no_grad():
        Y_actual, Y_preds, losses = [],[],[]
        for X, Y in val_loader:
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())

            Y_actual.append(Y)
            Y_preds.append(preds.argmax(dim=-1))

        Y_actual = torch.cat(Y_actual)
        Y_preds = torch.cat(Y_preds)
    
    # Returns mean loss, actual labels, predicted labels 
    return torch.tensor(losses).mean(), Y_actual.detach().cpu().numpy(), Y_preds.detach().cpu().numpy()

In [14]:
def TrainModel(model, loss_fn, optimizer, train_loader, epochs):
    total_time = 0
    for i in range(1, epochs+1):
        start = time()
        model.train()
        print('Epoch:',i)
        losses = []
        for X, Y in tqdm(train_loader):
            Y_preds = model(X)

            loss = loss_fn(Y_preds, Y)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        end = time()
        total_time += end - start

        print("Train Loss : {:.3f}".format(torch.tensor(losses).mean()))
    
    return total_time/epochs

## Question 1

The current implementation of the model is based on a one-directional RNN model with 1 layer. In order to test different models we have to change the class that is used to define the model.

Firstly, the option to choose between RNN or LSTM is added. An 'if' statement checks the param 'neural_network' when a new model is defined. Depending on the option, an appropriate neural network is initialized.

Secondly, we have to add an option for one-directional or bidirectional. This is once again given as a param and when creating the rnn or lstm model is used to create an appropriate neural network. Note that if the nn is bidirectional then the hidden dimension has to be doubled. This is specified in the nn.Linear function call.

Lastly, the number of layer is also given as a param which is being used at the nn.RNN or nn.LSTM for RNNs or LSTMs respectively.

In [15]:
class model(nn.Module):
    def __init__(self,input_dim, embedding_dim, hidden_dim, output_dim, bidirectional=False, neural_network='RNN', num_layers=1):
        super(model, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        if neural_network=='RNN':
            self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
            self.linear = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        elif neural_network=='LSTM':
            self.rnn = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
            self.linear = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        else:
            print('Not supported NN')
            return 
        
    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        output, hidden = self.rnn(embeddings)
        logits = self.linear(output[:,-1])  # The last output of RNN is used for sequence classification
        probs = F.softmax(logits, dim=1)
        return probs

In order to evaluate the performance of each model, we create a function based on the given code. This function trains each model and then evaluates it while returning the accuracy, total parameters, time cost, the classification report, the misclassified texts and the predictions for the model in question.

In [16]:
def TrainAndEvaluateModelAndReturnMetrics(bidirectional, neural_network, num_layers):

    classifier = model(len(vocab), EMBEDDING_DIM, HIDDEN_DIM, len(target_classes), bidirectional=bidirectional, neural_network=neural_network, num_layers=num_layers).to(device)
    # Define loss function and opimization algorithm
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam([param for param in classifier.parameters() if param.requires_grad == True],lr=LEARNING_RATE)
    total_parameters = count_parameters(classifier)

    time_cost = TrainModel(classifier, loss_fn, optimizer, train_loader, EPOCHS)

    _, Y_actual, Y_preds = EvaluateModel(classifier, loss_fn, test_loader)

    accuracy = accuracy_score(Y_actual, Y_preds)

    report = classification_report(Y_actual, Y_preds, target_names=target_classes)

    cmatrix = confusion_matrix(Y_actual, Y_preds)

    misclassified = np.where(Y_actual != Y_preds)[0]

    return [np.round(accuracy, decimals=2), total_parameters, np.round(time_cost, decimals=2)], report, cmatrix, misclassified, Y_actual, Y_preds


Next we call the above function for each model and get the stats for each one of them.

In [17]:
rnn1 = TrainAndEvaluateModelAndReturnMetrics(bidirectional=False, neural_network='RNN', num_layers=1)
rnn1Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=1)
rnn2Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=2)
lstm1 = TrainAndEvaluateModelAndReturnMetrics(bidirectional=False, neural_network='LSTM', num_layers=1)
lstm1Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=1)
lstm2Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=2)

Epoch: 1


100%|██████████| 118/118 [00:09<00:00, 12.47it/s]


Train Loss : 1.297
Epoch: 2


100%|██████████| 118/118 [00:09<00:00, 13.11it/s]


Train Loss : 1.047
Epoch: 3


100%|██████████| 118/118 [00:08<00:00, 13.14it/s]


Train Loss : 0.952
Epoch: 4


100%|██████████| 118/118 [00:08<00:00, 13.21it/s]


Train Loss : 0.914
Epoch: 5


100%|██████████| 118/118 [00:09<00:00, 13.05it/s]


Train Loss : 0.893
Epoch: 6


100%|██████████| 118/118 [00:08<00:00, 13.45it/s]


Train Loss : 0.879
Epoch: 7


100%|██████████| 118/118 [00:08<00:00, 13.36it/s]


Train Loss : 0.868
Epoch: 8


100%|██████████| 118/118 [00:08<00:00, 13.48it/s]


Train Loss : 0.859
Epoch: 9


100%|██████████| 118/118 [00:08<00:00, 13.49it/s]


Train Loss : 0.852
Epoch: 10


100%|██████████| 118/118 [00:08<00:00, 13.44it/s]


Train Loss : 0.846
Epoch: 11


100%|██████████| 118/118 [00:09<00:00, 12.38it/s]


Train Loss : 0.842
Epoch: 12


100%|██████████| 118/118 [00:09<00:00, 12.49it/s]


Train Loss : 0.837
Epoch: 13


100%|██████████| 118/118 [00:09<00:00, 12.55it/s]


Train Loss : 0.833
Epoch: 14


100%|██████████| 118/118 [00:09<00:00, 12.35it/s]


Train Loss : 0.831
Epoch: 15


100%|██████████| 118/118 [00:09<00:00, 12.34it/s]


Train Loss : 0.828
Epoch: 1


100%|██████████| 118/118 [00:14<00:00,  8.18it/s]


Train Loss : 1.311
Epoch: 2


100%|██████████| 118/118 [00:14<00:00,  8.23it/s]


Train Loss : 1.072
Epoch: 3


100%|██████████| 118/118 [00:14<00:00,  8.14it/s]


Train Loss : 0.974
Epoch: 4


100%|██████████| 118/118 [00:14<00:00,  8.21it/s]


Train Loss : 0.933
Epoch: 5


100%|██████████| 118/118 [00:14<00:00,  8.29it/s]


Train Loss : 0.909
Epoch: 6


100%|██████████| 118/118 [00:14<00:00,  8.18it/s]


Train Loss : 0.893
Epoch: 7


100%|██████████| 118/118 [00:14<00:00,  8.21it/s]


Train Loss : 0.883
Epoch: 8


100%|██████████| 118/118 [00:14<00:00,  8.19it/s]


Train Loss : 0.872
Epoch: 9


100%|██████████| 118/118 [00:14<00:00,  8.21it/s]


Train Loss : 0.864
Epoch: 10


100%|██████████| 118/118 [00:14<00:00,  8.26it/s]


Train Loss : 0.860
Epoch: 11


100%|██████████| 118/118 [00:14<00:00,  8.20it/s]


Train Loss : 0.854
Epoch: 12


100%|██████████| 118/118 [00:14<00:00,  8.19it/s]


Train Loss : 0.851
Epoch: 13


100%|██████████| 118/118 [00:14<00:00,  8.32it/s]


Train Loss : 0.848
Epoch: 14


100%|██████████| 118/118 [00:14<00:00,  8.18it/s]


Train Loss : 0.844
Epoch: 15


100%|██████████| 118/118 [00:14<00:00,  8.24it/s]


Train Loss : 0.845
Epoch: 1


100%|██████████| 118/118 [00:22<00:00,  5.23it/s]


Train Loss : 1.267
Epoch: 2


100%|██████████| 118/118 [00:22<00:00,  5.27it/s]


Train Loss : 1.044
Epoch: 3


100%|██████████| 118/118 [00:22<00:00,  5.30it/s]


Train Loss : 0.966
Epoch: 4


100%|██████████| 118/118 [00:22<00:00,  5.31it/s]


Train Loss : 0.930
Epoch: 5


100%|██████████| 118/118 [00:22<00:00,  5.30it/s]


Train Loss : 0.911
Epoch: 6


100%|██████████| 118/118 [00:21<00:00,  5.39it/s]


Train Loss : 0.897
Epoch: 7


100%|██████████| 118/118 [00:22<00:00,  5.35it/s]


Train Loss : 0.889
Epoch: 8


100%|██████████| 118/118 [00:21<00:00,  5.39it/s]


Train Loss : 0.879
Epoch: 9


100%|██████████| 118/118 [00:22<00:00,  5.36it/s]


Train Loss : 0.873
Epoch: 10


100%|██████████| 118/118 [00:22<00:00,  5.35it/s]


Train Loss : 0.868
Epoch: 11


100%|██████████| 118/118 [00:21<00:00,  5.41it/s]


Train Loss : 0.863
Epoch: 12


100%|██████████| 118/118 [00:21<00:00,  5.39it/s]


Train Loss : 0.863
Epoch: 13


100%|██████████| 118/118 [00:21<00:00,  5.37it/s]


Train Loss : 0.859
Epoch: 14


100%|██████████| 118/118 [00:21<00:00,  5.39it/s]


Train Loss : 0.859
Epoch: 15


100%|██████████| 118/118 [00:21<00:00,  5.42it/s]


Train Loss : 0.852
Epoch: 1


100%|██████████| 118/118 [00:11<00:00, 10.62it/s]


Train Loss : 1.251
Epoch: 2


100%|██████████| 118/118 [00:11<00:00, 10.65it/s]


Train Loss : 0.974
Epoch: 3


100%|██████████| 118/118 [00:11<00:00, 10.72it/s]


Train Loss : 0.910
Epoch: 4


100%|██████████| 118/118 [00:11<00:00, 10.67it/s]


Train Loss : 0.883
Epoch: 5


100%|██████████| 118/118 [00:11<00:00, 10.63it/s]


Train Loss : 0.865
Epoch: 6


100%|██████████| 118/118 [00:10<00:00, 10.77it/s]


Train Loss : 0.852
Epoch: 7


100%|██████████| 118/118 [00:11<00:00, 10.64it/s]


Train Loss : 0.843
Epoch: 8


100%|██████████| 118/118 [00:11<00:00, 10.60it/s]


Train Loss : 0.836
Epoch: 9


100%|██████████| 118/118 [00:11<00:00, 10.68it/s]


Train Loss : 0.831
Epoch: 10


100%|██████████| 118/118 [00:10<00:00, 10.75it/s]


Train Loss : 0.824
Epoch: 11


100%|██████████| 118/118 [00:10<00:00, 10.79it/s]


Train Loss : 0.821
Epoch: 12


100%|██████████| 118/118 [00:11<00:00, 10.71it/s]


Train Loss : 0.817
Epoch: 13


100%|██████████| 118/118 [00:11<00:00, 10.63it/s]


Train Loss : 0.814
Epoch: 14


100%|██████████| 118/118 [00:10<00:00, 10.81it/s]


Train Loss : 0.813
Epoch: 15


100%|██████████| 118/118 [00:11<00:00, 10.65it/s]


Train Loss : 0.810
Epoch: 1


100%|██████████| 118/118 [00:17<00:00,  6.69it/s]


Train Loss : 1.248
Epoch: 2


100%|██████████| 118/118 [00:17<00:00,  6.70it/s]


Train Loss : 0.980
Epoch: 3


100%|██████████| 118/118 [00:17<00:00,  6.66it/s]


Train Loss : 0.911
Epoch: 4


100%|██████████| 118/118 [00:17<00:00,  6.65it/s]


Train Loss : 0.880
Epoch: 5


100%|██████████| 118/118 [00:17<00:00,  6.65it/s]


Train Loss : 0.864
Epoch: 6


100%|██████████| 118/118 [00:17<00:00,  6.69it/s]


Train Loss : 0.852
Epoch: 7


100%|██████████| 118/118 [00:17<00:00,  6.68it/s]


Train Loss : 0.842
Epoch: 8


100%|██████████| 118/118 [00:17<00:00,  6.67it/s]


Train Loss : 0.835
Epoch: 9


100%|██████████| 118/118 [00:17<00:00,  6.65it/s]


Train Loss : 0.830
Epoch: 10


100%|██████████| 118/118 [00:17<00:00,  6.69it/s]


Train Loss : 0.824
Epoch: 11


100%|██████████| 118/118 [00:17<00:00,  6.67it/s]


Train Loss : 0.819
Epoch: 12


100%|██████████| 118/118 [00:17<00:00,  6.67it/s]


Train Loss : 0.816
Epoch: 13


100%|██████████| 118/118 [00:17<00:00,  6.62it/s]


Train Loss : 0.814
Epoch: 14


100%|██████████| 118/118 [00:17<00:00,  6.67it/s]


Train Loss : 0.811
Epoch: 15


100%|██████████| 118/118 [00:17<00:00,  6.65it/s]


Train Loss : 0.809
Epoch: 1


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 1.195
Epoch: 2


100%|██████████| 118/118 [00:30<00:00,  3.86it/s]


Train Loss : 0.951
Epoch: 3


100%|██████████| 118/118 [00:30<00:00,  3.86it/s]


Train Loss : 0.898
Epoch: 4


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.874
Epoch: 5


100%|██████████| 118/118 [00:30<00:00,  3.84it/s]


Train Loss : 0.858
Epoch: 6


100%|██████████| 118/118 [00:30<00:00,  3.87it/s]


Train Loss : 0.848
Epoch: 7


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.839
Epoch: 8


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.833
Epoch: 9


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.828
Epoch: 10


100%|██████████| 118/118 [00:30<00:00,  3.86it/s]


Train Loss : 0.823
Epoch: 11


100%|██████████| 118/118 [00:30<00:00,  3.87it/s]


Train Loss : 0.822
Epoch: 12


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.818
Epoch: 13


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.816
Epoch: 14


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.814
Epoch: 15


100%|██████████| 118/118 [00:30<00:00,  3.85it/s]


Train Loss : 0.811


The last part is to create a pandas dataframe in order to display the results.

In [18]:
results_df = pd.DataFrame({'1RNN': rnn1[0], '1Bi-RNN': rnn1Bi[0], '2Bi-RNN': rnn2Bi[0], '1LSTM': lstm1[0], '1Bi-LSTM': lstm1Bi[0], '2Bi-LSTM': lstm2Bi[0]})
results_df = results_df.rename(index={0:'Accuracy', 1:'Parameters', 2:'Time Cost'})
display(results_df)

Unnamed: 0,1RNN,1Bi-RNN,2Bi-RNN,1LSTM,1Bi-LSTM,2Bi-LSTM
Accuracy,0.87,0.86,0.86,0.88,0.88,0.89
Parameters,2136284.0,2147164.0,2171996.0,2168156.0,2210908.0,2310236.0
Time Cost,9.12,14.37,22.07,11.04,17.7,30.61


From the results it is obvious that as we increase the complexity of a model, the same happens with its accuracy. Models with more parameters tend to have better results. Of course, LSTM models have better accuracy than RNN models accross the board, regardless of the number of parameters.

We can also see that models with multiple layers tend to use more resources and need more time to train. The same goes for models with more parameters as we can see they also take more time to train.

Finally, we can see that using 2 layers instead of 2 improves the accuracy of both the RNN and LSTM models.

## Question 2

Using the output of the function that we called earlier and especially the misclassified texts for each model we create a variable that stores all common misclassifications between all the models and then display the length of that list (which practically shows how many texts were misclassified by all the models).

In [19]:
common_misclassified = list(set(rnn1[3]).intersection(rnn1Bi[3], rnn2Bi[3], lstm1[3], lstm1Bi[3], lstm2Bi[3]))
len(common_misclassified)

326

Then, we pick a random number from the list of misclassified texts and diplay the text.

In [20]:
import random

random_text = common_misclassified[random.randint(0, len(common_misclassified))]
print(f"A sample text that was misclassified by every model was:\n{test_data['Title'][random_text]} {test_data['Description'][random_text]}")

A sample text that was misclassified by every model was:
Telescope snaps distant 'planet' The first direct image of a planet circling another star may have been obtained by a US-European team of astronomers.


Next we go through the misclassifications to find out what their original category was in order to specify the misclassified texts per category.

In [21]:
misclassifications_per_category = {
    'World': 0,
    'Sports:': 0,
    'Business:': 0,
    'Sci/Tech:': 0
}
for index in common_misclassified:
    if test_data['Class Index'][index] == 1:
        misclassifications_per_category['World'] += 1
    elif test_data['Class Index'][index] == 2:
        misclassifications_per_category['Sports:'] += 1
    elif test_data['Class Index'][index] == 3:
        misclassifications_per_category['Business:'] += 1
    elif test_data['Class Index'][index] == 4:
        misclassifications_per_category['Sci/Tech:'] += 1
    else:
        pass

print(f'Misclassifications per category:\n{misclassifications_per_category}')

Misclassifications per category:
{'World': 99, 'Sports:': 13, 'Business:': 127, 'Sci/Tech:': 87}


Then we look up all predictions to find the numbers of times that each pair of correct category - wrong prediction showed up.

In [22]:
models  = [rnn1, rnn1Bi, rnn2Bi, lstm1, lstm1Bi, lstm2Bi]
mistake_pairs = {}

for specific_model in models:
    for index in common_misclassified:
        try:
            mistake_pairs[(specific_model[4][index]+1, specific_model[5][index]+1)] += 1
        except:
            mistake_pairs[(specific_model[4][index]+1, specific_model[5][index]+1)] = 1

And then we prin that result.

In [23]:
most_common_pair = max(mistake_pairs, key=lambda k: mistake_pairs[k])
new_pair = []
if most_common_pair[0] == 1:
    new_pair.append('World')
elif  most_common_pair[0] == 2:
    new_pair.append('Sports')
elif most_common_pair[0] == 3:
    new_pair.append('Business')
elif  most_common_pair[0] == 4:
    new_pair.append('Sci/Tech')
else:
    pass
if most_common_pair[1] == 1:
    new_pair.append('World')
elif  most_common_pair[1] == 2:
    new_pair.append('Sports')
elif most_common_pair[1] == 3:
    new_pair.append('Business')
elif   most_common_pair[1] == 4:
    new_pair.append('Sci/Tech')
else:
    pass
print(f'The most common pair of correct category - wrong prediction was: {(new_pair[0], new_pair[1])}, which appeared a total of {mistake_pairs[most_common_pair]} times.' )

The most common pair of correct category - wrong prediction was: ('Business', 'Sci/Tech'), which appeared a total of 555 times.


## Question 3

For the next question the number of MAX_WORDS is set to 50 and then we call again the function to train and evaluate each model.

In [24]:
MAX_WORDS = 50

rnn1 = TrainAndEvaluateModelAndReturnMetrics(bidirectional=False, neural_network='RNN', num_layers=1)
rnn1Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=1)
rnn2Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=2)
lstm1 = TrainAndEvaluateModelAndReturnMetrics(bidirectional=False, neural_network='LSTM', num_layers=1)
lstm1Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=1)
lstm2Bi = TrainAndEvaluateModelAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=2)

Epoch: 1


100%|██████████| 118/118 [00:14<00:00,  8.08it/s]


Train Loss : 1.380
Epoch: 2


100%|██████████| 118/118 [00:14<00:00,  8.42it/s]


Train Loss : 1.362
Epoch: 3


100%|██████████| 118/118 [00:14<00:00,  8.40it/s]


Train Loss : 1.345
Epoch: 4


100%|██████████| 118/118 [00:14<00:00,  8.34it/s]


Train Loss : 1.290
Epoch: 5


100%|██████████| 118/118 [00:13<00:00,  8.45it/s]


Train Loss : 1.249
Epoch: 6


100%|██████████| 118/118 [00:14<00:00,  8.42it/s]


Train Loss : 1.223
Epoch: 7


100%|██████████| 118/118 [00:13<00:00,  8.44it/s]


Train Loss : 1.191
Epoch: 8


100%|██████████| 118/118 [00:14<00:00,  8.39it/s]


Train Loss : 1.159
Epoch: 9


100%|██████████| 118/118 [00:14<00:00,  8.40it/s]


Train Loss : 1.100
Epoch: 10


100%|██████████| 118/118 [00:14<00:00,  8.40it/s]


Train Loss : 1.027
Epoch: 11


100%|██████████| 118/118 [00:14<00:00,  8.38it/s]


Train Loss : 0.997
Epoch: 12


100%|██████████| 118/118 [00:14<00:00,  8.43it/s]


Train Loss : 1.094
Epoch: 13


100%|██████████| 118/118 [00:14<00:00,  8.40it/s]


Train Loss : 1.105
Epoch: 14


100%|██████████| 118/118 [00:13<00:00,  8.43it/s]


Train Loss : 1.197
Epoch: 15


100%|██████████| 118/118 [00:14<00:00,  8.36it/s]


Train Loss : 1.288
Epoch: 1


100%|██████████| 118/118 [00:24<00:00,  4.81it/s]


Train Loss : 1.373
Epoch: 2


100%|██████████| 118/118 [00:24<00:00,  4.86it/s]


Train Loss : 1.332
Epoch: 3


100%|██████████| 118/118 [00:24<00:00,  4.87it/s]


Train Loss : 1.325
Epoch: 4


100%|██████████| 118/118 [00:24<00:00,  4.88it/s]


Train Loss : 1.339
Epoch: 5


100%|██████████| 118/118 [00:24<00:00,  4.90it/s]


Train Loss : 1.337
Epoch: 6


100%|██████████| 118/118 [00:24<00:00,  4.92it/s]


Train Loss : 1.340
Epoch: 7


100%|██████████| 118/118 [00:24<00:00,  4.91it/s]


Train Loss : 1.339
Epoch: 8


100%|██████████| 118/118 [00:24<00:00,  4.89it/s]


Train Loss : 1.335
Epoch: 9


100%|██████████| 118/118 [00:24<00:00,  4.90it/s]


Train Loss : 1.329
Epoch: 10


100%|██████████| 118/118 [00:24<00:00,  4.91it/s]


Train Loss : 1.305
Epoch: 11


100%|██████████| 118/118 [00:24<00:00,  4.87it/s]


Train Loss : 1.311
Epoch: 12


100%|██████████| 118/118 [00:23<00:00,  4.92it/s]


Train Loss : 1.323
Epoch: 13


100%|██████████| 118/118 [00:24<00:00,  4.89it/s]


Train Loss : 1.343
Epoch: 14


100%|██████████| 118/118 [00:24<00:00,  4.87it/s]


Train Loss : 1.336
Epoch: 15


100%|██████████| 118/118 [00:24<00:00,  4.89it/s]


Train Loss : 1.333
Epoch: 1


100%|██████████| 118/118 [00:39<00:00,  2.96it/s]


Train Loss : 1.376
Epoch: 2


100%|██████████| 118/118 [00:40<00:00,  2.94it/s]


Train Loss : 1.370
Epoch: 3


100%|██████████| 118/118 [00:40<00:00,  2.93it/s]


Train Loss : 1.357
Epoch: 4


100%|██████████| 118/118 [00:40<00:00,  2.91it/s]


Train Loss : 1.360
Epoch: 5


100%|██████████| 118/118 [00:39<00:00,  2.96it/s]


Train Loss : 1.352
Epoch: 6


100%|██████████| 118/118 [00:40<00:00,  2.91it/s]


Train Loss : 1.279
Epoch: 7


100%|██████████| 118/118 [00:40<00:00,  2.91it/s]


Train Loss : 1.283
Epoch: 8


100%|██████████| 118/118 [00:40<00:00,  2.89it/s]


Train Loss : 1.301
Epoch: 9


100%|██████████| 118/118 [00:40<00:00,  2.89it/s]


Train Loss : 1.302
Epoch: 10


100%|██████████| 118/118 [00:40<00:00,  2.88it/s]


Train Loss : 1.311
Epoch: 11


100%|██████████| 118/118 [00:40<00:00,  2.90it/s]


Train Loss : 1.290
Epoch: 12


100%|██████████| 118/118 [00:40<00:00,  2.89it/s]


Train Loss : 1.281
Epoch: 13


100%|██████████| 118/118 [00:40<00:00,  2.90it/s]


Train Loss : 1.271
Epoch: 14


100%|██████████| 118/118 [00:40<00:00,  2.90it/s]


Train Loss : 1.284
Epoch: 15


100%|██████████| 118/118 [00:40<00:00,  2.88it/s]


Train Loss : 1.304
Epoch: 1


100%|██████████| 118/118 [00:19<00:00,  5.93it/s]


Train Loss : 1.345
Epoch: 2


100%|██████████| 118/118 [00:20<00:00,  5.85it/s]


Train Loss : 1.117
Epoch: 3


100%|██████████| 118/118 [00:20<00:00,  5.69it/s]


Train Loss : 0.981
Epoch: 4


100%|██████████| 118/118 [00:20<00:00,  5.89it/s]


Train Loss : 0.925
Epoch: 5


100%|██████████| 118/118 [00:19<00:00,  5.92it/s]


Train Loss : 0.898
Epoch: 6


100%|██████████| 118/118 [00:19<00:00,  5.92it/s]


Train Loss : 0.881
Epoch: 7


100%|██████████| 118/118 [00:19<00:00,  5.93it/s]


Train Loss : 0.871
Epoch: 8


100%|██████████| 118/118 [00:19<00:00,  5.92it/s]


Train Loss : 0.869
Epoch: 9


100%|██████████| 118/118 [00:19<00:00,  5.94it/s]


Train Loss : 0.859
Epoch: 10


100%|██████████| 118/118 [00:19<00:00,  5.90it/s]


Train Loss : 0.852
Epoch: 11


100%|██████████| 118/118 [00:19<00:00,  5.95it/s]


Train Loss : 0.846
Epoch: 12


100%|██████████| 118/118 [00:19<00:00,  5.94it/s]


Train Loss : 0.842
Epoch: 13


100%|██████████| 118/118 [00:19<00:00,  5.91it/s]


Train Loss : 0.836
Epoch: 14


100%|██████████| 118/118 [00:19<00:00,  5.94it/s]


Train Loss : 0.840
Epoch: 15


100%|██████████| 118/118 [00:19<00:00,  5.91it/s]


Train Loss : 0.833
Epoch: 1


100%|██████████| 118/118 [00:35<00:00,  3.33it/s]


Train Loss : 1.342
Epoch: 2


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 1.109
Epoch: 3


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.983
Epoch: 4


100%|██████████| 118/118 [00:35<00:00,  3.34it/s]


Train Loss : 0.934
Epoch: 5


100%|██████████| 118/118 [00:35<00:00,  3.34it/s]


Train Loss : 0.900
Epoch: 6


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.881
Epoch: 7


100%|██████████| 118/118 [00:35<00:00,  3.36it/s]


Train Loss : 0.871
Epoch: 8


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.863
Epoch: 9


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.855
Epoch: 10


100%|██████████| 118/118 [00:35<00:00,  3.33it/s]


Train Loss : 0.847
Epoch: 11


100%|██████████| 118/118 [00:35<00:00,  3.34it/s]


Train Loss : 0.841
Epoch: 12


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.843
Epoch: 13


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.844
Epoch: 14


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.836
Epoch: 15


100%|██████████| 118/118 [00:35<00:00,  3.35it/s]


Train Loss : 0.834
Epoch: 1


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 1.294
Epoch: 2


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 1.012
Epoch: 3


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.925
Epoch: 4


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.896
Epoch: 5


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.876
Epoch: 6


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.861
Epoch: 7


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.855
Epoch: 8


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.848
Epoch: 9


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.841
Epoch: 10


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.837
Epoch: 11


100%|██████████| 118/118 [01:04<00:00,  1.84it/s]


Train Loss : 0.833
Epoch: 12


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.827
Epoch: 13


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.823
Epoch: 14


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.822
Epoch: 15


100%|██████████| 118/118 [01:04<00:00,  1.83it/s]


Train Loss : 0.822


Then we display the results in the same form as the question 1.

In [25]:
results_df = pd.DataFrame({'1RNN': rnn1[0], '1Bi-RNN': rnn1Bi[0], '2Bi-RNN': rnn2Bi[0], '1LSTM': lstm1[0], '1Bi-LSTM': lstm1Bi[0], '2Bi-LSTM': lstm2Bi[0]})
results_df = results_df.rename(index={0:'Accuracy', 1:'Parameters', 2:'Time Cost'})
display(results_df)

Unnamed: 0,1RNN,1Bi-RNN,2Bi-RNN,1LSTM,1Bi-LSTM,2Bi-LSTM
Accuracy,0.49,0.35,0.41,0.89,0.88,0.89
Parameters,2136284.0,2147164.0,2171996.0,2168156.0,2210908.0,2310236.0
Time Cost,14.08,24.16,40.57,19.99,35.26,64.48


As we can see the accuracy of the RNN models was reduced significantly and this is probably attributed to the vanishing gradient problem. On the contrary, the LSTM models are not affected by this issue as they can 'remember' important information from each timestep. The number of parameters is not affected as the structure of the RNN or LSTM is not changed based on the MAX_WORDS value.

## Question 4

For this question we have to modify again the definition of the model. This time we have to add a param that specifies if pretrained embeddings are going to be used. Note that these embeddings could be modified while the model is being trained (freeze=false).

In [26]:
class model(nn.Module):
    def __init__(self,input_dim, embedding_dim, hidden_dim, output_dim, bidirectional=False, neural_network='RNN', num_layers=1, pretrained_embeddings = None, freeze = False):
        super(model, self).__init__()

        if pretrained_embeddings == None:
            self.embedding_layer = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        else:
            self.embedding_layer = nn.Embedding.from_pretrained(pretrained_embeddings, freeze=freeze)
        if neural_network=='RNN':
            self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
            self.linear = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        elif neural_network=='LSTM':
            self.rnn = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
            self.linear = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        else:
            print('Not supported NN')
            return 
        
    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        output, hidden = self.rnn(embeddings)
        logits = self.linear(output[:,-1])  # The last output of RNN is used for sequence classification
        probs = F.softmax(logits, dim=1)
        return probs

Then we import the glove embeddings and set the MAX_WORDS value back to 25.

In [27]:
from torchtext.vocab import GloVe

glove = GloVe(name='6B', dim=100)

MAX_WORDS = 25

And modify the function that trains and evaluates the models to take into account if we used pretrained embeddings and if the have to be frozen.

In [28]:
def TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional, neural_network, num_layers, pretrained_embeddings, freeze):

    classifier = model(len(vocab), EMBEDDING_DIM, HIDDEN_DIM, len(target_classes), bidirectional=bidirectional, neural_network=neural_network, num_layers=num_layers, pretrained_embeddings=pretrained_embeddings, freeze=freeze).to(device)
    # Define loss function and opimization algorithm
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam([param for param in classifier.parameters() if param.requires_grad == True],lr=LEARNING_RATE)
    total_parameters = count_parameters(classifier)

    time_cost = TrainModel(classifier, loss_fn, optimizer, train_loader, EPOCHS)

    _, Y_actual, Y_preds = EvaluateModel(classifier, loss_fn, test_loader)

    accuracy = accuracy_score(Y_actual, Y_preds)

    report = classification_report(Y_actual, Y_preds, target_names=target_classes)

    cmatrix = confusion_matrix(Y_actual, Y_preds)

    return [np.round(accuracy, decimals=2), total_parameters, np.round(time_cost, decimals=2)], report, cmatrix

After this is done, we train and evaluate each model once again.

In [29]:
rnn1 = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=False, neural_network='RNN', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=False)
rnn1Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=False)
rnn2Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=2, pretrained_embeddings=glove.vectors.clone().detach(), freeze=False)
lstm1 = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=False, neural_network='LSTM', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=False)
lstm1Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=False)
lstm2Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=2, pretrained_embeddings=glove.vectors.clone().detach(), freeze=False)

Epoch: 1


100%|██████████| 118/118 [00:26<00:00,  4.45it/s]


Train Loss : 1.248
Epoch: 2


100%|██████████| 118/118 [00:26<00:00,  4.47it/s]


Train Loss : 0.952
Epoch: 3


100%|██████████| 118/118 [00:26<00:00,  4.49it/s]


Train Loss : 0.887
Epoch: 4


100%|██████████| 118/118 [00:26<00:00,  4.49it/s]


Train Loss : 0.864
Epoch: 5


100%|██████████| 118/118 [00:26<00:00,  4.46it/s]


Train Loss : 0.854
Epoch: 6


100%|██████████| 118/118 [00:26<00:00,  4.47it/s]


Train Loss : 0.846
Epoch: 7


100%|██████████| 118/118 [00:26<00:00,  4.47it/s]


Train Loss : 0.835
Epoch: 8


100%|██████████| 118/118 [00:26<00:00,  4.48it/s]


Train Loss : 0.836
Epoch: 9


100%|██████████| 118/118 [00:26<00:00,  4.47it/s]


Train Loss : 0.828
Epoch: 10


100%|██████████| 118/118 [00:26<00:00,  4.48it/s]


Train Loss : 0.825
Epoch: 11


100%|██████████| 118/118 [00:26<00:00,  4.48it/s]


Train Loss : 0.824
Epoch: 12


100%|██████████| 118/118 [00:26<00:00,  4.46it/s]


Train Loss : 0.819
Epoch: 13


100%|██████████| 118/118 [00:26<00:00,  4.50it/s]


Train Loss : 0.818
Epoch: 14


100%|██████████| 118/118 [00:26<00:00,  4.50it/s]


Train Loss : 0.818
Epoch: 15


100%|██████████| 118/118 [00:26<00:00,  4.47it/s]


Train Loss : 0.818
Epoch: 1


100%|██████████| 118/118 [00:31<00:00,  3.80it/s]


Train Loss : 1.270
Epoch: 2


100%|██████████| 118/118 [00:31<00:00,  3.79it/s]


Train Loss : 1.004
Epoch: 3


100%|██████████| 118/118 [00:31<00:00,  3.80it/s]


Train Loss : 0.904
Epoch: 4


100%|██████████| 118/118 [00:31<00:00,  3.80it/s]


Train Loss : 0.874
Epoch: 5


100%|██████████| 118/118 [00:30<00:00,  3.83it/s]


Train Loss : 0.858
Epoch: 6


100%|██████████| 118/118 [00:30<00:00,  3.81it/s]


Train Loss : 0.846
Epoch: 7


100%|██████████| 118/118 [00:30<00:00,  3.82it/s]


Train Loss : 0.839
Epoch: 8


100%|██████████| 118/118 [00:30<00:00,  3.82it/s]


Train Loss : 0.833
Epoch: 9


100%|██████████| 118/118 [00:30<00:00,  3.83it/s]


Train Loss : 0.828
Epoch: 10


100%|██████████| 118/118 [00:30<00:00,  3.81it/s]


Train Loss : 0.828
Epoch: 11


100%|██████████| 118/118 [00:30<00:00,  3.81it/s]


Train Loss : 0.824
Epoch: 12


100%|██████████| 118/118 [00:31<00:00,  3.81it/s]


Train Loss : 0.824
Epoch: 13


100%|██████████| 118/118 [00:30<00:00,  3.82it/s]


Train Loss : 0.821
Epoch: 14


100%|██████████| 118/118 [00:31<00:00,  3.80it/s]


Train Loss : 0.816
Epoch: 15


100%|██████████| 118/118 [00:30<00:00,  3.81it/s]


Train Loss : 0.815
Epoch: 1


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 1.222
Epoch: 2


100%|██████████| 118/118 [00:38<00:00,  3.07it/s]


Train Loss : 0.953
Epoch: 3


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 0.898
Epoch: 4


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 0.874
Epoch: 5


100%|██████████| 118/118 [00:38<00:00,  3.05it/s]


Train Loss : 0.864
Epoch: 6


100%|██████████| 118/118 [00:39<00:00,  3.03it/s]


Train Loss : 0.852
Epoch: 7


100%|██████████| 118/118 [00:38<00:00,  3.04it/s]


Train Loss : 0.845
Epoch: 8


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 0.841
Epoch: 9


100%|██████████| 118/118 [00:38<00:00,  3.07it/s]


Train Loss : 0.844
Epoch: 10


100%|██████████| 118/118 [00:38<00:00,  3.07it/s]


Train Loss : 0.833
Epoch: 11


100%|██████████| 118/118 [00:38<00:00,  3.05it/s]


Train Loss : 0.833
Epoch: 12


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 0.829
Epoch: 13


100%|██████████| 118/118 [00:38<00:00,  3.05it/s]


Train Loss : 0.826
Epoch: 14


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 0.823
Epoch: 15


100%|██████████| 118/118 [00:38<00:00,  3.06it/s]


Train Loss : 0.818
Epoch: 1


100%|██████████| 118/118 [00:29<00:00,  4.04it/s]


Train Loss : 1.188
Epoch: 2


100%|██████████| 118/118 [00:29<00:00,  4.05it/s]


Train Loss : 0.913
Epoch: 3


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.867
Epoch: 4


100%|██████████| 118/118 [00:29<00:00,  4.07it/s]


Train Loss : 0.848
Epoch: 5


100%|██████████| 118/118 [00:28<00:00,  4.08it/s]


Train Loss : 0.838
Epoch: 6


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.828
Epoch: 7


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.821
Epoch: 8


100%|██████████| 118/118 [00:28<00:00,  4.08it/s]


Train Loss : 0.816
Epoch: 9


100%|██████████| 118/118 [00:29<00:00,  4.07it/s]


Train Loss : 0.812
Epoch: 10


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.811
Epoch: 11


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.808
Epoch: 12


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.807
Epoch: 13


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.803
Epoch: 14


100%|██████████| 118/118 [00:29<00:00,  4.06it/s]


Train Loss : 0.801
Epoch: 15


100%|██████████| 118/118 [00:28<00:00,  4.08it/s]


Train Loss : 0.800
Epoch: 1


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 1.191
Epoch: 2


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.916
Epoch: 3


100%|██████████| 118/118 [00:36<00:00,  3.21it/s]


Train Loss : 0.870
Epoch: 4


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.848
Epoch: 5


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.836
Epoch: 6


100%|██████████| 118/118 [00:36<00:00,  3.21it/s]


Train Loss : 0.831
Epoch: 7


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.822
Epoch: 8


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.816
Epoch: 9


100%|██████████| 118/118 [00:36<00:00,  3.21it/s]


Train Loss : 0.813
Epoch: 10


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.811
Epoch: 11


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.809
Epoch: 12


100%|██████████| 118/118 [00:36<00:00,  3.21it/s]


Train Loss : 0.806
Epoch: 13


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.803
Epoch: 14


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.801
Epoch: 15


100%|██████████| 118/118 [00:36<00:00,  3.22it/s]


Train Loss : 0.800
Epoch: 1


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 1.196
Epoch: 2


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.920
Epoch: 3


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.872
Epoch: 4


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.853
Epoch: 5


100%|██████████| 118/118 [00:51<00:00,  2.30it/s]


Train Loss : 0.837
Epoch: 6


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.832
Epoch: 7


100%|██████████| 118/118 [00:51<00:00,  2.30it/s]


Train Loss : 0.824
Epoch: 8


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.818
Epoch: 9


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.814
Epoch: 10


100%|██████████| 118/118 [00:51<00:00,  2.30it/s]


Train Loss : 0.811
Epoch: 11


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.809
Epoch: 12


100%|██████████| 118/118 [00:51<00:00,  2.28it/s]


Train Loss : 0.807
Epoch: 13


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.806
Epoch: 14


100%|██████████| 118/118 [00:51<00:00,  2.29it/s]


Train Loss : 0.804
Epoch: 15


100%|██████████| 118/118 [00:51<00:00,  2.30it/s]


Train Loss : 0.802


Then we display the results for each model using a pandas df.

In [30]:
results_df = pd.DataFrame({'1RNN': rnn1[0], '1Bi-RNN': rnn1Bi[0], '2Bi-RNN': rnn2Bi[0], '1LSTM': lstm1[0], '1Bi-LSTM': lstm1Bi[0], '2Bi-LSTM': lstm2Bi[0]})
results_df = results_df.rename(index={0:'Accuracy', 1:'Parameters', 2:'Time Cost'})
display(results_df)

Unnamed: 0,1RNN,1Bi-RNN,2Bi-RNN,1LSTM,1Bi-LSTM,2Bi-LSTM
Accuracy,0.88,0.89,0.88,0.89,0.89,0.89
Parameters,40010884.0,40021764.0,40046596.0,40042756.0,40085508.0,40184836.0
Time Cost,26.36,30.96,38.61,29.06,36.66,51.48


As we can see the accuracy of all the models increased significantly when we used pretrained embeddings. The number of parameters also increased as the embedding layer is alot larger because of the addition of the Glove embeddings.

## Question 5

For this question we will freeze the pretrained embeddings so they can't be changed during the training process.

In [31]:
rnn1 = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=False, neural_network='RNN', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=True)
rnn1Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=True)
rnn2Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=2, pretrained_embeddings=glove.vectors.clone().detach(), freeze=True)
lstm1 = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=False, neural_network='LSTM', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=True)
lstm1Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=1, pretrained_embeddings=glove.vectors.clone().detach(), freeze=True)
lstm2Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=2, pretrained_embeddings=glove.vectors.clone().detach(), freeze=True)

Epoch: 1


100%|██████████| 118/118 [00:07<00:00, 14.79it/s]


Train Loss : 1.349
Epoch: 2


100%|██████████| 118/118 [00:07<00:00, 15.27it/s]


Train Loss : 1.277
Epoch: 3


100%|██████████| 118/118 [00:07<00:00, 15.47it/s]


Train Loss : 1.246
Epoch: 4


100%|██████████| 118/118 [00:07<00:00, 15.52it/s]


Train Loss : 1.228
Epoch: 5


100%|██████████| 118/118 [00:07<00:00, 15.38it/s]


Train Loss : 1.218
Epoch: 6


100%|██████████| 118/118 [00:07<00:00, 15.59it/s]


Train Loss : 1.212
Epoch: 7


100%|██████████| 118/118 [00:07<00:00, 15.38it/s]


Train Loss : 1.198
Epoch: 8


100%|██████████| 118/118 [00:07<00:00, 15.43it/s]


Train Loss : 1.194
Epoch: 9


100%|██████████| 118/118 [00:07<00:00, 15.54it/s]


Train Loss : 1.177
Epoch: 10


100%|██████████| 118/118 [00:07<00:00, 15.32it/s]


Train Loss : 1.164
Epoch: 11


100%|██████████| 118/118 [00:07<00:00, 15.55it/s]


Train Loss : 1.158
Epoch: 12


100%|██████████| 118/118 [00:07<00:00, 15.59it/s]


Train Loss : 1.142
Epoch: 13


100%|██████████| 118/118 [00:07<00:00, 15.28it/s]


Train Loss : 1.125
Epoch: 14


100%|██████████| 118/118 [00:07<00:00, 15.46it/s]


Train Loss : 1.113
Epoch: 15


100%|██████████| 118/118 [00:07<00:00, 15.47it/s]


Train Loss : 1.104
Epoch: 1


100%|██████████| 118/118 [00:11<00:00,  9.94it/s]


Train Loss : 1.348
Epoch: 2


100%|██████████| 118/118 [00:11<00:00,  9.88it/s]


Train Loss : 1.287
Epoch: 3


100%|██████████| 118/118 [00:11<00:00,  9.99it/s]


Train Loss : 1.250
Epoch: 4


100%|██████████| 118/118 [00:11<00:00,  9.94it/s]


Train Loss : 1.229
Epoch: 5


100%|██████████| 118/118 [00:12<00:00,  9.82it/s]


Train Loss : 1.216
Epoch: 6


100%|██████████| 118/118 [00:11<00:00,  9.97it/s]


Train Loss : 1.201
Epoch: 7


100%|██████████| 118/118 [00:11<00:00,  9.88it/s]


Train Loss : 1.191
Epoch: 8


100%|██████████| 118/118 [00:11<00:00,  9.89it/s]


Train Loss : 1.178
Epoch: 9


100%|██████████| 118/118 [00:12<00:00,  9.82it/s]


Train Loss : 1.165
Epoch: 10


100%|██████████| 118/118 [00:11<00:00,  9.94it/s]


Train Loss : 1.151
Epoch: 11


100%|██████████| 118/118 [00:11<00:00,  9.89it/s]


Train Loss : 1.133
Epoch: 12


100%|██████████| 118/118 [00:11<00:00,  9.94it/s]


Train Loss : 1.120
Epoch: 13


100%|██████████| 118/118 [00:11<00:00,  9.91it/s]


Train Loss : 1.108
Epoch: 14


100%|██████████| 118/118 [00:11<00:00,  9.94it/s]


Train Loss : 1.095
Epoch: 15


100%|██████████| 118/118 [00:11<00:00, 10.02it/s]


Train Loss : 1.093
Epoch: 1


100%|██████████| 118/118 [00:19<00:00,  5.93it/s]


Train Loss : 1.327
Epoch: 2


100%|██████████| 118/118 [00:19<00:00,  5.97it/s]


Train Loss : 1.252
Epoch: 3


100%|██████████| 118/118 [00:19<00:00,  5.95it/s]


Train Loss : 1.234
Epoch: 4


100%|██████████| 118/118 [00:19<00:00,  5.95it/s]


Train Loss : 1.224
Epoch: 5


100%|██████████| 118/118 [00:19<00:00,  5.91it/s]


Train Loss : 1.214
Epoch: 6


100%|██████████| 118/118 [00:19<00:00,  5.94it/s]


Train Loss : 1.201
Epoch: 7


100%|██████████| 118/118 [00:19<00:00,  5.96it/s]


Train Loss : 1.191
Epoch: 8


100%|██████████| 118/118 [00:19<00:00,  5.94it/s]


Train Loss : 1.181
Epoch: 9


100%|██████████| 118/118 [00:19<00:00,  5.98it/s]


Train Loss : 1.159
Epoch: 10


100%|██████████| 118/118 [00:19<00:00,  5.97it/s]


Train Loss : 1.148
Epoch: 11


100%|██████████| 118/118 [00:19<00:00,  5.95it/s]


Train Loss : 1.126
Epoch: 12


100%|██████████| 118/118 [00:19<00:00,  6.00it/s]


Train Loss : 1.106
Epoch: 13


100%|██████████| 118/118 [00:19<00:00,  5.96it/s]


Train Loss : 1.091
Epoch: 14


100%|██████████| 118/118 [00:19<00:00,  5.96it/s]


Train Loss : 1.089
Epoch: 15


100%|██████████| 118/118 [00:19<00:00,  5.99it/s]


Train Loss : 1.071
Epoch: 1


100%|██████████| 118/118 [00:10<00:00, 10.95it/s]


Train Loss : 1.308
Epoch: 2


100%|██████████| 118/118 [00:10<00:00, 11.01it/s]


Train Loss : 1.186
Epoch: 3


100%|██████████| 118/118 [00:10<00:00, 11.09it/s]


Train Loss : 1.115
Epoch: 4


100%|██████████| 118/118 [00:10<00:00, 11.00it/s]


Train Loss : 1.066
Epoch: 5


100%|██████████| 118/118 [00:10<00:00, 11.00it/s]


Train Loss : 1.041
Epoch: 6


100%|██████████| 118/118 [00:10<00:00, 11.03it/s]


Train Loss : 1.021
Epoch: 7


100%|██████████| 118/118 [00:10<00:00, 10.98it/s]


Train Loss : 0.999
Epoch: 8


100%|██████████| 118/118 [00:10<00:00, 10.98it/s]


Train Loss : 0.983
Epoch: 9


100%|██████████| 118/118 [00:10<00:00, 11.02it/s]


Train Loss : 0.975
Epoch: 10


100%|██████████| 118/118 [00:10<00:00, 11.06it/s]


Train Loss : 0.964
Epoch: 11


100%|██████████| 118/118 [00:10<00:00, 11.06it/s]


Train Loss : 0.957
Epoch: 12


100%|██████████| 118/118 [00:10<00:00, 11.00it/s]


Train Loss : 0.955
Epoch: 13


100%|██████████| 118/118 [00:10<00:00, 11.07it/s]


Train Loss : 0.943
Epoch: 14


100%|██████████| 118/118 [00:10<00:00, 11.07it/s]


Train Loss : 0.937
Epoch: 15


100%|██████████| 118/118 [00:10<00:00, 10.93it/s]


Train Loss : 0.938
Epoch: 1


100%|██████████| 118/118 [00:18<00:00,  6.43it/s]


Train Loss : 1.320
Epoch: 2


100%|██████████| 118/118 [00:18<00:00,  6.39it/s]


Train Loss : 1.205
Epoch: 3


100%|██████████| 118/118 [00:18<00:00,  6.37it/s]


Train Loss : 1.136
Epoch: 4


100%|██████████| 118/118 [00:18<00:00,  6.44it/s]


Train Loss : 1.084
Epoch: 5


100%|██████████| 118/118 [00:18<00:00,  6.38it/s]


Train Loss : 1.042
Epoch: 6


100%|██████████| 118/118 [00:18<00:00,  6.42it/s]


Train Loss : 1.018
Epoch: 7


100%|██████████| 118/118 [00:18<00:00,  6.40it/s]


Train Loss : 1.003
Epoch: 8


100%|██████████| 118/118 [00:18<00:00,  6.42it/s]


Train Loss : 0.984
Epoch: 9


100%|██████████| 118/118 [00:18<00:00,  6.37it/s]


Train Loss : 0.973
Epoch: 10


100%|██████████| 118/118 [00:18<00:00,  6.43it/s]


Train Loss : 0.968
Epoch: 11


100%|██████████| 118/118 [00:18<00:00,  6.40it/s]


Train Loss : 0.960
Epoch: 12


100%|██████████| 118/118 [00:18<00:00,  6.40it/s]


Train Loss : 0.951
Epoch: 13


100%|██████████| 118/118 [00:18<00:00,  6.43it/s]


Train Loss : 0.942
Epoch: 14


100%|██████████| 118/118 [00:18<00:00,  6.39it/s]


Train Loss : 0.935
Epoch: 15


100%|██████████| 118/118 [00:18<00:00,  6.41it/s]


Train Loss : 0.934
Epoch: 1


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 1.300
Epoch: 2


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 1.187
Epoch: 3


100%|██████████| 118/118 [00:33<00:00,  3.55it/s]


Train Loss : 1.120
Epoch: 4


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 1.073
Epoch: 5


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 1.031
Epoch: 6


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 0.999
Epoch: 7


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 0.987
Epoch: 8


100%|██████████| 118/118 [00:33<00:00,  3.55it/s]


Train Loss : 0.973
Epoch: 9


100%|██████████| 118/118 [00:33<00:00,  3.55it/s]


Train Loss : 0.957
Epoch: 10


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 0.944
Epoch: 11


100%|██████████| 118/118 [00:33<00:00,  3.55it/s]


Train Loss : 0.939
Epoch: 12


100%|██████████| 118/118 [00:33<00:00,  3.56it/s]


Train Loss : 0.931
Epoch: 13


100%|██████████| 118/118 [00:33<00:00,  3.54it/s]


Train Loss : 0.932
Epoch: 14


100%|██████████| 118/118 [00:33<00:00,  3.52it/s]


Train Loss : 0.924
Epoch: 15


100%|██████████| 118/118 [00:33<00:00,  3.53it/s]


Train Loss : 0.919


In [32]:
results_df = pd.DataFrame({'1RNN': rnn1[0], '1Bi-RNN': rnn1Bi[0], '2Bi-RNN': rnn2Bi[0], '1LSTM': lstm1[0], '1Bi-LSTM': lstm1Bi[0], '2Bi-LSTM': lstm2Bi[0]})
results_df = results_df.rename(index={0:'Accuracy', 1:'Parameters', 2:'Time Cost'})
display(results_df)

Unnamed: 0,1RNN,1Bi-RNN,2Bi-RNN,1LSTM,1Bi-LSTM,2Bi-LSTM
Accuracy,0.59,0.64,0.66,0.79,0.78,0.8
Parameters,10884.0,21764.0,46596.0,42756.0,85508.0,184836.0
Time Cost,7.67,11.9,19.82,10.71,18.43,33.31


This time the accuracy of the models was reduced but it's important to note that the number of parameters was also a lot smaller for each model.

## Question 6

In order to use the IMDB Dataset first of all we have to read it from the folder where it is stored. Then, by using sklearn's function train_test_split we split that dataframe to train and test with a 80:20 ratio. The random_state is used to always split the data in the same way.

In [33]:
from sklearn.model_selection import train_test_split

imdb_data = pd.read_csv('../data/IMDB Dataset.csv')
train_data, test_data = train_test_split(imdb_data, test_size=0.2, random_state=42)

The function in the next cell will help create the dataset from the raw data. 1 will be the label for 'negative' and 2 the label for 'positive' reviews. The new dataset will be comprised of tuples of the form (1, text), where the first number is the sentiment and the text refers to the text of the review. The data has to be formed in that way so the model can use them.

In [34]:
def createDataset(data):
    dataset = []
    for i in range(len(data['review'])):
        if data['sentiment'].iloc[i] == 'negative':
            dataset.append((1, data['review'].iloc[i]))
        else:
            dataset.append((2, data['review'].iloc[i]))
    return dataset

The function we created earlier is called for both the training and the test set.

In [35]:
train_dataset = createDataset(train_data)
test_dataset = createDataset(test_data)

Then the vocabulary is created (this function is not changed).

In [36]:
vocab = build_vocab_from_iterator(build_vocabulary([train_dataset, test_dataset]), min_freq=10, specials=["<PAD>","<UNK>"])
vocab.set_default_index(vocab["<UNK>"])

Then we call the Dataloader function for the train and test set (unchanged also) and specify the target classes which in this case will be 'negative' and 'positive'.

In [37]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, collate_fn=collate_batch)

target_classes = ["negative", "positive"]

The last step is to train the models.

In [38]:
rnn1 = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=False, neural_network='RNN', num_layers=1, pretrained_embeddings=None, freeze=False)
rnn1Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=1, pretrained_embeddings=None, freeze=False)
rnn2Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='RNN', num_layers=2, pretrained_embeddings=None, freeze=False)
lstm1 = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=False, neural_network='LSTM', num_layers=1, pretrained_embeddings=None, freeze=False)
lstm1Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=1, pretrained_embeddings=None, freeze=False)
lstm2Bi = TrainAndEvaluateModelWithPretrainedEmbeddingsAndReturnMetrics(bidirectional=True, neural_network='LSTM', num_layers=2, pretrained_embeddings=None, freeze=False)

Epoch: 1


100%|██████████| 40/40 [00:07<00:00,  5.51it/s]


Train Loss : 0.696
Epoch: 2


100%|██████████| 40/40 [00:06<00:00,  5.81it/s]


Train Loss : 0.688
Epoch: 3


100%|██████████| 40/40 [00:06<00:00,  5.98it/s]


Train Loss : 0.668
Epoch: 4


100%|██████████| 40/40 [00:06<00:00,  5.95it/s]


Train Loss : 0.638
Epoch: 5


100%|██████████| 40/40 [00:06<00:00,  5.91it/s]


Train Loss : 0.608
Epoch: 6


100%|██████████| 40/40 [00:06<00:00,  5.88it/s]


Train Loss : 0.582
Epoch: 7


100%|██████████| 40/40 [00:06<00:00,  5.99it/s]


Train Loss : 0.561
Epoch: 8


100%|██████████| 40/40 [00:06<00:00,  5.97it/s]


Train Loss : 0.541
Epoch: 9


100%|██████████| 40/40 [00:06<00:00,  5.85it/s]


Train Loss : 0.525
Epoch: 10


100%|██████████| 40/40 [00:06<00:00,  5.93it/s]


Train Loss : 0.513
Epoch: 11


100%|██████████| 40/40 [00:06<00:00,  5.90it/s]


Train Loss : 0.500
Epoch: 12


100%|██████████| 40/40 [00:06<00:00,  5.95it/s]


Train Loss : 0.493
Epoch: 13


100%|██████████| 40/40 [00:06<00:00,  5.90it/s]


Train Loss : 0.480
Epoch: 14


100%|██████████| 40/40 [00:06<00:00,  5.93it/s]


Train Loss : 0.470
Epoch: 15


100%|██████████| 40/40 [00:06<00:00,  5.97it/s]


Train Loss : 0.464
Epoch: 1


100%|██████████| 40/40 [00:08<00:00,  4.86it/s]


Train Loss : 0.694
Epoch: 2


100%|██████████| 40/40 [00:08<00:00,  4.78it/s]


Train Loss : 0.682
Epoch: 3


100%|██████████| 40/40 [00:08<00:00,  4.86it/s]


Train Loss : 0.656
Epoch: 4


100%|██████████| 40/40 [00:08<00:00,  4.86it/s]


Train Loss : 0.626
Epoch: 5


100%|██████████| 40/40 [00:08<00:00,  4.77it/s]


Train Loss : 0.600
Epoch: 6


100%|██████████| 40/40 [00:08<00:00,  4.79it/s]


Train Loss : 0.582
Epoch: 7


100%|██████████| 40/40 [00:08<00:00,  4.81it/s]


Train Loss : 0.566
Epoch: 8


100%|██████████| 40/40 [00:08<00:00,  4.83it/s]


Train Loss : 0.544
Epoch: 9


100%|██████████| 40/40 [00:08<00:00,  4.83it/s]


Train Loss : 0.532
Epoch: 10


100%|██████████| 40/40 [00:08<00:00,  4.83it/s]


Train Loss : 0.517
Epoch: 11


100%|██████████| 40/40 [00:08<00:00,  4.83it/s]


Train Loss : 0.505
Epoch: 12


100%|██████████| 40/40 [00:08<00:00,  4.85it/s]


Train Loss : 0.494
Epoch: 13


100%|██████████| 40/40 [00:08<00:00,  4.81it/s]


Train Loss : 0.487
Epoch: 14


100%|██████████| 40/40 [00:08<00:00,  4.79it/s]


Train Loss : 0.476
Epoch: 15


100%|██████████| 40/40 [00:08<00:00,  4.83it/s]


Train Loss : 0.470
Epoch: 1


100%|██████████| 40/40 [00:10<00:00,  3.68it/s]


Train Loss : 0.694
Epoch: 2


100%|██████████| 40/40 [00:10<00:00,  3.73it/s]


Train Loss : 0.685
Epoch: 3


100%|██████████| 40/40 [00:10<00:00,  3.70it/s]


Train Loss : 0.666
Epoch: 4


100%|██████████| 40/40 [00:10<00:00,  3.70it/s]


Train Loss : 0.644
Epoch: 5


100%|██████████| 40/40 [00:10<00:00,  3.72it/s]


Train Loss : 0.623
Epoch: 6


100%|██████████| 40/40 [00:10<00:00,  3.72it/s]


Train Loss : 0.596
Epoch: 7


100%|██████████| 40/40 [00:10<00:00,  3.70it/s]


Train Loss : 0.577
Epoch: 8


100%|██████████| 40/40 [00:10<00:00,  3.70it/s]


Train Loss : 0.557
Epoch: 9


100%|██████████| 40/40 [00:10<00:00,  3.73it/s]


Train Loss : 0.542
Epoch: 10


100%|██████████| 40/40 [00:10<00:00,  3.73it/s]


Train Loss : 0.527
Epoch: 11


100%|██████████| 40/40 [00:10<00:00,  3.73it/s]


Train Loss : 0.511
Epoch: 12


100%|██████████| 40/40 [00:10<00:00,  3.73it/s]


Train Loss : 0.501
Epoch: 13


100%|██████████| 40/40 [00:10<00:00,  3.67it/s]


Train Loss : 0.489
Epoch: 14


100%|██████████| 40/40 [00:10<00:00,  3.71it/s]


Train Loss : 0.478
Epoch: 15


100%|██████████| 40/40 [00:10<00:00,  3.69it/s]


Train Loss : 0.473
Epoch: 1


100%|██████████| 40/40 [00:07<00:00,  5.50it/s]


Train Loss : 0.692
Epoch: 2


100%|██████████| 40/40 [00:07<00:00,  5.49it/s]


Train Loss : 0.673
Epoch: 3


100%|██████████| 40/40 [00:07<00:00,  5.49it/s]


Train Loss : 0.627
Epoch: 4


100%|██████████| 40/40 [00:07<00:00,  5.43it/s]


Train Loss : 0.591
Epoch: 5


100%|██████████| 40/40 [00:07<00:00,  5.49it/s]


Train Loss : 0.560
Epoch: 6


100%|██████████| 40/40 [00:07<00:00,  5.43it/s]


Train Loss : 0.539
Epoch: 7


100%|██████████| 40/40 [00:07<00:00,  5.41it/s]


Train Loss : 0.522
Epoch: 8


100%|██████████| 40/40 [00:07<00:00,  5.46it/s]


Train Loss : 0.508
Epoch: 9


100%|██████████| 40/40 [00:07<00:00,  5.44it/s]


Train Loss : 0.490
Epoch: 10


100%|██████████| 40/40 [00:07<00:00,  5.45it/s]


Train Loss : 0.481
Epoch: 11


100%|██████████| 40/40 [00:07<00:00,  5.44it/s]


Train Loss : 0.468
Epoch: 12


100%|██████████| 40/40 [00:07<00:00,  5.36it/s]


Train Loss : 0.456
Epoch: 13


100%|██████████| 40/40 [00:07<00:00,  5.47it/s]


Train Loss : 0.445
Epoch: 14


100%|██████████| 40/40 [00:07<00:00,  5.50it/s]


Train Loss : 0.441
Epoch: 15


100%|██████████| 40/40 [00:07<00:00,  5.43it/s]


Train Loss : 0.430
Epoch: 1


100%|██████████| 40/40 [00:09<00:00,  4.19it/s]


Train Loss : 0.692
Epoch: 2


100%|██████████| 40/40 [00:09<00:00,  4.18it/s]


Train Loss : 0.675
Epoch: 3


100%|██████████| 40/40 [00:09<00:00,  4.24it/s]


Train Loss : 0.629
Epoch: 4


100%|██████████| 40/40 [00:09<00:00,  4.18it/s]


Train Loss : 0.593
Epoch: 5


100%|██████████| 40/40 [00:09<00:00,  4.18it/s]


Train Loss : 0.563
Epoch: 6


100%|██████████| 40/40 [00:09<00:00,  4.16it/s]


Train Loss : 0.543
Epoch: 7


100%|██████████| 40/40 [00:09<00:00,  4.20it/s]


Train Loss : 0.522
Epoch: 8


100%|██████████| 40/40 [00:09<00:00,  4.19it/s]


Train Loss : 0.506
Epoch: 9


100%|██████████| 40/40 [00:09<00:00,  4.19it/s]


Train Loss : 0.491
Epoch: 10


100%|██████████| 40/40 [00:09<00:00,  4.17it/s]


Train Loss : 0.477
Epoch: 11


100%|██████████| 40/40 [00:09<00:00,  4.22it/s]


Train Loss : 0.466
Epoch: 12


100%|██████████| 40/40 [00:09<00:00,  4.19it/s]


Train Loss : 0.456
Epoch: 13


100%|██████████| 40/40 [00:09<00:00,  4.19it/s]


Train Loss : 0.444
Epoch: 14


100%|██████████| 40/40 [00:09<00:00,  4.18it/s]


Train Loss : 0.446
Epoch: 15


100%|██████████| 40/40 [00:09<00:00,  4.17it/s]


Train Loss : 0.428
Epoch: 1


100%|██████████| 40/40 [00:13<00:00,  2.87it/s]


Train Loss : 0.687
Epoch: 2


100%|██████████| 40/40 [00:13<00:00,  2.87it/s]


Train Loss : 0.647
Epoch: 3


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.604
Epoch: 4


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.571
Epoch: 5


100%|██████████| 40/40 [00:13<00:00,  2.87it/s]


Train Loss : 0.547
Epoch: 6


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.524
Epoch: 7


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.506
Epoch: 8


100%|██████████| 40/40 [00:13<00:00,  2.89it/s]


Train Loss : 0.490
Epoch: 9


100%|██████████| 40/40 [00:13<00:00,  2.87it/s]


Train Loss : 0.476
Epoch: 10


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.466
Epoch: 11


100%|██████████| 40/40 [00:13<00:00,  2.89it/s]


Train Loss : 0.453
Epoch: 12


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.448
Epoch: 13


100%|██████████| 40/40 [00:13<00:00,  2.89it/s]


Train Loss : 0.450
Epoch: 14


100%|██████████| 40/40 [00:13<00:00,  2.87it/s]


Train Loss : 0.434
Epoch: 15


100%|██████████| 40/40 [00:13<00:00,  2.88it/s]


Train Loss : 0.420


In [39]:
results_df = pd.DataFrame({'1RNN': rnn1[0], '1Bi-RNN': rnn1Bi[0], '2Bi-RNN': rnn2Bi[0], '1LSTM': lstm1[0], '1Bi-LSTM': lstm1Bi[0], '2Bi-LSTM': lstm2Bi[0]})
results_df = results_df.rename(index={0:'Accuracy', 1:'Parameters', 2:'Time Cost'})
display(results_df)

Unnamed: 0,1RNN,1Bi-RNN,2Bi-RNN,1LSTM,1Bi-LSTM,2Bi-LSTM
Accuracy,0.71,0.7,0.7,0.72,0.72,0.72
Parameters,2917254.0,2928006.0,2952838.0,2949126.0,2991750.0,3091078.0
Time Cost,6.79,8.3,10.79,7.34,9.55,13.89
