<a href="https://colab.research.google.com/github/QiaoYe-Learning/NLP-classwork/blob/main/%E2%80%9CAssignment_1_Simple_Sentiment_Analysis_ipynb%E2%80%9D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# download dataset
!pip install torchtext==0.4.0
# import
import torch
from torchtext import data
from torchtext import datasets # pre-define dataset supported by torchtext
import random
import torch.nn as nn # Neural Network
import time
import torch.optim as optim

Collecting torchtext==0.4.0
  Downloading torchtext-0.4.0-py3-none-any.whl.metadata (5.0 kB)
Downloading torchtext-0.4.0-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchtext
Successfully installed torchtext-0.4.0


In [2]:
# Model Building And Function Defining
class RNN(nn.Module): # inherited from nn.Module
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim): # vocabulary size, embedding_dim, hidden_dim, classification number

        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        self.rnn = nn.RNN(embedding_dim, hidden_dim)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text, text_lengths):
        #text = [sentence length, batch size], text_lengths: real length

        embedded = self.embedding(text)
        #embedded = [sent len, batch size, emb dim]

        output, hidden = self.rnn(embedded)
        #output = [sent len, batch size, hid dim] - The hidden status of each time step, containing information about all time steps.
        #hidden = [1, batch size, hid dim] - The hidden state of the last time step

        #assert torch.equal(output[-1,:,:], hidden.squeeze(0)) - Verify that the output of the last time step is equal to the hidden state

        return self.fc(hidden.squeeze(0))

def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division
    acc = correct.sum() / len(correct)
    return acc

def train(model, iterator, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train() # set model - train

    for batch in iterator:

        optimizer.zero_grad() # Clear the gradient in the optimizer before each forward propagation

        text, text_lengths = batch.text

        predictions = model(text, text_lengths).squeeze(1)

        loss = criterion(predictions, batch.label)

        acc = binary_accuracy(predictions, batch.label)

        loss.backward() # Calculate the gradient of the loss by back propagation

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, iterator, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.eval() # setting model - evaluate

    with torch.no_grad(): # No back propagation is required during the evaluation process, so the gradient calculation is turned off

        for batch in iterator:
            text, text_lengths = batch.text

            predictions = model(text, text_lengths).squeeze(1)

            loss = criterion(predictions, batch.label)

            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [3]:
# Initial setting
SEED = 1234 # random seed
torch.manual_seed(SEED) # normalize random seed

torch.backends.cudnn.deterministic = True # enture CUDA's behavior is stable（determinacy）

TEXT = data.Field(tokenize='spacy', # split word using spacy
                  tokenizer_language = 'en_core_web_sm', # enture spacy using English_model
                  include_lengths = True, # include the length of sentence when return the data
                  pad_first=True) # add a mark before the sequence
LABEL = data.LabelField(dtype = torch.float) # specfies the tyoe of label data is 'float'(binary classfication and regression)

training_data, test_data = datasets.IMDB.splits(TEXT, LABEL) # load dataset-IMDB(binary classfication task)
train_data, valid_data = training_data.split(random_state = random.seed(SEED)) # Default 70/30
# train_data, valid_data = train_data.split(split_ratio=0.8, random_state=random.seed(SEED)) 80/5 = train/valid


# Build a vocabulary
MAX_VOCAB_SIZE = 25_000 # max size of vocabulary, only top 25000, other is <unk>
TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE) # build train vocabulary
LABEL.build_vocab(train_data) # build vicabulary for the label fields <unk>,<pad>


# Using BucketIterator to create data iterator for train, valid, test to processes data in batch
BATCH_SIZE = 64 # size of samples per batch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # device choice, preferential using GPU(cuda)
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size = BATCH_SIZE,
    sort_within_batch = True, # sentences in each batch are sorted by their length, useful to reduce the using of <pad>
    device = device)

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:21<00:00, 3.99MB/s]


In [4]:
# Model Settings
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model_RNN_main = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

In [5]:
# Main-RNN
N_EPOCHS = 20
criterion = nn.BCEWithLogitsLoss() # Loss function of binary task
model_RNN = model_RNN_main.to(device)
criterion = criterion.to(device)

# Optimizer Settings
optimizer_SGD = optim.SGD(model_RNN_main.parameters(), lr=1e-3) # SGD optimizer - Stochastic gradient descent
optimizer_Adam = optim.Adam(model_RNN_main.parameters(), lr=1e-3) # Adam optimizer - Stochastic gradient descent
optimizer_Adagrad = optim.Adagrad(model_RNN_main.parameters(), lr=1e-3) # Adagrad optimizer - Stochastic gradient descent

In [6]:
## SGD_20
best_valid_loss_SGD_20 = float('inf')
epoch=0
for epoch in range(N_EPOCHS):
    start_time_SGD_20 = time.time()

    train_loss_SGD_20, train_acc_SGD_20 = train(model_RNN, train_iterator, optimizer_SGD, criterion)
    valid_loss_SGD_20, valid_acc_SGD_20 = evaluate(model_RNN, valid_iterator, criterion)

    end_time_SGD_20 = time.time()

    epoch_mins_SGD_20, epoch_secs_SGD_20 = epoch_time(start_time_SGD_20, end_time_SGD_20)

    if valid_loss_SGD_20 < best_valid_loss_SGD_20:
        best_valid_loss = valid_loss_SGD_20
        torch.save(model_RNN.state_dict(), 'tut1-model_SGD_20.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins_SGD_20}m {epoch_secs_SGD_20}s')
    print(f'\tTrain Loss-SGD_20: {train_loss_SGD_20:.3f} | Train Acc-SGD_20: {train_acc_SGD_20*100:.2f}%')
    print(f'\t Val. Loss-SGD_20: {valid_loss_SGD_20:.3f} |  Val. Acc-SGD_20: {valid_acc_SGD_20*100:.2f}%')

# SGD_20
model_RNN.load_state_dict(torch.load('tut1-model_SGD_20.pt'))
test_loss_SGD_20, test_acc_SGD_20 = evaluate(model_RNN, test_iterator, criterion)
print(f'Test Loss_SGD_20: {test_loss_SGD_20:.3f} | Test Acc_SGD_20: {test_acc_SGD_20*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 5s
	Train Loss-SGD_20: 0.695 | Train Acc-SGD_20: 50.21%
	 Val. Loss-SGD_20: 0.694 |  Val. Acc-SGD_20: 50.64%
Epoch: 02 | Epoch Time: 0m 3s
	Train Loss-SGD_20: 0.693 | Train Acc-SGD_20: 51.74%
	 Val. Loss-SGD_20: 0.694 |  Val. Acc-SGD_20: 51.02%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss-SGD_20: 0.691 | Train Acc-SGD_20: 52.13%
	 Val. Loss-SGD_20: 0.693 |  Val. Acc-SGD_20: 51.18%
Epoch: 04 | Epoch Time: 0m 4s
	Train Loss-SGD_20: 0.690 | Train Acc-SGD_20: 52.77%
	 Val. Loss-SGD_20: 0.692 |  Val. Acc-SGD_20: 51.39%
Epoch: 05 | Epoch Time: 0m 3s
	Train Loss-SGD_20: 0.690 | Train Acc-SGD_20: 52.83%
	 Val. Loss-SGD_20: 0.692 |  Val. Acc-SGD_20: 51.60%
Epoch: 06 | Epoch Time: 0m 3s
	Train Loss-SGD_20: 0.689 | Train Acc-SGD_20: 53.21%
	 Val. Loss-SGD_20: 0.691 |  Val. Acc-SGD_20: 51.88%
Epoch: 07 | Epoch Time: 0m 3s
	Train Loss-SGD_20: 0.688 | Train Acc-SGD_20: 53.57%
	 Val. Loss-SGD_20: 0.691 |  Val. Acc-SGD_20: 52.26%
Epoch: 08 | Epoch Time: 0m 3s
	Train Loss-SGD_20

  model_RNN.load_state_dict(torch.load('tut1-model_SGD_20.pt'))


Test Loss_SGD_20: 0.683 | Test Acc_SGD_20: 56.01%


In [7]:
## Adam_20
best_valid_loss_Adam_20 = float('inf')
epoch=0
for epoch in range(N_EPOCHS):
    start_time_Adam_20 = time.time()

    train_loss_Adam_20, train_acc_Adam_20 = train(model_RNN, train_iterator, optimizer_Adam, criterion)
    valid_loss_Adam_20, valid_acc_Adam_20 = evaluate(model_RNN, valid_iterator, criterion)

    end_time_Adam_20 = time.time()

    epoch_mins_Adam_20, epoch_secs_Adam_20 = epoch_time(start_time_Adam_20, end_time_Adam_20)

    if valid_loss_Adam_20 < best_valid_loss_Adam_20:
        best_valid_loss = valid_loss_Adam_20
        torch.save(model_RNN.state_dict(), 'tut1-model_Adam_20.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins_Adam_20}m {epoch_secs_Adam_20}s')
    print(f'\tTrain Loss-Adam_20: {train_loss_Adam_20:.3f} | Train Acc-Adam_20: {train_acc_Adam_20*100:.2f}%')
    print(f'\t Val. Loss-Adam_20: {valid_loss_Adam_20:.3f} |  Val. Acc-Adam_20: {valid_acc_Adam_20*100:.2f}%')

# Adam_20
model_RNN.load_state_dict(torch.load('tut1-model_Adam_20.pt'))
test_loss_Adam_20, test_acc_Adam_20 = evaluate(model_RNN, test_iterator, criterion)
print(f'Test Loss_Adam_20: {test_loss_Adam_20:.3f} | Test Acc_Adam_20: {test_acc_Adam_20*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 3s
	Train Loss-Adam_20: 0.679 | Train Acc-Adam_20: 57.54%
	 Val. Loss-Adam_20: 0.727 |  Val. Acc-Adam_20: 54.96%
Epoch: 02 | Epoch Time: 0m 4s
	Train Loss-Adam_20: 0.638 | Train Acc-Adam_20: 63.54%
	 Val. Loss-Adam_20: 0.648 |  Val. Acc-Adam_20: 61.59%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss-Adam_20: 0.654 | Train Acc-Adam_20: 60.68%
	 Val. Loss-Adam_20: 0.661 |  Val. Acc-Adam_20: 58.55%
Epoch: 04 | Epoch Time: 0m 3s
	Train Loss-Adam_20: 0.659 | Train Acc-Adam_20: 58.99%
	 Val. Loss-Adam_20: 0.692 |  Val. Acc-Adam_20: 52.24%
Epoch: 05 | Epoch Time: 0m 3s
	Train Loss-Adam_20: 0.673 | Train Acc-Adam_20: 57.92%
	 Val. Loss-Adam_20: 0.667 |  Val. Acc-Adam_20: 58.51%
Epoch: 06 | Epoch Time: 0m 3s
	Train Loss-Adam_20: 0.633 | Train Acc-Adam_20: 63.38%
	 Val. Loss-Adam_20: 0.661 |  Val. Acc-Adam_20: 59.38%
Epoch: 07 | Epoch Time: 0m 3s
	Train Loss-Adam_20: 0.597 | Train Acc-Adam_20: 67.49%
	 Val. Loss-Adam_20: 0.695 |  Val. Acc-Adam_20: 60.07%
Epoch: 08 | Epoch Ti

  model_RNN.load_state_dict(torch.load('tut1-model_Adam_20.pt'))


Test Loss_Adam_20: 0.683 | Test Acc_Adam_20: 66.69%


In [8]:
## Adagrad_20
best_valid_loss_Adagrad_20 = float('inf')
epoch = 0
for epoch in range(N_EPOCHS):
    start_time_Adagrad_20 = time.time()

    train_loss_Adagrad_20, train_acc_Adagrad_20 = train(model_RNN, train_iterator, optimizer_Adagrad, criterion)
    valid_loss_Adagrad_20, valid_acc_Adagrad_20 = evaluate(model_RNN, valid_iterator, criterion)

    end_time_Adagrad_20 = time.time()

    epoch_mins_Adagrad_20, epoch_secs_Adagrad_20 = epoch_time(start_time_Adagrad_20, end_time_Adagrad_20)

    if valid_loss_Adagrad_20 < best_valid_loss_Adagrad_20:
        best_valid_loss_Adagrad_20 = valid_loss_Adagrad_20
        torch.save(model_RNN.state_dict(), 'tut1-model_Adagrad_20.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins_Adagrad_20}m {epoch_secs_Adagrad_20}s')
    print(f'\tTrain Loss-Adagrad_20: {train_loss_Adagrad_20:.3f} | Train Acc-Adagrad_20: {train_acc_Adagrad_20*100:.2f}%')
    print(f'\t Val. Loss-Adagrad_20: {valid_loss_Adagrad_20:.3f} |  Val. Acc-Adagrad_20: {valid_acc_Adagrad_20*100:.2f}%')

# Adagrad_20
model_RNN.load_state_dict(torch.load('tut1-model_Adagrad_20.pt'))
test_loss_Adagrad_20, test_acc_Adagrad_20 = evaluate(model_RNN, test_iterator, criterion)
print(f'Test Loss_Adagrad_20: {test_loss_Adagrad_20:.3f} | Test Acc_Adagrad_20: {test_acc_Adagrad_20*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 3s
	Train Loss-Adagrad_20: 0.322 | Train Acc-Adagrad_20: 86.64%
	 Val. Loss-Adagrad_20: 0.710 |  Val. Acc-Adagrad_20: 67.18%
Epoch: 02 | Epoch Time: 0m 3s
	Train Loss-Adagrad_20: 0.307 | Train Acc-Adagrad_20: 87.71%
	 Val. Loss-Adagrad_20: 0.704 |  Val. Acc-Adagrad_20: 67.96%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss-Adagrad_20: 0.311 | Train Acc-Adagrad_20: 87.21%
	 Val. Loss-Adagrad_20: 0.712 |  Val. Acc-Adagrad_20: 66.13%
Epoch: 04 | Epoch Time: 0m 4s
	Train Loss-Adagrad_20: 0.302 | Train Acc-Adagrad_20: 87.85%
	 Val. Loss-Adagrad_20: 0.718 |  Val. Acc-Adagrad_20: 68.03%
Epoch: 05 | Epoch Time: 0m 3s
	Train Loss-Adagrad_20: 0.296 | Train Acc-Adagrad_20: 88.11%
	 Val. Loss-Adagrad_20: 0.718 |  Val. Acc-Adagrad_20: 68.14%
Epoch: 06 | Epoch Time: 0m 3s
	Train Loss-Adagrad_20: 0.296 | Train Acc-Adagrad_20: 88.03%
	 Val. Loss-Adagrad_20: 0.720 |  Val. Acc-Adagrad_20: 68.08%
Epoch: 07 | Epoch Time: 0m 3s
	Train Loss-Adagrad_20: 0.292 | Train Acc-Adagrad_20: 88.2

  model_RNN.load_state_dict(torch.load('tut1-model_Adagrad_20.pt'))


Test Loss_Adagrad_20: 0.702 | Test Acc_Adagrad_20: 67.78%


In [9]:
## Adam_5
# Model Settings
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model_RNN_main = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
criterion = nn.BCEWithLogitsLoss() # Loss function of binary task
model_RNN = model_RNN_main.to(device)
criterion = criterion.to(device)

best_valid_loss_Adam_5 = float('inf')
epoch=0
N_EPOCHS=5
optimizer_Adam = optim.Adam(model_RNN_main.parameters(), lr=1e-3) # Adam optimizer - Stochastic gradient descent
for epoch in range(N_EPOCHS):
    start_time_Adam_5 = time.time()

    train_loss_Adam_5, train_acc_Adam_5 = train(model_RNN, train_iterator, optimizer_Adam, criterion)
    valid_loss_Adam_5, valid_acc_Adam_5 = evaluate(model_RNN, valid_iterator, criterion)

    end_time_Adam_5 = time.time()

    epoch_mins_Adam_5, epoch_secs_Adam_5 = epoch_time(start_time_Adam_5, end_time_Adam_5)

    if valid_loss_Adam_5 < best_valid_loss_Adam_5:
        best_valid_loss = valid_loss_Adam_5
        torch.save(model_RNN.state_dict(), 'tut1-model_Adam_5.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins_Adam_5}m {epoch_secs_Adam_5}s')
    print(f'\tTrain Loss-Adam_5: {train_loss_Adam_5:.3f} | Train Acc-Adam_5: {train_acc_Adam_5*100:.2f}%')
    print(f'\t Val. Loss-Adam_5: {valid_loss_Adam_5:.3f} |  Val. Acc-Adam_5: {valid_acc_Adam_5*100:.2f}%')

# Adam_5
model_RNN.load_state_dict(torch.load('tut1-model_Adam_5.pt'))
test_loss_Adam_5, test_acc_Adam_5 = evaluate(model_RNN, test_iterator, criterion)
print(f'Test Loss_Adam_5: {test_loss_Adam_5:.3f} | Test Acc_Adam_5: {test_acc_Adam_5*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 4s
	Train Loss-Adam_5: 0.685 | Train Acc-Adam_5: 56.00%
	 Val. Loss-Adam_5: 0.689 |  Val. Acc-Adam_5: 52.93%
Epoch: 02 | Epoch Time: 0m 3s
	Train Loss-Adam_5: 0.675 | Train Acc-Adam_5: 57.49%
	 Val. Loss-Adam_5: 0.679 |  Val. Acc-Adam_5: 55.85%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss-Adam_5: 0.659 | Train Acc-Adam_5: 60.24%
	 Val. Loss-Adam_5: 0.695 |  Val. Acc-Adam_5: 51.47%
Epoch: 04 | Epoch Time: 0m 3s
	Train Loss-Adam_5: 0.657 | Train Acc-Adam_5: 60.97%
	 Val. Loss-Adam_5: 0.640 |  Val. Acc-Adam_5: 64.19%
Epoch: 05 | Epoch Time: 0m 4s
	Train Loss-Adam_5: 0.605 | Train Acc-Adam_5: 67.02%
	 Val. Loss-Adam_5: 0.655 |  Val. Acc-Adam_5: 60.99%


  model_RNN.load_state_dict(torch.load('tut1-model_Adam_5.pt'))


Test Loss_Adam_5: 0.655 | Test Acc_Adam_5: 61.16%


In [12]:
## Adam_10
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model_RNN_main = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
criterion = nn.BCEWithLogitsLoss() # Loss function of binary task
model_RNN = model_RNN_main.to(device)
criterion = criterion.to(device)

best_valid_loss_Adam_10 = float('inf')
epoch=0
N_EPOCHS=10
optimizer_Adam = optim.Adam(model_RNN_main.parameters(), lr=1e-3) # Adam optimizer - Stochastic gradient descent
for epoch in range(N_EPOCHS):
    start_time_Adam_10 = time.time()

    train_loss_Adam_10, train_acc_Adam_10 = train(model_RNN, train_iterator, optimizer_Adam, criterion)
    valid_loss_Adam_10, valid_acc_Adam_10 = evaluate(model_RNN, valid_iterator, criterion)

    end_time_Adam_10 = time.time()

    epoch_mins_Adam_10, epoch_secs_Adam_10 = epoch_time(start_time_Adam_10, end_time_Adam_10)

    if valid_loss_Adam_10 < best_valid_loss_Adam_10:
        best_valid_loss = valid_loss_Adam_10
        torch.save(model_RNN.state_dict(), 'tut1-model_Adam_10.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins_Adam_10}m {epoch_secs_Adam_10}s')
    print(f'\tTrain Loss-Adam_10: {train_loss_Adam_10:.3f} | Train Acc-Adam_10: {train_acc_Adam_10*100:.2f}%')
    print(f'\t Val. Loss-Adam_10: {valid_loss_Adam_10:.3f} |  Val. Acc-Adam_10: {valid_acc_Adam_10*100:.2f}%')

# Adam_10
model_RNN.load_state_dict(torch.load('tut1-model_Adam_10.pt'))
test_loss_Adam_10, test_acc_Adam_10 = evaluate(model_RNN, test_iterator, criterion)
print(f'Test Loss_Adam_10: {test_loss_Adam_10:.3f} | Test Acc_Adam_10: {test_acc_Adam_10*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 3s
	Train Loss-Adam_10: 0.684 | Train Acc-Adam_10: 56.22%
	 Val. Loss-Adam_10: 0.686 |  Val. Acc-Adam_10: 55.29%
Epoch: 02 | Epoch Time: 0m 3s
	Train Loss-Adam_10: 0.669 | Train Acc-Adam_10: 58.41%
	 Val. Loss-Adam_10: 0.677 |  Val. Acc-Adam_10: 56.29%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss-Adam_10: 0.684 | Train Acc-Adam_10: 54.48%
	 Val. Loss-Adam_10: 0.696 |  Val. Acc-Adam_10: 50.89%
Epoch: 04 | Epoch Time: 0m 3s
	Train Loss-Adam_10: 0.693 | Train Acc-Adam_10: 52.41%
	 Val. Loss-Adam_10: 0.691 |  Val. Acc-Adam_10: 51.69%
Epoch: 05 | Epoch Time: 0m 3s
	Train Loss-Adam_10: 0.693 | Train Acc-Adam_10: 51.80%
	 Val. Loss-Adam_10: 0.688 |  Val. Acc-Adam_10: 54.84%
Epoch: 06 | Epoch Time: 0m 3s
	Train Loss-Adam_10: 0.686 | Train Acc-Adam_10: 53.86%
	 Val. Loss-Adam_10: 0.683 |  Val. Acc-Adam_10: 55.17%
Epoch: 07 | Epoch Time: 0m 4s
	Train Loss-Adam_10: 0.674 | Train Acc-Adam_10: 57.13%
	 Val. Loss-Adam_10: 0.674 |  Val. Acc-Adam_10: 56.84%
Epoch: 08 | Epoch Ti

  model_RNN.load_state_dict(torch.load('tut1-model_Adam_10.pt'))


Test Loss_Adam_10: 0.631 | Test Acc_Adam_10: 64.19%


In [11]:
## Adam_50
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model_RNN_main = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
criterion = nn.BCEWithLogitsLoss() # Loss function of binary task
model_RNN = model_RNN_main.to(device)
criterion = criterion.to(device)

best_valid_loss_Adam_50 = float('inf')
epoch=0
N_EPOCHS=50
optimizer_Adam = optim.Adam(model_RNN_main.parameters(), lr=1e-3) # Adam optimizer - Stochastic gradient descent
for epoch in range(N_EPOCHS):
    start_time_Adam_50 = time.time()

    train_loss_Adam_50, train_acc_Adam_50 = train(model_RNN, train_iterator, optimizer_Adam, criterion)
    valid_loss_Adam_50, valid_acc_Adam_50 = evaluate(model_RNN, valid_iterator, criterion)

    end_time_Adam_50 = time.time()

    epoch_mins_Adam_50, epoch_secs_Adam_50 = epoch_time(start_time_Adam_50, end_time_Adam_50)

    if valid_loss_Adam_50 < best_valid_loss_Adam_50:
        best_valid_loss = valid_loss_Adam_50
        torch.save(model_RNN.state_dict(), 'tut1-model_Adam_50.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins_Adam_50}m {epoch_secs_Adam_50}s')
    print(f'\tTrain Loss-Adam_50: {train_loss_Adam_50:.3f} | Train Acc-Adam_50: {train_acc_Adam_50*100:.2f}%')
    print(f'\t Val. Loss-Adam_50: {valid_loss_Adam_50:.3f} |  Val. Acc-Adam_50: {valid_acc_Adam_50*100:.2f}%')

# Adam_50
model_RNN.load_state_dict(torch.load('tut1-model_Adam_50.pt'))
test_loss_Adam_50, test_acc_Adam_50 = evaluate(model_RNN, test_iterator, criterion)
print(f'Test Loss_Adam_50: {test_loss_Adam_50:.3f} | Test Acc_Adam_50: {test_acc_Adam_50*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 3s
	Train Loss-Adam_50: 0.692 | Train Acc-Adam_50: 53.49%
	 Val. Loss-Adam_50: 0.689 |  Val. Acc-Adam_50: 52.29%
Epoch: 02 | Epoch Time: 0m 4s
	Train Loss-Adam_50: 0.674 | Train Acc-Adam_50: 57.09%
	 Val. Loss-Adam_50: 0.671 |  Val. Acc-Adam_50: 57.14%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss-Adam_50: 0.690 | Train Acc-Adam_50: 53.36%
	 Val. Loss-Adam_50: 0.693 |  Val. Acc-Adam_50: 51.39%
Epoch: 04 | Epoch Time: 0m 3s
	Train Loss-Adam_50: 0.680 | Train Acc-Adam_50: 56.05%
	 Val. Loss-Adam_50: 0.667 |  Val. Acc-Adam_50: 58.67%
Epoch: 05 | Epoch Time: 0m 4s
	Train Loss-Adam_50: 0.643 | Train Acc-Adam_50: 62.37%
	 Val. Loss-Adam_50: 0.669 |  Val. Acc-Adam_50: 59.88%
Epoch: 06 | Epoch Time: 0m 3s
	Train Loss-Adam_50: 0.606 | Train Acc-Adam_50: 67.01%
	 Val. Loss-Adam_50: 0.601 |  Val. Acc-Adam_50: 68.62%
Epoch: 07 | Epoch Time: 0m 3s
	Train Loss-Adam_50: 0.543 | Train Acc-Adam_50: 73.33%
	 Val. Loss-Adam_50: 0.610 |  Val. Acc-Adam_50: 65.73%
Epoch: 08 | Epoch Ti

  model_RNN.load_state_dict(torch.load('tut1-model_Adam_50.pt'))


Test Loss_Adam_50: 0.871 | Test Acc_Adam_50: 71.02%


In [6]:
# Model Settings - 4
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1

# 定义模型
class FeedForwardNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dims, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        layers = []
        in_dim = embedding_dim
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(in_dim, hidden_dim))
            layers.append(nn.ReLU())
            in_dim = hidden_dim
        layers.append(nn.Linear(in_dim, output_dim))
        self.fc = nn.Sequential(*layers)

    def forward(self, text, text_lengths=None):
        embedded = self.embedding(text).mean(dim=0)  # 平均池化
        return self.fc(embedded)

# 定义训练和验证函数
def run_experiment(model, optimizer, criterion, train_iterator, valid_iterator, test_iterator, model_name):
    best_valid_loss = float('inf')
    for epoch in range(50):
        start_time = time.time()
        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), f'{model_name}.pt')
        print(f'Epoch: {epoch+1:02} | {model_name} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    # 测试模型
    model.load_state_dict(torch.load(f'{model_name}.pt'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'{model_name} Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

# 实验设置
models = {
    "OneLayerFFNN": FeedForwardNN(INPUT_DIM, EMBEDDING_DIM, [500], OUTPUT_DIM),
    "TwoLayerFFNN": FeedForwardNN(INPUT_DIM, EMBEDDING_DIM, [500, 300], OUTPUT_DIM),
    "ThreeLayerFFNN": FeedForwardNN(INPUT_DIM, EMBEDDING_DIM, [500, 300, 200], OUTPUT_DIM),
}

for model_name, model in models.items():
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    run_experiment(model, optimizer, criterion, train_iterator, valid_iterator, test_iterator, model_name)

Epoch: 01 | OneLayerFFNN | Time: 0m 4s
	Train Loss: 0.558 | Train Acc: 70.21%
	 Val. Loss: 0.449 |  Val. Acc: 79.74%
Epoch: 02 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.354 | Train Acc: 84.97%
	 Val. Loss: 0.346 |  Val. Acc: 85.58%
Epoch: 03 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.254 | Train Acc: 89.97%
	 Val. Loss: 0.351 |  Val. Acc: 86.18%
Epoch: 04 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.213 | Train Acc: 92.57%
	 Val. Loss: 0.313 |  Val. Acc: 88.26%
Epoch: 05 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.156 | Train Acc: 94.32%
	 Val. Loss: 0.407 |  Val. Acc: 86.31%
Epoch: 06 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.148 | Train Acc: 95.63%
	 Val. Loss: 0.336 |  Val. Acc: 88.61%
Epoch: 07 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.078 | Train Acc: 97.80%
	 Val. Loss: 0.370 |  Val. Acc: 88.33%
Epoch: 08 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.084 | Train Acc: 97.75%
	 Val. Loss: 0.456 |  Val. Acc: 87.56%
Epoch: 09 | OneLayerFFNN | Time: 0m 2s
	Train Loss: 0.061 | Trai

  model.load_state_dict(torch.load(f'{model_name}.pt'))


OneLayerFFNN Test Loss: 0.341 | Test Acc: 86.56%
Epoch: 01 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.549 | Train Acc: 71.43%
	 Val. Loss: 0.425 |  Val. Acc: 80.76%
Epoch: 02 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.357 | Train Acc: 84.53%
	 Val. Loss: 0.348 |  Val. Acc: 85.46%
Epoch: 03 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.252 | Train Acc: 89.82%
	 Val. Loss: 0.326 |  Val. Acc: 86.62%
Epoch: 04 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.185 | Train Acc: 92.80%
	 Val. Loss: 0.343 |  Val. Acc: 87.01%
Epoch: 05 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.137 | Train Acc: 95.18%
	 Val. Loss: 0.407 |  Val. Acc: 85.28%
Epoch: 06 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.093 | Train Acc: 97.01%
	 Val. Loss: 0.423 |  Val. Acc: 86.88%
Epoch: 07 | TwoLayerFFNN | Time: 0m 2s
	Train Loss: 0.064 | Train Acc: 97.86%
	 Val. Loss: 0.393 |  Val. Acc: 86.82%
Epoch: 08 | TwoLayerFFNN | Time: 0m 3s
	Train Loss: 0.053 | Train Acc: 98.32%
	 Val. Loss: 0.560 |  Val. Acc: 87.27%
Epoch: 09 | Two

In [12]:
# 修改训练和验证逻辑
def train_CNN(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for batch in iterator:
        # print(f"Text shape: {batch.text[0].shape}")  # 如果使用 PackedSequence，检查 batch.text[0]
        # print(f"Target shape: {batch.label.shape}")  # 应为 [batch_size]
        optimizer.zero_grad()
        text, text_lengths = batch.text
        predictions = model(text).squeeze(1)  # 调整输出形状
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def run_experiment_CNN(model, optimizer, criterion, train_iterator,
                       valid_iterator, test_iterator, model_name, kernel_sizes):
    best_valid_loss = float('inf')
    for epoch in range(50):
        start_time = time.time()
        train_loss, train_acc = train_CNN(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), f'CNN.pt')
        print(f'Epoch: {epoch+1:02} | CNN | kernal_size = {kernel_sizes} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    # 测试模型
    model.load_state_dict(torch.load(f'CNN.pt'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'{model_name} Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

# Model Setting-CNN
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1
KERNEL_SIZES = [1, 2, 3]  # 卷积核大小
NUM_FILTERS = 100  # 每种卷积核的特征图数量

class CNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, output_dim, kernel_sizes, num_filters):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim) # 随机初始化嵌入
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (ks, embedding_dim)) for ks in kernel_sizes
        ])
        self.fc = nn.Linear(len(kernel_sizes) * num_filters, output_dim)

    def forward(self, text, text_lengths=None):
        # 确保输入形状为 [batch_size, seq_len]
        if text.size(0) != BATCH_SIZE:  # 如果输入是 [seq_len, batch_size]
            text = text.permute(1, 0)  # 转置为 [batch_size, seq_len]

        # 1. 嵌入层：将输入转换为嵌入向量
        embedded = self.embedding(text)  # [batch_size, seq_len, embedding_dim]
        embedded = embedded.unsqueeze(1)  # 添加通道维度 [batch_size, 1, seq_len, embedding_dim]
        ## print(f"Embedded shape: {embedded.shape}")  # 调试输出形状

        # 2. 卷积 + ReLU
        conved = [torch.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        ##for i, conv_out in enumerate(conved):
        ##   print(f"Conv {i+1} output shape: {conv_out.shape}")  # 调试每个卷积输出

        # 3. 最大池化
        pooled = [torch.max(conv, dim=2)[0] for conv in conved]
        ##for i, pool_out in enumerate(pooled):
        ##    print(f"Pooled {i+1} output shape: {pool_out.shape}")  # 调试池化输出

        # 4. 特征拼接
        cat = torch.cat(pooled, dim=1)  # [batch_size, len(kernel_sizes) * num_filters]
        ##print(f"Concatenated shape: {cat.shape}")  # 调试拼接后形状

        # 5. 全连接层
        output = self.fc(cat)  # [batch_size, output_dim]
        ##print(f"Output shape: {output.shape}")  # 调试最终输出形状

        return output

# 实验设置
models_CNN = {
    "CNN_gram_1": CNN(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, [1], NUM_FILTERS),
    "CNN_gram_2": CNN(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, [2], NUM_FILTERS),
    "CNN_gram_3": CNN(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, [3], NUM_FILTERS),
}

for idx, (model_name, model) in enumerate(models_CNN.items(), start=1):
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(f"### Running CNN - Kernal size = {idx} ###")
    run_experiment_CNN(model, optimizer, criterion, train_iterator, valid_iterator, test_iterator, f"{model_name}_Run_{idx}", kernel_sizes=[int(model_name[-1])])

### Running CNN - Kernal size = 1 ###
Epoch: 01 | CNN | kernal_size = [1] | Time: 0m 9s
	Train Loss: 0.548 | Train Acc: 73.06%
	 Val. Loss: 0.423 |  Val. Acc: 81.68%
Epoch: 02 | CNN | kernal_size = [1] | Time: 0m 8s
	Train Loss: 0.363 | Train Acc: 84.67%
	 Val. Loss: 0.347 |  Val. Acc: 84.99%
Epoch: 03 | CNN | kernal_size = [1] | Time: 0m 9s
	Train Loss: 0.286 | Train Acc: 88.32%
	 Val. Loss: 0.323 |  Val. Acc: 86.10%
Epoch: 04 | CNN | kernal_size = [1] | Time: 0m 8s
	Train Loss: 0.234 | Train Acc: 90.97%
	 Val. Loss: 0.310 |  Val. Acc: 86.80%
Epoch: 05 | CNN | kernal_size = [1] | Time: 0m 8s
	Train Loss: 0.190 | Train Acc: 93.05%
	 Val. Loss: 0.323 |  Val. Acc: 86.47%
Epoch: 06 | CNN | kernal_size = [1] | Time: 0m 8s
	Train Loss: 0.144 | Train Acc: 95.51%
	 Val. Loss: 0.308 |  Val. Acc: 87.51%
Epoch: 07 | CNN | kernal_size = [1] | Time: 0m 8s
	Train Loss: 0.109 | Train Acc: 97.11%
	 Val. Loss: 0.310 |  Val. Acc: 87.67%
Epoch: 08 | CNN | kernal_size = [1] | Time: 0m 8s
	Train Loss: 0.0

  model.load_state_dict(torch.load(f'CNN.pt'))


CNN_gram_1_Run_1 Test Loss: 0.319 | Test Acc: 86.58%
### Running CNN - Kernal size = 2 ###
Epoch: 01 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.551 | Train Acc: 72.37%
	 Val. Loss: 0.435 |  Val. Acc: 80.59%
Epoch: 02 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.375 | Train Acc: 84.14%
	 Val. Loss: 0.370 |  Val. Acc: 83.37%
Epoch: 03 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.290 | Train Acc: 88.48%
	 Val. Loss: 0.334 |  Val. Acc: 85.81%
Epoch: 04 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.226 | Train Acc: 91.75%
	 Val. Loss: 0.326 |  Val. Acc: 85.94%
Epoch: 05 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.167 | Train Acc: 94.76%
	 Val. Loss: 0.310 |  Val. Acc: 86.39%
Epoch: 06 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.115 | Train Acc: 97.07%
	 Val. Loss: 0.308 |  Val. Acc: 86.69%
Epoch: 07 | CNN | kernal_size = [2] | Time: 0m 34s
	Train Loss: 0.076 | Train Acc: 98.68%
	 Val. Loss: 0.313 |  Val. Acc: 87.33%
Epoch:

In [7]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# Model Setting-LSTM
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1

# 定义训练和验证函数
def run_experiment_LSTM(model, optimizer, criterion, train_iterator, valid_iterator, test_iterator, model_name):
    best_valid_loss = float('inf')
    for epoch in range(50):
        start_time = time.time()
        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), f'{model_name}.pt')
        print(f'Epoch: {epoch+1:02} | {model_name} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    # 测试模型
    model.load_state_dict(torch.load(f'{model_name}.pt'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'{model_name} Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

class LSTM(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, bidirectional=False):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=1, bidirectional=bidirectional, batch_first=True)
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)

    def forward(self, text, text_lengths):
        # 确保输入形状为 [batch_size, seq_len]
        if text.size(0) != BATCH_SIZE:  # 如果输入是 [seq_len, batch_size]
            text = text.permute(1, 0)  # 转置为 [batch_size, seq_len]

        # 确保 text_lengths 在 CPU
        text_lengths = text_lengths.cpu()

        ##print(f"Input text shape: {text.shape}, device: {text.device}")
        ##print(f"Text lengths shape: {text_lengths.shape}, device: {text_lengths.device}")

        embedded = self.embedding(text)  # [batch_size, seq_len, embedding_dim]
        ##print(f"Embedded shape: {embedded.shape}, device: {embedded.device}")

        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True, enforce_sorted=False)
        packed_output, (hidden, _) = self.lstm(packed_embedded)

        ##print(f"Hidden shape before concatenation: {hidden.shape}, device: {hidden.device}")
        hidden = torch.cat((hidden[-2], hidden[-1]), dim=1) if self.lstm.bidirectional else hidden[-1]
        ##print(f"Hidden shape after concatenation: {hidden.shape}, device: {hidden.device}")
        ##print(f"Input shape to Linear: {hidden.shape}")  # 应与 Linear 层的输入形状一致

        # 输出层
        output = self.fc(hidden)  # [batch_size, output_dim]
        ##print(f"Output shape: {output.shape}, device: {output.device}")  # 调试最终输出形状

        return output

models = {
    "LSTM": LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, bidirectional=False),
    "BiLSTM": LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, bidirectional=True)
}

for model_name, model in models.items():
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    run_experiment_LSTM(model, optimizer, criterion, train_iterator, valid_iterator, test_iterator, model_name)

Epoch: 01 | LSTM | Time: 0m 8s
	Train Loss: 0.677 | Train Acc: 56.81%
	 Val. Loss: 0.629 |  Val. Acc: 64.75%
Epoch: 02 | LSTM | Time: 0m 9s
	Train Loss: 0.645 | Train Acc: 63.10%
	 Val. Loss: 0.666 |  Val. Acc: 59.60%
Epoch: 03 | LSTM | Time: 0m 8s
	Train Loss: 0.594 | Train Acc: 68.82%
	 Val. Loss: 0.605 |  Val. Acc: 68.36%
Epoch: 04 | LSTM | Time: 0m 8s
	Train Loss: 0.519 | Train Acc: 76.13%
	 Val. Loss: 0.482 |  Val. Acc: 78.56%
Epoch: 05 | LSTM | Time: 0m 8s
	Train Loss: 0.445 | Train Acc: 80.36%
	 Val. Loss: 0.452 |  Val. Acc: 80.46%
Epoch: 06 | LSTM | Time: 0m 8s
	Train Loss: 0.415 | Train Acc: 81.71%
	 Val. Loss: 0.488 |  Val. Acc: 76.49%
Epoch: 07 | LSTM | Time: 0m 9s
	Train Loss: 0.315 | Train Acc: 87.45%
	 Val. Loss: 0.388 |  Val. Acc: 82.76%
Epoch: 08 | LSTM | Time: 0m 8s
	Train Loss: 0.272 | Train Acc: 89.36%
	 Val. Loss: 0.369 |  Val. Acc: 84.69%
Epoch: 09 | LSTM | Time: 0m 8s
	Train Loss: 0.232 | Train Acc: 91.32%
	 Val. Loss: 0.402 |  Val. Acc: 83.54%
Epoch: 10 | LSTM | 

  model.load_state_dict(torch.load(f'{model_name}.pt'))


LSTM Test Loss: 0.385 | Test Acc: 83.67%
Epoch: 01 | BiLSTM | Time: 0m 14s
	Train Loss: 0.671 | Train Acc: 58.14%
	 Val. Loss: 0.645 |  Val. Acc: 63.70%
Epoch: 02 | BiLSTM | Time: 0m 15s
	Train Loss: 0.610 | Train Acc: 67.53%
	 Val. Loss: 0.585 |  Val. Acc: 68.71%
Epoch: 03 | BiLSTM | Time: 0m 15s
	Train Loss: 0.524 | Train Acc: 74.39%
	 Val. Loss: 0.525 |  Val. Acc: 73.96%
Epoch: 04 | BiLSTM | Time: 0m 15s
	Train Loss: 0.424 | Train Acc: 80.36%
	 Val. Loss: 0.568 |  Val. Acc: 70.38%
Epoch: 05 | BiLSTM | Time: 0m 15s
	Train Loss: 0.347 | Train Acc: 84.82%
	 Val. Loss: 0.483 |  Val. Acc: 78.30%
Epoch: 06 | BiLSTM | Time: 0m 15s
	Train Loss: 0.304 | Train Acc: 87.28%
	 Val. Loss: 0.400 |  Val. Acc: 82.83%
Epoch: 07 | BiLSTM | Time: 0m 15s
	Train Loss: 0.237 | Train Acc: 90.22%
	 Val. Loss: 0.402 |  Val. Acc: 83.43%
Epoch: 08 | BiLSTM | Time: 0m 15s
	Train Loss: 0.170 | Train Acc: 93.40%
	 Val. Loss: 0.431 |  Val. Acc: 83.21%
Epoch: 09 | BiLSTM | Time: 0m 15s
	Train Loss: 0.134 | Train Ac