In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Create device for GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

# Load datasets

In [3]:
class TweetDataset(Dataset):
    def __init__(self, data, vocab, max_len):
        self.data = data
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        tokens = row['tokens']

        # Front paddings
        X = torch.zeros(self.max_len)
        for i, token in enumerate(tokens):
            X[self.max_len - len(tokens) + i] = self.vocab.get(token, 1)

        y = torch.tensor(row['label']).long()

        return X.long(), y

In [4]:
train_ds = torch.load('/content/drive/MyDrive/TweetContextClassifier/data/PyTorchDatasets/train_wdtk_sm_ds.pt')
test_ds = torch.load('/content/drive/MyDrive/TweetContextClassifier/data/PyTorchDatasets/test_wdtk_sm_ds.pt')

In [5]:
train_ds, test_ds

(<__main__.TweetDataset at 0x7f41badb8690>,
 <__main__.TweetDataset at 0x7f41575030d0>)

In [6]:
# Create dataloaders
train_dl = DataLoader(train_ds, batch_size=2_000, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=2_000, shuffle=False)

Create training functions

In [7]:
def save_checkpoint(path, model_name, model, optimizer, epoch, loss):
    # Save the model checkpoint
    savepath = f'{path}/{model_name}_{epoch+1}_{loss:.3f}.pt'
    checkpoint = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch + 1,
        'loss': loss
    }
    torch.save(checkpoint, savepath)


def eval_metrics(y_hat, y):
    # Calculate accuracy and f1 score
    y_hat = torch.log_softmax(y_hat, dim=1)
    y_pred = torch.argmax(y_hat, dim=1).cpu().numpy()

    return accuracy_score(y_pred, y), f1_score(y_pred, y, average='weighted')


def one_pass(model, dl, loss_function, optimizer, update=True):
    if update:
        model.train()
    else:
        model.eval()

    losses, accs, f1s = [], [], []
    for X, y in tqdm(dl):
        X, y = X.to(device), y.to(device)
        y_hat = model(X)
        loss = loss_function(y_hat, y)
        losses.append(loss.item())

        # Calculate accuracy and f1 score
        acc, f1 = eval_metrics(y_hat, y.cpu())
        accs.append(acc)
        f1s.append(f1)

        if update:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return np.mean(losses), np.mean(accs), np.mean(f1s)


def train_loop(model, epoches, train_dl, test_dl, optimizer, display=True, checkpoint_path=None, model_name='model'):
    loss_fun = nn.CrossEntropyLoss()
    # Early stop
    best_model ={
        'model': model,
        'loss': 1e100,
        'acc': None,
        'f1': None
    }

    num_worse_loss = 0 # counter to get out of loop if model does not improve
    for epoch in range(epoches):
        train_loss, train_acc, train_f1 = one_pass(model, train_dl, loss_fun, optimizer, update=True)
        test_loss, test_acc, test_f1 = one_pass(model, test_dl, loss_fun, optimizer, update=False)

        if display:
            print(f'***** Epoch {epoch+1} *****')
            print(f'Train Loss: {train_loss:.3f}, Train Accuracy: {train_acc:.3f} Train F1: {train_f1:.3f}')
            print(f'Test Loss: {test_loss:.3f}, Test Accuracy: {test_acc:.3f} Test F1: {test_f1:.3f}')

        # Update best model
        if test_loss < best_model['loss']:
            best_model['model'] = model
            best_model['loss'] = test_loss
            best_model['acc'] = test_acc
            best_model['f1'] = test_f1
            num_worse_loss = 0
        else:
            num_worse_loss += 1
            if num_worse_loss == 3:
                break
    
        # Save checkpoint
    if checkpoint_path:
        save_checkpoint(checkpoint_path, model_name, best_model['model'], optimizer, epoch, best_model['loss'])

    return best_model


Create LSTM

In [8]:
class BaselineLSTM(nn.Module):
    def __init__(self, embedding_size, hidden_size, vocab_size, lstm_p=0, linear_p=0, num_layers=1):
        super(BaselineLSTM, self).__init__()
        self.num_layers = num_layers
        # Word Embeddings
        self.embeddings = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
        self.embeddings.weight.data.uniform_(0, 0.05)

        # LSTM
        self.lstm = nn.LSTM(embedding_size, hidden_size, 
                            batch_first=True, dropout=lstm_p,
                            num_layers=num_layers)

        # Linear Layer2
        hidden1_size = int(hidden_size * 2/3)
        self.l1 = nn.Linear(hidden_size, hidden1_size)
        self.l2 = nn.Linear(hidden1_size, 83) # 83 classes

        # Dropout
        self.dropout = nn.Dropout(linear_p)


    def forward(self, x):
        X_emb = self.embeddings(x)
        out, hidden = self.lstm(X_emb)

        if self.num_layers > 1:
            X = hidden[0][0].squeeze()
            # print(X.size())
        else:
            X = hidden[0].squeeze()
        X = self.l1(X)
        X = self.dropout(X)
        return self.l2(X)


In [9]:
vocab_size = len(train_ds.vocab)
vocab_size

39871

In [10]:
cpath = '/content/drive/MyDrive/TweetContextClassifier/data/checkpoints'

In [26]:
baseline_model = BaselineLSTM(embedding_size=100, hidden_size=50, vocab_size=vocab_size).to(device)
optimizer = optim.Adam(baseline_model.parameters(), lr=0.025)

In [27]:
best_baseline_model = train_loop(
    baseline_model,
    5,
    train_dl,
    test_dl,
    optimizer,
    checkpoint_path=cpath,
    model_name='baselineLSTM'
)

100%|██████████| 67/67 [03:21<00:00,  3.00s/it]
100%|██████████| 34/34 [01:30<00:00,  2.66s/it]


***** Epoch 1 *****
Train Loss: 2.437, Train Accuracy: 0.379 Train F1: 0.379
Test Loss: 1.453, Test Accuracy: 0.650 Test F1: 0.650


100%|██████████| 67/67 [03:20<00:00,  2.99s/it]
100%|██████████| 34/34 [01:30<00:00,  2.67s/it]


***** Epoch 2 *****
Train Loss: 1.212, Train Accuracy: 0.699 Train F1: 0.699
Test Loss: 1.154, Test Accuracy: 0.711 Test F1: 0.711


100%|██████████| 67/67 [03:21<00:00,  3.01s/it]
100%|██████████| 34/34 [01:30<00:00,  2.66s/it]


***** Epoch 3 *****
Train Loss: 0.988, Train Accuracy: 0.747 Train F1: 0.747
Test Loss: 1.097, Test Accuracy: 0.727 Test F1: 0.727


100%|██████████| 67/67 [03:21<00:00,  3.01s/it]
100%|██████████| 34/34 [01:30<00:00,  2.65s/it]


***** Epoch 4 *****
Train Loss: 0.854, Train Accuracy: 0.777 Train F1: 0.777
Test Loss: 1.101, Test Accuracy: 0.731 Test F1: 0.731


100%|██████████| 67/67 [03:21<00:00,  3.01s/it]
100%|██████████| 34/34 [01:29<00:00,  2.64s/it]

***** Epoch 5 *****
Train Loss: 0.749, Train Accuracy: 0.802 Train F1: 0.802
Test Loss: 1.129, Test Accuracy: 0.730 Test F1: 0.730





Train baseline model but with dropout = 0.3

In [13]:
baseline_model_drop = BaselineLSTM(embedding_size=100, hidden_size=50, vocab_size=vocab_size, lstm_p=0.3, linear_p=0.3).to(device)
optimizer = optim.Adam(baseline_model_drop.parameters(), lr=0.01)
best_baseline_model = train_loop(
    baseline_model_drop,
    10,
    train_dl,
    test_dl,
    optimizer,
    checkpoint_path=cpath,
    model_name='baselineLSTM_p0.3'
)

  "num_layers={}".format(dropout, num_layers))
100%|██████████| 67/67 [03:14<00:00,  2.91s/it]
100%|██████████| 34/34 [01:27<00:00,  2.56s/it]


***** Epoch 1 *****
Train Loss: 3.061, Train Accuracy: 0.194 Train F1: 0.278
Test Loss: 2.541, Test Accuracy: 0.328 Test F1: 0.402


100%|██████████| 67/67 [03:14<00:00,  2.90s/it]
100%|██████████| 34/34 [01:26<00:00,  2.54s/it]


***** Epoch 2 *****
Train Loss: 2.034, Train Accuracy: 0.478 Train F1: 0.531
Test Loss: 1.530, Test Accuracy: 0.629 Test F1: 0.661


100%|██████████| 67/67 [03:14<00:00,  2.90s/it]
100%|██████████| 34/34 [01:26<00:00,  2.54s/it]


***** Epoch 3 *****
Train Loss: 1.392, Train Accuracy: 0.666 Train F1: 0.691
Test Loss: 1.280, Test Accuracy: 0.691 Test F1: 0.710


100%|██████████| 67/67 [03:13<00:00,  2.90s/it]
100%|██████████| 34/34 [01:25<00:00,  2.51s/it]


***** Epoch 4 *****
Train Loss: 1.180, Train Accuracy: 0.714 Train F1: 0.729
Test Loss: 1.198, Test Accuracy: 0.712 Test F1: 0.724


100%|██████████| 67/67 [03:13<00:00,  2.90s/it]
100%|██████████| 34/34 [01:26<00:00,  2.53s/it]


***** Epoch 5 *****
Train Loss: 1.052, Train Accuracy: 0.742 Train F1: 0.753
Test Loss: 1.171, Test Accuracy: 0.721 Test F1: 0.730


100%|██████████| 67/67 [03:13<00:00,  2.89s/it]
100%|██████████| 34/34 [01:30<00:00,  2.65s/it]


***** Epoch 6 *****
Train Loss: 0.948, Train Accuracy: 0.765 Train F1: 0.772
Test Loss: 1.170, Test Accuracy: 0.725 Test F1: 0.732


100%|██████████| 67/67 [03:14<00:00,  2.90s/it]
100%|██████████| 34/34 [01:25<00:00,  2.51s/it]


***** Epoch 7 *****
Train Loss: 0.863, Train Accuracy: 0.783 Train F1: 0.790
Test Loss: 1.194, Test Accuracy: 0.727 Test F1: 0.735


100%|██████████| 67/67 [03:13<00:00,  2.89s/it]
100%|██████████| 34/34 [01:25<00:00,  2.53s/it]

***** Epoch 8 *****
Train Loss: 0.787, Train Accuracy: 0.800 Train F1: 0.805
Test Loss: 1.224, Test Accuracy: 0.728 Test F1: 0.734





Two layer LSTM

In [13]:
baseline_model_twol = BaselineLSTM(embedding_size=100, hidden_size=50, 
                                   vocab_size=vocab_size,
                                   lstm_p=0.3,
                                   linear_p=0.3,
                                   num_layers=2).to(device)
optimizer = optim.Adam(baseline_model_twol.parameters(), lr=0.01)
best_baseline_model = train_loop(
    baseline_model_twol,
    10,
    train_dl,
    test_dl,
    optimizer,
    checkpoint_path=cpath,
    model_name='baselineLSTM_2layer'
)

100%|██████████| 75/75 [03:35<00:00,  2.88s/it]
100%|██████████| 38/38 [01:27<00:00,  2.30s/it]


***** Epoch 1 *****
Train Loss: 2.930, Train Accuracy: 0.238 Train F1: 0.321
Test Loss: 2.244, Test Accuracy: 0.424 Test F1: 0.500


100%|██████████| 75/75 [03:33<00:00,  2.85s/it]
100%|██████████| 38/38 [01:27<00:00,  2.29s/it]


***** Epoch 2 *****
Train Loss: 1.815, Train Accuracy: 0.543 Train F1: 0.593
Test Loss: 1.466, Test Accuracy: 0.643 Test F1: 0.673


100%|██████████| 75/75 [03:34<00:00,  2.86s/it]
100%|██████████| 38/38 [01:27<00:00,  2.29s/it]


***** Epoch 3 *****
Train Loss: 1.371, Train Accuracy: 0.670 Train F1: 0.692
Test Loss: 1.274, Test Accuracy: 0.692 Test F1: 0.708


100%|██████████| 75/75 [03:34<00:00,  2.86s/it]
100%|██████████| 38/38 [01:25<00:00,  2.26s/it]


***** Epoch 4 *****
Train Loss: 1.171, Train Accuracy: 0.717 Train F1: 0.731
Test Loss: 1.200, Test Accuracy: 0.710 Test F1: 0.723


100%|██████████| 75/75 [03:33<00:00,  2.85s/it]
100%|██████████| 38/38 [01:26<00:00,  2.27s/it]


***** Epoch 5 *****
Train Loss: 1.055, Train Accuracy: 0.743 Train F1: 0.753
Test Loss: 1.170, Test Accuracy: 0.718 Test F1: 0.728


100%|██████████| 75/75 [03:33<00:00,  2.84s/it]
100%|██████████| 38/38 [01:26<00:00,  2.27s/it]


***** Epoch 6 *****
Train Loss: 0.963, Train Accuracy: 0.762 Train F1: 0.770
Test Loss: 1.159, Test Accuracy: 0.725 Test F1: 0.733


100%|██████████| 75/75 [03:31<00:00,  2.82s/it]
100%|██████████| 38/38 [01:25<00:00,  2.25s/it]


***** Epoch 7 *****
Train Loss: 0.884, Train Accuracy: 0.779 Train F1: 0.786
Test Loss: 1.176, Test Accuracy: 0.724 Test F1: 0.732


100%|██████████| 75/75 [03:32<00:00,  2.83s/it]
100%|██████████| 38/38 [01:25<00:00,  2.26s/it]

***** Epoch 8 *****
Train Loss: 0.813, Train Accuracy: 0.795 Train F1: 0.801
Test Loss: 1.202, Test Accuracy: 0.726 Test F1: 0.733





Two Layer with larger embedding and hidden size

In [17]:
model_2 = BaselineLSTM(embedding_size=200, hidden_size=100, 
                                   vocab_size=vocab_size,
                                   lstm_p=0,
                                   linear_p=0,
                                   num_layers=2).to(device)
optimizer = optim.Adam(model_2.parameters(), lr=0.01)
best_baseline_model = train_loop(
    model_2,
    10,
    train_dl,
    test_dl,
    optimizer,
    checkpoint_path=cpath,
    model_name='baselineLSTM_embed200'
)

100%|██████████| 334/334 [03:52<00:00,  1.43it/s]
100%|██████████| 167/167 [01:34<00:00,  1.77it/s]


***** Epoch 1 *****
Train Loss: 1.698, Train Accuracy: 0.565 Train F1: 0.601
Test Loss: 1.148, Test Accuracy: 0.710 Test F1: 0.721


100%|██████████| 334/334 [03:51<00:00,  1.44it/s]
100%|██████████| 167/167 [01:34<00:00,  1.76it/s]


***** Epoch 2 *****
Train Loss: 1.007, Train Accuracy: 0.741 Train F1: 0.749
Test Loss: 1.052, Test Accuracy: 0.734 Test F1: 0.743


100%|██████████| 334/334 [03:52<00:00,  1.43it/s]
100%|██████████| 167/167 [01:34<00:00,  1.77it/s]


***** Epoch 3 *****
Train Loss: 0.846, Train Accuracy: 0.777 Train F1: 0.783
Test Loss: 1.056, Test Accuracy: 0.738 Test F1: 0.744


100%|██████████| 334/334 [03:52<00:00,  1.44it/s]
100%|██████████| 167/167 [01:34<00:00,  1.77it/s]


***** Epoch 4 *****
Train Loss: 0.722, Train Accuracy: 0.806 Train F1: 0.810
Test Loss: 1.100, Test Accuracy: 0.736 Test F1: 0.743


100%|██████████| 334/334 [03:52<00:00,  1.44it/s]
100%|██████████| 167/167 [01:33<00:00,  1.79it/s]


***** Epoch 5 *****
Train Loss: 0.621, Train Accuracy: 0.830 Train F1: 0.834
Test Loss: 1.158, Test Accuracy: 0.734 Test F1: 0.739


Add more linear layers

In [12]:
class BaselineLSTM(nn.Module):
    def __init__(self, embedding_size, hidden_size, vocab_size, lstm_p=0, linear_p=0, num_layers=1):
        super(BaselineLSTM, self).__init__()
        self.num_layers = num_layers
        # Word Embeddings
        self.embeddings = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
        self.embeddings.weight.data.uniform_(0, 0.05)

        # LSTM
        self.lstm = nn.LSTM(embedding_size, hidden_size, 
                            batch_first=True, dropout=lstm_p,
                            num_layers=num_layers)

        # Linear Layer2
        hidden1_size = int(hidden_size * 2/3)
        hidden2_size = int(hidden1_size * 2/3)
        hidden3_size = int(hidden2_size * 2/3)
        self.l1 = nn.Linear(hidden_size, hidden1_size)
        self.l2 = nn.Linear(hidden1_size, hidden2_size)
        self.l3 = nn.Linear(hidden2_size, hidden3_size) 
        self.l4 = nn.Linear(hidden3_size, 83) # 83 classes

        # Dropout
        self.dropout = nn.Dropout(linear_p)

        # ReLu
        self.relu = nn.ReLU()


    def forward(self, x):
        X_emb = self.embeddings(x)
        out, hidden = self.lstm(X_emb)

        if self.num_layers > 1:
            X = hidden[0][0].squeeze()
            # print(X.size())
        else:
            X = hidden[0].squeeze()
        X = self.l1(X)
        X = self.relu(self.dropout(X))
        X = self.l2(X)
        X = self.relu(self.dropout(X))
        X = self.l3(X)
        X = self.relu(self.dropout(X))
        return self.l4(X)


In [15]:
model_3 = BaselineLSTM(embedding_size=200, hidden_size=100, 
                                   vocab_size=vocab_size,
                                   lstm_p=0.3,
                                   linear_p=0.3,
                                   num_layers=1).to(device)
optimizer = optim.Adam(model_3.parameters(), lr=0.02)
best_baseline_model = train_loop(
    model_3,
    10,
    train_dl,
    test_dl,
    optimizer,
    checkpoint_path=cpath,
    model_name='baselineLSTM_4layer'
)

  "num_layers={}".format(dropout, num_layers))
100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 1 *****
Train Loss: 2.780, Train Accuracy: 0.248 Train F1: 0.343
Test Loss: 2.370, Test Accuracy: 0.350 Test F1: 0.452


100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 2 *****
Train Loss: 2.296, Train Accuracy: 0.378 Train F1: 0.461
Test Loss: 2.077, Test Accuracy: 0.433 Test F1: 0.515


100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.88it/s]


***** Epoch 3 *****
Train Loss: 2.137, Train Accuracy: 0.435 Train F1: 0.508
Test Loss: 1.993, Test Accuracy: 0.468 Test F1: 0.541


100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 4 *****
Train Loss: 2.087, Train Accuracy: 0.452 Train F1: 0.522
Test Loss: 1.938, Test Accuracy: 0.488 Test F1: 0.557


100%|██████████| 334/334 [03:26<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.88it/s]


***** Epoch 5 *****
Train Loss: 2.064, Train Accuracy: 0.464 Train F1: 0.530
Test Loss: 1.926, Test Accuracy: 0.493 Test F1: 0.564


100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 6 *****
Train Loss: 2.037, Train Accuracy: 0.473 Train F1: 0.539
Test Loss: 1.911, Test Accuracy: 0.503 Test F1: 0.564


100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 7 *****
Train Loss: 2.015, Train Accuracy: 0.478 Train F1: 0.542
Test Loss: 1.874, Test Accuracy: 0.511 Test F1: 0.570


100%|██████████| 334/334 [03:27<00:00,  1.61it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 8 *****
Train Loss: 2.003, Train Accuracy: 0.483 Train F1: 0.544
Test Loss: 1.855, Test Accuracy: 0.520 Test F1: 0.583


100%|██████████| 334/334 [03:26<00:00,  1.62it/s]
100%|██████████| 167/167 [01:28<00:00,  1.89it/s]


***** Epoch 9 *****
Train Loss: 1.998, Train Accuracy: 0.486 Train F1: 0.547
Test Loss: 1.872, Test Accuracy: 0.513 Test F1: 0.574


100%|██████████| 334/334 [03:26<00:00,  1.61it/s]
100%|██████████| 167/167 [01:27<00:00,  1.91it/s]


***** Epoch 10 *****
Train Loss: 2.009, Train Accuracy: 0.483 Train F1: 0.545
Test Loss: 1.879, Test Accuracy: 0.514 Test F1: 0.576


In [16]:
model_4 = BaselineLSTM(embedding_size=200, hidden_size=100, 
                                   vocab_size=vocab_size,
                                   lstm_p=0.3,
                                   linear_p=0.3,
                                   num_layers=2).to(device)
optimizer = optim.Adam(model_4.parameters(), lr=0.02)
best_baseline_model = train_loop(
    model_4,
    10,
    train_dl,
    test_dl,
    optimizer,
    checkpoint_path=cpath,
    model_name='baselineLSTM_4layer2'
)

100%|██████████| 334/334 [03:57<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.82it/s]


***** Epoch 1 *****
Train Loss: 2.576, Train Accuracy: 0.304 Train F1: 0.391
Test Loss: 2.144, Test Accuracy: 0.403 Test F1: 0.478


100%|██████████| 334/334 [03:57<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.82it/s]


***** Epoch 2 *****
Train Loss: 2.120, Train Accuracy: 0.433 Train F1: 0.486
Test Loss: 1.953, Test Accuracy: 0.479 Test F1: 0.528


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.82it/s]


***** Epoch 3 *****
Train Loss: 2.011, Train Accuracy: 0.475 Train F1: 0.517
Test Loss: 1.897, Test Accuracy: 0.513 Test F1: 0.552


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.83it/s]


***** Epoch 4 *****
Train Loss: 1.972, Train Accuracy: 0.488 Train F1: 0.530
Test Loss: 1.856, Test Accuracy: 0.527 Test F1: 0.563


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.82it/s]


***** Epoch 5 *****
Train Loss: 1.954, Train Accuracy: 0.498 Train F1: 0.543
Test Loss: 1.842, Test Accuracy: 0.535 Test F1: 0.586


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.83it/s]


***** Epoch 6 *****
Train Loss: 1.933, Train Accuracy: 0.506 Train F1: 0.557
Test Loss: 1.803, Test Accuracy: 0.547 Test F1: 0.596


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:30<00:00,  1.84it/s]


***** Epoch 7 *****
Train Loss: 1.922, Train Accuracy: 0.511 Train F1: 0.563
Test Loss: 1.776, Test Accuracy: 0.548 Test F1: 0.607


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.82it/s]


***** Epoch 8 *****
Train Loss: 1.910, Train Accuracy: 0.514 Train F1: 0.567
Test Loss: 1.776, Test Accuracy: 0.548 Test F1: 0.605


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.83it/s]


***** Epoch 9 *****
Train Loss: 1.901, Train Accuracy: 0.519 Train F1: 0.571
Test Loss: 1.763, Test Accuracy: 0.550 Test F1: 0.607


100%|██████████| 334/334 [03:56<00:00,  1.41it/s]
100%|██████████| 167/167 [01:31<00:00,  1.83it/s]


***** Epoch 10 *****
Train Loss: 1.900, Train Accuracy: 0.519 Train F1: 0.571
Test Loss: 1.781, Test Accuracy: 0.549 Test F1: 0.599
