In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn

from tqdm import tqdm
import pickle

from sklearn.model_selection import train_test_split
from transformers import BertTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
df = pd.read_csv("data/IMDB Dataset.csv")

df

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [4]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


class ImdbDataset(torch.utils.data.Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        review = self.df.iloc[idx]["review"]
        label = self.df.iloc[idx]["sentiment"]
        tokens = self.tokenizer(
            review,
            padding="max_length",
            truncation=True,
            max_length=256,
            return_tensors="pt",
        )
        label = torch.tensor(1. if label == "positive" else 0.)
        return tokens["input_ids"][0], label


train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)


train_dataset = ImdbDataset(train_df, tokenizer)
val_dataset = ImdbDataset(val_df, tokenizer)

batch_size = 1024

train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
    num_workers=4,
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=4
)

In [5]:
class LSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size, dropout):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.gate = nn.Linear(input_size + hidden_size, 4 * hidden_size, bias=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hx=None):
        batch_size, _ = input.size()
        if hx is None:
            hx = (
                torch.zeros(batch_size, self.hidden_size, device=input.get_device()),
                torch.zeros(batch_size, self.hidden_size, device=input.get_device()),
            )
        hidden, cell = hx

        combined = torch.cat((input, hidden), 1)
        gates = self.gate(combined)
        i_gate, f_gate, g_gate, o_gate = gates.chunk(4, dim=1)
        f_gate = torch.sigmoid(f_gate)
        i_gate = torch.sigmoid(i_gate)
        g_gate = torch.tanh(g_gate)
        o_gate = torch.sigmoid(o_gate)
        cell = torch.add(torch.mul(f_gate, cell), torch.mul(i_gate, g_gate))
        hidden = torch.mul(o_gate, torch.tanh(cell))
        hidden = self.dropout(hidden)
        return hidden, cell


class LSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.ModuleList(
            [
                (
                    LSTMCell(
                        embedding_dim, hidden_dim, 0 if layer == n_layers - 1 else 0.2
                    )
                    if layer == 0
                    else LSTMCell(
                        hidden_dim, hidden_dim, 0 if layer == n_layers - 1 else 0.2
                    )
                )
                for layer in range(n_layers)
            ]
        )
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        x = self.embedding(input)
        _, seq_size, _ = x.size()
        hidden = [None for _ in range(len(self.rnn))]
        for t in range(seq_size):
            input_t = x[:, t, :]
            for layer_idx in range(len(self.rnn)):
                hidden[layer_idx] = self.rnn[layer_idx](input_t, hidden[layer_idx])
                input_t = hidden[layer_idx][0]
        x = input_t
        x = self.dropout(x)
        x = self.fc(x)
        x = x.squeeze()
        x = torch.sigmoid(x)
        return x


class GRUCell(nn.Module):
    def __init__(self, input_size, hidden_size, dropout):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.gate = nn.Linear(input_size + hidden_size, 2 * hidden_size, bias=True)
        self.gate_i = nn.Linear(input_size, hidden_size, bias=True)
        self.gate_h = nn.Linear(hidden_size, hidden_size, bias=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden=None):
        batch_size, _ = input.size()
        if hidden is None:
            hidden = torch.zeros(batch_size, self.hidden_size, device=input.get_device())
        combined = torch.cat((input, hidden), 1)

        gates = self.gate(combined)
        r_gate, z_gate = gates.chunk(2, dim=1)
        r_gate = torch.sigmoid(r_gate)
        z_gate = torch.sigmoid(z_gate)
        i_gate = self.gate_i(input)
        h_gate = self.gate_h(hidden)
        n_gate = torch.add(i_gate, torch.mul(r_gate, h_gate))
        n_gate = torch.tanh(n_gate)
        hidden = torch.add(
            torch.mul(torch.sub(1, z_gate), n_gate), torch.mul(z_gate, hidden)
        )
        hidden = self.dropout(hidden)
        return hidden


class GRU(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.ModuleList(
            [
                (
                    GRUCell(embedding_dim, hidden_dim, 0 if layer == n_layers - 1 else 0.2)
                    if layer == 0
                    else GRUCell(hidden_dim, hidden_dim, 0 if layer == n_layers - 1 else 0.2)
                )
                for layer in range(n_layers)
            ]
        )
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        x = self.embedding(input)
        _, seq_size, _ = x.size()
        hidden = [None for _ in range(len(self.rnn))]
        for t in range(seq_size):
            input_t = x[:, t, :]
            for layer_idx in range(len(self.rnn)):
                hidden[layer_idx] = self.rnn[layer_idx](input_t, hidden[layer_idx])
                input_t = hidden[layer_idx]
        x = input_t
        x = self.dropout(x)
        x = self.fc(x)
        x = x.squeeze()
        x = torch.sigmoid(x)
        return x


class RNNCell(nn.Module):
    def __init__(self, input_size, hidden_size, dropout):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.gate = nn.Linear(input_size + hidden_size, hidden_size, bias=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden=None):
        batch_size, _ = input.size()
        if hidden is None:
            hidden = torch.zeros(batch_size, self.hidden_size, device=input.get_device())
        combined = torch.cat((input, hidden), 1)
        h_gate = self.gate(combined)
        h_gate = torch.tanh(h_gate)
        h_gate = self.dropout(h_gate)
        return h_gate


class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.ModuleList(
            [
                (
                    RNNCell(embedding_dim, hidden_dim, 0 if layer == n_layers - 1 else 0.2)
                    if layer == 0
                    else RNNCell(hidden_dim, hidden_dim, 0 if layer == n_layers - 1 else 0.2)
                )
                for layer in range(n_layers)
            ]
        )
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        x = self.embedding(input)
        _, seq_size, _ = x.size()
        hidden = [None for _ in range(len(self.rnn))]
        for t in range(seq_size):
            input_t = x[:, t, :]
            for layer_idx in range(len(self.rnn)):
                hidden[layer_idx] = self.rnn[layer_idx](input_t, hidden[layer_idx])
                input_t = hidden[layer_idx]
        x = input_t
        x = self.dropout(x)
        x = self.fc(x)
        x = x.squeeze()
        x = torch.sigmoid(x)
        return x

In [6]:
class Model:
    def __init__(self, model, optimizer, criterion, lr_scheduler):
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.lr_scheduler = lr_scheduler
        self.train_losses = []
        self.train_accs = []
        self.val_losses = []
        self.val_accs = []
        self.best_valid = float(0)
        if isinstance(self.model, LSTM):
            self.name = "LSTM"
        elif isinstance(self.model, GRU):
            self.name = "GRU"
        elif isinstance(self.model, RNN):
            self.name = "RNN"
        self.lr = []

    def optim_to_str(self):
        if isinstance(self.optimizer, torch.optim.SGD):
            if self.optimizer.param_groups[0]["nesterov"]:
                return "NAG"
            else:
                return "SGD"
        # if isinstance(self.optimizer, torch.optim.Adadelta):
        #     return "Adadelta"
        if isinstance(self.optimizer, torch.optim.NAdam):
            return "NAdam"
        if isinstance(self.optimizer, torch.optim.RAdam):
            return "RAdam"
        if isinstance(self.optimizer, torch.optim.Adamax):
            return "Adamax"
        if isinstance(self.optimizer, torch.optim.AdamW):
            return "AdamW"

    def __str__(self):
        
        return self.name + "_with_" + self.optim_to_str()

    def train_one_epoch(self, trainloader):
        self.model.train()
        print(f"Training {self}")
        train_running_loss = 0.0
        train_running_correct = 0
        counter = 0
        clip = 5
        for i, data in tqdm(enumerate(trainloader), total=len(trainloader)):
            counter += 1
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)
            train_running_loss += loss.item()
            preds = torch.round(outputs.data)
            train_running_correct += (preds == labels).sum().item()
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(), clip)
            self.optimizer.step()
        self.train_losses.append(train_running_loss / counter)
        self.train_accs.append(100.0 * (train_running_correct / len(trainloader.dataset)))

    def validate(self, testloader):
        self.model.eval()
        print(f"Validation {self}")
        valid_running_loss = 0.0
        valid_running_correct = 0
        counter = 0
        with torch.no_grad():
            for i, data in tqdm(enumerate(testloader), total=len(testloader)):
                counter += 1
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                valid_running_loss += loss.item()
                preds = torch.round(outputs.data)
                valid_running_correct += (preds == labels).sum().item()
        self.val_losses.append(valid_running_loss / counter)
        self.val_accs.append(
            100.0 * (valid_running_correct / len(testloader.dataset))
        )
    def step(self,epoch, train_loader, val_loader):
        self.train_one_epoch(train_loader)
        self.validate(val_loader)
        if isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            self.lr_scheduler.step(self.val_accs[-1])
        else:
            self.lr_scheduler.step()
        print(f"Training loss: {self.train_losses[-1]:.3f}, training acc: {self.train_accs[-1]:.3f}")
        print(f"Validation loss: {self.val_losses[-1]:.3f}, validation acc: {self.val_accs[-1]:.3f}")
        print(f'Lr: {self.lr_scheduler.get_last_lr()[0]}')
        self.lr.append(self.lr_scheduler.get_last_lr()[0])
        self.save_best(epoch)
        print('-'*50)
        
        

    def save_best(self, epoch):
        if self.val_accs[-1] > self.best_valid:
            self.best_valid = self.val_accs[-1]
            print(f"\nBEST VALIDATION: {self.best_valid}")
            print(f"\nSAVING BEST MODEL FOR EPOCH: {epoch+1}\n")
            torch.save(
                {
                    "epoch": epoch + 1,
                    "model_state_dict": self.model.state_dict(),
                },
                f"best_{self}.pth",
            )
    def save_model(self, epochs):
        torch.save({
                    'epoch': epochs,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'loss': self.criterion,
                    }, f"{self}.pth")
    def save_train_data(self, epochs):
        data = {'name': str(self),'epochs': epochs, 'train_losses': self.train_losses,
                'train_accs': self.train_accs, 'val_losses': self.val_losses,
                'val_accs': self.val_accs, 'best_valid': self.best_valid, 'lr': self.lr}
        with open(f'{self}_dat.pkl', 'wb') as f:
            pickle.dump(data, f)

In [None]:
vocab_size = len(tokenizer)
embedding_dim = 256
hidden_dim = 128
output_dim = 1
n_layers = 2
models = []
models_classes = [LSTM, GRU, RNN]
opt_classes = [
    torch.optim.SGD,
    torch.optim.Adamax,
    torch.optim.NAdam,
    torch.optim.RAdam,
]
weight_decay = 1e-4
for model_cl in models_classes:
    for opt_cl in opt_classes:

        model = model_cl(
            vocab_size=vocab_size,
            embedding_dim=embedding_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            n_layers=n_layers,
        )
        match opt_cl:
            case torch.optim.SGD:
                optimizer = torch.optim.SGD(
                    model.parameters(),
                    weight_decay=weight_decay,
                    momentum=0.9,
                    nesterov=True,
                )
            case torch.optim.Adamax:
                optimizer = torch.optim.Adamax(
                    model.parameters(), weight_decay=weight_decay
                )
            case torch.optim.NAdam:
                optimizer = torch.optim.NAdam(
                    model.parameters(),
                    decoupled_weight_decay=True,
                    weight_decay=weight_decay,
                )
            case torch.optim.RAdam:
                optimizer = torch.optim.RAdam(
                    model.parameters(),
                    decoupled_weight_decay=True,
                    weight_decay=weight_decay,
                )
        criterion = torch.nn.BCELoss()
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=5, eta_min=1e-5
        )
        models.append(Model(model, optimizer, criterion, lr_scheduler))

In [8]:
def print_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2
    print('model size: {:.3f}MB'.format(size_all_mb))

In [9]:
[print_model_size(model.model) for model in models[::4]]

model size: 31.061MB
model size: 30.749MB
model size: 30.121MB


[None, None, None]

In [10]:
epochs = 20
for model in models[4:]:
    print(f"[INFO]: {model}")
    model.model.to(device)
    for epoch in range(0, epochs):
        print(f"[INFO]: Epoch {epoch+1} of {epochs}")
        model.step(epoch, train_dataloader, val_dataloader)
    model.save_model(epoch)
    model.save_train_data(epoch)
    del model.model
    torch.cuda.empty_cache()

[INFO]: GRU_with_NAG
[INFO]: Epoch 1 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:31<00:00,  3.16s/it]


Training loss: 0.695, training acc: 49.628
Validation loss: 0.693, validation acc: 50.910
Lr: 0.000905463412215599

BEST VALIDATION: 50.91

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:11<00:00,  3.37s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:31<00:00,  3.18s/it]


Training loss: 0.694, training acc: 50.197
Validation loss: 0.693, validation acc: 50.960
Lr: 0.000657963412215599

BEST VALIDATION: 50.96000000000001

SAVING BEST MODEL FOR EPOCH: 2

--------------------------------------------------
[INFO]: Epoch 3 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:11<00:00,  3.38s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.19s/it]


Training loss: 0.694, training acc: 50.208
Validation loss: 0.693, validation acc: 51.080
Lr: 0.0003520365877844011

BEST VALIDATION: 51.080000000000005

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:13<00:00,  3.42s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:33<00:00,  3.32s/it]


Training loss: 0.694, training acc: 49.995
Validation loss: 0.693, validation acc: 51.090
Lr: 0.00010453658778440107

BEST VALIDATION: 51.09

SAVING BEST MODEL FOR EPOCH: 4

--------------------------------------------------
[INFO]: Epoch 5 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:15<00:00,  3.47s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:32<00:00,  3.28s/it]


Training loss: 0.694, training acc: 50.385
Validation loss: 0.693, validation acc: 51.110
Lr: 0.001

BEST VALIDATION: 51.11

SAVING BEST MODEL FOR EPOCH: 5

--------------------------------------------------
[INFO]: Epoch 6 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:12<00:00,  3.41s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:32<00:00,  3.27s/it]


Training loss: 0.694, training acc: 50.248
Validation loss: 0.693, validation acc: 51.170
Lr: 0.000905463412215599

BEST VALIDATION: 51.17

SAVING BEST MODEL FOR EPOCH: 6

--------------------------------------------------
[INFO]: Epoch 7 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:11<00:00,  3.38s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:32<00:00,  3.24s/it]

Training loss: 0.694, training acc: 50.410
Validation loss: 0.693, validation acc: 51.170
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 8 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:11<00:00,  3.37s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:32<00:00,  3.24s/it]

Training loss: 0.694, training acc: 50.165
Validation loss: 0.693, validation acc: 51.130
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 9 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:13<00:00,  3.43s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:33<00:00,  3.37s/it]

Training loss: 0.694, training acc: 50.223
Validation loss: 0.693, validation acc: 51.140
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 10 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:31<00:00,  3.14s/it]

Training loss: 0.693, training acc: 50.820
Validation loss: 0.693, validation acc: 51.120
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 11 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:31<00:00,  3.11s/it]


Training loss: 0.694, training acc: 49.902
Validation loss: 0.693, validation acc: 51.230
Lr: 0.000905463412215599

BEST VALIDATION: 51.23

SAVING BEST MODEL FOR EPOCH: 11

--------------------------------------------------
[INFO]: Epoch 12 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:12<00:00,  3.41s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:31<00:00,  3.11s/it]


Training loss: 0.694, training acc: 50.405
Validation loss: 0.693, validation acc: 51.240
Lr: 0.000657963412215599

BEST VALIDATION: 51.239999999999995

SAVING BEST MODEL FOR EPOCH: 12

--------------------------------------------------
[INFO]: Epoch 13 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:11<00:00,  3.36s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:33<00:00,  3.30s/it]

Training loss: 0.694, training acc: 50.423
Validation loss: 0.693, validation acc: 51.220
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 14 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:13<00:00,  3.42s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:33<00:00,  3.33s/it]

Training loss: 0.694, training acc: 50.335
Validation loss: 0.693, validation acc: 51.210
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 15 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:12<00:00,  3.40s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:33<00:00,  3.37s/it]

Training loss: 0.694, training acc: 50.455
Validation loss: 0.693, validation acc: 51.240
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 16 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:32<00:00,  3.25s/it]


Training loss: 0.694, training acc: 50.285
Validation loss: 0.693, validation acc: 51.320
Lr: 0.000905463412215599

BEST VALIDATION: 51.32

SAVING BEST MODEL FOR EPOCH: 16

--------------------------------------------------
[INFO]: Epoch 17 of 20
Training GRU_with_NAG


100%|██████████| 39/39 [02:10<00:00,  3.36s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:32<00:00,  3.22s/it]

Training loss: 0.694, training acc: 50.000
Validation loss: 0.693, validation acc: 51.250
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:13<00:00,  3.43s/it]

Validation GRU_with_NAG



100%|██████████| 10/10 [00:35<00:00,  3.57s/it]

Training loss: 0.694, training acc: 50.113
Validation loss: 0.693, validation acc: 51.240
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 19 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:34<00:00,  3.44s/it]

Training loss: 0.694, training acc: 50.055
Validation loss: 0.693, validation acc: 51.240
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 20 of 20
Training GRU_with_NAG



100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_NAG


100%|██████████| 10/10 [00:32<00:00,  3.22s/it]


Training loss: 0.694, training acc: 50.043
Validation loss: 0.693, validation acc: 51.230
Lr: 0.001
--------------------------------------------------
[INFO]: GRU_with_Adamax
[INFO]: Epoch 1 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:12<00:00,  3.40s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.20s/it]


Training loss: 0.696, training acc: 50.413
Validation loss: 0.692, validation acc: 51.740
Lr: 0.0018099719094030727

BEST VALIDATION: 51.739999999999995

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:12<00:00,  3.39s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:34<00:00,  3.46s/it]

Training loss: 0.693, training acc: 51.215
Validation loss: 0.692, validation acc: 50.890
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 3 of 20
Training GRU_with_Adamax



100%|██████████| 39/39 [02:14<00:00,  3.46s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:32<00:00,  3.22s/it]


Training loss: 0.692, training acc: 52.072
Validation loss: 0.691, validation acc: 52.400
Lr: 0.0006975280905969274

BEST VALIDATION: 52.400000000000006

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.26s/it]


Training loss: 0.690, training acc: 52.543
Validation loss: 0.690, validation acc: 52.480
Lr: 0.00020002809059692738

BEST VALIDATION: 52.480000000000004

SAVING BEST MODEL FOR EPOCH: 4

--------------------------------------------------
[INFO]: Epoch 5 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:13<00:00,  3.42s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.26s/it]


Training loss: 0.689, training acc: 52.610
Validation loss: 0.690, validation acc: 52.900
Lr: 0.002

BEST VALIDATION: 52.900000000000006

SAVING BEST MODEL FOR EPOCH: 5

--------------------------------------------------
[INFO]: Epoch 6 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:12<00:00,  3.41s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.28s/it]

Training loss: 0.687, training acc: 53.580
Validation loss: 0.685, validation acc: 52.760
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 7 of 20
Training GRU_with_Adamax



100%|██████████| 39/39 [02:14<00:00,  3.44s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:33<00:00,  3.35s/it]


Training loss: 0.676, training acc: 55.685
Validation loss: 0.659, validation acc: 57.720
Lr: 0.0013124719094030728

BEST VALIDATION: 57.720000000000006

SAVING BEST MODEL FOR EPOCH: 7

--------------------------------------------------
[INFO]: Epoch 8 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.22s/it]


Training loss: 0.667, training acc: 58.050
Validation loss: 0.642, validation acc: 67.490
Lr: 0.0006975280905969274

BEST VALIDATION: 67.49000000000001

SAVING BEST MODEL FOR EPOCH: 8

--------------------------------------------------
[INFO]: Epoch 9 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:14<00:00,  3.45s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.16s/it]


Training loss: 0.624, training acc: 67.455
Validation loss: 0.589, validation acc: 71.170
Lr: 0.00020002809059692738

BEST VALIDATION: 71.17

SAVING BEST MODEL FOR EPOCH: 9

--------------------------------------------------
[INFO]: Epoch 10 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:11<00:00,  3.37s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.15s/it]


Training loss: 0.598, training acc: 69.875
Validation loss: 0.581, validation acc: 71.720
Lr: 0.002

BEST VALIDATION: 71.72

SAVING BEST MODEL FOR EPOCH: 10

--------------------------------------------------
[INFO]: Epoch 11 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:10<00:00,  3.34s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:34<00:00,  3.44s/it]


Training loss: 0.590, training acc: 70.345
Validation loss: 0.560, validation acc: 72.380
Lr: 0.0018099719094030727

BEST VALIDATION: 72.38

SAVING BEST MODEL FOR EPOCH: 11

--------------------------------------------------
[INFO]: Epoch 12 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:16<00:00,  3.50s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:32<00:00,  3.27s/it]


Training loss: 0.561, training acc: 72.588
Validation loss: 0.527, validation acc: 75.230
Lr: 0.0013124719094030728

BEST VALIDATION: 75.22999999999999

SAVING BEST MODEL FOR EPOCH: 12

--------------------------------------------------
[INFO]: Epoch 13 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.19s/it]


Training loss: 0.534, training acc: 74.623
Validation loss: 0.501, validation acc: 76.620
Lr: 0.0006975280905969274

BEST VALIDATION: 76.62

SAVING BEST MODEL FOR EPOCH: 13

--------------------------------------------------
[INFO]: Epoch 14 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.12s/it]


Training loss: 0.515, training acc: 75.960
Validation loss: 0.493, validation acc: 77.490
Lr: 0.00020002809059692738

BEST VALIDATION: 77.49000000000001

SAVING BEST MODEL FOR EPOCH: 14

--------------------------------------------------
[INFO]: Epoch 15 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:11<00:00,  3.38s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.24s/it]


Training loss: 0.506, training acc: 76.380
Validation loss: 0.480, validation acc: 77.830
Lr: 0.002

BEST VALIDATION: 77.83

SAVING BEST MODEL FOR EPOCH: 15

--------------------------------------------------
[INFO]: Epoch 16 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:13<00:00,  3.43s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:33<00:00,  3.36s/it]


Training loss: 0.515, training acc: 75.595
Validation loss: 0.467, validation acc: 78.860
Lr: 0.0018099719094030727

BEST VALIDATION: 78.86

SAVING BEST MODEL FOR EPOCH: 16

--------------------------------------------------
[INFO]: Epoch 17 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.16s/it]


Training loss: 0.484, training acc: 77.567
Validation loss: 0.508, validation acc: 76.110
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:13<00:00,  3.43s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.16s/it]


Training loss: 0.464, training acc: 78.830
Validation loss: 0.425, validation acc: 80.720
Lr: 0.0006975280905969274

BEST VALIDATION: 80.72

SAVING BEST MODEL FOR EPOCH: 18

--------------------------------------------------
[INFO]: Epoch 19 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.20s/it]


Training loss: 0.440, training acc: 80.015
Validation loss: 0.421, validation acc: 81.240
Lr: 0.00020002809059692738

BEST VALIDATION: 81.24

SAVING BEST MODEL FOR EPOCH: 19

--------------------------------------------------
[INFO]: Epoch 20 of 20
Training GRU_with_Adamax


100%|██████████| 39/39 [02:13<00:00,  3.43s/it]

Validation GRU_with_Adamax



100%|██████████| 10/10 [00:34<00:00,  3.49s/it]


Training loss: 0.432, training acc: 80.225
Validation loss: 0.414, validation acc: 81.400
Lr: 0.002

BEST VALIDATION: 81.39999999999999

SAVING BEST MODEL FOR EPOCH: 20

--------------------------------------------------
[INFO]: GRU_with_NAdam
[INFO]: Epoch 1 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:14<00:00,  3.46s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:32<00:00,  3.22s/it]


Training loss: 0.698, training acc: 50.397
Validation loss: 0.695, validation acc: 50.410
Lr: 0.0018099719094030727

BEST VALIDATION: 50.41

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:13<00:00,  3.42s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:30<00:00,  3.08s/it]


Training loss: 0.692, training acc: 52.188
Validation loss: 0.686, validation acc: 53.390
Lr: 0.0013124719094030728

BEST VALIDATION: 53.39

SAVING BEST MODEL FOR EPOCH: 2

--------------------------------------------------
[INFO]: Epoch 3 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.18s/it]


Training loss: 0.679, training acc: 55.100
Validation loss: 0.653, validation acc: 66.540
Lr: 0.0006975280905969274

BEST VALIDATION: 66.53999999999999

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:34<00:00,  3.45s/it]


Training loss: 0.601, training acc: 69.532
Validation loss: 0.519, validation acc: 76.710
Lr: 0.00020002809059692738

BEST VALIDATION: 76.71

SAVING BEST MODEL FOR EPOCH: 4

--------------------------------------------------
[INFO]: Epoch 5 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:33<00:00,  3.35s/it]

Training loss: 0.522, training acc: 76.263
Validation loss: 0.514, validation acc: 76.440
Lr: 0.002
--------------------------------------------------
[INFO]: Epoch 6 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:16<00:00,  3.50s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.17s/it]


Training loss: 0.542, training acc: 74.770
Validation loss: 0.477, validation acc: 79.500
Lr: 0.0018099719094030727

BEST VALIDATION: 79.5

SAVING BEST MODEL FOR EPOCH: 6

--------------------------------------------------
[INFO]: Epoch 7 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:12<00:00,  3.40s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:32<00:00,  3.26s/it]

Training loss: 0.493, training acc: 77.512
Validation loss: 0.527, validation acc: 75.570
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 8 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:31<00:00,  3.10s/it]

Training loss: 0.435, training acc: 81.278
Validation loss: 0.558, validation acc: 75.370
Lr: 0.0006975280905969274
--------------------------------------------------
[INFO]: Epoch 9 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:14<00:00,  3.44s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:33<00:00,  3.31s/it]


Training loss: 0.398, training acc: 82.690
Validation loss: 0.399, validation acc: 82.910
Lr: 0.00020002809059692738

BEST VALIDATION: 82.91

SAVING BEST MODEL FOR EPOCH: 9

--------------------------------------------------
[INFO]: Epoch 10 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:14<00:00,  3.45s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:32<00:00,  3.22s/it]


Training loss: 0.324, training acc: 86.490
Validation loss: 0.363, validation acc: 84.590
Lr: 0.002

BEST VALIDATION: 84.59

SAVING BEST MODEL FOR EPOCH: 10

--------------------------------------------------
[INFO]: Epoch 11 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:14<00:00,  3.45s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:30<00:00,  3.08s/it]

Training loss: 0.459, training acc: 79.525
Validation loss: 0.414, validation acc: 82.030
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 12 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:11<00:00,  3.37s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:31<00:00,  3.11s/it]

Training loss: 0.357, training acc: 84.898
Validation loss: 0.407, validation acc: 82.160
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 13 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:13<00:00,  3.41s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:36<00:00,  3.61s/it]


Training loss: 0.269, training acc: 89.112
Validation loss: 0.328, validation acc: 86.370
Lr: 0.0006975280905969274

BEST VALIDATION: 86.37

SAVING BEST MODEL FOR EPOCH: 13

--------------------------------------------------
[INFO]: Epoch 14 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:17<00:00,  3.52s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:34<00:00,  3.47s/it]


Training loss: 0.224, training acc: 91.150
Validation loss: 0.315, validation acc: 86.870
Lr: 0.00020002809059692738

BEST VALIDATION: 86.87

SAVING BEST MODEL FOR EPOCH: 14

--------------------------------------------------
[INFO]: Epoch 15 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:11<00:00,  3.37s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:31<00:00,  3.20s/it]


Training loss: 0.208, training acc: 91.998
Validation loss: 0.319, validation acc: 87.080
Lr: 0.002

BEST VALIDATION: 87.08

SAVING BEST MODEL FOR EPOCH: 15

--------------------------------------------------
[INFO]: Epoch 16 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:34<00:00,  3.43s/it]

Training loss: 0.294, training acc: 87.930
Validation loss: 0.319, validation acc: 86.910
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 17 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:15<00:00,  3.47s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:32<00:00,  3.28s/it]

Training loss: 0.218, training acc: 91.270
Validation loss: 0.326, validation acc: 86.730
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:14<00:00,  3.46s/it]

Validation GRU_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.17s/it]


Training loss: 0.168, training acc: 93.695
Validation loss: 0.326, validation acc: 87.670
Lr: 0.0006975280905969274

BEST VALIDATION: 87.67

SAVING BEST MODEL FOR EPOCH: 18

--------------------------------------------------
[INFO]: Epoch 19 of 20
Training GRU_with_NAdam


100%|██████████| 39/39 [02:13<00:00,  3.41s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:31<00:00,  3.16s/it]

Training loss: 0.145, training acc: 94.688
Validation loss: 0.343, validation acc: 87.650
Lr: 0.00020002809059692738
--------------------------------------------------
[INFO]: Epoch 20 of 20
Training GRU_with_NAdam



100%|██████████| 39/39 [02:13<00:00,  3.41s/it]


Validation GRU_with_NAdam


100%|██████████| 10/10 [00:33<00:00,  3.32s/it]


Training loss: 0.134, training acc: 95.237
Validation loss: 0.347, validation acc: 87.570
Lr: 0.002
--------------------------------------------------
[INFO]: GRU_with_RAdam
[INFO]: Epoch 1 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:11<00:00,  3.38s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.29s/it]


Training loss: 0.696, training acc: 49.807
Validation loss: 0.693, validation acc: 50.580
Lr: 0.000905463412215599

BEST VALIDATION: 50.580000000000005

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:13<00:00,  3.42s/it]

Validation GRU_with_RAdam



100%|██████████| 10/10 [00:36<00:00,  3.63s/it]

Training loss: 0.694, training acc: 50.525
Validation loss: 0.693, validation acc: 49.880
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 3 of 20
Training GRU_with_RAdam



100%|██████████| 39/39 [02:17<00:00,  3.53s/it]

Validation GRU_with_RAdam



100%|██████████| 10/10 [00:32<00:00,  3.21s/it]


Training loss: 0.693, training acc: 51.213
Validation loss: 0.693, validation acc: 51.790
Lr: 0.0003520365877844011

BEST VALIDATION: 51.790000000000006

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:13<00:00,  3.41s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.21s/it]

Training loss: 0.692, training acc: 51.462
Validation loss: 0.692, validation acc: 51.740
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 5 of 20
Training GRU_with_RAdam



100%|██████████| 39/39 [02:13<00:00,  3.42s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.26s/it]

Training loss: 0.692, training acc: 51.543
Validation loss: 0.692, validation acc: 50.690
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 6 of 20
Training GRU_with_RAdam



100%|██████████| 39/39 [02:12<00:00,  3.39s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:33<00:00,  3.34s/it]


Training loss: 0.693, training acc: 51.905
Validation loss: 0.695, validation acc: 51.300
Lr: 0.000905463412215599
--------------------------------------------------
[INFO]: Epoch 7 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:31<00:00,  3.15s/it]


Training loss: 0.692, training acc: 52.223
Validation loss: 0.691, validation acc: 52.790
Lr: 0.000657963412215599

BEST VALIDATION: 52.790000000000006

SAVING BEST MODEL FOR EPOCH: 7

--------------------------------------------------
[INFO]: Epoch 8 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:14<00:00,  3.44s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.23s/it]

Training loss: 0.689, training acc: 53.197
Validation loss: 0.689, validation acc: 51.810
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 9 of 20
Training GRU_with_RAdam



100%|██████████| 39/39 [02:12<00:00,  3.40s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:30<00:00,  3.10s/it]


Training loss: 0.683, training acc: 54.238
Validation loss: 0.685, validation acc: 53.850
Lr: 0.00010453658778440107

BEST VALIDATION: 53.849999999999994

SAVING BEST MODEL FOR EPOCH: 9

--------------------------------------------------
[INFO]: Epoch 10 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:13<00:00,  3.42s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.22s/it]

Training loss: 0.678, training acc: 54.850
Validation loss: 0.684, validation acc: 53.130
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 11 of 20
Training GRU_with_RAdam



100%|██████████| 39/39 [02:14<00:00,  3.45s/it]

Validation GRU_with_RAdam



100%|██████████| 10/10 [00:33<00:00,  3.39s/it]

Training loss: 0.678, training acc: 55.380
Validation loss: 0.697, validation acc: 51.010
Lr: 0.000905463412215599
--------------------------------------------------
[INFO]: Epoch 12 of 20
Training GRU_with_RAdam



100%|██████████| 39/39 [02:12<00:00,  3.41s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.30s/it]


Training loss: 0.680, training acc: 54.587
Validation loss: 0.680, validation acc: 55.080
Lr: 0.000657963412215599

BEST VALIDATION: 55.08

SAVING BEST MODEL FOR EPOCH: 12

--------------------------------------------------
[INFO]: Epoch 13 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:11<00:00,  3.37s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.20s/it]


Training loss: 0.661, training acc: 57.710
Validation loss: 0.654, validation acc: 59.360
Lr: 0.0003520365877844011

BEST VALIDATION: 59.36

SAVING BEST MODEL FOR EPOCH: 13

--------------------------------------------------
[INFO]: Epoch 14 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:12<00:00,  3.41s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.28s/it]


Training loss: 0.609, training acc: 68.542
Validation loss: 0.635, validation acc: 69.490
Lr: 0.00010453658778440107

BEST VALIDATION: 69.49

SAVING BEST MODEL FOR EPOCH: 14

--------------------------------------------------
[INFO]: Epoch 15 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:12<00:00,  3.40s/it]

Validation GRU_with_RAdam



100%|██████████| 10/10 [00:34<00:00,  3.46s/it]


Training loss: 0.560, training acc: 73.540
Validation loss: 0.549, validation acc: 74.620
Lr: 0.001

BEST VALIDATION: 74.62

SAVING BEST MODEL FOR EPOCH: 15

--------------------------------------------------
[INFO]: Epoch 16 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:15<00:00,  3.48s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.22s/it]


Training loss: 0.544, training acc: 73.880
Validation loss: 0.529, validation acc: 77.510
Lr: 0.000905463412215599

BEST VALIDATION: 77.51

SAVING BEST MODEL FOR EPOCH: 16

--------------------------------------------------
[INFO]: Epoch 17 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:12<00:00,  3.41s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.27s/it]


Training loss: 0.497, training acc: 77.028
Validation loss: 0.453, validation acc: 79.220
Lr: 0.000657963412215599

BEST VALIDATION: 79.22

SAVING BEST MODEL FOR EPOCH: 17

--------------------------------------------------
[INFO]: Epoch 18 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:12<00:00,  3.40s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:31<00:00,  3.16s/it]


Training loss: 0.421, training acc: 81.513
Validation loss: 0.407, validation acc: 81.970
Lr: 0.0003520365877844011

BEST VALIDATION: 81.97

SAVING BEST MODEL FOR EPOCH: 18

--------------------------------------------------
[INFO]: Epoch 19 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:14<00:00,  3.45s/it]


Validation GRU_with_RAdam


100%|██████████| 10/10 [00:31<00:00,  3.15s/it]


Training loss: 0.380, training acc: 83.540
Validation loss: 0.391, validation acc: 82.820
Lr: 0.00010453658778440107

BEST VALIDATION: 82.82000000000001

SAVING BEST MODEL FOR EPOCH: 19

--------------------------------------------------
[INFO]: Epoch 20 of 20
Training GRU_with_RAdam


100%|██████████| 39/39 [02:13<00:00,  3.41s/it]

Validation GRU_with_RAdam



100%|██████████| 10/10 [00:33<00:00,  3.33s/it]


Training loss: 0.363, training acc: 84.547
Validation loss: 0.381, validation acc: 83.360
Lr: 0.001

BEST VALIDATION: 83.36

SAVING BEST MODEL FOR EPOCH: 20

--------------------------------------------------
[INFO]: RNN_with_NAG
[INFO]: Epoch 1 of 20
Training RNN_with_NAG


100%|██████████| 39/39 [01:50<00:00,  2.84s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:29<00:00,  2.97s/it]


Training loss: 0.697, training acc: 49.955
Validation loss: 0.693, validation acc: 50.610
Lr: 0.000905463412215599

BEST VALIDATION: 50.61

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training RNN_with_NAG


100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:30<00:00,  3.09s/it]


Training loss: 0.694, training acc: 50.335
Validation loss: 0.693, validation acc: 51.140
Lr: 0.000657963412215599

BEST VALIDATION: 51.13999999999999

SAVING BEST MODEL FOR EPOCH: 2

--------------------------------------------------
[INFO]: Epoch 3 of 20
Training RNN_with_NAG


100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.16s/it]


Training loss: 0.694, training acc: 50.320
Validation loss: 0.693, validation acc: 51.470
Lr: 0.0003520365877844011

BEST VALIDATION: 51.470000000000006

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training RNN_with_NAG


100%|██████████| 39/39 [01:49<00:00,  2.82s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:30<00:00,  3.02s/it]

Training loss: 0.694, training acc: 50.128
Validation loss: 0.693, validation acc: 51.340
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 5 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:53<00:00,  2.90s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.17s/it]

Training loss: 0.694, training acc: 50.148
Validation loss: 0.693, validation acc: 51.310
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 6 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:51<00:00,  2.86s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:30<00:00,  3.02s/it]

Training loss: 0.694, training acc: 50.287
Validation loss: 0.693, validation acc: 51.260
Lr: 0.000905463412215599
--------------------------------------------------
[INFO]: Epoch 7 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:50<00:00,  2.84s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:29<00:00,  2.98s/it]

Training loss: 0.694, training acc: 50.095
Validation loss: 0.693, validation acc: 51.260
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 8 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:53<00:00,  2.90s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.17s/it]

Training loss: 0.694, training acc: 50.362
Validation loss: 0.693, validation acc: 51.330
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 9 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:50<00:00,  2.84s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:30<00:00,  3.05s/it]

Training loss: 0.694, training acc: 49.913
Validation loss: 0.693, validation acc: 51.330
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 10 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:50<00:00,  2.85s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.17s/it]

Training loss: 0.694, training acc: 49.992
Validation loss: 0.693, validation acc: 51.340
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 11 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:52<00:00,  2.89s/it]


Validation RNN_with_NAG


100%|██████████| 10/10 [00:31<00:00,  3.14s/it]

Training loss: 0.694, training acc: 50.003
Validation loss: 0.693, validation acc: 51.250
Lr: 0.000905463412215599
--------------------------------------------------
[INFO]: Epoch 12 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:51<00:00,  2.85s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:30<00:00,  3.04s/it]

Training loss: 0.694, training acc: 50.730
Validation loss: 0.693, validation acc: 51.230
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 13 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:52<00:00,  2.90s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]

Training loss: 0.694, training acc: 50.050
Validation loss: 0.693, validation acc: 51.280
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 14 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:49<00:00,  2.80s/it]


Validation RNN_with_NAG


100%|██████████| 10/10 [00:33<00:00,  3.30s/it]

Training loss: 0.694, training acc: 50.065
Validation loss: 0.693, validation acc: 51.350
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 15 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:53<00:00,  2.92s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:32<00:00,  3.25s/it]

Training loss: 0.694, training acc: 50.055
Validation loss: 0.693, validation acc: 51.330
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 16 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:55<00:00,  2.95s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]

Training loss: 0.694, training acc: 50.582
Validation loss: 0.693, validation acc: 51.230
Lr: 0.000905463412215599
--------------------------------------------------
[INFO]: Epoch 17 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:52<00:00,  2.88s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:29<00:00,  2.98s/it]

Training loss: 0.694, training acc: 50.430
Validation loss: 0.693, validation acc: 51.270
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:32<00:00,  3.22s/it]

Training loss: 0.694, training acc: 49.870
Validation loss: 0.693, validation acc: 51.280
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 19 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:50<00:00,  2.84s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:29<00:00,  2.99s/it]

Training loss: 0.694, training acc: 50.092
Validation loss: 0.693, validation acc: 51.380
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 20 of 20
Training RNN_with_NAG



100%|██████████| 39/39 [01:52<00:00,  2.90s/it]

Validation RNN_with_NAG



100%|██████████| 10/10 [00:31<00:00,  3.20s/it]


Training loss: 0.694, training acc: 50.230
Validation loss: 0.693, validation acc: 51.280
Lr: 0.001
--------------------------------------------------
[INFO]: RNN_with_Adamax
[INFO]: Epoch 1 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:52<00:00,  2.89s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.04s/it]


Training loss: 0.697, training acc: 50.113
Validation loss: 0.693, validation acc: 50.880
Lr: 0.0018099719094030727

BEST VALIDATION: 50.88

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:52<00:00,  2.88s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.09s/it]


Training loss: 0.693, training acc: 51.230
Validation loss: 0.693, validation acc: 50.960
Lr: 0.0013124719094030728

BEST VALIDATION: 50.96000000000001

SAVING BEST MODEL FOR EPOCH: 2

--------------------------------------------------
[INFO]: Epoch 3 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:54<00:00,  2.95s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.06s/it]


Training loss: 0.692, training acc: 51.355
Validation loss: 0.693, validation acc: 51.350
Lr: 0.0006975280905969274

BEST VALIDATION: 51.349999999999994

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:56<00:00,  2.99s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]


Training loss: 0.691, training acc: 52.365
Validation loss: 0.693, validation acc: 51.420
Lr: 0.00020002809059692738

BEST VALIDATION: 51.42

SAVING BEST MODEL FOR EPOCH: 4

--------------------------------------------------
[INFO]: Epoch 5 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:52<00:00,  2.88s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:32<00:00,  3.27s/it]


Training loss: 0.690, training acc: 52.225
Validation loss: 0.693, validation acc: 51.230
Lr: 0.002
--------------------------------------------------
[INFO]: Epoch 6 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:55<00:00,  2.96s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.07s/it]

Training loss: 0.691, training acc: 51.990
Validation loss: 0.693, validation acc: 51.320
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 7 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:52<00:00,  2.88s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.03s/it]

Training loss: 0.690, training acc: 52.807
Validation loss: 0.694, validation acc: 51.400
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 8 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:49<00:00,  2.82s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]

Training loss: 0.688, training acc: 53.165
Validation loss: 0.694, validation acc: 51.410
Lr: 0.0006975280905969274
--------------------------------------------------
[INFO]: Epoch 9 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:49<00:00,  2.80s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.02s/it]


Training loss: 0.687, training acc: 52.990
Validation loss: 0.694, validation acc: 51.770
Lr: 0.00020002809059692738

BEST VALIDATION: 51.77

SAVING BEST MODEL FOR EPOCH: 9

--------------------------------------------------
[INFO]: Epoch 10 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:51<00:00,  2.85s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.12s/it]


Training loss: 0.684, training acc: 53.715
Validation loss: 0.694, validation acc: 52.020
Lr: 0.002

BEST VALIDATION: 52.019999999999996

SAVING BEST MODEL FOR EPOCH: 10

--------------------------------------------------
[INFO]: Epoch 11 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:53<00:00,  2.91s/it]


Validation RNN_with_Adamax


100%|██████████| 10/10 [00:31<00:00,  3.11s/it]

Training loss: 0.687, training acc: 53.093
Validation loss: 0.696, validation acc: 50.980
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 12 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:30<00:00,  3.05s/it]


Training loss: 0.684, training acc: 54.163
Validation loss: 0.688, validation acc: 52.390
Lr: 0.0013124719094030728

BEST VALIDATION: 52.39

SAVING BEST MODEL FOR EPOCH: 12

--------------------------------------------------
[INFO]: Epoch 13 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:51<00:00,  2.85s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.10s/it]


Training loss: 0.681, training acc: 54.263
Validation loss: 0.685, validation acc: 54.740
Lr: 0.0006975280905969274

BEST VALIDATION: 54.74

SAVING BEST MODEL FOR EPOCH: 13

--------------------------------------------------
[INFO]: Epoch 14 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.13s/it]


Training loss: 0.673, training acc: 56.165
Validation loss: 0.678, validation acc: 56.740
Lr: 0.00020002809059692738

BEST VALIDATION: 56.74

SAVING BEST MODEL FOR EPOCH: 14

--------------------------------------------------
[INFO]: Epoch 15 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:50<00:00,  2.83s/it]


Validation RNN_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.26s/it]

Training loss: 0.668, training acc: 57.178
Validation loss: 0.683, validation acc: 55.470
Lr: 0.002
--------------------------------------------------
[INFO]: Epoch 16 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:55<00:00,  2.95s/it]


Validation RNN_with_Adamax


100%|██████████| 10/10 [00:32<00:00,  3.28s/it]

Training loss: 0.682, training acc: 54.693
Validation loss: 0.691, validation acc: 52.910
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 17 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:54<00:00,  2.95s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:31<00:00,  3.18s/it]

Training loss: 0.676, training acc: 55.437
Validation loss: 0.677, validation acc: 56.250
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:49<00:00,  2.81s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:32<00:00,  3.22s/it]

Training loss: 0.674, training acc: 55.810
Validation loss: 0.683, validation acc: 55.340
Lr: 0.0006975280905969274
--------------------------------------------------
[INFO]: Epoch 19 of 20
Training RNN_with_Adamax



100%|██████████| 39/39 [01:56<00:00,  3.00s/it]

Validation RNN_with_Adamax



100%|██████████| 10/10 [00:32<00:00,  3.26s/it]


Training loss: 0.669, training acc: 56.820
Validation loss: 0.671, validation acc: 59.980
Lr: 0.00020002809059692738

BEST VALIDATION: 59.98

SAVING BEST MODEL FOR EPOCH: 19

--------------------------------------------------
[INFO]: Epoch 20 of 20
Training RNN_with_Adamax


100%|██████████| 39/39 [01:50<00:00,  2.84s/it]


Validation RNN_with_Adamax


100%|██████████| 10/10 [00:30<00:00,  3.08s/it]


Training loss: 0.665, training acc: 61.157
Validation loss: 0.666, validation acc: 63.360
Lr: 0.002

BEST VALIDATION: 63.36000000000001

SAVING BEST MODEL FOR EPOCH: 20

--------------------------------------------------
[INFO]: RNN_with_NAdam
[INFO]: Epoch 1 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:53<00:00,  2.92s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:34<00:00,  3.42s/it]


Training loss: 0.699, training acc: 49.935
Validation loss: 0.696, validation acc: 50.700
Lr: 0.0018099719094030727

BEST VALIDATION: 50.7

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:53<00:00,  2.91s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:30<00:00,  3.03s/it]


Training loss: 0.695, training acc: 50.787
Validation loss: 0.694, validation acc: 50.950
Lr: 0.0013124719094030728

BEST VALIDATION: 50.949999999999996

SAVING BEST MODEL FOR EPOCH: 2

--------------------------------------------------
[INFO]: Epoch 3 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:51<00:00,  2.86s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:30<00:00,  3.04s/it]

Training loss: 0.689, training acc: 52.487
Validation loss: 0.696, validation acc: 50.920
Lr: 0.0006975280905969274
--------------------------------------------------
[INFO]: Epoch 4 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:49<00:00,  2.81s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:30<00:00,  3.09s/it]


Training loss: 0.682, training acc: 54.078
Validation loss: 0.700, validation acc: 51.150
Lr: 0.00020002809059692738

BEST VALIDATION: 51.15

SAVING BEST MODEL FOR EPOCH: 4

--------------------------------------------------
[INFO]: Epoch 5 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:52<00:00,  2.87s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:33<00:00,  3.32s/it]


Training loss: 0.672, training acc: 55.718
Validation loss: 0.706, validation acc: 51.220
Lr: 0.002

BEST VALIDATION: 51.22

SAVING BEST MODEL FOR EPOCH: 5

--------------------------------------------------
[INFO]: Epoch 6 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:55<00:00,  2.97s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]

Training loss: 0.679, training acc: 55.053
Validation loss: 0.698, validation acc: 51.090
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 7 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:50<00:00,  2.84s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:30<00:00,  3.05s/it]


Training loss: 0.658, training acc: 57.588
Validation loss: 0.718, validation acc: 51.360
Lr: 0.0013124719094030728

BEST VALIDATION: 51.35999999999999

SAVING BEST MODEL FOR EPOCH: 7

--------------------------------------------------
[INFO]: Epoch 8 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:51<00:00,  2.85s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:32<00:00,  3.26s/it]


Training loss: 0.636, training acc: 59.337
Validation loss: 0.732, validation acc: 52.270
Lr: 0.0006975280905969274

BEST VALIDATION: 52.27

SAVING BEST MODEL FOR EPOCH: 8

--------------------------------------------------
[INFO]: Epoch 9 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:54<00:00,  2.92s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.13s/it]


Training loss: 0.612, training acc: 61.062
Validation loss: 0.763, validation acc: 52.390
Lr: 0.00020002809059692738

BEST VALIDATION: 52.39

SAVING BEST MODEL FOR EPOCH: 9

--------------------------------------------------
[INFO]: Epoch 10 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:53<00:00,  2.92s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:32<00:00,  3.25s/it]

Training loss: 0.589, training acc: 63.153
Validation loss: 0.764, validation acc: 51.810
Lr: 0.002
--------------------------------------------------
[INFO]: Epoch 11 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:53<00:00,  2.92s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]

Training loss: 0.614, training acc: 61.538
Validation loss: 0.752, validation acc: 51.230
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 12 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.10s/it]


Training loss: 0.596, training acc: 62.275
Validation loss: 0.781, validation acc: 52.680
Lr: 0.0013124719094030728

BEST VALIDATION: 52.68000000000001

SAVING BEST MODEL FOR EPOCH: 12

--------------------------------------------------
[INFO]: Epoch 13 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:52<00:00,  2.87s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.17s/it]


Training loss: 0.564, training acc: 64.460
Validation loss: 0.811, validation acc: 52.770
Lr: 0.0006975280905969274

BEST VALIDATION: 52.769999999999996

SAVING BEST MODEL FOR EPOCH: 13

--------------------------------------------------
[INFO]: Epoch 14 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:52<00:00,  2.89s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.11s/it]


Training loss: 0.542, training acc: 65.690
Validation loss: 0.822, validation acc: 53.280
Lr: 0.00020002809059692738

BEST VALIDATION: 53.28000000000001

SAVING BEST MODEL FOR EPOCH: 14

--------------------------------------------------
[INFO]: Epoch 15 of 20
Training RNN_with_NAdam


100%|██████████| 39/39 [01:50<00:00,  2.82s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:30<00:00,  3.10s/it]

Training loss: 0.526, training acc: 66.347
Validation loss: 0.833, validation acc: 52.590
Lr: 0.002
--------------------------------------------------
[INFO]: Epoch 16 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:56<00:00,  2.99s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:34<00:00,  3.49s/it]

Training loss: 0.546, training acc: 65.553
Validation loss: 0.819, validation acc: 52.730
Lr: 0.0018099719094030727
--------------------------------------------------
[INFO]: Epoch 17 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:53<00:00,  2.90s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:34<00:00,  3.44s/it]

Training loss: 0.532, training acc: 66.340
Validation loss: 0.838, validation acc: 53.180
Lr: 0.0013124719094030728
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:49<00:00,  2.81s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.11s/it]

Training loss: 0.509, training acc: 67.332
Validation loss: 0.878, validation acc: 52.090
Lr: 0.0006975280905969274
--------------------------------------------------
[INFO]: Epoch 19 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:49<00:00,  2.80s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:29<00:00,  3.00s/it]

Training loss: 0.495, training acc: 68.110
Validation loss: 0.875, validation acc: 53.020
Lr: 0.00020002809059692738
--------------------------------------------------
[INFO]: Epoch 20 of 20
Training RNN_with_NAdam



100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_NAdam



100%|██████████| 10/10 [00:31<00:00,  3.11s/it]


Training loss: 0.481, training acc: 68.810
Validation loss: 0.891, validation acc: 53.990
Lr: 0.002

BEST VALIDATION: 53.99

SAVING BEST MODEL FOR EPOCH: 20

--------------------------------------------------
[INFO]: RNN_with_RAdam
[INFO]: Epoch 1 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:55<00:00,  2.96s/it]


Validation RNN_with_RAdam


100%|██████████| 10/10 [00:33<00:00,  3.32s/it]


Training loss: 0.696, training acc: 49.422
Validation loss: 0.694, validation acc: 49.610
Lr: 0.000905463412215599

BEST VALIDATION: 49.61

SAVING BEST MODEL FOR EPOCH: 1

--------------------------------------------------
[INFO]: Epoch 2 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:54<00:00,  2.92s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:31<00:00,  3.12s/it]


Training loss: 0.694, training acc: 50.688
Validation loss: 0.693, validation acc: 50.800
Lr: 0.000657963412215599

BEST VALIDATION: 50.8

SAVING BEST MODEL FOR EPOCH: 2

--------------------------------------------------
[INFO]: Epoch 3 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:31<00:00,  3.10s/it]


Training loss: 0.693, training acc: 50.525
Validation loss: 0.693, validation acc: 50.950
Lr: 0.0003520365877844011

BEST VALIDATION: 50.949999999999996

SAVING BEST MODEL FOR EPOCH: 3

--------------------------------------------------
[INFO]: Epoch 4 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:51<00:00,  2.85s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.05s/it]


Training loss: 0.693, training acc: 50.785
Validation loss: 0.693, validation acc: 51.050
Lr: 0.00010453658778440107

BEST VALIDATION: 51.05

SAVING BEST MODEL FOR EPOCH: 4

--------------------------------------------------
[INFO]: Epoch 5 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.07s/it]

Training loss: 0.693, training acc: 51.090
Validation loss: 0.693, validation acc: 51.020
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 6 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:57<00:00,  3.02s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:32<00:00,  3.27s/it]


Training loss: 0.693, training acc: 51.252
Validation loss: 0.693, validation acc: 51.070
Lr: 0.000905463412215599

BEST VALIDATION: 51.07000000000001

SAVING BEST MODEL FOR EPOCH: 6

--------------------------------------------------
[INFO]: Epoch 7 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:58<00:00,  3.04s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.07s/it]

Training loss: 0.692, training acc: 51.133
Validation loss: 0.693, validation acc: 50.110
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 8 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:54<00:00,  2.94s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:31<00:00,  3.14s/it]

Training loss: 0.690, training acc: 52.033
Validation loss: 0.694, validation acc: 50.470
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 9 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:31<00:00,  3.13s/it]

Training loss: 0.688, training acc: 52.765
Validation loss: 0.694, validation acc: 50.700
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 10 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:55<00:00,  2.96s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.04s/it]

Training loss: 0.687, training acc: 53.348
Validation loss: 0.693, validation acc: 50.810
Lr: 0.001
--------------------------------------------------
[INFO]: Epoch 11 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:51<00:00,  2.86s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:33<00:00,  3.36s/it]


Training loss: 0.689, training acc: 53.190
Validation loss: 0.695, validation acc: 51.380
Lr: 0.000905463412215599

BEST VALIDATION: 51.38

SAVING BEST MODEL FOR EPOCH: 11

--------------------------------------------------
[INFO]: Epoch 12 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:53<00:00,  2.92s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.08s/it]


Training loss: 0.685, training acc: 53.790
Validation loss: 0.696, validation acc: 51.880
Lr: 0.000657963412215599

BEST VALIDATION: 51.88

SAVING BEST MODEL FOR EPOCH: 12

--------------------------------------------------
[INFO]: Epoch 13 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.05s/it]


Training loss: 0.680, training acc: 54.535
Validation loss: 0.695, validation acc: 52.150
Lr: 0.0003520365877844011

BEST VALIDATION: 52.15

SAVING BEST MODEL FOR EPOCH: 13

--------------------------------------------------
[INFO]: Epoch 14 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:50<00:00,  2.82s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:31<00:00,  3.11s/it]


Training loss: 0.673, training acc: 55.745
Validation loss: 0.691, validation acc: 52.820
Lr: 0.00010453658778440107

BEST VALIDATION: 52.82

SAVING BEST MODEL FOR EPOCH: 14

--------------------------------------------------
[INFO]: Epoch 15 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:49<00:00,  2.82s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:31<00:00,  3.15s/it]


Training loss: 0.665, training acc: 58.337
Validation loss: 0.689, validation acc: 54.800
Lr: 0.001

BEST VALIDATION: 54.800000000000004

SAVING BEST MODEL FOR EPOCH: 15

--------------------------------------------------
[INFO]: Epoch 16 of 20
Training RNN_with_RAdam


100%|██████████| 39/39 [01:52<00:00,  2.89s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:33<00:00,  3.38s/it]

Training loss: 0.675, training acc: 56.340
Validation loss: 0.689, validation acc: 54.360
Lr: 0.000905463412215599
--------------------------------------------------
[INFO]: Epoch 17 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:56<00:00,  2.99s/it]


Validation RNN_with_RAdam


100%|██████████| 10/10 [00:32<00:00,  3.24s/it]

Training loss: 0.676, training acc: 55.755
Validation loss: 0.703, validation acc: 51.530
Lr: 0.000657963412215599
--------------------------------------------------
[INFO]: Epoch 18 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:51<00:00,  2.87s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.03s/it]

Training loss: 0.671, training acc: 55.865
Validation loss: 0.706, validation acc: 51.340
Lr: 0.0003520365877844011
--------------------------------------------------
[INFO]: Epoch 19 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:29<00:00,  3.00s/it]

Training loss: 0.663, training acc: 56.693
Validation loss: 0.709, validation acc: 51.770
Lr: 0.00010453658778440107
--------------------------------------------------
[INFO]: Epoch 20 of 20
Training RNN_with_RAdam



100%|██████████| 39/39 [01:50<00:00,  2.83s/it]

Validation RNN_with_RAdam



100%|██████████| 10/10 [00:30<00:00,  3.06s/it]


Training loss: 0.658, training acc: 57.455
Validation loss: 0.708, validation acc: 52.310
Lr: 0.001
--------------------------------------------------
