# Grammar Correction using LLM

In [1]:
import torch
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## Dataset

In [2]:
# Grammar Correction Dataset : https://www.kaggle.com/datasets/satishgunjal/grammar-correction

data = pd.read_csv("./dataset/Grammar Correction.csv", sep=",")
data.head()

Unnamed: 0,Serial Number,Error Type,Ungrammatical Statement,Standard English
0,1,Verb Tense Errors,I goes to the store everyday.,I go to the store everyday.
1,2,Verb Tense Errors,They was playing soccer last night.,They were playing soccer last night.
2,3,Verb Tense Errors,She have completed her homework.,She has completed her homework.
3,4,Verb Tense Errors,He don't know the answer.,He doesn't know the answer.
4,5,Verb Tense Errors,The sun rise in the east.,The sun rises in the east.


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2018 entries, 0 to 2017
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Serial Number            2018 non-null   int64 
 1   Error Type               2018 non-null   object
 2   Ungrammatical Statement  2018 non-null   object
 3   Standard English         2018 non-null   object
dtypes: int64(1), object(3)
memory usage: 63.2+ KB


In [4]:
# count of error types

error_counts = data['Error Type'].value_counts()
print(error_counts)

Error Type
Sentence Structure Errors                         103
Verb Tense Errors                                 100
Subject-Verb Agreement                            100
Article Usage                                     100
Spelling Mistakes                                 100
Preposition Usage                                  95
Punctuation Errors                                 60
Relative Clause Errors                             51
Gerund and Participle Errors                       50
Abbreviation Errors                                50
Slang, Jargon, and Colloquialisms                  50
Negation Errors                                    50
Incorrect Auxiliaries                              50
Ambiguity                                          50
Tautology                                          50
Lack of Parallelism in Lists or Series             50
Mixed Metaphors/Idioms                             50
Parallelism Errors                                 49
Contractions Erro

## Model: T5

In [5]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# load tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [6]:
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

## Train Test Split

In [7]:
from sklearn.model_selection import train_test_split

# 80 : 20
train_data, temp_data = train_test_split(data, test_size=0.2, random_state=42)

# 10 : 10
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

print(f"Training set size: {len(train_data)}")
print(f"Validation set size: {len(val_data)}")
print(f"Test set size: {len(test_data)}")

Training set size: 1614
Validation set size: 202
Test set size: 202


## Tokenize data

In [8]:
def tokenize_data(data, tokenizer, max_length=128):
    inputs = tokenizer(
        data['Ungrammatical Statement'].tolist(),
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    )

    outputs = tokenizer(
        data['Standard English'].tolist(),
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    )

    return inputs, outputs

# tokenmize datasets
train_inputs, train_outputs = tokenize_data(train_data[['Ungrammatical Statement', 'Standard English']], tokenizer)
val_inputs, val_outputs = tokenize_data(val_data[['Ungrammatical Statement', 'Standard English']], tokenizer)
test_inputs, test_outputs = tokenize_data(test_data[['Ungrammatical Statement', 'Standard English']], tokenizer)

train_inputs = {key: value.to(device) for key, value in train_inputs.items()}
train_outputs = train_outputs['input_ids'].to(device)

val_inputs = {key: value.to(device) for key, value in val_inputs.items()}
val_outputs = val_outputs['input_ids'].to(device)

test_inputs = {key: value.to(device) for key, value in test_inputs.items()}
test_outputs = test_outputs['input_ids'].to(device)

In [9]:
train_inputs

{'input_ids': tensor([[  451, 15687,   160,  ...,     0,     0,     0],
         [  216,   228,    43,  ...,     0,     0,     0],
         [   27,    43,     3,  ...,     0,     0,     0],
         ...,
         [ 1902,     5, 12587,  ...,     0,     0,     0],
         [   37,  1595,   562,  ...,     0,     0,     0],
         [   37,   167,   167,  ...,     0,     0,     0]], device='cuda:0'),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0')}

## Data Loader

In [10]:
from torch.utils.data import DataLoader, TensorDataset

# tensordatasets
train_dataset = TensorDataset(train_inputs['input_ids'], train_inputs['attention_mask'], train_outputs)
val_dataset = TensorDataset(val_inputs['input_ids'], val_inputs['attention_mask'], val_outputs)
test_dataset = TensorDataset(test_inputs['input_ids'], test_inputs['attention_mask'], test_outputs)

# dataloaders
train_loader = DataLoader(train_dataset, batch_size=8)
val_loader = DataLoader(val_dataset, batch_size=8)
test_loader = DataLoader(test_dataset, batch_size=8)

In [11]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7ae3e9a55cc0>

## Fine-Tune T5 with Error Type

We use the Standard English as the target output and input the ungrammatical sentence with the error type prepended so that the model may also identify type of error

In [12]:
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# fine-tuning loop
model.train()

epochs = 3

for epoch in tqdm(range(epochs), desc='Finetuning..'):
    print(f"Epoch {epoch + 1}")

    # training
    model.train()
    for batch in tqdm(train_loader, desc='Training..'):
        input_ids, attention_mask, labels = batch
        optimizer.zero_grad()

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        loss.backward()
        optimizer.step()

        print(f"Training Loss: {loss.item()}")

    # validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc='Validating..'):
            input_ids, attention_mask, labels = batch
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            val_loss += outputs.loss.item()

    avg_val_loss = val_loss / len(val_loader)
    print(f"Validation Loss: {avg_val_loss}")

Finetuning..:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 1



Training..:   0%|          | 0/202 [00:00<?, ?it/s][A
Training..:   0%|          | 1/202 [00:01<06:23,  1.91s/it][A

Training Loss: 16.158187866210938



Training..:   1%|          | 2/202 [00:02<03:31,  1.06s/it][A

Training Loss: 14.548454284667969



Training..:   1%|▏         | 3/202 [00:02<02:36,  1.27it/s][A

Training Loss: 14.082308769226074



Training..:   2%|▏         | 4/202 [00:03<02:10,  1.51it/s][A

Training Loss: 12.819028854370117



Training..:   2%|▏         | 5/202 [00:03<01:56,  1.70it/s][A

Training Loss: 11.750520706176758



Training..:   3%|▎         | 6/202 [00:04<01:47,  1.83it/s][A

Training Loss: 11.296401023864746



Training..:   3%|▎         | 7/202 [00:04<01:41,  1.93it/s][A

Training Loss: 11.217639923095703



Training..:   4%|▍         | 8/202 [00:05<01:37,  1.99it/s][A

Training Loss: 10.538874626159668



Training..:   4%|▍         | 9/202 [00:05<01:34,  2.04it/s][A

Training Loss: 9.716626167297363



Training..:   5%|▍         | 10/202 [00:06<01:33,  2.06it/s][A

Training Loss: 9.038046836853027



Training..:   5%|▌         | 11/202 [00:06<01:31,  2.08it/s][A

Training Loss: 9.229504585266113



Training..:   6%|▌         | 12/202 [00:07<01:30,  2.10it/s][A

Training Loss: 8.33045482635498



Training..:   6%|▋         | 13/202 [00:07<01:29,  2.10it/s][A

Training Loss: 8.574920654296875



Training..:   7%|▋         | 14/202 [00:07<01:28,  2.11it/s][A

Training Loss: 6.734745502471924



Training..:   7%|▋         | 15/202 [00:08<01:28,  2.12it/s][A

Training Loss: 7.281794548034668



Training..:   8%|▊         | 16/202 [00:08<01:27,  2.12it/s][A

Training Loss: 6.047216892242432



Training..:   8%|▊         | 17/202 [00:09<01:26,  2.13it/s][A

Training Loss: 7.198516845703125



Training..:   9%|▉         | 18/202 [00:09<01:26,  2.13it/s][A

Training Loss: 5.478100776672363



Training..:   9%|▉         | 19/202 [00:10<01:25,  2.13it/s][A

Training Loss: 5.263735294342041



Training..:  10%|▉         | 20/202 [00:10<01:25,  2.14it/s][A

Training Loss: 5.488060474395752



Training..:  10%|█         | 21/202 [00:11<01:24,  2.13it/s][A

Training Loss: 4.346961975097656



Training..:  11%|█         | 22/202 [00:11<01:24,  2.13it/s][A

Training Loss: 3.643319845199585



Training..:  11%|█▏        | 23/202 [00:12<01:24,  2.13it/s][A

Training Loss: 3.9051051139831543



Training..:  12%|█▏        | 24/202 [00:12<01:23,  2.13it/s][A

Training Loss: 3.8253488540649414



Training..:  12%|█▏        | 25/202 [00:13<01:23,  2.13it/s][A

Training Loss: 3.9248173236846924



Training..:  13%|█▎        | 26/202 [00:13<01:22,  2.13it/s][A

Training Loss: 2.649428606033325



Training..:  13%|█▎        | 27/202 [00:14<01:22,  2.13it/s][A

Training Loss: 3.1572792530059814



Training..:  14%|█▍        | 28/202 [00:14<01:21,  2.12it/s][A

Training Loss: 2.627908229827881



Training..:  14%|█▍        | 29/202 [00:15<01:21,  2.12it/s][A

Training Loss: 2.540268659591675



Training..:  15%|█▍        | 30/202 [00:15<01:20,  2.13it/s][A

Training Loss: 1.9276278018951416



Training..:  15%|█▌        | 31/202 [00:15<01:20,  2.13it/s][A

Training Loss: 1.9089219570159912



Training..:  16%|█▌        | 32/202 [00:16<01:20,  2.12it/s][A

Training Loss: 1.7227388620376587



Training..:  16%|█▋        | 33/202 [00:16<01:19,  2.12it/s][A

Training Loss: 1.4689892530441284



Training..:  17%|█▋        | 34/202 [00:17<01:19,  2.12it/s][A

Training Loss: 1.400622844696045



Training..:  17%|█▋        | 35/202 [00:17<01:18,  2.12it/s][A

Training Loss: 1.1888957023620605



Training..:  18%|█▊        | 36/202 [00:18<01:18,  2.12it/s][A

Training Loss: 1.1961842775344849



Training..:  18%|█▊        | 37/202 [00:18<01:17,  2.12it/s][A

Training Loss: 0.9767911434173584



Training..:  19%|█▉        | 38/202 [00:19<01:17,  2.12it/s][A

Training Loss: 0.8389940857887268



Training..:  19%|█▉        | 39/202 [00:19<01:17,  2.11it/s][A

Training Loss: 0.8988527059555054



Training..:  20%|█▉        | 40/202 [00:20<01:16,  2.11it/s][A

Training Loss: 0.7220699191093445



Training..:  20%|██        | 41/202 [00:20<01:16,  2.11it/s][A

Training Loss: 0.838057279586792



Training..:  21%|██        | 42/202 [00:21<01:15,  2.11it/s][A

Training Loss: 0.7540006637573242



Training..:  21%|██▏       | 43/202 [00:21<01:15,  2.11it/s][A

Training Loss: 0.5600711107254028



Training..:  22%|██▏       | 44/202 [00:22<01:14,  2.11it/s][A

Training Loss: 0.5209175944328308



Training..:  22%|██▏       | 45/202 [00:22<01:14,  2.11it/s][A

Training Loss: 0.5023812651634216



Training..:  23%|██▎       | 46/202 [00:23<01:13,  2.11it/s][A

Training Loss: 0.6759122014045715



Training..:  23%|██▎       | 47/202 [00:23<01:13,  2.11it/s][A

Training Loss: 0.4728981554508209



Training..:  24%|██▍       | 48/202 [00:23<01:12,  2.12it/s][A

Training Loss: 0.5549396872520447



Training..:  24%|██▍       | 49/202 [00:24<01:12,  2.11it/s][A

Training Loss: 0.45381948351860046



Training..:  25%|██▍       | 50/202 [00:24<01:11,  2.12it/s][A

Training Loss: 0.4202094078063965



Training..:  25%|██▌       | 51/202 [00:25<01:11,  2.12it/s][A

Training Loss: 0.4291210472583771



Training..:  26%|██▌       | 52/202 [00:25<01:11,  2.11it/s][A

Training Loss: 0.5039103031158447



Training..:  26%|██▌       | 53/202 [00:26<01:10,  2.11it/s][A

Training Loss: 0.4135395884513855



Training..:  27%|██▋       | 54/202 [00:26<01:10,  2.11it/s][A

Training Loss: 0.43455439805984497



Training..:  27%|██▋       | 55/202 [00:27<01:09,  2.11it/s][A

Training Loss: 0.3290570080280304



Training..:  28%|██▊       | 56/202 [00:27<01:09,  2.11it/s][A

Training Loss: 0.3535781502723694



Training..:  28%|██▊       | 57/202 [00:28<01:08,  2.11it/s][A

Training Loss: 0.3893149793148041



Training..:  29%|██▊       | 58/202 [00:28<01:08,  2.11it/s][A

Training Loss: 0.390609472990036



Training..:  29%|██▉       | 59/202 [00:29<01:08,  2.10it/s][A

Training Loss: 0.28484588861465454



Training..:  30%|██▉       | 60/202 [00:29<01:07,  2.10it/s][A

Training Loss: 0.37937480211257935



Training..:  30%|███       | 61/202 [00:30<01:07,  2.10it/s][A

Training Loss: 0.32333239912986755



Training..:  31%|███       | 62/202 [00:30<01:06,  2.10it/s][A

Training Loss: 0.3279130160808563



Training..:  31%|███       | 63/202 [00:31<01:06,  2.10it/s][A

Training Loss: 0.30793437361717224



Training..:  32%|███▏      | 64/202 [00:31<01:05,  2.10it/s][A

Training Loss: 0.22336265444755554



Training..:  32%|███▏      | 65/202 [00:32<01:05,  2.09it/s][A

Training Loss: 0.28309229016304016



Training..:  33%|███▎      | 66/202 [00:32<01:04,  2.10it/s][A

Training Loss: 0.3050287067890167



Training..:  33%|███▎      | 67/202 [00:33<01:04,  2.09it/s][A

Training Loss: 0.28256091475486755



Training..:  34%|███▎      | 68/202 [00:33<01:03,  2.09it/s][A

Training Loss: 0.23771065473556519



Training..:  34%|███▍      | 69/202 [00:33<01:03,  2.09it/s][A

Training Loss: 0.3133109509944916



Training..:  35%|███▍      | 70/202 [00:34<01:03,  2.09it/s][A

Training Loss: 0.2693745195865631



Training..:  35%|███▌      | 71/202 [00:34<01:02,  2.09it/s][A

Training Loss: 0.2868708074092865



Training..:  36%|███▌      | 72/202 [00:35<01:02,  2.09it/s][A

Training Loss: 0.2912801504135132



Training..:  36%|███▌      | 73/202 [00:35<01:01,  2.09it/s][A

Training Loss: 0.24093283712863922



Training..:  37%|███▋      | 74/202 [00:36<01:01,  2.09it/s][A

Training Loss: 0.26415398716926575



Training..:  37%|███▋      | 75/202 [00:36<01:00,  2.08it/s][A

Training Loss: 0.2802063524723053



Training..:  38%|███▊      | 76/202 [00:37<01:00,  2.09it/s][A

Training Loss: 0.19722476601600647



Training..:  38%|███▊      | 77/202 [00:37<00:59,  2.09it/s][A

Training Loss: 0.2663070261478424



Training..:  39%|███▊      | 78/202 [00:38<00:59,  2.09it/s][A

Training Loss: 0.23593758046627045



Training..:  39%|███▉      | 79/202 [00:38<00:58,  2.09it/s][A

Training Loss: 0.21893049776554108



Training..:  40%|███▉      | 80/202 [00:39<00:58,  2.08it/s][A

Training Loss: 0.2284763902425766



Training..:  40%|████      | 81/202 [00:39<00:57,  2.09it/s][A

Training Loss: 0.24090296030044556



Training..:  41%|████      | 82/202 [00:40<00:57,  2.09it/s][A

Training Loss: 0.20808079838752747



Training..:  41%|████      | 83/202 [00:40<00:57,  2.08it/s][A

Training Loss: 0.253851979970932



Training..:  42%|████▏     | 84/202 [00:41<00:56,  2.08it/s][A

Training Loss: 0.23560278117656708



Training..:  42%|████▏     | 85/202 [00:41<00:56,  2.08it/s][A

Training Loss: 0.20344291627407074



Training..:  43%|████▎     | 86/202 [00:42<00:55,  2.08it/s][A

Training Loss: 0.2764950692653656



Training..:  43%|████▎     | 87/202 [00:42<00:55,  2.08it/s][A

Training Loss: 0.17232804000377655



Training..:  44%|████▎     | 88/202 [00:43<00:54,  2.08it/s][A

Training Loss: 0.18775534629821777



Training..:  44%|████▍     | 89/202 [00:43<00:54,  2.07it/s][A

Training Loss: 0.20768901705741882



Training..:  45%|████▍     | 90/202 [00:44<00:53,  2.08it/s][A

Training Loss: 0.21975597739219666



Training..:  45%|████▌     | 91/202 [00:44<00:53,  2.08it/s][A

Training Loss: 0.1972503364086151



Training..:  46%|████▌     | 92/202 [00:45<00:52,  2.08it/s][A

Training Loss: 0.18979275226593018



Training..:  46%|████▌     | 93/202 [00:45<00:52,  2.07it/s][A

Training Loss: 0.19940799474716187



Training..:  47%|████▋     | 94/202 [00:46<00:51,  2.08it/s][A

Training Loss: 0.1682264655828476



Training..:  47%|████▋     | 95/202 [00:46<00:51,  2.08it/s][A

Training Loss: 0.19501258432865143



Training..:  48%|████▊     | 96/202 [00:46<00:50,  2.08it/s][A

Training Loss: 0.180463507771492



Training..:  48%|████▊     | 97/202 [00:47<00:50,  2.08it/s][A

Training Loss: 0.22122862935066223



Training..:  49%|████▊     | 98/202 [00:47<00:50,  2.08it/s][A

Training Loss: 0.15994253754615784



Training..:  49%|████▉     | 99/202 [00:48<00:49,  2.08it/s][A

Training Loss: 0.257769912481308



Training..:  50%|████▉     | 100/202 [00:48<00:49,  2.08it/s][A

Training Loss: 0.16099722683429718



Training..:  50%|█████     | 101/202 [00:49<00:48,  2.07it/s][A

Training Loss: 0.16586154699325562



Training..:  50%|█████     | 102/202 [00:49<00:48,  2.08it/s][A

Training Loss: 0.17749309539794922



Training..:  51%|█████     | 103/202 [00:50<00:47,  2.07it/s][A

Training Loss: 0.18169111013412476



Training..:  51%|█████▏    | 104/202 [00:50<00:47,  2.07it/s][A

Training Loss: 0.20552648603916168



Training..:  52%|█████▏    | 105/202 [00:51<00:46,  2.07it/s][A

Training Loss: 0.20030267536640167



Training..:  52%|█████▏    | 106/202 [00:51<00:46,  2.07it/s][A

Training Loss: 0.1967887282371521



Training..:  53%|█████▎    | 107/202 [00:52<00:45,  2.07it/s][A

Training Loss: 0.17032073438167572



Training..:  53%|█████▎    | 108/202 [00:52<00:45,  2.07it/s][A

Training Loss: 0.19029106199741364



Training..:  54%|█████▍    | 109/202 [00:53<00:45,  2.07it/s][A

Training Loss: 0.18935757875442505



Training..:  54%|█████▍    | 110/202 [00:53<00:44,  2.06it/s][A

Training Loss: 0.1892077922821045



Training..:  55%|█████▍    | 111/202 [00:54<00:44,  2.06it/s][A

Training Loss: 0.19320593774318695



Training..:  55%|█████▌    | 112/202 [00:54<00:43,  2.06it/s][A

Training Loss: 0.19400683045387268



Training..:  56%|█████▌    | 113/202 [00:55<00:43,  2.06it/s][A

Training Loss: 0.1645766645669937



Training..:  56%|█████▋    | 114/202 [00:55<00:42,  2.06it/s][A

Training Loss: 0.18265916407108307



Training..:  57%|█████▋    | 115/202 [00:56<00:42,  2.06it/s][A

Training Loss: 0.16190025210380554



Training..:  57%|█████▋    | 116/202 [00:56<00:41,  2.06it/s][A

Training Loss: 0.181467667222023



Training..:  58%|█████▊    | 117/202 [00:57<00:43,  1.95it/s][A

Training Loss: 0.22768297791481018



Training..:  58%|█████▊    | 118/202 [00:57<00:45,  1.87it/s][A

Training Loss: 0.15995173156261444



Training..:  59%|█████▉    | 119/202 [00:58<00:43,  1.90it/s][A

Training Loss: 0.1600380688905716



Training..:  59%|█████▉    | 120/202 [00:59<00:47,  1.71it/s][A

Training Loss: 0.15127147734165192



Training..:  60%|█████▉    | 121/202 [00:59<00:45,  1.80it/s][A

Training Loss: 0.21179261803627014



Training..:  60%|██████    | 122/202 [01:00<00:49,  1.61it/s][A

Training Loss: 0.17973002791404724



Training..:  61%|██████    | 123/202 [01:00<00:45,  1.72it/s][A

Training Loss: 0.1621013879776001



Training..:  61%|██████▏   | 124/202 [01:01<00:43,  1.80it/s][A

Training Loss: 0.16175980865955353



Training..:  62%|██████▏   | 125/202 [01:01<00:41,  1.87it/s][A

Training Loss: 0.1521773785352707



Training..:  62%|██████▏   | 126/202 [01:02<00:39,  1.91it/s][A

Training Loss: 0.1510436236858368



Training..:  63%|██████▎   | 127/202 [01:02<00:38,  1.96it/s][A

Training Loss: 0.17204898595809937



Training..:  63%|██████▎   | 128/202 [01:03<00:37,  1.99it/s][A

Training Loss: 0.1480303406715393



Training..:  64%|██████▍   | 129/202 [01:03<00:36,  2.01it/s][A

Training Loss: 0.16074591875076294



Training..:  64%|██████▍   | 130/202 [01:04<00:35,  2.02it/s][A

Training Loss: 0.14693708717823029



Training..:  65%|██████▍   | 131/202 [01:04<00:34,  2.04it/s][A

Training Loss: 0.1849546581506729



Training..:  65%|██████▌   | 132/202 [01:05<00:34,  2.04it/s][A

Training Loss: 0.1795751452445984



Training..:  66%|██████▌   | 133/202 [01:05<00:33,  2.05it/s][A

Training Loss: 0.18505792319774628



Training..:  66%|██████▋   | 134/202 [01:06<00:33,  2.05it/s][A

Training Loss: 0.20438988506793976



Training..:  67%|██████▋   | 135/202 [01:06<00:32,  2.05it/s][A

Training Loss: 0.16734036803245544



Training..:  67%|██████▋   | 136/202 [01:07<00:32,  2.05it/s][A

Training Loss: 0.15873867273330688



Training..:  68%|██████▊   | 137/202 [01:07<00:31,  2.05it/s][A

Training Loss: 0.16336895525455475



Training..:  68%|██████▊   | 138/202 [01:08<00:31,  2.05it/s][A

Training Loss: 0.17119711637496948



Training..:  69%|██████▉   | 139/202 [01:08<00:30,  2.06it/s][A

Training Loss: 0.16643501818180084



Training..:  69%|██████▉   | 140/202 [01:09<00:30,  2.05it/s][A

Training Loss: 0.16962991654872894



Training..:  70%|██████▉   | 141/202 [01:09<00:29,  2.06it/s][A

Training Loss: 0.1508205235004425



Training..:  70%|███████   | 142/202 [01:10<00:29,  2.06it/s][A

Training Loss: 0.13375911116600037



Training..:  71%|███████   | 143/202 [01:10<00:28,  2.06it/s][A

Training Loss: 0.13967277109622955



Training..:  71%|███████▏  | 144/202 [01:11<00:28,  2.06it/s][A

Training Loss: 0.17850616574287415



Training..:  72%|███████▏  | 145/202 [01:11<00:27,  2.06it/s][A

Training Loss: 0.18236742913722992



Training..:  72%|███████▏  | 146/202 [01:11<00:27,  2.06it/s][A

Training Loss: 0.15687014162540436



Training..:  73%|███████▎  | 147/202 [01:12<00:26,  2.06it/s][A

Training Loss: 0.15930446982383728



Training..:  73%|███████▎  | 148/202 [01:12<00:26,  2.06it/s][A

Training Loss: 0.18206633627414703



Training..:  74%|███████▍  | 149/202 [01:13<00:25,  2.05it/s][A

Training Loss: 0.15499931573867798



Training..:  74%|███████▍  | 150/202 [01:13<00:25,  2.05it/s][A

Training Loss: 0.13138696551322937



Training..:  75%|███████▍  | 151/202 [01:14<00:24,  2.05it/s][A

Training Loss: 0.14451782405376434



Training..:  75%|███████▌  | 152/202 [01:14<00:24,  2.05it/s][A

Training Loss: 0.16466966271400452



Training..:  76%|███████▌  | 153/202 [01:15<00:23,  2.05it/s][A

Training Loss: 0.17543603479862213



Training..:  76%|███████▌  | 154/202 [01:15<00:23,  2.05it/s][A

Training Loss: 0.18540464341640472



Training..:  77%|███████▋  | 155/202 [01:16<00:22,  2.05it/s][A

Training Loss: 0.18867890536785126



Training..:  77%|███████▋  | 156/202 [01:16<00:22,  2.05it/s][A

Training Loss: 0.16715764999389648



Training..:  78%|███████▊  | 157/202 [01:17<00:21,  2.05it/s][A

Training Loss: 0.13036715984344482



Training..:  78%|███████▊  | 158/202 [01:17<00:21,  2.05it/s][A

Training Loss: 0.12192483246326447



Training..:  79%|███████▊  | 159/202 [01:18<00:20,  2.05it/s][A

Training Loss: 0.1182166263461113



Training..:  79%|███████▉  | 160/202 [01:18<00:20,  2.05it/s][A

Training Loss: 0.15247659385204315



Training..:  80%|███████▉  | 161/202 [01:19<00:20,  2.04it/s][A

Training Loss: 0.17207129299640656



Training..:  80%|████████  | 162/202 [01:19<00:19,  2.04it/s][A

Training Loss: 0.11706843227148056



Training..:  81%|████████  | 163/202 [01:20<00:19,  2.04it/s][A

Training Loss: 0.13909845054149628



Training..:  81%|████████  | 164/202 [01:20<00:18,  2.04it/s][A

Training Loss: 0.1257503777742386



Training..:  82%|████████▏ | 165/202 [01:21<00:18,  2.04it/s][A

Training Loss: 0.1506631225347519



Training..:  82%|████████▏ | 166/202 [01:21<00:17,  2.04it/s][A

Training Loss: 0.12669269740581512



Training..:  83%|████████▎ | 167/202 [01:22<00:17,  2.04it/s][A

Training Loss: 0.15434938669204712



Training..:  83%|████████▎ | 168/202 [01:22<00:16,  2.04it/s][A

Training Loss: 0.12632834911346436



Training..:  84%|████████▎ | 169/202 [01:23<00:16,  2.04it/s][A

Training Loss: 0.16985328495502472



Training..:  84%|████████▍ | 170/202 [01:23<00:15,  2.04it/s][A

Training Loss: 0.18640434741973877



Training..:  85%|████████▍ | 171/202 [01:24<00:15,  2.03it/s][A

Training Loss: 0.14659607410430908



Training..:  85%|████████▌ | 172/202 [01:24<00:14,  2.03it/s][A

Training Loss: 0.15698200464248657



Training..:  86%|████████▌ | 173/202 [01:25<00:14,  2.03it/s][A

Training Loss: 0.14045150578022003



Training..:  86%|████████▌ | 174/202 [01:25<00:13,  2.03it/s][A

Training Loss: 0.1256723701953888



Training..:  87%|████████▋ | 175/202 [01:26<00:13,  2.03it/s][A

Training Loss: 0.16651561856269836



Training..:  87%|████████▋ | 176/202 [01:26<00:12,  2.03it/s][A

Training Loss: 0.1519530713558197



Training..:  88%|████████▊ | 177/202 [01:27<00:12,  2.03it/s][A

Training Loss: 0.1320839375257492



Training..:  88%|████████▊ | 178/202 [01:27<00:11,  2.03it/s][A

Training Loss: 0.13419097661972046



Training..:  89%|████████▊ | 179/202 [01:28<00:11,  2.03it/s][A

Training Loss: 0.1369861364364624



Training..:  89%|████████▉ | 180/202 [01:28<00:10,  2.03it/s][A

Training Loss: 0.12601493299007416



Training..:  90%|████████▉ | 181/202 [01:29<00:10,  2.03it/s][A

Training Loss: 0.1261839121580124



Training..:  90%|█████████ | 182/202 [01:29<00:09,  2.03it/s][A

Training Loss: 0.14671678841114044



Training..:  91%|█████████ | 183/202 [01:30<00:09,  2.03it/s][A

Training Loss: 0.1327182501554489



Training..:  91%|█████████ | 184/202 [01:30<00:08,  2.03it/s][A

Training Loss: 0.12679032981395721



Training..:  92%|█████████▏| 185/202 [01:31<00:08,  2.03it/s][A

Training Loss: 0.12964612245559692



Training..:  92%|█████████▏| 186/202 [01:31<00:07,  2.03it/s][A

Training Loss: 0.12458294630050659



Training..:  93%|█████████▎| 187/202 [01:32<00:07,  2.03it/s][A

Training Loss: 0.14379724860191345



Training..:  93%|█████████▎| 188/202 [01:32<00:06,  2.03it/s][A

Training Loss: 0.21941883862018585



Training..:  94%|█████████▎| 189/202 [01:33<00:06,  2.02it/s][A

Training Loss: 0.14794376492500305



Training..:  94%|█████████▍| 190/202 [01:33<00:05,  2.03it/s][A

Training Loss: 0.09827662259340286



Training..:  95%|█████████▍| 191/202 [01:34<00:05,  2.03it/s][A

Training Loss: 0.1151118129491806



Training..:  95%|█████████▌| 192/202 [01:34<00:04,  2.03it/s][A

Training Loss: 0.09623011201620102



Training..:  96%|█████████▌| 193/202 [01:35<00:04,  2.03it/s][A

Training Loss: 0.1163177341222763



Training..:  96%|█████████▌| 194/202 [01:35<00:03,  2.02it/s][A

Training Loss: 0.12214352190494537



Training..:  97%|█████████▋| 195/202 [01:36<00:03,  2.02it/s][A

Training Loss: 0.11961586773395538



Training..:  97%|█████████▋| 196/202 [01:36<00:02,  2.03it/s][A

Training Loss: 0.12270403653383255



Training..:  98%|█████████▊| 197/202 [01:37<00:02,  2.02it/s][A

Training Loss: 0.10066527873277664



Training..:  98%|█████████▊| 198/202 [01:37<00:01,  2.02it/s][A

Training Loss: 0.10868676751852036



Training..:  99%|█████████▊| 199/202 [01:38<00:01,  2.02it/s][A

Training Loss: 0.11724640429019928



Training..:  99%|█████████▉| 200/202 [01:38<00:00,  2.01it/s][A

Training Loss: 0.1306128203868866



Training..: 100%|█████████▉| 201/202 [01:39<00:00,  2.02it/s][A

Training Loss: 0.18295368552207947



Training..: 100%|██████████| 202/202 [01:39<00:00,  2.03it/s]


Training Loss: 0.13944651186466217



Validating..:   0%|          | 0/26 [00:00<?, ?it/s][A
Validating..:   4%|▍         | 1/26 [00:00<00:03,  7.41it/s][A
Validating..:   8%|▊         | 2/26 [00:00<00:03,  7.12it/s][A
Validating..:  12%|█▏        | 3/26 [00:00<00:03,  6.80it/s][A
Validating..:  15%|█▌        | 4/26 [00:00<00:03,  6.68it/s][A
Validating..:  19%|█▉        | 5/26 [00:00<00:03,  6.69it/s][A
Validating..:  23%|██▎       | 6/26 [00:00<00:03,  6.63it/s][A
Validating..:  27%|██▋       | 7/26 [00:01<00:02,  6.66it/s][A
Validating..:  31%|███       | 8/26 [00:01<00:02,  6.74it/s][A
Validating..:  35%|███▍      | 9/26 [00:01<00:02,  6.79it/s][A
Validating..:  38%|███▊      | 10/26 [00:01<00:02,  6.76it/s][A
Validating..:  42%|████▏     | 11/26 [00:01<00:02,  6.74it/s][A
Validating..:  46%|████▌     | 12/26 [00:01<00:02,  6.74it/s][A
Validating..:  50%|█████     | 13/26 [00:01<00:01,  6.66it/s][A
Validating..:  54%|█████▍    | 14/26 [00:02<00:01,  6.69it/s][A
Validating..:  58%|█████▊    | 15/26 [00:0

Validation Loss: 0.10106786512411557
Epoch 2



Training..:   0%|          | 0/202 [00:00<?, ?it/s][A
Training..:   0%|          | 1/202 [00:00<01:37,  2.06it/s][A

Training Loss: 0.13255585730075836



Training..:   1%|          | 2/202 [00:00<01:38,  2.03it/s][A

Training Loss: 0.10033150762319565



Training..:   1%|▏         | 3/202 [00:01<01:38,  2.02it/s][A

Training Loss: 0.11352991312742233



Training..:   2%|▏         | 4/202 [00:01<01:37,  2.02it/s][A

Training Loss: 0.12889230251312256



Training..:   2%|▏         | 5/202 [00:02<01:38,  2.01it/s][A

Training Loss: 0.07465095072984695



Training..:   3%|▎         | 6/202 [00:02<01:36,  2.02it/s][A

Training Loss: 0.09854462742805481



Training..:   3%|▎         | 7/202 [00:03<01:36,  2.02it/s][A

Training Loss: 0.11367788910865784



Training..:   4%|▍         | 8/202 [00:03<01:36,  2.02it/s][A

Training Loss: 0.164931520819664



Training..:   4%|▍         | 9/202 [00:04<01:35,  2.02it/s][A

Training Loss: 0.09486793726682663



Training..:   5%|▍         | 10/202 [00:04<01:35,  2.01it/s][A

Training Loss: 0.11246809363365173



Training..:   5%|▌         | 11/202 [00:05<01:35,  2.01it/s][A

Training Loss: 0.09973352402448654



Training..:   6%|▌         | 12/202 [00:05<01:34,  2.01it/s][A

Training Loss: 0.10195034742355347



Training..:   6%|▋         | 13/202 [00:06<01:34,  2.01it/s][A

Training Loss: 0.1372835338115692



Training..:   7%|▋         | 14/202 [00:06<01:33,  2.01it/s][A

Training Loss: 0.12004843354225159



Training..:   7%|▋         | 15/202 [00:07<01:33,  2.00it/s][A

Training Loss: 0.08429661393165588



Training..:   8%|▊         | 16/202 [00:07<01:32,  2.00it/s][A

Training Loss: 0.10831460356712341



Training..:   8%|▊         | 17/202 [00:08<01:31,  2.01it/s][A

Training Loss: 0.10412734001874924



Training..:   9%|▉         | 18/202 [00:08<01:31,  2.01it/s][A

Training Loss: 0.12112365663051605



Training..:   9%|▉         | 19/202 [00:09<01:30,  2.02it/s][A

Training Loss: 0.112115278840065



Training..:  10%|▉         | 20/202 [00:09<01:30,  2.02it/s][A

Training Loss: 0.08604780584573746



Training..:  10%|█         | 21/202 [00:10<01:29,  2.02it/s][A

Training Loss: 0.12440820783376694



Training..:  11%|█         | 22/202 [00:10<01:29,  2.02it/s][A

Training Loss: 0.1619894951581955



Training..:  11%|█▏        | 23/202 [00:11<01:28,  2.01it/s][A

Training Loss: 0.08840683102607727



Training..:  12%|█▏        | 24/202 [00:11<01:28,  2.01it/s][A

Training Loss: 0.09868723899126053



Training..:  12%|█▏        | 25/202 [00:12<01:28,  2.01it/s][A

Training Loss: 0.08921337127685547



Training..:  13%|█▎        | 26/202 [00:12<01:27,  2.01it/s][A

Training Loss: 0.12527629733085632



Training..:  13%|█▎        | 27/202 [00:13<01:27,  2.01it/s][A

Training Loss: 0.11907841265201569



Training..:  14%|█▍        | 28/202 [00:13<01:26,  2.01it/s][A

Training Loss: 0.10598873347043991



Training..:  14%|█▍        | 29/202 [00:14<01:26,  2.01it/s][A

Training Loss: 0.07014697045087814



Training..:  15%|█▍        | 30/202 [00:14<01:25,  2.01it/s][A

Training Loss: 0.08192970603704453



Training..:  15%|█▌        | 31/202 [00:15<01:24,  2.01it/s][A

Training Loss: 0.1121867299079895



Training..:  16%|█▌        | 32/202 [00:15<01:24,  2.01it/s][A

Training Loss: 0.08652468025684357



Training..:  16%|█▋        | 33/202 [00:16<01:24,  2.01it/s][A

Training Loss: 0.08651942014694214



Training..:  17%|█▋        | 34/202 [00:16<01:23,  2.01it/s][A

Training Loss: 0.12468049675226212



Training..:  17%|█▋        | 35/202 [00:17<01:23,  2.01it/s][A

Training Loss: 0.0717010349035263



Training..:  18%|█▊        | 36/202 [00:17<01:22,  2.00it/s][A

Training Loss: 0.09676232933998108



Training..:  18%|█▊        | 37/202 [00:18<01:22,  2.00it/s][A

Training Loss: 0.11726566404104233



Training..:  19%|█▉        | 38/202 [00:18<01:21,  2.00it/s][A

Training Loss: 0.09468492865562439



Training..:  19%|█▉        | 39/202 [00:19<01:21,  2.00it/s][A

Training Loss: 0.09736175835132599



Training..:  20%|█▉        | 40/202 [00:19<01:21,  1.99it/s][A

Training Loss: 0.08368627727031708



Training..:  20%|██        | 41/202 [00:20<01:20,  2.00it/s][A

Training Loss: 0.09161201864480972



Training..:  21%|██        | 42/202 [00:20<01:20,  1.99it/s][A

Training Loss: 0.112314373254776



Training..:  21%|██▏       | 43/202 [00:21<01:19,  2.00it/s][A

Training Loss: 0.07995229214429855



Training..:  22%|██▏       | 44/202 [00:21<01:18,  2.00it/s][A

Training Loss: 0.10196007043123245



Training..:  22%|██▏       | 45/202 [00:22<01:18,  2.00it/s][A

Training Loss: 0.07579883188009262



Training..:  23%|██▎       | 46/202 [00:22<01:17,  2.01it/s][A

Training Loss: 0.10710595548152924



Training..:  23%|██▎       | 47/202 [00:23<01:17,  2.01it/s][A

Training Loss: 0.10509619116783142



Training..:  24%|██▍       | 48/202 [00:23<01:16,  2.01it/s][A

Training Loss: 0.10359375923871994



Training..:  24%|██▍       | 49/202 [00:24<01:16,  2.00it/s][A

Training Loss: 0.09554392099380493



Training..:  25%|██▍       | 50/202 [00:24<01:15,  2.00it/s][A

Training Loss: 0.08875808864831924



Training..:  25%|██▌       | 51/202 [00:25<01:15,  2.00it/s][A

Training Loss: 0.08421377092599869



Training..:  26%|██▌       | 52/202 [00:25<01:15,  2.00it/s][A

Training Loss: 0.08610968291759491



Training..:  26%|██▌       | 53/202 [00:26<01:14,  1.99it/s][A

Training Loss: 0.13003602623939514



Training..:  27%|██▋       | 54/202 [00:26<01:14,  1.98it/s][A

Training Loss: 0.10645373165607452



Training..:  27%|██▋       | 55/202 [00:27<01:14,  1.98it/s][A

Training Loss: 0.09151594340801239



Training..:  28%|██▊       | 56/202 [00:27<01:13,  1.98it/s][A

Training Loss: 0.10317876189947128



Training..:  28%|██▊       | 57/202 [00:28<01:13,  1.98it/s][A

Training Loss: 0.07957639545202255



Training..:  29%|██▊       | 58/202 [00:28<01:12,  1.98it/s][A

Training Loss: 0.11827234923839569



Training..:  29%|██▉       | 59/202 [00:29<01:12,  1.98it/s][A

Training Loss: 0.07508457452058792



Training..:  30%|██▉       | 60/202 [00:29<01:11,  1.97it/s][A

Training Loss: 0.08321680128574371



Training..:  30%|███       | 61/202 [00:30<01:11,  1.96it/s][A

Training Loss: 0.11987761408090591



Training..:  31%|███       | 62/202 [00:30<01:11,  1.94it/s][A

Training Loss: 0.1000344529747963



Training..:  31%|███       | 63/202 [00:31<01:11,  1.96it/s][A

Training Loss: 0.10255151987075806



Training..:  32%|███▏      | 64/202 [00:31<01:10,  1.96it/s][A

Training Loss: 0.0630822479724884



Training..:  32%|███▏      | 65/202 [00:32<01:09,  1.97it/s][A

Training Loss: 0.07174241542816162



Training..:  33%|███▎      | 66/202 [00:33<01:09,  1.97it/s][A

Training Loss: 0.09458503872156143



Training..:  33%|███▎      | 67/202 [00:33<01:08,  1.97it/s][A

Training Loss: 0.08798833936452866



Training..:  34%|███▎      | 68/202 [00:34<01:08,  1.95it/s][A

Training Loss: 0.08545155823230743



Training..:  34%|███▍      | 69/202 [00:34<01:07,  1.96it/s][A

Training Loss: 0.06929567456245422



Training..:  35%|███▍      | 70/202 [00:35<01:06,  1.98it/s][A

Training Loss: 0.08051545917987823



Training..:  35%|███▌      | 71/202 [00:35<01:06,  1.98it/s][A

Training Loss: 0.07136468589305878



Training..:  36%|███▌      | 72/202 [00:36<01:05,  1.99it/s][A

Training Loss: 0.08793777972459793



Training..:  36%|███▌      | 73/202 [00:36<01:04,  2.00it/s][A

Training Loss: 0.06646855920553207



Training..:  37%|███▋      | 74/202 [00:37<01:04,  1.99it/s][A

Training Loss: 0.0975390076637268



Training..:  37%|███▋      | 75/202 [00:37<01:03,  1.99it/s][A

Training Loss: 0.08854268491268158



Training..:  38%|███▊      | 76/202 [00:38<01:03,  2.00it/s][A

Training Loss: 0.04501715674996376



Training..:  38%|███▊      | 77/202 [00:38<01:02,  1.99it/s][A

Training Loss: 0.07960689067840576



Training..:  39%|███▊      | 78/202 [00:39<01:02,  1.99it/s][A

Training Loss: 0.07544316351413727



Training..:  39%|███▉      | 79/202 [00:39<01:01,  2.00it/s][A

Training Loss: 0.051065973937511444



Training..:  40%|███▉      | 80/202 [00:40<01:01,  2.00it/s][A

Training Loss: 0.09891531616449356



Training..:  40%|████      | 81/202 [00:40<01:00,  2.00it/s][A

Training Loss: 0.07828779518604279



Training..:  41%|████      | 82/202 [00:41<01:00,  2.00it/s][A

Training Loss: 0.055634625256061554



Training..:  41%|████      | 83/202 [00:41<00:59,  2.00it/s][A

Training Loss: 0.09697026759386063



Training..:  42%|████▏     | 84/202 [00:42<00:59,  2.00it/s][A

Training Loss: 0.07768310606479645



Training..:  42%|████▏     | 85/202 [00:42<00:58,  1.99it/s][A

Training Loss: 0.05724826082587242



Training..:  43%|████▎     | 86/202 [00:43<00:58,  1.99it/s][A

Training Loss: 0.08151651918888092



Training..:  43%|████▎     | 87/202 [00:43<00:57,  2.00it/s][A

Training Loss: 0.08223838359117508



Training..:  44%|████▎     | 88/202 [00:44<00:57,  2.00it/s][A

Training Loss: 0.07240287214517593



Training..:  44%|████▍     | 89/202 [00:44<00:56,  1.99it/s][A

Training Loss: 0.08313468098640442



Training..:  45%|████▍     | 90/202 [00:45<00:56,  1.99it/s][A

Training Loss: 0.06895651668310165



Training..:  45%|████▌     | 91/202 [00:45<00:56,  1.98it/s][A

Training Loss: 0.06757375597953796



Training..:  46%|████▌     | 92/202 [00:46<00:55,  1.98it/s][A

Training Loss: 0.06260323524475098



Training..:  46%|████▌     | 93/202 [00:46<00:55,  1.98it/s][A

Training Loss: 0.059231407940387726



Training..:  47%|████▋     | 94/202 [00:47<00:54,  1.98it/s][A

Training Loss: 0.055824361741542816



Training..:  47%|████▋     | 95/202 [00:47<00:53,  1.99it/s][A

Training Loss: 0.07205051183700562



Training..:  48%|████▊     | 96/202 [00:48<00:53,  1.99it/s][A

Training Loss: 0.06020212545990944



Training..:  48%|████▊     | 97/202 [00:48<00:52,  1.99it/s][A

Training Loss: 0.06620911508798599



Training..:  49%|████▊     | 98/202 [00:49<00:52,  1.99it/s][A

Training Loss: 0.0636269822716713



Training..:  49%|████▉     | 99/202 [00:49<00:51,  1.99it/s][A

Training Loss: 0.11122491955757141



Training..:  50%|████▉     | 100/202 [00:50<00:51,  1.99it/s][A

Training Loss: 0.07162352651357651



Training..:  50%|█████     | 101/202 [00:50<00:50,  1.99it/s][A

Training Loss: 0.04058932885527611



Training..:  50%|█████     | 102/202 [00:51<00:50,  1.99it/s][A

Training Loss: 0.05563054233789444



Training..:  51%|█████     | 103/202 [00:51<00:49,  1.99it/s][A

Training Loss: 0.055164776742458344



Training..:  51%|█████▏    | 104/202 [00:52<00:49,  1.99it/s][A

Training Loss: 0.12521211802959442



Training..:  52%|█████▏    | 105/202 [00:52<00:48,  1.98it/s][A

Training Loss: 0.07249268144369125



Training..:  52%|█████▏    | 106/202 [00:53<00:48,  1.99it/s][A

Training Loss: 0.09018947929143906



Training..:  53%|█████▎    | 107/202 [00:53<00:48,  1.94it/s][A

Training Loss: 0.05622377619147301



Training..:  53%|█████▎    | 108/202 [00:54<00:48,  1.93it/s][A

Training Loss: 0.09819278120994568



Training..:  54%|█████▍    | 109/202 [00:54<00:47,  1.95it/s][A

Training Loss: 0.0581674724817276



Training..:  54%|█████▍    | 110/202 [00:55<00:46,  1.96it/s][A

Training Loss: 0.08020041882991791



Training..:  55%|█████▍    | 111/202 [00:55<00:46,  1.97it/s][A

Training Loss: 0.07896361500024796



Training..:  55%|█████▌    | 112/202 [00:56<00:45,  1.98it/s][A

Training Loss: 0.0824645534157753



Training..:  56%|█████▌    | 113/202 [00:56<00:45,  1.97it/s][A

Training Loss: 0.0511476993560791



Training..:  56%|█████▋    | 114/202 [00:57<00:44,  1.96it/s][A

Training Loss: 0.08210176974534988



Training..:  57%|█████▋    | 115/202 [00:57<00:44,  1.96it/s][A

Training Loss: 0.06007888913154602



Training..:  57%|█████▋    | 116/202 [00:58<00:44,  1.93it/s][A

Training Loss: 0.06283605098724365



Training..:  58%|█████▊    | 117/202 [00:58<00:44,  1.93it/s][A

Training Loss: 0.083583764731884



Training..:  58%|█████▊    | 118/202 [00:59<00:43,  1.92it/s][A

Training Loss: 0.0668286681175232



Training..:  59%|█████▉    | 119/202 [00:59<00:43,  1.91it/s][A

Training Loss: 0.05680239200592041



Training..:  59%|█████▉    | 120/202 [01:00<00:42,  1.94it/s][A

Training Loss: 0.05377020686864853



Training..:  60%|█████▉    | 121/202 [01:00<00:41,  1.95it/s][A

Training Loss: 0.05853186547756195



Training..:  60%|██████    | 122/202 [01:01<00:41,  1.95it/s][A

Training Loss: 0.05149461328983307



Training..:  61%|██████    | 123/202 [01:01<00:40,  1.94it/s][A

Training Loss: 0.059151917695999146



Training..:  61%|██████▏   | 124/202 [01:02<00:40,  1.92it/s][A

Training Loss: 0.049608923494815826



Training..:  62%|██████▏   | 125/202 [01:02<00:40,  1.90it/s][A

Training Loss: 0.053170934319496155



Training..:  62%|██████▏   | 126/202 [01:03<00:39,  1.91it/s][A

Training Loss: 0.07896536588668823



Training..:  63%|██████▎   | 127/202 [01:03<00:39,  1.92it/s][A

Training Loss: 0.07192271202802658



Training..:  63%|██████▎   | 128/202 [01:04<00:38,  1.93it/s][A

Training Loss: 0.059679143130779266



Training..:  64%|██████▍   | 129/202 [01:05<00:37,  1.93it/s][A

Training Loss: 0.07050062716007233



Training..:  64%|██████▍   | 130/202 [01:05<00:37,  1.93it/s][A

Training Loss: 0.060384709388017654



Training..:  65%|██████▍   | 131/202 [01:06<00:36,  1.94it/s][A

Training Loss: 0.08257370442152023



Training..:  65%|██████▌   | 132/202 [01:06<00:36,  1.93it/s][A

Training Loss: 0.07428596168756485



Training..:  66%|██████▌   | 133/202 [01:07<00:35,  1.93it/s][A

Training Loss: 0.08071889728307724



Training..:  66%|██████▋   | 134/202 [01:07<00:35,  1.93it/s][A

Training Loss: 0.09145129472017288



Training..:  67%|██████▋   | 135/202 [01:08<00:34,  1.94it/s][A

Training Loss: 0.051565054804086685



Training..:  67%|██████▋   | 136/202 [01:08<00:33,  1.94it/s][A

Training Loss: 0.06607529520988464



Training..:  68%|██████▊   | 137/202 [01:09<00:33,  1.95it/s][A

Training Loss: 0.05912935733795166



Training..:  68%|██████▊   | 138/202 [01:09<00:32,  1.96it/s][A

Training Loss: 0.06834346055984497



Training..:  69%|██████▉   | 139/202 [01:10<00:32,  1.95it/s][A

Training Loss: 0.05891089513897896



Training..:  69%|██████▉   | 140/202 [01:10<00:33,  1.85it/s][A

Training Loss: 0.0620771162211895



Training..:  70%|██████▉   | 141/202 [01:11<00:32,  1.85it/s][A

Training Loss: 0.051495593041181564



Training..:  70%|███████   | 142/202 [01:11<00:31,  1.88it/s][A

Training Loss: 0.04936930164694786



Training..:  71%|███████   | 143/202 [01:12<00:31,  1.90it/s][A

Training Loss: 0.057203538715839386



Training..:  71%|███████▏  | 144/202 [01:12<00:30,  1.92it/s][A

Training Loss: 0.07775717973709106



Training..:  72%|███████▏  | 145/202 [01:13<00:29,  1.93it/s][A

Training Loss: 0.09885352104902267



Training..:  72%|███████▏  | 146/202 [01:13<00:28,  1.95it/s][A

Training Loss: 0.06829319149255753



Training..:  73%|███████▎  | 147/202 [01:14<00:28,  1.95it/s][A

Training Loss: 0.06490540504455566



Training..:  73%|███████▎  | 148/202 [01:14<00:27,  1.96it/s][A

Training Loss: 0.07151348888874054



Training..:  74%|███████▍  | 149/202 [01:15<00:27,  1.96it/s][A

Training Loss: 0.07242513447999954



Training..:  74%|███████▍  | 150/202 [01:15<00:26,  1.96it/s][A

Training Loss: 0.0628838911652565



Training..:  75%|███████▍  | 151/202 [01:16<00:26,  1.96it/s][A

Training Loss: 0.05587756633758545



Training..:  75%|███████▌  | 152/202 [01:16<00:25,  1.96it/s][A

Training Loss: 0.0633598193526268



Training..:  76%|███████▌  | 153/202 [01:17<00:24,  1.96it/s][A

Training Loss: 0.061981409788131714



Training..:  76%|███████▌  | 154/202 [01:17<00:24,  1.96it/s][A

Training Loss: 0.11394762247800827



Training..:  77%|███████▋  | 155/202 [01:18<00:23,  1.96it/s][A

Training Loss: 0.10064087808132172



Training..:  77%|███████▋  | 156/202 [01:18<00:23,  1.97it/s][A

Training Loss: 0.06988757103681564



Training..:  78%|███████▊  | 157/202 [01:19<00:22,  1.97it/s][A

Training Loss: 0.048106539994478226



Training..:  78%|███████▊  | 158/202 [01:19<00:22,  1.97it/s][A

Training Loss: 0.051640164107084274



Training..:  79%|███████▊  | 159/202 [01:20<00:21,  1.97it/s][A

Training Loss: 0.06764334440231323



Training..:  79%|███████▉  | 160/202 [01:20<00:21,  1.97it/s][A

Training Loss: 0.05899607762694359



Training..:  80%|███████▉  | 161/202 [01:21<00:20,  1.97it/s][A

Training Loss: 0.08646034449338913



Training..:  80%|████████  | 162/202 [01:21<00:20,  1.97it/s][A

Training Loss: 0.046816445887088776



Training..:  81%|████████  | 163/202 [01:22<00:19,  1.97it/s][A

Training Loss: 0.05234504118561745



Training..:  81%|████████  | 164/202 [01:22<00:19,  1.97it/s][A

Training Loss: 0.04996193200349808



Training..:  82%|████████▏ | 165/202 [01:23<00:18,  1.96it/s][A

Training Loss: 0.06284582614898682



Training..:  82%|████████▏ | 166/202 [01:24<00:18,  1.97it/s][A

Training Loss: 0.04133198782801628



Training..:  83%|████████▎ | 167/202 [01:24<00:17,  1.97it/s][A

Training Loss: 0.06095809489488602



Training..:  83%|████████▎ | 168/202 [01:25<00:17,  1.96it/s][A

Training Loss: 0.04641442745923996



Training..:  84%|████████▎ | 169/202 [01:25<00:16,  1.96it/s][A

Training Loss: 0.07110058516263962



Training..:  84%|████████▍ | 170/202 [01:26<00:16,  1.96it/s][A

Training Loss: 0.13043604791164398



Training..:  85%|████████▍ | 171/202 [01:26<00:15,  1.96it/s][A

Training Loss: 0.05720753222703934



Training..:  85%|████████▌ | 172/202 [01:27<00:15,  1.97it/s][A

Training Loss: 0.07143781334161758



Training..:  86%|████████▌ | 173/202 [01:27<00:14,  1.96it/s][A

Training Loss: 0.05576995387673378



Training..:  86%|████████▌ | 174/202 [01:28<00:14,  1.96it/s][A

Training Loss: 0.05693840980529785



Training..:  87%|████████▋ | 175/202 [01:28<00:13,  1.96it/s][A

Training Loss: 0.09588153660297394



Training..:  87%|████████▋ | 176/202 [01:29<00:13,  1.97it/s][A

Training Loss: 0.06683654338121414



Training..:  88%|████████▊ | 177/202 [01:29<00:12,  1.96it/s][A

Training Loss: 0.060700900852680206



Training..:  88%|████████▊ | 178/202 [01:30<00:12,  1.97it/s][A

Training Loss: 0.05650237202644348



Training..:  89%|████████▊ | 179/202 [01:30<00:11,  1.97it/s][A

Training Loss: 0.05874926224350929



Training..:  89%|████████▉ | 180/202 [01:31<00:11,  1.97it/s][A

Training Loss: 0.06008944660425186



Training..:  90%|████████▉ | 181/202 [01:31<00:10,  1.97it/s][A

Training Loss: 0.05036274716258049



Training..:  90%|█████████ | 182/202 [01:32<00:10,  1.97it/s][A

Training Loss: 0.06884729862213135



Training..:  91%|█████████ | 183/202 [01:32<00:09,  1.98it/s][A

Training Loss: 0.03886999934911728



Training..:  91%|█████████ | 184/202 [01:33<00:09,  1.97it/s][A

Training Loss: 0.05416995659470558



Training..:  92%|█████████▏| 185/202 [01:33<00:08,  1.97it/s][A

Training Loss: 0.05934532731771469



Training..:  92%|█████████▏| 186/202 [01:34<00:08,  1.97it/s][A

Training Loss: 0.05181758850812912



Training..:  93%|█████████▎| 187/202 [01:34<00:07,  1.97it/s][A

Training Loss: 0.05390496551990509



Training..:  93%|█████████▎| 188/202 [01:35<00:07,  1.96it/s][A

Training Loss: 0.16804999113082886



Training..:  94%|█████████▎| 189/202 [01:35<00:06,  1.96it/s][A

Training Loss: 0.0776621475815773



Training..:  94%|█████████▍| 190/202 [01:36<00:06,  1.96it/s][A

Training Loss: 0.031584084033966064



Training..:  95%|█████████▍| 191/202 [01:36<00:05,  1.95it/s][A

Training Loss: 0.032656412571668625



Training..:  95%|█████████▌| 192/202 [01:37<00:05,  1.95it/s][A

Training Loss: 0.04970332235097885



Training..:  96%|█████████▌| 193/202 [01:37<00:04,  1.96it/s][A

Training Loss: 0.059345658868551254



Training..:  96%|█████████▌| 194/202 [01:38<00:04,  1.96it/s][A

Training Loss: 0.040366075932979584



Training..:  97%|█████████▋| 195/202 [01:38<00:03,  1.96it/s][A

Training Loss: 0.05115971341729164



Training..:  97%|█████████▋| 196/202 [01:39<00:03,  1.96it/s][A

Training Loss: 0.05926497280597687



Training..:  98%|█████████▊| 197/202 [01:39<00:02,  1.96it/s][A

Training Loss: 0.032695140689611435



Training..:  98%|█████████▊| 198/202 [01:40<00:02,  1.96it/s][A

Training Loss: 0.029613072052598



Training..:  99%|█████████▊| 199/202 [01:40<00:01,  1.96it/s][A

Training Loss: 0.04435290768742561



Training..:  99%|█████████▉| 200/202 [01:41<00:01,  1.96it/s][A

Training Loss: 0.0750633105635643



Training..: 100%|█████████▉| 201/202 [01:41<00:00,  1.96it/s][A

Training Loss: 0.08494839072227478



Training..: 100%|██████████| 202/202 [01:42<00:00,  1.98it/s]


Training Loss: 0.046618204563856125



Validating..:   0%|          | 0/26 [00:00<?, ?it/s][A
Validating..:   4%|▍         | 1/26 [00:00<00:03,  7.22it/s][A
Validating..:   8%|▊         | 2/26 [00:00<00:03,  6.86it/s][A
Validating..:  12%|█▏        | 3/26 [00:00<00:03,  6.48it/s][A
Validating..:  15%|█▌        | 4/26 [00:00<00:03,  6.52it/s][A
Validating..:  19%|█▉        | 5/26 [00:00<00:03,  6.54it/s][A
Validating..:  23%|██▎       | 6/26 [00:00<00:03,  6.42it/s][A
Validating..:  27%|██▋       | 7/26 [00:01<00:02,  6.49it/s][A
Validating..:  31%|███       | 8/26 [00:01<00:02,  6.53it/s][A
Validating..:  35%|███▍      | 9/26 [00:01<00:02,  6.52it/s][A
Validating..:  38%|███▊      | 10/26 [00:01<00:02,  6.47it/s][A
Validating..:  42%|████▏     | 11/26 [00:01<00:02,  6.48it/s][A
Validating..:  46%|████▌     | 12/26 [00:01<00:02,  6.42it/s][A
Validating..:  50%|█████     | 13/26 [00:02<00:02,  6.39it/s][A
Validating..:  54%|█████▍    | 14/26 [00:02<00:01,  6.40it/s][A
Validating..:  58%|█████▊    | 15/26 [00:0

Validation Loss: 0.03946529386135248
Epoch 3



Training..:   0%|          | 0/202 [00:00<?, ?it/s][A
Training..:   0%|          | 1/202 [00:00<01:40,  2.00it/s][A

Training Loss: 0.07787132263183594



Training..:   1%|          | 2/202 [00:01<01:41,  1.98it/s][A

Training Loss: 0.047682926058769226



Training..:   1%|▏         | 3/202 [00:01<01:41,  1.96it/s][A

Training Loss: 0.052820321172475815



Training..:   2%|▏         | 4/202 [00:02<01:41,  1.95it/s][A

Training Loss: 0.07516667991876602



Training..:   2%|▏         | 5/202 [00:02<01:41,  1.94it/s][A

Training Loss: 0.037631306797266006



Training..:   3%|▎         | 6/202 [00:03<01:41,  1.93it/s][A

Training Loss: 0.044619347900152206



Training..:   3%|▎         | 7/202 [00:03<01:41,  1.93it/s][A

Training Loss: 0.04359079897403717



Training..:   4%|▍         | 8/202 [00:04<01:39,  1.94it/s][A

Training Loss: 0.08242549002170563



Training..:   4%|▍         | 9/202 [00:04<01:39,  1.95it/s][A

Training Loss: 0.03286716341972351



Training..:   5%|▍         | 10/202 [00:05<01:38,  1.95it/s][A

Training Loss: 0.047656767070293427



Training..:   5%|▌         | 11/202 [00:05<01:38,  1.95it/s][A

Training Loss: 0.05151256173849106



Training..:   6%|▌         | 12/202 [00:06<01:37,  1.95it/s][A

Training Loss: 0.04562356695532799



Training..:   6%|▋         | 13/202 [00:06<01:36,  1.95it/s][A

Training Loss: 0.07873456180095673



Training..:   7%|▋         | 14/202 [00:07<01:36,  1.96it/s][A

Training Loss: 0.050849687308073044



Training..:   7%|▋         | 15/202 [00:07<01:35,  1.96it/s][A

Training Loss: 0.03203209117054939



Training..:   8%|▊         | 16/202 [00:08<01:35,  1.95it/s][A

Training Loss: 0.06293165683746338



Training..:   8%|▊         | 17/202 [00:08<01:34,  1.95it/s][A

Training Loss: 0.05272969603538513



Training..:   9%|▉         | 18/202 [00:09<01:34,  1.95it/s][A

Training Loss: 0.061133988201618195



Training..:   9%|▉         | 19/202 [00:09<01:33,  1.95it/s][A

Training Loss: 0.05552968010306358



Training..:  10%|▉         | 20/202 [00:10<01:33,  1.95it/s][A

Training Loss: 0.0382956862449646



Training..:  10%|█         | 21/202 [00:10<01:32,  1.95it/s][A

Training Loss: 0.062366779893636703



Training..:  11%|█         | 22/202 [00:11<01:32,  1.95it/s][A

Training Loss: 0.09892826527357101



Training..:  11%|█▏        | 23/202 [00:11<01:31,  1.95it/s][A

Training Loss: 0.055634185671806335



Training..:  12%|█▏        | 24/202 [00:12<01:31,  1.95it/s][A

Training Loss: 0.043457452207803726



Training..:  12%|█▏        | 25/202 [00:12<01:30,  1.95it/s][A

Training Loss: 0.0509960874915123



Training..:  13%|█▎        | 26/202 [00:13<01:30,  1.95it/s][A

Training Loss: 0.08550743013620377



Training..:  13%|█▎        | 27/202 [00:13<01:29,  1.95it/s][A

Training Loss: 0.0534384585916996



Training..:  14%|█▍        | 28/202 [00:14<01:28,  1.96it/s][A

Training Loss: 0.048961859196424484



Training..:  14%|█▍        | 29/202 [00:14<01:28,  1.95it/s][A

Training Loss: 0.024532396346330643



Training..:  15%|█▍        | 30/202 [00:15<01:28,  1.95it/s][A

Training Loss: 0.04898466542363167



Training..:  15%|█▌        | 31/202 [00:15<01:27,  1.95it/s][A

Training Loss: 0.033527180552482605



Training..:  16%|█▌        | 32/202 [00:16<01:27,  1.95it/s][A

Training Loss: 0.04359437897801399



Training..:  16%|█▋        | 33/202 [00:16<01:26,  1.95it/s][A

Training Loss: 0.054959483444690704



Training..:  17%|█▋        | 34/202 [00:17<01:26,  1.95it/s][A

Training Loss: 0.04574205353856087



Training..:  17%|█▋        | 35/202 [00:17<01:25,  1.95it/s][A

Training Loss: 0.03257818892598152



Training..:  18%|█▊        | 36/202 [00:18<01:25,  1.95it/s][A

Training Loss: 0.048936981707811356



Training..:  18%|█▊        | 37/202 [00:18<01:24,  1.95it/s][A

Training Loss: 0.08682186901569366



Training..:  19%|█▉        | 38/202 [00:19<01:24,  1.95it/s][A

Training Loss: 0.04386032372713089



Training..:  19%|█▉        | 39/202 [00:19<01:23,  1.95it/s][A

Training Loss: 0.03993997722864151



Training..:  20%|█▉        | 40/202 [00:20<01:23,  1.95it/s][A

Training Loss: 0.04465110972523689



Training..:  20%|██        | 41/202 [00:21<01:22,  1.95it/s][A

Training Loss: 0.04328850656747818



Training..:  21%|██        | 42/202 [00:21<01:21,  1.95it/s][A

Training Loss: 0.06677770614624023



Training..:  21%|██▏       | 43/202 [00:22<01:21,  1.95it/s][A

Training Loss: 0.04036759212613106



Training..:  22%|██▏       | 44/202 [00:22<01:20,  1.95it/s][A

Training Loss: 0.04765469580888748



Training..:  22%|██▏       | 45/202 [00:23<01:20,  1.95it/s][A

Training Loss: 0.043219633400440216



Training..:  23%|██▎       | 46/202 [00:23<01:19,  1.95it/s][A

Training Loss: 0.044265735894441605



Training..:  23%|██▎       | 47/202 [00:24<01:19,  1.95it/s][A

Training Loss: 0.05431854724884033



Training..:  24%|██▍       | 48/202 [00:24<01:18,  1.95it/s][A

Training Loss: 0.06682675331830978



Training..:  24%|██▍       | 49/202 [00:25<01:18,  1.95it/s][A

Training Loss: 0.04499345272779465



Training..:  25%|██▍       | 50/202 [00:25<01:17,  1.96it/s][A

Training Loss: 0.05674907937645912



Training..:  25%|██▌       | 51/202 [00:26<01:17,  1.95it/s][A

Training Loss: 0.057420771569013596



Training..:  26%|██▌       | 52/202 [00:26<01:16,  1.95it/s][A

Training Loss: 0.032778870314359665



Training..:  26%|██▌       | 53/202 [00:27<01:16,  1.95it/s][A

Training Loss: 0.0871385931968689



Training..:  27%|██▋       | 54/202 [00:27<01:15,  1.95it/s][A

Training Loss: 0.052247513085603714



Training..:  27%|██▋       | 55/202 [00:28<01:15,  1.95it/s][A

Training Loss: 0.04051419720053673



Training..:  28%|██▊       | 56/202 [00:28<01:15,  1.94it/s][A

Training Loss: 0.05138407647609711



Training..:  28%|██▊       | 57/202 [00:29<01:14,  1.94it/s][A

Training Loss: 0.04432852193713188



Training..:  29%|██▊       | 58/202 [00:29<01:14,  1.94it/s][A

Training Loss: 0.06678509712219238



Training..:  29%|██▉       | 59/202 [00:30<01:13,  1.94it/s][A

Training Loss: 0.028148185461759567



Training..:  30%|██▉       | 60/202 [00:30<01:12,  1.95it/s][A

Training Loss: 0.04684248939156532



Training..:  30%|███       | 61/202 [00:31<01:12,  1.95it/s][A

Training Loss: 0.07253319770097733



Training..:  31%|███       | 62/202 [00:31<01:11,  1.95it/s][A

Training Loss: 0.05236560478806496



Training..:  31%|███       | 63/202 [00:32<01:11,  1.95it/s][A

Training Loss: 0.06565152108669281



Training..:  32%|███▏      | 64/202 [00:32<01:10,  1.95it/s][A

Training Loss: 0.03611580282449722



Training..:  32%|███▏      | 65/202 [00:33<01:10,  1.95it/s][A

Training Loss: 0.02659350447356701



Training..:  33%|███▎      | 66/202 [00:33<01:09,  1.95it/s][A

Training Loss: 0.06584583222866058



Training..:  33%|███▎      | 67/202 [00:34<01:09,  1.95it/s][A

Training Loss: 0.04289812222123146



Training..:  34%|███▎      | 68/202 [00:34<01:08,  1.95it/s][A

Training Loss: 0.049634940922260284



Training..:  34%|███▍      | 69/202 [00:35<01:08,  1.95it/s][A

Training Loss: 0.03796176239848137



Training..:  35%|███▍      | 70/202 [00:35<01:07,  1.95it/s][A

Training Loss: 0.042622316628694534



Training..:  35%|███▌      | 71/202 [00:36<01:07,  1.95it/s][A

Training Loss: 0.03870029002428055



Training..:  36%|███▌      | 72/202 [00:36<01:06,  1.95it/s][A

Training Loss: 0.044359080493450165



Training..:  36%|███▌      | 73/202 [00:37<01:06,  1.95it/s][A

Training Loss: 0.03394061326980591



Training..:  37%|███▋      | 74/202 [00:37<01:05,  1.95it/s][A

Training Loss: 0.04881623014807701



Training..:  37%|███▋      | 75/202 [00:38<01:05,  1.94it/s][A

Training Loss: 0.051512762904167175



Training..:  38%|███▊      | 76/202 [00:38<01:04,  1.94it/s][A

Training Loss: 0.045201171189546585



Training..:  38%|███▊      | 77/202 [00:39<01:04,  1.94it/s][A

Training Loss: 0.039300378412008286



Training..:  39%|███▊      | 78/202 [00:40<01:03,  1.95it/s][A

Training Loss: 0.04869239777326584



Training..:  39%|███▉      | 79/202 [00:40<01:03,  1.94it/s][A

Training Loss: 0.03525392338633537



Training..:  40%|███▉      | 80/202 [00:41<01:02,  1.94it/s][A

Training Loss: 0.04285985976457596



Training..:  40%|████      | 81/202 [00:41<01:02,  1.94it/s][A

Training Loss: 0.028835853561758995



Training..:  41%|████      | 82/202 [00:42<01:01,  1.94it/s][A

Training Loss: 0.028306271880865097



Training..:  41%|████      | 83/202 [00:42<01:01,  1.94it/s][A

Training Loss: 0.05053499713540077



Training..:  42%|████▏     | 84/202 [00:43<01:00,  1.94it/s][A

Training Loss: 0.03995545953512192



Training..:  42%|████▏     | 85/202 [00:43<01:00,  1.94it/s][A

Training Loss: 0.024496739730238914



Training..:  43%|████▎     | 86/202 [00:44<00:59,  1.95it/s][A

Training Loss: 0.051150817424058914



Training..:  43%|████▎     | 87/202 [00:44<00:59,  1.94it/s][A

Training Loss: 0.0391845628619194



Training..:  44%|████▎     | 88/202 [00:45<00:58,  1.94it/s][A

Training Loss: 0.0294567309319973



Training..:  44%|████▍     | 89/202 [00:45<00:58,  1.94it/s][A

Training Loss: 0.04744419828057289



Training..:  45%|████▍     | 90/202 [00:46<00:57,  1.94it/s][A

Training Loss: 0.03640706092119217



Training..:  45%|████▌     | 91/202 [00:46<00:57,  1.94it/s][A

Training Loss: 0.039628300815820694



Training..:  46%|████▌     | 92/202 [00:47<00:56,  1.94it/s][A

Training Loss: 0.033646658062934875



Training..:  46%|████▌     | 93/202 [00:47<00:56,  1.94it/s][A

Training Loss: 0.03437233716249466



Training..:  47%|████▋     | 94/202 [00:48<00:55,  1.94it/s][A

Training Loss: 0.03101184219121933



Training..:  47%|████▋     | 95/202 [00:48<00:55,  1.94it/s][A

Training Loss: 0.03919664025306702



Training..:  48%|████▊     | 96/202 [00:49<00:54,  1.94it/s][A

Training Loss: 0.03263537958264351



Training..:  48%|████▊     | 97/202 [00:49<00:53,  1.94it/s][A

Training Loss: 0.03965083509683609



Training..:  49%|████▊     | 98/202 [00:50<00:53,  1.94it/s][A

Training Loss: 0.03020840510725975



Training..:  49%|████▉     | 99/202 [00:50<00:52,  1.95it/s][A

Training Loss: 0.07606187462806702



Training..:  50%|████▉     | 100/202 [00:51<00:52,  1.94it/s][A

Training Loss: 0.038639701902866364



Training..:  50%|█████     | 101/202 [00:51<00:51,  1.95it/s][A

Training Loss: 0.02983921393752098



Training..:  50%|█████     | 102/202 [00:52<00:51,  1.95it/s][A

Training Loss: 0.039436180144548416



Training..:  51%|█████     | 103/202 [00:52<00:50,  1.95it/s][A

Training Loss: 0.0330275259912014



Training..:  51%|█████▏    | 104/202 [00:53<00:50,  1.94it/s][A

Training Loss: 0.07083448022603989



Training..:  52%|█████▏    | 105/202 [00:53<00:50,  1.94it/s][A

Training Loss: 0.031870126724243164



Training..:  52%|█████▏    | 106/202 [00:54<00:49,  1.94it/s][A

Training Loss: 0.060007788240909576



Training..:  53%|█████▎    | 107/202 [00:54<00:49,  1.94it/s][A

Training Loss: 0.02622789517045021



Training..:  53%|█████▎    | 108/202 [00:55<00:48,  1.94it/s][A

Training Loss: 0.0473497100174427



Training..:  54%|█████▍    | 109/202 [00:55<00:47,  1.94it/s][A

Training Loss: 0.04343603551387787



Training..:  54%|█████▍    | 110/202 [00:56<00:47,  1.94it/s][A

Training Loss: 0.058644965291023254



Training..:  55%|█████▍    | 111/202 [00:57<00:46,  1.94it/s][A

Training Loss: 0.06176290661096573



Training..:  55%|█████▌    | 112/202 [00:57<00:46,  1.94it/s][A

Training Loss: 0.04785047471523285



Training..:  56%|█████▌    | 113/202 [00:58<00:45,  1.94it/s][A

Training Loss: 0.025356072932481766



Training..:  56%|█████▋    | 114/202 [00:58<00:45,  1.94it/s][A

Training Loss: 0.04107360541820526



Training..:  57%|█████▋    | 115/202 [00:59<00:44,  1.94it/s][A

Training Loss: 0.034070421010255814



Training..:  57%|█████▋    | 116/202 [00:59<00:44,  1.94it/s][A

Training Loss: 0.04008464515209198



Training..:  58%|█████▊    | 117/202 [01:00<00:43,  1.94it/s][A

Training Loss: 0.03928228095173836



Training..:  58%|█████▊    | 118/202 [01:00<00:43,  1.94it/s][A

Training Loss: 0.036538366228342056



Training..:  59%|█████▉    | 119/202 [01:01<00:42,  1.95it/s][A

Training Loss: 0.03423203527927399



Training..:  59%|█████▉    | 120/202 [01:01<00:42,  1.94it/s][A

Training Loss: 0.03688591718673706



Training..:  60%|█████▉    | 121/202 [01:02<00:41,  1.94it/s][A

Training Loss: 0.0438791885972023



Training..:  60%|██████    | 122/202 [01:02<00:41,  1.94it/s][A

Training Loss: 0.015772495418787003



Training..:  61%|██████    | 123/202 [01:03<00:40,  1.94it/s][A

Training Loss: 0.022569769993424416



Training..:  61%|██████▏   | 124/202 [01:03<00:40,  1.94it/s][A

Training Loss: 0.03719611465930939



Training..:  62%|██████▏   | 125/202 [01:04<00:39,  1.94it/s][A

Training Loss: 0.026237592101097107



Training..:  62%|██████▏   | 126/202 [01:04<00:39,  1.94it/s][A

Training Loss: 0.05343722179532051



Training..:  63%|██████▎   | 127/202 [01:05<00:38,  1.94it/s][A

Training Loss: 0.038300205022096634



Training..:  63%|██████▎   | 128/202 [01:05<00:38,  1.94it/s][A

Training Loss: 0.03670085221529007



Training..:  64%|██████▍   | 129/202 [01:06<00:37,  1.94it/s][A

Training Loss: 0.03737366572022438



Training..:  64%|██████▍   | 130/202 [01:06<00:37,  1.93it/s][A

Training Loss: 0.031116873025894165



Training..:  65%|██████▍   | 131/202 [01:07<00:36,  1.93it/s][A

Training Loss: 0.04341820627450943



Training..:  65%|██████▌   | 132/202 [01:07<00:36,  1.93it/s][A

Training Loss: 0.040705177932977676



Training..:  66%|██████▌   | 133/202 [01:08<00:35,  1.93it/s][A

Training Loss: 0.0630088523030281



Training..:  66%|██████▋   | 134/202 [01:08<00:35,  1.94it/s][A

Training Loss: 0.06068195030093193



Training..:  67%|██████▋   | 135/202 [01:09<00:34,  1.94it/s][A

Training Loss: 0.03731941431760788



Training..:  67%|██████▋   | 136/202 [01:09<00:34,  1.94it/s][A

Training Loss: 0.02499178610742092



Training..:  68%|██████▊   | 137/202 [01:10<00:33,  1.94it/s][A

Training Loss: 0.03608930483460426



Training..:  68%|██████▊   | 138/202 [01:10<00:33,  1.94it/s][A

Training Loss: 0.03503083437681198



Training..:  69%|██████▉   | 139/202 [01:11<00:32,  1.94it/s][A

Training Loss: 0.038314953446388245



Training..:  69%|██████▉   | 140/202 [01:11<00:31,  1.94it/s][A

Training Loss: 0.036731161177158356



Training..:  70%|██████▉   | 141/202 [01:12<00:31,  1.94it/s][A

Training Loss: 0.054656386375427246



Training..:  70%|███████   | 142/202 [01:12<00:30,  1.94it/s][A

Training Loss: 0.03471967577934265



Training..:  71%|███████   | 143/202 [01:13<00:30,  1.93it/s][A

Training Loss: 0.0452970527112484



Training..:  71%|███████▏  | 144/202 [01:14<00:30,  1.93it/s][A

Training Loss: 0.0555867962539196



Training..:  72%|███████▏  | 145/202 [01:14<00:29,  1.93it/s][A

Training Loss: 0.06388792395591736



Training..:  72%|███████▏  | 146/202 [01:15<00:28,  1.94it/s][A

Training Loss: 0.05480528250336647



Training..:  73%|███████▎  | 147/202 [01:15<00:28,  1.94it/s][A

Training Loss: 0.05135926231741905



Training..:  73%|███████▎  | 148/202 [01:16<00:27,  1.94it/s][A

Training Loss: 0.05845542252063751



Training..:  74%|███████▍  | 149/202 [01:16<00:27,  1.94it/s][A

Training Loss: 0.05565020442008972



Training..:  74%|███████▍  | 150/202 [01:17<00:26,  1.94it/s][A

Training Loss: 0.03438451513648033



Training..:  75%|███████▍  | 151/202 [01:17<00:26,  1.94it/s][A

Training Loss: 0.03505042567849159



Training..:  75%|███████▌  | 152/202 [01:18<00:25,  1.94it/s][A

Training Loss: 0.043236084282398224



Training..:  76%|███████▌  | 153/202 [01:18<00:25,  1.94it/s][A

Training Loss: 0.04717866703867912



Training..:  76%|███████▌  | 154/202 [01:19<00:24,  1.93it/s][A

Training Loss: 0.08011244237422943



Training..:  77%|███████▋  | 155/202 [01:19<00:24,  1.94it/s][A

Training Loss: 0.06621009856462479



Training..:  77%|███████▋  | 156/202 [01:20<00:23,  1.93it/s][A

Training Loss: 0.05957737937569618



Training..:  78%|███████▊  | 157/202 [01:20<00:23,  1.93it/s][A

Training Loss: 0.03766663372516632



Training..:  78%|███████▊  | 158/202 [01:21<00:22,  1.93it/s][A

Training Loss: 0.031374309211969376



Training..:  79%|███████▊  | 159/202 [01:21<00:22,  1.94it/s][A

Training Loss: 0.033372726291418076



Training..:  79%|███████▉  | 160/202 [01:22<00:21,  1.94it/s][A

Training Loss: 0.036294709891080856



Training..:  80%|███████▉  | 161/202 [01:22<00:21,  1.94it/s][A

Training Loss: 0.05195554718375206



Training..:  80%|████████  | 162/202 [01:23<00:20,  1.94it/s][A

Training Loss: 0.030510956421494484



Training..:  81%|████████  | 163/202 [01:23<00:20,  1.94it/s][A

Training Loss: 0.027721215039491653



Training..:  81%|████████  | 164/202 [01:24<00:19,  1.94it/s][A

Training Loss: 0.02137567661702633



Training..:  82%|████████▏ | 165/202 [01:24<00:19,  1.94it/s][A

Training Loss: 0.04084860533475876



Training..:  82%|████████▏ | 166/202 [01:25<00:18,  1.94it/s][A

Training Loss: 0.020918002352118492



Training..:  83%|████████▎ | 167/202 [01:25<00:18,  1.94it/s][A

Training Loss: 0.04974997416138649



Training..:  83%|████████▎ | 168/202 [01:26<00:17,  1.95it/s][A

Training Loss: 0.040822844952344894



Training..:  84%|████████▎ | 169/202 [01:26<00:16,  1.95it/s][A

Training Loss: 0.038156937807798386



Training..:  84%|████████▍ | 170/202 [01:27<00:16,  1.95it/s][A

Training Loss: 0.08661285042762756



Training..:  85%|████████▍ | 171/202 [01:27<00:15,  1.94it/s][A

Training Loss: 0.050248343497514725



Training..:  85%|████████▌ | 172/202 [01:28<00:15,  1.94it/s][A

Training Loss: 0.0517096221446991



Training..:  86%|████████▌ | 173/202 [01:28<00:14,  1.95it/s][A

Training Loss: 0.045346833765506744



Training..:  86%|████████▌ | 174/202 [01:29<00:14,  1.94it/s][A

Training Loss: 0.044245053082704544



Training..:  87%|████████▋ | 175/202 [01:30<00:13,  1.95it/s][A

Training Loss: 0.06904353201389313



Training..:  87%|████████▋ | 176/202 [01:30<00:13,  1.94it/s][A

Training Loss: 0.042508188635110855



Training..:  88%|████████▊ | 177/202 [01:31<00:12,  1.94it/s][A

Training Loss: 0.04003540799021721



Training..:  88%|████████▊ | 178/202 [01:31<00:12,  1.93it/s][A

Training Loss: 0.03797890990972519



Training..:  89%|████████▊ | 179/202 [01:32<00:11,  1.93it/s][A

Training Loss: 0.05565441772341728



Training..:  89%|████████▉ | 180/202 [01:32<00:11,  1.93it/s][A

Training Loss: 0.03886859491467476



Training..:  90%|████████▉ | 181/202 [01:33<00:10,  1.93it/s][A

Training Loss: 0.03848262503743172



Training..:  90%|█████████ | 182/202 [01:33<00:10,  1.92it/s][A

Training Loss: 0.057322438806295395



Training..:  91%|█████████ | 183/202 [01:34<00:09,  1.93it/s][A

Training Loss: 0.0267663411796093



Training..:  91%|█████████ | 184/202 [01:34<00:09,  1.94it/s][A

Training Loss: 0.02916916459798813



Training..:  92%|█████████▏| 185/202 [01:35<00:08,  1.94it/s][A

Training Loss: 0.03681124001741409



Training..:  92%|█████████▏| 186/202 [01:35<00:08,  1.94it/s][A

Training Loss: 0.030720997601747513



Training..:  93%|█████████▎| 187/202 [01:36<00:07,  1.94it/s][A

Training Loss: 0.04651404917240143



Training..:  93%|█████████▎| 188/202 [01:36<00:07,  1.94it/s][A

Training Loss: 0.12056410312652588



Training..:  94%|█████████▎| 189/202 [01:37<00:06,  1.94it/s][A

Training Loss: 0.04565591737627983



Training..:  94%|█████████▍| 190/202 [01:37<00:06,  1.94it/s][A

Training Loss: 0.0212395079433918



Training..:  95%|█████████▍| 191/202 [01:38<00:05,  1.94it/s][A

Training Loss: 0.028409186750650406



Training..:  95%|█████████▌| 192/202 [01:38<00:05,  1.94it/s][A

Training Loss: 0.02368742786347866



Training..:  96%|█████████▌| 193/202 [01:39<00:04,  1.93it/s][A

Training Loss: 0.04154326394200325



Training..:  96%|█████████▌| 194/202 [01:39<00:04,  1.94it/s][A

Training Loss: 0.034453440457582474



Training..:  97%|█████████▋| 195/202 [01:40<00:03,  1.94it/s][A

Training Loss: 0.03237689658999443



Training..:  97%|█████████▋| 196/202 [01:40<00:03,  1.94it/s][A

Training Loss: 0.038814421743154526



Training..:  98%|█████████▊| 197/202 [01:41<00:02,  1.94it/s][A

Training Loss: 0.02445274405181408



Training..:  98%|█████████▊| 198/202 [01:41<00:02,  1.94it/s][A

Training Loss: 0.025349628180265427



Training..:  99%|█████████▊| 199/202 [01:42<00:01,  1.94it/s][A

Training Loss: 0.032209642231464386



Training..:  99%|█████████▉| 200/202 [01:42<00:01,  1.94it/s][A

Training Loss: 0.03984624147415161



Training..: 100%|█████████▉| 201/202 [01:43<00:00,  1.94it/s][A

Training Loss: 0.07712844759225845



Training..: 100%|██████████| 202/202 [01:43<00:00,  1.95it/s]


Training Loss: 0.03663608431816101



Validating..:   0%|          | 0/26 [00:00<?, ?it/s][A
Validating..:   4%|▍         | 1/26 [00:00<00:03,  7.04it/s][A
Validating..:   8%|▊         | 2/26 [00:00<00:03,  6.68it/s][A
Validating..:  12%|█▏        | 3/26 [00:00<00:03,  6.29it/s][A
Validating..:  15%|█▌        | 4/26 [00:00<00:03,  6.38it/s][A
Validating..:  19%|█▉        | 5/26 [00:00<00:03,  6.37it/s][A
Validating..:  23%|██▎       | 6/26 [00:00<00:03,  6.25it/s][A
Validating..:  27%|██▋       | 7/26 [00:01<00:03,  6.33it/s][A
Validating..:  31%|███       | 8/26 [00:01<00:02,  6.35it/s][A
Validating..:  35%|███▍      | 9/26 [00:01<00:02,  6.35it/s][A
Validating..:  38%|███▊      | 10/26 [00:01<00:02,  6.35it/s][A
Validating..:  42%|████▏     | 11/26 [00:01<00:02,  6.35it/s][A
Validating..:  46%|████▌     | 12/26 [00:01<00:02,  6.29it/s][A
Validating..:  50%|█████     | 13/26 [00:02<00:02,  6.30it/s][A
Validating..:  54%|█████▍    | 14/26 [00:02<00:01,  6.35it/s][A
Validating..:  58%|█████▊    | 15/26 [00:0

Validation Loss: 0.030435680161015347





In [13]:
# evaluate on test set

model.eval()
test_loss = 0
with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = batch
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        test_loss += outputs.loss.item()

avg_test_loss = test_loss / len(test_loader)
print(f"Test Loss: {avg_test_loss}")

Test Loss: 0.025267765391618013
