# 💫Training

### User의 질문을 아래 19가지의 의도로 분류하는 모델
#### Accuracy : 92~93%

In [None]:
!pip3 install transformers==4.6.1
!pip install adamp



In [None]:
#필요한 라이브러리 불러오기
import argparse
import os
import pickle
import random
import time
import warnings
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from adamp import AdamP
from sklearn.metrics import f1_score
from torch import nn, optim
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from tqdm import tqdm
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, \
    ElectraForSequenceClassification, AdamW, get_cosine_with_hard_restarts_schedule_with_warmup

In [None]:
GPU_NUM = 1 # GPU index to use (in the visible notebook it is only referenced by the commented-out lines below)
# device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
# Use the default CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch.cuda.set_device(device) # change allocation of current GPU
device

device(type='cuda')

In [None]:
# Fix every random seed so that runs are reproducible.
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune (and possibly vary) its kernels
    # between runs, which defeats the purpose of seeding; keep it off.
    torch.backends.cudnn.benchmark = False
seed_everything(42)

In [None]:
# Helper for loading a line-oriented text file.
def load_data(file_path, encoding='utf-8'):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that non-ASCII (e.g. Korean) text is decoded the same way on every
            platform instead of depending on the locale default.

    Returns:
        A list with one stripped string per line of the file (blank lines
        become empty strings).
    """
    with open(file_path, 'r', encoding=encoding) as f:
        # Iterate the file object directly instead of materialising readlines().
        return [line.strip() for line in f]

# Load the preprocessed training data.
# NOTE(review): the path below looks like a mounted Google Drive folder in Colab — confirm it exists before running.
# train_data = pd.read_csv("final.csv")
train_data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/fashion_reader/question_intention_classification/new_data.csv")

In [None]:
# Preview the first rows of the loaded table.
train_data.head()

Unnamed: 0,question,answer,label
0,상품 이름이 뭐야?,이름,0
1,모델이 입고 있는 옷 이름이 뭐야?,이름,0
2,이거 이름 알려줘,이름,0
3,상품 이름이 어떻게 돼?,이름,0
4,이름 좀 알려줘,이름,0


In [None]:
# Print column dtypes and non-null counts of the loaded table.
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306 entries, 0 to 305
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   question  306 non-null    object
 1   answer    306 non-null    object
 2   label     306 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 7.3+ KB


### NLP_Dataset 클래스 선언

In [None]:
class NLP_Dataset(Dataset):
    """Dataset that tokenizes one question per item and attaches its label.

    Args:
        dataframe: Table with a 'question' column (text) and a 'label' column.
        tokenizer: Callable tokenizer; it is invoked with the question text and
            the keyword arguments shown in ``__getitem__``.
        max_length: Optional padding/truncation length. When omitted,
            ``CFG.tokenizer_max_length`` is looked up at item-access time,
            which matches the original behaviour.
    """

    def __init__(self, dataframe, tokenizer, max_length=None):
        self.dataset = dataframe
        self.question = dataframe['question']
        self.labels = dataframe['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        """Return the tokenizer output for row ``idx`` with a 'label' entry added."""
        if self.max_length is None:
            max_length = CFG.tokenizer_max_length
        else:
            max_length = self.max_length

        # Use positional access (.iloc): plain ``series[idx]`` is label-based
        # and breaks when the DataFrame index is not 0..n-1 (e.g. a frame taken
        # straight from train_test_split).
        tokenized_text = self.tokenizer(self.question.iloc[idx],
                                        max_length=max_length,
                                        padding='max_length',
                                        truncation=True,
                                        return_tensors='pt',
                                        add_special_tokens=True)

        tokenized_text['label'] = self.labels.iloc[idx]
        return tokenized_text

    def __len__(self):
        """Number of rows in the underlying table."""
        return len(self.labels)

### get_model 함수 선언
network, optimizer, scaler, scheduler, criterion 반환

In [None]:
def get_model(device=None):
    """Build the classifier and the training objects that go with it.

    Args:
        device: Optional target device for the network and the loss. When
            omitted, CUDA is used if available and the CPU otherwise, so the
            function no longer crashes on hosts without a GPU.

    Returns:
        Tuple ``(network, optimizer, scaler, scheduler, criterion)``:
        the sequence-classification model configured from ``CFG``, an AdamP
        optimizer, a ``GradScaler`` for mixed precision, a cosine-annealing
        learning-rate scheduler and a cross-entropy loss.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    network = AutoModelForSequenceClassification.from_pretrained(
        CFG.model_name,
        num_labels=CFG.n_classes,
        hidden_dropout_prob=CFG.dropout_rate,
    ).to(device)
    optimizer = AdamP(network.parameters(), lr=CFG.lr, betas=(0.9, 0.999), weight_decay=1e-2)
    scaler = GradScaler()
    # NOTE(review): T_max is fixed at 300 scheduler steps regardless of the
    # total number of training steps — confirm this is intended.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=300)
    criterion = nn.CrossEntropyLoss().to(device)
    return network, optimizer, scaler, scheduler, criterion

### training_per_step 선언

In [None]:
def training_per_step(model, loss_fn, optimizer, scaler, input_ids, attention_mask, labels, device):
    '''Run a single mixed-precision optimisation step.

    Args:
        model: Network called as ``model(input_ids, attention_mask=...)``; the
            first element of its output is used as the logits.
        loss_fn: Callable computing the loss from ``(logits, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scaler: ``GradScaler`` used for loss scaling.
        input_ids: Batch of token ids.
        attention_mask: Batch of attention masks.
        labels: Batch of target class indices.
        device: Device the three input tensors are moved to.

    Returns:
        The loss tensor of this step.
    '''
    model.train()

    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)
    labels = labels.to(device)

    # Only the forward pass and the loss belong inside autocast; the backward
    # pass and the optimizer step are run outside of it, as recommended by the
    # PyTorch AMP documentation.
    with autocast():
        outputs = model(input_ids, attention_mask=attention_mask)[0]
        loss = loss_fn(outputs, labels)

    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    optimizer.zero_grad()

    return loss

### validating_per_steps 선언

In [None]:
def validating_per_steps(epoch, model, loss_fn, test_loader, device):
    '''Evaluate the model on the whole validation loader.

    Args:
        epoch: Zero-based epoch index; only used for the progress-bar label.
        model: Network called as ``model(input_ids, attention_mask=...)``; the
            first element of its output is used as the logits.
        loss_fn: Callable computing the loss from ``(logits, labels)``.
        test_loader: Iterable of batches holding 'label', 'input_ids' and
            'attention_mask' entries.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(accuracy, f1)`` where ``f1`` is the macro-averaged F1 score.
    '''
    model.eval()

    loss_sum = 0
    sample_num = 0
    preds_all = []
    targets_all = []

    pbar = tqdm(test_loader, total=len(test_loader), position=0, leave=True)
    # Disable gradient tracking here so the function is safe to call on its
    # own, not only from inside a caller-provided no_grad block.
    with torch.no_grad():
        for batch in pbar:
            labels = batch.pop('label').to(device)
            # squeeze(1) drops the size-1 axis at position 1 of each tensor.
            input_ids = batch['input_ids'].squeeze(1).to(device)
            attention_mask = batch['attention_mask'].squeeze(1).to(device)

            preds = model(input_ids, attention_mask=attention_mask)[0]

            preds_all.append(torch.argmax(preds, 1).detach().cpu().numpy())
            targets_all.append(labels.detach().cpu().numpy())

            loss = loss_fn(preds, labels)

            # Weight the batch loss by batch size to get a per-sample running mean.
            loss_sum += loss.item()*labels.shape[0]
            sample_num += labels.shape[0]

            description = f"epoch {epoch + 1} loss: {loss_sum/sample_num:.4f}"
            pbar.set_description(description)

    preds_all = np.concatenate(preds_all)
    targets_all = np.concatenate(targets_all)
    accuracy = (preds_all == targets_all).mean()
    # scikit-learn expects (y_true, y_pred); keep the same order as
    # classification_report below.
    f1 = f1_score(targets_all, preds_all, average='macro')
    print('validation multi-class accuracy = {:.4f}, f1 score = {:.4f}'.format(accuracy, f1))
    print(classification_report(targets_all, preds_all))
    return accuracy, f1

### Train 함수 선언

In [None]:
def train(model, loss_fn, optimizer, scaler, train_loader, test_loader, scheduler, device, save_path=None):
    '''Run the full training loop with periodic validation and checkpointing.

    For ``CFG.epochs`` epochs every batch of ``train_loader`` is passed to
    ``training_per_step``; the scheduler (if any) is stepped after every
    batch. Every ``CFG.logging_step`` global steps the model is validated on
    ``test_loader`` and, when the macro F1 improves on the best value seen so
    far, the whole model object is saved with ``torch.save``.

    Args:
        model: Network to train.
        loss_fn: Loss callable forwarded to the step/validation helpers.
        optimizer: Optimizer forwarded to ``training_per_step``.
        scaler: ``GradScaler`` forwarded to ``training_per_step``.
        train_loader: Iterable of training batches holding 'label',
            'input_ids' and 'attention_mask' entries.
        test_loader: Iterable of validation batches.
        scheduler: Learning-rate scheduler or ``None``.
        device: Device the batch tensors are moved to.
        save_path: Optional checkpoint path. Defaults to the path that was
            previously hard-coded in this function.
    '''
    if save_path is None:
        # torch.save(model, "./output/baseline.pt")
        save_path = "/content/drive/MyDrive/Colab Notebooks/fashion_reader/question_intention_classification/output/new_baseline.pt"

    prev_f1 = 0
    global_steps = 0
    for epoch in range(CFG.epochs):
        running_loss = 0
        sample_num = 0

        pbar = tqdm(train_loader, total=len(train_loader), position=0, leave=True)
        for batch in pbar:
            # training phase
            labels = batch.pop('label')
            # squeeze(1) drops the size-1 axis at position 1 of each tensor.
            input_ids = batch['input_ids'].squeeze(1)
            attention_mask = batch['attention_mask'].squeeze(1)

            loss = training_per_step(model, loss_fn, optimizer, scaler, input_ids.to(device), attention_mask.to(device), labels.to(device), device)
            # Weight the batch loss by batch size to get a per-sample running mean.
            running_loss += loss.item()*labels.shape[0]
            sample_num += labels.shape[0]

            global_steps += 1
            description = f"{epoch+1}epoch {global_steps: >4d}step | loss: {running_loss/sample_num: .4f} "
            pbar.set_description(description)

            if scheduler is not None:
                scheduler.step()

            # validating phase
            if global_steps % CFG.logging_step == 0:
                with torch.no_grad():
                    acc, f1 = validating_per_steps(epoch, model, loss_fn, test_loader, device)
                if f1 > prev_f1:
                    # Keep only the checkpoint with the best validation F1 so far.
                    torch.save(model, save_path)
                    prev_f1 = f1

                # wandb.log({
                # 'eval/acc' : acc,
                # 'eval/f1' : f1,
                # 'global_steps': global_steps
                # })
                print(f'eval/acc : {acc}, eval/f1 : {f1}, global_steps: {global_steps}')

            else:
                print(f" global_steps : {global_steps}")
                # wandb.log({'global_steps':global_steps})

### 학습을 위한 config 설정

In [None]:
class CFG:
    """Static hyper-parameter container read by the dataset, model and training code."""

    # --- model selection -------------------------------------------------
    # Alternatives left by the original author (trailing numbers are the
    # author's own notes; presumably a size/time figure and an accuracy —
    # TODO confirm):
    #   "xlm-roberta-large"             # 19,069 #93%
    #   "bert-base-multilingual-cased"  # 7,461 #93%
    #   'albert-base-v2'                # 4,445 #23%
    model_name = "xlm-roberta-base"  # 11,548 #93%
    n_classes = 19        # passed as num_labels when the model is built
    dropout_rate = 0      # passed as hidden_dropout_prob when the model is built

    # --- input pipeline --------------------------------------------------
    tokenizer_max_length = 35   # padding / truncation length for the tokenizer
    batch_size = 32

    # --- optimisation ----------------------------------------------------
    lr = 5e-6
    epochs = 100
    logging_step = 15     # validate every this many global steps

    # Not read anywhere in the visible notebook.
    change_mask_prop = 0.0

### tokenizer와 데이터 불러오기 

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation, stratified on the label column so
# that the label proportions are kept in both parts.
# NOTE(review): no random_state is passed, so the split presumably depends on
# the global NumPy RNG state (seeded earlier by seed_everything) — confirm.
X_train, X_test, y_train, y_test = train_test_split(train_data['question'], train_data['label'],
                                                    stratify= train_data['label'], 
                                                    test_size=0.2)

In [None]:
# Rebuild two-column tables from the split; constructing them from zip() gives
# each frame a fresh 0..n-1 index. Note that train_data is rebound here and no
# longer refers to the full table.
train_data = pd.DataFrame(zip(X_train, y_train), columns=['question', 'label'])
valid_data = pd.DataFrame(zip(X_test, y_test), columns=['question', 'label'])

In [None]:
# Per-label row counts of the training split.
train_data.label.value_counts()

10    21
4     17
14    17
15    16
16    14
8     14
17    13
6     13
5     13
9     12
11    12
3     12
1     10
2     10
18    10
7     10
12    10
13    10
0     10
Name: label, dtype: int64

In [None]:
# Per-label row counts of the validation split.
valid_data.label.value_counts()

10    6
15    4
14    4
4     4
5     4
17    4
18    3
7     3
1     3
3     3
6     3
9     3
8     3
11    3
16    3
0     3
12    2
13    2
2     2
Name: label, dtype: int64

In [None]:
# Load the tokenizer that matches CFG.model_name and wrap both splits in NLP_Dataset.
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)
train_set = NLP_Dataset(train_data, tokenizer)
valid_set = NLP_Dataset(valid_data, tokenizer)

Downloading:   0%|          | 0.00/512 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

In [None]:
# Batch both datasets with the configured batch size.
train_iter = DataLoader(train_set, batch_size=CFG.batch_size, shuffle=True)
# NOTE(review): shuffle=True is also set for the validation loader; the
# accuracy/F1 computed over the whole loader do not depend on batch order, so
# shuffling here looks unnecessary — confirm.
val_iter = DataLoader(valid_set, batch_size=CFG.batch_size, shuffle=True)

In [None]:
# Instantiate the model together with its optimizer, grad scaler, LR scheduler and loss.
network, optimizer, scaler, scheduler, criterion = get_model()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

### wandb setting

In [None]:
# import wandb
# os.environ['WANDB_LOG_MODEL'] = 'true'
# os.environ['WANDB_WATCH'] = 'all'
# os.environ['WANDB_SILENT'] = 'true'
# wandb.login()
# wandb.init(project='fashion_reader_question_classification', name='baseline_tjdnsgkek')

In [None]:
# Start training; pass the device selected earlier in the notebook instead of
# a hard-coded "cuda" string so the call agrees with the device cell.
train(network, criterion, optimizer, scaler, train_iter, val_iter, scheduler, device)

1epoch    1step | loss:  2.9358 :  12%|█▎        | 1/8 [00:00<00:04,  1.54it/s]

 global_steps : 1


1epoch    2step | loss:  2.9475 :  25%|██▌       | 2/8 [00:00<00:02,  2.17it/s]

 global_steps : 2


1epoch    3step | loss:  2.9591 :  38%|███▊      | 3/8 [00:01<00:01,  2.58it/s]

 global_steps : 3


1epoch    4step | loss:  2.9597 :  50%|█████     | 4/8 [00:01<00:01,  2.88it/s]

 global_steps : 4


1epoch    5step | loss:  2.9580 :  62%|██████▎   | 5/8 [00:01<00:00,  3.08it/s]

 global_steps : 5


1epoch    6step | loss:  2.9613 :  75%|███████▌  | 6/8 [00:02<00:00,  3.18it/s]

 global_steps : 6


1epoch    7step | loss:  2.9649 :  88%|████████▊ | 7/8 [00:02<00:00,  3.30it/s]

 global_steps : 7


1epoch    8step | loss:  2.9639 : 100%|██████████| 8/8 [00:02<00:00,  2.99it/s]


 global_steps : 8


2epoch    9step | loss:  2.9565 :  12%|█▎        | 1/8 [00:00<00:02,  3.46it/s]

 global_steps : 9


2epoch   10step | loss:  2.9565 :  25%|██▌       | 2/8 [00:00<00:01,  3.50it/s]

 global_steps : 10


2epoch   11step | loss:  2.9758 :  38%|███▊      | 3/8 [00:00<00:01,  3.50it/s]

 global_steps : 11


2epoch   12step | loss:  2.9657 :  50%|█████     | 4/8 [00:01<00:01,  3.51it/s]

 global_steps : 12


2epoch   13step | loss:  2.9705 :  62%|██████▎   | 5/8 [00:01<00:00,  3.50it/s]

 global_steps : 13


2epoch   14step | loss:  2.9606 :  75%|███████▌  | 6/8 [00:01<00:00,  3.50it/s]

 global_steps : 14


epoch 2 loss: 2.9646: 100%|██████████| 2/2 [00:00<00:00, 20.72it/s]
  _warn_prf(average, modifier, msg_start, len(result))


validation multi-class accuracy = 0.0484, f1 score = 0.0166
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.25      0.25      0.25         4
          15       0.00      0.00      0.00         4
          16       0.

2epoch   15step | loss:  2.9592 :  88%|████████▊ | 7/8 [00:07<00:02,  2.12s/it]

eval/acc : 0.04838709677419355, eval/f1 : 0.016609145815358068, global_steps: 15


2epoch   16step | loss:  2.9548 : 100%|██████████| 8/8 [00:07<00:00,  1.02it/s]


 global_steps : 16


3epoch   17step | loss:  2.9007 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 17


3epoch   18step | loss:  2.9179 :  25%|██▌       | 2/8 [00:00<00:01,  3.28it/s]

 global_steps : 18


3epoch   19step | loss:  2.9313 :  38%|███▊      | 3/8 [00:00<00:01,  3.20it/s]

 global_steps : 19


3epoch   20step | loss:  2.9260 :  50%|█████     | 4/8 [00:01<00:01,  3.19it/s]

 global_steps : 20


3epoch   21step | loss:  2.9314 :  62%|██████▎   | 5/8 [00:01<00:00,  3.31it/s]

 global_steps : 21


3epoch   22step | loss:  2.9388 :  75%|███████▌  | 6/8 [00:01<00:00,  3.38it/s]

 global_steps : 22


3epoch   23step | loss:  2.9382 :  88%|████████▊ | 7/8 [00:02<00:00,  3.33it/s]

 global_steps : 23


3epoch   24step | loss:  2.9415 : 100%|██████████| 8/8 [00:02<00:00,  3.35it/s]


 global_steps : 24


4epoch   25step | loss:  2.9089 :  12%|█▎        | 1/8 [00:00<00:02,  3.33it/s]

 global_steps : 25


4epoch   26step | loss:  2.9180 :  25%|██▌       | 2/8 [00:00<00:01,  3.24it/s]

 global_steps : 26


4epoch   27step | loss:  2.9125 :  38%|███▊      | 3/8 [00:00<00:01,  3.34it/s]

 global_steps : 27


4epoch   29step | loss:  2.9175 :  62%|██████▎   | 5/8 [00:01<00:00,  4.10it/s]

 global_steps : 28
 global_steps : 29


epoch 4 loss: 2.9575: 100%|██████████| 2/2 [00:00<00:00, 21.70it/s]
4epoch   30step | loss:  2.9273 :  75%|███████▌  | 6/8 [00:01<00:00,  3.40it/s]

validation multi-class accuracy = 0.0645, f1 score = 0.0125
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.10      0.75      0.18         4
          15       0.00      0.00      0.00         4
          16       0.

4epoch   32step | loss:  2.9239 : 100%|██████████| 8/8 [00:02<00:00,  3.76it/s]


 global_steps : 31
 global_steps : 32


5epoch   33step | loss:  2.8947 :  12%|█▎        | 1/8 [00:00<00:01,  3.51it/s]

 global_steps : 33


5epoch   34step | loss:  2.9136 :  25%|██▌       | 2/8 [00:00<00:01,  3.52it/s]

 global_steps : 34


5epoch   35step | loss:  2.9030 :  38%|███▊      | 3/8 [00:00<00:01,  3.42it/s]

 global_steps : 35


5epoch   36step | loss:  2.9171 :  50%|█████     | 4/8 [00:01<00:01,  3.46it/s]

 global_steps : 36


5epoch   37step | loss:  2.9099 :  62%|██████▎   | 5/8 [00:01<00:00,  3.51it/s]

 global_steps : 37


5epoch   38step | loss:  2.9125 :  75%|███████▌  | 6/8 [00:01<00:00,  3.39it/s]

 global_steps : 38


5epoch   39step | loss:  2.9162 :  88%|████████▊ | 7/8 [00:02<00:00,  3.44it/s]

 global_steps : 39


5epoch   40step | loss:  2.9146 : 100%|██████████| 8/8 [00:02<00:00,  3.51it/s]


 global_steps : 40


6epoch   41step | loss:  2.9073 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 41


6epoch   42step | loss:  2.9092 :  25%|██▌       | 2/8 [00:00<00:01,  3.27it/s]

 global_steps : 42


6epoch   43step | loss:  2.8956 :  38%|███▊      | 3/8 [00:00<00:01,  3.19it/s]

 global_steps : 43


6epoch   44step | loss:  2.9055 :  50%|█████     | 4/8 [00:01<00:01,  3.23it/s]

 global_steps : 44


epoch 6 loss: 2.9287: 100%|██████████| 2/2 [00:00<00:00, 21.37it/s]
6epoch   45step | loss:  2.9030 :  62%|██████▎   | 5/8 [00:01<00:01,  2.85it/s]

validation multi-class accuracy = 0.0806, f1 score = 0.0139
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.11      1.00      0.19         4
          15       0.00      0.00      0.00         4
          16       0.

6epoch   46step | loss:  2.8973 :  75%|███████▌  | 6/8 [00:01<00:00,  3.03it/s]

 global_steps : 46


6epoch   47step | loss:  2.9049 :  88%|████████▊ | 7/8 [00:02<00:00,  3.11it/s]

 global_steps : 47


6epoch   48step | loss:  2.9039 : 100%|██████████| 8/8 [00:02<00:00,  3.16it/s]


 global_steps : 48


7epoch   49step | loss:  2.8599 :  12%|█▎        | 1/8 [00:00<00:02,  3.15it/s]

 global_steps : 49


7epoch   50step | loss:  2.8618 :  25%|██▌       | 2/8 [00:00<00:01,  3.14it/s]

 global_steps : 50


7epoch   51step | loss:  2.8729 :  38%|███▊      | 3/8 [00:00<00:01,  2.95it/s]

 global_steps : 51


7epoch   52step | loss:  2.8748 :  50%|█████     | 4/8 [00:01<00:01,  2.93it/s]

 global_steps : 52


7epoch   53step | loss:  2.8628 :  62%|██████▎   | 5/8 [00:01<00:01,  2.88it/s]

 global_steps : 53


7epoch   54step | loss:  2.8716 :  75%|███████▌  | 6/8 [00:02<00:00,  2.96it/s]

 global_steps : 54


7epoch   55step | loss:  2.8719 :  88%|████████▊ | 7/8 [00:02<00:00,  3.01it/s]

 global_steps : 55


7epoch   56step | loss:  2.8767 : 100%|██████████| 8/8 [00:02<00:00,  3.04it/s]


 global_steps : 56


8epoch   57step | loss:  2.8796 :  12%|█▎        | 1/8 [00:00<00:02,  2.62it/s]

 global_steps : 57


8epoch   58step | loss:  2.8361 :  25%|██▌       | 2/8 [00:00<00:02,  2.73it/s]

 global_steps : 58


8epoch   59step | loss:  2.8557 :  38%|███▊      | 3/8 [00:01<00:01,  2.82it/s]

 global_steps : 59


epoch 8 loss: 2.9035: 100%|██████████| 2/2 [00:00<00:00, 18.83it/s]


validation multi-class accuracy = 0.0806, f1 score = 0.0181
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.17      1.00      0.30         4
          15       0.00      0.00      0.00         4
          16       0.

8epoch   60step | loss:  2.8748 :  50%|█████     | 4/8 [00:07<00:10,  2.61s/it]

eval/acc : 0.08064516129032258, eval/f1 : 0.018100807574491787, global_steps: 60


8epoch   61step | loss:  2.8688 :  62%|██████▎   | 5/8 [00:07<00:05,  1.78s/it]

 global_steps : 61


8epoch   62step | loss:  2.8715 :  75%|███████▌  | 6/8 [00:07<00:02,  1.28s/it]

 global_steps : 62


8epoch   63step | loss:  2.8638 :  88%|████████▊ | 7/8 [00:08<00:00,  1.04it/s]

 global_steps : 63


8epoch   64step | loss:  2.8644 : 100%|██████████| 8/8 [00:08<00:00,  1.04s/it]


 global_steps : 64


9epoch   65step | loss:  2.8314 :  12%|█▎        | 1/8 [00:00<00:01,  3.50it/s]

 global_steps : 65


9epoch   66step | loss:  2.8784 :  25%|██▌       | 2/8 [00:00<00:01,  3.34it/s]

 global_steps : 66


9epoch   67step | loss:  2.8722 :  38%|███▊      | 3/8 [00:00<00:01,  3.25it/s]

 global_steps : 67


9epoch   68step | loss:  2.8639 :  50%|█████     | 4/8 [00:01<00:01,  3.22it/s]

 global_steps : 68


9epoch   69step | loss:  2.8576 :  62%|██████▎   | 5/8 [00:01<00:00,  3.30it/s]

 global_steps : 69


9epoch   70step | loss:  2.8640 :  75%|███████▌  | 6/8 [00:01<00:00,  3.34it/s]

 global_steps : 70


9epoch   71step | loss:  2.8621 :  88%|████████▊ | 7/8 [00:02<00:00,  3.34it/s]

 global_steps : 71


9epoch   72step | loss:  2.8625 : 100%|██████████| 8/8 [00:02<00:00,  3.39it/s]


 global_steps : 72


10epoch   73step | loss:  2.8712 :  12%|█▎        | 1/8 [00:00<00:02,  3.14it/s]

 global_steps : 73


10epoch   74step | loss:  2.8370 :  25%|██▌       | 2/8 [00:00<00:01,  3.30it/s]

 global_steps : 74


epoch 10 loss: 2.8779: 100%|██████████| 2/2 [00:00<00:00, 20.75it/s]


validation multi-class accuracy = 0.0806, f1 score = 0.0318
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.11      1.00      0.21         4
          15       1.00      0.25      0.40         4
          16       0.

10epoch   75step | loss:  2.8428 :  38%|███▊      | 3/8 [00:06<00:15,  3.06s/it]

eval/acc : 0.08064516129032258, eval/f1 : 0.03184885290148448, global_steps: 75


10epoch   76step | loss:  2.8230 :  50%|█████     | 4/8 [00:07<00:07,  1.97s/it]

 global_steps : 76


10epoch   77step | loss:  2.8326 :  62%|██████▎   | 5/8 [00:07<00:04,  1.37s/it]

 global_steps : 77


10epoch   78step | loss:  2.8202 :  75%|███████▌  | 6/8 [00:07<00:01,  1.00it/s]

 global_steps : 78


10epoch   79step | loss:  2.8112 :  88%|████████▊ | 7/8 [00:08<00:00,  1.30it/s]

 global_steps : 79


10epoch   80step | loss:  2.8094 : 100%|██████████| 8/8 [00:08<00:00,  1.05s/it]


 global_steps : 80


11epoch   81step | loss:  2.9299 :  12%|█▎        | 1/8 [00:00<00:02,  3.37it/s]

 global_steps : 81


11epoch   82step | loss:  2.8204 :  25%|██▌       | 2/8 [00:00<00:01,  3.38it/s]

 global_steps : 82


11epoch   83step | loss:  2.8257 :  38%|███▊      | 3/8 [00:00<00:01,  3.43it/s]

 global_steps : 83


11epoch   84step | loss:  2.8051 :  50%|█████     | 4/8 [00:01<00:01,  3.38it/s]

 global_steps : 84


11epoch   85step | loss:  2.8011 :  62%|██████▎   | 5/8 [00:01<00:00,  3.40it/s]

 global_steps : 85


11epoch   86step | loss:  2.8107 :  75%|███████▌  | 6/8 [00:01<00:00,  3.46it/s]

 global_steps : 86


11epoch   87step | loss:  2.8192 :  88%|████████▊ | 7/8 [00:02<00:00,  3.51it/s]

 global_steps : 87


11epoch   88step | loss:  2.7979 : 100%|██████████| 8/8 [00:02<00:00,  3.47it/s]


 global_steps : 88


12epoch   89step | loss:  2.8030 :  12%|█▎        | 1/8 [00:00<00:02,  3.30it/s]

 global_steps : 89


epoch 12 loss: 2.8524: 100%|██████████| 2/2 [00:00<00:00, 20.58it/s]
12epoch   90step | loss:  2.7920 :  25%|██▌       | 2/8 [00:00<00:02,  2.76it/s]

validation multi-class accuracy = 0.0806, f1 score = 0.0316
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.11      1.00      0.20         4
          15       1.00      0.25      0.40         4
          16       0.

12epoch   91step | loss:  2.7816 :  38%|███▊      | 3/8 [00:01<00:01,  3.00it/s]

 global_steps : 91


12epoch   92step | loss:  2.7699 :  50%|█████     | 4/8 [00:01<00:01,  3.18it/s]

 global_steps : 92


12epoch   93step | loss:  2.7499 :  62%|██████▎   | 5/8 [00:01<00:00,  3.28it/s]

 global_steps : 93


12epoch   94step | loss:  2.7432 :  75%|███████▌  | 6/8 [00:01<00:00,  3.28it/s]

 global_steps : 94


12epoch   95step | loss:  2.7570 :  88%|████████▊ | 7/8 [00:02<00:00,  3.34it/s]

 global_steps : 95


12epoch   96step | loss:  2.7525 : 100%|██████████| 8/8 [00:02<00:00,  3.30it/s]


 global_steps : 96


13epoch   97step | loss:  2.6407 :  12%|█▎        | 1/8 [00:00<00:02,  3.32it/s]

 global_steps : 97


13epoch   98step | loss:  2.6718 :  25%|██▌       | 2/8 [00:00<00:01,  3.26it/s]

 global_steps : 98


13epoch   99step | loss:  2.7365 :  38%|███▊      | 3/8 [00:00<00:01,  3.13it/s]

 global_steps : 99


13epoch  100step | loss:  2.7492 :  50%|█████     | 4/8 [00:01<00:01,  3.16it/s]

 global_steps : 100


13epoch  101step | loss:  2.7395 :  62%|██████▎   | 5/8 [00:01<00:00,  3.20it/s]

 global_steps : 101


13epoch  102step | loss:  2.7445 :  75%|███████▌  | 6/8 [00:01<00:00,  3.22it/s]

 global_steps : 102


13epoch  103step | loss:  2.7448 :  88%|████████▊ | 7/8 [00:02<00:00,  3.23it/s]

 global_steps : 103


13epoch  104step | loss:  2.7452 : 100%|██████████| 8/8 [00:02<00:00,  3.23it/s]


 global_steps : 104


epoch 14 loss: 2.8247: 100%|██████████| 2/2 [00:00<00:00, 20.86it/s]
14epoch  105step | loss:  2.8104 :  12%|█▎        | 1/8 [00:00<00:03,  2.25it/s]

validation multi-class accuracy = 0.0806, f1 score = 0.0296
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.13      1.00      0.23         4
          15       0.50      0.25      0.33         4
          16       0.

14epoch  106step | loss:  2.7609 :  25%|██▌       | 2/8 [00:00<00:02,  2.78it/s]

 global_steps : 106


14epoch  107step | loss:  2.7467 :  38%|███▊      | 3/8 [00:01<00:02,  2.48it/s]

 global_steps : 107


14epoch  108step | loss:  2.7428 :  50%|█████     | 4/8 [00:01<00:01,  2.50it/s]

 global_steps : 108


14epoch  109step | loss:  2.7263 :  62%|██████▎   | 5/8 [00:01<00:01,  2.54it/s]

 global_steps : 109


14epoch  110step | loss:  2.7139 :  75%|███████▌  | 6/8 [00:02<00:00,  2.73it/s]

 global_steps : 110


14epoch  111step | loss:  2.7086 :  88%|████████▊ | 7/8 [00:02<00:00,  2.82it/s]

 global_steps : 111


14epoch  112step | loss:  2.7022 : 100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


 global_steps : 112


15epoch  113step | loss:  2.7289 :  12%|█▎        | 1/8 [00:00<00:02,  3.38it/s]

 global_steps : 113


15epoch  114step | loss:  2.6978 :  25%|██▌       | 2/8 [00:00<00:01,  3.21it/s]

 global_steps : 114


15epoch  115step | loss:  2.6715 :  38%|███▊      | 3/8 [00:00<00:01,  3.12it/s]

 global_steps : 115


15epoch  116step | loss:  2.6627 :  50%|█████     | 4/8 [00:01<00:01,  3.02it/s]

 global_steps : 116


15epoch  117step | loss:  2.6690 :  62%|██████▎   | 5/8 [00:01<00:01,  2.91it/s]

 global_steps : 117


15epoch  118step | loss:  2.6552 :  75%|███████▌  | 6/8 [00:02<00:00,  2.93it/s]

 global_steps : 118


15epoch  119step | loss:  2.6483 :  88%|████████▊ | 7/8 [00:02<00:00,  2.85it/s]

 global_steps : 119


epoch 15 loss: 2.7749: 100%|██████████| 2/2 [00:00<00:00, 19.33it/s]


validation multi-class accuracy = 0.1452, f1 score = 0.0861
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       1.00      0.33      0.50         3
           9       0.00      0.00      0.00         3
          10       0.67      0.33      0.44         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.17      1.00      0.30         4
          15       0.50      0.25      0.33         4
          16       0.

15epoch  120step | loss:  2.6538 : 100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


eval/acc : 0.14516129032258066, eval/f1 : 0.086135477582846, global_steps: 120


16epoch  121step | loss:  2.6568 :  12%|█▎        | 1/8 [00:00<00:02,  3.21it/s]

 global_steps : 121


16epoch  122step | loss:  2.6395 :  25%|██▌       | 2/8 [00:00<00:01,  3.34it/s]

 global_steps : 122


16epoch  123step | loss:  2.5975 :  38%|███▊      | 3/8 [00:00<00:01,  3.41it/s]

 global_steps : 123


16epoch  124step | loss:  2.5956 :  50%|█████     | 4/8 [00:01<00:01,  3.37it/s]

 global_steps : 124


16epoch  125step | loss:  2.6210 :  62%|██████▎   | 5/8 [00:01<00:00,  3.31it/s]

 global_steps : 125


16epoch  126step | loss:  2.6199 :  75%|███████▌  | 6/8 [00:01<00:00,  3.25it/s]

 global_steps : 126


16epoch  127step | loss:  2.6028 :  88%|████████▊ | 7/8 [00:02<00:00,  3.25it/s]

 global_steps : 127


16epoch  128step | loss:  2.6067 : 100%|██████████| 8/8 [00:02<00:00,  3.31it/s]


 global_steps : 128


17epoch  129step | loss:  2.5327 :  12%|█▎        | 1/8 [00:00<00:02,  3.31it/s]

 global_steps : 129


17epoch  130step | loss:  2.5018 :  25%|██▌       | 2/8 [00:00<00:01,  3.23it/s]

 global_steps : 130


17epoch  131step | loss:  2.5167 :  38%|███▊      | 3/8 [00:00<00:01,  3.25it/s]

 global_steps : 131


17epoch  132step | loss:  2.5537 :  50%|█████     | 4/8 [00:01<00:01,  3.23it/s]

 global_steps : 132


17epoch  133step | loss:  2.5596 :  62%|██████▎   | 5/8 [00:01<00:00,  3.15it/s]

 global_steps : 133


17epoch  134step | loss:  2.5660 :  75%|███████▌  | 6/8 [00:01<00:00,  3.20it/s]

 global_steps : 134


epoch 17 loss: 2.6825: 100%|██████████| 2/2 [00:00<00:00, 20.94it/s]


validation multi-class accuracy = 0.1774, f1 score = 0.1005
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.14      0.25      0.18         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.67      0.67      0.67         3
           9       0.00      0.00      0.00         3
          10       0.43      0.50      0.46         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.15      1.00      0.27         4
          15       0.50      0.25      0.33         4
          16       0.

17epoch  135step | loss:  2.5743 :  88%|████████▊ | 7/8 [00:08<00:02,  2.34s/it]

eval/acc : 0.1774193548387097, eval/f1 : 0.10052754263280579, global_steps: 135


17epoch  136step | loss:  2.5737 : 100%|██████████| 8/8 [00:08<00:00,  1.09s/it]


 global_steps : 136


18epoch  137step | loss:  2.4662 :  12%|█▎        | 1/8 [00:00<00:02,  3.42it/s]

 global_steps : 137


18epoch  138step | loss:  2.5670 :  25%|██▌       | 2/8 [00:00<00:01,  3.38it/s]

 global_steps : 138


18epoch  139step | loss:  2.5704 :  38%|███▊      | 3/8 [00:00<00:01,  3.28it/s]

 global_steps : 139


18epoch  140step | loss:  2.5662 :  50%|█████     | 4/8 [00:01<00:01,  3.28it/s]

 global_steps : 140


18epoch  141step | loss:  2.5479 :  62%|██████▎   | 5/8 [00:01<00:00,  3.34it/s]

 global_steps : 141


18epoch  142step | loss:  2.5394 :  75%|███████▌  | 6/8 [00:01<00:00,  3.31it/s]

 global_steps : 142


18epoch  143step | loss:  2.5403 :  88%|████████▊ | 7/8 [00:02<00:00,  3.23it/s]

 global_steps : 143


18epoch  144step | loss:  2.5211 : 100%|██████████| 8/8 [00:02<00:00,  3.32it/s]


 global_steps : 144


19epoch  145step | loss:  2.2685 :  12%|█▎        | 1/8 [00:00<00:02,  3.06it/s]

 global_steps : 145


19epoch  146step | loss:  2.4173 :  25%|██▌       | 2/8 [00:00<00:01,  3.19it/s]

 global_steps : 146


19epoch  147step | loss:  2.5049 :  38%|███▊      | 3/8 [00:00<00:01,  3.21it/s]

 global_steps : 147


19epoch  148step | loss:  2.5115 :  50%|█████     | 4/8 [00:01<00:01,  3.14it/s]

 global_steps : 148


19epoch  149step | loss:  2.5470 :  62%|██████▎   | 5/8 [00:01<00:00,  3.18it/s]

 global_steps : 149


epoch 19 loss: 2.6407: 100%|██████████| 2/2 [00:00<00:00, 19.63it/s]


validation multi-class accuracy = 0.2097, f1 score = 0.1366
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.14      0.25      0.18         4
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.33      0.67      0.44         3
           9       0.00      0.00      0.00         3
          10       0.33      0.50      0.40         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.19      1.00      0.32         4
          15       0.25      0.25      0.25         4
          16       0.

19epoch  150step | loss:  2.5338 :  75%|███████▌  | 6/8 [00:07<00:04,  2.23s/it]

eval/acc : 0.20967741935483872, eval/f1 : 0.1366454013822435, global_steps: 150


19epoch  151step | loss:  2.5287 :  88%|████████▊ | 7/8 [00:07<00:01,  1.60s/it]

 global_steps : 151


19epoch  152step | loss:  2.5271 : 100%|██████████| 8/8 [00:08<00:00,  1.01s/it]


 global_steps : 152


20epoch  153step | loss:  2.5144 :  12%|█▎        | 1/8 [00:00<00:02,  3.17it/s]

 global_steps : 153


20epoch  154step | loss:  2.4996 :  25%|██▌       | 2/8 [00:00<00:01,  3.21it/s]

 global_steps : 154


20epoch  155step | loss:  2.5306 :  38%|███▊      | 3/8 [00:00<00:01,  3.28it/s]

 global_steps : 155


20epoch  156step | loss:  2.5261 :  50%|█████     | 4/8 [00:01<00:01,  3.36it/s]

 global_steps : 156


20epoch  157step | loss:  2.5067 :  62%|██████▎   | 5/8 [00:01<00:00,  3.26it/s]

 global_steps : 157


20epoch  158step | loss:  2.5052 :  75%|███████▌  | 6/8 [00:01<00:00,  3.32it/s]

 global_steps : 158


20epoch  159step | loss:  2.4845 :  88%|████████▊ | 7/8 [00:02<00:00,  3.40it/s]

 global_steps : 159


20epoch  160step | loss:  2.4864 : 100%|██████████| 8/8 [00:02<00:00,  3.38it/s]


 global_steps : 160


21epoch  161step | loss:  2.3386 :  12%|█▎        | 1/8 [00:00<00:02,  3.24it/s]

 global_steps : 161


21epoch  162step | loss:  2.3468 :  25%|██▌       | 2/8 [00:00<00:01,  3.33it/s]

 global_steps : 162


21epoch  163step | loss:  2.3733 :  38%|███▊      | 3/8 [00:00<00:01,  3.41it/s]

 global_steps : 163


21epoch  164step | loss:  2.3852 :  50%|█████     | 4/8 [00:01<00:01,  3.41it/s]

 global_steps : 164


epoch 21 loss: 2.5685: 100%|██████████| 2/2 [00:00<00:00, 20.48it/s]


validation multi-class accuracy = 0.2581, f1 score = 0.1845
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.25      0.50      0.33         4
           5       1.00      0.50      0.67         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.50      0.67      0.57         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.21      1.00      0.35         4
          15       0.33      0.25      0.29         4
          16       0.

21epoch  165step | loss:  2.4165 :  62%|██████▎   | 5/8 [00:07<00:07,  2.57s/it]

eval/acc : 0.25806451612903225, eval/f1 : 0.184472049689441, global_steps: 165


21epoch  166step | loss:  2.4114 :  75%|███████▌  | 6/8 [00:08<00:03,  1.80s/it]

 global_steps : 166


21epoch  167step | loss:  2.4169 :  88%|████████▊ | 7/8 [00:08<00:01,  1.31s/it]

 global_steps : 167


21epoch  168step | loss:  2.4149 : 100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


 global_steps : 168


22epoch  169step | loss:  2.2700 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 169


22epoch  170step | loss:  2.3609 :  25%|██▌       | 2/8 [00:00<00:01,  3.31it/s]

 global_steps : 170


22epoch  171step | loss:  2.3453 :  38%|███▊      | 3/8 [00:00<00:01,  3.24it/s]

 global_steps : 171


22epoch  172step | loss:  2.3241 :  50%|█████     | 4/8 [00:01<00:01,  3.24it/s]

 global_steps : 172


22epoch  173step | loss:  2.3732 :  62%|██████▎   | 5/8 [00:01<00:00,  3.24it/s]

 global_steps : 173


22epoch  174step | loss:  2.3923 :  75%|███████▌  | 6/8 [00:01<00:00,  3.20it/s]

 global_steps : 174


22epoch  175step | loss:  2.4084 :  88%|████████▊ | 7/8 [00:02<00:00,  3.21it/s]

 global_steps : 175


22epoch  176step | loss:  2.4131 : 100%|██████████| 8/8 [00:02<00:00,  3.26it/s]


 global_steps : 176


23epoch  177step | loss:  2.1817 :  12%|█▎        | 1/8 [00:00<00:02,  3.27it/s]

 global_steps : 177


23epoch  178step | loss:  2.3392 :  25%|██▌       | 2/8 [00:00<00:01,  3.19it/s]

 global_steps : 178


23epoch  179step | loss:  2.3009 :  38%|███▊      | 3/8 [00:00<00:01,  3.13it/s]

 global_steps : 179


epoch 23 loss: 2.5180: 100%|██████████| 2/2 [00:00<00:00, 20.58it/s]


validation multi-class accuracy = 0.3065, f1 score = 0.2148
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.60      0.75      0.67         4
           5       1.00      0.75      0.86         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       0.00      0.00      0.00         3
          10       0.20      0.50      0.29         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.19      1.00      0.32         4
          15       0.33      0.25      0.29         4
          16       0.

23epoch  180step | loss:  2.2977 :  50%|█████     | 4/8 [00:08<00:12,  3.20s/it]

eval/acc : 0.3064516129032258, eval/f1 : 0.21483709273182958, global_steps: 180


23epoch  181step | loss:  2.3201 :  62%|██████▎   | 5/8 [00:08<00:06,  2.15s/it]

 global_steps : 181


23epoch  182step | loss:  2.3462 :  75%|███████▌  | 6/8 [00:09<00:03,  1.53s/it]

 global_steps : 182


23epoch  183step | loss:  2.3784 :  88%|████████▊ | 7/8 [00:09<00:01,  1.14s/it]

 global_steps : 183


23epoch  184step | loss:  2.3763 : 100%|██████████| 8/8 [00:09<00:00,  1.22s/it]


 global_steps : 184


24epoch  185step | loss:  2.5114 :  12%|█▎        | 1/8 [00:00<00:02,  3.24it/s]

 global_steps : 185


24epoch  186step | loss:  2.4348 :  25%|██▌       | 2/8 [00:00<00:01,  3.16it/s]

 global_steps : 186


24epoch  187step | loss:  2.3726 :  38%|███▊      | 3/8 [00:00<00:01,  3.17it/s]

 global_steps : 187


24epoch  188step | loss:  2.3288 :  50%|█████     | 4/8 [00:01<00:01,  3.20it/s]

 global_steps : 188


24epoch  189step | loss:  2.3739 :  62%|██████▎   | 5/8 [00:01<00:00,  3.22it/s]

 global_steps : 189


24epoch  190step | loss:  2.3781 :  75%|███████▌  | 6/8 [00:01<00:00,  3.21it/s]

 global_steps : 190


24epoch  191step | loss:  2.3685 :  88%|████████▊ | 7/8 [00:02<00:00,  3.20it/s]

 global_steps : 191


24epoch  192step | loss:  2.3493 : 100%|██████████| 8/8 [00:02<00:00,  3.28it/s]


 global_steps : 192


25epoch  193step | loss:  2.3150 :  12%|█▎        | 1/8 [00:00<00:02,  3.40it/s]

 global_steps : 193


25epoch  194step | loss:  2.3705 :  25%|██▌       | 2/8 [00:00<00:01,  3.45it/s]

 global_steps : 194


epoch 25 loss: 2.4765: 100%|██████████| 2/2 [00:00<00:00, 20.65it/s]


validation multi-class accuracy = 0.3226, f1 score = 0.2335
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.00      0.00      0.00         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.50      0.75      0.60         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.20      1.00      0.33         4
          15       0.33      0.25      0.29         4
          16       0.

25epoch  195step | loss:  2.3944 :  38%|███▊      | 3/8 [00:08<00:18,  3.77s/it]

eval/acc : 0.3225806451612903, eval/f1 : 0.23345864661654137, global_steps: 195


25epoch  196step | loss:  2.3444 :  50%|█████     | 4/8 [00:08<00:09,  2.40s/it]

 global_steps : 196


25epoch  197step | loss:  2.3229 :  62%|██████▎   | 5/8 [00:09<00:04,  1.65s/it]

 global_steps : 197


25epoch  198step | loss:  2.3316 :  75%|███████▌  | 6/8 [00:09<00:02,  1.20s/it]

 global_steps : 198


25epoch  199step | loss:  2.3381 :  88%|████████▊ | 7/8 [00:09<00:00,  1.10it/s]

 global_steps : 199


25epoch  200step | loss:  2.3393 : 100%|██████████| 8/8 [00:10<00:00,  1.25s/it]


 global_steps : 200


26epoch  201step | loss:  2.2770 :  12%|█▎        | 1/8 [00:00<00:02,  3.05it/s]

 global_steps : 201


26epoch  202step | loss:  2.2422 :  25%|██▌       | 2/8 [00:00<00:01,  3.11it/s]

 global_steps : 202


26epoch  203step | loss:  2.2984 :  38%|███▊      | 3/8 [00:00<00:01,  3.10it/s]

 global_steps : 203


26epoch  204step | loss:  2.3170 :  50%|█████     | 4/8 [00:01<00:01,  3.13it/s]

 global_steps : 204


26epoch  205step | loss:  2.2909 :  62%|██████▎   | 5/8 [00:01<00:00,  3.19it/s]

 global_steps : 205


26epoch  206step | loss:  2.3033 :  75%|███████▌  | 6/8 [00:01<00:00,  3.28it/s]

 global_steps : 206


26epoch  207step | loss:  2.2845 :  88%|████████▊ | 7/8 [00:02<00:00,  3.31it/s]

 global_steps : 207


26epoch  208step | loss:  2.2827 : 100%|██████████| 8/8 [00:02<00:00,  3.29it/s]


 global_steps : 208


27epoch  209step | loss:  2.1346 :  12%|█▎        | 1/8 [00:00<00:02,  3.27it/s]

 global_steps : 209


epoch 27 loss: 2.4439: 100%|██████████| 2/2 [00:00<00:00, 20.02it/s]


validation multi-class accuracy = 0.3226, f1 score = 0.2347
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.43      0.75      0.55         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.22      1.00      0.36         4
          15       0.25      0.25      0.25         4
          16       0.

27epoch  210step | loss:  2.2709 :  25%|██▌       | 2/8 [00:08<00:29,  4.94s/it]

eval/acc : 0.3225806451612903, eval/f1 : 0.234688995215311, global_steps: 210


27epoch  211step | loss:  2.2834 :  38%|███▊      | 3/8 [00:08<00:14,  2.81s/it]

 global_steps : 211


27epoch  212step | loss:  2.2811 :  50%|█████     | 4/8 [00:09<00:07,  1.83s/it]

 global_steps : 212


27epoch  213step | loss:  2.2898 :  62%|██████▎   | 5/8 [00:09<00:03,  1.28s/it]

 global_steps : 213


27epoch  214step | loss:  2.2914 :  75%|███████▌  | 6/8 [00:09<00:01,  1.05it/s]

 global_steps : 214


27epoch  215step | loss:  2.2882 :  88%|████████▊ | 7/8 [00:10<00:00,  1.36it/s]

 global_steps : 215


27epoch  216step | loss:  2.2980 : 100%|██████████| 8/8 [00:10<00:00,  1.29s/it]


 global_steps : 216


28epoch  217step | loss:  2.4272 :  12%|█▎        | 1/8 [00:00<00:02,  3.46it/s]

 global_steps : 217


28epoch  218step | loss:  2.3552 :  25%|██▌       | 2/8 [00:00<00:01,  3.51it/s]

 global_steps : 218


28epoch  219step | loss:  2.2831 :  38%|███▊      | 3/8 [00:00<00:01,  3.36it/s]

 global_steps : 219


28epoch  220step | loss:  2.3087 :  50%|█████     | 4/8 [00:01<00:01,  3.28it/s]

 global_steps : 220


28epoch  221step | loss:  2.2661 :  62%|██████▎   | 5/8 [00:01<00:00,  3.36it/s]

 global_steps : 221


28epoch  222step | loss:  2.2652 :  75%|███████▌  | 6/8 [00:01<00:00,  3.39it/s]

 global_steps : 222


28epoch  223step | loss:  2.2676 :  88%|████████▊ | 7/8 [00:02<00:00,  3.37it/s]

 global_steps : 223


28epoch  224step | loss:  2.2621 : 100%|██████████| 8/8 [00:02<00:00,  3.43it/s]


 global_steps : 224


epoch 29 loss: 2.4244: 100%|██████████| 2/2 [00:00<00:00, 21.23it/s]
29epoch  225step | loss:  2.2131 :  12%|█▎        | 1/8 [00:00<00:02,  2.45it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2323
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.23      0.50      0.32         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.21      1.00      0.35         4
          15       0.25      0.25      0.25         4
          16       0.

29epoch  226step | loss:  2.2242 :  25%|██▌       | 2/8 [00:00<00:02,  2.95it/s]

 global_steps : 226


29epoch  227step | loss:  2.1988 :  38%|███▊      | 3/8 [00:00<00:01,  3.15it/s]

 global_steps : 227


29epoch  228step | loss:  2.2489 :  50%|█████     | 4/8 [00:01<00:01,  3.26it/s]

 global_steps : 228


29epoch  229step | loss:  2.2101 :  62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

 global_steps : 229


29epoch  230step | loss:  2.1933 :  75%|███████▌  | 6/8 [00:01<00:00,  3.28it/s]

 global_steps : 230


29epoch  231step | loss:  2.2141 :  88%|████████▊ | 7/8 [00:02<00:00,  3.18it/s]

 global_steps : 231


29epoch  232step | loss:  2.2267 : 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]


 global_steps : 232


30epoch  234step | loss:  2.2070 :  25%|██▌       | 2/8 [00:00<00:01,  4.19it/s]

 global_steps : 233
 global_steps : 234


30epoch  235step | loss:  2.2165 :  38%|███▊      | 3/8 [00:00<00:01,  3.52it/s]

 global_steps : 235


30epoch  236step | loss:  2.2167 :  50%|█████     | 4/8 [00:01<00:01,  3.36it/s]

 global_steps : 236


30epoch  237step | loss:  2.1987 :  62%|██████▎   | 5/8 [00:01<00:00,  3.18it/s]

 global_steps : 237


30epoch  238step | loss:  2.1678 :  75%|███████▌  | 6/8 [00:01<00:00,  3.14it/s]

 global_steps : 238


30epoch  239step | loss:  2.1745 :  88%|████████▊ | 7/8 [00:02<00:00,  3.17it/s]

 global_steps : 239


epoch 30 loss: 2.4035: 100%|██████████| 2/2 [00:00<00:00, 17.52it/s]


validation multi-class accuracy = 0.3226, f1 score = 0.2356
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.43      0.75      0.55         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

30epoch  240step | loss:  2.1959 : 100%|██████████| 8/8 [00:08<00:00,  1.09s/it]


eval/acc : 0.3225806451612903, eval/f1 : 0.23560036454773295, global_steps: 240


31epoch  241step | loss:  2.1975 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 241


31epoch  242step | loss:  2.1915 :  25%|██▌       | 2/8 [00:00<00:01,  3.22it/s]

 global_steps : 242


31epoch  243step | loss:  2.2082 :  38%|███▊      | 3/8 [00:00<00:01,  3.28it/s]

 global_steps : 243


31epoch  244step | loss:  2.1977 :  50%|█████     | 4/8 [00:01<00:01,  3.31it/s]

 global_steps : 244


31epoch  245step | loss:  2.2048 :  62%|██████▎   | 5/8 [00:01<00:00,  3.28it/s]

 global_steps : 245


31epoch  246step | loss:  2.2196 :  75%|███████▌  | 6/8 [00:01<00:00,  3.34it/s]

 global_steps : 246


31epoch  247step | loss:  2.2201 :  88%|████████▊ | 7/8 [00:02<00:00,  3.38it/s]

 global_steps : 247


31epoch  248step | loss:  2.2236 : 100%|██████████| 8/8 [00:02<00:00,  3.40it/s]


 global_steps : 248


32epoch  249step | loss:  2.2372 :  12%|█▎        | 1/8 [00:00<00:02,  3.49it/s]

 global_steps : 249


32epoch  250step | loss:  2.2147 :  25%|██▌       | 2/8 [00:00<00:01,  3.45it/s]

 global_steps : 250


32epoch  251step | loss:  2.2125 :  38%|███▊      | 3/8 [00:00<00:01,  3.46it/s]

 global_steps : 251


32epoch  252step | loss:  2.2362 :  50%|█████     | 4/8 [00:01<00:01,  3.28it/s]

 global_steps : 252


32epoch  253step | loss:  2.2284 :  62%|██████▎   | 5/8 [00:01<00:00,  3.25it/s]

 global_steps : 253


32epoch  254step | loss:  2.2094 :  75%|███████▌  | 6/8 [00:01<00:00,  3.28it/s]

 global_steps : 254


epoch 32 loss: 2.3966: 100%|██████████| 2/2 [00:00<00:00, 20.63it/s]
32epoch  255step | loss:  2.2105 :  88%|████████▊ | 7/8 [00:02<00:00,  2.90it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2356
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.43      0.75      0.55         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

32epoch  256step | loss:  2.2044 : 100%|██████████| 8/8 [00:02<00:00,  3.18it/s]


 global_steps : 256


33epoch  257step | loss:  2.1246 :  12%|█▎        | 1/8 [00:00<00:02,  3.20it/s]

 global_steps : 257


33epoch  258step | loss:  2.1765 :  25%|██▌       | 2/8 [00:00<00:01,  3.19it/s]

 global_steps : 258


33epoch  259step | loss:  2.1799 :  38%|███▊      | 3/8 [00:00<00:01,  3.33it/s]

 global_steps : 259


33epoch  260step | loss:  2.1513 :  50%|█████     | 4/8 [00:01<00:01,  3.40it/s]

 global_steps : 260


33epoch  261step | loss:  2.1612 :  62%|██████▎   | 5/8 [00:01<00:00,  3.27it/s]

 global_steps : 261


33epoch  262step | loss:  2.1898 :  75%|███████▌  | 6/8 [00:01<00:00,  3.19it/s]

 global_steps : 262


33epoch  263step | loss:  2.1846 :  88%|████████▊ | 7/8 [00:02<00:00,  3.07it/s]

 global_steps : 263


33epoch  264step | loss:  2.1920 : 100%|██████████| 8/8 [00:02<00:00,  3.14it/s]


 global_steps : 264


34epoch  265step | loss:  2.3552 :  12%|█▎        | 1/8 [00:00<00:02,  2.88it/s]

 global_steps : 265


34epoch  266step | loss:  2.2386 :  25%|██▌       | 2/8 [00:00<00:02,  2.93it/s]

 global_steps : 266


34epoch  267step | loss:  2.2443 :  38%|███▊      | 3/8 [00:01<00:01,  2.87it/s]

 global_steps : 267


34epoch  268step | loss:  2.2072 :  50%|█████     | 4/8 [00:01<00:01,  2.85it/s]

 global_steps : 268


34epoch  269step | loss:  2.2036 :  62%|██████▎   | 5/8 [00:01<00:01,  2.84it/s]

 global_steps : 269


epoch 34 loss: 2.3930: 100%|██████████| 2/2 [00:00<00:00, 17.30it/s]
34epoch  270step | loss:  2.2093 :  75%|███████▌  | 6/8 [00:02<00:00,  2.59it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2340
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.23      0.50      0.32         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

34epoch  271step | loss:  2.1784 :  88%|████████▊ | 7/8 [00:02<00:00,  2.77it/s]

 global_steps : 271


34epoch  272step | loss:  2.1898 : 100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


 global_steps : 272


35epoch  273step | loss:  2.2614 :  12%|█▎        | 1/8 [00:00<00:02,  3.12it/s]

 global_steps : 273


35epoch  274step | loss:  2.2235 :  25%|██▌       | 2/8 [00:00<00:01,  3.09it/s]

 global_steps : 274


35epoch  275step | loss:  2.2237 :  38%|███▊      | 3/8 [00:00<00:01,  3.09it/s]

 global_steps : 275


35epoch  276step | loss:  2.1794 :  50%|█████     | 4/8 [00:01<00:01,  3.15it/s]

 global_steps : 276


35epoch  277step | loss:  2.1856 :  62%|██████▎   | 5/8 [00:01<00:00,  3.03it/s]

 global_steps : 277


35epoch  278step | loss:  2.2012 :  75%|███████▌  | 6/8 [00:02<00:00,  2.87it/s]

 global_steps : 278


35epoch  279step | loss:  2.1738 :  88%|████████▊ | 7/8 [00:02<00:00,  2.90it/s]

 global_steps : 279


35epoch  280step | loss:  2.1766 : 100%|██████████| 8/8 [00:02<00:00,  3.03it/s]


 global_steps : 280


36epoch  281step | loss:  2.2180 :  12%|█▎        | 1/8 [00:00<00:02,  3.00it/s]

 global_steps : 281


36epoch  282step | loss:  2.2499 :  25%|██▌       | 2/8 [00:00<00:01,  3.06it/s]

 global_steps : 282


36epoch  283step | loss:  2.2404 :  38%|███▊      | 3/8 [00:00<00:01,  3.12it/s]

 global_steps : 283


36epoch  284step | loss:  2.1912 :  50%|█████     | 4/8 [00:01<00:01,  3.17it/s]

 global_steps : 284


epoch 36 loss: 2.3915: 100%|██████████| 2/2 [00:00<00:00, 16.66it/s]
36epoch  285step | loss:  2.1931 :  62%|██████▎   | 5/8 [00:01<00:01,  2.60it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2332
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

36epoch  287step | loss:  2.2393 :  88%|████████▊ | 7/8 [00:02<00:00,  3.31it/s]

 global_steps : 286
 global_steps : 287


36epoch  288step | loss:  2.2423 : 100%|██████████| 8/8 [00:02<00:00,  3.06it/s]


 global_steps : 288


37epoch  289step | loss:  2.1817 :  12%|█▎        | 1/8 [00:00<00:02,  2.76it/s]

 global_steps : 289


37epoch  290step | loss:  2.2998 :  25%|██▌       | 2/8 [00:00<00:02,  2.74it/s]

 global_steps : 290


37epoch  291step | loss:  2.2498 :  38%|███▊      | 3/8 [00:01<00:01,  2.96it/s]

 global_steps : 291


37epoch  292step | loss:  2.2662 :  50%|█████     | 4/8 [00:01<00:01,  3.04it/s]

 global_steps : 292


37epoch  293step | loss:  2.2424 :  62%|██████▎   | 5/8 [00:01<00:00,  3.04it/s]

 global_steps : 293


37epoch  294step | loss:  2.2115 :  75%|███████▌  | 6/8 [00:01<00:00,  3.17it/s]

 global_steps : 294


37epoch  295step | loss:  2.1992 :  88%|████████▊ | 7/8 [00:02<00:00,  3.10it/s]

 global_steps : 295


37epoch  296step | loss:  2.1789 : 100%|██████████| 8/8 [00:02<00:00,  3.05it/s]


 global_steps : 296


38epoch  297step | loss:  2.0911 :  12%|█▎        | 1/8 [00:00<00:02,  2.86it/s]

 global_steps : 297


38epoch  298step | loss:  2.0862 :  25%|██▌       | 2/8 [00:00<00:02,  2.99it/s]

 global_steps : 298


38epoch  299step | loss:  2.0979 :  38%|███▊      | 3/8 [00:01<00:01,  2.89it/s]

 global_steps : 299


epoch 38 loss: 2.3914: 100%|██████████| 2/2 [00:00<00:00, 16.29it/s]
38epoch  300step | loss:  2.1309 :  50%|█████     | 4/8 [00:01<00:01,  2.39it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2332
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

38epoch  301step | loss:  2.1429 :  62%|██████▎   | 5/8 [00:01<00:01,  2.47it/s]

 global_steps : 301


38epoch  302step | loss:  2.1464 :  75%|███████▌  | 6/8 [00:02<00:00,  2.55it/s]

 global_steps : 302


38epoch  303step | loss:  2.1840 :  88%|████████▊ | 7/8 [00:02<00:00,  2.58it/s]

 global_steps : 303


38epoch  304step | loss:  2.1902 : 100%|██████████| 8/8 [00:03<00:00,  2.65it/s]


 global_steps : 304


39epoch  305step | loss:  2.1578 :  12%|█▎        | 1/8 [00:00<00:02,  2.84it/s]

 global_steps : 305


39epoch  306step | loss:  2.1990 :  25%|██▌       | 2/8 [00:00<00:02,  2.60it/s]

 global_steps : 306


39epoch  307step | loss:  2.1971 :  38%|███▊      | 3/8 [00:01<00:01,  2.65it/s]

 global_steps : 307


39epoch  308step | loss:  2.1849 :  50%|█████     | 4/8 [00:01<00:01,  2.70it/s]

 global_steps : 308


39epoch  309step | loss:  2.1516 :  62%|██████▎   | 5/8 [00:01<00:01,  2.74it/s]

 global_steps : 309


39epoch  310step | loss:  2.1809 :  75%|███████▌  | 6/8 [00:02<00:00,  2.79it/s]

 global_steps : 310


39epoch  311step | loss:  2.1912 :  88%|████████▊ | 7/8 [00:02<00:00,  2.80it/s]

 global_steps : 311


39epoch  312step | loss:  2.1895 : 100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


 global_steps : 312


40epoch  313step | loss:  2.1701 :  12%|█▎        | 1/8 [00:00<00:02,  3.42it/s]

 global_steps : 313


40epoch  314step | loss:  2.2131 :  25%|██▌       | 2/8 [00:00<00:01,  3.39it/s]

 global_steps : 314


epoch 40 loss: 2.3912: 100%|██████████| 2/2 [00:00<00:00, 20.72it/s]
40epoch  315step | loss:  2.1802 :  38%|███▊      | 3/8 [00:01<00:01,  2.86it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2332
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

40epoch  316step | loss:  2.2024 :  50%|█████     | 4/8 [00:01<00:01,  2.99it/s]

 global_steps : 316


40epoch  317step | loss:  2.1941 :  62%|██████▎   | 5/8 [00:01<00:00,  3.04it/s]

 global_steps : 317


40epoch  318step | loss:  2.1905 :  75%|███████▌  | 6/8 [00:01<00:00,  3.09it/s]

 global_steps : 318


40epoch  319step | loss:  2.1954 :  88%|████████▊ | 7/8 [00:02<00:00,  3.11it/s]

 global_steps : 319


40epoch  320step | loss:  2.2152 : 100%|██████████| 8/8 [00:02<00:00,  3.14it/s]


 global_steps : 320


41epoch  321step | loss:  1.9474 :  12%|█▎        | 1/8 [00:00<00:02,  3.34it/s]

 global_steps : 321


41epoch  322step | loss:  2.0543 :  25%|██▌       | 2/8 [00:00<00:01,  3.36it/s]

 global_steps : 322


41epoch  323step | loss:  2.1749 :  38%|███▊      | 3/8 [00:00<00:01,  3.36it/s]

 global_steps : 323


41epoch  324step | loss:  2.2301 :  50%|█████     | 4/8 [00:01<00:01,  3.38it/s]

 global_steps : 324


41epoch  325step | loss:  2.2182 :  62%|██████▎   | 5/8 [00:01<00:00,  3.37it/s]

 global_steps : 325


41epoch  326step | loss:  2.2124 :  75%|███████▌  | 6/8 [00:01<00:00,  3.28it/s]

 global_steps : 326


41epoch  327step | loss:  2.2059 :  88%|████████▊ | 7/8 [00:02<00:00,  3.21it/s]

 global_steps : 327


41epoch  328step | loss:  2.2020 : 100%|██████████| 8/8 [00:02<00:00,  3.29it/s]


 global_steps : 328


42epoch  329step | loss:  2.0396 :  12%|█▎        | 1/8 [00:00<00:02,  3.50it/s]

 global_steps : 329


epoch 42 loss: 2.3900: 100%|██████████| 2/2 [00:00<00:00, 18.40it/s]
42epoch  330step | loss:  2.0875 :  25%|██▌       | 2/8 [00:00<00:02,  2.69it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2332
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.21      0.50      0.30         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

42epoch  331step | loss:  2.1694 :  38%|███▊      | 3/8 [00:00<00:01,  3.03it/s]

 global_steps : 331


42epoch  332step | loss:  2.1577 :  50%|█████     | 4/8 [00:01<00:01,  3.17it/s]

 global_steps : 332


42epoch  333step | loss:  2.1369 :  62%|██████▎   | 5/8 [00:01<00:00,  3.22it/s]

 global_steps : 333


42epoch  334step | loss:  2.1839 :  75%|███████▌  | 6/8 [00:01<00:00,  3.25it/s]

 global_steps : 334


42epoch  335step | loss:  2.1699 :  88%|████████▊ | 7/8 [00:02<00:00,  3.28it/s]

 global_steps : 335


42epoch  336step | loss:  2.1850 : 100%|██████████| 8/8 [00:02<00:00,  3.26it/s]


 global_steps : 336


43epoch  337step | loss:  2.2354 :  12%|█▎        | 1/8 [00:00<00:02,  3.40it/s]

 global_steps : 337


43epoch  338step | loss:  2.1723 :  25%|██▌       | 2/8 [00:00<00:01,  3.34it/s]

 global_steps : 338


43epoch  339step | loss:  2.1819 :  38%|███▊      | 3/8 [00:00<00:01,  3.34it/s]

 global_steps : 339


43epoch  340step | loss:  2.1734 :  50%|█████     | 4/8 [00:01<00:01,  3.36it/s]

 global_steps : 340


43epoch  341step | loss:  2.1852 :  62%|██████▎   | 5/8 [00:01<00:00,  3.36it/s]

 global_steps : 341


43epoch  342step | loss:  2.1795 :  75%|███████▌  | 6/8 [00:01<00:00,  3.31it/s]

 global_steps : 342


43epoch  343step | loss:  2.1903 :  88%|████████▊ | 7/8 [00:02<00:00,  3.30it/s]

 global_steps : 343


43epoch  344step | loss:  2.1783 : 100%|██████████| 8/8 [00:02<00:00,  3.36it/s]


 global_steps : 344


epoch 44 loss: 2.3872: 100%|██████████| 2/2 [00:00<00:00, 21.73it/s]
44epoch  345step | loss:  2.2245 :  12%|█▎        | 1/8 [00:00<00:02,  2.44it/s]

validation multi-class accuracy = 0.3226, f1 score = 0.2340
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.75      0.75      0.75         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.23      0.50      0.32         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

44epoch  346step | loss:  2.1607 :  25%|██▌       | 2/8 [00:00<00:02,  2.95it/s]

 global_steps : 346


44epoch  347step | loss:  2.1259 :  38%|███▊      | 3/8 [00:01<00:01,  3.03it/s]

 global_steps : 347


44epoch  348step | loss:  2.1471 :  50%|█████     | 4/8 [00:01<00:01,  3.12it/s]

 global_steps : 348


44epoch  349step | loss:  2.1443 :  62%|██████▎   | 5/8 [00:01<00:00,  3.13it/s]

 global_steps : 349


44epoch  350step | loss:  2.1702 :  75%|███████▌  | 6/8 [00:01<00:00,  3.14it/s]

 global_steps : 350


44epoch  351step | loss:  2.1838 :  88%|████████▊ | 7/8 [00:02<00:00,  3.16it/s]

 global_steps : 351


44epoch  352step | loss:  2.1862 : 100%|██████████| 8/8 [00:02<00:00,  3.17it/s]


 global_steps : 352


45epoch  353step | loss:  2.2938 :  12%|█▎        | 1/8 [00:00<00:02,  3.24it/s]

 global_steps : 353


45epoch  354step | loss:  2.2477 :  25%|██▌       | 2/8 [00:00<00:01,  3.13it/s]

 global_steps : 354


45epoch  355step | loss:  2.2585 :  38%|███▊      | 3/8 [00:00<00:01,  3.27it/s]

 global_steps : 355


45epoch  356step | loss:  2.2282 :  50%|█████     | 4/8 [00:01<00:01,  3.33it/s]

 global_steps : 356


45epoch  357step | loss:  2.2038 :  62%|██████▎   | 5/8 [00:01<00:00,  3.38it/s]

 global_steps : 357


45epoch  358step | loss:  2.1958 :  75%|███████▌  | 6/8 [00:01<00:00,  3.41it/s]

 global_steps : 358


45epoch  359step | loss:  2.1859 :  88%|████████▊ | 7/8 [00:02<00:00,  3.40it/s]

 global_steps : 359


epoch 45 loss: 2.3785: 100%|██████████| 2/2 [00:00<00:00, 20.50it/s]


validation multi-class accuracy = 0.3387, f1 score = 0.2413
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.80      1.00      0.89         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.23      0.50      0.32         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

45epoch  360step | loss:  2.1735 : 100%|██████████| 8/8 [00:08<00:00,  1.03s/it]


eval/acc : 0.3387096774193548, eval/f1 : 0.24134898650134107, global_steps: 360


46epoch  361step | loss:  2.1636 :  12%|█▎        | 1/8 [00:00<00:02,  3.05it/s]

 global_steps : 361


46epoch  362step | loss:  2.1057 :  25%|██▌       | 2/8 [00:00<00:01,  3.16it/s]

 global_steps : 362


46epoch  363step | loss:  2.1429 :  38%|███▊      | 3/8 [00:00<00:01,  3.26it/s]

 global_steps : 363


46epoch  364step | loss:  2.1378 :  50%|█████     | 4/8 [00:01<00:01,  3.34it/s]

 global_steps : 364


46epoch  365step | loss:  2.1395 :  62%|██████▎   | 5/8 [00:01<00:00,  3.32it/s]

 global_steps : 365


46epoch  366step | loss:  2.1916 :  75%|███████▌  | 6/8 [00:01<00:00,  3.30it/s]

 global_steps : 366


46epoch  367step | loss:  2.1738 :  88%|████████▊ | 7/8 [00:02<00:00,  3.33it/s]

 global_steps : 367


46epoch  368step | loss:  2.1752 : 100%|██████████| 8/8 [00:02<00:00,  3.32it/s]


 global_steps : 368


47epoch  369step | loss:  2.0338 :  12%|█▎        | 1/8 [00:00<00:02,  3.39it/s]

 global_steps : 369


47epoch  370step | loss:  2.0960 :  25%|██▌       | 2/8 [00:00<00:01,  3.41it/s]

 global_steps : 370


47epoch  371step | loss:  2.1697 :  38%|███▊      | 3/8 [00:00<00:01,  3.40it/s]

 global_steps : 371


47epoch  372step | loss:  2.1678 :  50%|█████     | 4/8 [00:01<00:01,  3.29it/s]

 global_steps : 372


47epoch  373step | loss:  2.1923 :  62%|██████▎   | 5/8 [00:01<00:00,  3.34it/s]

 global_steps : 373


47epoch  374step | loss:  2.1829 :  75%|███████▌  | 6/8 [00:01<00:00,  3.32it/s]

 global_steps : 374


epoch 47 loss: 2.3598: 100%|██████████| 2/2 [00:00<00:00, 20.89it/s]
47epoch  375step | loss:  2.1693 :  88%|████████▊ | 7/8 [00:02<00:00,  2.91it/s]

validation multi-class accuracy = 0.3387, f1 score = 0.2413
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.80      1.00      0.89         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.23      0.50      0.32         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

47epoch  376step | loss:  2.1700 : 100%|██████████| 8/8 [00:02<00:00,  3.19it/s]


 global_steps : 376


48epoch  377step | loss:  2.0765 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 377


48epoch  378step | loss:  2.1435 :  25%|██▌       | 2/8 [00:00<00:01,  3.24it/s]

 global_steps : 378


48epoch  379step | loss:  2.1577 :  38%|███▊      | 3/8 [00:00<00:01,  3.25it/s]

 global_steps : 379


48epoch  380step | loss:  2.1812 :  50%|█████     | 4/8 [00:01<00:01,  3.33it/s]

 global_steps : 380


48epoch  381step | loss:  2.1745 :  62%|██████▎   | 5/8 [00:01<00:00,  3.31it/s]

 global_steps : 381


48epoch  382step | loss:  2.1526 :  75%|███████▌  | 6/8 [00:01<00:00,  3.23it/s]

 global_steps : 382


48epoch  383step | loss:  2.1465 :  88%|████████▊ | 7/8 [00:02<00:00,  3.19it/s]

 global_steps : 383


48epoch  384step | loss:  2.1447 : 100%|██████████| 8/8 [00:02<00:00,  3.27it/s]


 global_steps : 384


49epoch  385step | loss:  2.0922 :  12%|█▎        | 1/8 [00:00<00:01,  3.53it/s]

 global_steps : 385


49epoch  386step | loss:  2.0991 :  25%|██▌       | 2/8 [00:00<00:01,  3.49it/s]

 global_steps : 386


49epoch  387step | loss:  2.1426 :  38%|███▊      | 3/8 [00:00<00:01,  3.47it/s]

 global_steps : 387


49epoch  388step | loss:  2.1550 :  50%|█████     | 4/8 [00:01<00:01,  3.40it/s]

 global_steps : 388


49epoch  389step | loss:  2.1294 :  62%|██████▎   | 5/8 [00:01<00:00,  3.39it/s]

 global_steps : 389


epoch 49 loss: 2.3446: 100%|██████████| 2/2 [00:00<00:00, 20.51it/s]
49epoch  390step | loss:  2.1090 :  75%|███████▌  | 6/8 [00:01<00:00,  2.98it/s]

validation multi-class accuracy = 0.3387, f1 score = 0.2413
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.38      0.75      0.50         4
           5       0.80      1.00      0.89         4
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.23      0.50      0.32         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.24      1.00      0.38         4
          15       0.25      0.25      0.25         4
          16       0.

49epoch  391step | loss:  2.1339 :  88%|████████▊ | 7/8 [00:02<00:00,  3.11it/s]

 global_steps : 391


49epoch  392step | loss:  2.1200 : 100%|██████████| 8/8 [00:02<00:00,  3.16it/s]


 global_steps : 392


50epoch  393step | loss:  2.0631 :  12%|█▎        | 1/8 [00:00<00:02,  2.53it/s]

 global_steps : 393


50epoch  394step | loss:  2.0740 :  25%|██▌       | 2/8 [00:00<00:02,  2.82it/s]

 global_steps : 394


50epoch  395step | loss:  2.1019 :  38%|███▊      | 3/8 [00:01<00:01,  2.83it/s]

 global_steps : 395


50epoch  396step | loss:  2.1271 :  50%|█████     | 4/8 [00:01<00:01,  2.82it/s]

 global_steps : 396


50epoch  397step | loss:  2.1529 :  62%|██████▎   | 5/8 [00:01<00:01,  2.82it/s]

 global_steps : 397


50epoch  398step | loss:  2.1741 :  75%|███████▌  | 6/8 [00:02<00:00,  2.87it/s]

 global_steps : 398


50epoch  399step | loss:  2.1448 :  88%|████████▊ | 7/8 [00:02<00:00,  2.86it/s]

 global_steps : 399


50epoch  400step | loss:  2.1473 : 100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


 global_steps : 400


51epoch  401step | loss:  2.0388 :  12%|█▎        | 1/8 [00:00<00:02,  2.89it/s]

 global_steps : 401


51epoch  402step | loss:  2.0585 :  25%|██▌       | 2/8 [00:00<00:02,  2.97it/s]

 global_steps : 402


51epoch  403step | loss:  2.0541 :  38%|███▊      | 3/8 [00:01<00:01,  2.92it/s]

 global_steps : 403


51epoch  404step | loss:  2.0971 :  50%|█████     | 4/8 [00:01<00:01,  3.03it/s]

 global_steps : 404


epoch 51 loss: 2.3096: 100%|██████████| 2/2 [00:00<00:00, 20.13it/s]


validation multi-class accuracy = 0.3871, f1 score = 0.2734
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.44      1.00      0.62         4
           5       0.80      1.00      0.89         4
           6       0.33      0.33      0.33         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.31      0.67      0.42         6
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       0.25      1.00      0.40         4
          15       0.33      0.25      0.29         4
          16       0.

51epoch  405step | loss:  2.1119 :  62%|██████▎   | 5/8 [00:08<00:07,  2.65s/it]

eval/acc : 0.3870967741935484, eval/f1 : 0.2733880923631616, global_steps: 405


51epoch  406step | loss:  2.0906 :  75%|███████▌  | 6/8 [00:08<00:03,  1.85s/it]

 global_steps : 406


51epoch  407step | loss:  2.1275 :  88%|████████▊ | 7/8 [00:08<00:01,  1.34s/it]

 global_steps : 407


51epoch  408step | loss:  2.1197 : 100%|██████████| 8/8 [00:08<00:00,  1.12s/it]


 global_steps : 408


52epoch  409step | loss:  2.0935 :  12%|█▎        | 1/8 [00:00<00:01,  3.54it/s]

 global_steps : 409


52epoch  410step | loss:  2.1167 :  25%|██▌       | 2/8 [00:00<00:01,  3.52it/s]

 global_steps : 410


52epoch  411step | loss:  2.1131 :  38%|███▊      | 3/8 [00:00<00:01,  3.33it/s]

 global_steps : 411


52epoch  412step | loss:  2.1002 :  50%|█████     | 4/8 [00:01<00:01,  3.22it/s]

 global_steps : 412


52epoch  413step | loss:  2.0892 :  62%|██████▎   | 5/8 [00:01<00:00,  3.21it/s]

 global_steps : 413


52epoch  414step | loss:  2.0843 :  75%|███████▌  | 6/8 [00:01<00:00,  3.25it/s]

 global_steps : 414


52epoch  415step | loss:  2.0796 :  88%|████████▊ | 7/8 [00:02<00:00,  3.16it/s]

 global_steps : 415


52epoch  416step | loss:  2.0966 : 100%|██████████| 8/8 [00:02<00:00,  3.26it/s]


 global_steps : 416


53epoch  417step | loss:  2.1616 :  12%|█▎        | 1/8 [00:00<00:02,  3.37it/s]

 global_steps : 417


53epoch  418step | loss:  2.0464 :  25%|██▌       | 2/8 [00:00<00:01,  3.33it/s]

 global_steps : 418


53epoch  419step | loss:  2.0847 :  38%|███▊      | 3/8 [00:00<00:01,  3.34it/s]

 global_steps : 419


epoch 53 loss: 2.2703: 100%|██████████| 2/2 [00:00<00:00, 18.72it/s]


validation multi-class accuracy = 0.4194, f1 score = 0.3158
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.50      1.00      0.67         4
           5       0.80      1.00      0.89         4
           6       0.33      0.33      0.33         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.36      0.83      0.50         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.29      1.00      0.44         4
          15       0.25      0.25      0.25         4
          16       0.

53epoch  420step | loss:  2.0731 :  50%|█████     | 4/8 [00:06<00:09,  2.50s/it]

eval/acc : 0.41935483870967744, eval/f1 : 0.3157894736842105, global_steps: 420


53epoch  421step | loss:  2.1205 :  62%|██████▎   | 5/8 [00:07<00:05,  1.70s/it]

 global_steps : 421


53epoch  422step | loss:  2.0560 :  75%|███████▌  | 6/8 [00:07<00:02,  1.23s/it]

 global_steps : 422


53epoch  423step | loss:  2.0674 :  88%|████████▊ | 7/8 [00:07<00:00,  1.08it/s]

 global_steps : 423


53epoch  424step | loss:  2.0623 : 100%|██████████| 8/8 [00:07<00:00,  1.01it/s]


 global_steps : 424


54epoch  425step | loss:  2.0641 :  12%|█▎        | 1/8 [00:00<00:02,  3.42it/s]

 global_steps : 425


54epoch  426step | loss:  2.0775 :  25%|██▌       | 2/8 [00:00<00:01,  3.29it/s]

 global_steps : 426


54epoch  427step | loss:  2.1185 :  38%|███▊      | 3/8 [00:00<00:01,  3.30it/s]

 global_steps : 427


54epoch  428step | loss:  2.1079 :  50%|█████     | 4/8 [00:01<00:01,  3.34it/s]

 global_steps : 428


54epoch  429step | loss:  2.0616 :  62%|██████▎   | 5/8 [00:01<00:00,  3.30it/s]

 global_steps : 429


54epoch  430step | loss:  2.0207 :  75%|███████▌  | 6/8 [00:01<00:00,  3.25it/s]

 global_steps : 430


54epoch  431step | loss:  2.0341 :  88%|████████▊ | 7/8 [00:02<00:00,  3.25it/s]

 global_steps : 431


54epoch  432step | loss:  2.0330 : 100%|██████████| 8/8 [00:02<00:00,  3.34it/s]


 global_steps : 432


55epoch  433step | loss:  1.8328 :  12%|█▎        | 1/8 [00:00<00:02,  3.33it/s]

 global_steps : 433


55epoch  434step | loss:  1.9601 :  25%|██▌       | 2/8 [00:00<00:01,  3.35it/s]

 global_steps : 434


epoch 55 loss: 2.2310: 100%|██████████| 2/2 [00:00<00:00, 20.51it/s]


validation multi-class accuracy = 0.4194, f1 score = 0.3229
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.57      1.00      0.73         4
           5       1.00      1.00      1.00         4
           6       0.25      0.33      0.29         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.33      0.83      0.48         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.29      1.00      0.44         4
          15       0.33      0.25      0.29         4
          16       0.

55epoch  435step | loss:  1.9907 :  38%|███▊      | 3/8 [00:06<00:14,  2.95s/it]

eval/acc : 0.41935483870967744, eval/f1 : 0.32294752031594137, global_steps: 435


55epoch  436step | loss:  1.9910 :  50%|█████     | 4/8 [00:07<00:07,  1.90s/it]

 global_steps : 436


55epoch  437step | loss:  1.9702 :  62%|██████▎   | 5/8 [00:07<00:03,  1.32s/it]

 global_steps : 437


55epoch  438step | loss:  1.9684 :  75%|███████▌  | 6/8 [00:07<00:01,  1.03it/s]

 global_steps : 438


55epoch  439step | loss:  1.9702 :  88%|████████▊ | 7/8 [00:07<00:00,  1.33it/s]

 global_steps : 439


55epoch  440step | loss:  1.9824 : 100%|██████████| 8/8 [00:08<00:00,  1.02s/it]


 global_steps : 440


56epoch  441step | loss:  1.9997 :  12%|█▎        | 1/8 [00:00<00:02,  3.30it/s]

 global_steps : 441


56epoch  442step | loss:  1.9756 :  25%|██▌       | 2/8 [00:00<00:01,  3.17it/s]

 global_steps : 442


56epoch  443step | loss:  1.9732 :  38%|███▊      | 3/8 [00:00<00:01,  3.31it/s]

 global_steps : 443


56epoch  444step | loss:  1.9773 :  50%|█████     | 4/8 [00:01<00:01,  3.36it/s]

 global_steps : 444


56epoch  445step | loss:  1.9623 :  62%|██████▎   | 5/8 [00:01<00:00,  3.34it/s]

 global_steps : 445


56epoch  446step | loss:  1.9694 :  75%|███████▌  | 6/8 [00:01<00:00,  3.37it/s]

 global_steps : 446


56epoch  447step | loss:  1.9782 :  88%|████████▊ | 7/8 [00:02<00:00,  3.37it/s]

 global_steps : 447


56epoch  448step | loss:  1.9619 : 100%|██████████| 8/8 [00:02<00:00,  3.39it/s]


 global_steps : 448


57epoch  449step | loss:  1.9620 :  12%|█▎        | 1/8 [00:00<00:02,  3.50it/s]

 global_steps : 449


epoch 57 loss: 2.1695: 100%|██████████| 2/2 [00:00<00:00, 21.19it/s]
57epoch  450step | loss:  1.9373 :  25%|██▌       | 2/8 [00:00<00:02,  2.75it/s]

validation multi-class accuracy = 0.4032, f1 score = 0.3141
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.57      1.00      0.73         4
           5       1.00      1.00      1.00         4
           6       0.25      0.33      0.29         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       0.00      0.00      0.00         3
          10       0.29      0.67      0.40         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.33      1.00      0.50         4
          15       0.20      0.25      0.22         4
          16       0.

57epoch  451step | loss:  1.9076 :  38%|███▊      | 3/8 [00:00<00:01,  3.02it/s]

 global_steps : 451


57epoch  452step | loss:  1.9021 :  50%|█████     | 4/8 [00:01<00:01,  3.08it/s]

 global_steps : 452


57epoch  453step | loss:  1.9303 :  62%|██████▎   | 5/8 [00:01<00:00,  3.20it/s]

 global_steps : 453


57epoch  454step | loss:  1.9317 :  75%|███████▌  | 6/8 [00:01<00:00,  3.27it/s]

 global_steps : 454


57epoch  455step | loss:  1.9371 :  88%|████████▊ | 7/8 [00:02<00:00,  3.30it/s]

 global_steps : 455


57epoch  456step | loss:  1.9344 : 100%|██████████| 8/8 [00:02<00:00,  3.24it/s]


 global_steps : 456


58epoch  457step | loss:  1.8002 :  12%|█▎        | 1/8 [00:00<00:02,  3.46it/s]

 global_steps : 457


58epoch  458step | loss:  1.9305 :  25%|██▌       | 2/8 [00:00<00:01,  3.39it/s]

 global_steps : 458


58epoch  459step | loss:  1.9340 :  38%|███▊      | 3/8 [00:00<00:01,  3.34it/s]

 global_steps : 459


58epoch  460step | loss:  1.9587 :  50%|█████     | 4/8 [00:01<00:01,  3.36it/s]

 global_steps : 460


58epoch  461step | loss:  1.9387 :  62%|██████▎   | 5/8 [00:01<00:00,  3.36it/s]

 global_steps : 461


58epoch  462step | loss:  1.9302 :  75%|███████▌  | 6/8 [00:01<00:00,  3.26it/s]

 global_steps : 462


58epoch  463step | loss:  1.9306 :  88%|████████▊ | 7/8 [00:02<00:00,  3.30it/s]

 global_steps : 463


58epoch  464step | loss:  1.9007 : 100%|██████████| 8/8 [00:02<00:00,  3.33it/s]


 global_steps : 464


epoch 59 loss: 2.1051: 100%|██████████| 2/2 [00:00<00:00, 13.72it/s]


validation multi-class accuracy = 0.4516, f1 score = 0.3507
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         3
           4       0.57      1.00      0.73         4
           5       1.00      1.00      1.00         4
           6       0.40      0.67      0.50         3
           7       0.00      0.00      0.00         3
           8       0.60      1.00      0.75         3
           9       0.00      0.00      0.00         3
          10       0.31      0.83      0.45         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.44      1.00      0.62         4
          15       0.17      0.25      0.20         4
          16       0.

59epoch  465step | loss:  1.9259 :  12%|█▎        | 1/8 [00:06<00:43,  6.22s/it]

eval/acc : 0.45161290322580644, eval/f1 : 0.35072997178260334, global_steps: 465


59epoch  466step | loss:  1.9083 :  25%|██▌       | 2/8 [00:06<00:16,  2.75s/it]

 global_steps : 466


59epoch  467step | loss:  1.9435 :  38%|███▊      | 3/8 [00:06<00:08,  1.63s/it]

 global_steps : 467


59epoch  468step | loss:  1.9022 :  50%|█████     | 4/8 [00:07<00:04,  1.12s/it]

 global_steps : 468


59epoch  469step | loss:  1.8661 :  62%|██████▎   | 5/8 [00:07<00:02,  1.22it/s]

 global_steps : 469


59epoch  470step | loss:  1.8796 :  75%|███████▌  | 6/8 [00:07<00:01,  1.56it/s]

 global_steps : 470


59epoch  471step | loss:  1.8793 :  88%|████████▊ | 7/8 [00:08<00:00,  1.88it/s]

 global_steps : 471


59epoch  472step | loss:  1.8481 : 100%|██████████| 8/8 [00:08<00:00,  1.04s/it]


 global_steps : 472


60epoch  473step | loss:  1.9096 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 473


60epoch  474step | loss:  1.7750 :  25%|██▌       | 2/8 [00:00<00:01,  3.34it/s]

 global_steps : 474


60epoch  475step | loss:  1.7576 :  38%|███▊      | 3/8 [00:00<00:01,  3.37it/s]

 global_steps : 475


60epoch  476step | loss:  1.7641 :  50%|█████     | 4/8 [00:01<00:01,  3.24it/s]

 global_steps : 476


60epoch  477step | loss:  1.7942 :  62%|██████▎   | 5/8 [00:01<00:00,  3.28it/s]

 global_steps : 477


60epoch  478step | loss:  1.7972 :  75%|███████▌  | 6/8 [00:01<00:00,  3.33it/s]

 global_steps : 478


60epoch  479step | loss:  1.7866 :  88%|████████▊ | 7/8 [00:02<00:00,  3.38it/s]

 global_steps : 479


epoch 60 loss: 2.0452: 100%|██████████| 2/2 [00:00<00:00, 19.83it/s]


validation multi-class accuracy = 0.5323, f1 score = 0.4689
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       1.00      0.67      0.80         3
           4       0.50      1.00      0.67         4
           5       1.00      1.00      1.00         4
           6       0.50      0.67      0.57         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       0.00      0.00      0.00         3
          10       0.40      0.67      0.50         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.44      1.00      0.62         4
          15       0.20      0.25      0.22         4
          16       0.

60epoch  480step | loss:  1.7812 : 100%|██████████| 8/8 [00:08<00:00,  1.02s/it]


eval/acc : 0.532258064516129, eval/f1 : 0.4688966004755479, global_steps: 480


61epoch  481step | loss:  1.9846 :  12%|█▎        | 1/8 [00:00<00:02,  3.42it/s]

 global_steps : 481


61epoch  482step | loss:  1.9059 :  25%|██▌       | 2/8 [00:00<00:01,  3.33it/s]

 global_steps : 482


61epoch  483step | loss:  1.7800 :  38%|███▊      | 3/8 [00:00<00:01,  3.35it/s]

 global_steps : 483


61epoch  484step | loss:  1.8012 :  50%|█████     | 4/8 [00:01<00:01,  3.34it/s]

 global_steps : 484


61epoch  485step | loss:  1.7968 :  62%|██████▎   | 5/8 [00:01<00:00,  3.37it/s]

 global_steps : 485


61epoch  486step | loss:  1.7744 :  75%|███████▌  | 6/8 [00:01<00:00,  3.38it/s]

 global_steps : 486


61epoch  487step | loss:  1.7418 :  88%|████████▊ | 7/8 [00:02<00:00,  3.41it/s]

 global_steps : 487


61epoch  488step | loss:  1.7594 : 100%|██████████| 8/8 [00:02<00:00,  3.42it/s]


 global_steps : 488


62epoch  489step | loss:  1.7479 :  12%|█▎        | 1/8 [00:00<00:02,  3.36it/s]

 global_steps : 489


62epoch  490step | loss:  1.7686 :  25%|██▌       | 2/8 [00:00<00:01,  3.36it/s]

 global_steps : 490


62epoch  491step | loss:  1.7274 :  38%|███▊      | 3/8 [00:00<00:01,  3.34it/s]

 global_steps : 491


62epoch  492step | loss:  1.7115 :  50%|█████     | 4/8 [00:01<00:01,  3.26it/s]

 global_steps : 492


62epoch  493step | loss:  1.7359 :  62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

 global_steps : 493


62epoch  494step | loss:  1.7188 :  75%|███████▌  | 6/8 [00:01<00:00,  3.30it/s]

 global_steps : 494


epoch 62 loss: 1.9427: 100%|██████████| 2/2 [00:00<00:00, 19.78it/s]


validation multi-class accuracy = 0.5806, f1 score = 0.5256
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.44      1.00      0.62         4
           5       1.00      1.00      1.00         4
           6       0.67      0.67      0.67         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       1.00      0.33      0.50         3
          10       0.44      0.67      0.53         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.44      1.00      0.62         4
          15       0.20      0.25      0.22         4
          16       0.

62epoch  495step | loss:  1.7078 :  88%|████████▊ | 7/8 [00:08<00:02,  2.39s/it]

eval/acc : 0.5806451612903226, eval/f1 : 0.5255960413855151, global_steps: 495


62epoch  496step | loss:  1.7000 : 100%|██████████| 8/8 [00:08<00:00,  1.10s/it]


 global_steps : 496


63epoch  497step | loss:  1.6443 :  12%|█▎        | 1/8 [00:00<00:02,  3.28it/s]

 global_steps : 497


63epoch  498step | loss:  1.6756 :  25%|██▌       | 2/8 [00:00<00:01,  3.13it/s]

 global_steps : 498


63epoch  499step | loss:  1.6913 :  38%|███▊      | 3/8 [00:00<00:01,  3.16it/s]

 global_steps : 499


63epoch  500step | loss:  1.6426 :  50%|█████     | 4/8 [00:01<00:01,  3.23it/s]

 global_steps : 500


63epoch  501step | loss:  1.6523 :  62%|██████▎   | 5/8 [00:01<00:00,  3.30it/s]

 global_steps : 501


63epoch  502step | loss:  1.6352 :  75%|███████▌  | 6/8 [00:01<00:00,  3.29it/s]

 global_steps : 502


63epoch  503step | loss:  1.6327 :  88%|████████▊ | 7/8 [00:02<00:00,  3.30it/s]

 global_steps : 503


63epoch  504step | loss:  1.6438 : 100%|██████████| 8/8 [00:02<00:00,  3.33it/s]


 global_steps : 504


64epoch  505step | loss:  1.6392 :  12%|█▎        | 1/8 [00:00<00:02,  3.33it/s]

 global_steps : 505


64epoch  506step | loss:  1.6403 :  25%|██▌       | 2/8 [00:00<00:01,  3.37it/s]

 global_steps : 506


64epoch  507step | loss:  1.6209 :  38%|███▊      | 3/8 [00:00<00:01,  3.31it/s]

 global_steps : 507


64epoch  508step | loss:  1.6585 :  50%|█████     | 4/8 [00:01<00:01,  3.25it/s]

 global_steps : 508


64epoch  509step | loss:  1.6349 :  62%|██████▎   | 5/8 [00:01<00:00,  3.30it/s]

 global_steps : 509


epoch 64 loss: 1.8628: 100%|██████████| 2/2 [00:00<00:00, 20.58it/s]


validation multi-class accuracy = 0.5968, f1 score = 0.5397
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.50      1.00      0.67         4
           5       0.80      1.00      0.89         4
           6       0.67      0.67      0.67         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       1.00      0.33      0.50         3
          10       0.44      0.67      0.53         6
          11       0.00      0.00      0.00         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.44      1.00      0.62         4
          15       0.25      0.25      0.25         4
          16       0.

64epoch  510step | loss:  1.6213 :  75%|███████▌  | 6/8 [00:09<00:05,  2.99s/it]

eval/acc : 0.5967741935483871, eval/f1 : 0.5396986054880791, global_steps: 510


64epoch  511step | loss:  1.6074 :  88%|████████▊ | 7/8 [00:10<00:02,  2.13s/it]

 global_steps : 511


64epoch  512step | loss:  1.6057 : 100%|██████████| 8/8 [00:10<00:00,  1.30s/it]


 global_steps : 512


65epoch  513step | loss:  1.6955 :  12%|█▎        | 1/8 [00:00<00:02,  3.09it/s]

 global_steps : 513


65epoch  514step | loss:  1.6858 :  25%|██▌       | 2/8 [00:00<00:01,  3.09it/s]

 global_steps : 514


65epoch  515step | loss:  1.6078 :  38%|███▊      | 3/8 [00:00<00:01,  3.23it/s]

 global_steps : 515


65epoch  516step | loss:  1.5651 :  50%|█████     | 4/8 [00:01<00:01,  3.30it/s]

 global_steps : 516


65epoch  517step | loss:  1.5543 :  62%|██████▎   | 5/8 [00:01<00:00,  3.23it/s]

 global_steps : 517


65epoch  518step | loss:  1.5518 :  75%|███████▌  | 6/8 [00:01<00:00,  3.17it/s]

 global_steps : 518


65epoch  519step | loss:  1.5497 :  88%|████████▊ | 7/8 [00:02<00:00,  3.24it/s]

 global_steps : 519


65epoch  520step | loss:  1.5324 : 100%|██████████| 8/8 [00:02<00:00,  3.29it/s]


 global_steps : 520


66epoch  521step | loss:  1.3924 :  12%|█▎        | 1/8 [00:00<00:02,  3.19it/s]

 global_steps : 521


66epoch  522step | loss:  1.4053 :  25%|██▌       | 2/8 [00:00<00:01,  3.28it/s]

 global_steps : 522


66epoch  523step | loss:  1.4501 :  38%|███▊      | 3/8 [00:00<00:01,  3.27it/s]

 global_steps : 523


66epoch  524step | loss:  1.4570 :  50%|█████     | 4/8 [00:01<00:01,  3.30it/s]

 global_steps : 524


epoch 66 loss: 1.7459: 100%|██████████| 2/2 [00:00<00:00, 20.79it/s]


validation multi-class accuracy = 0.6129, f1 score = 0.5742
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.57      1.00      0.73         4
           5       1.00      1.00      1.00         4
           6       0.67      0.67      0.67         3
           7       0.00      0.00      0.00         3
           8       0.50      1.00      0.67         3
           9       1.00      0.67      0.80         3
          10       0.50      0.67      0.57         6
          11       1.00      0.33      0.50         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.50      1.00      0.67         4
          15       0.17      0.25      0.20         4
          16       0.

66epoch  525step | loss:  1.4485 :  62%|██████▎   | 5/8 [00:09<00:09,  3.05s/it]

eval/acc : 0.6129032258064516, eval/f1 : 0.5742006531480216, global_steps: 525


66epoch  526step | loss:  1.4656 :  75%|███████▌  | 6/8 [00:09<00:04,  2.12s/it]

 global_steps : 526


66epoch  527step | loss:  1.4700 :  88%|████████▊ | 7/8 [00:09<00:01,  1.53s/it]

 global_steps : 527


66epoch  528step | loss:  1.4701 : 100%|██████████| 8/8 [00:10<00:00,  1.25s/it]


 global_steps : 528


67epoch  529step | loss:  1.3693 :  12%|█▎        | 1/8 [00:00<00:02,  3.30it/s]

 global_steps : 529


67epoch  530step | loss:  1.3533 :  25%|██▌       | 2/8 [00:00<00:01,  3.35it/s]

 global_steps : 530


67epoch  531step | loss:  1.3954 :  38%|███▊      | 3/8 [00:00<00:01,  3.38it/s]

 global_steps : 531


67epoch  532step | loss:  1.4064 :  50%|█████     | 4/8 [00:01<00:01,  3.34it/s]

 global_steps : 532


67epoch  533step | loss:  1.3707 :  62%|██████▎   | 5/8 [00:01<00:00,  3.32it/s]

 global_steps : 533


67epoch  534step | loss:  1.3984 :  75%|███████▌  | 6/8 [00:01<00:00,  3.36it/s]

 global_steps : 534


67epoch  535step | loss:  1.3856 :  88%|████████▊ | 7/8 [00:02<00:00,  3.38it/s]

 global_steps : 535


67epoch  536step | loss:  1.3900 : 100%|██████████| 8/8 [00:02<00:00,  3.39it/s]


 global_steps : 536


68epoch  537step | loss:  1.4504 :  12%|█▎        | 1/8 [00:00<00:02,  3.03it/s]

 global_steps : 537


68epoch  538step | loss:  1.3645 :  25%|██▌       | 2/8 [00:00<00:01,  3.11it/s]

 global_steps : 538


68epoch  539step | loss:  1.3945 :  38%|███▊      | 3/8 [00:00<00:01,  3.19it/s]

 global_steps : 539


epoch 68 loss: 1.6429: 100%|██████████| 2/2 [00:00<00:00, 20.98it/s]


validation multi-class accuracy = 0.6452, f1 score = 0.6167
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.57      1.00      0.73         4
           5       1.00      1.00      1.00         4
           6       0.67      0.67      0.67         3
           7       1.00      0.33      0.50         3
           8       0.60      1.00      0.75         3
           9       1.00      0.67      0.80         3
          10       0.50      0.67      0.57         6
          11       1.00      0.33      0.50         3
          12       1.00      0.50      0.67         2
          13       0.00      0.00      0.00         2
          14       0.57      1.00      0.73         4
          15       0.29      0.50      0.36         4
          16       0.

68epoch  540step | loss:  1.3998 :  50%|█████     | 4/8 [00:08<00:12,  3.15s/it]

eval/acc : 0.6451612903225806, eval/f1 : 0.616704640388851, global_steps: 540


68epoch  541step | loss:  1.3610 :  62%|██████▎   | 5/8 [00:08<00:06,  2.12s/it]

 global_steps : 541


68epoch  542step | loss:  1.3346 :  75%|███████▌  | 6/8 [00:09<00:03,  1.50s/it]

 global_steps : 542


68epoch  543step | loss:  1.3163 :  88%|████████▊ | 7/8 [00:09<00:01,  1.11s/it]

 global_steps : 543


68epoch  544step | loss:  1.3048 : 100%|██████████| 8/8 [00:09<00:00,  1.20s/it]


 global_steps : 544


69epoch  545step | loss:  1.5142 :  12%|█▎        | 1/8 [00:00<00:02,  3.27it/s]

 global_steps : 545


69epoch  546step | loss:  1.3379 :  25%|██▌       | 2/8 [00:00<00:01,  3.29it/s]

 global_steps : 546


69epoch  547step | loss:  1.2753 :  38%|███▊      | 3/8 [00:00<00:01,  3.30it/s]

 global_steps : 547


69epoch  548step | loss:  1.2803 :  50%|█████     | 4/8 [00:01<00:01,  3.34it/s]

 global_steps : 548


69epoch  549step | loss:  1.2854 :  62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

 global_steps : 549


69epoch  550step | loss:  1.2957 :  75%|███████▌  | 6/8 [00:01<00:00,  3.29it/s]

 global_steps : 550


69epoch  551step | loss:  1.2905 :  88%|████████▊ | 7/8 [00:02<00:00,  3.23it/s]

 global_steps : 551


69epoch  552step | loss:  1.2783 : 100%|██████████| 8/8 [00:02<00:00,  3.31it/s]


 global_steps : 552


70epoch  553step | loss:  1.2040 :  12%|█▎        | 1/8 [00:00<00:02,  3.40it/s]

 global_steps : 553


70epoch  554step | loss:  1.2766 :  25%|██▌       | 2/8 [00:00<00:01,  3.41it/s]

 global_steps : 554


epoch 70 loss: 1.5320: 100%|██████████| 2/2 [00:00<00:00, 16.86it/s]


validation multi-class accuracy = 0.7258, f1 score = 0.7251
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.57      1.00      0.73         4
           5       1.00      1.00      1.00         4
           6       0.75      1.00      0.86         3
           7       1.00      0.33      0.50         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       0.57      0.67      0.62         6
          11       1.00      0.67      0.80         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.50      1.00      0.67         4
          15       0.33      0.25      0.29         4
          16       0.

70epoch  555step | loss:  1.2299 :  38%|███▊      | 3/8 [00:09<00:20,  4.05s/it]

eval/acc : 0.7258064516129032, eval/f1 : 0.7250521408416146, global_steps: 555


70epoch  556step | loss:  1.2110 :  50%|█████     | 4/8 [00:09<00:10,  2.57s/it]

 global_steps : 556


70epoch  557step | loss:  1.1910 :  62%|██████▎   | 5/8 [00:09<00:05,  1.76s/it]

 global_steps : 557


70epoch  558step | loss:  1.2053 :  75%|███████▌  | 6/8 [00:10<00:02,  1.27s/it]

 global_steps : 558


70epoch  559step | loss:  1.1981 :  88%|████████▊ | 7/8 [00:10<00:00,  1.05it/s]

 global_steps : 559


70epoch  560step | loss:  1.1955 : 100%|██████████| 8/8 [00:10<00:00,  1.33s/it]


 global_steps : 560


71epoch  561step | loss:  1.2104 :  12%|█▎        | 1/8 [00:00<00:02,  3.43it/s]

 global_steps : 561


71epoch  562step | loss:  1.1164 :  25%|██▌       | 2/8 [00:00<00:01,  3.34it/s]

 global_steps : 562


71epoch  563step | loss:  1.0746 :  38%|███▊      | 3/8 [00:00<00:01,  3.35it/s]

 global_steps : 563


71epoch  564step | loss:  1.0719 :  50%|█████     | 4/8 [00:01<00:01,  3.32it/s]

 global_steps : 564


71epoch  565step | loss:  1.1166 :  62%|██████▎   | 5/8 [00:01<00:00,  3.33it/s]

 global_steps : 565


71epoch  566step | loss:  1.1180 :  75%|███████▌  | 6/8 [00:01<00:00,  3.35it/s]

 global_steps : 566


71epoch  567step | loss:  1.1103 :  88%|████████▊ | 7/8 [00:02<00:00,  3.36it/s]

 global_steps : 567


71epoch  568step | loss:  1.1181 : 100%|██████████| 8/8 [00:02<00:00,  3.39it/s]


 global_steps : 568


72epoch  569step | loss:  1.1636 :  12%|█▎        | 1/8 [00:00<00:02,  3.44it/s]

 global_steps : 569


epoch 72 loss: 1.4073: 100%|██████████| 2/2 [00:00<00:00, 21.29it/s]


validation multi-class accuracy = 0.7903, f1 score = 0.7861
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.80      1.00      0.89         4
           5       1.00      1.00      1.00         4
           6       0.75      1.00      0.86         3
           7       1.00      0.67      0.80         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       0.67      0.67      0.67         6
          11       1.00      0.67      0.80         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       0.50      0.75      0.60         4
          16       0.

72epoch  570step | loss:  1.0526 :  25%|██▌       | 2/8 [00:08<00:30,  5.06s/it]

eval/acc : 0.7903225806451613, eval/f1 : 0.7861319966583125, global_steps: 570


72epoch  571step | loss:  1.0460 :  38%|███▊      | 3/8 [00:09<00:14,  2.90s/it]

 global_steps : 571


72epoch  572step | loss:  1.0331 :  50%|█████     | 4/8 [00:09<00:07,  1.87s/it]

 global_steps : 572


72epoch  573step | loss:  1.0471 :  62%|██████▎   | 5/8 [00:09<00:03,  1.31s/it]

 global_steps : 573


72epoch  574step | loss:  1.0622 :  75%|███████▌  | 6/8 [00:09<00:01,  1.04it/s]

 global_steps : 574


72epoch  575step | loss:  1.0504 :  88%|████████▊ | 7/8 [00:10<00:00,  1.34it/s]

 global_steps : 575


72epoch  576step | loss:  1.0480 : 100%|██████████| 8/8 [00:10<00:00,  1.31s/it]


 global_steps : 576


73epoch  577step | loss:  1.0525 :  12%|█▎        | 1/8 [00:00<00:02,  3.47it/s]

 global_steps : 577


73epoch  578step | loss:  0.9930 :  25%|██▌       | 2/8 [00:00<00:01,  3.47it/s]

 global_steps : 578


73epoch  579step | loss:  0.9727 :  38%|███▊      | 3/8 [00:00<00:01,  3.46it/s]

 global_steps : 579


73epoch  580step | loss:  1.0083 :  50%|█████     | 4/8 [00:01<00:01,  3.44it/s]

 global_steps : 580


73epoch  581step | loss:  1.0335 :  62%|██████▎   | 5/8 [00:01<00:00,  3.34it/s]

 global_steps : 581


73epoch  582step | loss:  1.0224 :  75%|███████▌  | 6/8 [00:01<00:00,  3.26it/s]

 global_steps : 582


73epoch  583step | loss:  1.0004 :  88%|████████▊ | 7/8 [00:02<00:00,  3.30it/s]

 global_steps : 583


73epoch  584step | loss:  1.0004 : 100%|██████████| 8/8 [00:02<00:00,  3.38it/s]


 global_steps : 584


epoch 74 loss: 1.2852: 100%|██████████| 2/2 [00:00<00:00, 21.10it/s]


validation multi-class accuracy = 0.8226, f1 score = 0.8186
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.80      1.00      0.89         4
           5       1.00      1.00      1.00         4
           6       0.75      1.00      0.86         3
           7       1.00      0.67      0.80         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       0.67      0.67      0.67         6
          11       1.00      0.67      0.80         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       0.75      0.75      0.75         4
          16       0.

74epoch  585step | loss:  1.0282 :  12%|█▎        | 1/8 [00:08<00:58,  8.29s/it]

eval/acc : 0.8225806451612904, eval/f1 : 0.8185881370091896, global_steps: 585


74epoch  586step | loss:  0.9649 :  25%|██▌       | 2/8 [00:08<00:21,  3.59s/it]

 global_steps : 586


74epoch  587step | loss:  0.9696 :  38%|███▊      | 3/8 [00:08<00:10,  2.09s/it]

 global_steps : 587


74epoch  588step | loss:  0.9590 :  50%|█████     | 4/8 [00:09<00:05,  1.39s/it]

 global_steps : 588


74epoch  589step | loss:  0.9427 :  62%|██████▎   | 5/8 [00:09<00:02,  1.00it/s]

 global_steps : 589


74epoch  590step | loss:  0.9527 :  75%|███████▌  | 6/8 [00:09<00:01,  1.30it/s]

 global_steps : 590


74epoch  591step | loss:  0.9366 :  88%|████████▊ | 7/8 [00:10<00:00,  1.61it/s]

 global_steps : 591


74epoch  592step | loss:  0.9346 : 100%|██████████| 8/8 [00:10<00:00,  1.30s/it]


 global_steps : 592


75epoch  593step | loss:  0.9230 :  12%|█▎        | 1/8 [00:00<00:02,  3.10it/s]

 global_steps : 593


75epoch  594step | loss:  0.8899 :  25%|██▌       | 2/8 [00:00<00:01,  3.24it/s]

 global_steps : 594


75epoch  595step | loss:  0.8462 :  38%|███▊      | 3/8 [00:00<00:01,  3.25it/s]

 global_steps : 595


75epoch  596step | loss:  0.8534 :  50%|█████     | 4/8 [00:01<00:01,  3.32it/s]

 global_steps : 596


75epoch  597step | loss:  0.8824 :  62%|██████▎   | 5/8 [00:01<00:00,  3.33it/s]

 global_steps : 597


75epoch  598step | loss:  0.8725 :  75%|███████▌  | 6/8 [00:01<00:00,  3.36it/s]

 global_steps : 598


75epoch  599step | loss:  0.8722 :  88%|████████▊ | 7/8 [00:02<00:00,  3.38it/s]

 global_steps : 599


epoch 75 loss: 1.1941: 100%|██████████| 2/2 [00:00<00:00, 20.02it/s]
75epoch  600step | loss:  0.8816 : 100%|██████████| 8/8 [00:02<00:00,  3.19it/s]


validation multi-class accuracy = 0.8065, f1 score = 0.7984
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.80      1.00      0.89         4
           5       1.00      1.00      1.00         4
           6       0.75      1.00      0.86         3
           7       1.00      0.67      0.80         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       0.67      0.67      0.67         6
          11       1.00      0.67      0.80         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       0.60      0.75      0.67         4
          16       0.

76epoch  601step | loss:  0.8091 :  12%|█▎        | 1/8 [00:00<00:02,  3.20it/s]

 global_steps : 601


76epoch  602step | loss:  0.8509 :  25%|██▌       | 2/8 [00:00<00:01,  3.21it/s]

 global_steps : 602


76epoch  603step | loss:  0.8481 :  38%|███▊      | 3/8 [00:00<00:01,  3.20it/s]

 global_steps : 603


76epoch  604step | loss:  0.8228 :  50%|█████     | 4/8 [00:01<00:01,  3.13it/s]

 global_steps : 604


76epoch  605step | loss:  0.8353 :  62%|██████▎   | 5/8 [00:01<00:00,  3.13it/s]

 global_steps : 605


76epoch  606step | loss:  0.8374 :  75%|███████▌  | 6/8 [00:01<00:00,  3.15it/s]

 global_steps : 606


76epoch  607step | loss:  0.8262 :  88%|████████▊ | 7/8 [00:02<00:00,  3.22it/s]

 global_steps : 607


76epoch  608step | loss:  0.8175 : 100%|██████████| 8/8 [00:02<00:00,  3.25it/s]


 global_steps : 608


77epoch  609step | loss:  0.7971 :  12%|█▎        | 1/8 [00:00<00:02,  3.26it/s]

 global_steps : 609


77epoch  610step | loss:  0.7790 :  25%|██▌       | 2/8 [00:00<00:02,  2.90it/s]

 global_steps : 610


77epoch  611step | loss:  0.7874 :  38%|███▊      | 3/8 [00:01<00:01,  2.81it/s]

 global_steps : 611


77epoch  612step | loss:  0.7763 :  50%|█████     | 4/8 [00:01<00:01,  2.75it/s]

 global_steps : 612


77epoch  613step | loss:  0.7734 :  62%|██████▎   | 5/8 [00:01<00:01,  2.73it/s]

 global_steps : 613


77epoch  614step | loss:  0.7752 :  75%|███████▌  | 6/8 [00:02<00:00,  2.66it/s]

 global_steps : 614


epoch 77 loss: 1.0872: 100%|██████████| 2/2 [00:00<00:00, 15.00it/s]
77epoch  615step | loss:  0.7671 :  88%|████████▊ | 7/8 [00:02<00:00,  2.32it/s]

validation multi-class accuracy = 0.8226, f1 score = 0.8186
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.80      1.00      0.89         4
           5       1.00      1.00      1.00         4
           6       0.75      1.00      0.86         3
           7       1.00      0.67      0.80         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       0.67      0.67      0.67         6
          11       1.00      0.67      0.80         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       0.75      0.75      0.75         4
          16       0.

77epoch  616step | loss:  0.7793 : 100%|██████████| 8/8 [00:03<00:00,  2.64it/s]


 global_steps : 616


78epoch  617step | loss:  0.8099 :  12%|█▎        | 1/8 [00:00<00:02,  3.28it/s]

 global_steps : 617


78epoch  618step | loss:  0.8135 :  25%|██▌       | 2/8 [00:00<00:01,  3.00it/s]

 global_steps : 618


78epoch  619step | loss:  0.8043 :  38%|███▊      | 3/8 [00:01<00:01,  2.93it/s]

 global_steps : 619


78epoch  620step | loss:  0.7884 :  50%|█████     | 4/8 [00:01<00:01,  2.85it/s]

 global_steps : 620


78epoch  621step | loss:  0.7484 :  62%|██████▎   | 5/8 [00:01<00:01,  2.86it/s]

 global_steps : 621


78epoch  622step | loss:  0.7345 :  75%|███████▌  | 6/8 [00:02<00:00,  2.81it/s]

 global_steps : 622


78epoch  623step | loss:  0.7215 :  88%|████████▊ | 7/8 [00:02<00:00,  2.75it/s]

 global_steps : 623


78epoch  624step | loss:  0.7322 : 100%|██████████| 8/8 [00:02<00:00,  2.88it/s]


 global_steps : 624


79epoch  625step | loss:  0.7242 :  12%|█▎        | 1/8 [00:00<00:02,  2.61it/s]

 global_steps : 625


79epoch  626step | loss:  0.7155 :  25%|██▌       | 2/8 [00:00<00:02,  2.70it/s]

 global_steps : 626


79epoch  627step | loss:  0.7073 :  38%|███▊      | 3/8 [00:01<00:01,  2.84it/s]

 global_steps : 627


79epoch  628step | loss:  0.7152 :  50%|█████     | 4/8 [00:01<00:01,  2.94it/s]

 global_steps : 628


79epoch  629step | loss:  0.6959 :  62%|██████▎   | 5/8 [00:01<00:01,  2.99it/s]

 global_steps : 629


epoch 79 loss: 0.9976: 100%|██████████| 2/2 [00:00<00:00, 18.12it/s]


validation multi-class accuracy = 0.8387, f1 score = 0.8315
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       0.80      1.00      0.89         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.67      0.80         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       0.71      0.83      0.77         6
          11       1.00      0.67      0.80         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       0.75      0.75      0.75         4
          16       0.

79epoch  630step | loss:  0.6707 :  75%|███████▌  | 6/8 [00:08<00:04,  2.45s/it]

eval/acc : 0.8387096774193549, eval/f1 : 0.8315050446629394, global_steps: 630


79epoch  631step | loss:  0.6860 :  88%|████████▊ | 7/8 [00:08<00:01,  1.75s/it]

 global_steps : 631


79epoch  632step | loss:  0.6888 : 100%|██████████| 8/8 [00:08<00:00,  1.11s/it]


 global_steps : 632


80epoch  633step | loss:  0.6797 :  12%|█▎        | 1/8 [00:00<00:01,  3.60it/s]

 global_steps : 633


80epoch  634step | loss:  0.6879 :  25%|██▌       | 2/8 [00:00<00:01,  3.50it/s]

 global_steps : 634


80epoch  635step | loss:  0.6772 :  38%|███▊      | 3/8 [00:00<00:01,  3.36it/s]

 global_steps : 635


80epoch  636step | loss:  0.6408 :  50%|█████     | 4/8 [00:01<00:01,  3.38it/s]

 global_steps : 636


80epoch  637step | loss:  0.6358 :  62%|██████▎   | 5/8 [00:01<00:00,  3.38it/s]

 global_steps : 637


80epoch  638step | loss:  0.6302 :  75%|███████▌  | 6/8 [00:01<00:00,  3.23it/s]

 global_steps : 638


80epoch  639step | loss:  0.6329 :  88%|████████▊ | 7/8 [00:02<00:00,  3.28it/s]

 global_steps : 639


80epoch  640step | loss:  0.6493 : 100%|██████████| 8/8 [00:02<00:00,  3.33it/s]


 global_steps : 640


81epoch  641step | loss:  0.6568 :  12%|█▎        | 1/8 [00:00<00:02,  3.13it/s]

 global_steps : 641


81epoch  642step | loss:  0.6817 :  25%|██▌       | 2/8 [00:00<00:01,  3.32it/s]

 global_steps : 642


81epoch  643step | loss:  0.6567 :  38%|███▊      | 3/8 [00:00<00:01,  3.36it/s]

 global_steps : 643


81epoch  644step | loss:  0.6316 :  50%|█████     | 4/8 [00:01<00:01,  3.33it/s]

 global_steps : 644


epoch 81 loss: 0.9172: 100%|██████████| 2/2 [00:00<00:00, 21.38it/s]


validation multi-class accuracy = 0.9032, f1 score = 0.9030
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

81epoch  645step | loss:  0.6321 :  62%|██████▎   | 5/8 [00:07<00:07,  2.45s/it]

eval/acc : 0.9032258064516129, eval/f1 : 0.9029847345636819, global_steps: 645


81epoch  646step | loss:  0.6308 :  75%|███████▌  | 6/8 [00:07<00:03,  1.71s/it]

 global_steps : 646


81epoch  647step | loss:  0.6225 :  88%|████████▊ | 7/8 [00:08<00:01,  1.25s/it]

 global_steps : 647


81epoch  648step | loss:  0.6260 : 100%|██████████| 8/8 [00:08<00:00,  1.04s/it]


 global_steps : 648


82epoch  649step | loss:  0.5844 :  12%|█▎        | 1/8 [00:00<00:02,  3.29it/s]

 global_steps : 649


82epoch  650step | loss:  0.5942 :  25%|██▌       | 2/8 [00:00<00:01,  3.35it/s]

 global_steps : 650


82epoch  651step | loss:  0.5840 :  38%|███▊      | 3/8 [00:00<00:01,  3.34it/s]

 global_steps : 651


82epoch  652step | loss:  0.5941 :  50%|█████     | 4/8 [00:01<00:01,  3.35it/s]

 global_steps : 652


82epoch  653step | loss:  0.5883 :  62%|██████▎   | 5/8 [00:01<00:00,  3.36it/s]

 global_steps : 653


82epoch  654step | loss:  0.5766 :  75%|███████▌  | 6/8 [00:01<00:00,  3.37it/s]

 global_steps : 654


82epoch  655step | loss:  0.5664 :  88%|████████▊ | 7/8 [00:02<00:00,  3.37it/s]

 global_steps : 655


82epoch  656step | loss:  0.5685 : 100%|██████████| 8/8 [00:02<00:00,  3.41it/s]


 global_steps : 656


83epoch  657step | loss:  0.6192 :  12%|█▎        | 1/8 [00:00<00:02,  3.49it/s]

 global_steps : 657


83epoch  658step | loss:  0.5712 :  25%|██▌       | 2/8 [00:00<00:01,  3.36it/s]

 global_steps : 658


83epoch  659step | loss:  0.5457 :  38%|███▊      | 3/8 [00:00<00:01,  3.39it/s]

 global_steps : 659


epoch 83 loss: 0.8494: 100%|██████████| 2/2 [00:00<00:00, 19.21it/s]
83epoch  660step | loss:  0.5381 :  50%|█████     | 4/8 [00:01<00:01,  2.87it/s]

validation multi-class accuracy = 0.9032, f1 score = 0.9030
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

83epoch  661step | loss:  0.5551 :  62%|██████▎   | 5/8 [00:01<00:01,  2.99it/s]

 global_steps : 661


83epoch  662step | loss:  0.5504 :  75%|███████▌  | 6/8 [00:01<00:00,  3.01it/s]

 global_steps : 662


83epoch  663step | loss:  0.5343 :  88%|████████▊ | 7/8 [00:02<00:00,  3.06it/s]

 global_steps : 663


83epoch  664step | loss:  0.5342 : 100%|██████████| 8/8 [00:02<00:00,  3.13it/s]


 global_steps : 664


84epoch  665step | loss:  0.4736 :  12%|█▎        | 1/8 [00:00<00:02,  3.48it/s]

 global_steps : 665


84epoch  666step | loss:  0.4806 :  25%|██▌       | 2/8 [00:00<00:01,  3.39it/s]

 global_steps : 666


84epoch  667step | loss:  0.4811 :  38%|███▊      | 3/8 [00:00<00:01,  3.27it/s]

 global_steps : 667


84epoch  668step | loss:  0.4874 :  50%|█████     | 4/8 [00:01<00:01,  3.32it/s]

 global_steps : 668


84epoch  669step | loss:  0.4863 :  62%|██████▎   | 5/8 [00:01<00:00,  3.35it/s]

 global_steps : 669


84epoch  670step | loss:  0.4965 :  75%|███████▌  | 6/8 [00:01<00:00,  3.28it/s]

 global_steps : 670


84epoch  671step | loss:  0.5109 :  88%|████████▊ | 7/8 [00:02<00:00,  3.18it/s]

 global_steps : 671


84epoch  672step | loss:  0.5092 : 100%|██████████| 8/8 [00:02<00:00,  3.32it/s]


 global_steps : 672


85epoch  673step | loss:  0.4993 :  12%|█▎        | 1/8 [00:00<00:02,  3.47it/s]

 global_steps : 673


85epoch  674step | loss:  0.5067 :  25%|██▌       | 2/8 [00:00<00:01,  3.33it/s]

 global_steps : 674


epoch 85 loss: 0.8138: 100%|██████████| 2/2 [00:00<00:00, 19.84it/s]
85epoch  675step | loss:  0.5231 :  38%|███▊      | 3/8 [00:01<00:01,  2.65it/s]

validation multi-class accuracy = 0.9032, f1 score = 0.9030
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

85epoch  676step | loss:  0.5030 :  50%|█████     | 4/8 [00:01<00:01,  2.50it/s]

 global_steps : 676


85epoch  677step | loss:  0.5049 :  62%|██████▎   | 5/8 [00:01<00:01,  2.59it/s]

 global_steps : 677


85epoch  678step | loss:  0.4932 :  75%|███████▌  | 6/8 [00:02<00:00,  2.65it/s]

 global_steps : 678


85epoch  679step | loss:  0.4896 :  88%|████████▊ | 7/8 [00:02<00:00,  2.68it/s]

 global_steps : 679


85epoch  680step | loss:  0.4864 : 100%|██████████| 8/8 [00:02<00:00,  2.72it/s]


 global_steps : 680


86epoch  681step | loss:  0.5373 :  12%|█▎        | 1/8 [00:00<00:02,  2.82it/s]

 global_steps : 681


86epoch  682step | loss:  0.5024 :  25%|██▌       | 2/8 [00:00<00:02,  2.68it/s]

 global_steps : 682


86epoch  683step | loss:  0.4807 :  38%|███▊      | 3/8 [00:01<00:01,  2.84it/s]

 global_steps : 683


86epoch  684step | loss:  0.4728 :  50%|█████     | 4/8 [00:01<00:01,  2.87it/s]

 global_steps : 684


86epoch  685step | loss:  0.4710 :  62%|██████▎   | 5/8 [00:01<00:01,  2.85it/s]

 global_steps : 685


86epoch  686step | loss:  0.4693 :  75%|███████▌  | 6/8 [00:02<00:00,  2.88it/s]

 global_steps : 686


86epoch  687step | loss:  0.4649 :  88%|████████▊ | 7/8 [00:02<00:00,  2.94it/s]

 global_steps : 687


86epoch  688step | loss:  0.4579 : 100%|██████████| 8/8 [00:02<00:00,  2.92it/s]


 global_steps : 688


87epoch  689step | loss:  0.4154 :  12%|█▎        | 1/8 [00:00<00:02,  3.06it/s]

 global_steps : 689


epoch 87 loss: 0.7501: 100%|██████████| 2/2 [00:00<00:00, 20.70it/s]


validation multi-class accuracy = 0.9194, f1 score = 0.9214
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

87epoch  690step | loss:  0.4336 :  25%|██▌       | 2/8 [00:06<00:22,  3.77s/it]

eval/acc : 0.9193548387096774, eval/f1 : 0.921405787195261, global_steps: 690


87epoch  691step | loss:  0.4146 :  38%|███▊      | 3/8 [00:06<00:10,  2.19s/it]

 global_steps : 691


87epoch  692step | loss:  0.4169 :  50%|█████     | 4/8 [00:07<00:05,  1.44s/it]

 global_steps : 692


87epoch  693step | loss:  0.4114 :  62%|██████▎   | 5/8 [00:07<00:03,  1.03s/it]

 global_steps : 693


87epoch  694step | loss:  0.4236 :  75%|███████▌  | 6/8 [00:07<00:01,  1.28it/s]

 global_steps : 694


87epoch  695step | loss:  0.4324 :  88%|████████▊ | 7/8 [00:08<00:00,  1.59it/s]

 global_steps : 695


87epoch  696step | loss:  0.4348 : 100%|██████████| 8/8 [00:08<00:00,  1.04s/it]


 global_steps : 696


88epoch  697step | loss:  0.4278 :  12%|█▎        | 1/8 [00:00<00:02,  3.50it/s]

 global_steps : 697


88epoch  698step | loss:  0.4225 :  25%|██▌       | 2/8 [00:00<00:01,  3.44it/s]

 global_steps : 698


88epoch  699step | loss:  0.4116 :  38%|███▊      | 3/8 [00:00<00:01,  3.33it/s]

 global_steps : 699


88epoch  700step | loss:  0.4188 :  50%|█████     | 4/8 [00:01<00:01,  3.36it/s]

 global_steps : 700


88epoch  701step | loss:  0.4133 :  62%|██████▎   | 5/8 [00:01<00:00,  3.35it/s]

 global_steps : 701


88epoch  702step | loss:  0.4149 :  75%|███████▌  | 6/8 [00:01<00:00,  3.36it/s]

 global_steps : 702


88epoch  703step | loss:  0.4151 :  88%|████████▊ | 7/8 [00:02<00:00,  3.37it/s]

 global_steps : 703


88epoch  704step | loss:  0.4328 : 100%|██████████| 8/8 [00:02<00:00,  3.42it/s]


 global_steps : 704


epoch 89 loss: 0.7184: 100%|██████████| 2/2 [00:00<00:00, 20.82it/s]
89epoch  705step | loss:  0.3678 :  12%|█▎        | 1/8 [00:00<00:02,  2.43it/s]

validation multi-class accuracy = 0.9194, f1 score = 0.9214
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

89epoch  706step | loss:  0.3886 :  25%|██▌       | 2/8 [00:00<00:02,  2.92it/s]

 global_steps : 706


89epoch  707step | loss:  0.3973 :  38%|███▊      | 3/8 [00:00<00:01,  3.12it/s]

 global_steps : 707


89epoch  708step | loss:  0.4063 :  50%|█████     | 4/8 [00:01<00:01,  3.23it/s]

 global_steps : 708


89epoch  709step | loss:  0.4104 :  62%|██████▎   | 5/8 [00:01<00:00,  3.30it/s]

 global_steps : 709


89epoch  710step | loss:  0.4129 :  75%|███████▌  | 6/8 [00:01<00:00,  3.33it/s]

 global_steps : 710


89epoch  711step | loss:  0.4082 :  88%|████████▊ | 7/8 [00:02<00:00,  3.38it/s]

 global_steps : 711


89epoch  712step | loss:  0.4021 : 100%|██████████| 8/8 [00:02<00:00,  3.28it/s]


 global_steps : 712


90epoch  713step | loss:  0.4008 :  12%|█▎        | 1/8 [00:00<00:02,  3.27it/s]

 global_steps : 713


90epoch  714step | loss:  0.4436 :  25%|██▌       | 2/8 [00:00<00:01,  3.32it/s]

 global_steps : 714


90epoch  715step | loss:  0.4220 :  38%|███▊      | 3/8 [00:00<00:01,  3.37it/s]

 global_steps : 715


90epoch  716step | loss:  0.4114 :  50%|█████     | 4/8 [00:01<00:01,  3.37it/s]

 global_steps : 716


90epoch  717step | loss:  0.4064 :  62%|██████▎   | 5/8 [00:01<00:00,  3.37it/s]

 global_steps : 717


90epoch  718step | loss:  0.4022 :  75%|███████▌  | 6/8 [00:01<00:00,  3.40it/s]

 global_steps : 718


90epoch  719step | loss:  0.3967 :  88%|████████▊ | 7/8 [00:02<00:00,  3.33it/s]

 global_steps : 719


epoch 90 loss: 0.6805: 100%|██████████| 2/2 [00:00<00:00, 20.15it/s]
90epoch  720step | loss:  0.3932 : 100%|██████████| 8/8 [00:02<00:00,  3.21it/s]


validation multi-class accuracy = 0.9194, f1 score = 0.9214
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

91epoch  721step | loss:  0.4147 :  12%|█▎        | 1/8 [00:00<00:02,  2.90it/s]

 global_steps : 721


91epoch  722step | loss:  0.4206 :  25%|██▌       | 2/8 [00:00<00:01,  3.11it/s]

 global_steps : 722


91epoch  723step | loss:  0.3991 :  38%|███▊      | 3/8 [00:00<00:01,  3.24it/s]

 global_steps : 723


91epoch  724step | loss:  0.3833 :  50%|█████     | 4/8 [00:01<00:01,  3.01it/s]

 global_steps : 724


91epoch  725step | loss:  0.3758 :  62%|██████▎   | 5/8 [00:01<00:01,  2.90it/s]

 global_steps : 725


91epoch  726step | loss:  0.3794 :  75%|███████▌  | 6/8 [00:02<00:00,  2.75it/s]

 global_steps : 726


91epoch  727step | loss:  0.3788 :  88%|████████▊ | 7/8 [00:02<00:00,  2.76it/s]

 global_steps : 727


91epoch  728step | loss:  0.3831 : 100%|██████████| 8/8 [00:02<00:00,  2.88it/s]


 global_steps : 728


92epoch  729step | loss:  0.3677 :  12%|█▎        | 1/8 [00:00<00:02,  2.88it/s]

 global_steps : 729


92epoch  730step | loss:  0.3609 :  25%|██▌       | 2/8 [00:00<00:02,  2.98it/s]

 global_steps : 730


92epoch  731step | loss:  0.3556 :  38%|███▊      | 3/8 [00:01<00:01,  2.99it/s]

 global_steps : 731


92epoch  732step | loss:  0.3960 :  50%|█████     | 4/8 [00:01<00:01,  2.99it/s]

 global_steps : 732


92epoch  733step | loss:  0.3857 :  62%|██████▎   | 5/8 [00:01<00:00,  3.03it/s]

 global_steps : 733


92epoch  734step | loss:  0.3790 :  75%|███████▌  | 6/8 [00:02<00:00,  2.98it/s]

 global_steps : 734


epoch 92 loss: 0.6594: 100%|██████████| 2/2 [00:00<00:00, 20.16it/s]


validation multi-class accuracy = 0.9355, f1 score = 0.9371
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

92epoch  735step | loss:  0.3718 :  88%|████████▊ | 7/8 [00:07<00:02,  2.15s/it]

eval/acc : 0.9354838709677419, eval/f1 : 0.9370699475962635, global_steps: 735


92epoch  736step | loss:  0.3722 : 100%|██████████| 8/8 [00:08<00:00,  1.02s/it]


 global_steps : 736


93epoch  737step | loss:  0.3539 :  12%|█▎        | 1/8 [00:00<00:02,  3.02it/s]

 global_steps : 737


93epoch  738step | loss:  0.3468 :  25%|██▌       | 2/8 [00:00<00:01,  3.21it/s]

 global_steps : 738


93epoch  739step | loss:  0.3575 :  38%|███▊      | 3/8 [00:00<00:01,  3.27it/s]

 global_steps : 739


93epoch  740step | loss:  0.3383 :  50%|█████     | 4/8 [00:01<00:01,  3.24it/s]

 global_steps : 740


93epoch  741step | loss:  0.3607 :  62%|██████▎   | 5/8 [00:01<00:00,  3.29it/s]

 global_steps : 741


93epoch  742step | loss:  0.3589 :  75%|███████▌  | 6/8 [00:01<00:00,  3.32it/s]

 global_steps : 742


93epoch  743step | loss:  0.3563 :  88%|████████▊ | 7/8 [00:02<00:00,  3.36it/s]

 global_steps : 743


93epoch  744step | loss:  0.3555 : 100%|██████████| 8/8 [00:02<00:00,  3.35it/s]


 global_steps : 744


94epoch  745step | loss:  0.4136 :  12%|█▎        | 1/8 [00:00<00:02,  3.40it/s]

 global_steps : 745


94epoch  746step | loss:  0.3566 :  25%|██▌       | 2/8 [00:00<00:01,  3.32it/s]

 global_steps : 746


94epoch  747step | loss:  0.3838 :  38%|███▊      | 3/8 [00:00<00:01,  3.35it/s]

 global_steps : 747


94epoch  748step | loss:  0.3764 :  50%|█████     | 4/8 [00:01<00:01,  3.37it/s]

 global_steps : 748


94epoch  749step | loss:  0.3610 :  62%|██████▎   | 5/8 [00:01<00:00,  3.40it/s]

 global_steps : 749


epoch 94 loss: 0.6402: 100%|██████████| 2/2 [00:00<00:00, 18.41it/s]
94epoch  750step | loss:  0.3492 :  75%|███████▌  | 6/8 [00:01<00:00,  2.91it/s]

validation multi-class accuracy = 0.9355, f1 score = 0.9371
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

94epoch  751step | loss:  0.3447 :  88%|████████▊ | 7/8 [00:02<00:00,  3.06it/s]

 global_steps : 751


94epoch  752step | loss:  0.3397 : 100%|██████████| 8/8 [00:02<00:00,  3.24it/s]


 global_steps : 752


95epoch  753step | loss:  0.3385 :  12%|█▎        | 1/8 [00:00<00:02,  3.47it/s]

 global_steps : 753


95epoch  754step | loss:  0.3492 :  25%|██▌       | 2/8 [00:00<00:01,  3.33it/s]

 global_steps : 754


95epoch  755step | loss:  0.3442 :  38%|███▊      | 3/8 [00:00<00:01,  3.30it/s]

 global_steps : 755


95epoch  756step | loss:  0.3414 :  50%|█████     | 4/8 [00:01<00:01,  3.25it/s]

 global_steps : 756


95epoch  757step | loss:  0.3260 :  62%|██████▎   | 5/8 [00:01<00:00,  3.23it/s]

 global_steps : 757


95epoch  758step | loss:  0.3231 :  75%|███████▌  | 6/8 [00:01<00:00,  3.27it/s]

 global_steps : 758


95epoch  759step | loss:  0.3299 :  88%|████████▊ | 7/8 [00:02<00:00,  3.28it/s]

 global_steps : 759


95epoch  760step | loss:  0.3330 : 100%|██████████| 8/8 [00:02<00:00,  3.23it/s]


 global_steps : 760


96epoch  761step | loss:  0.3511 :  12%|█▎        | 1/8 [00:00<00:02,  2.72it/s]

 global_steps : 761


96epoch  762step | loss:  0.3442 :  25%|██▌       | 2/8 [00:00<00:02,  2.42it/s]

 global_steps : 762


96epoch  763step | loss:  0.3345 :  38%|███▊      | 3/8 [00:01<00:01,  2.51it/s]

 global_steps : 763


96epoch  764step | loss:  0.3244 :  50%|█████     | 4/8 [00:01<00:01,  2.64it/s]

 global_steps : 764


epoch 96 loss: 0.6128: 100%|██████████| 2/2 [00:00<00:00, 16.37it/s]
96epoch  765step | loss:  0.3331 :  62%|██████▎   | 5/8 [00:02<00:01,  2.27it/s]

validation multi-class accuracy = 0.9355, f1 score = 0.9371
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

96epoch  766step | loss:  0.3294 :  75%|███████▌  | 6/8 [00:02<00:00,  2.41it/s]

 global_steps : 766


96epoch  767step | loss:  0.3344 :  88%|████████▊ | 7/8 [00:02<00:00,  2.50it/s]

 global_steps : 767


96epoch  768step | loss:  0.3308 : 100%|██████████| 8/8 [00:03<00:00,  2.57it/s]


 global_steps : 768


97epoch  769step | loss:  0.3713 :  12%|█▎        | 1/8 [00:00<00:02,  2.68it/s]

 global_steps : 769


97epoch  770step | loss:  0.3424 :  25%|██▌       | 2/8 [00:00<00:02,  2.76it/s]

 global_steps : 770


97epoch  771step | loss:  0.3298 :  38%|███▊      | 3/8 [00:01<00:01,  2.82it/s]

 global_steps : 771


97epoch  772step | loss:  0.3385 :  50%|█████     | 4/8 [00:01<00:01,  3.02it/s]

 global_steps : 772


97epoch  773step | loss:  0.3299 :  62%|██████▎   | 5/8 [00:01<00:01,  2.99it/s]

 global_steps : 773


97epoch  774step | loss:  0.3270 :  75%|███████▌  | 6/8 [00:02<00:00,  2.84it/s]

 global_steps : 774


97epoch  775step | loss:  0.3224 :  88%|████████▊ | 7/8 [00:02<00:00,  2.92it/s]

 global_steps : 775


97epoch  776step | loss:  0.3188 : 100%|██████████| 8/8 [00:02<00:00,  2.90it/s]


 global_steps : 776


98epoch  777step | loss:  0.3484 :  12%|█▎        | 1/8 [00:00<00:02,  2.72it/s]

 global_steps : 777


98epoch  778step | loss:  0.3304 :  25%|██▌       | 2/8 [00:00<00:02,  2.61it/s]

 global_steps : 778


98epoch  779step | loss:  0.3217 :  38%|███▊      | 3/8 [00:01<00:01,  2.72it/s]

 global_steps : 779


epoch 98 loss: 0.6128: 100%|██████████| 2/2 [00:00<00:00, 20.18it/s]
98epoch  780step | loss:  0.3104 :  50%|█████     | 4/8 [00:01<00:01,  2.48it/s]

validation multi-class accuracy = 0.9355, f1 score = 0.9371
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

98epoch  781step | loss:  0.3144 :  62%|██████▎   | 5/8 [00:01<00:01,  2.75it/s]

 global_steps : 781


98epoch  782step | loss:  0.3152 :  75%|███████▌  | 6/8 [00:02<00:00,  2.84it/s]

 global_steps : 782


98epoch  783step | loss:  0.3112 :  88%|████████▊ | 7/8 [00:02<00:00,  2.92it/s]

 global_steps : 783


98epoch  784step | loss:  0.3123 : 100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


 global_steps : 784


99epoch  785step | loss:  0.3166 :  12%|█▎        | 1/8 [00:00<00:02,  3.03it/s]

 global_steps : 785


99epoch  786step | loss:  0.3201 :  25%|██▌       | 2/8 [00:00<00:01,  3.03it/s]

 global_steps : 786


99epoch  787step | loss:  0.3275 :  38%|███▊      | 3/8 [00:01<00:01,  2.96it/s]

 global_steps : 787


99epoch  788step | loss:  0.3153 :  50%|█████     | 4/8 [00:01<00:01,  2.85it/s]

 global_steps : 788


99epoch  789step | loss:  0.3081 :  62%|██████▎   | 5/8 [00:01<00:01,  2.98it/s]

 global_steps : 789


99epoch  790step | loss:  0.3062 :  75%|███████▌  | 6/8 [00:02<00:00,  3.01it/s]

 global_steps : 790


99epoch  791step | loss:  0.3050 :  88%|████████▊ | 7/8 [00:02<00:00,  3.00it/s]

 global_steps : 791


99epoch  792step | loss:  0.3058 : 100%|██████████| 8/8 [00:02<00:00,  3.04it/s]


 global_steps : 792


100epoch  793step | loss:  0.3191 :  12%|█▎        | 1/8 [00:00<00:02,  3.05it/s]

 global_steps : 793


100epoch  794step | loss:  0.2965 :  25%|██▌       | 2/8 [00:00<00:01,  3.00it/s]

 global_steps : 794


epoch 100 loss: 0.5921: 100%|██████████| 2/2 [00:00<00:00, 16.10it/s]
100epoch  795step | loss:  0.2994 :  38%|███▊      | 3/8 [00:01<00:02,  2.46it/s]

validation multi-class accuracy = 0.9355, f1 score = 0.9371
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         3
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         3
          10       1.00      0.83      0.91         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         2
          13       1.00      0.50      0.67         2
          14       0.67      1.00      0.80         4
          15       1.00      0.75      0.86         4
          16       0.

100epoch  796step | loss:  0.2994 :  50%|█████     | 4/8 [00:01<00:01,  2.51it/s]

 global_steps : 796


100epoch  797step | loss:  0.3062 :  62%|██████▎   | 5/8 [00:01<00:01,  2.54it/s]

 global_steps : 797


100epoch  798step | loss:  0.3102 :  75%|███████▌  | 6/8 [00:02<00:00,  2.64it/s]

 global_steps : 798


100epoch  799step | loss:  0.3089 :  88%|████████▊ | 7/8 [00:02<00:00,  2.66it/s]

 global_steps : 799


100epoch  800step | loss:  0.3077 : 100%|██████████| 8/8 [00:03<00:00,  2.67it/s]

 global_steps : 800





# 💌Inference

In [None]:
class CFG:
    """Inference-time settings shared by the tokenizer and the data loader."""

    # Checkpoint name handed to AutoTokenizer.from_pretrained.
    model_name = "xlm-roberta-base"
    # Number of samples per DataLoader batch.
    batch_size = 1
    # Each question is padded/truncated to this many tokens.
    tokenizer_max_length = 35

In [None]:
class NLP_Dataset_test(Dataset):
    """Dataset that tokenizes one question per item for inference.

    Args:
        dataframe: pandas DataFrame with a ``question`` column (text) and a
            ``label`` column.
        tokenizer: Callable invoked as ``tokenizer(text, max_length=..., ...)``
            whose result supports item assignment (e.g. a Hugging Face
            tokenizer).
        max_length: Length every question is padded/truncated to. When
            omitted, ``CFG.tokenizer_max_length`` is read at item-access time.
    """

    def __init__(self, dataframe, tokenizer, max_length=None):
        self.dataset = dataframe
        self.question = dataframe['question']
        self.labels = dataframe['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        """Return the tokenized question at position ``idx`` with its label attached."""
        # Resolve the fallback lazily so later changes to CFG are still honoured.
        max_length = CFG.tokenizer_max_length if self.max_length is None else self.max_length

        # Positional access (.iloc): a DataLoader hands out positions
        # 0..len-1, which only coincide with index labels when the frame has
        # a fresh RangeIndex. Label-based access breaks on filtered, split or
        # shuffled frames.
        tokenized_text = self.tokenizer(self.question.iloc[idx],
                                        max_length=max_length,
                                        padding='max_length',
                                        truncation=True,
                                        return_tensors='pt',
                                        add_special_tokens=True)

        tokenized_text['label'] = self.labels.iloc[idx]
        return tokenized_text

    def __len__(self):
        """Return the number of rows in the label column."""
        return len(self.labels)

In [None]:
# Intent id -> intent name for the 19 question-intent classes.
# An id is simply the position of its name in the tuple below.
names = dict(enumerate((
    '이름',           # 0
    '가격',           # 1
    '사이즈 옵션',    # 2
    '소재',           # 3
    '비침',           # 4
    '카테고리',       # 5
    '색 옵션',        # 6
    '두께감',         # 7
    '신축성',         # 8
    '촉감',           # 9
    '핏',             # 10
    '안감',           # 11
    '스타일',         # 12
    '프린팅',         # 13
    '상의 색',        # 14
    '하의 색',        # 15
    '상의 카테고리',  # 16
    '하의 카테고리',  # 17
    '넥 라인',        # 18
)))

### 학습된 모델 불러오기

In [None]:
# Use the GPU when one is present, otherwise fall back to CPU so the cell
# also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model object saved by the training run.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
# map_location remaps tensors saved from a GPU onto the device chosen above.
network = torch.load("/content/drive/MyDrive/Colab Notebooks/fashion_reader/question_intention_classification/output/new_baseline.pt", map_location=device)

# The loaded object already carries its trained weights, so no state_dict
# round-trip is needed; just place it on the device and switch to eval mode
# (disables dropout and other train-only behaviour).
network.to(device)
network.eval()

XLMRobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Laye

In [None]:
# The single user question to classify.
input_str = "옷이 체크야?"

tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)

# One-row frame in the layout NLP_Dataset_test expects. The -1 label appears
# to be a placeholder: the inference loop never reads it.
test_df = pd.DataFrame({'question': [input_str], 'label': [-1]})

test_set = NLP_Dataset_test(test_df, tokenizer)
test_loader = DataLoader(test_set, batch_size=CFG.batch_size, shuffle=False)

### Test Data로 Inference

In [None]:
# Predicted intent ids, one per input question, in loader order.
preds_all = []

# Inference only: no_grad() skips autograd bookkeeping, which saves memory
# and time because no backward pass will ever run on these outputs.
with torch.no_grad(), tqdm(test_loader,
                           total=len(test_loader),
                           unit='batch') as test_bar:
    for batch in test_bar:
        # Drop the size-1 axis at dim 1 (presumably the extra leading axis
        # the tokenizer adds per sample with return_tensors='pt').
        input_ids = batch['input_ids'].squeeze(1).to(device)
        attention_mask = batch['attention_mask'].squeeze(1).to(device)

        # First element of the model output -- presumably the class logits.
        logits = network(input_ids, attention_mask=attention_mask)[0]

        # tolist() yields one Python int per sample, so this works for any
        # batch size (a bare .item() only works when the batch holds one).
        preds_all.extend(torch.argmax(logits, dim=1).tolist())

100%|██████████| 1/1 [00:00<00:00,  3.98batch/s]


In [None]:
# Report the question alongside the predicted intent id and its name.
intent_id = preds_all[0]
print(f'유저 질문 : {input_str}')
print(f'질문 의도 : {intent_id}, {names[intent_id]}')

유저 질문 : 옷이 체크야?
질문 의도 : 13, 프린팅
