In [216]:
!pip install transformers
!pip install datasets #데이터 세트 다운로드 Hugging Face 연동



In [217]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [218]:
import copy # 특정한 파이썬 객체를 통째로 메모리에 copy할 때
import json # json 형식으로 데이터를 표현할 때
import logging # 학습 과정 등 전반적인 프로그램의 진행 상황을 로깅할 때
import os # 파일 입출력 등 현재 컴퓨터에 대한 기능 수행할 때

# 경고(warning) 메시지가 너무 많이 나오는 것을 대비하여 무시 처리
import warnings
warnings.filterwarnings("ignore")

# 로깅할 때 기본적으로 오류(error) 사항으로 로그 메시지를 남기겠다는 의미
import logging
logging.basicConfig(level=logging.ERROR)

# 벡터, 행렬 등의 처리를 위한 NumPy, 테이블(엑셀) 형식의 데이터 처리할 때 Pandas
import numpy as np
import pandas as pd

from datasets import load_dataset
# train_test_split: 별도로 구분된 validation 세트가 없을 때
# 학습 데이터 세트에서 일부를 train과 validation으로 나눌 때 자주 사용 (8:2 정도로 나눔)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch

### 학습한 모델 관련 라이브러리 불러오기

In [219]:
import transformers
# Auto Model For Sequence Classification: 텍스트 분류를 위한 모델 → Cross-Entropy loss 사용
from transformers import AutoConfig, AutoModel, AutoModelForSequenceClassification, AutoTokenizer
# linear_schedule_with_warmup: 단계적으로 learning rate 줄여나가는 방법
# AdamW: SGD와 같이 optimization 방법 중 하나
from transformers import AdamW, get_linear_schedule_with_warmup

### 우리가 쓸 모델

- KoBigBird를 사용하고, 다음과 같은 형태로 사용 가능
- KoBigBird: BigBird 특유의 sparse attention 사용 (default)

In [220]:
from transformers import AutoModel, AutoTokenizer

# by default its in `block_sparse` mode with num_random_blocks=3, block_size=64
# 이름에서부터 알 수 있듯이 KoBigBird는 BERT 기반의 모델
model = AutoModel.from_pretrained("monologg/kobigbird-bert-base")

# Tokenizer도 마찬가지로 BERT 기반에서 가져온 것을 확인
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
text = "한국어 BigBird 모델을 공개합니다!"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

Attention type 'block_sparse' is not possible if sequence_length: 12 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


### 실험을 위한 하이퍼 파라미터 설정

In [221]:
# SimpleNamespace는 온점(.)으로 속성의 값을 정의할 수 있도록 해주는 라이브러리
# config.task = "cls"라고 하면, 나중에 print(config.task)했을 때 "cls"라고 출력
from types import SimpleNamespace

config = SimpleNamespace()

config.task = "cls"
config.dataset = "comment"

config.cache_dir = "cache" # 현재 데이터 세트에 대한 단어집 등 현재 task를 위한 임시적인 폴더
config.output_dir = "output" # 최종적인 모델이 저장되고, 결과가 저장되는 폴더

config.use_tpu = False
config.model_name_or_path = "monologg/kobigbird-bert-base" # Model name or path (HuggingFace에서 불러와 사용할 모델 이름)
config.data_dir = "./" # The input data dir ("10000_labeled.csv"가 있는 경로)

# 실질적으로 학습을 위해서는 tokenizing이 수행된 train file과 predict file을 만들어야 한다.
config.train_file = "joongang.csv" # 미리 준비된 학습 데이터 세트 경로
# 어차피 10000_labeled.csv를 (1) training 목적, (2) validatoin 목적으로 쪼개니까 평가는 validation으로 결과가 나올 것임
config.predict_file = "joongang.csv" # 미리 준비된 평가 데이터 세트 경로
# → 이거 일단 없으면, 지금처럼 train_file로 넣으시되, 나중에 생기시면 갈아끼우기

config.max_seq_length = 2048 # The maximum total input sequence length after tokenization. (최대 토큰 길이)
config.train_batch_size = 4 # Batch size for training. (학습할 때 batch_size)
config.eval_batch_size = 2 # Batch size for evaluation. (평가할 때 batch_size)

config.learning_rate = 3e-5 # The initial learning rate for Adam. (Adam optimizer에서 쓸 learning rate)
config.num_train_epochs = 10 # Total number of training epochs to perform. (전체 학습 epoch 수)

config.num_labels = 5 # 현재 task에서 선호도(1: 극진보, 2: 진보, 3: 중립, 4: 보수, 5: 극보수)의 개수는 5개이므로
# 5 classes multi-class classification 문제로 이해 할 수 있음
config.gradient_accumulation_steps = 2 # Number of updates steps to accumulate before performing a backward/update pass.
# batch_size가 큰 것처럼 처리하기 위해서, backward()를 매 번 수행하지 않고, gradient를 누적(acculmulation)하는 것

config.threads = 4
config.seed = 42 # random seed for initialization

config.do_train = True # Whether to run training.
config.do_eval_during_train = True
config.do_eval = True # Whether to run prediction.

config.do_lower_case = False
config.weight_decay = 0.0 # Weight decay if we apply some.
config.adam_epsilon = 1e-8 # Epsilon for Adam optimizer.
config.max_grad_norm = 1.0 # Max gradient norm.
config.warmup_proportion = 0.0 # Warmup proportion for linear warmup
# BigBird에서는 full attention을 하면, 메모리는 조금 더 소모되지만, 더 정확도가 향상
# config.attention_type = "original_full"

### 학습 데이터 전처리
- 학습 text를 매번 tokenization을 하지 않고, 모델 학습 시작전에 미리 모든 텍스트를 tokenization 한 결과를 저장한다.

In [222]:
# 본 실습에서 사용할 tokenizer 객체 초기화
tokenizer = AutoTokenizer.from_pretrained(config.model_name_or_path, cache_dir=config.cache_dir)

In [223]:
def train_split(config, texts, labels, is_train):
    # 바로 여기에서 train 데이터 세트가 8:2로 training과 validation이 나누어 진다.
    # 지금 평가 결과는 validation에 대한 결과
    # [오류] stratify가 labels면, test_dataset에 특정 레이블이 아예 등장하지 않으면 오류 발생
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=labels
    )
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=None
    )
    if is_train:
        texts, labels = x_train, x_label
    else:
        texts, labels = y_train, y_label
    return texts, labels

# 댓글(comment)이 담긴 .csv 파일이 있을 때, 여기에서 텍스트와 레이블 추출
def process_comment_cls(config, data_file, is_train):
    df = pd.read_csv(data_file)
    df = df.dropna(subset=['title', 'content', 'label1', 'label2'])

    # 매 줄에서 "label1(정치성향)", "label2(편향여부)" 열 추출
    politics = (df["label1"] - 1).astype(int).values.tolist()
    governments = (df["label2"] - 1).astype(int).values.tolist()
    labels = []
    # 한 줄씩 데이터를 확인하며
    for i in range(len(politics)):
        politic = politics[i] # 선호도
        government = governments[i] # 비속어
        labels.append([politic, government])
    print(len(labels))

    # title과 content를 합쳐서 texts로 표현
    texts = (df["title"] + " " + df["content"]).astype(str).values.tolist()

    texts, labels = train_split(config, texts, labels, is_train)
    return texts, labels

### 데이터 토큰화
- 주어진 데이터를 토큰화하고, 토큰화된 데이터를 파일에 저장하는 함수를 정의.
- 주어진 데이터는 텍스트와 레이블로 구성되어 있으며, 텍스트는 토큰화되고, 레이블은 정수로 변환.
- 토큰화된 데이터와 변환된 레이블은 JSON 형식으로 파일에 저장

In [224]:
import torch.utils.data as torch_data

def data_pretokenizing(config, tokenizer, is_train=True):
    if is_train:
        data_file = config.train_file
    else:
        data_file = config.predict_file

    data_path = config.data_dir
    if data_file is not None:
        data_path = os.path.join(data_path, data_file)
    else:
        data_path += "/"

    # 실제로 tokenizer를  저장될 데이터 세트의 파일 이름이 바로 dataset_file
    comps = [
        data_path,
        config.dataset,
        config.model_name_or_path.replace("/", "_"),
        config.max_seq_length,
        "train" if is_train else "dev",
        "dataset.txt",
    ]
    dataset_file = "_".join([str(comp) for comp in comps])
    print("dataset_file:", dataset_file)

    # 텍스트 문장을 읽어와서 token 값만 저장
    with open(dataset_file, "w", encoding="utf-8") as writer_file:
        # data: "joongang.csv" 파일에서 읽어와 (텍스트, 선호도 레이블)만 남긴 .csv 파일
        cnt = 0
        total_data = process_comment_cls(config, data_path, is_train)
        # 학습 데이터 세트를 하나씩 확인하며
        for text, label in zip(total_data[0], total_data[1]):
            # 여기에서 data는 하나의 (텍스트, 레이블) 쌍
            # feature는 해당 텍스트를 tokenizer에 넣어서 나온 결과
            feature = tokenizer(text, max_length=config.max_seq_length, padding="max_length", truncation=True, add_special_tokens=True)
            # 실제로 학습을 위해서는 (각 토큰의 index, 정답 레이블)로 학습을 진행
            writed_data = {
                "input_ids": feature["input_ids"],
                "attention_mask": feature["attention_mask"],
                "politic": int(float(label[0])), # "2.0" → 2.0 → 2
                "government": int(float(label[1])), # "2.0" → 2.0 → 2
              }
            # JSON은 쉽게 말하면 Python에서 dictionary와 같음 → 이를 file로 저장하는 것
            writer_file.write(json.dumps(writed_data) + "\n")
            cnt += 1
        print(f"{cnt} features processed from {data_path}")

    return dataset_file


In [225]:
# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 세트 전처리
    train_dataset_file = data_pretokenizing(config, tokenizer=tokenizer)

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset_file = data_pretokenizing(config, tokenizer=tokenizer, is_train=False)

# 결과적으로 만들어진 "./10000_labeled.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt"
# 내용을 확인해 보면, 약 8,000개의 각 학습 데이터에 대하여
#   → 하나씩 {"input_ids", "attention_mask", "preference", "slang"}으로 구성

dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt
499
399 features processed from ./joongang.csv
dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_1024_dev_dataset.txt
499
100 features processed from ./joongang.csv


### 데이터로더 초기화

#### 데이터패딩

In [226]:
class IterableDatasetPad(torch.utils.data.IterableDataset):
    def __init__(
        self,
        dataset: torch.utils.data.IterableDataset,
        batch_size: int = 1,
        num_devices: int = 1,
        seed: int = 0,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.seed = seed
        self.num_examples = 0

        chunk_size = self.batch_size * num_devices
        length = len(dataset)
        self.length = length + (chunk_size - length % chunk_size)

    def __len__(self):
        return self.length

    def __iter__(self):
        self.num_examples = 0
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.seed + self.epoch)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            self.num_examples += 1
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == self.batch_size:
                for batch in current_batch:
                    yield batch
                    if first_batch is None:
                        first_batch = batch.copy()
                current_batch = []

        while self.num_examples < self.length:
            add_num = self.batch_size - len(current_batch)
            self.num_examples += add_num
            current_batch += [first_batch] * add_num
            for batch in current_batch:
                yield batch
            current_batch = []

#### 전처리된 데이터를 DataLoader로 불러옴

In [235]:
# 전처리된 데이터는 하나하나 {"input_ids", "attention_mask", "labels", ...} 형태를 가짐
# PyTorch가 하나의 배치를 처리할 때는 PyTorch Tensor 형태여야 함
# <데이터 로더에서 불러오는 "Tensor"를 정의하는 함수>
def collate_fn(features):
    input_ids = [sample["input_ids"] for sample in features]
    attention_mask = [sample["attention_mask"] for sample in features]

    politic = [sample["politic"] for sample in features]
    government = [sample["government"] for sample in features]

    input_ids = torch.tensor(np.array(input_ids).astype(np.int64), dtype=torch.long)
    attention_mask = torch.tensor(np.array(attention_mask).astype(np.int8), dtype=torch.long)
    politic = torch.tensor(np.array(politic).astype(np.int64), dtype=torch.long)
    government = torch.tensor(np.array(government).astype(np.int64), dtype=torch.long)
    inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
    }
    labels = {
        "politic": politic,
        "government" : government
    }
    return inputs, labels

# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 로더 초기화
    train_dataset = load_dataset("text", data_files=train_dataset_file, download_mode="force_redownload")["train"]
    train_dataset = train_dataset.map(lambda x: json.loads(x["text"]), batched=False)

    train_dataloader = torch_data.DataLoader(
        train_dataset,
        sampler=torch_data.RandomSampler(train_dataset),
        drop_last=False,
        batch_size=config.train_batch_size,
        collate_fn=(collate_fn),
    )

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset = load_dataset("text", data_files=predict_dataset_file, download_mode="force_redownload")["train"]
predict_dataset = predict_dataset.map(lambda x: json.loads(x["text"]), batched=False)
predict_dataset = IterableDatasetPad(
    dataset=predict_dataset,
    batch_size=config.eval_batch_size,
    num_devices=1,
    seed=config.seed,
)

predict_dataloader = torch_data.DataLoader(
    predict_dataset,
    sampler=None,
    drop_last=False,
    batch_size=config.eval_batch_size,
    collate_fn=(collate_fn),
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/399 [00:00<?, ? examples/s]

Map:   0%|          | 0/399 [00:00<?, ? examples/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

### 텍스트 분류 모델 정의

In [236]:
from transformers import AutoModel
import torch.nn as nn

# 텍스트 분류 모델 정의
class ClsModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # (전처리된) 학습할 데이터 세트의 경로 설정
        data_file = os.path.join(config.data_dir, str(config.train_file))
        # 사전 학습된 모델 가중치 불러오기
        model_config = AutoConfig.from_pretrained(config.model_name_or_path, num_labels=config.num_labels)
        self.model = AutoModel.from_pretrained(
            config.model_name_or_path, config=model_config, cache_dir=config.cache_dir
        )
        self.classifier1 = nn.Linear(768, 5)
        self.classifier2 = nn.Linear(768, 5)

        # BERT model과 별개로, 입력 문자열을 토큰의 index로 바꾸어주는 tokenizer가 사용됨
        self.tokenizer = tokenizer

    # "학습된 모델"을 save_dir에 저장하는 함수
    def save_pretrained(self, save_dir):
        self.model.save_pretrained(save_dir)
        # Tokenizer는 기본적으로 "special_tokens_map_file", "tokenizer_file"을 가질 수 있음
        # 이러한 값을 제거한 뒤에 save_dir에 저장하겠다는 의미
        for key in ["special_tokens_map_file", "tokenizer_file"]:
            self.tokenizer.init_kwargs.pop(key, None)
        self.tokenizer.save_pretrained(save_dir)

    def get_optimizer(self): # 현재 모델을 학습하기 위한 최적화 방법(AdamW) 객체를 불러오는 함수
        # bias랑 LayerNorm에는 decay 적용하지 않겠다는 의미
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
        ]
        # AdamW의 첫 번째 인자(params)는 "학습할 가중치", weight_decay는 가중치에 적용되는 regularization 기법
        optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=config.adam_epsilon)
        return optimizer

    def get_scheduler(self, batch_num, optimizer): # AdamW로 학습할 때, learning rate을 단계적으로 줄이기 위한 함수
        if config.warmup_proportion == 0.0:
            return None

        t_total = batch_num // config.gradient_accumulation_steps * config.num_train_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(t_total * config.warmup_proportion),
            num_training_steps=t_total,
        )

        return scheduler

    def forward(self, inputs):
        # print(inputs) => {"input_ids", "attention_mask"}
        # https://huggingface.co/docs/transformers/model_doc/big_bird
        # BigBirdModel의 입력 양식에 맞게 넣어주어야 오류가 없음
        # BigBirdModel로 "input_ids", "attention_mask" 등 "미리 정해진" 규격에 맞는 입력만
        # 들어가야 오류가 없다는 의미 => 그러므로, preference, slang 등은 들어가면 X
        hidden = self.model(**inputs)
        # print(hidden.last_hidden_state.shape) # torch.Size([4 (batch_size), 1024 (seq_len), 768 (embedding_size)])
        # 마지막 레이어의 [CLS] 토큰만 가져오기
        cls_token_embeddings = hidden.last_hidden_state[:,0,:] # [batch_size, 768]
        # print(cls_token_embeddings.shape)
        output_1 = self.classifier1(cls_token_embeddings) # 768 → 5
        output_2 = self.classifier2(cls_token_embeddings) # 768 → 5
        print(output_1.shape) # [batch_size, 5]
        print(output_2.shape) # [batch_size, 5]

        return output_1, output_2

    def eval_step(self, inputs, labels, outputs):
        logits_1 = outputs[0].detach().cpu()
        logits_2 = outputs[1].detach().cpu()
        labels_1 = self.tensor_to_list(labels["politic"])
        labels_2 = self.tensor_to_list(labels["government"])
        predictions_1 = self.tensor_to_list(torch.argmax(logits_1, dim=-1))
        predictions_2 = self.tensor_to_list(torch.argmax(logits_2, dim=-1))
        results_1 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_1, labels_1)]
        results_2 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_2, labels_2)]
        return {"results_1": results_1, "results_2": results_2}





    # PyTorch의 Tensor 객체를 NumPy 객체로 변환
    def tensor_to_array(self, tensor):
        return tensor.detach().cpu().numpy()

    # PyTorch의 Tensor 객체를 Python의 리스트(list) 자료형으로 변환
    def tensor_to_list(self, tensor):
        return self.tensor_to_array(tensor).tolist()

In [237]:
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def cal_running_avg_loss(loss, running_avg_loss, decay=0.99):
    if running_avg_loss == 0:
        return loss
    running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
    return running_avg_loss

### 모델 학습 및 평가 라이브러리


In [238]:
from functools import partial
import sklearn.metrics as sklearn_metrics

"""binary_metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "precision": sklearn_metrics.precision_score, # TP / (TP + FP)
    "recall": sklearn_metrics.recall_score, # recall = sensitivity (민감도)
    "f1": sklearn_metrics.f1_score,
    "matthews_corrcoef": sklearn_metrics.matthews_corrcoef,
    "roc_auc": sklearn_metrics.roc_auc_score,
}""" # 우리는 두가지 task 다 다중분류임으로 안씀

metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "f1-macro": partial(sklearn_metrics.f1_score, average="macro"),
}


def eval_cls(results_1, results_2, **kwargs):
    predictions_1 = np.array([result["prediction"] for result in results_1])
    labels_1 = np.array([result["label"] for result in results_1])
    predictions_2 = np.array([result["prediction"] for result in results_2])
    labels_2 = np.array([result["label"] for result in results_2])
    results_1 = {
        metric: round(f(labels_1, predictions_1) * 100, 2)
        for metric, f in metrics.items()
    }
    results_2 = {
        metric: round(f(labels_2, predictions_2) * 100, 2)
        for metric, f in metrics.items()
    }

    return {
        "results_1": results_1,
        "results_2": results_2,
        "best_score_1": results_1["f1-macro"],
        "best_score_2": results_2["f1-macro"],
    }


### Epoch 동안 학습 및 평가를 수행하는 함수 정의

In [239]:
def _run_epoch(model, loader, device=None, context=None, **kwargs):
    config = kwargs["config"]
    is_train = kwargs["is_train"]

    avg_loss = 0
    results = []
    batch_num = len(loader)

    if is_train:
        model.train()
        if config.use_tpu:
            optimizer = context.getattr_or("optimizer", lambda: model.get_optimizer())
            scheduler = context.getattr_or("scheduler", lambda: model.get_scheduler(batch_num, optimizer))
        else:
            optimizer = kwargs["optimizer"]
            scheduler = kwargs["scheduler"]
    else:
        model.eval()

    is_master = True

    pbar = tqdm(enumerate(loader), total=batch_num, disable=not is_master, dynamic_ncols=True, position=0, leave=True)

    corrected_1 = 0
    corrected_2 = 0
    total = 0

    for i, (inputs, labels) in pbar:
        # inputs: {"input_ids": [batch_size(4), seq_len, 768], "attention_mask": [batch_size(4), seq_len, 768]}
        # labels: {"preference": [batch_size(4), 1], "slang": [batch_size(4), 1], "politic": [batch_size(4), 1]}
        if not config.use_tpu:
            # (k, v) => ("input_ids", value)
            # (k, v) => ("attention_mask", value)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            for k, v in labels.items():
                if isinstance(v, torch.Tensor):
                    labels[k] = v.to(device)

        outputs = model(inputs)


        outputs_1 = outputs[0]
        outputs_2 = outputs[1]

        labels_1 = labels["politic"]
        labels_2 = labels["government"]

        loss_function_1 = nn.CrossEntropyLoss()
        loss_1 = loss_function_1(outputs_1, labels_1)

        total += outputs_1.shape[0]

        _, predicted_1 = outputs_1.max(1)
        corrected_1 += predicted_1.eq(labels_1).sum().item()

        loss_function_2 = nn.CrossEntropyLoss()
        loss_2 = loss_function_2(outputs_2, labels_2)

        _, predicted_2 = outputs_2.max(1)
        corrected_2 += predicted_2.eq(labels_2).sum().item()

        w_1 = 1
        w_2 = 1
        loss = w_1 * loss_1 + w_2 * loss_2

        avg_loss = cal_running_avg_loss(loss.item(), avg_loss)
        loss /= config.gradient_accumulation_steps

        if is_train:
            loss.backward()
            if i % config.gradient_accumulation_steps == 0 or i == batch_num - 1:
                if config.max_grad_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)

                optimizer.step()
                optimizer.zero_grad()

                if scheduler is not None:
                    scheduler.step()
        else:
            result = (model.module if hasattr(model, "module") else model).eval_step(inputs, labels, outputs)
            results.extend(result)

        if is_master:
            pbar.set_description(f"epoch: {kwargs['epoch'] + 1}, {('train' if is_train else 'valid')} loss: {min(100, round(avg_loss, 4))}")

    return {
        "loss": avg_loss,
        "result": results,
    }


# 학습 코드에서 호출하는 함수
def run_epoch(**kwargs):
    model = kwargs.pop("model")
    if kwargs["config"].use_tpu:
        results = model(_run_epoch, **kwargs)
    else:
        results = _run_epoch(model, **kwargs)

    if isinstance(results, list):
        loss = sum([result["loss"] for result in results]) / len(results)
        result = []
        for res in results:
            result.extend(res["result"])
        results = {"loss": loss, "result": result}

    return results

### 딥러닝 모델 초기화 및 설정

In [240]:
# 현재 모델 이름이 "monologg/kobigbird-bert-base" 이므로, Hugging Face에서 찾아서 불러옴
set_seed(config.seed)

# 딥러닝 모델 초기화
model = ClsModel()

print(f"configuration: {str(config)}")

if torch.cuda.is_available(): # GPU를 사용할 수 있다면
    gpu_count = torch.cuda.device_count()
    print(f"{gpu_count} GPU device detected")
    devices = ["cuda:{}".format(i) for i in range(gpu_count)]
    model_dp = torch.nn.DataParallel(model, device_ids=devices)
    model.to(devices[0])
else: # GPU를 사용할 수 없다면 CPU로 구동
    devices = ["cpu"]
    model_dp = model

# 학습 결과를 저장하기 위한 폴더 만들기
if not os.path.exists(config.cache_dir):
    os.makedirs(config.cache_dir)

output_dir = os.path.join(config.output_dir, config.task, config.dataset)
print("Output directory:", output_dir)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 실제 학습을 위해 optimizer 및 scheduler 초기화
optimizer = None
scheduler = None
if config.do_train: # 학습 모드(train mode)인 경우
    optimizer = model.get_optimizer()
    scheduler = model.get_scheduler(len(train_dataloader), optimizer)

params = {
    "config": config,
    "model": model_dp,
    "optimizer": optimizer,
    "scheduler": scheduler,
}
if not config.use_tpu:
    params["device"] = devices[0]

configuration: namespace(task='cls', dataset='comment', cache_dir='cache', output_dir='output', use_tpu=False, model_name_or_path='monologg/kobigbird-bert-base', data_dir='./', train_file='joongang.csv', predict_file='joongang.csv', max_seq_length=1024, train_batch_size=4, eval_batch_size=2, learning_rate=3e-05, num_train_epochs=10, num_labels=5, gradient_accumulation_steps=2, threads=4, seed=42, do_train=True, do_eval_during_train=True, do_eval=True, do_lower_case=False, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, warmup_proportion=0.0)
1 GPU device detected
Output directory: output/cls/comment


In [249]:
def do_eval(epoch):
    with torch.no_grad():
        results = run_epoch(loader=predict_dataloader, epoch=epoch, is_train=False, **params)["result"]
        print(results)
        results_1 = [inner_list[0] for inner_list in results]
        results_2 = [inner_list[1] for inner_list in results]
        eval_results = eval_cls(
            config=config,
            model=model,
            loader=predict_dataloader,
            tokenizer=model.tokenizer,
            results_1=results_1,
            results_2=results_2,
        )

    print("Eval results for output 1.")
    for k, v in eval_results["results_1"].items():
        print(f"{k} : {v}")

    print("Eval results for output 2.")
    for k, v in eval_results["results_2"].items():
        print(f"{k} : {v}")

    return eval_results["best_score_1"], eval_results["best_score_2"]

train_losses = []
val_accuracies = []
if config.do_train:
    best_score = (0, 0)
    for epoch in range(config.num_train_epochs):
        train_results = run_epoch(loader=train_dataloader, epoch=epoch, is_train=True, **params)
        train_loss = train_results['loss']
        train_losses.append(train_loss)

        if config.do_eval_during_train:
            score1, score2 = do_eval(epoch)
            val_accuracies.append((score1, score2))

            if score1 >= best_score[0] and score2 >= best_score[1]:
                best_score = (score1, score2)
                output_dir = os.path.join(config.output_dir, config.task, config.dataset, f"{epoch}-{best_score[0]}-{best_score[1]}-ckpt")
                copy.deepcopy(
                    model_dp.module
                    if hasattr(model_dp, "module")
                    else model_dp._models[0]
                    if hasattr(model_dp, "_models")
                    else model_dp
                ).cpu().save_pretrained(output_dir)
                with open(os.path.join(output_dir, "finetune_config.json"), "w") as save_config:
                    json.dump(vars(config), save_config, sort_keys=True, indent=4)
                print(f"Checkpoint {output_dir} saved.")


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0786:   1%|          | 1/100 [00:00<01:04,  1.53it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0787:   2%|▏         | 2/100 [00:01<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.079:   3%|▎         | 3/100 [00:01<01:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0804:   4%|▍         | 4/100 [00:02<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0805:   5%|▌         | 5/100 [00:03<00:59,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0805:   6%|▌         | 6/100 [00:03<00:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0807:   7%|▋         | 7/100 [00:04<00:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0868:   8%|▊         | 8/100 [00:04<00:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0888:   9%|▉         | 9/100 [00:05<00:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0904:  10%|█         | 10/100 [00:06<00:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0932:  11%|█         | 11/100 [00:06<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0939:  12%|█▏        | 12/100 [00:07<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0943:  13%|█▎        | 13/100 [00:08<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.0995:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1016:  15%|█▌        | 15/100 [00:09<00:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1035:  16%|█▌        | 16/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1034:  17%|█▋        | 17/100 [00:10<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1033:  18%|█▊        | 18/100 [00:11<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1033:  19%|█▉        | 19/100 [00:11<00:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1052:  20%|██        | 20/100 [00:12<00:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1062:  21%|██        | 21/100 [00:12<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1087:  22%|██▏       | 22/100 [00:13<00:47,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1088:  23%|██▎       | 23/100 [00:14<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1086:  24%|██▍       | 24/100 [00:14<00:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1086:  25%|██▌       | 25/100 [00:15<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1085:  26%|██▌       | 26/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1082:  27%|██▋       | 27/100 [00:16<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1115:  28%|██▊       | 28/100 [00:17<00:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1119:  29%|██▉       | 29/100 [00:17<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.112:  30%|███       | 30/100 [00:18<00:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1193:  31%|███       | 31/100 [00:19<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1249:  32%|███▏      | 32/100 [00:19<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1255:  33%|███▎      | 33/100 [00:20<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.125:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1255:  35%|███▌      | 35/100 [00:21<00:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1292:  36%|███▌      | 36/100 [00:22<00:38,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1346:  37%|███▋      | 37/100 [00:22<00:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1384:  38%|███▊      | 38/100 [00:23<00:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1378:  39%|███▉      | 39/100 [00:23<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1374:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1387:  41%|████      | 41/100 [00:25<00:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1388:  42%|████▏     | 42/100 [00:25<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1393:  43%|████▎     | 43/100 [00:26<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1454:  44%|████▍     | 44/100 [00:26<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1458:  45%|████▌     | 45/100 [00:27<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1456:  46%|████▌     | 46/100 [00:28<00:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1516:  47%|████▋     | 47/100 [00:28<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1512:  48%|████▊     | 48/100 [00:29<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1536:  49%|████▉     | 49/100 [00:30<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1558:  50%|█████     | 50/100 [00:30<00:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1573:  51%|█████     | 51/100 [00:31<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1595:  52%|█████▏    | 52/100 [00:31<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1619:  53%|█████▎    | 53/100 [00:32<00:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1615:  54%|█████▍    | 54/100 [00:33<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1633:  55%|█████▌    | 55/100 [00:33<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1631:  56%|█████▌    | 56/100 [00:34<00:26,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.163:  57%|█████▋    | 57/100 [00:34<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1625:  58%|█████▊    | 58/100 [00:35<00:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1618:  59%|█████▉    | 59/100 [00:36<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1611:  60%|██████    | 60/100 [00:36<00:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1616:  61%|██████    | 61/100 [00:37<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1633:  62%|██████▏   | 62/100 [00:37<00:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1634:  63%|██████▎   | 63/100 [00:38<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.163:  64%|██████▍   | 64/100 [00:39<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.168:  65%|██████▌   | 65/100 [00:39<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1686:  66%|██████▌   | 66/100 [00:40<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1692:  67%|██████▋   | 67/100 [00:41<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1718:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.171:  69%|██████▉   | 69/100 [00:42<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1701:  70%|███████   | 70/100 [00:42<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1693:  71%|███████   | 71/100 [00:43<00:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1686:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1684:  73%|███████▎  | 73/100 [00:44<00:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1686:  74%|███████▍  | 74/100 [00:45<00:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1703:  75%|███████▌  | 75/100 [00:45<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1747:  76%|███████▌  | 76/100 [00:46<00:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1737:  77%|███████▋  | 77/100 [00:47<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1732:  78%|███████▊  | 78/100 [00:47<00:13,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1724:  79%|███████▉  | 79/100 [00:48<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1742:  80%|████████  | 80/100 [00:48<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1746:  81%|████████  | 81/100 [00:49<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.174:  82%|████████▏ | 82/100 [00:50<00:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1731:  83%|████████▎ | 83/100 [00:50<00:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1721:  84%|████████▍ | 84/100 [00:51<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1722:  85%|████████▌ | 85/100 [00:52<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1736:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1735:  87%|████████▋ | 87/100 [00:53<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1728:  88%|████████▊ | 88/100 [00:53<00:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1722:  89%|████████▉ | 89/100 [00:54<00:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1742:  90%|█████████ | 90/100 [00:55<00:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1775:  91%|█████████ | 91/100 [00:55<00:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1768:  92%|█████████▏| 92/100 [00:56<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1763:  93%|█████████▎| 93/100 [00:56<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1812:  94%|█████████▍| 94/100 [00:57<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1803:  95%|█████████▌| 95/100 [00:58<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1804:  96%|█████████▌| 96/100 [00:58<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1794:  97%|█████████▋| 97/100 [00:59<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1785:  98%|█████████▊| 98/100 [01:00<00:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 0.1775:  99%|█████████▉| 99/100 [01:00<00:00,  1.63it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 1, train loss: 0.1764: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 1, valid loss: 1.4313:   4%|▍         | 2/51 [00:00<00:06,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4241:   8%|▊         | 4/51 [00:00<00:06,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4005:  12%|█▏        | 6/51 [00:00<00:05,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4109:  16%|█▌        | 8/51 [00:01<00:05,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4378:  20%|█▉        | 10/51 [00:01<00:05,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4334:  24%|██▎       | 12/51 [00:01<00:05,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4443:  27%|██▋       | 14/51 [00:01<00:04,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4456:  31%|███▏      | 16/51 [00:02<00:04,  7.82it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4447:  35%|███▌      | 18/51 [00:02<00:04,  7.82it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4231:  39%|███▉      | 20/51 [00:02<00:03,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3969:  43%|████▎     | 22/51 [00:02<00:03,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4241:  47%|████▋     | 24/51 [00:03<00:03,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4356:  51%|█████     | 26/51 [00:03<00:03,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4622:  55%|█████▍    | 28/51 [00:03<00:03,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.5115:  59%|█████▉    | 30/51 [00:03<00:02,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.5014:  63%|██████▎   | 32/51 [00:04<00:02,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.5033:  67%|██████▋   | 34/51 [00:04<00:02,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4994:  71%|███████   | 36/51 [00:04<00:01,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.5183:  75%|███████▍  | 38/51 [00:04<00:01,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4985:  78%|███████▊  | 40/51 [00:05<00:01,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4772:  82%|████████▏ | 42/51 [00:05<00:01,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4513:  86%|████████▋ | 44/51 [00:05<00:00,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4618:  90%|█████████ | 46/51 [00:05<00:00,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.456:  94%|█████████▍| 48/51 [00:06<00:00,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4546:  98%|█████████▊| 50/51 [00:06<00:00,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.4407: 100%|██████████| 51/51 [00:06<00:00,  7.73it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.199:   1%|          | 1/100 [00:00<01:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1988:   2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2058:   3%|▎         | 3/100 [00:01<00:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2045:   4%|▍         | 4/100 [00:02<00:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2031:   5%|▌         | 5/100 [00:03<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2076:   6%|▌         | 6/100 [00:03<00:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2063:   7%|▋         | 7/100 [00:04<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2055:   8%|▊         | 8/100 [00:04<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2139:   9%|▉         | 9/100 [00:05<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2126:  10%|█         | 10/100 [00:06<00:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2113:  11%|█         | 11/100 [00:06<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2133:  12%|█▏        | 12/100 [00:07<00:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.212:  13%|█▎        | 13/100 [00:07<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2108:  14%|█▍        | 14/100 [00:08<00:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2094:  15%|█▌        | 15/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2099:  16%|█▌        | 16/100 [00:09<00:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2089:  17%|█▋        | 17/100 [00:10<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2089:  18%|█▊        | 18/100 [00:10<00:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2109:  19%|█▉        | 19/100 [00:11<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2095:  20%|██        | 20/100 [00:12<00:48,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2082:  21%|██        | 21/100 [00:12<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2068:  22%|██▏       | 22/100 [00:13<00:47,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2055:  23%|██▎       | 23/100 [00:14<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2042:  24%|██▍       | 24/100 [00:14<00:45,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2062:  25%|██▌       | 25/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2059:  26%|██▌       | 26/100 [00:15<00:44,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2054:  27%|██▋       | 27/100 [00:16<00:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2041:  28%|██▊       | 28/100 [00:17<00:43,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2032:  29%|██▉       | 29/100 [00:17<00:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2049:  30%|███       | 30/100 [00:18<00:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2037:  31%|███       | 31/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.213:  32%|███▏      | 32/100 [00:19<00:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2149:  33%|███▎      | 33/100 [00:20<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2135:  34%|███▍      | 34/100 [00:20<00:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.214:  35%|███▌      | 35/100 [00:21<00:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2133:  36%|███▌      | 36/100 [00:21<00:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.212:  37%|███▋      | 37/100 [00:22<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2105:  38%|███▊      | 38/100 [00:23<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2093:  39%|███▉      | 39/100 [00:23<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2084:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2072:  41%|████      | 41/100 [00:25<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2167:  42%|████▏     | 42/100 [00:25<00:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2167:  43%|████▎     | 43/100 [00:26<00:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2172:  44%|████▍     | 44/100 [00:26<00:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2158:  45%|████▌     | 45/100 [00:27<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2147:  46%|████▌     | 46/100 [00:28<00:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2138:  47%|████▋     | 47/100 [00:28<00:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2125:  48%|████▊     | 48/100 [00:29<00:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2125:  49%|████▉     | 49/100 [00:29<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2111:  50%|█████     | 50/100 [00:30<00:30,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2127:  51%|█████     | 51/100 [00:31<00:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2115:  52%|█████▏    | 52/100 [00:31<00:28,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2107:  53%|█████▎    | 53/100 [00:32<00:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2141:  54%|█████▍    | 54/100 [00:32<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2219:  55%|█████▌    | 55/100 [00:33<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2219:  56%|█████▌    | 56/100 [00:34<00:26,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2206:  57%|█████▋    | 57/100 [00:34<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2192:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2233:  59%|█████▉    | 59/100 [00:35<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2219:  60%|██████    | 60/100 [00:36<00:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2205:  61%|██████    | 61/100 [00:37<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2198:  62%|██████▏   | 62/100 [00:37<00:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2199:  63%|██████▎   | 63/100 [00:38<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2186:  64%|██████▍   | 64/100 [00:38<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2177:  65%|██████▌   | 65/100 [00:39<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2163:  66%|██████▌   | 66/100 [00:40<00:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2148:  67%|██████▋   | 67/100 [00:40<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2139:  68%|██████▊   | 68/100 [00:41<00:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2132:  69%|██████▉   | 69/100 [00:42<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.212:  70%|███████   | 70/100 [00:42<00:18,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2107:  71%|███████   | 71/100 [00:43<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2094:  72%|███████▏  | 72/100 [00:43<00:16,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2085:  73%|███████▎  | 73/100 [00:44<00:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2072:  74%|███████▍  | 74/100 [00:45<00:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2067:  75%|███████▌  | 75/100 [00:45<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2063:  76%|███████▌  | 76/100 [00:46<00:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2078:  77%|███████▋  | 77/100 [00:46<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2063:  78%|███████▊  | 78/100 [00:47<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2049:  79%|███████▉  | 79/100 [00:48<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2067:  80%|████████  | 80/100 [00:48<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2059:  81%|████████  | 81/100 [00:49<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2045:  82%|████████▏ | 82/100 [00:49<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2031:  83%|████████▎ | 83/100 [00:50<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2019:  84%|████████▍ | 84/100 [00:51<00:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2006:  85%|████████▌ | 85/100 [00:51<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.2007:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1995:  87%|████████▋ | 87/100 [00:53<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1988:  88%|████████▊ | 88/100 [00:53<00:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1975:  89%|████████▉ | 89/100 [00:54<00:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1962:  90%|█████████ | 90/100 [00:54<00:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.196:  91%|█████████ | 91/100 [00:55<00:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1952:  92%|█████████▏| 92/100 [00:56<00:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1941:  93%|█████████▎| 93/100 [00:56<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.194:  94%|█████████▍| 94/100 [00:57<00:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1929:  95%|█████████▌| 95/100 [00:57<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1929:  96%|█████████▌| 96/100 [00:58<00:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1916:  97%|█████████▋| 97/100 [00:59<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1907:  98%|█████████▊| 98/100 [00:59<00:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 0.1899:  99%|█████████▉| 99/100 [01:00<00:00,  1.61it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 2, train loss: 0.1894: 100%|██████████| 100/100 [01:00<00:00,  1.64it/s]
epoch: 2, valid loss: 1.5318:   4%|▍         | 2/51 [00:00<00:06,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5288:   8%|▊         | 4/51 [00:00<00:06,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5087:  12%|█▏        | 6/51 [00:00<00:05,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5145:  16%|█▌        | 8/51 [00:01<00:05,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5531:  20%|█▉        | 10/51 [00:01<00:05,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5259:  24%|██▎       | 12/51 [00:01<00:05,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5411:  27%|██▋       | 14/51 [00:01<00:04,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5589:  31%|███▏      | 16/51 [00:02<00:04,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5507:  35%|███▌      | 18/51 [00:02<00:04,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5259:  39%|███▉      | 20/51 [00:02<00:03,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.499:  43%|████▎     | 22/51 [00:02<00:03,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5226:  47%|████▋     | 24/51 [00:03<00:03,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.522:  51%|█████     | 26/51 [00:03<00:03,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5487:  55%|█████▍    | 28/51 [00:03<00:02,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5786:  59%|█████▉    | 30/51 [00:03<00:02,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5553:  63%|██████▎   | 32/51 [00:04<00:02,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5583:  67%|██████▋   | 34/51 [00:04<00:02,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5622:  71%|███████   | 36/51 [00:04<00:01,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5844:  75%|███████▍  | 38/51 [00:04<00:01,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5608:  78%|███████▊  | 40/51 [00:05<00:01,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5324:  82%|████████▏ | 42/51 [00:05<00:01,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5036:  86%|████████▋ | 44/51 [00:05<00:00,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5077:  90%|█████████ | 46/51 [00:05<00:00,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4974:  94%|█████████▍| 48/51 [00:06<00:00,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5191:  98%|█████████▊| 50/51 [00:06<00:00,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.5045: 100%|██████████| 51/51 [00:06<00:00,  7.70it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0702:   1%|          | 1/100 [00:00<01:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0706:   2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0706:   3%|▎         | 3/100 [00:01<00:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0709:   4%|▍         | 4/100 [00:02<00:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0716:   5%|▌         | 5/100 [00:03<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0718:   6%|▌         | 6/100 [00:03<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0723:   7%|▋         | 7/100 [00:04<00:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0722:   8%|▊         | 8/100 [00:04<00:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0728:   9%|▉         | 9/100 [00:05<00:57,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0728:  10%|█         | 10/100 [00:06<00:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0729:  11%|█         | 11/100 [00:06<00:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0729:  12%|█▏        | 12/100 [00:07<00:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0727:  13%|█▎        | 13/100 [00:07<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0731:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0756:  15%|█▌        | 15/100 [00:09<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0756:  16%|█▌        | 16/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0797:  17%|█▋        | 17/100 [00:10<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0796:  18%|█▊        | 18/100 [00:11<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0795:  19%|█▉        | 19/100 [00:11<00:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0797:  20%|██        | 20/100 [00:12<00:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0796:  21%|██        | 21/100 [00:12<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.087:  22%|██▏       | 22/100 [00:13<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0868:  23%|██▎       | 23/100 [00:14<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0871:  24%|██▍       | 24/100 [00:14<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0882:  25%|██▌       | 25/100 [00:15<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0904:  26%|██▌       | 26/100 [00:15<00:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0902:  27%|██▋       | 27/100 [00:16<00:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.09:  28%|██▊       | 28/100 [00:17<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0908:  29%|██▉       | 29/100 [00:17<00:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0906:  30%|███       | 30/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0915:  31%|███       | 31/100 [00:19<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0916:  32%|███▏      | 32/100 [00:19<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0913:  33%|███▎      | 33/100 [00:20<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0912:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.091:  35%|███▌      | 35/100 [00:21<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0941:  36%|███▌      | 36/100 [00:22<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0945:  37%|███▋      | 37/100 [00:22<00:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0942:  38%|███▊      | 38/100 [00:23<00:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.094:  39%|███▉      | 39/100 [00:23<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0938:  40%|████      | 40/100 [00:24<00:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0937:  41%|████      | 41/100 [00:25<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0942:  42%|████▏     | 42/100 [00:25<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0941:  43%|████▎     | 43/100 [00:26<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0939:  44%|████▍     | 44/100 [00:26<00:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0946:  45%|████▌     | 45/100 [00:27<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0943:  46%|████▌     | 46/100 [00:28<00:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0944:  47%|████▋     | 47/100 [00:28<00:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0953:  48%|████▊     | 48/100 [00:29<00:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0981:  49%|████▉     | 49/100 [00:30<00:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0978:  50%|█████     | 50/100 [00:30<00:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0975:  51%|█████     | 51/100 [00:31<00:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0987:  52%|█████▏    | 52/100 [00:31<00:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0983:  53%|█████▎    | 53/100 [00:32<00:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0979:  54%|█████▍    | 54/100 [00:33<00:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0978:  55%|█████▌    | 55/100 [00:33<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0974:  56%|█████▌    | 56/100 [00:34<00:26,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.097:  57%|█████▋    | 57/100 [00:34<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0967:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.097:  59%|█████▉    | 59/100 [00:36<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0966:  60%|██████    | 60/100 [00:36<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0962:  61%|██████    | 61/100 [00:37<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.0958:  62%|██████▏   | 62/100 [00:38<00:23,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1024:  63%|██████▎   | 63/100 [00:38<00:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1021:  64%|██████▍   | 64/100 [00:39<00:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1019:  65%|██████▌   | 65/100 [00:39<00:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1051:  66%|██████▌   | 66/100 [00:40<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1059:  67%|██████▋   | 67/100 [00:41<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1054:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1053:  69%|██████▉   | 69/100 [00:42<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.105:  70%|███████   | 70/100 [00:42<00:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1049:  71%|███████   | 71/100 [00:43<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1062:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1111:  73%|███████▎  | 73/100 [00:44<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1118:  74%|███████▍  | 74/100 [00:45<00:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1113:  75%|███████▌  | 75/100 [00:45<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1107:  76%|███████▌  | 76/100 [00:46<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1105:  77%|███████▋  | 77/100 [00:47<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1106:  78%|███████▊  | 78/100 [00:47<00:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1102:  79%|███████▉  | 79/100 [00:48<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1099:  80%|████████  | 80/100 [00:49<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1094:  81%|████████  | 81/100 [00:49<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1092:  82%|████████▏ | 82/100 [00:50<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1104:  83%|████████▎ | 83/100 [00:50<00:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1107:  84%|████████▍ | 84/100 [00:51<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1102:  85%|████████▌ | 85/100 [00:52<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1103:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1099:  87%|████████▋ | 87/100 [00:53<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1095:  88%|████████▊ | 88/100 [00:53<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.11:  89%|████████▉ | 89/100 [00:54<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1178:  90%|█████████ | 90/100 [00:55<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1173:  91%|█████████ | 91/100 [00:55<00:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1168:  92%|█████████▏| 92/100 [00:56<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1237:  93%|█████████▎| 93/100 [00:57<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1233:  94%|█████████▍| 94/100 [00:57<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1227:  95%|█████████▌| 95/100 [00:58<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1226:  96%|█████████▌| 96/100 [00:58<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1222:  97%|█████████▋| 97/100 [00:59<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.1218:  98%|█████████▊| 98/100 [01:00<00:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 0.122:  99%|█████████▉| 99/100 [01:00<00:00,  1.63it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 3, train loss: 0.1242: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 3, valid loss: 1.9693:   4%|▍         | 2/51 [00:00<00:06,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9461:   8%|▊         | 4/51 [00:00<00:06,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9217:  12%|█▏        | 6/51 [00:00<00:05,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9501:  16%|█▌        | 8/51 [00:01<00:05,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9471:  20%|█▉        | 10/51 [00:01<00:05,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9157:  24%|██▎       | 12/51 [00:01<00:05,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9328:  27%|██▋       | 14/51 [00:01<00:04,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9446:  31%|███▏      | 16/51 [00:02<00:04,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9345:  35%|███▌      | 18/51 [00:02<00:04,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8993:  39%|███▉      | 20/51 [00:02<00:04,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8644:  43%|████▎     | 22/51 [00:02<00:03,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8786:  47%|████▋     | 24/51 [00:03<00:03,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8666:  51%|█████     | 26/51 [00:03<00:03,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9004:  55%|█████▍    | 28/51 [00:03<00:03,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9252:  59%|█████▉    | 30/51 [00:03<00:02,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8972:  63%|██████▎   | 32/51 [00:04<00:02,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8905:  67%|██████▋   | 34/51 [00:04<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8977:  71%|███████   | 36/51 [00:04<00:01,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9233:  75%|███████▍  | 38/51 [00:04<00:01,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.9071:  78%|███████▊  | 40/51 [00:05<00:01,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8707:  82%|████████▏ | 42/51 [00:05<00:01,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8348:  86%|████████▋ | 44/51 [00:05<00:00,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8592:  90%|█████████ | 46/51 [00:06<00:00,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.8443:  94%|█████████▍| 48/51 [00:06<00:00,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.851:  98%|█████████▊| 50/51 [00:06<00:00,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.833: 100%|██████████| 51/51 [00:06<00:00,  7.63it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0644:   1%|          | 1/100 [00:00<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0644:   2%|▏         | 2/100 [00:01<00:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0644:   3%|▎         | 3/100 [00:01<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.065:   4%|▍         | 4/100 [00:02<00:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0656:   5%|▌         | 5/100 [00:03<00:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0659:   6%|▌         | 6/100 [00:03<00:57,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0662:   7%|▋         | 7/100 [00:04<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0686:   8%|▊         | 8/100 [00:04<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0686:   9%|▉         | 9/100 [00:05<00:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0686:  10%|█         | 10/100 [00:06<00:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.07:  11%|█         | 11/100 [00:06<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0702:  12%|█▏        | 12/100 [00:07<00:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0715:  13%|█▎        | 13/100 [00:07<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0725:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0724:  15%|█▌        | 15/100 [00:09<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0725:  16%|█▌        | 16/100 [00:09<00:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0729:  17%|█▋        | 17/100 [00:10<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0731:  18%|█▊        | 18/100 [00:10<00:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0809:  19%|█▉        | 19/100 [00:11<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0807:  20%|██        | 20/100 [00:12<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0804:  21%|██        | 21/100 [00:12<00:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0803:  22%|██▏       | 22/100 [00:13<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0801:  23%|██▎       | 23/100 [00:14<00:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0799:  24%|██▍       | 24/100 [00:14<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0854:  25%|██▌       | 25/100 [00:15<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0854:  26%|██▌       | 26/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0872:  27%|██▋       | 27/100 [00:16<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0875:  28%|██▊       | 28/100 [00:17<00:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0872:  29%|██▉       | 29/100 [00:17<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0871:  30%|███       | 30/100 [00:18<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0868:  31%|███       | 31/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0869:  32%|███▏      | 32/100 [00:19<00:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0866:  33%|███▎      | 33/100 [00:20<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0938:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0935:  35%|███▌      | 35/100 [00:21<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0937:  36%|███▌      | 36/100 [00:22<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0934:  37%|███▋      | 37/100 [00:22<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0934:  38%|███▊      | 38/100 [00:23<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.095:  39%|███▉      | 39/100 [00:23<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0954:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.095:  41%|████      | 41/100 [00:25<00:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0961:  42%|████▏     | 42/100 [00:25<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0963:  43%|████▎     | 43/100 [00:26<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0962:  44%|████▍     | 44/100 [00:26<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.096:  45%|████▌     | 45/100 [00:27<00:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0956:  46%|████▌     | 46/100 [00:28<00:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.0954:  47%|████▋     | 47/100 [00:28<00:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1017:  48%|████▊     | 48/100 [00:29<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1027:  49%|████▉     | 49/100 [00:30<00:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1025:  50%|█████     | 50/100 [00:30<00:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1021:  51%|█████     | 51/100 [00:31<00:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1018:  52%|█████▏    | 52/100 [00:31<00:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1015:  53%|█████▎    | 53/100 [00:32<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1015:  54%|█████▍    | 54/100 [00:33<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1013:  55%|█████▌    | 55/100 [00:33<00:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1013:  56%|█████▌    | 56/100 [00:34<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1012:  57%|█████▋    | 57/100 [00:34<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1016:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1026:  59%|█████▉    | 59/100 [00:36<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1037:  60%|██████    | 60/100 [00:36<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1033:  61%|██████    | 61/100 [00:37<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1032:  62%|██████▏   | 62/100 [00:37<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1027:  63%|██████▎   | 63/100 [00:38<00:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1076:  64%|██████▍   | 64/100 [00:39<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1072:  65%|██████▌   | 65/100 [00:39<00:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1069:  66%|██████▌   | 66/100 [00:40<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1064:  67%|██████▋   | 67/100 [00:41<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1077:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1075:  69%|██████▉   | 69/100 [00:42<00:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.107:  70%|███████   | 70/100 [00:42<00:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.107:  71%|███████   | 71/100 [00:43<00:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1068:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1069:  73%|███████▎  | 73/100 [00:44<00:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1065:  74%|███████▍  | 74/100 [00:45<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1061:  75%|███████▌  | 75/100 [00:45<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1056:  76%|███████▌  | 76/100 [00:46<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1051:  77%|███████▋  | 77/100 [00:47<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1051:  78%|███████▊  | 78/100 [00:47<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1047:  79%|███████▉  | 79/100 [00:48<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1104:  80%|████████  | 80/100 [00:48<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1104:  81%|████████  | 81/100 [00:49<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1131:  82%|████████▏ | 82/100 [00:50<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1126:  83%|████████▎ | 83/100 [00:50<00:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1121:  84%|████████▍ | 84/100 [00:51<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1115:  85%|████████▌ | 85/100 [00:52<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1111:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1108:  87%|████████▋ | 87/100 [00:53<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1103:  88%|████████▊ | 88/100 [00:53<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1098:  89%|████████▉ | 89/100 [00:54<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1093:  90%|█████████ | 90/100 [00:55<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1088:  91%|█████████ | 91/100 [00:55<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1101:  92%|█████████▏| 92/100 [00:56<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1147:  93%|█████████▎| 93/100 [00:56<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1153:  94%|█████████▍| 94/100 [00:57<00:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1146:  95%|█████████▌| 95/100 [00:58<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1146:  96%|█████████▌| 96/100 [00:58<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1142:  97%|█████████▋| 97/100 [00:59<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1136:  98%|█████████▊| 98/100 [01:00<00:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.1135:  99%|█████████▉| 99/100 [01:00<00:00,  1.62it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 4, train loss: 0.1131: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 4, valid loss: 1.8907:   4%|▍         | 2/51 [00:00<00:06,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8813:   8%|▊         | 4/51 [00:00<00:06,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8504:  12%|█▏        | 6/51 [00:00<00:05,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8616:  16%|█▌        | 8/51 [00:01<00:05,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8825:  20%|█▉        | 10/51 [00:01<00:05,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8557:  24%|██▎       | 12/51 [00:01<00:05,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8601:  27%|██▋       | 14/51 [00:01<00:04,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.877:  31%|███▏      | 16/51 [00:02<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8732:  35%|███▌      | 18/51 [00:02<00:04,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8423:  39%|███▉      | 20/51 [00:02<00:04,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8069:  43%|████▎     | 22/51 [00:02<00:03,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8452:  47%|████▋     | 24/51 [00:03<00:03,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8309:  51%|█████     | 26/51 [00:03<00:03,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8593:  55%|█████▍    | 28/51 [00:03<00:02,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8765:  59%|█████▉    | 30/51 [00:03<00:02,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8564:  63%|██████▎   | 32/51 [00:04<00:02,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8489:  67%|██████▋   | 34/51 [00:04<00:02,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8462:  71%|███████   | 36/51 [00:04<00:01,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8588:  75%|███████▍  | 38/51 [00:04<00:01,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.8272:  78%|███████▊  | 40/51 [00:05<00:01,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.7918:  82%|████████▏ | 42/51 [00:05<00:01,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.758:  86%|████████▋ | 44/51 [00:05<00:00,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.7345:  90%|█████████ | 46/51 [00:06<00:00,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.7212:  94%|█████████▍| 48/51 [00:06<00:00,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.7291:  98%|█████████▊| 50/51 [00:06<00:00,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.7122: 100%|██████████| 51/51 [00:06<00:00,  7.65it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0567:   1%|          | 1/100 [00:00<01:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.058:   2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.058:   3%|▎         | 3/100 [00:01<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.058:   4%|▍         | 4/100 [00:02<00:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.058:   5%|▌         | 5/100 [00:03<00:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0581:   6%|▌         | 6/100 [00:03<00:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.061:   7%|▋         | 7/100 [00:04<00:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.061:   8%|▊         | 8/100 [00:04<00:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0611:   9%|▉         | 9/100 [00:05<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0616:  10%|█         | 10/100 [00:06<00:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0622:  11%|█         | 11/100 [00:06<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0622:  12%|█▏        | 12/100 [00:07<00:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0623:  13%|█▎        | 13/100 [00:07<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0622:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0623:  15%|█▌        | 15/100 [00:09<00:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0624:  16%|█▌        | 16/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0626:  17%|█▋        | 17/100 [00:10<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0628:  18%|█▊        | 18/100 [00:11<00:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0637:  19%|█▉        | 19/100 [00:11<00:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0643:  20%|██        | 20/100 [00:12<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0645:  21%|██        | 21/100 [00:12<00:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.065:  22%|██▏       | 22/100 [00:13<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.065:  23%|██▎       | 23/100 [00:14<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0656:  24%|██▍       | 24/100 [00:14<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0657:  25%|██▌       | 25/100 [00:15<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0656:  26%|██▌       | 26/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0656:  27%|██▋       | 27/100 [00:16<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0656:  28%|██▊       | 28/100 [00:17<00:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0655:  29%|██▉       | 29/100 [00:17<00:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0657:  30%|███       | 30/100 [00:18<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0656:  31%|███       | 31/100 [00:19<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0655:  32%|███▏      | 32/100 [00:19<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0673:  33%|███▎      | 33/100 [00:20<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0695:  34%|███▍      | 34/100 [00:20<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0711:  35%|███▌      | 35/100 [00:21<00:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.071:  36%|███▌      | 36/100 [00:22<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0712:  37%|███▋      | 37/100 [00:22<00:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0715:  38%|███▊      | 38/100 [00:23<00:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0715:  39%|███▉      | 39/100 [00:23<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0718:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0716:  41%|████      | 41/100 [00:25<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0714:  42%|████▏     | 42/100 [00:25<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0712:  43%|████▎     | 43/100 [00:26<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.071:  44%|████▍     | 44/100 [00:26<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0708:  45%|████▌     | 45/100 [00:27<00:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0707:  46%|████▌     | 46/100 [00:28<00:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0705:  47%|████▋     | 47/100 [00:28<00:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0703:  48%|████▊     | 48/100 [00:29<00:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0849:  49%|████▉     | 49/100 [00:30<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0861:  50%|█████     | 50/100 [00:30<00:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0857:  51%|█████     | 51/100 [00:31<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0862:  52%|█████▏    | 52/100 [00:31<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0858:  53%|█████▎    | 53/100 [00:32<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0913:  54%|█████▍    | 54/100 [00:33<00:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0911:  55%|█████▌    | 55/100 [00:33<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0918:  56%|█████▌    | 56/100 [00:34<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0914:  57%|█████▋    | 57/100 [00:34<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0909:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0906:  59%|█████▉    | 59/100 [00:36<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.091:  60%|██████    | 60/100 [00:36<00:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0912:  61%|██████    | 61/100 [00:37<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0908:  62%|██████▏   | 62/100 [00:37<00:23,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0931:  63%|██████▎   | 63/100 [00:38<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0935:  64%|██████▍   | 64/100 [00:39<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0933:  65%|██████▌   | 65/100 [00:39<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.093:  66%|██████▌   | 66/100 [00:40<00:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0927:  67%|██████▋   | 67/100 [00:41<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0937:  68%|██████▊   | 68/100 [00:41<00:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1044:  69%|██████▉   | 69/100 [00:42<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.104:  70%|███████   | 70/100 [00:42<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1039:  71%|███████   | 71/100 [00:43<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1036:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1031:  73%|███████▎  | 73/100 [00:44<00:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1026:  74%|███████▍  | 74/100 [00:45<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1022:  75%|███████▌  | 75/100 [00:45<00:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1018:  76%|███████▌  | 76/100 [00:46<00:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1029:  77%|███████▋  | 77/100 [00:47<00:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1028:  78%|███████▊  | 78/100 [00:47<00:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1024:  79%|███████▉  | 79/100 [00:48<00:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.102:  80%|████████  | 80/100 [00:49<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1017:  81%|████████  | 81/100 [00:49<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1012:  82%|████████▏ | 82/100 [00:50<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1008:  83%|████████▎ | 83/100 [00:50<00:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1003:  84%|████████▍ | 84/100 [00:51<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0998:  85%|████████▌ | 85/100 [00:52<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0994:  86%|████████▌ | 86/100 [00:52<00:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.099:  87%|████████▋ | 87/100 [00:53<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.0985:  88%|████████▊ | 88/100 [00:53<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1033:  89%|████████▉ | 89/100 [00:54<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1029:  90%|█████████ | 90/100 [00:55<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1025:  91%|█████████ | 91/100 [00:55<00:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1025:  92%|█████████▏| 92/100 [00:56<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1021:  93%|█████████▎| 93/100 [00:57<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1018:  94%|█████████▍| 94/100 [00:57<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1035:  95%|█████████▌| 95/100 [00:58<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1032:  96%|█████████▌| 96/100 [00:58<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1028:  97%|█████████▋| 97/100 [00:59<00:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.1025:  98%|█████████▊| 98/100 [01:00<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.109:  99%|█████████▉| 99/100 [01:00<00:00,  1.61it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 5, train loss: 0.1084: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 5, valid loss: 3.2877:   4%|▍         | 2/51 [00:00<00:06,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.2252:   8%|▊         | 4/51 [00:00<00:06,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.1719:  12%|█▏        | 6/51 [00:00<00:05,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.1315:  16%|█▌        | 8/51 [00:01<00:05,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.1202:  20%|█▉        | 10/51 [00:01<00:05,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.0616:  24%|██▎       | 12/51 [00:01<00:05,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.0536:  27%|██▋       | 14/51 [00:01<00:04,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.0399:  31%|███▏      | 16/51 [00:02<00:04,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 3.0369:  35%|███▌      | 18/51 [00:02<00:04,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.9962:  39%|███▉      | 20/51 [00:02<00:04,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.9376:  43%|████▎     | 22/51 [00:02<00:03,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.9591:  47%|████▋     | 24/51 [00:03<00:03,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.9277:  51%|█████     | 26/51 [00:03<00:03,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.9292:  55%|█████▍    | 28/51 [00:03<00:02,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.9376:  59%|█████▉    | 30/51 [00:03<00:02,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.8879:  63%|██████▎   | 32/51 [00:04<00:02,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.8593:  67%|██████▋   | 34/51 [00:04<00:02,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.8308:  71%|███████   | 36/51 [00:04<00:01,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.8266:  75%|███████▍  | 38/51 [00:04<00:01,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.7875:  78%|███████▊  | 40/51 [00:05<00:01,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.7362:  82%|████████▏ | 42/51 [00:05<00:01,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.6859:  86%|████████▋ | 44/51 [00:05<00:00,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.6824:  90%|█████████ | 46/51 [00:05<00:00,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.6587:  94%|█████████▍| 48/51 [00:06<00:00,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.6406:  98%|█████████▊| 50/51 [00:06<00:00,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 2.6145: 100%|██████████| 51/51 [00:06<00:00,  7.68it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0836:   1%|          | 1/100 [00:00<01:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0833:   2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0847:   3%|▎         | 3/100 [00:01<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0843:   4%|▍         | 4/100 [00:02<00:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0844:   5%|▌         | 5/100 [00:03<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0841:   6%|▌         | 6/100 [00:03<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.084:   7%|▋         | 7/100 [00:04<00:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.084:   8%|▊         | 8/100 [00:04<00:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0837:   9%|▉         | 9/100 [00:05<00:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0835:  10%|█         | 10/100 [00:06<00:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0834:  11%|█         | 11/100 [00:06<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0831:  12%|█▏        | 12/100 [00:07<00:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0828:  13%|█▎        | 13/100 [00:07<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0827:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0878:  15%|█▌        | 15/100 [00:09<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0877:  16%|█▌        | 16/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0874:  17%|█▋        | 17/100 [00:10<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0871:  18%|█▊        | 18/100 [00:10<00:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0869:  19%|█▉        | 19/100 [00:11<00:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0869:  20%|██        | 20/100 [00:12<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0867:  21%|██        | 21/100 [00:12<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0863:  22%|██▏       | 22/100 [00:13<00:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.086:  23%|██▎       | 23/100 [00:14<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0871:  24%|██▍       | 24/100 [00:14<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0868:  25%|██▌       | 25/100 [00:15<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.093:  26%|██▌       | 26/100 [00:15<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0927:  27%|██▋       | 27/100 [00:16<00:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0926:  28%|██▊       | 28/100 [00:17<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0922:  29%|██▉       | 29/100 [00:17<00:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0937:  30%|███       | 30/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0935:  31%|███       | 31/100 [00:19<00:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0937:  32%|███▏      | 32/100 [00:19<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0934:  33%|███▎      | 33/100 [00:20<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.093:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1015:  35%|███▌      | 35/100 [00:21<00:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1014:  36%|███▌      | 36/100 [00:22<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1014:  37%|███▋      | 37/100 [00:22<00:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1009:  38%|███▊      | 38/100 [00:23<00:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1004:  39%|███▉      | 39/100 [00:23<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1009:  40%|████      | 40/100 [00:24<00:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1005:  41%|████      | 41/100 [00:25<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1:  42%|████▏     | 42/100 [00:25<00:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0994:  43%|████▎     | 43/100 [00:26<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.099:  44%|████▍     | 44/100 [00:26<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0985:  45%|████▌     | 45/100 [00:27<00:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0984:  46%|████▌     | 46/100 [00:28<00:33,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0979:  47%|████▋     | 47/100 [00:28<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0977:  48%|████▊     | 48/100 [00:29<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.0975:  49%|████▉     | 49/100 [00:30<00:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1037:  50%|█████     | 50/100 [00:30<00:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1032:  51%|█████     | 51/100 [00:31<00:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1126:  52%|█████▏    | 52/100 [00:31<00:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1121:  53%|█████▎    | 53/100 [00:32<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1117:  54%|█████▍    | 54/100 [00:33<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1112:  55%|█████▌    | 55/100 [00:33<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1106:  56%|█████▌    | 56/100 [00:34<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1108:  57%|█████▋    | 57/100 [00:34<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1103:  58%|█████▊    | 58/100 [00:35<00:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1099:  59%|█████▉    | 59/100 [00:36<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1093:  60%|██████    | 60/100 [00:36<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1091:  61%|██████    | 61/100 [00:37<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1086:  62%|██████▏   | 62/100 [00:37<00:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.108:  63%|██████▎   | 63/100 [00:38<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1076:  64%|██████▍   | 64/100 [00:39<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1072:  65%|██████▌   | 65/100 [00:39<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1069:  66%|██████▌   | 66/100 [00:40<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1064:  67%|██████▋   | 67/100 [00:41<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1059:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1053:  69%|██████▉   | 69/100 [00:42<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1047:  70%|███████   | 70/100 [00:42<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1042:  71%|███████   | 71/100 [00:43<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1037:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1033:  73%|███████▎  | 73/100 [00:44<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1029:  74%|███████▍  | 74/100 [00:45<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1024:  75%|███████▌  | 75/100 [00:45<00:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1021:  76%|███████▌  | 76/100 [00:46<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1017:  77%|███████▋  | 77/100 [00:47<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1012:  78%|███████▊  | 78/100 [00:47<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1009:  79%|███████▉  | 79/100 [00:48<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1037:  80%|████████  | 80/100 [00:48<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1067:  81%|████████  | 81/100 [00:49<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1062:  82%|████████▏ | 82/100 [00:50<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1059:  83%|████████▎ | 83/100 [00:50<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1056:  84%|████████▍ | 84/100 [00:51<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1052:  85%|████████▌ | 85/100 [00:52<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1057:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1052:  87%|████████▋ | 87/100 [00:53<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1046:  88%|████████▊ | 88/100 [00:53<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1041:  89%|████████▉ | 89/100 [00:54<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1043:  90%|█████████ | 90/100 [00:55<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1037:  91%|█████████ | 91/100 [00:55<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1032:  92%|█████████▏| 92/100 [00:56<00:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1027:  93%|█████████▎| 93/100 [00:56<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1029:  94%|█████████▍| 94/100 [00:57<00:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1025:  95%|█████████▌| 95/100 [00:58<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.102:  96%|█████████▌| 96/100 [00:58<00:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1015:  97%|█████████▋| 97/100 [00:59<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.101:  98%|█████████▊| 98/100 [00:59<00:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.1006:  99%|█████████▉| 99/100 [01:00<00:00,  1.63it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 6, train loss: 0.1001: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 6, valid loss: 3.3277:   4%|▍         | 2/51 [00:00<00:06,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.2929:   8%|▊         | 4/51 [00:00<00:06,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.2296:  12%|█▏        | 6/51 [00:00<00:05,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.1891:  16%|█▌        | 8/51 [00:01<00:05,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.193:  20%|█▉        | 10/51 [00:01<00:05,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.1401:  24%|██▎       | 12/51 [00:01<00:05,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.1428:  27%|██▋       | 14/51 [00:01<00:04,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.1266:  31%|███▏      | 16/51 [00:02<00:04,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.0904:  35%|███▌      | 18/51 [00:02<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 3.0323:  39%|███▉      | 20/51 [00:02<00:04,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.9729:  43%|████▎     | 22/51 [00:02<00:03,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.986:  47%|████▋     | 24/51 [00:03<00:03,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.9807:  51%|█████     | 26/51 [00:03<00:03,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.9844:  55%|█████▍    | 28/51 [00:03<00:03,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.9739:  59%|█████▉    | 30/51 [00:03<00:02,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.9165:  63%|██████▎   | 32/51 [00:04<00:02,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.8873:  67%|██████▋   | 34/51 [00:04<00:02,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.8603:  71%|███████   | 36/51 [00:04<00:02,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.847:  75%|███████▍  | 38/51 [00:04<00:01,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.8102:  78%|███████▊  | 40/51 [00:05<00:01,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.7642:  82%|████████▏ | 42/51 [00:05<00:01,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.7118:  86%|████████▋ | 44/51 [00:05<00:00,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.6869:  90%|█████████ | 46/51 [00:06<00:00,  7.22it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.6568:  94%|█████████▍| 48/51 [00:06<00:00,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.6387:  98%|█████████▊| 50/51 [00:06<00:00,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 2.6126: 100%|██████████| 51/51 [00:06<00:00,  7.57it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0546:   1%|          | 1/100 [00:00<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0546:   2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.058:   3%|▎         | 3/100 [00:01<00:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0581:   4%|▍         | 4/100 [00:02<00:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.058:   5%|▌         | 5/100 [00:03<00:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.058:   6%|▌         | 6/100 [00:03<00:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0579:   7%|▋         | 7/100 [00:04<00:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0579:   8%|▊         | 8/100 [00:04<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.061:   9%|▉         | 9/100 [00:05<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0609:  10%|█         | 10/100 [00:06<00:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0607:  11%|█         | 11/100 [00:06<00:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0606:  12%|█▏        | 12/100 [00:07<00:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0605:  13%|█▎        | 13/100 [00:07<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0605:  14%|█▍        | 14/100 [00:08<00:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0604:  15%|█▌        | 15/100 [00:09<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0604:  16%|█▌        | 16/100 [00:09<00:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0604:  17%|█▋        | 17/100 [00:10<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0603:  18%|█▊        | 18/100 [00:10<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0677:  19%|█▉        | 19/100 [00:11<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0675:  20%|██        | 20/100 [00:12<00:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0673:  21%|██        | 21/100 [00:12<00:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0672:  22%|██▏       | 22/100 [00:13<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0671:  23%|██▎       | 23/100 [00:14<00:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0669:  24%|██▍       | 24/100 [00:14<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0673:  25%|██▌       | 25/100 [00:15<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0671:  26%|██▌       | 26/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0669:  27%|██▋       | 27/100 [00:16<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0667:  28%|██▊       | 28/100 [00:17<00:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0677:  29%|██▉       | 29/100 [00:17<00:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0714:  30%|███       | 30/100 [00:18<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0711:  31%|███       | 31/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0709:  32%|███▏      | 32/100 [00:19<00:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0707:  33%|███▎      | 33/100 [00:20<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0733:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0736:  35%|███▌      | 35/100 [00:21<00:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0745:  36%|███▌      | 36/100 [00:22<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0744:  37%|███▋      | 37/100 [00:22<00:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0741:  38%|███▊      | 38/100 [00:23<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0739:  39%|███▉      | 39/100 [00:23<00:38,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0737:  40%|████      | 40/100 [00:24<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0738:  41%|████      | 41/100 [00:25<00:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0736:  42%|████▏     | 42/100 [00:25<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0734:  43%|████▎     | 43/100 [00:26<00:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0733:  44%|████▍     | 44/100 [00:27<00:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0731:  45%|████▌     | 45/100 [00:27<00:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0811:  46%|████▌     | 46/100 [00:28<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0809:  47%|████▋     | 47/100 [00:28<00:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0811:  48%|████▊     | 48/100 [00:29<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0808:  49%|████▉     | 49/100 [00:30<00:32,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0806:  50%|█████     | 50/100 [00:30<00:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0803:  51%|█████     | 51/100 [00:31<00:30,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.08:  52%|█████▏    | 52/100 [00:31<00:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0801:  53%|█████▎    | 53/100 [00:32<00:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0798:  54%|█████▍    | 54/100 [00:33<00:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0798:  55%|█████▌    | 55/100 [00:33<00:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0797:  56%|█████▌    | 56/100 [00:34<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0796:  57%|█████▋    | 57/100 [00:35<00:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0793:  58%|█████▊    | 58/100 [00:35<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.079:  59%|█████▉    | 59/100 [00:36<00:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0787:  60%|██████    | 60/100 [00:36<00:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0785:  61%|██████    | 61/100 [00:37<00:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0782:  62%|██████▏   | 62/100 [00:38<00:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0798:  63%|██████▎   | 63/100 [00:38<00:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0794:  64%|██████▍   | 64/100 [00:39<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0791:  65%|██████▌   | 65/100 [00:40<00:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.079:  66%|██████▌   | 66/100 [00:40<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0787:  67%|██████▋   | 67/100 [00:41<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0785:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0782:  69%|██████▉   | 69/100 [00:42<00:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.078:  70%|███████   | 70/100 [00:43<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0777:  71%|███████   | 71/100 [00:43<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0774:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0771:  73%|███████▎  | 73/100 [00:44<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0769:  74%|███████▍  | 74/100 [00:45<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0765:  75%|███████▌  | 75/100 [00:46<00:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0763:  76%|███████▌  | 76/100 [00:46<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0765:  77%|███████▋  | 77/100 [00:47<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0765:  78%|███████▊  | 78/100 [00:47<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0762:  79%|███████▉  | 79/100 [00:48<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0761:  80%|████████  | 80/100 [00:49<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0759:  81%|████████  | 81/100 [00:49<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0759:  82%|████████▏ | 82/100 [00:50<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0756:  83%|████████▎ | 83/100 [00:51<00:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0754:  84%|████████▍ | 84/100 [00:51<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0756:  85%|████████▌ | 85/100 [00:52<00:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0754:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0751:  87%|████████▋ | 87/100 [00:53<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0756:  88%|████████▊ | 88/100 [00:54<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0758:  89%|████████▉ | 89/100 [00:54<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0754:  90%|█████████ | 90/100 [00:55<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0789:  91%|█████████ | 91/100 [00:55<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0828:  92%|█████████▏| 92/100 [00:56<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0835:  93%|█████████▎| 93/100 [00:57<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0832:  94%|█████████▍| 94/100 [00:57<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0832:  95%|█████████▌| 95/100 [00:58<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.083:  96%|█████████▌| 96/100 [00:59<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.085:  97%|█████████▋| 97/100 [00:59<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0846:  98%|█████████▊| 98/100 [01:00<00:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.0843:  99%|█████████▉| 99/100 [01:00<00:00,  1.62it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 7, train loss: 0.0839: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 7, valid loss: 3.3151:   4%|▍         | 2/51 [00:00<00:06,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.2781:   8%|▊         | 4/51 [00:00<00:06,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.2145:  12%|█▏        | 6/51 [00:00<00:05,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.195:  16%|█▌        | 8/51 [00:01<00:05,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.1968:  20%|█▉        | 10/51 [00:01<00:05,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.1528:  24%|██▎       | 12/51 [00:01<00:05,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.146:  27%|██▋       | 14/51 [00:01<00:04,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.1257:  31%|███▏      | 16/51 [00:02<00:04,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.0937:  35%|███▌      | 18/51 [00:02<00:04,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.0547:  39%|███▉      | 20/51 [00:02<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.9948:  43%|████▎     | 22/51 [00:02<00:03,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 3.0072:  47%|████▋     | 24/51 [00:03<00:03,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.9757:  51%|█████     | 26/51 [00:03<00:03,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.982:  55%|█████▍    | 28/51 [00:03<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.9895:  59%|█████▉    | 30/51 [00:03<00:02,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.9333:  63%|██████▎   | 32/51 [00:04<00:02,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.9027:  67%|██████▋   | 34/51 [00:04<00:02,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.8739:  71%|███████   | 36/51 [00:04<00:01,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.8574:  75%|███████▍  | 38/51 [00:04<00:01,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.8081:  78%|███████▊  | 40/51 [00:05<00:01,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.7572:  82%|████████▏ | 42/51 [00:05<00:01,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.7069:  86%|████████▋ | 44/51 [00:05<00:00,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.6789:  90%|█████████ | 46/51 [00:06<00:00,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.6524:  94%|█████████▍| 48/51 [00:06<00:00,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.6339:  98%|█████████▊| 50/51 [00:06<00:00,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.6079: 100%|██████████| 51/51 [00:06<00:00,  7.61it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 0, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0526:   1%|          | 1/100 [00:00<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0526:   2%|▏         | 2/100 [00:01<00:58,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0581:   3%|▎         | 3/100 [00:01<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.058:   4%|▍         | 4/100 [00:02<00:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.058:   5%|▌         | 5/100 [00:03<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.058:   6%|▌         | 6/100 [00:03<00:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.058:   7%|▋         | 7/100 [00:04<00:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0594:   8%|▊         | 8/100 [00:04<00:55,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0594:   9%|▉         | 9/100 [00:05<00:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0595:  10%|█         | 10/100 [00:06<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0594:  11%|█         | 11/100 [00:06<00:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0593:  12%|█▏        | 12/100 [00:07<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0592:  13%|█▎        | 13/100 [00:07<00:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0591:  14%|█▍        | 14/100 [00:08<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.059:  15%|█▌        | 15/100 [00:09<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0588:  16%|█▌        | 16/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0597:  17%|█▋        | 17/100 [00:10<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0621:  18%|█▊        | 18/100 [00:11<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.062:  19%|█▉        | 19/100 [00:11<00:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0618:  20%|██        | 20/100 [00:12<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0618:  21%|██        | 21/100 [00:12<00:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  22%|██▏       | 22/100 [00:13<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0615:  23%|██▎       | 23/100 [00:14<00:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0614:  24%|██▍       | 24/100 [00:14<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0613:  25%|██▌       | 25/100 [00:15<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0613:  26%|██▌       | 26/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0611:  27%|██▋       | 27/100 [00:16<00:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  28%|██▊       | 28/100 [00:17<00:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  29%|██▉       | 29/100 [00:17<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  30%|███       | 30/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  31%|███       | 31/100 [00:19<00:43,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.062:  32%|███▏      | 32/100 [00:19<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0619:  33%|███▎      | 33/100 [00:20<00:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0618:  34%|███▍      | 34/100 [00:20<00:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0617:  35%|███▌      | 35/100 [00:21<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0617:  36%|███▌      | 36/100 [00:22<00:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0618:  37%|███▋      | 37/100 [00:22<00:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0617:  38%|███▊      | 38/100 [00:23<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  39%|███▉      | 39/100 [00:23<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0617:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0626:  41%|████      | 41/100 [00:25<00:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0624:  42%|████▏     | 42/100 [00:25<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0623:  43%|████▎     | 43/100 [00:26<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0622:  44%|████▍     | 44/100 [00:26<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0621:  45%|████▌     | 45/100 [00:27<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0622:  46%|████▌     | 46/100 [00:28<00:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.062:  47%|████▋     | 47/100 [00:28<00:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.062:  48%|████▊     | 48/100 [00:29<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0619:  49%|████▉     | 49/100 [00:30<00:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0618:  50%|█████     | 50/100 [00:30<00:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  51%|█████     | 51/100 [00:31<00:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0615:  52%|█████▏    | 52/100 [00:31<00:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0613:  53%|█████▎    | 53/100 [00:32<00:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0613:  54%|█████▍    | 54/100 [00:33<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0612:  55%|█████▌    | 55/100 [00:33<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0611:  56%|█████▌    | 56/100 [00:34<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0611:  57%|█████▋    | 57/100 [00:35<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.061:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.062:  59%|█████▉    | 59/100 [00:36<00:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0619:  60%|██████    | 60/100 [00:36<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0618:  61%|██████    | 61/100 [00:37<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0617:  62%|██████▏   | 62/100 [00:38<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0616:  63%|██████▎   | 63/100 [00:38<00:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0614:  64%|██████▍   | 64/100 [00:39<00:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0613:  65%|██████▌   | 65/100 [00:39<00:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0611:  66%|██████▌   | 66/100 [00:40<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.061:  67%|██████▋   | 67/100 [00:41<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0609:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0611:  69%|██████▉   | 69/100 [00:42<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0644:  70%|███████   | 70/100 [00:42<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0642:  71%|███████   | 71/100 [00:43<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.064:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.064:  73%|███████▎  | 73/100 [00:44<00:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0638:  74%|███████▍  | 74/100 [00:45<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0637:  75%|███████▌  | 75/100 [00:46<00:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0667:  76%|███████▌  | 76/100 [00:46<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0665:  77%|███████▋  | 77/100 [00:47<00:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0676:  78%|███████▊  | 78/100 [00:47<00:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0674:  79%|███████▉  | 79/100 [00:48<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0672:  80%|████████  | 80/100 [00:49<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0673:  81%|████████  | 81/100 [00:49<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0671:  82%|████████▏ | 82/100 [00:50<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0669:  83%|████████▎ | 83/100 [00:50<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.067:  84%|████████▍ | 84/100 [00:51<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0667:  85%|████████▌ | 85/100 [00:52<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0665:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0673:  87%|████████▋ | 87/100 [00:53<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0672:  88%|████████▊ | 88/100 [00:54<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0671:  89%|████████▉ | 89/100 [00:54<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.067:  90%|█████████ | 90/100 [00:55<00:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0668:  91%|█████████ | 91/100 [00:55<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0666:  92%|█████████▏| 92/100 [00:56<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0664:  93%|█████████▎| 93/100 [00:57<00:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0663:  94%|█████████▍| 94/100 [00:57<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0668:  95%|█████████▌| 95/100 [00:58<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0667:  96%|█████████▌| 96/100 [00:58<00:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0664:  97%|█████████▋| 97/100 [00:59<00:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0663:  98%|█████████▊| 98/100 [01:00<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.0661:  99%|█████████▉| 99/100 [01:00<00:00,  1.62it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 8, train loss: 0.0659: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 8, valid loss: 2.9748:   4%|▍         | 2/51 [00:00<00:06,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.9411:   8%|▊         | 4/51 [00:00<00:06,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8844:  12%|█▏        | 6/51 [00:00<00:05,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8613:  16%|█▌        | 8/51 [00:01<00:05,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8735:  20%|█▉        | 10/51 [00:01<00:05,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8323:  24%|██▎       | 12/51 [00:01<00:05,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8608:  27%|██▋       | 14/51 [00:01<00:04,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8458:  31%|███▏      | 16/51 [00:02<00:04,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.8238:  35%|███▌      | 18/51 [00:02<00:04,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.7727:  39%|███▉      | 20/51 [00:02<00:04,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.7184:  43%|████▎     | 22/51 [00:02<00:03,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.7329:  47%|████▋     | 24/51 [00:03<00:03,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.725:  51%|█████     | 26/51 [00:03<00:03,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.7332:  55%|█████▍    | 28/51 [00:03<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.7289:  59%|█████▉    | 30/51 [00:03<00:02,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.6803:  63%|██████▎   | 32/51 [00:04<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.656:  67%|██████▋   | 34/51 [00:04<00:02,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.6313:  71%|███████   | 36/51 [00:04<00:01,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.631:  75%|███████▍  | 38/51 [00:04<00:01,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.6011:  78%|███████▊  | 40/51 [00:05<00:01,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.5674:  82%|████████▏ | 42/51 [00:05<00:01,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.5176:  86%|████████▋ | 44/51 [00:05<00:00,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.5013:  90%|█████████ | 46/51 [00:05<00:00,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.4782:  94%|█████████▍| 48/51 [00:06<00:00,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.4534:  98%|█████████▊| 50/51 [00:06<00:00,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 2.4291: 100%|██████████| 51/51 [00:06<00:00,  7.66it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.047:   1%|          | 1/100 [00:00<01:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.047:   2%|▏         | 2/100 [00:01<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0473:   3%|▎         | 3/100 [00:01<01:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0474:   4%|▍         | 4/100 [00:02<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0474:   5%|▌         | 5/100 [00:03<00:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0476:   6%|▌         | 6/100 [00:03<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0477:   7%|▋         | 7/100 [00:04<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.048:   8%|▊         | 8/100 [00:04<00:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0481:   9%|▉         | 9/100 [00:05<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0481:  10%|█         | 10/100 [00:06<00:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0482:  11%|█         | 11/100 [00:06<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0482:  12%|█▏        | 12/100 [00:07<00:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0483:  13%|█▎        | 13/100 [00:07<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0483:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0483:  15%|█▌        | 15/100 [00:09<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0488:  16%|█▌        | 16/100 [00:09<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  17%|█▋        | 17/100 [00:10<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  18%|█▊        | 18/100 [00:11<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  19%|█▉        | 19/100 [00:11<00:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0492:  20%|██        | 20/100 [00:12<00:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  21%|██        | 21/100 [00:12<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  22%|██▏       | 22/100 [00:13<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  23%|██▎       | 23/100 [00:14<00:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0492:  24%|██▍       | 24/100 [00:14<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0492:  25%|██▌       | 25/100 [00:15<00:46,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0492:  26%|██▌       | 26/100 [00:15<00:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0491:  27%|██▋       | 27/100 [00:16<00:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0491:  28%|██▊       | 28/100 [00:17<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0492:  29%|██▉       | 29/100 [00:17<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0493:  30%|███       | 30/100 [00:18<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0494:  31%|███       | 31/100 [00:19<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0494:  32%|███▏      | 32/100 [00:19<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.05:  33%|███▎      | 33/100 [00:20<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0525:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0524:  35%|███▌      | 35/100 [00:21<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0525:  36%|███▌      | 36/100 [00:22<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0525:  37%|███▋      | 37/100 [00:22<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0524:  38%|███▊      | 38/100 [00:23<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0523:  39%|███▉      | 39/100 [00:23<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0523:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0523:  41%|████      | 41/100 [00:25<00:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0522:  42%|████▏     | 42/100 [00:25<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0521:  43%|████▎     | 43/100 [00:26<00:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0522:  44%|████▍     | 44/100 [00:27<00:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0521:  45%|████▌     | 45/100 [00:27<00:34,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0527:  46%|████▌     | 46/100 [00:28<00:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0527:  47%|████▋     | 47/100 [00:28<00:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0528:  48%|████▊     | 48/100 [00:29<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0527:  49%|████▉     | 49/100 [00:30<00:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0526:  50%|█████     | 50/100 [00:30<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0526:  51%|█████     | 51/100 [00:31<00:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0527:  52%|█████▏    | 52/100 [00:31<00:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0526:  53%|█████▎    | 53/100 [00:32<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0525:  54%|█████▍    | 54/100 [00:33<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0526:  55%|█████▌    | 55/100 [00:33<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0525:  56%|█████▌    | 56/100 [00:34<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0542:  57%|█████▋    | 57/100 [00:35<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0541:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.054:  59%|█████▉    | 59/100 [00:36<00:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0539:  60%|██████    | 60/100 [00:36<00:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0538:  61%|██████    | 61/100 [00:37<00:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0539:  62%|██████▏   | 62/100 [00:38<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0552:  63%|██████▎   | 63/100 [00:38<00:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0574:  64%|██████▍   | 64/100 [00:39<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0573:  65%|██████▌   | 65/100 [00:39<00:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0571:  66%|██████▌   | 66/100 [00:40<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0571:  67%|██████▋   | 67/100 [00:41<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0572:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0572:  69%|██████▉   | 69/100 [00:42<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0571:  70%|███████   | 70/100 [00:43<00:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0569:  71%|███████   | 71/100 [00:43<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0569:  72%|███████▏  | 72/100 [00:44<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0568:  73%|███████▎  | 73/100 [00:44<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0566:  74%|███████▍  | 74/100 [00:45<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0566:  75%|███████▌  | 75/100 [00:46<00:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0565:  76%|███████▌  | 76/100 [00:46<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0567:  77%|███████▋  | 77/100 [00:47<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0566:  78%|███████▊  | 78/100 [00:47<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0564:  79%|███████▉  | 79/100 [00:48<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0565:  80%|████████  | 80/100 [00:49<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.059:  81%|████████  | 81/100 [00:49<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0589:  82%|████████▏ | 82/100 [00:50<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0588:  83%|████████▎ | 83/100 [00:51<00:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0588:  84%|████████▍ | 84/100 [00:51<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0587:  85%|████████▌ | 85/100 [00:52<00:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0593:  86%|████████▌ | 86/100 [00:52<00:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0593:  87%|████████▋ | 87/100 [00:53<00:08,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0592:  88%|████████▊ | 88/100 [00:54<00:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.059:  89%|████████▉ | 89/100 [00:54<00:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0589:  90%|█████████ | 90/100 [00:55<00:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0589:  91%|█████████ | 91/100 [00:56<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0589:  92%|█████████▏| 92/100 [00:56<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0587:  93%|█████████▎| 93/100 [00:57<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0587:  94%|█████████▍| 94/100 [00:57<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0586:  95%|█████████▌| 95/100 [00:58<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0585:  96%|█████████▌| 96/100 [00:59<00:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0583:  97%|█████████▋| 97/100 [00:59<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0582:  98%|█████████▊| 98/100 [01:00<00:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.058:  99%|█████████▉| 99/100 [01:00<00:00,  1.63it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 9, train loss: 0.0579: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 9, valid loss: 2.7824:   4%|▍         | 2/51 [00:00<00:06,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7457:   8%|▊         | 4/51 [00:00<00:06,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6925:  12%|█▏        | 6/51 [00:00<00:05,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6726:  16%|█▌        | 8/51 [00:01<00:05,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6994:  20%|█▉        | 10/51 [00:01<00:05,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.667:  24%|██▎       | 12/51 [00:01<00:05,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6864:  27%|██▋       | 14/51 [00:01<00:04,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6768:  31%|███▏      | 16/51 [00:02<00:04,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6629:  35%|███▌      | 18/51 [00:02<00:04,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6278:  39%|███▉      | 20/51 [00:02<00:04,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5764:  43%|████▎     | 22/51 [00:02<00:03,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5915:  47%|████▋     | 24/51 [00:03<00:03,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5796:  51%|█████     | 26/51 [00:03<00:03,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6008:  55%|█████▍    | 28/51 [00:03<00:03,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6064:  59%|█████▉    | 30/51 [00:03<00:02,  7.06it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5582:  63%|██████▎   | 32/51 [00:04<00:02,  7.22it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5376:  67%|██████▋   | 34/51 [00:04<00:02,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5207:  71%|███████   | 36/51 [00:04<00:01,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5147:  75%|███████▍  | 38/51 [00:05<00:01,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.4864:  78%|███████▊  | 40/51 [00:05<00:01,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.4511:  82%|████████▏ | 42/51 [00:05<00:01,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.4033:  86%|████████▋ | 44/51 [00:05<00:00,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.3852:  90%|█████████ | 46/51 [00:06<00:00,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.3629:  94%|█████████▍| 48/51 [00:06<00:00,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.3511:  98%|█████████▊| 50/51 [00:06<00:00,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.3279: 100%|██████████| 51/51 [00:06<00:00,  7.58it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.066:   1%|          | 1/100 [00:00<01:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0658:   2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0656:   3%|▎         | 3/100 [00:01<00:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0654:   4%|▍         | 4/100 [00:02<00:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0653:   5%|▌         | 5/100 [00:03<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0651:   6%|▌         | 6/100 [00:03<00:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0649:   7%|▋         | 7/100 [00:04<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0647:   8%|▊         | 8/100 [00:04<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0645:   9%|▉         | 9/100 [00:05<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0642:  10%|█         | 10/100 [00:06<00:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0644:  11%|█         | 11/100 [00:06<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0657:  12%|█▏        | 12/100 [00:07<00:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0657:  13%|█▎        | 13/100 [00:07<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0658:  14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0656:  15%|█▌        | 15/100 [00:09<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0656:  16%|█▌        | 16/100 [00:09<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0654:  17%|█▋        | 17/100 [00:10<00:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0653:  18%|█▊        | 18/100 [00:10<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0652:  19%|█▉        | 19/100 [00:11<00:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0651:  20%|██        | 20/100 [00:12<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0652:  21%|██        | 21/100 [00:12<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.065:  22%|██▏       | 22/100 [00:13<00:47,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0648:  23%|██▎       | 23/100 [00:14<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0646:  24%|██▍       | 24/100 [00:14<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0644:  25%|██▌       | 25/100 [00:15<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0644:  26%|██▌       | 26/100 [00:15<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0644:  27%|██▋       | 27/100 [00:16<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0643:  28%|██▊       | 28/100 [00:17<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0641:  29%|██▉       | 29/100 [00:17<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.064:  30%|███       | 30/100 [00:18<00:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0638:  31%|███       | 31/100 [00:18<00:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0637:  32%|███▏      | 32/100 [00:19<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0634:  33%|███▎      | 33/100 [00:20<00:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0634:  34%|███▍      | 34/100 [00:20<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0632:  35%|███▌      | 35/100 [00:21<00:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.063:  36%|███▌      | 36/100 [00:22<00:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0628:  37%|███▋      | 37/100 [00:22<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0628:  38%|███▊      | 38/100 [00:23<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0626:  39%|███▉      | 39/100 [00:23<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0625:  40%|████      | 40/100 [00:24<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0624:  41%|████      | 41/100 [00:25<00:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0622:  42%|████▏     | 42/100 [00:25<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0622:  43%|████▎     | 43/100 [00:26<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0621:  44%|████▍     | 44/100 [00:26<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.062:  45%|████▌     | 45/100 [00:27<00:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.063:  46%|████▌     | 46/100 [00:28<00:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0628:  47%|████▋     | 47/100 [00:28<00:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0627:  48%|████▊     | 48/100 [00:29<00:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0624:  49%|████▉     | 49/100 [00:30<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0622:  50%|█████     | 50/100 [00:30<00:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0621:  51%|█████     | 51/100 [00:31<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0619:  52%|█████▏    | 52/100 [00:31<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0618:  53%|█████▎    | 53/100 [00:32<00:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0616:  54%|█████▍    | 54/100 [00:33<00:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0615:  55%|█████▌    | 55/100 [00:33<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0613:  56%|█████▌    | 56/100 [00:34<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0612:  57%|█████▋    | 57/100 [00:34<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0611:  58%|█████▊    | 58/100 [00:35<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0609:  59%|█████▉    | 59/100 [00:36<00:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0607:  60%|██████    | 60/100 [00:36<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0607:  61%|██████    | 61/100 [00:37<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0605:  62%|██████▏   | 62/100 [00:37<00:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0603:  63%|██████▎   | 63/100 [00:38<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0602:  64%|██████▍   | 64/100 [00:39<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0602:  65%|██████▌   | 65/100 [00:39<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.06:  66%|██████▌   | 66/100 [00:40<00:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.06:  67%|██████▋   | 67/100 [00:41<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0613:  68%|██████▊   | 68/100 [00:41<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0612:  69%|██████▉   | 69/100 [00:42<00:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.061:  70%|███████   | 70/100 [00:42<00:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0609:  71%|███████   | 71/100 [00:43<00:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0607:  72%|███████▏  | 72/100 [00:44<00:16,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0631:  73%|███████▎  | 73/100 [00:44<00:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0629:  74%|███████▍  | 74/100 [00:45<00:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0628:  75%|███████▌  | 75/100 [00:45<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0626:  76%|███████▌  | 76/100 [00:46<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0624:  77%|███████▋  | 77/100 [00:47<00:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0623:  78%|███████▊  | 78/100 [00:47<00:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0621:  79%|███████▉  | 79/100 [00:48<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0619:  80%|████████  | 80/100 [00:48<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0618:  81%|████████  | 81/100 [00:49<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0616:  82%|████████▏ | 82/100 [00:50<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0615:  83%|████████▎ | 83/100 [00:50<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0615:  84%|████████▍ | 84/100 [00:51<00:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0613:  85%|████████▌ | 85/100 [00:52<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0613:  86%|████████▌ | 86/100 [00:52<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0612:  87%|████████▋ | 87/100 [00:53<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0611:  88%|████████▊ | 88/100 [00:53<00:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.061:  89%|████████▉ | 89/100 [00:54<00:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0608:  90%|█████████ | 90/100 [00:55<00:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0607:  91%|█████████ | 91/100 [00:55<00:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0605:  92%|█████████▏| 92/100 [00:56<00:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0604:  93%|█████████▎| 93/100 [00:56<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0603:  94%|█████████▍| 94/100 [00:57<00:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0601:  95%|█████████▌| 95/100 [00:58<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.06:  96%|█████████▌| 96/100 [00:58<00:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0598:  97%|█████████▋| 97/100 [00:59<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0598:  98%|█████████▊| 98/100 [00:59<00:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0596:  99%|█████████▉| 99/100 [01:00<00:00,  1.62it/s]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 10, train loss: 0.0594: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
epoch: 10, valid loss: 4.0365:   4%|▍         | 2/51 [00:00<00:06,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.9851:   8%|▊         | 4/51 [00:00<00:06,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.9084:  12%|█▏        | 6/51 [00:00<00:05,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.877:  16%|█▌        | 8/51 [00:01<00:05,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.8554:  20%|█▉        | 10/51 [00:01<00:05,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.7889:  24%|██▎       | 12/51 [00:01<00:05,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.7755:  27%|██▋       | 14/51 [00:01<00:04,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.744:  31%|███▏      | 16/51 [00:02<00:04,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.7013:  35%|███▌      | 18/51 [00:02<00:04,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.6393:  39%|███▉      | 20/51 [00:02<00:04,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.5677:  43%|████▎     | 22/51 [00:02<00:03,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.5675:  47%|████▋     | 24/51 [00:03<00:03,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.5469:  51%|█████     | 26/51 [00:03<00:03,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.5525:  55%|█████▍    | 28/51 [00:03<00:03,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.554:  59%|█████▉    | 30/51 [00:03<00:02,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.4855:  63%|██████▎   | 32/51 [00:04<00:02,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.4451:  67%|██████▋   | 34/51 [00:04<00:02,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.4095:  71%|███████   | 36/51 [00:04<00:01,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.3956:  75%|███████▍  | 38/51 [00:04<00:01,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.3509:  78%|███████▊  | 40/51 [00:05<00:01,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.3:  82%|████████▏ | 42/51 [00:05<00:01,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.2352:  86%|████████▋ | 44/51 [00:05<00:00,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.2098:  90%|█████████ | 46/51 [00:06<00:00,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.1727:  94%|█████████▍| 48/51 [00:06<00:00,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.137:  98%|█████████▊| 50/51 [00:06<00:00,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 3.1059: 100%|██████████| 51/51 [00:06<00:00,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[[{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}], [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}], [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], [{'prediction': 1, 'label': 2}, {'prediction': 1, 'label': 1}], [{'prediction': 3, 'label': 3}, {'prediction': 0, 'label': 1}], [




In [None]:
1. huggingface에서 다른 모델 찾아서 그냥 돌아가는 거면 아무거나 돌려보기(automodel 바꿔서)
2. 지금 코드 자체가 문제 없는지 확인해보기 (loss 바꿔보거나... 데이터 줄여보거나...)
3. 길이 늘려보기 (2048) => 아마도 메모리 이슈 있음 => 이 경우 배치 사이즈 줄이고, 어큐뮬레이션 늘려보기
4. 여기까지 문제 없는 거 확인하면 1000개 채우는대로 최고 모델 세팅 찾아서 auto labeling 해서 나머지 다 채우기
=> 최종적으로 10,000개 채우고, 모델 대충 4~5개정도 베이스라인 비교해서 논문 작성