In [1]:
!pip install transformers
!pip install datasets #데이터 세트 다운로드 Hugging Face 연동

Collecting transformers
  Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m57.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m111.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m78.6 MB/s[0m eta [36m0:00:

In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [3]:
import copy # 특정한 파이썬 객체를 통째로 메모리에 copy할 때
import json # json 형식으로 데이터를 표현할 때
import logging # 학습 과정 등 전반적인 프로그램의 진행 상황을 로깅할 때
import os # 파일 입출력 등 현재 컴퓨터에 대한 기능 수행할 때

# 경고(warning) 메시지가 너무 많이 나오는 것을 대비하여 무시 처리
import warnings
warnings.filterwarnings("ignore")

# 로깅할 때 기본적으로 오류(error) 사항으로 로그 메시지를 남기겠다는 의미
import logging
logging.basicConfig(level=logging.ERROR)

# 벡터, 행렬 등의 처리를 위한 NumPy, 테이블(엑셀) 형식의 데이터 처리할 때 Pandas
import numpy as np
import pandas as pd

from datasets import load_dataset
# train_test_split: 별도로 구분된 validation 세트가 없을 때
# 학습 데이터 세트에서 일부를 train과 validation으로 나눌 때 자주 사용 (8:2 정도로 나눔)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch

### 학습한 모델 관련 라이브러리 불러오기

In [4]:
import transformers
# Auto Model For Sequence Classification: 텍스트 분류를 위한 모델 → Cross-Entropy loss 사용
from transformers import AutoConfig, AutoModel, AutoModelForSequenceClassification, AutoTokenizer
# linear_schedule_with_warmup: 단계적으로 learning rate 줄여나가는 방법
# AdamW: SGD와 같이 optimization 방법 중 하나
from transformers import AdamW, get_linear_schedule_with_warmup

### 우리가 쓸 모델

- KoBigBird를 사용하고, 다음과 같은 형태로 사용 가능
- KoBigBird: BigBird 특유의 sparse attention 사용 (default)

In [5]:
from transformers import AutoModel, AutoTokenizer

# by default its in `block_sparse` mode with num_random_blocks=3, block_size=64
# 이름에서부터 알 수 있듯이 KoBigBird는 BERT 기반의 모델
model = AutoModel.from_pretrained("monologg/kobigbird-bert-base")

# Tokenizer도 마찬가지로 BERT 기반에서 가져온 것을 확인
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
text = "한국어 BigBird 모델을 공개합니다!"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

Downloading (…)lve/main/config.json:   0%|          | 0.00/870 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/458M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/241k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/492k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/169 [00:00<?, ?B/s]

Attention type 'block_sparse' is not possible if sequence_length: 12 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


### 실험을 위한 하이퍼 파라미터 설정

In [6]:
# SimpleNamespace는 온점(.)으로 속성의 값을 정의할 수 있도록 해주는 라이브러리
# config.task = "cls"라고 하면, 나중에 print(config.task)했을 때 "cls"라고 출력
from types import SimpleNamespace

config = SimpleNamespace()

config.task = "cls"
config.dataset = "comment"

config.cache_dir = "cache" # 현재 데이터 세트에 대한 단어집 등 현재 task를 위한 임시적인 폴더
config.output_dir = "output" # 최종적인 모델이 저장되고, 결과가 저장되는 폴더

config.use_tpu = False
config.model_name_or_path = "monologg/kobigbird-bert-base" # Model name or path (HuggingFace에서 불러와 사용할 모델 이름)
config.data_dir = "./" # The input data dir ("10000_labeled.csv"가 있는 경로)

# 실질적으로 학습을 위해서는 tokenizing이 수행된 train file과 predict file을 만들어야 한다.
config.train_file = "joongang.csv" # 미리 준비된 학습 데이터 세트 경로
# 어차피 10000_labeled.csv를 (1) training 목적, (2) validatoin 목적으로 쪼개니까 평가는 validation으로 결과가 나올 것임
config.predict_file = "joongang.csv" # 미리 준비된 평가 데이터 세트 경로
# → 이거 일단 없으면, 지금처럼 train_file로 넣으시되, 나중에 생기시면 갈아끼우기

config.max_seq_length = 3072 # The maximum total input sequence length after tokenization. (최대 토큰 길이)
config.train_batch_size = 4 # Batch size for training. (학습할 때 batch_size)
config.eval_batch_size = 2 # Batch size for evaluation. (평가할 때 batch_size)

config.learning_rate = 3e-5 # The initial learning rate for Adam. (Adam optimizer에서 쓸 learning rate)
config.num_train_epochs = 10 # Total number of training epochs to perform. (전체 학습 epoch 수)

config.num_labels = 5 # 현재 task에서 선호도(1: 극진보, 2: 진보, 3: 중립, 4: 보수, 5: 극보수)의 개수는 5개이므로
# 5 classes multi-class classification 문제로 이해 할 수 있음
config.gradient_accumulation_steps = 2 # Number of updates steps to accumulate before performing a backward/update pass.
# batch_size가 큰 것처럼 처리하기 위해서, backward()를 매 번 수행하지 않고, gradient를 누적(acculmulation)하는 것

config.threads = 4
config.seed = 42 # random seed for initialization

config.do_train = True # Whether to run training.
config.do_eval_during_train = True
config.do_eval = True # Whether to run prediction.

config.do_lower_case = False
config.weight_decay = 0.0 # Weight decay if we apply some.
config.adam_epsilon = 1e-8 # Epsilon for Adam optimizer.
config.max_grad_norm = 1.0 # Max gradient norm.
config.warmup_proportion = 0.0 # Warmup proportion for linear warmup
# BigBird에서는 full attention을 하면, 메모리는 조금 더 소모되지만, 더 정확도가 향상
# config.attention_type = "original_full"

### 학습 데이터 전처리
- 학습 text를 매번 tokenization을 하지 않고, 모델 학습 시작전에 미리 모든 텍스트를 tokenization 한 결과를 저장한다.

In [7]:
# 본 실습에서 사용할 tokenizer 객체 초기화
tokenizer = AutoTokenizer.from_pretrained(config.model_name_or_path, cache_dir=config.cache_dir)

Downloading (…)okenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/241k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/492k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/169 [00:00<?, ?B/s]

In [8]:
def train_split(config, texts, labels, is_train):
    # 바로 여기에서 train 데이터 세트가 8:2로 training과 validation이 나누어 진다.
    # 지금 평가 결과는 validation에 대한 결과
    # [오류] stratify가 labels면, test_dataset에 특정 레이블이 아예 등장하지 않으면 오류 발생
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=labels
    )
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=None
    )
    if is_train:
        texts, labels = x_train, x_label
    else:
        texts, labels = y_train, y_label
    return texts, labels

# 댓글(comment)이 담긴 .csv 파일이 있을 때, 여기에서 텍스트와 레이블 추출
def process_comment_cls(config, data_file, is_train):
    df = pd.read_csv(data_file)
    df = df.dropna(subset=['title', 'content', 'label1', 'label2'])

    # 매 줄에서 "label1(정치성향)", "label2(편향여부)" 열 추출
    politics = (df["label1"] - 1).astype(int).values.tolist()
    governments = (df["label2"] - 1).astype(int).values.tolist()
    labels = []
    # 한 줄씩 데이터를 확인하며
    for i in range(len(politics)):
        politic = politics[i] # 선호도
        government = governments[i] # 비속어
        labels.append([politic, government])
    print(len(labels))

    # title과 content를 합쳐서 texts로 표현
    texts = (df["title"] + " " + df["content"]).astype(str).values.tolist()

    texts, labels = train_split(config, texts, labels, is_train)
    return texts, labels

### 데이터 토큰화
- 주어진 데이터를 토큰화하고, 토큰화된 데이터를 파일에 저장하는 함수를 정의.
- 주어진 데이터는 텍스트와 레이블로 구성되어 있으며, 텍스트는 토큰화되고, 레이블은 정수로 변환.
- 토큰화된 데이터와 변환된 레이블은 JSON 형식으로 파일에 저장

In [9]:
import torch.utils.data as torch_data

def data_pretokenizing(config, tokenizer, is_train=True):
    if is_train:
        data_file = config.train_file
    else:
        data_file = config.predict_file

    data_path = config.data_dir
    if data_file is not None:
        data_path = os.path.join(data_path, data_file)
    else:
        data_path += "/"

    # 실제로 tokenizer를  저장될 데이터 세트의 파일 이름이 바로 dataset_file
    comps = [
        data_path,
        config.dataset,
        config.model_name_or_path.replace("/", "_"),
        config.max_seq_length,
        "train" if is_train else "dev",
        "dataset.txt",
    ]
    dataset_file = "_".join([str(comp) for comp in comps])
    print("dataset_file:", dataset_file)

    # 텍스트 문장을 읽어와서 token 값만 저장
    with open(dataset_file, "w", encoding="utf-8") as writer_file:
        # data: "joongang.csv" 파일에서 읽어와 (텍스트, 선호도 레이블)만 남긴 .csv 파일
        cnt = 0
        total_data = process_comment_cls(config, data_path, is_train)
        # 학습 데이터 세트를 하나씩 확인하며
        for text, label in zip(total_data[0], total_data[1]):
            # 여기에서 data는 하나의 (텍스트, 레이블) 쌍
            # feature는 해당 텍스트를 tokenizer에 넣어서 나온 결과
            feature = tokenizer(text, max_length=config.max_seq_length, padding="max_length", truncation=True, add_special_tokens=True)
            # 실제로 학습을 위해서는 (각 토큰의 index, 정답 레이블)로 학습을 진행
            writed_data = {
                "input_ids": feature["input_ids"],
                "attention_mask": feature["attention_mask"],
                "politic": int(float(label[0])), # "2.0" → 2.0 → 2
                "government": int(float(label[1])), # "2.0" → 2.0 → 2
              }
            # JSON은 쉽게 말하면 Python에서 dictionary와 같음 → 이를 file로 저장하는 것
            writer_file.write(json.dumps(writed_data) + "\n")
            cnt += 1
        print(f"{cnt} features processed from {data_path}")

    return dataset_file


In [10]:
# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 세트 전처리
    train_dataset_file = data_pretokenizing(config, tokenizer=tokenizer)

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset_file = data_pretokenizing(config, tokenizer=tokenizer, is_train=False)

# 결과적으로 만들어진 "./10000_labeled.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt"
# 내용을 확인해 보면, 약 8,000개의 각 학습 데이터에 대하여
#   → 하나씩 {"input_ids", "attention_mask", "preference", "slang"}으로 구성

dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_3072_train_dataset.txt
499
399 features processed from ./joongang.csv
dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_3072_dev_dataset.txt
499
100 features processed from ./joongang.csv


### 데이터로더 초기화

#### 데이터패딩

In [11]:
class IterableDatasetPad(torch.utils.data.IterableDataset):
    def __init__(
        self,
        dataset: torch.utils.data.IterableDataset,
        batch_size: int = 1,
        num_devices: int = 1,
        seed: int = 0,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.seed = seed
        self.num_examples = 0

        chunk_size = self.batch_size * num_devices
        length = len(dataset)
        self.length = length + (chunk_size - length % chunk_size)

    def __len__(self):
        return self.length

    def __iter__(self):
        self.num_examples = 0
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.seed + self.epoch)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            self.num_examples += 1
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == self.batch_size:
                for batch in current_batch:
                    yield batch
                    if first_batch is None:
                        first_batch = batch.copy()
                current_batch = []

        while self.num_examples < self.length:
            add_num = self.batch_size - len(current_batch)
            self.num_examples += add_num
            current_batch += [first_batch] * add_num
            for batch in current_batch:
                yield batch
            current_batch = []

#### 전처리된 데이터를 DataLoader로 불러옴

In [12]:
# 전처리된 데이터는 하나하나 {"input_ids", "attention_mask", "labels", ...} 형태를 가짐
# PyTorch가 하나의 배치를 처리할 때는 PyTorch Tensor 형태여야 함
# <데이터 로더에서 불러오는 "Tensor"를 정의하는 함수>
def collate_fn(features):
    input_ids = [sample["input_ids"] for sample in features]
    attention_mask = [sample["attention_mask"] for sample in features]

    politic = [sample["politic"] for sample in features]
    government = [sample["government"] for sample in features]

    input_ids = torch.tensor(np.array(input_ids).astype(np.int64), dtype=torch.long)
    attention_mask = torch.tensor(np.array(attention_mask).astype(np.int8), dtype=torch.long)
    politic = torch.tensor(np.array(politic).astype(np.int64), dtype=torch.long)
    government = torch.tensor(np.array(government).astype(np.int64), dtype=torch.long)
    inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
    }
    labels = {
        "politic": politic,
        "government" : government
    }
    return inputs, labels

# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 로더 초기화
    train_dataset = load_dataset("text", data_files=train_dataset_file, download_mode="force_redownload")["train"]
    train_dataset = train_dataset.map(lambda x: json.loads(x["text"]), batched=False)

    train_dataloader = torch_data.DataLoader(
        train_dataset,
        sampler=torch_data.RandomSampler(train_dataset),
        drop_last=False,
        batch_size=config.train_batch_size,
        collate_fn=(collate_fn),
    )

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset = load_dataset("text", data_files=predict_dataset_file, download_mode="force_redownload")["train"]
predict_dataset = predict_dataset.map(lambda x: json.loads(x["text"]), batched=False)
predict_dataset = IterableDatasetPad(
    dataset=predict_dataset,
    batch_size=config.eval_batch_size,
    num_devices=1,
    seed=config.seed,
)

predict_dataloader = torch_data.DataLoader(
    predict_dataset,
    sampler=None,
    drop_last=False,
    batch_size=config.eval_batch_size,
    collate_fn=(collate_fn),
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/399 [00:00<?, ? examples/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

### 텍스트 분류 모델 정의

In [13]:
from transformers import AutoModel
import torch.nn as nn

# 텍스트 분류 모델 정의
class ClsModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # (전처리된) 학습할 데이터 세트의 경로 설정
        data_file = os.path.join(config.data_dir, str(config.train_file))
        # 사전 학습된 모델 가중치 불러오기
        model_config = AutoConfig.from_pretrained(config.model_name_or_path, num_labels=config.num_labels)
        self.model = AutoModel.from_pretrained(
            config.model_name_or_path, config=model_config, cache_dir=config.cache_dir
        )
        self.classifier1 = nn.Linear(768, 5)
        self.classifier2 = nn.Linear(768, 5)

        # BERT model과 별개로, 입력 문자열을 토큰의 index로 바꾸어주는 tokenizer가 사용됨
        self.tokenizer = tokenizer

    # "학습된 모델"을 save_dir에 저장하는 함수
    def save_pretrained(self, save_dir):
        self.model.save_pretrained(save_dir)
        # Tokenizer는 기본적으로 "special_tokens_map_file", "tokenizer_file"을 가질 수 있음
        # 이러한 값을 제거한 뒤에 save_dir에 저장하겠다는 의미
        for key in ["special_tokens_map_file", "tokenizer_file"]:
            self.tokenizer.init_kwargs.pop(key, None)
        self.tokenizer.save_pretrained(save_dir)

    def get_optimizer(self): # 현재 모델을 학습하기 위한 최적화 방법(AdamW) 객체를 불러오는 함수
        # bias랑 LayerNorm에는 decay 적용하지 않겠다는 의미
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
        ]
        # AdamW의 첫 번째 인자(params)는 "학습할 가중치", weight_decay는 가중치에 적용되는 regularization 기법
        optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=config.adam_epsilon)
        return optimizer

    def get_scheduler(self, batch_num, optimizer): # AdamW로 학습할 때, learning rate을 단계적으로 줄이기 위한 함수
        if config.warmup_proportion == 0.0:
            return None

        t_total = batch_num // config.gradient_accumulation_steps * config.num_train_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(t_total * config.warmup_proportion),
            num_training_steps=t_total,
        )

        return scheduler

    def forward(self, inputs):
        # print(inputs) => {"input_ids", "attention_mask"}
        # https://huggingface.co/docs/transformers/model_doc/big_bird
        # BigBirdModel의 입력 양식에 맞게 넣어주어야 오류가 없음
        # BigBirdModel로 "input_ids", "attention_mask" 등 "미리 정해진" 규격에 맞는 입력만
        # 들어가야 오류가 없다는 의미 => 그러므로, preference, slang 등은 들어가면 X
        hidden = self.model(**inputs)
        # print(hidden.last_hidden_state.shape) # torch.Size([4 (batch_size), 1024 (seq_len), 768 (embedding_size)])
        # 마지막 레이어의 [CLS] 토큰만 가져오기
        cls_token_embeddings = hidden.last_hidden_state[:,0,:] # [batch_size, 768]
        # print(cls_token_embeddings.shape)
        output_1 = self.classifier1(cls_token_embeddings) # 768 → 5
        output_2 = self.classifier2(cls_token_embeddings) # 768 → 5
        print(output_1.shape) # [batch_size, 5]
        print(output_2.shape) # [batch_size, 5]

        return output_1, output_2

    def eval_step(self, inputs, labels, outputs):
        logits_1 = outputs[0].detach().cpu()
        logits_2 = outputs[1].detach().cpu()
        labels_1 = self.tensor_to_list(labels["politic"])
        labels_2 = self.tensor_to_list(labels["government"])
        predictions_1 = self.tensor_to_list(torch.argmax(logits_1, dim=-1))
        predictions_2 = self.tensor_to_list(torch.argmax(logits_2, dim=-1))
        results_1 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_1, labels_1)]
        results_2 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_2, labels_2)]
        return {"results_1": results_1, "results_2": results_2}





    # PyTorch의 Tensor 객체를 NumPy 객체로 변환
    def tensor_to_array(self, tensor):
        return tensor.detach().cpu().numpy()

    # PyTorch의 Tensor 객체를 Python의 리스트(list) 자료형으로 변환
    def tensor_to_list(self, tensor):
        return self.tensor_to_array(tensor).tolist()

In [14]:
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def cal_running_avg_loss(loss, running_avg_loss, decay=0.99):
    if running_avg_loss == 0:
        return loss
    running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
    return running_avg_loss

### 모델 학습 및 평가 라이브러리


In [15]:
from functools import partial
import sklearn.metrics as sklearn_metrics

"""binary_metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "precision": sklearn_metrics.precision_score, # TP / (TP + FP)
    "recall": sklearn_metrics.recall_score, # recall = sensitivity (민감도)
    "f1": sklearn_metrics.f1_score,
    "matthews_corrcoef": sklearn_metrics.matthews_corrcoef,
    "roc_auc": sklearn_metrics.roc_auc_score,
}""" # 우리는 두가지 task 다 다중분류임으로 안씀

metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "f1-macro": partial(sklearn_metrics.f1_score, average="macro"),
}


def eval_cls(results_1, results_2, **kwargs):
    predictions_1 = np.array([result["prediction"] for result in results_1])
    labels_1 = np.array([result["label"] for result in results_1])
    predictions_2 = np.array([result["prediction"] for result in results_2])
    labels_2 = np.array([result["label"] for result in results_2])
    results_1 = {
        metric: round(f(labels_1, predictions_1) * 100, 2)
        for metric, f in metrics.items()
    }
    results_2 = {
        metric: round(f(labels_2, predictions_2) * 100, 2)
        for metric, f in metrics.items()
    }

    return {
        "results_1": results_1,
        "results_2": results_2,
        "best_score_1": results_1["f1-macro"],
        "best_score_2": results_2["f1-macro"],
    }


### Epoch 동안 학습 및 평가를 수행하는 함수 정의

In [16]:
def _run_epoch(model, loader, device=None, context=None, **kwargs):
    config = kwargs["config"]
    is_train = kwargs["is_train"]

    avg_loss = 0
    results = []
    batch_num = len(loader)

    if is_train:
        model.train()
        if config.use_tpu:
            optimizer = context.getattr_or("optimizer", lambda: model.get_optimizer())
            scheduler = context.getattr_or("scheduler", lambda: model.get_scheduler(batch_num, optimizer))
        else:
            optimizer = kwargs["optimizer"]
            scheduler = kwargs["scheduler"]
    else:
        model.eval()

    is_master = True

    pbar = tqdm(enumerate(loader), total=batch_num, disable=not is_master, dynamic_ncols=True, position=0, leave=True)

    corrected_1 = 0
    corrected_2 = 0
    total = 0

    for i, (inputs, labels) in pbar:
        # inputs: {"input_ids": [batch_size(4), seq_len, 768], "attention_mask": [batch_size(4), seq_len, 768]}
        # labels: {"preference": [batch_size(4), 1], "slang": [batch_size(4), 1], "politic": [batch_size(4), 1]}
        if not config.use_tpu:
            # (k, v) => ("input_ids", value)
            # (k, v) => ("attention_mask", value)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            for k, v in labels.items():
                if isinstance(v, torch.Tensor):
                    labels[k] = v.to(device)

        outputs = model(inputs)


        outputs_1 = outputs[0]
        outputs_2 = outputs[1]

        labels_1 = labels["politic"]
        labels_2 = labels["government"]

        loss_function_1 = nn.CrossEntropyLoss()
        loss_1 = loss_function_1(outputs_1, labels_1)

        total += outputs_1.shape[0]

        _, predicted_1 = outputs_1.max(1)
        corrected_1 += predicted_1.eq(labels_1).sum().item()

        loss_function_2 = nn.CrossEntropyLoss()
        loss_2 = loss_function_2(outputs_2, labels_2)

        _, predicted_2 = outputs_2.max(1)
        corrected_2 += predicted_2.eq(labels_2).sum().item()

        w_1 = 1
        w_2 = 1
        loss = w_1 * loss_1 + w_2 * loss_2

        avg_loss = cal_running_avg_loss(loss.item(), avg_loss)
        loss /= config.gradient_accumulation_steps

        if is_train:
            loss.backward()
            if i % config.gradient_accumulation_steps == 0 or i == batch_num - 1:
                if config.max_grad_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)

                optimizer.step()
                optimizer.zero_grad()

                if scheduler is not None:
                    scheduler.step()
        else:
            result = (model.module if hasattr(model, "module") else model).eval_step(inputs, labels, outputs)
            results.append(result)

        if is_master:
            pbar.set_description(f"epoch: {kwargs['epoch'] + 1}, {('train' if is_train else 'valid')} loss: {min(100, round(avg_loss, 4))}")

    return {
        "loss": avg_loss,
        "result": results,
    }


# 학습 코드에서 호출하는 함수
def run_epoch(**kwargs):
    model = kwargs.pop("model")
    if kwargs["config"].use_tpu:
        results = model(_run_epoch, **kwargs)
    else:
        results = _run_epoch(model, **kwargs)

    if isinstance(results, list):
        loss = sum([result["loss"] for result in results]) / len(results)
        result = []
        for res in results:
            result.extend(res["result"])
        results = {"loss": loss, "result": result}

    return results

### 딥러닝 모델 초기화 및 설정

In [17]:
# 현재 모델 이름이 "monologg/kobigbird-bert-base" 이므로, Hugging Face에서 찾아서 불러옴
set_seed(config.seed)

# 딥러닝 모델 초기화
model = ClsModel()

print(f"configuration: {str(config)}")

if torch.cuda.is_available(): # GPU를 사용할 수 있다면
    gpu_count = torch.cuda.device_count()
    print(f"{gpu_count} GPU device detected")
    devices = ["cuda:{}".format(i) for i in range(gpu_count)]
    model_dp = torch.nn.DataParallel(model, device_ids=devices)
    model.to(devices[0])
else: # GPU를 사용할 수 없다면 CPU로 구동
    devices = ["cpu"]
    model_dp = model

# 학습 결과를 저장하기 위한 폴더 만들기
if not os.path.exists(config.cache_dir):
    os.makedirs(config.cache_dir)

output_dir = os.path.join(config.output_dir, config.task, config.dataset)
print("Output directory:", output_dir)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 실제 학습을 위해 optimizer 및 scheduler 초기화
optimizer = None
scheduler = None
if config.do_train: # 학습 모드(train mode)인 경우
    optimizer = model.get_optimizer()
    scheduler = model.get_scheduler(len(train_dataloader), optimizer)

params = {
    "config": config,
    "model": model_dp,
    "optimizer": optimizer,
    "scheduler": scheduler,
}
if not config.use_tpu:
    params["device"] = devices[0]

Downloading model.safetensors:   0%|          | 0.00/458M [00:00<?, ?B/s]

configuration: namespace(task='cls', dataset='comment', cache_dir='cache', output_dir='output', use_tpu=False, model_name_or_path='monologg/kobigbird-bert-base', data_dir='./', train_file='joongang.csv', predict_file='joongang.csv', max_seq_length=3072, train_batch_size=4, eval_batch_size=2, learning_rate=3e-05, num_train_epochs=10, num_labels=5, gradient_accumulation_steps=2, threads=4, seed=42, do_train=True, do_eval_during_train=True, do_eval=True, do_lower_case=False, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, warmup_proportion=0.0)
1 GPU device detected
Output directory: output/cls/comment


In [18]:
def do_eval(epoch):
    with torch.no_grad():
        results = run_epoch(loader=predict_dataloader, epoch=epoch, is_train=False, **params)["result"]
        print(results)
        results_1 = [result['results_1'] for result in results]
        results_2 = [result['results_2'] for result in results]

        results_1 = [item for sublist in results_1 for item in sublist]
        results_2 = [item for sublist in results_2 for item in sublist]

        eval_results = eval_cls(
            config=config,
            model=model,
            loader=predict_dataloader,
            tokenizer=model.tokenizer,
            results_1=results_1,
            results_2=results_2,
        )

    print("Eval results for output 1.")
    for k, v in eval_results["results_1"].items():
        print(f"{k} : {v}")

    print("Eval results for output 2.")
    for k, v in eval_results["results_2"].items():
        print(f"{k} : {v}")

    return eval_results["best_score_1"], eval_results["best_score_2"]

train_losses = []
val_accuracies = []
if config.do_train:
    best_score = (0, 0)
    for epoch in range(config.num_train_epochs):
        train_results = run_epoch(loader=train_dataloader, epoch=epoch, is_train=True, **params)
        train_loss = train_results['loss']
        train_losses.append(train_loss)

        if config.do_eval_during_train:
            score1, score2 = do_eval(epoch)
            val_accuracies.append((score1, score2))

            if score1 >= best_score[0] and score2 >= best_score[1]:
                best_score = (score1, score2)
                output_dir = os.path.join(config.output_dir, config.task, config.dataset, f"{epoch}-{best_score[0]}-{best_score[1]}-ckpt")
                copy.deepcopy(
                    model_dp.module
                    if hasattr(model_dp, "module")
                    else model_dp._models[0]
                    if hasattr(model_dp, "_models")
                    else model_dp
                ).cpu().save_pretrained(output_dir)
                with open(os.path.join(output_dir, "finetune_config.json"), "w") as save_config:
                    json.dump(vars(config), save_config, sort_keys=True, indent=4)
                print(f"Checkpoint {output_dir} saved.")


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.961:   1%|          | 1/100 [00:05<09:14,  5.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9565:   2%|▏         | 2/100 [00:06<05:01,  3.08s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9537:   3%|▎         | 3/100 [00:08<03:42,  2.29s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9495:   4%|▍         | 4/100 [00:09<03:03,  1.91s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9491:   5%|▌         | 5/100 [00:10<02:42,  1.71s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.943:   6%|▌         | 6/100 [00:12<02:27,  1.57s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9448:   7%|▋         | 7/100 [00:13<02:19,  1.50s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9357:   8%|▊         | 8/100 [00:14<02:12,  1.44s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.928:   9%|▉         | 9/100 [00:16<02:08,  1.41s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9167:  10%|█         | 10/100 [00:17<02:04,  1.39s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9177:  11%|█         | 11/100 [00:18<02:02,  1.38s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9131:  12%|█▏        | 12/100 [00:20<01:59,  1.36s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9045:  13%|█▎        | 13/100 [00:21<01:58,  1.36s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8963:  14%|█▍        | 14/100 [00:22<01:55,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.904:  15%|█▌        | 15/100 [00:24<01:55,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8906:  16%|█▌        | 16/100 [00:25<01:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8953:  17%|█▋        | 17/100 [00:26<01:51,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8881:  18%|█▊        | 18/100 [00:28<01:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.893:  19%|█▉        | 19/100 [00:29<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8919:  20%|██        | 20/100 [00:30<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8836:  21%|██        | 21/100 [00:32<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8796:  22%|██▏       | 22/100 [00:33<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8714:  23%|██▎       | 23/100 [00:34<01:42,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8767:  24%|██▍       | 24/100 [00:36<01:40,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8668:  25%|██▌       | 25/100 [00:37<01:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.861:  26%|██▌       | 26/100 [00:38<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8586:  27%|██▋       | 27/100 [00:40<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8496:  28%|██▊       | 28/100 [00:41<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.842:  29%|██▉       | 29/100 [00:42<01:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8322:  30%|███       | 30/100 [00:44<01:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8331:  31%|███       | 31/100 [00:45<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8261:  32%|███▏      | 32/100 [00:46<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8207:  33%|███▎      | 33/100 [00:48<01:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8137:  34%|███▍      | 34/100 [00:49<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8127:  35%|███▌      | 35/100 [00:50<01:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8037:  36%|███▌      | 36/100 [00:52<01:24,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7939:  37%|███▋      | 37/100 [00:53<01:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7945:  38%|███▊      | 38/100 [00:54<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7854:  39%|███▉      | 39/100 [00:56<01:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7753:  40%|████      | 40/100 [00:57<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7686:  41%|████      | 41/100 [00:58<01:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7645:  42%|████▏     | 42/100 [01:00<01:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7626:  43%|████▎     | 43/100 [01:01<01:16,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7578:  44%|████▍     | 44/100 [01:02<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7441:  45%|████▌     | 45/100 [01:04<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7295:  46%|████▌     | 46/100 [01:05<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7224:  47%|████▋     | 47/100 [01:06<01:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.71:  48%|████▊     | 48/100 [01:08<01:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6985:  49%|████▉     | 49/100 [01:09<01:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6996:  50%|█████     | 50/100 [01:10<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.687:  51%|█████     | 51/100 [01:12<01:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.684:  52%|█████▏    | 52/100 [01:13<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6832:  53%|█████▎    | 53/100 [01:14<01:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6682:  54%|█████▍    | 54/100 [01:16<01:00,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6525:  55%|█████▌    | 55/100 [01:17<00:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6562:  56%|█████▌    | 56/100 [01:18<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6449:  57%|█████▋    | 57/100 [01:20<00:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6291:  58%|█████▊    | 58/100 [01:21<00:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6268:  59%|█████▉    | 59/100 [01:22<00:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6214:  60%|██████    | 60/100 [01:24<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6098:  61%|██████    | 61/100 [01:25<00:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6005:  62%|██████▏   | 62/100 [01:26<00:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.588:  63%|██████▎   | 63/100 [01:28<00:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5738:  64%|██████▍   | 64/100 [01:29<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5708:  65%|██████▌   | 65/100 [01:30<00:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5662:  66%|██████▌   | 66/100 [01:32<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5657:  67%|██████▋   | 67/100 [01:33<00:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5585:  68%|██████▊   | 68/100 [01:34<00:42,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5566:  69%|██████▉   | 69/100 [01:36<00:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5435:  70%|███████   | 70/100 [01:37<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5401:  71%|███████   | 71/100 [01:38<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5342:  72%|███████▏  | 72/100 [01:40<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5255:  73%|███████▎  | 73/100 [01:41<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5165:  74%|███████▍  | 74/100 [01:42<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5237:  75%|███████▌  | 75/100 [01:44<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5102:  76%|███████▌  | 76/100 [01:45<00:31,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4934:  77%|███████▋  | 77/100 [01:46<00:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.489:  78%|███████▊  | 78/100 [01:48<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.481:  79%|███████▉  | 79/100 [01:49<00:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4809:  80%|████████  | 80/100 [01:50<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4733:  81%|████████  | 81/100 [01:52<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4631:  82%|████████▏ | 82/100 [01:53<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4615:  83%|████████▎ | 83/100 [01:54<00:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4507:  84%|████████▍ | 84/100 [01:56<00:21,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4444:  85%|████████▌ | 85/100 [01:57<00:20,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4426:  86%|████████▌ | 86/100 [01:58<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4342:  87%|████████▋ | 87/100 [02:00<00:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4253:  88%|████████▊ | 88/100 [02:01<00:15,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4273:  89%|████████▉ | 89/100 [02:02<00:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4254:  90%|█████████ | 90/100 [02:04<00:13,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4146:  91%|█████████ | 91/100 [02:05<00:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3986:  92%|█████████▏| 92/100 [02:06<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4036:  93%|█████████▎| 93/100 [02:08<00:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3912:  94%|█████████▍| 94/100 [02:09<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3865:  95%|█████████▌| 95/100 [02:10<00:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3764:  96%|█████████▌| 96/100 [02:12<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3675:  97%|█████████▋| 97/100 [02:13<00:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3665:  98%|█████████▊| 98/100 [02:14<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3593:  99%|█████████▉| 99/100 [02:16<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 1, train loss: 2.3441: 100%|██████████| 100/100 [02:17<00:00,  1.37s/it]
epoch: 1, valid loss: 1.0017:   2%|▏         | 1/51 [00:00<00:11,  4.25it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0081:   4%|▍         | 2/51 [00:00<00:11,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0125:   6%|▌         | 3/51 [00:00<00:10,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0237:   8%|▊         | 4/51 [00:00<00:10,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.038:  10%|▉         | 5/51 [00:01<00:10,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0379:  12%|█▏        | 6/51 [00:01<00:10,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0559:  14%|█▎        | 7/51 [00:01<00:10,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0629:  16%|█▌        | 8/51 [00:01<00:09,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0733:  18%|█▊        | 9/51 [00:02<00:09,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0839:  20%|█▉        | 10/51 [00:02<00:09,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0861:  22%|██▏       | 11/51 [00:02<00:09,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0863:  24%|██▎       | 12/51 [00:02<00:08,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0874:  25%|██▌       | 13/51 [00:02<00:08,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1104:  27%|██▋       | 14/51 [00:03<00:08,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1115:  29%|██▉       | 15/51 [00:03<00:08,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1132:  31%|███▏      | 16/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1304:  33%|███▎      | 17/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1419:  35%|███▌      | 18/51 [00:04<00:07,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1417:  37%|███▋      | 19/51 [00:04<00:07,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1515:  39%|███▉      | 20/51 [00:04<00:06,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1559:  41%|████      | 21/51 [00:04<00:06,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1577:  43%|████▎     | 22/51 [00:04<00:06,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1832:  45%|████▌     | 23/51 [00:05<00:06,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1932:  47%|████▋     | 24/51 [00:05<00:06,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2109:  49%|████▉     | 25/51 [00:05<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2193:  51%|█████     | 26/51 [00:05<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2266:  53%|█████▎    | 27/51 [00:06<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2419:  55%|█████▍    | 28/51 [00:06<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2531:  57%|█████▋    | 29/51 [00:06<00:04,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2599:  59%|█████▉    | 30/51 [00:06<00:04,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2842:  61%|██████    | 31/51 [00:07<00:04,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2982:  63%|██████▎   | 32/51 [00:07<00:04,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2954:  65%|██████▍   | 33/51 [00:07<00:04,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3021:  67%|██████▋   | 34/51 [00:07<00:03,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2974:  69%|██████▊   | 35/51 [00:07<00:03,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3064:  71%|███████   | 36/51 [00:08<00:03,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3226:  73%|███████▎  | 37/51 [00:08<00:03,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3183:  75%|███████▍  | 38/51 [00:08<00:02,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.321:  76%|███████▋  | 39/51 [00:08<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.323:  78%|███████▊  | 40/51 [00:09<00:02,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3215:  80%|████████  | 41/51 [00:09<00:02,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3219:  82%|████████▏ | 42/51 [00:09<00:02,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3208:  84%|████████▍ | 43/51 [00:09<00:01,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3155:  86%|████████▋ | 44/51 [00:09<00:01,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3204:  88%|████████▊ | 45/51 [00:10<00:01,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3216:  90%|█████████ | 46/51 [00:10<00:01,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3177:  92%|█████████▏| 47/51 [00:10<00:00,  4.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3134:  94%|█████████▍| 48/51 [00:10<00:00,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3156:  96%|█████████▌| 49/51 [00:11<00:00,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3162:  98%|█████████▊| 50/51 [00:11<00:00,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3129: 100%|██████████| 51/51 [00:11<00:00,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/0-34.49-55.81-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1308:   1%|          | 1/100 [00:01<02:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1351:   2%|▏         | 2/100 [00:02<02:09,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1345:   3%|▎         | 3/100 [00:03<02:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1382:   4%|▍         | 4/100 [00:05<02:07,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1446:   5%|▌         | 5/100 [00:06<02:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1389:   6%|▌         | 6/100 [00:07<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1576:   7%|▋         | 7/100 [00:09<02:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1687:   8%|▊         | 8/100 [00:10<02:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.17:   9%|▉         | 9/100 [00:11<02:01,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1729:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1853:  11%|█         | 11/100 [00:14<01:59,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1898:  12%|█▏        | 12/100 [00:15<01:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1979:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1974:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.197:  15%|█▌        | 15/100 [00:19<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2139:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2132:  17%|█▋        | 17/100 [00:22<01:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2099:  18%|█▊        | 18/100 [00:23<01:48,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.21:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2153:  20%|██        | 20/100 [00:26<01:47,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2228:  21%|██        | 21/100 [00:28<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2267:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2268:  23%|██▎       | 23/100 [00:30<01:43,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2264:  24%|██▍       | 24/100 [00:31<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2262:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2411:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2362:  27%|██▋       | 27/100 [00:36<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2394:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2425:  29%|██▉       | 29/100 [00:38<01:35,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2457:  30%|███       | 30/100 [00:40<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.26:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2663:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2737:  33%|███▎      | 33/100 [00:44<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2848:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2985:  35%|███▌      | 35/100 [00:46<01:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3001:  36%|███▌      | 36/100 [00:47<01:24,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3006:  37%|███▋      | 37/100 [00:49<01:24,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2975:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2994:  39%|███▉      | 39/100 [00:52<01:21,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3078:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3052:  41%|████      | 41/100 [00:54<01:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3065:  42%|████▏     | 42/100 [00:55<01:16,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.317:  43%|████▎     | 43/100 [00:57<01:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3229:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3197:  45%|████▌     | 45/100 [00:59<01:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3324:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.327:  47%|████▋     | 47/100 [01:02<01:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3252:  48%|████▊     | 48/100 [01:03<01:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3242:  49%|████▉     | 49/100 [01:05<01:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3342:  50%|█████     | 50/100 [01:06<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.328:  51%|█████     | 51/100 [01:07<01:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3308:  52%|█████▏    | 52/100 [01:09<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3261:  53%|█████▎    | 53/100 [01:10<01:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3229:  54%|█████▍    | 54/100 [01:11<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3193:  55%|█████▌    | 55/100 [01:13<01:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3213:  56%|█████▌    | 56/100 [01:14<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3186:  57%|█████▋    | 57/100 [01:15<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3297:  58%|█████▊    | 58/100 [01:17<00:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3231:  59%|█████▉    | 59/100 [01:18<00:55,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.316:  60%|██████    | 60/100 [01:19<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3138:  61%|██████    | 61/100 [01:21<00:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.312:  62%|██████▏   | 62/100 [01:22<00:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3089:  63%|██████▎   | 63/100 [01:23<00:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3071:  64%|██████▍   | 64/100 [01:25<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3078:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3024:  66%|██████▌   | 66/100 [01:27<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.304:  67%|██████▋   | 67/100 [01:29<00:44,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3124:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3183:  69%|██████▉   | 69/100 [01:31<00:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.313:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3146:  71%|███████   | 71/100 [01:34<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3155:  72%|███████▏  | 72/100 [01:35<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3153:  73%|███████▎  | 73/100 [01:37<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3276:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3231:  75%|███████▌  | 75/100 [01:39<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3198:  76%|███████▌  | 76/100 [01:41<00:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3219:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3278:  78%|███████▊  | 78/100 [01:43<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3377:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3456:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3537:  81%|████████  | 81/100 [01:47<00:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3575:  82%|████████▏ | 82/100 [01:49<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.359:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3574:  84%|████████▍ | 84/100 [01:51<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3639:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3598:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3697:  87%|████████▋ | 87/100 [01:55<00:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3696:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3807:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3872:  90%|█████████ | 90/100 [01:59<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3855:  91%|█████████ | 91/100 [02:01<00:12,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3849:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3762:  93%|█████████▎| 93/100 [02:03<00:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3735:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3885:  95%|█████████▌| 95/100 [02:06<00:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3955:  96%|█████████▌| 96/100 [02:07<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4032:  97%|█████████▋| 97/100 [02:09<00:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.402:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4024:  99%|█████████▉| 99/100 [02:11<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 2, train loss: 1.4026: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 2, valid loss: 1.407:   2%|▏         | 1/51 [00:00<00:11,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.41:   4%|▍         | 2/51 [00:00<00:11,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4061:   6%|▌         | 3/51 [00:00<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4036:   8%|▊         | 4/51 [00:00<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4131:  10%|▉         | 5/51 [00:01<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4053:  12%|█▏        | 6/51 [00:01<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4062:  14%|█▎        | 7/51 [00:01<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4021:  16%|█▌        | 8/51 [00:01<00:09,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3985:  18%|█▊        | 9/51 [00:02<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4133:  20%|█▉        | 10/51 [00:02<00:09,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4118:  22%|██▏       | 11/51 [00:02<00:09,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4078:  24%|██▎       | 12/51 [00:02<00:08,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4003:  25%|██▌       | 13/51 [00:02<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4096:  27%|██▋       | 14/51 [00:03<00:08,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4052:  29%|██▉       | 15/51 [00:03<00:08,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3999:  31%|███▏      | 16/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4017:  33%|███▎      | 17/51 [00:03<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4031:  35%|███▌      | 18/51 [00:04<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3963:  37%|███▋      | 19/51 [00:04<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3984:  39%|███▉      | 20/51 [00:04<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3945:  41%|████      | 21/51 [00:04<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3902:  43%|████▎     | 22/51 [00:04<00:06,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4035:  45%|████▌     | 23/51 [00:05<00:06,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4088:  47%|████▋     | 24/51 [00:05<00:06,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4301:  49%|████▉     | 25/51 [00:05<00:05,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4274:  51%|█████     | 26/51 [00:05<00:05,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4264:  53%|█████▎    | 27/51 [00:06<00:05,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4353:  55%|█████▍    | 28/51 [00:06<00:05,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4539:  57%|█████▋    | 29/51 [00:06<00:05,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4552:  59%|█████▉    | 30/51 [00:06<00:04,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4595:  61%|██████    | 31/51 [00:07<00:04,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4626:  63%|██████▎   | 32/51 [00:07<00:04,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4563:  65%|██████▍   | 33/51 [00:07<00:04,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4606:  67%|██████▋   | 34/51 [00:07<00:03,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4532:  69%|██████▊   | 35/51 [00:07<00:03,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4574:  71%|███████   | 36/51 [00:08<00:03,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4673:  73%|███████▎  | 37/51 [00:08<00:03,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4576:  75%|███████▍  | 38/51 [00:08<00:02,  4.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4515:  76%|███████▋  | 39/51 [00:08<00:02,  4.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4472:  78%|███████▊  | 40/51 [00:09<00:02,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4374:  80%|████████  | 41/51 [00:09<00:02,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.434:  82%|████████▏ | 42/51 [00:09<00:02,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.427:  84%|████████▍ | 43/51 [00:09<00:01,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4153:  86%|████████▋ | 44/51 [00:09<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4086:  88%|████████▊ | 45/51 [00:10<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4063:  90%|█████████ | 46/51 [00:10<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.4025:  92%|█████████▏| 47/51 [00:10<00:00,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3967:  94%|█████████▍| 48/51 [00:10<00:00,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3934:  96%|█████████▌| 49/51 [00:11<00:00,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3875:  98%|█████████▊| 50/51 [00:11<00:00,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3855: 100%|██████████| 51/51 [00:11<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/1-45.12-57.92-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8749:   1%|          | 1/100 [00:01<02:13,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8758:   2%|▏         | 2/100 [00:02<02:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8697:   3%|▎         | 3/100 [00:04<02:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8718:   4%|▍         | 4/100 [00:05<02:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8661:   5%|▌         | 5/100 [00:06<02:07,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8596:   6%|▌         | 6/100 [00:07<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8483:   7%|▋         | 7/100 [00:09<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8374:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8276:   9%|▉         | 9/100 [00:12<02:01,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8224:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8116:  11%|█         | 11/100 [00:14<01:58,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8071:  12%|█▏        | 12/100 [00:15<01:56,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.8074:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7989:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7897:  15%|█▌        | 15/100 [00:20<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7827:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7735:  17%|█▋        | 17/100 [00:22<01:51,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7711:  18%|█▊        | 18/100 [00:24<01:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7673:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7659:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7583:  21%|██        | 21/100 [00:28<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7479:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.746:  23%|██▎       | 23/100 [00:30<01:42,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7483:  24%|██▍       | 24/100 [00:32<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7357:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7259:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7144:  27%|██▋       | 27/100 [00:36<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7087:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7058:  29%|██▉       | 29/100 [00:38<01:34,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7021:  30%|███       | 30/100 [00:40<01:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6893:  31%|███       | 31/100 [00:41<01:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6779:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6692:  33%|███▎      | 33/100 [00:44<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6699:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6617:  35%|███▌      | 35/100 [00:46<01:27,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6539:  36%|███▌      | 36/100 [00:48<01:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6582:  37%|███▋      | 37/100 [00:49<01:25,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6523:  38%|███▊      | 38/100 [00:50<01:23,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6485:  39%|███▉      | 39/100 [00:52<01:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6454:  40%|████      | 40/100 [00:53<01:20,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6399:  41%|████      | 41/100 [00:54<01:19,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6374:  42%|████▏     | 42/100 [00:56<01:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6285:  43%|████▎     | 43/100 [00:57<01:16,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6264:  44%|████▍     | 44/100 [00:58<01:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6341:  45%|████▌     | 45/100 [01:00<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6298:  46%|████▌     | 46/100 [01:01<01:12,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6289:  47%|████▋     | 47/100 [01:02<01:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6231:  48%|████▊     | 48/100 [01:04<01:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6129:  49%|████▉     | 49/100 [01:05<01:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6027:  50%|█████     | 50/100 [01:06<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5967:  51%|█████     | 51/100 [01:08<01:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5951:  52%|█████▏    | 52/100 [01:09<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5902:  53%|█████▎    | 53/100 [01:10<01:02,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5834:  54%|█████▍    | 54/100 [01:12<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5815:  55%|█████▌    | 55/100 [01:13<01:00,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5695:  56%|█████▌    | 56/100 [01:14<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5576:  57%|█████▋    | 57/100 [01:16<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5548:  58%|█████▊    | 58/100 [01:17<00:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5548:  59%|█████▉    | 59/100 [01:18<00:54,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5544:  60%|██████    | 60/100 [01:20<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5519:  61%|██████    | 61/100 [01:21<00:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.541:  62%|██████▏   | 62/100 [01:22<00:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5388:  63%|██████▎   | 63/100 [01:24<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5307:  64%|██████▍   | 64/100 [01:25<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5203:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5112:  66%|██████▌   | 66/100 [01:28<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5051:  67%|██████▋   | 67/100 [01:29<00:44,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4977:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.521:  69%|██████▉   | 69/100 [01:32<00:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5218:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5327:  71%|███████   | 71/100 [01:34<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5259:  72%|███████▏  | 72/100 [01:36<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5389:  73%|███████▎  | 73/100 [01:37<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5271:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.524:  75%|███████▌  | 75/100 [01:40<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5193:  76%|███████▌  | 76/100 [01:41<00:31,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5148:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5135:  78%|███████▊  | 78/100 [01:44<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5052:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5027:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5036:  81%|████████  | 81/100 [01:48<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5119:  82%|████████▏ | 82/100 [01:49<00:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5097:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5067:  84%|████████▍ | 84/100 [01:52<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5131:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5085:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5089:  87%|████████▋ | 87/100 [01:56<00:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5007:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5098:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5002:  90%|█████████ | 90/100 [02:00<00:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.496:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4905:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4822:  93%|█████████▎| 93/100 [02:04<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4763:  94%|█████████▍| 94/100 [02:05<00:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4712:  95%|█████████▌| 95/100 [02:06<00:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4721:  96%|█████████▌| 96/100 [02:08<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4638:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4533:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4526:  99%|█████████▉| 99/100 [02:12<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 3, train loss: 1.4434: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 3, valid loss: 1.2083:   2%|▏         | 1/51 [00:00<00:11,  4.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2163:   4%|▍         | 2/51 [00:00<00:11,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2195:   6%|▌         | 3/51 [00:00<00:10,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2141:   8%|▊         | 4/51 [00:00<00:10,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2198:  10%|▉         | 5/51 [00:01<00:10,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2119:  12%|█▏        | 6/51 [00:01<00:10,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.22:  14%|█▎        | 7/51 [00:01<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2203:  16%|█▌        | 8/51 [00:01<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2243:  18%|█▊        | 9/51 [00:02<00:09,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2322:  20%|█▉        | 10/51 [00:02<00:09,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2278:  22%|██▏       | 11/51 [00:02<00:09,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2233:  24%|██▎       | 12/51 [00:02<00:08,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2178:  25%|██▌       | 13/51 [00:02<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2184:  27%|██▋       | 14/51 [00:03<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2165:  29%|██▉       | 15/51 [00:03<00:08,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2165:  31%|███▏      | 16/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2102:  33%|███▎      | 17/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2149:  35%|███▌      | 18/51 [00:04<00:07,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2093:  37%|███▋      | 19/51 [00:04<00:07,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2117:  39%|███▉      | 20/51 [00:04<00:06,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2051:  41%|████      | 21/51 [00:04<00:06,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.1974:  43%|████▎     | 22/51 [00:04<00:06,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2169:  45%|████▌     | 23/51 [00:05<00:06,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2174:  47%|████▋     | 24/51 [00:05<00:06,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2303:  49%|████▉     | 25/51 [00:05<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2243:  51%|█████     | 26/51 [00:05<00:05,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2252:  53%|█████▎    | 27/51 [00:06<00:05,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2411:  55%|█████▍    | 28/51 [00:06<00:05,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2628:  57%|█████▋    | 29/51 [00:06<00:04,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2652:  59%|█████▉    | 30/51 [00:06<00:04,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2745:  61%|██████    | 31/51 [00:06<00:04,  4.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2756:  63%|██████▎   | 32/51 [00:07<00:04,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2704:  65%|██████▍   | 33/51 [00:07<00:04,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2791:  67%|██████▋   | 34/51 [00:07<00:03,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2725:  69%|██████▊   | 35/51 [00:07<00:03,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2785:  71%|███████   | 36/51 [00:08<00:03,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2923:  73%|███████▎  | 37/51 [00:08<00:03,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2893:  75%|███████▍  | 38/51 [00:08<00:02,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2816:  76%|███████▋  | 39/51 [00:08<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2809:  78%|███████▊  | 40/51 [00:09<00:02,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2723:  80%|████████  | 41/51 [00:09<00:02,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2673:  82%|████████▏ | 42/51 [00:09<00:02,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.262:  84%|████████▍ | 43/51 [00:09<00:01,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2523:  86%|████████▋ | 44/51 [00:09<00:01,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2484:  88%|████████▊ | 45/51 [00:10<00:01,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2467:  90%|█████████ | 46/51 [00:10<00:01,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2436:  92%|█████████▏| 47/51 [00:10<00:00,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2396:  94%|█████████▍| 48/51 [00:10<00:00,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2377:  96%|█████████▌| 49/51 [00:11<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2329:  98%|█████████▊| 50/51 [00:11<00:00,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.2279: 100%|██████████| 51/51 [00:11<00:00,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 0, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/2-60.53-61.99-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.288:   1%|          | 1/100 [00:01<02:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2795:   2%|▏         | 2/100 [00:02<02:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2782:   3%|▎         | 3/100 [00:04<02:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.268:   4%|▍         | 4/100 [00:05<02:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2621:   5%|▌         | 5/100 [00:06<02:07,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2704:   6%|▌         | 6/100 [00:07<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.271:   7%|▋         | 7/100 [00:09<02:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2698:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2653:   9%|▉         | 9/100 [00:12<02:01,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2656:  10%|█         | 10/100 [00:13<02:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2604:  11%|█         | 11/100 [00:14<01:59,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2598:  12%|█▏        | 12/100 [00:16<01:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2545:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2457:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2439:  15%|█▌        | 15/100 [00:20<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2355:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2262:  17%|█▋        | 17/100 [00:22<01:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2212:  18%|█▊        | 18/100 [00:24<01:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2189:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2208:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2153:  21%|██        | 21/100 [00:28<01:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2067:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2132:  23%|██▎       | 23/100 [00:30<01:42,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2087:  24%|██▍       | 24/100 [00:32<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2049:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2017:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2017:  27%|██▋       | 27/100 [00:36<01:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1936:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1952:  29%|██▉       | 29/100 [00:38<01:35,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1907:  30%|███       | 30/100 [00:40<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1859:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1801:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1826:  33%|███▎      | 33/100 [00:44<01:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1854:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1831:  35%|███▌      | 35/100 [00:46<01:26,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1774:  36%|███▌      | 36/100 [00:47<01:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1696:  37%|███▋      | 37/100 [00:49<01:24,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1668:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1625:  39%|███▉      | 39/100 [00:52<01:21,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1636:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1668:  41%|████      | 41/100 [00:54<01:18,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.165:  42%|████▏     | 42/100 [00:55<01:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.161:  43%|████▎     | 43/100 [00:57<01:16,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1646:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1597:  45%|████▌     | 45/100 [01:00<01:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1509:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.159:  47%|████▋     | 47/100 [01:02<01:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1535:  48%|████▊     | 48/100 [01:04<01:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1688:  49%|████▉     | 49/100 [01:05<01:09,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1613:  50%|█████     | 50/100 [01:06<01:07,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.156:  51%|█████     | 51/100 [01:08<01:05,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1507:  52%|█████▏    | 52/100 [01:09<01:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1523:  53%|█████▎    | 53/100 [01:10<01:03,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.144:  54%|█████▍    | 54/100 [01:12<01:01,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1415:  55%|█████▌    | 55/100 [01:13<01:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1511:  56%|█████▌    | 56/100 [01:14<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1561:  57%|█████▋    | 57/100 [01:16<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1566:  58%|█████▊    | 58/100 [01:17<00:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1559:  59%|█████▉    | 59/100 [01:18<00:54,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1512:  60%|██████    | 60/100 [01:20<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1434:  61%|██████    | 61/100 [01:21<00:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1397:  62%|██████▏   | 62/100 [01:22<00:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1397:  63%|██████▎   | 63/100 [01:24<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1381:  64%|██████▍   | 64/100 [01:25<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1302:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.124:  66%|██████▌   | 66/100 [01:28<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1167:  67%|██████▋   | 67/100 [01:29<00:44,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1194:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1249:  69%|██████▉   | 69/100 [01:32<00:41,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1164:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1191:  71%|███████   | 71/100 [01:34<00:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1115:  72%|███████▏  | 72/100 [01:36<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1184:  73%|███████▎  | 73/100 [01:37<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1177:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1181:  75%|███████▌  | 75/100 [01:40<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1245:  76%|███████▌  | 76/100 [01:41<00:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1236:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1161:  78%|███████▊  | 78/100 [01:44<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1085:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1036:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0948:  81%|████████  | 81/100 [01:48<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0908:  82%|████████▏ | 82/100 [01:49<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0824:  83%|████████▎ | 83/100 [01:50<00:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0773:  84%|████████▍ | 84/100 [01:52<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.085:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0764:  86%|████████▌ | 86/100 [01:54<00:18,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0752:  87%|████████▋ | 87/100 [01:56<00:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0713:  88%|████████▊ | 88/100 [01:57<00:16,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.065:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0612:  90%|█████████ | 90/100 [02:00<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0553:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0467:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0425:  93%|█████████▎| 93/100 [02:04<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0371:  94%|█████████▍| 94/100 [02:05<00:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0507:  95%|█████████▌| 95/100 [02:06<00:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0436:  96%|█████████▌| 96/100 [02:08<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0473:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0474:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0467:  99%|█████████▉| 99/100 [02:12<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 4, train loss: 1.0418: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 4, valid loss: 0.8005:   2%|▏         | 1/51 [00:00<00:11,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8097:   4%|▍         | 2/51 [00:00<00:11,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8083:   6%|▌         | 3/51 [00:00<00:10,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8199:   8%|▊         | 4/51 [00:00<00:10,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8279:  10%|▉         | 5/51 [00:01<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8221:  12%|█▏        | 6/51 [00:01<00:10,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8365:  14%|█▎        | 7/51 [00:01<00:09,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.835:  16%|█▌        | 8/51 [00:01<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8509:  18%|█▊        | 9/51 [00:02<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8758:  20%|█▉        | 10/51 [00:02<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8756:  22%|██▏       | 11/51 [00:02<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8703:  24%|██▎       | 12/51 [00:02<00:08,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8641:  25%|██▌       | 13/51 [00:02<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8759:  27%|██▋       | 14/51 [00:03<00:08,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8801:  29%|██▉       | 15/51 [00:03<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8847:  31%|███▏      | 16/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8834:  33%|███▎      | 17/51 [00:03<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9039:  35%|███▌      | 18/51 [00:04<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8973:  37%|███▋      | 19/51 [00:04<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9127:  39%|███▉      | 20/51 [00:04<00:07,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9064:  41%|████      | 21/51 [00:04<00:06,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8987:  43%|████▎     | 22/51 [00:04<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9223:  45%|████▌     | 23/51 [00:05<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9361:  47%|████▋     | 24/51 [00:05<00:06,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9568:  49%|████▉     | 25/51 [00:05<00:05,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9584:  51%|█████     | 26/51 [00:05<00:05,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9677:  53%|█████▎    | 27/51 [00:06<00:05,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9853:  55%|█████▍    | 28/51 [00:06<00:05,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0154:  57%|█████▋    | 29/51 [00:06<00:05,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.025:  59%|█████▉    | 30/51 [00:06<00:04,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0383:  61%|██████    | 31/51 [00:07<00:04,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0442:  63%|██████▎   | 32/51 [00:07<00:04,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0363:  65%|██████▍   | 33/51 [00:07<00:04,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0493:  67%|██████▋   | 34/51 [00:07<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0448:  69%|██████▊   | 35/51 [00:07<00:03,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0496:  71%|███████   | 36/51 [00:08<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0795:  73%|███████▎  | 37/51 [00:08<00:03,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0722:  75%|███████▍  | 38/51 [00:08<00:02,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0661:  76%|███████▋  | 39/51 [00:08<00:02,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.064:  78%|███████▊  | 40/51 [00:09<00:02,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.056:  80%|████████  | 41/51 [00:09<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0477:  82%|████████▏ | 42/51 [00:09<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0431:  84%|████████▍ | 43/51 [00:09<00:01,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.034:  86%|████████▋ | 44/51 [00:10<00:01,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0347:  88%|████████▊ | 45/51 [00:10<00:01,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0302:  90%|█████████ | 46/51 [00:10<00:01,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.031:  92%|█████████▏| 47/51 [00:10<00:00,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0296:  94%|█████████▍| 48/51 [00:10<00:00,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0309:  96%|█████████▌| 49/51 [00:11<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0366:  98%|█████████▊| 50/51 [00:11<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.0301: 100%|██████████| 51/51 [00:11<00:00,  4.39it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2863:   1%|          | 1/100 [00:01<02:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2798:   2%|▏         | 2/100 [00:02<02:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.272:   3%|▎         | 3/100 [00:04<02:10,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2708:   4%|▍         | 4/100 [00:05<02:08,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2693:   5%|▌         | 5/100 [00:06<02:08,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.265:   6%|▌         | 6/100 [00:08<02:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2549:   7%|▋         | 7/100 [00:09<02:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2455:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2387:   9%|▉         | 9/100 [00:12<02:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2284:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2194:  11%|█         | 11/100 [00:14<01:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2114:  12%|█▏        | 12/100 [00:16<01:56,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2089:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2018:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1931:  15%|█▌        | 15/100 [00:20<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1882:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1828:  17%|█▋        | 17/100 [00:22<01:51,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1771:  18%|█▊        | 18/100 [00:24<01:48,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1687:  19%|█▉        | 19/100 [00:25<01:48,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1589:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1601:  21%|██        | 21/100 [00:28<01:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.155:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1533:  23%|██▎       | 23/100 [00:30<01:42,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1546:  24%|██▍       | 24/100 [00:32<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1486:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.14:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1311:  27%|██▋       | 27/100 [00:36<01:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1315:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1235:  29%|██▉       | 29/100 [00:38<01:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1217:  30%|███       | 30/100 [00:39<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1126:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1047:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.107:  33%|███▎      | 33/100 [00:44<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0995:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0941:  35%|███▌      | 35/100 [00:46<01:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0897:  36%|███▌      | 36/100 [00:47<01:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0985:  37%|███▋      | 37/100 [00:49<01:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0957:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0894:  39%|███▉      | 39/100 [00:51<01:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0848:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0779:  41%|████      | 41/100 [00:54<01:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0694:  42%|████▏     | 42/100 [00:55<01:16,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0662:  43%|████▎     | 43/100 [00:57<01:16,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0648:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0611:  45%|████▌     | 45/100 [00:59<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0569:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0603:  47%|████▋     | 47/100 [01:02<01:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0589:  48%|████▊     | 48/100 [01:03<01:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.051:  49%|████▉     | 49/100 [01:05<01:08,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0442:  50%|█████     | 50/100 [01:06<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0481:  51%|█████     | 51/100 [01:07<01:05,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0419:  52%|█████▏    | 52/100 [01:09<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0464:  53%|█████▎    | 53/100 [01:10<01:02,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0407:  54%|█████▍    | 54/100 [01:11<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0324:  55%|█████▌    | 55/100 [01:13<00:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0239:  56%|█████▌    | 56/100 [01:14<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0213:  57%|█████▋    | 57/100 [01:15<00:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0172:  58%|█████▊    | 58/100 [01:17<00:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0135:  59%|█████▉    | 59/100 [01:18<00:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0119:  60%|██████    | 60/100 [01:19<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.006:  61%|██████    | 61/100 [01:21<00:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9991:  62%|██████▏   | 62/100 [01:22<00:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.994:  63%|██████▎   | 63/100 [01:23<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9895:  64%|██████▍   | 64/100 [01:25<00:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9833:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9778:  66%|██████▌   | 66/100 [01:27<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9704:  67%|██████▋   | 67/100 [01:29<00:44,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9663:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9671:  69%|██████▉   | 69/100 [01:31<00:41,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9633:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.959:  71%|███████   | 71/100 [01:34<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9626:  72%|███████▏  | 72/100 [01:35<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9616:  73%|███████▎  | 73/100 [01:37<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9539:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9464:  75%|███████▌  | 75/100 [01:39<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9403:  76%|███████▌  | 76/100 [01:41<00:31,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.944:  77%|███████▋  | 77/100 [01:42<00:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.936:  78%|███████▊  | 78/100 [01:43<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9325:  79%|███████▉  | 79/100 [01:45<00:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9365:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9412:  81%|████████  | 81/100 [01:47<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9336:  82%|████████▏ | 82/100 [01:49<00:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9295:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9222:  84%|████████▍ | 84/100 [01:51<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9205:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9135:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9145:  87%|████████▋ | 87/100 [01:55<00:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9075:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9086:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9065:  90%|█████████ | 90/100 [01:59<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9041:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9022:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.903:  93%|█████████▎| 93/100 [02:03<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9004:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8943:  95%|█████████▌| 95/100 [02:06<00:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8881:  96%|█████████▌| 96/100 [02:07<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8887:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8862:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8797:  99%|█████████▉| 99/100 [02:11<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 5, train loss: 0.8737: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 5, valid loss: 1.237:   2%|▏         | 1/51 [00:00<00:11,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2488:   4%|▍         | 2/51 [00:00<00:11,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2409:   6%|▌         | 3/51 [00:00<00:10,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2317:   8%|▊         | 4/51 [00:00<00:10,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2373:  10%|▉         | 5/51 [00:01<00:10,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2268:  12%|█▏        | 6/51 [00:01<00:10,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2252:  14%|█▎        | 7/51 [00:01<00:10,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2161:  16%|█▌        | 8/51 [00:01<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2272:  18%|█▊        | 9/51 [00:02<00:09,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2321:  20%|█▉        | 10/51 [00:02<00:09,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2342:  22%|██▏       | 11/51 [00:02<00:09,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2238:  24%|██▎       | 12/51 [00:02<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2154:  25%|██▌       | 13/51 [00:02<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2319:  27%|██▋       | 14/51 [00:03<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2308:  29%|██▉       | 15/51 [00:03<00:08,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2357:  31%|███▏      | 16/51 [00:03<00:07,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2305:  33%|███▎      | 17/51 [00:03<00:07,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2399:  35%|███▌      | 18/51 [00:04<00:07,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2292:  37%|███▋      | 19/51 [00:04<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2255:  39%|███▉      | 20/51 [00:04<00:07,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2208:  41%|████      | 21/51 [00:04<00:06,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2133:  43%|████▎     | 22/51 [00:05<00:06,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2389:  45%|████▌     | 23/51 [00:05<00:06,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.25:  47%|████▋     | 24/51 [00:05<00:06,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2599:  49%|████▉     | 25/51 [00:05<00:05,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2564:  51%|█████     | 26/51 [00:05<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2574:  53%|█████▎    | 27/51 [00:06<00:05,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2708:  55%|█████▍    | 28/51 [00:06<00:05,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3046:  57%|█████▋    | 29/51 [00:06<00:04,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3137:  59%|█████▉    | 30/51 [00:06<00:04,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3103:  61%|██████    | 31/51 [00:07<00:04,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3063:  63%|██████▎   | 32/51 [00:07<00:04,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2952:  65%|██████▍   | 33/51 [00:07<00:04,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3116:  67%|██████▋   | 34/51 [00:07<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2998:  69%|██████▊   | 35/51 [00:07<00:03,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.314:  71%|███████   | 36/51 [00:08<00:03,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3248:  73%|███████▎  | 37/51 [00:08<00:03,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3131:  75%|███████▍  | 38/51 [00:08<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3028:  76%|███████▋  | 39/51 [00:08<00:02,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3007:  78%|███████▊  | 40/51 [00:09<00:02,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2893:  80%|████████  | 41/51 [00:09<00:02,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.28:  82%|████████▏ | 42/51 [00:09<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2689:  84%|████████▍ | 43/51 [00:09<00:01,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2572:  86%|████████▋ | 44/51 [00:09<00:01,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2568:  88%|████████▊ | 45/51 [00:10<00:01,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2623:  90%|█████████ | 46/51 [00:10<00:01,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2681:  92%|█████████▏| 47/51 [00:10<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2598:  94%|█████████▍| 48/51 [00:10<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2643:  96%|█████████▌| 49/51 [00:11<00:00,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2601:  98%|█████████▊| 50/51 [00:11<00:00,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.2489: 100%|██████████| 51/51 [00:11<00:00,  4.41it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4394:   1%|          | 1/100 [00:01<02:13,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4446:   2%|▏         | 2/100 [00:02<02:09,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4431:   3%|▎         | 3/100 [00:03<02:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.445:   4%|▍         | 4/100 [00:05<02:07,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4439:   5%|▌         | 5/100 [00:06<02:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4443:   6%|▌         | 6/100 [00:07<02:04,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4441:   7%|▋         | 7/100 [00:09<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4423:   8%|▊         | 8/100 [00:10<02:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4405:   9%|▉         | 9/100 [00:11<02:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4397:  10%|█         | 10/100 [00:13<01:59,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4407:  11%|█         | 11/100 [00:14<01:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4409:  12%|█▏        | 12/100 [00:15<01:56,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4378:  13%|█▎        | 13/100 [00:17<01:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4367:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4373:  15%|█▌        | 15/100 [00:19<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.443:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4404:  17%|█▋        | 17/100 [00:22<01:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4387:  18%|█▊        | 18/100 [00:23<01:48,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4418:  19%|█▉        | 19/100 [00:25<01:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4388:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.441:  21%|██        | 21/100 [00:27<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4386:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4455:  23%|██▎       | 23/100 [00:30<01:42,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4465:  24%|██▍       | 24/100 [00:31<01:40,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4456:  25%|██▌       | 25/100 [00:33<01:40,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.45:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4574:  27%|██▋       | 27/100 [00:35<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4542:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4511:  29%|██▉       | 29/100 [00:38<01:35,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4477:  30%|███       | 30/100 [00:39<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4459:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4428:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4442:  33%|███▎      | 33/100 [00:43<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.453:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4528:  35%|███▌      | 35/100 [00:46<01:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4529:  36%|███▌      | 36/100 [00:47<01:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4552:  37%|███▋      | 37/100 [00:49<01:24,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4524:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4514:  39%|███▉      | 39/100 [00:51<01:21,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.448:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4447:  41%|████      | 41/100 [00:54<01:18,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4421:  42%|████▏     | 42/100 [00:55<01:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4392:  43%|████▎     | 43/100 [00:57<01:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4375:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4349:  45%|████▌     | 45/100 [00:59<01:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4425:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4395:  47%|████▋     | 47/100 [01:02<01:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4382:  48%|████▊     | 48/100 [01:03<01:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4424:  49%|████▉     | 49/100 [01:05<01:08,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4396:  50%|█████     | 50/100 [01:06<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4411:  51%|█████     | 51/100 [01:07<01:05,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4415:  52%|█████▏    | 52/100 [01:09<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4501:  53%|█████▎    | 53/100 [01:10<01:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4554:  54%|█████▍    | 54/100 [01:11<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4532:  55%|█████▌    | 55/100 [01:13<01:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4535:  56%|█████▌    | 56/100 [01:14<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4568:  57%|█████▋    | 57/100 [01:15<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4533:  58%|█████▊    | 58/100 [01:17<00:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4579:  59%|█████▉    | 59/100 [01:18<00:54,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4595:  60%|██████    | 60/100 [01:19<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4578:  61%|██████    | 61/100 [01:21<00:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.47:  62%|██████▏   | 62/100 [01:22<00:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4668:  63%|██████▎   | 63/100 [01:23<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4632:  64%|██████▍   | 64/100 [01:25<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4637:  65%|██████▌   | 65/100 [01:26<00:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4605:  66%|██████▌   | 66/100 [01:27<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4589:  67%|██████▋   | 67/100 [01:29<00:44,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4566:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4532:  69%|██████▉   | 69/100 [01:31<00:41,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4518:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4619:  71%|███████   | 71/100 [01:34<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4589:  72%|███████▏  | 72/100 [01:35<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4578:  73%|███████▎  | 73/100 [01:37<00:36,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.457:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4538:  75%|███████▌  | 75/100 [01:39<00:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4505:  76%|███████▌  | 76/100 [01:41<00:31,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4499:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4479:  78%|███████▊  | 78/100 [01:43<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4483:  79%|███████▉  | 79/100 [01:45<00:28,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4542:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4535:  81%|████████  | 81/100 [01:47<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4502:  82%|████████▏ | 82/100 [01:49<00:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4496:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4482:  84%|████████▍ | 84/100 [01:51<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4508:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4499:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4487:  87%|████████▋ | 87/100 [01:55<00:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4453:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4474:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4531:  90%|█████████ | 90/100 [01:59<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.45:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.463:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4614:  93%|█████████▎| 93/100 [02:03<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.458:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4546:  95%|█████████▌| 95/100 [02:06<00:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4513:  96%|█████████▌| 96/100 [02:07<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4529:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4501:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4474:  99%|█████████▉| 99/100 [02:11<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 6, train loss: 0.4448: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 6, valid loss: 1.1968:   2%|▏         | 1/51 [00:00<00:11,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2088:   4%|▍         | 2/51 [00:00<00:11,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2093:   6%|▌         | 3/51 [00:00<00:10,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2:   8%|▊         | 4/51 [00:00<00:10,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2069:  10%|▉         | 5/51 [00:01<00:10,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.1962:  12%|█▏        | 6/51 [00:01<00:10,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2103:  14%|█▎        | 7/51 [00:01<00:10,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2001:  16%|█▌        | 8/51 [00:01<00:09,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2071:  18%|█▊        | 9/51 [00:02<00:09,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2246:  20%|█▉        | 10/51 [00:02<00:09,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2203:  22%|██▏       | 11/51 [00:02<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2106:  24%|██▎       | 12/51 [00:02<00:08,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2089:  25%|██▌       | 13/51 [00:02<00:08,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2194:  27%|██▋       | 14/51 [00:03<00:08,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2163:  29%|██▉       | 15/51 [00:03<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2243:  31%|███▏      | 16/51 [00:03<00:07,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2176:  33%|███▎      | 17/51 [00:03<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2296:  35%|███▌      | 18/51 [00:04<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2182:  37%|███▋      | 19/51 [00:04<00:07,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2129:  39%|███▉      | 20/51 [00:04<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2076:  41%|████      | 21/51 [00:04<00:06,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2045:  43%|████▎     | 22/51 [00:05<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2315:  45%|████▌     | 23/51 [00:05<00:06,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2238:  47%|████▋     | 24/51 [00:05<00:06,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2368:  49%|████▉     | 25/51 [00:05<00:05,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2404:  51%|█████     | 26/51 [00:05<00:05,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2494:  53%|█████▎    | 27/51 [00:06<00:05,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2624:  55%|█████▍    | 28/51 [00:06<00:05,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2918:  57%|█████▋    | 29/51 [00:06<00:05,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3035:  59%|█████▉    | 30/51 [00:06<00:04,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3014:  61%|██████    | 31/51 [00:07<00:04,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2986:  63%|██████▎   | 32/51 [00:07<00:04,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2869:  65%|██████▍   | 33/51 [00:07<00:04,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3078:  67%|██████▋   | 34/51 [00:07<00:03,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2964:  69%|██████▊   | 35/51 [00:07<00:03,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.309:  71%|███████   | 36/51 [00:08<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3487:  73%|███████▎  | 37/51 [00:08<00:03,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3363:  75%|███████▍  | 38/51 [00:08<00:02,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3257:  76%|███████▋  | 39/51 [00:08<00:02,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3265:  78%|███████▊  | 40/51 [00:09<00:02,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3145:  80%|████████  | 41/51 [00:09<00:02,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3037:  82%|████████▏ | 42/51 [00:09<00:02,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2936:  84%|████████▍ | 43/51 [00:09<00:01,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2815:  86%|████████▋ | 44/51 [00:09<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.273:  88%|████████▊ | 45/51 [00:10<00:01,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2899:  90%|█████████ | 46/51 [00:10<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2918:  92%|█████████▏| 47/51 [00:10<00:00,  4.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2807:  94%|█████████▍| 48/51 [00:10<00:00,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2809:  96%|█████████▌| 49/51 [00:11<00:00,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2826:  98%|█████████▊| 50/51 [00:11<00:00,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.2707: 100%|██████████| 51/51 [00:11<00:00,  4.41it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2439:   1%|          | 1/100 [00:01<02:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2427:   2%|▏         | 2/100 [00:02<02:09,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2475:   3%|▎         | 3/100 [00:03<02:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2509:   4%|▍         | 4/100 [00:05<02:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2594:   5%|▌         | 5/100 [00:06<02:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2586:   6%|▌         | 6/100 [00:07<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2599:   7%|▋         | 7/100 [00:09<02:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2596:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2594:   9%|▉         | 9/100 [00:11<02:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2608:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2613:  11%|█         | 11/100 [00:14<01:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.265:  12%|█▏        | 12/100 [00:15<01:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2656:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2657:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2644:  15%|█▌        | 15/100 [00:19<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2651:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2656:  17%|█▋        | 17/100 [00:22<01:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2696:  18%|█▊        | 18/100 [00:23<01:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2685:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2685:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2723:  21%|██        | 21/100 [00:27<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2729:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2721:  23%|██▎       | 23/100 [00:30<01:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2756:  24%|██▍       | 24/100 [00:31<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2753:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2768:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.28:  27%|██▋       | 27/100 [00:35<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2802:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2803:  29%|██▉       | 29/100 [00:38<01:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2799:  30%|███       | 30/100 [00:39<01:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2794:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2784:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2839:  33%|███▎      | 33/100 [00:43<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2832:  34%|███▍      | 34/100 [00:45<01:28,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2829:  35%|███▌      | 35/100 [00:46<01:27,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2811:  36%|███▌      | 36/100 [00:47<01:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2801:  37%|███▋      | 37/100 [00:49<01:24,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2781:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2765:  39%|███▉      | 39/100 [00:52<01:21,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2748:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2738:  41%|████      | 41/100 [00:54<01:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.279:  42%|████▏     | 42/100 [00:55<01:16,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2789:  43%|████▎     | 43/100 [00:57<01:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2782:  44%|████▍     | 44/100 [00:58<01:15,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2762:  45%|████▌     | 45/100 [01:00<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2746:  46%|████▌     | 46/100 [01:01<01:12,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2745:  47%|████▋     | 47/100 [01:02<01:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2789:  48%|████▊     | 48/100 [01:04<01:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.277:  49%|████▉     | 49/100 [01:05<01:08,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2752:  50%|█████     | 50/100 [01:06<01:07,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2783:  51%|█████     | 51/100 [01:08<01:06,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2815:  52%|█████▏    | 52/100 [01:09<01:05,  1.36s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2809:  53%|█████▎    | 53/100 [01:10<01:04,  1.37s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2801:  54%|█████▍    | 54/100 [01:12<01:02,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2786:  55%|█████▌    | 55/100 [01:13<01:00,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2769:  56%|█████▌    | 56/100 [01:14<00:58,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2764:  57%|█████▋    | 57/100 [01:16<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2758:  58%|█████▊    | 58/100 [01:17<00:56,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2741:  59%|█████▉    | 59/100 [01:18<00:54,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2807:  60%|██████    | 60/100 [01:20<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2792:  61%|██████    | 61/100 [01:21<00:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2777:  62%|██████▏   | 62/100 [01:22<00:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2823:  63%|██████▎   | 63/100 [01:24<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2808:  64%|██████▍   | 64/100 [01:25<00:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2854:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.284:  66%|██████▌   | 66/100 [01:28<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2821:  67%|██████▋   | 67/100 [01:29<00:44,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2818:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2831:  69%|██████▉   | 69/100 [01:32<00:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2828:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.283:  71%|███████   | 71/100 [01:34<00:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2868:  72%|███████▏  | 72/100 [01:36<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2854:  73%|███████▎  | 73/100 [01:37<00:36,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2836:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2871:  75%|███████▌  | 75/100 [01:40<00:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2879:  76%|███████▌  | 76/100 [01:41<00:31,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2953:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.294:  78%|███████▊  | 78/100 [01:44<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2929:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.294:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2925:  81%|████████  | 81/100 [01:48<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2927:  82%|████████▏ | 82/100 [01:49<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2908:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2915:  84%|████████▍ | 84/100 [01:52<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2911:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2895:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2945:  87%|████████▋ | 87/100 [01:56<00:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.2935:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3001:  89%|████████▉ | 89/100 [01:58<00:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3049:  90%|█████████ | 90/100 [02:00<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3075:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3055:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3101:  93%|█████████▎| 93/100 [02:04<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3094:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3088:  95%|█████████▌| 95/100 [02:06<00:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3143:  96%|█████████▌| 96/100 [02:08<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3135:  97%|█████████▋| 97/100 [02:09<00:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3137:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.3172:  99%|█████████▉| 99/100 [02:12<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 7, train loss: 0.316: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 7, valid loss: 1.8169:   2%|▏         | 1/51 [00:00<00:11,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8183:   4%|▍         | 2/51 [00:00<00:11,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8019:   6%|▌         | 3/51 [00:00<00:11,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8163:   8%|▊         | 4/51 [00:00<00:10,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8191:  10%|▉         | 5/51 [00:01<00:10,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.807:  12%|█▏        | 6/51 [00:01<00:10,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8196:  14%|█▎        | 7/51 [00:01<00:10,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8026:  16%|█▌        | 8/51 [00:01<00:09,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8141:  18%|█▊        | 9/51 [00:02<00:09,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.824:  20%|█▉        | 10/51 [00:02<00:09,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8112:  22%|██▏       | 11/51 [00:02<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7949:  24%|██▎       | 12/51 [00:02<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7854:  25%|██▌       | 13/51 [00:02<00:08,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7961:  27%|██▋       | 14/51 [00:03<00:08,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7922:  29%|██▉       | 15/51 [00:03<00:08,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7911:  31%|███▏      | 16/51 [00:03<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.777:  33%|███▎      | 17/51 [00:03<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7964:  35%|███▌      | 18/51 [00:04<00:07,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7794:  37%|███▋      | 19/51 [00:04<00:07,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7697:  39%|███▉      | 20/51 [00:04<00:07,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7531:  41%|████      | 21/51 [00:04<00:06,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7364:  43%|████▎     | 22/51 [00:05<00:06,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7576:  45%|████▌     | 23/51 [00:05<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7636:  47%|████▋     | 24/51 [00:05<00:06,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7691:  49%|████▉     | 25/51 [00:05<00:05,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7585:  51%|█████     | 26/51 [00:05<00:05,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7711:  53%|█████▎    | 27/51 [00:06<00:05,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7859:  55%|█████▍    | 28/51 [00:06<00:05,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8207:  57%|█████▋    | 29/51 [00:06<00:04,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8275:  59%|█████▉    | 30/51 [00:06<00:04,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8144:  61%|██████    | 31/51 [00:07<00:04,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.814:  63%|██████▎   | 32/51 [00:07<00:04,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7968:  65%|██████▍   | 33/51 [00:07<00:04,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8047:  67%|██████▋   | 34/51 [00:07<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7881:  69%|██████▊   | 35/51 [00:07<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7937:  71%|███████   | 36/51 [00:08<00:03,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8191:  73%|███████▎  | 37/51 [00:08<00:03,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8018:  75%|███████▍  | 38/51 [00:08<00:02,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7957:  76%|███████▋  | 39/51 [00:08<00:02,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7846:  78%|███████▊  | 40/51 [00:09<00:02,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7687:  80%|████████  | 41/51 [00:09<00:02,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7527:  82%|████████▏ | 42/51 [00:09<00:02,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7379:  84%|████████▍ | 43/51 [00:09<00:01,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7212:  86%|████████▋ | 44/51 [00:10<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7219:  88%|████████▊ | 45/51 [00:10<00:01,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7114:  90%|█████████ | 46/51 [00:10<00:01,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7151:  92%|█████████▏| 47/51 [00:10<00:00,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7093:  94%|█████████▍| 48/51 [00:10<00:00,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7155:  96%|█████████▌| 49/51 [00:11<00:00,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7189:  98%|█████████▊| 50/51 [00:11<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7026: 100%|██████████| 51/51 [00:11<00:00,  4.40it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4377:   1%|          | 1/100 [00:01<02:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4403:   2%|▏         | 2/100 [00:02<02:09,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4372:   3%|▎         | 3/100 [00:04<02:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4341:   4%|▍         | 4/100 [00:05<02:07,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4353:   5%|▌         | 5/100 [00:06<02:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4349:   6%|▌         | 6/100 [00:07<02:04,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4418:   7%|▋         | 7/100 [00:09<02:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4387:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4414:   9%|▉         | 9/100 [00:11<02:01,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4379:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4364:  11%|█         | 11/100 [00:14<01:58,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4336:  12%|█▏        | 12/100 [00:15<01:56,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4307:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4275:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4242:  15%|█▌        | 15/100 [00:19<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4219:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4214:  17%|█▋        | 17/100 [00:22<01:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4185:  18%|█▊        | 18/100 [00:23<01:48,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4162:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4193:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4171:  21%|██        | 21/100 [00:27<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4185:  22%|██▏       | 22/100 [00:29<01:44,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4157:  23%|██▎       | 23/100 [00:30<01:43,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4126:  24%|██▍       | 24/100 [00:31<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4096:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4124:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4118:  27%|██▋       | 27/100 [00:35<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4091:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4063:  29%|██▉       | 29/100 [00:38<01:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4034:  30%|███       | 30/100 [00:39<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4006:  31%|███       | 31/100 [00:41<01:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3981:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3953:  33%|███▎      | 33/100 [00:43<01:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3939:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3953:  35%|███▌      | 35/100 [00:46<01:26,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3944:  36%|███▌      | 36/100 [00:47<01:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3919:  37%|███▋      | 37/100 [00:49<01:24,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3888:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3885:  39%|███▉      | 39/100 [00:51<01:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3881:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.385:  41%|████      | 41/100 [00:54<01:18,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3821:  42%|████▏     | 42/100 [00:55<01:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3792:  43%|████▎     | 43/100 [00:57<01:16,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3761:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3742:  45%|████▌     | 45/100 [00:59<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.372:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.371:  47%|████▋     | 47/100 [01:02<01:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3762:  48%|████▊     | 48/100 [01:03<01:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3752:  49%|████▉     | 49/100 [01:05<01:08,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3777:  50%|█████     | 50/100 [01:06<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3764:  51%|█████     | 51/100 [01:07<01:05,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3737:  52%|█████▏    | 52/100 [01:09<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3716:  53%|█████▎    | 53/100 [01:10<01:02,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3767:  54%|█████▍    | 54/100 [01:11<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3759:  55%|█████▌    | 55/100 [01:13<01:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3739:  56%|█████▌    | 56/100 [01:14<00:58,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.371:  57%|█████▋    | 57/100 [01:15<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3683:  58%|█████▊    | 58/100 [01:17<00:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3672:  59%|█████▉    | 59/100 [01:18<00:55,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3651:  60%|██████    | 60/100 [01:20<00:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.371:  61%|██████    | 61/100 [01:21<00:52,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3679:  62%|██████▏   | 62/100 [01:22<00:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3664:  63%|██████▎   | 63/100 [01:24<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.366:  64%|██████▍   | 64/100 [01:25<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3653:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3627:  66%|██████▌   | 66/100 [01:28<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3665:  67%|██████▋   | 67/100 [01:29<00:44,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3716:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3713:  69%|██████▉   | 69/100 [01:32<00:41,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3708:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.368:  71%|███████   | 71/100 [01:34<00:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3654:  72%|███████▏  | 72/100 [01:36<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3676:  73%|███████▎  | 73/100 [01:37<00:36,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3673:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3647:  75%|███████▌  | 75/100 [01:40<00:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.362:  76%|███████▌  | 76/100 [01:41<00:31,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3592:  77%|███████▋  | 77/100 [01:42<00:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3566:  78%|███████▊  | 78/100 [01:44<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3539:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3519:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3497:  81%|████████  | 81/100 [01:48<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3538:  82%|████████▏ | 82/100 [01:49<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3523:  83%|████████▎ | 83/100 [01:50<00:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3498:  84%|████████▍ | 84/100 [01:51<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3496:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3492:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.347:  87%|████████▋ | 87/100 [01:56<00:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3453:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3447:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3431:  90%|█████████ | 90/100 [02:00<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3408:  91%|█████████ | 91/100 [02:01<00:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3385:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3358:  93%|█████████▎| 93/100 [02:04<00:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3396:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3371:  95%|█████████▌| 95/100 [02:06<00:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3344:  96%|█████████▌| 96/100 [02:07<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3367:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.342:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3395:  99%|█████████▉| 99/100 [02:11<00:01,  1.33s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 8, train loss: 0.3531: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 8, valid loss: 1.4985:   2%|▏         | 1/51 [00:00<00:11,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5166:   4%|▍         | 2/51 [00:00<00:11,  4.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5075:   6%|▌         | 3/51 [00:00<00:10,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5163:   8%|▊         | 4/51 [00:00<00:10,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5163:  10%|▉         | 5/51 [00:01<00:10,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5038:  12%|█▏        | 6/51 [00:01<00:10,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5262:  14%|█▎        | 7/51 [00:01<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5232:  16%|█▌        | 8/51 [00:01<00:09,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.529:  18%|█▊        | 9/51 [00:02<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5352:  20%|█▉        | 10/51 [00:02<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.529:  22%|██▏       | 11/51 [00:02<00:09,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5144:  24%|██▎       | 12/51 [00:02<00:08,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5084:  25%|██▌       | 13/51 [00:02<00:08,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5061:  27%|██▋       | 14/51 [00:03<00:08,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5023:  29%|██▉       | 15/51 [00:03<00:08,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5092:  31%|███▏      | 16/51 [00:03<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.495:  33%|███▎      | 17/51 [00:03<00:07,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5143:  35%|███▌      | 18/51 [00:04<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5:  37%|███▋      | 19/51 [00:04<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.4975:  39%|███▉      | 20/51 [00:04<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.4834:  41%|████      | 21/51 [00:04<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.4831:  43%|████▎     | 22/51 [00:05<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5124:  45%|████▌     | 23/51 [00:05<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5129:  47%|████▋     | 24/51 [00:05<00:06,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5079:  49%|████▉     | 25/51 [00:05<00:06,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5011:  51%|█████     | 26/51 [00:05<00:05,  4.31it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5246:  53%|█████▎    | 27/51 [00:06<00:05,  4.30it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5375:  55%|█████▍    | 28/51 [00:06<00:05,  4.30it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5742:  57%|█████▋    | 29/51 [00:06<00:05,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5837:  59%|█████▉    | 30/51 [00:06<00:04,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5879:  61%|██████    | 31/51 [00:07<00:04,  4.27it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5812:  63%|██████▎   | 32/51 [00:07<00:04,  4.31it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5665:  65%|██████▍   | 33/51 [00:07<00:04,  4.31it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5803:  67%|██████▋   | 34/51 [00:07<00:03,  4.31it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5663:  69%|██████▊   | 35/51 [00:08<00:03,  4.30it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5805:  71%|███████   | 36/51 [00:08<00:03,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6284:  73%|███████▎  | 37/51 [00:08<00:03,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6137:  75%|███████▍  | 38/51 [00:08<00:02,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6016:  76%|███████▋  | 39/51 [00:08<00:02,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5909:  78%|███████▊  | 40/51 [00:09<00:02,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5756:  80%|████████  | 41/51 [00:09<00:02,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5606:  82%|████████▏ | 42/51 [00:09<00:02,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5461:  84%|████████▍ | 43/51 [00:09<00:01,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5313:  86%|████████▋ | 44/51 [00:10<00:01,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5177:  88%|████████▊ | 45/51 [00:10<00:01,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5118:  90%|█████████ | 46/51 [00:10<00:01,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5149:  92%|█████████▏| 47/51 [00:10<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5061:  94%|█████████▍| 48/51 [00:10<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5157:  96%|█████████▌| 49/51 [00:11<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5176:  98%|█████████▊| 50/51 [00:11<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.5034: 100%|██████████| 51/51 [00:11<00:00,  4.37it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1011:   1%|          | 1/100 [00:01<02:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1009:   2%|▏         | 2/100 [00:02<02:08,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1006:   3%|▎         | 3/100 [00:03<02:08,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1008:   4%|▍         | 4/100 [00:05<02:06,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1019:   5%|▌         | 5/100 [00:06<02:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1025:   6%|▌         | 6/100 [00:07<02:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1024:   7%|▋         | 7/100 [00:09<02:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1077:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1143:   9%|▉         | 9/100 [00:11<02:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1138:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1149:  11%|█         | 11/100 [00:14<01:59,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1146:  12%|█▏        | 12/100 [00:15<01:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1151:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1162:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1165:  15%|█▌        | 15/100 [00:19<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1185:  16%|█▌        | 16/100 [00:21<01:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1182:  17%|█▋        | 17/100 [00:22<01:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.118:  18%|█▊        | 18/100 [00:23<01:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1251:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1258:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1256:  21%|██        | 21/100 [00:27<01:45,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1251:  22%|██▏       | 22/100 [00:29<01:43,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.129:  23%|██▎       | 23/100 [00:30<01:42,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1309:  24%|██▍       | 24/100 [00:31<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1349:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1389:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1386:  27%|██▋       | 27/100 [00:36<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1389:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1453:  29%|██▉       | 29/100 [00:38<01:34,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.147:  30%|███       | 30/100 [00:39<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1474:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1469:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1486:  33%|███▎      | 33/100 [00:44<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1493:  34%|███▍      | 34/100 [00:45<01:27,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1544:  35%|███▌      | 35/100 [00:46<01:26,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1547:  36%|███▌      | 36/100 [00:47<01:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1543:  37%|███▋      | 37/100 [00:49<01:24,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1537:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1581:  39%|███▉      | 39/100 [00:52<01:21,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1576:  40%|████      | 40/100 [00:53<01:19,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1572:  41%|████      | 41/100 [00:54<01:18,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1589:  42%|████▏     | 42/100 [00:55<01:17,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1634:  43%|████▎     | 43/100 [00:57<01:16,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1629:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1627:  45%|████▌     | 45/100 [01:00<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1621:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1622:  47%|████▋     | 47/100 [01:02<01:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1617:  48%|████▊     | 48/100 [01:03<01:09,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1631:  49%|████▉     | 49/100 [01:05<01:08,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1629:  50%|█████     | 50/100 [01:06<01:06,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1641:  51%|█████     | 51/100 [01:08<01:05,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1635:  52%|█████▏    | 52/100 [01:09<01:03,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1626:  53%|█████▎    | 53/100 [01:10<01:02,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1628:  54%|█████▍    | 54/100 [01:11<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1628:  55%|█████▌    | 55/100 [01:13<01:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1626:  56%|█████▌    | 56/100 [01:14<00:58,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1618:  57%|█████▋    | 57/100 [01:15<00:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1618:  58%|█████▊    | 58/100 [01:17<00:55,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1609:  59%|█████▉    | 59/100 [01:18<00:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1609:  60%|██████    | 60/100 [01:19<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1695:  61%|██████    | 61/100 [01:21<00:51,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1748:  62%|██████▏   | 62/100 [01:22<00:50,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1738:  63%|██████▎   | 63/100 [01:23<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1733:  64%|██████▍   | 64/100 [01:25<00:48,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1734:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1769:  66%|██████▌   | 66/100 [01:27<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1758:  67%|██████▋   | 67/100 [01:29<00:44,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1753:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1748:  69%|██████▉   | 69/100 [01:31<00:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1737:  70%|███████   | 70/100 [01:33<00:39,  1.32s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.173:  71%|███████   | 71/100 [01:34<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.172:  72%|███████▏  | 72/100 [01:35<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1712:  73%|███████▎  | 73/100 [01:37<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1719:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1713:  75%|███████▌  | 75/100 [01:39<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1713:  76%|███████▌  | 76/100 [01:41<00:31,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1733:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1723:  78%|███████▊  | 78/100 [01:43<00:29,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1713:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1708:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1701:  81%|████████  | 81/100 [01:47<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1691:  82%|████████▏ | 82/100 [01:49<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1683:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1681:  84%|████████▍ | 84/100 [01:51<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1676:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1674:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.167:  87%|████████▋ | 87/100 [01:55<00:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1663:  88%|████████▊ | 88/100 [01:57<00:15,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1738:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1733:  90%|█████████ | 90/100 [01:59<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1722:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1731:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1727:  93%|█████████▎| 93/100 [02:03<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1719:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1709:  95%|█████████▌| 95/100 [02:06<00:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.17:  96%|█████████▌| 96/100 [02:07<00:05,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1693:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1721:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1712:  99%|█████████▉| 99/100 [02:11<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 9, train loss: 0.1733: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 9, valid loss: 1.3599:   2%|▏         | 1/51 [00:00<00:11,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3762:   4%|▍         | 2/51 [00:00<00:11,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3792:   6%|▌         | 3/51 [00:00<00:11,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3714:   8%|▊         | 4/51 [00:00<00:10,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3653:  10%|▉         | 5/51 [00:01<00:10,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3531:  12%|█▏        | 6/51 [00:01<00:10,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.351:  14%|█▎        | 7/51 [00:01<00:10,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3412:  16%|█▌        | 8/51 [00:01<00:09,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3511:  18%|█▊        | 9/51 [00:02<00:09,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.359:  20%|█▉        | 10/51 [00:02<00:09,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3513:  22%|██▏       | 11/51 [00:02<00:09,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3383:  24%|██▎       | 12/51 [00:02<00:08,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.35:  25%|██▌       | 13/51 [00:02<00:08,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3741:  27%|██▋       | 14/51 [00:03<00:08,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3665:  29%|██▉       | 15/51 [00:03<00:08,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3764:  31%|███▏      | 16/51 [00:03<00:08,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3667:  33%|███▎      | 17/51 [00:03<00:07,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.378:  35%|███▌      | 18/51 [00:04<00:07,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3647:  37%|███▋      | 19/51 [00:04<00:07,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3552:  39%|███▉      | 20/51 [00:04<00:07,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3425:  41%|████      | 21/51 [00:04<00:06,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.343:  43%|████▎     | 22/51 [00:05<00:06,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3813:  45%|████▌     | 23/51 [00:05<00:06,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.3986:  47%|████▋     | 24/51 [00:05<00:06,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4131:  49%|████▉     | 25/51 [00:05<00:05,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4153:  51%|█████     | 26/51 [00:05<00:05,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4311:  53%|█████▎    | 27/51 [00:06<00:05,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4442:  55%|█████▍    | 28/51 [00:06<00:05,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4861:  57%|█████▋    | 29/51 [00:06<00:05,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5019:  59%|█████▉    | 30/51 [00:06<00:04,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5023:  61%|██████    | 31/51 [00:07<00:04,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4909:  63%|██████▎   | 32/51 [00:07<00:04,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4785:  65%|██████▍   | 33/51 [00:07<00:04,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5113:  67%|██████▋   | 34/51 [00:07<00:03,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4996:  69%|██████▊   | 35/51 [00:08<00:03,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5222:  71%|███████   | 36/51 [00:08<00:03,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5572:  73%|███████▎  | 37/51 [00:08<00:03,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5426:  75%|███████▍  | 38/51 [00:08<00:02,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5285:  76%|███████▋  | 39/51 [00:08<00:02,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5331:  78%|███████▊  | 40/51 [00:09<00:02,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5187:  80%|████████  | 41/51 [00:09<00:02,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5108:  82%|████████▏ | 42/51 [00:09<00:02,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4973:  84%|████████▍ | 43/51 [00:09<00:01,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.483:  86%|████████▋ | 44/51 [00:10<00:01,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4742:  88%|████████▊ | 45/51 [00:10<00:01,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4992:  90%|█████████ | 46/51 [00:10<00:01,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.5027:  92%|█████████▏| 47/51 [00:10<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4884:  94%|█████████▍| 48/51 [00:11<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.503:  96%|█████████▌| 49/51 [00:11<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4967:  98%|█████████▊| 50/51 [00:11<00:00,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.4823: 100%|██████████| 51/51 [00:11<00:00,  4.36it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0755:   1%|          | 1/100 [00:01<02:13,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0759:   2%|▏         | 2/100 [00:02<02:10,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0763:   3%|▎         | 3/100 [00:04<02:10,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0762:   4%|▍         | 4/100 [00:05<02:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0766:   5%|▌         | 5/100 [00:06<02:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0767:   6%|▌         | 6/100 [00:07<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0768:   7%|▋         | 7/100 [00:09<02:04,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0809:   8%|▊         | 8/100 [00:10<02:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0807:   9%|▉         | 9/100 [00:12<02:01,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0807:  10%|█         | 10/100 [00:13<01:59,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0817:  11%|█         | 11/100 [00:14<01:59,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0819:  12%|█▏        | 12/100 [00:16<01:57,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0821:  13%|█▎        | 13/100 [00:17<01:56,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0821:  14%|█▍        | 14/100 [00:18<01:54,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0821:  15%|█▌        | 15/100 [00:20<01:53,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0826:  16%|█▌        | 16/100 [00:21<01:52,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0837:  17%|█▋        | 17/100 [00:22<01:51,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.085:  18%|█▊        | 18/100 [00:24<01:49,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0861:  19%|█▉        | 19/100 [00:25<01:48,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0859:  20%|██        | 20/100 [00:26<01:46,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0868:  21%|██        | 21/100 [00:28<01:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0874:  22%|██▏       | 22/100 [00:29<01:44,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0906:  23%|██▎       | 23/100 [00:30<01:43,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0912:  24%|██▍       | 24/100 [00:32<01:41,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.093:  25%|██▌       | 25/100 [00:33<01:40,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0928:  26%|██▌       | 26/100 [00:34<01:38,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0925:  27%|██▋       | 27/100 [00:36<01:37,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0951:  28%|██▊       | 28/100 [00:37<01:35,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0949:  29%|██▉       | 29/100 [00:38<01:34,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0953:  30%|███       | 30/100 [00:40<01:33,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1007:  31%|███       | 31/100 [00:41<01:32,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1004:  32%|███▏      | 32/100 [00:42<01:30,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1:  33%|███▎      | 33/100 [00:44<01:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0999:  34%|███▍      | 34/100 [00:45<01:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0996:  35%|███▌      | 35/100 [00:46<01:27,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.101:  36%|███▌      | 36/100 [00:48<01:25,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1009:  37%|███▋      | 37/100 [00:49<01:24,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1017:  38%|███▊      | 38/100 [00:50<01:22,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1013:  39%|███▉      | 39/100 [00:52<01:21,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1011:  40%|████      | 40/100 [00:53<01:20,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1014:  41%|████      | 41/100 [00:54<01:19,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1011:  42%|████▏     | 42/100 [00:56<01:17,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.102:  43%|████▎     | 43/100 [00:57<01:16,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1016:  44%|████▍     | 44/100 [00:58<01:14,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1081:  45%|████▌     | 45/100 [01:00<01:13,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1082:  46%|████▌     | 46/100 [01:01<01:11,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1123:  47%|████▋     | 47/100 [01:02<01:11,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1125:  48%|████▊     | 48/100 [01:04<01:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1125:  49%|████▉     | 49/100 [01:05<01:08,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1123:  50%|█████     | 50/100 [01:06<01:07,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1129:  51%|█████     | 51/100 [01:08<01:05,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1124:  52%|█████▏    | 52/100 [01:09<01:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1121:  53%|█████▎    | 53/100 [01:10<01:02,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1117:  54%|█████▍    | 54/100 [01:12<01:01,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1131:  55%|█████▌    | 55/100 [01:13<01:00,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1125:  56%|█████▌    | 56/100 [01:14<00:58,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1135:  57%|█████▋    | 57/100 [01:16<00:57,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1218:  58%|█████▊    | 58/100 [01:17<00:56,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1216:  59%|█████▉    | 59/100 [01:18<00:55,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1209:  60%|██████    | 60/100 [01:20<00:53,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1204:  61%|██████    | 61/100 [01:21<00:52,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1204:  62%|██████▏   | 62/100 [01:22<00:50,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1198:  63%|██████▎   | 63/100 [01:24<00:49,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1194:  64%|██████▍   | 64/100 [01:25<00:47,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1191:  65%|██████▌   | 65/100 [01:26<00:46,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1187:  66%|██████▌   | 66/100 [01:28<00:45,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1189:  67%|██████▋   | 67/100 [01:29<00:44,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1204:  68%|██████▊   | 68/100 [01:30<00:42,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1199:  69%|██████▉   | 69/100 [01:32<00:41,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1214:  70%|███████   | 70/100 [01:33<00:39,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1209:  71%|███████   | 71/100 [01:34<00:38,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1203:  72%|███████▏  | 72/100 [01:36<00:37,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1202:  73%|███████▎  | 73/100 [01:37<00:36,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1285:  74%|███████▍  | 74/100 [01:38<00:34,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1278:  75%|███████▌  | 75/100 [01:40<00:33,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1273:  76%|███████▌  | 76/100 [01:41<00:32,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1318:  77%|███████▋  | 77/100 [01:42<00:30,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1314:  78%|███████▊  | 78/100 [01:44<00:29,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1309:  79%|███████▉  | 79/100 [01:45<00:28,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1303:  80%|████████  | 80/100 [01:46<00:26,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1297:  81%|████████  | 81/100 [01:48<00:25,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1292:  82%|████████▏ | 82/100 [01:49<00:23,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1294:  83%|████████▎ | 83/100 [01:50<00:22,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1321:  84%|████████▍ | 84/100 [01:52<00:21,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1316:  85%|████████▌ | 85/100 [01:53<00:20,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1312:  86%|████████▌ | 86/100 [01:54<00:18,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1305:  87%|████████▋ | 87/100 [01:56<00:17,  1.35s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1312:  88%|████████▊ | 88/100 [01:57<00:16,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1307:  89%|████████▉ | 89/100 [01:58<00:14,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1325:  90%|█████████ | 90/100 [02:00<00:13,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1327:  91%|█████████ | 91/100 [02:01<00:12,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1403:  92%|█████████▏| 92/100 [02:02<00:10,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1436:  93%|█████████▎| 93/100 [02:04<00:09,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1431:  94%|█████████▍| 94/100 [02:05<00:07,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1425:  95%|█████████▌| 95/100 [02:06<00:06,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1424:  96%|█████████▌| 96/100 [02:08<00:05,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1422:  97%|█████████▋| 97/100 [02:09<00:04,  1.34s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1444:  98%|█████████▊| 98/100 [02:10<00:02,  1.33s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1441:  99%|█████████▉| 99/100 [02:12<00:01,  1.34s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 10, train loss: 0.1433: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]
epoch: 10, valid loss: 1.2515:   2%|▏         | 1/51 [00:00<00:11,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2652:   4%|▍         | 2/51 [00:00<00:11,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2553:   6%|▌         | 3/51 [00:00<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2502:   8%|▊         | 4/51 [00:00<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2605:  10%|▉         | 5/51 [00:01<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2486:  12%|█▏        | 6/51 [00:01<00:10,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2698:  14%|█▎        | 7/51 [00:01<00:09,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2665:  16%|█▌        | 8/51 [00:01<00:09,  4.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2804:  18%|█▊        | 9/51 [00:02<00:09,  4.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3092:  20%|█▉        | 10/51 [00:02<00:09,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3019:  22%|██▏       | 11/51 [00:02<00:09,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.291:  24%|██▎       | 12/51 [00:02<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2879:  25%|██▌       | 13/51 [00:02<00:08,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3138:  27%|██▋       | 14/51 [00:03<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3171:  29%|██▉       | 15/51 [00:03<00:08,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3156:  31%|███▏      | 16/51 [00:03<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3046:  33%|███▎      | 17/51 [00:03<00:07,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3255:  35%|███▌      | 18/51 [00:04<00:07,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3133:  37%|███▋      | 19/51 [00:04<00:07,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3028:  39%|███▉      | 20/51 [00:04<00:07,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2907:  41%|████      | 21/51 [00:04<00:06,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.2784:  43%|████▎     | 22/51 [00:04<00:06,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3183:  45%|████▌     | 23/51 [00:05<00:06,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.316:  47%|████▋     | 24/51 [00:05<00:06,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3272:  49%|████▉     | 25/51 [00:05<00:05,  4.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3175:  51%|█████     | 26/51 [00:05<00:05,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3467:  53%|█████▎    | 27/51 [00:06<00:05,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.362:  55%|█████▍    | 28/51 [00:06<00:05,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4006:  57%|█████▋    | 29/51 [00:06<00:05,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4116:  59%|█████▉    | 30/51 [00:06<00:04,  4.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4061:  61%|██████    | 31/51 [00:07<00:04,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3986:  63%|██████▎   | 32/51 [00:07<00:04,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3855:  65%|██████▍   | 33/51 [00:07<00:04,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4017:  67%|██████▋   | 34/51 [00:07<00:03,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3887:  69%|██████▊   | 35/51 [00:07<00:03,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4098:  71%|███████   | 36/51 [00:08<00:03,  4.28it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4382:  73%|███████▎  | 37/51 [00:08<00:03,  4.31it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4244:  75%|███████▍  | 38/51 [00:08<00:02,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.4119:  76%|███████▋  | 39/51 [00:08<00:02,  4.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3988:  78%|███████▊  | 40/51 [00:09<00:02,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3855:  80%|████████  | 41/51 [00:09<00:02,  4.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3723:  82%|████████▏ | 42/51 [00:09<00:02,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.362:  84%|████████▍ | 43/51 [00:09<00:01,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3489:  86%|████████▋ | 44/51 [00:10<00:01,  4.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3469:  88%|████████▊ | 45/51 [00:10<00:01,  4.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.359:  90%|█████████ | 46/51 [00:10<00:01,  4.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.362:  92%|█████████▏| 47/51 [00:10<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3549:  94%|█████████▍| 48/51 [00:10<00:00,  4.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3683:  96%|█████████▌| 49/51 [00:11<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.3692:  98%|█████████▊| 50/51 [00:11<00:00,  4.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.357: 100%|██████████| 51/51 [00:11<00:00,  4.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti


