In [1]:
!pip install transformers
!pip install datasets #데이터 세트 다운로드 Hugging Face 연동

Collecting transformers
  Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m68.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m102.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m79.4 MB/s[0m eta [36m0:00:

In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [3]:
import copy # 특정한 파이썬 객체를 통째로 메모리에 copy할 때
import json # json 형식으로 데이터를 표현할 때
import logging # 학습 과정 등 전반적인 프로그램의 진행 상황을 로깅할 때
import os # 파일 입출력 등 현재 컴퓨터에 대한 기능 수행할 때

# 경고(warning) 메시지가 너무 많이 나오는 것을 대비하여 무시 처리
import warnings
warnings.filterwarnings("ignore")

# 로깅할 때 기본적으로 오류(error) 사항으로 로그 메시지를 남기겠다는 의미
import logging
logging.basicConfig(level=logging.ERROR)

# 벡터, 행렬 등의 처리를 위한 NumPy, 테이블(엑셀) 형식의 데이터 처리할 때 Pandas
import numpy as np
import pandas as pd

from datasets import load_dataset
# train_test_split: 별도로 구분된 validation 세트가 없을 때
# 학습 데이터 세트에서 일부를 train과 validation으로 나눌 때 자주 사용 (8:2 정도로 나눔)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch

### 학습한 모델 관련 라이브러리 불러오기

In [4]:
import transformers
# Auto Model For Sequence Classification: 텍스트 분류를 위한 모델 → Cross-Entropy loss 사용
from transformers import AutoConfig, AutoModel, AutoModelForSequenceClassification, AutoTokenizer
# linear_schedule_with_warmup: 단계적으로 learning rate 줄여나가는 방법
# AdamW: SGD와 같이 optimization 방법 중 하나
from transformers import AdamW, get_linear_schedule_with_warmup

### 우리가 쓸 모델

- KoBigBird를 사용하고, 다음과 같은 형태로 사용 가능
- KoBigBird: BigBird 특유의 sparse attention 사용 (default)

In [5]:
from transformers import AutoModel, AutoTokenizer

# by default its in `block_sparse` mode with num_random_blocks=3, block_size=64
# 이름에서부터 알 수 있듯이 KoBigBird는 BERT 기반의 모델
model = AutoModel.from_pretrained("monologg/kobigbird-bert-base")

# Tokenizer도 마찬가지로 BERT 기반에서 가져온 것을 확인
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
text = "한국어 BigBird 모델을 공개합니다!"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

Downloading (…)lve/main/config.json:   0%|          | 0.00/870 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/458M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/241k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/492k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/169 [00:00<?, ?B/s]

Attention type 'block_sparse' is not possible if sequence_length: 12 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


### 실험을 위한 하이퍼 파라미터 설정

In [6]:
# SimpleNamespace는 온점(.)으로 속성의 값을 정의할 수 있도록 해주는 라이브러리
# config.task = "cls"라고 하면, 나중에 print(config.task)했을 때 "cls"라고 출력
from types import SimpleNamespace

config = SimpleNamespace()

config.task = "cls"
config.dataset = "comment"

config.cache_dir = "cache" # 현재 데이터 세트에 대한 단어집 등 현재 task를 위한 임시적인 폴더
config.output_dir = "output" # 최종적인 모델이 저장되고, 결과가 저장되는 폴더

config.use_tpu = False
config.model_name_or_path = "monologg/kobigbird-bert-base" # Model name or path (HuggingFace에서 불러와 사용할 모델 이름)
config.data_dir = "./" # The input data dir ("10000_labeled.csv"가 있는 경로)

# 실질적으로 학습을 위해서는 tokenizing이 수행된 train file과 predict file을 만들어야 한다.
config.train_file = "joongang.csv" # 미리 준비된 학습 데이터 세트 경로
# 어차피 10000_labeled.csv를 (1) training 목적, (2) validatoin 목적으로 쪼개니까 평가는 validation으로 결과가 나올 것임
config.predict_file = "joongang.csv" # 미리 준비된 평가 데이터 세트 경로
# → 이거 일단 없으면, 지금처럼 train_file로 넣으시되, 나중에 생기시면 갈아끼우기

config.max_seq_length = 2048 # The maximum total input sequence length after tokenization. (최대 토큰 길이)
config.train_batch_size = 4 # Batch size for training. (학습할 때 batch_size)
config.eval_batch_size = 2 # Batch size for evaluation. (평가할 때 batch_size)

config.learning_rate = 3e-5 # The initial learning rate for Adam. (Adam optimizer에서 쓸 learning rate)
config.num_train_epochs = 10 # Total number of training epochs to perform. (전체 학습 epoch 수)

config.num_labels = 5 # 현재 task에서 선호도(1: 극진보, 2: 진보, 3: 중립, 4: 보수, 5: 극보수)의 개수는 5개이므로
# 5 classes multi-class classification 문제로 이해 할 수 있음
config.gradient_accumulation_steps = 2 # Number of updates steps to accumulate before performing a backward/update pass.
# batch_size가 큰 것처럼 처리하기 위해서, backward()를 매 번 수행하지 않고, gradient를 누적(acculmulation)하는 것

config.threads = 4
config.seed = 42 # random seed for initialization

config.do_train = True # Whether to run training.
config.do_eval_during_train = True
config.do_eval = True # Whether to run prediction.

config.do_lower_case = False
config.weight_decay = 0.0 # Weight decay if we apply some.
config.adam_epsilon = 1e-8 # Epsilon for Adam optimizer.
config.max_grad_norm = 1.0 # Max gradient norm.
config.warmup_proportion = 0.0 # Warmup proportion for linear warmup
# BigBird에서는 full attention을 하면, 메모리는 조금 더 소모되지만, 더 정확도가 향상
# config.attention_type = "original_full"

### 학습 데이터 전처리
- 학습 text를 매번 tokenization을 하지 않고, 모델 학습 시작전에 미리 모든 텍스트를 tokenization 한 결과를 저장한다.

In [7]:
# 본 실습에서 사용할 tokenizer 객체 초기화
tokenizer = AutoTokenizer.from_pretrained(config.model_name_or_path, cache_dir=config.cache_dir)

Downloading (…)okenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/241k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/492k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/169 [00:00<?, ?B/s]

In [8]:
def train_split(config, texts, labels, is_train):
    # 바로 여기에서 train 데이터 세트가 8:2로 training과 validation이 나누어 진다.
    # 지금 평가 결과는 validation에 대한 결과
    # [오류] stratify가 labels면, test_dataset에 특정 레이블이 아예 등장하지 않으면 오류 발생
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=labels
    )
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=None
    )
    if is_train:
        texts, labels = x_train, x_label
    else:
        texts, labels = y_train, y_label
    return texts, labels

# 댓글(comment)이 담긴 .csv 파일이 있을 때, 여기에서 텍스트와 레이블 추출
def process_comment_cls(config, data_file, is_train):
    df = pd.read_csv(data_file)
    df = df.dropna(subset=['title', 'content', 'label1', 'label2'])

    # 매 줄에서 "label1(정치성향)", "label2(편향여부)" 열 추출
    politics = (df["label1"] - 1).astype(int).values.tolist()
    governments = (df["label2"] - 1).astype(int).values.tolist()
    labels = []
    # 한 줄씩 데이터를 확인하며
    for i in range(len(politics)):
        politic = politics[i] # 선호도
        government = governments[i] # 비속어
        labels.append([politic, government])
    print(len(labels))

    # title과 content를 합쳐서 texts로 표현
    texts = (df["title"] + " " + df["content"]).astype(str).values.tolist()

    texts, labels = train_split(config, texts, labels, is_train)
    return texts, labels

### 데이터 토큰화
- 주어진 데이터를 토큰화하고, 토큰화된 데이터를 파일에 저장하는 함수를 정의.
- 주어진 데이터는 텍스트와 레이블로 구성되어 있으며, 텍스트는 토큰화되고, 레이블은 정수로 변환.
- 토큰화된 데이터와 변환된 레이블은 JSON 형식으로 파일에 저장

In [9]:
import torch.utils.data as torch_data

def data_pretokenizing(config, tokenizer, is_train=True):
    if is_train:
        data_file = config.train_file
    else:
        data_file = config.predict_file

    data_path = config.data_dir
    if data_file is not None:
        data_path = os.path.join(data_path, data_file)
    else:
        data_path += "/"

    # 실제로 tokenizer를  저장될 데이터 세트의 파일 이름이 바로 dataset_file
    comps = [
        data_path,
        config.dataset,
        config.model_name_or_path.replace("/", "_"),
        config.max_seq_length,
        "train" if is_train else "dev",
        "dataset.txt",
    ]
    dataset_file = "_".join([str(comp) for comp in comps])
    print("dataset_file:", dataset_file)

    # 텍스트 문장을 읽어와서 token 값만 저장
    with open(dataset_file, "w", encoding="utf-8") as writer_file:
        # data: "joongang.csv" 파일에서 읽어와 (텍스트, 선호도 레이블)만 남긴 .csv 파일
        cnt = 0
        total_data = process_comment_cls(config, data_path, is_train)
        # 학습 데이터 세트를 하나씩 확인하며
        for text, label in zip(total_data[0], total_data[1]):
            # 여기에서 data는 하나의 (텍스트, 레이블) 쌍
            # feature는 해당 텍스트를 tokenizer에 넣어서 나온 결과
            feature = tokenizer(text, max_length=config.max_seq_length, padding="max_length", truncation=True, add_special_tokens=True)
            # 실제로 학습을 위해서는 (각 토큰의 index, 정답 레이블)로 학습을 진행
            writed_data = {
                "input_ids": feature["input_ids"],
                "attention_mask": feature["attention_mask"],
                "politic": int(float(label[0])), # "2.0" → 2.0 → 2
                "government": int(float(label[1])), # "2.0" → 2.0 → 2
              }
            # JSON은 쉽게 말하면 Python에서 dictionary와 같음 → 이를 file로 저장하는 것
            writer_file.write(json.dumps(writed_data) + "\n")
            cnt += 1
        print(f"{cnt} features processed from {data_path}")

    return dataset_file


In [10]:
# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 세트 전처리
    train_dataset_file = data_pretokenizing(config, tokenizer=tokenizer)

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset_file = data_pretokenizing(config, tokenizer=tokenizer, is_train=False)

# 결과적으로 만들어진 "./10000_labeled.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt"
# 내용을 확인해 보면, 약 8,000개의 각 학습 데이터에 대하여
#   → 하나씩 {"input_ids", "attention_mask", "preference", "slang"}으로 구성

dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_2048_train_dataset.txt
499
399 features processed from ./joongang.csv
dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_2048_dev_dataset.txt
499
100 features processed from ./joongang.csv


### 데이터로더 초기화

#### 데이터패딩

In [11]:
class IterableDatasetPad(torch.utils.data.IterableDataset):
    def __init__(
        self,
        dataset: torch.utils.data.IterableDataset,
        batch_size: int = 1,
        num_devices: int = 1,
        seed: int = 0,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.seed = seed
        self.num_examples = 0

        chunk_size = self.batch_size * num_devices
        length = len(dataset)
        self.length = length + (chunk_size - length % chunk_size)

    def __len__(self):
        return self.length

    def __iter__(self):
        self.num_examples = 0
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.seed + self.epoch)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            self.num_examples += 1
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == self.batch_size:
                for batch in current_batch:
                    yield batch
                    if first_batch is None:
                        first_batch = batch.copy()
                current_batch = []

        while self.num_examples < self.length:
            add_num = self.batch_size - len(current_batch)
            self.num_examples += add_num
            current_batch += [first_batch] * add_num
            for batch in current_batch:
                yield batch
            current_batch = []

#### 전처리된 데이터를 DataLoader로 불러옴

In [12]:
# 전처리된 데이터는 하나하나 {"input_ids", "attention_mask", "labels", ...} 형태를 가짐
# PyTorch가 하나의 배치를 처리할 때는 PyTorch Tensor 형태여야 함
# <데이터 로더에서 불러오는 "Tensor"를 정의하는 함수>
def collate_fn(features):
    input_ids = [sample["input_ids"] for sample in features]
    attention_mask = [sample["attention_mask"] for sample in features]

    politic = [sample["politic"] for sample in features]
    government = [sample["government"] for sample in features]

    input_ids = torch.tensor(np.array(input_ids).astype(np.int64), dtype=torch.long)
    attention_mask = torch.tensor(np.array(attention_mask).astype(np.int8), dtype=torch.long)
    politic = torch.tensor(np.array(politic).astype(np.int64), dtype=torch.long)
    government = torch.tensor(np.array(government).astype(np.int64), dtype=torch.long)
    inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
    }
    labels = {
        "politic": politic,
        "government" : government
    }
    return inputs, labels

# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 로더 초기화
    train_dataset = load_dataset("text", data_files=train_dataset_file, download_mode="force_redownload")["train"]
    train_dataset = train_dataset.map(lambda x: json.loads(x["text"]), batched=False)

    train_dataloader = torch_data.DataLoader(
        train_dataset,
        sampler=torch_data.RandomSampler(train_dataset),
        drop_last=False,
        batch_size=config.train_batch_size,
        collate_fn=(collate_fn),
    )

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset = load_dataset("text", data_files=predict_dataset_file, download_mode="force_redownload")["train"]
predict_dataset = predict_dataset.map(lambda x: json.loads(x["text"]), batched=False)
predict_dataset = IterableDatasetPad(
    dataset=predict_dataset,
    batch_size=config.eval_batch_size,
    num_devices=1,
    seed=config.seed,
)

predict_dataloader = torch_data.DataLoader(
    predict_dataset,
    sampler=None,
    drop_last=False,
    batch_size=config.eval_batch_size,
    collate_fn=(collate_fn),
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/399 [00:00<?, ? examples/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

### 텍스트 분류 모델 정의

In [13]:
from transformers import AutoModel
import torch.nn as nn

# 텍스트 분류 모델 정의
class ClsModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # (전처리된) 학습할 데이터 세트의 경로 설정
        data_file = os.path.join(config.data_dir, str(config.train_file))
        # 사전 학습된 모델 가중치 불러오기
        model_config = AutoConfig.from_pretrained(config.model_name_or_path, num_labels=config.num_labels)
        self.model = AutoModel.from_pretrained(
            config.model_name_or_path, config=model_config, cache_dir=config.cache_dir
        )
        self.classifier1 = nn.Linear(768, 5)
        self.classifier2 = nn.Linear(768, 5)

        # BERT model과 별개로, 입력 문자열을 토큰의 index로 바꾸어주는 tokenizer가 사용됨
        self.tokenizer = tokenizer

    # "학습된 모델"을 save_dir에 저장하는 함수
    def save_pretrained(self, save_dir):
        self.model.save_pretrained(save_dir)
        # Tokenizer는 기본적으로 "special_tokens_map_file", "tokenizer_file"을 가질 수 있음
        # 이러한 값을 제거한 뒤에 save_dir에 저장하겠다는 의미
        for key in ["special_tokens_map_file", "tokenizer_file"]:
            self.tokenizer.init_kwargs.pop(key, None)
        self.tokenizer.save_pretrained(save_dir)

    def get_optimizer(self): # 현재 모델을 학습하기 위한 최적화 방법(AdamW) 객체를 불러오는 함수
        # bias랑 LayerNorm에는 decay 적용하지 않겠다는 의미
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
        ]
        # AdamW의 첫 번째 인자(params)는 "학습할 가중치", weight_decay는 가중치에 적용되는 regularization 기법
        optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=config.adam_epsilon)
        return optimizer

    def get_scheduler(self, batch_num, optimizer): # AdamW로 학습할 때, learning rate을 단계적으로 줄이기 위한 함수
        if config.warmup_proportion == 0.0:
            return None

        t_total = batch_num // config.gradient_accumulation_steps * config.num_train_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(t_total * config.warmup_proportion),
            num_training_steps=t_total,
        )

        return scheduler

    def forward(self, inputs):
        # print(inputs) => {"input_ids", "attention_mask"}
        # https://huggingface.co/docs/transformers/model_doc/big_bird
        # BigBirdModel의 입력 양식에 맞게 넣어주어야 오류가 없음
        # BigBirdModel로 "input_ids", "attention_mask" 등 "미리 정해진" 규격에 맞는 입력만
        # 들어가야 오류가 없다는 의미 => 그러므로, preference, slang 등은 들어가면 X
        hidden = self.model(**inputs)
        # print(hidden.last_hidden_state.shape) # torch.Size([4 (batch_size), 1024 (seq_len), 768 (embedding_size)])
        # 마지막 레이어의 [CLS] 토큰만 가져오기
        cls_token_embeddings = hidden.last_hidden_state[:,0,:] # [batch_size, 768]
        # print(cls_token_embeddings.shape)
        output_1 = self.classifier1(cls_token_embeddings) # 768 → 5
        output_2 = self.classifier2(cls_token_embeddings) # 768 → 5
        print(output_1.shape) # [batch_size, 5]
        print(output_2.shape) # [batch_size, 5]

        return output_1, output_2

    def eval_step(self, inputs, labels, outputs):
        logits_1 = outputs[0].detach().cpu()
        logits_2 = outputs[1].detach().cpu()
        labels_1 = self.tensor_to_list(labels["politic"])
        labels_2 = self.tensor_to_list(labels["government"])
        predictions_1 = self.tensor_to_list(torch.argmax(logits_1, dim=-1))
        predictions_2 = self.tensor_to_list(torch.argmax(logits_2, dim=-1))
        results_1 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_1, labels_1)]
        results_2 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_2, labels_2)]
        return {"results_1": results_1, "results_2": results_2}





    # PyTorch의 Tensor 객체를 NumPy 객체로 변환
    def tensor_to_array(self, tensor):
        return tensor.detach().cpu().numpy()

    # PyTorch의 Tensor 객체를 Python의 리스트(list) 자료형으로 변환
    def tensor_to_list(self, tensor):
        return self.tensor_to_array(tensor).tolist()

In [14]:
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def cal_running_avg_loss(loss, running_avg_loss, decay=0.99):
    if running_avg_loss == 0:
        return loss
    running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
    return running_avg_loss

### 모델 학습 및 평가 라이브러리


In [15]:
from functools import partial
import sklearn.metrics as sklearn_metrics

"""binary_metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "precision": sklearn_metrics.precision_score, # TP / (TP + FP)
    "recall": sklearn_metrics.recall_score, # recall = sensitivity (민감도)
    "f1": sklearn_metrics.f1_score,
    "matthews_corrcoef": sklearn_metrics.matthews_corrcoef,
    "roc_auc": sklearn_metrics.roc_auc_score,
}""" # 우리는 두가지 task 다 다중분류임으로 안씀

metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "f1-macro": partial(sklearn_metrics.f1_score, average="macro"),
}


def eval_cls(results_1, results_2, **kwargs):
    predictions_1 = np.array([result["prediction"] for result in results_1])
    labels_1 = np.array([result["label"] for result in results_1])
    predictions_2 = np.array([result["prediction"] for result in results_2])
    labels_2 = np.array([result["label"] for result in results_2])
    results_1 = {
        metric: round(f(labels_1, predictions_1) * 100, 2)
        for metric, f in metrics.items()
    }
    results_2 = {
        metric: round(f(labels_2, predictions_2) * 100, 2)
        for metric, f in metrics.items()
    }

    return {
        "results_1": results_1,
        "results_2": results_2,
        "best_score_1": results_1["f1-macro"],
        "best_score_2": results_2["f1-macro"],
    }


### Epoch 동안 학습 및 평가를 수행하는 함수 정의

In [16]:
def _run_epoch(model, loader, device=None, context=None, **kwargs):
    config = kwargs["config"]
    is_train = kwargs["is_train"]

    avg_loss = 0
    results = []
    batch_num = len(loader)

    if is_train:
        model.train()
        if config.use_tpu:
            optimizer = context.getattr_or("optimizer", lambda: model.get_optimizer())
            scheduler = context.getattr_or("scheduler", lambda: model.get_scheduler(batch_num, optimizer))
        else:
            optimizer = kwargs["optimizer"]
            scheduler = kwargs["scheduler"]
    else:
        model.eval()

    is_master = True

    pbar = tqdm(enumerate(loader), total=batch_num, disable=not is_master, dynamic_ncols=True, position=0, leave=True)

    corrected_1 = 0
    corrected_2 = 0
    total = 0

    for i, (inputs, labels) in pbar:
        # inputs: {"input_ids": [batch_size(4), seq_len, 768], "attention_mask": [batch_size(4), seq_len, 768]}
        # labels: {"preference": [batch_size(4), 1], "slang": [batch_size(4), 1], "politic": [batch_size(4), 1]}
        if not config.use_tpu:
            # (k, v) => ("input_ids", value)
            # (k, v) => ("attention_mask", value)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            for k, v in labels.items():
                if isinstance(v, torch.Tensor):
                    labels[k] = v.to(device)

        outputs = model(inputs)


        outputs_1 = outputs[0]
        outputs_2 = outputs[1]

        labels_1 = labels["politic"]
        labels_2 = labels["government"]

        loss_function_1 = nn.CrossEntropyLoss()
        loss_1 = loss_function_1(outputs_1, labels_1)

        total += outputs_1.shape[0]

        _, predicted_1 = outputs_1.max(1)
        corrected_1 += predicted_1.eq(labels_1).sum().item()

        loss_function_2 = nn.CrossEntropyLoss()
        loss_2 = loss_function_2(outputs_2, labels_2)

        _, predicted_2 = outputs_2.max(1)
        corrected_2 += predicted_2.eq(labels_2).sum().item()

        w_1 = 1
        w_2 = 1
        loss = w_1 * loss_1 + w_2 * loss_2

        avg_loss = cal_running_avg_loss(loss.item(), avg_loss)
        loss /= config.gradient_accumulation_steps

        if is_train:
            loss.backward()
            if i % config.gradient_accumulation_steps == 0 or i == batch_num - 1:
                if config.max_grad_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)

                optimizer.step()
                optimizer.zero_grad()

                if scheduler is not None:
                    scheduler.step()
        else:
            result = (model.module if hasattr(model, "module") else model).eval_step(inputs, labels, outputs)
            results.append(result)

        if is_master:
            pbar.set_description(f"epoch: {kwargs['epoch'] + 1}, {('train' if is_train else 'valid')} loss: {min(100, round(avg_loss, 4))}")

    return {
        "loss": avg_loss,
        "result": results,
    }


# 학습 코드에서 호출하는 함수
def run_epoch(**kwargs):
    model = kwargs.pop("model")
    if kwargs["config"].use_tpu:
        results = model(_run_epoch, **kwargs)
    else:
        results = _run_epoch(model, **kwargs)

    if isinstance(results, list):
        loss = sum([result["loss"] for result in results]) / len(results)
        result = []
        for res in results:
            result.extend(res["result"])
        results = {"loss": loss, "result": result}

    return results

### 딥러닝 모델 초기화 및 설정

In [17]:
# 현재 모델 이름이 "monologg/kobigbird-bert-base" 이므로, Hugging Face에서 찾아서 불러옴
set_seed(config.seed)

# 딥러닝 모델 초기화
model = ClsModel()

print(f"configuration: {str(config)}")

if torch.cuda.is_available(): # GPU를 사용할 수 있다면
    gpu_count = torch.cuda.device_count()
    print(f"{gpu_count} GPU device detected")
    devices = ["cuda:{}".format(i) for i in range(gpu_count)]
    model_dp = torch.nn.DataParallel(model, device_ids=devices)
    model.to(devices[0])
else: # GPU를 사용할 수 없다면 CPU로 구동
    devices = ["cpu"]
    model_dp = model

# 학습 결과를 저장하기 위한 폴더 만들기
if not os.path.exists(config.cache_dir):
    os.makedirs(config.cache_dir)

output_dir = os.path.join(config.output_dir, config.task, config.dataset)
print("Output directory:", output_dir)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 실제 학습을 위해 optimizer 및 scheduler 초기화
optimizer = None
scheduler = None
if config.do_train: # 학습 모드(train mode)인 경우
    optimizer = model.get_optimizer()
    scheduler = model.get_scheduler(len(train_dataloader), optimizer)

params = {
    "config": config,
    "model": model_dp,
    "optimizer": optimizer,
    "scheduler": scheduler,
}
if not config.use_tpu:
    params["device"] = devices[0]

Downloading model.safetensors:   0%|          | 0.00/458M [00:00<?, ?B/s]

configuration: namespace(task='cls', dataset='comment', cache_dir='cache', output_dir='output', use_tpu=False, model_name_or_path='monologg/kobigbird-bert-base', data_dir='./', train_file='joongang.csv', predict_file='joongang.csv', max_seq_length=2048, train_batch_size=4, eval_batch_size=2, learning_rate=3e-05, num_train_epochs=10, num_labels=5, gradient_accumulation_steps=2, threads=4, seed=42, do_train=True, do_eval_during_train=True, do_eval=True, do_lower_case=False, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, warmup_proportion=0.0)
1 GPU device detected
Output directory: output/cls/comment


In [18]:
def do_eval(epoch):
    with torch.no_grad():
        results = run_epoch(loader=predict_dataloader, epoch=epoch, is_train=False, **params)["result"]
        print(results)
        results_1 = [result['results_1'] for result in results]
        results_2 = [result['results_2'] for result in results]

        results_1 = [item for sublist in results_1 for item in sublist]
        results_2 = [item for sublist in results_2 for item in sublist]

        eval_results = eval_cls(
            config=config,
            model=model,
            loader=predict_dataloader,
            tokenizer=model.tokenizer,
            results_1=results_1,
            results_2=results_2,
        )

    print("Eval results for output 1.")
    for k, v in eval_results["results_1"].items():
        print(f"{k} : {v}")

    print("Eval results for output 2.")
    for k, v in eval_results["results_2"].items():
        print(f"{k} : {v}")

    return eval_results["best_score_1"], eval_results["best_score_2"]

train_losses = []
val_accuracies = []
if config.do_train:
    best_score = (0, 0)
    for epoch in range(config.num_train_epochs):
        train_results = run_epoch(loader=train_dataloader, epoch=epoch, is_train=True, **params)
        train_loss = train_results['loss']
        train_losses.append(train_loss)

        if config.do_eval_during_train:
            score1, score2 = do_eval(epoch)
            val_accuracies.append((score1, score2))

            if score1 >= best_score[0] and score2 >= best_score[1]:
                best_score = (score1, score2)
                output_dir = os.path.join(config.output_dir, config.task, config.dataset, f"{epoch}-{best_score[0]}-{best_score[1]}-ckpt")
                copy.deepcopy(
                    model_dp.module
                    if hasattr(model_dp, "module")
                    else model_dp._models[0]
                    if hasattr(model_dp, "_models")
                    else model_dp
                ).cpu().save_pretrained(output_dir)
                with open(os.path.join(output_dir, "finetune_config.json"), "w") as save_config:
                    json.dump(vars(config), save_config, sort_keys=True, indent=4)
                print(f"Checkpoint {output_dir} saved.")


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9339:   1%|          | 1/100 [00:04<07:39,  4.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9312:   2%|▏         | 2/100 [00:05<04:10,  2.56s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9289:   3%|▎         | 3/100 [00:06<03:05,  1.91s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9255:   4%|▍         | 4/100 [00:07<02:32,  1.59s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9261:   5%|▌         | 5/100 [00:09<02:15,  1.43s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9224:   6%|▌         | 6/100 [00:10<02:03,  1.31s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9248:   7%|▋         | 7/100 [00:11<01:56,  1.25s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.916:   8%|▊         | 8/100 [00:12<01:50,  1.20s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9096:   9%|▉         | 9/100 [00:13<01:47,  1.18s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8987:  10%|█         | 10/100 [00:14<01:43,  1.15s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9018:  11%|█         | 11/100 [00:15<01:41,  1.15s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8985:  12%|█▏        | 12/100 [00:16<01:39,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8898:  13%|█▎        | 13/100 [00:17<01:38,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8823:  14%|█▍        | 14/100 [00:19<01:36,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8889:  15%|█▌        | 15/100 [00:20<01:35,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8766:  16%|█▌        | 16/100 [00:21<01:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8775:  17%|█▋        | 17/100 [00:22<01:32,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8697:  18%|█▊        | 18/100 [00:23<01:31,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8728:  19%|█▉        | 19/100 [00:24<01:30,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.871:  20%|██        | 20/100 [00:25<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8639:  21%|██        | 21/100 [00:26<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8595:  22%|██▏       | 22/100 [00:27<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.853:  23%|██▎       | 23/100 [00:29<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8589:  24%|██▍       | 24/100 [00:30<01:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8501:  25%|██▌       | 25/100 [00:31<01:23,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8446:  26%|██▌       | 26/100 [00:32<01:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8412:  27%|██▋       | 27/100 [00:33<01:21,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8306:  28%|██▊       | 28/100 [00:34<01:20,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8197:  29%|██▉       | 29/100 [00:35<01:19,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8106:  30%|███       | 30/100 [00:36<01:18,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8103:  31%|███       | 31/100 [00:38<01:17,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8044:  32%|███▏      | 32/100 [00:39<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7993:  33%|███▎      | 33/100 [00:40<01:14,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7929:  34%|███▍      | 34/100 [00:41<01:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7924:  35%|███▌      | 35/100 [00:42<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7814:  36%|███▌      | 36/100 [00:43<01:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7722:  37%|███▋      | 37/100 [00:44<01:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7712:  38%|███▊      | 38/100 [00:45<01:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7613:  39%|███▉      | 39/100 [00:46<01:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7506:  40%|████      | 40/100 [00:47<01:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.744:  41%|████      | 41/100 [00:49<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7407:  42%|████▏     | 42/100 [00:50<01:03,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7378:  43%|████▎     | 43/100 [00:51<01:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7321:  44%|████▍     | 44/100 [00:52<01:01,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7179:  45%|████▌     | 45/100 [00:53<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7037:  46%|████▌     | 46/100 [00:54<00:59,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.695:  47%|████▋     | 47/100 [00:55<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6819:  48%|████▊     | 48/100 [00:56<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6696:  49%|████▉     | 49/100 [00:57<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6728:  50%|█████     | 50/100 [00:59<00:55,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6626:  51%|█████     | 51/100 [01:00<00:55,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6595:  52%|█████▏    | 52/100 [01:01<00:53,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6598:  53%|█████▎    | 53/100 [01:02<00:52,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6455:  54%|█████▍    | 54/100 [01:03<00:51,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6308:  55%|█████▌    | 55/100 [01:04<00:50,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.633:  56%|█████▌    | 56/100 [01:05<00:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6213:  57%|█████▋    | 57/100 [01:06<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6061:  58%|█████▊    | 58/100 [01:08<00:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6029:  59%|█████▉    | 59/100 [01:09<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5978:  60%|██████    | 60/100 [01:10<00:44,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5874:  61%|██████    | 61/100 [01:11<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5791:  62%|██████▏   | 62/100 [01:12<00:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5673:  63%|██████▎   | 63/100 [01:13<00:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5522:  64%|██████▍   | 64/100 [01:14<00:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5499:  65%|██████▌   | 65/100 [01:15<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5492:  66%|██████▌   | 66/100 [01:16<00:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5475:  67%|██████▋   | 67/100 [01:18<00:36,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5421:  68%|██████▊   | 68/100 [01:19<00:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5417:  69%|██████▉   | 69/100 [01:20<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5302:  70%|███████   | 70/100 [01:21<00:33,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5271:  71%|███████   | 71/100 [01:22<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5217:  72%|███████▏  | 72/100 [01:23<00:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.513:  73%|███████▎  | 73/100 [01:24<00:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5057:  74%|███████▍  | 74/100 [01:25<00:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5139:  75%|███████▌  | 75/100 [01:26<00:27,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5017:  76%|███████▌  | 76/100 [01:27<00:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4868:  77%|███████▋  | 77/100 [01:29<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4842:  78%|███████▊  | 78/100 [01:30<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4765:  79%|███████▉  | 79/100 [01:31<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4752:  80%|████████  | 80/100 [01:32<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4677:  81%|████████  | 81/100 [01:33<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4585:  82%|████████▏ | 82/100 [01:34<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4584:  83%|████████▎ | 83/100 [01:35<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4484:  84%|████████▍ | 84/100 [01:36<00:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4426:  85%|████████▌ | 85/100 [01:37<00:16,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4432:  86%|████████▌ | 86/100 [01:39<00:15,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4358:  87%|████████▋ | 87/100 [01:40<00:14,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4271:  88%|████████▊ | 88/100 [01:41<00:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4279:  89%|████████▉ | 89/100 [01:42<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4254:  90%|█████████ | 90/100 [01:43<00:11,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4167:  91%|█████████ | 91/100 [01:44<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.401:  92%|█████████▏| 92/100 [01:45<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4068:  93%|█████████▎| 93/100 [01:46<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3944:  94%|█████████▍| 94/100 [01:47<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3894:  95%|█████████▌| 95/100 [01:49<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3785:  96%|█████████▌| 96/100 [01:50<00:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3709:  97%|█████████▋| 97/100 [01:51<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3709:  98%|█████████▊| 98/100 [01:52<00:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3637:  99%|█████████▉| 99/100 [01:53<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 1, train loss: 2.3486: 100%|██████████| 100/100 [01:54<00:00,  1.14s/it]
epoch: 1, valid loss: 1.0083:   4%|▍         | 2/51 [00:00<00:09,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0243:   8%|▊         | 4/51 [00:00<00:08,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0394:  12%|█▏        | 6/51 [00:01<00:08,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0647:  16%|█▌        | 8/51 [00:01<00:07,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0862:  20%|█▉        | 10/51 [00:01<00:07,  5.57it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0884:  24%|██▎       | 12/51 [00:02<00:06,  5.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1128:  27%|██▋       | 14/51 [00:02<00:06,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1148:  31%|███▏      | 16/51 [00:02<00:06,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1443:  35%|███▌      | 18/51 [00:03<00:05,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1537:  39%|███▉      | 20/51 [00:03<00:05,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1583:  43%|████▎     | 22/51 [00:03<00:05,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1954:  47%|████▋     | 24/51 [00:04<00:04,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2216:  51%|█████     | 26/51 [00:04<00:04,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2441:  55%|█████▍    | 28/51 [00:05<00:04,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2613:  59%|█████▉    | 30/51 [00:05<00:03,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2987:  63%|██████▎   | 32/51 [00:05<00:03,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3012:  67%|██████▋   | 34/51 [00:06<00:03,  5.57it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3056:  71%|███████   | 36/51 [00:06<00:02,  5.56it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3182:  75%|███████▍  | 38/51 [00:06<00:02,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3229:  78%|███████▊  | 40/51 [00:07<00:01,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3219:  82%|████████▏ | 42/51 [00:07<00:01,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3152:  86%|████████▋ | 44/51 [00:07<00:01,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3205:  90%|█████████ | 46/51 [00:08<00:00,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3122:  94%|█████████▍| 48/51 [00:08<00:00,  5.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3147:  98%|█████████▊| 50/51 [00:09<00:00,  5.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3121: 100%|██████████| 51/51 [00:09<00:00,  5.51it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2155:   1%|          | 1/100 [00:01<01:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2184:   2%|▏         | 2/100 [00:02<01:47,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2172:   3%|▎         | 3/100 [00:03<01:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2189:   4%|▍         | 4/100 [00:04<01:45,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2243:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2181:   6%|▌         | 6/100 [00:06<01:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2365:   7%|▋         | 7/100 [00:07<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2465:   8%|▊         | 8/100 [00:08<01:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2473:   9%|▉         | 9/100 [00:09<01:40,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2509:  10%|█         | 10/100 [00:11<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2619:  11%|█         | 11/100 [00:12<01:39,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2677:  12%|█▏        | 12/100 [00:13<01:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2766:  13%|█▎        | 13/100 [00:14<01:37,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2749:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2743:  15%|█▌        | 15/100 [00:16<01:34,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2913:  16%|█▌        | 16/100 [00:17<01:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2898:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2859:  18%|█▊        | 18/100 [00:19<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2866:  19%|█▉        | 19/100 [00:21<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2906:  20%|██        | 20/100 [00:22<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2983:  21%|██        | 21/100 [00:23<01:28,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3004:  22%|██▏       | 22/100 [00:24<01:27,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3007:  23%|██▎       | 23/100 [00:25<01:26,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2988:  24%|██▍       | 24/100 [00:26<01:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2975:  25%|██▌       | 25/100 [00:27<01:23,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3107:  26%|██▌       | 26/100 [00:28<01:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3055:  27%|██▋       | 27/100 [00:30<01:21,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3083:  28%|██▊       | 28/100 [00:31<01:20,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.311:  29%|██▉       | 29/100 [00:32<01:19,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3137:  30%|███       | 30/100 [00:33<01:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.328:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3341:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3407:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.352:  34%|███▍      | 34/100 [00:37<01:12,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3658:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3655:  36%|███▌      | 36/100 [00:39<01:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3648:  37%|███▋      | 37/100 [00:41<01:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3605:  38%|███▊      | 38/100 [00:42<01:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3612:  39%|███▉      | 39/100 [00:43<01:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3707:  40%|████      | 40/100 [00:44<01:05,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3675:  41%|████      | 41/100 [00:45<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3693:  42%|████▏     | 42/100 [00:46<01:03,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3798:  43%|████▎     | 43/100 [00:47<01:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3861:  44%|████▍     | 44/100 [00:48<01:01,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3824:  45%|████▌     | 45/100 [00:49<01:00,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3932:  46%|████▌     | 46/100 [00:50<00:59,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3865:  47%|████▋     | 47/100 [00:52<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3833:  48%|████▊     | 48/100 [00:53<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3826:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3926:  50%|█████     | 50/100 [00:55<00:54,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3851:  51%|█████     | 51/100 [00:56<00:54,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3879:  52%|█████▏    | 52/100 [00:57<00:52,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3844:  53%|█████▎    | 53/100 [00:58<00:51,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3811:  54%|█████▍    | 54/100 [00:59<00:50,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3777:  55%|█████▌    | 55/100 [01:00<00:49,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3796:  56%|█████▌    | 56/100 [01:02<00:48,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3764:  57%|█████▋    | 57/100 [01:03<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3876:  58%|█████▊    | 58/100 [01:04<00:46,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3803:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.374:  60%|██████    | 60/100 [01:06<00:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3723:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3743:  62%|██████▏   | 62/100 [01:08<00:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3704:  63%|██████▎   | 63/100 [01:09<00:40,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3682:  64%|██████▍   | 64/100 [01:10<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3696:  65%|██████▌   | 65/100 [01:11<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3641:  66%|██████▌   | 66/100 [01:13<00:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3627:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3717:  68%|██████▊   | 68/100 [01:15<00:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3757:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3702:  70%|███████   | 70/100 [01:17<00:33,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3715:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3719:  72%|███████▏  | 72/100 [01:19<00:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.371:  73%|███████▎  | 73/100 [01:20<00:29,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3827:  74%|███████▍  | 74/100 [01:21<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3789:  75%|███████▌  | 75/100 [01:22<00:27,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3758:  76%|███████▌  | 76/100 [01:24<00:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.378:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3871:  78%|███████▊  | 78/100 [01:26<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3963:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4029:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4107:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4142:  82%|████████▏ | 82/100 [01:30<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4164:  83%|████████▎ | 83/100 [01:31<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4139:  84%|████████▍ | 84/100 [01:32<00:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4172:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4137:  86%|████████▌ | 86/100 [01:35<00:15,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4222:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.421:  88%|████████▊ | 88/100 [01:37<00:13,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.431:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4376:  90%|█████████ | 90/100 [01:39<00:11,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4358:  91%|█████████ | 91/100 [01:40<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4362:  92%|█████████▏| 92/100 [01:41<00:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4272:  93%|█████████▎| 93/100 [01:42<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4215:  94%|█████████▍| 94/100 [01:43<00:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4298:  95%|█████████▌| 95/100 [01:45<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4363:  96%|█████████▌| 96/100 [01:46<00:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4418:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4405:  98%|█████████▊| 98/100 [01:48<00:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4402:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 2, train loss: 1.4386: 100%|██████████| 100/100 [01:50<00:00,  1.10s/it]
epoch: 2, valid loss: 1.3125:   4%|▍         | 2/51 [00:00<00:08,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3085:   8%|▊         | 4/51 [00:00<00:08,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3097:  12%|█▏        | 6/51 [00:01<00:08,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3058:  16%|█▌        | 8/51 [00:01<00:07,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3158:  20%|█▉        | 10/51 [00:01<00:07,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3097:  24%|██▎       | 12/51 [00:02<00:07,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3111:  27%|██▋       | 14/51 [00:02<00:06,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3055:  31%|███▏      | 16/51 [00:02<00:06,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3129:  35%|███▌      | 18/51 [00:03<00:05,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3071:  39%|███▉      | 20/51 [00:03<00:05,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3005:  43%|████▎     | 22/51 [00:04<00:05,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3231:  47%|████▋     | 24/51 [00:04<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3379:  51%|█████     | 26/51 [00:04<00:04,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3467:  55%|█████▍    | 28/51 [00:05<00:04,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3679:  59%|█████▉    | 30/51 [00:05<00:03,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3717:  63%|██████▎   | 32/51 [00:05<00:03,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3684:  67%|██████▋   | 34/51 [00:06<00:03,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3685:  71%|███████   | 36/51 [00:06<00:02,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3642:  75%|███████▍  | 38/51 [00:06<00:02,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3538:  78%|███████▊  | 40/51 [00:07<00:01,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3394:  82%|████████▏ | 42/51 [00:07<00:01,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.322:  86%|████████▋ | 44/51 [00:08<00:01,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3118:  90%|█████████ | 46/51 [00:08<00:00,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3062:  94%|█████████▍| 48/51 [00:08<00:00,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2992:  98%|█████████▊| 50/51 [00:09<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.293: 100%|██████████| 51/51 [00:09<00:00,  5.49it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7315:   1%|          | 1/100 [00:01<01:50,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7365:   2%|▏         | 2/100 [00:02<01:48,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7306:   3%|▎         | 3/100 [00:03<01:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7299:   4%|▍         | 4/100 [00:04<01:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7244:   5%|▌         | 5/100 [00:05<01:46,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7198:   6%|▌         | 6/100 [00:06<01:44,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7103:   7%|▋         | 7/100 [00:07<01:43,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.7017:   8%|▊         | 8/100 [00:08<01:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6922:   9%|▉         | 9/100 [00:10<01:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6879:  10%|█         | 10/100 [00:11<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6793:  11%|█         | 11/100 [00:12<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6768:  12%|█▏        | 12/100 [00:13<01:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6756:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6697:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6613:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6566:  16%|█▌        | 16/100 [00:17<01:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6469:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6479:  18%|█▊        | 18/100 [00:19<01:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6454:  19%|█▉        | 19/100 [00:21<01:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6442:  20%|██        | 20/100 [00:22<01:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6363:  21%|██        | 21/100 [00:23<01:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6262:  22%|██▏       | 22/100 [00:24<01:25,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6253:  23%|██▎       | 23/100 [00:25<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6291:  24%|██▍       | 24/100 [00:26<01:23,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6186:  25%|██▌       | 25/100 [00:27<01:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6107:  26%|██▌       | 26/100 [00:28<01:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6003:  27%|██▋       | 27/100 [00:29<01:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5962:  28%|██▊       | 28/100 [00:31<01:19,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5937:  29%|██▉       | 29/100 [00:32<01:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5891:  30%|███       | 30/100 [00:33<01:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5774:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5678:  32%|███▏      | 32/100 [00:35<01:14,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5596:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5592:  34%|███▍      | 34/100 [00:37<01:12,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5521:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5456:  36%|███▌      | 36/100 [00:39<01:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.547:  37%|███▋      | 37/100 [00:41<01:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5424:  38%|███▊      | 38/100 [00:42<01:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.54:  39%|███▉      | 39/100 [00:43<01:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5386:  40%|████      | 40/100 [00:44<01:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5353:  41%|████      | 41/100 [00:45<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.532:  42%|████▏     | 42/100 [00:46<01:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5257:  43%|████▎     | 43/100 [00:47<01:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.522:  44%|████▍     | 44/100 [00:48<01:01,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5286:  45%|████▌     | 45/100 [00:49<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5239:  46%|████▌     | 46/100 [00:50<00:59,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5238:  47%|████▋     | 47/100 [00:52<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5168:  48%|████▊     | 48/100 [00:53<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5081:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4988:  50%|█████     | 50/100 [00:55<00:55,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4953:  51%|█████     | 51/100 [00:56<00:54,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.494:  52%|█████▏    | 52/100 [00:57<00:53,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4904:  53%|█████▎    | 53/100 [00:58<00:52,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4854:  54%|█████▍    | 54/100 [00:59<00:51,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4852:  55%|█████▌    | 55/100 [01:00<00:50,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4745:  56%|█████▌    | 56/100 [01:02<00:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4633:  57%|█████▋    | 57/100 [01:03<00:48,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4623:  58%|█████▊    | 58/100 [01:04<00:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4634:  59%|█████▉    | 59/100 [01:05<00:45,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4626:  60%|██████    | 60/100 [01:06<00:44,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4594:  61%|██████    | 61/100 [01:07<00:43,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4511:  62%|██████▏   | 62/100 [01:08<00:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.448:  63%|██████▎   | 63/100 [01:09<00:41,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.442:  64%|██████▍   | 64/100 [01:10<00:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4325:  65%|██████▌   | 65/100 [01:12<00:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4226:  66%|██████▌   | 66/100 [01:13<00:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4172:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4106:  68%|██████▊   | 68/100 [01:15<00:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4388:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.438:  70%|███████   | 70/100 [01:17<00:33,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4523:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4516:  72%|███████▏  | 72/100 [01:19<00:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4625:  73%|███████▎  | 73/100 [01:20<00:30,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4507:  74%|███████▍  | 74/100 [01:22<00:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4481:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4449:  76%|███████▌  | 76/100 [01:24<00:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4404:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4406:  78%|███████▊  | 78/100 [01:26<00:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4347:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.432:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4322:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4419:  82%|████████▏ | 82/100 [01:30<00:19,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4366:  83%|████████▎ | 83/100 [01:32<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4343:  84%|████████▍ | 84/100 [01:33<00:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4444:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4401:  86%|████████▌ | 86/100 [01:35<00:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4385:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4322:  88%|████████▊ | 88/100 [01:37<00:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4405:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4318:  90%|█████████ | 90/100 [01:39<00:11,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4267:  91%|█████████ | 91/100 [01:40<00:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.423:  92%|█████████▏| 92/100 [01:42<00:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4163:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4089:  94%|█████████▍| 94/100 [01:44<00:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4034:  95%|█████████▌| 95/100 [01:45<00:05,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4051:  96%|█████████▌| 96/100 [01:46<00:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3973:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3883:  98%|█████████▊| 98/100 [01:48<00:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.388:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 3, train loss: 1.378: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 3, valid loss: 0.8139:   4%|▍         | 2/51 [00:00<00:08,  5.58it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8207:   8%|▊         | 4/51 [00:00<00:08,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8317:  12%|█▏        | 6/51 [00:01<00:08,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8409:  16%|█▌        | 8/51 [00:01<00:07,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8788:  20%|█▉        | 10/51 [00:01<00:07,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8744:  24%|██▎       | 12/51 [00:02<00:07,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8781:  27%|██▋       | 14/51 [00:02<00:06,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8836:  31%|███▏      | 16/51 [00:02<00:06,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8929:  35%|███▌      | 18/51 [00:03<00:05,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8951:  39%|███▉      | 20/51 [00:03<00:05,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8853:  43%|████▎     | 22/51 [00:03<00:05,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9092:  47%|████▋     | 24/51 [00:04<00:04,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9188:  51%|█████     | 26/51 [00:04<00:04,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9444:  55%|█████▍    | 28/51 [00:05<00:04,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9558:  59%|█████▉    | 30/51 [00:05<00:03,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9724:  63%|██████▎   | 32/51 [00:05<00:03,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9767:  67%|██████▋   | 34/51 [00:06<00:03,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9815:  71%|███████   | 36/51 [00:06<00:02,  5.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9916:  75%|███████▍  | 38/51 [00:06<00:02,  5.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9882:  78%|███████▊  | 40/51 [00:07<00:02,  5.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9782:  82%|████████▏ | 42/51 [00:07<00:01,  5.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9684:  86%|████████▋ | 44/51 [00:08<00:01,  5.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9683:  90%|█████████ | 46/51 [00:08<00:00,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9708:  94%|█████████▍| 48/51 [00:08<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.973:  98%|█████████▊| 50/51 [00:09<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9696: 100%|██████████| 51/51 [00:09<00:00,  5.47it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 0, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2443:   1%|          | 1/100 [00:01<01:51,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2351:   2%|▏         | 2/100 [00:02<01:47,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2346:   3%|▎         | 3/100 [00:03<01:48,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2255:   4%|▍         | 4/100 [00:04<01:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2182:   5%|▌         | 5/100 [00:05<01:46,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2278:   6%|▌         | 6/100 [00:06<01:44,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2277:   7%|▋         | 7/100 [00:07<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2263:   8%|▊         | 8/100 [00:08<01:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2234:   9%|▉         | 9/100 [00:10<01:41,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2215:  10%|█         | 10/100 [00:11<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2156:  11%|█         | 11/100 [00:12<01:39,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2098:  12%|█▏        | 12/100 [00:13<01:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2044:  13%|█▎        | 13/100 [00:14<01:37,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1964:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1962:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1902:  16%|█▌        | 16/100 [00:17<01:32,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1822:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1769:  18%|█▊        | 18/100 [00:19<01:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1743:  19%|█▉        | 19/100 [00:21<01:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1783:  20%|██        | 20/100 [00:22<01:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1718:  21%|██        | 21/100 [00:23<01:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1634:  22%|██▏       | 22/100 [00:24<01:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1709:  23%|██▎       | 23/100 [00:25<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1686:  24%|██▍       | 24/100 [00:26<01:23,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1646:  25%|██▌       | 25/100 [00:27<01:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1624:  26%|██▌       | 26/100 [00:28<01:21,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1639:  27%|██▋       | 27/100 [00:29<01:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1566:  28%|██▊       | 28/100 [00:31<01:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1592:  29%|██▉       | 29/100 [00:32<01:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1572:  30%|███       | 30/100 [00:33<01:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.153:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1472:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1525:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1573:  34%|███▍      | 34/100 [00:37<01:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1543:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.148:  36%|███▌      | 36/100 [00:39<01:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1406:  37%|███▋      | 37/100 [00:41<01:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1386:  38%|███▊      | 38/100 [00:42<01:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1354:  39%|███▉      | 39/100 [00:43<01:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1313:  40%|████      | 40/100 [00:44<01:05,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1381:  41%|████      | 41/100 [00:45<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1349:  42%|████▏     | 42/100 [00:46<01:03,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1303:  43%|████▎     | 43/100 [00:47<01:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1334:  44%|████▍     | 44/100 [00:48<01:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1273:  45%|████▌     | 45/100 [00:49<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1197:  46%|████▌     | 46/100 [00:50<00:59,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1296:  47%|████▋     | 47/100 [00:52<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1247:  48%|████▊     | 48/100 [00:53<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1396:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1309:  50%|█████     | 50/100 [00:55<00:55,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1257:  51%|█████     | 51/100 [00:56<00:54,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1199:  52%|█████▏    | 52/100 [00:57<00:53,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1154:  53%|█████▎    | 53/100 [00:58<00:52,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1074:  54%|█████▍    | 54/100 [00:59<00:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1021:  55%|█████▌    | 55/100 [01:00<00:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1121:  56%|█████▌    | 56/100 [01:02<00:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.114:  57%|█████▋    | 57/100 [01:03<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.114:  58%|█████▊    | 58/100 [01:04<00:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1123:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1067:  60%|██████    | 60/100 [01:06<00:44,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1017:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.101:  62%|██████▏   | 62/100 [01:08<00:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1015:  63%|██████▎   | 63/100 [01:09<00:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1031:  64%|██████▍   | 64/100 [01:10<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0967:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0896:  66%|██████▌   | 66/100 [01:13<00:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0842:  67%|██████▋   | 67/100 [01:14<00:36,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0829:  68%|██████▊   | 68/100 [01:15<00:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0899:  69%|██████▉   | 69/100 [01:16<00:34,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0813:  70%|███████   | 70/100 [01:17<00:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0824:  71%|███████   | 71/100 [01:18<00:32,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0757:  72%|███████▏  | 72/100 [01:19<00:31,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0798:  73%|███████▎  | 73/100 [01:20<00:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0804:  74%|███████▍  | 74/100 [01:22<00:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0774:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0823:  76%|███████▌  | 76/100 [01:24<00:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0822:  77%|███████▋  | 77/100 [01:25<00:25,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.074:  78%|███████▊  | 78/100 [01:26<00:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0663:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0585:  80%|████████  | 80/100 [01:28<00:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0499:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0433:  82%|████████▏ | 82/100 [01:30<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0354:  83%|████████▎ | 83/100 [01:32<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0317:  84%|████████▍ | 84/100 [01:33<00:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0352:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0269:  86%|████████▌ | 86/100 [01:35<00:15,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0297:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0254:  88%|████████▊ | 88/100 [01:37<00:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0207:  89%|████████▉ | 89/100 [01:38<00:12,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0175:  90%|█████████ | 90/100 [01:39<00:11,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0114:  91%|█████████ | 91/100 [01:40<00:10,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0034:  92%|█████████▏| 92/100 [01:42<00:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0003:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 0.9948:  94%|█████████▍| 94/100 [01:44<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.009:  95%|█████████▌| 95/100 [01:45<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0023:  96%|█████████▌| 96/100 [01:46<00:04,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.007:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0074:  98%|█████████▊| 98/100 [01:48<00:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.006:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 4, train loss: 0.9988: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 4, valid loss: 0.6391:   4%|▍         | 2/51 [00:00<00:08,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.6525:   8%|▊         | 4/51 [00:00<00:08,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.6506:  12%|█▏        | 6/51 [00:01<00:08,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.6623:  16%|█▌        | 8/51 [00:01<00:07,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7147:  20%|█▉        | 10/51 [00:01<00:07,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7132:  24%|██▎       | 12/51 [00:02<00:07,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7246:  27%|██▋       | 14/51 [00:02<00:06,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7402:  31%|███▏      | 16/51 [00:02<00:06,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7625:  35%|███▌      | 18/51 [00:03<00:05,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7657:  39%|███▉      | 20/51 [00:03<00:05,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7556:  43%|████▎     | 22/51 [00:04<00:05,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.7855:  47%|████▋     | 24/51 [00:04<00:04,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8084:  51%|█████     | 26/51 [00:04<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8351:  55%|█████▍    | 28/51 [00:05<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.88:  59%|█████▉    | 30/51 [00:05<00:03,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8901:  63%|██████▎   | 32/51 [00:05<00:03,  5.31it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8976:  67%|██████▋   | 34/51 [00:06<00:03,  5.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9062:  71%|███████   | 36/51 [00:06<00:02,  5.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9294:  75%|███████▍  | 38/51 [00:06<00:02,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9179:  78%|███████▊  | 40/51 [00:07<00:02,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.9038:  82%|████████▏ | 42/51 [00:07<00:01,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8951:  86%|████████▋ | 44/51 [00:08<00:01,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8922:  90%|█████████ | 46/51 [00:08<00:00,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8905:  94%|█████████▍| 48/51 [00:08<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8909:  98%|█████████▊| 50/51 [00:09<00:00,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 0.8846: 100%|██████████| 51/51 [00:09<00:00,  5.47it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2118:   1%|          | 1/100 [00:01<01:49,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2055:   2%|▏         | 2/100 [00:02<01:47,  1.09s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1977:   3%|▎         | 3/100 [00:03<01:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1964:   4%|▍         | 4/100 [00:04<01:45,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1897:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1837:   6%|▌         | 6/100 [00:06<01:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1738:   7%|▋         | 7/100 [00:07<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1654:   8%|▊         | 8/100 [00:08<01:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1602:   9%|▉         | 9/100 [00:09<01:40,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.151:  10%|█         | 10/100 [00:11<01:38,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1434:  11%|█         | 11/100 [00:12<01:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1376:  12%|█▏        | 12/100 [00:13<01:36,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1333:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1276:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1209:  15%|█▌        | 15/100 [00:16<01:34,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1157:  16%|█▌        | 16/100 [00:17<01:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1086:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.102:  18%|█▊        | 18/100 [00:19<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0993:  19%|█▉        | 19/100 [00:21<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0896:  20%|██        | 20/100 [00:22<01:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0873:  21%|██        | 21/100 [00:23<01:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0814:  22%|██▏       | 22/100 [00:24<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0808:  23%|██▎       | 23/100 [00:25<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0799:  24%|██▍       | 24/100 [00:26<01:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0765:  25%|██▌       | 25/100 [00:27<01:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0707:  26%|██▌       | 26/100 [00:28<01:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0622:  27%|██▋       | 27/100 [00:29<01:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0627:  28%|██▊       | 28/100 [00:31<01:19,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0545:  29%|██▉       | 29/100 [00:32<01:19,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0532:  30%|███       | 30/100 [00:33<01:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.045:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0375:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0344:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0271:  34%|███▍      | 34/100 [00:37<01:12,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0196:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0138:  36%|███▌      | 36/100 [00:39<01:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0192:  37%|███▋      | 37/100 [00:41<01:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0183:  38%|███▊      | 38/100 [00:42<01:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0133:  39%|███▉      | 39/100 [00:43<01:08,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0064:  40%|████      | 40/100 [00:44<01:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9991:  41%|████      | 41/100 [00:45<01:05,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9916:  42%|████▏     | 42/100 [00:46<01:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9926:  43%|████▎     | 43/100 [00:47<01:03,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9876:  44%|████▍     | 44/100 [00:48<01:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9843:  45%|████▌     | 45/100 [00:49<01:01,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9837:  46%|████▌     | 46/100 [00:51<00:59,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9892:  47%|████▋     | 47/100 [00:52<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9874:  48%|████▊     | 48/100 [00:53<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9801:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.973:  50%|█████     | 50/100 [00:55<00:55,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9758:  51%|█████     | 51/100 [00:56<00:54,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9722:  52%|█████▏    | 52/100 [00:57<00:53,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9764:  53%|█████▎    | 53/100 [00:58<00:52,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9711:  54%|█████▍    | 54/100 [00:59<00:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9639:  55%|█████▌    | 55/100 [01:01<00:49,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9559:  56%|█████▌    | 56/100 [01:02<00:48,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9538:  57%|█████▋    | 57/100 [01:03<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9504:  58%|█████▊    | 58/100 [01:04<00:46,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9466:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9472:  60%|██████    | 60/100 [01:06<00:44,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9426:  61%|██████    | 61/100 [01:07<00:43,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9362:  62%|██████▏   | 62/100 [01:08<00:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9322:  63%|██████▎   | 63/100 [01:09<00:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9283:  64%|██████▍   | 64/100 [01:10<00:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9241:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9199:  66%|██████▌   | 66/100 [01:13<00:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9129:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9096:  68%|██████▊   | 68/100 [01:15<00:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9197:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.913:  70%|███████   | 70/100 [01:17<00:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9096:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9111:  72%|███████▏  | 72/100 [01:19<00:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9105:  73%|███████▎  | 73/100 [01:20<00:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.903:  74%|███████▍  | 74/100 [01:22<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8953:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8884:  76%|███████▌  | 76/100 [01:24<00:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8941:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8864:  78%|███████▊  | 78/100 [01:26<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8854:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8899:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8942:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8865:  82%|████████▏ | 82/100 [01:30<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.885:  83%|████████▎ | 83/100 [01:32<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8778:  84%|████████▍ | 84/100 [01:33<00:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8754:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8686:  86%|████████▌ | 86/100 [01:35<00:15,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8684:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8618:  88%|████████▊ | 88/100 [01:37<00:13,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8661:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8668:  90%|█████████ | 90/100 [01:39<00:11,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8642:  91%|█████████ | 91/100 [01:40<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8653:  92%|█████████▏| 92/100 [01:41<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8712:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8684:  94%|█████████▍| 94/100 [01:44<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8653:  95%|█████████▌| 95/100 [01:45<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8589:  96%|█████████▌| 96/100 [01:46<00:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8554:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8519:  98%|█████████▊| 98/100 [01:48<00:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8456:  99%|█████████▉| 99/100 [01:49<00:01,  1.12s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 5, train loss: 0.8395: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 5, valid loss: 1.0137:   4%|▍         | 2/51 [00:00<00:09,  5.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0183:   8%|▊         | 4/51 [00:00<00:08,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0191:  12%|█▏        | 6/51 [00:01<00:08,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0241:  16%|█▌        | 8/51 [00:01<00:07,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0431:  20%|█▉        | 10/51 [00:01<00:07,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0383:  24%|██▎       | 12/51 [00:02<00:07,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0452:  27%|██▋       | 14/51 [00:02<00:06,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0503:  31%|███▏      | 16/51 [00:02<00:06,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0632:  35%|███▌      | 18/51 [00:03<00:06,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0544:  39%|███▉      | 20/51 [00:03<00:05,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0404:  43%|████▎     | 22/51 [00:04<00:05,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0735:  47%|████▋     | 24/51 [00:04<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.0988:  51%|█████     | 26/51 [00:04<00:04,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1202:  55%|█████▍    | 28/51 [00:05<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1678:  59%|█████▉    | 30/51 [00:05<00:03,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1653:  63%|██████▎   | 32/51 [00:05<00:03,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1734:  67%|██████▋   | 34/51 [00:06<00:03,  5.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1663:  71%|███████   | 36/51 [00:06<00:02,  5.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.178:  75%|███████▍  | 38/51 [00:06<00:02,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1611:  78%|███████▊  | 40/51 [00:07<00:02,  5.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1443:  82%|████████▏ | 42/51 [00:07<00:01,  5.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1292:  86%|████████▋ | 44/51 [00:08<00:01,  5.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1231:  90%|█████████ | 46/51 [00:08<00:00,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1204:  94%|█████████▍| 48/51 [00:08<00:00,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1393:  98%|█████████▊| 50/51 [00:09<00:00,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.1349: 100%|██████████| 51/51 [00:09<00:00,  5.45it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4472:   1%|          | 1/100 [00:01<01:51,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4546:   2%|▏         | 2/100 [00:02<01:48,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4526:   3%|▎         | 3/100 [00:03<01:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4531:   4%|▍         | 4/100 [00:04<01:45,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4513:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4534:   6%|▌         | 6/100 [00:06<01:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4542:   7%|▋         | 7/100 [00:07<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4515:   8%|▊         | 8/100 [00:08<01:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4493:   9%|▉         | 9/100 [00:09<01:41,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4486:  10%|█         | 10/100 [00:11<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4509:  11%|█         | 11/100 [00:12<01:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4514:  12%|█▏        | 12/100 [00:13<01:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4481:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4463:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4438:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4491:  16%|█▌        | 16/100 [00:17<01:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4459:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4449:  18%|█▊        | 18/100 [00:19<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4479:  19%|█▉        | 19/100 [00:21<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.445:  20%|██        | 20/100 [00:22<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4478:  21%|██        | 21/100 [00:23<01:28,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.445:  22%|██▏       | 22/100 [00:24<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4525:  23%|██▎       | 23/100 [00:25<01:25,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.451:  24%|██▍       | 24/100 [00:26<01:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4526:  25%|██▌       | 25/100 [00:27<01:23,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4628:  26%|██▌       | 26/100 [00:28<01:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4696:  27%|██▋       | 27/100 [00:30<01:21,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4691:  28%|██▊       | 28/100 [00:31<01:19,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4667:  29%|██▉       | 29/100 [00:32<01:19,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4631:  30%|███       | 30/100 [00:33<01:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4613:  31%|███       | 31/100 [00:34<01:17,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4577:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4596:  33%|███▎      | 33/100 [00:36<01:15,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4666:  34%|███▍      | 34/100 [00:37<01:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4655:  35%|███▌      | 35/100 [00:38<01:12,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4645:  36%|███▌      | 36/100 [00:40<01:11,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4677:  37%|███▋      | 37/100 [00:41<01:10,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4649:  38%|███▊      | 38/100 [00:42<01:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4656:  39%|███▉      | 39/100 [00:43<01:08,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4622:  40%|████      | 40/100 [00:44<01:06,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4592:  41%|████      | 41/100 [00:45<01:06,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4567:  42%|████▏     | 42/100 [00:46<01:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4531:  43%|████▎     | 43/100 [00:47<01:04,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4503:  44%|████▍     | 44/100 [00:48<01:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4475:  45%|████▌     | 45/100 [00:50<01:01,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4579:  46%|████▌     | 46/100 [00:51<00:59,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.455:  47%|████▋     | 47/100 [00:52<00:59,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4523:  48%|████▊     | 48/100 [00:53<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4576:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4548:  50%|█████     | 50/100 [00:55<00:55,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4574:  51%|█████     | 51/100 [00:56<00:54,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4609:  52%|█████▏    | 52/100 [00:57<00:53,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4646:  53%|█████▎    | 53/100 [00:58<00:52,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4743:  54%|█████▍    | 54/100 [01:00<00:50,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4711:  55%|█████▌    | 55/100 [01:01<00:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4758:  56%|█████▌    | 56/100 [01:02<00:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4816:  57%|█████▋    | 57/100 [01:03<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4779:  58%|█████▊    | 58/100 [01:04<00:46,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4802:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4803:  60%|██████    | 60/100 [01:06<00:44,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4796:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4903:  62%|██████▏   | 62/100 [01:08<00:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4872:  63%|██████▎   | 63/100 [01:10<00:40,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4835:  64%|██████▍   | 64/100 [01:11<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4835:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4805:  66%|██████▌   | 66/100 [01:13<00:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4781:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4759:  68%|██████▊   | 68/100 [01:15<00:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4772:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4742:  70%|███████   | 70/100 [01:17<00:32,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4796:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4761:  72%|███████▏  | 72/100 [01:19<00:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4758:  73%|███████▎  | 73/100 [01:21<00:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.477:  74%|███████▍  | 74/100 [01:22<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4741:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4711:  76%|███████▌  | 76/100 [01:24<00:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4689:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4657:  78%|███████▊  | 78/100 [01:26<00:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4648:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4703:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4712:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4677:  82%|████████▏ | 82/100 [01:30<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4657:  83%|████████▎ | 83/100 [01:32<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4633:  84%|████████▍ | 84/100 [01:33<00:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4638:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4605:  86%|████████▌ | 86/100 [01:35<00:15,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4573:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4539:  88%|████████▊ | 88/100 [01:37<00:13,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4567:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4567:  90%|█████████ | 90/100 [01:39<00:11,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4535:  91%|█████████ | 91/100 [01:40<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4616:  92%|█████████▏| 92/100 [01:42<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4601:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4569:  94%|█████████▍| 94/100 [01:44<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4539:  95%|█████████▌| 95/100 [01:45<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4516:  96%|█████████▌| 96/100 [01:46<00:04,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4515:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4482:  98%|█████████▊| 98/100 [01:48<00:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4454:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 6, train loss: 0.4429: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 6, valid loss: 0.9053:   4%|▍         | 2/51 [00:00<00:09,  5.30it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9142:   8%|▊         | 4/51 [00:00<00:08,  5.37it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9083:  12%|█▏        | 6/51 [00:01<00:08,  5.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.922:  16%|█▌        | 8/51 [00:01<00:07,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9711:  20%|█▉        | 10/51 [00:01<00:07,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9584:  24%|██▎       | 12/51 [00:02<00:07,  5.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9828:  27%|██▋       | 14/51 [00:02<00:06,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9903:  31%|███▏      | 16/51 [00:02<00:06,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0012:  35%|███▌      | 18/51 [00:03<00:06,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9858:  39%|███▉      | 20/51 [00:03<00:05,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 0.9828:  43%|████▎     | 22/51 [00:04<00:05,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.013:  47%|████▋     | 24/51 [00:04<00:04,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0264:  51%|█████     | 26/51 [00:04<00:04,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0521:  55%|█████▍    | 28/51 [00:05<00:04,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0869:  59%|█████▉    | 30/51 [00:05<00:03,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0906:  63%|██████▎   | 32/51 [00:05<00:03,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.1006:  67%|██████▋   | 34/51 [00:06<00:03,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.1085:  71%|███████   | 36/51 [00:06<00:02,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.128:  75%|███████▍  | 38/51 [00:06<00:02,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.1111:  78%|███████▊  | 40/51 [00:07<00:02,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0946:  82%|████████▏ | 42/51 [00:07<00:01,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0762:  86%|████████▋ | 44/51 [00:08<00:01,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0909:  90%|█████████ | 46/51 [00:08<00:00,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0875:  94%|█████████▍| 48/51 [00:08<00:00,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0802:  98%|█████████▊| 50/51 [00:09<00:00,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.0704: 100%|██████████| 51/51 [00:09<00:00,  5.48it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5236:   1%|          | 1/100 [00:01<01:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5199:   2%|▏         | 2/100 [00:02<01:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5218:   3%|▎         | 3/100 [00:03<01:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5217:   4%|▍         | 4/100 [00:04<01:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5271:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5263:   6%|▌         | 6/100 [00:06<01:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5271:   7%|▋         | 7/100 [00:07<01:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5233:   8%|▊         | 8/100 [00:08<01:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5202:   9%|▉         | 9/100 [00:09<01:40,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5175:  10%|█         | 10/100 [00:11<01:38,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5138:  11%|█         | 11/100 [00:12<01:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5135:  12%|█▏        | 12/100 [00:13<01:36,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5152:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5111:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5083:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5071:  16%|█▌        | 16/100 [00:17<01:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5058:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.507:  18%|█▊        | 18/100 [00:19<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5033:  19%|█▉        | 19/100 [00:21<01:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5001:  20%|██        | 20/100 [00:22<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.5008:  21%|██        | 21/100 [00:23<01:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4978:  22%|██▏       | 22/100 [00:24<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4948:  23%|██▎       | 23/100 [00:25<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.494:  24%|██▍       | 24/100 [00:26<01:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.495:  25%|██▌       | 25/100 [00:27<01:23,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4941:  26%|██▌       | 26/100 [00:28<01:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4957:  27%|██▋       | 27/100 [00:29<01:21,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4924:  28%|██▊       | 28/100 [00:31<01:19,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4896:  29%|██▉       | 29/100 [00:32<01:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4862:  30%|███       | 30/100 [00:33<01:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.485:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4817:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4796:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4771:  34%|███▍      | 34/100 [00:37<01:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4733:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4695:  36%|███▌      | 36/100 [00:39<01:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4659:  37%|███▋      | 37/100 [00:41<01:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4622:  38%|███▊      | 38/100 [00:42<01:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4585:  39%|███▉      | 39/100 [00:43<01:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4548:  40%|████      | 40/100 [00:44<01:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4521:  41%|████      | 41/100 [00:45<01:05,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4572:  42%|████▏     | 42/100 [00:46<01:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4539:  43%|████▎     | 43/100 [00:47<01:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4525:  44%|████▍     | 44/100 [00:48<01:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4487:  45%|████▌     | 45/100 [00:49<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4468:  46%|████▌     | 46/100 [00:51<00:59,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4453:  47%|████▋     | 47/100 [00:52<00:59,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.449:  48%|████▊     | 48/100 [00:53<00:57,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4455:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4421:  50%|█████     | 50/100 [00:55<00:55,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4473:  51%|█████     | 51/100 [00:56<00:54,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4451:  52%|█████▏    | 52/100 [00:57<00:53,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4422:  53%|█████▎    | 53/100 [00:58<00:52,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4395:  54%|█████▍    | 54/100 [00:59<00:50,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4367:  55%|█████▌    | 55/100 [01:01<00:49,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4335:  56%|█████▌    | 56/100 [01:02<00:48,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4311:  57%|█████▋    | 57/100 [01:03<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4277:  58%|█████▊    | 58/100 [01:04<00:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4246:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4285:  60%|██████    | 60/100 [01:06<00:44,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4266:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4242:  62%|██████▏   | 62/100 [01:08<00:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4308:  63%|██████▎   | 63/100 [01:09<00:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4278:  64%|██████▍   | 64/100 [01:10<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4283:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4257:  66%|██████▌   | 66/100 [01:13<00:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4224:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4214:  68%|██████▊   | 68/100 [01:15<00:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4213:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4187:  70%|███████   | 70/100 [01:17<00:33,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4177:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4205:  72%|███████▏  | 72/100 [01:19<00:31,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4188:  73%|███████▎  | 73/100 [01:20<00:30,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4173:  74%|███████▍  | 74/100 [01:22<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4189:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4181:  76%|███████▌  | 76/100 [01:24<00:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4276:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4257:  78%|███████▊  | 78/100 [01:26<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4288:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4311:  80%|████████  | 80/100 [01:28<00:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4278:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4253:  82%|████████▏ | 82/100 [01:30<00:20,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.422:  83%|████████▎ | 83/100 [01:32<00:19,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.421:  84%|████████▍ | 84/100 [01:33<00:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4184:  85%|████████▌ | 85/100 [01:34<00:16,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4155:  86%|████████▌ | 86/100 [01:35<00:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4196:  87%|████████▋ | 87/100 [01:36<00:14,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4168:  88%|████████▊ | 88/100 [01:37<00:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4238:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4285:  90%|█████████ | 90/100 [01:39<00:11,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4319:  91%|█████████ | 91/100 [01:40<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4286:  92%|█████████▏| 92/100 [01:42<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4301:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4302:  94%|█████████▍| 94/100 [01:44<00:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4283:  95%|█████████▌| 95/100 [01:45<00:05,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4309:  96%|█████████▌| 96/100 [01:46<00:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4288:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4257:  98%|█████████▊| 98/100 [01:48<00:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.4271:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 7, train loss: 0.4241: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 7, valid loss: 0.9896:   4%|▍         | 2/51 [00:00<00:09,  5.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0097:   8%|▊         | 4/51 [00:00<00:08,  5.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0081:  12%|█▏        | 6/51 [00:01<00:08,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0192:  16%|█▌        | 8/51 [00:01<00:07,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0384:  20%|█▉        | 10/51 [00:01<00:07,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0226:  24%|██▎       | 12/51 [00:02<00:07,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0316:  27%|██▋       | 14/51 [00:02<00:06,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0528:  31%|███▏      | 16/51 [00:02<00:06,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0668:  35%|███▌      | 18/51 [00:03<00:06,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0495:  39%|███▉      | 20/51 [00:03<00:05,  5.38it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0355:  43%|████▎     | 22/51 [00:04<00:05,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0829:  47%|████▋     | 24/51 [00:04<00:04,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.0933:  51%|█████     | 26/51 [00:04<00:04,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1271:  55%|█████▍    | 28/51 [00:05<00:04,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1831:  59%|█████▉    | 30/51 [00:05<00:03,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.18:  63%|██████▎   | 32/51 [00:05<00:03,  5.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1855:  67%|██████▋   | 34/51 [00:06<00:03,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1919:  71%|███████   | 36/51 [00:06<00:02,  5.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.2194:  75%|███████▍  | 38/51 [00:07<00:02,  5.34it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.2099:  78%|███████▊  | 40/51 [00:07<00:02,  5.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1887:  82%|████████▏ | 42/51 [00:07<00:01,  5.33it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1681:  86%|████████▋ | 44/51 [00:08<00:01,  5.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1799:  90%|█████████ | 46/51 [00:08<00:00,  5.36it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1769:  94%|█████████▍| 48/51 [00:08<00:00,  5.35it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1895:  98%|█████████▊| 50/51 [00:09<00:00,  5.40it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.1786: 100%|██████████| 51/51 [00:09<00:00,  5.41it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3385:   1%|          | 1/100 [00:01<01:51,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3419:   2%|▏         | 2/100 [00:02<01:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3393:   3%|▎         | 3/100 [00:03<01:48,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3377:   4%|▍         | 4/100 [00:04<01:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3372:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.341:   6%|▌         | 6/100 [00:06<01:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3442:   7%|▋         | 7/100 [00:07<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3417:   8%|▊         | 8/100 [00:08<01:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3426:   9%|▉         | 9/100 [00:10<01:41,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.341:  10%|█         | 10/100 [00:11<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3416:  11%|█         | 11/100 [00:12<01:39,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3397:  12%|█▏        | 12/100 [00:13<01:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3385:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.336:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3338:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3318:  16%|█▌        | 16/100 [00:17<01:32,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3312:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.33:  18%|█▊        | 18/100 [00:19<01:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3279:  19%|█▉        | 19/100 [00:21<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3327:  20%|██        | 20/100 [00:22<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3309:  21%|██        | 21/100 [00:23<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3367:  22%|██▏       | 22/100 [00:24<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3344:  23%|██▎       | 23/100 [00:25<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3324:  24%|██▍       | 24/100 [00:26<01:23,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3302:  25%|██▌       | 25/100 [00:27<01:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3307:  26%|██▌       | 26/100 [00:28<01:21,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3305:  27%|██▋       | 27/100 [00:29<01:20,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.329:  28%|██▊       | 28/100 [00:31<01:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3267:  29%|██▉       | 29/100 [00:32<01:18,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3288:  30%|███       | 30/100 [00:33<01:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3269:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3247:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3225:  33%|███▎      | 33/100 [00:36<01:14,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.321:  34%|███▍      | 34/100 [00:37<01:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3236:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3237:  36%|███▌      | 36/100 [00:39<01:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3238:  37%|███▋      | 37/100 [00:41<01:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3219:  38%|███▊      | 38/100 [00:42<01:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3209:  39%|███▉      | 39/100 [00:43<01:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3222:  40%|████      | 40/100 [00:44<01:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3199:  41%|████      | 41/100 [00:45<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.318:  42%|████▏     | 42/100 [00:46<01:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3156:  43%|████▎     | 43/100 [00:47<01:03,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3133:  44%|████▍     | 44/100 [00:48<01:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3132:  45%|████▌     | 45/100 [00:49<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3127:  46%|████▌     | 46/100 [00:50<00:59,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3139:  47%|████▋     | 47/100 [00:52<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3191:  48%|████▊     | 48/100 [00:53<00:57,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3171:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3239:  50%|█████     | 50/100 [00:55<00:55,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3221:  51%|█████     | 51/100 [00:56<00:54,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3201:  52%|█████▏    | 52/100 [00:57<00:53,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3179:  53%|█████▎    | 53/100 [00:58<00:52,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.321:  54%|█████▍    | 54/100 [00:59<00:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3196:  55%|█████▌    | 55/100 [01:00<00:50,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3177:  56%|█████▌    | 56/100 [01:02<00:48,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3153:  57%|█████▋    | 57/100 [01:03<00:48,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3132:  58%|█████▊    | 58/100 [01:04<00:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.313:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3113:  60%|██████    | 60/100 [01:06<00:44,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3176:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3151:  62%|██████▏   | 62/100 [01:08<00:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3131:  63%|██████▎   | 63/100 [01:09<00:40,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3113:  64%|██████▍   | 64/100 [01:10<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3141:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3118:  66%|██████▌   | 66/100 [01:13<00:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3145:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3167:  68%|██████▊   | 68/100 [01:15<00:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3199:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3233:  70%|███████   | 70/100 [01:17<00:33,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3211:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3189:  72%|███████▏  | 72/100 [01:19<00:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3177:  73%|███████▎  | 73/100 [01:20<00:29,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3184:  74%|███████▍  | 74/100 [01:21<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3162:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.314:  76%|███████▌  | 76/100 [01:24<00:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3118:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3094:  78%|███████▊  | 78/100 [01:26<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.307:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3054:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3034:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.306:  82%|████████▏ | 82/100 [01:30<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3046:  83%|████████▎ | 83/100 [01:31<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3024:  84%|████████▍ | 84/100 [01:33<00:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3018:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3023:  86%|████████▌ | 86/100 [01:35<00:15,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.3001:  87%|████████▋ | 87/100 [01:36<00:14,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2991:  88%|████████▊ | 88/100 [01:37<00:13,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2972:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2956:  90%|█████████ | 90/100 [01:39<00:11,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2935:  91%|█████████ | 91/100 [01:40<00:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2914:  92%|█████████▏| 92/100 [01:41<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2891:  93%|█████████▎| 93/100 [01:42<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2872:  94%|█████████▍| 94/100 [01:44<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2851:  95%|█████████▌| 95/100 [01:45<00:05,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2829:  96%|█████████▌| 96/100 [01:46<00:04,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2818:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2859:  98%|█████████▊| 98/100 [01:48<00:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.2841:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 8, train loss: 0.2905: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 8, valid loss: 1.7621:   4%|▍         | 2/51 [00:00<00:09,  5.32it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.758:   8%|▊         | 4/51 [00:00<00:08,  5.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7419:  12%|█▏        | 6/51 [00:01<00:08,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7623:  16%|█▌        | 8/51 [00:01<00:07,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7633:  20%|█▉        | 10/51 [00:01<00:07,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7333:  24%|██▎       | 12/51 [00:02<00:07,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7349:  27%|██▋       | 14/51 [00:02<00:06,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7405:  31%|███▏      | 16/51 [00:02<00:06,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7379:  35%|███▌      | 18/51 [00:03<00:06,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7069:  39%|███▉      | 20/51 [00:03<00:05,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6836:  43%|████▎     | 22/51 [00:04<00:05,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7064:  47%|████▋     | 24/51 [00:04<00:04,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.721:  51%|█████     | 26/51 [00:04<00:04,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7455:  55%|█████▍    | 28/51 [00:05<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7852:  59%|█████▉    | 30/51 [00:05<00:03,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7776:  63%|██████▎   | 32/51 [00:05<00:03,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7706:  67%|██████▋   | 34/51 [00:06<00:03,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7685:  71%|███████   | 36/51 [00:06<00:02,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7957:  75%|███████▍  | 38/51 [00:06<00:02,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7649:  78%|███████▊  | 40/51 [00:07<00:02,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7319:  82%|████████▏ | 42/51 [00:07<00:01,  5.45it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7005:  86%|████████▋ | 44/51 [00:08<00:01,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.7076:  90%|█████████ | 46/51 [00:08<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6993:  94%|█████████▍| 48/51 [00:08<00:00,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6975:  98%|█████████▊| 50/51 [00:09<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 1.6811: 100%|██████████| 51/51 [00:09<00:00,  5.48it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0764:   1%|          | 1/100 [00:01<01:50,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0765:   2%|▏         | 2/100 [00:02<01:47,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0765:   3%|▎         | 3/100 [00:03<01:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0767:   4%|▍         | 4/100 [00:04<01:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0784:   5%|▌         | 5/100 [00:05<01:46,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0788:   6%|▌         | 6/100 [00:06<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0788:   7%|▋         | 7/100 [00:07<01:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0842:   8%|▊         | 8/100 [00:08<01:41,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0858:   9%|▉         | 9/100 [00:09<01:40,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0856:  10%|█         | 10/100 [00:11<01:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0893:  11%|█         | 11/100 [00:12<01:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0891:  12%|█▏        | 12/100 [00:13<01:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0893:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0912:  14%|█▍        | 14/100 [00:15<01:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.091:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0928:  16%|█▌        | 16/100 [00:17<01:32,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.093:  17%|█▋        | 17/100 [00:18<01:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.093:  18%|█▊        | 18/100 [00:19<01:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.0995:  19%|█▉        | 19/100 [00:21<01:30,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1034:  20%|██        | 20/100 [00:22<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1033:  21%|██        | 21/100 [00:23<01:28,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1031:  22%|██▏       | 22/100 [00:24<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1074:  23%|██▎       | 23/100 [00:25<01:26,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1088:  24%|██▍       | 24/100 [00:26<01:24,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1109:  25%|██▌       | 25/100 [00:27<01:23,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1154:  26%|██▌       | 26/100 [00:28<01:22,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.115:  27%|██▋       | 27/100 [00:29<01:21,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1146:  28%|██▊       | 28/100 [00:31<01:20,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1185:  29%|██▉       | 29/100 [00:32<01:19,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1188:  30%|███       | 30/100 [00:33<01:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1201:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1198:  32%|███▏      | 32/100 [00:35<01:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1198:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1198:  34%|███▍      | 34/100 [00:37<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1239:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1237:  36%|███▌      | 36/100 [00:39<01:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1243:  37%|███▋      | 37/100 [00:41<01:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1273:  38%|███▊      | 38/100 [00:42<01:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1317:  39%|███▉      | 39/100 [00:43<01:08,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1312:  40%|████      | 40/100 [00:44<01:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1311:  41%|████      | 41/100 [00:45<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1328:  42%|████▏     | 42/100 [00:46<01:04,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1342:  43%|████▎     | 43/100 [00:47<01:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1338:  44%|████▍     | 44/100 [00:48<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1347:  45%|████▌     | 45/100 [00:49<01:01,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1342:  46%|████▌     | 46/100 [00:51<00:59,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1344:  47%|████▋     | 47/100 [00:52<00:58,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1339:  48%|████▊     | 48/100 [00:53<00:57,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1377:  49%|████▉     | 49/100 [00:54<00:57,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1378:  50%|█████     | 50/100 [00:55<00:55,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1379:  51%|█████     | 51/100 [00:56<00:54,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1376:  52%|█████▏    | 52/100 [00:57<00:53,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.137:  53%|█████▎    | 53/100 [00:58<00:52,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1373:  54%|█████▍    | 54/100 [00:59<00:51,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1376:  55%|█████▌    | 55/100 [01:01<00:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1376:  56%|█████▌    | 56/100 [01:02<00:48,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.137:  57%|█████▋    | 57/100 [01:03<00:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1365:  58%|█████▊    | 58/100 [01:04<00:46,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1358:  59%|█████▉    | 59/100 [01:05<00:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1356:  60%|██████    | 60/100 [01:06<00:44,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1475:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1528:  62%|██████▏   | 62/100 [01:08<00:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1521:  63%|██████▎   | 63/100 [01:09<00:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1516:  64%|██████▍   | 64/100 [01:11<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1518:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1605:  66%|██████▌   | 66/100 [01:13<00:37,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1599:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1608:  68%|██████▊   | 68/100 [01:15<00:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1628:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1619:  70%|███████   | 70/100 [01:17<00:33,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1612:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1605:  72%|███████▏  | 72/100 [01:19<00:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1612:  73%|███████▎  | 73/100 [01:21<00:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1658:  74%|███████▍  | 74/100 [01:22<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1652:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1683:  76%|███████▌  | 76/100 [01:24<00:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1711:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1701:  78%|███████▊  | 78/100 [01:26<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1697:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1705:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1696:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1687:  82%|████████▏ | 82/100 [01:30<00:19,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1677:  83%|████████▎ | 83/100 [01:32<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1669:  84%|████████▍ | 84/100 [01:33<00:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1663:  85%|████████▌ | 85/100 [01:34<00:16,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1657:  86%|████████▌ | 86/100 [01:35<00:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1651:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1641:  88%|████████▊ | 88/100 [01:37<00:13,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1715:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1708:  90%|█████████ | 90/100 [01:39<00:10,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1704:  91%|█████████ | 91/100 [01:40<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1747:  92%|█████████▏| 92/100 [01:42<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1736:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1728:  94%|█████████▍| 94/100 [01:44<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1721:  95%|█████████▌| 95/100 [01:45<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1712:  96%|█████████▌| 96/100 [01:46<00:04,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1743:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1777:  98%|█████████▊| 98/100 [01:48<00:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1783:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 9, train loss: 0.1851: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 9, valid loss: 0.8833:   4%|▍         | 2/51 [00:00<00:08,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9092:   8%|▊         | 4/51 [00:00<00:08,  5.44it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.8955:  12%|█▏        | 6/51 [00:01<00:08,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.8916:  16%|█▌        | 8/51 [00:01<00:07,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9233:  20%|█▉        | 10/51 [00:01<00:07,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9101:  24%|██▎       | 12/51 [00:02<00:07,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9293:  27%|██▋       | 14/51 [00:02<00:06,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.947:  31%|███▏      | 16/51 [00:02<00:06,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9692:  35%|███▌      | 18/51 [00:03<00:05,  5.56it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9542:  39%|███▉      | 20/51 [00:03<00:05,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9498:  43%|████▎     | 22/51 [00:03<00:05,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 0.9999:  47%|████▋     | 24/51 [00:04<00:04,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0056:  51%|█████     | 26/51 [00:04<00:04,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0347:  55%|█████▍    | 28/51 [00:05<00:04,  5.51it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0962:  59%|█████▉    | 30/51 [00:05<00:03,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0962:  63%|██████▎   | 32/51 [00:05<00:03,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1031:  67%|██████▋   | 34/51 [00:06<00:03,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1127:  71%|███████   | 36/51 [00:06<00:02,  5.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1397:  75%|███████▍  | 38/51 [00:06<00:02,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1195:  78%|███████▊  | 40/51 [00:07<00:02,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1029:  82%|████████▏ | 42/51 [00:07<00:01,  5.41it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0834:  86%|████████▋ | 44/51 [00:08<00:01,  5.46it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0961:  90%|█████████ | 46/51 [00:08<00:00,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.0923:  94%|█████████▍| 48/51 [00:08<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1128:  98%|█████████▊| 50/51 [00:09<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 1.1022: 100%|██████████| 51/51 [00:09<00:00,  5.49it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 0, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0647:   1%|          | 1/100 [00:01<01:50,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0652:   2%|▏         | 2/100 [00:02<01:47,  1.09s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0653:   3%|▎         | 3/100 [00:03<01:47,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0653:   4%|▍         | 4/100 [00:04<01:45,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0654:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0654:   6%|▌         | 6/100 [00:06<01:43,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0655:   7%|▋         | 7/100 [00:07<01:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.071:   8%|▊         | 8/100 [00:08<01:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.071:   9%|▉         | 9/100 [00:09<01:41,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.071:  10%|█         | 10/100 [00:11<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0718:  11%|█         | 11/100 [00:12<01:39,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0719:  12%|█▏        | 12/100 [00:13<01:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0722:  13%|█▎        | 13/100 [00:14<01:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0722:  14%|█▍        | 14/100 [00:15<01:35,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0725:  15%|█▌        | 15/100 [00:16<01:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0731:  16%|█▌        | 16/100 [00:17<01:32,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0746:  17%|█▋        | 17/100 [00:18<01:31,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0763:  18%|█▊        | 18/100 [00:19<01:30,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0777:  19%|█▉        | 19/100 [00:21<01:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0775:  20%|██        | 20/100 [00:22<01:28,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0801:  21%|██        | 21/100 [00:23<01:28,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0804:  22%|██▏       | 22/100 [00:24<01:26,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0808:  23%|██▎       | 23/100 [00:25<01:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0811:  24%|██▍       | 24/100 [00:26<01:23,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0845:  25%|██▌       | 25/100 [00:27<01:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0852:  26%|██▌       | 26/100 [00:28<01:21,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0852:  27%|██▋       | 27/100 [00:29<01:20,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0867:  28%|██▊       | 28/100 [00:30<01:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.087:  29%|██▉       | 29/100 [00:32<01:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0875:  30%|███       | 30/100 [00:33<01:17,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0882:  31%|███       | 31/100 [00:34<01:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0882:  32%|███▏      | 32/100 [00:35<01:15,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.088:  33%|███▎      | 33/100 [00:36<01:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0883:  34%|███▍      | 34/100 [00:37<01:12,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.088:  35%|███▌      | 35/100 [00:38<01:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.089:  36%|███▌      | 36/100 [00:39<01:10,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0889:  37%|███▋      | 37/100 [00:40<01:10,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0889:  38%|███▊      | 38/100 [00:42<01:08,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0888:  39%|███▉      | 39/100 [00:43<01:08,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0889:  40%|████      | 40/100 [00:44<01:06,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0892:  41%|████      | 41/100 [00:45<01:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0891:  42%|████▏     | 42/100 [00:46<01:04,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0897:  43%|████▎     | 43/100 [00:47<01:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0894:  44%|████▍     | 44/100 [00:48<01:02,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0961:  45%|████▌     | 45/100 [00:49<01:01,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0958:  46%|████▌     | 46/100 [00:50<01:00,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0991:  47%|████▋     | 47/100 [00:52<00:59,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1008:  48%|████▊     | 48/100 [00:53<00:57,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1008:  49%|████▉     | 49/100 [00:54<00:56,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1005:  50%|█████     | 50/100 [00:55<00:55,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1007:  51%|█████     | 51/100 [00:56<00:54,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1003:  52%|█████▏    | 52/100 [00:57<00:53,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1006:  53%|█████▎    | 53/100 [00:58<00:52,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1003:  54%|█████▍    | 54/100 [00:59<00:51,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1009:  55%|█████▌    | 55/100 [01:01<00:50,  1.13s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1008:  56%|█████▌    | 56/100 [01:02<00:49,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1027:  57%|█████▋    | 57/100 [01:03<00:48,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1066:  58%|█████▊    | 58/100 [01:04<00:46,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1068:  59%|█████▉    | 59/100 [01:05<00:45,  1.12s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1064:  60%|██████    | 60/100 [01:06<00:44,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1063:  61%|██████    | 61/100 [01:07<00:43,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1062:  62%|██████▏   | 62/100 [01:08<00:42,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1058:  63%|██████▎   | 63/100 [01:09<00:41,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1054:  64%|██████▍   | 64/100 [01:10<00:39,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1051:  65%|██████▌   | 65/100 [01:12<00:38,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1049:  66%|██████▌   | 66/100 [01:13<00:37,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1054:  67%|██████▋   | 67/100 [01:14<00:36,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1059:  68%|██████▊   | 68/100 [01:15<00:35,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1054:  69%|██████▉   | 69/100 [01:16<00:34,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1061:  70%|███████   | 70/100 [01:17<00:33,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1058:  71%|███████   | 71/100 [01:18<00:32,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1053:  72%|███████▏  | 72/100 [01:19<00:30,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1051:  73%|███████▎  | 73/100 [01:20<00:29,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1052:  74%|███████▍  | 74/100 [01:22<00:28,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1048:  75%|███████▌  | 75/100 [01:23<00:27,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1049:  76%|███████▌  | 76/100 [01:24<00:26,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1094:  77%|███████▋  | 77/100 [01:25<00:25,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1094:  78%|███████▊  | 78/100 [01:26<00:24,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1097:  79%|███████▉  | 79/100 [01:27<00:23,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1092:  80%|████████  | 80/100 [01:28<00:22,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1089:  81%|████████  | 81/100 [01:29<00:21,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1086:  82%|████████▏ | 82/100 [01:30<00:19,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1088:  83%|████████▎ | 83/100 [01:32<00:18,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1095:  84%|████████▍ | 84/100 [01:33<00:17,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1091:  85%|████████▌ | 85/100 [01:34<00:16,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1093:  86%|████████▌ | 86/100 [01:35<00:15,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1088:  87%|████████▋ | 87/100 [01:36<00:14,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1088:  88%|████████▊ | 88/100 [01:37<00:13,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1086:  89%|████████▉ | 89/100 [01:38<00:12,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1085:  90%|█████████ | 90/100 [01:39<00:11,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1117:  91%|█████████ | 91/100 [01:40<00:09,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.119:  92%|█████████▏| 92/100 [01:42<00:08,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1212:  93%|█████████▎| 93/100 [01:43<00:07,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1211:  94%|█████████▍| 94/100 [01:44<00:06,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1213:  95%|█████████▌| 95/100 [01:45<00:05,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1215:  96%|█████████▌| 96/100 [01:46<00:04,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1225:  97%|█████████▋| 97/100 [01:47<00:03,  1.11s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1236:  98%|█████████▊| 98/100 [01:48<00:02,  1.10s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1234:  99%|█████████▉| 99/100 [01:49<00:01,  1.11s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 10, train loss: 0.1228: 100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
epoch: 10, valid loss: 1.9726:   4%|▍         | 2/51 [00:00<00:09,  5.39it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9908:   8%|▊         | 4/51 [00:00<00:08,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9601:  12%|█▏        | 6/51 [00:01<00:08,  5.43it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9481:  16%|█▌        | 8/51 [00:01<00:07,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9701:  20%|█▉        | 10/51 [00:01<00:07,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9319:  24%|██▎       | 12/51 [00:02<00:07,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9364:  27%|██▋       | 14/51 [00:02<00:06,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9359:  31%|███▏      | 16/51 [00:02<00:06,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9531:  35%|███▌      | 18/51 [00:03<00:06,  5.48it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.926:  39%|███▉      | 20/51 [00:03<00:05,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.8979:  43%|████▎     | 22/51 [00:04<00:05,  5.57it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9332:  47%|████▋     | 24/51 [00:04<00:04,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9278:  51%|█████     | 26/51 [00:04<00:04,  5.54it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9471:  55%|█████▍    | 28/51 [00:05<00:04,  5.55it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9814:  59%|█████▉    | 30/51 [00:05<00:03,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.954:  63%|██████▎   | 32/51 [00:05<00:03,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9409:  67%|██████▋   | 34/51 [00:06<00:03,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.945:  71%|███████   | 36/51 [00:06<00:02,  5.47it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.9521:  75%|███████▍  | 38/51 [00:06<00:02,  5.42it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.923:  78%|███████▊  | 40/51 [00:07<00:01,  5.50it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.8969:  82%|████████▏ | 42/51 [00:07<00:01,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.8624:  86%|████████▋ | 44/51 [00:08<00:01,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.8664:  90%|█████████ | 46/51 [00:08<00:00,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.8544:  94%|█████████▍| 48/51 [00:08<00:00,  5.52it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.838:  98%|█████████▊| 50/51 [00:09<00:00,  5.53it/s]

torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 1.8201: 100%|██████████| 51/51 [00:09<00:00,  5.49it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




In [19]:
!head -n 3 /proc/meminfo

MemTotal:       87535908 kB
MemFree:        72828044 kB
MemAvailable:   81794680 kB


In [20]:
!nvidia-smi

Sat Sep  2 13:49:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    71W / 350W |  20707MiB / 40960MiB |     58%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces