In [65]:
!pip install transformers
!pip install datasets #데이터 세트 다운로드 Hugging Face 연동



In [66]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [67]:
import copy # 특정한 파이썬 객체를 통째로 메모리에 copy할 때
import json # json 형식으로 데이터를 표현할 때
import logging # 학습 과정 등 전반적인 프로그램의 진행 상황을 로깅할 때
import os # 파일 입출력 등 현재 컴퓨터에 대한 기능 수행할 때

# 경고(warning) 메시지가 너무 많이 나오는 것을 대비하여 무시 처리
import warnings
warnings.filterwarnings("ignore")

# 로깅할 때 기본적으로 오류(error) 사항으로 로그 메시지를 남기겠다는 의미
import logging
logging.basicConfig(level=logging.ERROR)

# 벡터, 행렬 등의 처리를 위한 NumPy, 테이블(엑셀) 형식의 데이터 처리할 때 Pandas
import numpy as np
import pandas as pd

from datasets import load_dataset
# train_test_split: 별도로 구분된 validation 세트가 없을 때
# 학습 데이터 세트에서 일부를 train과 validation으로 나눌 때 자주 사용 (8:2 정도로 나눔)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch

### 학습한 모델 관련 라이브러리 불러오기

In [68]:
import transformers
# Auto Model For Sequence Classification: 텍스트 분류를 위한 모델 → Cross-Entropy loss 사용
from transformers import AutoConfig, AutoModel, AutoModelForSequenceClassification, AutoTokenizer
# linear_schedule_with_warmup: 단계적으로 learning rate 줄여나가는 방법
# AdamW: SGD와 같이 optimization 방법 중 하나
from transformers import AdamW, get_linear_schedule_with_warmup

### 우리가 쓸 모델

- KoBigBird를 사용하고, 다음과 같은 형태로 사용 가능
- KoBigBird: BigBird 특유의 sparse attention 사용 (default)

In [69]:
from transformers import AutoModel, AutoTokenizer

# by default its in `block_sparse` mode with num_random_blocks=3, block_size=64
# 이름에서부터 알 수 있듯이 KoBigBird는 BERT 기반의 모델
model = AutoModel.from_pretrained("monologg/kobigbird-bert-base")

# Tokenizer도 마찬가지로 BERT 기반에서 가져온 것을 확인
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
text = "한국어 BigBird 모델을 공개합니다!"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

Attention type 'block_sparse' is not possible if sequence_length: 12 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


### 실험을 위한 하이퍼 파라미터 설정

In [70]:
# SimpleNamespace는 온점(.)으로 속성의 값을 정의할 수 있도록 해주는 라이브러리
# config.task = "cls"라고 하면, 나중에 print(config.task)했을 때 "cls"라고 출력
from types import SimpleNamespace

config = SimpleNamespace()

config.task = "cls"
config.dataset = "comment"

config.cache_dir = "cache" # 현재 데이터 세트에 대한 단어집 등 현재 task를 위한 임시적인 폴더
config.output_dir = "output" # 최종적인 모델이 저장되고, 결과가 저장되는 폴더

config.use_tpu = False
config.model_name_or_path = "monologg/kobigbird-bert-base" # Model name or path (HuggingFace에서 불러와 사용할 모델 이름)
config.data_dir = "./" # The input data dir ("10000_labeled.csv"가 있는 경로)

# 실질적으로 학습을 위해서는 tokenizing이 수행된 train file과 predict file을 만들어야 한다.
config.train_file = "joongang.csv" # 미리 준비된 학습 데이터 세트 경로
# 어차피 10000_labeled.csv를 (1) training 목적, (2) validatoin 목적으로 쪼개니까 평가는 validation으로 결과가 나올 것임
config.predict_file = "joongang.csv" # 미리 준비된 평가 데이터 세트 경로
# → 이거 일단 없으면, 지금처럼 train_file로 넣으시되, 나중에 생기시면 갈아끼우기

config.max_seq_length = 1024 # The maximum total input sequence length after tokenization. (최대 토큰 길이)
config.train_batch_size = 4 # Batch size for training. (학습할 때 batch_size)
config.eval_batch_size = 2 # Batch size for evaluation. (평가할 때 batch_size)

config.learning_rate = 3e-5 # The initial learning rate for Adam. (Adam optimizer에서 쓸 learning rate)
config.num_train_epochs = 10 # Total number of training epochs to perform. (전체 학습 epoch 수)

config.num_labels = 5 # 현재 task에서 선호도(1: 극진보, 2: 진보, 3: 중립, 4: 보수, 5: 극보수)의 개수는 5개이므로
# 5 classes multi-class classification 문제로 이해 할 수 있음
config.gradient_accumulation_steps = 2 # Number of updates steps to accumulate before performing a backward/update pass.
# batch_size가 큰 것처럼 처리하기 위해서, backward()를 매 번 수행하지 않고, gradient를 누적(acculmulation)하는 것

config.threads = 4
config.seed = 42 # random seed for initialization

config.do_train = True # Whether to run training.
config.do_eval_during_train = True
config.do_eval = True # Whether to run prediction.

config.do_lower_case = False
config.weight_decay = 0.0 # Weight decay if we apply some.
config.adam_epsilon = 1e-8 # Epsilon for Adam optimizer.
config.max_grad_norm = 1.0 # Max gradient norm.
config.warmup_proportion = 0.0 # Warmup proportion for linear warmup
# BigBird에서는 full attention을 하면, 메모리는 조금 더 소모되지만, 더 정확도가 향상
# config.attention_type = "original_full"

### 학습 데이터 전처리
- 학습 text를 매번 tokenization을 하지 않고, 모델 학습 시작전에 미리 모든 텍스트를 tokenization 한 결과를 저장한다.

In [71]:
# 본 실습에서 사용할 tokenizer 객체 초기화
tokenizer = AutoTokenizer.from_pretrained(config.model_name_or_path, cache_dir=config.cache_dir)

In [72]:
def train_split(config, texts, labels, is_train):
    # 바로 여기에서 train 데이터 세트가 8:2로 training과 validation이 나누어 진다.
    # 지금 평가 결과는 validation에 대한 결과
    # [오류] stratify가 labels면, test_dataset에 특정 레이블이 아예 등장하지 않으면 오류 발생
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=labels
    )
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=None
    )
    if is_train:
        texts, labels = x_train, x_label
    else:
        texts, labels = y_train, y_label
    return texts, labels

# 댓글(comment)이 담긴 .csv 파일이 있을 때, 여기에서 텍스트와 레이블 추출
def process_comment_cls(config, data_file, is_train):
    df = pd.read_csv(data_file)
    df = df.dropna(subset=['title', 'content', 'label1', 'label2'])

    # 매 줄에서 "label1(정치성향)", "label2(편향여부)" 열 추출
    politics = (df["label1"] - 1).astype(int).values.tolist()
    governments = (df["label2"] - 1).astype(int).values.tolist()
    labels = []
    # 한 줄씩 데이터를 확인하며
    for i in range(len(politics)):
        politic = politics[i] # 선호도
        government = governments[i] # 비속어
        labels.append([politic, government])
    print(len(labels))

    # title과 content를 합쳐서 texts로 표현
    texts = (df["title"] + " " + df["content"]).astype(str).values.tolist()

    texts, labels = train_split(config, texts, labels, is_train)
    return texts, labels

### 데이터 토큰화
- 주어진 데이터를 토큰화하고, 토큰화된 데이터를 파일에 저장하는 함수를 정의.
- 주어진 데이터는 텍스트와 레이블로 구성되어 있으며, 텍스트는 토큰화되고, 레이블은 정수로 변환.
- 토큰화된 데이터와 변환된 레이블은 JSON 형식으로 파일에 저장

In [73]:
import torch.utils.data as torch_data

def data_pretokenizing(config, tokenizer, is_train=True):
    if is_train:
        data_file = config.train_file
    else:
        data_file = config.predict_file

    data_path = config.data_dir
    if data_file is not None:
        data_path = os.path.join(data_path, data_file)
    else:
        data_path += "/"

    # 실제로 tokenizer를  저장될 데이터 세트의 파일 이름이 바로 dataset_file
    comps = [
        data_path,
        config.dataset,
        config.model_name_or_path.replace("/", "_"),
        config.max_seq_length,
        "train" if is_train else "dev",
        "dataset.txt",
    ]
    dataset_file = "_".join([str(comp) for comp in comps])
    print("dataset_file:", dataset_file)

    # 텍스트 문장을 읽어와서 token 값만 저장
    with open(dataset_file, "w", encoding="utf-8") as writer_file:
        # data: "joongang.csv" 파일에서 읽어와 (텍스트, 선호도 레이블)만 남긴 .csv 파일
        cnt = 0
        total_data = process_comment_cls(config, data_path, is_train)
        # 학습 데이터 세트를 하나씩 확인하며
        for text, label in zip(total_data[0], total_data[1]):
            # 여기에서 data는 하나의 (텍스트, 레이블) 쌍
            # feature는 해당 텍스트를 tokenizer에 넣어서 나온 결과
            feature = tokenizer(text, max_length=config.max_seq_length, padding="max_length", truncation=True, add_special_tokens=True)
            # 실제로 학습을 위해서는 (각 토큰의 index, 정답 레이블)로 학습을 진행
            writed_data = {
                "input_ids": feature["input_ids"],
                "attention_mask": feature["attention_mask"],
                "politic": int(float(label[0])), # "2.0" → 2.0 → 2
                "government": int(float(label[1])), # "2.0" → 2.0 → 2
              }
            # JSON은 쉽게 말하면 Python에서 dictionary와 같음 → 이를 file로 저장하는 것
            writer_file.write(json.dumps(writed_data) + "\n")
            cnt += 1
        print(f"{cnt} features processed from {data_path}")

    return dataset_file


In [74]:
# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 세트 전처리
    train_dataset_file = data_pretokenizing(config, tokenizer=tokenizer)

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset_file = data_pretokenizing(config, tokenizer=tokenizer, is_train=False)

# 결과적으로 만들어진 "./10000_labeled.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt"
# 내용을 확인해 보면, 약 8,000개의 각 학습 데이터에 대하여
#   → 하나씩 {"input_ids", "attention_mask", "preference", "slang"}으로 구성

dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt
499
399 features processed from ./joongang.csv
dataset_file: ./joongang.csv_comment_monologg_kobigbird-bert-base_1024_dev_dataset.txt
499
100 features processed from ./joongang.csv


### 데이터로더 초기화

#### 데이터패딩

In [75]:
class IterableDatasetPad(torch.utils.data.IterableDataset):
    def __init__(
        self,
        dataset: torch.utils.data.IterableDataset,
        batch_size: int = 1,
        num_devices: int = 1,
        seed: int = 0,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.seed = seed
        self.num_examples = 0

        chunk_size = self.batch_size * num_devices
        length = len(dataset)
        self.length = length + (chunk_size - length % chunk_size)

    def __len__(self):
        return self.length

    def __iter__(self):
        self.num_examples = 0
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.seed + self.epoch)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            self.num_examples += 1
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == self.batch_size:
                for batch in current_batch:
                    yield batch
                    if first_batch is None:
                        first_batch = batch.copy()
                current_batch = []

        while self.num_examples < self.length:
            add_num = self.batch_size - len(current_batch)
            self.num_examples += add_num
            current_batch += [first_batch] * add_num
            for batch in current_batch:
                yield batch
            current_batch = []

#### 전처리된 데이터를 DataLoader로 불러옴

In [76]:
# 전처리된 데이터는 하나하나 {"input_ids", "attention_mask", "labels", ...} 형태를 가짐
# PyTorch가 하나의 배치를 처리할 때는 PyTorch Tensor 형태여야 함
# <데이터 로더에서 불러오는 "Tensor"를 정의하는 함수>
def collate_fn(features):
    input_ids = [sample["input_ids"] for sample in features]
    attention_mask = [sample["attention_mask"] for sample in features]

    politic = [sample["politic"] for sample in features]
    government = [sample["government"] for sample in features]

    input_ids = torch.tensor(np.array(input_ids).astype(np.int64), dtype=torch.long)
    attention_mask = torch.tensor(np.array(attention_mask).astype(np.int8), dtype=torch.long)
    politic = torch.tensor(np.array(politic).astype(np.int64), dtype=torch.long)
    government = torch.tensor(np.array(government).astype(np.int64), dtype=torch.long)
    inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
    }
    labels = {
        "politic": politic,
        "government" : government
    }
    return inputs, labels

# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 로더 초기화
    train_dataset = load_dataset("text", data_files=train_dataset_file, download_mode="force_redownload")["train"]
    train_dataset = train_dataset.map(lambda x: json.loads(x["text"]), batched=False)

    train_dataloader = torch_data.DataLoader(
        train_dataset,
        sampler=torch_data.RandomSampler(train_dataset),
        drop_last=False,
        batch_size=config.train_batch_size,
        collate_fn=(collate_fn),
    )

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset = load_dataset("text", data_files=predict_dataset_file, download_mode="force_redownload")["train"]
predict_dataset = predict_dataset.map(lambda x: json.loads(x["text"]), batched=False)
predict_dataset = IterableDatasetPad(
    dataset=predict_dataset,
    batch_size=config.eval_batch_size,
    num_devices=1,
    seed=config.seed,
)

predict_dataloader = torch_data.DataLoader(
    predict_dataset,
    sampler=None,
    drop_last=False,
    batch_size=config.eval_batch_size,
    collate_fn=(collate_fn),
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/399 [00:00<?, ? examples/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

### 텍스트 분류 모델 정의

In [77]:
from transformers import AutoModel
import torch.nn as nn

# 텍스트 분류 모델 정의
class ClsModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # (전처리된) 학습할 데이터 세트의 경로 설정
        data_file = os.path.join(config.data_dir, str(config.train_file))
        # 사전 학습된 모델 가중치 불러오기
        model_config = AutoConfig.from_pretrained(config.model_name_or_path, num_labels=config.num_labels)
        self.model = AutoModel.from_pretrained(
            config.model_name_or_path, config=model_config, cache_dir=config.cache_dir
        )
        self.classifier1 = nn.Linear(768, 5)
        self.classifier2 = nn.Linear(768, 5)

        # BERT model과 별개로, 입력 문자열을 토큰의 index로 바꾸어주는 tokenizer가 사용됨
        self.tokenizer = tokenizer

    # "학습된 모델"을 save_dir에 저장하는 함수
    def save_pretrained(self, save_dir):
        self.model.save_pretrained(save_dir)
        # Tokenizer는 기본적으로 "special_tokens_map_file", "tokenizer_file"을 가질 수 있음
        # 이러한 값을 제거한 뒤에 save_dir에 저장하겠다는 의미
        for key in ["special_tokens_map_file", "tokenizer_file"]:
            self.tokenizer.init_kwargs.pop(key, None)
        self.tokenizer.save_pretrained(save_dir)

    def get_optimizer(self): # 현재 모델을 학습하기 위한 최적화 방법(AdamW) 객체를 불러오는 함수
        # bias랑 LayerNorm에는 decay 적용하지 않겠다는 의미
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
        ]
        # AdamW의 첫 번째 인자(params)는 "학습할 가중치", weight_decay는 가중치에 적용되는 regularization 기법
        optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=config.adam_epsilon)
        return optimizer

    def get_scheduler(self, batch_num, optimizer): # AdamW로 학습할 때, learning rate을 단계적으로 줄이기 위한 함수
        if config.warmup_proportion == 0.0:
            return None

        t_total = batch_num // config.gradient_accumulation_steps * config.num_train_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(t_total * config.warmup_proportion),
            num_training_steps=t_total,
        )

        return scheduler

    def forward(self, inputs):
        # print(inputs) => {"input_ids", "attention_mask"}
        # https://huggingface.co/docs/transformers/model_doc/big_bird
        # BigBirdModel의 입력 양식에 맞게 넣어주어야 오류가 없음
        # BigBirdModel로 "input_ids", "attention_mask" 등 "미리 정해진" 규격에 맞는 입력만
        # 들어가야 오류가 없다는 의미 => 그러므로, preference, slang 등은 들어가면 X
        hidden = self.model(**inputs)
        # print(hidden.last_hidden_state.shape) # torch.Size([4 (batch_size), 1024 (seq_len), 768 (embedding_size)])
        # 마지막 레이어의 [CLS] 토큰만 가져오기
        cls_token_embeddings = hidden.last_hidden_state[:,0,:] # [batch_size, 768]
        # print(cls_token_embeddings.shape)
        output_1 = self.classifier1(cls_token_embeddings) # 768 → 5
        output_2 = self.classifier2(cls_token_embeddings) # 768 → 5
        print(output_1.shape) # [batch_size, 5]
        print(output_2.shape) # [batch_size, 5]

        return output_1, output_2

    def eval_step(self, inputs, labels, outputs):
        logits_1 = outputs[0].detach().cpu()
        logits_2 = outputs[1].detach().cpu()
        labels_1 = self.tensor_to_list(labels["politic"])
        labels_2 = self.tensor_to_list(labels["government"])
        predictions_1 = self.tensor_to_list(torch.argmax(logits_1, dim=-1))
        predictions_2 = self.tensor_to_list(torch.argmax(logits_2, dim=-1))
        results_1 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_1, labels_1)]
        results_2 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_2, labels_2)]
        return {"results_1": results_1, "results_2": results_2}





    # PyTorch의 Tensor 객체를 NumPy 객체로 변환
    def tensor_to_array(self, tensor):
        return tensor.detach().cpu().numpy()

    # PyTorch의 Tensor 객체를 Python의 리스트(list) 자료형으로 변환
    def tensor_to_list(self, tensor):
        return self.tensor_to_array(tensor).tolist()

In [78]:
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def cal_running_avg_loss(loss, running_avg_loss, decay=0.99):
    if running_avg_loss == 0:
        return loss
    running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
    return running_avg_loss

### 모델 학습 및 평가 라이브러리


In [79]:
from functools import partial
import sklearn.metrics as sklearn_metrics

"""binary_metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "precision": sklearn_metrics.precision_score, # TP / (TP + FP)
    "recall": sklearn_metrics.recall_score, # recall = sensitivity (민감도)
    "f1": sklearn_metrics.f1_score,
    "matthews_corrcoef": sklearn_metrics.matthews_corrcoef,
    "roc_auc": sklearn_metrics.roc_auc_score,
}""" # 우리는 두가지 task 다 다중분류임으로 안씀

metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "f1-macro": partial(sklearn_metrics.f1_score, average="macro"),
}


def eval_cls(results_1, results_2, **kwargs):
    predictions_1 = np.array([result["prediction"] for result in results_1])
    labels_1 = np.array([result["label"] for result in results_1])
    predictions_2 = np.array([result["prediction"] for result in results_2])
    labels_2 = np.array([result["label"] for result in results_2])
    results_1 = {
        metric: round(f(labels_1, predictions_1) * 100, 2)
        for metric, f in metrics.items()
    }
    results_2 = {
        metric: round(f(labels_2, predictions_2) * 100, 2)
        for metric, f in metrics.items()
    }

    return {
        "results_1": results_1,
        "results_2": results_2,
        "best_score_1": results_1["f1-macro"],
        "best_score_2": results_2["f1-macro"],
    }


### Epoch 동안 학습 및 평가를 수행하는 함수 정의

In [80]:
def _run_epoch(model, loader, device=None, context=None, **kwargs):
    config = kwargs["config"]
    is_train = kwargs["is_train"]

    avg_loss = 0
    results = []
    batch_num = len(loader)

    if is_train:
        model.train()
        if config.use_tpu:
            optimizer = context.getattr_or("optimizer", lambda: model.get_optimizer())
            scheduler = context.getattr_or("scheduler", lambda: model.get_scheduler(batch_num, optimizer))
        else:
            optimizer = kwargs["optimizer"]
            scheduler = kwargs["scheduler"]
    else:
        model.eval()

    is_master = True

    pbar = tqdm(enumerate(loader), total=batch_num, disable=not is_master, dynamic_ncols=True, position=0, leave=True)

    corrected_1 = 0
    corrected_2 = 0
    total = 0

    for i, (inputs, labels) in pbar:
        # inputs: {"input_ids": [batch_size(4), seq_len, 768], "attention_mask": [batch_size(4), seq_len, 768]}
        # labels: {"preference": [batch_size(4), 1], "slang": [batch_size(4), 1], "politic": [batch_size(4), 1]}
        if not config.use_tpu:
            # (k, v) => ("input_ids", value)
            # (k, v) => ("attention_mask", value)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            for k, v in labels.items():
                if isinstance(v, torch.Tensor):
                    labels[k] = v.to(device)

        outputs = model(inputs)


        outputs_1 = outputs[0]
        outputs_2 = outputs[1]

        labels_1 = labels["politic"]
        labels_2 = labels["government"]

        loss_function_1 = nn.CrossEntropyLoss()
        loss_1 = loss_function_1(outputs_1, labels_1)

        total += outputs_1.shape[0]

        _, predicted_1 = outputs_1.max(1)
        corrected_1 += predicted_1.eq(labels_1).sum().item()

        loss_function_2 = nn.CrossEntropyLoss()
        loss_2 = loss_function_2(outputs_2, labels_2)

        _, predicted_2 = outputs_2.max(1)
        corrected_2 += predicted_2.eq(labels_2).sum().item()

        w_1 = 1
        w_2 = 1
        loss = w_1 * loss_1 + w_2 * loss_2

        avg_loss = cal_running_avg_loss(loss.item(), avg_loss)
        loss /= config.gradient_accumulation_steps

        if is_train:
            loss.backward()
            if i % config.gradient_accumulation_steps == 0 or i == batch_num - 1:
                if config.max_grad_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)

                optimizer.step()
                optimizer.zero_grad()

                if scheduler is not None:
                    scheduler.step()
        else:
            result = (model.module if hasattr(model, "module") else model).eval_step(inputs, labels, outputs)
            results.append(result)

        if is_master:
            pbar.set_description(f"epoch: {kwargs['epoch'] + 1}, {('train' if is_train else 'valid')} loss: {min(100, round(avg_loss, 4))}")

    return {
        "loss": avg_loss,
        "result": results,
    }


# 학습 코드에서 호출하는 함수
def run_epoch(**kwargs):
    model = kwargs.pop("model")
    if kwargs["config"].use_tpu:
        results = model(_run_epoch, **kwargs)
    else:
        results = _run_epoch(model, **kwargs)

    if isinstance(results, list):
        loss = sum([result["loss"] for result in results]) / len(results)
        result = []
        for res in results:
            result.extend(res["result"])
        results = {"loss": loss, "result": result}

    return results

### 딥러닝 모델 초기화 및 설정

In [81]:
# 현재 모델 이름이 "monologg/kobigbird-bert-base" 이므로, Hugging Face에서 찾아서 불러옴
set_seed(config.seed)

# 딥러닝 모델 초기화
model = ClsModel()

print(f"configuration: {str(config)}")

if torch.cuda.is_available(): # GPU를 사용할 수 있다면
    gpu_count = torch.cuda.device_count()
    print(f"{gpu_count} GPU device detected")
    devices = ["cuda:{}".format(i) for i in range(gpu_count)]
    model_dp = torch.nn.DataParallel(model, device_ids=devices)
    model.to(devices[0])
else: # GPU를 사용할 수 없다면 CPU로 구동
    devices = ["cpu"]
    model_dp = model

# 학습 결과를 저장하기 위한 폴더 만들기
if not os.path.exists(config.cache_dir):
    os.makedirs(config.cache_dir)

output_dir = os.path.join(config.output_dir, config.task, config.dataset)
print("Output directory:", output_dir)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 실제 학습을 위해 optimizer 및 scheduler 초기화
optimizer = None
scheduler = None
if config.do_train: # 학습 모드(train mode)인 경우
    optimizer = model.get_optimizer()
    scheduler = model.get_scheduler(len(train_dataloader), optimizer)

params = {
    "config": config,
    "model": model_dp,
    "optimizer": optimizer,
    "scheduler": scheduler,
}
if not config.use_tpu:
    params["device"] = devices[0]

configuration: namespace(task='cls', dataset='comment', cache_dir='cache', output_dir='output', use_tpu=False, model_name_or_path='monologg/kobigbird-bert-base', data_dir='./', train_file='joongang.csv', predict_file='joongang.csv', max_seq_length=1024, train_batch_size=4, eval_batch_size=2, learning_rate=3e-05, num_train_epochs=10, num_labels=5, gradient_accumulation_steps=2, threads=4, seed=42, do_train=True, do_eval_during_train=True, do_eval=True, do_lower_case=False, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, warmup_proportion=0.0)
1 GPU device detected
Output directory: output/cls/comment


In [82]:
def do_eval(epoch):
    with torch.no_grad():
        results = run_epoch(loader=predict_dataloader, epoch=epoch, is_train=False, **params)["result"]
        print(results)
        results_1 = [result['results_1'] for result in results]
        results_2 = [result['results_2'] for result in results]

        results_1 = [item for sublist in results_1 for item in sublist]
        results_2 = [item for sublist in results_2 for item in sublist]

        eval_results = eval_cls(
            config=config,
            model=model,
            loader=predict_dataloader,
            tokenizer=model.tokenizer,
            results_1=results_1,
            results_2=results_2,
        )

    print("Eval results for output 1.")
    for k, v in eval_results["results_1"].items():
        print(f"{k} : {v}")

    print("Eval results for output 2.")
    for k, v in eval_results["results_2"].items():
        print(f"{k} : {v}")

    return eval_results["best_score_1"], eval_results["best_score_2"]

train_losses = []
val_accuracies = []
if config.do_train:
    best_score = (0, 0)
    for epoch in range(config.num_train_epochs):
        train_results = run_epoch(loader=train_dataloader, epoch=epoch, is_train=True, **params)
        train_loss = train_results['loss']
        train_losses.append(train_loss)

        if config.do_eval_during_train:
            score1, score2 = do_eval(epoch)
            val_accuracies.append((score1, score2))

            if score1 >= best_score[0] and score2 >= best_score[1]:
                best_score = (score1, score2)
                output_dir = os.path.join(config.output_dir, config.task, config.dataset, f"{epoch}-{best_score[0]}-{best_score[1]}-ckpt")
                copy.deepcopy(
                    model_dp.module
                    if hasattr(model_dp, "module")
                    else model_dp._models[0]
                    if hasattr(model_dp, "_models")
                    else model_dp
                ).cpu().save_pretrained(output_dir)
                with open(os.path.join(output_dir, "finetune_config.json"), "w") as save_config:
                    json.dump(vars(config), save_config, sort_keys=True, indent=4)
                print(f"Checkpoint {output_dir} saved.")


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.941:   1%|          | 1/100 [00:01<02:49,  1.72s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9374:   2%|▏         | 2/100 [00:03<02:42,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9355:   3%|▎         | 3/100 [00:05<02:42,  1.68s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9344:   4%|▍         | 4/100 [00:06<02:38,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9342:   5%|▌         | 5/100 [00:08<02:37,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9295:   6%|▌         | 6/100 [00:09<02:34,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9317:   7%|▋         | 7/100 [00:11<02:34,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9227:   8%|▊         | 8/100 [00:13<02:32,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9158:   9%|▉         | 9/100 [00:14<02:30,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.906:  10%|█         | 10/100 [00:16<02:27,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9077:  11%|█         | 11/100 [00:18<02:26,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.9055:  12%|█▏        | 12/100 [00:19<02:23,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8974:  13%|█▎        | 13/100 [00:21<02:22,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8895:  14%|█▍        | 14/100 [00:23<02:19,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8965:  15%|█▌        | 15/100 [00:24<02:18,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.884:  16%|█▌        | 16/100 [00:26<02:16,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8883:  17%|█▋        | 17/100 [00:27<02:15,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8802:  18%|█▊        | 18/100 [00:29<02:13,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8845:  19%|█▉        | 19/100 [00:31<02:12,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8829:  20%|██        | 20/100 [00:32<02:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8753:  21%|██        | 21/100 [00:34<02:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.871:  22%|██▏       | 22/100 [00:36<02:05,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8637:  23%|██▎       | 23/100 [00:37<02:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8669:  24%|██▍       | 24/100 [00:39<02:02,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8571:  25%|██▌       | 25/100 [00:40<02:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8515:  26%|██▌       | 26/100 [00:42<01:59,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8481:  27%|██▋       | 27/100 [00:44<01:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.839:  28%|██▊       | 28/100 [00:45<01:55,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8289:  29%|██▉       | 29/100 [00:47<01:54,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8187:  30%|███       | 30/100 [00:48<01:52,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8189:  31%|███       | 31/100 [00:50<01:51,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8115:  32%|███▏      | 32/100 [00:52<01:48,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.807:  33%|███▎      | 33/100 [00:53<01:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.8008:  34%|███▍      | 34/100 [00:55<01:46,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7989:  35%|███▌      | 35/100 [00:57<01:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7905:  36%|███▌      | 36/100 [00:58<01:42,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7821:  37%|███▋      | 37/100 [01:00<01:41,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7804:  38%|███▊      | 38/100 [01:01<01:39,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7734:  39%|███▉      | 39/100 [01:03<01:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7628:  40%|████      | 40/100 [01:05<01:36,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7564:  41%|████      | 41/100 [01:06<01:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7535:  42%|████▏     | 42/100 [01:08<01:33,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7508:  43%|████▎     | 43/100 [01:09<01:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7446:  44%|████▍     | 44/100 [01:11<01:29,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7311:  45%|████▌     | 45/100 [01:13<01:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7168:  46%|████▌     | 46/100 [01:14<01:26,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.7098:  47%|████▋     | 47/100 [01:16<01:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6964:  48%|████▊     | 48/100 [01:17<01:23,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6846:  49%|████▉     | 49/100 [01:19<01:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6857:  50%|█████     | 50/100 [01:21<01:20,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6738:  51%|█████     | 51/100 [01:22<01:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6712:  52%|█████▏    | 52/100 [01:24<01:17,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6714:  53%|█████▎    | 53/100 [01:26<01:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6571:  54%|█████▍    | 54/100 [01:27<01:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6426:  55%|█████▌    | 55/100 [01:29<01:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6444:  56%|█████▌    | 56/100 [01:30<01:10,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6335:  57%|█████▋    | 57/100 [01:32<01:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6183:  58%|█████▊    | 58/100 [01:34<01:07,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6158:  59%|█████▉    | 59/100 [01:35<01:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.6108:  60%|██████    | 60/100 [01:37<01:04,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5976:  61%|██████    | 61/100 [01:38<01:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5883:  62%|██████▏   | 62/100 [01:40<01:01,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5741:  63%|██████▎   | 63/100 [01:42<00:59,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5593:  64%|██████▍   | 64/100 [01:43<00:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5572:  65%|██████▌   | 65/100 [01:45<00:57,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5576:  66%|██████▌   | 66/100 [01:47<00:55,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5568:  67%|██████▋   | 67/100 [01:48<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5492:  68%|██████▊   | 68/100 [01:50<00:51,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5478:  69%|██████▉   | 69/100 [01:51<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5353:  70%|███████   | 70/100 [01:53<00:48,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.532:  71%|███████   | 71/100 [01:55<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5257:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5173:  73%|███████▎  | 73/100 [01:58<00:43,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5094:  74%|███████▍  | 74/100 [02:00<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5171:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.5045:  76%|███████▌  | 76/100 [02:03<00:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4888:  77%|███████▋  | 77/100 [02:04<00:37,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.484:  78%|███████▊  | 78/100 [02:06<00:35,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4752:  79%|███████▉  | 79/100 [02:08<00:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4743:  80%|████████  | 80/100 [02:09<00:32,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4679:  81%|████████  | 81/100 [02:11<00:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4585:  82%|████████▏ | 82/100 [02:12<00:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4568:  83%|████████▎ | 83/100 [02:14<00:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4474:  84%|████████▍ | 84/100 [02:16<00:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4418:  85%|████████▌ | 85/100 [02:17<00:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4403:  86%|████████▌ | 86/100 [02:19<00:22,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4316:  87%|████████▋ | 87/100 [02:21<00:21,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.423:  88%|████████▊ | 88/100 [02:22<00:19,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4241:  89%|████████▉ | 89/100 [02:24<00:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4215:  90%|█████████ | 90/100 [02:25<00:16,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4117:  91%|█████████ | 91/100 [02:27<00:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3966:  92%|█████████▏| 92/100 [02:29<00:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.4043:  93%|█████████▎| 93/100 [02:30<00:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3917:  94%|█████████▍| 94/100 [02:32<00:09,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.386:  95%|█████████▌| 95/100 [02:33<00:08,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3745:  96%|█████████▌| 96/100 [02:35<00:06,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.367:  97%|█████████▋| 97/100 [02:37<00:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3661:  98%|█████████▊| 98/100 [02:38<00:03,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 1, train loss: 2.3589:  99%|█████████▉| 99/100 [02:40<00:01,  1.62s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 1, train loss: 2.3444: 100%|██████████| 100/100 [02:41<00:00,  1.62s/it]
epoch: 1, valid loss: 1.0554:   2%|▏         | 1/51 [00:00<00:11,  4.27it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0602:   4%|▍         | 2/51 [00:00<00:11,  4.26it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0636:   6%|▌         | 3/51 [00:00<00:11,  4.25it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.075:   8%|▊         | 4/51 [00:00<00:11,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0898:  10%|▉         | 5/51 [00:01<00:10,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.0892:  12%|█▏        | 6/51 [00:01<00:10,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1083:  14%|█▎        | 7/51 [00:01<00:10,  4.22it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.115:  16%|█▌        | 8/51 [00:01<00:10,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1248:  18%|█▊        | 9/51 [00:02<00:10,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1352:  20%|█▉        | 10/51 [00:02<00:09,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.137:  22%|██▏       | 11/51 [00:02<00:09,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1369:  24%|██▎       | 12/51 [00:02<00:09,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1373:  25%|██▌       | 13/51 [00:03<00:09,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1578:  27%|██▋       | 14/51 [00:03<00:08,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1587:  29%|██▉       | 15/51 [00:03<00:08,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1585:  31%|███▏      | 16/51 [00:03<00:08,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1747:  33%|███▎      | 17/51 [00:04<00:08,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1868:  35%|███▌      | 18/51 [00:04<00:07,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1863:  37%|███▋      | 19/51 [00:04<00:07,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1951:  39%|███▉      | 20/51 [00:04<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1979:  41%|████      | 21/51 [00:05<00:07,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.1974:  43%|████▎     | 22/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2211:  45%|████▌     | 23/51 [00:05<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2308:  47%|████▋     | 24/51 [00:05<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2474:  49%|████▉     | 25/51 [00:06<00:06,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2551:  51%|█████     | 26/51 [00:06<00:06,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2624:  53%|█████▎    | 27/51 [00:06<00:05,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2772:  55%|█████▍    | 28/51 [00:06<00:05,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2884:  57%|█████▋    | 29/51 [00:06<00:05,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.2934:  59%|█████▉    | 30/51 [00:07<00:05,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3201:  61%|██████    | 31/51 [00:07<00:04,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.332:  63%|██████▎   | 32/51 [00:07<00:04,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3291:  65%|██████▍   | 33/51 [00:07<00:04,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3347:  67%|██████▋   | 34/51 [00:08<00:04,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3306:  69%|██████▊   | 35/51 [00:08<00:03,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3372:  71%|███████   | 36/51 [00:08<00:03,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3518:  73%|███████▎  | 37/51 [00:08<00:03,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3477:  75%|███████▍  | 38/51 [00:09<00:03,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3506:  76%|███████▋  | 39/51 [00:09<00:02,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3505:  78%|███████▊  | 40/51 [00:09<00:02,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3492:  80%|████████  | 41/51 [00:09<00:02,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3481:  82%|████████▏ | 42/51 [00:10<00:02,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3464:  84%|████████▍ | 43/51 [00:10<00:01,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3407:  86%|████████▋ | 44/51 [00:10<00:01,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3445:  88%|████████▊ | 45/51 [00:10<00:01,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.345:  90%|█████████ | 46/51 [00:11<00:01,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3409:  92%|█████████▏| 47/51 [00:11<00:00,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3372:  94%|█████████▍| 48/51 [00:11<00:00,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3388:  96%|█████████▌| 49/51 [00:11<00:00,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.3406:  98%|█████████▊| 50/51 [00:12<00:00,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 1, valid loss: 1.339: 100%|██████████| 51/51 [00:12<00:00,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/0-33.11-54.23-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.159:   1%|          | 1/100 [00:01<02:41,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1619:   2%|▏         | 2/100 [00:03<02:37,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1611:   3%|▎         | 3/100 [00:04<02:39,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1629:   4%|▍         | 4/100 [00:06<02:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.169:   5%|▌         | 5/100 [00:08<02:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1637:   6%|▌         | 6/100 [00:09<02:31,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.182:   7%|▋         | 7/100 [00:11<02:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1897:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1905:   9%|▉         | 9/100 [00:14<02:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.1924:  10%|█         | 10/100 [00:16<02:24,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2043:  11%|█         | 11/100 [00:17<02:25,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2084:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.218:  13%|█▎        | 13/100 [00:21<02:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2178:  14%|█▍        | 14/100 [00:22<02:18,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2179:  15%|█▌        | 15/100 [00:24<02:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2366:  16%|█▌        | 16/100 [00:25<02:15,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2373:  17%|█▋        | 17/100 [00:27<02:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2342:  18%|█▊        | 18/100 [00:29<02:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2344:  19%|█▉        | 19/100 [00:30<02:11,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2391:  20%|██        | 20/100 [00:32<02:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2479:  21%|██        | 21/100 [00:34<02:08,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2504:  22%|██▏       | 22/100 [00:35<02:05,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.251:  23%|██▎       | 23/100 [00:37<02:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2495:  24%|██▍       | 24/100 [00:38<02:02,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2501:  25%|██▌       | 25/100 [00:40<02:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2656:  26%|██▌       | 26/100 [00:42<01:59,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2594:  27%|██▋       | 27/100 [00:43<01:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2613:  28%|██▊       | 28/100 [00:45<01:55,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2632:  29%|██▉       | 29/100 [00:46<01:54,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2656:  30%|███       | 30/100 [00:48<01:52,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2793:  31%|███       | 31/100 [00:50<01:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.2826:  32%|███▏      | 32/100 [00:51<01:49,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.289:  33%|███▎      | 33/100 [00:53<01:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.301:  34%|███▍      | 34/100 [00:54<01:46,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3148:  35%|███▌      | 35/100 [00:56<01:45,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3139:  36%|███▌      | 36/100 [00:58<01:43,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3143:  37%|███▋      | 37/100 [00:59<01:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3107:  38%|███▊      | 38/100 [01:01<01:39,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3127:  39%|███▉      | 39/100 [01:03<01:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3205:  40%|████      | 40/100 [01:04<01:36,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3163:  41%|████      | 41/100 [01:06<01:36,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3189:  42%|████▏     | 42/100 [01:07<01:33,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3285:  43%|████▎     | 43/100 [01:09<01:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3361:  44%|████▍     | 44/100 [01:11<01:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3332:  45%|████▌     | 45/100 [01:12<01:29,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3477:  46%|████▌     | 46/100 [01:14<01:26,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3418:  47%|████▋     | 47/100 [01:16<01:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3397:  48%|████▊     | 48/100 [01:17<01:23,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3376:  49%|████▉     | 49/100 [01:19<01:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3462:  50%|█████     | 50/100 [01:20<01:20,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3399:  51%|█████     | 51/100 [01:22<01:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3457:  52%|█████▏    | 52/100 [01:24<01:17,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3445:  53%|█████▎    | 53/100 [01:25<01:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3423:  54%|█████▍    | 54/100 [01:27<01:13,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3405:  55%|█████▌    | 55/100 [01:28<01:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3402:  56%|█████▌    | 56/100 [01:30<01:10,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3366:  57%|█████▋    | 57/100 [01:32<01:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3483:  58%|█████▊    | 58/100 [01:33<01:07,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3415:  59%|█████▉    | 59/100 [01:35<01:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3348:  60%|██████    | 60/100 [01:36<01:04,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3324:  61%|██████    | 61/100 [01:38<01:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3339:  62%|██████▏   | 62/100 [01:40<01:00,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3298:  63%|██████▎   | 63/100 [01:41<00:59,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3292:  64%|██████▍   | 64/100 [01:43<00:57,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.329:  65%|██████▌   | 65/100 [01:45<00:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3237:  66%|██████▌   | 66/100 [01:46<00:54,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3213:  67%|██████▋   | 67/100 [01:48<00:53,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3297:  68%|██████▊   | 68/100 [01:49<00:51,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3354:  69%|██████▉   | 69/100 [01:51<00:50,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3305:  70%|███████   | 70/100 [01:53<00:48,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3321:  71%|███████   | 71/100 [01:54<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3311:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3332:  73%|███████▎  | 73/100 [01:58<00:43,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3464:  74%|███████▍  | 74/100 [01:59<00:41,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3428:  75%|███████▌  | 75/100 [02:01<00:40,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3396:  76%|███████▌  | 76/100 [02:02<00:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3426:  77%|███████▋  | 77/100 [02:04<00:37,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3485:  78%|███████▊  | 78/100 [02:06<00:35,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3575:  79%|███████▉  | 79/100 [02:07<00:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3698:  80%|████████  | 80/100 [02:09<00:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3794:  81%|████████  | 81/100 [02:11<00:30,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3843:  82%|████████▏ | 82/100 [02:12<00:29,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3859:  83%|████████▎ | 83/100 [02:14<00:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3854:  84%|████████▍ | 84/100 [02:15<00:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.389:  85%|████████▌ | 85/100 [02:17<00:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3844:  86%|████████▌ | 86/100 [02:19<00:22,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3932:  87%|████████▋ | 87/100 [02:20<00:21,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3927:  88%|████████▊ | 88/100 [02:22<00:19,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4067:  89%|████████▉ | 89/100 [02:23<00:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4135:  90%|█████████ | 90/100 [02:25<00:16,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4123:  91%|█████████ | 91/100 [02:27<00:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4136:  92%|█████████▏| 92/100 [02:28<00:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4057:  93%|█████████▎| 93/100 [02:30<00:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.3999:  94%|█████████▍| 94/100 [02:31<00:09,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4088:  95%|█████████▌| 95/100 [02:33<00:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4122:  96%|█████████▌| 96/100 [02:35<00:06,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4181:  97%|█████████▋| 97/100 [02:36<00:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4162:  98%|█████████▊| 98/100 [02:38<00:03,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 2, train loss: 1.4168:  99%|█████████▉| 99/100 [02:40<00:01,  1.62s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 2, train loss: 1.4167: 100%|██████████| 100/100 [02:41<00:00,  1.61s/it]
epoch: 2, valid loss: 1.2405:   2%|▏         | 1/51 [00:00<00:11,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2457:   4%|▍         | 2/51 [00:00<00:11,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2447:   6%|▌         | 3/51 [00:00<00:11,  4.21it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2446:   8%|▊         | 4/51 [00:00<00:11,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2517:  10%|▉         | 5/51 [00:01<00:10,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2453:  12%|█▏        | 6/51 [00:01<00:10,  4.21it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2519:  14%|█▎        | 7/51 [00:01<00:10,  4.22it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2491:  16%|█▌        | 8/51 [00:01<00:10,  4.21it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2456:  18%|█▊        | 9/51 [00:02<00:10,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2461:  20%|█▉        | 10/51 [00:02<00:09,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2457:  22%|██▏       | 11/51 [00:02<00:09,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2435:  24%|██▎       | 12/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2366:  25%|██▌       | 13/51 [00:03<00:09,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2452:  27%|██▋       | 14/51 [00:03<00:08,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2412:  29%|██▉       | 15/51 [00:03<00:08,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2403:  31%|███▏      | 16/51 [00:03<00:08,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2439:  33%|███▎      | 17/51 [00:04<00:08,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2489:  35%|███▌      | 18/51 [00:04<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2423:  37%|███▋      | 19/51 [00:04<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2439:  39%|███▉      | 20/51 [00:04<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.24:  41%|████      | 21/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2399:  43%|████▎     | 22/51 [00:05<00:06,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2532:  45%|████▌     | 23/51 [00:05<00:06,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2626:  47%|████▋     | 24/51 [00:05<00:06,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.286:  49%|████▉     | 25/51 [00:06<00:06,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2863:  51%|█████     | 26/51 [00:06<00:06,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2855:  53%|█████▎    | 27/51 [00:06<00:05,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2956:  55%|█████▍    | 28/51 [00:06<00:05,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3128:  57%|█████▋    | 29/51 [00:06<00:05,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3144:  59%|█████▉    | 30/51 [00:07<00:05,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3195:  61%|██████    | 31/51 [00:07<00:04,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3218:  63%|██████▎   | 32/51 [00:07<00:04,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3149:  65%|██████▍   | 33/51 [00:07<00:04,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3184:  67%|██████▋   | 34/51 [00:08<00:04,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3133:  69%|██████▊   | 35/51 [00:08<00:03,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3152:  71%|███████   | 36/51 [00:08<00:03,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3218:  73%|███████▎  | 37/51 [00:08<00:03,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3144:  75%|███████▍  | 38/51 [00:09<00:03,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3085:  76%|███████▋  | 39/51 [00:09<00:02,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.3053:  78%|███████▊  | 40/51 [00:09<00:02,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2968:  80%|████████  | 41/51 [00:09<00:02,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2928:  82%|████████▏ | 42/51 [00:10<00:02,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2864:  84%|████████▍ | 43/51 [00:10<00:01,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2761:  86%|████████▋ | 44/51 [00:10<00:01,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2702:  88%|████████▊ | 45/51 [00:10<00:01,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2663:  90%|█████████ | 46/51 [00:11<00:01,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2643:  92%|█████████▏| 47/51 [00:11<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2592:  94%|█████████▍| 48/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2569:  96%|█████████▌| 49/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2518:  98%|█████████▊| 50/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 2, valid loss: 1.2469: 100%|██████████| 51/51 [00:12<00:00,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/1-52.43-59.31-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.665:   1%|          | 1/100 [00:01<02:40,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6694:   2%|▏         | 2/100 [00:03<02:36,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6645:   3%|▎         | 3/100 [00:04<02:36,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6657:   4%|▍         | 4/100 [00:06<02:33,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6628:   5%|▌         | 5/100 [00:08<02:33,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6579:   6%|▌         | 6/100 [00:09<02:30,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6486:   7%|▋         | 7/100 [00:11<02:31,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6415:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6344:   9%|▉         | 9/100 [00:14<02:28,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6298:  10%|█         | 10/100 [00:16<02:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6214:  11%|█         | 11/100 [00:17<02:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6188:  12%|█▏        | 12/100 [00:19<02:21,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6156:  13%|█▎        | 13/100 [00:21<02:21,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6097:  14%|█▍        | 14/100 [00:22<02:18,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.6023:  15%|█▌        | 15/100 [00:24<02:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5977:  16%|█▌        | 16/100 [00:25<02:14,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5889:  17%|█▋        | 17/100 [00:27<02:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5889:  18%|█▊        | 18/100 [00:29<02:11,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5882:  19%|█▉        | 19/100 [00:30<02:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5863:  20%|██        | 20/100 [00:32<02:08,  1.60s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5806:  21%|██        | 21/100 [00:33<02:07,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5709:  22%|██▏       | 22/100 [00:35<02:05,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.57:  23%|██▎       | 23/100 [00:37<02:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5746:  24%|██▍       | 24/100 [00:38<02:02,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.563:  25%|██▌       | 25/100 [00:40<02:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5553:  26%|██▌       | 26/100 [00:41<01:59,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5455:  27%|██▋       | 27/100 [00:43<01:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5435:  28%|██▊       | 28/100 [00:45<01:55,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5414:  29%|██▉       | 29/100 [00:46<01:54,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5374:  30%|███       | 30/100 [00:48<01:52,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5262:  31%|███       | 31/100 [00:50<01:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5161:  32%|███▏      | 32/100 [00:51<01:49,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5092:  33%|███▎      | 33/100 [00:53<01:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5085:  34%|███▍      | 34/100 [00:54<01:46,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.5021:  35%|███▌      | 35/100 [00:56<01:46,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4935:  36%|███▌      | 36/100 [00:58<01:43,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4964:  37%|███▋      | 37/100 [00:59<01:42,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4944:  38%|███▊      | 38/100 [01:01<01:39,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4918:  39%|███▉      | 39/100 [01:03<01:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4907:  40%|████      | 40/100 [01:04<01:36,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4881:  41%|████      | 41/100 [01:06<01:35,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4854:  42%|████▏     | 42/100 [01:07<01:33,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4796:  43%|████▎     | 43/100 [01:09<01:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4804:  44%|████▍     | 44/100 [01:11<01:30,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4875:  45%|████▌     | 45/100 [01:12<01:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4846:  46%|████▌     | 46/100 [01:14<01:26,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4849:  47%|████▋     | 47/100 [01:15<01:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.481:  48%|████▊     | 48/100 [01:17<01:23,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4724:  49%|████▉     | 49/100 [01:19<01:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4635:  50%|█████     | 50/100 [01:20<01:20,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4598:  51%|█████     | 51/100 [01:22<01:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4606:  52%|█████▏    | 52/100 [01:24<01:17,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4569:  53%|█████▎    | 53/100 [01:25<01:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4522:  54%|█████▍    | 54/100 [01:27<01:13,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4515:  55%|█████▌    | 55/100 [01:28<01:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4413:  56%|█████▌    | 56/100 [01:30<01:10,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4314:  57%|█████▋    | 57/100 [01:32<01:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4289:  58%|█████▊    | 58/100 [01:33<01:07,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4273:  59%|█████▉    | 59/100 [01:35<01:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4272:  60%|██████    | 60/100 [01:36<01:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4242:  61%|██████    | 61/100 [01:38<01:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4148:  62%|██████▏   | 62/100 [01:40<01:01,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.411:  63%|██████▎   | 63/100 [01:41<01:00,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4042:  64%|██████▍   | 64/100 [01:43<00:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3959:  65%|██████▌   | 65/100 [01:45<00:57,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3884:  66%|██████▌   | 66/100 [01:46<00:55,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3834:  67%|██████▋   | 67/100 [01:48<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3773:  68%|██████▊   | 68/100 [01:49<00:51,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4055:  69%|██████▉   | 69/100 [01:51<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4056:  70%|███████   | 70/100 [01:53<00:48,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4191:  71%|███████   | 71/100 [01:54<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4131:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4268:  73%|███████▎  | 73/100 [01:58<00:43,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.416:  74%|███████▍  | 74/100 [01:59<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4128:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4095:  76%|███████▌  | 76/100 [02:02<00:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4055:  77%|███████▋  | 77/100 [02:04<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4048:  78%|███████▊  | 78/100 [02:06<00:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3998:  79%|███████▉  | 79/100 [02:07<00:34,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3985:  80%|████████  | 80/100 [02:09<00:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3986:  81%|████████  | 81/100 [02:11<00:30,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4047:  82%|████████▏ | 82/100 [02:12<00:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4047:  83%|████████▎ | 83/100 [02:14<00:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4076:  84%|████████▍ | 84/100 [02:15<00:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4155:  85%|████████▌ | 85/100 [02:17<00:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4151:  86%|████████▌ | 86/100 [02:19<00:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4199:  87%|████████▋ | 87/100 [02:20<00:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4159:  88%|████████▊ | 88/100 [02:22<00:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4226:  89%|████████▉ | 89/100 [02:24<00:17,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4144:  90%|█████████ | 90/100 [02:25<00:16,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4107:  91%|█████████ | 91/100 [02:27<00:14,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.4068:  92%|█████████▏| 92/100 [02:28<00:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3996:  93%|█████████▎| 93/100 [02:30<00:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3954:  94%|█████████▍| 94/100 [02:32<00:09,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3952:  95%|█████████▌| 95/100 [02:33<00:08,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3968:  96%|█████████▌| 96/100 [02:35<00:06,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3891:  97%|█████████▋| 97/100 [02:36<00:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3786:  98%|█████████▊| 98/100 [02:38<00:03,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 3, train loss: 1.3788:  99%|█████████▉| 99/100 [02:40<00:01,  1.62s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 3, train loss: 1.3708: 100%|██████████| 100/100 [02:41<00:00,  1.62s/it]
epoch: 3, valid loss: 0.848:   2%|▏         | 1/51 [00:00<00:11,  4.29it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8639:   4%|▍         | 2/51 [00:00<00:11,  4.25it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8767:   6%|▌         | 3/51 [00:00<00:11,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8763:   8%|▊         | 4/51 [00:00<00:11,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8832:  10%|▉         | 5/51 [00:01<00:11,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8806:  12%|█▏        | 6/51 [00:01<00:10,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.8951:  14%|█▎        | 7/51 [00:01<00:10,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9001:  16%|█▌        | 8/51 [00:01<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9111:  18%|█▊        | 9/51 [00:02<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9258:  20%|█▉        | 10/51 [00:02<00:09,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9247:  22%|██▏       | 11/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9211:  24%|██▎       | 12/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9203:  25%|██▌       | 13/51 [00:03<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9244:  27%|██▋       | 14/51 [00:03<00:08,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.926:  29%|██▉       | 15/51 [00:03<00:08,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9326:  31%|███▏      | 16/51 [00:03<00:08,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9306:  33%|███▎      | 17/51 [00:04<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9393:  35%|███▌      | 18/51 [00:04<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9367:  37%|███▋      | 19/51 [00:04<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9419:  39%|███▉      | 20/51 [00:04<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9384:  41%|████      | 21/51 [00:05<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9445:  43%|████▎     | 22/51 [00:05<00:07,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9628:  45%|████▌     | 23/51 [00:05<00:06,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9612:  47%|████▋     | 24/51 [00:05<00:06,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9781:  49%|████▉     | 25/51 [00:06<00:06,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9784:  51%|█████     | 26/51 [00:06<00:05,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9799:  53%|█████▎    | 27/51 [00:06<00:05,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 0.9982:  55%|█████▍    | 28/51 [00:06<00:05,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0027:  57%|█████▋    | 29/51 [00:07<00:05,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0068:  59%|█████▉    | 30/51 [00:07<00:05,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0301:  61%|██████    | 31/51 [00:07<00:04,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0309:  63%|██████▎   | 32/51 [00:07<00:04,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0279:  65%|██████▍   | 33/51 [00:07<00:04,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0377:  67%|██████▋   | 34/51 [00:08<00:04,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0338:  69%|██████▊   | 35/51 [00:08<00:03,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0318:  71%|███████   | 36/51 [00:08<00:03,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0497:  73%|███████▎  | 37/51 [00:08<00:03,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0493:  75%|███████▍  | 38/51 [00:09<00:03,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0452:  76%|███████▋  | 39/51 [00:09<00:02,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0412:  78%|███████▊  | 40/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0357:  80%|████████  | 41/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0315:  82%|████████▏ | 42/51 [00:10<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0278:  84%|████████▍ | 43/51 [00:10<00:01,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.02:  86%|████████▋ | 44/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0205:  88%|████████▊ | 45/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0164:  90%|█████████ | 46/51 [00:11<00:01,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0119:  92%|█████████▏| 47/51 [00:11<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0067:  94%|█████████▍| 48/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.008:  96%|█████████▌| 49/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.01:  98%|█████████▊| 50/51 [00:12<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 3, valid loss: 1.0078: 100%|██████████| 51/51 [00:12<00:00,  4.14it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 0, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 1, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3238:   1%|          | 1/100 [00:01<02:42,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3138:   2%|▏         | 2/100 [00:03<02:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3143:   3%|▎         | 3/100 [00:04<02:39,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3044:   4%|▍         | 4/100 [00:06<02:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2973:   5%|▌         | 5/100 [00:08<02:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.306:   6%|▌         | 6/100 [00:09<02:31,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3076:   7%|▋         | 7/100 [00:11<02:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3134:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3093:   9%|▉         | 9/100 [00:14<02:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3069:  10%|█         | 10/100 [00:16<02:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.3018:  11%|█         | 11/100 [00:17<02:24,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.295:  12%|█▏        | 12/100 [00:19<02:21,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2899:  13%|█▎        | 13/100 [00:21<02:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2809:  14%|█▍        | 14/100 [00:22<02:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2752:  15%|█▌        | 15/100 [00:24<02:18,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2698:  16%|█▌        | 16/100 [00:25<02:15,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2609:  17%|█▋        | 17/100 [00:27<02:15,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2537:  18%|█▊        | 18/100 [00:29<02:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2541:  19%|█▉        | 19/100 [00:30<02:12,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2578:  20%|██        | 20/100 [00:32<02:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2541:  21%|██        | 21/100 [00:34<02:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2447:  22%|██▏       | 22/100 [00:35<02:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2488:  23%|██▎       | 23/100 [00:37<02:05,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2447:  24%|██▍       | 24/100 [00:38<02:02,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2391:  25%|██▌       | 25/100 [00:40<02:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2366:  26%|██▌       | 26/100 [00:42<01:59,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2342:  27%|██▋       | 27/100 [00:43<01:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.229:  28%|██▊       | 28/100 [00:45<01:56,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.23:  29%|██▉       | 29/100 [00:47<01:55,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2246:  30%|███       | 30/100 [00:48<01:52,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2216:  31%|███       | 31/100 [00:50<01:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2171:  32%|███▏      | 32/100 [00:51<01:49,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2186:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2202:  34%|███▍      | 34/100 [00:55<01:46,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2199:  35%|███▌      | 35/100 [00:56<01:45,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2143:  36%|███▌      | 36/100 [00:58<01:43,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2055:  37%|███▋      | 37/100 [00:59<01:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2032:  38%|███▊      | 38/100 [01:01<01:39,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.199:  39%|███▉      | 39/100 [01:03<01:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1973:  40%|████      | 40/100 [01:04<01:36,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2028:  41%|████      | 41/100 [01:06<01:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2006:  42%|████▏     | 42/100 [01:08<01:33,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1943:  43%|████▎     | 43/100 [01:09<01:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1956:  44%|████▍     | 44/100 [01:11<01:30,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1904:  45%|████▌     | 45/100 [01:12<01:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1831:  46%|████▌     | 46/100 [01:14<01:27,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1934:  47%|████▋     | 47/100 [01:16<01:26,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1896:  48%|████▊     | 48/100 [01:17<01:23,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2107:  49%|████▉     | 49/100 [01:19<01:23,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.2034:  50%|█████     | 50/100 [01:21<01:20,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1984:  51%|█████     | 51/100 [01:22<01:19,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1918:  52%|█████▏    | 52/100 [01:24<01:17,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1909:  53%|█████▎    | 53/100 [01:25<01:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1838:  54%|█████▍    | 54/100 [01:27<01:14,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1786:  55%|█████▌    | 55/100 [01:29<01:13,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1858:  56%|█████▌    | 56/100 [01:30<01:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1876:  57%|█████▋    | 57/100 [01:32<01:10,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1861:  58%|█████▊    | 58/100 [01:33<01:07,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1786:  59%|█████▉    | 59/100 [01:35<01:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.177:  60%|██████    | 60/100 [01:37<01:04,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1683:  61%|██████    | 61/100 [01:38<01:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1633:  62%|██████▏   | 62/100 [01:40<01:01,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1693:  63%|██████▎   | 63/100 [01:42<00:59,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1718:  64%|██████▍   | 64/100 [01:43<00:58,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1635:  65%|██████▌   | 65/100 [01:45<00:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1578:  66%|██████▌   | 66/100 [01:46<00:54,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.152:  67%|██████▋   | 67/100 [01:48<00:53,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1508:  68%|██████▊   | 68/100 [01:50<00:51,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1575:  69%|██████▉   | 69/100 [01:51<00:50,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1493:  70%|███████   | 70/100 [01:53<00:48,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1526:  71%|███████   | 71/100 [01:55<00:47,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1467:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1519:  73%|███████▎  | 73/100 [01:58<00:43,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1526:  74%|███████▍  | 74/100 [01:59<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1521:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1592:  76%|███████▌  | 76/100 [02:03<00:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1622:  77%|███████▋  | 77/100 [02:04<00:37,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1529:  78%|███████▊  | 78/100 [02:06<00:35,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1437:  79%|███████▉  | 79/100 [02:07<00:34,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.135:  80%|████████  | 80/100 [02:09<00:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1259:  81%|████████  | 81/100 [02:11<00:30,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1187:  82%|████████▏ | 82/100 [02:12<00:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1114:  83%|████████▎ | 83/100 [02:14<00:27,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1077:  84%|████████▍ | 84/100 [02:16<00:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.1042:  85%|████████▌ | 85/100 [02:17<00:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0961:  86%|████████▌ | 86/100 [02:19<00:22,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0945:  87%|████████▋ | 87/100 [02:20<00:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0898:  88%|████████▊ | 88/100 [02:22<00:19,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0822:  89%|████████▉ | 89/100 [02:24<00:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0772:  90%|█████████ | 90/100 [02:25<00:16,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0712:  91%|█████████ | 91/100 [02:27<00:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0623:  92%|█████████▏| 92/100 [02:28<00:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0574:  93%|█████████▎| 93/100 [02:30<00:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.052:  94%|█████████▍| 94/100 [02:32<00:09,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.065:  95%|█████████▌| 95/100 [02:33<00:08,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0571:  96%|█████████▌| 96/100 [02:35<00:06,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.06:  97%|█████████▋| 97/100 [02:37<00:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0625:  98%|█████████▊| 98/100 [02:38<00:03,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 4, train loss: 1.0594:  99%|█████████▉| 99/100 [02:40<00:01,  1.62s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 4, train loss: 1.0521: 100%|██████████| 100/100 [02:41<00:00,  1.62s/it]
epoch: 4, valid loss: 1.267:   2%|▏         | 1/51 [00:00<00:11,  4.27it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2755:   4%|▍         | 2/51 [00:00<00:11,  4.19it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2768:   6%|▌         | 3/51 [00:00<00:11,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2753:   8%|▊         | 4/51 [00:00<00:11,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2656:  10%|▉         | 5/51 [00:01<00:11,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2549:  12%|█▏        | 6/51 [00:01<00:10,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2636:  14%|█▎        | 7/51 [00:01<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2575:  16%|█▌        | 8/51 [00:01<00:10,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2681:  18%|█▊        | 9/51 [00:02<00:10,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2681:  20%|█▉        | 10/51 [00:02<00:10,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2725:  22%|██▏       | 11/51 [00:02<00:09,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2617:  24%|██▎       | 12/51 [00:02<00:09,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2525:  25%|██▌       | 13/51 [00:03<00:09,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2617:  27%|██▋       | 14/51 [00:03<00:09,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.258:  29%|██▉       | 15/51 [00:03<00:08,  4.03it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.258:  31%|███▏      | 16/51 [00:03<00:08,  4.05it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2522:  33%|███▎      | 17/51 [00:04<00:08,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2618:  35%|███▌      | 18/51 [00:04<00:08,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2519:  37%|███▋      | 19/51 [00:04<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2609:  39%|███▉      | 20/51 [00:04<00:07,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2508:  41%|████      | 21/51 [00:05<00:07,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2406:  43%|████▎     | 22/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2613:  45%|████▌     | 23/51 [00:05<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2668:  47%|████▋     | 24/51 [00:05<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2747:  49%|████▉     | 25/51 [00:06<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2816:  51%|█████     | 26/51 [00:06<00:06,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2756:  53%|█████▎    | 27/51 [00:06<00:05,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2895:  55%|█████▍    | 28/51 [00:06<00:05,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3195:  57%|█████▋    | 29/51 [00:07<00:05,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3265:  59%|█████▉    | 30/51 [00:07<00:05,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3307:  61%|██████    | 31/51 [00:07<00:04,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3297:  63%|██████▎   | 32/51 [00:07<00:04,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3193:  65%|██████▍   | 33/51 [00:08<00:04,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3296:  67%|██████▋   | 34/51 [00:08<00:04,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3191:  69%|██████▊   | 35/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3224:  71%|███████   | 36/51 [00:08<00:03,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3408:  73%|███████▎  | 37/51 [00:08<00:03,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3332:  75%|███████▍  | 38/51 [00:09<00:03,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3226:  76%|███████▋  | 39/51 [00:09<00:02,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.322:  78%|███████▊  | 40/51 [00:09<00:02,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.3113:  80%|████████  | 41/51 [00:09<00:02,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.303:  82%|████████▏ | 42/51 [00:10<00:02,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2941:  84%|████████▍ | 43/51 [00:10<00:01,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2826:  86%|████████▋ | 44/51 [00:10<00:01,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2763:  88%|████████▊ | 45/51 [00:10<00:01,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2673:  90%|█████████ | 46/51 [00:11<00:01,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2679:  92%|█████████▏| 47/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2581:  94%|█████████▍| 48/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2516:  96%|█████████▌| 49/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2482:  98%|█████████▊| 50/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 4, valid loss: 1.2385: 100%|██████████| 51/51 [00:12<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/3-75.4-62.03-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2409:   1%|          | 1/100 [00:01<02:42,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2344:   2%|▏         | 2/100 [00:03<02:37,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2266:   3%|▎         | 3/100 [00:04<02:38,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2251:   4%|▍         | 4/100 [00:06<02:34,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2198:   5%|▌         | 5/100 [00:08<02:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2161:   6%|▌         | 6/100 [00:09<02:31,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2061:   7%|▋         | 7/100 [00:11<02:31,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.2:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1947:   9%|▉         | 9/100 [00:14<02:27,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1849:  10%|█         | 10/100 [00:16<02:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1775:  11%|█         | 11/100 [00:17<02:25,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1692:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1658:  13%|█▎        | 13/100 [00:21<02:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1597:  14%|█▍        | 14/100 [00:22<02:18,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1505:  15%|█▌        | 15/100 [00:24<02:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1452:  16%|█▌        | 16/100 [00:25<02:15,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1382:  17%|█▋        | 17/100 [00:27<02:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1343:  18%|█▊        | 18/100 [00:29<02:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1363:  19%|█▉        | 19/100 [00:30<02:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1264:  20%|██        | 20/100 [00:32<02:08,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1354:  21%|██        | 21/100 [00:34<02:08,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.13:  22%|██▏       | 22/100 [00:35<02:05,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1293:  23%|██▎       | 23/100 [00:37<02:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1329:  24%|██▍       | 24/100 [00:38<02:02,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1275:  25%|██▌       | 25/100 [00:40<02:02,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1196:  26%|██▌       | 26/100 [00:42<01:59,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1104:  27%|██▋       | 27/100 [00:43<01:58,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1185:  28%|██▊       | 28/100 [00:45<01:56,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1108:  29%|██▉       | 29/100 [00:46<01:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1115:  30%|███       | 30/100 [00:48<01:52,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1023:  31%|███       | 31/100 [00:50<01:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0955:  32%|███▏      | 32/100 [00:51<01:49,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.1002:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0942:  34%|███▍      | 34/100 [00:55<01:46,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0889:  35%|███▌      | 35/100 [00:56<01:45,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.083:  36%|███▌      | 36/100 [00:58<01:43,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0892:  37%|███▋      | 37/100 [00:59<01:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0889:  38%|███▊      | 38/100 [01:01<01:39,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0861:  39%|███▉      | 39/100 [01:03<01:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0808:  40%|████      | 40/100 [01:04<01:36,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.074:  41%|████      | 41/100 [01:06<01:36,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0662:  42%|████▏     | 42/100 [01:08<01:33,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0669:  43%|████▎     | 43/100 [01:09<01:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0643:  44%|████▍     | 44/100 [01:11<01:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0593:  45%|████▌     | 45/100 [01:12<01:29,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.058:  46%|████▌     | 46/100 [01:14<01:27,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0708:  47%|████▋     | 47/100 [01:16<01:26,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0679:  48%|████▊     | 48/100 [01:17<01:23,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0596:  49%|████▉     | 49/100 [01:19<01:23,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0522:  50%|█████     | 50/100 [01:20<01:20,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0564:  51%|█████     | 51/100 [01:22<01:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0524:  52%|█████▏    | 52/100 [01:24<01:17,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0579:  53%|█████▎    | 53/100 [01:25<01:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0524:  54%|█████▍    | 54/100 [01:27<01:14,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0438:  55%|█████▌    | 55/100 [01:29<01:13,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0353:  56%|█████▌    | 56/100 [01:30<01:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0338:  57%|█████▋    | 57/100 [01:32<01:10,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0281:  58%|█████▊    | 58/100 [01:33<01:07,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0267:  59%|█████▉    | 59/100 [01:35<01:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0291:  60%|██████    | 60/100 [01:37<01:04,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0273:  61%|██████    | 61/100 [01:38<01:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0194:  62%|██████▏   | 62/100 [01:40<01:01,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0156:  63%|██████▎   | 63/100 [01:42<00:59,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 1.0075:  64%|██████▍   | 64/100 [01:43<00:58,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9994:  65%|██████▌   | 65/100 [01:45<00:56,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9956:  66%|██████▌   | 66/100 [01:46<00:54,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9874:  67%|██████▋   | 67/100 [01:48<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9832:  68%|██████▊   | 68/100 [01:50<00:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.977:  69%|██████▉   | 69/100 [01:51<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9735:  70%|███████   | 70/100 [01:53<00:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.966:  71%|███████   | 71/100 [01:55<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9635:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9633:  73%|███████▎  | 73/100 [01:58<00:44,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9584:  74%|███████▍  | 74/100 [01:59<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9501:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9421:  76%|███████▌  | 76/100 [02:03<00:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9487:  77%|███████▋  | 77/100 [02:04<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9416:  78%|███████▊  | 78/100 [02:06<00:35,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9363:  79%|███████▉  | 79/100 [02:08<00:34,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9393:  80%|████████  | 80/100 [02:09<00:32,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9356:  81%|████████  | 81/100 [02:11<00:31,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9299:  82%|████████▏ | 82/100 [02:12<00:29,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9278:  83%|████████▎ | 83/100 [02:14<00:27,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9218:  84%|████████▍ | 84/100 [02:16<00:26,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9203:  85%|████████▌ | 85/100 [02:17<00:24,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9148:  86%|████████▌ | 86/100 [02:19<00:22,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9123:  87%|████████▋ | 87/100 [02:21<00:21,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9054:  88%|████████▊ | 88/100 [02:22<00:19,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.906:  89%|████████▉ | 89/100 [02:24<00:18,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9044:  90%|█████████ | 90/100 [02:26<00:16,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9027:  91%|█████████ | 91/100 [02:27<00:14,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9044:  92%|█████████▏| 92/100 [02:29<00:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9066:  93%|█████████▎| 93/100 [02:30<00:11,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.9042:  94%|█████████▍| 94/100 [02:32<00:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8986:  95%|█████████▌| 95/100 [02:34<00:08,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8916:  96%|█████████▌| 96/100 [02:35<00:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8903:  97%|█████████▋| 97/100 [02:37<00:04,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8931:  98%|█████████▊| 98/100 [02:39<00:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 5, train loss: 0.8865:  99%|█████████▉| 99/100 [02:40<00:01,  1.64s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 5, train loss: 0.8802: 100%|██████████| 100/100 [02:42<00:00,  1.62s/it]
epoch: 5, valid loss: 1.4455:   2%|▏         | 1/51 [00:00<00:11,  4.25it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4499:   4%|▍         | 2/51 [00:00<00:11,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4399:   6%|▌         | 3/51 [00:00<00:11,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.428:   8%|▊         | 4/51 [00:00<00:11,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4311:  10%|▉         | 5/51 [00:01<00:11,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4188:  12%|█▏        | 6/51 [00:01<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4271:  14%|█▎        | 7/51 [00:01<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4169:  16%|█▌        | 8/51 [00:01<00:10,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4278:  18%|█▊        | 9/51 [00:02<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4277:  20%|█▉        | 10/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4277:  22%|██▏       | 11/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4163:  24%|██▎       | 12/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4147:  25%|██▌       | 13/51 [00:03<00:09,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.411:  27%|██▋       | 14/51 [00:03<00:08,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4121:  29%|██▉       | 15/51 [00:03<00:08,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4024:  31%|███▏      | 16/51 [00:03<00:08,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3923:  33%|███▎      | 17/51 [00:04<00:08,  4.04it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4025:  35%|███▌      | 18/51 [00:04<00:08,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3906:  37%|███▋      | 19/51 [00:04<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3831:  39%|███▉      | 20/51 [00:04<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3706:  41%|████      | 21/51 [00:05<00:07,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3581:  43%|████▎     | 22/51 [00:05<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3788:  45%|████▌     | 23/51 [00:05<00:06,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3874:  47%|████▋     | 24/51 [00:05<00:06,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3988:  49%|████▉     | 25/51 [00:06<00:06,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3973:  51%|█████     | 26/51 [00:06<00:06,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.39:  53%|█████▎    | 27/51 [00:06<00:05,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4039:  55%|█████▍    | 28/51 [00:06<00:05,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4256:  57%|█████▋    | 29/51 [00:07<00:05,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4329:  59%|█████▉    | 30/51 [00:07<00:05,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.432:  61%|██████    | 31/51 [00:07<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4249:  63%|██████▎   | 32/51 [00:07<00:04,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4122:  65%|██████▍   | 33/51 [00:08<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4258:  67%|██████▋   | 34/51 [00:08<00:04,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4137:  69%|██████▊   | 35/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4181:  71%|███████   | 36/51 [00:08<00:03,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4422:  73%|███████▎  | 37/51 [00:08<00:03,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4301:  75%|███████▍  | 38/51 [00:09<00:03,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4229:  76%|███████▋  | 39/51 [00:09<00:02,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.4101:  78%|███████▊  | 40/51 [00:09<00:02,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3987:  80%|████████  | 41/51 [00:09<00:02,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3876:  82%|████████▏ | 42/51 [00:10<00:02,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3759:  84%|████████▍ | 43/51 [00:10<00:01,  4.15it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3633:  86%|████████▋ | 44/51 [00:10<00:01,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3521:  88%|████████▊ | 45/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3553:  90%|█████████ | 46/51 [00:11<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3594:  92%|█████████▏| 47/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3544:  94%|█████████▍| 48/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3617:  96%|█████████▌| 49/51 [00:11<00:00,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3694:  98%|█████████▊| 50/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 5, valid loss: 1.3633: 100%|██████████| 51/51 [00:12<00:00,  4.12it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5089:   1%|          | 1/100 [00:01<02:43,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5157:   2%|▏         | 2/100 [00:03<02:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5137:   3%|▎         | 3/100 [00:04<02:39,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.515:   4%|▍         | 4/100 [00:06<02:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5145:   5%|▌         | 5/100 [00:08<02:35,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5133:   6%|▌         | 6/100 [00:09<02:31,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5111:   7%|▋         | 7/100 [00:11<02:31,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5083:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5052:   9%|▉         | 9/100 [00:14<02:28,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5041:  10%|█         | 10/100 [00:16<02:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.503:  11%|█         | 11/100 [00:17<02:25,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5031:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5:  13%|█▎        | 13/100 [00:21<02:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4984:  14%|█▍        | 14/100 [00:22<02:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5049:  15%|█▌        | 15/100 [00:24<02:18,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5114:  16%|█▌        | 16/100 [00:25<02:15,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5085:  17%|█▋        | 17/100 [00:27<02:15,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5074:  18%|█▊        | 18/100 [00:29<02:13,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5101:  19%|█▉        | 19/100 [00:30<02:12,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5109:  20%|██        | 20/100 [00:32<02:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.513:  21%|██        | 21/100 [00:34<02:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5094:  22%|██▏       | 22/100 [00:35<02:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5169:  23%|██▎       | 23/100 [00:37<02:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5142:  24%|██▍       | 24/100 [00:39<02:02,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5159:  25%|██▌       | 25/100 [00:40<02:02,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5271:  26%|██▌       | 26/100 [00:42<02:00,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5322:  27%|██▋       | 27/100 [00:43<01:59,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5283:  28%|██▊       | 28/100 [00:45<01:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5258:  29%|██▉       | 29/100 [00:47<01:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5218:  30%|███       | 30/100 [00:48<01:53,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5192:  31%|███       | 31/100 [00:50<01:52,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5176:  32%|███▏      | 32/100 [00:52<01:49,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5184:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5207:  34%|███▍      | 34/100 [00:55<01:47,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5193:  35%|███▌      | 35/100 [00:56<01:46,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5202:  36%|███▌      | 36/100 [00:58<01:43,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5229:  37%|███▋      | 37/100 [01:00<01:42,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5193:  38%|███▊      | 38/100 [01:01<01:40,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5175:  39%|███▉      | 39/100 [01:03<01:39,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5135:  40%|████      | 40/100 [01:05<01:37,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5097:  41%|████      | 41/100 [01:06<01:36,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5077:  42%|████▏     | 42/100 [01:08<01:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5055:  43%|████▎     | 43/100 [01:09<01:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5023:  44%|████▍     | 44/100 [01:11<01:30,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4993:  45%|████▌     | 45/100 [01:13<01:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5018:  46%|████▌     | 46/100 [01:14<01:26,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4983:  47%|████▋     | 47/100 [01:16<01:26,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4956:  48%|████▊     | 48/100 [01:17<01:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5042:  49%|████▉     | 49/100 [01:19<01:23,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5002:  50%|█████     | 50/100 [01:21<01:21,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5062:  51%|█████     | 51/100 [01:22<01:19,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5058:  52%|█████▏    | 52/100 [01:24<01:17,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5106:  53%|█████▎    | 53/100 [01:26<01:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5227:  54%|█████▍    | 54/100 [01:27<01:14,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.52:  55%|█████▌    | 55/100 [01:29<01:13,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5182:  56%|█████▌    | 56/100 [01:30<01:10,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5242:  57%|█████▋    | 57/100 [01:32<01:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5203:  58%|█████▊    | 58/100 [01:34<01:07,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5256:  59%|█████▉    | 59/100 [01:35<01:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5272:  60%|██████    | 60/100 [01:37<01:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5258:  61%|██████    | 61/100 [01:39<01:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5348:  62%|██████▏   | 62/100 [01:40<01:01,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5307:  63%|██████▎   | 63/100 [01:42<01:00,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5264:  64%|██████▍   | 64/100 [01:43<00:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.524:  65%|██████▌   | 65/100 [01:45<00:56,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5209:  66%|██████▌   | 66/100 [01:47<00:54,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5195:  67%|██████▋   | 67/100 [01:48<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5166:  68%|██████▊   | 68/100 [01:50<00:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5139:  69%|██████▉   | 69/100 [01:52<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.511:  70%|███████   | 70/100 [01:53<00:48,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5145:  71%|███████   | 71/100 [01:55<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5113:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5098:  73%|███████▎  | 73/100 [01:58<00:43,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5133:  74%|███████▍  | 74/100 [02:00<00:41,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.51:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5069:  76%|███████▌  | 76/100 [02:03<00:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5045:  77%|███████▋  | 77/100 [02:05<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5015:  78%|███████▊  | 78/100 [02:06<00:35,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5003:  79%|███████▉  | 79/100 [02:08<00:34,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5052:  80%|████████  | 80/100 [02:09<00:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5061:  81%|████████  | 81/100 [02:11<00:30,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5022:  82%|████████▏ | 82/100 [02:13<00:29,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5033:  83%|████████▎ | 83/100 [02:14<00:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5026:  84%|████████▍ | 84/100 [02:16<00:25,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5065:  85%|████████▌ | 85/100 [02:17<00:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5029:  86%|████████▌ | 86/100 [02:19<00:22,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4992:  87%|████████▋ | 87/100 [02:21<00:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4954:  88%|████████▊ | 88/100 [02:22<00:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5025:  89%|████████▉ | 89/100 [02:24<00:17,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5103:  90%|█████████ | 90/100 [02:26<00:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5066:  91%|█████████ | 91/100 [02:27<00:14,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5127:  92%|█████████▏| 92/100 [02:29<00:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5106:  93%|█████████▎| 93/100 [02:30<00:11,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5067:  94%|█████████▍| 94/100 [02:32<00:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5045:  95%|█████████▌| 95/100 [02:34<00:08,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5013:  96%|█████████▌| 96/100 [02:35<00:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.5028:  97%|█████████▋| 97/100 [02:37<00:04,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4991:  98%|█████████▊| 98/100 [02:39<00:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 6, train loss: 0.4958:  99%|█████████▉| 99/100 [02:40<00:01,  1.63s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 6, train loss: 0.4941: 100%|██████████| 100/100 [02:42<00:00,  1.62s/it]
epoch: 6, valid loss: 1.4988:   2%|▏         | 1/51 [00:00<00:12,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.5129:   4%|▍         | 2/51 [00:00<00:11,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.5044:   6%|▌         | 3/51 [00:00<00:11,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4918:   8%|▊         | 4/51 [00:00<00:11,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4811:  10%|▉         | 5/51 [00:01<00:11,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4687:  12%|█▏        | 6/51 [00:01<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4679:  14%|█▎        | 7/51 [00:01<00:10,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4575:  16%|█▌        | 8/51 [00:01<00:10,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4651:  18%|█▊        | 9/51 [00:02<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4692:  20%|█▉        | 10/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4568:  22%|██▏       | 11/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4433:  24%|██▎       | 12/51 [00:02<00:09,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4368:  25%|██▌       | 13/51 [00:03<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4533:  27%|██▋       | 14/51 [00:03<00:08,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4441:  29%|██▉       | 15/51 [00:03<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4445:  31%|███▏      | 16/51 [00:03<00:08,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4393:  33%|███▎      | 17/51 [00:04<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4387:  35%|███▌      | 18/51 [00:04<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4252:  37%|███▋      | 19/51 [00:04<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4137:  39%|███▉      | 20/51 [00:04<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4006:  41%|████      | 21/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.3878:  43%|████▎     | 22/51 [00:05<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4025:  45%|████▌     | 23/51 [00:05<00:06,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4078:  47%|████▋     | 24/51 [00:05<00:06,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4256:  49%|████▉     | 25/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4128:  51%|█████     | 26/51 [00:06<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4011:  53%|█████▎    | 27/51 [00:06<00:05,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4139:  55%|█████▍    | 28/51 [00:06<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4098:  57%|█████▋    | 29/51 [00:07<00:05,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4224:  59%|█████▉    | 30/51 [00:07<00:05,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4231:  61%|██████    | 31/51 [00:07<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.428:  63%|██████▎   | 32/51 [00:07<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4152:  65%|██████▍   | 33/51 [00:08<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4358:  67%|██████▋   | 34/51 [00:08<00:04,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4231:  69%|██████▊   | 35/51 [00:08<00:03,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4369:  71%|███████   | 36/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4777:  73%|███████▎  | 37/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4666:  75%|███████▍  | 38/51 [00:09<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4537:  76%|███████▋  | 39/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.462:  78%|███████▊  | 40/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4488:  80%|████████  | 41/51 [00:09<00:02,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4549:  82%|████████▏ | 42/51 [00:10<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4414:  84%|████████▍ | 43/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4278:  86%|████████▋ | 44/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4224:  88%|████████▊ | 45/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4373:  90%|█████████ | 46/51 [00:11<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4369:  92%|█████████▏| 47/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4239:  94%|█████████▍| 48/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4241:  96%|█████████▌| 49/51 [00:11<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4145:  98%|█████████▊| 50/51 [00:12<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 6, valid loss: 1.4014: 100%|██████████| 51/51 [00:12<00:00,  4.12it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.976:   1%|          | 1/100 [00:01<02:41,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9684:   2%|▏         | 2/100 [00:03<02:37,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9635:   3%|▎         | 3/100 [00:04<02:38,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9591:   4%|▍         | 4/100 [00:06<02:34,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9592:   5%|▌         | 5/100 [00:08<02:34,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9518:   6%|▌         | 6/100 [00:09<02:31,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9484:   7%|▋         | 7/100 [00:11<02:31,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9404:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9338:   9%|▉         | 9/100 [00:14<02:28,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9265:  10%|█         | 10/100 [00:16<02:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9198:  11%|█         | 11/100 [00:17<02:25,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9137:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.9068:  13%|█▎        | 13/100 [00:21<02:22,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8989:  14%|█▍        | 14/100 [00:22<02:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8916:  15%|█▌        | 15/100 [00:24<02:18,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.886:  16%|█▌        | 16/100 [00:25<02:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8799:  17%|█▋        | 17/100 [00:27<02:15,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8814:  18%|█▊        | 18/100 [00:29<02:13,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8738:  19%|█▉        | 19/100 [00:30<02:13,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8682:  20%|██        | 20/100 [00:32<02:10,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8614:  21%|██        | 21/100 [00:34<02:09,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8547:  22%|██▏       | 22/100 [00:35<02:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8481:  23%|██▎       | 23/100 [00:37<02:05,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8462:  24%|██▍       | 24/100 [00:39<02:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8486:  25%|██▌       | 25/100 [00:40<02:02,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8449:  26%|██▌       | 26/100 [00:42<02:00,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8392:  27%|██▋       | 27/100 [00:43<01:59,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8322:  28%|██▊       | 28/100 [00:45<01:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.827:  29%|██▉       | 29/100 [00:47<01:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8239:  30%|███       | 30/100 [00:48<01:53,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8197:  31%|███       | 31/100 [00:50<01:52,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8137:  32%|███▏      | 32/100 [00:52<01:50,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8078:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.8021:  34%|███▍      | 34/100 [00:55<01:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7953:  35%|███▌      | 35/100 [00:56<01:46,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7885:  36%|███▌      | 36/100 [00:58<01:43,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7819:  37%|███▋      | 37/100 [01:00<01:43,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.775:  38%|███▊      | 38/100 [01:01<01:41,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7683:  39%|███▉      | 39/100 [01:03<01:40,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7618:  40%|████      | 40/100 [01:05<01:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.756:  41%|████      | 41/100 [01:06<01:37,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7588:  42%|████▏     | 42/100 [01:08<01:35,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7526:  43%|████▎     | 43/100 [01:10<01:33,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7485:  44%|████▍     | 44/100 [01:11<01:31,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.742:  45%|████▌     | 45/100 [01:13<01:30,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7355:  46%|████▌     | 46/100 [01:14<01:27,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.73:  47%|████▋     | 47/100 [01:16<01:26,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7369:  48%|████▊     | 48/100 [01:18<01:24,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7307:  49%|████▉     | 49/100 [01:19<01:23,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7244:  50%|█████     | 50/100 [01:21<01:21,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7232:  51%|█████     | 51/100 [01:23<01:20,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7212:  52%|█████▏    | 52/100 [01:24<01:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7168:  53%|█████▎    | 53/100 [01:26<01:16,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7107:  54%|█████▍    | 54/100 [01:28<01:14,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.7056:  55%|█████▌    | 55/100 [01:29<01:13,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6997:  56%|█████▌    | 56/100 [01:31<01:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6966:  57%|█████▋    | 57/100 [01:32<01:10,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.691:  58%|█████▊    | 58/100 [01:34<01:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6855:  59%|█████▉    | 59/100 [01:36<01:07,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6892:  60%|██████    | 60/100 [01:37<01:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6869:  61%|██████    | 61/100 [01:39<01:03,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6813:  62%|██████▏   | 62/100 [01:41<01:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.687:  63%|██████▎   | 63/100 [01:42<01:00,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6812:  64%|██████▍   | 64/100 [01:44<00:58,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6815:  65%|██████▌   | 65/100 [01:46<00:57,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6767:  66%|██████▌   | 66/100 [01:47<00:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6712:  67%|██████▋   | 67/100 [01:49<00:53,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6685:  68%|██████▊   | 68/100 [01:50<00:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6699:  69%|██████▉   | 69/100 [01:52<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6654:  70%|███████   | 70/100 [01:54<00:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6609:  71%|███████   | 71/100 [01:55<00:47,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6627:  72%|███████▏  | 72/100 [01:57<00:45,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6592:  73%|███████▎  | 73/100 [01:59<00:44,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6575:  74%|███████▍  | 74/100 [02:00<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6589:  75%|███████▌  | 75/100 [02:02<00:40,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6548:  76%|███████▌  | 76/100 [02:03<00:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.657:  77%|███████▋  | 77/100 [02:05<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6524:  78%|███████▊  | 78/100 [02:07<00:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6475:  79%|███████▉  | 79/100 [02:08<00:34,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6482:  80%|████████  | 80/100 [02:10<00:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6428:  81%|████████  | 81/100 [02:12<00:31,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6382:  82%|████████▏ | 82/100 [02:13<00:29,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6328:  83%|████████▎ | 83/100 [02:15<00:27,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6297:  84%|████████▍ | 84/100 [02:16<00:26,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6244:  85%|████████▌ | 85/100 [02:18<00:24,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6194:  86%|████████▌ | 86/100 [02:20<00:22,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6228:  87%|████████▋ | 87/100 [02:21<00:21,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6215:  88%|████████▊ | 88/100 [02:23<00:19,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6308:  89%|████████▉ | 89/100 [02:25<00:18,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6343:  90%|█████████ | 90/100 [02:26<00:16,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6377:  91%|█████████ | 91/100 [02:28<00:14,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6329:  92%|█████████▏| 92/100 [02:30<00:13,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6296:  93%|█████████▎| 93/100 [02:31<00:11,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6251:  94%|█████████▍| 94/100 [02:33<00:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6202:  95%|█████████▌| 95/100 [02:35<00:08,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6231:  96%|█████████▌| 96/100 [02:36<00:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6211:  97%|█████████▋| 97/100 [02:38<00:04,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6173:  98%|█████████▊| 98/100 [02:39<00:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 7, train loss: 0.6149:  99%|█████████▉| 99/100 [02:41<00:01,  1.64s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 7, train loss: 0.6101: 100%|██████████| 100/100 [02:42<00:00,  1.63s/it]
epoch: 7, valid loss: 2.0083:   2%|▏         | 1/51 [00:00<00:12,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.011:   4%|▍         | 2/51 [00:00<00:11,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9924:   6%|▌         | 3/51 [00:00<00:11,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9992:   8%|▊         | 4/51 [00:00<00:11,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9968:  10%|▉         | 5/51 [00:01<00:11,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9795:  12%|█▏        | 6/51 [00:01<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9967:  14%|█▎        | 7/51 [00:01<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9912:  16%|█▌        | 8/51 [00:01<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.0037:  18%|█▊        | 9/51 [00:02<00:10,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 2.0097:  20%|█▉        | 10/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9927:  22%|██▏       | 11/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9734:  24%|██▎       | 12/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9556:  25%|██▌       | 13/51 [00:03<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9476:  27%|██▋       | 14/51 [00:03<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9453:  29%|██▉       | 15/51 [00:03<00:08,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9387:  31%|███▏      | 16/51 [00:03<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9217:  33%|███▎      | 17/51 [00:04<00:08,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9343:  35%|███▌      | 18/51 [00:04<00:08,  4.04it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9158:  37%|███▋      | 19/51 [00:04<00:07,  4.05it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9073:  39%|███▉      | 20/51 [00:04<00:07,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8888:  41%|████      | 21/51 [00:05<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8707:  43%|████▎     | 22/51 [00:05<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8881:  45%|████▌     | 23/51 [00:05<00:06,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9003:  47%|████▋     | 24/51 [00:05<00:06,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8968:  49%|████▉     | 25/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8853:  51%|█████     | 26/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8867:  53%|█████▎    | 27/51 [00:06<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8997:  55%|█████▍    | 28/51 [00:06<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9237:  57%|█████▋    | 29/51 [00:07<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9288:  59%|█████▉    | 30/51 [00:07<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9195:  61%|██████    | 31/51 [00:07<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9192:  63%|██████▎   | 32/51 [00:07<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9008:  65%|██████▍   | 33/51 [00:08<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9073:  67%|██████▋   | 34/51 [00:08<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8894:  69%|██████▊   | 35/51 [00:08<00:03,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8819:  71%|███████   | 36/51 [00:08<00:03,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.9059:  73%|███████▎  | 37/51 [00:09<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.888:  75%|███████▍  | 38/51 [00:09<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8783:  76%|███████▋  | 39/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8604:  78%|███████▊  | 40/51 [00:09<00:02,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8432:  80%|████████  | 41/51 [00:09<00:02,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8255:  82%|████████▏ | 42/51 [00:10<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.8103:  84%|████████▍ | 43/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7929:  86%|████████▋ | 44/51 [00:10<00:01,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7775:  88%|████████▊ | 45/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7663:  90%|█████████ | 46/51 [00:11<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7717:  92%|█████████▏| 47/51 [00:11<00:00,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.769:  94%|█████████▍| 48/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.774:  96%|█████████▌| 49/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7752:  98%|█████████▊| 50/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 7, valid loss: 1.7586: 100%|██████████| 51/51 [00:12<00:00,  4.10it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6994:   1%|          | 1/100 [00:01<02:43,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.7012:   2%|▏         | 2/100 [00:03<02:39,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6959:   3%|▎         | 3/100 [00:04<02:40,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6911:   4%|▍         | 4/100 [00:06<02:36,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6897:   5%|▌         | 5/100 [00:08<02:35,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6864:   6%|▌         | 6/100 [00:09<02:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6839:   7%|▋         | 7/100 [00:11<02:32,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6781:   8%|▊         | 8/100 [00:13<02:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6788:   9%|▉         | 9/100 [00:14<02:28,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.673:  10%|█         | 10/100 [00:16<02:26,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6682:  11%|█         | 11/100 [00:17<02:25,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6629:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6583:  13%|█▎        | 13/100 [00:21<02:22,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6529:  14%|█▍        | 14/100 [00:22<02:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6475:  15%|█▌        | 15/100 [00:24<02:19,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6422:  16%|█▌        | 16/100 [00:26<02:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6376:  17%|█▋        | 17/100 [00:27<02:16,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6326:  18%|█▊        | 18/100 [00:29<02:14,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6286:  19%|█▉        | 19/100 [00:31<02:13,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6295:  20%|██        | 20/100 [00:32<02:10,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6268:  21%|██        | 21/100 [00:34<02:09,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6255:  22%|██▏       | 22/100 [00:35<02:07,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6221:  23%|██▎       | 23/100 [00:37<02:06,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6171:  24%|██▍       | 24/100 [00:39<02:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6119:  25%|██▌       | 25/100 [00:40<02:02,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6147:  26%|██▌       | 26/100 [00:42<02:00,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.611:  27%|██▋       | 27/100 [00:44<01:59,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6063:  28%|██▊       | 28/100 [00:45<01:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.6013:  29%|██▉       | 29/100 [00:47<01:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5999:  30%|███       | 30/100 [00:48<01:52,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.595:  31%|███       | 31/100 [00:50<01:52,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.591:  32%|███▏      | 32/100 [00:52<01:49,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.586:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5845:  34%|███▍      | 34/100 [00:55<01:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5881:  35%|███▌      | 35/100 [00:57<01:46,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5842:  36%|███▌      | 36/100 [00:58<01:44,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5795:  37%|███▋      | 37/100 [01:00<01:43,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5755:  38%|███▊      | 38/100 [01:01<01:40,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5766:  39%|███▉      | 39/100 [01:03<01:39,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5731:  40%|████      | 40/100 [01:05<01:37,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5687:  41%|████      | 41/100 [01:06<01:36,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5641:  42%|████▏     | 42/100 [01:08<01:34,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5593:  43%|████▎     | 43/100 [01:10<01:33,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5552:  44%|████▍     | 44/100 [01:11<01:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5508:  45%|████▌     | 45/100 [01:13<01:29,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.547:  46%|████▌     | 46/100 [01:14<01:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5442:  47%|████▋     | 47/100 [01:16<01:26,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5511:  48%|████▊     | 48/100 [01:18<01:23,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5485:  49%|████▉     | 49/100 [01:19<01:23,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5501:  50%|█████     | 50/100 [01:21<01:20,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5459:  51%|█████     | 51/100 [01:23<01:19,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.542:  52%|█████▏    | 52/100 [01:24<01:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5376:  53%|█████▎    | 53/100 [01:26<01:16,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5353:  54%|█████▍    | 54/100 [01:27<01:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.535:  55%|█████▌    | 55/100 [01:29<01:13,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.531:  56%|█████▌    | 56/100 [01:31<01:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5265:  57%|█████▋    | 57/100 [01:32<01:10,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5223:  58%|█████▊    | 58/100 [01:34<01:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5196:  59%|█████▉    | 59/100 [01:36<01:07,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5161:  60%|██████    | 60/100 [01:37<01:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5205:  61%|██████    | 61/100 [01:39<01:03,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5161:  62%|██████▏   | 62/100 [01:41<01:01,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5121:  63%|██████▎   | 63/100 [01:42<01:00,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5083:  64%|██████▍   | 64/100 [01:44<00:58,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5066:  65%|██████▌   | 65/100 [01:45<00:57,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5026:  66%|██████▌   | 66/100 [01:47<00:55,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.504:  67%|██████▋   | 67/100 [01:49<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.509:  68%|██████▊   | 68/100 [01:50<00:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.505:  69%|██████▉   | 69/100 [01:52<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5072:  70%|███████   | 70/100 [01:54<00:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.503:  71%|███████   | 71/100 [01:55<00:47,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.499:  72%|███████▏  | 72/100 [01:57<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.498:  73%|███████▎  | 73/100 [01:58<00:44,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.5012:  74%|███████▍  | 74/100 [02:00<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.497:  75%|███████▌  | 75/100 [02:02<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4932:  76%|███████▌  | 76/100 [02:03<00:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4907:  77%|███████▋  | 77/100 [02:05<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4871:  78%|███████▊  | 78/100 [02:07<00:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4835:  79%|███████▉  | 79/100 [02:08<00:34,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4807:  80%|████████  | 80/100 [02:10<00:34,  1.70s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4771:  81%|████████  | 81/100 [02:12<00:32,  1.69s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4804:  82%|████████▏ | 82/100 [02:13<00:29,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4778:  83%|████████▎ | 83/100 [02:15<00:28,  1.66s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4739:  84%|████████▍ | 84/100 [02:17<00:26,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4792:  85%|████████▌ | 85/100 [02:18<00:24,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4855:  86%|████████▌ | 86/100 [02:20<00:22,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4821:  87%|████████▋ | 87/100 [02:22<00:21,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4788:  88%|████████▊ | 88/100 [02:23<00:19,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4768:  89%|████████▉ | 89/100 [02:25<00:18,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4736:  90%|█████████ | 90/100 [02:26<00:16,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4698:  91%|█████████ | 91/100 [02:28<00:14,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4661:  92%|█████████▏| 92/100 [02:30<00:13,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4622:  93%|█████████▎| 93/100 [02:31<00:11,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4597:  94%|█████████▍| 94/100 [02:33<00:09,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4564:  95%|█████████▌| 95/100 [02:35<00:08,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4527:  96%|█████████▌| 96/100 [02:36<00:06,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4525:  97%|█████████▋| 97/100 [02:38<00:04,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4546:  98%|█████████▊| 98/100 [02:39<00:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 8, train loss: 0.4536:  99%|█████████▉| 99/100 [02:41<00:01,  1.63s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 8, train loss: 0.4594: 100%|██████████| 100/100 [02:42<00:00,  1.63s/it]
epoch: 8, valid loss: 0.6041:   2%|▏         | 1/51 [00:00<00:12,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6234:   4%|▍         | 2/51 [00:00<00:11,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6275:   6%|▌         | 3/51 [00:00<00:11,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.63:   8%|▊         | 4/51 [00:00<00:11,  3.96it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6321:  10%|▉         | 5/51 [00:01<00:11,  4.01it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6277:  12%|█▏        | 6/51 [00:01<00:11,  4.01it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6666:  14%|█▎        | 7/51 [00:01<00:11,  3.98it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6642:  16%|█▌        | 8/51 [00:01<00:10,  4.01it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6816:  18%|█▊        | 9/51 [00:02<00:10,  4.03it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.696:  20%|█▉        | 10/51 [00:02<00:10,  4.00it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6982:  22%|██▏       | 11/51 [00:02<00:09,  4.02it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.6919:  24%|██▎       | 12/51 [00:02<00:09,  4.04it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.689:  25%|██▌       | 13/51 [00:03<00:09,  4.04it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7122:  27%|██▋       | 14/51 [00:03<00:09,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7252:  29%|██▉       | 15/51 [00:03<00:08,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7341:  31%|███▏      | 16/51 [00:03<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7285:  33%|███▎      | 17/51 [00:04<00:08,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7515:  35%|███▌      | 18/51 [00:04<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7448:  37%|███▋      | 19/51 [00:04<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7407:  39%|███▉      | 20/51 [00:04<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.734:  41%|████      | 21/51 [00:05<00:07,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7275:  43%|████▎     | 22/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7593:  45%|████▌     | 23/51 [00:05<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7711:  47%|████▋     | 24/51 [00:05<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7742:  49%|████▉     | 25/51 [00:06<00:06,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.7838:  51%|█████     | 26/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.804:  53%|█████▎    | 27/51 [00:06<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8233:  55%|█████▍    | 28/51 [00:06<00:05,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8542:  57%|█████▋    | 29/51 [00:07<00:05,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8711:  59%|█████▉    | 30/51 [00:07<00:05,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8708:  61%|██████    | 31/51 [00:07<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8739:  63%|██████▎   | 32/51 [00:07<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8659:  65%|██████▍   | 33/51 [00:08<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8878:  67%|██████▋   | 34/51 [00:08<00:04,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8798:  69%|██████▊   | 35/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8949:  71%|███████   | 36/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9397:  73%|███████▎  | 37/51 [00:09<00:03,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9312:  75%|███████▍  | 38/51 [00:09<00:03,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9234:  76%|███████▋  | 39/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9153:  78%|███████▊  | 40/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9071:  80%|████████  | 41/51 [00:10<00:02,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8986:  82%|████████▏ | 42/51 [00:10<00:02,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8916:  84%|████████▍ | 43/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8832:  86%|████████▋ | 44/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8844:  88%|████████▊ | 45/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8921:  90%|█████████ | 46/51 [00:11<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9028:  92%|█████████▏| 47/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.8948:  94%|█████████▍| 48/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9135:  96%|█████████▌| 49/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9263:  98%|█████████▊| 50/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 8, valid loss: 0.9177: 100%|██████████| 51/51 [00:12<00:00,  4.10it/s]


torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1655:   1%|          | 1/100 [00:01<02:42,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1646:   2%|▏         | 2/100 [00:03<02:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1638:   3%|▎         | 3/100 [00:04<02:39,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1629:   4%|▍         | 4/100 [00:06<02:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1641:   5%|▌         | 5/100 [00:08<02:34,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1636:   6%|▌         | 6/100 [00:09<02:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1628:   7%|▋         | 7/100 [00:11<02:31,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1662:   8%|▊         | 8/100 [00:12<02:28,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1663:   9%|▉         | 9/100 [00:14<02:28,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1653:  10%|█         | 10/100 [00:16<02:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1687:  11%|█         | 11/100 [00:17<02:25,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1676:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1674:  13%|█▎        | 13/100 [00:21<02:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1672:  14%|█▍        | 14/100 [00:22<02:18,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1663:  15%|█▌        | 15/100 [00:24<02:18,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1665:  16%|█▌        | 16/100 [00:25<02:15,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1657:  17%|█▋        | 17/100 [00:27<02:15,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1655:  18%|█▊        | 18/100 [00:29<02:13,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1733:  19%|█▉        | 19/100 [00:30<02:12,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1786:  20%|██        | 20/100 [00:32<02:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1783:  21%|██        | 21/100 [00:34<02:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1773:  22%|██▏       | 22/100 [00:35<02:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.182:  23%|██▎       | 23/100 [00:37<02:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1844:  24%|██▍       | 24/100 [00:38<02:02,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1917:  25%|██▌       | 25/100 [00:40<02:02,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1923:  26%|██▌       | 26/100 [00:42<02:00,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1914:  27%|██▋       | 27/100 [00:43<01:59,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1906:  28%|██▊       | 28/100 [00:45<01:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.19:  29%|██▉       | 29/100 [00:47<01:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1898:  30%|███       | 30/100 [00:48<01:53,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1903:  31%|███       | 31/100 [00:50<01:52,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1892:  32%|███▏      | 32/100 [00:51<01:49,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.194:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.194:  34%|███▍      | 34/100 [00:55<01:46,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.194:  35%|███▌      | 35/100 [00:56<01:46,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1935:  36%|███▌      | 36/100 [00:58<01:43,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1928:  37%|███▋      | 37/100 [01:00<01:42,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1917:  38%|███▊      | 38/100 [01:01<01:40,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1945:  39%|███▉      | 39/100 [01:03<01:39,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1938:  40%|████      | 40/100 [01:04<01:37,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1947:  41%|████      | 41/100 [01:06<01:36,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.1968:  42%|████▏     | 42/100 [01:08<01:33,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2051:  43%|████▎     | 43/100 [01:09<01:33,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2039:  44%|████▍     | 44/100 [01:11<01:30,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2052:  45%|████▌     | 45/100 [01:13<01:29,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2041:  46%|████▌     | 46/100 [01:14<01:27,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2048:  47%|████▋     | 47/100 [01:16<01:26,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2042:  48%|████▊     | 48/100 [01:18<01:24,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.21:  49%|████▉     | 49/100 [01:19<01:23,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2092:  50%|█████     | 50/100 [01:21<01:20,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2148:  51%|█████     | 51/100 [01:22<01:19,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2137:  52%|█████▏    | 52/100 [01:24<01:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2125:  53%|█████▎    | 53/100 [01:26<01:16,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2135:  54%|█████▍    | 54/100 [01:27<01:14,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2125:  55%|█████▌    | 55/100 [01:29<01:13,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2119:  56%|█████▌    | 56/100 [01:30<01:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2106:  57%|█████▋    | 57/100 [01:32<01:10,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2094:  58%|█████▊    | 58/100 [01:34<01:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2083:  59%|█████▉    | 59/100 [01:35<01:07,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2084:  60%|██████    | 60/100 [01:37<01:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.217:  61%|██████    | 61/100 [01:39<01:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2193:  62%|██████▏   | 62/100 [01:40<01:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.22:  63%|██████▎   | 63/100 [01:42<01:00,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2185:  64%|██████▍   | 64/100 [01:44<00:58,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.218:  65%|██████▌   | 65/100 [01:45<00:57,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2217:  66%|██████▌   | 66/100 [01:47<00:55,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2202:  67%|██████▋   | 67/100 [01:48<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2193:  68%|██████▊   | 68/100 [01:50<00:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2192:  69%|██████▉   | 69/100 [01:52<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2179:  70%|███████   | 70/100 [01:53<00:48,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2167:  71%|███████   | 71/100 [01:55<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2154:  72%|███████▏  | 72/100 [01:57<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2142:  73%|███████▎  | 73/100 [01:58<00:44,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2142:  74%|███████▍  | 74/100 [02:00<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2135:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2128:  76%|███████▌  | 76/100 [02:03<00:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2119:  77%|███████▋  | 77/100 [02:05<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2107:  78%|███████▊  | 78/100 [02:06<00:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2094:  79%|███████▉  | 79/100 [02:08<00:34,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2094:  80%|████████  | 80/100 [02:10<00:32,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2082:  81%|████████  | 81/100 [02:11<00:31,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2068:  82%|████████▏ | 82/100 [02:13<00:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2056:  83%|████████▎ | 83/100 [02:14<00:27,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.205:  84%|████████▍ | 84/100 [02:16<00:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2043:  85%|████████▌ | 85/100 [02:18<00:24,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2034:  86%|████████▌ | 86/100 [02:19<00:22,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2053:  87%|████████▋ | 87/100 [02:21<00:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.204:  88%|████████▊ | 88/100 [02:23<00:19,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2108:  89%|████████▉ | 89/100 [02:24<00:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2095:  90%|█████████ | 90/100 [02:26<00:16,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2089:  91%|█████████ | 91/100 [02:27<00:14,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2097:  92%|█████████▏| 92/100 [02:29<00:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2083:  93%|█████████▎| 93/100 [02:31<00:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.207:  94%|█████████▍| 94/100 [02:32<00:09,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2058:  95%|█████████▌| 95/100 [02:34<00:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2046:  96%|█████████▌| 96/100 [02:36<00:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.212:  97%|█████████▋| 97/100 [02:37<00:04,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.215:  98%|█████████▊| 98/100 [02:39<00:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 9, train loss: 0.2139:  99%|█████████▉| 99/100 [02:40<00:01,  1.63s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 9, train loss: 0.216: 100%|██████████| 100/100 [02:42<00:00,  1.62s/it]
epoch: 9, valid loss: 3.1638:   2%|▏         | 1/51 [00:00<00:11,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.1622:   4%|▍         | 2/51 [00:00<00:11,  4.18it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.1338:   6%|▌         | 3/51 [00:00<00:11,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.1045:   8%|▊         | 4/51 [00:00<00:11,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.0742:  10%|▉         | 5/51 [00:01<00:11,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.0588:  12%|█▏        | 6/51 [00:01<00:10,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.038:  14%|█▎        | 7/51 [00:01<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 3.009:  16%|█▌        | 8/51 [00:01<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.995:  18%|█▊        | 9/51 [00:02<00:10,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.9854:  20%|█▉        | 10/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.969:  22%|██▏       | 11/51 [00:02<00:09,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.9399:  24%|██▎       | 12/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.9137:  25%|██▌       | 13/51 [00:03<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.9086:  27%|██▋       | 14/51 [00:03<00:08,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.8848:  29%|██▉       | 15/51 [00:03<00:08,  4.08it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.8789:  31%|███▏      | 16/51 [00:03<00:08,  4.05it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.8532:  33%|███▎      | 17/51 [00:04<00:08,  4.04it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.8417:  35%|███▌      | 18/51 [00:04<00:08,  4.06it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.8138:  37%|███▋      | 19/51 [00:04<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7869:  39%|███▉      | 20/51 [00:04<00:07,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7597:  41%|████      | 21/51 [00:05<00:07,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7482:  43%|████▎     | 22/51 [00:05<00:07,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7694:  45%|████▌     | 23/51 [00:05<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7822:  47%|████▋     | 24/51 [00:05<00:06,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7594:  49%|████▉     | 25/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7391:  51%|█████     | 26/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.724:  53%|█████▎    | 27/51 [00:06<00:05,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7221:  55%|█████▍    | 28/51 [00:06<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.741:  57%|█████▋    | 29/51 [00:07<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7468:  59%|█████▉    | 30/51 [00:07<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7319:  61%|██████    | 31/51 [00:07<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7176:  63%|██████▎   | 32/51 [00:07<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6911:  65%|██████▍   | 33/51 [00:08<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.718:  67%|██████▋   | 34/51 [00:08<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7087:  69%|██████▊   | 35/51 [00:08<00:03,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7192:  71%|███████   | 36/51 [00:08<00:03,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7261:  73%|███████▎  | 37/51 [00:09<00:03,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.7001:  75%|███████▍  | 38/51 [00:09<00:03,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6738:  76%|███████▋  | 39/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6558:  78%|███████▊  | 40/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6301:  80%|████████  | 41/51 [00:09<00:02,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.6139:  82%|████████▏ | 42/51 [00:10<00:02,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5886:  84%|████████▍ | 43/51 [00:10<00:01,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5635:  86%|████████▋ | 44/51 [00:10<00:01,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.554:  88%|████████▊ | 45/51 [00:10<00:01,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5646:  90%|█████████ | 46/51 [00:11<00:01,  4.17it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5671:  92%|█████████▏| 47/51 [00:11<00:00,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5423:  94%|█████████▍| 48/51 [00:11<00:00,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5444:  96%|█████████▌| 49/51 [00:11<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5295:  98%|█████████▊| 50/51 [00:12<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 9, valid loss: 2.5047: 100%|██████████| 51/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 1, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti




Checkpoint output/cls/comment/8-77.31-68.97-ckpt saved.


  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.097:   1%|          | 1/100 [00:01<02:42,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0976:   2%|▏         | 2/100 [00:03<02:38,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0995:   3%|▎         | 3/100 [00:04<02:39,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.0995:   4%|▍         | 4/100 [00:06<02:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1005:   5%|▌         | 5/100 [00:08<02:35,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1005:   6%|▌         | 6/100 [00:09<02:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1004:   7%|▋         | 7/100 [00:11<02:31,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1034:   8%|▊         | 8/100 [00:12<02:28,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1031:   9%|▉         | 9/100 [00:14<02:28,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1028:  10%|█         | 10/100 [00:16<02:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1048:  11%|█         | 11/100 [00:17<02:25,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1048:  12%|█▏        | 12/100 [00:19<02:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1046:  13%|█▎        | 13/100 [00:21<02:21,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1046:  14%|█▍        | 14/100 [00:22<02:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1048:  15%|█▌        | 15/100 [00:24<02:18,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1048:  16%|█▌        | 16/100 [00:25<02:15,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1075:  17%|█▋        | 17/100 [00:27<02:15,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1097:  18%|█▊        | 18/100 [00:29<02:12,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.111:  19%|█▉        | 19/100 [00:30<02:12,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1107:  20%|██        | 20/100 [00:32<02:09,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1109:  21%|██        | 21/100 [00:34<02:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.111:  22%|██▏       | 22/100 [00:35<02:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1109:  23%|██▎       | 23/100 [00:37<02:05,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1106:  24%|██▍       | 24/100 [00:38<02:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1125:  25%|██▌       | 25/100 [00:40<02:02,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.112:  26%|██▌       | 26/100 [00:42<02:00,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1116:  27%|██▋       | 27/100 [00:43<01:59,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1135:  28%|██▊       | 28/100 [00:45<01:56,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1134:  29%|██▉       | 29/100 [00:47<01:56,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1139:  30%|███       | 30/100 [00:48<01:53,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1151:  31%|███       | 31/100 [00:50<01:52,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1147:  32%|███▏      | 32/100 [00:52<01:49,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1142:  33%|███▎      | 33/100 [00:53<01:49,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1139:  34%|███▍      | 34/100 [00:55<01:46,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1134:  35%|███▌      | 35/100 [00:56<01:45,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1144:  36%|███▌      | 36/100 [00:58<01:43,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1151:  37%|███▋      | 37/100 [01:00<01:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.118:  38%|███▊      | 38/100 [01:01<01:39,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1175:  39%|███▉      | 39/100 [01:03<01:39,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1173:  40%|████      | 40/100 [01:04<01:36,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1174:  41%|████      | 41/100 [01:06<01:35,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1174:  42%|████▏     | 42/100 [01:08<01:33,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1179:  43%|████▎     | 43/100 [01:09<01:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1174:  44%|████▍     | 44/100 [01:11<01:30,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1251:  45%|████▌     | 45/100 [01:13<01:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1247:  46%|████▌     | 46/100 [01:14<01:26,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1293:  47%|████▋     | 47/100 [01:16<01:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1289:  48%|████▊     | 48/100 [01:17<01:23,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1286:  49%|████▉     | 49/100 [01:19<01:23,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1279:  50%|█████     | 50/100 [01:21<01:20,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.129:  51%|█████     | 51/100 [01:22<01:20,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1283:  52%|█████▏    | 52/100 [01:24<01:17,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.128:  53%|█████▎    | 53/100 [01:26<01:16,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1276:  54%|█████▍    | 54/100 [01:27<01:14,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1282:  55%|█████▌    | 55/100 [01:29<01:13,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1275:  56%|█████▌    | 56/100 [01:30<01:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1293:  57%|█████▋    | 57/100 [01:32<01:10,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.137:  58%|█████▊    | 58/100 [01:34<01:08,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1384:  59%|█████▉    | 59/100 [01:35<01:07,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1377:  60%|██████    | 60/100 [01:37<01:04,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.137:  61%|██████    | 61/100 [01:39<01:03,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1377:  62%|██████▏   | 62/100 [01:40<01:01,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1369:  63%|██████▎   | 63/100 [01:42<01:00,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1364:  64%|██████▍   | 64/100 [01:43<00:58,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1358:  65%|██████▌   | 65/100 [01:45<00:57,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1357:  66%|██████▌   | 66/100 [01:47<00:55,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1376:  67%|██████▋   | 67/100 [01:48<00:53,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1382:  68%|██████▊   | 68/100 [01:50<00:51,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1374:  69%|██████▉   | 69/100 [01:52<00:50,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1373:  70%|███████   | 70/100 [01:53<00:48,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1367:  71%|███████   | 71/100 [01:55<00:47,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.136:  72%|███████▏  | 72/100 [01:56<00:45,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1355:  73%|███████▎  | 73/100 [01:58<00:43,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1406:  74%|███████▍  | 74/100 [02:00<00:42,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1399:  75%|███████▌  | 75/100 [02:01<00:40,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1398:  76%|███████▌  | 76/100 [02:03<00:38,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.146:  77%|███████▋  | 77/100 [02:05<00:37,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1456:  78%|███████▊  | 78/100 [02:06<00:35,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1449:  79%|███████▉  | 79/100 [02:08<00:34,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1441:  80%|████████  | 80/100 [02:09<00:32,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1442:  81%|████████  | 81/100 [02:11<00:30,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1436:  82%|████████▏ | 82/100 [02:13<00:29,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1431:  83%|████████▎ | 83/100 [02:14<00:27,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.144:  84%|████████▍ | 84/100 [02:16<00:25,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1432:  85%|████████▌ | 85/100 [02:18<00:24,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.143:  86%|████████▌ | 86/100 [02:19<00:22,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1426:  87%|████████▋ | 87/100 [02:21<00:21,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1434:  88%|████████▊ | 88/100 [02:22<00:19,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1437:  89%|████████▉ | 89/100 [02:24<00:17,  1.64s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1493:  90%|█████████ | 90/100 [02:26<00:16,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1526:  91%|█████████ | 91/100 [02:27<00:14,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1597:  92%|█████████▏| 92/100 [02:29<00:12,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1597:  93%|█████████▎| 93/100 [02:31<00:11,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1601:  94%|█████████▍| 94/100 [02:32<00:09,  1.61s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1593:  95%|█████████▌| 95/100 [02:34<00:08,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1589:  96%|█████████▌| 96/100 [02:35<00:06,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1603:  97%|█████████▋| 97/100 [02:37<00:04,  1.63s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.163:  98%|█████████▊| 98/100 [02:39<00:03,  1.62s/it]

torch.Size([4, 5])
torch.Size([4, 5])


epoch: 10, train loss: 0.1627:  99%|█████████▉| 99/100 [02:40<00:01,  1.63s/it]

torch.Size([3, 5])
torch.Size([3, 5])


epoch: 10, train loss: 0.1623: 100%|██████████| 100/100 [02:42<00:00,  1.62s/it]
epoch: 10, valid loss: 2.5405:   2%|▏         | 1/51 [00:00<00:11,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.5449:   4%|▍         | 2/51 [00:00<00:11,  4.20it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.5202:   6%|▌         | 3/51 [00:00<00:11,  4.16it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.5119:   8%|▊         | 4/51 [00:00<00:11,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4935:  10%|▉         | 5/51 [00:01<00:11,  4.14it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4695:  12%|█▏        | 6/51 [00:01<00:10,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4715:  14%|█▎        | 7/51 [00:01<00:10,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4478:  16%|█▌        | 8/51 [00:01<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4492:  18%|█▊        | 9/51 [00:02<00:10,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4531:  20%|█▉        | 10/51 [00:02<00:09,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.433:  22%|██▏       | 11/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.4091:  24%|██▎       | 12/51 [00:02<00:09,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.386:  25%|██▌       | 13/51 [00:03<00:09,  4.07it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3901:  27%|██▋       | 14/51 [00:03<00:09,  4.06it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3846:  29%|██▉       | 15/51 [00:03<00:08,  4.04it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3804:  31%|███▏      | 16/51 [00:03<00:08,  4.05it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3596:  33%|███▎      | 17/51 [00:04<00:08,  4.05it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3618:  35%|███▌      | 18/51 [00:04<00:08,  4.09it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3387:  37%|███▋      | 19/51 [00:04<00:07,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3299:  39%|███▉      | 20/51 [00:04<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3071:  41%|████      | 21/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2845:  43%|████▎     | 22/51 [00:05<00:07,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3117:  45%|████▌     | 23/51 [00:05<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.316:  47%|████▋     | 24/51 [00:05<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.319:  49%|████▉     | 25/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2973:  51%|█████     | 26/51 [00:06<00:06,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2969:  53%|█████▎    | 27/51 [00:06<00:05,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3051:  55%|█████▍    | 28/51 [00:06<00:05,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.329:  57%|█████▋    | 29/51 [00:07<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.3324:  59%|█████▉    | 30/51 [00:07<00:05,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.311:  61%|██████    | 31/51 [00:07<00:04,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2956:  63%|██████▎   | 32/51 [00:07<00:04,  4.10it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2732:  65%|██████▍   | 33/51 [00:08<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2798:  67%|██████▋   | 34/51 [00:08<00:04,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2576:  69%|██████▊   | 35/51 [00:08<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2691:  71%|███████   | 36/51 [00:08<00:03,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.299:  73%|███████▎  | 37/51 [00:09<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2768:  75%|███████▍  | 38/51 [00:09<00:03,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2557:  76%|███████▋  | 39/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2338:  78%|███████▊  | 40/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.2121:  80%|████████  | 41/51 [00:09<00:02,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1924:  82%|████████▏ | 42/51 [00:10<00:02,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1749:  84%|████████▍ | 43/51 [00:10<00:01,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1538:  86%|████████▋ | 44/51 [00:10<00:01,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1351:  88%|████████▊ | 45/51 [00:10<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1445:  90%|█████████ | 46/51 [00:11<00:01,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1507:  92%|█████████▏| 47/51 [00:11<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1326:  94%|█████████▍| 48/51 [00:11<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.141:  96%|█████████▌| 49/51 [00:11<00:00,  4.13it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1392:  98%|█████████▊| 50/51 [00:12<00:00,  4.12it/s]

torch.Size([2, 5])
torch.Size([2, 5])


epoch: 10, valid loss: 2.1182: 100%|██████████| 51/51 [00:12<00:00,  4.11it/s]

torch.Size([2, 5])
torch.Size([2, 5])
[{'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 4, 'label': 3}]}, {'results_1': [{'prediction': 1, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}]}, {'results_1': [{'prediction': 3, 'label': 3}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 2}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'predicti


