In [1]:
!pip install transformers
!pip install datasets #데이터 세트 다운로드 Hugging Face 연동



In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [3]:
import copy # 특정한 파이썬 객체를 통째로 메모리에 copy할 때
import json # json 형식으로 데이터를 표현할 때
import logging # 학습 과정 등 전반적인 프로그램의 진행 상황을 로깅할 때
import os # 파일 입출력 등 현재 컴퓨터에 대한 기능 수행할 때

# 경고(warning) 메시지가 너무 많이 나오는 것을 대비하여 무시 처리
import warnings
warnings.filterwarnings("ignore")

# 로깅할 때 기본적으로 오류(error) 사항으로 로그 메시지를 남기겠다는 의미
import logging
logging.basicConfig(level=logging.ERROR)

# 벡터, 행렬 등의 처리를 위한 NumPy, 테이블(엑셀) 형식의 데이터 처리할 때 Pandas
import numpy as np
import pandas as pd

from datasets import load_dataset
# train_test_split: 별도로 구분된 validation 세트가 없을 때
# 학습 데이터 세트에서 일부를 train과 validation으로 나눌 때 자주 사용 (8:2 정도로 나눔)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch

### 학습한 모델 관련 라이브러리 불러오기

In [4]:
import transformers
# Auto Model For Sequence Classification: 텍스트 분류를 위한 모델 → Cross-Entropy loss 사용
from transformers import AutoConfig, AutoModel, AutoModelForSequenceClassification, AutoTokenizer
# linear_schedule_with_warmup: 단계적으로 learning rate 줄여나가는 방법
# AdamW: SGD와 같이 optimization 방법 중 하나
from transformers import AdamW, get_linear_schedule_with_warmup

### 우리가 쓸 모델

- KoBigBird를 사용하고, 다음과 같은 형태로 사용 가능
- KoBigBird: BigBird 특유의 sparse attention 사용 (default)

In [5]:
from transformers import AutoModel, AutoTokenizer

# by default its in `block_sparse` mode with num_random_blocks=3, block_size=64
# 이름에서부터 알 수 있듯이 KoBigBird는 BERT 기반의 모델
model = AutoModel.from_pretrained("monologg/kobigbird-bert-base")

# Tokenizer도 마찬가지로 BERT 기반에서 가져온 것을 확인
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
text = "한국어 BigBird 모델을 공개합니다!"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

Attention type 'block_sparse' is not possible if sequence_length: 12 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


### 실험을 위한 하이퍼 파라미터 설정

In [6]:
# SimpleNamespace는 온점(.)으로 속성의 값을 정의할 수 있도록 해주는 라이브러리
# config.task = "cls"라고 하면, 나중에 print(config.task)했을 때 "cls"라고 출력
from types import SimpleNamespace

config = SimpleNamespace()

config.task = "cls"
config.dataset = "comment"

config.cache_dir = "cache" # 현재 데이터 세트에 대한 단어집 등 현재 task를 위한 임시적인 폴더
config.output_dir = "output" # 최종적인 모델이 저장되고, 결과가 저장되는 폴더

config.use_tpu = False
config.model_name_or_path = "monologg/kobigbird-bert-base" # Model name or path (HuggingFace에서 불러와 사용할 모델 이름)
config.data_dir = "./" # The input data dir ("10000_labeled.csv"가 있는 경로)

# 실질적으로 학습을 위해서는 tokenizing이 수행된 train file과 predict file을 만들어야 한다.
config.train_file = "complete_train.csv" # 미리 준비된 학습 데이터 세트 경로
# 어차피 10000_labeled.csv를 (1) training 목적, (2) validatoin 목적으로 쪼개니까 평가는 validation으로 결과가 나올 것임
config.predict_file = "complete_train.csv" # 미리 준비된 평가 데이터 세트 경로
# → 이거 일단 없으면, 지금처럼 train_file로 넣으시되, 나중에 생기시면 갈아끼우기

config.max_seq_length = 1024 # The maximum total input sequence length after tokenization. (최대 토큰 길이)
config.train_batch_size = 4 # Batch size for training. (학습할 때 batch_size)
config.eval_batch_size = 2 # Batch size for evaluation. (평가할 때 batch_size)

config.learning_rate = 3e-5 # The initial learning rate for Adam. (Adam optimizer에서 쓸 learning rate)
config.num_train_epochs = 10 # Total number of training epochs to perform. (전체 학습 epoch 수)

config.num_labels = 5 # 현재 task에서 선호도(1: 극진보, 2: 진보, 3: 중립, 4: 보수, 5: 극보수)의 개수는 5개이므로
# 5 classes multi-class classification 문제로 이해 할 수 있음
config.gradient_accumulation_steps = 2 # Number of updates steps to accumulate before performing a backward/update pass.
# batch_size가 큰 것처럼 처리하기 위해서, backward()를 매 번 수행하지 않고, gradient를 누적(acculmulation)하는 것

config.threads = 4
config.seed = 42 # random seed for initialization

config.do_train = True # Whether to run training.
config.do_eval_during_train = True
config.do_eval = True # Whether to run prediction.

config.do_lower_case = False
config.weight_decay = 0.0 # Weight decay if we apply some.
config.adam_epsilon = 1e-8 # Epsilon for Adam optimizer.
config.max_grad_norm = 1.0 # Max gradient norm.
config.warmup_proportion = 0.0 # Warmup proportion for linear warmup
# BigBird에서는 full attention을 하면, 메모리는 조금 더 소모되지만, 더 정확도가 향상
# config.attention_type = "original_full"

### 학습 데이터 전처리
- 학습 text를 매번 tokenization을 하지 않고, 모델 학습 시작전에 미리 모든 텍스트를 tokenization 한 결과를 저장한다.

In [7]:
# 본 실습에서 사용할 tokenizer 객체 초기화
tokenizer = AutoTokenizer.from_pretrained(config.model_name_or_path, cache_dir=config.cache_dir)

In [8]:
def train_split(config, texts, labels, is_train):
    # 바로 여기에서 train 데이터 세트가 8:2로 training과 validation이 나누어 진다.
    # 지금 평가 결과는 validation에 대한 결과
    # [오류] stratify가 labels면, test_dataset에 특정 레이블이 아예 등장하지 않으면 오류 발생
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=labels
    )
    """
    x_train, y_train, x_label, y_label = train_test_split(
        texts, labels, test_size=0.2, random_state=config.seed, stratify=None
    )
    if is_train:
        texts, labels = x_train, x_label
    else:
        texts, labels = y_train, y_label
    return texts, labels

# 댓글(comment)이 담긴 .csv 파일이 있을 때, 여기에서 텍스트와 레이블 추출
def process_comment_cls(config, data_file, is_train):
    df = pd.read_csv(data_file)
    df = df.dropna(subset=['title', 'content', 'label1', 'label2'])

    # 매 줄에서 "label1(정치성향)", "label2(편향여부)" 열 추출
    politics = (df["label1"] - 1).astype(int).values.tolist()
    governments = (df["label2"]).astype(int).values.tolist()
    labels = []
    # 한 줄씩 데이터를 확인하며
    for i in range(len(politics)):
        politic = politics[i]
        government = governments[i]
        labels.append([politic, government])
    print(len(labels))

    # title과 content를 합쳐서 texts로 표현
    texts = (df["title"] + " " + df["content"]).astype(str).values.tolist()

    texts, labels = train_split(config, texts, labels, is_train)
    return texts, labels

### 데이터 토큰화
- 주어진 데이터를 토큰화하고, 토큰화된 데이터를 파일에 저장하는 함수를 정의.
- 주어진 데이터는 텍스트와 레이블로 구성되어 있으며, 텍스트는 토큰화되고, 레이블은 정수로 변환.
- 토큰화된 데이터와 변환된 레이블은 JSON 형식으로 파일에 저장

In [9]:
import torch.utils.data as torch_data

def data_pretokenizing(config, tokenizer, is_train=True):
    if is_train:
        data_file = config.train_file
    else:
        data_file = config.predict_file

    data_path = config.data_dir
    if data_file is not None:
        data_path = os.path.join(data_path, data_file)
    else:
        data_path += "/"

    # 실제로 tokenizer를  저장될 데이터 세트의 파일 이름이 바로 dataset_file
    comps = [
        data_path,
        config.dataset,
        config.model_name_or_path.replace("/", "_"),
        config.max_seq_length,
        "train" if is_train else "dev",
        "dataset.txt",
    ]
    dataset_file = "_".join([str(comp) for comp in comps])
    print("dataset_file:", dataset_file)

    # 텍스트 문장을 읽어와서 token 값만 저장
    with open(dataset_file, "w", encoding="utf-8") as writer_file:
        # data: "joongang.csv" 파일에서 읽어와 (텍스트, 선호도 레이블)만 남긴 .csv 파일
        cnt = 0
        total_data = process_comment_cls(config, data_path, is_train)
        # 학습 데이터 세트를 하나씩 확인하며
        for text, label in zip(total_data[0], total_data[1]):
            # 여기에서 data는 하나의 (텍스트, 레이블) 쌍
            # feature는 해당 텍스트를 tokenizer에 넣어서 나온 결과
            feature = tokenizer(text, max_length=config.max_seq_length, padding="max_length", truncation=True, add_special_tokens=True)
            # 실제로 학습을 위해서는 (각 토큰의 index, 정답 레이블)로 학습을 진행
            writed_data = {
                "input_ids": feature["input_ids"],
                "attention_mask": feature["attention_mask"],
                "politic": int(float(label[0])), # "2.0" → 2.0 → 2
                "government": int(float(label[1])), # "2.0" → 2.0 → 2
              }
            # JSON은 쉽게 말하면 Python에서 dictionary와 같음 → 이를 file로 저장하는 것
            writer_file.write(json.dumps(writed_data) + "\n")
            cnt += 1
        print(f"{cnt} features processed from {data_path}")

    return dataset_file


In [10]:
# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 세트 전처리
    train_dataset_file = data_pretokenizing(config, tokenizer=tokenizer)

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset_file = data_pretokenizing(config, tokenizer=tokenizer, is_train=False)

# 결과적으로 만들어진 "./10000_labeled.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt"
# 내용을 확인해 보면, 약 8,000개의 각 학습 데이터에 대하여
#   → 하나씩 {"input_ids", "attention_mask", "preference", "slang"}으로 구성

dataset_file: ./complete_train.csv_comment_monologg_kobigbird-bert-base_1024_train_dataset.txt
2000
1600 features processed from ./complete_train.csv
dataset_file: ./complete_train.csv_comment_monologg_kobigbird-bert-base_1024_dev_dataset.txt
2000
400 features processed from ./complete_train.csv


### 데이터로더 초기화

#### 데이터패딩

In [11]:
class IterableDatasetPad(torch.utils.data.IterableDataset):
    def __init__(
        self,
        dataset: torch.utils.data.IterableDataset,
        batch_size: int = 1,
        num_devices: int = 1,
        seed: int = 0,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.seed = seed
        self.num_examples = 0

        chunk_size = self.batch_size * num_devices
        length = len(dataset)
        self.length = length + (chunk_size - length % chunk_size)

    def __len__(self):
        return self.length

    def __iter__(self):
        self.num_examples = 0
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.seed + self.epoch)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            self.num_examples += 1
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == self.batch_size:
                for batch in current_batch:
                    yield batch
                    if first_batch is None:
                        first_batch = batch.copy()
                current_batch = []

        while self.num_examples < self.length:
            add_num = self.batch_size - len(current_batch)
            self.num_examples += add_num
            current_batch += [first_batch] * add_num
            for batch in current_batch:
                yield batch
            current_batch = []

#### 전처리된 데이터를 DataLoader로 불러옴

In [12]:
# 전처리된 데이터는 하나하나 {"input_ids", "attention_mask", "labels", ...} 형태를 가짐
# PyTorch가 하나의 배치를 처리할 때는 PyTorch Tensor 형태여야 함
# <데이터 로더에서 불러오는 "Tensor"를 정의하는 함수>
def collate_fn(features):
    input_ids = [sample["input_ids"] for sample in features]
    attention_mask = [sample["attention_mask"] for sample in features]

    politic = [sample["politic"] for sample in features]
    government = [sample["government"] for sample in features]

    input_ids = torch.tensor(np.array(input_ids).astype(np.int64), dtype=torch.long)
    attention_mask = torch.tensor(np.array(attention_mask).astype(np.int8), dtype=torch.long)
    politic = torch.tensor(np.array(politic).astype(np.int64), dtype=torch.long)
    government = torch.tensor(np.array(government).astype(np.int64), dtype=torch.long)
    inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
    }
    labels = {
        "politic": politic,
        "government" : government
    }
    return inputs, labels

# 본 코드에서 학습을 수행하려는 경우
if config.do_train:
    # 학습 데이터 로더 초기화
    train_dataset = load_dataset("text", data_files=train_dataset_file, download_mode="force_redownload")["train"]
    train_dataset = train_dataset.map(lambda x: json.loads(x["text"]), batched=False)

    train_dataloader = torch_data.DataLoader(
        train_dataset,
        sampler=torch_data.RandomSampler(train_dataset),
        drop_last=False,
        batch_size=config.train_batch_size,
        collate_fn=(collate_fn),
    )

# 평가 데이터 세트 전처리(validation = dev 같은 의미)
predict_dataset = load_dataset("text", data_files=predict_dataset_file, download_mode="force_redownload")["train"]
predict_dataset = predict_dataset.map(lambda x: json.loads(x["text"]), batched=False)
predict_dataset = IterableDatasetPad(
    dataset=predict_dataset,
    batch_size=config.eval_batch_size,
    num_devices=1,
    seed=config.seed,
)

predict_dataloader = torch_data.DataLoader(
    predict_dataset,
    sampler=None,
    drop_last=False,
    batch_size=config.eval_batch_size,
    collate_fn=(collate_fn),
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

### 텍스트 분류 모델 정의

In [13]:
from transformers import AutoModel
import torch.nn as nn

# 텍스트 분류 모델 정의
class ClsModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # (전처리된) 학습할 데이터 세트의 경로 설정
        data_file = os.path.join(config.data_dir, str(config.train_file))
        # 사전 학습된 모델 가중치 불러오기
        model_config = AutoConfig.from_pretrained(config.model_name_or_path, num_labels=config.num_labels)
        self.model = AutoModel.from_pretrained(
            config.model_name_or_path, config=model_config, cache_dir=config.cache_dir
        )
        self.classifier1 = nn.Linear(768, 5)
        self.classifier2 = nn.Linear(768, 6)

        # BERT model과 별개로, 입력 문자열을 토큰의 index로 바꾸어주는 tokenizer가 사용됨
        self.tokenizer = tokenizer

    # "학습된 모델"을 save_dir에 저장하는 함수
    def save_pretrained(self, save_dir):
        self.model.save_pretrained(save_dir)
        # Tokenizer는 기본적으로 "special_tokens_map_file", "tokenizer_file"을 가질 수 있음
        # 이러한 값을 제거한 뒤에 save_dir에 저장하겠다는 의미
        for key in ["special_tokens_map_file", "tokenizer_file"]:
            self.tokenizer.init_kwargs.pop(key, None)
        self.tokenizer.save_pretrained(save_dir)

    def get_optimizer(self): # 현재 모델을 학습하기 위한 최적화 방법(AdamW) 객체를 불러오는 함수
        # bias랑 LayerNorm에는 decay 적용하지 않겠다는 의미
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": config.weight_decay,
            },
        ]
        # AdamW의 첫 번째 인자(params)는 "학습할 가중치", weight_decay는 가중치에 적용되는 regularization 기법
        optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=config.adam_epsilon)
        return optimizer

    def get_scheduler(self, batch_num, optimizer): # AdamW로 학습할 때, learning rate을 단계적으로 줄이기 위한 함수
        if config.warmup_proportion == 0.0:
            return None

        t_total = batch_num // config.gradient_accumulation_steps * config.num_train_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(t_total * config.warmup_proportion),
            num_training_steps=t_total,
        )

        return scheduler

    def forward(self, inputs):
        # print(inputs) => {"input_ids", "attention_mask"}
        # https://huggingface.co/docs/transformers/model_doc/big_bird
        # BigBirdModel의 입력 양식에 맞게 넣어주어야 오류가 없음
        # BigBirdModel로 "input_ids", "attention_mask" 등 "미리 정해진" 규격에 맞는 입력만
        # 들어가야 오류가 없다는 의미 => 그러므로, preference, slang 등은 들어가면 X
        hidden = self.model(**inputs)
        # print(hidden.last_hidden_state.shape) # torch.Size([4 (batch_size), 1024 (seq_len), 768 (embedding_size)])
        # 마지막 레이어의 [CLS] 토큰만 가져오기
        cls_token_embeddings = hidden.last_hidden_state[:,0,:] # [batch_size, 768]
        # print(cls_token_embeddings.shape)
        output_1 = self.classifier1(cls_token_embeddings) # 768 → 5
        output_2 = self.classifier2(cls_token_embeddings) # 768 → 6
        print(output_1.shape) # [batch_size, 5]
        print(output_2.shape) # [batch_size, 6]

        return output_1, output_2

    def eval_step(self, inputs, labels, outputs):
        logits_1 = outputs[0].detach().cpu()
        logits_2 = outputs[1].detach().cpu()
        labels_1 = self.tensor_to_list(labels["politic"])
        labels_2 = self.tensor_to_list(labels["government"])
        predictions_1 = self.tensor_to_list(torch.argmax(logits_1, dim=-1))
        predictions_2 = self.tensor_to_list(torch.argmax(logits_2, dim=-1))
        results_1 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_1, labels_1)]
        results_2 = [{"prediction": prediction, "label": label} for prediction, label in zip(predictions_2, labels_2)]
        return {"results_1": results_1, "results_2": results_2}





    # PyTorch의 Tensor 객체를 NumPy 객체로 변환
    def tensor_to_array(self, tensor):
        return tensor.detach().cpu().numpy()

    # PyTorch의 Tensor 객체를 Python의 리스트(list) 자료형으로 변환
    def tensor_to_list(self, tensor):
        return self.tensor_to_array(tensor).tolist()

In [14]:
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def cal_running_avg_loss(loss, running_avg_loss, decay=0.99):
    if running_avg_loss == 0:
        return loss
    running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
    return running_avg_loss

### 모델 학습 및 평가 라이브러리


In [15]:
from functools import partial
import sklearn.metrics as sklearn_metrics

"""binary_metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "precision": sklearn_metrics.precision_score, # TP / (TP + FP)
    "recall": sklearn_metrics.recall_score, # recall = sensitivity (민감도)
    "f1": sklearn_metrics.f1_score,
    "matthews_corrcoef": sklearn_metrics.matthews_corrcoef,
    "roc_auc": sklearn_metrics.roc_auc_score,
}""" # 우리는 두가지 task 다 다중분류임으로 안씀

metrics = {
    "accuracy": sklearn_metrics.accuracy_score,
    "f1-macro": partial(sklearn_metrics.f1_score, average="macro"),
}


def eval_cls(results_1, results_2, **kwargs):
    predictions_1 = np.array([result["prediction"] for result in results_1])
    labels_1 = np.array([result["label"] for result in results_1])
    predictions_2 = np.array([result["prediction"] for result in results_2])
    labels_2 = np.array([result["label"] for result in results_2])
    results_1 = {
        metric: round(f(labels_1, predictions_1) * 100, 2)
        for metric, f in metrics.items()
    }
    results_2 = {
        metric: round(f(labels_2, predictions_2) * 100, 2)
        for metric, f in metrics.items()
    }

    return {
        "results_1": results_1,
        "results_2": results_2,
        "best_score_1": results_1["accuracy"],
        "best_score_2": results_2["accuracy"],

    }


### Epoch 동안 학습 및 평가를 수행하는 함수 정의

In [16]:
def _run_epoch(model, loader, device=None, context=None, **kwargs):
    config = kwargs["config"]
    is_train = kwargs["is_train"]

    avg_loss = 0
    results = []
    batch_num = len(loader)

    if is_train:
        model.train()
        if config.use_tpu:
            optimizer = context.getattr_or("optimizer", lambda: model.get_optimizer())
            scheduler = context.getattr_or("scheduler", lambda: model.get_scheduler(batch_num, optimizer))
        else:
            optimizer = kwargs["optimizer"]
            scheduler = kwargs["scheduler"]
    else:
        model.eval()

    is_master = True

    pbar = tqdm(enumerate(loader), total=batch_num, disable=not is_master, dynamic_ncols=True, position=0, leave=True)

    corrected_1 = 0
    corrected_2 = 0
    total = 0

    for i, (inputs, labels) in pbar:
        # inputs: {"input_ids": [batch_size(4), seq_len, 768], "attention_mask": [batch_size(4), seq_len, 768]}
        # labels: {"preference": [batch_size(4), 1], "slang": [batch_size(4), 1], "politic": [batch_size(4), 1]}
        if not config.use_tpu:
            # (k, v) => ("input_ids", value)
            # (k, v) => ("attention_mask", value)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            for k, v in labels.items():
                if isinstance(v, torch.Tensor):
                    labels[k] = v.to(device)

        outputs = model(inputs)


        outputs_1 = outputs[0]
        outputs_2 = outputs[1]

        labels_1 = labels["politic"]
        labels_2 = labels["government"]

        loss_function_1 = nn.CrossEntropyLoss()
        loss_1 = loss_function_1(outputs_1, labels_1)

        total += outputs_1.shape[0]

        _, predicted_1 = outputs_1.max(1)
        corrected_1 += predicted_1.eq(labels_1).sum().item()

        loss_function_2 = nn.CrossEntropyLoss()
        loss_2 = loss_function_2(outputs_2, labels_2)

        _, predicted_2 = outputs_2.max(1)
        corrected_2 += predicted_2.eq(labels_2).sum().item()

        w_1 = 1
        w_2 = 1
        loss = w_1 * loss_1 + w_2 * loss_2

        avg_loss = cal_running_avg_loss(loss.item(), avg_loss)
        loss /= config.gradient_accumulation_steps

        if is_train:
            loss.backward()
            if i % config.gradient_accumulation_steps == 0 or i == batch_num - 1:
                if config.max_grad_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)

                optimizer.step()
                optimizer.zero_grad()

                if scheduler is not None:
                    scheduler.step()
        else:
            result = (model.module if hasattr(model, "module") else model).eval_step(inputs, labels, outputs)
            results.append(result)

        if is_master:
            pbar.set_description(f"epoch: {kwargs['epoch'] + 1}, {('train' if is_train else 'valid')} loss: {min(100, round(avg_loss, 4))}")

    return {
        "loss": avg_loss,
        "result": results,
    }


# 학습 코드에서 호출하는 함수
def run_epoch(**kwargs):
    model = kwargs.pop("model")
    if kwargs["config"].use_tpu:
        results = model(_run_epoch, **kwargs)
    else:
        results = _run_epoch(model, **kwargs)

    if isinstance(results, list):
        loss = sum([result["loss"] for result in results]) / len(results)
        result = []
        for res in results:
            result.extend(res["result"])
        results = {"loss": loss, "result": result}

    return results

### 딥러닝 모델 초기화 및 설정

In [17]:
# 현재 모델 이름이 "monologg/kobigbird-bert-base" 이므로, Hugging Face에서 찾아서 불러옴
set_seed(config.seed)

# 딥러닝 모델 초기화
model = ClsModel()

print(f"configuration: {str(config)}")

if torch.cuda.is_available(): # GPU를 사용할 수 있다면
    gpu_count = torch.cuda.device_count()
    print(f"{gpu_count} GPU device detected")
    devices = ["cuda:{}".format(i) for i in range(gpu_count)]
    model_dp = torch.nn.DataParallel(model, device_ids=devices)
    model.to(devices[0])
else: # GPU를 사용할 수 없다면 CPU로 구동
    devices = ["cpu"]
    model_dp = model

# 학습 결과를 저장하기 위한 폴더 만들기
if not os.path.exists(config.cache_dir):
    os.makedirs(config.cache_dir)

output_dir = os.path.join(config.output_dir, config.task, config.dataset)
print("Output directory:", output_dir)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 실제 학습을 위해 optimizer 및 scheduler 초기화
optimizer = None
scheduler = None
if config.do_train: # 학습 모드(train mode)인 경우
    optimizer = model.get_optimizer()
    scheduler = model.get_scheduler(len(train_dataloader), optimizer)

params = {
    "config": config,
    "model": model_dp,
    "optimizer": optimizer,
    "scheduler": scheduler,
}
if not config.use_tpu:
    params["device"] = devices[0]

configuration: namespace(task='cls', dataset='comment', cache_dir='cache', output_dir='output', use_tpu=False, model_name_or_path='monologg/kobigbird-bert-base', data_dir='./', train_file='complete_train.csv', predict_file='complete_train.csv', max_seq_length=1024, train_batch_size=4, eval_batch_size=2, learning_rate=3e-05, num_train_epochs=10, num_labels=5, gradient_accumulation_steps=2, threads=4, seed=42, do_train=True, do_eval_during_train=True, do_eval=True, do_lower_case=False, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, warmup_proportion=0.0)
1 GPU device detected
Output directory: output/cls/comment


In [18]:
def do_eval(epoch):
    with torch.no_grad():
        results = run_epoch(loader=predict_dataloader, epoch=epoch, is_train=False, **params)["result"]
        print(results)
        results_1 = [result['results_1'] for result in results]
        results_2 = [result['results_2'] for result in results]

        results_1 = [item for sublist in results_1 for item in sublist]
        results_2 = [item for sublist in results_2 for item in sublist]

        eval_results = eval_cls(
            config=config,
            model=model,
            loader=predict_dataloader,
            tokenizer=model.tokenizer,
            results_1=results_1,
            results_2=results_2,
        )

    print("Eval results for output 1.")
    for k, v in eval_results["results_1"].items():
        print(f"{k} : {v}")

    print("Eval results for output 2.")
    for k, v in eval_results["results_2"].items():
        print(f"{k} : {v}")

    return eval_results["best_score_1"], eval_results["best_score_2"]

train_losses = []
val_accuracies = []
if config.do_train:
    best_score = (0, 0)
    for epoch in range(config.num_train_epochs):
        train_results = run_epoch(loader=train_dataloader, epoch=epoch, is_train=True, **params)
        train_loss = train_results['loss']
        train_losses.append(train_loss)

        if config.do_eval_during_train:
            score1, score2 = do_eval(epoch)
            val_accuracies.append((score1, score2))

            if score1 >= best_score[0] and score2 >= best_score[1]:
                best_score = (score1, score2)
                output_dir = os.path.join(config.output_dir, config.task, config.dataset, f"{epoch}-{best_score[0]}-{best_score[1]}-ckpt")
                copy.deepcopy(
                    model_dp.module
                    if hasattr(model_dp, "module")
                    else model_dp._models[0]
                    if hasattr(model_dp, "_models")
                    else model_dp
                ).cpu().save_pretrained(output_dir)
                with open(os.path.join(output_dir, "finetune_config.json"), "w") as save_config:
                    json.dump(vars(config), save_config, sort_keys=True, indent=4)
                print(f"Checkpoint {output_dir} saved.")


  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5907:   0%|          | 1/400 [00:01<10:58,  1.65s/it]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5873:   0%|          | 2/400 [00:02<06:50,  1.03s/it]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5893:   1%|          | 3/400 [00:02<05:36,  1.18it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5841:   1%|          | 4/400 [00:03<04:54,  1.34it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5779:   1%|▏         | 5/400 [00:04<04:39,  1.41it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5651:   2%|▏         | 6/400 [00:04<04:21,  1.51it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5503:   2%|▏         | 7/400 [00:05<04:15,  1.54it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5418:   2%|▏         | 8/400 [00:05<04:06,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5382:   2%|▏         | 9/400 [00:06<04:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5272:   2%|▎         | 10/400 [00:07<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5144:   3%|▎         | 11/400 [00:07<04:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.514:   3%|▎         | 12/400 [00:08<03:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5013:   3%|▎         | 13/400 [00:08<03:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5055:   4%|▎         | 14/400 [00:09<03:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5062:   4%|▍         | 15/400 [00:10<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5007:   4%|▍         | 16/400 [00:10<03:51,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.5063:   4%|▍         | 17/400 [00:11<03:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4905:   4%|▍         | 18/400 [00:11<03:51,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4824:   5%|▍         | 19/400 [00:12<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4713:   5%|▌         | 20/400 [00:13<03:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4682:   5%|▌         | 21/400 [00:13<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4559:   6%|▌         | 22/400 [00:14<03:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4534:   6%|▌         | 23/400 [00:15<03:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4443:   6%|▌         | 24/400 [00:15<03:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4291:   6%|▋         | 25/400 [00:16<03:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4228:   6%|▋         | 26/400 [00:16<03:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4191:   7%|▋         | 27/400 [00:17<03:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4169:   7%|▋         | 28/400 [00:18<03:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4122:   7%|▋         | 29/400 [00:18<03:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4081:   8%|▊         | 30/400 [00:19<03:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4049:   8%|▊         | 31/400 [00:19<03:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.4:   8%|▊         | 32/400 [00:20<03:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3891:   8%|▊         | 33/400 [00:21<03:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3872:   8%|▊         | 34/400 [00:21<03:40,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3764:   9%|▉         | 35/400 [00:22<03:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3648:   9%|▉         | 36/400 [00:22<03:39,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3683:   9%|▉         | 37/400 [00:23<03:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.364:  10%|▉         | 38/400 [00:24<03:38,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3506:  10%|▉         | 39/400 [00:24<03:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3515:  10%|█         | 40/400 [00:25<03:38,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3408:  10%|█         | 41/400 [00:26<03:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3376:  10%|█         | 42/400 [00:26<03:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3214:  11%|█         | 43/400 [00:27<03:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3171:  11%|█         | 44/400 [00:27<03:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3163:  11%|█▏        | 45/400 [00:28<03:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3122:  12%|█▏        | 46/400 [00:29<03:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3069:  12%|█▏        | 47/400 [00:29<03:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.3012:  12%|█▏        | 48/400 [00:30<03:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2972:  12%|█▏        | 49/400 [00:30<03:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2919:  12%|█▎        | 50/400 [00:31<03:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2936:  13%|█▎        | 51/400 [00:32<03:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2915:  13%|█▎        | 52/400 [00:32<03:29,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2855:  13%|█▎        | 53/400 [00:33<03:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2771:  14%|█▎        | 54/400 [00:33<03:28,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2725:  14%|█▍        | 55/400 [00:34<03:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2641:  14%|█▍        | 56/400 [00:35<03:27,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2549:  14%|█▍        | 57/400 [00:35<03:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2489:  14%|█▍        | 58/400 [00:36<03:26,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2393:  15%|█▍        | 59/400 [00:36<03:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2333:  15%|█▌        | 60/400 [00:37<03:26,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2265:  15%|█▌        | 61/400 [00:38<03:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2267:  16%|█▌        | 62/400 [00:38<03:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2156:  16%|█▌        | 63/400 [00:39<03:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.215:  16%|█▌        | 64/400 [00:39<03:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.2049:  16%|█▋        | 65/400 [00:40<03:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1917:  16%|█▋        | 66/400 [00:41<03:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1804:  17%|█▋        | 67/400 [00:41<03:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1757:  17%|█▋        | 68/400 [00:42<03:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1726:  17%|█▋        | 69/400 [00:43<03:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1655:  18%|█▊        | 70/400 [00:43<03:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1549:  18%|█▊        | 71/400 [00:44<03:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1469:  18%|█▊        | 72/400 [00:44<03:18,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1436:  18%|█▊        | 73/400 [00:45<03:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1357:  18%|█▊        | 74/400 [00:46<03:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1284:  19%|█▉        | 75/400 [00:46<03:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1355:  19%|█▉        | 76/400 [00:47<03:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1307:  19%|█▉        | 77/400 [00:47<03:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1303:  20%|█▉        | 78/400 [00:48<03:14,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1235:  20%|█▉        | 79/400 [00:49<03:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1258:  20%|██        | 80/400 [00:49<03:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1159:  20%|██        | 81/400 [00:50<03:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.11:  20%|██        | 82/400 [00:50<03:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1106:  21%|██        | 83/400 [00:51<03:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.106:  21%|██        | 84/400 [00:52<03:11,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.1005:  21%|██▏       | 85/400 [00:52<03:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0916:  22%|██▏       | 86/400 [00:53<03:09,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0998:  22%|██▏       | 87/400 [00:54<03:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.095:  22%|██▏       | 88/400 [00:54<03:08,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0939:  22%|██▏       | 89/400 [00:55<03:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0911:  22%|██▎       | 90/400 [00:55<03:06,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0802:  23%|██▎       | 91/400 [00:56<03:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.074:  23%|██▎       | 92/400 [00:57<03:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0823:  23%|██▎       | 93/400 [00:57<03:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0849:  24%|██▎       | 94/400 [00:58<03:04,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0869:  24%|██▍       | 95/400 [00:58<03:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0934:  24%|██▍       | 96/400 [00:59<03:03,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0877:  24%|██▍       | 97/400 [01:00<03:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0933:  24%|██▍       | 98/400 [01:00<03:02,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0889:  25%|██▍       | 99/400 [01:01<03:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0945:  25%|██▌       | 100/400 [01:01<03:01,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0875:  25%|██▌       | 101/400 [01:02<03:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0868:  26%|██▌       | 102/400 [01:03<03:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0894:  26%|██▌       | 103/400 [01:03<03:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0821:  26%|██▌       | 104/400 [01:04<03:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0794:  26%|██▋       | 105/400 [01:04<03:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.069:  26%|██▋       | 106/400 [01:05<02:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0653:  27%|██▋       | 107/400 [01:06<02:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0619:  27%|██▋       | 108/400 [01:06<02:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0498:  27%|██▋       | 109/400 [01:07<02:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0429:  28%|██▊       | 110/400 [01:08<02:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0411:  28%|██▊       | 111/400 [01:08<02:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0321:  28%|██▊       | 112/400 [01:09<02:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0204:  28%|██▊       | 113/400 [01:09<02:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0114:  28%|██▊       | 114/400 [01:10<02:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 3.0018:  29%|██▉       | 115/400 [01:11<02:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9933:  29%|██▉       | 116/400 [01:11<02:51,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.984:  29%|██▉       | 117/400 [01:12<02:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9807:  30%|██▉       | 118/400 [01:12<02:49,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9752:  30%|██▉       | 119/400 [01:13<02:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9658:  30%|███       | 120/400 [01:14<02:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9602:  30%|███       | 121/400 [01:14<02:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.963:  30%|███       | 122/400 [01:15<02:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9613:  31%|███       | 123/400 [01:15<02:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9643:  31%|███       | 124/400 [01:16<02:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9541:  31%|███▏      | 125/400 [01:17<02:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9466:  32%|███▏      | 126/400 [01:17<02:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9308:  32%|███▏      | 127/400 [01:18<02:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9282:  32%|███▏      | 128/400 [01:18<02:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9265:  32%|███▏      | 129/400 [01:19<02:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9226:  32%|███▎      | 130/400 [01:20<02:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9157:  33%|███▎      | 131/400 [01:20<02:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.911:  33%|███▎      | 132/400 [01:21<02:41,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9159:  33%|███▎      | 133/400 [01:22<02:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9155:  34%|███▎      | 134/400 [01:22<02:40,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9162:  34%|███▍      | 135/400 [01:23<02:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9087:  34%|███▍      | 136/400 [01:23<02:38,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9062:  34%|███▍      | 137/400 [01:24<02:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.9022:  34%|███▍      | 138/400 [01:25<02:37,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8966:  35%|███▍      | 139/400 [01:25<02:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8887:  35%|███▌      | 140/400 [01:26<02:36,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8751:  35%|███▌      | 141/400 [01:26<02:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8585:  36%|███▌      | 142/400 [01:27<02:35,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8551:  36%|███▌      | 143/400 [01:28<02:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8476:  36%|███▌      | 144/400 [01:28<02:34,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8388:  36%|███▋      | 145/400 [01:29<02:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.845:  36%|███▋      | 146/400 [01:29<02:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8465:  37%|███▋      | 147/400 [01:30<02:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8304:  37%|███▋      | 148/400 [01:31<02:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8245:  37%|███▋      | 149/400 [01:31<02:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8226:  38%|███▊      | 150/400 [01:32<02:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8169:  38%|███▊      | 151/400 [01:32<02:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8161:  38%|███▊      | 152/400 [01:33<02:29,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8108:  38%|███▊      | 153/400 [01:34<02:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8026:  38%|███▊      | 154/400 [01:34<02:28,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.8033:  39%|███▉      | 155/400 [01:35<02:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7982:  39%|███▉      | 156/400 [01:35<02:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7816:  39%|███▉      | 157/400 [01:36<02:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7933:  40%|███▉      | 158/400 [01:37<02:25,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7799:  40%|███▉      | 159/400 [01:37<02:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.766:  40%|████      | 160/400 [01:38<02:24,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7629:  40%|████      | 161/400 [01:39<02:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7596:  40%|████      | 162/400 [01:39<02:23,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7697:  41%|████      | 163/400 [01:40<02:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7619:  41%|████      | 164/400 [01:40<02:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7605:  41%|████▏     | 165/400 [01:41<02:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7619:  42%|████▏     | 166/400 [01:42<02:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7464:  42%|████▏     | 167/400 [01:42<02:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7373:  42%|████▏     | 168/400 [01:43<02:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7334:  42%|████▏     | 169/400 [01:43<02:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7364:  42%|████▎     | 170/400 [01:44<02:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7335:  43%|████▎     | 171/400 [01:45<02:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7343:  43%|████▎     | 172/400 [01:45<02:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7251:  43%|████▎     | 173/400 [01:46<02:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7199:  44%|████▎     | 174/400 [01:46<02:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.7111:  44%|████▍     | 175/400 [01:47<02:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6954:  44%|████▍     | 176/400 [01:48<02:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6939:  44%|████▍     | 177/400 [01:48<02:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6899:  44%|████▍     | 178/400 [01:49<02:14,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6886:  45%|████▍     | 179/400 [01:50<02:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6821:  45%|████▌     | 180/400 [01:50<02:12,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6784:  45%|████▌     | 181/400 [01:51<02:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6748:  46%|████▌     | 182/400 [01:51<02:12,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6711:  46%|████▌     | 183/400 [01:52<02:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6699:  46%|████▌     | 184/400 [01:53<02:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6613:  46%|████▋     | 185/400 [01:53<02:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6482:  46%|████▋     | 186/400 [01:54<02:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6486:  47%|████▋     | 187/400 [01:54<02:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6533:  47%|████▋     | 188/400 [01:55<02:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6676:  47%|████▋     | 189/400 [01:56<02:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6642:  48%|████▊     | 190/400 [01:56<02:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.66:  48%|████▊     | 191/400 [01:57<02:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6546:  48%|████▊     | 192/400 [01:57<02:05,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.669:  48%|████▊     | 193/400 [01:58<02:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6566:  48%|████▊     | 194/400 [01:59<02:04,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6614:  49%|████▉     | 195/400 [01:59<02:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6528:  49%|████▉     | 196/400 [02:00<02:02,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6488:  49%|████▉     | 197/400 [02:00<02:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6495:  50%|████▉     | 198/400 [02:01<02:01,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6414:  50%|████▉     | 199/400 [02:02<02:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6352:  50%|█████     | 200/400 [02:02<02:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6361:  50%|█████     | 201/400 [02:03<02:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.627:  50%|█████     | 202/400 [02:03<01:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6353:  51%|█████     | 203/400 [02:04<02:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6298:  51%|█████     | 204/400 [02:05<01:58,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.626:  51%|█████▏    | 205/400 [02:05<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6212:  52%|█████▏    | 206/400 [02:06<01:57,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6117:  52%|█████▏    | 207/400 [02:07<01:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6114:  52%|█████▏    | 208/400 [02:07<01:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6016:  52%|█████▏    | 209/400 [02:08<01:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5901:  52%|█████▎    | 210/400 [02:08<01:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5895:  53%|█████▎    | 211/400 [02:09<01:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5905:  53%|█████▎    | 212/400 [02:10<01:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5923:  53%|█████▎    | 213/400 [02:10<01:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.576:  54%|█████▎    | 214/400 [02:11<01:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5782:  54%|█████▍    | 215/400 [02:11<01:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5676:  54%|█████▍    | 216/400 [02:12<01:51,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5774:  54%|█████▍    | 217/400 [02:13<01:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5827:  55%|█████▍    | 218/400 [02:13<01:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.581:  55%|█████▍    | 219/400 [02:14<01:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5742:  55%|█████▌    | 220/400 [02:15<01:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.6:  55%|█████▌    | 221/400 [02:15<01:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5994:  56%|█████▌    | 222/400 [02:16<01:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5898:  56%|█████▌    | 223/400 [02:16<01:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5761:  56%|█████▌    | 224/400 [02:17<01:46,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5726:  56%|█████▋    | 225/400 [02:18<01:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5637:  56%|█████▋    | 226/400 [02:18<01:44,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5657:  57%|█████▋    | 227/400 [02:19<01:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5604:  57%|█████▋    | 228/400 [02:19<01:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5529:  57%|█████▋    | 229/400 [02:20<01:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5545:  57%|█████▊    | 230/400 [02:21<01:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5582:  58%|█████▊    | 231/400 [02:21<01:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5587:  58%|█████▊    | 232/400 [02:22<01:41,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5624:  58%|█████▊    | 233/400 [02:22<01:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5579:  58%|█████▊    | 234/400 [02:23<01:40,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.558:  59%|█████▉    | 235/400 [02:24<01:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5519:  59%|█████▉    | 236/400 [02:24<01:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5599:  59%|█████▉    | 237/400 [02:25<01:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5638:  60%|█████▉    | 238/400 [02:25<01:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5588:  60%|█████▉    | 239/400 [02:26<01:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5672:  60%|██████    | 240/400 [02:27<01:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5675:  60%|██████    | 241/400 [02:27<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5595:  60%|██████    | 242/400 [02:28<01:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5598:  61%|██████    | 243/400 [02:29<01:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5558:  61%|██████    | 244/400 [02:29<01:34,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5508:  61%|██████▏   | 245/400 [02:30<01:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5503:  62%|██████▏   | 246/400 [02:30<01:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5423:  62%|██████▏   | 247/400 [02:31<01:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5453:  62%|██████▏   | 248/400 [02:32<01:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5352:  62%|██████▏   | 249/400 [02:32<01:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5223:  62%|██████▎   | 250/400 [02:33<01:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5179:  63%|██████▎   | 251/400 [02:33<01:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5282:  63%|██████▎   | 252/400 [02:34<01:29,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5288:  63%|██████▎   | 253/400 [02:35<01:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5171:  64%|██████▎   | 254/400 [02:35<01:28,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5128:  64%|██████▍   | 255/400 [02:36<01:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5111:  64%|██████▍   | 256/400 [02:36<01:26,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.515:  64%|██████▍   | 257/400 [02:37<01:26,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5132:  64%|██████▍   | 258/400 [02:38<01:25,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5107:  65%|██████▍   | 259/400 [02:38<01:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5083:  65%|██████▌   | 260/400 [02:39<01:24,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4994:  65%|██████▌   | 261/400 [02:39<01:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5032:  66%|██████▌   | 262/400 [02:40<01:23,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.495:  66%|██████▌   | 263/400 [02:41<01:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4882:  66%|██████▌   | 264/400 [02:41<01:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4908:  66%|██████▋   | 265/400 [02:42<01:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4859:  66%|██████▋   | 266/400 [02:42<01:20,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.488:  67%|██████▋   | 267/400 [02:43<01:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4847:  67%|██████▋   | 268/400 [02:44<01:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4857:  67%|██████▋   | 269/400 [02:44<01:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4861:  68%|██████▊   | 270/400 [02:45<01:18,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4949:  68%|██████▊   | 271/400 [02:46<01:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4956:  68%|██████▊   | 272/400 [02:46<01:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5096:  68%|██████▊   | 273/400 [02:47<01:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5079:  68%|██████▊   | 274/400 [02:47<01:16,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5129:  69%|██████▉   | 275/400 [02:48<01:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.5017:  69%|██████▉   | 276/400 [02:49<01:14,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4927:  69%|██████▉   | 277/400 [02:49<01:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4824:  70%|██████▉   | 278/400 [02:50<01:13,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4786:  70%|██████▉   | 279/400 [02:50<01:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4881:  70%|███████   | 280/400 [02:51<01:12,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4835:  70%|███████   | 281/400 [02:52<01:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4758:  70%|███████   | 282/400 [02:52<01:10,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4699:  71%|███████   | 283/400 [02:53<01:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4731:  71%|███████   | 284/400 [02:53<01:09,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4691:  71%|███████▏  | 285/400 [02:54<01:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4654:  72%|███████▏  | 286/400 [02:55<01:08,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4625:  72%|███████▏  | 287/400 [02:55<01:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4586:  72%|███████▏  | 288/400 [02:56<01:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4582:  72%|███████▏  | 289/400 [02:56<01:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4476:  72%|███████▎  | 290/400 [02:57<01:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4463:  73%|███████▎  | 291/400 [02:58<01:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4475:  73%|███████▎  | 292/400 [02:58<01:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4567:  73%|███████▎  | 293/400 [02:59<01:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4661:  74%|███████▎  | 294/400 [03:00<01:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4621:  74%|███████▍  | 295/400 [03:00<01:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4511:  74%|███████▍  | 296/400 [03:01<01:02,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4463:  74%|███████▍  | 297/400 [03:01<01:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4382:  74%|███████▍  | 298/400 [03:02<01:01,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.435:  75%|███████▍  | 299/400 [03:03<01:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4397:  75%|███████▌  | 300/400 [03:03<01:00,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4306:  75%|███████▌  | 301/400 [03:04<01:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4234:  76%|███████▌  | 302/400 [03:04<00:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.414:  76%|███████▌  | 303/400 [03:05<00:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4005:  76%|███████▌  | 304/400 [03:06<00:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.4006:  76%|███████▋  | 305/400 [03:06<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3995:  76%|███████▋  | 306/400 [03:07<00:56,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3977:  77%|███████▋  | 307/400 [03:07<00:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3906:  77%|███████▋  | 308/400 [03:08<00:55,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3868:  77%|███████▋  | 309/400 [03:09<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3881:  78%|███████▊  | 310/400 [03:09<00:54,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3976:  78%|███████▊  | 311/400 [03:10<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3979:  78%|███████▊  | 312/400 [03:10<00:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3896:  78%|███████▊  | 313/400 [03:11<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3822:  78%|███████▊  | 314/400 [03:12<00:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3785:  79%|███████▉  | 315/400 [03:12<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.367:  79%|███████▉  | 316/400 [03:13<00:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3635:  79%|███████▉  | 317/400 [03:13<00:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3707:  80%|███████▉  | 318/400 [03:14<00:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3681:  80%|███████▉  | 319/400 [03:15<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3705:  80%|████████  | 320/400 [03:15<00:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3661:  80%|████████  | 321/400 [03:16<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3658:  80%|████████  | 322/400 [03:17<00:47,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3585:  81%|████████  | 323/400 [03:17<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3554:  81%|████████  | 324/400 [03:18<00:45,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3589:  81%|████████▏ | 325/400 [03:18<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3505:  82%|████████▏ | 326/400 [03:19<00:44,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3441:  82%|████████▏ | 327/400 [03:20<00:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3446:  82%|████████▏ | 328/400 [03:20<00:43,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3385:  82%|████████▏ | 329/400 [03:21<00:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3319:  82%|████████▎ | 330/400 [03:21<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3243:  83%|████████▎ | 331/400 [03:22<00:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3262:  83%|████████▎ | 332/400 [03:23<00:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3224:  83%|████████▎ | 333/400 [03:23<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3245:  84%|████████▎ | 334/400 [03:24<00:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3306:  84%|████████▍ | 335/400 [03:24<00:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.326:  84%|████████▍ | 336/400 [03:25<00:38,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3144:  84%|████████▍ | 337/400 [03:26<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3216:  84%|████████▍ | 338/400 [03:26<00:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.326:  85%|████████▍ | 339/400 [03:27<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3189:  85%|████████▌ | 340/400 [03:27<00:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3221:  85%|████████▌ | 341/400 [03:28<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3217:  86%|████████▌ | 342/400 [03:29<00:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3228:  86%|████████▌ | 343/400 [03:29<00:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3183:  86%|████████▌ | 344/400 [03:30<00:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3094:  86%|████████▋ | 345/400 [03:31<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3113:  86%|████████▋ | 346/400 [03:31<00:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3068:  87%|████████▋ | 347/400 [03:32<00:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3091:  87%|████████▋ | 348/400 [03:32<00:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3101:  87%|████████▋ | 349/400 [03:33<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3051:  88%|████████▊ | 350/400 [03:34<00:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3148:  88%|████████▊ | 351/400 [03:34<00:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3149:  88%|████████▊ | 352/400 [03:35<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3056:  88%|████████▊ | 353/400 [03:35<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2967:  88%|████████▊ | 354/400 [03:36<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2903:  89%|████████▉ | 355/400 [03:37<00:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2966:  89%|████████▉ | 356/400 [03:37<00:26,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3095:  89%|████████▉ | 357/400 [03:38<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3165:  90%|████████▉ | 358/400 [03:38<00:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3219:  90%|████████▉ | 359/400 [03:39<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3182:  90%|█████████ | 360/400 [03:40<00:24,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3218:  90%|█████████ | 361/400 [03:40<00:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3285:  90%|█████████ | 362/400 [03:41<00:22,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3322:  91%|█████████ | 363/400 [03:41<00:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3286:  91%|█████████ | 364/400 [03:42<00:21,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3205:  91%|█████████▏| 365/400 [03:43<00:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3191:  92%|█████████▏| 366/400 [03:43<00:20,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3063:  92%|█████████▏| 367/400 [03:44<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.3007:  92%|█████████▏| 368/400 [03:44<00:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2926:  92%|█████████▏| 369/400 [03:45<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.279:  92%|█████████▎| 370/400 [03:46<00:18,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2719:  93%|█████████▎| 371/400 [03:46<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2921:  93%|█████████▎| 372/400 [03:47<00:16,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2946:  93%|█████████▎| 373/400 [03:48<00:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2849:  94%|█████████▎| 374/400 [03:48<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2787:  94%|█████████▍| 375/400 [03:49<00:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2821:  94%|█████████▍| 376/400 [03:49<00:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2894:  94%|█████████▍| 377/400 [03:50<00:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2937:  94%|█████████▍| 378/400 [03:51<00:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.295:  95%|█████████▍| 379/400 [03:51<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2997:  95%|█████████▌| 380/400 [03:52<00:12,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2997:  95%|█████████▌| 381/400 [03:52<00:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2945:  96%|█████████▌| 382/400 [03:53<00:10,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2809:  96%|█████████▌| 383/400 [03:54<00:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.288:  96%|█████████▌| 384/400 [03:54<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2796:  96%|█████████▋| 385/400 [03:55<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2958:  96%|█████████▋| 386/400 [03:55<00:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2871:  97%|█████████▋| 387/400 [03:56<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2867:  97%|█████████▋| 388/400 [03:57<00:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2748:  97%|█████████▋| 389/400 [03:57<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2773:  98%|█████████▊| 390/400 [03:58<00:06,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2756:  98%|█████████▊| 391/400 [03:58<00:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2702:  98%|█████████▊| 392/400 [03:59<00:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2563:  98%|█████████▊| 393/400 [04:00<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2626:  98%|█████████▊| 394/400 [04:00<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2702:  99%|█████████▉| 395/400 [04:01<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2658:  99%|█████████▉| 396/400 [04:02<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2708:  99%|█████████▉| 397/400 [04:02<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.271: 100%|█████████▉| 398/400 [04:03<00:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2771: 100%|█████████▉| 399/400 [04:03<00:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 1, train loss: 2.2742: 100%|██████████| 400/400 [04:04<00:00,  1.64it/s]
epoch: 1, valid loss: 3.3736:   1%|          | 2/201 [00:00<00:25,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.3672:   2%|▏         | 4/201 [00:00<00:25,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.342:   3%|▎         | 6/201 [00:00<00:25,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.3001:   4%|▍         | 8/201 [00:01<00:24,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.2822:   5%|▍         | 10/201 [00:01<00:24,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.2537:   6%|▌         | 12/201 [00:01<00:24,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.2289:   7%|▋         | 14/201 [00:01<00:24,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.2328:   8%|▊         | 16/201 [00:02<00:23,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.212:   9%|▉         | 18/201 [00:02<00:23,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.1968:  10%|▉         | 20/201 [00:02<00:23,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.1488:  11%|█         | 22/201 [00:02<00:22,  7.82it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.129:  12%|█▏        | 24/201 [00:03<00:23,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.0982:  13%|█▎        | 26/201 [00:03<00:22,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.0725:  14%|█▍        | 28/201 [00:03<00:22,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.0548:  15%|█▍        | 30/201 [00:03<00:22,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 3.0204:  16%|█▌        | 32/201 [00:04<00:21,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.9883:  17%|█▋        | 34/201 [00:04<00:21,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.9555:  18%|█▊        | 36/201 [00:04<00:21,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.9391:  19%|█▉        | 38/201 [00:04<00:20,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.9289:  20%|█▉        | 40/201 [00:05<00:20,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.901:  21%|██        | 42/201 [00:05<00:20,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.891:  22%|██▏       | 44/201 [00:05<00:20,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.8702:  23%|██▎       | 46/201 [00:05<00:20,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.8385:  24%|██▍       | 48/201 [00:06<00:19,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.8082:  25%|██▍       | 50/201 [00:06<00:19,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7803:  26%|██▌       | 52/201 [00:06<00:19,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7793:  27%|██▋       | 54/201 [00:06<00:18,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7607:  28%|██▊       | 56/201 [00:07<00:18,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7724:  29%|██▉       | 58/201 [00:07<00:18,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7723:  30%|██▉       | 60/201 [00:07<00:18,  7.82it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7482:  31%|███       | 62/201 [00:08<00:17,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7408:  32%|███▏      | 64/201 [00:08<00:17,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7389:  33%|███▎      | 66/201 [00:08<00:17,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7239:  34%|███▍      | 68/201 [00:08<00:17,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7136:  35%|███▍      | 70/201 [00:09<00:18,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.7044:  36%|███▌      | 72/201 [00:09<00:17,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.6823:  37%|███▋      | 74/201 [00:09<00:17,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.6534:  38%|███▊      | 76/201 [00:09<00:16,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.6557:  39%|███▉      | 78/201 [00:10<00:16,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.627:  40%|███▉      | 80/201 [00:10<00:15,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.6125:  41%|████      | 82/201 [00:10<00:15,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.6248:  42%|████▏     | 84/201 [00:10<00:15,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.6072:  43%|████▎     | 86/201 [00:11<00:14,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.607:  44%|████▍     | 88/201 [00:11<00:14,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5976:  45%|████▍     | 90/201 [00:11<00:14,  7.81it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5749:  46%|████▌     | 92/201 [00:11<00:13,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5757:  47%|████▋     | 94/201 [00:12<00:13,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5576:  48%|████▊     | 96/201 [00:12<00:13,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5405:  49%|████▉     | 98/201 [00:12<00:13,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5275:  50%|████▉     | 100/201 [00:12<00:12,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5027:  51%|█████     | 102/201 [00:13<00:12,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.5029:  52%|█████▏    | 104/201 [00:13<00:12,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.503:  53%|█████▎    | 106/201 [00:13<00:12,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4923:  54%|█████▎    | 108/201 [00:14<00:12,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4854:  55%|█████▍    | 110/201 [00:14<00:11,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4721:  56%|█████▌    | 112/201 [00:14<00:11,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.481:  57%|█████▋    | 114/201 [00:14<00:11,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4866:  58%|█████▊    | 116/201 [00:15<00:10,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4732:  59%|█████▊    | 118/201 [00:15<00:10,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4537:  60%|█████▉    | 120/201 [00:15<00:10,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4535:  61%|██████    | 122/201 [00:15<00:10,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4417:  62%|██████▏   | 124/201 [00:16<00:09,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4422:  63%|██████▎   | 126/201 [00:16<00:09,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4585:  64%|██████▎   | 128/201 [00:16<00:09,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4414:  65%|██████▍   | 130/201 [00:16<00:09,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4345:  66%|██████▌   | 132/201 [00:17<00:08,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4302:  67%|██████▋   | 134/201 [00:17<00:08,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4254:  68%|██████▊   | 136/201 [00:17<00:08,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.4209:  69%|██████▊   | 138/201 [00:17<00:08,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.419:  70%|██████▉   | 140/201 [00:18<00:07,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3957:  71%|███████   | 142/201 [00:18<00:07,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3981:  72%|███████▏  | 144/201 [00:18<00:07,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3861:  73%|███████▎  | 146/201 [00:18<00:07,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3631:  74%|███████▎  | 148/201 [00:19<00:06,  7.80it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3572:  75%|███████▍  | 150/201 [00:19<00:06,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3518:  76%|███████▌  | 152/201 [00:19<00:06,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.357:  77%|███████▋  | 154/201 [00:19<00:06,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3592:  78%|███████▊  | 156/201 [00:20<00:05,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3516:  79%|███████▊  | 158/201 [00:20<00:05,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3806:  80%|███████▉  | 160/201 [00:20<00:05,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3685:  81%|████████  | 162/201 [00:21<00:05,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.348:  82%|████████▏ | 164/201 [00:21<00:04,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3431:  83%|████████▎ | 166/201 [00:21<00:04,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3242:  84%|████████▎ | 168/201 [00:21<00:04,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3175:  85%|████████▍ | 170/201 [00:22<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.3054:  86%|████████▌ | 172/201 [00:22<00:03,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2901:  87%|████████▋ | 174/201 [00:22<00:03,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.274:  88%|████████▊ | 176/201 [00:22<00:03,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2762:  89%|████████▊ | 178/201 [00:23<00:03,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2915:  90%|████████▉ | 180/201 [00:23<00:02,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2773:  91%|█████████ | 182/201 [00:23<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2681:  92%|█████████▏| 184/201 [00:23<00:02,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2634:  93%|█████████▎| 186/201 [00:24<00:01,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2554:  94%|█████████▎| 188/201 [00:24<00:01,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2492:  95%|█████████▍| 190/201 [00:24<00:01,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2401:  96%|█████████▌| 192/201 [00:24<00:01,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.237:  97%|█████████▋| 194/201 [00:25<00:00,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2464:  98%|█████████▊| 196/201 [00:25<00:00,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2574:  99%|█████████▊| 198/201 [00:25<00:00,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2717: 100%|█████████▉| 200/201 [00:26<00:00,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 1, valid loss: 2.2866: 100%|██████████| 201/201 [00:26<00:00,  7.69it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 3, 'label': 0}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 4}, {'prediction': 0, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 0, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 0, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7258:   0%|          | 1/400 [00:00<04:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7245:   0%|          | 2/400 [00:01<04:00,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7369:   1%|          | 3/400 [00:01<04:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7413:   1%|          | 4/400 [00:02<03:58,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7599:   1%|▏         | 5/400 [00:03<04:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7672:   2%|▏         | 6/400 [00:03<03:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7656:   2%|▏         | 7/400 [00:04<03:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7677:   2%|▏         | 8/400 [00:04<03:55,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7682:   2%|▏         | 9/400 [00:05<03:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7783:   2%|▎         | 10/400 [00:06<03:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7877:   3%|▎         | 11/400 [00:06<03:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.797:   3%|▎         | 12/400 [00:07<03:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7924:   3%|▎         | 13/400 [00:07<03:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7917:   4%|▎         | 14/400 [00:08<03:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7817:   4%|▍         | 15/400 [00:09<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7815:   4%|▍         | 16/400 [00:09<03:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8007:   4%|▍         | 17/400 [00:10<03:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7991:   4%|▍         | 18/400 [00:10<03:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7971:   5%|▍         | 19/400 [00:11<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7924:   5%|▌         | 20/400 [00:12<03:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8:   5%|▌         | 21/400 [00:12<03:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8041:   6%|▌         | 22/400 [00:13<03:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8054:   6%|▌         | 23/400 [00:14<03:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8133:   6%|▌         | 24/400 [00:14<03:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8087:   6%|▋         | 25/400 [00:15<03:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8062:   6%|▋         | 26/400 [00:15<03:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.799:   7%|▋         | 27/400 [00:16<03:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7895:   7%|▋         | 28/400 [00:17<03:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7911:   7%|▋         | 29/400 [00:17<03:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7956:   8%|▊         | 30/400 [00:18<03:43,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7948:   8%|▊         | 31/400 [00:18<03:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7944:   8%|▊         | 32/400 [00:19<03:42,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7949:   8%|▊         | 33/400 [00:20<03:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7926:   8%|▊         | 34/400 [00:20<03:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7879:   9%|▉         | 35/400 [00:21<03:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8024:   9%|▉         | 36/400 [00:21<03:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7935:   9%|▉         | 37/400 [00:22<03:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8021:  10%|▉         | 38/400 [00:23<03:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8094:  10%|▉         | 39/400 [00:23<03:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8138:  10%|█         | 40/400 [00:24<03:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.817:  10%|█         | 41/400 [00:24<03:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8117:  10%|█         | 42/400 [00:25<03:36,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8173:  11%|█         | 43/400 [00:26<03:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8142:  11%|█         | 44/400 [00:26<03:34,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8155:  11%|█▏        | 45/400 [00:27<03:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8113:  12%|█▏        | 46/400 [00:27<03:33,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8069:  12%|█▏        | 47/400 [00:28<03:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.7998:  12%|█▏        | 48/400 [00:29<03:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8035:  12%|█▏        | 49/400 [00:29<03:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8094:  12%|█▎        | 50/400 [00:30<03:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8148:  13%|█▎        | 51/400 [00:31<03:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8287:  13%|█▎        | 52/400 [00:31<03:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8335:  13%|█▎        | 53/400 [00:32<03:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8306:  14%|█▎        | 54/400 [00:32<03:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.829:  14%|█▍        | 55/400 [00:33<03:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8247:  14%|█▍        | 56/400 [00:34<03:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8238:  14%|█▍        | 57/400 [00:34<03:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8267:  14%|█▍        | 58/400 [00:35<03:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8411:  15%|█▍        | 59/400 [00:35<03:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8333:  15%|█▌        | 60/400 [00:36<03:25,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8363:  15%|█▌        | 61/400 [00:37<03:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8424:  16%|█▌        | 62/400 [00:37<03:23,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8427:  16%|█▌        | 63/400 [00:38<03:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8341:  16%|█▌        | 64/400 [00:38<03:22,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8466:  16%|█▋        | 65/400 [00:39<03:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8422:  16%|█▋        | 66/400 [00:40<03:21,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8504:  17%|█▋        | 67/400 [00:40<03:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8718:  17%|█▋        | 68/400 [00:41<03:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8768:  17%|█▋        | 69/400 [00:42<03:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.875:  18%|█▊        | 70/400 [00:42<03:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8764:  18%|█▊        | 71/400 [00:43<03:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8806:  18%|█▊        | 72/400 [00:43<03:18,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8846:  18%|█▊        | 73/400 [00:44<03:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8855:  18%|█▊        | 74/400 [00:45<03:16,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8788:  19%|█▉        | 75/400 [00:45<03:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8772:  19%|█▉        | 76/400 [00:46<03:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8794:  19%|█▉        | 77/400 [00:46<03:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8818:  20%|█▉        | 78/400 [00:47<03:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.891:  20%|█▉        | 79/400 [00:48<03:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8992:  20%|██        | 80/400 [00:48<03:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9017:  20%|██        | 81/400 [00:49<03:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8947:  20%|██        | 82/400 [00:49<03:12,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8888:  21%|██        | 83/400 [00:50<03:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8893:  21%|██        | 84/400 [00:51<03:11,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8946:  21%|██▏       | 85/400 [00:51<03:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9096:  22%|██▏       | 86/400 [00:52<03:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9198:  22%|██▏       | 87/400 [00:52<03:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9269:  22%|██▏       | 88/400 [00:53<03:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9261:  22%|██▏       | 89/400 [00:54<03:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9178:  22%|██▎       | 90/400 [00:54<03:06,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9237:  23%|██▎       | 91/400 [00:55<03:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9244:  23%|██▎       | 92/400 [00:55<03:05,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9181:  23%|██▎       | 93/400 [00:56<03:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9173:  24%|██▎       | 94/400 [00:57<03:04,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9093:  24%|██▍       | 95/400 [00:57<03:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9212:  24%|██▍       | 96/400 [00:58<03:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9253:  24%|██▍       | 97/400 [00:59<03:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9415:  24%|██▍       | 98/400 [00:59<03:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9444:  25%|██▍       | 99/400 [01:00<03:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9342:  25%|██▌       | 100/400 [01:00<03:00,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9381:  25%|██▌       | 101/400 [01:01<03:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9338:  26%|██▌       | 102/400 [01:02<02:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.926:  26%|██▌       | 103/400 [01:02<02:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9138:  26%|██▌       | 104/400 [01:03<02:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9137:  26%|██▋       | 105/400 [01:03<03:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9038:  26%|██▋       | 106/400 [01:04<02:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8956:  27%|██▋       | 107/400 [01:05<02:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8931:  27%|██▋       | 108/400 [01:05<02:56,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8975:  27%|██▋       | 109/400 [01:06<02:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8977:  28%|██▊       | 110/400 [01:06<02:54,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9058:  28%|██▊       | 111/400 [01:07<02:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.904:  28%|██▊       | 112/400 [01:08<02:53,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9037:  28%|██▊       | 113/400 [01:08<02:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9059:  28%|██▊       | 114/400 [01:09<02:52,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9015:  29%|██▉       | 115/400 [01:09<02:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8959:  29%|██▉       | 116/400 [01:10<02:51,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9019:  29%|██▉       | 117/400 [01:11<02:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8962:  30%|██▉       | 118/400 [01:11<02:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9022:  30%|██▉       | 119/400 [01:12<02:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9024:  30%|███       | 120/400 [01:13<02:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9087:  30%|███       | 121/400 [01:13<02:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9219:  30%|███       | 122/400 [01:14<02:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9232:  31%|███       | 123/400 [01:14<02:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9257:  31%|███       | 124/400 [01:15<02:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9292:  31%|███▏      | 125/400 [01:16<02:47,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9295:  32%|███▏      | 126/400 [01:16<02:45,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9291:  32%|███▏      | 127/400 [01:17<02:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9176:  32%|███▏      | 128/400 [01:17<02:43,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9252:  32%|███▏      | 129/400 [01:18<02:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.928:  32%|███▎      | 130/400 [01:19<02:42,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.931:  33%|███▎      | 131/400 [01:19<02:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.933:  33%|███▎      | 132/400 [01:20<02:40,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9409:  33%|███▎      | 133/400 [01:20<02:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9329:  34%|███▎      | 134/400 [01:21<02:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9409:  34%|███▍      | 135/400 [01:22<02:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9398:  34%|███▍      | 136/400 [01:22<02:39,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9336:  34%|███▍      | 137/400 [01:23<02:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9336:  34%|███▍      | 138/400 [01:23<02:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9293:  35%|███▍      | 139/400 [01:24<02:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.933:  35%|███▌      | 140/400 [01:25<02:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9355:  35%|███▌      | 141/400 [01:25<02:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9325:  36%|███▌      | 142/400 [01:26<02:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.926:  36%|███▌      | 143/400 [01:27<02:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9161:  36%|███▌      | 144/400 [01:27<02:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9095:  36%|███▋      | 145/400 [01:28<02:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9072:  36%|███▋      | 146/400 [01:28<02:33,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9147:  37%|███▋      | 147/400 [01:29<02:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9152:  37%|███▋      | 148/400 [01:30<02:31,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9105:  37%|███▋      | 149/400 [01:30<02:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9059:  38%|███▊      | 150/400 [01:31<02:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9089:  38%|███▊      | 151/400 [01:31<02:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9047:  38%|███▊      | 152/400 [01:32<02:29,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.896:  38%|███▊      | 153/400 [01:33<02:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8838:  38%|███▊      | 154/400 [01:33<02:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8846:  39%|███▉      | 155/400 [01:34<02:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8775:  39%|███▉      | 156/400 [01:34<02:27,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8824:  39%|███▉      | 157/400 [01:35<02:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8693:  40%|███▉      | 158/400 [01:36<02:26,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.866:  40%|███▉      | 159/400 [01:36<02:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8684:  40%|████      | 160/400 [01:37<02:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8683:  40%|████      | 161/400 [01:37<02:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8709:  40%|████      | 162/400 [01:38<02:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8715:  41%|████      | 163/400 [01:39<02:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8738:  41%|████      | 164/400 [01:39<02:22,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8673:  41%|████▏     | 165/400 [01:40<02:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8699:  42%|████▏     | 166/400 [01:40<02:21,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8624:  42%|████▏     | 167/400 [01:41<02:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8674:  42%|████▏     | 168/400 [01:42<02:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8685:  42%|████▏     | 169/400 [01:42<02:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8779:  42%|████▎     | 170/400 [01:43<02:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8785:  43%|████▎     | 171/400 [01:44<02:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.876:  43%|████▎     | 172/400 [01:44<02:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8685:  43%|████▎     | 173/400 [01:45<02:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8769:  44%|████▎     | 174/400 [01:45<02:15,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8744:  44%|████▍     | 175/400 [01:46<02:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8768:  44%|████▍     | 176/400 [01:47<02:15,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8717:  44%|████▍     | 177/400 [01:47<02:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8687:  44%|████▍     | 178/400 [01:48<02:13,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8697:  45%|████▍     | 179/400 [01:48<02:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8846:  45%|████▌     | 180/400 [01:49<02:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8953:  45%|████▌     | 181/400 [01:50<02:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8984:  46%|████▌     | 182/400 [01:50<02:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.892:  46%|████▌     | 183/400 [01:51<02:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.888:  46%|████▌     | 184/400 [01:51<02:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.887:  46%|████▋     | 185/400 [01:52<02:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8871:  46%|████▋     | 186/400 [01:53<02:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8752:  47%|████▋     | 187/400 [01:53<02:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8837:  47%|████▋     | 188/400 [01:54<02:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8848:  47%|████▋     | 189/400 [01:54<02:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8967:  48%|████▊     | 190/400 [01:55<02:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8948:  48%|████▊     | 191/400 [01:56<02:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8953:  48%|████▊     | 192/400 [01:56<02:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9036:  48%|████▊     | 193/400 [01:57<02:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8984:  48%|████▊     | 194/400 [01:58<02:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8914:  49%|████▉     | 195/400 [01:58<02:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8934:  49%|████▉     | 196/400 [01:59<02:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9009:  49%|████▉     | 197/400 [01:59<02:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9149:  50%|████▉     | 198/400 [02:00<02:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9221:  50%|████▉     | 199/400 [02:01<02:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9163:  50%|█████     | 200/400 [02:01<02:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9161:  50%|█████     | 201/400 [02:02<02:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9143:  50%|█████     | 202/400 [02:02<02:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9172:  51%|█████     | 203/400 [02:03<02:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9264:  51%|█████     | 204/400 [02:04<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9239:  51%|█████▏    | 205/400 [02:04<02:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9238:  52%|█████▏    | 206/400 [02:05<01:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9249:  52%|█████▏    | 207/400 [02:06<01:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9347:  52%|█████▏    | 208/400 [02:06<01:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9343:  52%|█████▏    | 209/400 [02:07<01:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.929:  52%|█████▎    | 210/400 [02:07<01:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9251:  53%|█████▎    | 211/400 [02:08<01:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9237:  53%|█████▎    | 212/400 [02:09<01:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9304:  53%|█████▎    | 213/400 [02:09<01:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9297:  54%|█████▎    | 214/400 [02:10<01:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9188:  54%|█████▍    | 215/400 [02:11<01:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9203:  54%|█████▍    | 216/400 [02:11<01:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9215:  54%|█████▍    | 217/400 [02:12<01:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9122:  55%|█████▍    | 218/400 [02:12<01:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9035:  55%|█████▍    | 219/400 [02:13<01:56,  1.56it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9146:  55%|█████▌    | 220/400 [02:14<01:54,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9147:  55%|█████▌    | 221/400 [02:14<01:54,  1.56it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9304:  56%|█████▌    | 222/400 [02:15<01:52,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9256:  56%|█████▌    | 223/400 [02:16<01:51,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.929:  56%|█████▌    | 224/400 [02:16<01:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9291:  56%|█████▋    | 225/400 [02:17<01:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9435:  56%|█████▋    | 226/400 [02:17<01:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9509:  57%|█████▋    | 227/400 [02:18<01:48,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9552:  57%|█████▋    | 228/400 [02:19<01:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9644:  57%|█████▋    | 229/400 [02:19<01:47,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9692:  57%|█████▊    | 230/400 [02:20<01:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9753:  58%|█████▊    | 231/400 [02:21<01:46,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.97:  58%|█████▊    | 232/400 [02:21<01:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9661:  58%|█████▊    | 233/400 [02:22<01:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9705:  58%|█████▊    | 234/400 [02:22<01:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9688:  59%|█████▉    | 235/400 [02:23<01:43,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9681:  59%|█████▉    | 236/400 [02:24<01:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9663:  59%|█████▉    | 237/400 [02:24<01:41,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9596:  60%|█████▉    | 238/400 [02:25<01:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9548:  60%|█████▉    | 239/400 [02:25<01:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9621:  60%|██████    | 240/400 [02:26<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.975:  60%|██████    | 241/400 [02:27<01:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9721:  60%|██████    | 242/400 [02:27<01:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9711:  61%|██████    | 243/400 [02:28<01:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9717:  61%|██████    | 244/400 [02:29<01:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9688:  61%|██████▏   | 245/400 [02:29<01:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9754:  62%|██████▏   | 246/400 [02:30<01:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9843:  62%|██████▏   | 247/400 [02:30<01:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9769:  62%|██████▏   | 248/400 [02:31<01:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.981:  62%|██████▏   | 249/400 [02:32<01:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.991:  62%|██████▎   | 250/400 [02:32<01:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9867:  63%|██████▎   | 251/400 [02:33<01:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9853:  63%|██████▎   | 252/400 [02:33<01:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9888:  63%|██████▎   | 253/400 [02:34<01:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9973:  64%|██████▎   | 254/400 [02:35<01:28,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9842:  64%|██████▍   | 255/400 [02:35<01:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9917:  64%|██████▍   | 256/400 [02:36<01:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9872:  64%|██████▍   | 257/400 [02:37<01:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9826:  64%|██████▍   | 258/400 [02:37<01:28,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9851:  65%|██████▍   | 259/400 [02:38<01:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9888:  65%|██████▌   | 260/400 [02:38<01:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9838:  65%|██████▌   | 261/400 [02:39<01:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9834:  66%|██████▌   | 262/400 [02:40<01:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9785:  66%|██████▌   | 263/400 [02:40<01:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9778:  66%|██████▌   | 264/400 [02:41<01:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9675:  66%|██████▋   | 265/400 [02:42<01:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9628:  66%|██████▋   | 266/400 [02:42<01:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9681:  67%|██████▋   | 267/400 [02:43<01:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9611:  67%|██████▋   | 268/400 [02:43<01:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.959:  67%|██████▋   | 269/400 [02:44<01:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9626:  68%|██████▊   | 270/400 [02:45<01:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9688:  68%|██████▊   | 271/400 [02:45<01:20,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9666:  68%|██████▊   | 272/400 [02:46<01:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9741:  68%|██████▊   | 273/400 [02:46<01:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9661:  68%|██████▊   | 274/400 [02:47<01:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9732:  69%|██████▉   | 275/400 [02:48<01:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9598:  69%|██████▉   | 276/400 [02:48<01:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9584:  69%|██████▉   | 277/400 [02:49<01:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9528:  70%|██████▉   | 278/400 [02:49<01:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9537:  70%|██████▉   | 279/400 [02:50<01:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9493:  70%|███████   | 280/400 [02:51<01:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9658:  70%|███████   | 281/400 [02:51<01:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9623:  70%|███████   | 282/400 [02:52<01:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9625:  71%|███████   | 283/400 [02:53<01:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9611:  71%|███████   | 284/400 [02:53<01:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9763:  71%|███████▏  | 285/400 [02:54<01:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9661:  72%|███████▏  | 286/400 [02:54<01:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9659:  72%|███████▏  | 287/400 [02:55<01:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9782:  72%|███████▏  | 288/400 [02:56<01:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9947:  72%|███████▏  | 289/400 [02:56<01:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9917:  72%|███████▎  | 290/400 [02:57<01:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9955:  73%|███████▎  | 291/400 [02:58<01:09,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.987:  73%|███████▎  | 292/400 [02:58<01:07,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9837:  73%|███████▎  | 293/400 [02:59<01:07,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9904:  74%|███████▎  | 294/400 [02:59<01:06,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9946:  74%|███████▍  | 295/400 [03:00<01:06,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9969:  74%|███████▍  | 296/400 [03:01<01:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.991:  74%|███████▍  | 297/400 [03:01<01:04,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9833:  74%|███████▍  | 298/400 [03:02<01:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9734:  75%|███████▍  | 299/400 [03:03<01:03,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9716:  75%|███████▌  | 300/400 [03:03<01:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9656:  75%|███████▌  | 301/400 [03:04<01:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9648:  76%|███████▌  | 302/400 [03:04<01:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9647:  76%|███████▌  | 303/400 [03:05<01:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.959:  76%|███████▌  | 304/400 [03:06<00:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9445:  76%|███████▋  | 305/400 [03:06<00:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9599:  76%|███████▋  | 306/400 [03:07<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9502:  77%|███████▋  | 307/400 [03:08<00:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9458:  77%|███████▋  | 308/400 [03:08<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9366:  77%|███████▋  | 309/400 [03:09<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9276:  78%|███████▊  | 310/400 [03:09<00:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9139:  78%|███████▊  | 311/400 [03:10<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9203:  78%|███████▊  | 312/400 [03:11<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9115:  78%|███████▊  | 313/400 [03:11<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9088:  78%|███████▊  | 314/400 [03:12<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9094:  79%|███████▉  | 315/400 [03:12<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.912:  79%|███████▉  | 316/400 [03:13<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.916:  79%|███████▉  | 317/400 [03:14<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9138:  80%|███████▉  | 318/400 [03:14<00:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9111:  80%|███████▉  | 319/400 [03:15<00:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9062:  80%|████████  | 320/400 [03:15<00:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9025:  80%|████████  | 321/400 [03:16<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8952:  80%|████████  | 322/400 [03:17<00:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9007:  81%|████████  | 323/400 [03:17<00:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.8915:  81%|████████  | 324/400 [03:18<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9117:  81%|████████▏ | 325/400 [03:19<00:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9132:  82%|████████▏ | 326/400 [03:19<00:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9132:  82%|████████▏ | 327/400 [03:20<00:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9078:  82%|████████▏ | 328/400 [03:20<00:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9116:  82%|████████▏ | 329/400 [03:21<00:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9273:  82%|████████▎ | 330/400 [03:22<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9333:  83%|████████▎ | 331/400 [03:22<00:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.939:  83%|████████▎ | 332/400 [03:23<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.929:  83%|████████▎ | 333/400 [03:23<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.93:  84%|████████▎ | 334/400 [03:24<00:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9226:  84%|████████▍ | 335/400 [03:25<00:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9277:  84%|████████▍ | 336/400 [03:25<00:38,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9184:  84%|████████▍ | 337/400 [03:26<00:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9259:  84%|████████▍ | 338/400 [03:26<00:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9243:  85%|████████▍ | 339/400 [03:27<00:38,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9232:  85%|████████▌ | 340/400 [03:28<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9237:  85%|████████▌ | 341/400 [03:28<00:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9229:  86%|████████▌ | 342/400 [03:29<00:36,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9215:  86%|████████▌ | 343/400 [03:30<00:35,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9209:  86%|████████▌ | 344/400 [03:30<00:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9293:  86%|████████▋ | 345/400 [03:31<00:35,  1.56it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9275:  86%|████████▋ | 346/400 [03:32<00:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9288:  87%|████████▋ | 347/400 [03:32<00:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9346:  87%|████████▋ | 348/400 [03:33<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9337:  87%|████████▋ | 349/400 [03:33<00:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9377:  88%|████████▊ | 350/400 [03:34<00:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9275:  88%|████████▊ | 351/400 [03:35<00:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9274:  88%|████████▊ | 352/400 [03:35<00:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9208:  88%|████████▊ | 353/400 [03:36<00:29,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9264:  88%|████████▊ | 354/400 [03:36<00:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.937:  89%|████████▉ | 355/400 [03:37<00:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.935:  89%|████████▉ | 356/400 [03:38<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9282:  89%|████████▉ | 357/400 [03:38<00:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9286:  90%|████████▉ | 358/400 [03:39<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9216:  90%|████████▉ | 359/400 [03:40<00:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9208:  90%|█████████ | 360/400 [03:40<00:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9328:  90%|█████████ | 361/400 [03:41<00:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9271:  90%|█████████ | 362/400 [03:41<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9188:  91%|█████████ | 363/400 [03:42<00:23,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9229:  91%|█████████ | 364/400 [03:43<00:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9245:  91%|█████████▏| 365/400 [03:43<00:22,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9166:  92%|█████████▏| 366/400 [03:44<00:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9075:  92%|█████████▏| 367/400 [03:45<00:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9146:  92%|█████████▏| 368/400 [03:45<00:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9214:  92%|█████████▏| 369/400 [03:46<00:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9265:  92%|█████████▎| 370/400 [03:46<00:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9282:  93%|█████████▎| 371/400 [03:47<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9312:  93%|█████████▎| 372/400 [03:48<00:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9312:  93%|█████████▎| 373/400 [03:48<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9305:  94%|█████████▎| 374/400 [03:49<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9348:  94%|█████████▍| 375/400 [03:49<00:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9496:  94%|█████████▍| 376/400 [03:50<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9541:  94%|█████████▍| 377/400 [03:51<00:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9507:  94%|█████████▍| 378/400 [03:51<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9659:  95%|█████████▍| 379/400 [03:52<00:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.966:  95%|█████████▌| 380/400 [03:53<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9594:  95%|█████████▌| 381/400 [03:53<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9655:  96%|█████████▌| 382/400 [03:54<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9687:  96%|█████████▌| 383/400 [03:54<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9731:  96%|█████████▌| 384/400 [03:55<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.971:  96%|█████████▋| 385/400 [03:56<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9763:  96%|█████████▋| 386/400 [03:56<00:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9767:  97%|█████████▋| 387/400 [03:57<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9858:  97%|█████████▋| 388/400 [03:57<00:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9834:  97%|█████████▋| 389/400 [03:58<00:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9852:  98%|█████████▊| 390/400 [03:59<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9911:  98%|█████████▊| 391/400 [03:59<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 1.9918:  98%|█████████▊| 392/400 [04:00<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0128:  98%|█████████▊| 393/400 [04:01<00:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0037:  98%|█████████▊| 394/400 [04:01<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0026:  99%|█████████▉| 395/400 [04:02<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0026:  99%|█████████▉| 396/400 [04:02<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0021:  99%|█████████▉| 397/400 [04:03<00:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0045: 100%|█████████▉| 398/400 [04:04<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.0019: 100%|█████████▉| 399/400 [04:04<00:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 2, train loss: 2.002: 100%|██████████| 400/400 [04:05<00:00,  1.63it/s]
epoch: 2, valid loss: 4.4011:   1%|          | 2/201 [00:00<00:25,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.3766:   2%|▏         | 4/201 [00:00<00:25,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.3157:   3%|▎         | 6/201 [00:00<00:25,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.2468:   4%|▍         | 8/201 [00:01<00:25,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.1971:   5%|▍         | 10/201 [00:01<00:24,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.1442:   6%|▌         | 12/201 [00:01<00:25,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.1005:   7%|▋         | 14/201 [00:01<00:24,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.0738:   8%|▊         | 16/201 [00:02<00:24,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 4.0411:   9%|▉         | 18/201 [00:02<00:24,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.9978:  10%|▉         | 20/201 [00:02<00:23,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.9281:  11%|█         | 22/201 [00:02<00:23,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.8946:  12%|█▏        | 24/201 [00:03<00:23,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.8428:  13%|█▎        | 26/201 [00:03<00:23,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.8188:  14%|█▍        | 28/201 [00:03<00:23,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.7837:  15%|█▍        | 30/201 [00:03<00:23,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.728:  16%|█▌        | 32/201 [00:04<00:22,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.6809:  17%|█▋        | 34/201 [00:04<00:22,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.6291:  18%|█▊        | 36/201 [00:04<00:21,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.5932:  19%|█▉        | 38/201 [00:05<00:21,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.5735:  20%|█▉        | 40/201 [00:05<00:21,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.5343:  21%|██        | 42/201 [00:05<00:20,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.4962:  22%|██▏       | 44/201 [00:05<00:20,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.4558:  23%|██▎       | 46/201 [00:06<00:20,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.4059:  24%|██▍       | 48/201 [00:06<00:20,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.3735:  25%|██▍       | 50/201 [00:06<00:20,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.3288:  26%|██▌       | 52/201 [00:06<00:19,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.3247:  27%|██▋       | 54/201 [00:07<00:19,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.2958:  28%|██▊       | 56/201 [00:07<00:19,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.2986:  29%|██▉       | 58/201 [00:07<00:18,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.3004:  30%|██▉       | 60/201 [00:07<00:18,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.2612:  31%|███       | 62/201 [00:08<00:18,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.2254:  32%|███▏      | 64/201 [00:08<00:18,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.2004:  33%|███▎      | 66/201 [00:08<00:17,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.1888:  34%|███▍      | 68/201 [00:09<00:17,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.1567:  35%|███▍      | 70/201 [00:09<00:17,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.1423:  36%|███▌      | 72/201 [00:09<00:16,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.1108:  37%|███▋      | 74/201 [00:09<00:16,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.078:  38%|███▊      | 76/201 [00:10<00:16,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.0728:  39%|███▉      | 78/201 [00:10<00:16,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.0334:  40%|███▉      | 80/201 [00:10<00:16,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.0038:  41%|████      | 82/201 [00:10<00:16,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 3.0214:  42%|████▏     | 84/201 [00:11<00:15,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.9931:  43%|████▎     | 86/201 [00:11<00:15,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.985:  44%|████▍     | 88/201 [00:11<00:14,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.9517:  45%|████▍     | 90/201 [00:11<00:14,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.913:  46%|████▌     | 92/201 [00:12<00:14,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.9066:  47%|████▋     | 94/201 [00:12<00:14,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.8791:  48%|████▊     | 96/201 [00:12<00:13,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.8614:  49%|████▉     | 98/201 [00:12<00:13,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.8401:  50%|████▉     | 100/201 [00:13<00:13,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.8063:  51%|█████     | 102/201 [00:13<00:13,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.7802:  52%|█████▏    | 104/201 [00:13<00:12,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.7732:  53%|█████▎    | 106/201 [00:14<00:12,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.7495:  54%|█████▎    | 108/201 [00:14<00:12,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.7188:  55%|█████▍    | 110/201 [00:14<00:12,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.7017:  56%|█████▌    | 112/201 [00:14<00:12,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.715:  57%|█████▋    | 114/201 [00:15<00:11,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.691:  58%|█████▊    | 116/201 [00:15<00:11,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.6754:  59%|█████▊    | 118/201 [00:15<00:11,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.6403:  60%|█████▉    | 120/201 [00:15<00:10,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.6387:  61%|██████    | 122/201 [00:16<00:10,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.6161:  62%|██████▏   | 124/201 [00:16<00:10,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.6125:  63%|██████▎   | 126/201 [00:16<00:10,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.611:  64%|██████▎   | 128/201 [00:16<00:09,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.5944:  65%|██████▍   | 130/201 [00:17<00:09,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.5691:  66%|██████▌   | 132/201 [00:17<00:09,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.5637:  67%|██████▋   | 134/201 [00:17<00:09,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.5416:  68%|██████▊   | 136/201 [00:18<00:08,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.5164:  69%|██████▊   | 138/201 [00:18<00:08,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.5007:  70%|██████▉   | 140/201 [00:18<00:08,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4699:  71%|███████   | 142/201 [00:18<00:07,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4679:  72%|███████▏  | 144/201 [00:19<00:07,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4616:  73%|███████▎  | 146/201 [00:19<00:07,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4276:  74%|███████▎  | 148/201 [00:19<00:06,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4179:  75%|███████▍  | 150/201 [00:19<00:06,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4106:  76%|███████▌  | 152/201 [00:20<00:06,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4039:  77%|███████▋  | 154/201 [00:20<00:06,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.402:  78%|███████▊  | 156/201 [00:20<00:05,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3885:  79%|███████▊  | 158/201 [00:20<00:05,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.4159:  80%|███████▉  | 160/201 [00:21<00:05,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3971:  81%|████████  | 162/201 [00:21<00:05,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3826:  82%|████████▏ | 164/201 [00:21<00:04,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3792:  83%|████████▎ | 166/201 [00:21<00:04,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3599:  84%|████████▎ | 168/201 [00:22<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3563:  85%|████████▍ | 170/201 [00:22<00:04,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3296:  86%|████████▌ | 172/201 [00:22<00:03,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3213:  87%|████████▋ | 174/201 [00:23<00:03,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3019:  88%|████████▊ | 176/201 [00:23<00:03,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2967:  89%|████████▊ | 178/201 [00:23<00:03,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3052:  90%|████████▉ | 180/201 [00:23<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.3009:  91%|█████████ | 182/201 [00:24<00:02,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2855:  92%|█████████▏| 184/201 [00:24<00:02,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2757:  93%|█████████▎| 186/201 [00:24<00:01,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2689:  94%|█████████▎| 188/201 [00:24<00:01,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2605:  95%|█████████▍| 190/201 [00:25<00:01,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2379:  96%|█████████▌| 192/201 [00:25<00:01,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2541:  97%|█████████▋| 194/201 [00:25<00:00,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2649:  98%|█████████▊| 196/201 [00:25<00:00,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.268:  99%|█████████▊| 198/201 [00:26<00:00,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2715: 100%|█████████▉| 200/201 [00:26<00:00,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 2, valid loss: 2.2932: 100%|██████████| 201/201 [00:26<00:00,  7.56it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 3, 'label': 0}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 4, 'label': 1}, {'prediction': 2, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 4}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 2, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7977:   0%|          | 1/400 [00:00<04:15,  1.56it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7904:   0%|          | 2/400 [00:01<04:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7836:   1%|          | 3/400 [00:01<04:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7806:   1%|          | 4/400 [00:02<04:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7766:   1%|▏         | 5/400 [00:03<04:07,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.776:   2%|▏         | 6/400 [00:03<04:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7767:   2%|▏         | 7/400 [00:04<04:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7785:   2%|▏         | 8/400 [00:04<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.775:   2%|▏         | 9/400 [00:05<04:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7648:   2%|▎         | 10/400 [00:06<03:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7614:   3%|▎         | 11/400 [00:06<04:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7564:   3%|▎         | 12/400 [00:07<03:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.749:   3%|▎         | 13/400 [00:08<03:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7503:   4%|▎         | 14/400 [00:08<03:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.755:   4%|▍         | 15/400 [00:09<03:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7578:   4%|▍         | 16/400 [00:09<03:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7642:   4%|▍         | 17/400 [00:10<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7697:   4%|▍         | 18/400 [00:11<03:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7723:   5%|▍         | 19/400 [00:11<03:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7688:   5%|▌         | 20/400 [00:12<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7715:   5%|▌         | 21/400 [00:12<03:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7642:   6%|▌         | 22/400 [00:13<03:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7613:   6%|▌         | 23/400 [00:14<03:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7561:   6%|▌         | 24/400 [00:14<03:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.75:   6%|▋         | 25/400 [00:15<03:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7425:   6%|▋         | 26/400 [00:16<03:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7395:   7%|▋         | 27/400 [00:16<03:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7347:   7%|▋         | 28/400 [00:17<03:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7421:   7%|▋         | 29/400 [00:17<03:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7524:   8%|▊         | 30/400 [00:18<03:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7466:   8%|▊         | 31/400 [00:19<03:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7514:   8%|▊         | 32/400 [00:19<03:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7486:   8%|▊         | 33/400 [00:20<03:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7436:   8%|▊         | 34/400 [00:20<03:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.748:   9%|▉         | 35/400 [00:21<03:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7432:   9%|▉         | 36/400 [00:22<03:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7407:   9%|▉         | 37/400 [00:22<03:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7405:  10%|▉         | 38/400 [00:23<03:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7409:  10%|▉         | 39/400 [00:24<03:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7422:  10%|█         | 40/400 [00:24<03:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7367:  10%|█         | 41/400 [00:25<03:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7411:  10%|█         | 42/400 [00:25<03:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7401:  11%|█         | 43/400 [00:26<03:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7392:  11%|█         | 44/400 [00:27<03:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7362:  11%|█▏        | 45/400 [00:27<03:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.737:  12%|█▏        | 46/400 [00:28<03:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7346:  12%|█▏        | 47/400 [00:28<03:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7324:  12%|█▏        | 48/400 [00:29<03:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7433:  12%|█▏        | 49/400 [00:30<03:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7361:  12%|█▎        | 50/400 [00:30<03:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7358:  13%|█▎        | 51/400 [00:31<03:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.726:  13%|█▎        | 52/400 [00:31<03:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7148:  13%|█▎        | 53/400 [00:32<03:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7161:  14%|█▎        | 54/400 [00:33<03:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7118:  14%|█▍        | 55/400 [00:33<03:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7147:  14%|█▍        | 56/400 [00:34<03:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7203:  14%|█▍        | 57/400 [00:35<03:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7102:  14%|█▍        | 58/400 [00:35<03:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.7084:  15%|█▍        | 59/400 [00:36<03:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6978:  15%|█▌        | 60/400 [00:36<03:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6944:  15%|█▌        | 61/400 [00:37<03:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6826:  16%|█▌        | 62/400 [00:38<03:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6834:  16%|█▌        | 63/400 [00:38<03:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6815:  16%|█▌        | 64/400 [00:39<03:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6804:  16%|█▋        | 65/400 [00:39<03:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6837:  16%|█▋        | 66/400 [00:40<03:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6928:  17%|█▋        | 67/400 [00:41<03:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6894:  17%|█▋        | 68/400 [00:41<03:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6925:  17%|█▋        | 69/400 [00:42<03:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6861:  18%|█▊        | 70/400 [00:43<03:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6833:  18%|█▊        | 71/400 [00:43<03:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6815:  18%|█▊        | 72/400 [00:44<03:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6827:  18%|█▊        | 73/400 [00:44<03:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6723:  18%|█▊        | 74/400 [00:45<03:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6649:  19%|█▉        | 75/400 [00:46<03:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6547:  19%|█▉        | 76/400 [00:46<03:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.642:  19%|█▉        | 77/400 [00:47<03:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6315:  20%|█▉        | 78/400 [00:47<03:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6209:  20%|█▉        | 79/400 [00:48<03:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6254:  20%|██        | 80/400 [00:49<03:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.628:  20%|██        | 81/400 [00:49<03:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6347:  20%|██        | 82/400 [00:50<03:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.635:  21%|██        | 83/400 [00:51<03:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6411:  21%|██        | 84/400 [00:51<03:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6277:  21%|██▏       | 85/400 [00:52<03:18,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6178:  22%|██▏       | 86/400 [00:52<03:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6091:  22%|██▏       | 87/400 [00:53<03:16,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.619:  22%|██▏       | 88/400 [00:54<03:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6205:  22%|██▏       | 89/400 [00:54<03:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6369:  22%|██▎       | 90/400 [00:55<03:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6283:  23%|██▎       | 91/400 [00:56<03:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6287:  23%|██▎       | 92/400 [00:56<03:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6151:  23%|██▎       | 93/400 [00:57<03:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6107:  24%|██▎       | 94/400 [00:57<03:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6064:  24%|██▍       | 95/400 [00:58<03:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6001:  24%|██▍       | 96/400 [00:59<03:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5972:  24%|██▍       | 97/400 [00:59<03:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6052:  24%|██▍       | 98/400 [01:00<03:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6066:  25%|██▍       | 99/400 [01:00<03:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6049:  25%|██▌       | 100/400 [01:01<03:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6025:  25%|██▌       | 101/400 [01:02<03:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5953:  26%|██▌       | 102/400 [01:02<03:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6044:  26%|██▌       | 103/400 [01:03<03:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5963:  26%|██▌       | 104/400 [01:03<03:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6063:  26%|██▋       | 105/400 [01:04<03:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5993:  26%|██▋       | 106/400 [01:05<02:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6056:  27%|██▋       | 107/400 [01:05<03:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6201:  27%|██▋       | 108/400 [01:06<02:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6221:  27%|██▋       | 109/400 [01:07<02:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.615:  28%|██▊       | 110/400 [01:07<02:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6096:  28%|██▊       | 111/400 [01:08<02:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6104:  28%|██▊       | 112/400 [01:08<02:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6172:  28%|██▊       | 113/400 [01:09<02:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6103:  28%|██▊       | 114/400 [01:10<02:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6065:  29%|██▉       | 115/400 [01:10<02:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6042:  29%|██▉       | 116/400 [01:11<02:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6022:  29%|██▉       | 117/400 [01:11<02:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6012:  30%|██▉       | 118/400 [01:12<02:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6004:  30%|██▉       | 119/400 [01:13<02:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6012:  30%|███       | 120/400 [01:13<02:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6068:  30%|███       | 121/400 [01:14<02:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6021:  30%|███       | 122/400 [01:15<02:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5977:  31%|███       | 123/400 [01:15<02:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5941:  31%|███       | 124/400 [01:16<02:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5942:  31%|███▏      | 125/400 [01:16<02:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5906:  32%|███▏      | 126/400 [01:17<02:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.59:  32%|███▏      | 127/400 [01:18<02:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5962:  32%|███▏      | 128/400 [01:18<02:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5913:  32%|███▏      | 129/400 [01:19<02:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5914:  32%|███▎      | 130/400 [01:19<02:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5984:  33%|███▎      | 131/400 [01:20<02:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5939:  33%|███▎      | 132/400 [01:21<02:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5848:  33%|███▎      | 133/400 [01:21<02:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5879:  34%|███▎      | 134/400 [01:22<02:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5879:  34%|███▍      | 135/400 [01:23<02:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5894:  34%|███▍      | 136/400 [01:23<02:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5873:  34%|███▍      | 137/400 [01:24<02:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5908:  34%|███▍      | 138/400 [01:24<02:38,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5908:  35%|███▍      | 139/400 [01:25<02:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5847:  35%|███▌      | 140/400 [01:26<02:36,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.579:  35%|███▌      | 141/400 [01:26<02:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5745:  36%|███▌      | 142/400 [01:27<02:35,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5771:  36%|███▌      | 143/400 [01:27<02:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5733:  36%|███▌      | 144/400 [01:28<02:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5695:  36%|███▋      | 145/400 [01:29<02:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5679:  36%|███▋      | 146/400 [01:29<02:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5617:  37%|███▋      | 147/400 [01:30<02:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5573:  37%|███▋      | 148/400 [01:30<02:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5543:  37%|███▋      | 149/400 [01:31<02:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5469:  38%|███▊      | 150/400 [01:32<02:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5436:  38%|███▊      | 151/400 [01:32<02:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5448:  38%|███▊      | 152/400 [01:33<02:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5416:  38%|███▊      | 153/400 [01:33<02:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5431:  38%|███▊      | 154/400 [01:34<02:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5526:  39%|███▉      | 155/400 [01:35<02:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5563:  39%|███▉      | 156/400 [01:35<02:27,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5521:  39%|███▉      | 157/400 [01:36<02:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.547:  40%|███▉      | 158/400 [01:37<02:25,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5503:  40%|███▉      | 159/400 [01:37<02:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5502:  40%|████      | 160/400 [01:38<02:24,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5578:  40%|████      | 161/400 [01:38<02:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5612:  40%|████      | 162/400 [01:39<02:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5584:  41%|████      | 163/400 [01:40<02:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5554:  41%|████      | 164/400 [01:40<02:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5502:  41%|████▏     | 165/400 [01:41<02:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.545:  42%|████▏     | 166/400 [01:41<02:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5396:  42%|████▏     | 167/400 [01:42<02:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5334:  42%|████▏     | 168/400 [01:43<02:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5336:  42%|████▏     | 169/400 [01:43<02:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5372:  42%|████▎     | 170/400 [01:44<02:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5469:  43%|████▎     | 171/400 [01:44<02:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5445:  43%|████▎     | 172/400 [01:45<02:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.534:  43%|████▎     | 173/400 [01:46<02:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5348:  44%|████▎     | 174/400 [01:46<02:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5351:  44%|████▍     | 175/400 [01:47<02:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5378:  44%|████▍     | 176/400 [01:47<02:14,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5284:  44%|████▍     | 177/400 [01:48<02:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5282:  44%|████▍     | 178/400 [01:49<02:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5272:  45%|████▍     | 179/400 [01:49<02:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5423:  45%|████▌     | 180/400 [01:50<02:12,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5474:  45%|████▌     | 181/400 [01:51<02:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5417:  46%|████▌     | 182/400 [01:51<02:12,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5323:  46%|████▌     | 183/400 [01:52<02:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5266:  46%|████▌     | 184/400 [01:52<02:11,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5222:  46%|████▋     | 185/400 [01:53<02:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5214:  46%|████▋     | 186/400 [01:54<02:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5184:  47%|████▋     | 187/400 [01:54<02:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5162:  47%|████▋     | 188/400 [01:55<02:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5252:  47%|████▋     | 189/400 [01:55<02:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5214:  48%|████▊     | 190/400 [01:56<02:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5211:  48%|████▊     | 191/400 [01:57<02:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5208:  48%|████▊     | 192/400 [01:57<02:05,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5157:  48%|████▊     | 193/400 [01:58<02:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5132:  48%|████▊     | 194/400 [01:58<02:05,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5256:  49%|████▉     | 195/400 [01:59<02:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5319:  49%|████▉     | 196/400 [02:00<02:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.536:  49%|████▉     | 197/400 [02:00<02:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5355:  50%|████▉     | 198/400 [02:01<02:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5254:  50%|████▉     | 199/400 [02:02<02:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5199:  50%|█████     | 200/400 [02:02<02:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5209:  50%|█████     | 201/400 [02:03<02:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5362:  50%|█████     | 202/400 [02:03<01:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5335:  51%|█████     | 203/400 [02:04<02:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5334:  51%|█████     | 204/400 [02:05<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5232:  51%|█████▏    | 205/400 [02:05<01:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.519:  52%|█████▏    | 206/400 [02:06<01:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5198:  52%|█████▏    | 207/400 [02:06<01:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5141:  52%|█████▏    | 208/400 [02:07<01:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5259:  52%|█████▏    | 209/400 [02:08<01:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.526:  52%|█████▎    | 210/400 [02:08<01:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5178:  53%|█████▎    | 211/400 [02:09<01:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.513:  53%|█████▎    | 212/400 [02:09<01:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5082:  53%|█████▎    | 213/400 [02:10<01:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.4993:  54%|█████▎    | 214/400 [02:11<01:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.501:  54%|█████▍    | 215/400 [02:11<01:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5041:  54%|█████▍    | 216/400 [02:12<01:51,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5253:  54%|█████▍    | 217/400 [02:13<01:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.52:  55%|█████▍    | 218/400 [02:13<01:49,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5292:  55%|█████▍    | 219/400 [02:14<01:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5248:  55%|█████▌    | 220/400 [02:14<01:48,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5297:  55%|█████▌    | 221/400 [02:15<01:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5383:  56%|█████▌    | 222/400 [02:16<01:47,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5396:  56%|█████▌    | 223/400 [02:16<01:47,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5437:  56%|█████▌    | 224/400 [02:17<01:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5447:  56%|█████▋    | 225/400 [02:17<01:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5349:  56%|█████▋    | 226/400 [02:18<01:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5421:  57%|█████▋    | 227/400 [02:19<01:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5404:  57%|█████▋    | 228/400 [02:19<01:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5313:  57%|█████▋    | 229/400 [02:20<01:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5316:  57%|█████▊    | 230/400 [02:20<01:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5448:  58%|█████▊    | 231/400 [02:21<01:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5417:  58%|█████▊    | 232/400 [02:22<01:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5401:  58%|█████▊    | 233/400 [02:22<01:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5284:  58%|█████▊    | 234/400 [02:23<01:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5286:  59%|█████▉    | 235/400 [02:24<01:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5306:  59%|█████▉    | 236/400 [02:24<01:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5499:  59%|█████▉    | 237/400 [02:25<01:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5468:  60%|█████▉    | 238/400 [02:25<01:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5408:  60%|█████▉    | 239/400 [02:26<01:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.543:  60%|██████    | 240/400 [02:27<01:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.532:  60%|██████    | 241/400 [02:27<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5401:  60%|██████    | 242/400 [02:28<01:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5462:  61%|██████    | 243/400 [02:28<01:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5465:  61%|██████    | 244/400 [02:29<01:34,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5568:  61%|██████▏   | 245/400 [02:30<01:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5593:  62%|██████▏   | 246/400 [02:30<01:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5716:  62%|██████▏   | 247/400 [02:31<01:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.593:  62%|██████▏   | 248/400 [02:31<01:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5877:  62%|██████▏   | 249/400 [02:32<01:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.581:  62%|██████▎   | 250/400 [02:33<01:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5949:  63%|██████▎   | 251/400 [02:33<01:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5987:  63%|██████▎   | 252/400 [02:34<01:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6101:  63%|██████▎   | 253/400 [02:35<01:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6006:  64%|██████▎   | 254/400 [02:35<01:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5883:  64%|██████▍   | 255/400 [02:36<01:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5877:  64%|██████▍   | 256/400 [02:36<01:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5945:  64%|██████▍   | 257/400 [02:37<01:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5998:  64%|██████▍   | 258/400 [02:38<01:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5933:  65%|██████▍   | 259/400 [02:38<01:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5922:  65%|██████▌   | 260/400 [02:39<01:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5853:  65%|██████▌   | 261/400 [02:39<01:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5917:  66%|██████▌   | 262/400 [02:40<01:23,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5895:  66%|██████▌   | 263/400 [02:41<01:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6085:  66%|██████▌   | 264/400 [02:41<01:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6042:  66%|██████▋   | 265/400 [02:42<01:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6014:  66%|██████▋   | 266/400 [02:42<01:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5973:  67%|██████▋   | 267/400 [02:43<01:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6021:  67%|██████▋   | 268/400 [02:44<01:19,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5978:  67%|██████▋   | 269/400 [02:44<01:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5945:  68%|██████▊   | 270/400 [02:45<01:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.5954:  68%|██████▊   | 271/400 [02:46<01:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6149:  68%|██████▊   | 272/400 [02:46<01:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6098:  68%|██████▊   | 273/400 [02:47<01:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6124:  68%|██████▊   | 274/400 [02:47<01:16,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.611:  69%|██████▉   | 275/400 [02:48<01:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6333:  69%|██████▉   | 276/400 [02:49<01:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6397:  69%|██████▉   | 277/400 [02:49<01:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6346:  70%|██████▉   | 278/400 [02:50<01:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6301:  70%|██████▉   | 279/400 [02:50<01:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6333:  70%|███████   | 280/400 [02:51<01:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6232:  70%|███████   | 281/400 [02:52<01:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6167:  70%|███████   | 282/400 [02:52<01:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6286:  71%|███████   | 283/400 [02:53<01:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6429:  71%|███████   | 284/400 [02:53<01:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6431:  71%|███████▏  | 285/400 [02:54<01:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6337:  72%|███████▏  | 286/400 [02:55<01:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6293:  72%|███████▏  | 287/400 [02:55<01:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.631:  72%|███████▏  | 288/400 [02:56<01:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6265:  72%|███████▏  | 289/400 [02:57<01:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6198:  72%|███████▎  | 290/400 [02:57<01:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6155:  73%|███████▎  | 291/400 [02:58<01:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6188:  73%|███████▎  | 292/400 [02:58<01:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6244:  73%|███████▎  | 293/400 [02:59<01:06,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.615:  74%|███████▎  | 294/400 [03:00<01:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6236:  74%|███████▍  | 295/400 [03:00<01:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6313:  74%|███████▍  | 296/400 [03:01<01:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6246:  74%|███████▍  | 297/400 [03:01<01:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6341:  74%|███████▍  | 298/400 [03:02<01:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6569:  75%|███████▍  | 299/400 [03:03<01:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.664:  75%|███████▌  | 300/400 [03:03<01:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.67:  75%|███████▌  | 301/400 [03:04<01:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6757:  76%|███████▌  | 302/400 [03:05<00:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6751:  76%|███████▌  | 303/400 [03:05<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6695:  76%|███████▌  | 304/400 [03:06<00:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6734:  76%|███████▋  | 305/400 [03:06<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6738:  76%|███████▋  | 306/400 [03:07<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6763:  77%|███████▋  | 307/400 [03:08<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6781:  77%|███████▋  | 308/400 [03:08<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6827:  77%|███████▋  | 309/400 [03:09<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6825:  78%|███████▊  | 310/400 [03:09<00:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6792:  78%|███████▊  | 311/400 [03:10<00:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.685:  78%|███████▊  | 312/400 [03:11<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6825:  78%|███████▊  | 313/400 [03:11<00:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6807:  78%|███████▊  | 314/400 [03:12<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6911:  79%|███████▉  | 315/400 [03:13<00:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6834:  79%|███████▉  | 316/400 [03:13<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6767:  79%|███████▉  | 317/400 [03:14<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6682:  80%|███████▉  | 318/400 [03:14<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6831:  80%|███████▉  | 319/400 [03:15<00:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6886:  80%|████████  | 320/400 [03:16<00:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.686:  80%|████████  | 321/400 [03:16<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6799:  80%|████████  | 322/400 [03:17<00:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6808:  81%|████████  | 323/400 [03:17<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6811:  81%|████████  | 324/400 [03:18<00:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6924:  81%|████████▏ | 325/400 [03:19<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.688:  82%|████████▏ | 326/400 [03:19<00:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.689:  82%|████████▏ | 327/400 [03:20<00:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6939:  82%|████████▏ | 328/400 [03:20<00:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6962:  82%|████████▏ | 329/400 [03:21<00:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6881:  82%|████████▎ | 330/400 [03:22<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6877:  83%|████████▎ | 331/400 [03:22<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6818:  83%|████████▎ | 332/400 [03:23<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6817:  83%|████████▎ | 333/400 [03:24<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6836:  84%|████████▎ | 334/400 [03:24<00:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6881:  84%|████████▍ | 335/400 [03:25<00:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6863:  84%|████████▍ | 336/400 [03:25<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6836:  84%|████████▍ | 337/400 [03:26<00:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6728:  84%|████████▍ | 338/400 [03:27<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6667:  85%|████████▍ | 339/400 [03:27<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6598:  85%|████████▌ | 340/400 [03:28<00:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6591:  85%|████████▌ | 341/400 [03:28<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6561:  86%|████████▌ | 342/400 [03:29<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.645:  86%|████████▌ | 343/400 [03:30<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6512:  86%|████████▌ | 344/400 [03:30<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6381:  86%|████████▋ | 345/400 [03:31<00:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6353:  86%|████████▋ | 346/400 [03:31<00:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6343:  87%|████████▋ | 347/400 [03:32<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6382:  87%|████████▋ | 348/400 [03:33<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6527:  87%|████████▋ | 349/400 [03:33<00:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6571:  88%|████████▊ | 350/400 [03:34<00:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6551:  88%|████████▊ | 351/400 [03:35<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6508:  88%|████████▊ | 352/400 [03:35<00:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6463:  88%|████████▊ | 353/400 [03:36<00:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6398:  88%|████████▊ | 354/400 [03:36<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6324:  89%|████████▉ | 355/400 [03:37<00:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6252:  89%|████████▉ | 356/400 [03:38<00:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6288:  89%|████████▉ | 357/400 [03:38<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.631:  90%|████████▉ | 358/400 [03:39<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6277:  90%|████████▉ | 359/400 [03:40<00:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6387:  90%|█████████ | 360/400 [03:40<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6433:  90%|█████████ | 361/400 [03:41<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6516:  90%|█████████ | 362/400 [03:41<00:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6477:  91%|█████████ | 363/400 [03:42<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6472:  91%|█████████ | 364/400 [03:43<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6552:  91%|█████████▏| 365/400 [03:43<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6579:  92%|█████████▏| 366/400 [03:44<00:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.663:  92%|█████████▏| 367/400 [03:44<00:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6606:  92%|█████████▏| 368/400 [03:45<00:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6613:  92%|█████████▏| 369/400 [03:46<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6574:  92%|█████████▎| 370/400 [03:46<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6484:  93%|█████████▎| 371/400 [03:47<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6628:  93%|█████████▎| 372/400 [03:47<00:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.669:  93%|█████████▎| 373/400 [03:48<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6593:  94%|█████████▎| 374/400 [03:49<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6513:  94%|█████████▍| 375/400 [03:49<00:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6416:  94%|█████████▍| 376/400 [03:50<00:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6387:  94%|█████████▍| 377/400 [03:51<00:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6336:  94%|█████████▍| 378/400 [03:51<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6431:  95%|█████████▍| 379/400 [03:52<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6321:  95%|█████████▌| 380/400 [03:52<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6297:  95%|█████████▌| 381/400 [03:53<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6221:  96%|█████████▌| 382/400 [03:54<00:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6248:  96%|█████████▌| 383/400 [03:54<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6242:  96%|█████████▌| 384/400 [03:55<00:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6246:  96%|█████████▋| 385/400 [03:55<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.619:  96%|█████████▋| 386/400 [03:56<00:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6125:  97%|█████████▋| 387/400 [03:57<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6107:  97%|█████████▋| 388/400 [03:57<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.628:  97%|█████████▋| 389/400 [03:58<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6241:  98%|█████████▊| 390/400 [03:58<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6189:  98%|█████████▊| 391/400 [03:59<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6222:  98%|█████████▊| 392/400 [04:00<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.621:  98%|█████████▊| 393/400 [04:00<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6289:  98%|█████████▊| 394/400 [04:01<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6208:  99%|█████████▉| 395/400 [04:02<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6214:  99%|█████████▉| 396/400 [04:02<00:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6295:  99%|█████████▉| 397/400 [04:03<00:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6294: 100%|█████████▉| 398/400 [04:03<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6358: 100%|█████████▉| 399/400 [04:04<00:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 3, train loss: 1.6426: 100%|██████████| 400/400 [04:05<00:00,  1.63it/s]
epoch: 3, valid loss: 2.7227:   1%|          | 2/201 [00:00<00:25,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.7179:   2%|▏         | 4/201 [00:00<00:25,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.6844:   3%|▎         | 6/201 [00:00<00:25,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.6493:   4%|▍         | 8/201 [00:01<00:25,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.6293:   5%|▍         | 10/201 [00:01<00:24,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.608:   6%|▌         | 12/201 [00:01<00:24,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.6106:   7%|▋         | 14/201 [00:01<00:24,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.6181:   8%|▊         | 16/201 [00:02<00:24,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.6156:   9%|▉         | 18/201 [00:02<00:23,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.617:  10%|▉         | 20/201 [00:02<00:23,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.5704:  11%|█         | 22/201 [00:02<00:23,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.5531:  12%|█▏        | 24/201 [00:03<00:23,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.5387:  13%|█▎        | 26/201 [00:03<00:22,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.5239:  14%|█▍        | 28/201 [00:03<00:22,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.4945:  15%|█▍        | 30/201 [00:03<00:22,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.4655:  16%|█▌        | 32/201 [00:04<00:22,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.442:  17%|█▋        | 34/201 [00:04<00:21,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.4055:  18%|█▊        | 36/201 [00:04<00:21,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.3796:  19%|█▉        | 38/201 [00:04<00:21,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.3725:  20%|█▉        | 40/201 [00:05<00:20,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.3532:  21%|██        | 42/201 [00:05<00:20,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.3323:  22%|██▏       | 44/201 [00:05<00:20,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.3124:  23%|██▎       | 46/201 [00:05<00:20,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2797:  24%|██▍       | 48/201 [00:06<00:19,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2708:  25%|██▍       | 50/201 [00:06<00:19,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2416:  26%|██▌       | 52/201 [00:06<00:19,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2589:  27%|██▋       | 54/201 [00:07<00:19,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2468:  28%|██▊       | 56/201 [00:07<00:18,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2741:  29%|██▉       | 58/201 [00:07<00:18,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2812:  30%|██▉       | 60/201 [00:07<00:18,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2698:  31%|███       | 62/201 [00:08<00:18,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2559:  32%|███▏      | 64/201 [00:08<00:17,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.244:  33%|███▎      | 66/201 [00:08<00:17,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2367:  34%|███▍      | 68/201 [00:08<00:17,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.232:  35%|███▍      | 70/201 [00:09<00:17,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2323:  36%|███▌      | 72/201 [00:09<00:16,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.2103:  37%|███▋      | 74/201 [00:09<00:16,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1878:  38%|███▊      | 76/201 [00:09<00:16,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1784:  39%|███▉      | 78/201 [00:10<00:16,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1564:  40%|███▉      | 80/201 [00:10<00:15,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1283:  41%|████      | 82/201 [00:10<00:15,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1434:  42%|████▏     | 84/201 [00:10<00:15,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1312:  43%|████▎     | 86/201 [00:11<00:15,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1548:  44%|████▍     | 88/201 [00:11<00:14,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1256:  45%|████▍     | 90/201 [00:11<00:14,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0967:  46%|████▌     | 92/201 [00:12<00:14,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.1081:  47%|████▋     | 94/201 [00:12<00:13,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0914:  48%|████▊     | 96/201 [00:12<00:13,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0748:  49%|████▉     | 98/201 [00:12<00:13,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0652:  50%|████▉     | 100/201 [00:13<00:13,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.046:  51%|█████     | 102/201 [00:13<00:12,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0265:  52%|█████▏    | 104/201 [00:13<00:12,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.031:  53%|█████▎    | 106/201 [00:13<00:12,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0298:  54%|█████▎    | 108/201 [00:14<00:12,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.012:  55%|█████▍    | 110/201 [00:14<00:11,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0138:  56%|█████▌    | 112/201 [00:14<00:11,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0531:  57%|█████▋    | 114/201 [00:14<00:11,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0364:  58%|█████▊    | 116/201 [00:15<00:11,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0502:  59%|█████▊    | 118/201 [00:15<00:10,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.024:  60%|█████▉    | 120/201 [00:15<00:10,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.03:  61%|██████    | 122/201 [00:15<00:10,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0219:  62%|██████▏   | 124/201 [00:16<00:10,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0322:  63%|██████▎   | 126/201 [00:16<00:09,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0463:  64%|██████▎   | 128/201 [00:16<00:09,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0213:  65%|██████▍   | 130/201 [00:17<00:09,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0033:  66%|██████▌   | 132/201 [00:17<00:08,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0127:  67%|██████▋   | 134/201 [00:17<00:08,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 2.0016:  68%|██████▊   | 136/201 [00:17<00:08,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9947:  69%|██████▊   | 138/201 [00:18<00:08,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9811:  70%|██████▉   | 140/201 [00:18<00:08,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.949:  71%|███████   | 142/201 [00:18<00:07,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9619:  72%|███████▏  | 144/201 [00:18<00:07,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9612:  73%|███████▎  | 146/201 [00:19<00:07,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9358:  74%|███████▎  | 148/201 [00:19<00:07,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9368:  75%|███████▍  | 150/201 [00:19<00:06,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9333:  76%|███████▌  | 152/201 [00:19<00:06,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9352:  77%|███████▋  | 154/201 [00:20<00:06,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.937:  78%|███████▊  | 156/201 [00:20<00:05,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.921:  79%|███████▊  | 158/201 [00:20<00:05,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9576:  80%|███████▉  | 160/201 [00:20<00:05,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9681:  81%|████████  | 162/201 [00:21<00:05,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9601:  82%|████████▏ | 164/201 [00:21<00:04,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9633:  83%|████████▎ | 166/201 [00:21<00:04,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9535:  84%|████████▎ | 168/201 [00:22<00:04,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.967:  85%|████████▍ | 170/201 [00:22<00:04,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9508:  86%|████████▌ | 172/201 [00:22<00:03,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9434:  87%|████████▋ | 174/201 [00:22<00:03,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9366:  88%|████████▊ | 176/201 [00:23<00:03,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9355:  89%|████████▊ | 178/201 [00:23<00:03,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9488:  90%|████████▉ | 180/201 [00:23<00:02,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9406:  91%|█████████ | 182/201 [00:23<00:02,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9432:  92%|█████████▏| 184/201 [00:24<00:02,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9429:  93%|█████████▎| 186/201 [00:24<00:01,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9497:  94%|█████████▎| 188/201 [00:24<00:01,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9471:  95%|█████████▍| 190/201 [00:24<00:01,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.939:  96%|█████████▌| 192/201 [00:25<00:01,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9574:  97%|█████████▋| 194/201 [00:25<00:00,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9707:  98%|█████████▊| 196/201 [00:25<00:00,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9716:  99%|█████████▊| 198/201 [00:25<00:00,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9684: 100%|█████████▉| 200/201 [00:26<00:00,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 3, valid loss: 1.9667: 100%|██████████| 201/201 [00:26<00:00,  7.63it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 0, 'label': 0}, {'prediction': 0, 'label': 1}], 'results_2': [{'prediction': 1, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 4}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5449:   0%|          | 1/400 [00:00<04:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5471:   0%|          | 2/400 [00:01<04:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5589:   1%|          | 3/400 [00:01<04:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5627:   1%|          | 4/400 [00:02<04:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5668:   1%|▏         | 5/400 [00:03<04:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5673:   2%|▏         | 6/400 [00:03<03:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5709:   2%|▏         | 7/400 [00:04<04:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5748:   2%|▏         | 8/400 [00:04<03:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5859:   2%|▏         | 9/400 [00:05<04:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.59:   2%|▎         | 10/400 [00:06<04:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5911:   3%|▎         | 11/400 [00:06<04:03,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.5971:   3%|▎         | 12/400 [00:07<03:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6157:   3%|▎         | 13/400 [00:08<04:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6164:   4%|▎         | 14/400 [00:08<03:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6167:   4%|▍         | 15/400 [00:09<04:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6311:   4%|▍         | 16/400 [00:09<03:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6359:   4%|▍         | 17/400 [00:10<03:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6357:   4%|▍         | 18/400 [00:11<03:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.64:   5%|▍         | 19/400 [00:11<03:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6491:   5%|▌         | 20/400 [00:12<03:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6664:   5%|▌         | 21/400 [00:12<03:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6687:   6%|▌         | 22/400 [00:13<03:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6753:   6%|▌         | 23/400 [00:14<03:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6763:   6%|▌         | 24/400 [00:14<03:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6778:   6%|▋         | 25/400 [00:15<03:53,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6783:   6%|▋         | 26/400 [00:16<03:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.677:   7%|▋         | 27/400 [00:16<03:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6861:   7%|▋         | 28/400 [00:17<03:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6863:   7%|▋         | 29/400 [00:17<03:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6902:   8%|▊         | 30/400 [00:18<03:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6881:   8%|▊         | 31/400 [00:19<03:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6928:   8%|▊         | 32/400 [00:19<03:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.6996:   8%|▊         | 33/400 [00:20<03:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7139:   8%|▊         | 34/400 [00:20<03:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7335:   9%|▉         | 35/400 [00:21<03:48,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7317:   9%|▉         | 36/400 [00:22<03:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7337:   9%|▉         | 37/400 [00:22<03:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7423:  10%|▉         | 38/400 [00:23<03:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7422:  10%|▉         | 39/400 [00:24<03:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7668:  10%|█         | 40/400 [00:24<03:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7753:  10%|█         | 41/400 [00:25<03:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7787:  10%|█         | 42/400 [00:25<03:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.7947:  11%|█         | 43/400 [00:26<03:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8042:  11%|█         | 44/400 [00:27<03:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8081:  11%|█▏        | 45/400 [00:27<03:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8199:  12%|█▏        | 46/400 [00:28<03:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8167:  12%|█▏        | 47/400 [00:28<03:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.815:  12%|█▏        | 48/400 [00:29<03:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.812:  12%|█▏        | 49/400 [00:30<03:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8105:  12%|█▎        | 50/400 [00:30<03:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8216:  13%|█▎        | 51/400 [00:31<03:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8272:  13%|█▎        | 52/400 [00:32<03:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8241:  13%|█▎        | 53/400 [00:32<03:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8285:  14%|█▎        | 54/400 [00:33<03:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8248:  14%|█▍        | 55/400 [00:33<03:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8235:  14%|█▍        | 56/400 [00:34<03:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8254:  14%|█▍        | 57/400 [00:35<03:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8277:  14%|█▍        | 58/400 [00:35<03:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8371:  15%|█▍        | 59/400 [00:36<03:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8476:  15%|█▌        | 60/400 [00:36<03:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.846:  15%|█▌        | 61/400 [00:37<03:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8542:  16%|█▌        | 62/400 [00:38<03:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8787:  16%|█▌        | 63/400 [00:38<03:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.881:  16%|█▌        | 64/400 [00:39<03:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8811:  16%|█▋        | 65/400 [00:40<03:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8847:  16%|█▋        | 66/400 [00:40<03:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8906:  17%|█▋        | 67/400 [00:41<03:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.899:  17%|█▋        | 68/400 [00:41<03:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.8995:  17%|█▋        | 69/400 [00:42<03:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9009:  18%|█▊        | 70/400 [00:43<03:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9201:  18%|█▊        | 71/400 [00:43<03:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.919:  18%|█▊        | 72/400 [00:44<03:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9188:  18%|█▊        | 73/400 [00:44<03:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9201:  18%|█▊        | 74/400 [00:45<03:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9222:  19%|█▉        | 75/400 [00:46<03:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9223:  19%|█▉        | 76/400 [00:46<03:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9244:  19%|█▉        | 77/400 [00:47<03:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9324:  20%|█▉        | 78/400 [00:48<03:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9332:  20%|█▉        | 79/400 [00:48<03:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9338:  20%|██        | 80/400 [00:49<03:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.945:  20%|██        | 81/400 [00:49<03:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9571:  20%|██        | 82/400 [00:50<03:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9592:  21%|██        | 83/400 [00:51<03:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9626:  21%|██        | 84/400 [00:51<03:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9624:  21%|██▏       | 85/400 [00:52<03:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9641:  22%|██▏       | 86/400 [00:52<03:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.968:  22%|██▏       | 87/400 [00:53<03:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9626:  22%|██▏       | 88/400 [00:54<03:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9612:  22%|██▏       | 89/400 [00:54<03:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9597:  22%|██▎       | 90/400 [00:55<03:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9629:  23%|██▎       | 91/400 [00:56<03:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9584:  23%|██▎       | 92/400 [00:56<03:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9617:  23%|██▎       | 93/400 [00:57<03:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9675:  24%|██▎       | 94/400 [00:57<03:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9662:  24%|██▍       | 95/400 [00:58<03:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9786:  24%|██▍       | 96/400 [00:59<03:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9807:  24%|██▍       | 97/400 [00:59<03:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9857:  24%|██▍       | 98/400 [01:00<03:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9867:  25%|██▍       | 99/400 [01:00<03:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9858:  25%|██▌       | 100/400 [01:01<03:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9907:  25%|██▌       | 101/400 [01:02<03:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9857:  26%|██▌       | 102/400 [01:02<03:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9822:  26%|██▌       | 103/400 [01:03<03:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9874:  26%|██▌       | 104/400 [01:04<03:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9985:  26%|██▋       | 105/400 [01:04<03:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9916:  26%|██▋       | 106/400 [01:05<03:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9868:  27%|██▋       | 107/400 [01:05<03:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9851:  27%|██▋       | 108/400 [01:06<02:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.991:  27%|██▋       | 109/400 [01:07<02:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9982:  28%|██▊       | 110/400 [01:07<02:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9946:  28%|██▊       | 111/400 [01:08<02:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9926:  28%|██▊       | 112/400 [01:08<02:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9932:  28%|██▊       | 113/400 [01:09<02:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9873:  28%|██▊       | 114/400 [01:10<02:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9817:  29%|██▉       | 115/400 [01:10<02:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9743:  29%|██▉       | 116/400 [01:11<02:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9776:  29%|██▉       | 117/400 [01:12<02:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9786:  30%|██▉       | 118/400 [01:12<02:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9839:  30%|██▉       | 119/400 [01:13<02:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9881:  30%|███       | 120/400 [01:13<02:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0007:  30%|███       | 121/400 [01:14<02:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9996:  30%|███       | 122/400 [01:15<02:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 0.9974:  31%|███       | 123/400 [01:15<02:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0011:  31%|███       | 124/400 [01:16<02:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0009:  31%|███▏      | 125/400 [01:16<02:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0081:  32%|███▏      | 126/400 [01:17<02:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0191:  32%|███▏      | 127/400 [01:18<02:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0188:  32%|███▏      | 128/400 [01:18<02:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0159:  32%|███▏      | 129/400 [01:19<02:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0244:  32%|███▎      | 130/400 [01:20<02:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.02:  33%|███▎      | 131/400 [01:20<02:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0338:  33%|███▎      | 132/400 [01:21<02:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0371:  33%|███▎      | 133/400 [01:21<02:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0368:  34%|███▎      | 134/400 [01:22<02:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.059:  34%|███▍      | 135/400 [01:23<02:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0593:  34%|███▍      | 136/400 [01:23<02:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0575:  34%|███▍      | 137/400 [01:24<02:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0513:  34%|███▍      | 138/400 [01:24<02:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0576:  35%|███▍      | 139/400 [01:25<02:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0534:  35%|███▌      | 140/400 [01:26<02:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0524:  35%|███▌      | 141/400 [01:26<02:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0603:  36%|███▌      | 142/400 [01:27<02:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0695:  36%|███▌      | 143/400 [01:28<02:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0713:  36%|███▌      | 144/400 [01:28<02:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0668:  36%|███▋      | 145/400 [01:29<02:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0652:  36%|███▋      | 146/400 [01:29<02:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0661:  37%|███▋      | 147/400 [01:30<02:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.076:  37%|███▋      | 148/400 [01:31<02:33,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0821:  37%|███▋      | 149/400 [01:31<02:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0853:  38%|███▊      | 150/400 [01:32<02:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0806:  38%|███▊      | 151/400 [01:32<02:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.08:  38%|███▊      | 152/400 [01:33<02:36,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0852:  38%|███▊      | 153/400 [01:34<02:36,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0872:  38%|███▊      | 154/400 [01:34<02:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.086:  39%|███▉      | 155/400 [01:35<02:34,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1:  39%|███▉      | 156/400 [01:36<02:32,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0928:  39%|███▉      | 157/400 [01:36<02:33,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0899:  40%|███▉      | 158/400 [01:37<02:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0881:  40%|███▉      | 159/400 [01:38<02:32,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0919:  40%|████      | 160/400 [01:38<02:28,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0974:  40%|████      | 161/400 [01:39<02:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1004:  40%|████      | 162/400 [01:39<02:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1018:  41%|████      | 163/400 [01:40<02:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.095:  41%|████      | 164/400 [01:41<02:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0879:  41%|████▏     | 165/400 [01:41<02:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0913:  42%|████▏     | 166/400 [01:42<02:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0983:  42%|████▏     | 167/400 [01:42<02:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0969:  42%|████▏     | 168/400 [01:43<02:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1117:  42%|████▏     | 169/400 [01:44<02:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.11:  42%|████▎     | 170/400 [01:44<02:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1151:  43%|████▎     | 171/400 [01:45<02:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.116:  43%|████▎     | 172/400 [01:45<02:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1104:  43%|████▎     | 173/400 [01:46<02:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1081:  44%|████▎     | 174/400 [01:47<02:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1101:  44%|████▍     | 175/400 [01:47<02:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1033:  44%|████▍     | 176/400 [01:48<02:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1162:  44%|████▍     | 177/400 [01:49<02:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1121:  44%|████▍     | 178/400 [01:49<02:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1061:  45%|████▍     | 179/400 [01:50<02:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1055:  45%|████▌     | 180/400 [01:50<02:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1054:  45%|████▌     | 181/400 [01:51<02:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1006:  46%|████▌     | 182/400 [01:52<02:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0993:  46%|████▌     | 183/400 [01:52<02:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0982:  46%|████▌     | 184/400 [01:53<02:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0918:  46%|████▋     | 185/400 [01:53<02:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.087:  46%|████▋     | 186/400 [01:54<02:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1012:  47%|████▋     | 187/400 [01:55<02:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0941:  47%|████▋     | 188/400 [01:55<02:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0938:  47%|████▋     | 189/400 [01:56<02:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.102:  48%|████▊     | 190/400 [01:57<02:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.111:  48%|████▊     | 191/400 [01:57<02:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1107:  48%|████▊     | 192/400 [01:58<02:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1022:  48%|████▊     | 193/400 [01:58<02:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1066:  48%|████▊     | 194/400 [01:59<02:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1142:  49%|████▉     | 195/400 [02:00<02:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1092:  49%|████▉     | 196/400 [02:00<02:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1142:  49%|████▉     | 197/400 [02:01<02:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1163:  50%|████▉     | 198/400 [02:01<02:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1152:  50%|████▉     | 199/400 [02:02<02:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1188:  50%|█████     | 200/400 [02:03<02:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1304:  50%|█████     | 201/400 [02:03<02:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1371:  50%|█████     | 202/400 [02:04<02:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1439:  51%|█████     | 203/400 [02:05<02:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.148:  51%|█████     | 204/400 [02:05<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1452:  51%|█████▏    | 205/400 [02:06<02:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1436:  52%|█████▏    | 206/400 [02:06<01:57,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1422:  52%|█████▏    | 207/400 [02:07<01:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1426:  52%|█████▏    | 208/400 [02:08<01:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1488:  52%|█████▏    | 209/400 [02:08<01:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1444:  52%|█████▎    | 210/400 [02:09<01:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1357:  53%|█████▎    | 211/400 [02:09<01:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1465:  53%|█████▎    | 212/400 [02:10<01:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1419:  53%|█████▎    | 213/400 [02:11<01:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1508:  54%|█████▎    | 214/400 [02:11<01:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1593:  54%|█████▍    | 215/400 [02:12<01:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1533:  54%|█████▍    | 216/400 [02:13<01:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1569:  54%|█████▍    | 217/400 [02:13<01:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1747:  55%|█████▍    | 218/400 [02:14<01:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1695:  55%|█████▍    | 219/400 [02:14<01:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1757:  55%|█████▌    | 220/400 [02:15<01:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.172:  55%|█████▌    | 221/400 [02:16<01:52,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1669:  56%|█████▌    | 222/400 [02:16<01:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1659:  56%|█████▌    | 223/400 [02:17<01:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1587:  56%|█████▌    | 224/400 [02:17<01:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1603:  56%|█████▋    | 225/400 [02:18<01:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1563:  56%|█████▋    | 226/400 [02:19<01:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1536:  57%|█████▋    | 227/400 [02:19<01:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1546:  57%|█████▋    | 228/400 [02:20<01:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1518:  57%|█████▋    | 229/400 [02:21<01:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1469:  57%|█████▊    | 230/400 [02:21<01:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1447:  58%|█████▊    | 231/400 [02:22<01:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1511:  58%|█████▊    | 232/400 [02:22<01:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1507:  58%|█████▊    | 233/400 [02:23<01:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1618:  58%|█████▊    | 234/400 [02:24<01:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1559:  59%|█████▉    | 235/400 [02:24<01:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1524:  59%|█████▉    | 236/400 [02:25<01:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1585:  59%|█████▉    | 237/400 [02:26<01:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1651:  60%|█████▉    | 238/400 [02:26<01:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1601:  60%|█████▉    | 239/400 [02:27<01:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1673:  60%|██████    | 240/400 [02:27<01:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1631:  60%|██████    | 241/400 [02:28<01:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.163:  60%|██████    | 242/400 [02:29<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1638:  61%|██████    | 243/400 [02:29<01:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1579:  61%|██████    | 244/400 [02:30<01:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1532:  61%|██████▏   | 245/400 [02:30<01:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1521:  62%|██████▏   | 246/400 [02:31<01:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1511:  62%|██████▏   | 247/400 [02:32<01:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.164:  62%|██████▏   | 248/400 [02:32<01:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.161:  62%|██████▏   | 249/400 [02:33<01:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.153:  62%|██████▎   | 250/400 [02:33<01:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1482:  63%|██████▎   | 251/400 [02:34<01:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1429:  63%|██████▎   | 252/400 [02:35<01:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1438:  63%|██████▎   | 253/400 [02:35<01:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1393:  64%|██████▎   | 254/400 [02:36<01:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.137:  64%|██████▍   | 255/400 [02:37<01:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.149:  64%|██████▍   | 256/400 [02:37<01:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1548:  64%|██████▍   | 257/400 [02:38<01:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1621:  64%|██████▍   | 258/400 [02:38<01:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1667:  65%|██████▍   | 259/400 [02:39<01:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1617:  65%|██████▌   | 260/400 [02:40<01:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1617:  65%|██████▌   | 261/400 [02:40<01:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1623:  66%|██████▌   | 262/400 [02:41<01:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1556:  66%|██████▌   | 263/400 [02:41<01:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1588:  66%|██████▌   | 264/400 [02:42<01:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.16:  66%|██████▋   | 265/400 [02:43<01:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1554:  66%|██████▋   | 266/400 [02:43<01:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1468:  67%|██████▋   | 267/400 [02:44<01:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1396:  67%|██████▋   | 268/400 [02:45<01:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1346:  67%|██████▋   | 269/400 [02:45<01:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1293:  68%|██████▊   | 270/400 [02:46<01:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1372:  68%|██████▊   | 271/400 [02:46<01:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1365:  68%|██████▊   | 272/400 [02:47<01:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1303:  68%|██████▊   | 273/400 [02:48<01:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1259:  68%|██████▊   | 274/400 [02:48<01:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1286:  69%|██████▉   | 275/400 [02:49<01:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1283:  69%|██████▉   | 276/400 [02:49<01:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1239:  69%|██████▉   | 277/400 [02:50<01:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1179:  70%|██████▉   | 278/400 [02:51<01:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1244:  70%|██████▉   | 279/400 [02:51<01:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1188:  70%|███████   | 280/400 [02:52<01:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1249:  70%|███████   | 281/400 [02:53<01:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1289:  70%|███████   | 282/400 [02:53<01:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1245:  71%|███████   | 283/400 [02:54<01:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1295:  71%|███████   | 284/400 [02:54<01:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1264:  71%|███████▏  | 285/400 [02:55<01:11,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1242:  72%|███████▏  | 286/400 [02:56<01:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1379:  72%|███████▏  | 287/400 [02:56<01:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1344:  72%|███████▏  | 288/400 [02:57<01:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1413:  72%|███████▏  | 289/400 [02:58<01:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1372:  72%|███████▎  | 290/400 [02:58<01:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1361:  73%|███████▎  | 291/400 [02:59<01:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1329:  73%|███████▎  | 292/400 [02:59<01:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1243:  73%|███████▎  | 293/400 [03:00<01:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1173:  74%|███████▎  | 294/400 [03:01<01:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1117:  74%|███████▍  | 295/400 [03:01<01:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1153:  74%|███████▍  | 296/400 [03:02<01:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1096:  74%|███████▍  | 297/400 [03:02<01:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1022:  74%|███████▍  | 298/400 [03:03<01:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1015:  75%|███████▍  | 299/400 [03:04<01:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0995:  75%|███████▌  | 300/400 [03:04<01:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0947:  75%|███████▌  | 301/400 [03:05<01:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.096:  76%|███████▌  | 302/400 [03:05<01:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0942:  76%|███████▌  | 303/400 [03:06<01:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0898:  76%|███████▌  | 304/400 [03:07<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.083:  76%|███████▋  | 305/400 [03:07<00:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0952:  76%|███████▋  | 306/400 [03:08<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0929:  77%|███████▋  | 307/400 [03:09<00:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0904:  77%|███████▋  | 308/400 [03:09<00:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1001:  77%|███████▋  | 309/400 [03:10<00:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.102:  78%|███████▊  | 310/400 [03:10<00:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0964:  78%|███████▊  | 311/400 [03:11<00:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0998:  78%|███████▊  | 312/400 [03:12<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1009:  78%|███████▊  | 313/400 [03:12<00:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.0962:  78%|███████▊  | 314/400 [03:13<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1095:  79%|███████▉  | 315/400 [03:14<00:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1148:  79%|███████▉  | 316/400 [03:14<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1266:  79%|███████▉  | 317/400 [03:15<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1237:  80%|███████▉  | 318/400 [03:15<00:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.134:  80%|███████▉  | 319/400 [03:16<00:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1379:  80%|████████  | 320/400 [03:17<00:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1386:  80%|████████  | 321/400 [03:17<00:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.133:  80%|████████  | 322/400 [03:18<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1306:  81%|████████  | 323/400 [03:18<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1326:  81%|████████  | 324/400 [03:19<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.148:  81%|████████▏ | 325/400 [03:20<00:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.153:  82%|████████▏ | 326/400 [03:20<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1586:  82%|████████▏ | 327/400 [03:21<00:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1633:  82%|████████▏ | 328/400 [03:22<00:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.166:  82%|████████▏ | 329/400 [03:22<00:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1631:  82%|████████▎ | 330/400 [03:23<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1599:  83%|████████▎ | 331/400 [03:23<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1658:  83%|████████▎ | 332/400 [03:24<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1652:  83%|████████▎ | 333/400 [03:25<00:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1861:  84%|████████▎ | 334/400 [03:25<00:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1948:  84%|████████▍ | 335/400 [03:26<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2:  84%|████████▍ | 336/400 [03:26<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2071:  84%|████████▍ | 337/400 [03:27<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2075:  84%|████████▍ | 338/400 [03:28<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2163:  85%|████████▍ | 339/400 [03:28<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.216:  85%|████████▌ | 340/400 [03:29<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2274:  85%|████████▌ | 341/400 [03:30<00:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2325:  86%|████████▌ | 342/400 [03:30<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2559:  86%|████████▌ | 343/400 [03:31<00:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2508:  86%|████████▌ | 344/400 [03:31<00:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2486:  86%|████████▋ | 345/400 [03:32<00:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.243:  86%|████████▋ | 346/400 [03:33<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2349:  87%|████████▋ | 347/400 [03:33<00:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.241:  87%|████████▋ | 348/400 [03:34<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2446:  87%|████████▋ | 349/400 [03:35<00:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2443:  88%|████████▊ | 350/400 [03:35<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2383:  88%|████████▊ | 351/400 [03:36<00:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2411:  88%|████████▊ | 352/400 [03:36<00:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2389:  88%|████████▊ | 353/400 [03:37<00:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2418:  88%|████████▊ | 354/400 [03:38<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2454:  89%|████████▉ | 355/400 [03:38<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2403:  89%|████████▉ | 356/400 [03:39<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2453:  89%|████████▉ | 357/400 [03:39<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2387:  90%|████████▉ | 358/400 [03:40<00:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2373:  90%|████████▉ | 359/400 [03:41<00:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2275:  90%|█████████ | 360/400 [03:41<00:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2237:  90%|█████████ | 361/400 [03:42<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2227:  90%|█████████ | 362/400 [03:43<00:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2228:  91%|█████████ | 363/400 [03:43<00:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2158:  91%|█████████ | 364/400 [03:44<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2229:  91%|█████████▏| 365/400 [03:44<00:22,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2205:  92%|█████████▏| 366/400 [03:45<00:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2102:  92%|█████████▏| 367/400 [03:46<00:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.2016:  92%|█████████▏| 368/400 [03:46<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1973:  92%|█████████▏| 369/400 [03:47<00:19,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1878:  92%|█████████▎| 370/400 [03:48<00:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1823:  93%|█████████▎| 371/400 [03:48<00:18,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1722:  93%|█████████▎| 372/400 [03:49<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1659:  93%|█████████▎| 373/400 [03:49<00:16,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1743:  94%|█████████▎| 374/400 [03:50<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1702:  94%|█████████▍| 375/400 [03:51<00:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1712:  94%|█████████▍| 376/400 [03:51<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1695:  94%|█████████▍| 377/400 [03:52<00:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1696:  94%|█████████▍| 378/400 [03:52<00:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1751:  95%|█████████▍| 379/400 [03:53<00:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1778:  95%|█████████▌| 380/400 [03:54<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1711:  95%|█████████▌| 381/400 [03:54<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1765:  96%|█████████▌| 382/400 [03:55<00:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1696:  96%|█████████▌| 383/400 [03:56<00:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1707:  96%|█████████▌| 384/400 [03:56<00:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1623:  96%|█████████▋| 385/400 [03:57<00:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1567:  96%|█████████▋| 386/400 [03:57<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1535:  97%|█████████▋| 387/400 [03:58<00:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1493:  97%|█████████▋| 388/400 [03:59<00:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1511:  97%|█████████▋| 389/400 [03:59<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.148:  98%|█████████▊| 390/400 [04:00<00:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1517:  98%|█████████▊| 391/400 [04:00<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1662:  98%|█████████▊| 392/400 [04:01<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1704:  98%|█████████▊| 393/400 [04:02<00:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1682:  98%|█████████▊| 394/400 [04:02<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1794:  99%|█████████▉| 395/400 [04:03<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1742:  99%|█████████▉| 396/400 [04:03<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1686:  99%|█████████▉| 397/400 [04:04<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.177: 100%|█████████▉| 398/400 [04:05<00:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1706: 100%|█████████▉| 399/400 [04:05<00:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 4, train loss: 1.1647: 100%|██████████| 400/400 [04:06<00:00,  1.62it/s]
epoch: 4, valid loss: 2.0763:   1%|          | 2/201 [00:00<00:25,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0903:   2%|▏         | 4/201 [00:00<00:26,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0783:   3%|▎         | 6/201 [00:00<00:26,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0489:   4%|▍         | 8/201 [00:01<00:25,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0339:   5%|▍         | 10/201 [00:01<00:24,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0101:   6%|▌         | 12/201 [00:01<00:24,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0485:   7%|▋         | 14/201 [00:01<00:24,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0885:   8%|▊         | 16/201 [00:02<00:24,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.099:   9%|▉         | 18/201 [00:02<00:24,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0962:  10%|▉         | 20/201 [00:02<00:24,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0641:  11%|█         | 22/201 [00:02<00:23,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0705:  12%|█▏        | 24/201 [00:03<00:23,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0784:  13%|█▎        | 26/201 [00:03<00:23,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0919:  14%|█▍        | 28/201 [00:03<00:23,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0776:  15%|█▍        | 30/201 [00:03<00:22,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0578:  16%|█▌        | 32/201 [00:04<00:22,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0499:  17%|█▋        | 34/201 [00:04<00:22,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0163:  18%|█▊        | 36/201 [00:04<00:21,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0153:  19%|█▉        | 38/201 [00:05<00:21,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0289:  20%|█▉        | 40/201 [00:05<00:20,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0225:  21%|██        | 42/201 [00:05<00:20,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9993:  22%|██▏       | 44/201 [00:05<00:20,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.994:  23%|██▎       | 46/201 [00:06<00:20,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9649:  24%|██▍       | 48/201 [00:06<00:19,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9656:  25%|██▍       | 50/201 [00:06<00:19,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9403:  26%|██▌       | 52/201 [00:06<00:19,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9761:  27%|██▋       | 54/201 [00:07<00:19,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9719:  28%|██▊       | 56/201 [00:07<00:18,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0237:  29%|██▉       | 58/201 [00:07<00:18,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0377:  30%|██▉       | 60/201 [00:07<00:18,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.024:  31%|███       | 62/201 [00:08<00:18,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0055:  32%|███▏      | 64/201 [00:08<00:17,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0108:  33%|███▎      | 66/201 [00:08<00:17,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0163:  34%|███▍      | 68/201 [00:08<00:17,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0211:  35%|███▍      | 70/201 [00:09<00:17,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0401:  36%|███▌      | 72/201 [00:09<00:17,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0154:  37%|███▋      | 74/201 [00:09<00:16,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9976:  38%|███▊      | 76/201 [00:09<00:16,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0056:  39%|███▉      | 78/201 [00:10<00:15,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9849:  40%|███▉      | 80/201 [00:10<00:15,  7.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9777:  41%|████      | 82/201 [00:10<00:15,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9866:  42%|████▏     | 84/201 [00:11<00:15,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9751:  43%|████▎     | 86/201 [00:11<00:14,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9944:  44%|████▍     | 88/201 [00:11<00:15,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9675:  45%|████▍     | 90/201 [00:11<00:14,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9402:  46%|████▌     | 92/201 [00:12<00:14,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9408:  47%|████▋     | 94/201 [00:12<00:14,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9281:  48%|████▊     | 96/201 [00:12<00:13,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9218:  49%|████▉     | 98/201 [00:12<00:13,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9204:  50%|████▉     | 100/201 [00:13<00:13,  7.77it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.905:  51%|█████     | 102/201 [00:13<00:13,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8918:  52%|█████▏    | 104/201 [00:13<00:12,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8979:  53%|█████▎    | 106/201 [00:13<00:12,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9026:  54%|█████▎    | 108/201 [00:14<00:12,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8923:  55%|█████▍    | 110/201 [00:14<00:11,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8856:  56%|█████▌    | 112/201 [00:14<00:11,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9371:  57%|█████▋    | 114/201 [00:14<00:11,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9304:  58%|█████▊    | 116/201 [00:15<00:10,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9394:  59%|█████▊    | 118/201 [00:15<00:10,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9119:  60%|█████▉    | 120/201 [00:15<00:10,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9425:  61%|██████    | 122/201 [00:16<00:10,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9358:  62%|██████▏   | 124/201 [00:16<00:10,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9356:  63%|██████▎   | 126/201 [00:16<00:09,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9626:  64%|██████▎   | 128/201 [00:16<00:09,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9464:  65%|██████▍   | 130/201 [00:17<00:09,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9285:  66%|██████▌   | 132/201 [00:17<00:09,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9488:  67%|██████▋   | 134/201 [00:17<00:08,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9358:  68%|██████▊   | 136/201 [00:17<00:08,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9283:  69%|██████▊   | 138/201 [00:18<00:08,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9291:  70%|██████▉   | 140/201 [00:18<00:08,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8977:  71%|███████   | 142/201 [00:18<00:07,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9169:  72%|███████▏  | 144/201 [00:18<00:07,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9135:  73%|███████▎  | 146/201 [00:19<00:07,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8886:  74%|███████▎  | 148/201 [00:19<00:06,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8977:  75%|███████▍  | 150/201 [00:19<00:06,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8925:  76%|███████▌  | 152/201 [00:19<00:06,  7.75it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9026:  77%|███████▋  | 154/201 [00:20<00:06,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9093:  78%|███████▊  | 156/201 [00:20<00:05,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.8958:  79%|███████▊  | 158/201 [00:20<00:05,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9419:  80%|███████▉  | 160/201 [00:21<00:05,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9437:  81%|████████  | 162/201 [00:21<00:05,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9418:  82%|████████▏ | 164/201 [00:21<00:04,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.946:  83%|████████▎ | 166/201 [00:21<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9412:  84%|████████▎ | 168/201 [00:22<00:04,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9542:  85%|████████▍ | 170/201 [00:22<00:04,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.937:  86%|████████▌ | 172/201 [00:22<00:03,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9442:  87%|████████▋ | 174/201 [00:22<00:03,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9463:  88%|████████▊ | 176/201 [00:23<00:03,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9417:  89%|████████▊ | 178/201 [00:23<00:03,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.971:  90%|████████▉ | 180/201 [00:23<00:02,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9795:  91%|█████████ | 182/201 [00:23<00:02,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0018:  92%|█████████▏| 184/201 [00:24<00:02,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0175:  93%|█████████▎| 186/201 [00:24<00:01,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0135:  94%|█████████▎| 188/201 [00:24<00:01,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.003:  95%|█████████▍| 190/201 [00:24<00:01,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 1.9876:  96%|█████████▌| 192/201 [00:25<00:01,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0045:  97%|█████████▋| 194/201 [00:25<00:00,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0194:  98%|█████████▊| 196/201 [00:25<00:00,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0113:  99%|█████████▊| 198/201 [00:26<00:00,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0091: 100%|█████████▉| 200/201 [00:26<00:00,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 4, valid loss: 2.0096: 100%|██████████| 201/201 [00:26<00:00,  7.61it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 3, 'label': 0}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 1, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 4, 'label': 4}, {'prediction': 0, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.108:   0%|          | 1/400 [00:00<04:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.1005:   0%|          | 2/400 [00:01<04:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.0859:   1%|          | 3/400 [00:01<04:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.0719:   1%|          | 4/400 [00:02<04:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.0593:   1%|▏         | 5/400 [00:03<04:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.0438:   2%|▏         | 6/400 [00:03<04:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.032:   2%|▏         | 7/400 [00:04<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.0181:   2%|▏         | 8/400 [00:04<03:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 2.0062:   2%|▏         | 9/400 [00:05<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9921:   2%|▎         | 10/400 [00:06<03:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9828:   3%|▎         | 11/400 [00:06<03:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9673:   3%|▎         | 12/400 [00:07<03:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.958:   3%|▎         | 13/400 [00:07<03:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9409:   4%|▎         | 14/400 [00:08<03:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9269:   4%|▍         | 15/400 [00:09<03:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.929:   4%|▍         | 16/400 [00:09<03:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9217:   4%|▍         | 17/400 [00:10<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9108:   4%|▍         | 18/400 [00:11<03:51,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.9023:   5%|▍         | 19/400 [00:11<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8849:   5%|▌         | 20/400 [00:12<03:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8734:   5%|▌         | 21/400 [00:12<03:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8659:   6%|▌         | 22/400 [00:13<03:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8684:   6%|▌         | 23/400 [00:14<03:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.857:   6%|▌         | 24/400 [00:14<03:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8617:   6%|▋         | 25/400 [00:15<03:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8549:   6%|▋         | 26/400 [00:15<03:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.845:   7%|▋         | 27/400 [00:16<03:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8346:   7%|▋         | 28/400 [00:17<03:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.8245:   7%|▋         | 29/400 [00:17<03:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.81:   8%|▊         | 30/400 [00:18<03:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7973:   8%|▊         | 31/400 [00:19<03:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7851:   8%|▊         | 32/400 [00:19<03:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.772:   8%|▊         | 33/400 [00:20<03:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7573:   8%|▊         | 34/400 [00:20<03:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7463:   9%|▉         | 35/400 [00:21<03:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7305:   9%|▉         | 36/400 [00:22<03:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7183:   9%|▉         | 37/400 [00:22<03:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.708:  10%|▉         | 38/400 [00:23<03:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.7076:  10%|▉         | 39/400 [00:23<03:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.697:  10%|█         | 40/400 [00:24<03:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6935:  10%|█         | 41/400 [00:25<03:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6881:  10%|█         | 42/400 [00:25<03:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6862:  11%|█         | 43/400 [00:26<03:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6788:  11%|█         | 44/400 [00:27<03:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6737:  11%|█▏        | 45/400 [00:27<03:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6608:  12%|█▏        | 46/400 [00:28<03:41,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6467:  12%|█▏        | 47/400 [00:29<03:43,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6354:  12%|█▏        | 48/400 [00:29<03:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6251:  12%|█▏        | 49/400 [00:30<03:41,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6167:  12%|█▎        | 50/400 [00:30<03:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6135:  13%|█▎        | 51/400 [00:31<03:38,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.6027:  13%|█▎        | 52/400 [00:32<03:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5911:  13%|█▎        | 53/400 [00:32<03:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5923:  14%|█▎        | 54/400 [00:33<03:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5875:  14%|█▍        | 55/400 [00:33<03:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5839:  14%|█▍        | 56/400 [00:34<03:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5702:  14%|█▍        | 57/400 [00:35<03:35,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5639:  14%|█▍        | 58/400 [00:35<03:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5525:  15%|█▍        | 59/400 [00:36<03:34,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5438:  15%|█▌        | 60/400 [00:37<03:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5421:  15%|█▌        | 61/400 [00:37<03:32,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5292:  16%|█▌        | 62/400 [00:38<03:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5179:  16%|█▌        | 63/400 [00:38<03:32,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5085:  16%|█▌        | 64/400 [00:39<03:28,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.5011:  16%|█▋        | 65/400 [00:40<03:29,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4912:  16%|█▋        | 66/400 [00:40<03:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4824:  17%|█▋        | 67/400 [00:41<03:31,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4706:  17%|█▋        | 68/400 [00:42<03:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4627:  17%|█▋        | 69/400 [00:42<03:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4556:  18%|█▊        | 70/400 [00:43<03:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4452:  18%|█▊        | 71/400 [00:43<03:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4354:  18%|█▊        | 72/400 [00:44<03:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4314:  18%|█▊        | 73/400 [00:45<03:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4246:  18%|█▊        | 74/400 [00:45<03:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4143:  19%|█▉        | 75/400 [00:46<03:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4067:  19%|█▉        | 76/400 [00:47<03:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4033:  19%|█▉        | 77/400 [00:47<03:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3996:  20%|█▉        | 78/400 [00:48<03:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4018:  20%|█▉        | 79/400 [00:48<03:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.4016:  20%|██        | 80/400 [00:49<03:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3896:  20%|██        | 81/400 [00:50<03:17,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.383:  20%|██        | 82/400 [00:50<03:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3723:  21%|██        | 83/400 [00:51<03:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3691:  21%|██        | 84/400 [00:51<03:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3665:  21%|██▏       | 85/400 [00:52<03:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3634:  22%|██▏       | 86/400 [00:53<03:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3538:  22%|██▏       | 87/400 [00:53<03:16,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3594:  22%|██▏       | 88/400 [00:54<03:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3504:  22%|██▏       | 89/400 [00:55<03:13,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3499:  22%|██▎       | 90/400 [00:55<03:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3396:  23%|██▎       | 91/400 [00:56<03:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3296:  23%|██▎       | 92/400 [00:56<03:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3202:  23%|██▎       | 93/400 [00:57<03:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3236:  24%|██▎       | 94/400 [00:58<03:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3265:  24%|██▍       | 95/400 [00:58<03:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3274:  24%|██▍       | 96/400 [00:59<03:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3243:  24%|██▍       | 97/400 [00:59<03:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3215:  24%|██▍       | 98/400 [01:00<03:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3156:  25%|██▍       | 99/400 [01:01<03:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3126:  25%|██▌       | 100/400 [01:01<03:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3049:  25%|██▌       | 101/400 [01:02<03:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3163:  26%|██▌       | 102/400 [01:03<03:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3113:  26%|██▌       | 103/400 [01:03<03:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3118:  26%|██▌       | 104/400 [01:04<02:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3188:  26%|██▋       | 105/400 [01:04<03:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3143:  26%|██▋       | 106/400 [01:05<02:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3151:  27%|██▋       | 107/400 [01:06<03:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3196:  27%|██▋       | 108/400 [01:06<02:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3136:  27%|██▋       | 109/400 [01:07<03:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3063:  28%|██▊       | 110/400 [01:07<02:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.3005:  28%|██▊       | 111/400 [01:08<02:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.292:  28%|██▊       | 112/400 [01:09<02:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2904:  28%|██▊       | 113/400 [01:09<02:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2825:  28%|██▊       | 114/400 [01:10<02:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2731:  29%|██▉       | 115/400 [01:11<02:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2701:  29%|██▉       | 116/400 [01:11<02:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2628:  29%|██▉       | 117/400 [01:12<02:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.264:  30%|██▉       | 118/400 [01:12<02:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2632:  30%|██▉       | 119/400 [01:13<02:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2807:  30%|███       | 120/400 [01:14<02:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2798:  30%|███       | 121/400 [01:14<02:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.273:  30%|███       | 122/400 [01:15<02:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2718:  31%|███       | 123/400 [01:15<02:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2616:  31%|███       | 124/400 [01:16<02:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2565:  31%|███▏      | 125/400 [01:17<02:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2547:  32%|███▏      | 126/400 [01:17<02:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2472:  32%|███▏      | 127/400 [01:18<02:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2448:  32%|███▏      | 128/400 [01:19<02:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2353:  32%|███▏      | 129/400 [01:19<02:49,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2256:  32%|███▎      | 130/400 [01:20<02:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2156:  33%|███▎      | 131/400 [01:20<02:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2098:  33%|███▎      | 132/400 [01:21<02:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2099:  33%|███▎      | 133/400 [01:22<02:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.207:  34%|███▎      | 134/400 [01:22<02:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1996:  34%|███▍      | 135/400 [01:23<02:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1984:  34%|███▍      | 136/400 [01:23<02:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1993:  34%|███▍      | 137/400 [01:24<02:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2029:  34%|███▍      | 138/400 [01:25<02:38,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1935:  35%|███▍      | 139/400 [01:25<02:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2082:  35%|███▌      | 140/400 [01:26<02:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2073:  35%|███▌      | 141/400 [01:27<02:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2078:  36%|███▌      | 142/400 [01:27<02:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2157:  36%|███▌      | 143/400 [01:28<02:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2149:  36%|███▌      | 144/400 [01:28<02:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2086:  36%|███▋      | 145/400 [01:29<02:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.205:  36%|███▋      | 146/400 [01:30<02:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1966:  37%|███▋      | 147/400 [01:30<02:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2061:  37%|███▋      | 148/400 [01:31<02:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2081:  37%|███▋      | 149/400 [01:31<02:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1997:  38%|███▊      | 150/400 [01:32<02:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.2041:  38%|███▊      | 151/400 [01:33<02:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1947:  38%|███▊      | 152/400 [01:33<02:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1851:  38%|███▊      | 153/400 [01:34<02:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1813:  38%|███▊      | 154/400 [01:35<02:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1719:  39%|███▉      | 155/400 [01:35<02:34,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1702:  39%|███▉      | 156/400 [01:36<02:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1791:  39%|███▉      | 157/400 [01:36<02:33,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1779:  40%|███▉      | 158/400 [01:37<02:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1701:  40%|███▉      | 159/400 [01:38<02:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1693:  40%|████      | 160/400 [01:38<02:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1673:  40%|████      | 161/400 [01:39<02:29,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1617:  40%|████      | 162/400 [01:40<02:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1552:  41%|████      | 163/400 [01:40<02:29,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.152:  41%|████      | 164/400 [01:41<02:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.147:  41%|████▏     | 165/400 [01:41<02:30,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1399:  42%|████▏     | 166/400 [01:42<02:29,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1409:  42%|████▏     | 167/400 [01:43<02:28,  1.56it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1335:  42%|████▏     | 168/400 [01:43<02:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1294:  42%|████▏     | 169/400 [01:44<02:25,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1335:  42%|████▎     | 170/400 [01:45<02:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1308:  43%|████▎     | 171/400 [01:45<02:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1434:  43%|████▎     | 172/400 [01:46<02:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1448:  43%|████▎     | 173/400 [01:46<02:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1454:  44%|████▎     | 174/400 [01:47<02:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1444:  44%|████▍     | 175/400 [01:48<02:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1392:  44%|████▍     | 176/400 [01:48<02:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1479:  44%|████▍     | 177/400 [01:49<02:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1421:  44%|████▍     | 178/400 [01:50<02:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1371:  45%|████▍     | 179/400 [01:50<02:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1371:  45%|████▌     | 180/400 [01:51<02:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1338:  45%|████▌     | 181/400 [01:51<02:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.128:  46%|████▌     | 182/400 [01:52<02:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1249:  46%|████▌     | 183/400 [01:53<02:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1256:  46%|████▌     | 184/400 [01:53<02:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1264:  46%|████▋     | 185/400 [01:54<02:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1247:  46%|████▋     | 186/400 [01:54<02:08,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1357:  47%|████▋     | 187/400 [01:55<02:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.142:  47%|████▋     | 188/400 [01:56<02:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1327:  47%|████▋     | 189/400 [01:56<02:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1272:  48%|████▊     | 190/400 [01:57<02:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1191:  48%|████▊     | 191/400 [01:57<02:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1179:  48%|████▊     | 192/400 [01:58<02:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1102:  48%|████▊     | 193/400 [01:59<02:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1054:  48%|████▊     | 194/400 [01:59<02:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1058:  49%|████▉     | 195/400 [02:00<02:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.1001:  49%|████▉     | 196/400 [02:00<02:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0985:  49%|████▉     | 197/400 [02:01<02:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.094:  50%|████▉     | 198/400 [02:02<02:02,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0862:  50%|████▉     | 199/400 [02:02<02:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0847:  50%|█████     | 200/400 [02:03<02:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0756:  50%|█████     | 201/400 [02:04<02:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0673:  50%|█████     | 202/400 [02:04<01:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0636:  51%|█████     | 203/400 [02:05<02:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.058:  51%|█████     | 204/400 [02:05<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.059:  51%|█████▏    | 205/400 [02:06<01:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0524:  52%|█████▏    | 206/400 [02:07<01:57,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0493:  52%|█████▏    | 207/400 [02:07<01:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0458:  52%|█████▏    | 208/400 [02:08<01:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0444:  52%|█████▏    | 209/400 [02:08<01:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0385:  52%|█████▎    | 210/400 [02:09<01:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0307:  53%|█████▎    | 211/400 [02:10<01:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0231:  53%|█████▎    | 212/400 [02:10<01:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.017:  53%|█████▎    | 213/400 [02:11<01:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0137:  54%|█████▎    | 214/400 [02:11<01:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0071:  54%|█████▍    | 215/400 [02:12<01:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0107:  54%|█████▍    | 216/400 [02:13<01:51,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0036:  54%|█████▍    | 217/400 [02:13<01:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0053:  55%|█████▍    | 218/400 [02:14<01:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0042:  55%|█████▍    | 219/400 [02:15<01:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0001:  55%|█████▌    | 220/400 [02:15<01:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9958:  55%|█████▌    | 221/400 [02:16<01:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9924:  56%|█████▌    | 222/400 [02:16<01:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9905:  56%|█████▌    | 223/400 [02:17<01:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9876:  56%|█████▌    | 224/400 [02:18<01:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9863:  56%|█████▋    | 225/400 [02:18<01:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9975:  56%|█████▋    | 226/400 [02:19<01:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0075:  57%|█████▋    | 227/400 [02:19<01:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0115:  57%|█████▋    | 228/400 [02:20<01:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0092:  57%|█████▋    | 229/400 [02:21<01:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0056:  57%|█████▊    | 230/400 [02:21<01:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9991:  58%|█████▊    | 231/400 [02:22<01:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.996:  58%|█████▊    | 232/400 [02:23<01:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9983:  58%|█████▊    | 233/400 [02:23<01:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9946:  58%|█████▊    | 234/400 [02:24<01:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9968:  59%|█████▉    | 235/400 [02:24<01:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9982:  59%|█████▉    | 236/400 [02:25<01:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0115:  59%|█████▉    | 237/400 [02:26<01:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0041:  60%|█████▉    | 238/400 [02:26<01:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0081:  60%|█████▉    | 239/400 [02:27<01:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0005:  60%|██████    | 240/400 [02:27<01:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0013:  60%|██████    | 241/400 [02:28<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0251:  60%|██████    | 242/400 [02:29<01:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0181:  61%|██████    | 243/400 [02:29<01:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.01:  61%|██████    | 244/400 [02:30<01:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0017:  61%|██████▏   | 245/400 [02:31<01:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9963:  62%|██████▏   | 246/400 [02:31<01:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9972:  62%|██████▏   | 247/400 [02:32<01:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9933:  62%|██████▏   | 248/400 [02:32<01:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9943:  62%|██████▏   | 249/400 [02:33<01:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0029:  62%|██████▎   | 250/400 [02:34<01:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0134:  63%|██████▎   | 251/400 [02:34<01:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0058:  63%|██████▎   | 252/400 [02:35<01:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0:  63%|██████▎   | 253/400 [02:35<01:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9922:  64%|██████▎   | 254/400 [02:36<01:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9915:  64%|██████▍   | 255/400 [02:37<01:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.989:  64%|██████▍   | 256/400 [02:37<01:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9889:  64%|██████▍   | 257/400 [02:38<01:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9816:  64%|██████▍   | 258/400 [02:38<01:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9799:  65%|██████▍   | 259/400 [02:39<01:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9727:  65%|██████▌   | 260/400 [02:40<01:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.974:  65%|██████▌   | 261/400 [02:40<01:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9929:  66%|██████▌   | 262/400 [02:41<01:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.987:  66%|██████▌   | 263/400 [02:42<01:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9879:  66%|██████▌   | 264/400 [02:42<01:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9954:  66%|██████▋   | 265/400 [02:43<01:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.999:  66%|██████▋   | 266/400 [02:43<01:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9912:  67%|██████▋   | 267/400 [02:44<01:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9959:  67%|██████▋   | 268/400 [02:45<01:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9945:  67%|██████▋   | 269/400 [02:45<01:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9873:  68%|██████▊   | 270/400 [02:46<01:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9818:  68%|██████▊   | 271/400 [02:46<01:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9741:  68%|██████▊   | 272/400 [02:47<01:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9798:  68%|██████▊   | 273/400 [02:48<01:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9818:  68%|██████▊   | 274/400 [02:48<01:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9754:  69%|██████▉   | 275/400 [02:49<01:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9865:  69%|██████▉   | 276/400 [02:50<01:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9804:  69%|██████▉   | 277/400 [02:50<01:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9817:  70%|██████▉   | 278/400 [02:51<01:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9769:  70%|██████▉   | 279/400 [02:51<01:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9727:  70%|███████   | 280/400 [02:52<01:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9661:  70%|███████   | 281/400 [02:53<01:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9812:  70%|███████   | 282/400 [02:53<01:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9847:  71%|███████   | 283/400 [02:54<01:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9995:  71%|███████   | 284/400 [02:54<01:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9999:  71%|███████▏  | 285/400 [02:55<01:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9984:  72%|███████▏  | 286/400 [02:56<01:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0226:  72%|███████▏  | 287/400 [02:56<01:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0167:  72%|███████▏  | 288/400 [02:57<01:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0197:  72%|███████▏  | 289/400 [02:58<01:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0177:  72%|███████▎  | 290/400 [02:58<01:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0183:  73%|███████▎  | 291/400 [02:59<01:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0098:  73%|███████▎  | 292/400 [02:59<01:05,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0056:  73%|███████▎  | 293/400 [03:00<01:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0099:  74%|███████▎  | 294/400 [03:01<01:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0181:  74%|███████▍  | 295/400 [03:01<01:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0102:  74%|███████▍  | 296/400 [03:02<01:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0092:  74%|███████▍  | 297/400 [03:02<01:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0052:  74%|███████▍  | 298/400 [03:03<01:01,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0084:  75%|███████▍  | 299/400 [03:04<01:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0018:  75%|███████▌  | 300/400 [03:04<01:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0094:  75%|███████▌  | 301/400 [03:05<01:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.011:  76%|███████▌  | 302/400 [03:05<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0032:  76%|███████▌  | 303/400 [03:06<01:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0029:  76%|███████▌  | 304/400 [03:07<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 1.0001:  76%|███████▋  | 305/400 [03:07<00:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9928:  76%|███████▋  | 306/400 [03:08<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9883:  77%|███████▋  | 307/400 [03:09<00:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9886:  77%|███████▋  | 308/400 [03:09<00:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9873:  77%|███████▋  | 309/400 [03:10<00:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9823:  78%|███████▊  | 310/400 [03:10<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.981:  78%|███████▊  | 311/400 [03:11<00:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9785:  78%|███████▊  | 312/400 [03:12<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9771:  78%|███████▊  | 313/400 [03:12<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9764:  78%|███████▊  | 314/400 [03:13<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9809:  79%|███████▉  | 315/400 [03:13<00:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9744:  79%|███████▉  | 316/400 [03:14<00:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9782:  79%|███████▉  | 317/400 [03:15<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9766:  80%|███████▉  | 318/400 [03:15<00:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.972:  80%|███████▉  | 319/400 [03:16<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9684:  80%|████████  | 320/400 [03:17<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9634:  80%|████████  | 321/400 [03:17<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.984:  80%|████████  | 322/400 [03:18<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9841:  81%|████████  | 323/400 [03:18<00:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9797:  81%|████████  | 324/400 [03:19<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.979:  81%|████████▏ | 325/400 [03:20<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9745:  82%|████████▏ | 326/400 [03:20<00:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9885:  82%|████████▏ | 327/400 [03:21<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9962:  82%|████████▏ | 328/400 [03:21<00:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.996:  82%|████████▏ | 329/400 [03:22<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9916:  82%|████████▎ | 330/400 [03:23<00:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9856:  83%|████████▎ | 331/400 [03:23<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9917:  83%|████████▎ | 332/400 [03:24<00:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9872:  83%|████████▎ | 333/400 [03:25<00:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9873:  84%|████████▎ | 334/400 [03:25<00:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9911:  84%|████████▍ | 335/400 [03:26<00:40,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9832:  84%|████████▍ | 336/400 [03:26<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9785:  84%|████████▍ | 337/400 [03:27<00:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9921:  84%|████████▍ | 338/400 [03:28<00:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.996:  85%|████████▍ | 339/400 [03:28<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.993:  85%|████████▌ | 340/400 [03:29<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9897:  85%|████████▌ | 341/400 [03:29<00:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.982:  86%|████████▌ | 342/400 [03:30<00:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9847:  86%|████████▌ | 343/400 [03:31<00:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9774:  86%|████████▌ | 344/400 [03:31<00:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9727:  86%|████████▋ | 345/400 [03:32<00:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9667:  86%|████████▋ | 346/400 [03:33<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9724:  87%|████████▋ | 347/400 [03:33<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9662:  87%|████████▋ | 348/400 [03:34<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9585:  87%|████████▋ | 349/400 [03:34<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9605:  88%|████████▊ | 350/400 [03:35<00:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9552:  88%|████████▊ | 351/400 [03:36<00:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.974:  88%|████████▊ | 352/400 [03:36<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9711:  88%|████████▊ | 353/400 [03:37<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9681:  88%|████████▊ | 354/400 [03:37<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.973:  89%|████████▉ | 355/400 [03:38<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9721:  89%|████████▉ | 356/400 [03:39<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9751:  89%|████████▉ | 357/400 [03:39<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9788:  90%|████████▉ | 358/400 [03:40<00:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9729:  90%|████████▉ | 359/400 [03:41<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9674:  90%|█████████ | 360/400 [03:41<00:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9709:  90%|█████████ | 361/400 [03:42<00:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9675:  90%|█████████ | 362/400 [03:42<00:22,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9664:  91%|█████████ | 363/400 [03:43<00:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9662:  91%|█████████ | 364/400 [03:44<00:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9591:  91%|█████████▏| 365/400 [03:44<00:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9553:  92%|█████████▏| 366/400 [03:45<00:20,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9685:  92%|█████████▏| 367/400 [03:45<00:20,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9741:  92%|█████████▏| 368/400 [03:46<00:19,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9696:  92%|█████████▏| 369/400 [03:47<00:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9633:  92%|█████████▎| 370/400 [03:47<00:18,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9621:  93%|█████████▎| 371/400 [03:48<00:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9543:  93%|█████████▎| 372/400 [03:48<00:16,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9642:  93%|█████████▎| 373/400 [03:49<00:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.967:  94%|█████████▎| 374/400 [03:50<00:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9622:  94%|█████████▍| 375/400 [03:50<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9616:  94%|█████████▍| 376/400 [03:51<00:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9559:  94%|█████████▍| 377/400 [03:51<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.951:  94%|█████████▍| 378/400 [03:52<00:13,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9479:  95%|█████████▍| 379/400 [03:53<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9406:  95%|█████████▌| 380/400 [03:53<00:12,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9371:  95%|█████████▌| 381/400 [03:54<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9294:  96%|█████████▌| 382/400 [03:54<00:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9229:  96%|█████████▌| 383/400 [03:55<00:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9234:  96%|█████████▌| 384/400 [03:56<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9185:  96%|█████████▋| 385/400 [03:56<00:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9146:  96%|█████████▋| 386/400 [03:57<00:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9154:  97%|█████████▋| 387/400 [03:58<00:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9207:  97%|█████████▋| 388/400 [03:58<00:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9179:  97%|█████████▋| 389/400 [03:59<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9316:  98%|█████████▊| 390/400 [03:59<00:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9397:  98%|█████████▊| 391/400 [04:00<00:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9488:  98%|█████████▊| 392/400 [04:01<00:04,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9444:  98%|█████████▊| 393/400 [04:01<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9522:  98%|█████████▊| 394/400 [04:02<00:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9515:  99%|█████████▉| 395/400 [04:02<00:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.949:  99%|█████████▉| 396/400 [04:03<00:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9479:  99%|█████████▉| 397/400 [04:04<00:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9532: 100%|█████████▉| 398/400 [04:04<00:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9561: 100%|█████████▉| 399/400 [04:05<00:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 5, train loss: 0.9583: 100%|██████████| 400/400 [04:06<00:00,  1.63it/s]
epoch: 5, valid loss: 6.4492:   1%|          | 2/201 [00:00<00:26,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 6.4049:   2%|▏         | 4/201 [00:00<00:25,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 6.3042:   3%|▎         | 6/201 [00:00<00:25,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 6.1958:   4%|▍         | 8/201 [00:01<00:25,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 6.0966:   5%|▍         | 10/201 [00:01<00:24,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 6.0058:   6%|▌         | 12/201 [00:01<00:25,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.9873:   7%|▋         | 14/201 [00:01<00:25,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.954:   8%|▊         | 16/201 [00:02<00:24,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.8994:   9%|▉         | 18/201 [00:02<00:23,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.8089:  10%|▉         | 20/201 [00:02<00:23,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.7002:  11%|█         | 22/201 [00:02<00:23,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.6112:  12%|█▏        | 24/201 [00:03<00:23,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.5657:  13%|█▎        | 26/201 [00:03<00:23,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.5105:  14%|█▍        | 28/201 [00:03<00:22,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.428:  15%|█▍        | 30/201 [00:03<00:22,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.358:  16%|█▌        | 32/201 [00:04<00:21,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.3024:  17%|█▋        | 34/201 [00:04<00:21,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.2074:  18%|█▊        | 36/201 [00:04<00:21,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.1299:  19%|█▉        | 38/201 [00:05<00:21,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.0754:  20%|█▉        | 40/201 [00:05<00:21,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 5.0314:  21%|██        | 42/201 [00:05<00:20,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.9487:  22%|██▏       | 44/201 [00:05<00:20,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.8856:  23%|██▎       | 46/201 [00:06<00:20,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.7965:  24%|██▍       | 48/201 [00:06<00:19,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.7476:  25%|██▍       | 50/201 [00:06<00:19,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.6994:  26%|██▌       | 52/201 [00:06<00:19,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.7266:  27%|██▋       | 54/201 [00:07<00:19,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.6724:  28%|██▊       | 56/201 [00:07<00:18,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.6778:  29%|██▉       | 58/201 [00:07<00:18,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.6385:  30%|██▉       | 60/201 [00:07<00:18,  7.76it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.5558:  31%|███       | 62/201 [00:08<00:18,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.5114:  32%|███▏      | 64/201 [00:08<00:17,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.4478:  33%|███▎      | 66/201 [00:08<00:17,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.3973:  34%|███▍      | 68/201 [00:08<00:17,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.3488:  35%|███▍      | 70/201 [00:09<00:17,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.3352:  36%|███▌      | 72/201 [00:09<00:17,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.2729:  37%|███▋      | 74/201 [00:09<00:16,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.2122:  38%|███▊      | 76/201 [00:09<00:16,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.1899:  39%|███▉      | 78/201 [00:10<00:16,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.1413:  40%|███▉      | 80/201 [00:10<00:16,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.0853:  41%|████      | 82/201 [00:10<00:15,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.0664:  42%|████▏     | 84/201 [00:11<00:15,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.0154:  43%|████▎     | 86/201 [00:11<00:15,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 4.0271:  44%|████▍     | 88/201 [00:11<00:14,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.9598:  45%|████▍     | 90/201 [00:11<00:14,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.8893:  46%|████▌     | 92/201 [00:12<00:14,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.8674:  47%|████▋     | 94/201 [00:12<00:13,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.8077:  48%|████▊     | 96/201 [00:12<00:13,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.7615:  49%|████▉     | 98/201 [00:12<00:13,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.7529:  50%|████▉     | 100/201 [00:13<00:13,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.7067:  51%|█████     | 102/201 [00:13<00:12,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.6427:  52%|█████▏    | 104/201 [00:13<00:12,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.6166:  53%|█████▎    | 106/201 [00:13<00:12,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.5939:  54%|█████▎    | 108/201 [00:14<00:12,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.5412:  55%|█████▍    | 110/201 [00:14<00:12,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.5266:  56%|█████▌    | 112/201 [00:14<00:11,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.5364:  57%|█████▋    | 114/201 [00:15<00:11,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4873:  58%|█████▊    | 116/201 [00:15<00:11,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4884:  59%|█████▊    | 118/201 [00:15<00:10,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4321:  60%|█████▉    | 120/201 [00:15<00:10,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4431:  61%|██████    | 122/201 [00:16<00:10,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4038:  62%|██████▏   | 124/201 [00:16<00:09,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4097:  63%|██████▎   | 126/201 [00:16<00:09,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.4173:  64%|██████▎   | 128/201 [00:16<00:09,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.376:  65%|██████▍   | 130/201 [00:17<00:09,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.3396:  66%|██████▌   | 132/201 [00:17<00:08,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.3463:  67%|██████▋   | 134/201 [00:17<00:08,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.297:  68%|██████▊   | 136/201 [00:17<00:08,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.2747:  69%|██████▊   | 138/201 [00:18<00:08,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.2463:  70%|██████▉   | 140/201 [00:18<00:07,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.1878:  71%|███████   | 142/201 [00:18<00:07,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.206:  72%|███████▏  | 144/201 [00:18<00:07,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.179:  73%|███████▎  | 146/201 [00:19<00:07,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.1236:  74%|███████▎  | 148/201 [00:19<00:06,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.1109:  75%|███████▍  | 150/201 [00:19<00:06,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.0834:  76%|███████▌  | 152/201 [00:19<00:06,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.075:  77%|███████▋  | 154/201 [00:20<00:06,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.0531:  78%|███████▊  | 156/201 [00:20<00:05,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.0172:  79%|███████▊  | 158/201 [00:20<00:05,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.0314:  80%|███████▉  | 160/201 [00:21<00:05,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 3.0225:  81%|████████  | 162/201 [00:21<00:05,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9884:  82%|████████▏ | 164/201 [00:21<00:04,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9768:  83%|████████▎ | 166/201 [00:21<00:04,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9698:  84%|████████▎ | 168/201 [00:22<00:04,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9755:  85%|████████▍ | 170/201 [00:22<00:04,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9407:  86%|████████▌ | 172/201 [00:22<00:03,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9226:  87%|████████▋ | 174/201 [00:22<00:03,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9425:  88%|████████▊ | 176/201 [00:23<00:03,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9321:  89%|████████▊ | 178/201 [00:23<00:03,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9407:  90%|████████▉ | 180/201 [00:23<00:02,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9472:  91%|█████████ | 182/201 [00:23<00:02,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9597:  92%|█████████▏| 184/201 [00:24<00:02,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9553:  93%|█████████▎| 186/201 [00:24<00:01,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9612:  94%|█████████▎| 188/201 [00:24<00:01,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9342:  95%|█████████▍| 190/201 [00:25<00:01,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9051:  96%|█████████▌| 192/201 [00:25<00:01,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9251:  97%|█████████▋| 194/201 [00:25<00:00,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9279:  98%|█████████▊| 196/201 [00:25<00:00,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9047:  99%|█████████▊| 198/201 [00:26<00:00,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.88: 100%|█████████▉| 200/201 [00:26<00:00,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 5, valid loss: 2.9243: 100%|██████████| 201/201 [00:26<00:00,  7.60it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 3, 'label': 0}, {'prediction': 3, 'label': 1}], 'results_2': [{'prediction': 4, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 4, 'label': 4}, {'prediction': 0, 'label': 3}]}, {'results_1': [{'prediction': 3, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.3655:   0%|          | 1/400 [00:00<04:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.368:   0%|          | 2/400 [00:01<03:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.3767:   1%|          | 3/400 [00:01<04:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.3778:   1%|          | 4/400 [00:02<03:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.3834:   1%|▏         | 5/400 [00:03<04:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.385:   2%|▏         | 6/400 [00:03<03:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.3911:   2%|▏         | 7/400 [00:04<03:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.3991:   2%|▏         | 8/400 [00:04<03:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.398:   2%|▏         | 9/400 [00:05<03:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.402:   2%|▎         | 10/400 [00:06<03:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4045:   3%|▎         | 11/400 [00:06<03:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4167:   3%|▎         | 12/400 [00:07<03:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4147:   3%|▎         | 13/400 [00:07<03:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4141:   4%|▎         | 14/400 [00:08<03:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4202:   4%|▍         | 15/400 [00:09<03:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4179:   4%|▍         | 16/400 [00:09<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4158:   4%|▍         | 17/400 [00:10<03:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4156:   4%|▍         | 18/400 [00:10<03:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4127:   5%|▍         | 19/400 [00:11<03:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4183:   5%|▌         | 20/400 [00:12<03:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.417:   5%|▌         | 21/400 [00:12<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4142:   6%|▌         | 22/400 [00:13<03:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4121:   6%|▌         | 23/400 [00:14<03:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4104:   6%|▌         | 24/400 [00:14<03:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4082:   6%|▋         | 25/400 [00:15<03:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.416:   6%|▋         | 26/400 [00:15<03:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4376:   7%|▋         | 27/400 [00:16<03:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4485:   7%|▋         | 28/400 [00:17<03:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4517:   7%|▋         | 29/400 [00:17<03:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4503:   8%|▊         | 30/400 [00:18<03:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4564:   8%|▊         | 31/400 [00:18<03:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4599:   8%|▊         | 32/400 [00:19<03:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4608:   8%|▊         | 33/400 [00:20<03:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4621:   8%|▊         | 34/400 [00:20<03:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4661:   9%|▉         | 35/400 [00:21<03:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4725:   9%|▉         | 36/400 [00:21<03:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4836:   9%|▉         | 37/400 [00:22<03:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4821:  10%|▉         | 38/400 [00:23<03:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.479:  10%|▉         | 39/400 [00:23<03:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4826:  10%|█         | 40/400 [00:24<03:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4818:  10%|█         | 41/400 [00:25<03:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4836:  10%|█         | 42/400 [00:25<03:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.489:  11%|█         | 43/400 [00:26<03:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4908:  11%|█         | 44/400 [00:26<03:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4889:  11%|█▏        | 45/400 [00:27<03:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4868:  12%|█▏        | 46/400 [00:28<03:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4876:  12%|█▏        | 47/400 [00:28<03:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4937:  12%|█▏        | 48/400 [00:29<03:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.495:  12%|█▏        | 49/400 [00:29<03:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4921:  12%|█▎        | 50/400 [00:30<03:31,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4973:  13%|█▎        | 51/400 [00:31<03:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4971:  13%|█▎        | 52/400 [00:31<03:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4971:  13%|█▎        | 53/400 [00:32<03:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4937:  14%|█▎        | 54/400 [00:32<03:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4998:  14%|█▍        | 55/400 [00:33<03:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4975:  14%|█▍        | 56/400 [00:34<03:28,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4968:  14%|█▍        | 57/400 [00:34<03:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4945:  14%|█▍        | 58/400 [00:35<03:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4911:  15%|█▍        | 59/400 [00:36<03:32,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4883:  15%|█▌        | 60/400 [00:36<03:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4875:  15%|█▌        | 61/400 [00:37<03:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4928:  16%|█▌        | 62/400 [00:37<03:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4955:  16%|█▌        | 63/400 [00:38<03:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4996:  16%|█▌        | 64/400 [00:39<03:23,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4987:  16%|█▋        | 65/400 [00:39<03:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5024:  16%|█▋        | 66/400 [00:40<03:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5:  17%|█▋        | 67/400 [00:40<03:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4987:  17%|█▋        | 68/400 [00:41<03:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5007:  17%|█▋        | 69/400 [00:42<03:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4991:  18%|█▊        | 70/400 [00:42<03:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4968:  18%|█▊        | 71/400 [00:43<03:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4939:  18%|█▊        | 72/400 [00:44<03:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4906:  18%|█▊        | 73/400 [00:44<03:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4885:  18%|█▊        | 74/400 [00:45<03:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4908:  19%|█▉        | 75/400 [00:45<03:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4957:  19%|█▉        | 76/400 [00:46<03:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4941:  19%|█▉        | 77/400 [00:47<03:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.4972:  20%|█▉        | 78/400 [00:47<03:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5041:  20%|█▉        | 79/400 [00:48<03:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5107:  20%|██        | 80/400 [00:48<03:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5095:  20%|██        | 81/400 [00:49<03:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5131:  20%|██        | 82/400 [00:50<03:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5098:  21%|██        | 83/400 [00:50<03:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5103:  21%|██        | 84/400 [00:51<03:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5099:  21%|██▏       | 85/400 [00:52<03:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.513:  22%|██▏       | 86/400 [00:52<03:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5119:  22%|██▏       | 87/400 [00:53<03:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5161:  22%|██▏       | 88/400 [00:53<03:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5234:  22%|██▏       | 89/400 [00:54<03:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5438:  22%|██▎       | 90/400 [00:55<03:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5401:  23%|██▎       | 91/400 [00:55<03:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5427:  23%|██▎       | 92/400 [00:56<03:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5416:  23%|██▎       | 93/400 [00:56<03:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5398:  24%|██▎       | 94/400 [00:57<03:05,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5376:  24%|██▍       | 95/400 [00:58<03:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5343:  24%|██▍       | 96/400 [00:58<03:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5369:  24%|██▍       | 97/400 [00:59<03:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5372:  24%|██▍       | 98/400 [00:59<03:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5331:  25%|██▍       | 99/400 [01:00<03:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5328:  25%|██▌       | 100/400 [01:01<03:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5373:  25%|██▌       | 101/400 [01:01<03:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5365:  26%|██▌       | 102/400 [01:02<03:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5417:  26%|██▌       | 103/400 [01:03<03:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5402:  26%|██▌       | 104/400 [01:03<02:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5367:  26%|██▋       | 105/400 [01:04<03:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5354:  26%|██▋       | 106/400 [01:04<02:57,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5359:  27%|██▋       | 107/400 [01:05<02:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5324:  27%|██▋       | 108/400 [01:06<02:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5425:  27%|██▋       | 109/400 [01:06<02:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5514:  28%|██▊       | 110/400 [01:07<02:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5542:  28%|██▊       | 111/400 [01:07<02:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5569:  28%|██▊       | 112/400 [01:08<02:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5586:  28%|██▊       | 113/400 [01:09<02:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5766:  28%|██▊       | 114/400 [01:09<02:52,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5754:  29%|██▉       | 115/400 [01:10<02:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5735:  29%|██▉       | 116/400 [01:10<02:51,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5744:  29%|██▉       | 117/400 [01:11<02:52,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5938:  30%|██▉       | 118/400 [01:12<02:49,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5945:  30%|██▉       | 119/400 [01:12<02:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6036:  30%|███       | 120/400 [01:13<02:50,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6035:  30%|███       | 121/400 [01:13<02:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6004:  30%|███       | 122/400 [01:14<02:47,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6022:  31%|███       | 123/400 [01:15<02:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.599:  31%|███       | 124/400 [01:15<02:45,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.601:  31%|███▏      | 125/400 [01:16<02:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5972:  32%|███▏      | 126/400 [01:16<02:44,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5972:  32%|███▏      | 127/400 [01:17<02:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6016:  32%|███▏      | 128/400 [01:18<02:43,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5975:  32%|███▏      | 129/400 [01:18<02:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5952:  32%|███▎      | 130/400 [01:19<02:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5958:  33%|███▎      | 131/400 [01:20<02:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5979:  33%|███▎      | 132/400 [01:20<02:41,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6012:  33%|███▎      | 133/400 [01:21<02:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5999:  34%|███▎      | 134/400 [01:21<02:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.596:  34%|███▍      | 135/400 [01:22<02:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5991:  34%|███▍      | 136/400 [01:23<02:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.599:  34%|███▍      | 137/400 [01:23<02:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5953:  34%|███▍      | 138/400 [01:24<02:38,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5913:  35%|███▍      | 139/400 [01:24<02:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5967:  35%|███▌      | 140/400 [01:25<02:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6008:  35%|███▌      | 141/400 [01:26<02:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6065:  36%|███▌      | 142/400 [01:26<02:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6067:  36%|███▌      | 143/400 [01:27<02:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.602:  36%|███▌      | 144/400 [01:27<02:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6027:  36%|███▋      | 145/400 [01:28<02:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6068:  36%|███▋      | 146/400 [01:29<02:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6072:  37%|███▋      | 147/400 [01:29<02:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6046:  37%|███▋      | 148/400 [01:30<02:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6045:  37%|███▋      | 149/400 [01:31<02:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6073:  38%|███▊      | 150/400 [01:31<02:30,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6124:  38%|███▊      | 151/400 [01:32<02:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6107:  38%|███▊      | 152/400 [01:32<02:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6124:  38%|███▊      | 153/400 [01:33<02:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6096:  38%|███▊      | 154/400 [01:34<02:28,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6077:  39%|███▉      | 155/400 [01:34<02:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6031:  39%|███▉      | 156/400 [01:35<02:27,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.611:  39%|███▉      | 157/400 [01:35<02:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.61:  40%|███▉      | 158/400 [01:36<02:26,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6104:  40%|███▉      | 159/400 [01:37<02:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6082:  40%|████      | 160/400 [01:37<02:24,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6051:  40%|████      | 161/400 [01:38<02:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6012:  40%|████      | 162/400 [01:38<02:24,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6003:  41%|████      | 163/400 [01:39<02:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5995:  41%|████      | 164/400 [01:40<02:22,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6002:  41%|████▏     | 165/400 [01:40<02:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5982:  42%|████▏     | 166/400 [01:41<02:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.597:  42%|████▏     | 167/400 [01:41<02:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.593:  42%|████▏     | 168/400 [01:42<02:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.589:  42%|████▏     | 169/400 [01:43<02:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5917:  42%|████▎     | 170/400 [01:43<02:19,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5903:  43%|████▎     | 171/400 [01:44<02:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5901:  43%|████▎     | 172/400 [01:45<02:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5984:  43%|████▎     | 173/400 [01:45<02:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.596:  44%|████▎     | 174/400 [01:46<02:17,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5912:  44%|████▍     | 175/400 [01:46<02:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5879:  44%|████▍     | 176/400 [01:47<02:16,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5927:  44%|████▍     | 177/400 [01:48<02:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5894:  44%|████▍     | 178/400 [01:48<02:14,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5906:  45%|████▍     | 179/400 [01:49<02:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5987:  45%|████▌     | 180/400 [01:49<02:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5945:  45%|████▌     | 181/400 [01:50<02:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5975:  46%|████▌     | 182/400 [01:51<02:12,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5933:  46%|████▌     | 183/400 [01:51<02:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5929:  46%|████▌     | 184/400 [01:52<02:11,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5938:  46%|████▋     | 185/400 [01:52<02:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5951:  46%|████▋     | 186/400 [01:53<02:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5913:  47%|████▋     | 187/400 [01:54<02:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5918:  47%|████▋     | 188/400 [01:54<02:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5887:  47%|████▋     | 189/400 [01:55<02:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.595:  48%|████▊     | 190/400 [01:55<02:06,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5916:  48%|████▊     | 191/400 [01:56<02:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.59:  48%|████▊     | 192/400 [01:57<02:05,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5912:  48%|████▊     | 193/400 [01:57<02:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6061:  48%|████▊     | 194/400 [01:58<02:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6101:  49%|████▉     | 195/400 [01:59<02:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6065:  49%|████▉     | 196/400 [01:59<02:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6043:  49%|████▉     | 197/400 [02:00<02:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6047:  50%|████▉     | 198/400 [02:00<02:01,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6029:  50%|████▉     | 199/400 [02:01<02:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6019:  50%|█████     | 200/400 [02:02<02:00,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5971:  50%|█████     | 201/400 [02:02<02:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5927:  50%|█████     | 202/400 [02:03<01:59,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5915:  51%|█████     | 203/400 [02:03<02:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5886:  51%|█████     | 204/400 [02:04<01:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5921:  51%|█████▏    | 205/400 [02:05<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5889:  52%|█████▏    | 206/400 [02:05<01:57,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5873:  52%|█████▏    | 207/400 [02:06<01:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5961:  52%|█████▏    | 208/400 [02:06<01:56,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.594:  52%|█████▏    | 209/400 [02:07<01:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6004:  52%|█████▎    | 210/400 [02:08<01:54,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6005:  53%|█████▎    | 211/400 [02:08<01:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5968:  53%|█████▎    | 212/400 [02:09<01:53,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5927:  53%|█████▎    | 213/400 [02:09<01:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5916:  54%|█████▎    | 214/400 [02:10<01:52,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5907:  54%|█████▍    | 215/400 [02:11<01:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5878:  54%|█████▍    | 216/400 [02:11<01:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6003:  54%|█████▍    | 217/400 [02:12<01:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5964:  55%|█████▍    | 218/400 [02:13<01:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5949:  55%|█████▍    | 219/400 [02:13<01:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5992:  55%|█████▌    | 220/400 [02:14<01:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.5956:  55%|█████▌    | 221/400 [02:14<01:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.608:  56%|█████▌    | 222/400 [02:15<01:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6056:  56%|█████▌    | 223/400 [02:16<01:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6027:  56%|█████▌    | 224/400 [02:16<01:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6087:  56%|█████▋    | 225/400 [02:17<01:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6075:  56%|█████▋    | 226/400 [02:17<01:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6067:  57%|█████▋    | 227/400 [02:18<01:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6076:  57%|█████▋    | 228/400 [02:19<01:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6073:  57%|█████▋    | 229/400 [02:19<01:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6112:  57%|█████▊    | 230/400 [02:20<01:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6079:  58%|█████▊    | 231/400 [02:20<01:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6038:  58%|█████▊    | 232/400 [02:21<01:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6031:  58%|█████▊    | 233/400 [02:22<01:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6019:  58%|█████▊    | 234/400 [02:22<01:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6202:  59%|█████▉    | 235/400 [02:23<01:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6247:  59%|█████▉    | 236/400 [02:24<01:39,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6248:  59%|█████▉    | 237/400 [02:24<01:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6208:  60%|█████▉    | 238/400 [02:25<01:38,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6178:  60%|█████▉    | 239/400 [02:25<01:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6133:  60%|██████    | 240/400 [02:26<01:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6182:  60%|██████    | 241/400 [02:27<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6171:  60%|██████    | 242/400 [02:27<01:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6139:  61%|██████    | 243/400 [02:28<01:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6115:  61%|██████    | 244/400 [02:28<01:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6073:  61%|██████▏   | 245/400 [02:29<01:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6067:  62%|██████▏   | 246/400 [02:30<01:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6017:  62%|██████▏   | 247/400 [02:30<01:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6147:  62%|██████▏   | 248/400 [02:31<01:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6172:  62%|██████▏   | 249/400 [02:31<01:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6197:  62%|██████▎   | 250/400 [02:32<01:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6158:  63%|██████▎   | 251/400 [02:33<01:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6247:  63%|██████▎   | 252/400 [02:33<01:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6199:  63%|██████▎   | 253/400 [02:34<01:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6294:  64%|██████▎   | 254/400 [02:35<01:28,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6285:  64%|██████▍   | 255/400 [02:35<01:29,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6338:  64%|██████▍   | 256/400 [02:36<01:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6358:  64%|██████▍   | 257/400 [02:36<01:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6323:  64%|██████▍   | 258/400 [02:37<01:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6336:  65%|██████▍   | 259/400 [02:38<01:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6298:  65%|██████▌   | 260/400 [02:38<01:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6261:  65%|██████▌   | 261/400 [02:39<01:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6242:  66%|██████▌   | 262/400 [02:39<01:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6288:  66%|██████▌   | 263/400 [02:40<01:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6277:  66%|██████▌   | 264/400 [02:41<01:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.629:  66%|██████▋   | 265/400 [02:41<01:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6316:  66%|██████▋   | 266/400 [02:42<01:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6371:  67%|██████▋   | 267/400 [02:43<01:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6347:  67%|██████▋   | 268/400 [02:43<01:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6314:  67%|██████▋   | 269/400 [02:44<01:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6263:  68%|██████▊   | 270/400 [02:44<01:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6219:  68%|██████▊   | 271/400 [02:45<01:20,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6172:  68%|██████▊   | 272/400 [02:46<01:18,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6124:  68%|██████▊   | 273/400 [02:46<01:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6089:  68%|██████▊   | 274/400 [02:47<01:16,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6204:  69%|██████▉   | 275/400 [02:47<01:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6197:  69%|██████▉   | 276/400 [02:48<01:15,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6239:  69%|██████▉   | 277/400 [02:49<01:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6256:  70%|██████▉   | 278/400 [02:49<01:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.625:  70%|██████▉   | 279/400 [02:50<01:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6215:  70%|███████   | 280/400 [02:50<01:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6187:  70%|███████   | 281/400 [02:51<01:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6214:  70%|███████   | 282/400 [02:52<01:11,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6195:  71%|███████   | 283/400 [02:52<01:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6193:  71%|███████   | 284/400 [02:53<01:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6148:  71%|███████▏  | 285/400 [02:54<01:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6132:  72%|███████▏  | 286/400 [02:54<01:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6089:  72%|███████▏  | 287/400 [02:55<01:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6101:  72%|███████▏  | 288/400 [02:55<01:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6073:  72%|███████▏  | 289/400 [02:56<01:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6125:  72%|███████▎  | 290/400 [02:57<01:06,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6078:  73%|███████▎  | 291/400 [02:57<01:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6125:  73%|███████▎  | 292/400 [02:58<01:05,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6137:  73%|███████▎  | 293/400 [02:58<01:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6122:  74%|███████▎  | 294/400 [02:59<01:04,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6153:  74%|███████▍  | 295/400 [03:00<01:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6205:  74%|███████▍  | 296/400 [03:00<01:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.616:  74%|███████▍  | 297/400 [03:01<01:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6244:  74%|███████▍  | 298/400 [03:01<01:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6236:  75%|███████▍  | 299/400 [03:02<01:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6448:  75%|███████▌  | 300/400 [03:03<01:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6496:  75%|███████▌  | 301/400 [03:03<01:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6473:  76%|███████▌  | 302/400 [03:04<00:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6476:  76%|███████▌  | 303/400 [03:04<00:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6441:  76%|███████▌  | 304/400 [03:05<00:58,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6466:  76%|███████▋  | 305/400 [03:06<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6433:  76%|███████▋  | 306/400 [03:06<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6548:  77%|███████▋  | 307/400 [03:07<00:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6572:  77%|███████▋  | 308/400 [03:08<00:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6529:  77%|███████▋  | 309/400 [03:08<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6561:  78%|███████▊  | 310/400 [03:09<00:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6547:  78%|███████▊  | 311/400 [03:09<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6619:  78%|███████▊  | 312/400 [03:10<00:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6578:  78%|███████▊  | 313/400 [03:11<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6596:  78%|███████▊  | 314/400 [03:11<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6731:  79%|███████▉  | 315/400 [03:12<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6676:  79%|███████▉  | 316/400 [03:12<00:50,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6642:  79%|███████▉  | 317/400 [03:13<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6641:  80%|███████▉  | 318/400 [03:14<00:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6777:  80%|███████▉  | 319/400 [03:14<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6766:  80%|████████  | 320/400 [03:15<00:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6781:  80%|████████  | 321/400 [03:16<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6729:  80%|████████  | 322/400 [03:16<00:47,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6723:  81%|████████  | 323/400 [03:17<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6761:  81%|████████  | 324/400 [03:17<00:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6778:  81%|████████▏ | 325/400 [03:18<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6735:  82%|████████▏ | 326/400 [03:19<00:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6805:  82%|████████▏ | 327/400 [03:19<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.676:  82%|████████▏ | 328/400 [03:20<00:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6733:  82%|████████▏ | 329/400 [03:20<00:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6705:  82%|████████▎ | 330/400 [03:21<00:42,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6692:  83%|████████▎ | 331/400 [03:22<00:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6668:  83%|████████▎ | 332/400 [03:22<00:41,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6648:  83%|████████▎ | 333/400 [03:23<00:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6592:  84%|████████▎ | 334/400 [03:23<00:40,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6611:  84%|████████▍ | 335/400 [03:24<00:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.656:  84%|████████▍ | 336/400 [03:25<00:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6505:  84%|████████▍ | 337/400 [03:25<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6468:  84%|████████▍ | 338/400 [03:26<00:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6413:  85%|████████▍ | 339/400 [03:26<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6378:  85%|████████▌ | 340/400 [03:27<00:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6497:  85%|████████▌ | 341/400 [03:28<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6531:  86%|████████▌ | 342/400 [03:28<00:35,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6522:  86%|████████▌ | 343/400 [03:29<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6511:  86%|████████▌ | 344/400 [03:30<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6592:  86%|████████▋ | 345/400 [03:30<00:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6629:  86%|████████▋ | 346/400 [03:31<00:32,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6602:  87%|████████▋ | 347/400 [03:31<00:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6588:  87%|████████▋ | 348/400 [03:32<00:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6583:  87%|████████▋ | 349/400 [03:33<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.66:  88%|████████▊ | 350/400 [03:33<00:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6633:  88%|████████▊ | 351/400 [03:34<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6619:  88%|████████▊ | 352/400 [03:34<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6587:  88%|████████▊ | 353/400 [03:35<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.657:  88%|████████▊ | 354/400 [03:36<00:27,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6629:  89%|████████▉ | 355/400 [03:36<00:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6587:  89%|████████▉ | 356/400 [03:37<00:26,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6582:  89%|████████▉ | 357/400 [03:37<00:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6545:  90%|████████▉ | 358/400 [03:38<00:25,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6526:  90%|████████▉ | 359/400 [03:39<00:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6478:  90%|█████████ | 360/400 [03:39<00:24,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.652:  90%|█████████ | 361/400 [03:40<00:23,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6546:  90%|█████████ | 362/400 [03:40<00:22,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6517:  91%|█████████ | 363/400 [03:41<00:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6517:  91%|█████████ | 364/400 [03:42<00:21,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6477:  91%|█████████▏| 365/400 [03:42<00:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6491:  92%|█████████▏| 366/400 [03:43<00:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6458:  92%|█████████▏| 367/400 [03:44<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6429:  92%|█████████▏| 368/400 [03:44<00:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6473:  92%|█████████▏| 369/400 [03:45<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6541:  92%|█████████▎| 370/400 [03:45<00:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6576:  93%|█████████▎| 371/400 [03:46<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6593:  93%|█████████▎| 372/400 [03:47<00:16,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6746:  93%|█████████▎| 373/400 [03:47<00:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6888:  94%|█████████▎| 374/400 [03:48<00:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6841:  94%|█████████▍| 375/400 [03:48<00:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6864:  94%|█████████▍| 376/400 [03:49<00:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6811:  94%|█████████▍| 377/400 [03:50<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6798:  94%|█████████▍| 378/400 [03:50<00:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6759:  95%|█████████▍| 379/400 [03:51<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6727:  95%|█████████▌| 380/400 [03:51<00:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6698:  95%|█████████▌| 381/400 [03:52<00:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.678:  96%|█████████▌| 382/400 [03:53<00:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6784:  96%|█████████▌| 383/400 [03:53<00:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6772:  96%|█████████▌| 384/400 [03:54<00:09,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6785:  96%|█████████▋| 385/400 [03:55<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6762:  96%|█████████▋| 386/400 [03:55<00:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6734:  97%|█████████▋| 387/400 [03:56<00:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6753:  97%|█████████▋| 388/400 [03:56<00:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6723:  97%|█████████▋| 389/400 [03:57<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6699:  98%|█████████▊| 390/400 [03:58<00:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6673:  98%|█████████▊| 391/400 [03:58<00:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6691:  98%|█████████▊| 392/400 [03:59<00:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6761:  98%|█████████▊| 393/400 [03:59<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6768:  98%|█████████▊| 394/400 [04:00<00:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6823:  99%|█████████▉| 395/400 [04:01<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6835:  99%|█████████▉| 396/400 [04:01<00:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6807:  99%|█████████▉| 397/400 [04:02<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6869: 100%|█████████▉| 398/400 [04:03<00:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6901: 100%|█████████▉| 399/400 [04:03<00:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 6, train loss: 0.6851: 100%|██████████| 400/400 [04:04<00:00,  1.64it/s]
epoch: 6, valid loss: 3.398:   1%|          | 2/201 [00:00<00:26,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.3822:   2%|▏         | 4/201 [00:00<00:25,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.3461:   3%|▎         | 6/201 [00:00<00:25,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.2985:   4%|▍         | 8/201 [00:01<00:25,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.2465:   5%|▍         | 10/201 [00:01<00:25,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.2049:   6%|▌         | 12/201 [00:01<00:24,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.1836:   7%|▋         | 14/201 [00:01<00:24,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.2122:   8%|▊         | 16/201 [00:02<00:24,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.214:   9%|▉         | 18/201 [00:02<00:23,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.2045:  10%|▉         | 20/201 [00:02<00:23,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.1438:  11%|█         | 22/201 [00:02<00:23,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.1414:  12%|█▏        | 24/201 [00:03<00:24,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.1279:  13%|█▎        | 26/201 [00:03<00:24,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.1326:  14%|█▍        | 28/201 [00:03<00:22,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.1206:  15%|█▍        | 30/201 [00:03<00:22,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.0715:  16%|█▌        | 32/201 [00:04<00:22,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 3.0436:  17%|█▋        | 34/201 [00:04<00:21,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.9972:  18%|█▊        | 36/201 [00:04<00:21,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.9782:  19%|█▉        | 38/201 [00:05<00:21,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.9698:  20%|█▉        | 40/201 [00:05<00:21,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.9216:  21%|██        | 42/201 [00:05<00:20,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.8814:  22%|██▏       | 44/201 [00:05<00:20,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.8522:  23%|██▎       | 46/201 [00:06<00:20,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7991:  24%|██▍       | 48/201 [00:06<00:20,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7822:  25%|██▍       | 50/201 [00:06<00:20,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7375:  26%|██▌       | 52/201 [00:06<00:19,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7555:  27%|██▋       | 54/201 [00:07<00:19,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7113:  28%|██▊       | 56/201 [00:07<00:19,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7584:  29%|██▉       | 58/201 [00:07<00:19,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7933:  30%|██▉       | 60/201 [00:07<00:18,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7628:  31%|███       | 62/201 [00:08<00:18,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7464:  32%|███▏      | 64/201 [00:08<00:17,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7421:  33%|███▎      | 66/201 [00:08<00:17,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7294:  34%|███▍      | 68/201 [00:08<00:17,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7097:  35%|███▍      | 70/201 [00:09<00:17,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.7186:  36%|███▌      | 72/201 [00:09<00:16,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.6909:  37%|███▋      | 74/201 [00:09<00:16,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.6596:  38%|███▊      | 76/201 [00:10<00:16,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.6498:  39%|███▉      | 78/201 [00:10<00:16,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.6056:  40%|███▉      | 80/201 [00:10<00:15,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5862:  41%|████      | 82/201 [00:10<00:15,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5873:  42%|████▏     | 84/201 [00:11<00:15,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5916:  43%|████▎     | 86/201 [00:11<00:15,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.6183:  44%|████▍     | 88/201 [00:11<00:14,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5893:  45%|████▍     | 90/201 [00:11<00:14,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5635:  46%|████▌     | 92/201 [00:12<00:14,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5524:  47%|████▋     | 94/201 [00:12<00:14,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5196:  48%|████▊     | 96/201 [00:12<00:13,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5222:  49%|████▉     | 98/201 [00:12<00:13,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5406:  50%|████▉     | 100/201 [00:13<00:13,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.5132:  51%|█████     | 102/201 [00:13<00:12,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4725:  52%|█████▏    | 104/201 [00:13<00:12,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4852:  53%|█████▎    | 106/201 [00:13<00:12,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4796:  54%|█████▎    | 108/201 [00:14<00:12,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.462:  55%|█████▍    | 110/201 [00:14<00:11,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4542:  56%|█████▌    | 112/201 [00:14<00:11,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4979:  57%|█████▋    | 114/201 [00:14<00:11,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4731:  58%|█████▊    | 116/201 [00:15<00:11,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4854:  59%|█████▊    | 118/201 [00:15<00:10,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4466:  60%|█████▉    | 120/201 [00:15<00:10,  7.66it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4633:  61%|██████    | 122/201 [00:16<00:10,  7.72it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4453:  62%|██████▏   | 124/201 [00:16<00:09,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4675:  63%|██████▎   | 126/201 [00:16<00:09,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4994:  64%|██████▎   | 128/201 [00:16<00:09,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4823:  65%|██████▍   | 130/201 [00:17<00:09,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4646:  66%|██████▌   | 132/201 [00:17<00:09,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4861:  67%|██████▋   | 134/201 [00:17<00:08,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4695:  68%|██████▊   | 136/201 [00:17<00:08,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4454:  69%|██████▊   | 138/201 [00:18<00:08,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4216:  70%|██████▉   | 140/201 [00:18<00:08,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3789:  71%|███████   | 142/201 [00:18<00:07,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4012:  72%|███████▏  | 144/201 [00:18<00:07,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4034:  73%|███████▎  | 146/201 [00:19<00:07,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3725:  74%|███████▎  | 148/201 [00:19<00:06,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3682:  75%|███████▍  | 150/201 [00:19<00:06,  7.65it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3844:  76%|███████▌  | 152/201 [00:19<00:06,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3743:  77%|███████▋  | 154/201 [00:20<00:06,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3586:  78%|███████▊  | 156/201 [00:20<00:05,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3417:  79%|███████▊  | 158/201 [00:20<00:05,  7.74it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3973:  80%|███████▉  | 160/201 [00:21<00:05,  7.70it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3934:  81%|████████  | 162/201 [00:21<00:05,  7.71it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4037:  82%|████████▏ | 164/201 [00:21<00:04,  7.78it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3975:  83%|████████▎ | 166/201 [00:21<00:04,  7.73it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3921:  84%|████████▎ | 168/201 [00:22<00:04,  7.63it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4219:  85%|████████▍ | 170/201 [00:22<00:04,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3838:  86%|████████▌ | 172/201 [00:22<00:03,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3903:  87%|████████▋ | 174/201 [00:22<00:03,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3872:  88%|████████▊ | 176/201 [00:23<00:03,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3701:  89%|████████▊ | 178/201 [00:23<00:03,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3709:  90%|████████▉ | 180/201 [00:23<00:02,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.363:  91%|█████████ | 182/201 [00:23<00:02,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.3736:  92%|█████████▏| 184/201 [00:24<00:02,  7.67it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.387:  93%|█████████▎| 186/201 [00:24<00:01,  7.68it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.393:  94%|█████████▎| 188/201 [00:24<00:01,  7.69it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4076:  95%|█████████▍| 190/201 [00:24<00:01,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.37:  96%|█████████▌| 192/201 [00:25<00:01,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.409:  97%|█████████▋| 194/201 [00:25<00:00,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4393:  98%|█████████▊| 196/201 [00:25<00:00,  7.64it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4562:  99%|█████████▊| 198/201 [00:26<00:00,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4572: 100%|█████████▉| 200/201 [00:26<00:00,  7.60it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 6, valid loss: 2.4705: 100%|██████████| 201/201 [00:26<00:00,  7.60it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 2, 'label': 0}, {'prediction': 1, 'label': 1}], 'results_2': [{'prediction': 1, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 4, 'label': 4}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4535:   0%|          | 1/400 [00:00<04:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4533:   0%|          | 2/400 [00:01<03:58,  1.67it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4504:   1%|          | 3/400 [00:01<04:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4607:   1%|          | 4/400 [00:02<04:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4585:   1%|▏         | 5/400 [00:03<04:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4583:   2%|▏         | 6/400 [00:03<04:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4653:   2%|▏         | 7/400 [00:04<04:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4669:   2%|▏         | 8/400 [00:04<03:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4647:   2%|▏         | 9/400 [00:05<04:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4623:   2%|▎         | 10/400 [00:06<03:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4604:   3%|▎         | 11/400 [00:06<03:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4584:   3%|▎         | 12/400 [00:07<03:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4554:   3%|▎         | 13/400 [00:07<03:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4552:   4%|▎         | 14/400 [00:08<03:53,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4522:   4%|▍         | 15/400 [00:09<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4561:   4%|▍         | 16/400 [00:09<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4575:   4%|▍         | 17/400 [00:10<03:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4547:   4%|▍         | 18/400 [00:11<03:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4609:   5%|▍         | 19/400 [00:11<03:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4693:   5%|▌         | 20/400 [00:12<03:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4728:   5%|▌         | 21/400 [00:12<03:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4714:   6%|▌         | 22/400 [00:13<03:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4725:   6%|▌         | 23/400 [00:14<03:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4695:   6%|▌         | 24/400 [00:14<03:46,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4662:   6%|▋         | 25/400 [00:15<03:49,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.463:   6%|▋         | 26/400 [00:15<03:47,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4623:   7%|▋         | 27/400 [00:16<03:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4597:   7%|▋         | 28/400 [00:17<03:46,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.457:   7%|▋         | 29/400 [00:17<03:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.454:   8%|▊         | 30/400 [00:18<03:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4514:   8%|▊         | 31/400 [00:18<03:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4528:   8%|▊         | 32/400 [00:19<03:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4563:   8%|▊         | 33/400 [00:20<03:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4537:   8%|▊         | 34/400 [00:20<03:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4513:   9%|▉         | 35/400 [00:21<03:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4482:   9%|▉         | 36/400 [00:22<03:41,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4474:   9%|▉         | 37/400 [00:22<03:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4454:  10%|▉         | 38/400 [00:23<03:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4429:  10%|▉         | 39/400 [00:23<03:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4395:  10%|█         | 40/400 [00:24<03:38,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4378:  10%|█         | 41/400 [00:25<03:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.435:  10%|█         | 42/400 [00:25<03:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4332:  11%|█         | 43/400 [00:26<03:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4303:  11%|█         | 44/400 [00:26<03:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4272:  11%|█▏        | 45/400 [00:27<03:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.425:  12%|█▏        | 46/400 [00:28<03:33,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4307:  12%|█▏        | 47/400 [00:28<03:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4343:  12%|█▏        | 48/400 [00:29<03:33,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.434:  12%|█▏        | 49/400 [00:29<03:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4333:  12%|█▎        | 50/400 [00:30<03:33,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4306:  13%|█▎        | 51/400 [00:31<03:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4306:  13%|█▎        | 52/400 [00:31<03:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4277:  13%|█▎        | 53/400 [00:32<03:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4327:  14%|█▎        | 54/400 [00:33<03:31,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4303:  14%|█▍        | 55/400 [00:33<03:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4284:  14%|█▍        | 56/400 [00:34<03:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4249:  14%|█▍        | 57/400 [00:34<03:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4224:  14%|█▍        | 58/400 [00:35<03:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4237:  15%|█▍        | 59/400 [00:36<03:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4207:  15%|█▌        | 60/400 [00:36<03:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4211:  15%|█▌        | 61/400 [00:37<03:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4193:  16%|█▌        | 62/400 [00:37<03:25,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4165:  16%|█▌        | 63/400 [00:38<03:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.421:  16%|█▌        | 64/400 [00:39<03:23,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4213:  16%|█▋        | 65/400 [00:39<03:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.419:  16%|█▋        | 66/400 [00:40<03:22,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4178:  17%|█▋        | 67/400 [00:40<03:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4224:  17%|█▋        | 68/400 [00:41<03:21,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4216:  17%|█▋        | 69/400 [00:42<03:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4196:  18%|█▊        | 70/400 [00:42<03:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4259:  18%|█▊        | 71/400 [00:43<03:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4231:  18%|█▊        | 72/400 [00:43<03:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4209:  18%|█▊        | 73/400 [00:44<03:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4213:  18%|█▊        | 74/400 [00:45<03:17,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4213:  19%|█▉        | 75/400 [00:45<03:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4188:  19%|█▉        | 76/400 [00:46<03:16,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4187:  19%|█▉        | 77/400 [00:47<03:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4161:  20%|█▉        | 78/400 [00:47<03:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4236:  20%|█▉        | 79/400 [00:48<03:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.422:  20%|██        | 80/400 [00:48<03:13,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4416:  20%|██        | 81/400 [00:49<03:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4396:  20%|██        | 82/400 [00:50<03:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4392:  21%|██        | 83/400 [00:50<03:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4397:  21%|██        | 84/400 [00:51<03:11,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4461:  21%|██▏       | 85/400 [00:51<03:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4485:  22%|██▏       | 86/400 [00:52<03:09,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4463:  22%|██▏       | 87/400 [00:53<03:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4487:  22%|██▏       | 88/400 [00:53<03:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4465:  22%|██▏       | 89/400 [00:54<03:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4502:  22%|██▎       | 90/400 [00:54<03:08,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4493:  23%|██▎       | 91/400 [00:55<03:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4508:  23%|██▎       | 92/400 [00:56<03:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4491:  23%|██▎       | 93/400 [00:56<03:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4488:  24%|██▎       | 94/400 [00:57<03:05,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.451:  24%|██▍       | 95/400 [00:58<03:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4634:  24%|██▍       | 96/400 [00:58<03:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4603:  24%|██▍       | 97/400 [00:59<03:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4649:  24%|██▍       | 98/400 [00:59<03:02,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4666:  25%|██▍       | 99/400 [01:00<03:03,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4663:  25%|██▌       | 100/400 [01:01<03:01,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.463:  25%|██▌       | 101/400 [01:01<03:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4701:  26%|██▌       | 102/400 [01:02<03:00,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4672:  26%|██▌       | 103/400 [01:02<03:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4679:  26%|██▌       | 104/400 [01:03<02:59,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4662:  26%|██▋       | 105/400 [01:04<03:00,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.464:  26%|██▋       | 106/400 [01:04<02:57,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4617:  27%|██▋       | 107/400 [01:05<02:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4597:  27%|██▋       | 108/400 [01:05<02:55,  1.66it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4588:  27%|██▋       | 109/400 [01:06<02:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.456:  28%|██▊       | 110/400 [01:07<02:55,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4542:  28%|██▊       | 111/400 [01:07<02:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.451:  28%|██▊       | 112/400 [01:08<02:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4492:  28%|██▊       | 113/400 [01:09<02:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4495:  28%|██▊       | 114/400 [01:09<02:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4475:  29%|██▉       | 115/400 [01:10<02:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.445:  29%|██▉       | 116/400 [01:10<02:53,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4429:  29%|██▉       | 117/400 [01:11<02:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.441:  30%|██▉       | 118/400 [01:12<02:51,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4478:  30%|██▉       | 119/400 [01:12<02:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4454:  30%|███       | 120/400 [01:13<02:49,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4435:  30%|███       | 121/400 [01:13<02:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.441:  30%|███       | 122/400 [01:14<02:48,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4524:  31%|███       | 123/400 [01:15<02:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4529:  31%|███       | 124/400 [01:15<02:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4522:  31%|███▏      | 125/400 [01:16<02:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4582:  32%|███▏      | 126/400 [01:16<02:46,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4571:  32%|███▏      | 127/400 [01:17<02:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4621:  32%|███▏      | 128/400 [01:18<02:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4622:  32%|███▏      | 129/400 [01:18<02:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4627:  32%|███▎      | 130/400 [01:19<02:44,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4647:  33%|███▎      | 131/400 [01:20<02:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4625:  33%|███▎      | 132/400 [01:20<02:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4593:  33%|███▎      | 133/400 [01:21<02:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4559:  34%|███▎      | 134/400 [01:21<02:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4528:  34%|███▍      | 135/400 [01:22<02:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4513:  34%|███▍      | 136/400 [01:23<02:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4586:  34%|███▍      | 137/400 [01:23<02:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4639:  34%|███▍      | 138/400 [01:24<02:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4607:  35%|███▍      | 139/400 [01:24<02:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.466:  35%|███▌      | 140/400 [01:25<02:37,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4632:  35%|███▌      | 141/400 [01:26<02:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4631:  36%|███▌      | 142/400 [01:26<02:36,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4608:  36%|███▌      | 143/400 [01:27<02:36,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4625:  36%|███▌      | 144/400 [01:27<02:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4594:  36%|███▋      | 145/400 [01:28<02:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4612:  36%|███▋      | 146/400 [01:29<02:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4585:  37%|███▋      | 147/400 [01:29<02:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4655:  37%|███▋      | 148/400 [01:30<02:33,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4741:  37%|███▋      | 149/400 [01:31<02:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4721:  38%|███▊      | 150/400 [01:31<02:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4727:  38%|███▊      | 151/400 [01:32<02:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4755:  38%|███▊      | 152/400 [01:32<02:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4725:  38%|███▊      | 153/400 [01:33<02:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4704:  38%|███▊      | 154/400 [01:34<02:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4678:  39%|███▉      | 155/400 [01:34<02:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4877:  39%|███▉      | 156/400 [01:35<02:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4854:  39%|███▉      | 157/400 [01:35<02:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4907:  40%|███▉      | 158/400 [01:36<02:27,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4878:  40%|███▉      | 159/400 [01:37<02:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4838:  40%|████      | 160/400 [01:37<02:26,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4807:  40%|████      | 161/400 [01:38<02:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4786:  40%|████      | 162/400 [01:39<02:25,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.476:  41%|████      | 163/400 [01:39<02:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4812:  41%|████      | 164/400 [01:40<02:24,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4863:  41%|████▏     | 165/400 [01:40<02:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4853:  42%|████▏     | 166/400 [01:41<02:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4814:  42%|████▏     | 167/400 [01:42<02:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4894:  42%|████▏     | 168/400 [01:42<02:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5081:  42%|████▏     | 169/400 [01:43<02:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5066:  42%|████▎     | 170/400 [01:43<02:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5024:  43%|████▎     | 171/400 [01:44<02:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5:  43%|████▎     | 172/400 [01:45<02:19,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.503:  43%|████▎     | 173/400 [01:45<02:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4994:  44%|████▎     | 174/400 [01:46<02:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4966:  44%|████▍     | 175/400 [01:47<02:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4955:  44%|████▍     | 176/400 [01:47<02:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4921:  44%|████▍     | 177/400 [01:48<02:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4887:  44%|████▍     | 178/400 [01:48<02:16,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4867:  45%|████▍     | 179/400 [01:49<02:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4873:  45%|████▌     | 180/400 [01:50<02:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4858:  45%|████▌     | 181/400 [01:50<02:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4953:  46%|████▌     | 182/400 [01:51<02:12,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4958:  46%|████▌     | 183/400 [01:51<02:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4931:  46%|████▌     | 184/400 [01:52<02:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4964:  46%|████▋     | 185/400 [01:53<02:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4937:  46%|████▋     | 186/400 [01:53<02:10,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4904:  47%|████▋     | 187/400 [01:54<02:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4937:  47%|████▋     | 188/400 [01:54<02:08,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.49:  47%|████▋     | 189/400 [01:55<02:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4901:  48%|████▊     | 190/400 [01:56<02:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.49:  48%|████▊     | 191/400 [01:56<02:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.499:  48%|████▊     | 192/400 [01:57<02:06,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5062:  48%|████▊     | 193/400 [01:58<02:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5028:  48%|████▊     | 194/400 [01:58<02:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.506:  49%|████▉     | 195/400 [01:59<02:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5022:  49%|████▉     | 196/400 [01:59<02:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.499:  49%|████▉     | 197/400 [02:00<02:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4988:  50%|████▉     | 198/400 [02:01<02:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4959:  50%|████▉     | 199/400 [02:01<02:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4943:  50%|█████     | 200/400 [02:02<02:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4911:  50%|█████     | 201/400 [02:03<02:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.493:  50%|█████     | 202/400 [02:03<02:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.501:  51%|█████     | 203/400 [02:04<02:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4999:  51%|█████     | 204/400 [02:04<01:59,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5031:  51%|█████▏    | 205/400 [02:05<01:59,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5035:  52%|█████▏    | 206/400 [02:06<01:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.506:  52%|█████▏    | 207/400 [02:06<01:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5019:  52%|█████▏    | 208/400 [02:07<01:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4993:  52%|█████▏    | 209/400 [02:07<01:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4956:  52%|█████▎    | 210/400 [02:08<01:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4919:  53%|█████▎    | 211/400 [02:09<01:56,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4883:  53%|█████▎    | 212/400 [02:09<01:54,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4903:  53%|█████▎    | 213/400 [02:10<01:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.487:  54%|█████▎    | 214/400 [02:10<01:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4867:  54%|█████▍    | 215/400 [02:11<01:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4891:  54%|█████▍    | 216/400 [02:12<01:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4926:  54%|█████▍    | 217/400 [02:12<01:54,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4948:  55%|█████▍    | 218/400 [02:13<01:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4919:  55%|█████▍    | 219/400 [02:14<01:52,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4962:  55%|█████▌    | 220/400 [02:14<01:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4983:  55%|█████▌    | 221/400 [02:15<01:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5024:  56%|█████▌    | 222/400 [02:15<01:48,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5026:  56%|█████▌    | 223/400 [02:16<01:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4992:  56%|█████▌    | 224/400 [02:17<01:47,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4993:  56%|█████▋    | 225/400 [02:17<01:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.503:  56%|█████▋    | 226/400 [02:18<01:45,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5235:  57%|█████▋    | 227/400 [02:18<01:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5207:  57%|█████▋    | 228/400 [02:19<01:45,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5387:  57%|█████▋    | 229/400 [02:20<01:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5379:  57%|█████▊    | 230/400 [02:20<01:43,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5414:  58%|█████▊    | 231/400 [02:21<01:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5399:  58%|█████▊    | 232/400 [02:22<01:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5375:  58%|█████▊    | 233/400 [02:22<01:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5411:  58%|█████▊    | 234/400 [02:23<01:40,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5441:  59%|█████▉    | 235/400 [02:23<01:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5406:  59%|█████▉    | 236/400 [02:24<01:39,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5383:  59%|█████▉    | 237/400 [02:25<01:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5339:  60%|█████▉    | 238/400 [02:25<01:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5324:  60%|█████▉    | 239/400 [02:26<01:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5423:  60%|██████    | 240/400 [02:26<01:37,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5435:  60%|██████    | 241/400 [02:27<01:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5459:  60%|██████    | 242/400 [02:28<01:35,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5467:  61%|██████    | 243/400 [02:28<01:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5425:  61%|██████    | 244/400 [02:29<01:34,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5564:  61%|██████▏   | 245/400 [02:30<01:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5521:  62%|██████▏   | 246/400 [02:30<01:33,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5489:  62%|██████▏   | 247/400 [02:31<01:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5472:  62%|██████▏   | 248/400 [02:31<01:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5451:  62%|██████▏   | 249/400 [02:32<01:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5539:  62%|██████▎   | 250/400 [02:33<01:32,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5583:  63%|██████▎   | 251/400 [02:33<01:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.56:  63%|██████▎   | 252/400 [02:34<01:30,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5567:  63%|██████▎   | 253/400 [02:34<01:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5632:  64%|██████▎   | 254/400 [02:35<01:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5591:  64%|██████▍   | 255/400 [02:36<01:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5655:  64%|██████▍   | 256/400 [02:36<01:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5613:  64%|██████▍   | 257/400 [02:37<01:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5583:  64%|██████▍   | 258/400 [02:37<01:27,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5622:  65%|██████▍   | 259/400 [02:38<01:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5741:  65%|██████▌   | 260/400 [02:39<01:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5729:  65%|██████▌   | 261/400 [02:39<01:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5682:  66%|██████▌   | 262/400 [02:40<01:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.564:  66%|██████▌   | 263/400 [02:41<01:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5627:  66%|██████▌   | 264/400 [02:41<01:22,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5593:  66%|██████▋   | 265/400 [02:42<01:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5673:  66%|██████▋   | 266/400 [02:42<01:21,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5639:  67%|██████▋   | 267/400 [02:43<01:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5657:  67%|██████▋   | 268/400 [02:44<01:20,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5739:  67%|██████▋   | 269/400 [02:44<01:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5697:  68%|██████▊   | 270/400 [02:45<01:18,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.571:  68%|██████▊   | 271/400 [02:45<01:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5708:  68%|██████▊   | 272/400 [02:46<01:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5681:  68%|██████▊   | 273/400 [02:47<01:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5723:  68%|██████▊   | 274/400 [02:47<01:16,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5682:  69%|██████▉   | 275/400 [02:48<01:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5684:  69%|██████▉   | 276/400 [02:49<01:15,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5635:  69%|██████▉   | 277/400 [02:49<01:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5602:  70%|██████▉   | 278/400 [02:50<01:14,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5668:  70%|██████▉   | 279/400 [02:50<01:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5776:  70%|███████   | 280/400 [02:51<01:13,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5731:  70%|███████   | 281/400 [02:52<01:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5748:  70%|███████   | 282/400 [02:52<01:11,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5768:  71%|███████   | 283/400 [02:53<01:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5899:  71%|███████   | 284/400 [02:53<01:10,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5864:  71%|███████▏  | 285/400 [02:54<01:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5832:  72%|███████▏  | 286/400 [02:55<01:09,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5872:  72%|███████▏  | 287/400 [02:55<01:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5822:  72%|███████▏  | 288/400 [02:56<01:07,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5824:  72%|███████▏  | 289/400 [02:56<01:07,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5781:  72%|███████▎  | 290/400 [02:57<01:06,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.574:  73%|███████▎  | 291/400 [02:58<01:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5723:  73%|███████▎  | 292/400 [02:58<01:05,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5721:  73%|███████▎  | 293/400 [02:59<01:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5757:  74%|███████▎  | 294/400 [03:00<01:04,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5717:  74%|███████▍  | 295/400 [03:00<01:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5685:  74%|███████▍  | 296/400 [03:01<01:03,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5683:  74%|███████▍  | 297/400 [03:01<01:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5658:  74%|███████▍  | 298/400 [03:02<01:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5645:  75%|███████▍  | 299/400 [03:03<01:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5642:  75%|███████▌  | 300/400 [03:03<01:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5596:  75%|███████▌  | 301/400 [03:04<01:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5593:  76%|███████▌  | 302/400 [03:04<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5583:  76%|███████▌  | 303/400 [03:05<01:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5546:  76%|███████▌  | 304/400 [03:06<00:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5523:  76%|███████▋  | 305/400 [03:06<00:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5506:  76%|███████▋  | 306/400 [03:07<00:57,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5474:  77%|███████▋  | 307/400 [03:08<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5457:  77%|███████▋  | 308/400 [03:08<00:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5455:  77%|███████▋  | 309/400 [03:09<00:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5491:  78%|███████▊  | 310/400 [03:09<00:54,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.55:  78%|███████▊  | 311/400 [03:10<00:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5506:  78%|███████▊  | 312/400 [03:11<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5472:  78%|███████▊  | 313/400 [03:11<00:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5425:  78%|███████▊  | 314/400 [03:12<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5396:  79%|███████▉  | 315/400 [03:12<00:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5373:  79%|███████▉  | 316/400 [03:13<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5335:  79%|███████▉  | 317/400 [03:14<00:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5302:  80%|███████▉  | 318/400 [03:14<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.532:  80%|███████▉  | 319/400 [03:15<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5305:  80%|████████  | 320/400 [03:16<00:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5268:  80%|████████  | 321/400 [03:16<00:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5235:  80%|████████  | 322/400 [03:17<00:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5204:  81%|████████  | 323/400 [03:17<00:47,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5199:  81%|████████  | 324/400 [03:18<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5201:  81%|████████▏ | 325/400 [03:19<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5271:  82%|████████▏ | 326/400 [03:19<00:44,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5349:  82%|████████▏ | 327/400 [03:20<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5309:  82%|████████▏ | 328/400 [03:20<00:43,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5325:  82%|████████▏ | 329/400 [03:21<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5281:  82%|████████▎ | 330/400 [03:22<00:42,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5244:  83%|████████▎ | 331/400 [03:22<00:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5205:  83%|████████▎ | 332/400 [03:23<00:41,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5209:  83%|████████▎ | 333/400 [03:24<00:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5189:  84%|████████▎ | 334/400 [03:24<00:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5151:  84%|████████▍ | 335/400 [03:25<00:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5114:  84%|████████▍ | 336/400 [03:25<00:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5075:  84%|████████▍ | 337/400 [03:26<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5044:  84%|████████▍ | 338/400 [03:27<00:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5054:  85%|████████▍ | 339/400 [03:27<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5033:  85%|████████▌ | 340/400 [03:28<00:36,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5051:  85%|████████▌ | 341/400 [03:28<00:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5029:  86%|████████▌ | 342/400 [03:29<00:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.5021:  86%|████████▌ | 343/400 [03:30<00:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4984:  86%|████████▌ | 344/400 [03:30<00:34,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4948:  86%|████████▋ | 345/400 [03:31<00:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4922:  86%|████████▋ | 346/400 [03:32<00:32,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4901:  87%|████████▋ | 347/400 [03:32<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4861:  87%|████████▋ | 348/400 [03:33<00:31,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.487:  87%|████████▋ | 349/400 [03:33<00:31,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4846:  88%|████████▊ | 350/400 [03:34<00:30,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4847:  88%|████████▊ | 351/400 [03:35<00:29,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4814:  88%|████████▊ | 352/400 [03:35<00:29,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.479:  88%|████████▊ | 353/400 [03:36<00:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4764:  88%|████████▊ | 354/400 [03:36<00:28,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4729:  89%|████████▉ | 355/400 [03:37<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4696:  89%|████████▉ | 356/400 [03:38<00:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4768:  89%|████████▉ | 357/400 [03:38<00:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4759:  90%|████████▉ | 358/400 [03:39<00:25,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4731:  90%|████████▉ | 359/400 [03:40<00:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4728:  90%|█████████ | 360/400 [03:40<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4721:  90%|█████████ | 361/400 [03:41<00:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4715:  90%|█████████ | 362/400 [03:41<00:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4695:  91%|█████████ | 363/400 [03:42<00:23,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4673:  91%|█████████ | 364/400 [03:43<00:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4644:  91%|█████████▏| 365/400 [03:43<00:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4641:  92%|█████████▏| 366/400 [03:44<00:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4668:  92%|█████████▏| 367/400 [03:45<00:20,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4639:  92%|█████████▏| 368/400 [03:45<00:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4657:  92%|█████████▏| 369/400 [03:46<00:19,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4624:  92%|█████████▎| 370/400 [03:46<00:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.465:  93%|█████████▎| 371/400 [03:47<00:18,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4698:  93%|█████████▎| 372/400 [03:48<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4669:  93%|█████████▎| 373/400 [03:48<00:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.474:  94%|█████████▎| 374/400 [03:49<00:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.477:  94%|█████████▍| 375/400 [03:49<00:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4755:  94%|█████████▍| 376/400 [03:50<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.472:  94%|█████████▍| 377/400 [03:51<00:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4771:  94%|█████████▍| 378/400 [03:51<00:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4736:  95%|█████████▍| 379/400 [03:52<00:13,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.471:  95%|█████████▌| 380/400 [03:53<00:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4712:  95%|█████████▌| 381/400 [03:53<00:11,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4679:  96%|█████████▌| 382/400 [03:54<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4648:  96%|█████████▌| 383/400 [03:55<00:10,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4665:  96%|█████████▌| 384/400 [03:55<00:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4651:  96%|█████████▋| 385/400 [03:56<00:09,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4634:  96%|█████████▋| 386/400 [03:56<00:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.461:  97%|█████████▋| 387/400 [03:57<00:08,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4635:  97%|█████████▋| 388/400 [03:58<00:07,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4689:  97%|█████████▋| 389/400 [03:58<00:06,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4717:  98%|█████████▊| 390/400 [03:59<00:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4681:  98%|█████████▊| 391/400 [04:00<00:05,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4656:  98%|█████████▊| 392/400 [04:00<00:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4621:  98%|█████████▊| 393/400 [04:01<00:04,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4669:  98%|█████████▊| 394/400 [04:01<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4736:  99%|█████████▉| 395/400 [04:02<00:03,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.473:  99%|█████████▉| 396/400 [04:03<00:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4852:  99%|█████████▉| 397/400 [04:03<00:01,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4848: 100%|█████████▉| 398/400 [04:04<00:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4862: 100%|█████████▉| 399/400 [04:05<00:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 7, train loss: 0.4838: 100%|██████████| 400/400 [04:05<00:00,  1.63it/s]
epoch: 7, valid loss: 5.8167:   1%|          | 2/201 [00:00<00:26,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.7529:   2%|▏         | 4/201 [00:00<00:26,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.6777:   3%|▎         | 6/201 [00:00<00:27,  7.16it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.576:   4%|▍         | 8/201 [00:01<00:26,  7.18it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.5007:   5%|▍         | 10/201 [00:01<00:26,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.4212:   6%|▌         | 12/201 [00:01<00:26,  7.11it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.404:   7%|▋         | 14/201 [00:01<00:25,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.3891:   8%|▊         | 16/201 [00:02<00:25,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.3526:   9%|▉         | 18/201 [00:02<00:24,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.291:  10%|▉         | 20/201 [00:02<00:24,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.1895:  11%|█         | 22/201 [00:03<00:24,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.1362:  12%|█▏        | 24/201 [00:03<00:24,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.0938:  13%|█▎        | 26/201 [00:03<00:24,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.0459:  14%|█▍        | 28/201 [00:03<00:23,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 5.0092:  15%|█▍        | 30/201 [00:04<00:23,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.9286:  16%|█▌        | 32/201 [00:04<00:23,  7.16it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.8785:  17%|█▋        | 34/201 [00:04<00:22,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.791:  18%|█▊        | 36/201 [00:04<00:22,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.7294:  19%|█▉        | 38/201 [00:05<00:22,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.7267:  20%|█▉        | 40/201 [00:05<00:21,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.7038:  21%|██        | 42/201 [00:05<00:21,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.6285:  22%|██▏       | 44/201 [00:06<00:21,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.5502:  23%|██▎       | 46/201 [00:06<00:21,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.4727:  24%|██▍       | 48/201 [00:06<00:20,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.445:  25%|██▍       | 50/201 [00:06<00:20,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.3876:  26%|██▌       | 52/201 [00:07<00:20,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.418:  27%|██▋       | 54/201 [00:07<00:20,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.3578:  28%|██▊       | 56/201 [00:07<00:19,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.3843:  29%|██▉       | 58/201 [00:07<00:19,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.3531:  30%|██▉       | 60/201 [00:08<00:19,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.3067:  31%|███       | 62/201 [00:08<00:19,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.2627:  32%|███▏      | 64/201 [00:08<00:18,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.2251:  33%|███▎      | 66/201 [00:09<00:18,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.201:  34%|███▍      | 68/201 [00:09<00:18,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.1768:  35%|███▍      | 70/201 [00:09<00:17,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.1766:  36%|███▌      | 72/201 [00:09<00:17,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.1115:  37%|███▋      | 74/201 [00:10<00:17,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.0436:  38%|███▊      | 76/201 [00:10<00:16,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 4.0194:  39%|███▉      | 78/201 [00:10<00:16,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.9638:  40%|███▉      | 80/201 [00:10<00:16,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.9205:  41%|████      | 82/201 [00:11<00:15,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.9119:  42%|████▏     | 84/201 [00:11<00:15,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.8721:  43%|████▎     | 86/201 [00:11<00:15,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.9001:  44%|████▍     | 88/201 [00:12<00:14,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.8399:  45%|████▍     | 90/201 [00:12<00:14,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.7662:  46%|████▌     | 92/201 [00:12<00:14,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.7765:  47%|████▋     | 94/201 [00:12<00:14,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.7066:  48%|████▊     | 96/201 [00:13<00:14,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.6875:  49%|████▉     | 98/201 [00:13<00:14,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.6365:  50%|████▉     | 100/201 [00:13<00:13,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.5931:  51%|█████     | 102/201 [00:13<00:13,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.5252:  52%|█████▏    | 104/201 [00:14<00:13,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.52:  53%|█████▎    | 106/201 [00:14<00:13,  7.12it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.5034:  54%|█████▎    | 108/201 [00:14<00:12,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.454:  55%|█████▍    | 110/201 [00:15<00:12,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.429:  56%|█████▌    | 112/201 [00:15<00:11,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.4519:  57%|█████▋    | 114/201 [00:15<00:11,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.4069:  58%|█████▊    | 116/201 [00:15<00:11,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.4004:  59%|█████▊    | 118/201 [00:16<00:11,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.339:  60%|█████▉    | 120/201 [00:16<00:10,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.3626:  61%|██████    | 122/201 [00:16<00:10,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.321:  62%|██████▏   | 124/201 [00:16<00:10,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.3495:  63%|██████▎   | 126/201 [00:17<00:10,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.3592:  64%|██████▎   | 128/201 [00:17<00:09,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.325:  65%|██████▍   | 130/201 [00:17<00:09,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.2925:  66%|██████▌   | 132/201 [00:18<00:09,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.286:  67%|██████▋   | 134/201 [00:18<00:09,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.2491:  68%|██████▊   | 136/201 [00:18<00:08,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.2323:  69%|██████▊   | 138/201 [00:18<00:08,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.1988:  70%|██████▉   | 140/201 [00:19<00:08,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.1401:  71%|███████   | 142/201 [00:19<00:07,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.1428:  72%|███████▏  | 144/201 [00:19<00:07,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.1405:  73%|███████▎  | 146/201 [00:19<00:07,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.086:  74%|███████▎  | 148/201 [00:20<00:07,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0684:  75%|███████▍  | 150/201 [00:20<00:06,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0578:  76%|███████▌  | 152/201 [00:20<00:06,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0488:  77%|███████▋  | 154/201 [00:20<00:06,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0404:  78%|███████▊  | 156/201 [00:21<00:06,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0217:  79%|███████▊  | 158/201 [00:21<00:05,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0514:  80%|███████▉  | 160/201 [00:21<00:05,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0654:  81%|████████  | 162/201 [00:22<00:05,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0373:  82%|████████▏ | 164/201 [00:22<00:05,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0207:  83%|████████▎ | 166/201 [00:22<00:04,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0186:  84%|████████▎ | 168/201 [00:22<00:04,  7.22it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 3.0341:  85%|████████▍ | 170/201 [00:23<00:04,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9936:  86%|████████▌ | 172/201 [00:23<00:04,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9582:  87%|████████▋ | 174/201 [00:23<00:03,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9595:  88%|████████▊ | 176/201 [00:23<00:03,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9343:  89%|████████▊ | 178/201 [00:24<00:03,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9328:  90%|████████▉ | 180/201 [00:24<00:02,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9425:  91%|█████████ | 182/201 [00:24<00:02,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9505:  92%|█████████▏| 184/201 [00:25<00:02,  7.15it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9604:  93%|█████████▎| 186/201 [00:25<00:02,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9623:  94%|█████████▎| 188/201 [00:25<00:01,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9333:  95%|█████████▍| 190/201 [00:25<00:01,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.8909:  96%|█████████▌| 192/201 [00:26<00:01,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.928:  97%|█████████▋| 194/201 [00:26<00:00,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9613:  98%|█████████▊| 196/201 [00:26<00:00,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9415:  99%|█████████▊| 198/201 [00:26<00:00,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9314: 100%|█████████▉| 200/201 [00:27<00:00,  7.16it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 7, valid loss: 2.9542: 100%|██████████| 201/201 [00:27<00:00,  7.34it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 3, 'label': 0}, {'prediction': 0, 'label': 1}], 'results_2': [{'prediction': 5, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 4, 'label': 4}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.5143:   0%|          | 1/400 [00:00<04:10,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.5104:   0%|          | 2/400 [00:01<04:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.5067:   1%|          | 3/400 [00:01<04:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.5031:   1%|          | 4/400 [00:02<04:01,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.499:   1%|▏         | 5/400 [00:03<04:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4969:   2%|▏         | 6/400 [00:03<04:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4931:   2%|▏         | 7/400 [00:04<04:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4892:   2%|▏         | 8/400 [00:04<04:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4915:   2%|▏         | 9/400 [00:05<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4884:   2%|▎         | 10/400 [00:06<03:58,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.487:   3%|▎         | 11/400 [00:06<04:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4839:   3%|▎         | 12/400 [00:07<03:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4807:   3%|▎         | 13/400 [00:08<03:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4776:   4%|▎         | 14/400 [00:08<03:55,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.477:   4%|▍         | 15/400 [00:09<03:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4741:   4%|▍         | 16/400 [00:09<03:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4783:   4%|▍         | 17/400 [00:10<04:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4746:   4%|▍         | 18/400 [00:11<03:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4772:   5%|▍         | 19/400 [00:11<03:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4758:   5%|▌         | 20/400 [00:12<03:54,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4791:   5%|▌         | 21/400 [00:12<03:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4775:   6%|▌         | 22/400 [00:13<03:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4766:   6%|▌         | 23/400 [00:14<03:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4755:   6%|▌         | 24/400 [00:14<03:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4728:   6%|▋         | 25/400 [00:15<03:54,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4707:   6%|▋         | 26/400 [00:16<03:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.475:   7%|▋         | 27/400 [00:16<03:52,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4739:   7%|▋         | 28/400 [00:17<03:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4725:   7%|▋         | 29/400 [00:17<03:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4686:   8%|▊         | 30/400 [00:18<03:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4649:   8%|▊         | 31/400 [00:19<03:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4624:   8%|▊         | 32/400 [00:19<03:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4593:   8%|▊         | 33/400 [00:20<03:48,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4582:   8%|▊         | 34/400 [00:21<03:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4602:   9%|▉         | 35/400 [00:21<03:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4681:   9%|▉         | 36/400 [00:22<03:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4711:   9%|▉         | 37/400 [00:22<03:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4761:  10%|▉         | 38/400 [00:23<03:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4722:  10%|▉         | 39/400 [00:24<03:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4689:  10%|█         | 40/400 [00:24<03:41,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4653:  10%|█         | 41/400 [00:25<03:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4653:  10%|█         | 42/400 [00:25<03:39,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4765:  11%|█         | 43/400 [00:26<03:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4729:  11%|█         | 44/400 [00:27<03:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4693:  11%|█▏        | 45/400 [00:27<03:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4683:  12%|█▏        | 46/400 [00:28<03:37,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4879:  12%|█▏        | 47/400 [00:29<03:38,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4851:  12%|█▏        | 48/400 [00:29<03:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4817:  12%|█▏        | 49/400 [00:30<03:38,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4801:  12%|█▎        | 50/400 [00:30<03:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4954:  13%|█▎        | 51/400 [00:31<03:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4948:  13%|█▎        | 52/400 [00:32<03:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4943:  13%|█▎        | 53/400 [00:32<03:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4906:  14%|█▎        | 54/400 [00:33<03:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4915:  14%|█▍        | 55/400 [00:34<03:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4882:  14%|█▍        | 56/400 [00:34<03:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4847:  14%|█▍        | 57/400 [00:35<03:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4824:  14%|█▍        | 58/400 [00:35<03:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4797:  15%|█▍        | 59/400 [00:36<03:34,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4762:  15%|█▌        | 60/400 [00:37<03:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.484:  15%|█▌        | 61/400 [00:37<03:35,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4805:  16%|█▌        | 62/400 [00:38<03:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4843:  16%|█▌        | 63/400 [00:39<03:32,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4806:  16%|█▌        | 64/400 [00:39<03:29,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4814:  16%|█▋        | 65/400 [00:40<03:30,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4808:  16%|█▋        | 66/400 [00:40<03:28,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4785:  17%|█▋        | 67/400 [00:41<03:29,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4748:  17%|█▋        | 68/400 [00:42<03:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4715:  17%|█▋        | 69/400 [00:42<03:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4719:  18%|█▊        | 70/400 [00:43<03:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4687:  18%|█▊        | 71/400 [00:44<03:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4652:  18%|█▊        | 72/400 [00:44<03:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4629:  18%|█▊        | 73/400 [00:45<03:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4604:  18%|█▊        | 74/400 [00:45<03:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4572:  19%|█▉        | 75/400 [00:46<03:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4542:  19%|█▉        | 76/400 [00:47<03:20,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4514:  19%|█▉        | 77/400 [00:47<03:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4483:  20%|█▉        | 78/400 [00:48<03:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.445:  20%|█▉        | 79/400 [00:48<03:21,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4417:  20%|██        | 80/400 [00:49<03:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4406:  20%|██        | 81/400 [00:50<03:20,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4388:  20%|██        | 82/400 [00:50<03:17,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.436:  21%|██        | 83/400 [00:51<03:18,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4338:  21%|██        | 84/400 [00:52<03:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.431:  21%|██▏       | 85/400 [00:52<03:17,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4278:  22%|██▏       | 86/400 [00:53<03:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4247:  22%|██▏       | 87/400 [00:53<03:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4214:  22%|██▏       | 88/400 [00:54<03:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4212:  22%|██▏       | 89/400 [00:55<03:13,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4184:  22%|██▎       | 90/400 [00:55<03:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4153:  23%|██▎       | 91/400 [00:56<03:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4182:  23%|██▎       | 92/400 [00:57<03:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4165:  23%|██▎       | 93/400 [00:57<03:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4147:  24%|██▎       | 94/400 [00:58<03:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4118:  24%|██▍       | 95/400 [00:58<03:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4124:  24%|██▍       | 96/400 [00:59<03:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4095:  24%|██▍       | 97/400 [01:00<03:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4098:  24%|██▍       | 98/400 [01:00<03:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4066:  25%|██▍       | 99/400 [01:01<03:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4039:  25%|██▌       | 100/400 [01:01<03:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4028:  25%|██▌       | 101/400 [01:02<03:08,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3996:  26%|██▌       | 102/400 [01:03<03:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3965:  26%|██▌       | 103/400 [01:03<03:07,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3934:  26%|██▌       | 104/400 [01:04<03:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3908:  26%|██▋       | 105/400 [01:05<03:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3924:  26%|██▋       | 106/400 [01:05<03:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.39:  27%|██▋       | 107/400 [01:06<03:04,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3881:  27%|██▋       | 108/400 [01:06<03:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3853:  27%|██▋       | 109/400 [01:07<03:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3933:  28%|██▊       | 110/400 [01:08<03:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3926:  28%|██▊       | 111/400 [01:08<03:02,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3898:  28%|██▊       | 112/400 [01:09<02:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3888:  28%|██▊       | 113/400 [01:10<03:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3887:  28%|██▊       | 114/400 [01:10<02:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3934:  29%|██▉       | 115/400 [01:11<02:59,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3917:  29%|██▉       | 116/400 [01:12<02:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3901:  29%|██▉       | 117/400 [01:12<02:57,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3881:  30%|██▉       | 118/400 [01:13<02:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3864:  30%|██▉       | 119/400 [01:13<02:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3838:  30%|███       | 120/400 [01:14<02:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3826:  30%|███       | 121/400 [01:15<02:54,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3797:  30%|███       | 122/400 [01:15<02:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3779:  31%|███       | 123/400 [01:16<02:54,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3756:  31%|███       | 124/400 [01:16<02:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3735:  31%|███▏      | 125/400 [01:17<02:52,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3734:  32%|███▏      | 126/400 [01:18<02:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3713:  32%|███▏      | 127/400 [01:18<02:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3751:  32%|███▏      | 128/400 [01:19<02:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3736:  32%|███▏      | 129/400 [01:20<02:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3711:  32%|███▎      | 130/400 [01:20<02:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3684:  33%|███▎      | 131/400 [01:21<02:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3662:  33%|███▎      | 132/400 [01:21<02:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3662:  33%|███▎      | 133/400 [01:22<02:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3636:  34%|███▎      | 134/400 [01:23<02:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3616:  34%|███▍      | 135/400 [01:23<02:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3599:  34%|███▍      | 136/400 [01:24<02:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3605:  34%|███▍      | 137/400 [01:25<02:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3609:  34%|███▍      | 138/400 [01:25<02:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.363:  35%|███▍      | 139/400 [01:26<02:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3612:  35%|███▌      | 140/400 [01:26<02:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.359:  35%|███▌      | 141/400 [01:27<02:44,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3565:  36%|███▌      | 142/400 [01:28<02:41,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3573:  36%|███▌      | 143/400 [01:28<02:43,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3548:  36%|███▌      | 144/400 [01:29<02:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3535:  36%|███▋      | 145/400 [01:30<02:40,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.351:  36%|███▋      | 146/400 [01:30<02:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3486:  37%|███▋      | 147/400 [01:31<02:37,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3524:  37%|███▋      | 148/400 [01:31<02:35,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3579:  37%|███▋      | 149/400 [01:32<02:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3572:  38%|███▊      | 150/400 [01:33<02:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3552:  38%|███▊      | 151/400 [01:33<02:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.353:  38%|███▊      | 152/400 [01:34<02:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.359:  38%|███▊      | 153/400 [01:35<02:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3591:  38%|███▊      | 154/400 [01:35<02:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3565:  39%|███▉      | 155/400 [01:36<02:32,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3541:  39%|███▉      | 156/400 [01:36<02:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3517:  39%|███▉      | 157/400 [01:37<02:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3516:  40%|███▉      | 158/400 [01:38<02:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3491:  40%|███▉      | 159/400 [01:38<02:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3475:  40%|████      | 160/400 [01:39<02:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3472:  40%|████      | 161/400 [01:40<02:28,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3493:  40%|████      | 162/400 [01:40<02:26,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.357:  41%|████      | 163/400 [01:41<02:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3605:  41%|████      | 164/400 [01:41<02:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3591:  41%|████▏     | 165/400 [01:42<02:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3645:  42%|████▏     | 166/400 [01:43<02:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.362:  42%|████▏     | 167/400 [01:43<02:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.363:  42%|████▏     | 168/400 [01:44<02:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3617:  42%|████▏     | 169/400 [01:44<02:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3673:  42%|████▎     | 170/400 [01:45<02:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3693:  43%|████▎     | 171/400 [01:46<02:23,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3706:  43%|████▎     | 172/400 [01:46<02:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3687:  43%|████▎     | 173/400 [01:47<02:22,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3657:  44%|████▎     | 174/400 [01:48<02:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3686:  44%|████▍     | 175/400 [01:48<02:20,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3742:  44%|████▍     | 176/400 [01:49<02:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3721:  44%|████▍     | 177/400 [01:49<02:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3729:  44%|████▍     | 178/400 [01:50<02:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3749:  45%|████▍     | 179/400 [01:51<02:17,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3852:  45%|████▌     | 180/400 [01:51<02:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3828:  45%|████▌     | 181/400 [01:52<02:17,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3806:  46%|████▌     | 182/400 [01:53<02:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3787:  46%|████▌     | 183/400 [01:53<02:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3757:  46%|████▌     | 184/400 [01:54<02:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3746:  46%|████▋     | 185/400 [01:54<02:14,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3721:  46%|████▋     | 186/400 [01:55<02:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3696:  47%|████▋     | 187/400 [01:56<02:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3678:  47%|████▋     | 188/400 [01:56<02:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3657:  47%|████▋     | 189/400 [01:57<02:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.363:  48%|████▊     | 190/400 [01:57<02:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3619:  48%|████▊     | 191/400 [01:58<02:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3593:  48%|████▊     | 192/400 [01:59<02:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3634:  48%|████▊     | 193/400 [01:59<02:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3646:  48%|████▊     | 194/400 [02:00<02:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3671:  49%|████▉     | 195/400 [02:01<02:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3739:  49%|████▉     | 196/400 [02:01<02:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3792:  49%|████▉     | 197/400 [02:02<02:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3781:  50%|████▉     | 198/400 [02:02<02:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3755:  50%|████▉     | 199/400 [02:03<02:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3742:  50%|█████     | 200/400 [02:04<02:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.372:  50%|█████     | 201/400 [02:04<02:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3819:  50%|█████     | 202/400 [02:05<02:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3795:  51%|█████     | 203/400 [02:06<02:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3779:  51%|█████     | 204/400 [02:06<02:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3767:  51%|█████▏    | 205/400 [02:07<02:02,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3738:  52%|█████▏    | 206/400 [02:07<02:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3709:  52%|█████▏    | 207/400 [02:08<02:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3686:  52%|█████▏    | 208/400 [02:09<01:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3658:  52%|█████▏    | 209/400 [02:09<01:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3648:  52%|█████▎    | 210/400 [02:10<01:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.363:  53%|█████▎    | 211/400 [02:11<01:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3628:  53%|█████▎    | 212/400 [02:11<01:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3619:  53%|█████▎    | 213/400 [02:12<01:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3623:  54%|█████▎    | 214/400 [02:12<01:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3613:  54%|█████▍    | 215/400 [02:13<01:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.365:  54%|█████▍    | 216/400 [02:14<01:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3632:  54%|█████▍    | 217/400 [02:14<01:53,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3606:  55%|█████▍    | 218/400 [02:15<01:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3584:  55%|█████▍    | 219/400 [02:16<01:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3718:  55%|█████▌    | 220/400 [02:16<01:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3691:  55%|█████▌    | 221/400 [02:17<01:52,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3663:  56%|█████▌    | 222/400 [02:17<01:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.373:  56%|█████▌    | 223/400 [02:18<01:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.371:  56%|█████▌    | 224/400 [02:19<01:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3702:  56%|█████▋    | 225/400 [02:19<01:50,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3699:  56%|█████▋    | 226/400 [02:20<01:48,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3676:  57%|█████▋    | 227/400 [02:21<01:48,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3651:  57%|█████▋    | 228/400 [02:21<01:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3631:  57%|█████▋    | 229/400 [02:22<01:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3609:  57%|█████▊    | 230/400 [02:22<01:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3585:  58%|█████▊    | 231/400 [02:23<01:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3596:  58%|█████▊    | 232/400 [02:24<01:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3604:  58%|█████▊    | 233/400 [02:24<01:44,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.36:  58%|█████▊    | 234/400 [02:25<01:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3587:  59%|█████▉    | 235/400 [02:25<01:44,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3569:  59%|█████▉    | 236/400 [02:26<01:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3557:  59%|█████▉    | 237/400 [02:27<01:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3583:  60%|█████▉    | 238/400 [02:27<01:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3612:  60%|█████▉    | 239/400 [02:28<01:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3587:  60%|██████    | 240/400 [02:29<01:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3644:  60%|██████    | 241/400 [02:29<01:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3725:  60%|██████    | 242/400 [02:30<01:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3712:  61%|██████    | 243/400 [02:30<01:38,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3758:  61%|██████    | 244/400 [02:31<01:37,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3773:  61%|██████▏   | 245/400 [02:32<01:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3752:  62%|██████▏   | 246/400 [02:32<01:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.388:  62%|██████▏   | 247/400 [02:33<01:35,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3863:  62%|██████▏   | 248/400 [02:34<01:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3838:  62%|██████▏   | 249/400 [02:34<01:34,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3969:  62%|██████▎   | 250/400 [02:35<01:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3939:  63%|██████▎   | 251/400 [02:35<01:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3924:  63%|██████▎   | 252/400 [02:36<01:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3894:  63%|██████▎   | 253/400 [02:37<01:32,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3926:  64%|██████▎   | 254/400 [02:37<01:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.393:  64%|██████▍   | 255/400 [02:38<01:31,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3925:  64%|██████▍   | 256/400 [02:39<01:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3894:  64%|██████▍   | 257/400 [02:39<01:29,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3865:  64%|██████▍   | 258/400 [02:40<01:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3837:  65%|██████▍   | 259/400 [02:40<01:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3818:  65%|██████▌   | 260/400 [02:41<01:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3871:  65%|██████▌   | 261/400 [02:42<01:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3845:  66%|██████▌   | 262/400 [02:42<01:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3824:  66%|██████▌   | 263/400 [02:43<01:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3807:  66%|██████▌   | 264/400 [02:44<01:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3829:  66%|██████▋   | 265/400 [02:44<01:24,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3912:  66%|██████▋   | 266/400 [02:45<01:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3928:  67%|██████▋   | 267/400 [02:45<01:23,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4016:  67%|██████▋   | 268/400 [02:46<01:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3985:  67%|██████▋   | 269/400 [02:47<01:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.396:  68%|██████▊   | 270/400 [02:47<01:19,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3962:  68%|██████▊   | 271/400 [02:48<01:20,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3958:  68%|██████▊   | 272/400 [02:48<01:18,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3928:  68%|██████▊   | 273/400 [02:49<01:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3926:  68%|██████▊   | 274/400 [02:50<01:17,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3909:  69%|██████▉   | 275/400 [02:50<01:17,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3904:  69%|██████▉   | 276/400 [02:51<01:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3961:  69%|██████▉   | 277/400 [02:52<01:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3983:  70%|██████▉   | 278/400 [02:52<01:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3965:  70%|██████▉   | 279/400 [02:53<01:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3991:  70%|███████   | 280/400 [02:53<01:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3965:  70%|███████   | 281/400 [02:54<01:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4017:  70%|███████   | 282/400 [02:55<01:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3986:  71%|███████   | 283/400 [02:55<01:13,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3963:  71%|███████   | 284/400 [02:56<01:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4021:  71%|███████▏  | 285/400 [02:57<01:12,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4004:  72%|███████▏  | 286/400 [02:57<01:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.3978:  72%|███████▏  | 287/400 [02:58<01:11,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4004:  72%|███████▏  | 288/400 [02:58<01:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4038:  72%|███████▏  | 289/400 [02:59<01:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4196:  72%|███████▎  | 290/400 [03:00<01:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4171:  73%|███████▎  | 291/400 [03:00<01:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4189:  73%|███████▎  | 292/400 [03:01<01:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4185:  73%|███████▎  | 293/400 [03:02<01:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.419:  74%|███████▎  | 294/400 [03:02<01:05,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4166:  74%|███████▍  | 295/400 [03:03<01:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.415:  74%|███████▍  | 296/400 [03:03<01:03,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4146:  74%|███████▍  | 297/400 [03:04<01:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4118:  74%|███████▍  | 298/400 [03:05<01:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4089:  75%|███████▍  | 299/400 [03:05<01:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4109:  75%|███████▌  | 300/400 [03:06<01:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4083:  75%|███████▌  | 301/400 [03:07<01:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4057:  76%|███████▌  | 302/400 [03:07<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4101:  76%|███████▌  | 303/400 [03:08<01:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4111:  76%|███████▌  | 304/400 [03:08<00:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4163:  76%|███████▋  | 305/400 [03:09<00:59,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4214:  76%|███████▋  | 306/400 [03:10<00:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4203:  77%|███████▋  | 307/400 [03:10<00:58,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.424:  77%|███████▋  | 308/400 [03:11<00:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4246:  77%|███████▋  | 309/400 [03:12<00:57,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4225:  78%|███████▊  | 310/400 [03:12<00:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4205:  78%|███████▊  | 311/400 [03:13<00:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4194:  78%|███████▊  | 312/400 [03:13<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4169:  78%|███████▊  | 313/400 [03:14<00:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4137:  78%|███████▊  | 314/400 [03:15<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4191:  79%|███████▉  | 315/400 [03:15<00:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4198:  79%|███████▉  | 316/400 [03:16<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4192:  79%|███████▉  | 317/400 [03:16<00:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4172:  80%|███████▉  | 318/400 [03:17<00:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4163:  80%|███████▉  | 319/400 [03:18<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4275:  80%|████████  | 320/400 [03:18<00:48,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4328:  80%|████████  | 321/400 [03:19<00:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4295:  80%|████████  | 322/400 [03:20<00:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4269:  81%|████████  | 323/400 [03:20<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4255:  81%|████████  | 324/400 [03:21<00:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4229:  81%|████████▏ | 325/400 [03:21<00:46,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4199:  82%|████████▏ | 326/400 [03:22<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4208:  82%|████████▏ | 327/400 [03:23<00:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4216:  82%|████████▏ | 328/400 [03:23<00:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4192:  82%|████████▏ | 329/400 [03:24<00:44,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4273:  82%|████████▎ | 330/400 [03:25<00:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4361:  83%|████████▎ | 331/400 [03:25<00:43,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4416:  83%|████████▎ | 332/400 [03:26<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4511:  83%|████████▎ | 333/400 [03:26<00:41,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4505:  84%|████████▎ | 334/400 [03:27<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4471:  84%|████████▍ | 335/400 [03:28<00:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4522:  84%|████████▍ | 336/400 [03:28<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4538:  84%|████████▍ | 337/400 [03:29<00:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4523:  84%|████████▍ | 338/400 [03:29<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4618:  85%|████████▍ | 339/400 [03:30<00:38,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4595:  85%|████████▌ | 340/400 [03:31<00:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4622:  85%|████████▌ | 341/400 [03:31<00:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4626:  86%|████████▌ | 342/400 [03:32<00:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4593:  86%|████████▌ | 343/400 [03:33<00:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4571:  86%|████████▌ | 344/400 [03:33<00:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4537:  86%|████████▋ | 345/400 [03:34<00:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4552:  86%|████████▋ | 346/400 [03:34<00:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4552:  87%|████████▋ | 347/400 [03:35<00:33,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4531:  87%|████████▋ | 348/400 [03:36<00:32,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.45:  87%|████████▋ | 349/400 [03:36<00:32,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4467:  88%|████████▊ | 350/400 [03:37<00:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4434:  88%|████████▊ | 351/400 [03:38<00:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4438:  88%|████████▊ | 352/400 [03:38<00:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4458:  88%|████████▊ | 353/400 [03:39<00:29,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4423:  88%|████████▊ | 354/400 [03:39<00:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4392:  89%|████████▉ | 355/400 [03:40<00:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.437:  89%|████████▉ | 356/400 [03:41<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4337:  89%|████████▉ | 357/400 [03:41<00:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4305:  90%|████████▉ | 358/400 [03:42<00:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4312:  90%|████████▉ | 359/400 [03:43<00:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4276:  90%|█████████ | 360/400 [03:43<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4254:  90%|█████████ | 361/400 [03:44<00:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4226:  90%|█████████ | 362/400 [03:44<00:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4197:  91%|█████████ | 363/400 [03:45<00:23,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4178:  91%|█████████ | 364/400 [03:46<00:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4171:  91%|█████████▏| 365/400 [03:46<00:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.414:  92%|█████████▏| 366/400 [03:47<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.411:  92%|█████████▏| 367/400 [03:48<00:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4108:  92%|█████████▏| 368/400 [03:48<00:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4092:  92%|█████████▏| 369/400 [03:49<00:19,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4058:  92%|█████████▎| 370/400 [03:49<00:18,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4235:  93%|█████████▎| 371/400 [03:50<00:18,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4283:  93%|█████████▎| 372/400 [03:51<00:17,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4309:  93%|█████████▎| 373/400 [03:51<00:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4275:  94%|█████████▎| 374/400 [03:52<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4336:  94%|█████████▍| 375/400 [03:53<00:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4305:  94%|█████████▍| 376/400 [03:53<00:14,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4312:  94%|█████████▍| 377/400 [03:54<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4367:  94%|█████████▍| 378/400 [03:54<00:13,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4379:  95%|█████████▍| 379/400 [03:55<00:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4444:  95%|█████████▌| 380/400 [03:56<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4454:  95%|█████████▌| 381/400 [03:56<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4425:  96%|█████████▌| 382/400 [03:57<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4391:  96%|█████████▌| 383/400 [03:57<00:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4377:  96%|█████████▌| 384/400 [03:58<00:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4352:  96%|█████████▋| 385/400 [03:59<00:09,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4322:  96%|█████████▋| 386/400 [03:59<00:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4356:  97%|█████████▋| 387/400 [04:00<00:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4329:  97%|█████████▋| 388/400 [04:01<00:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.43:  97%|█████████▋| 389/400 [04:01<00:06,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4269:  98%|█████████▊| 390/400 [04:02<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4282:  98%|█████████▊| 391/400 [04:02<00:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4275:  98%|█████████▊| 392/400 [04:03<00:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4261:  98%|█████████▊| 393/400 [04:04<00:04,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4231:  98%|█████████▊| 394/400 [04:04<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4216:  99%|█████████▉| 395/400 [04:05<00:03,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4189:  99%|█████████▉| 396/400 [04:06<00:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.418:  99%|█████████▉| 397/400 [04:06<00:01,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4192: 100%|█████████▉| 398/400 [04:07<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.422: 100%|█████████▉| 399/400 [04:07<00:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 8, train loss: 0.4189: 100%|██████████| 400/400 [04:08<00:00,  1.61it/s]
epoch: 8, valid loss: 3.5459:   1%|          | 2/201 [00:00<00:28,  6.90it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.5348:   2%|▏         | 4/201 [00:00<00:26,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.503:   3%|▎         | 6/201 [00:00<00:26,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.465:   4%|▍         | 8/201 [00:01<00:25,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.4276:   5%|▍         | 10/201 [00:01<00:25,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3805:   6%|▌         | 12/201 [00:01<00:25,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.4005:   7%|▋         | 14/201 [00:01<00:24,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.4389:   8%|▊         | 16/201 [00:02<00:25,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.4297:   9%|▉         | 18/201 [00:02<00:25,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.4542:  10%|▉         | 20/201 [00:02<00:25,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3911:  11%|█         | 22/201 [00:03<00:24,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3876:  12%|█▏        | 24/201 [00:03<00:24,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3596:  13%|█▎        | 26/201 [00:03<00:23,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3541:  14%|█▍        | 28/201 [00:03<00:23,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3609:  15%|█▍        | 30/201 [00:04<00:23,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.3259:  16%|█▌        | 32/201 [00:04<00:22,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.2871:  17%|█▋        | 34/201 [00:04<00:22,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.2321:  18%|█▊        | 36/201 [00:04<00:22,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.2195:  19%|█▉        | 38/201 [00:05<00:22,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.2154:  20%|█▉        | 40/201 [00:05<00:22,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.1934:  21%|██        | 42/201 [00:05<00:21,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.1453:  22%|██▏       | 44/201 [00:06<00:21,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0927:  23%|██▎       | 46/201 [00:06<00:20,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0356:  24%|██▍       | 48/201 [00:06<00:21,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0162:  25%|██▍       | 50/201 [00:06<00:20,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.988:  26%|██▌       | 52/201 [00:07<00:20,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0421:  27%|██▋       | 54/201 [00:07<00:20,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9871:  28%|██▊       | 56/201 [00:07<00:19,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0383:  29%|██▉       | 58/201 [00:07<00:19,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0429:  30%|██▉       | 60/201 [00:08<00:19,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.986:  31%|███       | 62/201 [00:08<00:18,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9737:  32%|███▏      | 64/201 [00:08<00:18,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9789:  33%|███▎      | 66/201 [00:09<00:18,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9894:  34%|███▍      | 68/201 [00:09<00:18,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9996:  35%|███▍      | 70/201 [00:09<00:17,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 3.0084:  36%|███▌      | 72/201 [00:09<00:17,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9845:  37%|███▋      | 74/201 [00:10<00:17,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9478:  38%|███▊      | 76/201 [00:10<00:16,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9532:  39%|███▉      | 78/201 [00:10<00:16,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9187:  40%|███▉      | 80/201 [00:10<00:16,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.896:  41%|████      | 82/201 [00:11<00:15,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9181:  42%|████▏     | 84/201 [00:11<00:15,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9119:  43%|████▎     | 86/201 [00:11<00:15,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9744:  44%|████▍     | 88/201 [00:11<00:15,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.9323:  45%|████▍     | 90/201 [00:12<00:14,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8851:  46%|████▌     | 92/201 [00:12<00:14,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8772:  47%|████▋     | 94/201 [00:12<00:14,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8384:  48%|████▊     | 96/201 [00:13<00:13,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8429:  49%|████▉     | 98/201 [00:13<00:13,  7.62it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8378:  50%|████▉     | 100/201 [00:13<00:13,  7.59it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8029:  51%|█████     | 102/201 [00:13<00:13,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7749:  52%|█████▏    | 104/201 [00:14<00:13,  7.24it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7693:  53%|█████▎    | 106/201 [00:14<00:12,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7841:  54%|█████▎    | 108/201 [00:14<00:12,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7932:  55%|█████▍    | 110/201 [00:14<00:12,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7719:  56%|█████▌    | 112/201 [00:15<00:11,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8013:  57%|█████▋    | 114/201 [00:15<00:11,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.766:  58%|█████▊    | 116/201 [00:15<00:11,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7709:  59%|█████▊    | 118/201 [00:15<00:10,  7.56it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7354:  60%|█████▉    | 120/201 [00:16<00:10,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7541:  61%|██████    | 122/201 [00:16<00:10,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7333:  62%|██████▏   | 124/201 [00:16<00:10,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.78:  63%|██████▎   | 126/201 [00:17<00:10,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.8002:  64%|██████▎   | 128/201 [00:17<00:09,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7815:  65%|██████▍   | 130/201 [00:17<00:09,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7456:  66%|██████▌   | 132/201 [00:17<00:09,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7878:  67%|██████▋   | 134/201 [00:18<00:09,  7.24it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7622:  68%|██████▊   | 136/201 [00:18<00:09,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7331:  69%|██████▊   | 138/201 [00:18<00:08,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7055:  70%|██████▉   | 140/201 [00:19<00:08,  7.10it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6538:  71%|███████   | 142/201 [00:19<00:08,  7.12it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6714:  72%|███████▏  | 144/201 [00:19<00:07,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6819:  73%|███████▎  | 146/201 [00:19<00:07,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6378:  74%|███████▎  | 148/201 [00:20<00:07,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6401:  75%|███████▍  | 150/201 [00:20<00:06,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6486:  76%|███████▌  | 152/201 [00:20<00:06,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6373:  77%|███████▋  | 154/201 [00:20<00:06,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6402:  78%|███████▊  | 156/201 [00:21<00:06,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6013:  79%|███████▊  | 158/201 [00:21<00:05,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6536:  80%|███████▉  | 160/201 [00:21<00:05,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6602:  81%|████████  | 162/201 [00:21<00:05,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6458:  82%|████████▏ | 164/201 [00:22<00:04,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6296:  83%|████████▎ | 166/201 [00:22<00:04,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6422:  84%|████████▎ | 168/201 [00:22<00:04,  7.61it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6682:  85%|████████▍ | 170/201 [00:23<00:04,  7.58it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6343:  86%|████████▌ | 172/201 [00:23<00:03,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6442:  87%|████████▋ | 174/201 [00:23<00:03,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6563:  88%|████████▊ | 176/201 [00:23<00:03,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.636:  89%|████████▊ | 178/201 [00:24<00:03,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6398:  90%|████████▉ | 180/201 [00:24<00:02,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6245:  91%|█████████ | 182/201 [00:24<00:02,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6416:  92%|█████████▏| 184/201 [00:24<00:02,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6553:  93%|█████████▎| 186/201 [00:25<00:02,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6558:  94%|█████████▎| 188/201 [00:25<00:01,  7.52it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6396:  95%|█████████▍| 190/201 [00:25<00:01,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.618:  96%|█████████▌| 192/201 [00:26<00:01,  7.22it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.6715:  97%|█████████▋| 194/201 [00:26<00:00,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7131:  98%|█████████▊| 196/201 [00:26<00:00,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7169:  99%|█████████▊| 198/201 [00:26<00:00,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7244: 100%|█████████▉| 200/201 [00:27<00:00,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 8, valid loss: 2.7362: 100%|██████████| 201/201 [00:27<00:00,  7.38it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 2, 'label': 0}, {'prediction': 3, 'label': 1}], 'results_2': [{'prediction': 5, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 4}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 1, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9818:   0%|          | 1/400 [00:00<04:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9733:   0%|          | 2/400 [00:01<04:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9643:   1%|          | 3/400 [00:01<04:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9556:   1%|          | 4/400 [00:02<04:02,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9491:   1%|▏         | 5/400 [00:03<04:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9413:   2%|▏         | 6/400 [00:03<04:01,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9329:   2%|▏         | 7/400 [00:04<04:06,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9263:   2%|▏         | 8/400 [00:04<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.919:   2%|▏         | 9/400 [00:05<04:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.9109:   2%|▎         | 10/400 [00:06<03:58,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.905:   3%|▎         | 11/400 [00:06<04:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.897:   3%|▎         | 12/400 [00:07<03:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8907:   3%|▎         | 13/400 [00:08<04:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8826:   4%|▎         | 14/400 [00:08<03:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8746:   4%|▍         | 15/400 [00:09<03:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8666:   4%|▍         | 16/400 [00:09<03:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8617:   4%|▍         | 17/400 [00:10<03:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8592:   4%|▍         | 18/400 [00:11<03:55,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.853:   5%|▍         | 19/400 [00:11<03:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.846:   5%|▌         | 20/400 [00:12<03:53,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8383:   5%|▌         | 21/400 [00:13<03:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8306:   6%|▌         | 22/400 [00:13<03:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8234:   6%|▌         | 23/400 [00:14<03:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.818:   6%|▌         | 24/400 [00:14<03:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8132:   6%|▋         | 25/400 [00:15<03:55,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.812:   6%|▋         | 26/400 [00:16<03:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8047:   7%|▋         | 27/400 [00:16<03:54,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8016:   7%|▋         | 28/400 [00:17<03:52,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7978:   7%|▋         | 29/400 [00:18<03:54,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8018:   8%|▊         | 30/400 [00:18<03:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8014:   8%|▊         | 31/400 [00:19<03:51,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.8004:   8%|▊         | 32/400 [00:19<03:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7954:   8%|▊         | 33/400 [00:20<03:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7931:   8%|▊         | 34/400 [00:21<03:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7954:   9%|▉         | 35/400 [00:21<03:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7887:   9%|▉         | 36/400 [00:22<03:43,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7827:   9%|▉         | 37/400 [00:22<03:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7761:  10%|▉         | 38/400 [00:23<03:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7697:  10%|▉         | 39/400 [00:24<03:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7712:  10%|█         | 40/400 [00:24<03:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7678:  10%|█         | 41/400 [00:25<03:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7618:  10%|█         | 42/400 [00:26<03:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7554:  11%|█         | 43/400 [00:26<03:43,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7501:  11%|█         | 44/400 [00:27<03:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7461:  11%|█▏        | 45/400 [00:27<03:43,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7409:  12%|█▏        | 46/400 [00:28<03:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7346:  12%|█▏        | 47/400 [00:29<03:42,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7316:  12%|█▏        | 48/400 [00:29<03:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7257:  12%|█▏        | 49/400 [00:30<03:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7195:  12%|█▎        | 50/400 [00:31<03:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7136:  13%|█▎        | 51/400 [00:31<03:39,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7076:  13%|█▎        | 52/400 [00:32<03:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.7015:  13%|█▎        | 53/400 [00:32<03:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6957:  14%|█▎        | 54/400 [00:33<03:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6904:  14%|█▍        | 55/400 [00:34<03:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.689:  14%|█▍        | 56/400 [00:34<03:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6828:  14%|█▍        | 57/400 [00:35<03:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6775:  14%|█▍        | 58/400 [00:36<03:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6727:  15%|█▍        | 59/400 [00:36<03:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6694:  15%|█▌        | 60/400 [00:37<03:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6643:  15%|█▌        | 61/400 [00:37<03:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6613:  16%|█▌        | 62/400 [00:38<03:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6574:  16%|█▌        | 63/400 [00:39<03:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.652:  16%|█▌        | 64/400 [00:39<03:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.647:  16%|█▋        | 65/400 [00:40<03:30,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6482:  16%|█▋        | 66/400 [00:41<03:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6427:  17%|█▋        | 67/400 [00:41<03:30,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6374:  17%|█▋        | 68/400 [00:42<03:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6368:  17%|█▋        | 69/400 [00:42<03:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6326:  18%|█▊        | 70/400 [00:43<03:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6276:  18%|█▊        | 71/400 [00:44<03:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6265:  18%|█▊        | 72/400 [00:44<03:21,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6214:  18%|█▊        | 73/400 [00:45<03:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6162:  18%|█▊        | 74/400 [00:45<03:21,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6191:  19%|█▉        | 75/400 [00:46<03:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6138:  19%|█▉        | 76/400 [00:47<03:20,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6127:  19%|█▉        | 77/400 [00:47<03:22,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6075:  20%|█▉        | 78/400 [00:48<03:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.6031:  20%|█▉        | 79/400 [00:49<03:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5991:  20%|██        | 80/400 [00:49<03:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5996:  20%|██        | 81/400 [00:50<03:19,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.599:  20%|██        | 82/400 [00:50<03:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5939:  21%|██        | 83/400 [00:51<03:18,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5917:  21%|██        | 84/400 [00:52<03:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5869:  21%|██▏       | 85/400 [00:52<03:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5928:  22%|██▏       | 86/400 [00:53<03:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5921:  22%|██▏       | 87/400 [00:54<03:18,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5883:  22%|██▏       | 88/400 [00:54<03:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5831:  22%|██▏       | 89/400 [00:55<03:16,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5782:  22%|██▎       | 90/400 [00:55<03:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5847:  23%|██▎       | 91/400 [00:56<03:13,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5802:  23%|██▎       | 92/400 [00:57<03:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5773:  23%|██▎       | 93/400 [00:57<03:11,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5725:  24%|██▎       | 94/400 [00:58<03:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5708:  24%|██▍       | 95/400 [00:59<03:11,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5701:  24%|██▍       | 96/400 [00:59<03:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5653:  24%|██▍       | 97/400 [01:00<03:11,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5604:  24%|██▍       | 98/400 [01:00<03:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.557:  25%|██▍       | 99/400 [01:01<03:08,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5525:  25%|██▌       | 100/400 [01:02<03:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5485:  25%|██▌       | 101/400 [01:02<03:07,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5464:  26%|██▌       | 102/400 [01:03<03:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5464:  26%|██▌       | 103/400 [01:04<03:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5432:  26%|██▌       | 104/400 [01:04<03:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5389:  26%|██▋       | 105/400 [01:05<03:05,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5345:  26%|██▋       | 106/400 [01:05<03:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5317:  27%|██▋       | 107/400 [01:06<03:04,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5279:  27%|██▋       | 108/400 [01:07<03:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5255:  27%|██▋       | 109/400 [01:07<03:03,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5226:  28%|██▊       | 110/400 [01:08<03:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5218:  28%|██▊       | 111/400 [01:09<03:01,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.518:  28%|██▊       | 112/400 [01:09<02:58,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.519:  28%|██▊       | 113/400 [01:10<03:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5228:  28%|██▊       | 114/400 [01:10<02:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5199:  29%|██▉       | 115/400 [01:11<02:59,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5155:  29%|██▉       | 116/400 [01:12<02:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5115:  29%|██▉       | 117/400 [01:12<02:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5083:  30%|██▉       | 118/400 [01:13<02:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.51:  30%|██▉       | 119/400 [01:14<02:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5111:  30%|███       | 120/400 [01:14<02:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5072:  30%|███       | 121/400 [01:15<02:54,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5041:  30%|███       | 122/400 [01:15<02:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5059:  31%|███       | 123/400 [01:16<02:53,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5017:  31%|███       | 124/400 [01:17<02:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4979:  31%|███▏      | 125/400 [01:17<02:52,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.5026:  32%|███▏      | 126/400 [01:18<02:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4983:  32%|███▏      | 127/400 [01:19<02:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.495:  32%|███▏      | 128/400 [01:19<02:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4929:  32%|███▏      | 129/400 [01:20<02:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4887:  32%|███▎      | 130/400 [01:20<02:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4858:  33%|███▎      | 131/400 [01:21<02:47,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4819:  33%|███▎      | 132/400 [01:22<02:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4786:  33%|███▎      | 133/400 [01:22<02:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4781:  34%|███▎      | 134/400 [01:23<02:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4816:  34%|███▍      | 135/400 [01:24<02:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4778:  34%|███▍      | 136/400 [01:24<02:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.474:  34%|███▍      | 137/400 [01:25<02:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4716:  34%|███▍      | 138/400 [01:25<02:41,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4684:  35%|███▍      | 139/400 [01:26<02:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4696:  35%|███▌      | 140/400 [01:27<02:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4759:  35%|███▌      | 141/400 [01:27<02:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4742:  36%|███▌      | 142/400 [01:28<02:38,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4725:  36%|███▌      | 143/400 [01:28<02:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4695:  36%|███▌      | 144/400 [01:29<02:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4697:  36%|███▋      | 145/400 [01:30<02:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4724:  36%|███▋      | 146/400 [01:30<02:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4697:  37%|███▋      | 147/400 [01:31<02:39,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4659:  37%|███▋      | 148/400 [01:32<02:37,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4644:  37%|███▋      | 149/400 [01:32<02:38,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4622:  38%|███▊      | 150/400 [01:33<02:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4588:  38%|███▊      | 151/400 [01:34<02:37,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4595:  38%|███▊      | 152/400 [01:34<02:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4559:  38%|███▊      | 153/400 [01:35<02:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4524:  38%|███▊      | 154/400 [01:35<02:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4506:  39%|███▉      | 155/400 [01:36<02:33,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.451:  39%|███▉      | 156/400 [01:37<02:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4477:  39%|███▉      | 157/400 [01:37<02:32,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4511:  40%|███▉      | 158/400 [01:38<02:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4477:  40%|███▉      | 159/400 [01:38<02:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4496:  40%|████      | 160/400 [01:39<02:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4523:  40%|████      | 161/400 [01:40<02:28,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4489:  40%|████      | 162/400 [01:40<02:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4551:  41%|████      | 163/400 [01:41<02:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.452:  41%|████      | 164/400 [01:42<02:26,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4491:  41%|████▏     | 165/400 [01:42<02:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4458:  42%|████▏     | 166/400 [01:43<02:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4441:  42%|████▏     | 167/400 [01:43<02:26,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4404:  42%|████▏     | 168/400 [01:44<02:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4443:  42%|████▏     | 169/400 [01:45<02:25,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4489:  42%|████▎     | 170/400 [01:45<02:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4454:  43%|████▎     | 171/400 [01:46<02:25,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4429:  43%|████▎     | 172/400 [01:47<02:22,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4433:  43%|████▎     | 173/400 [01:47<02:22,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4397:  44%|████▎     | 174/400 [01:48<02:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4384:  44%|████▍     | 175/400 [01:48<02:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4409:  44%|████▍     | 176/400 [01:49<02:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4432:  44%|████▍     | 177/400 [01:50<02:19,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4417:  44%|████▍     | 178/400 [01:50<02:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4392:  45%|████▍     | 179/400 [01:51<02:17,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4382:  45%|████▌     | 180/400 [01:52<02:15,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4385:  45%|████▌     | 181/400 [01:52<02:17,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4366:  46%|████▌     | 182/400 [01:53<02:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4332:  46%|████▌     | 183/400 [01:53<02:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4306:  46%|████▌     | 184/400 [01:54<02:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4278:  46%|████▋     | 185/400 [01:55<02:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.43:  46%|████▋     | 186/400 [01:55<02:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.434:  47%|████▋     | 187/400 [01:56<02:13,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4303:  47%|████▋     | 188/400 [01:57<02:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4329:  47%|████▋     | 189/400 [01:57<02:11,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4297:  48%|████▊     | 190/400 [01:58<02:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.427:  48%|████▊     | 191/400 [01:58<02:10,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4235:  48%|████▊     | 192/400 [01:59<02:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4209:  48%|████▊     | 193/400 [02:00<02:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4198:  48%|████▊     | 194/400 [02:00<02:06,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4172:  49%|████▉     | 195/400 [02:01<02:07,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4138:  49%|████▉     | 196/400 [02:01<02:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4122:  49%|████▉     | 197/400 [02:02<02:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4136:  50%|████▉     | 198/400 [02:03<02:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4113:  50%|████▉     | 199/400 [02:03<02:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4085:  50%|█████     | 200/400 [02:04<02:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4101:  50%|█████     | 201/400 [02:05<02:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.408:  50%|█████     | 202/400 [02:05<02:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4048:  51%|█████     | 203/400 [02:06<02:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4022:  51%|█████     | 204/400 [02:06<02:00,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4051:  51%|█████▏    | 205/400 [02:07<02:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4034:  52%|█████▏    | 206/400 [02:08<01:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4004:  52%|█████▏    | 207/400 [02:08<02:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3998:  52%|█████▏    | 208/400 [02:09<01:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3986:  52%|█████▏    | 209/400 [02:10<01:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3963:  52%|█████▎    | 210/400 [02:10<01:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3962:  53%|█████▎    | 211/400 [02:11<01:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3933:  53%|█████▎    | 212/400 [02:11<01:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3917:  53%|█████▎    | 213/400 [02:12<01:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3929:  54%|█████▎    | 214/400 [02:13<01:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3907:  54%|█████▍    | 215/400 [02:13<01:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3984:  54%|█████▍    | 216/400 [02:14<01:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3957:  54%|█████▍    | 217/400 [02:15<01:55,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3924:  55%|█████▍    | 218/400 [02:15<01:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3925:  55%|█████▍    | 219/400 [02:16<01:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3902:  55%|█████▌    | 220/400 [02:16<01:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4017:  55%|█████▌    | 221/400 [02:17<01:51,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4041:  56%|█████▌    | 222/400 [02:18<01:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4021:  56%|█████▌    | 223/400 [02:18<01:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4014:  56%|█████▌    | 224/400 [02:19<01:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3993:  56%|█████▋    | 225/400 [02:19<01:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3969:  56%|█████▋    | 226/400 [02:20<01:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3978:  57%|█████▋    | 227/400 [02:21<01:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3948:  57%|█████▋    | 228/400 [02:21<01:45,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3919:  57%|█████▋    | 229/400 [02:22<01:46,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3889:  57%|█████▊    | 230/400 [02:23<01:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3862:  58%|█████▊    | 231/400 [02:23<01:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3866:  58%|█████▊    | 232/400 [02:24<01:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.384:  58%|█████▊    | 233/400 [02:24<01:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3812:  58%|█████▊    | 234/400 [02:25<01:42,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3793:  59%|█████▉    | 235/400 [02:26<01:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3823:  59%|█████▉    | 236/400 [02:26<01:40,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3821:  59%|█████▉    | 237/400 [02:27<01:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3797:  60%|█████▉    | 238/400 [02:28<01:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3767:  60%|█████▉    | 239/400 [02:28<01:41,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3763:  60%|██████    | 240/400 [02:29<01:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3841:  60%|██████    | 241/400 [02:29<01:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.382:  60%|██████    | 242/400 [02:30<01:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.379:  61%|██████    | 243/400 [02:31<01:38,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3763:  61%|██████    | 244/400 [02:31<01:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3804:  61%|██████▏   | 245/400 [02:32<01:37,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3853:  62%|██████▏   | 246/400 [02:33<01:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3907:  62%|██████▏   | 247/400 [02:33<01:36,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3892:  62%|██████▏   | 248/400 [02:34<01:33,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3928:  62%|██████▏   | 249/400 [02:34<01:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3901:  62%|██████▎   | 250/400 [02:35<01:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4067:  63%|██████▎   | 251/400 [02:36<01:34,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4034:  63%|██████▎   | 252/400 [02:36<01:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.401:  63%|██████▎   | 253/400 [02:37<01:32,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3979:  64%|██████▎   | 254/400 [02:38<01:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3969:  64%|██████▍   | 255/400 [02:38<01:31,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3959:  64%|██████▍   | 256/400 [02:39<01:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4012:  64%|██████▍   | 257/400 [02:39<01:29,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3996:  64%|██████▍   | 258/400 [02:40<01:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3963:  65%|██████▍   | 259/400 [02:41<01:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4002:  65%|██████▌   | 260/400 [02:41<01:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3982:  65%|██████▌   | 261/400 [02:42<01:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.409:  66%|██████▌   | 262/400 [02:42<01:25,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4122:  66%|██████▌   | 263/400 [02:43<01:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4114:  66%|██████▌   | 264/400 [02:44<01:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4091:  66%|██████▋   | 265/400 [02:44<01:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4145:  66%|██████▋   | 266/400 [02:45<01:22,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.416:  67%|██████▋   | 267/400 [02:46<01:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4201:  67%|██████▋   | 268/400 [02:46<01:20,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4262:  67%|██████▋   | 269/400 [02:47<01:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4235:  68%|██████▊   | 270/400 [02:47<01:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4206:  68%|██████▊   | 271/400 [02:48<01:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4258:  68%|██████▊   | 272/400 [02:49<01:19,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4236:  68%|██████▊   | 273/400 [02:49<01:20,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4215:  68%|██████▊   | 274/400 [02:50<01:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4291:  69%|██████▉   | 275/400 [02:51<01:18,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4282:  69%|██████▉   | 276/400 [02:51<01:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4255:  69%|██████▉   | 277/400 [02:52<01:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.422:  70%|██████▉   | 278/400 [02:52<01:15,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4212:  70%|██████▉   | 279/400 [02:53<01:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4191:  70%|███████   | 280/400 [02:54<01:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4171:  70%|███████   | 281/400 [02:54<01:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4166:  70%|███████   | 282/400 [02:55<01:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4139:  71%|███████   | 283/400 [02:56<01:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.414:  71%|███████   | 284/400 [02:56<01:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4115:  71%|███████▏  | 285/400 [02:57<01:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4082:  72%|███████▏  | 286/400 [02:57<01:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4051:  72%|███████▏  | 287/400 [02:58<01:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4022:  72%|███████▏  | 288/400 [02:59<01:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3996:  72%|███████▏  | 289/400 [02:59<01:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4032:  72%|███████▎  | 290/400 [03:00<01:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4035:  73%|███████▎  | 291/400 [03:00<01:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4013:  73%|███████▎  | 292/400 [03:01<01:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3985:  73%|███████▎  | 293/400 [03:02<01:07,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.399:  74%|███████▎  | 294/400 [03:02<01:06,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3962:  74%|███████▍  | 295/400 [03:03<01:06,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4079:  74%|███████▍  | 296/400 [03:04<01:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4045:  74%|███████▍  | 297/400 [03:04<01:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.407:  74%|███████▍  | 298/400 [03:05<01:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4091:  75%|███████▍  | 299/400 [03:05<01:03,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4132:  75%|███████▌  | 300/400 [03:06<01:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4108:  75%|███████▌  | 301/400 [03:07<01:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4075:  76%|███████▌  | 302/400 [03:07<01:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4079:  76%|███████▌  | 303/400 [03:08<01:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4097:  76%|███████▌  | 304/400 [03:09<00:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4071:  76%|███████▋  | 305/400 [03:09<00:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4047:  76%|███████▋  | 306/400 [03:10<00:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4021:  77%|███████▋  | 307/400 [03:10<00:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.4005:  77%|███████▋  | 308/400 [03:11<00:56,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3974:  77%|███████▋  | 309/400 [03:12<00:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3949:  78%|███████▊  | 310/400 [03:12<00:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3932:  78%|███████▊  | 311/400 [03:13<00:55,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3906:  78%|███████▊  | 312/400 [03:14<00:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3881:  78%|███████▊  | 313/400 [03:14<00:55,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3867:  78%|███████▊  | 314/400 [03:15<00:53,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3892:  79%|███████▉  | 315/400 [03:15<00:53,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3873:  79%|███████▉  | 316/400 [03:16<00:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3852:  79%|███████▉  | 317/400 [03:17<00:51,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3827:  80%|███████▉  | 318/400 [03:17<00:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3805:  80%|███████▉  | 319/400 [03:18<00:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3776:  80%|████████  | 320/400 [03:19<00:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3752:  80%|████████  | 321/400 [03:19<00:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3723:  80%|████████  | 322/400 [03:20<00:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3721:  81%|████████  | 323/400 [03:20<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.37:  81%|████████  | 324/400 [03:21<00:46,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3672:  81%|████████▏ | 325/400 [03:22<00:46,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3679:  82%|████████▏ | 326/400 [03:22<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3649:  82%|████████▏ | 327/400 [03:23<00:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3623:  82%|████████▏ | 328/400 [03:23<00:44,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3597:  82%|████████▏ | 329/400 [03:24<00:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3568:  82%|████████▎ | 330/400 [03:25<00:43,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3593:  83%|████████▎ | 331/400 [03:25<00:43,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3586:  83%|████████▎ | 332/400 [03:26<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3563:  83%|████████▎ | 333/400 [03:27<00:42,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3534:  84%|████████▎ | 334/400 [03:27<00:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3512:  84%|████████▍ | 335/400 [03:28<00:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3485:  84%|████████▍ | 336/400 [03:28<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3458:  84%|████████▍ | 337/400 [03:29<00:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3439:  84%|████████▍ | 338/400 [03:30<00:38,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3412:  85%|████████▍ | 339/400 [03:30<00:38,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3404:  85%|████████▌ | 340/400 [03:31<00:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3382:  85%|████████▌ | 341/400 [03:32<00:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3495:  86%|████████▌ | 342/400 [03:32<00:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3483:  86%|████████▌ | 343/400 [03:33<00:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3464:  86%|████████▌ | 344/400 [03:33<00:34,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3467:  86%|████████▋ | 345/400 [03:34<00:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3485:  86%|████████▋ | 346/400 [03:35<00:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.347:  87%|████████▋ | 347/400 [03:35<00:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3462:  87%|████████▋ | 348/400 [03:36<00:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3549:  87%|████████▋ | 349/400 [03:37<00:31,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3549:  88%|████████▊ | 350/400 [03:37<00:30,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3542:  88%|████████▊ | 351/400 [03:38<00:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3525:  88%|████████▊ | 352/400 [03:38<00:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3498:  88%|████████▊ | 353/400 [03:39<00:29,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3473:  88%|████████▊ | 354/400 [03:40<00:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3451:  89%|████████▉ | 355/400 [03:40<00:28,  1.56it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3457:  89%|████████▉ | 356/400 [03:41<00:27,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3448:  89%|████████▉ | 357/400 [03:42<00:27,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3427:  90%|████████▉ | 358/400 [03:42<00:26,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3405:  90%|████████▉ | 359/400 [03:43<00:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3381:  90%|█████████ | 360/400 [03:43<00:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3357:  90%|█████████ | 361/400 [03:44<00:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3332:  90%|█████████ | 362/400 [03:45<00:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3342:  91%|█████████ | 363/400 [03:45<00:23,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3354:  91%|█████████ | 364/400 [03:46<00:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3336:  91%|█████████▏| 365/400 [03:47<00:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3338:  92%|█████████▏| 366/400 [03:47<00:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3313:  92%|█████████▏| 367/400 [03:48<00:20,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3291:  92%|█████████▏| 368/400 [03:48<00:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3268:  92%|█████████▏| 369/400 [03:49<00:19,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3287:  92%|█████████▎| 370/400 [03:50<00:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3277:  93%|█████████▎| 371/400 [03:50<00:18,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3263:  93%|█████████▎| 372/400 [03:51<00:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3239:  93%|█████████▎| 373/400 [03:51<00:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3215:  94%|█████████▎| 374/400 [03:52<00:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3194:  94%|█████████▍| 375/400 [03:53<00:15,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.317:  94%|█████████▍| 376/400 [03:53<00:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.318:  94%|█████████▍| 377/400 [03:54<00:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3159:  94%|█████████▍| 378/400 [03:55<00:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3137:  95%|█████████▍| 379/400 [03:55<00:13,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3112:  95%|█████████▌| 380/400 [03:56<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.309:  95%|█████████▌| 381/400 [03:56<00:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3071:  96%|█████████▌| 382/400 [03:57<00:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3051:  96%|█████████▌| 383/400 [03:58<00:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3033:  96%|█████████▌| 384/400 [03:58<00:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.31:  96%|█████████▋| 385/400 [03:59<00:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3076:  96%|█████████▋| 386/400 [04:00<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3059:  97%|█████████▋| 387/400 [04:00<00:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3127:  97%|█████████▋| 388/400 [04:01<00:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3102:  97%|█████████▋| 389/400 [04:01<00:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3081:  98%|█████████▊| 390/400 [04:02<00:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3063:  98%|█████████▊| 391/400 [04:03<00:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3061:  98%|█████████▊| 392/400 [04:03<00:04,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3044:  98%|█████████▊| 393/400 [04:04<00:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3027:  98%|█████████▊| 394/400 [04:05<00:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.3011:  99%|█████████▉| 395/400 [04:05<00:03,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.2989:  99%|█████████▉| 396/400 [04:06<00:02,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.2966:  99%|█████████▉| 397/400 [04:06<00:01,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.296: 100%|█████████▉| 398/400 [04:07<00:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.294: 100%|█████████▉| 399/400 [04:08<00:00,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 9, train loss: 0.2977: 100%|██████████| 400/400 [04:08<00:00,  1.61it/s]
epoch: 9, valid loss: 4.9796:   1%|          | 2/201 [00:00<00:26,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.9487:   2%|▏         | 4/201 [00:00<00:27,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.8914:   3%|▎         | 6/201 [00:00<00:26,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.8092:   4%|▍         | 8/201 [00:01<00:25,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.7261:   5%|▍         | 10/201 [00:01<00:25,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.6585:   6%|▌         | 12/201 [00:01<00:25,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.6478:   7%|▋         | 14/201 [00:01<00:25,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.6785:   8%|▊         | 16/201 [00:02<00:24,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.6448:   9%|▉         | 18/201 [00:02<00:24,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.6222:  10%|▉         | 20/201 [00:02<00:24,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.5332:  11%|█         | 22/201 [00:02<00:23,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.5052:  12%|█▏        | 24/201 [00:03<00:23,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.4625:  13%|█▎        | 26/201 [00:03<00:23,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.4097:  14%|█▍        | 28/201 [00:03<00:23,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.383:  15%|█▍        | 30/201 [00:04<00:23,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.3066:  16%|█▌        | 32/201 [00:04<00:23,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.2602:  17%|█▋        | 34/201 [00:04<00:22,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.1785:  18%|█▊        | 36/201 [00:04<00:22,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.1494:  19%|█▉        | 38/201 [00:05<00:22,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.1471:  20%|█▉        | 40/201 [00:05<00:22,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.104:  21%|██        | 42/201 [00:05<00:21,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.0604:  22%|██▏       | 44/201 [00:05<00:21,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 4.0016:  23%|██▎       | 46/201 [00:06<00:21,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.9353:  24%|██▍       | 48/201 [00:06<00:20,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.8997:  25%|██▍       | 50/201 [00:06<00:20,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.8478:  26%|██▌       | 52/201 [00:07<00:20,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.882:  27%|██▋       | 54/201 [00:07<00:19,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.8473:  28%|██▊       | 56/201 [00:07<00:19,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.8897:  29%|██▉       | 58/201 [00:07<00:19,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.8713:  30%|██▉       | 60/201 [00:08<00:19,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.8017:  31%|███       | 62/201 [00:08<00:19,  7.24it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.782:  32%|███▏      | 64/201 [00:08<00:19,  7.18it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.787:  33%|███▎      | 66/201 [00:09<00:18,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.7657:  34%|███▍      | 68/201 [00:09<00:18,  7.22it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.7534:  35%|███▍      | 70/201 [00:09<00:17,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.7652:  36%|███▌      | 72/201 [00:09<00:17,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.7068:  37%|███▋      | 74/201 [00:10<00:17,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.6595:  38%|███▊      | 76/201 [00:10<00:16,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.6447:  39%|███▉      | 78/201 [00:10<00:16,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.5984:  40%|███▉      | 80/201 [00:10<00:16,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.5287:  41%|████      | 82/201 [00:11<00:16,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.5331:  42%|████▏     | 84/201 [00:11<00:15,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.5099:  43%|████▎     | 86/201 [00:11<00:15,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.5349:  44%|████▍     | 88/201 [00:12<00:15,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.5112:  45%|████▍     | 90/201 [00:12<00:14,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.4528:  46%|████▌     | 92/201 [00:12<00:14,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.4735:  47%|████▋     | 94/201 [00:12<00:14,  7.57it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.4176:  48%|████▊     | 96/201 [00:13<00:13,  7.55it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.403:  49%|████▉     | 98/201 [00:13<00:13,  7.54it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.4018:  50%|████▉     | 100/201 [00:13<00:13,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3694:  51%|█████     | 102/201 [00:13<00:13,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3236:  52%|█████▏    | 104/201 [00:14<00:13,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3187:  53%|█████▎    | 106/201 [00:14<00:12,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3131:  54%|█████▎    | 108/201 [00:14<00:12,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3029:  55%|█████▍    | 110/201 [00:14<00:12,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2758:  56%|█████▌    | 112/201 [00:15<00:11,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3369:  57%|█████▋    | 114/201 [00:15<00:12,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.317:  58%|█████▊    | 116/201 [00:15<00:11,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3166:  59%|█████▊    | 118/201 [00:16<00:11,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2527:  60%|█████▉    | 120/201 [00:16<00:10,  7.50it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2871:  61%|██████    | 122/201 [00:16<00:10,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.257:  62%|██████▏   | 124/201 [00:16<00:10,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2859:  63%|██████▎   | 126/201 [00:17<00:10,  7.49it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.3043:  64%|██████▎   | 128/201 [00:17<00:09,  7.47it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2823:  65%|██████▍   | 130/201 [00:17<00:09,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2574:  66%|██████▌   | 132/201 [00:17<00:09,  7.42it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2512:  67%|██████▋   | 134/201 [00:18<00:08,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2198:  68%|██████▊   | 136/201 [00:18<00:08,  7.53it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.2071:  69%|██████▊   | 138/201 [00:18<00:08,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.1797:  70%|██████▉   | 140/201 [00:19<00:08,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.1184:  71%|███████   | 142/201 [00:19<00:08,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.1263:  72%|███████▏  | 144/201 [00:19<00:07,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.1142:  73%|███████▎  | 146/201 [00:19<00:07,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0961:  74%|███████▎  | 148/201 [00:20<00:07,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0855:  75%|███████▍  | 150/201 [00:20<00:06,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0866:  76%|███████▌  | 152/201 [00:20<00:06,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0791:  77%|███████▋  | 154/201 [00:20<00:06,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0909:  78%|███████▊  | 156/201 [00:21<00:06,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0645:  79%|███████▊  | 158/201 [00:21<00:05,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.1183:  80%|███████▉  | 160/201 [00:21<00:05,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0981:  81%|████████  | 162/201 [00:22<00:05,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0709:  82%|████████▏ | 164/201 [00:22<00:05,  7.10it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0611:  83%|████████▎ | 166/201 [00:22<00:04,  7.12it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0612:  84%|████████▎ | 168/201 [00:22<00:04,  7.02it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0574:  85%|████████▍ | 170/201 [00:23<00:04,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0183:  86%|████████▌ | 172/201 [00:23<00:04,  7.13it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9998:  87%|████████▋ | 174/201 [00:23<00:03,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0072:  88%|████████▊ | 176/201 [00:23<00:03,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9713:  89%|████████▊ | 178/201 [00:24<00:03,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9809:  90%|████████▉ | 180/201 [00:24<00:02,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9815:  91%|█████████ | 182/201 [00:24<00:02,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9949:  92%|█████████▏| 184/201 [00:25<00:02,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9937:  93%|█████████▎| 186/201 [00:25<00:02,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9734:  94%|█████████▎| 188/201 [00:25<00:01,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9636:  95%|█████████▍| 190/201 [00:25<00:01,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9427:  96%|█████████▌| 192/201 [00:26<00:01,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9837:  97%|█████████▋| 194/201 [00:26<00:00,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0329:  98%|█████████▊| 196/201 [00:26<00:00,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0218:  99%|█████████▊| 198/201 [00:26<00:00,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 2.9998: 100%|█████████▉| 200/201 [00:27<00:00,  7.17it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 9, valid loss: 3.0117: 100%|██████████| 201/201 [00:27<00:00,  7.33it/s]


torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 2, 'label': 0}, {'prediction': 1, 'label': 1}], 'results_2': [{'prediction': 2, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 4, 'label': 4}, {'prediction': 0, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 3, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 0, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti

  0%|          | 0/400 [00:00<?, ?it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0814:   0%|          | 1/400 [00:00<04:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0814:   0%|          | 2/400 [00:01<04:01,  1.65it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0822:   1%|          | 3/400 [00:01<04:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0862:   1%|          | 4/400 [00:02<04:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0871:   1%|▏         | 5/400 [00:03<04:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0871:   2%|▏         | 6/400 [00:03<04:02,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0869:   2%|▏         | 7/400 [00:04<04:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0873:   2%|▏         | 8/400 [00:04<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0874:   2%|▏         | 9/400 [00:05<04:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0917:   2%|▎         | 10/400 [00:06<04:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.0933:   3%|▎         | 11/400 [00:06<04:06,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1107:   3%|▎         | 12/400 [00:07<04:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1108:   3%|▎         | 13/400 [00:08<04:05,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1108:   4%|▎         | 14/400 [00:08<04:03,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1111:   4%|▍         | 15/400 [00:09<04:03,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1118:   4%|▍         | 16/400 [00:09<03:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1125:   4%|▍         | 17/400 [00:10<04:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1136:   4%|▍         | 18/400 [00:11<03:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1134:   5%|▍         | 19/400 [00:11<03:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1192:   5%|▌         | 20/400 [00:12<03:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1194:   5%|▌         | 21/400 [00:13<03:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.119:   6%|▌         | 22/400 [00:13<03:52,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.119:   6%|▌         | 23/400 [00:14<03:55,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1199:   6%|▌         | 24/400 [00:14<03:51,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1201:   6%|▋         | 25/400 [00:15<03:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1245:   6%|▋         | 26/400 [00:16<03:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1244:   7%|▋         | 27/400 [00:16<03:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.126:   7%|▋         | 28/400 [00:17<03:47,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1254:   7%|▋         | 29/400 [00:18<03:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1257:   8%|▊         | 30/400 [00:18<03:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1275:   8%|▊         | 31/400 [00:19<03:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1269:   8%|▊         | 32/400 [00:19<03:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1273:   8%|▊         | 33/400 [00:20<03:50,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1354:   8%|▊         | 34/400 [00:21<03:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.135:   9%|▉         | 35/400 [00:21<03:50,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1414:   9%|▉         | 36/400 [00:22<03:47,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1409:   9%|▉         | 37/400 [00:23<03:48,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1432:  10%|▉         | 38/400 [00:23<03:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1552:  10%|▉         | 39/400 [00:24<03:46,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.155:  10%|█         | 40/400 [00:24<03:42,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1573:  10%|█         | 41/400 [00:25<03:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.157:  10%|█         | 42/400 [00:26<03:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1569:  11%|█         | 43/400 [00:26<03:44,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1599:  11%|█         | 44/400 [00:27<03:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1625:  11%|█▏        | 45/400 [00:28<03:42,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1617:  12%|█▏        | 46/400 [00:28<03:39,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1608:  12%|█▏        | 47/400 [00:29<03:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1601:  12%|█▏        | 48/400 [00:29<03:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1641:  12%|█▏        | 49/400 [00:30<03:38,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1658:  12%|█▎        | 50/400 [00:31<03:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1713:  13%|█▎        | 51/400 [00:31<03:37,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1718:  13%|█▎        | 52/400 [00:32<03:37,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1776:  13%|█▎        | 53/400 [00:33<03:38,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1785:  14%|█▎        | 54/400 [00:33<03:36,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1779:  14%|█▍        | 55/400 [00:34<03:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1774:  14%|█▍        | 56/400 [00:34<03:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1766:  14%|█▍        | 57/400 [00:35<03:35,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1763:  14%|█▍        | 58/400 [00:36<03:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1759:  15%|█▍        | 59/400 [00:36<03:36,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.178:  15%|█▌        | 60/400 [00:37<03:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1831:  15%|█▌        | 61/400 [00:38<03:33,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.182:  16%|█▌        | 62/400 [00:38<03:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1826:  16%|█▌        | 63/400 [00:39<03:31,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1821:  16%|█▌        | 64/400 [00:39<03:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1833:  16%|█▋        | 65/400 [00:40<03:31,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1915:  16%|█▋        | 66/400 [00:41<03:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1907:  17%|█▋        | 67/400 [00:41<03:31,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.191:  17%|█▋        | 68/400 [00:42<03:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1959:  17%|█▋        | 69/400 [00:43<03:29,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1949:  18%|█▊        | 70/400 [00:43<03:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1997:  18%|█▊        | 71/400 [00:44<03:28,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1988:  18%|█▊        | 72/400 [00:44<03:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1978:  18%|█▊        | 73/400 [00:45<03:26,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1966:  18%|█▊        | 74/400 [00:46<03:23,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1967:  19%|█▉        | 75/400 [00:46<03:25,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1959:  19%|█▉        | 76/400 [00:47<03:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1947:  19%|█▉        | 77/400 [00:48<03:23,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1964:  20%|█▉        | 78/400 [00:48<03:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1954:  20%|█▉        | 79/400 [00:49<03:21,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2004:  20%|██        | 80/400 [00:49<03:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2042:  20%|██        | 81/400 [00:50<03:20,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2034:  20%|██        | 82/400 [00:51<03:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2044:  21%|██        | 83/400 [00:51<03:18,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.211:  21%|██        | 84/400 [00:52<03:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.211:  21%|██▏       | 85/400 [00:53<03:17,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2112:  22%|██▏       | 86/400 [00:53<03:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2138:  22%|██▏       | 87/400 [00:54<03:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2208:  22%|██▏       | 88/400 [00:54<03:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2263:  22%|██▏       | 89/400 [00:55<03:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2251:  22%|██▎       | 90/400 [00:56<03:11,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2242:  23%|██▎       | 91/400 [00:56<03:12,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2235:  23%|██▎       | 92/400 [00:57<03:09,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2225:  23%|██▎       | 93/400 [00:58<03:11,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2213:  24%|██▎       | 94/400 [00:58<03:09,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2197:  24%|██▍       | 95/400 [00:59<03:10,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2182:  24%|██▍       | 96/400 [00:59<03:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2185:  24%|██▍       | 97/400 [01:00<03:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2171:  24%|██▍       | 98/400 [01:01<03:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2158:  25%|██▍       | 99/400 [01:01<03:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.215:  25%|██▌       | 100/400 [01:02<03:05,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2149:  25%|██▌       | 101/400 [01:02<03:06,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.215:  26%|██▌       | 102/400 [01:03<03:03,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2143:  26%|██▌       | 103/400 [01:04<03:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2149:  26%|██▌       | 104/400 [01:04<03:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2147:  26%|██▋       | 105/400 [01:05<03:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2136:  26%|██▋       | 106/400 [01:06<03:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2159:  27%|██▋       | 107/400 [01:06<03:03,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.215:  27%|██▋       | 108/400 [01:07<03:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2141:  27%|██▋       | 109/400 [01:07<03:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2127:  28%|██▊       | 110/400 [01:08<02:58,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2123:  28%|██▊       | 111/400 [01:09<02:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2117:  28%|██▊       | 112/400 [01:09<02:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2105:  28%|██▊       | 113/400 [01:10<02:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.211:  28%|██▊       | 114/400 [01:11<02:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2102:  29%|██▉       | 115/400 [01:11<02:58,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2098:  29%|██▉       | 116/400 [01:12<02:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2086:  29%|██▉       | 117/400 [01:12<02:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2073:  30%|██▉       | 118/400 [01:13<02:53,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2064:  30%|██▉       | 119/400 [01:14<02:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2095:  30%|███       | 120/400 [01:14<02:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2082:  30%|███       | 121/400 [01:15<02:53,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2069:  30%|███       | 122/400 [01:15<02:50,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2075:  31%|███       | 123/400 [01:16<02:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2063:  31%|███       | 124/400 [01:17<02:49,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2049:  31%|███▏      | 125/400 [01:17<02:50,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2042:  32%|███▏      | 126/400 [01:18<02:48,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2042:  32%|███▏      | 127/400 [01:19<02:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.203:  32%|███▏      | 128/400 [01:19<02:47,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2022:  32%|███▏      | 129/400 [01:20<02:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2013:  32%|███▎      | 130/400 [01:20<02:46,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2032:  33%|███▎      | 131/400 [01:21<02:48,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2025:  33%|███▎      | 132/400 [01:22<02:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2017:  33%|███▎      | 133/400 [01:22<02:47,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2003:  34%|███▎      | 134/400 [01:23<02:45,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1993:  34%|███▍      | 135/400 [01:24<02:47,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1982:  34%|███▍      | 136/400 [01:24<02:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1968:  34%|███▍      | 137/400 [01:25<02:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1958:  34%|███▍      | 138/400 [01:25<02:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1949:  35%|███▍      | 139/400 [01:26<02:44,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1937:  35%|███▌      | 140/400 [01:27<02:41,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1926:  35%|███▌      | 141/400 [01:27<02:42,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1965:  36%|███▌      | 142/400 [01:28<02:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1955:  36%|███▌      | 143/400 [01:29<02:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1946:  36%|███▌      | 144/400 [01:29<02:38,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.196:  36%|███▋      | 145/400 [01:30<02:39,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1949:  36%|███▋      | 146/400 [01:30<02:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1973:  37%|███▋      | 147/400 [01:31<02:38,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1965:  37%|███▋      | 148/400 [01:32<02:36,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2015:  37%|███▋      | 149/400 [01:32<02:38,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2004:  38%|███▊      | 150/400 [01:33<02:35,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.199:  38%|███▊      | 151/400 [01:34<02:36,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2039:  38%|███▊      | 152/400 [01:34<02:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2074:  38%|███▊      | 153/400 [01:35<02:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2064:  38%|███▊      | 154/400 [01:35<02:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2056:  39%|███▉      | 155/400 [01:36<02:33,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2063:  39%|███▉      | 156/400 [01:37<02:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.209:  39%|███▉      | 157/400 [01:37<02:33,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2078:  40%|███▉      | 158/400 [01:38<02:30,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2128:  40%|███▉      | 159/400 [01:39<02:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2138:  40%|████      | 160/400 [01:39<02:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2143:  40%|████      | 161/400 [01:40<02:29,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2199:  40%|████      | 162/400 [01:40<02:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.22:  41%|████      | 163/400 [01:41<02:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2192:  41%|████      | 164/400 [01:42<02:24,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2193:  41%|████▏     | 165/400 [01:42<02:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2187:  42%|████▏     | 166/400 [01:43<02:23,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2185:  42%|████▏     | 167/400 [01:44<02:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2186:  42%|████▏     | 168/400 [01:44<02:23,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2177:  42%|████▏     | 169/400 [01:45<02:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2175:  42%|████▎     | 170/400 [01:45<02:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2188:  43%|████▎     | 171/400 [01:46<02:22,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2191:  43%|████▎     | 172/400 [01:47<02:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2178:  43%|████▎     | 173/400 [01:47<02:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2167:  44%|████▎     | 174/400 [01:48<02:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2156:  44%|████▍     | 175/400 [01:48<02:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2144:  44%|████▍     | 176/400 [01:49<02:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2141:  44%|████▍     | 177/400 [01:50<02:20,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2284:  44%|████▍     | 178/400 [01:50<02:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2271:  45%|████▍     | 179/400 [01:51<02:18,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2255:  45%|████▌     | 180/400 [01:52<02:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2242:  45%|████▌     | 181/400 [01:52<02:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2231:  46%|████▌     | 182/400 [01:53<02:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2221:  46%|████▌     | 183/400 [01:53<02:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2205:  46%|████▌     | 184/400 [01:54<02:12,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2196:  46%|████▋     | 185/400 [01:55<02:13,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2184:  46%|████▋     | 186/400 [01:55<02:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2173:  47%|████▋     | 187/400 [01:56<02:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2173:  47%|████▋     | 188/400 [01:57<02:10,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2159:  47%|████▋     | 189/400 [01:57<02:11,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2155:  48%|████▊     | 190/400 [01:58<02:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2146:  48%|████▊     | 191/400 [01:58<02:11,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2135:  48%|████▊     | 192/400 [01:59<02:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2195:  48%|████▊     | 193/400 [02:00<02:09,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2273:  48%|████▊     | 194/400 [02:00<02:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2261:  49%|████▉     | 195/400 [02:01<02:09,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2251:  49%|████▉     | 196/400 [02:02<02:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2245:  49%|████▉     | 197/400 [02:02<02:08,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2234:  50%|████▉     | 198/400 [02:03<02:06,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2224:  50%|████▉     | 199/400 [02:03<02:06,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2228:  50%|█████     | 200/400 [02:04<02:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2231:  50%|█████     | 201/400 [02:05<02:04,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2217:  50%|█████     | 202/400 [02:05<02:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2248:  51%|█████     | 203/400 [02:06<02:03,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2243:  51%|█████     | 204/400 [02:07<02:01,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2278:  51%|█████▏    | 205/400 [02:07<02:03,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2289:  52%|█████▏    | 206/400 [02:08<02:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2277:  52%|█████▏    | 207/400 [02:08<02:01,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2275:  52%|█████▏    | 208/400 [02:09<01:59,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2264:  52%|█████▏    | 209/400 [02:10<01:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2249:  52%|█████▎    | 210/400 [02:10<01:57,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.225:  53%|█████▎    | 211/400 [02:11<01:58,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2239:  53%|█████▎    | 212/400 [02:12<01:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2244:  53%|█████▎    | 213/400 [02:12<01:57,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2332:  54%|█████▎    | 214/400 [02:13<01:56,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2322:  54%|█████▍    | 215/400 [02:13<01:56,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2307:  54%|█████▍    | 216/400 [02:14<01:54,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2293:  54%|█████▍    | 217/400 [02:15<01:55,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2296:  55%|█████▍    | 218/400 [02:15<01:53,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2299:  55%|█████▍    | 219/400 [02:16<01:53,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2286:  55%|█████▌    | 220/400 [02:17<01:51,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.229:  55%|█████▌    | 221/400 [02:17<01:52,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2281:  56%|█████▌    | 222/400 [02:18<01:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2267:  56%|█████▌    | 223/400 [02:18<01:51,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2284:  56%|█████▌    | 224/400 [02:19<01:49,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2272:  56%|█████▋    | 225/400 [02:20<01:49,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2266:  56%|█████▋    | 226/400 [02:20<01:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2257:  57%|█████▋    | 227/400 [02:21<01:48,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.226:  57%|█████▋    | 228/400 [02:22<01:46,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2254:  57%|█████▋    | 229/400 [02:22<01:46,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2252:  57%|█████▊    | 230/400 [02:23<01:44,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2269:  58%|█████▊    | 231/400 [02:23<01:46,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2255:  58%|█████▊    | 232/400 [02:24<01:44,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2238:  58%|█████▊    | 233/400 [02:25<01:45,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.223:  58%|█████▊    | 234/400 [02:25<01:43,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2214:  59%|█████▉    | 235/400 [02:26<01:43,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2239:  59%|█████▉    | 236/400 [02:27<01:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2223:  59%|█████▉    | 237/400 [02:27<01:42,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2208:  60%|█████▉    | 238/400 [02:28<01:40,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2193:  60%|█████▉    | 239/400 [02:28<01:40,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2179:  60%|██████    | 240/400 [02:29<01:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2164:  60%|██████    | 241/400 [02:30<01:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2277:  60%|██████    | 242/400 [02:30<01:37,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2262:  61%|██████    | 243/400 [02:31<01:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2249:  61%|██████    | 244/400 [02:32<01:35,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2238:  61%|██████▏   | 245/400 [02:32<01:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2228:  62%|██████▏   | 246/400 [02:33<01:34,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2231:  62%|██████▏   | 247/400 [02:33<01:35,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2263:  62%|██████▏   | 248/400 [02:34<01:33,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2253:  62%|██████▏   | 249/400 [02:35<01:34,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2245:  62%|██████▎   | 250/400 [02:35<01:32,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2239:  63%|██████▎   | 251/400 [02:36<01:32,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2229:  63%|██████▎   | 252/400 [02:36<01:31,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2214:  63%|██████▎   | 253/400 [02:37<01:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2212:  64%|██████▎   | 254/400 [02:38<01:30,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2197:  64%|██████▍   | 255/400 [02:38<01:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2182:  64%|██████▍   | 256/400 [02:39<01:28,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2166:  64%|██████▍   | 257/400 [02:40<01:29,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2155:  64%|██████▍   | 258/400 [02:40<01:28,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2146:  65%|██████▍   | 259/400 [02:41<01:29,  1.57it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.215:  65%|██████▌   | 260/400 [02:41<01:27,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2138:  65%|██████▌   | 261/400 [02:42<01:27,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2186:  66%|██████▌   | 262/400 [02:43<01:25,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2191:  66%|██████▌   | 263/400 [02:43<01:25,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2181:  66%|██████▌   | 264/400 [02:44<01:24,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2168:  66%|██████▋   | 265/400 [02:45<01:24,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2185:  66%|██████▋   | 266/400 [02:45<01:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2175:  67%|██████▋   | 267/400 [02:46<01:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.224:  67%|██████▋   | 268/400 [02:46<01:22,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2229:  67%|██████▋   | 269/400 [02:47<01:22,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2229:  68%|██████▊   | 270/400 [02:48<01:20,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2218:  68%|██████▊   | 271/400 [02:48<01:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2201:  68%|██████▊   | 272/400 [02:49<01:19,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2187:  68%|██████▊   | 273/400 [02:50<01:19,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2174:  68%|██████▊   | 274/400 [02:50<01:17,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2168:  69%|██████▉   | 275/400 [02:51<01:18,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2205:  69%|██████▉   | 276/400 [02:51<01:16,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2273:  69%|██████▉   | 277/400 [02:52<01:17,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2257:  70%|██████▉   | 278/400 [02:53<01:15,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2245:  70%|██████▉   | 279/400 [02:53<01:16,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.223:  70%|███████   | 280/400 [02:54<01:14,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2242:  70%|███████   | 281/400 [02:55<01:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2233:  70%|███████   | 282/400 [02:55<01:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2232:  71%|███████   | 283/400 [02:56<01:12,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.223:  71%|███████   | 284/400 [02:56<01:11,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2215:  71%|███████▏  | 285/400 [02:57<01:11,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2202:  72%|███████▏  | 286/400 [02:58<01:10,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2199:  72%|███████▏  | 287/400 [02:58<01:10,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2192:  72%|███████▏  | 288/400 [02:59<01:08,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2178:  72%|███████▏  | 289/400 [02:59<01:08,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2163:  72%|███████▎  | 290/400 [03:00<01:07,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2156:  73%|███████▎  | 291/400 [03:01<01:07,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2159:  73%|███████▎  | 292/400 [03:01<01:06,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2146:  73%|███████▎  | 293/400 [03:02<01:06,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2131:  74%|███████▎  | 294/400 [03:03<01:05,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2121:  74%|███████▍  | 295/400 [03:03<01:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2123:  74%|███████▍  | 296/400 [03:04<01:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.211:  74%|███████▍  | 297/400 [03:04<01:04,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2102:  74%|███████▍  | 298/400 [03:05<01:02,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2093:  75%|███████▍  | 299/400 [03:06<01:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2079:  75%|███████▌  | 300/400 [03:06<01:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2092:  75%|███████▌  | 301/400 [03:07<01:01,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2076:  76%|███████▌  | 302/400 [03:08<01:00,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2091:  76%|███████▌  | 303/400 [03:08<01:00,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2078:  76%|███████▌  | 304/400 [03:09<00:59,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2065:  76%|███████▋  | 305/400 [03:09<00:59,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2085:  76%|███████▋  | 306/400 [03:10<00:57,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2072:  77%|███████▋  | 307/400 [03:11<00:57,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2063:  77%|███████▋  | 308/400 [03:11<00:56,  1.64it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2051:  77%|███████▋  | 309/400 [03:12<00:56,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.204:  78%|███████▊  | 310/400 [03:12<00:55,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2041:  78%|███████▊  | 311/400 [03:13<00:55,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2035:  78%|███████▊  | 312/400 [03:14<00:54,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2059:  78%|███████▊  | 313/400 [03:14<00:54,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2048:  78%|███████▊  | 314/400 [03:15<00:52,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2041:  79%|███████▉  | 315/400 [03:16<00:52,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2091:  79%|███████▉  | 316/400 [03:16<00:51,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2077:  79%|███████▉  | 317/400 [03:17<00:51,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2064:  80%|███████▉  | 318/400 [03:17<00:50,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2049:  80%|███████▉  | 319/400 [03:18<00:50,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2044:  80%|████████  | 320/400 [03:19<00:49,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2033:  80%|████████  | 321/400 [03:19<00:49,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.202:  80%|████████  | 322/400 [03:20<00:48,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2074:  81%|████████  | 323/400 [03:21<00:48,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.207:  81%|████████  | 324/400 [03:21<00:47,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2059:  81%|████████▏ | 325/400 [03:22<00:47,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2104:  82%|████████▏ | 326/400 [03:22<00:45,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2094:  82%|████████▏ | 327/400 [03:23<00:45,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2082:  82%|████████▏ | 328/400 [03:24<00:44,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2068:  82%|████████▏ | 329/400 [03:24<00:44,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2057:  82%|████████▎ | 330/400 [03:25<00:43,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2087:  83%|████████▎ | 331/400 [03:26<00:43,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2107:  83%|████████▎ | 332/400 [03:26<00:42,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2114:  83%|████████▎ | 333/400 [03:27<00:41,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2102:  84%|████████▎ | 334/400 [03:27<00:40,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2092:  84%|████████▍ | 335/400 [03:28<00:40,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2077:  84%|████████▍ | 336/400 [03:29<00:39,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2064:  84%|████████▍ | 337/400 [03:29<00:39,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2049:  84%|████████▍ | 338/400 [03:30<00:38,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2049:  85%|████████▍ | 339/400 [03:31<00:38,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2051:  85%|████████▌ | 340/400 [03:31<00:37,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2042:  85%|████████▌ | 341/400 [03:32<00:37,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2035:  86%|████████▌ | 342/400 [03:32<00:36,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2026:  86%|████████▌ | 343/400 [03:33<00:35,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2017:  86%|████████▌ | 344/400 [03:34<00:34,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2013:  86%|████████▋ | 345/400 [03:34<00:34,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2002:  86%|████████▋ | 346/400 [03:35<00:33,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2002:  87%|████████▋ | 347/400 [03:36<00:33,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1995:  87%|████████▋ | 348/400 [03:36<00:32,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1986:  87%|████████▋ | 349/400 [03:37<00:32,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1976:  88%|████████▊ | 350/400 [03:37<00:31,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1997:  88%|████████▊ | 351/400 [03:38<00:30,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2019:  88%|████████▊ | 352/400 [03:39<00:29,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2007:  88%|████████▊ | 353/400 [03:39<00:29,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1996:  88%|████████▊ | 354/400 [03:40<00:28,  1.63it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1987:  89%|████████▉ | 355/400 [03:41<00:27,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1977:  89%|████████▉ | 356/400 [03:41<00:27,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1966:  89%|████████▉ | 357/400 [03:42<00:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.196:  90%|████████▉ | 358/400 [03:42<00:26,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.195:  90%|████████▉ | 359/400 [03:43<00:25,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1949:  90%|█████████ | 360/400 [03:44<00:24,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1939:  90%|█████████ | 361/400 [03:44<00:24,  1.58it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1931:  90%|█████████ | 362/400 [03:45<00:23,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1922:  91%|█████████ | 363/400 [03:46<00:23,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1929:  91%|█████████ | 364/400 [03:46<00:22,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1921:  91%|█████████▏| 365/400 [03:47<00:21,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.1909:  92%|█████████▏| 366/400 [03:47<00:21,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2011:  92%|█████████▏| 367/400 [03:48<00:20,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2068:  92%|█████████▏| 368/400 [03:49<00:19,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2056:  92%|█████████▏| 369/400 [03:49<00:19,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2045:  92%|█████████▎| 370/400 [03:50<00:18,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.207:  93%|█████████▎| 371/400 [03:50<00:18,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2084:  93%|█████████▎| 372/400 [03:51<00:17,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2075:  93%|█████████▎| 373/400 [03:52<00:16,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2083:  94%|█████████▎| 374/400 [03:52<00:16,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2077:  94%|█████████▍| 375/400 [03:53<00:15,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2068:  94%|█████████▍| 376/400 [03:54<00:14,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2088:  94%|█████████▍| 377/400 [03:54<00:14,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2077:  94%|█████████▍| 378/400 [03:55<00:13,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2091:  95%|█████████▍| 379/400 [03:55<00:13,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.209:  95%|█████████▌| 380/400 [03:56<00:12,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2079:  95%|█████████▌| 381/400 [03:57<00:11,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2139:  96%|█████████▌| 382/400 [03:57<00:11,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2135:  96%|█████████▌| 383/400 [03:58<00:10,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2135:  96%|█████████▌| 384/400 [03:59<00:09,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2174:  96%|█████████▋| 385/400 [03:59<00:09,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2158:  96%|█████████▋| 386/400 [04:00<00:08,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2145:  97%|█████████▋| 387/400 [04:00<00:08,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2129:  97%|█████████▋| 388/400 [04:01<00:07,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2212:  97%|█████████▋| 389/400 [04:02<00:06,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.22:  98%|█████████▊| 390/400 [04:02<00:06,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2186:  98%|█████████▊| 391/400 [04:03<00:05,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2173:  98%|█████████▊| 392/400 [04:04<00:04,  1.62it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2162:  98%|█████████▊| 393/400 [04:04<00:04,  1.60it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2164:  98%|█████████▊| 394/400 [04:05<00:03,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2209:  99%|█████████▉| 395/400 [04:05<00:03,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2195:  99%|█████████▉| 396/400 [04:06<00:02,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2181:  99%|█████████▉| 397/400 [04:07<00:01,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2167: 100%|█████████▉| 398/400 [04:07<00:01,  1.61it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2161: 100%|█████████▉| 399/400 [04:08<00:00,  1.59it/s]

torch.Size([4, 5])
torch.Size([4, 6])


epoch: 10, train loss: 0.2148: 100%|██████████| 400/400 [04:09<00:00,  1.61it/s]
epoch: 10, valid loss: 5.7276:   1%|          | 2/201 [00:00<00:27,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.6972:   2%|▏         | 4/201 [00:00<00:26,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.6165:   3%|▎         | 6/201 [00:00<00:27,  7.10it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.5275:   4%|▍         | 8/201 [00:01<00:26,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.458:   5%|▍         | 10/201 [00:01<00:26,  7.17it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.372:   6%|▌         | 12/201 [00:01<00:25,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.3949:   7%|▋         | 14/201 [00:01<00:25,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.4425:   8%|▊         | 16/201 [00:02<00:25,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.4269:   9%|▉         | 18/201 [00:02<00:25,  7.16it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.3682:  10%|▉         | 20/201 [00:02<00:25,  7.23it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.2658:  11%|█         | 22/201 [00:03<00:24,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.2412:  12%|█▏        | 24/201 [00:03<00:24,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.2133:  13%|█▎        | 26/201 [00:03<00:23,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.1513:  14%|█▍        | 28/201 [00:03<00:23,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 5.0659:  15%|█▍        | 30/201 [00:04<00:22,  7.51it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.9682:  16%|█▌        | 32/201 [00:04<00:22,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.9016:  17%|█▋        | 34/201 [00:04<00:23,  7.24it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.8068:  18%|█▊        | 36/201 [00:04<00:22,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.7381:  19%|█▉        | 38/201 [00:05<00:21,  7.46it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.7073:  20%|█▉        | 40/201 [00:05<00:21,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.6569:  21%|██        | 42/201 [00:05<00:21,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.5724:  22%|██▏       | 44/201 [00:06<00:21,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.4976:  23%|██▎       | 46/201 [00:06<00:21,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.4311:  24%|██▍       | 48/201 [00:06<00:21,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.4173:  25%|██▍       | 50/201 [00:06<00:20,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.3513:  26%|██▌       | 52/201 [00:07<00:20,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.3872:  27%|██▋       | 54/201 [00:07<00:20,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.3438:  28%|██▊       | 56/201 [00:07<00:20,  7.17it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.3668:  29%|██▉       | 58/201 [00:07<00:19,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.3685:  30%|██▉       | 60/201 [00:08<00:19,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.315:  31%|███       | 62/201 [00:08<00:18,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.276:  32%|███▏      | 64/201 [00:08<00:18,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.2314:  33%|███▎      | 66/201 [00:09<00:18,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.2018:  34%|███▍      | 68/201 [00:09<00:18,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.2053:  35%|███▍      | 70/201 [00:09<00:17,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.2114:  36%|███▌      | 72/201 [00:09<00:17,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.1485:  37%|███▋      | 74/201 [00:10<00:17,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.0975:  38%|███▊      | 76/201 [00:10<00:17,  7.34it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.1289:  39%|███▉      | 78/201 [00:10<00:16,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.0757:  40%|███▉      | 80/201 [00:10<00:16,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.0347:  41%|████      | 82/201 [00:11<00:16,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.0098:  42%|████▏     | 84/201 [00:11<00:16,  7.16it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.9823:  43%|████▎     | 86/201 [00:11<00:15,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 4.0243:  44%|████▍     | 88/201 [00:12<00:15,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.9606:  45%|████▍     | 90/201 [00:12<00:15,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.8868:  46%|████▌     | 92/201 [00:12<00:15,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.9107:  47%|████▋     | 94/201 [00:12<00:14,  7.17it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.8776:  48%|████▊     | 96/201 [00:13<00:14,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.8495:  49%|████▉     | 98/201 [00:13<00:14,  7.02it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.8479:  50%|████▉     | 100/201 [00:13<00:14,  7.08it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.804:  51%|█████     | 102/201 [00:14<00:14,  6.79it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.7365:  52%|█████▏    | 104/201 [00:14<00:13,  7.03it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.7178:  53%|█████▎    | 106/201 [00:14<00:13,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.7148:  54%|█████▎    | 108/201 [00:14<00:12,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6636:  55%|█████▍    | 110/201 [00:15<00:12,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6536:  56%|█████▌    | 112/201 [00:15<00:12,  7.18it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6978:  57%|█████▋    | 114/201 [00:15<00:12,  7.17it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6582:  58%|█████▊    | 116/201 [00:15<00:11,  7.19it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6568:  59%|█████▊    | 118/201 [00:16<00:11,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.5873:  60%|█████▉    | 120/201 [00:16<00:11,  7.25it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6388:  61%|██████    | 122/201 [00:16<00:10,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.5885:  62%|██████▏   | 124/201 [00:17<00:10,  7.28it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6062:  63%|██████▎   | 126/201 [00:17<00:10,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.6121:  64%|██████▎   | 128/201 [00:17<00:09,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.5886:  65%|██████▍   | 130/201 [00:17<00:09,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.5486:  66%|██████▌   | 132/201 [00:18<00:09,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.5449:  67%|██████▋   | 134/201 [00:18<00:09,  7.32it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.5261:  68%|██████▊   | 136/201 [00:18<00:09,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.497:  69%|██████▊   | 138/201 [00:18<00:08,  7.22it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.4525:  70%|██████▉   | 140/201 [00:19<00:08,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3961:  71%|███████   | 142/201 [00:19<00:08,  7.20it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.4278:  72%|███████▏  | 144/201 [00:19<00:07,  7.31it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.4321:  73%|███████▎  | 146/201 [00:20<00:07,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3877:  74%|███████▎  | 148/201 [00:20<00:07,  7.36it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.385:  75%|███████▍  | 150/201 [00:20<00:07,  7.27it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3769:  76%|███████▌  | 152/201 [00:20<00:06,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3615:  77%|███████▋  | 154/201 [00:21<00:06,  7.48it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3404:  78%|███████▊  | 156/201 [00:21<00:06,  7.44it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3118:  79%|███████▊  | 158/201 [00:21<00:05,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3594:  80%|███████▉  | 160/201 [00:21<00:05,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3665:  81%|████████  | 162/201 [00:22<00:05,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3573:  82%|████████▏ | 164/201 [00:22<00:04,  7.41it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3399:  83%|████████▎ | 166/201 [00:22<00:04,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.336:  84%|████████▎ | 168/201 [00:23<00:04,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3487:  85%|████████▍ | 170/201 [00:23<00:04,  7.39it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3139:  86%|████████▌ | 172/201 [00:23<00:03,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3058:  87%|████████▋ | 174/201 [00:23<00:03,  7.45it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3133:  88%|████████▊ | 176/201 [00:24<00:03,  7.38it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2934:  89%|████████▊ | 178/201 [00:24<00:03,  7.37it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2793:  90%|████████▉ | 180/201 [00:24<00:02,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2696:  91%|█████████ | 182/201 [00:24<00:02,  7.33it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2906:  92%|█████████▏| 184/201 [00:25<00:02,  7.40it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2933:  93%|█████████▎| 186/201 [00:25<00:02,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2895:  94%|█████████▎| 188/201 [00:25<00:01,  7.21it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2708:  95%|█████████▍| 190/201 [00:26<00:01,  7.35it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2539:  96%|█████████▌| 192/201 [00:26<00:01,  7.26it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2779:  97%|█████████▋| 194/201 [00:26<00:00,  7.07it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.3232:  98%|█████████▊| 196/201 [00:26<00:00,  7.24it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2928:  99%|█████████▊| 198/201 [00:27<00:00,  7.30it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.2731: 100%|█████████▉| 200/201 [00:27<00:00,  7.43it/s]

torch.Size([2, 5])
torch.Size([2, 6])
torch.Size([2, 5])
torch.Size([2, 6])


epoch: 10, valid loss: 3.297: 100%|██████████| 201/201 [00:27<00:00,  7.29it/s]

torch.Size([2, 5])
torch.Size([2, 6])
[{'results_1': [{'prediction': 2, 'label': 0}, {'prediction': 2, 'label': 1}], 'results_2': [{'prediction': 5, 'label': 1}, {'prediction': 1, 'label': 0}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 4, 'label': 4}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 3, 'label': 2}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 3}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 1}, {'prediction': 3, 'label': 2}], 'results_2': [{'prediction': 5, 'label': 1}, {'prediction': 0, 'label': 0}]}, {'results_1': [{'prediction': 3, 'label': 1}, {'prediction': 2, 'label': 2}], 'results_2': [{'prediction': 0, 'label': 0}, {'prediction': 3, 'label': 3}]}, {'results_1': [{'prediction': 2, 'label': 2}, {'prediction': 2, 'label': 3}], 'results_2': [{'prediction': 3, 'label': 3}, {'prediction': 4, 'label': 4}]}, {'results_1': [{'predicti




In [21]:
def inference(model, tokenizer, sentences, device):
    inference_results_label1 = []
    inference_results_label2 = []

    model.eval()
    with torch.no_grad():
        for sentence in sentences:
            inputs = tokenizer.encode_plus(
                sentence, return_tensors="pt", padding="max_length", truncation=True, max_length=config.max_seq_length
            )
            inputs = {key: value.to(device) for key, value in inputs.items()}  # 입력을 GPU로 이동
            outputs = model(**inputs)  # 모델 추론 수행

            # 가정: outputs.logits의 형태는 [batch_size, num_labels, num_classes_per_label]
            predictions_label1 = torch.argmax(outputs.logits[0, 0], dim=-1)
            predictions_label2 = torch.argmax(outputs.logits[0, 1], dim=-1)

            inference_results_label1.append(predictions_label1.item())
            inference_results_label2.append(predictions_label2.item())

    return inference_results_label1, inference_results_label2




In [22]:
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification
from tqdm import tqdm  # 진행 막대를 위한 라이브러리

print("Loading data...")
test_df = pd.read_csv("complete_test.csv")
test_df["text"] = test_df["title"] + " " + test_df["content"]
sentences_to_infer = test_df["text"].tolist()

print("Loading model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model = AutoModelForSequenceClassification.from_pretrained(output_dir)
best_model.to(device)
best_model.eval()

print("Starting inference...")
# tqdm을 사용하여 진행 막대 표시
inference_results_label1, inference_results_label2 = inference(best_model, tokenizer, tqdm(sentences_to_infer), device)
test_df["predicted_label1"] = inference_results_label1
test_df["predicted_label2"] = inference_results_label2

print("Calculating metrics...")
accuracy_label1 = accuracy_score(test_df["label1"], test_df["predicted_label1"])
accuracy_label2 = accuracy_score(test_df["label2"], test_df["predicted_label2"])

f1_label1 = f1_score(test_df["label1"], test_df["predicted_label1"], average='macro')
f1_label2 = f1_score(test_df["label2"], test_df["predicted_label2"], average='macro')

print(f"Accuracy for label1: {accuracy_label1:.4f}")
print(f"Accuracy for label2: {accuracy_label2:.4f}")
print(f"F1-score for label1: {f1_label1:.4f}")
print(f"F1-score for label2: {f1_label2:.4f}")

print("Saving results...")
output_csv_path = "./inference_results.csv"
del test_df["text"]
test_df.to_csv(output_csv_path, index=False)
print(f"Results saved to {output_csv_path}")



Loading data...
Loading model...


Some weights of BigBirdForSequenceClassification were not initialized from the model checkpoint at output/cls/comment/2-69.4-62.69-ckpt and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting inference...


100%|██████████| 500/500 [00:54<00:00,  9.20it/s]

Calculating metrics...
Accuracy for label1: 0.0000
Accuracy for label2: 0.4280
F1-score for label1: 0.0000
F1-score for label2: 0.0999
Saving results...
Results saved to ./inference_results.csv



