In [1]:
%load_ext autoreload
%autoreload 2

from tqdm import tqdm
from segtok import tokenizer
from utils import *
import hickle

In [2]:
# Key Hyperparameters
# Defaults for a stand-alone run. The "# Parameters" cell that follows
# re-assigns several of these names for a concrete experiment.

# Data selection (read by get_train_valid and baked into the split cache key).
enable_orig = "orig"  # any truthy value adds the original training split; "preprocess" also runs preprocess()
enable_aug = False  # add ./new_data.json and ./new_data2.json
enable_aug3 = False  # add ./new_data3.json
max_training_samples = 100000  # cap applied to the combined training list
max_tokenized_length = 64  # per-sentence pad/truncate length handed to the tokenizer
num_sentences = 10  # at most this many leading sentences are kept per review
valid_percent = 0.01  # validation fraction; part of the split cache key

# BERT fine-tuning stage (read by train_finetuning and its cache key).
batch_size_finetuning = 32
epochs_finetuning = 1
lr_finetuning = 1e-5

# Not read in this part of the notebook; presumably consumed by the main
# model in later cells - TODO confirm.
batch_size = 128
epochs = 20
main_model_lr = 1e-5

lstm_hidden_size = 1024
# True -> BERT head with a single output (num_labels=1); False -> 9 labels.
regressive_style_finetuning = False

# Plain string: the previous f"test" had no placeholders to format.
experiment_id = "test"

In [3]:
# Parameters
# NOTE(review): this looks like a cell injected by a notebook runner (e.g.
# papermill) - confirm before editing it by hand. It executes after the
# defaults cell, so each assignment below overrides the value set there.
regressive_style_finetuning = True
enable_orig = "orig"
enable_aug = False
enable_aug3 = False
max_training_samples = 200000
lstm_hidden_size = 64
epochs = 20
experiment_id = "classification-sentence-embeddings-regressive_style_finetuning-True-enable_orig-orig-enable_aug-False-enable_aug3-False-max_training_samples-200000-lstm_hidden_size-64-epochs-20"


In [4]:
import os

# Output folder for this experiment's artefacts (fine-tuned weights and
# accuracy logs are written here further down).
experiment_dir = f"completed-experiments/{experiment_id}"
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test. Unlike the old
# check, it raises FileExistsError if the path exists but is not a directory.
os.makedirs(experiment_dir, exist_ok=True)

In [5]:
# NOTE(review): star import - load_dataset (and load_gen_dataset, used by
# get_train_valid) presumably come from here; confirm and import by name.
from data_parsing import *
# `data` is the full dataset that get_train_valid later hands to
# split_train_validation.
data = load_dataset("./yelp_review_training_dataset.jsonl")

In [6]:
from training_utils import split_train_validation
from text_preprocessing import preprocess
import random

def get_train_valid():
    """Assemble the training and validation sets from the enabled sources.

    Reads the notebook-level settings ``data``, ``valid_percent``,
    ``enable_aug``, ``enable_aug3``, ``enable_orig`` and
    ``max_training_samples``.

    Returns:
        A 4-tuple ``(train_x, valid_x, train_y, valid_y)``. The texts are
        UTF-8 encoded ``bytes`` (the caller decodes them again after the
        cache round trip); the labels are passed through unchanged. Training
        pairs are shuffled together; the validation order is untouched.
    """
    # Use the configured fraction. It was previously hard-coded to 0.01 here
    # while the cache key was built from valid_percent, so changing the
    # setting produced a new cache entry containing the old split.
    orig_train_x, valid_x, orig_train_y, valid_y = split_train_validation(data, valid_percent)
    train_x = []
    train_y = []

    if enable_aug:
        aug_data = load_gen_dataset("./new_data.json") + load_gen_dataset("./new_data2.json")
        train_x += [i[0] for i in aug_data]
        train_y += [i[1] for i in aug_data]

    if enable_aug3:
        aug_data3 = load_gen_dataset("./new_data3.json")
        train_x += [i[0] for i in aug_data3]
        train_y += [i[1] for i in aug_data3]

    # Any truthy enable_orig ("orig", "preprocess", ...) adds the original split.
    if enable_orig:
        train_x += orig_train_x
        train_y += orig_train_y

    # The cap is applied BEFORE shuffling, so sources appended first
    # (augmented data) take priority over the original split when it bites.
    train_x = train_x[:max_training_samples]
    train_y = train_y[:max_training_samples]

    if enable_orig == "preprocess":
        train_x = preprocess(train_x)
        valid_x = preprocess(valid_x)

    # Shuffle texts and labels as pairs so they stay aligned.
    # NOTE(review): unseeded; the result is only stable because it is cached.
    paired_train = list(zip(train_x, train_y))
    random.shuffle(paired_train)
    train_x = [i[0] for i in paired_train]
    train_y = [i[1] for i in paired_train]

    return [x.encode("utf-8") for x in train_x], [x.encode("utf-8") for x in valid_x], train_y, valid_y

# Cache key built from the settings that select and size the split.
split_key = f"cache-core/split-data-{valid_percent}-orig-{enable_orig}-aug12-{enable_aug}-aug3-{enable_aug3}-max-{max_training_samples}"
# memo_load comes from utils; presumably it runs get_train_valid only when no
# cached result exists for split_key - confirm against utils.
train_x, valid_x, train_y, valid_y = memo_load(
    get_train_valid,
    split_key
)
# Hash of the cache file for this key; later cache keys embed it so they
# change whenever the split contents change.
split_hash = hash_file(split_key + ".hkl")

# get_train_valid returns UTF-8 bytes; turn them back into str here.
train_x = [x.decode("utf-8") for x in train_x]
valid_x = [x.decode("utf-8") for x in valid_x]

In [7]:
# Sanity check: one length per line, in the order
# train_x, train_y, valid_x, valid_y (texts and labels should match pairwise).
print(*(len(seq) for seq in (train_x, train_y, valid_x, valid_y)), sep="\n")

200000
200000
5336
5336


In [8]:
import torch as th

# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
device = th.device("cuda" if th.cuda.is_available() else "cpu")
print(device)

from spacy.lang.en import English
# English pipeline with a sentencizer added; get_finetuning_data iterates
# doc.sents from it to split each review into sentences.
nlp = English()
# add_pipe's return value is the cell's last expression, which is why the
# Sentencizer repr shows up in the cell output.
nlp.add_pipe("sentencizer")

cuda


<spacy.pipeline.sentencizer.Sentencizer at 0x7f55a06c1cd0>

In [9]:
from transformers import BertTokenizerFast, BertForSequenceClassification
# NOTE(review): this rebinds `tokenizer`, shadowing the `tokenizer` imported
# from segtok in the first cell. From here on the name refers to the BERT
# tokenizer, which is what get_finetuning_data calls.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

In [10]:
# fine tune the BERT
import numpy as np

def _numerize_per_sentence(texts, labels):
    """Tokenize the leading sentences of each text, one row per sentence.

    Each text is split with ``nlp``; at most ``num_sentences`` leading
    sentences are kept and each is tokenized on its own, padded/truncated to
    ``max_tokenized_length``. Every sentence row carries the label of the
    text it came from (``labels[i]``).

    Returns:
        ``(token_ids, attention_masks, sentence_labels)`` as NumPy arrays
        with one entry per kept sentence.
    """
    token_ids = []
    attention_masks = []
    sentence_labels = []
    for i, text in tqdm(list(enumerate(texts))):
        doc = nlp(text)
        sentences = [str(sent) for sent in doc.sents]
        for sentence in sentences[:num_sentences]:
            # [0] selects the encoding of the single input sequence.
            tokenized = tokenizer(sentence, truncation=True, padding="max_length", max_length=max_tokenized_length)[0]
            token_ids.append(tokenized.ids)
            attention_masks.append(tokenized.attention_mask)
            sentence_labels.append(labels[i])
    return np.array(token_ids), np.array(attention_masks), np.array(sentence_labels)


def get_finetuning_data():
    """Build the sentence-level fine-tuning arrays for train and validation.

    Reads the notebook-level ``train_x``/``train_y`` and ``valid_x``/``valid_y``.

    Returns:
        ``(train_x_numerized, train_x_mask, train_y_per_sentence,
        valid_x_numerized, valid_x_mask, valid_y_per_sentence)``, all NumPy
        arrays with one entry per kept sentence.
    """
    train_x_numerized, train_x_mask, train_y_per_sentence = _numerize_per_sentence(train_x, train_y)
    valid_x_numerized, valid_x_mask, valid_y_per_sentence = _numerize_per_sentence(valid_x, valid_y)
    return train_x_numerized, train_x_mask, train_y_per_sentence, valid_x_numerized, valid_x_mask, valid_y_per_sentence

from utils import memo_load
# Cache key for the tokenized fine-tuning arrays. It embeds the split hash,
# so a different split yields a different cache entry.
# NOTE(review): num_sentences also shapes the result but is not part of the
# key - changing it will silently reuse the old cache entry.
finetuning_data_key = f"cache-core/training-data-finetuning-max-tokens-{max_tokenized_length}-split-{split_hash}"
# Pass the function itself (as the split cell does) instead of wrapping it
# in a lambda that only forwards the call.
(train_x_numerized, train_x_mask, train_y_per_sentence, valid_x_numerized, valid_x_mask, valid_y_per_sentence) = memo_load(
    get_finetuning_data,
    finetuning_data_key
)
# Hash of the cache file; the fine-tuned-model cache key embeds it.
finetuning_data_hash = hash_file(finetuning_data_key + ".hkl")

In [11]:
from model import ReviewPredictionModel
import torch.optim as optim

def train_finetuning():
    """Fine-tune a pretrained BERT classifier on the sentence-level data.

    Reads the notebook-level fine-tuning settings and the arrays produced by
    get_finetuning_data. The BERT head has one output when
    ``regressive_style_finetuning`` is true, otherwise nine.

    Returns:
        ``(embedding_bert, training_accuracies_finetuning,
        validation_accuracies_finetuning)``. The model is returned in eval
        mode, matching what load_finetuning returns from the cache.
    """
    num_labels = 1 if regressive_style_finetuning else 9
    embedding_bert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

    # The wrapper's own transformer is swapped for the BERT created above, so
    # training the wrapper updates embedding_bert in place.
    # NOTE(review): the recorded run output also logs a DistilBERT
    # initialisation, presumably from ReviewPredictionModel's constructor,
    # which is then discarded here - confirm.
    model_to_train_finetuning = ReviewPredictionModel(0, max_tokenized_length, regressive_bert_style=regressive_style_finetuning)
    model_to_train_finetuning.transformer = embedding_bert
    model_to_train_finetuning.to(device)
    optimizer = optim.Adam(model_to_train_finetuning.parameters(), lr=lr_finetuning)

    training_accuracies_finetuning, validation_accuracies_finetuning = run_training_loop(
        model_to_train_finetuning, optimizer, device,
        batch_size_finetuning, epochs_finetuning,
        train_x_numerized, train_x_mask, train_y_per_sentence, valid_x_numerized, valid_x_mask, valid_y_per_sentence,
        max_validation_examples=256,
        model_id=experiment_id, tag="finetuning"
    )

    # load_finetuning hands back the model in eval mode; do the same here so
    # a fresh run and a cache hit give downstream cells the same model state.
    embedding_bert.eval()
    return embedding_bert, training_accuracies_finetuning, validation_accuracies_finetuning

def store_finetuning(tup, folder):
    """Write a fine-tuning result into ``folder``.

    ``tup`` is the 3-tuple returned by train_finetuning: the model, then the
    training and validation accuracy histories. The model's state dict goes
    to ``model.pt`` and the two histories, as a pair, to ``accuracies.hkl``.
    """
    bert_model, train_history, valid_history = tup
    weights_path = f"{folder}/model.pt"
    accuracies_path = f"{folder}/accuracies.hkl"
    th.save(bert_model.state_dict(), weights_path)
    hickle.dump((train_history, valid_history), accuracies_path, mode="w")

def load_finetuning(folder):
    """Restore a fine-tuning result previously written by store_finetuning.

    Rebuilds the BERT classifier with the head size selected by
    ``regressive_style_finetuning`` (1 output if true, else 9), loads the
    weights from ``{folder}/model.pt``, switches the model to eval mode and
    moves it to ``device``.

    Returns:
        ``(embedding_bert, training_accuracies_finetuning,
        validation_accuracies_finetuning)``.
    """
    num_labels = 1 if regressive_style_finetuning else 9
    embedding_bert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
    # map_location lets a checkpoint saved on a GPU machine load on the CPU
    # fallback that `device` allows; without it th.load tries to restore the
    # tensors onto the device they were saved from.
    state_dict = th.load(f"{folder}/model.pt", map_location=device)
    embedding_bert.load_state_dict(state_dict)
    embedding_bert.eval()
    embedding_bert.to(device)
    training_accuracies_finetuning, validation_accuracies_finetuning = hickle.load(f"{folder}/accuracies.hkl")
    return embedding_bert, training_accuracies_finetuning, validation_accuracies_finetuning

In [12]:
# run_training_loop is referenced inside train_finetuning (defined in the
# previous cell); the name is only looked up when that function is called
# below, so importing it here is early enough.
from training_utils import run_training_loop

from utils import memo_load
# Cache key for the fine-tuned model: the fine-tuning settings plus the hash
# of the tokenized data they were trained on.
finetuning_model_key = f"cache-core/finetuning-batch-size-{batch_size_finetuning}-epochs-{epochs_finetuning}-lr-{lr_finetuning}-regressive-{regressive_style_finetuning}-data-{finetuning_data_hash}"
# manual_memo is not defined in this notebook (presumably from utils' star
# import). It presumably trains and stores on a cache miss and calls
# load_finetuning on a hit, treating the key as a folder - confirm.
embedding_bert, training_accuracies_finetuning, validation_accuracies_finetuning = manual_memo(
    train_finetuning, store_finetuning, load_finetuning,
    finetuning_model_key
)

# Copy the result into this experiment's own output folder as well.
th.save(embedding_bert.state_dict(), f"{experiment_dir}/finetuned-bert.pt")
hickle.dump((training_accuracies_finetuning, validation_accuracies_finetuning), f"{experiment_dir}/finetuning-accuracies.hkl", mode="w")

# Hash of the cached weights file (model.pt inside the cache-key folder).
finetuning_model_hash = hash_file(finetuning_model_key + "/model.pt")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/37094 [00:00<?, ?it/s]

Epoch: 0 Iteration: 0 Loss: 3.124 Validation Loss: 2.587 Accuracy: 0.031 Validation Accuracy: 0.051:   0%|          | 0/37094 [00:00<?, ?it/s]

Epoch: 0 Iteration: 0 Loss: 3.124 Validation Loss: 2.587 Accuracy: 0.031 Validation Accuracy: 0.051:   0%|          | 1/37094 [00:00<6:37:58,  1.55it/s]

Epoch: 0 Iteration: 1 Loss: 2.735 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 1/37094 [00:00<6:37:58,  1.55it/s]

Epoch: 0 Iteration: 2 Loss: 2.821 Validation Loss: 2.587 Accuracy: 0.052 Validation Accuracy: 0.051:   0%|          | 1/37094 [00:00<6:37:58,  1.55it/s]

Epoch: 0 Iteration: 2 Loss: 2.821 Validation Loss: 2.587 Accuracy: 0.052 Validation Accuracy: 0.051:   0%|          | 3/37094 [00:00<2:25:35,  4.25it/s]

Epoch: 0 Iteration: 3 Loss: 2.831 Validation Loss: 2.587 Accuracy: 0.055 Validation Accuracy: 0.051:   0%|          | 3/37094 [00:00<2:25:35,  4.25it/s]

Epoch: 0 Iteration: 4 Loss: 2.890 Validation Loss: 2.587 Accuracy: 0.056 Validation Accuracy: 0.051:   0%|          | 3/37094 [00:01<2:25:35,  4.25it/s]

Epoch: 0 Iteration: 4 Loss: 2.890 Validation Loss: 2.587 Accuracy: 0.056 Validation Accuracy: 0.051:   0%|          | 5/37094 [00:01<1:41:31,  6.09it/s]

Epoch: 0 Iteration: 5 Loss: 2.971 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 5/37094 [00:01<1:41:31,  6.09it/s]

Epoch: 0 Iteration: 6 Loss: 2.943 Validation Loss: 2.587 Accuracy: 0.040 Validation Accuracy: 0.051:   0%|          | 5/37094 [00:01<1:41:31,  6.09it/s]

Epoch: 0 Iteration: 6 Loss: 2.943 Validation Loss: 2.587 Accuracy: 0.040 Validation Accuracy: 0.051:   0%|          | 7/37094 [00:01<1:23:52,  7.37it/s]

Epoch: 0 Iteration: 7 Loss: 2.863 Validation Loss: 2.587 Accuracy: 0.035 Validation Accuracy: 0.051:   0%|          | 7/37094 [00:01<1:23:52,  7.37it/s]

Epoch: 0 Iteration: 8 Loss: 2.862 Validation Loss: 2.587 Accuracy: 0.035 Validation Accuracy: 0.051:   0%|          | 7/37094 [00:01<1:23:52,  7.37it/s]

Epoch: 0 Iteration: 8 Loss: 2.862 Validation Loss: 2.587 Accuracy: 0.035 Validation Accuracy: 0.051:   0%|          | 9/37094 [00:01<1:14:51,  8.26it/s]

Epoch: 0 Iteration: 9 Loss: 2.892 Validation Loss: 2.587 Accuracy: 0.031 Validation Accuracy: 0.051:   0%|          | 9/37094 [00:01<1:14:51,  8.26it/s]

Epoch: 0 Iteration: 10 Loss: 2.858 Validation Loss: 2.587 Accuracy: 0.031 Validation Accuracy: 0.051:   0%|          | 9/37094 [00:01<1:14:51,  8.26it/s]

Epoch: 0 Iteration: 10 Loss: 2.858 Validation Loss: 2.587 Accuracy: 0.031 Validation Accuracy: 0.051:   0%|          | 11/37094 [00:01<1:09:34,  8.88it/s]

Epoch: 0 Iteration: 11 Loss: 2.958 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 11/37094 [00:01<1:09:34,  8.88it/s]

Epoch: 0 Iteration: 12 Loss: 2.916 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 11/37094 [00:01<1:09:34,  8.88it/s]

Epoch: 0 Iteration: 12 Loss: 2.916 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 13/37094 [00:01<1:06:20,  9.32it/s]

Epoch: 0 Iteration: 13 Loss: 2.940 Validation Loss: 2.587 Accuracy: 0.016 Validation Accuracy: 0.051:   0%|          | 13/37094 [00:01<1:06:20,  9.32it/s]

Epoch: 0 Iteration: 14 Loss: 2.965 Validation Loss: 2.587 Accuracy: 0.009 Validation Accuracy: 0.051:   0%|          | 13/37094 [00:01<1:06:20,  9.32it/s]

Epoch: 0 Iteration: 14 Loss: 2.965 Validation Loss: 2.587 Accuracy: 0.009 Validation Accuracy: 0.051:   0%|          | 15/37094 [00:01<1:04:13,  9.62it/s]

Epoch: 0 Iteration: 15 Loss: 2.995 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 15/37094 [00:02<1:04:13,  9.62it/s]

Epoch: 0 Iteration: 16 Loss: 2.985 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 15/37094 [00:02<1:04:13,  9.62it/s]

Epoch: 0 Iteration: 16 Loss: 2.985 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 17/37094 [00:02<1:02:49,  9.84it/s]

Epoch: 0 Iteration: 17 Loss: 2.983 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 17/37094 [00:02<1:02:49,  9.84it/s]

Epoch: 0 Iteration: 18 Loss: 2.990 Validation Loss: 2.587 Accuracy: 0.009 Validation Accuracy: 0.051:   0%|          | 17/37094 [00:02<1:02:49,  9.84it/s]

Epoch: 0 Iteration: 18 Loss: 2.990 Validation Loss: 2.587 Accuracy: 0.009 Validation Accuracy: 0.051:   0%|          | 19/37094 [00:02<1:01:59,  9.97it/s]

Epoch: 0 Iteration: 19 Loss: 2.978 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 19/37094 [00:02<1:01:59,  9.97it/s]

Epoch: 0 Iteration: 20 Loss: 2.945 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 19/37094 [00:02<1:01:59,  9.97it/s]

Epoch: 0 Iteration: 20 Loss: 2.945 Validation Loss: 2.587 Accuracy: 0.013 Validation Accuracy: 0.051:   0%|          | 21/37094 [00:02<1:01:18, 10.08it/s]

Epoch: 0 Iteration: 21 Loss: 2.936 Validation Loss: 2.587 Accuracy: 0.016 Validation Accuracy: 0.051:   0%|          | 21/37094 [00:02<1:01:18, 10.08it/s]

Epoch: 0 Iteration: 22 Loss: 2.932 Validation Loss: 2.587 Accuracy: 0.016 Validation Accuracy: 0.051:   0%|          | 21/37094 [00:02<1:01:18, 10.08it/s]

Epoch: 0 Iteration: 22 Loss: 2.932 Validation Loss: 2.587 Accuracy: 0.016 Validation Accuracy: 0.051:   0%|          | 23/37094 [00:02<1:00:54, 10.15it/s]

Epoch: 0 Iteration: 23 Loss: 2.945 Validation Loss: 2.587 Accuracy: 0.019 Validation Accuracy: 0.051:   0%|          | 23/37094 [00:02<1:00:54, 10.15it/s]

Epoch: 0 Iteration: 24 Loss: 2.965 Validation Loss: 2.587 Accuracy: 0.019 Validation Accuracy: 0.051:   0%|          | 23/37094 [00:02<1:00:54, 10.15it/s]

Epoch: 0 Iteration: 24 Loss: 2.965 Validation Loss: 2.587 Accuracy: 0.019 Validation Accuracy: 0.051:   0%|          | 25/37094 [00:02<1:00:33, 10.20it/s]

Epoch: 0 Iteration: 25 Loss: 2.925 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 25/37094 [00:03<1:00:33, 10.20it/s]

Epoch: 0 Iteration: 26 Loss: 2.946 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 25/37094 [00:03<1:00:33, 10.20it/s]

Epoch: 0 Iteration: 26 Loss: 2.946 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 27/37094 [00:03<1:00:15, 10.25it/s]

Epoch: 0 Iteration: 27 Loss: 2.992 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 27/37094 [00:03<1:00:15, 10.25it/s]

Epoch: 0 Iteration: 28 Loss: 2.990 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 27/37094 [00:03<1:00:15, 10.25it/s]

Epoch: 0 Iteration: 28 Loss: 2.990 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 29/37094 [00:03<1:00:04, 10.28it/s]

Epoch: 0 Iteration: 29 Loss: 2.991 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 29/37094 [00:03<1:00:04, 10.28it/s]

Epoch: 0 Iteration: 30 Loss: 3.015 Validation Loss: 2.587 Accuracy: 0.028 Validation Accuracy: 0.051:   0%|          | 29/37094 [00:03<1:00:04, 10.28it/s]

Epoch: 0 Iteration: 30 Loss: 3.015 Validation Loss: 2.587 Accuracy: 0.028 Validation Accuracy: 0.051:   0%|          | 31/37094 [00:03<59:56, 10.31it/s]  

Epoch: 0 Iteration: 31 Loss: 2.949 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 31/37094 [00:03<59:56, 10.31it/s]

Epoch: 0 Iteration: 32 Loss: 2.964 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 31/37094 [00:03<59:56, 10.31it/s]

Epoch: 0 Iteration: 32 Loss: 2.964 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 33/37094 [00:03<59:53, 10.31it/s]

Epoch: 0 Iteration: 33 Loss: 2.897 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 33/37094 [00:03<59:53, 10.31it/s]

Epoch: 0 Iteration: 34 Loss: 2.871 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 33/37094 [00:03<59:53, 10.31it/s]

Epoch: 0 Iteration: 34 Loss: 2.871 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 35/37094 [00:03<59:47, 10.33it/s]

Epoch: 0 Iteration: 35 Loss: 2.839 Validation Loss: 2.587 Accuracy: 0.016 Validation Accuracy: 0.051:   0%|          | 35/37094 [00:04<59:47, 10.33it/s]

Epoch: 0 Iteration: 36 Loss: 2.806 Validation Loss: 2.587 Accuracy: 0.019 Validation Accuracy: 0.051:   0%|          | 35/37094 [00:04<59:47, 10.33it/s]

Epoch: 0 Iteration: 36 Loss: 2.806 Validation Loss: 2.587 Accuracy: 0.019 Validation Accuracy: 0.051:   0%|          | 37/37094 [00:04<59:44, 10.34it/s]

Epoch: 0 Iteration: 37 Loss: 2.814 Validation Loss: 2.587 Accuracy: 0.019 Validation Accuracy: 0.051:   0%|          | 37/37094 [00:04<59:44, 10.34it/s]

Epoch: 0 Iteration: 38 Loss: 2.792 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 37/37094 [00:04<59:44, 10.34it/s]

Epoch: 0 Iteration: 38 Loss: 2.792 Validation Loss: 2.587 Accuracy: 0.022 Validation Accuracy: 0.051:   0%|          | 39/37094 [00:04<59:43, 10.34it/s]

Epoch: 0 Iteration: 39 Loss: 2.782 Validation Loss: 2.587 Accuracy: 0.028 Validation Accuracy: 0.051:   0%|          | 39/37094 [00:04<59:43, 10.34it/s]

Epoch: 0 Iteration: 40 Loss: 2.747 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 39/37094 [00:04<59:43, 10.34it/s]

Epoch: 0 Iteration: 40 Loss: 2.747 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 41/37094 [00:04<59:42, 10.34it/s]

Epoch: 0 Iteration: 41 Loss: 2.794 Validation Loss: 2.587 Accuracy: 0.025 Validation Accuracy: 0.051:   0%|          | 41/37094 [00:04<59:42, 10.34it/s]

Epoch: 0 Iteration: 42 Loss: 2.775 Validation Loss: 2.587 Accuracy: 0.041 Validation Accuracy: 0.051:   0%|          | 41/37094 [00:04<59:42, 10.34it/s]

Epoch: 0 Iteration: 42 Loss: 2.775 Validation Loss: 2.587 Accuracy: 0.041 Validation Accuracy: 0.051:   0%|          | 43/37094 [00:04<59:42, 10.34it/s]

Epoch: 0 Iteration: 43 Loss: 2.753 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 43/37094 [00:04<59:42, 10.34it/s]

Epoch: 0 Iteration: 44 Loss: 2.717 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 43/37094 [00:04<59:42, 10.34it/s]

Epoch: 0 Iteration: 44 Loss: 2.717 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 45/37094 [00:04<59:40, 10.35it/s]

Epoch: 0 Iteration: 45 Loss: 2.712 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 45/37094 [00:04<59:40, 10.35it/s]

Epoch: 0 Iteration: 46 Loss: 2.654 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 45/37094 [00:05<59:40, 10.35it/s]

Epoch: 0 Iteration: 46 Loss: 2.654 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 47/37094 [00:05<59:39, 10.35it/s]

Epoch: 0 Iteration: 47 Loss: 2.566 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 47/37094 [00:05<59:39, 10.35it/s]

Epoch: 0 Iteration: 48 Loss: 2.541 Validation Loss: 2.587 Accuracy: 0.075 Validation Accuracy: 0.051:   0%|          | 47/37094 [00:05<59:39, 10.35it/s]

Epoch: 0 Iteration: 48 Loss: 2.541 Validation Loss: 2.587 Accuracy: 0.075 Validation Accuracy: 0.051:   0%|          | 49/37094 [00:05<59:38, 10.35it/s]

Epoch: 0 Iteration: 49 Loss: 2.495 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 49/37094 [00:05<59:38, 10.35it/s]

Epoch: 0 Iteration: 50 Loss: 2.447 Validation Loss: 2.587 Accuracy: 0.078 Validation Accuracy: 0.051:   0%|          | 49/37094 [00:05<59:38, 10.35it/s]

Epoch: 0 Iteration: 50 Loss: 2.447 Validation Loss: 2.587 Accuracy: 0.078 Validation Accuracy: 0.051:   0%|          | 51/37094 [00:05<59:38, 10.35it/s]

Epoch: 0 Iteration: 51 Loss: 2.431 Validation Loss: 2.587 Accuracy: 0.087 Validation Accuracy: 0.051:   0%|          | 51/37094 [00:05<59:38, 10.35it/s]

Epoch: 0 Iteration: 52 Loss: 2.410 Validation Loss: 2.587 Accuracy: 0.075 Validation Accuracy: 0.051:   0%|          | 51/37094 [00:05<59:38, 10.35it/s]

Epoch: 0 Iteration: 52 Loss: 2.410 Validation Loss: 2.587 Accuracy: 0.075 Validation Accuracy: 0.051:   0%|          | 53/37094 [00:05<59:37, 10.36it/s]

Epoch: 0 Iteration: 53 Loss: 2.409 Validation Loss: 2.587 Accuracy: 0.066 Validation Accuracy: 0.051:   0%|          | 53/37094 [00:05<59:37, 10.36it/s]

Epoch: 0 Iteration: 54 Loss: 2.377 Validation Loss: 2.587 Accuracy: 0.078 Validation Accuracy: 0.051:   0%|          | 53/37094 [00:05<59:37, 10.36it/s]

Epoch: 0 Iteration: 54 Loss: 2.377 Validation Loss: 2.587 Accuracy: 0.078 Validation Accuracy: 0.051:   0%|          | 55/37094 [00:05<59:37, 10.35it/s]

Epoch: 0 Iteration: 55 Loss: 2.369 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 55/37094 [00:05<59:37, 10.35it/s]

Epoch: 0 Iteration: 56 Loss: 2.351 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 55/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 56 Loss: 2.351 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 57/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 57 Loss: 2.281 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 57/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 58 Loss: 2.225 Validation Loss: 2.587 Accuracy: 0.091 Validation Accuracy: 0.051:   0%|          | 57/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 58 Loss: 2.225 Validation Loss: 2.587 Accuracy: 0.091 Validation Accuracy: 0.051:   0%|          | 59/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 59 Loss: 2.250 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 59/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 60 Loss: 2.260 Validation Loss: 2.587 Accuracy: 0.087 Validation Accuracy: 0.051:   0%|          | 59/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 60 Loss: 2.260 Validation Loss: 2.587 Accuracy: 0.087 Validation Accuracy: 0.051:   0%|          | 61/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 61 Loss: 2.231 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 61/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 62 Loss: 2.193 Validation Loss: 2.587 Accuracy: 0.094 Validation Accuracy: 0.051:   0%|          | 61/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 62 Loss: 2.193 Validation Loss: 2.587 Accuracy: 0.094 Validation Accuracy: 0.051:   0%|          | 63/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 63 Loss: 2.147 Validation Loss: 2.587 Accuracy: 0.116 Validation Accuracy: 0.051:   0%|          | 63/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 64 Loss: 2.089 Validation Loss: 2.587 Accuracy: 0.116 Validation Accuracy: 0.051:   0%|          | 63/37094 [00:06<59:37, 10.35it/s]

Epoch: 0 Iteration: 64 Loss: 2.089 Validation Loss: 2.587 Accuracy: 0.116 Validation Accuracy: 0.051:   0%|          | 65/37094 [00:06<59:36, 10.35it/s]

Epoch: 0 Iteration: 65 Loss: 2.101 Validation Loss: 2.587 Accuracy: 0.109 Validation Accuracy: 0.051:   0%|          | 65/37094 [00:06<59:36, 10.35it/s]

Epoch: 0 Iteration: 66 Loss: 2.114 Validation Loss: 2.587 Accuracy: 0.097 Validation Accuracy: 0.051:   0%|          | 65/37094 [00:07<59:36, 10.35it/s]

Epoch: 0 Iteration: 66 Loss: 2.114 Validation Loss: 2.587 Accuracy: 0.097 Validation Accuracy: 0.051:   0%|          | 67/37094 [00:07<59:40, 10.34it/s]

Epoch: 0 Iteration: 67 Loss: 2.164 Validation Loss: 2.587 Accuracy: 0.094 Validation Accuracy: 0.051:   0%|          | 67/37094 [00:07<59:40, 10.34it/s]

Epoch: 0 Iteration: 68 Loss: 2.129 Validation Loss: 2.587 Accuracy: 0.081 Validation Accuracy: 0.051:   0%|          | 67/37094 [00:07<59:40, 10.34it/s]

Epoch: 0 Iteration: 68 Loss: 2.129 Validation Loss: 2.587 Accuracy: 0.081 Validation Accuracy: 0.051:   0%|          | 69/37094 [00:07<1:00:11, 10.25it/s]

Epoch: 0 Iteration: 69 Loss: 2.091 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 69/37094 [00:07<1:00:11, 10.25it/s]

Epoch: 0 Iteration: 70 Loss: 2.080 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 69/37094 [00:07<1:00:11, 10.25it/s]

Epoch: 0 Iteration: 70 Loss: 2.080 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 71/37094 [00:07<1:00:01, 10.28it/s]

Epoch: 0 Iteration: 71 Loss: 2.066 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 71/37094 [00:07<1:00:01, 10.28it/s]

Epoch: 0 Iteration: 72 Loss: 2.046 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 71/37094 [00:07<1:00:01, 10.28it/s]

Epoch: 0 Iteration: 72 Loss: 2.046 Validation Loss: 2.587 Accuracy: 0.072 Validation Accuracy: 0.051:   0%|          | 73/37094 [00:07<59:57, 10.29it/s]  

Epoch: 0 Iteration: 73 Loss: 2.072 Validation Loss: 2.587 Accuracy: 0.053 Validation Accuracy: 0.051:   0%|          | 73/37094 [00:07<59:57, 10.29it/s]

Epoch: 0 Iteration: 74 Loss: 2.077 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 73/37094 [00:07<59:57, 10.29it/s]

Epoch: 0 Iteration: 74 Loss: 2.077 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 75/37094 [00:07<59:50, 10.31it/s]

Epoch: 0 Iteration: 75 Loss: 2.038 Validation Loss: 2.587 Accuracy: 0.044 Validation Accuracy: 0.051:   0%|          | 75/37094 [00:07<59:50, 10.31it/s]

Epoch: 0 Iteration: 76 Loss: 2.046 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 75/37094 [00:07<59:50, 10.31it/s]

Epoch: 0 Iteration: 76 Loss: 2.046 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 77/37094 [00:07<59:46, 10.32it/s]

Epoch: 0 Iteration: 77 Loss: 2.053 Validation Loss: 2.587 Accuracy: 0.041 Validation Accuracy: 0.051:   0%|          | 77/37094 [00:08<59:46, 10.32it/s]

Epoch: 0 Iteration: 78 Loss: 2.093 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 77/37094 [00:08<59:46, 10.32it/s]

Epoch: 0 Iteration: 78 Loss: 2.093 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 79/37094 [00:08<59:43, 10.33it/s]

Epoch: 0 Iteration: 79 Loss: 2.008 Validation Loss: 2.587 Accuracy: 0.047 Validation Accuracy: 0.051:   0%|          | 79/37094 [00:08<59:43, 10.33it/s]

Epoch: 0 Iteration: 80 Loss: 1.961 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 79/37094 [00:08<59:43, 10.33it/s]

Epoch: 0 Iteration: 80 Loss: 1.961 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 81/37094 [00:08<59:40, 10.34it/s]

Epoch: 0 Iteration: 81 Loss: 1.910 Validation Loss: 2.587 Accuracy: 0.066 Validation Accuracy: 0.051:   0%|          | 81/37094 [00:08<59:40, 10.34it/s]

Epoch: 0 Iteration: 82 Loss: 1.891 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 81/37094 [00:08<59:40, 10.34it/s]

Epoch: 0 Iteration: 82 Loss: 1.891 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 83/37094 [00:08<59:38, 10.34it/s]

Epoch: 0 Iteration: 83 Loss: 1.901 Validation Loss: 2.587 Accuracy: 0.069 Validation Accuracy: 0.051:   0%|          | 83/37094 [00:08<59:38, 10.34it/s]

Epoch: 0 Iteration: 84 Loss: 1.896 Validation Loss: 2.587 Accuracy: 0.066 Validation Accuracy: 0.051:   0%|          | 83/37094 [00:08<59:38, 10.34it/s]

Epoch: 0 Iteration: 84 Loss: 1.896 Validation Loss: 2.587 Accuracy: 0.066 Validation Accuracy: 0.051:   0%|          | 85/37094 [00:08<59:37, 10.35it/s]

Epoch: 0 Iteration: 85 Loss: 1.854 Validation Loss: 2.587 Accuracy: 0.075 Validation Accuracy: 0.051:   0%|          | 85/37094 [00:08<59:37, 10.35it/s]

Epoch: 0 Iteration: 86 Loss: 1.809 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 85/37094 [00:08<59:37, 10.35it/s]

Epoch: 0 Iteration: 86 Loss: 1.809 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 87/37094 [00:08<59:35, 10.35it/s]

Epoch: 0 Iteration: 87 Loss: 1.815 Validation Loss: 2.587 Accuracy: 0.084 Validation Accuracy: 0.051:   0%|          | 87/37094 [00:09<59:35, 10.35it/s]

Epoch: 0 Iteration: 88 Loss: 1.839 Validation Loss: 2.587 Accuracy: 0.081 Validation Accuracy: 0.051:   0%|          | 87/37094 [00:09<59:35, 10.35it/s]

Epoch: 0 Iteration: 88 Loss: 1.839 Validation Loss: 2.587 Accuracy: 0.081 Validation Accuracy: 0.051:   0%|          | 89/37094 [00:09<59:35, 10.35it/s]

Epoch: 0 Iteration: 89 Loss: 1.859 Validation Loss: 2.587 Accuracy: 0.081 Validation Accuracy: 0.051:   0%|          | 89/37094 [00:09<59:35, 10.35it/s]

Epoch: 0 Iteration: 90 Loss: 1.890 Validation Loss: 2.587 Accuracy: 0.069 Validation Accuracy: 0.051:   0%|          | 89/37094 [00:09<59:35, 10.35it/s]

Epoch: 0 Iteration: 90 Loss: 1.890 Validation Loss: 2.587 Accuracy: 0.069 Validation Accuracy: 0.051:   0%|          | 91/37094 [00:09<59:37, 10.34it/s]

Epoch: 0 Iteration: 91 Loss: 1.859 Validation Loss: 2.587 Accuracy: 0.062 Validation Accuracy: 0.051:   0%|          | 91/37094 [00:09<59:37, 10.34it/s]

Epoch: 0 Iteration: 92 Loss: 1.827 Validation Loss: 2.587 Accuracy: 0.059 Validation Accuracy: 0.051:   0%|          | 91/37094 [00:09<59:37, 10.34it/s]

Epoch: 0 Iteration: 92 Loss: 1.827 Validation Loss: 2.587 Accuracy: 0.059 Validation Accuracy: 0.051:   0%|          | 93/37094 [00:09<59:36, 10.34it/s]

Epoch: 0 Iteration: 93 Loss: 1.770 Validation Loss: 2.587 Accuracy: 0.059 Validation Accuracy: 0.051:   0%|          | 93/37094 [00:09<59:36, 10.34it/s]

Epoch: 0 Iteration: 94 Loss: 1.711 Validation Loss: 2.587 Accuracy: 0.059 Validation Accuracy: 0.051:   0%|          | 93/37094 [00:09<59:36, 10.34it/s]

Epoch: 0 Iteration: 94 Loss: 1.711 Validation Loss: 2.587 Accuracy: 0.059 Validation Accuracy: 0.051:   0%|          | 95/37094 [00:09<59:36, 10.35it/s]

Epoch: 0 Iteration: 95 Loss: 1.723 Validation Loss: 2.587 Accuracy: 0.053 Validation Accuracy: 0.051:   0%|          | 95/37094 [00:09<59:36, 10.35it/s]

Epoch: 0 Iteration: 96 Loss: 1.670 Validation Loss: 2.587 Accuracy: 0.037 Validation Accuracy: 0.051:   0%|          | 95/37094 [00:09<59:36, 10.35it/s]

Epoch: 0 Iteration: 96 Loss: 1.670 Validation Loss: 2.587 Accuracy: 0.037 Validation Accuracy: 0.051:   0%|          | 97/37094 [00:09<59:35, 10.35it/s]

Epoch: 0 Iteration: 97 Loss: 1.687 Validation Loss: 2.587 Accuracy: 0.044 Validation Accuracy: 0.051:   0%|          | 97/37094 [00:10<59:35, 10.35it/s]

Epoch: 0 Iteration: 98 Loss: 1.662 Validation Loss: 2.587 Accuracy: 0.041 Validation Accuracy: 0.051:   0%|          | 97/37094 [00:10<59:35, 10.35it/s]

Epoch: 0 Iteration: 98 Loss: 1.662 Validation Loss: 2.587 Accuracy: 0.041 Validation Accuracy: 0.051:   0%|          | 99/37094 [00:10<59:34, 10.35it/s]

Epoch: 0 Iteration: 99 Loss: 1.666 Validation Loss: 2.587 Accuracy: 0.037 Validation Accuracy: 0.051:   0%|          | 99/37094 [00:10<59:34, 10.35it/s]

Epoch: 0 Iteration: 100 Loss: 1.678 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 99/37094 [00:10<59:34, 10.35it/s]

Epoch: 0 Iteration: 100 Loss: 1.678 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 101/37094 [00:10<1:50:33,  5.58it/s]

Epoch: 0 Iteration: 101 Loss: 1.715 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 101/37094 [00:10<1:50:33,  5.58it/s]

Epoch: 0 Iteration: 102 Loss: 1.742 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 101/37094 [00:11<1:50:33,  5.58it/s]

Epoch: 0 Iteration: 102 Loss: 1.742 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 103/37094 [00:11<1:34:32,  6.52it/s]

Epoch: 0 Iteration: 103 Loss: 1.721 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 103/37094 [00:11<1:34:32,  6.52it/s]

Epoch: 0 Iteration: 104 Loss: 1.737 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 103/37094 [00:11<1:34:32,  6.52it/s]

Epoch: 0 Iteration: 104 Loss: 1.737 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 105/37094 [00:11<1:24:08,  7.33it/s]

Epoch: 0 Iteration: 105 Loss: 1.763 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 105/37094 [00:11<1:24:08,  7.33it/s]

Epoch: 0 Iteration: 106 Loss: 1.786 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 105/37094 [00:11<1:24:08,  7.33it/s]

Epoch: 0 Iteration: 106 Loss: 1.786 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 107/37094 [00:11<1:16:49,  8.02it/s]

Epoch: 0 Iteration: 107 Loss: 1.764 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 107/37094 [00:11<1:16:49,  8.02it/s]

Epoch: 0 Iteration: 108 Loss: 1.749 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   0%|          | 107/37094 [00:11<1:16:49,  8.02it/s]

Epoch: 0 Iteration: 108 Loss: 1.749 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   0%|          | 109/37094 [00:11<1:11:40,  8.60it/s]

Epoch: 0 Iteration: 109 Loss: 1.743 Validation Loss: 1.607 Accuracy: 0.084 Validation Accuracy: 0.076:   0%|          | 109/37094 [00:11<1:11:40,  8.60it/s]

Epoch: 0 Iteration: 110 Loss: 1.728 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 109/37094 [00:11<1:11:40,  8.60it/s]

Epoch: 0 Iteration: 110 Loss: 1.728 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 111/37094 [00:11<1:08:07,  9.05it/s]

Epoch: 0 Iteration: 111 Loss: 1.748 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 111/37094 [00:11<1:08:07,  9.05it/s]

Epoch: 0 Iteration: 112 Loss: 1.789 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 111/37094 [00:12<1:08:07,  9.05it/s]

Epoch: 0 Iteration: 112 Loss: 1.789 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 113/37094 [00:12<1:05:37,  9.39it/s]

Epoch: 0 Iteration: 113 Loss: 1.851 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   0%|          | 113/37094 [00:12<1:05:37,  9.39it/s]

Epoch: 0 Iteration: 114 Loss: 1.891 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   0%|          | 113/37094 [00:12<1:05:37,  9.39it/s]

Epoch: 0 Iteration: 114 Loss: 1.891 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   0%|          | 115/37094 [00:12<1:03:55,  9.64it/s]

Epoch: 0 Iteration: 115 Loss: 1.915 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 115/37094 [00:12<1:03:55,  9.64it/s]

Epoch: 0 Iteration: 116 Loss: 1.986 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 115/37094 [00:12<1:03:55,  9.64it/s]

Epoch: 0 Iteration: 116 Loss: 1.986 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 117/37094 [00:12<1:02:41,  9.83it/s]

Epoch: 0 Iteration: 117 Loss: 1.943 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   0%|          | 117/37094 [00:12<1:02:41,  9.83it/s]

Epoch: 0 Iteration: 118 Loss: 1.920 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 117/37094 [00:12<1:02:41,  9.83it/s]

Epoch: 0 Iteration: 118 Loss: 1.920 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 119/37094 [00:12<1:01:49,  9.97it/s]

Epoch: 0 Iteration: 119 Loss: 1.931 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 119/37094 [00:12<1:01:49,  9.97it/s]

Epoch: 0 Iteration: 120 Loss: 1.948 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   0%|          | 119/37094 [00:12<1:01:49,  9.97it/s]

Epoch: 0 Iteration: 120 Loss: 1.948 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   0%|          | 121/37094 [00:12<1:01:11, 10.07it/s]

Epoch: 0 Iteration: 121 Loss: 1.908 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 121/37094 [00:12<1:01:11, 10.07it/s]

Epoch: 0 Iteration: 122 Loss: 1.924 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 121/37094 [00:12<1:01:11, 10.07it/s]

Epoch: 0 Iteration: 122 Loss: 1.924 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 123/37094 [00:12<1:00:45, 10.14it/s]

Epoch: 0 Iteration: 123 Loss: 1.999 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   0%|          | 123/37094 [00:13<1:00:45, 10.14it/s]

Epoch: 0 Iteration: 124 Loss: 1.977 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 123/37094 [00:13<1:00:45, 10.14it/s]

Epoch: 0 Iteration: 124 Loss: 1.977 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 125/37094 [00:13<1:00:28, 10.19it/s]

Epoch: 0 Iteration: 125 Loss: 1.969 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 125/37094 [00:13<1:00:28, 10.19it/s]

Epoch: 0 Iteration: 126 Loss: 1.968 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 125/37094 [00:13<1:00:28, 10.19it/s]

Epoch: 0 Iteration: 126 Loss: 1.968 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 127/37094 [00:13<1:00:15, 10.22it/s]

Epoch: 0 Iteration: 127 Loss: 1.936 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   0%|          | 127/37094 [00:13<1:00:15, 10.22it/s]

Epoch: 0 Iteration: 128 Loss: 1.951 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 127/37094 [00:13<1:00:15, 10.22it/s]

Epoch: 0 Iteration: 128 Loss: 1.951 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 129/37094 [00:13<1:00:05, 10.25it/s]

Epoch: 0 Iteration: 129 Loss: 1.999 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 129/37094 [00:13<1:00:05, 10.25it/s]

Epoch: 0 Iteration: 130 Loss: 1.933 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 129/37094 [00:13<1:00:05, 10.25it/s]

Epoch: 0 Iteration: 130 Loss: 1.933 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 131/37094 [00:13<59:58, 10.27it/s]  

Epoch: 0 Iteration: 131 Loss: 1.950 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 131/37094 [00:13<59:58, 10.27it/s]

Epoch: 0 Iteration: 132 Loss: 1.936 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 131/37094 [00:13<59:58, 10.27it/s]

Epoch: 0 Iteration: 132 Loss: 1.936 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 133/37094 [00:13<59:53, 10.29it/s]

Epoch: 0 Iteration: 133 Loss: 1.921 Validation Loss: 1.607 Accuracy: 0.053 Validation Accuracy: 0.076:   0%|          | 133/37094 [00:14<59:53, 10.29it/s]

Epoch: 0 Iteration: 134 Loss: 1.899 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 133/37094 [00:14<59:53, 10.29it/s]

Epoch: 0 Iteration: 134 Loss: 1.899 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 135/37094 [00:14<59:49, 10.30it/s]

Epoch: 0 Iteration: 135 Loss: 1.849 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 135/37094 [00:14<59:49, 10.30it/s]

Epoch: 0 Iteration: 136 Loss: 1.864 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 135/37094 [00:14<59:49, 10.30it/s]

Epoch: 0 Iteration: 136 Loss: 1.864 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 137/37094 [00:14<59:47, 10.30it/s]

Epoch: 0 Iteration: 137 Loss: 1.863 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 137/37094 [00:14<59:47, 10.30it/s]

Epoch: 0 Iteration: 138 Loss: 1.853 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 137/37094 [00:14<59:47, 10.30it/s]

Epoch: 0 Iteration: 138 Loss: 1.853 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 139/37094 [00:14<59:45, 10.31it/s]

Epoch: 0 Iteration: 139 Loss: 1.857 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   0%|          | 139/37094 [00:14<59:45, 10.31it/s]

Epoch: 0 Iteration: 140 Loss: 1.842 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   0%|          | 139/37094 [00:14<59:45, 10.31it/s]

Epoch: 0 Iteration: 140 Loss: 1.842 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   0%|          | 141/37094 [00:14<59:44, 10.31it/s]

Epoch: 0 Iteration: 141 Loss: 1.871 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 141/37094 [00:14<59:44, 10.31it/s]

Epoch: 0 Iteration: 142 Loss: 1.807 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 141/37094 [00:14<59:44, 10.31it/s]

Epoch: 0 Iteration: 142 Loss: 1.807 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   0%|          | 143/37094 [00:14<59:44, 10.31it/s]

Epoch: 0 Iteration: 143 Loss: 1.728 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 143/37094 [00:15<59:44, 10.31it/s]

Epoch: 0 Iteration: 144 Loss: 1.745 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 143/37094 [00:15<59:44, 10.31it/s]

Epoch: 0 Iteration: 144 Loss: 1.745 Validation Loss: 1.607 Accuracy: 0.075 Validation Accuracy: 0.076:   0%|          | 145/37094 [00:15<59:44, 10.31it/s]

Epoch: 0 Iteration: 145 Loss: 1.698 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   0%|          | 145/37094 [00:15<59:44, 10.31it/s]

Epoch: 0 Iteration: 146 Loss: 1.715 Validation Loss: 1.607 Accuracy: 0.081 Validation Accuracy: 0.076:   0%|          | 145/37094 [00:15<59:44, 10.31it/s]

Epoch: 0 Iteration: 146 Loss: 1.715 Validation Loss: 1.607 Accuracy: 0.081 Validation Accuracy: 0.076:   0%|          | 147/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 147 Loss: 1.739 Validation Loss: 1.607 Accuracy: 0.097 Validation Accuracy: 0.076:   0%|          | 147/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 148 Loss: 1.725 Validation Loss: 1.607 Accuracy: 0.091 Validation Accuracy: 0.076:   0%|          | 147/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 148 Loss: 1.725 Validation Loss: 1.607 Accuracy: 0.091 Validation Accuracy: 0.076:   0%|          | 149/37094 [00:15<59:40, 10.32it/s]

Epoch: 0 Iteration: 149 Loss: 1.667 Validation Loss: 1.607 Accuracy: 0.094 Validation Accuracy: 0.076:   0%|          | 149/37094 [00:15<59:40, 10.32it/s]

Epoch: 0 Iteration: 150 Loss: 1.679 Validation Loss: 1.607 Accuracy: 0.094 Validation Accuracy: 0.076:   0%|          | 149/37094 [00:15<59:40, 10.32it/s]

Epoch: 0 Iteration: 150 Loss: 1.679 Validation Loss: 1.607 Accuracy: 0.094 Validation Accuracy: 0.076:   0%|          | 151/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 151 Loss: 1.682 Validation Loss: 1.607 Accuracy: 0.094 Validation Accuracy: 0.076:   0%|          | 151/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 152 Loss: 1.665 Validation Loss: 1.607 Accuracy: 0.097 Validation Accuracy: 0.076:   0%|          | 151/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 152 Loss: 1.665 Validation Loss: 1.607 Accuracy: 0.097 Validation Accuracy: 0.076:   0%|          | 153/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 153 Loss: 1.642 Validation Loss: 1.607 Accuracy: 0.097 Validation Accuracy: 0.076:   0%|          | 153/37094 [00:15<59:41, 10.31it/s]

Epoch: 0 Iteration: 154 Loss: 1.698 Validation Loss: 1.607 Accuracy: 0.097 Validation Accuracy: 0.076:   0%|          | 153/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 154 Loss: 1.698 Validation Loss: 1.607 Accuracy: 0.097 Validation Accuracy: 0.076:   0%|          | 155/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 155 Loss: 1.707 Validation Loss: 1.607 Accuracy: 0.094 Validation Accuracy: 0.076:   0%|          | 155/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 156 Loss: 1.655 Validation Loss: 1.607 Accuracy: 0.100 Validation Accuracy: 0.076:   0%|          | 155/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 156 Loss: 1.655 Validation Loss: 1.607 Accuracy: 0.100 Validation Accuracy: 0.076:   0%|          | 157/37094 [00:16<59:42, 10.31it/s]

Epoch: 0 Iteration: 157 Loss: 1.668 Validation Loss: 1.607 Accuracy: 0.084 Validation Accuracy: 0.076:   0%|          | 157/37094 [00:16<59:42, 10.31it/s]

Epoch: 0 Iteration: 158 Loss: 1.718 Validation Loss: 1.607 Accuracy: 0.081 Validation Accuracy: 0.076:   0%|          | 157/37094 [00:16<59:42, 10.31it/s]

Epoch: 0 Iteration: 158 Loss: 1.718 Validation Loss: 1.607 Accuracy: 0.081 Validation Accuracy: 0.076:   0%|          | 159/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 159 Loss: 1.707 Validation Loss: 1.607 Accuracy: 0.081 Validation Accuracy: 0.076:   0%|          | 159/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 160 Loss: 1.746 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 159/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 160 Loss: 1.746 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 161/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 161 Loss: 1.720 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   0%|          | 161/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 162 Loss: 1.740 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   0%|          | 161/37094 [00:16<59:41, 10.31it/s]

Epoch: 0 Iteration: 162 Loss: 1.740 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   0%|          | 163/37094 [00:16<59:42, 10.31it/s]

Epoch: 0 Iteration: 163 Loss: 1.752 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 163/37094 [00:16<59:42, 10.31it/s]

Epoch: 0 Iteration: 164 Loss: 1.747 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 163/37094 [00:17<59:42, 10.31it/s]

Epoch: 0 Iteration: 164 Loss: 1.747 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 165/37094 [00:17<59:42, 10.31it/s]

Epoch: 0 Iteration: 165 Loss: 1.740 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 165/37094 [00:17<59:42, 10.31it/s]

Epoch: 0 Iteration: 166 Loss: 1.697 Validation Loss: 1.607 Accuracy: 0.037 Validation Accuracy: 0.076:   0%|          | 165/37094 [00:17<59:42, 10.31it/s]

Epoch: 0 Iteration: 166 Loss: 1.697 Validation Loss: 1.607 Accuracy: 0.037 Validation Accuracy: 0.076:   0%|          | 167/37094 [00:17<59:45, 10.30it/s]

Epoch: 0 Iteration: 167 Loss: 1.663 Validation Loss: 1.607 Accuracy: 0.041 Validation Accuracy: 0.076:   0%|          | 167/37094 [00:17<59:45, 10.30it/s]

Epoch: 0 Iteration: 168 Loss: 1.642 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 167/37094 [00:17<59:45, 10.30it/s]

Epoch: 0 Iteration: 168 Loss: 1.642 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 169/37094 [00:17<59:43, 10.30it/s]

Epoch: 0 Iteration: 169 Loss: 1.631 Validation Loss: 1.607 Accuracy: 0.041 Validation Accuracy: 0.076:   0%|          | 169/37094 [00:17<59:43, 10.30it/s]

Epoch: 0 Iteration: 170 Loss: 1.685 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 169/37094 [00:17<59:43, 10.30it/s]

Epoch: 0 Iteration: 170 Loss: 1.685 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 171/37094 [00:17<59:45, 10.30it/s]

Epoch: 0 Iteration: 171 Loss: 1.643 Validation Loss: 1.607 Accuracy: 0.041 Validation Accuracy: 0.076:   0%|          | 171/37094 [00:17<59:45, 10.30it/s]

Epoch: 0 Iteration: 172 Loss: 1.648 Validation Loss: 1.607 Accuracy: 0.037 Validation Accuracy: 0.076:   0%|          | 171/37094 [00:17<59:45, 10.30it/s]

Epoch: 0 Iteration: 172 Loss: 1.648 Validation Loss: 1.607 Accuracy: 0.037 Validation Accuracy: 0.076:   0%|          | 173/37094 [00:17<59:43, 10.30it/s]

Epoch: 0 Iteration: 173 Loss: 1.645 Validation Loss: 1.607 Accuracy: 0.037 Validation Accuracy: 0.076:   0%|          | 173/37094 [00:17<59:43, 10.30it/s]

Epoch: 0 Iteration: 174 Loss: 1.598 Validation Loss: 1.607 Accuracy: 0.041 Validation Accuracy: 0.076:   0%|          | 173/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 174 Loss: 1.598 Validation Loss: 1.607 Accuracy: 0.041 Validation Accuracy: 0.076:   0%|          | 175/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 175 Loss: 1.583 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 175/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 176 Loss: 1.558 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 175/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 176 Loss: 1.558 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 177/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 177 Loss: 1.591 Validation Loss: 1.607 Accuracy: 0.041 Validation Accuracy: 0.076:   0%|          | 177/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 178 Loss: 1.551 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 177/37094 [00:18<59:43, 10.30it/s]

Epoch: 0 Iteration: 178 Loss: 1.551 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 179/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 179 Loss: 1.567 Validation Loss: 1.607 Accuracy: 0.044 Validation Accuracy: 0.076:   0%|          | 179/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 180 Loss: 1.545 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 179/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 180 Loss: 1.545 Validation Loss: 1.607 Accuracy: 0.047 Validation Accuracy: 0.076:   0%|          | 181/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 181 Loss: 1.561 Validation Loss: 1.607 Accuracy: 0.050 Validation Accuracy: 0.076:   0%|          | 181/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 182 Loss: 1.581 Validation Loss: 1.607 Accuracy: 0.053 Validation Accuracy: 0.076:   0%|          | 181/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 182 Loss: 1.581 Validation Loss: 1.607 Accuracy: 0.053 Validation Accuracy: 0.076:   0%|          | 183/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 183 Loss: 1.593 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 183/37094 [00:18<59:42, 10.30it/s]

Epoch: 0 Iteration: 184 Loss: 1.568 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   0%|          | 183/37094 [00:19<59:42, 10.30it/s]

Epoch: 0 Iteration: 184 Loss: 1.568 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   0%|          | 185/37094 [00:19<59:41, 10.31it/s]

Epoch: 0 Iteration: 185 Loss: 1.592 Validation Loss: 1.607 Accuracy: 0.059 Validation Accuracy: 0.076:   0%|          | 185/37094 [00:19<59:41, 10.31it/s]

Epoch: 0 Iteration: 186 Loss: 1.633 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   0%|          | 185/37094 [00:19<59:41, 10.31it/s]

Epoch: 0 Iteration: 186 Loss: 1.633 Validation Loss: 1.607 Accuracy: 0.056 Validation Accuracy: 0.076:   1%|          | 187/37094 [00:19<59:41, 10.30it/s]

Epoch: 0 Iteration: 187 Loss: 1.630 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   1%|          | 187/37094 [00:19<59:41, 10.30it/s]

Epoch: 0 Iteration: 188 Loss: 1.660 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   1%|          | 187/37094 [00:19<59:41, 10.30it/s]

Epoch: 0 Iteration: 188 Loss: 1.660 Validation Loss: 1.607 Accuracy: 0.062 Validation Accuracy: 0.076:   1%|          | 189/37094 [00:19<59:43, 10.30it/s]

Epoch: 0 Iteration: 189 Loss: 1.682 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   1%|          | 189/37094 [00:19<59:43, 10.30it/s]

Epoch: 0 Iteration: 190 Loss: 1.643 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   1%|          | 189/37094 [00:19<59:43, 10.30it/s]

Epoch: 0 Iteration: 190 Loss: 1.643 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   1%|          | 191/37094 [00:19<59:40, 10.31it/s]

Epoch: 0 Iteration: 191 Loss: 1.659 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   1%|          | 191/37094 [00:19<59:40, 10.31it/s]

Epoch: 0 Iteration: 192 Loss: 1.662 Validation Loss: 1.607 Accuracy: 0.084 Validation Accuracy: 0.076:   1%|          | 191/37094 [00:19<59:40, 10.31it/s]

Epoch: 0 Iteration: 192 Loss: 1.662 Validation Loss: 1.607 Accuracy: 0.084 Validation Accuracy: 0.076:   1%|          | 193/37094 [00:19<59:41, 10.30it/s]

Epoch: 0 Iteration: 193 Loss: 1.634 Validation Loss: 1.607 Accuracy: 0.078 Validation Accuracy: 0.076:   1%|          | 193/37094 [00:19<59:41, 10.30it/s]

Epoch: 0 Iteration: 194 Loss: 1.637 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   1%|          | 193/37094 [00:19<59:41, 10.30it/s]

Epoch: 0 Iteration: 194 Loss: 1.637 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   1%|          | 195/37094 [00:19<59:40, 10.31it/s]

Epoch: 0 Iteration: 195 Loss: 1.634 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   1%|          | 195/37094 [00:20<59:40, 10.31it/s]

Epoch: 0 Iteration: 196 Loss: 1.701 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   1%|          | 195/37094 [00:20<59:40, 10.31it/s]

Epoch: 0 Iteration: 196 Loss: 1.701 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   1%|          | 197/37094 [00:20<59:40, 10.31it/s]

Epoch: 0 Iteration: 197 Loss: 1.665 Validation Loss: 1.607 Accuracy: 0.069 Validation Accuracy: 0.076:   1%|          | 197/37094 [00:20<59:40, 10.31it/s]

Epoch: 0 Iteration: 198 Loss: 1.660 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   1%|          | 197/37094 [00:20<59:40, 10.31it/s]

Epoch: 0 Iteration: 198 Loss: 1.660 Validation Loss: 1.607 Accuracy: 0.066 Validation Accuracy: 0.076:   1%|          | 199/37094 [00:20<59:42, 10.30it/s]

Epoch: 0 Iteration: 199 Loss: 1.668 Validation Loss: 1.607 Accuracy: 0.072 Validation Accuracy: 0.076:   1%|          | 199/37094 [00:20<59:42, 10.30it/s]

Epoch: 0 Iteration: 200 Loss: 1.678 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 199/37094 [00:21<59:42, 10.30it/s]

Epoch: 0 Iteration: 200 Loss: 1.678 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 201/37094 [00:21<1:49:57,  5.59it/s]

Epoch: 0 Iteration: 201 Loss: 1.625 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 201/37094 [00:21<1:49:57,  5.59it/s]

Epoch: 0 Iteration: 202 Loss: 1.617 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 201/37094 [00:21<1:49:57,  5.59it/s]

Epoch: 0 Iteration: 202 Loss: 1.617 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 203/37094 [00:21<1:34:04,  6.54it/s]

Epoch: 0 Iteration: 203 Loss: 1.643 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 203/37094 [00:21<1:34:04,  6.54it/s]

Epoch: 0 Iteration: 204 Loss: 1.626 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 203/37094 [00:21<1:34:04,  6.54it/s]

Epoch: 0 Iteration: 204 Loss: 1.626 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 205/37094 [00:21<1:23:47,  7.34it/s]

Epoch: 0 Iteration: 205 Loss: 1.602 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 205/37094 [00:21<1:23:47,  7.34it/s]

Epoch: 0 Iteration: 206 Loss: 1.642 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 205/37094 [00:21<1:23:47,  7.34it/s]

Epoch: 0 Iteration: 206 Loss: 1.642 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 207/37094 [00:21<1:16:32,  8.03it/s]

Epoch: 0 Iteration: 207 Loss: 1.654 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 207/37094 [00:21<1:16:32,  8.03it/s]

Epoch: 0 Iteration: 208 Loss: 1.639 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 207/37094 [00:21<1:16:32,  8.03it/s]

Epoch: 0 Iteration: 208 Loss: 1.639 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 209/37094 [00:21<1:11:28,  8.60it/s]

Epoch: 0 Iteration: 209 Loss: 1.624 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 209/37094 [00:21<1:11:28,  8.60it/s]

Epoch: 0 Iteration: 210 Loss: 1.653 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 209/37094 [00:22<1:11:28,  8.60it/s]

Epoch: 0 Iteration: 210 Loss: 1.653 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 211/37094 [00:22<1:07:55,  9.05it/s]

Epoch: 0 Iteration: 211 Loss: 1.618 Validation Loss: 1.406 Accuracy: 0.053 Validation Accuracy: 0.089:   1%|          | 211/37094 [00:22<1:07:55,  9.05it/s]

Epoch: 0 Iteration: 212 Loss: 1.621 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 211/37094 [00:22<1:07:55,  9.05it/s]

Epoch: 0 Iteration: 212 Loss: 1.621 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 213/37094 [00:22<1:05:26,  9.39it/s]

Epoch: 0 Iteration: 213 Loss: 1.647 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 213/37094 [00:22<1:05:26,  9.39it/s]

Epoch: 0 Iteration: 214 Loss: 1.628 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 213/37094 [00:22<1:05:26,  9.39it/s]

Epoch: 0 Iteration: 214 Loss: 1.628 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 215/37094 [00:22<1:03:42,  9.65it/s]

Epoch: 0 Iteration: 215 Loss: 1.640 Validation Loss: 1.406 Accuracy: 0.050 Validation Accuracy: 0.089:   1%|          | 215/37094 [00:22<1:03:42,  9.65it/s]

Epoch: 0 Iteration: 216 Loss: 1.612 Validation Loss: 1.406 Accuracy: 0.053 Validation Accuracy: 0.089:   1%|          | 215/37094 [00:22<1:03:42,  9.65it/s]

Epoch: 0 Iteration: 216 Loss: 1.612 Validation Loss: 1.406 Accuracy: 0.053 Validation Accuracy: 0.089:   1%|          | 217/37094 [00:22<1:02:29,  9.84it/s]

Epoch: 0 Iteration: 217 Loss: 1.598 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 217/37094 [00:22<1:02:29,  9.84it/s]

Epoch: 0 Iteration: 218 Loss: 1.602 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 217/37094 [00:22<1:02:29,  9.84it/s]

Epoch: 0 Iteration: 218 Loss: 1.602 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 219/37094 [00:22<1:01:37,  9.97it/s]

Epoch: 0 Iteration: 219 Loss: 1.581 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 219/37094 [00:22<1:01:37,  9.97it/s]

Epoch: 0 Iteration: 220 Loss: 1.570 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 219/37094 [00:23<1:01:37,  9.97it/s]

Epoch: 0 Iteration: 220 Loss: 1.570 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 221/37094 [00:23<1:01:01, 10.07it/s]

Epoch: 0 Iteration: 221 Loss: 1.601 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 221/37094 [00:23<1:01:01, 10.07it/s]

Epoch: 0 Iteration: 222 Loss: 1.596 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 221/37094 [00:23<1:01:01, 10.07it/s]

Epoch: 0 Iteration: 222 Loss: 1.596 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 223/37094 [00:23<1:00:36, 10.14it/s]

Epoch: 0 Iteration: 223 Loss: 1.568 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 223/37094 [00:23<1:00:36, 10.14it/s]

Epoch: 0 Iteration: 224 Loss: 1.587 Validation Loss: 1.406 Accuracy: 0.081 Validation Accuracy: 0.089:   1%|          | 223/37094 [00:23<1:00:36, 10.14it/s]

Epoch: 0 Iteration: 224 Loss: 1.587 Validation Loss: 1.406 Accuracy: 0.081 Validation Accuracy: 0.089:   1%|          | 225/37094 [00:23<1:00:18, 10.19it/s]

Epoch: 0 Iteration: 225 Loss: 1.611 Validation Loss: 1.406 Accuracy: 0.081 Validation Accuracy: 0.089:   1%|          | 225/37094 [00:23<1:00:18, 10.19it/s]

Epoch: 0 Iteration: 226 Loss: 1.540 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 225/37094 [00:23<1:00:18, 10.19it/s]

Epoch: 0 Iteration: 226 Loss: 1.540 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 227/37094 [00:23<1:00:06, 10.22it/s]

Epoch: 0 Iteration: 227 Loss: 1.580 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 227/37094 [00:23<1:00:06, 10.22it/s]

Epoch: 0 Iteration: 228 Loss: 1.556 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 227/37094 [00:23<1:00:06, 10.22it/s]

Epoch: 0 Iteration: 228 Loss: 1.556 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 229/37094 [00:23<1:00:01, 10.24it/s]

Epoch: 0 Iteration: 229 Loss: 1.566 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 229/37094 [00:23<1:00:01, 10.24it/s]

Epoch: 0 Iteration: 230 Loss: 1.582 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 229/37094 [00:24<1:00:01, 10.24it/s]

Epoch: 0 Iteration: 230 Loss: 1.582 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 231/37094 [00:24<59:50, 10.27it/s]  

Epoch: 0 Iteration: 231 Loss: 1.626 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 231/37094 [00:24<59:50, 10.27it/s]

Epoch: 0 Iteration: 232 Loss: 1.612 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 231/37094 [00:24<59:50, 10.27it/s]

Epoch: 0 Iteration: 232 Loss: 1.612 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 233/37094 [00:24<59:45, 10.28it/s]

Epoch: 0 Iteration: 233 Loss: 1.604 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 233/37094 [00:24<59:45, 10.28it/s]

Epoch: 0 Iteration: 234 Loss: 1.644 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 233/37094 [00:24<59:45, 10.28it/s]

Epoch: 0 Iteration: 234 Loss: 1.644 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 235/37094 [00:24<59:43, 10.29it/s]

Epoch: 0 Iteration: 235 Loss: 1.630 Validation Loss: 1.406 Accuracy: 0.053 Validation Accuracy: 0.089:   1%|          | 235/37094 [00:24<59:43, 10.29it/s]

Epoch: 0 Iteration: 236 Loss: 1.613 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 235/37094 [00:24<59:43, 10.29it/s]

Epoch: 0 Iteration: 236 Loss: 1.613 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 237/37094 [00:24<59:38, 10.30it/s]

Epoch: 0 Iteration: 237 Loss: 1.634 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 237/37094 [00:24<59:38, 10.30it/s]

Epoch: 0 Iteration: 238 Loss: 1.635 Validation Loss: 1.406 Accuracy: 0.050 Validation Accuracy: 0.089:   1%|          | 237/37094 [00:24<59:38, 10.30it/s]

Epoch: 0 Iteration: 238 Loss: 1.635 Validation Loss: 1.406 Accuracy: 0.050 Validation Accuracy: 0.089:   1%|          | 239/37094 [00:24<59:37, 10.30it/s]

Epoch: 0 Iteration: 239 Loss: 1.644 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 239/37094 [00:24<59:37, 10.30it/s]

Epoch: 0 Iteration: 240 Loss: 1.610 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 239/37094 [00:24<59:37, 10.30it/s]

Epoch: 0 Iteration: 240 Loss: 1.610 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 241/37094 [00:24<59:35, 10.31it/s]

Epoch: 0 Iteration: 241 Loss: 1.609 Validation Loss: 1.406 Accuracy: 0.075 Validation Accuracy: 0.089:   1%|          | 241/37094 [00:25<59:35, 10.31it/s]

Epoch: 0 Iteration: 242 Loss: 1.642 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 241/37094 [00:25<59:35, 10.31it/s]

Epoch: 0 Iteration: 242 Loss: 1.642 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 243/37094 [00:25<59:34, 10.31it/s]

Epoch: 0 Iteration: 243 Loss: 1.626 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 243/37094 [00:25<59:34, 10.31it/s]

Epoch: 0 Iteration: 244 Loss: 1.649 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 243/37094 [00:25<59:34, 10.31it/s]

Epoch: 0 Iteration: 244 Loss: 1.649 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 245/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 245 Loss: 1.636 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 245/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 246 Loss: 1.627 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 245/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 246 Loss: 1.627 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 247/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 247 Loss: 1.599 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 247/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 248 Loss: 1.635 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 247/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 248 Loss: 1.635 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 249/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 249 Loss: 1.655 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 249/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 250 Loss: 1.629 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 249/37094 [00:25<59:33, 10.31it/s]

Epoch: 0 Iteration: 250 Loss: 1.629 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 251/37094 [00:25<59:32, 10.31it/s]

Epoch: 0 Iteration: 251 Loss: 1.577 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 251/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 252 Loss: 1.598 Validation Loss: 1.406 Accuracy: 0.050 Validation Accuracy: 0.089:   1%|          | 251/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 252 Loss: 1.598 Validation Loss: 1.406 Accuracy: 0.050 Validation Accuracy: 0.089:   1%|          | 253/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 253 Loss: 1.566 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 253/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 254 Loss: 1.551 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 253/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 254 Loss: 1.551 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 255/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 255 Loss: 1.586 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 255/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 256 Loss: 1.576 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 255/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 256 Loss: 1.576 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 257/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 257 Loss: 1.557 Validation Loss: 1.406 Accuracy: 0.050 Validation Accuracy: 0.089:   1%|          | 257/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 258 Loss: 1.573 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 257/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 258 Loss: 1.573 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 259/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 259 Loss: 1.600 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 259/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 260 Loss: 1.670 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 259/37094 [00:26<59:32, 10.31it/s]

Epoch: 0 Iteration: 260 Loss: 1.670 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 261/37094 [00:26<59:31, 10.31it/s]

Epoch: 0 Iteration: 261 Loss: 1.660 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 261/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 262 Loss: 1.606 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 261/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 262 Loss: 1.606 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 263/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 263 Loss: 1.593 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 263/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 264 Loss: 1.577 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 263/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 264 Loss: 1.577 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 265/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 265 Loss: 1.575 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 265/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 266 Loss: 1.579 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 265/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 266 Loss: 1.579 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 267/37094 [00:27<59:32, 10.31it/s]

Epoch: 0 Iteration: 267 Loss: 1.565 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 267/37094 [00:27<59:32, 10.31it/s]

Epoch: 0 Iteration: 268 Loss: 1.572 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 267/37094 [00:27<59:32, 10.31it/s]

Epoch: 0 Iteration: 268 Loss: 1.572 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 269/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 269 Loss: 1.580 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 269/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 270 Loss: 1.564 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 269/37094 [00:27<59:31, 10.31it/s]

Epoch: 0 Iteration: 270 Loss: 1.564 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 271/37094 [00:27<59:30, 10.31it/s]

Epoch: 0 Iteration: 271 Loss: 1.587 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 271/37094 [00:27<59:30, 10.31it/s]

Epoch: 0 Iteration: 272 Loss: 1.570 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 271/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 272 Loss: 1.570 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 273/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 273 Loss: 1.634 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 273/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 274 Loss: 1.641 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 273/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 274 Loss: 1.641 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 275/37094 [00:28<59:29, 10.32it/s]

Epoch: 0 Iteration: 275 Loss: 1.625 Validation Loss: 1.406 Accuracy: 0.062 Validation Accuracy: 0.089:   1%|          | 275/37094 [00:28<59:29, 10.32it/s]

Epoch: 0 Iteration: 276 Loss: 1.674 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 275/37094 [00:28<59:29, 10.32it/s]

Epoch: 0 Iteration: 276 Loss: 1.674 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 277/37094 [00:28<59:29, 10.31it/s]

Epoch: 0 Iteration: 277 Loss: 1.703 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 277/37094 [00:28<59:29, 10.31it/s]

Epoch: 0 Iteration: 278 Loss: 1.699 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 277/37094 [00:28<59:29, 10.31it/s]

Epoch: 0 Iteration: 278 Loss: 1.699 Validation Loss: 1.406 Accuracy: 0.066 Validation Accuracy: 0.089:   1%|          | 279/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 279 Loss: 1.671 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 279/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 280 Loss: 1.608 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 279/37094 [00:28<59:30, 10.31it/s]

Epoch: 0 Iteration: 280 Loss: 1.608 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 281/37094 [00:28<59:31, 10.31it/s]

Epoch: 0 Iteration: 281 Loss: 1.585 Validation Loss: 1.406 Accuracy: 0.056 Validation Accuracy: 0.089:   1%|          | 281/37094 [00:28<59:31, 10.31it/s]

Epoch: 0 Iteration: 282 Loss: 1.624 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 281/37094 [00:29<59:31, 10.31it/s]

Epoch: 0 Iteration: 282 Loss: 1.624 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 283/37094 [00:29<59:30, 10.31it/s]

Epoch: 0 Iteration: 283 Loss: 1.655 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 283/37094 [00:29<59:30, 10.31it/s]

Epoch: 0 Iteration: 284 Loss: 1.667 Validation Loss: 1.406 Accuracy: 0.041 Validation Accuracy: 0.089:   1%|          | 283/37094 [00:29<59:30, 10.31it/s]

Epoch: 0 Iteration: 284 Loss: 1.667 Validation Loss: 1.406 Accuracy: 0.041 Validation Accuracy: 0.089:   1%|          | 285/37094 [00:29<59:30, 10.31it/s]

Epoch: 0 Iteration: 285 Loss: 1.685 Validation Loss: 1.406 Accuracy: 0.044 Validation Accuracy: 0.089:   1%|          | 285/37094 [00:29<59:30, 10.31it/s]

Epoch: 0 Iteration: 286 Loss: 1.707 Validation Loss: 1.406 Accuracy: 0.044 Validation Accuracy: 0.089:   1%|          | 285/37094 [00:29<59:30, 10.31it/s]

Epoch: 0 Iteration: 286 Loss: 1.707 Validation Loss: 1.406 Accuracy: 0.044 Validation Accuracy: 0.089:   1%|          | 287/37094 [00:29<59:29, 10.31it/s]

Epoch: 0 Iteration: 287 Loss: 1.696 Validation Loss: 1.406 Accuracy: 0.034 Validation Accuracy: 0.089:   1%|          | 287/37094 [00:29<59:29, 10.31it/s]

Epoch: 0 Iteration: 288 Loss: 1.681 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 287/37094 [00:29<59:29, 10.31it/s]

Epoch: 0 Iteration: 288 Loss: 1.681 Validation Loss: 1.406 Accuracy: 0.047 Validation Accuracy: 0.089:   1%|          | 289/37094 [00:29<59:28, 10.31it/s]

Epoch: 0 Iteration: 289 Loss: 1.670 Validation Loss: 1.406 Accuracy: 0.053 Validation Accuracy: 0.089:   1%|          | 289/37094 [00:29<59:28, 10.31it/s]

Epoch: 0 Iteration: 290 Loss: 1.657 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 289/37094 [00:29<59:28, 10.31it/s]

Epoch: 0 Iteration: 290 Loss: 1.657 Validation Loss: 1.406 Accuracy: 0.059 Validation Accuracy: 0.089:   1%|          | 291/37094 [00:29<59:31, 10.30it/s]

Epoch: 0 Iteration: 291 Loss: 1.677 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 291/37094 [00:29<59:31, 10.30it/s]

Epoch: 0 Iteration: 292 Loss: 1.735 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 291/37094 [00:30<59:31, 10.30it/s]

Epoch: 0 Iteration: 292 Loss: 1.735 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 293/37094 [00:30<59:28, 10.31it/s]

Epoch: 0 Iteration: 293 Loss: 1.730 Validation Loss: 1.406 Accuracy: 0.075 Validation Accuracy: 0.089:   1%|          | 293/37094 [00:30<59:28, 10.31it/s]

Epoch: 0 Iteration: 294 Loss: 1.715 Validation Loss: 1.406 Accuracy: 0.075 Validation Accuracy: 0.089:   1%|          | 293/37094 [00:30<59:28, 10.31it/s]

Epoch: 0 Iteration: 294 Loss: 1.715 Validation Loss: 1.406 Accuracy: 0.075 Validation Accuracy: 0.089:   1%|          | 295/37094 [00:30<59:28, 10.31it/s]

Epoch: 0 Iteration: 295 Loss: 1.688 Validation Loss: 1.406 Accuracy: 0.075 Validation Accuracy: 0.089:   1%|          | 295/37094 [00:30<59:28, 10.31it/s]

Epoch: 0 Iteration: 296 Loss: 1.650 Validation Loss: 1.406 Accuracy: 0.078 Validation Accuracy: 0.089:   1%|          | 295/37094 [00:30<59:28, 10.31it/s]

Epoch: 0 Iteration: 296 Loss: 1.650 Validation Loss: 1.406 Accuracy: 0.078 Validation Accuracy: 0.089:   1%|          | 297/37094 [00:30<59:27, 10.31it/s]

Epoch: 0 Iteration: 297 Loss: 1.646 Validation Loss: 1.406 Accuracy: 0.081 Validation Accuracy: 0.089:   1%|          | 297/37094 [00:30<59:27, 10.31it/s]

Epoch: 0 Iteration: 298 Loss: 1.623 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 297/37094 [00:30<59:27, 10.31it/s]

Epoch: 0 Iteration: 298 Loss: 1.623 Validation Loss: 1.406 Accuracy: 0.072 Validation Accuracy: 0.089:   1%|          | 299/37094 [00:30<59:27, 10.31it/s]

Epoch: 0 Iteration: 299 Loss: 1.646 Validation Loss: 1.406 Accuracy: 0.069 Validation Accuracy: 0.089:   1%|          | 299/37094 [00:30<59:27, 10.31it/s]

Epoch: 0 Iteration: 300 Loss: 1.655 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 299/37094 [00:31<59:27, 10.31it/s]

Epoch: 0 Iteration: 300 Loss: 1.655 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 301/37094 [00:31<1:49:04,  5.62it/s]

Epoch: 0 Iteration: 301 Loss: 1.703 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 301/37094 [00:31<1:49:04,  5.62it/s]

Epoch: 0 Iteration: 302 Loss: 1.712 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 301/37094 [00:31<1:49:04,  5.62it/s]

Epoch: 0 Iteration: 302 Loss: 1.712 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 303/37094 [00:31<1:33:26,  6.56it/s]

Epoch: 0 Iteration: 303 Loss: 1.693 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 303/37094 [00:31<1:33:26,  6.56it/s]

Epoch: 0 Iteration: 304 Loss: 1.662 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 303/37094 [00:31<1:33:26,  6.56it/s]

Epoch: 0 Iteration: 304 Loss: 1.662 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 305/37094 [00:31<1:23:16,  7.36it/s]

Epoch: 0 Iteration: 305 Loss: 1.655 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 305/37094 [00:31<1:23:16,  7.36it/s]

Epoch: 0 Iteration: 306 Loss: 1.617 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 305/37094 [00:31<1:23:16,  7.36it/s]

Epoch: 0 Iteration: 306 Loss: 1.617 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 307/37094 [00:31<1:16:08,  8.05it/s]

Epoch: 0 Iteration: 307 Loss: 1.655 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 307/37094 [00:31<1:16:08,  8.05it/s]

Epoch: 0 Iteration: 308 Loss: 1.686 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 307/37094 [00:32<1:16:08,  8.05it/s]

Epoch: 0 Iteration: 308 Loss: 1.686 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 309/37094 [00:32<1:11:07,  8.62it/s]

Epoch: 0 Iteration: 309 Loss: 1.658 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 309/37094 [00:32<1:11:07,  8.62it/s]

Epoch: 0 Iteration: 310 Loss: 1.681 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 309/37094 [00:32<1:11:07,  8.62it/s]

Epoch: 0 Iteration: 310 Loss: 1.681 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 311/37094 [00:32<1:07:36,  9.07it/s]

Epoch: 0 Iteration: 311 Loss: 1.659 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 311/37094 [00:32<1:07:36,  9.07it/s]

Epoch: 0 Iteration: 312 Loss: 1.617 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 311/37094 [00:32<1:07:36,  9.07it/s]

Epoch: 0 Iteration: 312 Loss: 1.617 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 313/37094 [00:32<1:05:10,  9.41it/s]

Epoch: 0 Iteration: 313 Loss: 1.585 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 313/37094 [00:32<1:05:10,  9.41it/s]

Epoch: 0 Iteration: 314 Loss: 1.569 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 313/37094 [00:32<1:05:10,  9.41it/s]

Epoch: 0 Iteration: 314 Loss: 1.569 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 315/37094 [00:32<1:03:30,  9.65it/s]

Epoch: 0 Iteration: 315 Loss: 1.606 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 315/37094 [00:32<1:03:30,  9.65it/s]

Epoch: 0 Iteration: 316 Loss: 1.627 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 315/37094 [00:32<1:03:30,  9.65it/s]

Epoch: 0 Iteration: 316 Loss: 1.627 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 317/37094 [00:32<1:02:15,  9.85it/s]

Epoch: 0 Iteration: 317 Loss: 1.653 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 317/37094 [00:32<1:02:15,  9.85it/s]

Epoch: 0 Iteration: 318 Loss: 1.662 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 317/37094 [00:33<1:02:15,  9.85it/s]

Epoch: 0 Iteration: 318 Loss: 1.662 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 319/37094 [00:33<1:01:24,  9.98it/s]

Epoch: 0 Iteration: 319 Loss: 1.624 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 319/37094 [00:33<1:01:24,  9.98it/s]

Epoch: 0 Iteration: 320 Loss: 1.653 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 319/37094 [00:33<1:01:24,  9.98it/s]

Epoch: 0 Iteration: 320 Loss: 1.653 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 321/37094 [00:33<1:00:48, 10.08it/s]

Epoch: 0 Iteration: 321 Loss: 1.626 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 321/37094 [00:33<1:00:48, 10.08it/s]

Epoch: 0 Iteration: 322 Loss: 1.566 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 321/37094 [00:33<1:00:48, 10.08it/s]

Epoch: 0 Iteration: 322 Loss: 1.566 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 323/37094 [00:33<1:00:23, 10.15it/s]

Epoch: 0 Iteration: 323 Loss: 1.600 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 323/37094 [00:33<1:00:23, 10.15it/s]

Epoch: 0 Iteration: 324 Loss: 1.662 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 323/37094 [00:33<1:00:23, 10.15it/s]

Epoch: 0 Iteration: 324 Loss: 1.662 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 325/37094 [00:33<1:00:05, 10.20it/s]

Epoch: 0 Iteration: 325 Loss: 1.635 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 325/37094 [00:33<1:00:05, 10.20it/s]

Epoch: 0 Iteration: 326 Loss: 1.670 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 325/37094 [00:33<1:00:05, 10.20it/s]

Epoch: 0 Iteration: 326 Loss: 1.670 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 327/37094 [00:33<59:53, 10.23it/s]  

Epoch: 0 Iteration: 327 Loss: 1.652 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 327/37094 [00:33<59:53, 10.23it/s]

Epoch: 0 Iteration: 328 Loss: 1.598 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 327/37094 [00:34<59:53, 10.23it/s]

Epoch: 0 Iteration: 328 Loss: 1.598 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 329/37094 [00:34<59:44, 10.26it/s]

Epoch: 0 Iteration: 329 Loss: 1.587 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 329/37094 [00:34<59:44, 10.26it/s]

Epoch: 0 Iteration: 330 Loss: 1.560 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 329/37094 [00:34<59:44, 10.26it/s]

Epoch: 0 Iteration: 330 Loss: 1.560 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 331/37094 [00:34<59:38, 10.27it/s]

Epoch: 0 Iteration: 331 Loss: 1.542 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 331/37094 [00:34<59:38, 10.27it/s]

Epoch: 0 Iteration: 332 Loss: 1.543 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 331/37094 [00:34<59:38, 10.27it/s]

Epoch: 0 Iteration: 332 Loss: 1.543 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 333/37094 [00:34<59:34, 10.28it/s]

Epoch: 0 Iteration: 333 Loss: 1.554 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 333/37094 [00:34<59:34, 10.28it/s]

Epoch: 0 Iteration: 334 Loss: 1.598 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 333/37094 [00:34<59:34, 10.28it/s]

Epoch: 0 Iteration: 334 Loss: 1.598 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 335/37094 [00:34<59:31, 10.29it/s]

Epoch: 0 Iteration: 335 Loss: 1.562 Validation Loss: 1.370 Accuracy: 0.081 Validation Accuracy: 0.094:   1%|          | 335/37094 [00:34<59:31, 10.29it/s]

Epoch: 0 Iteration: 336 Loss: 1.539 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 335/37094 [00:34<59:31, 10.29it/s]

Epoch: 0 Iteration: 336 Loss: 1.539 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 337/37094 [00:34<59:29, 10.30it/s]

Epoch: 0 Iteration: 337 Loss: 1.510 Validation Loss: 1.370 Accuracy: 0.094 Validation Accuracy: 0.094:   1%|          | 337/37094 [00:34<59:29, 10.30it/s]

Epoch: 0 Iteration: 338 Loss: 1.557 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 337/37094 [00:35<59:29, 10.30it/s]

Epoch: 0 Iteration: 338 Loss: 1.557 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 339/37094 [00:35<59:29, 10.30it/s]

Epoch: 0 Iteration: 339 Loss: 1.605 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 339/37094 [00:35<59:29, 10.30it/s]

Epoch: 0 Iteration: 340 Loss: 1.573 Validation Loss: 1.370 Accuracy: 0.081 Validation Accuracy: 0.094:   1%|          | 339/37094 [00:35<59:29, 10.30it/s]

Epoch: 0 Iteration: 340 Loss: 1.573 Validation Loss: 1.370 Accuracy: 0.081 Validation Accuracy: 0.094:   1%|          | 341/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 341 Loss: 1.592 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 341/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 342 Loss: 1.625 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 341/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 342 Loss: 1.625 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 343/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 343 Loss: 1.576 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 343/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 344 Loss: 1.561 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 343/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 344 Loss: 1.561 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 345/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 345 Loss: 1.562 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 345/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 346 Loss: 1.570 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 345/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 346 Loss: 1.570 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 347/37094 [00:35<59:29, 10.29it/s]

Epoch: 0 Iteration: 347 Loss: 1.572 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 347/37094 [00:35<59:29, 10.29it/s]

Epoch: 0 Iteration: 348 Loss: 1.584 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 347/37094 [00:35<59:29, 10.29it/s]

Epoch: 0 Iteration: 348 Loss: 1.584 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 349/37094 [00:35<59:27, 10.30it/s]

Epoch: 0 Iteration: 349 Loss: 1.597 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 349/37094 [00:36<59:27, 10.30it/s]

Epoch: 0 Iteration: 350 Loss: 1.591 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 349/37094 [00:36<59:27, 10.30it/s]

Epoch: 0 Iteration: 350 Loss: 1.591 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 351/37094 [00:36<59:25, 10.31it/s]

Epoch: 0 Iteration: 351 Loss: 1.618 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 351/37094 [00:36<59:25, 10.31it/s]

Epoch: 0 Iteration: 352 Loss: 1.651 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 351/37094 [00:36<59:25, 10.31it/s]

Epoch: 0 Iteration: 352 Loss: 1.651 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 353/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 353 Loss: 1.659 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 353/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 354 Loss: 1.612 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 353/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 354 Loss: 1.612 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 355/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 355 Loss: 1.633 Validation Loss: 1.370 Accuracy: 0.037 Validation Accuracy: 0.094:   1%|          | 355/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 356 Loss: 1.626 Validation Loss: 1.370 Accuracy: 0.037 Validation Accuracy: 0.094:   1%|          | 355/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 356 Loss: 1.626 Validation Loss: 1.370 Accuracy: 0.037 Validation Accuracy: 0.094:   1%|          | 357/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 357 Loss: 1.608 Validation Loss: 1.370 Accuracy: 0.037 Validation Accuracy: 0.094:   1%|          | 357/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 358 Loss: 1.569 Validation Loss: 1.370 Accuracy: 0.034 Validation Accuracy: 0.094:   1%|          | 357/37094 [00:36<59:23, 10.31it/s]

Epoch: 0 Iteration: 358 Loss: 1.569 Validation Loss: 1.370 Accuracy: 0.034 Validation Accuracy: 0.094:   1%|          | 359/37094 [00:36<59:24, 10.31it/s]

Epoch: 0 Iteration: 359 Loss: 1.561 Validation Loss: 1.370 Accuracy: 0.037 Validation Accuracy: 0.094:   1%|          | 359/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 360 Loss: 1.566 Validation Loss: 1.370 Accuracy: 0.034 Validation Accuracy: 0.094:   1%|          | 359/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 360 Loss: 1.566 Validation Loss: 1.370 Accuracy: 0.034 Validation Accuracy: 0.094:   1%|          | 361/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 361 Loss: 1.601 Validation Loss: 1.370 Accuracy: 0.037 Validation Accuracy: 0.094:   1%|          | 361/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 362 Loss: 1.592 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 361/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 362 Loss: 1.592 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 363/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 363 Loss: 1.581 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 363/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 364 Loss: 1.585 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 363/37094 [00:37<59:24, 10.31it/s]

Epoch: 0 Iteration: 364 Loss: 1.585 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 365/37094 [00:37<59:23, 10.31it/s]

Epoch: 0 Iteration: 365 Loss: 1.629 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 365/37094 [00:37<59:23, 10.31it/s]

Epoch: 0 Iteration: 366 Loss: 1.600 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 365/37094 [00:37<59:23, 10.31it/s]

Epoch: 0 Iteration: 366 Loss: 1.600 Validation Loss: 1.370 Accuracy: 0.050 Validation Accuracy: 0.094:   1%|          | 367/37094 [00:37<59:24, 10.30it/s]

Epoch: 0 Iteration: 367 Loss: 1.607 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 367/37094 [00:37<59:24, 10.30it/s]

Epoch: 0 Iteration: 368 Loss: 1.597 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 367/37094 [00:37<59:24, 10.30it/s]

Epoch: 0 Iteration: 368 Loss: 1.597 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 369/37094 [00:37<59:23, 10.31it/s]

Epoch: 0 Iteration: 369 Loss: 1.575 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 369/37094 [00:38<59:23, 10.31it/s]

Epoch: 0 Iteration: 370 Loss: 1.599 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 369/37094 [00:38<59:23, 10.31it/s]

Epoch: 0 Iteration: 370 Loss: 1.599 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 371/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 371 Loss: 1.617 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 371/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 372 Loss: 1.589 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 371/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 372 Loss: 1.589 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 373/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 373 Loss: 1.599 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 373/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 374 Loss: 1.621 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 373/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 374 Loss: 1.621 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 375/37094 [00:38<59:21, 10.31it/s]

Epoch: 0 Iteration: 375 Loss: 1.641 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 375/37094 [00:38<59:21, 10.31it/s]

Epoch: 0 Iteration: 376 Loss: 1.653 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 375/37094 [00:38<59:21, 10.31it/s]

Epoch: 0 Iteration: 376 Loss: 1.653 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 377/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 377 Loss: 1.682 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 377/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 378 Loss: 1.696 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 377/37094 [00:38<59:22, 10.31it/s]

Epoch: 0 Iteration: 378 Loss: 1.696 Validation Loss: 1.370 Accuracy: 0.069 Validation Accuracy: 0.094:   1%|          | 379/37094 [00:38<59:21, 10.31it/s]

Epoch: 0 Iteration: 379 Loss: 1.631 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 379/37094 [00:38<59:21, 10.31it/s]

Epoch: 0 Iteration: 380 Loss: 1.649 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 379/37094 [00:39<59:21, 10.31it/s]

Epoch: 0 Iteration: 380 Loss: 1.649 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 381/37094 [00:39<59:20, 10.31it/s]

Epoch: 0 Iteration: 381 Loss: 1.613 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 381/37094 [00:39<59:20, 10.31it/s]

Epoch: 0 Iteration: 382 Loss: 1.634 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 381/37094 [00:39<59:20, 10.31it/s]

Epoch: 0 Iteration: 382 Loss: 1.634 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 383/37094 [00:39<59:20, 10.31it/s]

Epoch: 0 Iteration: 383 Loss: 1.647 Validation Loss: 1.370 Accuracy: 0.062 Validation Accuracy: 0.094:   1%|          | 383/37094 [00:39<59:20, 10.31it/s]

Epoch: 0 Iteration: 384 Loss: 1.633 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 383/37094 [00:39<59:20, 10.31it/s]

Epoch: 0 Iteration: 384 Loss: 1.633 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 385/37094 [00:39<59:19, 10.31it/s]

Epoch: 0 Iteration: 385 Loss: 1.608 Validation Loss: 1.370 Accuracy: 0.053 Validation Accuracy: 0.094:   1%|          | 385/37094 [00:39<59:19, 10.31it/s]

Epoch: 0 Iteration: 386 Loss: 1.587 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 385/37094 [00:39<59:19, 10.31it/s]

Epoch: 0 Iteration: 386 Loss: 1.587 Validation Loss: 1.370 Accuracy: 0.047 Validation Accuracy: 0.094:   1%|          | 387/37094 [00:39<59:19, 10.31it/s]

Epoch: 0 Iteration: 387 Loss: 1.552 Validation Loss: 1.370 Accuracy: 0.056 Validation Accuracy: 0.094:   1%|          | 387/37094 [00:39<59:19, 10.31it/s]

Epoch: 0 Iteration: 388 Loss: 1.554 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 387/37094 [00:39<59:19, 10.31it/s]

Epoch: 0 Iteration: 388 Loss: 1.554 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 389/37094 [00:39<59:21, 10.31it/s]

Epoch: 0 Iteration: 389 Loss: 1.572 Validation Loss: 1.370 Accuracy: 0.059 Validation Accuracy: 0.094:   1%|          | 389/37094 [00:39<59:21, 10.31it/s]

Epoch: 0 Iteration: 390 Loss: 1.577 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 389/37094 [00:40<59:21, 10.31it/s]

Epoch: 0 Iteration: 390 Loss: 1.577 Validation Loss: 1.370 Accuracy: 0.066 Validation Accuracy: 0.094:   1%|          | 391/37094 [00:40<59:19, 10.31it/s]

Epoch: 0 Iteration: 391 Loss: 1.539 Validation Loss: 1.370 Accuracy: 0.072 Validation Accuracy: 0.094:   1%|          | 391/37094 [00:40<59:19, 10.31it/s]

Epoch: 0 Iteration: 392 Loss: 1.515 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 391/37094 [00:40<59:19, 10.31it/s]

Epoch: 0 Iteration: 392 Loss: 1.515 Validation Loss: 1.370 Accuracy: 0.078 Validation Accuracy: 0.094:   1%|          | 393/37094 [00:40<59:20, 10.31it/s]

Epoch: 0 Iteration: 393 Loss: 1.533 Validation Loss: 1.370 Accuracy: 0.081 Validation Accuracy: 0.094:   1%|          | 393/37094 [00:40<59:20, 10.31it/s]

Epoch: 0 Iteration: 394 Loss: 1.538 Validation Loss: 1.370 Accuracy: 0.084 Validation Accuracy: 0.094:   1%|          | 393/37094 [00:40<59:20, 10.31it/s]

Epoch: 0 Iteration: 394 Loss: 1.538 Validation Loss: 1.370 Accuracy: 0.084 Validation Accuracy: 0.094:   1%|          | 395/37094 [00:40<59:20, 10.31it/s]

Epoch: 0 Iteration: 395 Loss: 1.504 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 395/37094 [00:40<59:20, 10.31it/s]

Epoch: 0 Iteration: 396 Loss: 1.521 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 395/37094 [00:40<59:20, 10.31it/s]

Epoch: 0 Iteration: 396 Loss: 1.521 Validation Loss: 1.370 Accuracy: 0.091 Validation Accuracy: 0.094:   1%|          | 397/37094 [00:40<59:18, 10.31it/s]

Epoch: 0 Iteration: 397 Loss: 1.486 Validation Loss: 1.370 Accuracy: 0.081 Validation Accuracy: 0.094:   1%|          | 397/37094 [00:40<59:18, 10.31it/s]

Epoch: 0 Iteration: 398 Loss: 1.503 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 397/37094 [00:40<59:18, 10.31it/s]

Epoch: 0 Iteration: 398 Loss: 1.503 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 399/37094 [00:40<59:18, 10.31it/s]

Epoch: 0 Iteration: 399 Loss: 1.526 Validation Loss: 1.370 Accuracy: 0.075 Validation Accuracy: 0.094:   1%|          | 399/37094 [00:40<59:18, 10.31it/s]

Epoch: 0 Iteration: 400 Loss: 1.513 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 399/37094 [00:41<59:18, 10.31it/s]

Epoch: 0 Iteration: 400 Loss: 1.513 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 401/37094 [00:41<1:48:50,  5.62it/s]

Epoch: 0 Iteration: 401 Loss: 1.526 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 401/37094 [00:41<1:48:50,  5.62it/s]

Epoch: 0 Iteration: 402 Loss: 1.527 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 401/37094 [00:41<1:48:50,  5.62it/s]

Epoch: 0 Iteration: 402 Loss: 1.527 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 403/37094 [00:41<1:33:16,  6.56it/s]

Epoch: 0 Iteration: 403 Loss: 1.534 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 403/37094 [00:41<1:33:16,  6.56it/s]

Epoch: 0 Iteration: 404 Loss: 1.533 Validation Loss: 1.419 Accuracy: 0.056 Validation Accuracy: 0.096:   1%|          | 403/37094 [00:41<1:33:16,  6.56it/s]

Epoch: 0 Iteration: 404 Loss: 1.533 Validation Loss: 1.419 Accuracy: 0.056 Validation Accuracy: 0.096:   1%|          | 405/37094 [00:41<1:23:09,  7.35it/s]

Epoch: 0 Iteration: 405 Loss: 1.531 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 405/37094 [00:42<1:23:09,  7.35it/s]

Epoch: 0 Iteration: 406 Loss: 1.568 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 405/37094 [00:42<1:23:09,  7.35it/s]

Epoch: 0 Iteration: 406 Loss: 1.568 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 407/37094 [00:42<1:16:05,  8.04it/s]

Epoch: 0 Iteration: 407 Loss: 1.573 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 407/37094 [00:42<1:16:05,  8.04it/s]

Epoch: 0 Iteration: 408 Loss: 1.585 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|          | 407/37094 [00:42<1:16:05,  8.04it/s]

Epoch: 0 Iteration: 408 Loss: 1.585 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|          | 409/37094 [00:42<1:11:05,  8.60it/s]

Epoch: 0 Iteration: 409 Loss: 1.625 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 409/37094 [00:42<1:11:05,  8.60it/s]

Epoch: 0 Iteration: 410 Loss: 1.630 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|          | 409/37094 [00:42<1:11:05,  8.60it/s]

Epoch: 0 Iteration: 410 Loss: 1.630 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|          | 411/37094 [00:42<1:07:36,  9.04it/s]

Epoch: 0 Iteration: 411 Loss: 1.703 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 411/37094 [00:42<1:07:36,  9.04it/s]

Epoch: 0 Iteration: 412 Loss: 1.722 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 411/37094 [00:42<1:07:36,  9.04it/s]

Epoch: 0 Iteration: 412 Loss: 1.722 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 413/37094 [00:42<1:05:13,  9.37it/s]

Epoch: 0 Iteration: 413 Loss: 1.706 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 413/37094 [00:42<1:05:13,  9.37it/s]

Epoch: 0 Iteration: 414 Loss: 1.701 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 413/37094 [00:42<1:05:13,  9.37it/s]

Epoch: 0 Iteration: 414 Loss: 1.701 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 415/37094 [00:42<1:03:29,  9.63it/s]

Epoch: 0 Iteration: 415 Loss: 1.702 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|          | 415/37094 [00:43<1:03:29,  9.63it/s]

Epoch: 0 Iteration: 416 Loss: 1.695 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 415/37094 [00:43<1:03:29,  9.63it/s]

Epoch: 0 Iteration: 416 Loss: 1.695 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 417/37094 [00:43<1:02:21,  9.80it/s]

Epoch: 0 Iteration: 417 Loss: 1.707 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 417/37094 [00:43<1:02:21,  9.80it/s]

Epoch: 0 Iteration: 418 Loss: 1.652 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 417/37094 [00:43<1:02:21,  9.80it/s]

Epoch: 0 Iteration: 418 Loss: 1.652 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 419/37094 [00:43<1:01:26,  9.95it/s]

Epoch: 0 Iteration: 419 Loss: 1.659 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 419/37094 [00:43<1:01:26,  9.95it/s]

Epoch: 0 Iteration: 420 Loss: 1.646 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 419/37094 [00:43<1:01:26,  9.95it/s]

Epoch: 0 Iteration: 420 Loss: 1.646 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 421/37094 [00:43<1:00:50, 10.04it/s]

Epoch: 0 Iteration: 421 Loss: 1.617 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 421/37094 [00:43<1:00:50, 10.04it/s]

Epoch: 0 Iteration: 422 Loss: 1.618 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 421/37094 [00:43<1:00:50, 10.04it/s]

Epoch: 0 Iteration: 422 Loss: 1.618 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 423/37094 [00:43<1:00:26, 10.11it/s]

Epoch: 0 Iteration: 423 Loss: 1.648 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 423/37094 [00:43<1:00:26, 10.11it/s]

Epoch: 0 Iteration: 424 Loss: 1.623 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 423/37094 [00:43<1:00:26, 10.11it/s]

Epoch: 0 Iteration: 424 Loss: 1.623 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 425/37094 [00:43<1:00:09, 10.16it/s]

Epoch: 0 Iteration: 425 Loss: 1.634 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 425/37094 [00:43<1:00:09, 10.16it/s]

Epoch: 0 Iteration: 426 Loss: 1.590 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 425/37094 [00:44<1:00:09, 10.16it/s]

Epoch: 0 Iteration: 426 Loss: 1.590 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 427/37094 [00:44<59:57, 10.19it/s]  

Epoch: 0 Iteration: 427 Loss: 1.627 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 427/37094 [00:44<59:57, 10.19it/s]

Epoch: 0 Iteration: 428 Loss: 1.650 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 427/37094 [00:44<59:57, 10.19it/s]

Epoch: 0 Iteration: 428 Loss: 1.650 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 429/37094 [00:44<59:49, 10.22it/s]

Epoch: 0 Iteration: 429 Loss: 1.611 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 429/37094 [00:44<59:49, 10.22it/s]

Epoch: 0 Iteration: 430 Loss: 1.571 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 429/37094 [00:44<59:49, 10.22it/s]

Epoch: 0 Iteration: 430 Loss: 1.571 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 431/37094 [00:44<59:45, 10.23it/s]

Epoch: 0 Iteration: 431 Loss: 1.526 Validation Loss: 1.419 Accuracy: 0.056 Validation Accuracy: 0.096:   1%|          | 431/37094 [00:44<59:45, 10.23it/s]

Epoch: 0 Iteration: 432 Loss: 1.483 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 431/37094 [00:44<59:45, 10.23it/s]

Epoch: 0 Iteration: 432 Loss: 1.483 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 433/37094 [00:44<59:39, 10.24it/s]

Epoch: 0 Iteration: 433 Loss: 1.465 Validation Loss: 1.419 Accuracy: 0.044 Validation Accuracy: 0.096:   1%|          | 433/37094 [00:44<59:39, 10.24it/s]

Epoch: 0 Iteration: 434 Loss: 1.442 Validation Loss: 1.419 Accuracy: 0.050 Validation Accuracy: 0.096:   1%|          | 433/37094 [00:44<59:39, 10.24it/s]

Epoch: 0 Iteration: 434 Loss: 1.442 Validation Loss: 1.419 Accuracy: 0.050 Validation Accuracy: 0.096:   1%|          | 435/37094 [00:44<59:35, 10.25it/s]

Epoch: 0 Iteration: 435 Loss: 1.466 Validation Loss: 1.419 Accuracy: 0.053 Validation Accuracy: 0.096:   1%|          | 435/37094 [00:44<59:35, 10.25it/s]

Epoch: 0 Iteration: 436 Loss: 1.449 Validation Loss: 1.419 Accuracy: 0.053 Validation Accuracy: 0.096:   1%|          | 435/37094 [00:45<59:35, 10.25it/s]

Epoch: 0 Iteration: 436 Loss: 1.449 Validation Loss: 1.419 Accuracy: 0.053 Validation Accuracy: 0.096:   1%|          | 437/37094 [00:45<59:32, 10.26it/s]

Epoch: 0 Iteration: 437 Loss: 1.437 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 437/37094 [00:45<59:32, 10.26it/s]

Epoch: 0 Iteration: 438 Loss: 1.464 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 437/37094 [00:45<59:32, 10.26it/s]

Epoch: 0 Iteration: 438 Loss: 1.464 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|          | 439/37094 [00:45<59:30, 10.27it/s]

Epoch: 0 Iteration: 439 Loss: 1.493 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 439/37094 [00:45<59:30, 10.27it/s]

Epoch: 0 Iteration: 440 Loss: 1.507 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 439/37094 [00:45<59:30, 10.27it/s]

Epoch: 0 Iteration: 440 Loss: 1.507 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 441/37094 [00:45<59:29, 10.27it/s]

Epoch: 0 Iteration: 441 Loss: 1.508 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 441/37094 [00:45<59:29, 10.27it/s]

Epoch: 0 Iteration: 442 Loss: 1.505 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 441/37094 [00:45<59:29, 10.27it/s]

Epoch: 0 Iteration: 442 Loss: 1.505 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|          | 443/37094 [00:45<59:28, 10.27it/s]

Epoch: 0 Iteration: 443 Loss: 1.501 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 443/37094 [00:45<59:28, 10.27it/s]

Epoch: 0 Iteration: 444 Loss: 1.489 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 443/37094 [00:45<59:28, 10.27it/s]

Epoch: 0 Iteration: 444 Loss: 1.489 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|          | 445/37094 [00:45<59:27, 10.27it/s]

Epoch: 0 Iteration: 445 Loss: 1.485 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|          | 445/37094 [00:45<59:27, 10.27it/s]

Epoch: 0 Iteration: 446 Loss: 1.553 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 445/37094 [00:46<59:27, 10.27it/s]

Epoch: 0 Iteration: 446 Loss: 1.553 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 447/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 447 Loss: 1.541 Validation Loss: 1.419 Accuracy: 0.056 Validation Accuracy: 0.096:   1%|          | 447/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 448 Loss: 1.528 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 447/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 448 Loss: 1.528 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 449/37094 [00:46<59:26, 10.28it/s]

Epoch: 0 Iteration: 449 Loss: 1.612 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|          | 449/37094 [00:46<59:26, 10.28it/s]

Epoch: 0 Iteration: 450 Loss: 1.617 Validation Loss: 1.419 Accuracy: 0.050 Validation Accuracy: 0.096:   1%|          | 449/37094 [00:46<59:26, 10.28it/s]

Epoch: 0 Iteration: 450 Loss: 1.617 Validation Loss: 1.419 Accuracy: 0.050 Validation Accuracy: 0.096:   1%|          | 451/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 451 Loss: 1.611 Validation Loss: 1.419 Accuracy: 0.050 Validation Accuracy: 0.096:   1%|          | 451/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 452 Loss: 1.647 Validation Loss: 1.419 Accuracy: 0.056 Validation Accuracy: 0.096:   1%|          | 451/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 452 Loss: 1.647 Validation Loss: 1.419 Accuracy: 0.056 Validation Accuracy: 0.096:   1%|          | 453/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 453 Loss: 1.616 Validation Loss: 1.419 Accuracy: 0.053 Validation Accuracy: 0.096:   1%|          | 453/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 454 Loss: 1.615 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 453/37094 [00:46<59:26, 10.27it/s]

Epoch: 0 Iteration: 454 Loss: 1.615 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 455/37094 [00:46<59:25, 10.28it/s]

Epoch: 0 Iteration: 455 Loss: 1.605 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|          | 455/37094 [00:46<59:25, 10.28it/s]

Epoch: 0 Iteration: 456 Loss: 1.578 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 455/37094 [00:47<59:25, 10.28it/s]

Epoch: 0 Iteration: 456 Loss: 1.578 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 457/37094 [00:47<59:25, 10.27it/s]

Epoch: 0 Iteration: 457 Loss: 1.581 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 457/37094 [00:47<59:25, 10.27it/s]

Epoch: 0 Iteration: 458 Loss: 1.598 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 457/37094 [00:47<59:25, 10.27it/s]

Epoch: 0 Iteration: 458 Loss: 1.598 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|          | 459/37094 [00:47<59:25, 10.28it/s]

Epoch: 0 Iteration: 459 Loss: 1.550 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|          | 459/37094 [00:47<59:25, 10.28it/s]

Epoch: 0 Iteration: 460 Loss: 1.547 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 459/37094 [00:47<59:25, 10.28it/s]

Epoch: 0 Iteration: 460 Loss: 1.547 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 461/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 461 Loss: 1.574 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 461/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 462 Loss: 1.617 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 461/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 462 Loss: 1.617 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|          | 463/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 463 Loss: 1.587 Validation Loss: 1.419 Accuracy: 0.091 Validation Accuracy: 0.096:   1%|          | 463/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 464 Loss: 1.598 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|          | 463/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 464 Loss: 1.598 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|▏         | 465/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 465 Loss: 1.618 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 465/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 466 Loss: 1.577 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|▏         | 465/37094 [00:47<59:24, 10.28it/s]

Epoch: 0 Iteration: 466 Loss: 1.577 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|▏         | 467/37094 [00:47<59:36, 10.24it/s]

Epoch: 0 Iteration: 467 Loss: 1.587 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 467/37094 [00:48<59:36, 10.24it/s]

Epoch: 0 Iteration: 468 Loss: 1.559 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 467/37094 [00:48<59:36, 10.24it/s]

Epoch: 0 Iteration: 468 Loss: 1.559 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 469/37094 [00:48<59:30, 10.26it/s]

Epoch: 0 Iteration: 469 Loss: 1.461 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 469/37094 [00:48<59:30, 10.26it/s]

Epoch: 0 Iteration: 470 Loss: 1.474 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|▏         | 469/37094 [00:48<59:30, 10.26it/s]

Epoch: 0 Iteration: 470 Loss: 1.474 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|▏         | 471/37094 [00:48<59:25, 10.27it/s]

Epoch: 0 Iteration: 471 Loss: 1.522 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|▏         | 471/37094 [00:48<59:25, 10.27it/s]

Epoch: 0 Iteration: 472 Loss: 1.531 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 471/37094 [00:48<59:25, 10.27it/s]

Epoch: 0 Iteration: 472 Loss: 1.531 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 473/37094 [00:48<59:23, 10.28it/s]

Epoch: 0 Iteration: 473 Loss: 1.549 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 473/37094 [00:48<59:23, 10.28it/s]

Epoch: 0 Iteration: 474 Loss: 1.561 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 473/37094 [00:48<59:23, 10.28it/s]

Epoch: 0 Iteration: 474 Loss: 1.561 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 475/37094 [00:48<59:21, 10.28it/s]

Epoch: 0 Iteration: 475 Loss: 1.532 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 475/37094 [00:48<59:21, 10.28it/s]

Epoch: 0 Iteration: 476 Loss: 1.564 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 475/37094 [00:48<59:21, 10.28it/s]

Epoch: 0 Iteration: 476 Loss: 1.564 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 477/37094 [00:48<59:20, 10.28it/s]

Epoch: 0 Iteration: 477 Loss: 1.587 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 477/37094 [00:49<59:20, 10.28it/s]

Epoch: 0 Iteration: 478 Loss: 1.606 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|▏         | 477/37094 [00:49<59:20, 10.28it/s]

Epoch: 0 Iteration: 478 Loss: 1.606 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|▏         | 479/37094 [00:49<59:19, 10.29it/s]

Epoch: 0 Iteration: 479 Loss: 1.612 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 479/37094 [00:49<59:19, 10.29it/s]

Epoch: 0 Iteration: 480 Loss: 1.593 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|▏         | 479/37094 [00:49<59:19, 10.29it/s]

Epoch: 0 Iteration: 480 Loss: 1.593 Validation Loss: 1.419 Accuracy: 0.081 Validation Accuracy: 0.096:   1%|▏         | 481/37094 [00:49<59:16, 10.29it/s]

Epoch: 0 Iteration: 481 Loss: 1.624 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|▏         | 481/37094 [00:49<59:16, 10.29it/s]

Epoch: 0 Iteration: 482 Loss: 1.572 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|▏         | 481/37094 [00:49<59:16, 10.29it/s]

Epoch: 0 Iteration: 482 Loss: 1.572 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|▏         | 483/37094 [00:49<59:14, 10.30it/s]

Epoch: 0 Iteration: 483 Loss: 1.614 Validation Loss: 1.419 Accuracy: 0.094 Validation Accuracy: 0.096:   1%|▏         | 483/37094 [00:49<59:14, 10.30it/s]

Epoch: 0 Iteration: 484 Loss: 1.641 Validation Loss: 1.419 Accuracy: 0.097 Validation Accuracy: 0.096:   1%|▏         | 483/37094 [00:49<59:14, 10.30it/s]

Epoch: 0 Iteration: 484 Loss: 1.641 Validation Loss: 1.419 Accuracy: 0.097 Validation Accuracy: 0.096:   1%|▏         | 485/37094 [00:49<59:13, 10.30it/s]

Epoch: 0 Iteration: 485 Loss: 1.653 Validation Loss: 1.419 Accuracy: 0.097 Validation Accuracy: 0.096:   1%|▏         | 485/37094 [00:49<59:13, 10.30it/s]

Epoch: 0 Iteration: 486 Loss: 1.657 Validation Loss: 1.419 Accuracy: 0.097 Validation Accuracy: 0.096:   1%|▏         | 485/37094 [00:49<59:13, 10.30it/s]

Epoch: 0 Iteration: 486 Loss: 1.657 Validation Loss: 1.419 Accuracy: 0.097 Validation Accuracy: 0.096:   1%|▏         | 487/37094 [00:49<59:16, 10.29it/s]

Epoch: 0 Iteration: 487 Loss: 1.681 Validation Loss: 1.419 Accuracy: 0.091 Validation Accuracy: 0.096:   1%|▏         | 487/37094 [00:50<59:16, 10.29it/s]

Epoch: 0 Iteration: 488 Loss: 1.704 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|▏         | 487/37094 [00:50<59:16, 10.29it/s]

Epoch: 0 Iteration: 488 Loss: 1.704 Validation Loss: 1.419 Accuracy: 0.084 Validation Accuracy: 0.096:   1%|▏         | 489/37094 [00:50<59:17, 10.29it/s]

Epoch: 0 Iteration: 489 Loss: 1.767 Validation Loss: 1.419 Accuracy: 0.087 Validation Accuracy: 0.096:   1%|▏         | 489/37094 [00:50<59:17, 10.29it/s]

Epoch: 0 Iteration: 490 Loss: 1.767 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 489/37094 [00:50<59:17, 10.29it/s]

Epoch: 0 Iteration: 490 Loss: 1.767 Validation Loss: 1.419 Accuracy: 0.078 Validation Accuracy: 0.096:   1%|▏         | 491/37094 [00:50<59:21, 10.28it/s]

Epoch: 0 Iteration: 491 Loss: 1.732 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 491/37094 [00:50<59:21, 10.28it/s]

Epoch: 0 Iteration: 492 Loss: 1.712 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|▏         | 491/37094 [00:50<59:21, 10.28it/s]

Epoch: 0 Iteration: 492 Loss: 1.712 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|▏         | 493/37094 [00:50<59:17, 10.29it/s]

Epoch: 0 Iteration: 493 Loss: 1.759 Validation Loss: 1.419 Accuracy: 0.069 Validation Accuracy: 0.096:   1%|▏         | 493/37094 [00:50<59:17, 10.29it/s]

Epoch: 0 Iteration: 494 Loss: 1.753 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|▏         | 493/37094 [00:50<59:17, 10.29it/s]

Epoch: 0 Iteration: 494 Loss: 1.753 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|▏         | 495/37094 [00:50<59:18, 10.28it/s]

Epoch: 0 Iteration: 495 Loss: 1.804 Validation Loss: 1.419 Accuracy: 0.066 Validation Accuracy: 0.096:   1%|▏         | 495/37094 [00:50<59:18, 10.28it/s]

Epoch: 0 Iteration: 496 Loss: 1.802 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|▏         | 495/37094 [00:50<59:18, 10.28it/s]

Epoch: 0 Iteration: 496 Loss: 1.802 Validation Loss: 1.419 Accuracy: 0.059 Validation Accuracy: 0.096:   1%|▏         | 497/37094 [00:50<59:15, 10.29it/s]

Epoch: 0 Iteration: 497 Loss: 1.766 Validation Loss: 1.419 Accuracy: 0.062 Validation Accuracy: 0.096:   1%|▏         | 497/37094 [00:50<59:15, 10.29it/s]

Epoch: 0 Iteration: 498 Loss: 1.735 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|▏         | 497/37094 [00:51<59:15, 10.29it/s]

Epoch: 0 Iteration: 498 Loss: 1.735 Validation Loss: 1.419 Accuracy: 0.072 Validation Accuracy: 0.096:   1%|▏         | 499/37094 [00:51<59:13, 10.30it/s]

Epoch: 0 Iteration: 499 Loss: 1.748 Validation Loss: 1.419 Accuracy: 0.075 Validation Accuracy: 0.096:   1%|▏         | 499/37094 [00:51<59:13, 10.30it/s]

Epoch: 0 Iteration: 500 Loss: 1.787 Validation Loss: 1.487 Accuracy: 0.081 Validation Accuracy: 0.095:   1%|▏         | 499/37094 [00:51<59:13, 10.30it/s]

Epoch: 0 Iteration: 500 Loss: 1.787 Validation Loss: 1.487 Accuracy: 0.081 Validation Accuracy: 0.095:   1%|▏         | 501/37094 [00:51<1:48:48,  5.61it/s]

Epoch: 0 Iteration: 501 Loss: 1.743 Validation Loss: 1.487 Accuracy: 0.081 Validation Accuracy: 0.095:   1%|▏         | 501/37094 [00:51<1:48:48,  5.61it/s]

Epoch: 0 Iteration: 502 Loss: 1.779 Validation Loss: 1.487 Accuracy: 0.078 Validation Accuracy: 0.095:   1%|▏         | 501/37094 [00:52<1:48:48,  5.61it/s]

Epoch: 0 Iteration: 502 Loss: 1.779 Validation Loss: 1.487 Accuracy: 0.078 Validation Accuracy: 0.095:   1%|▏         | 503/37094 [00:52<1:33:11,  6.54it/s]

Epoch: 0 Iteration: 503 Loss: 1.781 Validation Loss: 1.487 Accuracy: 0.066 Validation Accuracy: 0.095:   1%|▏         | 503/37094 [00:52<1:33:11,  6.54it/s]

Epoch: 0 Iteration: 504 Loss: 1.749 Validation Loss: 1.487 Accuracy: 0.072 Validation Accuracy: 0.095:   1%|▏         | 503/37094 [00:52<1:33:11,  6.54it/s]

Epoch: 0 Iteration: 504 Loss: 1.749 Validation Loss: 1.487 Accuracy: 0.072 Validation Accuracy: 0.095:   1%|▏         | 505/37094 [00:52<1:22:59,  7.35it/s]

Epoch: 0 Iteration: 505 Loss: 1.726 Validation Loss: 1.487 Accuracy: 0.069 Validation Accuracy: 0.095:   1%|▏         | 505/37094 [00:52<1:22:59,  7.35it/s]

Epoch: 0 Iteration: 506 Loss: 1.735 Validation Loss: 1.487 Accuracy: 0.075 Validation Accuracy: 0.095:   1%|▏         | 505/37094 [00:52<1:22:59,  7.35it/s]

Epoch: 0 Iteration: 506 Loss: 1.735 Validation Loss: 1.487 Accuracy: 0.075 Validation Accuracy: 0.095:   1%|▏         | 507/37094 [00:52<1:15:51,  8.04it/s]

Epoch: 0 Iteration: 507 Loss: 1.684 Validation Loss: 1.487 Accuracy: 0.078 Validation Accuracy: 0.095:   1%|▏         | 507/37094 [00:52<1:15:51,  8.04it/s]

Epoch: 0 Iteration: 508 Loss: 1.678 Validation Loss: 1.487 Accuracy: 0.084 Validation Accuracy: 0.095:   1%|▏         | 507/37094 [00:52<1:15:51,  8.04it/s]

Epoch: 0 Iteration: 508 Loss: 1.678 Validation Loss: 1.487 Accuracy: 0.084 Validation Accuracy: 0.095:   1%|▏         | 509/37094 [00:52<1:10:49,  8.61it/s]

Epoch: 0 Iteration: 509 Loss: 1.655 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 509/37094 [00:52<1:10:49,  8.61it/s]

Epoch: 0 Iteration: 510 Loss: 1.651 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 509/37094 [00:52<1:10:49,  8.61it/s]

Epoch: 0 Iteration: 510 Loss: 1.651 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 511/37094 [00:52<1:07:19,  9.06it/s]

Epoch: 0 Iteration: 511 Loss: 1.687 Validation Loss: 1.487 Accuracy: 0.084 Validation Accuracy: 0.095:   1%|▏         | 511/37094 [00:52<1:07:19,  9.06it/s]

Epoch: 0 Iteration: 512 Loss: 1.683 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 511/37094 [00:52<1:07:19,  9.06it/s]

Epoch: 0 Iteration: 512 Loss: 1.683 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 513/37094 [00:52<1:04:53,  9.40it/s]

Epoch: 0 Iteration: 513 Loss: 1.635 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   1%|▏         | 513/37094 [00:53<1:04:53,  9.40it/s]

Epoch: 0 Iteration: 514 Loss: 1.640 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 513/37094 [00:53<1:04:53,  9.40it/s]

Epoch: 0 Iteration: 514 Loss: 1.640 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 515/37094 [00:53<1:03:12,  9.65it/s]

Epoch: 0 Iteration: 515 Loss: 1.597 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   1%|▏         | 515/37094 [00:53<1:03:12,  9.65it/s]

Epoch: 0 Iteration: 516 Loss: 1.575 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 515/37094 [00:53<1:03:12,  9.65it/s]

Epoch: 0 Iteration: 516 Loss: 1.575 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 517/37094 [00:53<1:01:59,  9.83it/s]

Epoch: 0 Iteration: 517 Loss: 1.598 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 517/37094 [00:53<1:01:59,  9.83it/s]

Epoch: 0 Iteration: 518 Loss: 1.639 Validation Loss: 1.487 Accuracy: 0.075 Validation Accuracy: 0.095:   1%|▏         | 517/37094 [00:53<1:01:59,  9.83it/s]

Epoch: 0 Iteration: 518 Loss: 1.639 Validation Loss: 1.487 Accuracy: 0.075 Validation Accuracy: 0.095:   1%|▏         | 519/37094 [00:53<1:01:11,  9.96it/s]

Epoch: 0 Iteration: 519 Loss: 1.628 Validation Loss: 1.487 Accuracy: 0.075 Validation Accuracy: 0.095:   1%|▏         | 519/37094 [00:53<1:01:11,  9.96it/s]

Epoch: 0 Iteration: 520 Loss: 1.624 Validation Loss: 1.487 Accuracy: 0.062 Validation Accuracy: 0.095:   1%|▏         | 519/37094 [00:53<1:01:11,  9.96it/s]

Epoch: 0 Iteration: 520 Loss: 1.624 Validation Loss: 1.487 Accuracy: 0.062 Validation Accuracy: 0.095:   1%|▏         | 521/37094 [00:53<1:00:36, 10.06it/s]

Epoch: 0 Iteration: 521 Loss: 1.619 Validation Loss: 1.487 Accuracy: 0.069 Validation Accuracy: 0.095:   1%|▏         | 521/37094 [00:53<1:00:36, 10.06it/s]

Epoch: 0 Iteration: 522 Loss: 1.622 Validation Loss: 1.487 Accuracy: 0.066 Validation Accuracy: 0.095:   1%|▏         | 521/37094 [00:53<1:00:36, 10.06it/s]

Epoch: 0 Iteration: 522 Loss: 1.622 Validation Loss: 1.487 Accuracy: 0.066 Validation Accuracy: 0.095:   1%|▏         | 523/37094 [00:53<1:00:09, 10.13it/s]

Epoch: 0 Iteration: 523 Loss: 1.596 Validation Loss: 1.487 Accuracy: 0.059 Validation Accuracy: 0.095:   1%|▏         | 523/37094 [00:54<1:00:09, 10.13it/s]

Epoch: 0 Iteration: 524 Loss: 1.620 Validation Loss: 1.487 Accuracy: 0.059 Validation Accuracy: 0.095:   1%|▏         | 523/37094 [00:54<1:00:09, 10.13it/s]

Epoch: 0 Iteration: 524 Loss: 1.620 Validation Loss: 1.487 Accuracy: 0.059 Validation Accuracy: 0.095:   1%|▏         | 525/37094 [00:54<59:55, 10.17it/s]  

Epoch: 0 Iteration: 525 Loss: 1.630 Validation Loss: 1.487 Accuracy: 0.062 Validation Accuracy: 0.095:   1%|▏         | 525/37094 [00:54<59:55, 10.17it/s]

Epoch: 0 Iteration: 526 Loss: 1.625 Validation Loss: 1.487 Accuracy: 0.062 Validation Accuracy: 0.095:   1%|▏         | 525/37094 [00:54<59:55, 10.17it/s]

Epoch: 0 Iteration: 526 Loss: 1.625 Validation Loss: 1.487 Accuracy: 0.062 Validation Accuracy: 0.095:   1%|▏         | 527/37094 [00:54<59:39, 10.21it/s]

Epoch: 0 Iteration: 527 Loss: 1.592 Validation Loss: 1.487 Accuracy: 0.069 Validation Accuracy: 0.095:   1%|▏         | 527/37094 [00:54<59:39, 10.21it/s]

Epoch: 0 Iteration: 528 Loss: 1.573 Validation Loss: 1.487 Accuracy: 0.072 Validation Accuracy: 0.095:   1%|▏         | 527/37094 [00:54<59:39, 10.21it/s]

Epoch: 0 Iteration: 528 Loss: 1.573 Validation Loss: 1.487 Accuracy: 0.072 Validation Accuracy: 0.095:   1%|▏         | 529/37094 [00:54<59:29, 10.24it/s]

Epoch: 0 Iteration: 529 Loss: 1.563 Validation Loss: 1.487 Accuracy: 0.081 Validation Accuracy: 0.095:   1%|▏         | 529/37094 [00:54<59:29, 10.24it/s]

Epoch: 0 Iteration: 530 Loss: 1.561 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   1%|▏         | 529/37094 [00:54<59:29, 10.24it/s]

Epoch: 0 Iteration: 530 Loss: 1.561 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   1%|▏         | 531/37094 [00:54<59:21, 10.27it/s]

Epoch: 0 Iteration: 531 Loss: 1.506 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   1%|▏         | 531/37094 [00:54<59:21, 10.27it/s]

Epoch: 0 Iteration: 532 Loss: 1.510 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   1%|▏         | 531/37094 [00:54<59:21, 10.27it/s]

Epoch: 0 Iteration: 532 Loss: 1.510 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   1%|▏         | 533/37094 [00:54<59:19, 10.27it/s]

Epoch: 0 Iteration: 533 Loss: 1.541 Validation Loss: 1.487 Accuracy: 0.119 Validation Accuracy: 0.095:   1%|▏         | 533/37094 [00:55<59:19, 10.27it/s]

Epoch: 0 Iteration: 534 Loss: 1.529 Validation Loss: 1.487 Accuracy: 0.125 Validation Accuracy: 0.095:   1%|▏         | 533/37094 [00:55<59:19, 10.27it/s]

Epoch: 0 Iteration: 534 Loss: 1.529 Validation Loss: 1.487 Accuracy: 0.125 Validation Accuracy: 0.095:   1%|▏         | 535/37094 [00:55<59:14, 10.29it/s]

Epoch: 0 Iteration: 535 Loss: 1.517 Validation Loss: 1.487 Accuracy: 0.125 Validation Accuracy: 0.095:   1%|▏         | 535/37094 [00:55<59:14, 10.29it/s]

Epoch: 0 Iteration: 536 Loss: 1.511 Validation Loss: 1.487 Accuracy: 0.138 Validation Accuracy: 0.095:   1%|▏         | 535/37094 [00:55<59:14, 10.29it/s]

Epoch: 0 Iteration: 536 Loss: 1.511 Validation Loss: 1.487 Accuracy: 0.138 Validation Accuracy: 0.095:   1%|▏         | 537/37094 [00:55<59:12, 10.29it/s]

Epoch: 0 Iteration: 537 Loss: 1.522 Validation Loss: 1.487 Accuracy: 0.131 Validation Accuracy: 0.095:   1%|▏         | 537/37094 [00:55<59:12, 10.29it/s]

Epoch: 0 Iteration: 538 Loss: 1.467 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   1%|▏         | 537/37094 [00:55<59:12, 10.29it/s]

Epoch: 0 Iteration: 538 Loss: 1.467 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   1%|▏         | 539/37094 [00:55<59:11, 10.29it/s]

Epoch: 0 Iteration: 539 Loss: 1.477 Validation Loss: 1.487 Accuracy: 0.131 Validation Accuracy: 0.095:   1%|▏         | 539/37094 [00:55<59:11, 10.29it/s]

Epoch: 0 Iteration: 540 Loss: 1.487 Validation Loss: 1.487 Accuracy: 0.113 Validation Accuracy: 0.095:   1%|▏         | 539/37094 [00:55<59:11, 10.29it/s]

Epoch: 0 Iteration: 540 Loss: 1.487 Validation Loss: 1.487 Accuracy: 0.113 Validation Accuracy: 0.095:   1%|▏         | 541/37094 [00:55<59:10, 10.30it/s]

Epoch: 0 Iteration: 541 Loss: 1.461 Validation Loss: 1.487 Accuracy: 0.119 Validation Accuracy: 0.095:   1%|▏         | 541/37094 [00:55<59:10, 10.30it/s]

Epoch: 0 Iteration: 542 Loss: 1.429 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   1%|▏         | 541/37094 [00:55<59:10, 10.30it/s]

Epoch: 0 Iteration: 542 Loss: 1.429 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   1%|▏         | 543/37094 [00:55<59:08, 10.30it/s]

Epoch: 0 Iteration: 543 Loss: 1.381 Validation Loss: 1.487 Accuracy: 0.109 Validation Accuracy: 0.095:   1%|▏         | 543/37094 [00:55<59:08, 10.30it/s]

Epoch: 0 Iteration: 544 Loss: 1.395 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   1%|▏         | 543/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 544 Loss: 1.395 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   1%|▏         | 545/37094 [00:56<59:07, 10.30it/s]

Epoch: 0 Iteration: 545 Loss: 1.423 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   1%|▏         | 545/37094 [00:56<59:07, 10.30it/s]

Epoch: 0 Iteration: 546 Loss: 1.420 Validation Loss: 1.487 Accuracy: 0.081 Validation Accuracy: 0.095:   1%|▏         | 545/37094 [00:56<59:07, 10.30it/s]

Epoch: 0 Iteration: 546 Loss: 1.420 Validation Loss: 1.487 Accuracy: 0.081 Validation Accuracy: 0.095:   1%|▏         | 547/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 547 Loss: 1.462 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   1%|▏         | 547/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 548 Loss: 1.495 Validation Loss: 1.487 Accuracy: 0.100 Validation Accuracy: 0.095:   1%|▏         | 547/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 548 Loss: 1.495 Validation Loss: 1.487 Accuracy: 0.100 Validation Accuracy: 0.095:   1%|▏         | 549/37094 [00:56<59:07, 10.30it/s]

Epoch: 0 Iteration: 549 Loss: 1.467 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 549/37094 [00:56<59:07, 10.30it/s]

Epoch: 0 Iteration: 550 Loss: 1.501 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   1%|▏         | 549/37094 [00:56<59:07, 10.30it/s]

Epoch: 0 Iteration: 550 Loss: 1.501 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   1%|▏         | 551/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 551 Loss: 1.541 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   1%|▏         | 551/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 552 Loss: 1.560 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   1%|▏         | 551/37094 [00:56<59:08, 10.30it/s]

Epoch: 0 Iteration: 552 Loss: 1.560 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   1%|▏         | 553/37094 [00:56<59:06, 10.30it/s]

Epoch: 0 Iteration: 553 Loss: 1.546 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   1%|▏         | 553/37094 [00:56<59:06, 10.30it/s]

Epoch: 0 Iteration: 554 Loss: 1.567 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 553/37094 [00:57<59:06, 10.30it/s]

Epoch: 0 Iteration: 554 Loss: 1.567 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   1%|▏         | 555/37094 [00:57<59:08, 10.30it/s]

Epoch: 0 Iteration: 555 Loss: 1.598 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   1%|▏         | 555/37094 [00:57<59:08, 10.30it/s]

Epoch: 0 Iteration: 556 Loss: 1.651 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   1%|▏         | 555/37094 [00:57<59:08, 10.30it/s]

Epoch: 0 Iteration: 556 Loss: 1.651 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   2%|▏         | 557/37094 [00:57<59:10, 10.29it/s]

Epoch: 0 Iteration: 557 Loss: 1.629 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   2%|▏         | 557/37094 [00:57<59:10, 10.29it/s]

Epoch: 0 Iteration: 558 Loss: 1.605 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   2%|▏         | 557/37094 [00:57<59:10, 10.29it/s]

Epoch: 0 Iteration: 558 Loss: 1.605 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   2%|▏         | 559/37094 [00:57<59:08, 10.30it/s]

Epoch: 0 Iteration: 559 Loss: 1.647 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   2%|▏         | 559/37094 [00:57<59:08, 10.30it/s]

Epoch: 0 Iteration: 560 Loss: 1.632 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   2%|▏         | 559/37094 [00:57<59:08, 10.30it/s]

Epoch: 0 Iteration: 560 Loss: 1.632 Validation Loss: 1.487 Accuracy: 0.087 Validation Accuracy: 0.095:   2%|▏         | 561/37094 [00:57<59:05, 10.30it/s]

Epoch: 0 Iteration: 561 Loss: 1.639 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   2%|▏         | 561/37094 [00:57<59:05, 10.30it/s]

Epoch: 0 Iteration: 562 Loss: 1.604 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   2%|▏         | 561/37094 [00:57<59:05, 10.30it/s]

Epoch: 0 Iteration: 562 Loss: 1.604 Validation Loss: 1.487 Accuracy: 0.091 Validation Accuracy: 0.095:   2%|▏         | 563/37094 [00:57<59:04, 10.31it/s]

Epoch: 0 Iteration: 563 Loss: 1.624 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   2%|▏         | 563/37094 [00:57<59:04, 10.31it/s]

Epoch: 0 Iteration: 564 Loss: 1.597 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   2%|▏         | 563/37094 [00:58<59:04, 10.31it/s]

Epoch: 0 Iteration: 564 Loss: 1.597 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   2%|▏         | 565/37094 [00:58<59:04, 10.31it/s]

Epoch: 0 Iteration: 565 Loss: 1.555 Validation Loss: 1.487 Accuracy: 0.103 Validation Accuracy: 0.095:   2%|▏         | 565/37094 [00:58<59:04, 10.31it/s]

Epoch: 0 Iteration: 566 Loss: 1.568 Validation Loss: 1.487 Accuracy: 0.109 Validation Accuracy: 0.095:   2%|▏         | 565/37094 [00:58<59:04, 10.31it/s]

Epoch: 0 Iteration: 566 Loss: 1.568 Validation Loss: 1.487 Accuracy: 0.109 Validation Accuracy: 0.095:   2%|▏         | 567/37094 [00:58<59:09, 10.29it/s]

Epoch: 0 Iteration: 567 Loss: 1.555 Validation Loss: 1.487 Accuracy: 0.122 Validation Accuracy: 0.095:   2%|▏         | 567/37094 [00:58<59:09, 10.29it/s]

Epoch: 0 Iteration: 568 Loss: 1.549 Validation Loss: 1.487 Accuracy: 0.113 Validation Accuracy: 0.095:   2%|▏         | 567/37094 [00:58<59:09, 10.29it/s]

Epoch: 0 Iteration: 568 Loss: 1.549 Validation Loss: 1.487 Accuracy: 0.113 Validation Accuracy: 0.095:   2%|▏         | 569/37094 [00:58<59:05, 10.30it/s]

Epoch: 0 Iteration: 569 Loss: 1.588 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   2%|▏         | 569/37094 [00:58<59:05, 10.30it/s]

Epoch: 0 Iteration: 570 Loss: 1.578 Validation Loss: 1.487 Accuracy: 0.141 Validation Accuracy: 0.095:   2%|▏         | 569/37094 [00:58<59:05, 10.30it/s]

Epoch: 0 Iteration: 570 Loss: 1.578 Validation Loss: 1.487 Accuracy: 0.141 Validation Accuracy: 0.095:   2%|▏         | 571/37094 [00:58<59:03, 10.31it/s]

Epoch: 0 Iteration: 571 Loss: 1.558 Validation Loss: 1.487 Accuracy: 0.141 Validation Accuracy: 0.095:   2%|▏         | 571/37094 [00:58<59:03, 10.31it/s]

Epoch: 0 Iteration: 572 Loss: 1.537 Validation Loss: 1.487 Accuracy: 0.147 Validation Accuracy: 0.095:   2%|▏         | 571/37094 [00:58<59:03, 10.31it/s]

Epoch: 0 Iteration: 572 Loss: 1.537 Validation Loss: 1.487 Accuracy: 0.147 Validation Accuracy: 0.095:   2%|▏         | 573/37094 [00:58<59:02, 10.31it/s]

Epoch: 0 Iteration: 573 Loss: 1.539 Validation Loss: 1.487 Accuracy: 0.163 Validation Accuracy: 0.095:   2%|▏         | 573/37094 [00:58<59:02, 10.31it/s]

Epoch: 0 Iteration: 574 Loss: 1.541 Validation Loss: 1.487 Accuracy: 0.166 Validation Accuracy: 0.095:   2%|▏         | 573/37094 [00:58<59:02, 10.31it/s]

Epoch: 0 Iteration: 574 Loss: 1.541 Validation Loss: 1.487 Accuracy: 0.166 Validation Accuracy: 0.095:   2%|▏         | 575/37094 [00:59<59:02, 10.31it/s]

Epoch: 0 Iteration: 575 Loss: 1.516 Validation Loss: 1.487 Accuracy: 0.166 Validation Accuracy: 0.095:   2%|▏         | 575/37094 [00:59<59:02, 10.31it/s]

Epoch: 0 Iteration: 576 Loss: 1.554 Validation Loss: 1.487 Accuracy: 0.153 Validation Accuracy: 0.095:   2%|▏         | 575/37094 [00:59<59:02, 10.31it/s]

Epoch: 0 Iteration: 576 Loss: 1.554 Validation Loss: 1.487 Accuracy: 0.153 Validation Accuracy: 0.095:   2%|▏         | 577/37094 [00:59<59:01, 10.31it/s]

Epoch: 0 Iteration: 577 Loss: 1.560 Validation Loss: 1.487 Accuracy: 0.153 Validation Accuracy: 0.095:   2%|▏         | 577/37094 [00:59<59:01, 10.31it/s]

Epoch: 0 Iteration: 578 Loss: 1.561 Validation Loss: 1.487 Accuracy: 0.163 Validation Accuracy: 0.095:   2%|▏         | 577/37094 [00:59<59:01, 10.31it/s]

Epoch: 0 Iteration: 578 Loss: 1.561 Validation Loss: 1.487 Accuracy: 0.163 Validation Accuracy: 0.095:   2%|▏         | 579/37094 [00:59<59:05, 10.30it/s]

Epoch: 0 Iteration: 579 Loss: 1.519 Validation Loss: 1.487 Accuracy: 0.166 Validation Accuracy: 0.095:   2%|▏         | 579/37094 [00:59<59:05, 10.30it/s]

Epoch: 0 Iteration: 580 Loss: 1.546 Validation Loss: 1.487 Accuracy: 0.138 Validation Accuracy: 0.095:   2%|▏         | 579/37094 [00:59<59:05, 10.30it/s]

Epoch: 0 Iteration: 580 Loss: 1.546 Validation Loss: 1.487 Accuracy: 0.138 Validation Accuracy: 0.095:   2%|▏         | 581/37094 [00:59<59:04, 10.30it/s]

Epoch: 0 Iteration: 581 Loss: 1.554 Validation Loss: 1.487 Accuracy: 0.131 Validation Accuracy: 0.095:   2%|▏         | 581/37094 [00:59<59:04, 10.30it/s]

Epoch: 0 Iteration: 582 Loss: 1.579 Validation Loss: 1.487 Accuracy: 0.134 Validation Accuracy: 0.095:   2%|▏         | 581/37094 [00:59<59:04, 10.30it/s]

Epoch: 0 Iteration: 582 Loss: 1.579 Validation Loss: 1.487 Accuracy: 0.134 Validation Accuracy: 0.095:   2%|▏         | 583/37094 [00:59<59:03, 10.30it/s]

Epoch: 0 Iteration: 583 Loss: 1.628 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   2%|▏         | 583/37094 [00:59<59:03, 10.30it/s]

Epoch: 0 Iteration: 584 Loss: 1.627 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   2%|▏         | 583/37094 [00:59<59:03, 10.30it/s]

Epoch: 0 Iteration: 584 Loss: 1.627 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   2%|▏         | 585/37094 [00:59<59:01, 10.31it/s]

Epoch: 0 Iteration: 585 Loss: 1.614 Validation Loss: 1.487 Accuracy: 0.100 Validation Accuracy: 0.095:   2%|▏         | 585/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 586 Loss: 1.593 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   2%|▏         | 585/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 586 Loss: 1.593 Validation Loss: 1.487 Accuracy: 0.106 Validation Accuracy: 0.095:   2%|▏         | 587/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 587 Loss: 1.610 Validation Loss: 1.487 Accuracy: 0.097 Validation Accuracy: 0.095:   2%|▏         | 587/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 588 Loss: 1.596 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   2%|▏         | 587/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 588 Loss: 1.596 Validation Loss: 1.487 Accuracy: 0.094 Validation Accuracy: 0.095:   2%|▏         | 589/37094 [01:00<59:00, 10.31it/s]

Epoch: 0 Iteration: 589 Loss: 1.535 Validation Loss: 1.487 Accuracy: 0.119 Validation Accuracy: 0.095:   2%|▏         | 589/37094 [01:00<59:00, 10.31it/s]

Epoch: 0 Iteration: 590 Loss: 1.548 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   2%|▏         | 589/37094 [01:00<59:00, 10.31it/s]

Epoch: 0 Iteration: 590 Loss: 1.548 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   2%|▏         | 591/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 591 Loss: 1.536 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   2%|▏         | 591/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 592 Loss: 1.582 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   2%|▏         | 591/37094 [01:00<59:01, 10.31it/s]

Epoch: 0 Iteration: 592 Loss: 1.582 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   2%|▏         | 593/37094 [01:00<59:00, 10.31it/s]

Epoch: 0 Iteration: 593 Loss: 1.594 Validation Loss: 1.487 Accuracy: 0.116 Validation Accuracy: 0.095:   2%|▏         | 593/37094 [01:00<59:00, 10.31it/s]

Epoch: 0 Iteration: 594 Loss: 1.580 Validation Loss: 1.487 Accuracy: 0.119 Validation Accuracy: 0.095:   2%|▏         | 593/37094 [01:00<59:00, 10.31it/s]

Epoch: 0 Iteration: 594 Loss: 1.580 Validation Loss: 1.487 Accuracy: 0.119 Validation Accuracy: 0.095:   2%|▏         | 595/37094 [01:00<58:59, 10.31it/s]

Epoch: 0 Iteration: 595 Loss: 1.565 Validation Loss: 1.487 Accuracy: 0.122 Validation Accuracy: 0.095:   2%|▏         | 595/37094 [01:01<58:59, 10.31it/s]

Epoch: 0 Iteration: 596 Loss: 1.481 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   2%|▏         | 595/37094 [01:01<58:59, 10.31it/s]

Epoch: 0 Iteration: 596 Loss: 1.481 Validation Loss: 1.487 Accuracy: 0.128 Validation Accuracy: 0.095:   2%|▏         | 597/37094 [01:01<58:58, 10.31it/s]

Epoch: 0 Iteration: 597 Loss: 1.450 Validation Loss: 1.487 Accuracy: 0.141 Validation Accuracy: 0.095:   2%|▏         | 597/37094 [01:01<58:58, 10.31it/s]

Epoch: 0 Iteration: 598 Loss: 1.482 Validation Loss: 1.487 Accuracy: 0.144 Validation Accuracy: 0.095:   2%|▏         | 597/37094 [01:01<58:58, 10.31it/s]

Epoch: 0 Iteration: 598 Loss: 1.482 Validation Loss: 1.487 Accuracy: 0.144 Validation Accuracy: 0.095:   2%|▏         | 599/37094 [01:01<58:59, 10.31it/s]

Epoch: 0 Iteration: 599 Loss: 1.497 Validation Loss: 1.487 Accuracy: 0.122 Validation Accuracy: 0.095:   2%|▏         | 599/37094 [01:01<58:59, 10.31it/s]

Epoch: 0 Iteration: 600 Loss: 1.466 Validation Loss: 1.390 Accuracy: 0.125 Validation Accuracy: 0.098:   2%|▏         | 599/37094 [01:02<58:59, 10.31it/s]

Epoch: 0 Iteration: 600 Loss: 1.466 Validation Loss: 1.390 Accuracy: 0.125 Validation Accuracy: 0.098:   2%|▏         | 601/37094 [01:02<1:48:46,  5.59it/s]

Epoch: 0 Iteration: 601 Loss: 1.492 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 601/37094 [01:02<1:48:46,  5.59it/s]

Epoch: 0 Iteration: 602 Loss: 1.483 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 601/37094 [01:02<1:48:46,  5.59it/s]

Epoch: 0 Iteration: 602 Loss: 1.483 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 603/37094 [01:02<1:33:04,  6.53it/s]

Epoch: 0 Iteration: 603 Loss: 1.433 Validation Loss: 1.390 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 603/37094 [01:02<1:33:04,  6.53it/s]

Epoch: 0 Iteration: 604 Loss: 1.467 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 603/37094 [01:02<1:33:04,  6.53it/s]

Epoch: 0 Iteration: 604 Loss: 1.467 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 605/37094 [01:02<1:22:51,  7.34it/s]

Epoch: 0 Iteration: 605 Loss: 1.481 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 605/37094 [01:02<1:22:51,  7.34it/s]

Epoch: 0 Iteration: 606 Loss: 1.502 Validation Loss: 1.390 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 605/37094 [01:02<1:22:51,  7.34it/s]

Epoch: 0 Iteration: 606 Loss: 1.502 Validation Loss: 1.390 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 607/37094 [01:02<1:15:40,  8.04it/s]

Epoch: 0 Iteration: 607 Loss: 1.519 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 607/37094 [01:02<1:15:40,  8.04it/s]

Epoch: 0 Iteration: 608 Loss: 1.522 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 607/37094 [01:02<1:15:40,  8.04it/s]

Epoch: 0 Iteration: 608 Loss: 1.522 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 609/37094 [01:02<1:10:43,  8.60it/s]

Epoch: 0 Iteration: 609 Loss: 1.556 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 609/37094 [01:02<1:10:43,  8.60it/s]

Epoch: 0 Iteration: 610 Loss: 1.547 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 609/37094 [01:03<1:10:43,  8.60it/s]

Epoch: 0 Iteration: 610 Loss: 1.547 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 611/37094 [01:03<1:07:15,  9.04it/s]

Epoch: 0 Iteration: 611 Loss: 1.533 Validation Loss: 1.390 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 611/37094 [01:03<1:07:15,  9.04it/s]

Epoch: 0 Iteration: 612 Loss: 1.472 Validation Loss: 1.390 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 611/37094 [01:03<1:07:15,  9.04it/s]

Epoch: 0 Iteration: 612 Loss: 1.472 Validation Loss: 1.390 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 613/37094 [01:03<1:04:49,  9.38it/s]

Epoch: 0 Iteration: 613 Loss: 1.485 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 613/37094 [01:03<1:04:49,  9.38it/s]

Epoch: 0 Iteration: 614 Loss: 1.490 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 613/37094 [01:03<1:04:49,  9.38it/s]

Epoch: 0 Iteration: 614 Loss: 1.490 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 615/37094 [01:03<1:03:09,  9.63it/s]

Epoch: 0 Iteration: 615 Loss: 1.527 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 615/37094 [01:03<1:03:09,  9.63it/s]

Epoch: 0 Iteration: 616 Loss: 1.555 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 615/37094 [01:03<1:03:09,  9.63it/s]

Epoch: 0 Iteration: 616 Loss: 1.555 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 617/37094 [01:03<1:01:53,  9.82it/s]

Epoch: 0 Iteration: 617 Loss: 1.595 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 617/37094 [01:03<1:01:53,  9.82it/s]

Epoch: 0 Iteration: 618 Loss: 1.601 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 617/37094 [01:03<1:01:53,  9.82it/s]

Epoch: 0 Iteration: 618 Loss: 1.601 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 619/37094 [01:03<1:01:00,  9.96it/s]

Epoch: 0 Iteration: 619 Loss: 1.564 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 619/37094 [01:03<1:01:00,  9.96it/s]

Epoch: 0 Iteration: 620 Loss: 1.554 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 619/37094 [01:04<1:01:00,  9.96it/s]

Epoch: 0 Iteration: 620 Loss: 1.554 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 621/37094 [01:04<1:00:23, 10.06it/s]

Epoch: 0 Iteration: 621 Loss: 1.550 Validation Loss: 1.390 Accuracy: 0.113 Validation Accuracy: 0.098:   2%|▏         | 621/37094 [01:04<1:00:23, 10.06it/s]

Epoch: 0 Iteration: 622 Loss: 1.586 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 621/37094 [01:04<1:00:23, 10.06it/s]

Epoch: 0 Iteration: 622 Loss: 1.586 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 623/37094 [01:04<59:57, 10.14it/s]  

Epoch: 0 Iteration: 623 Loss: 1.628 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 623/37094 [01:04<59:57, 10.14it/s]

Epoch: 0 Iteration: 624 Loss: 1.592 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 623/37094 [01:04<59:57, 10.14it/s]

Epoch: 0 Iteration: 624 Loss: 1.592 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 625/37094 [01:04<59:38, 10.19it/s]

Epoch: 0 Iteration: 625 Loss: 1.580 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 625/37094 [01:04<59:38, 10.19it/s]

Epoch: 0 Iteration: 626 Loss: 1.598 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 625/37094 [01:04<59:38, 10.19it/s]

Epoch: 0 Iteration: 626 Loss: 1.598 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 627/37094 [01:04<59:26, 10.22it/s]

Epoch: 0 Iteration: 627 Loss: 1.605 Validation Loss: 1.390 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 627/37094 [01:04<59:26, 10.22it/s]

Epoch: 0 Iteration: 628 Loss: 1.629 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 627/37094 [01:04<59:26, 10.22it/s]

Epoch: 0 Iteration: 628 Loss: 1.629 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 629/37094 [01:04<59:16, 10.25it/s]

Epoch: 0 Iteration: 629 Loss: 1.631 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 629/37094 [01:04<59:16, 10.25it/s]

Epoch: 0 Iteration: 630 Loss: 1.630 Validation Loss: 1.390 Accuracy: 0.113 Validation Accuracy: 0.098:   2%|▏         | 629/37094 [01:04<59:16, 10.25it/s]

Epoch: 0 Iteration: 630 Loss: 1.630 Validation Loss: 1.390 Accuracy: 0.113 Validation Accuracy: 0.098:   2%|▏         | 631/37094 [01:04<59:10, 10.27it/s]

Epoch: 0 Iteration: 631 Loss: 1.656 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 631/37094 [01:05<59:10, 10.27it/s]

Epoch: 0 Iteration: 632 Loss: 1.664 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 631/37094 [01:05<59:10, 10.27it/s]

Epoch: 0 Iteration: 632 Loss: 1.664 Validation Loss: 1.390 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 633/37094 [01:05<59:05, 10.28it/s]

Epoch: 0 Iteration: 633 Loss: 1.668 Validation Loss: 1.390 Accuracy: 0.125 Validation Accuracy: 0.098:   2%|▏         | 633/37094 [01:05<59:05, 10.28it/s]

Epoch: 0 Iteration: 634 Loss: 1.662 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 633/37094 [01:05<59:05, 10.28it/s]

Epoch: 0 Iteration: 634 Loss: 1.662 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 635/37094 [01:05<59:01, 10.30it/s]

Epoch: 0 Iteration: 635 Loss: 1.655 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 635/37094 [01:05<59:01, 10.30it/s]

Epoch: 0 Iteration: 636 Loss: 1.662 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 635/37094 [01:05<59:01, 10.30it/s]

Epoch: 0 Iteration: 636 Loss: 1.662 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 637/37094 [01:05<58:59, 10.30it/s]

Epoch: 0 Iteration: 637 Loss: 1.664 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 637/37094 [01:05<58:59, 10.30it/s]

Epoch: 0 Iteration: 638 Loss: 1.658 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 637/37094 [01:05<58:59, 10.30it/s]

Epoch: 0 Iteration: 638 Loss: 1.658 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 639/37094 [01:05<58:58, 10.30it/s]

Epoch: 0 Iteration: 639 Loss: 1.669 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 639/37094 [01:05<58:58, 10.30it/s]

Epoch: 0 Iteration: 640 Loss: 1.674 Validation Loss: 1.390 Accuracy: 0.109 Validation Accuracy: 0.098:   2%|▏         | 639/37094 [01:05<58:58, 10.30it/s]

Epoch: 0 Iteration: 640 Loss: 1.674 Validation Loss: 1.390 Accuracy: 0.109 Validation Accuracy: 0.098:   2%|▏         | 641/37094 [01:05<58:57, 10.31it/s]

Epoch: 0 Iteration: 641 Loss: 1.662 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 641/37094 [01:06<58:57, 10.31it/s]

Epoch: 0 Iteration: 642 Loss: 1.694 Validation Loss: 1.390 Accuracy: 0.103 Validation Accuracy: 0.098:   2%|▏         | 641/37094 [01:06<58:57, 10.31it/s]

Epoch: 0 Iteration: 642 Loss: 1.694 Validation Loss: 1.390 Accuracy: 0.103 Validation Accuracy: 0.098:   2%|▏         | 643/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 643 Loss: 1.640 Validation Loss: 1.390 Accuracy: 0.097 Validation Accuracy: 0.098:   2%|▏         | 643/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 644 Loss: 1.679 Validation Loss: 1.390 Accuracy: 0.091 Validation Accuracy: 0.098:   2%|▏         | 643/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 644 Loss: 1.679 Validation Loss: 1.390 Accuracy: 0.091 Validation Accuracy: 0.098:   2%|▏         | 645/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 645 Loss: 1.686 Validation Loss: 1.390 Accuracy: 0.078 Validation Accuracy: 0.098:   2%|▏         | 645/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 646 Loss: 1.649 Validation Loss: 1.390 Accuracy: 0.087 Validation Accuracy: 0.098:   2%|▏         | 645/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 646 Loss: 1.649 Validation Loss: 1.390 Accuracy: 0.087 Validation Accuracy: 0.098:   2%|▏         | 647/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 647 Loss: 1.639 Validation Loss: 1.390 Accuracy: 0.097 Validation Accuracy: 0.098:   2%|▏         | 647/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 648 Loss: 1.617 Validation Loss: 1.390 Accuracy: 0.103 Validation Accuracy: 0.098:   2%|▏         | 647/37094 [01:06<58:55, 10.31it/s]

Epoch: 0 Iteration: 648 Loss: 1.617 Validation Loss: 1.390 Accuracy: 0.103 Validation Accuracy: 0.098:   2%|▏         | 649/37094 [01:06<58:57, 10.30it/s]

Epoch: 0 Iteration: 649 Loss: 1.583 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 649/37094 [01:06<58:57, 10.30it/s]

Epoch: 0 Iteration: 650 Loss: 1.607 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 649/37094 [01:06<58:57, 10.30it/s]

Epoch: 0 Iteration: 650 Loss: 1.607 Validation Loss: 1.390 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 651/37094 [01:06<58:56, 10.30it/s]

Epoch: 0 Iteration: 651 Loss: 1.596 Validation Loss: 1.390 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 651/37094 [01:07<58:56, 10.30it/s]

Epoch: 0 Iteration: 652 Loss: 1.592 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 651/37094 [01:07<58:56, 10.30it/s]

Epoch: 0 Iteration: 652 Loss: 1.592 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 653/37094 [01:07<58:55, 10.31it/s]

Epoch: 0 Iteration: 653 Loss: 1.562 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 653/37094 [01:07<58:55, 10.31it/s]

Epoch: 0 Iteration: 654 Loss: 1.594 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 653/37094 [01:07<58:55, 10.31it/s]

Epoch: 0 Iteration: 654 Loss: 1.594 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 655/37094 [01:07<58:55, 10.31it/s]

Epoch: 0 Iteration: 655 Loss: 1.618 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 655/37094 [01:07<58:55, 10.31it/s]

Epoch: 0 Iteration: 656 Loss: 1.633 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 655/37094 [01:07<58:55, 10.31it/s]

Epoch: 0 Iteration: 656 Loss: 1.633 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 657/37094 [01:07<58:54, 10.31it/s]

Epoch: 0 Iteration: 657 Loss: 1.633 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 657/37094 [01:07<58:54, 10.31it/s]

Epoch: 0 Iteration: 658 Loss: 1.618 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 657/37094 [01:07<58:54, 10.31it/s]

Epoch: 0 Iteration: 658 Loss: 1.618 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 659/37094 [01:07<58:54, 10.31it/s]

Epoch: 0 Iteration: 659 Loss: 1.600 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 659/37094 [01:07<58:54, 10.31it/s]

Epoch: 0 Iteration: 660 Loss: 1.591 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 659/37094 [01:07<58:54, 10.31it/s]

Epoch: 0 Iteration: 660 Loss: 1.591 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 661/37094 [01:07<58:53, 10.31it/s]

Epoch: 0 Iteration: 661 Loss: 1.612 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 661/37094 [01:07<58:53, 10.31it/s]

Epoch: 0 Iteration: 662 Loss: 1.557 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 661/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 662 Loss: 1.557 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 663/37094 [01:08<58:52, 10.31it/s]

Epoch: 0 Iteration: 663 Loss: 1.574 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 663/37094 [01:08<58:52, 10.31it/s]

Epoch: 0 Iteration: 664 Loss: 1.548 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 663/37094 [01:08<58:52, 10.31it/s]

Epoch: 0 Iteration: 664 Loss: 1.548 Validation Loss: 1.390 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 665/37094 [01:08<58:51, 10.32it/s]

Epoch: 0 Iteration: 665 Loss: 1.562 Validation Loss: 1.390 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 665/37094 [01:08<58:51, 10.32it/s]

Epoch: 0 Iteration: 666 Loss: 1.557 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 665/37094 [01:08<58:51, 10.32it/s]

Epoch: 0 Iteration: 666 Loss: 1.557 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 667/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 667 Loss: 1.514 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 667/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 668 Loss: 1.530 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 667/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 668 Loss: 1.530 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 669/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 669 Loss: 1.544 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 669/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 670 Loss: 1.520 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 669/37094 [01:08<58:53, 10.31it/s]

Epoch: 0 Iteration: 670 Loss: 1.520 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 671/37094 [01:08<58:52, 10.31it/s]

Epoch: 0 Iteration: 671 Loss: 1.529 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 671/37094 [01:08<58:52, 10.31it/s]

Epoch: 0 Iteration: 672 Loss: 1.539 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 671/37094 [01:09<58:52, 10.31it/s]

Epoch: 0 Iteration: 672 Loss: 1.539 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 673/37094 [01:09<58:53, 10.31it/s]

Epoch: 0 Iteration: 673 Loss: 1.512 Validation Loss: 1.390 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 673/37094 [01:09<58:53, 10.31it/s]

Epoch: 0 Iteration: 674 Loss: 1.448 Validation Loss: 1.390 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 673/37094 [01:09<58:53, 10.31it/s]

Epoch: 0 Iteration: 674 Loss: 1.448 Validation Loss: 1.390 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 675/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 675 Loss: 1.415 Validation Loss: 1.390 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 675/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 676 Loss: 1.389 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 675/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 676 Loss: 1.389 Validation Loss: 1.390 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 677/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 677 Loss: 1.357 Validation Loss: 1.390 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 677/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 678 Loss: 1.358 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 677/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 678 Loss: 1.358 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 679/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 679 Loss: 1.368 Validation Loss: 1.390 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 679/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 680 Loss: 1.369 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 679/37094 [01:09<58:51, 10.31it/s]

Epoch: 0 Iteration: 680 Loss: 1.369 Validation Loss: 1.390 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 681/37094 [01:09<58:50, 10.31it/s]

Epoch: 0 Iteration: 681 Loss: 1.425 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 681/37094 [01:09<58:50, 10.31it/s]

Epoch: 0 Iteration: 682 Loss: 1.429 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 681/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 682 Loss: 1.429 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 683/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 683 Loss: 1.461 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 683/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 684 Loss: 1.468 Validation Loss: 1.390 Accuracy: 0.097 Validation Accuracy: 0.098:   2%|▏         | 683/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 684 Loss: 1.468 Validation Loss: 1.390 Accuracy: 0.097 Validation Accuracy: 0.098:   2%|▏         | 685/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 685 Loss: 1.440 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 685/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 686 Loss: 1.431 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 685/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 686 Loss: 1.431 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 687/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 687 Loss: 1.462 Validation Loss: 1.390 Accuracy: 0.103 Validation Accuracy: 0.098:   2%|▏         | 687/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 688 Loss: 1.419 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 687/37094 [01:10<58:50, 10.31it/s]

Epoch: 0 Iteration: 688 Loss: 1.419 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 689/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 689 Loss: 1.442 Validation Loss: 1.390 Accuracy: 0.097 Validation Accuracy: 0.098:   2%|▏         | 689/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 690 Loss: 1.441 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 689/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 690 Loss: 1.441 Validation Loss: 1.390 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 691/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 691 Loss: 1.422 Validation Loss: 1.390 Accuracy: 0.103 Validation Accuracy: 0.098:   2%|▏         | 691/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 692 Loss: 1.428 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 691/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 692 Loss: 1.428 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 693/37094 [01:10<58:51, 10.31it/s]

Epoch: 0 Iteration: 693 Loss: 1.453 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 693/37094 [01:11<58:51, 10.31it/s]

Epoch: 0 Iteration: 694 Loss: 1.476 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 693/37094 [01:11<58:51, 10.31it/s]

Epoch: 0 Iteration: 694 Loss: 1.476 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 695/37094 [01:11<58:50, 10.31it/s]

Epoch: 0 Iteration: 695 Loss: 1.500 Validation Loss: 1.390 Accuracy: 0.100 Validation Accuracy: 0.098:   2%|▏         | 695/37094 [01:11<58:50, 10.31it/s]

Epoch: 0 Iteration: 696 Loss: 1.468 Validation Loss: 1.390 Accuracy: 0.113 Validation Accuracy: 0.098:   2%|▏         | 695/37094 [01:11<58:50, 10.31it/s]

Epoch: 0 Iteration: 696 Loss: 1.468 Validation Loss: 1.390 Accuracy: 0.113 Validation Accuracy: 0.098:   2%|▏         | 697/37094 [01:11<58:50, 10.31it/s]

Epoch: 0 Iteration: 697 Loss: 1.569 Validation Loss: 1.390 Accuracy: 0.116 Validation Accuracy: 0.098:   2%|▏         | 697/37094 [01:11<58:50, 10.31it/s]

Epoch: 0 Iteration: 698 Loss: 1.564 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 697/37094 [01:11<58:50, 10.31it/s]

Epoch: 0 Iteration: 698 Loss: 1.564 Validation Loss: 1.390 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 699/37094 [01:11<58:51, 10.31it/s]

Epoch: 0 Iteration: 699 Loss: 1.587 Validation Loss: 1.390 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 699/37094 [01:11<58:51, 10.31it/s]

Epoch: 0 Iteration: 700 Loss: 1.623 Validation Loss: 1.307 Accuracy: 0.163 Validation Accuracy: 0.098:   2%|▏         | 699/37094 [01:12<58:51, 10.31it/s]

Epoch: 0 Iteration: 700 Loss: 1.623 Validation Loss: 1.307 Accuracy: 0.163 Validation Accuracy: 0.098:   2%|▏         | 701/37094 [01:12<1:48:03,  5.61it/s]

Epoch: 0 Iteration: 701 Loss: 1.577 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 701/37094 [01:12<1:48:03,  5.61it/s]

Epoch: 0 Iteration: 702 Loss: 1.569 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 701/37094 [01:12<1:48:03,  5.61it/s]

Epoch: 0 Iteration: 702 Loss: 1.569 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 703/37094 [01:12<1:32:26,  6.56it/s]

Epoch: 0 Iteration: 703 Loss: 1.552 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 703/37094 [01:12<1:32:26,  6.56it/s]

Epoch: 0 Iteration: 704 Loss: 1.509 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 703/37094 [01:12<1:32:26,  6.56it/s]

Epoch: 0 Iteration: 704 Loss: 1.509 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 705/37094 [01:12<1:22:24,  7.36it/s]

Epoch: 0 Iteration: 705 Loss: 1.529 Validation Loss: 1.307 Accuracy: 0.169 Validation Accuracy: 0.098:   2%|▏         | 705/37094 [01:12<1:22:24,  7.36it/s]

Epoch: 0 Iteration: 706 Loss: 1.543 Validation Loss: 1.307 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 705/37094 [01:12<1:22:24,  7.36it/s]

Epoch: 0 Iteration: 706 Loss: 1.543 Validation Loss: 1.307 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 707/37094 [01:12<1:15:21,  8.05it/s]

Epoch: 0 Iteration: 707 Loss: 1.582 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 707/37094 [01:12<1:15:21,  8.05it/s]

Epoch: 0 Iteration: 708 Loss: 1.587 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 707/37094 [01:13<1:15:21,  8.05it/s]

Epoch: 0 Iteration: 708 Loss: 1.587 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 709/37094 [01:13<1:10:25,  8.61it/s]

Epoch: 0 Iteration: 709 Loss: 1.611 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 709/37094 [01:13<1:10:25,  8.61it/s]

Epoch: 0 Iteration: 710 Loss: 1.603 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 709/37094 [01:13<1:10:25,  8.61it/s]

Epoch: 0 Iteration: 710 Loss: 1.603 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 711/37094 [01:13<1:06:58,  9.05it/s]

Epoch: 0 Iteration: 711 Loss: 1.583 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 711/37094 [01:13<1:06:58,  9.05it/s]

Epoch: 0 Iteration: 712 Loss: 1.540 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 711/37094 [01:13<1:06:58,  9.05it/s]

Epoch: 0 Iteration: 712 Loss: 1.540 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 713/37094 [01:13<1:04:31,  9.40it/s]

Epoch: 0 Iteration: 713 Loss: 1.514 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 713/37094 [01:13<1:04:31,  9.40it/s]

Epoch: 0 Iteration: 714 Loss: 1.558 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 713/37094 [01:13<1:04:31,  9.40it/s]

Epoch: 0 Iteration: 714 Loss: 1.558 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 715/37094 [01:13<1:02:49,  9.65it/s]

Epoch: 0 Iteration: 715 Loss: 1.524 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 715/37094 [01:13<1:02:49,  9.65it/s]

Epoch: 0 Iteration: 716 Loss: 1.532 Validation Loss: 1.307 Accuracy: 0.163 Validation Accuracy: 0.098:   2%|▏         | 715/37094 [01:13<1:02:49,  9.65it/s]

Epoch: 0 Iteration: 716 Loss: 1.532 Validation Loss: 1.307 Accuracy: 0.163 Validation Accuracy: 0.098:   2%|▏         | 717/37094 [01:13<1:01:36,  9.84it/s]

Epoch: 0 Iteration: 717 Loss: 1.434 Validation Loss: 1.307 Accuracy: 0.166 Validation Accuracy: 0.098:   2%|▏         | 717/37094 [01:13<1:01:36,  9.84it/s]

Epoch: 0 Iteration: 718 Loss: 1.457 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 717/37094 [01:14<1:01:36,  9.84it/s]

Epoch: 0 Iteration: 718 Loss: 1.457 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 719/37094 [01:14<1:00:45,  9.98it/s]

Epoch: 0 Iteration: 719 Loss: 1.449 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 719/37094 [01:14<1:00:45,  9.98it/s]

Epoch: 0 Iteration: 720 Loss: 1.422 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 719/37094 [01:14<1:00:45,  9.98it/s]

Epoch: 0 Iteration: 720 Loss: 1.422 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 721/37094 [01:14<1:00:10, 10.07it/s]

Epoch: 0 Iteration: 721 Loss: 1.465 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 721/37094 [01:14<1:00:10, 10.07it/s]

Epoch: 0 Iteration: 722 Loss: 1.471 Validation Loss: 1.307 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 721/37094 [01:14<1:00:10, 10.07it/s]

Epoch: 0 Iteration: 722 Loss: 1.471 Validation Loss: 1.307 Accuracy: 0.128 Validation Accuracy: 0.098:   2%|▏         | 723/37094 [01:14<59:47, 10.14it/s]  

Epoch: 0 Iteration: 723 Loss: 1.482 Validation Loss: 1.307 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 723/37094 [01:14<59:47, 10.14it/s]

Epoch: 0 Iteration: 724 Loss: 1.502 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 723/37094 [01:14<59:47, 10.14it/s]

Epoch: 0 Iteration: 724 Loss: 1.502 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 725/37094 [01:14<59:30, 10.19it/s]

Epoch: 0 Iteration: 725 Loss: 1.486 Validation Loss: 1.307 Accuracy: 0.119 Validation Accuracy: 0.098:   2%|▏         | 725/37094 [01:14<59:30, 10.19it/s]

Epoch: 0 Iteration: 726 Loss: 1.497 Validation Loss: 1.307 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 725/37094 [01:14<59:30, 10.19it/s]

Epoch: 0 Iteration: 726 Loss: 1.497 Validation Loss: 1.307 Accuracy: 0.106 Validation Accuracy: 0.098:   2%|▏         | 727/37094 [01:14<59:17, 10.22it/s]

Epoch: 0 Iteration: 727 Loss: 1.449 Validation Loss: 1.307 Accuracy: 0.113 Validation Accuracy: 0.098:   2%|▏         | 727/37094 [01:14<59:17, 10.22it/s]

Epoch: 0 Iteration: 728 Loss: 1.456 Validation Loss: 1.307 Accuracy: 0.125 Validation Accuracy: 0.098:   2%|▏         | 727/37094 [01:15<59:17, 10.22it/s]

Epoch: 0 Iteration: 728 Loss: 1.456 Validation Loss: 1.307 Accuracy: 0.125 Validation Accuracy: 0.098:   2%|▏         | 729/37094 [01:15<59:07, 10.25it/s]

Epoch: 0 Iteration: 729 Loss: 1.411 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 729/37094 [01:15<59:07, 10.25it/s]

Epoch: 0 Iteration: 730 Loss: 1.434 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 729/37094 [01:15<59:07, 10.25it/s]

Epoch: 0 Iteration: 730 Loss: 1.434 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 731/37094 [01:15<59:02, 10.26it/s]

Epoch: 0 Iteration: 731 Loss: 1.435 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 731/37094 [01:15<59:02, 10.26it/s]

Epoch: 0 Iteration: 732 Loss: 1.470 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 731/37094 [01:15<59:02, 10.26it/s]

Epoch: 0 Iteration: 732 Loss: 1.470 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 733/37094 [01:15<58:56, 10.28it/s]

Epoch: 0 Iteration: 733 Loss: 1.492 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 733/37094 [01:15<58:56, 10.28it/s]

Epoch: 0 Iteration: 734 Loss: 1.480 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 733/37094 [01:15<58:56, 10.28it/s]

Epoch: 0 Iteration: 734 Loss: 1.480 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 735/37094 [01:15<58:53, 10.29it/s]

Epoch: 0 Iteration: 735 Loss: 1.506 Validation Loss: 1.307 Accuracy: 0.172 Validation Accuracy: 0.098:   2%|▏         | 735/37094 [01:15<58:53, 10.29it/s]

Epoch: 0 Iteration: 736 Loss: 1.494 Validation Loss: 1.307 Accuracy: 0.200 Validation Accuracy: 0.098:   2%|▏         | 735/37094 [01:15<58:53, 10.29it/s]

Epoch: 0 Iteration: 736 Loss: 1.494 Validation Loss: 1.307 Accuracy: 0.200 Validation Accuracy: 0.098:   2%|▏         | 737/37094 [01:15<58:51, 10.30it/s]

Epoch: 0 Iteration: 737 Loss: 1.549 Validation Loss: 1.307 Accuracy: 0.209 Validation Accuracy: 0.098:   2%|▏         | 737/37094 [01:15<58:51, 10.30it/s]

Epoch: 0 Iteration: 738 Loss: 1.538 Validation Loss: 1.307 Accuracy: 0.212 Validation Accuracy: 0.098:   2%|▏         | 737/37094 [01:15<58:51, 10.30it/s]

Epoch: 0 Iteration: 738 Loss: 1.538 Validation Loss: 1.307 Accuracy: 0.212 Validation Accuracy: 0.098:   2%|▏         | 739/37094 [01:15<58:49, 10.30it/s]

Epoch: 0 Iteration: 739 Loss: 1.539 Validation Loss: 1.307 Accuracy: 0.206 Validation Accuracy: 0.098:   2%|▏         | 739/37094 [01:16<58:49, 10.30it/s]

Epoch: 0 Iteration: 740 Loss: 1.574 Validation Loss: 1.307 Accuracy: 0.206 Validation Accuracy: 0.098:   2%|▏         | 739/37094 [01:16<58:49, 10.30it/s]

Epoch: 0 Iteration: 740 Loss: 1.574 Validation Loss: 1.307 Accuracy: 0.206 Validation Accuracy: 0.098:   2%|▏         | 741/37094 [01:16<58:49, 10.30it/s]

Epoch: 0 Iteration: 741 Loss: 1.471 Validation Loss: 1.307 Accuracy: 0.216 Validation Accuracy: 0.098:   2%|▏         | 741/37094 [01:16<58:49, 10.30it/s]

Epoch: 0 Iteration: 742 Loss: 1.439 Validation Loss: 1.307 Accuracy: 0.234 Validation Accuracy: 0.098:   2%|▏         | 741/37094 [01:16<58:49, 10.30it/s]

Epoch: 0 Iteration: 742 Loss: 1.439 Validation Loss: 1.307 Accuracy: 0.234 Validation Accuracy: 0.098:   2%|▏         | 743/37094 [01:16<58:47, 10.31it/s]

Epoch: 0 Iteration: 743 Loss: 1.428 Validation Loss: 1.307 Accuracy: 0.234 Validation Accuracy: 0.098:   2%|▏         | 743/37094 [01:16<58:47, 10.31it/s]

Epoch: 0 Iteration: 744 Loss: 1.439 Validation Loss: 1.307 Accuracy: 0.212 Validation Accuracy: 0.098:   2%|▏         | 743/37094 [01:16<58:47, 10.31it/s]

Epoch: 0 Iteration: 744 Loss: 1.439 Validation Loss: 1.307 Accuracy: 0.212 Validation Accuracy: 0.098:   2%|▏         | 745/37094 [01:16<58:46, 10.31it/s]

Epoch: 0 Iteration: 745 Loss: 1.447 Validation Loss: 1.307 Accuracy: 0.200 Validation Accuracy: 0.098:   2%|▏         | 745/37094 [01:16<58:46, 10.31it/s]

Epoch: 0 Iteration: 746 Loss: 1.435 Validation Loss: 1.307 Accuracy: 0.166 Validation Accuracy: 0.098:   2%|▏         | 745/37094 [01:16<58:46, 10.31it/s]

Epoch: 0 Iteration: 746 Loss: 1.435 Validation Loss: 1.307 Accuracy: 0.166 Validation Accuracy: 0.098:   2%|▏         | 747/37094 [01:16<58:48, 10.30it/s]

Epoch: 0 Iteration: 747 Loss: 1.439 Validation Loss: 1.307 Accuracy: 0.163 Validation Accuracy: 0.098:   2%|▏         | 747/37094 [01:16<58:48, 10.30it/s]

Epoch: 0 Iteration: 748 Loss: 1.490 Validation Loss: 1.307 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 747/37094 [01:16<58:48, 10.30it/s]

Epoch: 0 Iteration: 748 Loss: 1.490 Validation Loss: 1.307 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 749/37094 [01:16<58:48, 10.30it/s]

Epoch: 0 Iteration: 749 Loss: 1.496 Validation Loss: 1.307 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 749/37094 [01:17<58:48, 10.30it/s]

Epoch: 0 Iteration: 750 Loss: 1.483 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 749/37094 [01:17<58:48, 10.30it/s]

Epoch: 0 Iteration: 750 Loss: 1.483 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 751/37094 [01:17<58:49, 10.30it/s]

Epoch: 0 Iteration: 751 Loss: 1.531 Validation Loss: 1.307 Accuracy: 0.131 Validation Accuracy: 0.098:   2%|▏         | 751/37094 [01:17<58:49, 10.30it/s]

Epoch: 0 Iteration: 752 Loss: 1.556 Validation Loss: 1.307 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 751/37094 [01:17<58:49, 10.30it/s]

Epoch: 0 Iteration: 752 Loss: 1.556 Validation Loss: 1.307 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 753/37094 [01:17<58:49, 10.30it/s]

Epoch: 0 Iteration: 753 Loss: 1.581 Validation Loss: 1.307 Accuracy: 0.109 Validation Accuracy: 0.098:   2%|▏         | 753/37094 [01:17<58:49, 10.30it/s]

Epoch: 0 Iteration: 754 Loss: 1.547 Validation Loss: 1.307 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 753/37094 [01:17<58:49, 10.30it/s]

Epoch: 0 Iteration: 754 Loss: 1.547 Validation Loss: 1.307 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 755/37094 [01:17<58:49, 10.29it/s]

Epoch: 0 Iteration: 755 Loss: 1.564 Validation Loss: 1.307 Accuracy: 0.122 Validation Accuracy: 0.098:   2%|▏         | 755/37094 [01:17<58:49, 10.29it/s]

Epoch: 0 Iteration: 756 Loss: 1.580 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 755/37094 [01:17<58:49, 10.29it/s]

Epoch: 0 Iteration: 756 Loss: 1.580 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 757/37094 [01:17<58:49, 10.29it/s]

Epoch: 0 Iteration: 757 Loss: 1.554 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 757/37094 [01:17<58:49, 10.29it/s]

Epoch: 0 Iteration: 758 Loss: 1.538 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 757/37094 [01:17<58:49, 10.29it/s]

Epoch: 0 Iteration: 758 Loss: 1.538 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 759/37094 [01:17<58:48, 10.30it/s]

Epoch: 0 Iteration: 759 Loss: 1.530 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 759/37094 [01:18<58:48, 10.30it/s]

Epoch: 0 Iteration: 760 Loss: 1.500 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 759/37094 [01:18<58:48, 10.30it/s]

Epoch: 0 Iteration: 760 Loss: 1.500 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 761/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 761 Loss: 1.526 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 761/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 762 Loss: 1.541 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 761/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 762 Loss: 1.541 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 763/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 763 Loss: 1.533 Validation Loss: 1.307 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 763/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 764 Loss: 1.558 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 763/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 764 Loss: 1.558 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 765/37094 [01:18<58:45, 10.30it/s]

Epoch: 0 Iteration: 765 Loss: 1.571 Validation Loss: 1.307 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 765/37094 [01:18<58:45, 10.30it/s]

Epoch: 0 Iteration: 766 Loss: 1.585 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 765/37094 [01:18<58:45, 10.30it/s]

Epoch: 0 Iteration: 766 Loss: 1.585 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 767/37094 [01:18<58:49, 10.29it/s]

Epoch: 0 Iteration: 767 Loss: 1.571 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 767/37094 [01:18<58:49, 10.29it/s]

Epoch: 0 Iteration: 768 Loss: 1.506 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 767/37094 [01:18<58:49, 10.29it/s]

Epoch: 0 Iteration: 768 Loss: 1.506 Validation Loss: 1.307 Accuracy: 0.144 Validation Accuracy: 0.098:   2%|▏         | 769/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 769 Loss: 1.505 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 769/37094 [01:18<58:47, 10.30it/s]

Epoch: 0 Iteration: 770 Loss: 1.499 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 769/37094 [01:19<58:47, 10.30it/s]

Epoch: 0 Iteration: 770 Loss: 1.499 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 771/37094 [01:19<58:48, 10.29it/s]

Epoch: 0 Iteration: 771 Loss: 1.488 Validation Loss: 1.307 Accuracy: 0.138 Validation Accuracy: 0.098:   2%|▏         | 771/37094 [01:19<58:48, 10.29it/s]

Epoch: 0 Iteration: 772 Loss: 1.475 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 771/37094 [01:19<58:48, 10.29it/s]

Epoch: 0 Iteration: 772 Loss: 1.475 Validation Loss: 1.307 Accuracy: 0.134 Validation Accuracy: 0.098:   2%|▏         | 773/37094 [01:19<58:48, 10.29it/s]

Epoch: 0 Iteration: 773 Loss: 1.434 Validation Loss: 1.307 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 773/37094 [01:19<58:48, 10.29it/s]

Epoch: 0 Iteration: 774 Loss: 1.458 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 773/37094 [01:19<58:48, 10.29it/s]

Epoch: 0 Iteration: 774 Loss: 1.458 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 775/37094 [01:19<58:46, 10.30it/s]

Epoch: 0 Iteration: 775 Loss: 1.475 Validation Loss: 1.307 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 775/37094 [01:19<58:46, 10.30it/s]

Epoch: 0 Iteration: 776 Loss: 1.476 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 775/37094 [01:19<58:46, 10.30it/s]

Epoch: 0 Iteration: 776 Loss: 1.476 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 777/37094 [01:19<58:45, 10.30it/s]

Epoch: 0 Iteration: 777 Loss: 1.445 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 777/37094 [01:19<58:45, 10.30it/s]

Epoch: 0 Iteration: 778 Loss: 1.455 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 777/37094 [01:19<58:45, 10.30it/s]

Epoch: 0 Iteration: 778 Loss: 1.455 Validation Loss: 1.307 Accuracy: 0.141 Validation Accuracy: 0.098:   2%|▏         | 779/37094 [01:19<58:45, 10.30it/s]

Epoch: 0 Iteration: 779 Loss: 1.448 Validation Loss: 1.307 Accuracy: 0.147 Validation Accuracy: 0.098:   2%|▏         | 779/37094 [01:19<58:45, 10.30it/s]

Epoch: 0 Iteration: 780 Loss: 1.445 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 779/37094 [01:20<58:45, 10.30it/s]

Epoch: 0 Iteration: 780 Loss: 1.445 Validation Loss: 1.307 Accuracy: 0.150 Validation Accuracy: 0.098:   2%|▏         | 781/37094 [01:20<58:43, 10.31it/s]

Epoch: 0 Iteration: 781 Loss: 1.446 Validation Loss: 1.307 Accuracy: 0.153 Validation Accuracy: 0.098:   2%|▏         | 781/37094 [01:20<58:43, 10.31it/s]

Epoch: 0 Iteration: 782 Loss: 1.453 Validation Loss: 1.307 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 781/37094 [01:20<58:43, 10.31it/s]

Epoch: 0 Iteration: 782 Loss: 1.453 Validation Loss: 1.307 Accuracy: 0.156 Validation Accuracy: 0.098:   2%|▏         | 783/37094 [01:20<58:43, 10.31it/s]

Epoch: 0 Iteration: 783 Loss: 1.439 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 783/37094 [01:20<58:43, 10.31it/s]

Epoch: 0 Iteration: 784 Loss: 1.426 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 783/37094 [01:20<58:43, 10.31it/s]

Epoch: 0 Iteration: 784 Loss: 1.426 Validation Loss: 1.307 Accuracy: 0.159 Validation Accuracy: 0.098:   2%|▏         | 785/37094 [01:20<58:42, 10.31it/s]

Epoch: 0 Iteration: 785 Loss: 1.457 Validation Loss: 1.307 Accuracy: 0.172 Validation Accuracy: 0.098:   2%|▏         | 785/37094 [01:20<58:42, 10.31it/s]

Epoch: 0 Iteration: 786 Loss: 1.450 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 785/37094 [01:20<58:42, 10.31it/s]

Epoch: 0 Iteration: 786 Loss: 1.450 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 787/37094 [01:20<58:41, 10.31it/s]

Epoch: 0 Iteration: 787 Loss: 1.442 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 787/37094 [01:20<58:41, 10.31it/s]

Epoch: 0 Iteration: 788 Loss: 1.467 Validation Loss: 1.307 Accuracy: 0.178 Validation Accuracy: 0.098:   2%|▏         | 787/37094 [01:20<58:41, 10.31it/s]

Epoch: 0 Iteration: 788 Loss: 1.467 Validation Loss: 1.307 Accuracy: 0.178 Validation Accuracy: 0.098:   2%|▏         | 789/37094 [01:20<58:42, 10.31it/s]

Epoch: 0 Iteration: 789 Loss: 1.481 Validation Loss: 1.307 Accuracy: 0.169 Validation Accuracy: 0.098:   2%|▏         | 789/37094 [01:20<58:42, 10.31it/s]

Epoch: 0 Iteration: 790 Loss: 1.514 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 789/37094 [01:21<58:42, 10.31it/s]

Epoch: 0 Iteration: 790 Loss: 1.514 Validation Loss: 1.307 Accuracy: 0.175 Validation Accuracy: 0.098:   2%|▏         | 791/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 791 Loss: 1.476 Validation Loss: 1.307 Accuracy: 0.184 Validation Accuracy: 0.098:   2%|▏         | 791/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 792 Loss: 1.474 Validation Loss: 1.307 Accuracy: 0.184 Validation Accuracy: 0.098:   2%|▏         | 791/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 792 Loss: 1.474 Validation Loss: 1.307 Accuracy: 0.184 Validation Accuracy: 0.098:   2%|▏         | 793/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 793 Loss: 1.484 Validation Loss: 1.307 Accuracy: 0.188 Validation Accuracy: 0.098:   2%|▏         | 793/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 794 Loss: 1.470 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 793/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 794 Loss: 1.470 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 795/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 795 Loss: 1.395 Validation Loss: 1.307 Accuracy: 0.197 Validation Accuracy: 0.098:   2%|▏         | 795/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 796 Loss: 1.389 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 795/37094 [01:21<58:44, 10.30it/s]

Epoch: 0 Iteration: 796 Loss: 1.389 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 797/37094 [01:21<58:41, 10.31it/s]

Epoch: 0 Iteration: 797 Loss: 1.409 Validation Loss: 1.307 Accuracy: 0.188 Validation Accuracy: 0.098:   2%|▏         | 797/37094 [01:21<58:41, 10.31it/s]

Epoch: 0 Iteration: 798 Loss: 1.434 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 797/37094 [01:21<58:41, 10.31it/s]

Epoch: 0 Iteration: 798 Loss: 1.434 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 799/37094 [01:21<58:42, 10.31it/s]

Epoch: 0 Iteration: 799 Loss: 1.465 Validation Loss: 1.307 Accuracy: 0.191 Validation Accuracy: 0.098:   2%|▏         | 799/37094 [01:21<58:42, 10.31it/s]

Epoch: 0 Iteration: 800 Loss: 1.425 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 799/37094 [01:22<58:42, 10.31it/s]

Epoch: 0 Iteration: 800 Loss: 1.425 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 801/37094 [01:22<1:48:04,  5.60it/s]

Epoch: 0 Iteration: 801 Loss: 1.426 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 801/37094 [01:22<1:48:04,  5.60it/s]

Epoch: 0 Iteration: 802 Loss: 1.427 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 801/37094 [01:22<1:48:04,  5.60it/s]

Epoch: 0 Iteration: 802 Loss: 1.427 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 803/37094 [01:22<1:32:29,  6.54it/s]

Epoch: 0 Iteration: 803 Loss: 1.428 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 803/37094 [01:22<1:32:29,  6.54it/s]

Epoch: 0 Iteration: 804 Loss: 1.391 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 803/37094 [01:22<1:32:29,  6.54it/s]

Epoch: 0 Iteration: 804 Loss: 1.391 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 805/37094 [01:22<1:22:20,  7.34it/s]

Epoch: 0 Iteration: 805 Loss: 1.336 Validation Loss: 1.221 Accuracy: 0.141 Validation Accuracy: 0.099:   2%|▏         | 805/37094 [01:23<1:22:20,  7.34it/s]

Epoch: 0 Iteration: 806 Loss: 1.320 Validation Loss: 1.221 Accuracy: 0.144 Validation Accuracy: 0.099:   2%|▏         | 805/37094 [01:23<1:22:20,  7.34it/s]

Epoch: 0 Iteration: 806 Loss: 1.320 Validation Loss: 1.221 Accuracy: 0.144 Validation Accuracy: 0.099:   2%|▏         | 807/37094 [01:23<1:15:16,  8.03it/s]

Epoch: 0 Iteration: 807 Loss: 1.355 Validation Loss: 1.221 Accuracy: 0.141 Validation Accuracy: 0.099:   2%|▏         | 807/37094 [01:23<1:15:16,  8.03it/s]

Epoch: 0 Iteration: 808 Loss: 1.369 Validation Loss: 1.221 Accuracy: 0.138 Validation Accuracy: 0.099:   2%|▏         | 807/37094 [01:23<1:15:16,  8.03it/s]

Epoch: 0 Iteration: 808 Loss: 1.369 Validation Loss: 1.221 Accuracy: 0.138 Validation Accuracy: 0.099:   2%|▏         | 809/37094 [01:23<1:10:18,  8.60it/s]

Epoch: 0 Iteration: 809 Loss: 1.368 Validation Loss: 1.221 Accuracy: 0.144 Validation Accuracy: 0.099:   2%|▏         | 809/37094 [01:23<1:10:18,  8.60it/s]

Epoch: 0 Iteration: 810 Loss: 1.300 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 809/37094 [01:23<1:10:18,  8.60it/s]

Epoch: 0 Iteration: 810 Loss: 1.300 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 811/37094 [01:23<1:06:48,  9.05it/s]

Epoch: 0 Iteration: 811 Loss: 1.292 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 811/37094 [01:23<1:06:48,  9.05it/s]

Epoch: 0 Iteration: 812 Loss: 1.333 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 811/37094 [01:23<1:06:48,  9.05it/s]

Epoch: 0 Iteration: 812 Loss: 1.333 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 813/37094 [01:23<1:04:21,  9.39it/s]

Epoch: 0 Iteration: 813 Loss: 1.301 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 813/37094 [01:23<1:04:21,  9.39it/s]

Epoch: 0 Iteration: 814 Loss: 1.318 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 813/37094 [01:23<1:04:21,  9.39it/s]

Epoch: 0 Iteration: 814 Loss: 1.318 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 815/37094 [01:23<1:02:39,  9.65it/s]

Epoch: 0 Iteration: 815 Loss: 1.377 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 815/37094 [01:23<1:02:39,  9.65it/s]

Epoch: 0 Iteration: 816 Loss: 1.363 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 815/37094 [01:24<1:02:39,  9.65it/s]

Epoch: 0 Iteration: 816 Loss: 1.363 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 817/37094 [01:24<1:01:28,  9.84it/s]

Epoch: 0 Iteration: 817 Loss: 1.383 Validation Loss: 1.221 Accuracy: 0.175 Validation Accuracy: 0.099:   2%|▏         | 817/37094 [01:24<1:01:28,  9.84it/s]

Epoch: 0 Iteration: 818 Loss: 1.329 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 817/37094 [01:24<1:01:28,  9.84it/s]

Epoch: 0 Iteration: 818 Loss: 1.329 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 819/37094 [01:24<1:00:37,  9.97it/s]

Epoch: 0 Iteration: 819 Loss: 1.311 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 819/37094 [01:24<1:00:37,  9.97it/s]

Epoch: 0 Iteration: 820 Loss: 1.313 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 819/37094 [01:24<1:00:37,  9.97it/s]

Epoch: 0 Iteration: 820 Loss: 1.313 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 821/37094 [01:24<1:00:02, 10.07it/s]

Epoch: 0 Iteration: 821 Loss: 1.292 Validation Loss: 1.221 Accuracy: 0.153 Validation Accuracy: 0.099:   2%|▏         | 821/37094 [01:24<1:00:02, 10.07it/s]

Epoch: 0 Iteration: 822 Loss: 1.310 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 821/37094 [01:24<1:00:02, 10.07it/s]

Epoch: 0 Iteration: 822 Loss: 1.310 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 823/37094 [01:24<59:37, 10.14it/s]  

Epoch: 0 Iteration: 823 Loss: 1.300 Validation Loss: 1.221 Accuracy: 0.144 Validation Accuracy: 0.099:   2%|▏         | 823/37094 [01:24<59:37, 10.14it/s]

Epoch: 0 Iteration: 824 Loss: 1.332 Validation Loss: 1.221 Accuracy: 0.156 Validation Accuracy: 0.099:   2%|▏         | 823/37094 [01:24<59:37, 10.14it/s]

Epoch: 0 Iteration: 824 Loss: 1.332 Validation Loss: 1.221 Accuracy: 0.156 Validation Accuracy: 0.099:   2%|▏         | 825/37094 [01:24<59:20, 10.19it/s]

Epoch: 0 Iteration: 825 Loss: 1.331 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 825/37094 [01:24<59:20, 10.19it/s]

Epoch: 0 Iteration: 826 Loss: 1.345 Validation Loss: 1.221 Accuracy: 0.156 Validation Accuracy: 0.099:   2%|▏         | 825/37094 [01:25<59:20, 10.19it/s]

Epoch: 0 Iteration: 826 Loss: 1.345 Validation Loss: 1.221 Accuracy: 0.156 Validation Accuracy: 0.099:   2%|▏         | 827/37094 [01:25<59:10, 10.22it/s]

Epoch: 0 Iteration: 827 Loss: 1.314 Validation Loss: 1.221 Accuracy: 0.153 Validation Accuracy: 0.099:   2%|▏         | 827/37094 [01:25<59:10, 10.22it/s]

Epoch: 0 Iteration: 828 Loss: 1.306 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 827/37094 [01:25<59:10, 10.22it/s]

Epoch: 0 Iteration: 828 Loss: 1.306 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 829/37094 [01:25<59:03, 10.23it/s]

Epoch: 0 Iteration: 829 Loss: 1.269 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 829/37094 [01:25<59:03, 10.23it/s]

Epoch: 0 Iteration: 830 Loss: 1.301 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 829/37094 [01:25<59:03, 10.23it/s]

Epoch: 0 Iteration: 830 Loss: 1.301 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 831/37094 [01:25<58:58, 10.25it/s]

Epoch: 0 Iteration: 831 Loss: 1.338 Validation Loss: 1.221 Accuracy: 0.156 Validation Accuracy: 0.099:   2%|▏         | 831/37094 [01:25<58:58, 10.25it/s]

Epoch: 0 Iteration: 832 Loss: 1.289 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 831/37094 [01:25<58:58, 10.25it/s]

Epoch: 0 Iteration: 832 Loss: 1.289 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 833/37094 [01:25<58:52, 10.27it/s]

Epoch: 0 Iteration: 833 Loss: 1.291 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 833/37094 [01:25<58:52, 10.27it/s]

Epoch: 0 Iteration: 834 Loss: 1.305 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 833/37094 [01:25<58:52, 10.27it/s]

Epoch: 0 Iteration: 834 Loss: 1.305 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 835/37094 [01:25<58:48, 10.28it/s]

Epoch: 0 Iteration: 835 Loss: 1.269 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 835/37094 [01:25<58:48, 10.28it/s]

Epoch: 0 Iteration: 836 Loss: 1.266 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 835/37094 [01:26<58:48, 10.28it/s]

Epoch: 0 Iteration: 836 Loss: 1.266 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 837/37094 [01:26<58:44, 10.29it/s]

Epoch: 0 Iteration: 837 Loss: 1.289 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 837/37094 [01:26<58:44, 10.29it/s]

Epoch: 0 Iteration: 838 Loss: 1.317 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 837/37094 [01:26<58:44, 10.29it/s]

Epoch: 0 Iteration: 838 Loss: 1.317 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 839/37094 [01:26<58:43, 10.29it/s]

Epoch: 0 Iteration: 839 Loss: 1.300 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 839/37094 [01:26<58:43, 10.29it/s]

Epoch: 0 Iteration: 840 Loss: 1.334 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 839/37094 [01:26<58:43, 10.29it/s]

Epoch: 0 Iteration: 840 Loss: 1.334 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 841/37094 [01:26<58:43, 10.29it/s]

Epoch: 0 Iteration: 841 Loss: 1.367 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 841/37094 [01:26<58:43, 10.29it/s]

Epoch: 0 Iteration: 842 Loss: 1.384 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 841/37094 [01:26<58:43, 10.29it/s]

Epoch: 0 Iteration: 842 Loss: 1.384 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 843/37094 [01:26<58:40, 10.30it/s]

Epoch: 0 Iteration: 843 Loss: 1.391 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 843/37094 [01:26<58:40, 10.30it/s]

Epoch: 0 Iteration: 844 Loss: 1.391 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 843/37094 [01:26<58:40, 10.30it/s]

Epoch: 0 Iteration: 844 Loss: 1.391 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 845/37094 [01:26<58:41, 10.29it/s]

Epoch: 0 Iteration: 845 Loss: 1.409 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 845/37094 [01:26<58:41, 10.29it/s]

Epoch: 0 Iteration: 846 Loss: 1.405 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 845/37094 [01:27<58:41, 10.29it/s]

Epoch: 0 Iteration: 846 Loss: 1.405 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 847/37094 [01:27<58:43, 10.29it/s]

Epoch: 0 Iteration: 847 Loss: 1.447 Validation Loss: 1.221 Accuracy: 0.153 Validation Accuracy: 0.099:   2%|▏         | 847/37094 [01:27<58:43, 10.29it/s]

Epoch: 0 Iteration: 848 Loss: 1.456 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 847/37094 [01:27<58:43, 10.29it/s]

Epoch: 0 Iteration: 848 Loss: 1.456 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 849/37094 [01:27<58:44, 10.28it/s]

Epoch: 0 Iteration: 849 Loss: 1.498 Validation Loss: 1.221 Accuracy: 0.163 Validation Accuracy: 0.099:   2%|▏         | 849/37094 [01:27<58:44, 10.28it/s]

Epoch: 0 Iteration: 850 Loss: 1.518 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 849/37094 [01:27<58:44, 10.28it/s]

Epoch: 0 Iteration: 850 Loss: 1.518 Validation Loss: 1.221 Accuracy: 0.169 Validation Accuracy: 0.099:   2%|▏         | 851/37094 [01:27<58:42, 10.29it/s]

Epoch: 0 Iteration: 851 Loss: 1.516 Validation Loss: 1.221 Accuracy: 0.178 Validation Accuracy: 0.099:   2%|▏         | 851/37094 [01:27<58:42, 10.29it/s]

Epoch: 0 Iteration: 852 Loss: 1.523 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 851/37094 [01:27<58:42, 10.29it/s]

Epoch: 0 Iteration: 852 Loss: 1.523 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 853/37094 [01:27<58:41, 10.29it/s]

Epoch: 0 Iteration: 853 Loss: 1.547 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 853/37094 [01:27<58:41, 10.29it/s]

Epoch: 0 Iteration: 854 Loss: 1.518 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 853/37094 [01:27<58:41, 10.29it/s]

Epoch: 0 Iteration: 854 Loss: 1.518 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 855/37094 [01:27<58:39, 10.30it/s]

Epoch: 0 Iteration: 855 Loss: 1.561 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 855/37094 [01:27<58:39, 10.30it/s]

Epoch: 0 Iteration: 856 Loss: 1.594 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 855/37094 [01:27<58:39, 10.30it/s]

Epoch: 0 Iteration: 856 Loss: 1.594 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 857/37094 [01:27<58:37, 10.30it/s]

Epoch: 0 Iteration: 857 Loss: 1.579 Validation Loss: 1.221 Accuracy: 0.188 Validation Accuracy: 0.099:   2%|▏         | 857/37094 [01:28<58:37, 10.30it/s]

Epoch: 0 Iteration: 858 Loss: 1.582 Validation Loss: 1.221 Accuracy: 0.203 Validation Accuracy: 0.099:   2%|▏         | 857/37094 [01:28<58:37, 10.30it/s]

Epoch: 0 Iteration: 858 Loss: 1.582 Validation Loss: 1.221 Accuracy: 0.203 Validation Accuracy: 0.099:   2%|▏         | 859/37094 [01:28<58:36, 10.30it/s]

Epoch: 0 Iteration: 859 Loss: 1.589 Validation Loss: 1.221 Accuracy: 0.194 Validation Accuracy: 0.099:   2%|▏         | 859/37094 [01:28<58:36, 10.30it/s]

Epoch: 0 Iteration: 860 Loss: 1.558 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 859/37094 [01:28<58:36, 10.30it/s]

Epoch: 0 Iteration: 860 Loss: 1.558 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 861/37094 [01:28<58:37, 10.30it/s]

Epoch: 0 Iteration: 861 Loss: 1.625 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 861/37094 [01:28<58:37, 10.30it/s]

Epoch: 0 Iteration: 862 Loss: 1.597 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 861/37094 [01:28<58:37, 10.30it/s]

Epoch: 0 Iteration: 862 Loss: 1.597 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 863/37094 [01:28<58:40, 10.29it/s]

Epoch: 0 Iteration: 863 Loss: 1.605 Validation Loss: 1.221 Accuracy: 0.166 Validation Accuracy: 0.099:   2%|▏         | 863/37094 [01:28<58:40, 10.29it/s]

Epoch: 0 Iteration: 864 Loss: 1.571 Validation Loss: 1.221 Accuracy: 0.153 Validation Accuracy: 0.099:   2%|▏         | 863/37094 [01:28<58:40, 10.29it/s]

Epoch: 0 Iteration: 864 Loss: 1.571 Validation Loss: 1.221 Accuracy: 0.153 Validation Accuracy: 0.099:   2%|▏         | 865/37094 [01:28<58:39, 10.29it/s]

Epoch: 0 Iteration: 865 Loss: 1.557 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 865/37094 [01:28<58:39, 10.29it/s]

Epoch: 0 Iteration: 866 Loss: 1.563 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 865/37094 [01:28<58:39, 10.29it/s]

Epoch: 0 Iteration: 866 Loss: 1.563 Validation Loss: 1.221 Accuracy: 0.159 Validation Accuracy: 0.099:   2%|▏         | 867/37094 [01:28<58:40, 10.29it/s]

Epoch: 0 Iteration: 867 Loss: 1.515 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 867/37094 [01:29<58:40, 10.29it/s]

Epoch: 0 Iteration: 868 Loss: 1.495 Validation Loss: 1.221 Accuracy: 0.131 Validation Accuracy: 0.099:   2%|▏         | 867/37094 [01:29<58:40, 10.29it/s]

Epoch: 0 Iteration: 868 Loss: 1.495 Validation Loss: 1.221 Accuracy: 0.131 Validation Accuracy: 0.099:   2%|▏         | 869/37094 [01:29<58:40, 10.29it/s]

Epoch: 0 Iteration: 869 Loss: 1.467 Validation Loss: 1.221 Accuracy: 0.125 Validation Accuracy: 0.099:   2%|▏         | 869/37094 [01:29<58:40, 10.29it/s]

Epoch: 0 Iteration: 870 Loss: 1.438 Validation Loss: 1.221 Accuracy: 0.134 Validation Accuracy: 0.099:   2%|▏         | 869/37094 [01:29<58:40, 10.29it/s]

Epoch: 0 Iteration: 870 Loss: 1.438 Validation Loss: 1.221 Accuracy: 0.134 Validation Accuracy: 0.099:   2%|▏         | 871/37094 [01:29<58:37, 10.30it/s]

Epoch: 0 Iteration: 871 Loss: 1.449 Validation Loss: 1.221 Accuracy: 0.141 Validation Accuracy: 0.099:   2%|▏         | 871/37094 [01:29<58:37, 10.30it/s]

Epoch: 0 Iteration: 872 Loss: 1.427 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 871/37094 [01:29<58:37, 10.30it/s]

Epoch: 0 Iteration: 872 Loss: 1.427 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 873/37094 [01:29<58:37, 10.30it/s]

Epoch: 0 Iteration: 873 Loss: 1.435 Validation Loss: 1.221 Accuracy: 0.141 Validation Accuracy: 0.099:   2%|▏         | 873/37094 [01:29<58:37, 10.30it/s]

Epoch: 0 Iteration: 874 Loss: 1.431 Validation Loss: 1.221 Accuracy: 0.138 Validation Accuracy: 0.099:   2%|▏         | 873/37094 [01:29<58:37, 10.30it/s]

Epoch: 0 Iteration: 874 Loss: 1.431 Validation Loss: 1.221 Accuracy: 0.138 Validation Accuracy: 0.099:   2%|▏         | 875/37094 [01:29<58:36, 10.30it/s]

Epoch: 0 Iteration: 875 Loss: 1.401 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 875/37094 [01:29<58:36, 10.30it/s]

Epoch: 0 Iteration: 876 Loss: 1.377 Validation Loss: 1.221 Accuracy: 0.125 Validation Accuracy: 0.099:   2%|▏         | 875/37094 [01:29<58:36, 10.30it/s]

Epoch: 0 Iteration: 876 Loss: 1.377 Validation Loss: 1.221 Accuracy: 0.125 Validation Accuracy: 0.099:   2%|▏         | 877/37094 [01:29<58:36, 10.30it/s]

Epoch: 0 Iteration: 877 Loss: 1.374 Validation Loss: 1.221 Accuracy: 0.131 Validation Accuracy: 0.099:   2%|▏         | 877/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 878 Loss: 1.403 Validation Loss: 1.221 Accuracy: 0.138 Validation Accuracy: 0.099:   2%|▏         | 877/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 878 Loss: 1.403 Validation Loss: 1.221 Accuracy: 0.138 Validation Accuracy: 0.099:   2%|▏         | 879/37094 [01:30<58:37, 10.30it/s]

Epoch: 0 Iteration: 879 Loss: 1.425 Validation Loss: 1.221 Accuracy: 0.147 Validation Accuracy: 0.099:   2%|▏         | 879/37094 [01:30<58:37, 10.30it/s]

Epoch: 0 Iteration: 880 Loss: 1.456 Validation Loss: 1.221 Accuracy: 0.141 Validation Accuracy: 0.099:   2%|▏         | 879/37094 [01:30<58:37, 10.30it/s]

Epoch: 0 Iteration: 880 Loss: 1.456 Validation Loss: 1.221 Accuracy: 0.141 Validation Accuracy: 0.099:   2%|▏         | 881/37094 [01:30<58:37, 10.30it/s]

Epoch: 0 Iteration: 881 Loss: 1.347 Validation Loss: 1.221 Accuracy: 0.153 Validation Accuracy: 0.099:   2%|▏         | 881/37094 [01:30<58:37, 10.30it/s]

Epoch: 0 Iteration: 882 Loss: 1.353 Validation Loss: 1.221 Accuracy: 0.144 Validation Accuracy: 0.099:   2%|▏         | 881/37094 [01:30<58:37, 10.30it/s]

Epoch: 0 Iteration: 882 Loss: 1.353 Validation Loss: 1.221 Accuracy: 0.144 Validation Accuracy: 0.099:   2%|▏         | 883/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 883 Loss: 1.395 Validation Loss: 1.221 Accuracy: 0.150 Validation Accuracy: 0.099:   2%|▏         | 883/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 884 Loss: 1.439 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 883/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 884 Loss: 1.439 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 885/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 885 Loss: 1.427 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 885/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 886 Loss: 1.416 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 885/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 886 Loss: 1.416 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 887/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 887 Loss: 1.428 Validation Loss: 1.221 Accuracy: 0.178 Validation Accuracy: 0.099:   2%|▏         | 887/37094 [01:30<58:36, 10.30it/s]

Epoch: 0 Iteration: 888 Loss: 1.437 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 887/37094 [01:31<58:36, 10.30it/s]

Epoch: 0 Iteration: 888 Loss: 1.437 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 889/37094 [01:31<58:39, 10.29it/s]

Epoch: 0 Iteration: 889 Loss: 1.428 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 889/37094 [01:31<58:39, 10.29it/s]

Epoch: 0 Iteration: 890 Loss: 1.458 Validation Loss: 1.221 Accuracy: 0.175 Validation Accuracy: 0.099:   2%|▏         | 889/37094 [01:31<58:39, 10.29it/s]

Epoch: 0 Iteration: 890 Loss: 1.458 Validation Loss: 1.221 Accuracy: 0.175 Validation Accuracy: 0.099:   2%|▏         | 891/37094 [01:31<58:38, 10.29it/s]

Epoch: 0 Iteration: 891 Loss: 1.420 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 891/37094 [01:31<58:38, 10.29it/s]

Epoch: 0 Iteration: 892 Loss: 1.478 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 891/37094 [01:31<58:38, 10.29it/s]

Epoch: 0 Iteration: 892 Loss: 1.478 Validation Loss: 1.221 Accuracy: 0.184 Validation Accuracy: 0.099:   2%|▏         | 893/37094 [01:31<58:36, 10.29it/s]

Epoch: 0 Iteration: 893 Loss: 1.487 Validation Loss: 1.221 Accuracy: 0.197 Validation Accuracy: 0.099:   2%|▏         | 893/37094 [01:31<58:36, 10.29it/s]

Epoch: 0 Iteration: 894 Loss: 1.531 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 893/37094 [01:31<58:36, 10.29it/s]

Epoch: 0 Iteration: 894 Loss: 1.531 Validation Loss: 1.221 Accuracy: 0.172 Validation Accuracy: 0.099:   2%|▏         | 895/37094 [01:31<58:36, 10.29it/s]

Epoch: 0 Iteration: 895 Loss: 1.511 Validation Loss: 1.221 Accuracy: 0.178 Validation Accuracy: 0.099:   2%|▏         | 895/37094 [01:31<58:36, 10.29it/s]

Epoch: 0 Iteration: 896 Loss: 1.545 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 895/37094 [01:31<58:36, 10.29it/s]

Epoch: 0 Iteration: 896 Loss: 1.545 Validation Loss: 1.221 Accuracy: 0.181 Validation Accuracy: 0.099:   2%|▏         | 897/37094 [01:31<58:37, 10.29it/s]

Epoch: 0 Iteration: 897 Loss: 1.520 Validation Loss: 1.221 Accuracy: 0.188 Validation Accuracy: 0.099:   2%|▏         | 897/37094 [01:31<58:37, 10.29it/s]

Epoch: 0 Iteration: 898 Loss: 1.515 Validation Loss: 1.221 Accuracy: 0.178 Validation Accuracy: 0.099:   2%|▏         | 897/37094 [01:32<58:37, 10.29it/s]

Epoch: 0 Iteration: 898 Loss: 1.515 Validation Loss: 1.221 Accuracy: 0.178 Validation Accuracy: 0.099:   2%|▏         | 899/37094 [01:32<58:35, 10.29it/s]

Epoch: 0 Iteration: 899 Loss: 1.509 Validation Loss: 1.221 Accuracy: 0.197 Validation Accuracy: 0.099:   2%|▏         | 899/37094 [01:32<58:35, 10.29it/s]

Epoch: 0 Iteration: 900 Loss: 1.538 Validation Loss: 1.375 Accuracy: 0.194 Validation Accuracy: 0.104:   2%|▏         | 899/37094 [01:32<58:35, 10.29it/s]

Epoch: 0 Iteration: 900 Loss: 1.538 Validation Loss: 1.375 Accuracy: 0.194 Validation Accuracy: 0.104:   2%|▏         | 901/37094 [01:32<1:48:03,  5.58it/s]

Epoch: 0 Iteration: 901 Loss: 1.590 Validation Loss: 1.375 Accuracy: 0.188 Validation Accuracy: 0.104:   2%|▏         | 901/37094 [01:32<1:48:03,  5.58it/s]

Epoch: 0 Iteration: 902 Loss: 1.614 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   2%|▏         | 901/37094 [01:32<1:48:03,  5.58it/s]

Epoch: 0 Iteration: 902 Loss: 1.614 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   2%|▏         | 903/37094 [01:32<1:32:26,  6.52it/s]

Epoch: 0 Iteration: 903 Loss: 1.555 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   2%|▏         | 903/37094 [01:33<1:32:26,  6.52it/s]

Epoch: 0 Iteration: 904 Loss: 1.561 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   2%|▏         | 903/37094 [01:33<1:32:26,  6.52it/s]

Epoch: 0 Iteration: 904 Loss: 1.561 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   2%|▏         | 905/37094 [01:33<1:22:16,  7.33it/s]

Epoch: 0 Iteration: 905 Loss: 1.574 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   2%|▏         | 905/37094 [01:33<1:22:16,  7.33it/s]

Epoch: 0 Iteration: 906 Loss: 1.560 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   2%|▏         | 905/37094 [01:33<1:22:16,  7.33it/s]

Epoch: 0 Iteration: 906 Loss: 1.560 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   2%|▏         | 907/37094 [01:33<1:15:08,  8.03it/s]

Epoch: 0 Iteration: 907 Loss: 1.541 Validation Loss: 1.375 Accuracy: 0.150 Validation Accuracy: 0.104:   2%|▏         | 907/37094 [01:33<1:15:08,  8.03it/s]

Epoch: 0 Iteration: 908 Loss: 1.501 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   2%|▏         | 907/37094 [01:33<1:15:08,  8.03it/s]

Epoch: 0 Iteration: 908 Loss: 1.501 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   2%|▏         | 909/37094 [01:33<1:10:09,  8.60it/s]

Epoch: 0 Iteration: 909 Loss: 1.509 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   2%|▏         | 909/37094 [01:33<1:10:09,  8.60it/s]

Epoch: 0 Iteration: 910 Loss: 1.493 Validation Loss: 1.375 Accuracy: 0.150 Validation Accuracy: 0.104:   2%|▏         | 909/37094 [01:33<1:10:09,  8.60it/s]

Epoch: 0 Iteration: 910 Loss: 1.493 Validation Loss: 1.375 Accuracy: 0.150 Validation Accuracy: 0.104:   2%|▏         | 911/37094 [01:33<1:06:39,  9.05it/s]

Epoch: 0 Iteration: 911 Loss: 1.522 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   2%|▏         | 911/37094 [01:33<1:06:39,  9.05it/s]

Epoch: 0 Iteration: 912 Loss: 1.474 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   2%|▏         | 911/37094 [01:33<1:06:39,  9.05it/s]

Epoch: 0 Iteration: 912 Loss: 1.474 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   2%|▏         | 913/37094 [01:33<1:04:14,  9.39it/s]

Epoch: 0 Iteration: 913 Loss: 1.448 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   2%|▏         | 913/37094 [01:34<1:04:14,  9.39it/s]

Epoch: 0 Iteration: 914 Loss: 1.402 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   2%|▏         | 913/37094 [01:34<1:04:14,  9.39it/s]

Epoch: 0 Iteration: 914 Loss: 1.402 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   2%|▏         | 915/37094 [01:34<1:02:33,  9.64it/s]

Epoch: 0 Iteration: 915 Loss: 1.432 Validation Loss: 1.375 Accuracy: 0.197 Validation Accuracy: 0.104:   2%|▏         | 915/37094 [01:34<1:02:33,  9.64it/s]

Epoch: 0 Iteration: 916 Loss: 1.428 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   2%|▏         | 915/37094 [01:34<1:02:33,  9.64it/s]

Epoch: 0 Iteration: 916 Loss: 1.428 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   2%|▏         | 917/37094 [01:34<1:01:20,  9.83it/s]

Epoch: 0 Iteration: 917 Loss: 1.426 Validation Loss: 1.375 Accuracy: 0.203 Validation Accuracy: 0.104:   2%|▏         | 917/37094 [01:34<1:01:20,  9.83it/s]

Epoch: 0 Iteration: 918 Loss: 1.398 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   2%|▏         | 917/37094 [01:34<1:01:20,  9.83it/s]

Epoch: 0 Iteration: 918 Loss: 1.398 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   2%|▏         | 919/37094 [01:34<1:00:29,  9.97it/s]

Epoch: 0 Iteration: 919 Loss: 1.389 Validation Loss: 1.375 Accuracy: 0.184 Validation Accuracy: 0.104:   2%|▏         | 919/37094 [01:34<1:00:29,  9.97it/s]

Epoch: 0 Iteration: 920 Loss: 1.332 Validation Loss: 1.375 Accuracy: 0.188 Validation Accuracy: 0.104:   2%|▏         | 919/37094 [01:34<1:00:29,  9.97it/s]

Epoch: 0 Iteration: 920 Loss: 1.332 Validation Loss: 1.375 Accuracy: 0.188 Validation Accuracy: 0.104:   2%|▏         | 921/37094 [01:34<59:53, 10.07it/s]  

Epoch: 0 Iteration: 921 Loss: 1.299 Validation Loss: 1.375 Accuracy: 0.184 Validation Accuracy: 0.104:   2%|▏         | 921/37094 [01:34<59:53, 10.07it/s]

Epoch: 0 Iteration: 922 Loss: 1.287 Validation Loss: 1.375 Accuracy: 0.188 Validation Accuracy: 0.104:   2%|▏         | 921/37094 [01:34<59:53, 10.07it/s]

Epoch: 0 Iteration: 922 Loss: 1.287 Validation Loss: 1.375 Accuracy: 0.188 Validation Accuracy: 0.104:   2%|▏         | 923/37094 [01:34<59:28, 10.14it/s]

Epoch: 0 Iteration: 923 Loss: 1.325 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   2%|▏         | 923/37094 [01:35<59:28, 10.14it/s]

Epoch: 0 Iteration: 924 Loss: 1.303 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   2%|▏         | 923/37094 [01:35<59:28, 10.14it/s]

Epoch: 0 Iteration: 924 Loss: 1.303 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   2%|▏         | 925/37094 [01:35<59:11, 10.19it/s]

Epoch: 0 Iteration: 925 Loss: 1.332 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   2%|▏         | 925/37094 [01:35<59:11, 10.19it/s]

Epoch: 0 Iteration: 926 Loss: 1.351 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   2%|▏         | 925/37094 [01:35<59:11, 10.19it/s]

Epoch: 0 Iteration: 926 Loss: 1.351 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   2%|▏         | 927/37094 [01:35<58:59, 10.22it/s]

Epoch: 0 Iteration: 927 Loss: 1.376 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   2%|▏         | 927/37094 [01:35<58:59, 10.22it/s]

Epoch: 0 Iteration: 928 Loss: 1.415 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   2%|▏         | 927/37094 [01:35<58:59, 10.22it/s]

Epoch: 0 Iteration: 928 Loss: 1.415 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   3%|▎         | 929/37094 [01:35<58:49, 10.25it/s]

Epoch: 0 Iteration: 929 Loss: 1.402 Validation Loss: 1.375 Accuracy: 0.178 Validation Accuracy: 0.104:   3%|▎         | 929/37094 [01:35<58:49, 10.25it/s]

Epoch: 0 Iteration: 930 Loss: 1.433 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   3%|▎         | 929/37094 [01:35<58:49, 10.25it/s]

Epoch: 0 Iteration: 930 Loss: 1.433 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   3%|▎         | 931/37094 [01:35<58:43, 10.26it/s]

Epoch: 0 Iteration: 931 Loss: 1.389 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   3%|▎         | 931/37094 [01:35<58:43, 10.26it/s]

Epoch: 0 Iteration: 932 Loss: 1.375 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   3%|▎         | 931/37094 [01:35<58:43, 10.26it/s]

Epoch: 0 Iteration: 932 Loss: 1.375 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   3%|▎         | 933/37094 [01:35<58:39, 10.28it/s]

Epoch: 0 Iteration: 933 Loss: 1.375 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 933/37094 [01:35<58:39, 10.28it/s]

Epoch: 0 Iteration: 934 Loss: 1.387 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 933/37094 [01:36<58:39, 10.28it/s]

Epoch: 0 Iteration: 934 Loss: 1.387 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 935/37094 [01:36<58:36, 10.28it/s]

Epoch: 0 Iteration: 935 Loss: 1.392 Validation Loss: 1.375 Accuracy: 0.150 Validation Accuracy: 0.104:   3%|▎         | 935/37094 [01:36<58:36, 10.28it/s]

Epoch: 0 Iteration: 936 Loss: 1.372 Validation Loss: 1.375 Accuracy: 0.144 Validation Accuracy: 0.104:   3%|▎         | 935/37094 [01:36<58:36, 10.28it/s]

Epoch: 0 Iteration: 936 Loss: 1.372 Validation Loss: 1.375 Accuracy: 0.144 Validation Accuracy: 0.104:   3%|▎         | 937/37094 [01:36<58:33, 10.29it/s]

Epoch: 0 Iteration: 937 Loss: 1.388 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   3%|▎         | 937/37094 [01:36<58:33, 10.29it/s]

Epoch: 0 Iteration: 938 Loss: 1.352 Validation Loss: 1.375 Accuracy: 0.144 Validation Accuracy: 0.104:   3%|▎         | 937/37094 [01:36<58:33, 10.29it/s]

Epoch: 0 Iteration: 938 Loss: 1.352 Validation Loss: 1.375 Accuracy: 0.144 Validation Accuracy: 0.104:   3%|▎         | 939/37094 [01:36<58:31, 10.30it/s]

Epoch: 0 Iteration: 939 Loss: 1.380 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   3%|▎         | 939/37094 [01:36<58:31, 10.30it/s]

Epoch: 0 Iteration: 940 Loss: 1.385 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   3%|▎         | 939/37094 [01:36<58:31, 10.30it/s]

Epoch: 0 Iteration: 940 Loss: 1.385 Validation Loss: 1.375 Accuracy: 0.172 Validation Accuracy: 0.104:   3%|▎         | 941/37094 [01:36<58:30, 10.30it/s]

Epoch: 0 Iteration: 941 Loss: 1.419 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 941/37094 [01:36<58:30, 10.30it/s]

Epoch: 0 Iteration: 942 Loss: 1.410 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 941/37094 [01:36<58:30, 10.30it/s]

Epoch: 0 Iteration: 942 Loss: 1.410 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 943/37094 [01:36<58:29, 10.30it/s]

Epoch: 0 Iteration: 943 Loss: 1.407 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 943/37094 [01:36<58:29, 10.30it/s]

Epoch: 0 Iteration: 944 Loss: 1.388 Validation Loss: 1.375 Accuracy: 0.166 Validation Accuracy: 0.104:   3%|▎         | 943/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 944 Loss: 1.388 Validation Loss: 1.375 Accuracy: 0.166 Validation Accuracy: 0.104:   3%|▎         | 945/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 945 Loss: 1.360 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   3%|▎         | 945/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 946 Loss: 1.357 Validation Loss: 1.375 Accuracy: 0.197 Validation Accuracy: 0.104:   3%|▎         | 945/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 946 Loss: 1.357 Validation Loss: 1.375 Accuracy: 0.197 Validation Accuracy: 0.104:   3%|▎         | 947/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 947 Loss: 1.392 Validation Loss: 1.375 Accuracy: 0.203 Validation Accuracy: 0.104:   3%|▎         | 947/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 948 Loss: 1.370 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   3%|▎         | 947/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 948 Loss: 1.370 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   3%|▎         | 949/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 949 Loss: 1.400 Validation Loss: 1.375 Accuracy: 0.188 Validation Accuracy: 0.104:   3%|▎         | 949/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 950 Loss: 1.361 Validation Loss: 1.375 Accuracy: 0.194 Validation Accuracy: 0.104:   3%|▎         | 949/37094 [01:37<58:29, 10.30it/s]

Epoch: 0 Iteration: 950 Loss: 1.361 Validation Loss: 1.375 Accuracy: 0.194 Validation Accuracy: 0.104:   3%|▎         | 951/37094 [01:37<58:28, 10.30it/s]

Epoch: 0 Iteration: 951 Loss: 1.424 Validation Loss: 1.375 Accuracy: 0.206 Validation Accuracy: 0.104:   3%|▎         | 951/37094 [01:37<58:28, 10.30it/s]

Epoch: 0 Iteration: 952 Loss: 1.427 Validation Loss: 1.375 Accuracy: 0.209 Validation Accuracy: 0.104:   3%|▎         | 951/37094 [01:37<58:28, 10.30it/s]

Epoch: 0 Iteration: 952 Loss: 1.427 Validation Loss: 1.375 Accuracy: 0.209 Validation Accuracy: 0.104:   3%|▎         | 953/37094 [01:37<58:26, 10.31it/s]

Epoch: 0 Iteration: 953 Loss: 1.470 Validation Loss: 1.375 Accuracy: 0.197 Validation Accuracy: 0.104:   3%|▎         | 953/37094 [01:37<58:26, 10.31it/s]

Epoch: 0 Iteration: 954 Loss: 1.473 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   3%|▎         | 953/37094 [01:38<58:26, 10.31it/s]

Epoch: 0 Iteration: 954 Loss: 1.473 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   3%|▎         | 955/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 955 Loss: 1.440 Validation Loss: 1.375 Accuracy: 0.178 Validation Accuracy: 0.104:   3%|▎         | 955/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 956 Loss: 1.448 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 955/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 956 Loss: 1.448 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 957/37094 [01:38<58:28, 10.30it/s]

Epoch: 0 Iteration: 957 Loss: 1.426 Validation Loss: 1.375 Accuracy: 0.150 Validation Accuracy: 0.104:   3%|▎         | 957/37094 [01:38<58:28, 10.30it/s]

Epoch: 0 Iteration: 958 Loss: 1.491 Validation Loss: 1.375 Accuracy: 0.144 Validation Accuracy: 0.104:   3%|▎         | 957/37094 [01:38<58:28, 10.30it/s]

Epoch: 0 Iteration: 958 Loss: 1.491 Validation Loss: 1.375 Accuracy: 0.144 Validation Accuracy: 0.104:   3%|▎         | 959/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 959 Loss: 1.447 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   3%|▎         | 959/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 960 Loss: 1.467 Validation Loss: 1.375 Accuracy: 0.138 Validation Accuracy: 0.104:   3%|▎         | 959/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 960 Loss: 1.467 Validation Loss: 1.375 Accuracy: 0.138 Validation Accuracy: 0.104:   3%|▎         | 961/37094 [01:38<58:26, 10.30it/s]

Epoch: 0 Iteration: 961 Loss: 1.444 Validation Loss: 1.375 Accuracy: 0.134 Validation Accuracy: 0.104:   3%|▎         | 961/37094 [01:38<58:26, 10.30it/s]

Epoch: 0 Iteration: 962 Loss: 1.415 Validation Loss: 1.375 Accuracy: 0.138 Validation Accuracy: 0.104:   3%|▎         | 961/37094 [01:38<58:26, 10.30it/s]

Epoch: 0 Iteration: 962 Loss: 1.415 Validation Loss: 1.375 Accuracy: 0.138 Validation Accuracy: 0.104:   3%|▎         | 963/37094 [01:38<58:26, 10.30it/s]

Epoch: 0 Iteration: 963 Loss: 1.439 Validation Loss: 1.375 Accuracy: 0.134 Validation Accuracy: 0.104:   3%|▎         | 963/37094 [01:38<58:26, 10.30it/s]

Epoch: 0 Iteration: 964 Loss: 1.437 Validation Loss: 1.375 Accuracy: 0.141 Validation Accuracy: 0.104:   3%|▎         | 963/37094 [01:38<58:26, 10.30it/s]

Epoch: 0 Iteration: 964 Loss: 1.437 Validation Loss: 1.375 Accuracy: 0.141 Validation Accuracy: 0.104:   3%|▎         | 965/37094 [01:38<58:27, 10.30it/s]

Epoch: 0 Iteration: 965 Loss: 1.461 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 965/37094 [01:39<58:27, 10.30it/s]

Epoch: 0 Iteration: 966 Loss: 1.493 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 965/37094 [01:39<58:27, 10.30it/s]

Epoch: 0 Iteration: 966 Loss: 1.493 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 967/37094 [01:39<58:30, 10.29it/s]

Epoch: 0 Iteration: 967 Loss: 1.445 Validation Loss: 1.375 Accuracy: 0.184 Validation Accuracy: 0.104:   3%|▎         | 967/37094 [01:39<58:30, 10.29it/s]

Epoch: 0 Iteration: 968 Loss: 1.422 Validation Loss: 1.375 Accuracy: 0.209 Validation Accuracy: 0.104:   3%|▎         | 967/37094 [01:39<58:30, 10.29it/s]

Epoch: 0 Iteration: 968 Loss: 1.422 Validation Loss: 1.375 Accuracy: 0.209 Validation Accuracy: 0.104:   3%|▎         | 969/37094 [01:39<58:28, 10.30it/s]

Epoch: 0 Iteration: 969 Loss: 1.406 Validation Loss: 1.375 Accuracy: 0.206 Validation Accuracy: 0.104:   3%|▎         | 969/37094 [01:39<58:28, 10.30it/s]

Epoch: 0 Iteration: 970 Loss: 1.393 Validation Loss: 1.375 Accuracy: 0.228 Validation Accuracy: 0.104:   3%|▎         | 969/37094 [01:39<58:28, 10.30it/s]

Epoch: 0 Iteration: 970 Loss: 1.393 Validation Loss: 1.375 Accuracy: 0.228 Validation Accuracy: 0.104:   3%|▎         | 971/37094 [01:39<58:27, 10.30it/s]

Epoch: 0 Iteration: 971 Loss: 1.345 Validation Loss: 1.375 Accuracy: 0.216 Validation Accuracy: 0.104:   3%|▎         | 971/37094 [01:39<58:27, 10.30it/s]

Epoch: 0 Iteration: 972 Loss: 1.367 Validation Loss: 1.375 Accuracy: 0.206 Validation Accuracy: 0.104:   3%|▎         | 971/37094 [01:39<58:27, 10.30it/s]

Epoch: 0 Iteration: 972 Loss: 1.367 Validation Loss: 1.375 Accuracy: 0.206 Validation Accuracy: 0.104:   3%|▎         | 973/37094 [01:39<58:28, 10.29it/s]

Epoch: 0 Iteration: 973 Loss: 1.329 Validation Loss: 1.375 Accuracy: 0.216 Validation Accuracy: 0.104:   3%|▎         | 973/37094 [01:39<58:28, 10.29it/s]

Epoch: 0 Iteration: 974 Loss: 1.292 Validation Loss: 1.375 Accuracy: 0.203 Validation Accuracy: 0.104:   3%|▎         | 973/37094 [01:39<58:28, 10.29it/s]

Epoch: 0 Iteration: 974 Loss: 1.292 Validation Loss: 1.375 Accuracy: 0.203 Validation Accuracy: 0.104:   3%|▎         | 975/37094 [01:39<58:27, 10.30it/s]

Epoch: 0 Iteration: 975 Loss: 1.346 Validation Loss: 1.375 Accuracy: 0.200 Validation Accuracy: 0.104:   3%|▎         | 975/37094 [01:40<58:27, 10.30it/s]

Epoch: 0 Iteration: 976 Loss: 1.336 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   3%|▎         | 975/37094 [01:40<58:27, 10.30it/s]

Epoch: 0 Iteration: 976 Loss: 1.336 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   3%|▎         | 977/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 977 Loss: 1.333 Validation Loss: 1.375 Accuracy: 0.194 Validation Accuracy: 0.104:   3%|▎         | 977/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 978 Loss: 1.312 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 977/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 978 Loss: 1.312 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 979/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 979 Loss: 1.381 Validation Loss: 1.375 Accuracy: 0.159 Validation Accuracy: 0.104:   3%|▎         | 979/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 980 Loss: 1.411 Validation Loss: 1.375 Accuracy: 0.159 Validation Accuracy: 0.104:   3%|▎         | 979/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 980 Loss: 1.411 Validation Loss: 1.375 Accuracy: 0.159 Validation Accuracy: 0.104:   3%|▎         | 981/37094 [01:40<58:26, 10.30it/s]

Epoch: 0 Iteration: 981 Loss: 1.387 Validation Loss: 1.375 Accuracy: 0.178 Validation Accuracy: 0.104:   3%|▎         | 981/37094 [01:40<58:26, 10.30it/s]

Epoch: 0 Iteration: 982 Loss: 1.402 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 981/37094 [01:40<58:26, 10.30it/s]

Epoch: 0 Iteration: 982 Loss: 1.402 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 983/37094 [01:40<58:26, 10.30it/s]

Epoch: 0 Iteration: 983 Loss: 1.383 Validation Loss: 1.375 Accuracy: 0.175 Validation Accuracy: 0.104:   3%|▎         | 983/37094 [01:40<58:26, 10.30it/s]

Epoch: 0 Iteration: 984 Loss: 1.408 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   3%|▎         | 983/37094 [01:40<58:26, 10.30it/s]

Epoch: 0 Iteration: 984 Loss: 1.408 Validation Loss: 1.375 Accuracy: 0.191 Validation Accuracy: 0.104:   3%|▎         | 985/37094 [01:40<58:25, 10.30it/s]

Epoch: 0 Iteration: 985 Loss: 1.413 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 985/37094 [01:41<58:25, 10.30it/s]

Epoch: 0 Iteration: 986 Loss: 1.401 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 985/37094 [01:41<58:25, 10.30it/s]

Epoch: 0 Iteration: 986 Loss: 1.401 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 987/37094 [01:41<58:23, 10.31it/s]

Epoch: 0 Iteration: 987 Loss: 1.378 Validation Loss: 1.375 Accuracy: 0.178 Validation Accuracy: 0.104:   3%|▎         | 987/37094 [01:41<58:23, 10.31it/s]

Epoch: 0 Iteration: 988 Loss: 1.405 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 987/37094 [01:41<58:23, 10.31it/s]

Epoch: 0 Iteration: 988 Loss: 1.405 Validation Loss: 1.375 Accuracy: 0.181 Validation Accuracy: 0.104:   3%|▎         | 989/37094 [01:41<58:22, 10.31it/s]

Epoch: 0 Iteration: 989 Loss: 1.420 Validation Loss: 1.375 Accuracy: 0.184 Validation Accuracy: 0.104:   3%|▎         | 989/37094 [01:41<58:22, 10.31it/s]

Epoch: 0 Iteration: 990 Loss: 1.429 Validation Loss: 1.375 Accuracy: 0.178 Validation Accuracy: 0.104:   3%|▎         | 989/37094 [01:41<58:22, 10.31it/s]

Epoch: 0 Iteration: 990 Loss: 1.429 Validation Loss: 1.375 Accuracy: 0.178 Validation Accuracy: 0.104:   3%|▎         | 991/37094 [01:41<58:23, 10.31it/s]

Epoch: 0 Iteration: 991 Loss: 1.437 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   3%|▎         | 991/37094 [01:41<58:23, 10.31it/s]

Epoch: 0 Iteration: 992 Loss: 1.436 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 991/37094 [01:41<58:23, 10.31it/s]

Epoch: 0 Iteration: 992 Loss: 1.436 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 993/37094 [01:41<58:23, 10.30it/s]

Epoch: 0 Iteration: 993 Loss: 1.471 Validation Loss: 1.375 Accuracy: 0.163 Validation Accuracy: 0.104:   3%|▎         | 993/37094 [01:41<58:23, 10.30it/s]

Epoch: 0 Iteration: 994 Loss: 1.509 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   3%|▎         | 993/37094 [01:41<58:23, 10.30it/s]

Epoch: 0 Iteration: 994 Loss: 1.509 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   3%|▎         | 995/37094 [01:41<58:26, 10.29it/s]

Epoch: 0 Iteration: 995 Loss: 1.446 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   3%|▎         | 995/37094 [01:42<58:26, 10.29it/s]

Epoch: 0 Iteration: 996 Loss: 1.463 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   3%|▎         | 995/37094 [01:42<58:26, 10.29it/s]

Epoch: 0 Iteration: 996 Loss: 1.463 Validation Loss: 1.375 Accuracy: 0.156 Validation Accuracy: 0.104:   3%|▎         | 997/37094 [01:42<58:24, 10.30it/s]

Epoch: 0 Iteration: 997 Loss: 1.462 Validation Loss: 1.375 Accuracy: 0.150 Validation Accuracy: 0.104:   3%|▎         | 997/37094 [01:42<58:24, 10.30it/s]

Epoch: 0 Iteration: 998 Loss: 1.434 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   3%|▎         | 997/37094 [01:42<58:24, 10.30it/s]

Epoch: 0 Iteration: 998 Loss: 1.434 Validation Loss: 1.375 Accuracy: 0.153 Validation Accuracy: 0.104:   3%|▎         | 999/37094 [01:42<58:24, 10.30it/s]

Epoch: 0 Iteration: 999 Loss: 1.413 Validation Loss: 1.375 Accuracy: 0.169 Validation Accuracy: 0.104:   3%|▎         | 999/37094 [01:42<58:24, 10.30it/s]

Epoch: 0 Iteration: 1000 Loss: 1.416 Validation Loss: 1.294 Accuracy: 0.166 Validation Accuracy: 0.113:   3%|▎         | 999/37094 [01:43<58:24, 10.30it/s]

Epoch: 0 Iteration: 1000 Loss: 1.416 Validation Loss: 1.294 Accuracy: 0.166 Validation Accuracy: 0.113:   3%|▎         | 1001/37094 [01:43<1:47:54,  5.57it/s]

Epoch: 0 Iteration: 1001 Loss: 1.450 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1001/37094 [01:43<1:47:54,  5.57it/s]

Epoch: 0 Iteration: 1002 Loss: 1.412 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1001/37094 [01:43<1:47:54,  5.57it/s]

Epoch: 0 Iteration: 1002 Loss: 1.412 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1003/37094 [01:43<1:32:17,  6.52it/s]

Epoch: 0 Iteration: 1003 Loss: 1.398 Validation Loss: 1.294 Accuracy: 0.175 Validation Accuracy: 0.113:   3%|▎         | 1003/37094 [01:43<1:32:17,  6.52it/s]

Epoch: 0 Iteration: 1004 Loss: 1.390 Validation Loss: 1.294 Accuracy: 0.169 Validation Accuracy: 0.113:   3%|▎         | 1003/37094 [01:43<1:32:17,  6.52it/s]

Epoch: 0 Iteration: 1004 Loss: 1.390 Validation Loss: 1.294 Accuracy: 0.169 Validation Accuracy: 0.113:   3%|▎         | 1005/37094 [01:43<1:22:08,  7.32it/s]

Epoch: 0 Iteration: 1005 Loss: 1.340 Validation Loss: 1.294 Accuracy: 0.166 Validation Accuracy: 0.113:   3%|▎         | 1005/37094 [01:43<1:22:08,  7.32it/s]

Epoch: 0 Iteration: 1006 Loss: 1.300 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1005/37094 [01:43<1:22:08,  7.32it/s]

Epoch: 0 Iteration: 1006 Loss: 1.300 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1007/37094 [01:43<1:14:59,  8.02it/s]

Epoch: 0 Iteration: 1007 Loss: 1.301 Validation Loss: 1.294 Accuracy: 0.203 Validation Accuracy: 0.113:   3%|▎         | 1007/37094 [01:43<1:14:59,  8.02it/s]

Epoch: 0 Iteration: 1008 Loss: 1.326 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1007/37094 [01:43<1:14:59,  8.02it/s]

Epoch: 0 Iteration: 1008 Loss: 1.326 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1009/37094 [01:43<1:10:00,  8.59it/s]

Epoch: 0 Iteration: 1009 Loss: 1.307 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1009/37094 [01:43<1:10:00,  8.59it/s]

Epoch: 0 Iteration: 1010 Loss: 1.307 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1009/37094 [01:44<1:10:00,  8.59it/s]

Epoch: 0 Iteration: 1010 Loss: 1.307 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1011/37094 [01:44<1:06:30,  9.04it/s]

Epoch: 0 Iteration: 1011 Loss: 1.335 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1011/37094 [01:44<1:06:30,  9.04it/s]

Epoch: 0 Iteration: 1012 Loss: 1.312 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1011/37094 [01:44<1:06:30,  9.04it/s]

Epoch: 0 Iteration: 1012 Loss: 1.312 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1013/37094 [01:44<1:04:03,  9.39it/s]

Epoch: 0 Iteration: 1013 Loss: 1.288 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1013/37094 [01:44<1:04:03,  9.39it/s]

Epoch: 0 Iteration: 1014 Loss: 1.286 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1013/37094 [01:44<1:04:03,  9.39it/s]

Epoch: 0 Iteration: 1014 Loss: 1.286 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1015/37094 [01:44<1:02:21,  9.64it/s]

Epoch: 0 Iteration: 1015 Loss: 1.310 Validation Loss: 1.294 Accuracy: 0.156 Validation Accuracy: 0.113:   3%|▎         | 1015/37094 [01:44<1:02:21,  9.64it/s]

Epoch: 0 Iteration: 1016 Loss: 1.311 Validation Loss: 1.294 Accuracy: 0.138 Validation Accuracy: 0.113:   3%|▎         | 1015/37094 [01:44<1:02:21,  9.64it/s]

Epoch: 0 Iteration: 1016 Loss: 1.311 Validation Loss: 1.294 Accuracy: 0.138 Validation Accuracy: 0.113:   3%|▎         | 1017/37094 [01:44<1:01:07,  9.84it/s]

Epoch: 0 Iteration: 1017 Loss: 1.312 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1017/37094 [01:44<1:01:07,  9.84it/s]

Epoch: 0 Iteration: 1018 Loss: 1.338 Validation Loss: 1.294 Accuracy: 0.138 Validation Accuracy: 0.113:   3%|▎         | 1017/37094 [01:44<1:01:07,  9.84it/s]

Epoch: 0 Iteration: 1018 Loss: 1.338 Validation Loss: 1.294 Accuracy: 0.138 Validation Accuracy: 0.113:   3%|▎         | 1019/37094 [01:44<1:00:17,  9.97it/s]

Epoch: 0 Iteration: 1019 Loss: 1.302 Validation Loss: 1.294 Accuracy: 0.156 Validation Accuracy: 0.113:   3%|▎         | 1019/37094 [01:44<1:00:17,  9.97it/s]

Epoch: 0 Iteration: 1020 Loss: 1.246 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1019/37094 [01:44<1:00:17,  9.97it/s]

Epoch: 0 Iteration: 1020 Loss: 1.246 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1021/37094 [01:44<59:41, 10.07it/s]  

Epoch: 0 Iteration: 1021 Loss: 1.276 Validation Loss: 1.294 Accuracy: 0.181 Validation Accuracy: 0.113:   3%|▎         | 1021/37094 [01:45<59:41, 10.07it/s]

Epoch: 0 Iteration: 1022 Loss: 1.326 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1021/37094 [01:45<59:41, 10.07it/s]

Epoch: 0 Iteration: 1022 Loss: 1.326 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1023/37094 [01:45<59:17, 10.14it/s]

Epoch: 0 Iteration: 1023 Loss: 1.314 Validation Loss: 1.294 Accuracy: 0.206 Validation Accuracy: 0.113:   3%|▎         | 1023/37094 [01:45<59:17, 10.14it/s]

Epoch: 0 Iteration: 1024 Loss: 1.290 Validation Loss: 1.294 Accuracy: 0.222 Validation Accuracy: 0.113:   3%|▎         | 1023/37094 [01:45<59:17, 10.14it/s]

Epoch: 0 Iteration: 1024 Loss: 1.290 Validation Loss: 1.294 Accuracy: 0.222 Validation Accuracy: 0.113:   3%|▎         | 1025/37094 [01:45<58:59, 10.19it/s]

Epoch: 0 Iteration: 1025 Loss: 1.305 Validation Loss: 1.294 Accuracy: 0.222 Validation Accuracy: 0.113:   3%|▎         | 1025/37094 [01:45<58:59, 10.19it/s]

Epoch: 0 Iteration: 1026 Loss: 1.300 Validation Loss: 1.294 Accuracy: 0.222 Validation Accuracy: 0.113:   3%|▎         | 1025/37094 [01:45<58:59, 10.19it/s]

Epoch: 0 Iteration: 1026 Loss: 1.300 Validation Loss: 1.294 Accuracy: 0.222 Validation Accuracy: 0.113:   3%|▎         | 1027/37094 [01:45<58:47, 10.22it/s]

Epoch: 0 Iteration: 1027 Loss: 1.329 Validation Loss: 1.294 Accuracy: 0.219 Validation Accuracy: 0.113:   3%|▎         | 1027/37094 [01:45<58:47, 10.22it/s]

Epoch: 0 Iteration: 1028 Loss: 1.298 Validation Loss: 1.294 Accuracy: 0.228 Validation Accuracy: 0.113:   3%|▎         | 1027/37094 [01:45<58:47, 10.22it/s]

Epoch: 0 Iteration: 1028 Loss: 1.298 Validation Loss: 1.294 Accuracy: 0.228 Validation Accuracy: 0.113:   3%|▎         | 1029/37094 [01:45<58:38, 10.25it/s]

Epoch: 0 Iteration: 1029 Loss: 1.298 Validation Loss: 1.294 Accuracy: 0.209 Validation Accuracy: 0.113:   3%|▎         | 1029/37094 [01:45<58:38, 10.25it/s]

Epoch: 0 Iteration: 1030 Loss: 1.295 Validation Loss: 1.294 Accuracy: 0.188 Validation Accuracy: 0.113:   3%|▎         | 1029/37094 [01:45<58:38, 10.25it/s]

Epoch: 0 Iteration: 1030 Loss: 1.295 Validation Loss: 1.294 Accuracy: 0.188 Validation Accuracy: 0.113:   3%|▎         | 1031/37094 [01:45<58:31, 10.27it/s]

Epoch: 0 Iteration: 1031 Loss: 1.298 Validation Loss: 1.294 Accuracy: 0.175 Validation Accuracy: 0.113:   3%|▎         | 1031/37094 [01:46<58:31, 10.27it/s]

Epoch: 0 Iteration: 1032 Loss: 1.310 Validation Loss: 1.294 Accuracy: 0.169 Validation Accuracy: 0.113:   3%|▎         | 1031/37094 [01:46<58:31, 10.27it/s]

Epoch: 0 Iteration: 1032 Loss: 1.310 Validation Loss: 1.294 Accuracy: 0.169 Validation Accuracy: 0.113:   3%|▎         | 1033/37094 [01:46<58:29, 10.28it/s]

Epoch: 0 Iteration: 1033 Loss: 1.301 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1033/37094 [01:46<58:29, 10.28it/s]

Epoch: 0 Iteration: 1034 Loss: 1.266 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1033/37094 [01:46<58:29, 10.28it/s]

Epoch: 0 Iteration: 1034 Loss: 1.266 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1035/37094 [01:46<58:26, 10.28it/s]

Epoch: 0 Iteration: 1035 Loss: 1.263 Validation Loss: 1.294 Accuracy: 0.134 Validation Accuracy: 0.113:   3%|▎         | 1035/37094 [01:46<58:26, 10.28it/s]

Epoch: 0 Iteration: 1036 Loss: 1.261 Validation Loss: 1.294 Accuracy: 0.131 Validation Accuracy: 0.113:   3%|▎         | 1035/37094 [01:46<58:26, 10.28it/s]

Epoch: 0 Iteration: 1036 Loss: 1.261 Validation Loss: 1.294 Accuracy: 0.131 Validation Accuracy: 0.113:   3%|▎         | 1037/37094 [01:46<58:25, 10.29it/s]

Epoch: 0 Iteration: 1037 Loss: 1.258 Validation Loss: 1.294 Accuracy: 0.128 Validation Accuracy: 0.113:   3%|▎         | 1037/37094 [01:46<58:25, 10.29it/s]

Epoch: 0 Iteration: 1038 Loss: 1.260 Validation Loss: 1.294 Accuracy: 0.128 Validation Accuracy: 0.113:   3%|▎         | 1037/37094 [01:46<58:25, 10.29it/s]

Epoch: 0 Iteration: 1038 Loss: 1.260 Validation Loss: 1.294 Accuracy: 0.128 Validation Accuracy: 0.113:   3%|▎         | 1039/37094 [01:46<58:22, 10.29it/s]

Epoch: 0 Iteration: 1039 Loss: 1.255 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1039/37094 [01:46<58:22, 10.29it/s]

Epoch: 0 Iteration: 1040 Loss: 1.255 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1039/37094 [01:46<58:22, 10.29it/s]

Epoch: 0 Iteration: 1040 Loss: 1.255 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1041/37094 [01:46<58:19, 10.30it/s]

Epoch: 0 Iteration: 1041 Loss: 1.233 Validation Loss: 1.294 Accuracy: 0.175 Validation Accuracy: 0.113:   3%|▎         | 1041/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1042 Loss: 1.221 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1041/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1042 Loss: 1.221 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1043/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1043 Loss: 1.225 Validation Loss: 1.294 Accuracy: 0.178 Validation Accuracy: 0.113:   3%|▎         | 1043/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1044 Loss: 1.258 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1043/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1044 Loss: 1.258 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1045/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1045 Loss: 1.255 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1045/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1046 Loss: 1.258 Validation Loss: 1.294 Accuracy: 0.181 Validation Accuracy: 0.113:   3%|▎         | 1045/37094 [01:47<58:19, 10.30it/s]

Epoch: 0 Iteration: 1046 Loss: 1.258 Validation Loss: 1.294 Accuracy: 0.181 Validation Accuracy: 0.113:   3%|▎         | 1047/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1047 Loss: 1.281 Validation Loss: 1.294 Accuracy: 0.188 Validation Accuracy: 0.113:   3%|▎         | 1047/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1048 Loss: 1.280 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1047/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1048 Loss: 1.280 Validation Loss: 1.294 Accuracy: 0.191 Validation Accuracy: 0.113:   3%|▎         | 1049/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1049 Loss: 1.274 Validation Loss: 1.294 Accuracy: 0.200 Validation Accuracy: 0.113:   3%|▎         | 1049/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1050 Loss: 1.288 Validation Loss: 1.294 Accuracy: 0.200 Validation Accuracy: 0.113:   3%|▎         | 1049/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1050 Loss: 1.288 Validation Loss: 1.294 Accuracy: 0.200 Validation Accuracy: 0.113:   3%|▎         | 1051/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1051 Loss: 1.258 Validation Loss: 1.294 Accuracy: 0.200 Validation Accuracy: 0.113:   3%|▎         | 1051/37094 [01:47<58:20, 10.30it/s]

Epoch: 0 Iteration: 1052 Loss: 1.270 Validation Loss: 1.294 Accuracy: 0.212 Validation Accuracy: 0.113:   3%|▎         | 1051/37094 [01:48<58:20, 10.30it/s]

Epoch: 0 Iteration: 1052 Loss: 1.270 Validation Loss: 1.294 Accuracy: 0.212 Validation Accuracy: 0.113:   3%|▎         | 1053/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1053 Loss: 1.300 Validation Loss: 1.294 Accuracy: 0.212 Validation Accuracy: 0.113:   3%|▎         | 1053/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1054 Loss: 1.327 Validation Loss: 1.294 Accuracy: 0.225 Validation Accuracy: 0.113:   3%|▎         | 1053/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1054 Loss: 1.327 Validation Loss: 1.294 Accuracy: 0.225 Validation Accuracy: 0.113:   3%|▎         | 1055/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1055 Loss: 1.320 Validation Loss: 1.294 Accuracy: 0.219 Validation Accuracy: 0.113:   3%|▎         | 1055/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1056 Loss: 1.321 Validation Loss: 1.294 Accuracy: 0.216 Validation Accuracy: 0.113:   3%|▎         | 1055/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1056 Loss: 1.321 Validation Loss: 1.294 Accuracy: 0.216 Validation Accuracy: 0.113:   3%|▎         | 1057/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1057 Loss: 1.329 Validation Loss: 1.294 Accuracy: 0.206 Validation Accuracy: 0.113:   3%|▎         | 1057/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1058 Loss: 1.348 Validation Loss: 1.294 Accuracy: 0.197 Validation Accuracy: 0.113:   3%|▎         | 1057/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1058 Loss: 1.348 Validation Loss: 1.294 Accuracy: 0.197 Validation Accuracy: 0.113:   3%|▎         | 1059/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1059 Loss: 1.351 Validation Loss: 1.294 Accuracy: 0.188 Validation Accuracy: 0.113:   3%|▎         | 1059/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1060 Loss: 1.398 Validation Loss: 1.294 Accuracy: 0.178 Validation Accuracy: 0.113:   3%|▎         | 1059/37094 [01:48<58:19, 10.30it/s]

Epoch: 0 Iteration: 1060 Loss: 1.398 Validation Loss: 1.294 Accuracy: 0.178 Validation Accuracy: 0.113:   3%|▎         | 1061/37094 [01:48<58:17, 10.30it/s]

Epoch: 0 Iteration: 1061 Loss: 1.350 Validation Loss: 1.294 Accuracy: 0.175 Validation Accuracy: 0.113:   3%|▎         | 1061/37094 [01:48<58:17, 10.30it/s]

Epoch: 0 Iteration: 1062 Loss: 1.386 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1061/37094 [01:49<58:17, 10.30it/s]

Epoch: 0 Iteration: 1062 Loss: 1.386 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1063/37094 [01:49<58:16, 10.31it/s]

Epoch: 0 Iteration: 1063 Loss: 1.433 Validation Loss: 1.294 Accuracy: 0.156 Validation Accuracy: 0.113:   3%|▎         | 1063/37094 [01:49<58:16, 10.31it/s]

Epoch: 0 Iteration: 1064 Loss: 1.417 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1063/37094 [01:49<58:16, 10.31it/s]

Epoch: 0 Iteration: 1064 Loss: 1.417 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1065/37094 [01:49<58:15, 10.31it/s]

Epoch: 0 Iteration: 1065 Loss: 1.428 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1065/37094 [01:49<58:15, 10.31it/s]

Epoch: 0 Iteration: 1066 Loss: 1.412 Validation Loss: 1.294 Accuracy: 0.144 Validation Accuracy: 0.113:   3%|▎         | 1065/37094 [01:49<58:15, 10.31it/s]

Epoch: 0 Iteration: 1066 Loss: 1.412 Validation Loss: 1.294 Accuracy: 0.144 Validation Accuracy: 0.113:   3%|▎         | 1067/37094 [01:49<58:17, 10.30it/s]

Epoch: 0 Iteration: 1067 Loss: 1.369 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1067/37094 [01:49<58:17, 10.30it/s]

Epoch: 0 Iteration: 1068 Loss: 1.413 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1067/37094 [01:49<58:17, 10.30it/s]

Epoch: 0 Iteration: 1068 Loss: 1.413 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1069/37094 [01:49<58:18, 10.30it/s]

Epoch: 0 Iteration: 1069 Loss: 1.418 Validation Loss: 1.294 Accuracy: 0.156 Validation Accuracy: 0.113:   3%|▎         | 1069/37094 [01:49<58:18, 10.30it/s]

Epoch: 0 Iteration: 1070 Loss: 1.428 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1069/37094 [01:49<58:18, 10.30it/s]

Epoch: 0 Iteration: 1070 Loss: 1.428 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1071/37094 [01:49<58:19, 10.29it/s]

Epoch: 0 Iteration: 1071 Loss: 1.420 Validation Loss: 1.294 Accuracy: 0.156 Validation Accuracy: 0.113:   3%|▎         | 1071/37094 [01:49<58:19, 10.29it/s]

Epoch: 0 Iteration: 1072 Loss: 1.409 Validation Loss: 1.294 Accuracy: 0.153 Validation Accuracy: 0.113:   3%|▎         | 1071/37094 [01:50<58:19, 10.29it/s]

Epoch: 0 Iteration: 1072 Loss: 1.409 Validation Loss: 1.294 Accuracy: 0.153 Validation Accuracy: 0.113:   3%|▎         | 1073/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1073 Loss: 1.384 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1073/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1074 Loss: 1.406 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1073/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1074 Loss: 1.406 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1075/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1075 Loss: 1.392 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1075/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1076 Loss: 1.375 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1075/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1076 Loss: 1.375 Validation Loss: 1.294 Accuracy: 0.172 Validation Accuracy: 0.113:   3%|▎         | 1077/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1077 Loss: 1.376 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1077/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1078 Loss: 1.351 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1077/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1078 Loss: 1.351 Validation Loss: 1.294 Accuracy: 0.163 Validation Accuracy: 0.113:   3%|▎         | 1079/37094 [01:50<58:17, 10.30it/s]

Epoch: 0 Iteration: 1079 Loss: 1.342 Validation Loss: 1.294 Accuracy: 0.153 Validation Accuracy: 0.113:   3%|▎         | 1079/37094 [01:50<58:17, 10.30it/s]

Epoch: 0 Iteration: 1080 Loss: 1.297 Validation Loss: 1.294 Accuracy: 0.147 Validation Accuracy: 0.113:   3%|▎         | 1079/37094 [01:50<58:17, 10.30it/s]

Epoch: 0 Iteration: 1080 Loss: 1.297 Validation Loss: 1.294 Accuracy: 0.147 Validation Accuracy: 0.113:   3%|▎         | 1081/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1081 Loss: 1.328 Validation Loss: 1.294 Accuracy: 0.144 Validation Accuracy: 0.113:   3%|▎         | 1081/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1082 Loss: 1.289 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1081/37094 [01:50<58:16, 10.30it/s]

Epoch: 0 Iteration: 1082 Loss: 1.289 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1083/37094 [01:50<58:15, 10.30it/s]

Epoch: 0 Iteration: 1083 Loss: 1.262 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1083/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1084 Loss: 1.257 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1083/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1084 Loss: 1.257 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1085/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1085 Loss: 1.311 Validation Loss: 1.294 Accuracy: 0.138 Validation Accuracy: 0.113:   3%|▎         | 1085/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1086 Loss: 1.348 Validation Loss: 1.294 Accuracy: 0.128 Validation Accuracy: 0.113:   3%|▎         | 1085/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1086 Loss: 1.348 Validation Loss: 1.294 Accuracy: 0.128 Validation Accuracy: 0.113:   3%|▎         | 1087/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1087 Loss: 1.390 Validation Loss: 1.294 Accuracy: 0.128 Validation Accuracy: 0.113:   3%|▎         | 1087/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1088 Loss: 1.404 Validation Loss: 1.294 Accuracy: 0.125 Validation Accuracy: 0.113:   3%|▎         | 1087/37094 [01:51<58:15, 10.30it/s]

Epoch: 0 Iteration: 1088 Loss: 1.404 Validation Loss: 1.294 Accuracy: 0.125 Validation Accuracy: 0.113:   3%|▎         | 1089/37094 [01:51<58:17, 10.30it/s]

Epoch: 0 Iteration: 1089 Loss: 1.484 Validation Loss: 1.294 Accuracy: 0.131 Validation Accuracy: 0.113:   3%|▎         | 1089/37094 [01:51<58:17, 10.30it/s]

Epoch: 0 Iteration: 1090 Loss: 1.474 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1089/37094 [01:51<58:17, 10.30it/s]

Epoch: 0 Iteration: 1090 Loss: 1.474 Validation Loss: 1.294 Accuracy: 0.141 Validation Accuracy: 0.113:   3%|▎         | 1091/37094 [01:51<58:16, 10.30it/s]

Epoch: 0 Iteration: 1091 Loss: 1.512 Validation Loss: 1.294 Accuracy: 0.144 Validation Accuracy: 0.113:   3%|▎         | 1091/37094 [01:51<58:16, 10.30it/s]

Epoch: 0 Iteration: 1092 Loss: 1.488 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1091/37094 [01:51<58:16, 10.30it/s]

Epoch: 0 Iteration: 1092 Loss: 1.488 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1093/37094 [01:51<58:14, 10.30it/s]

Epoch: 0 Iteration: 1093 Loss: 1.492 Validation Loss: 1.294 Accuracy: 0.131 Validation Accuracy: 0.113:   3%|▎         | 1093/37094 [01:52<58:14, 10.30it/s]

Epoch: 0 Iteration: 1094 Loss: 1.440 Validation Loss: 1.294 Accuracy: 0.134 Validation Accuracy: 0.113:   3%|▎         | 1093/37094 [01:52<58:14, 10.30it/s]

Epoch: 0 Iteration: 1094 Loss: 1.440 Validation Loss: 1.294 Accuracy: 0.134 Validation Accuracy: 0.113:   3%|▎         | 1095/37094 [01:52<58:13, 10.31it/s]

Epoch: 0 Iteration: 1095 Loss: 1.467 Validation Loss: 1.294 Accuracy: 0.138 Validation Accuracy: 0.113:   3%|▎         | 1095/37094 [01:52<58:13, 10.31it/s]

Epoch: 0 Iteration: 1096 Loss: 1.469 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1095/37094 [01:52<58:13, 10.31it/s]

Epoch: 0 Iteration: 1096 Loss: 1.469 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1097/37094 [01:52<58:13, 10.30it/s]

Epoch: 0 Iteration: 1097 Loss: 1.494 Validation Loss: 1.294 Accuracy: 0.156 Validation Accuracy: 0.113:   3%|▎         | 1097/37094 [01:52<58:13, 10.30it/s]

Epoch: 0 Iteration: 1098 Loss: 1.512 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1097/37094 [01:52<58:13, 10.30it/s]

Epoch: 0 Iteration: 1098 Loss: 1.512 Validation Loss: 1.294 Accuracy: 0.159 Validation Accuracy: 0.113:   3%|▎         | 1099/37094 [01:52<58:16, 10.30it/s]

Epoch: 0 Iteration: 1099 Loss: 1.529 Validation Loss: 1.294 Accuracy: 0.150 Validation Accuracy: 0.113:   3%|▎         | 1099/37094 [01:52<58:16, 10.30it/s]

Epoch: 0 Iteration: 1100 Loss: 1.527 Validation Loss: 1.339 Accuracy: 0.147 Validation Accuracy: 0.117:   3%|▎         | 1099/37094 [01:53<58:16, 10.30it/s]

Epoch: 0 Iteration: 1100 Loss: 1.527 Validation Loss: 1.339 Accuracy: 0.147 Validation Accuracy: 0.117:   3%|▎         | 1101/37094 [01:53<1:47:06,  5.60it/s]

Epoch: 0 Iteration: 1101 Loss: 1.520 Validation Loss: 1.339 Accuracy: 0.141 Validation Accuracy: 0.117:   3%|▎         | 1101/37094 [01:53<1:47:06,  5.60it/s]

Epoch: 0 Iteration: 1102 Loss: 1.526 Validation Loss: 1.339 Accuracy: 0.144 Validation Accuracy: 0.117:   3%|▎         | 1101/37094 [01:53<1:47:06,  5.60it/s]

Epoch: 0 Iteration: 1102 Loss: 1.526 Validation Loss: 1.339 Accuracy: 0.144 Validation Accuracy: 0.117:   3%|▎         | 1103/37094 [01:53<1:31:42,  6.54it/s]

Epoch: 0 Iteration: 1103 Loss: 1.488 Validation Loss: 1.339 Accuracy: 0.169 Validation Accuracy: 0.117:   3%|▎         | 1103/37094 [01:53<1:31:42,  6.54it/s]

Epoch: 0 Iteration: 1104 Loss: 1.519 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1103/37094 [01:53<1:31:42,  6.54it/s]

Epoch: 0 Iteration: 1104 Loss: 1.519 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1105/37094 [01:53<1:21:41,  7.34it/s]

Epoch: 0 Iteration: 1105 Loss: 1.466 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1105/37094 [01:53<1:21:41,  7.34it/s]

Epoch: 0 Iteration: 1106 Loss: 1.453 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1105/37094 [01:53<1:21:41,  7.34it/s]

Epoch: 0 Iteration: 1106 Loss: 1.453 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1107/37094 [01:53<1:14:41,  8.03it/s]

Epoch: 0 Iteration: 1107 Loss: 1.518 Validation Loss: 1.339 Accuracy: 0.166 Validation Accuracy: 0.117:   3%|▎         | 1107/37094 [01:53<1:14:41,  8.03it/s]

Epoch: 0 Iteration: 1108 Loss: 1.482 Validation Loss: 1.339 Accuracy: 0.188 Validation Accuracy: 0.117:   3%|▎         | 1107/37094 [01:54<1:14:41,  8.03it/s]

Epoch: 0 Iteration: 1108 Loss: 1.482 Validation Loss: 1.339 Accuracy: 0.188 Validation Accuracy: 0.117:   3%|▎         | 1109/37094 [01:54<1:09:44,  8.60it/s]

Epoch: 0 Iteration: 1109 Loss: 1.431 Validation Loss: 1.339 Accuracy: 0.212 Validation Accuracy: 0.117:   3%|▎         | 1109/37094 [01:54<1:09:44,  8.60it/s]

Epoch: 0 Iteration: 1110 Loss: 1.444 Validation Loss: 1.339 Accuracy: 0.219 Validation Accuracy: 0.117:   3%|▎         | 1109/37094 [01:54<1:09:44,  8.60it/s]

Epoch: 0 Iteration: 1110 Loss: 1.444 Validation Loss: 1.339 Accuracy: 0.219 Validation Accuracy: 0.117:   3%|▎         | 1111/37094 [01:54<1:06:16,  9.05it/s]

Epoch: 0 Iteration: 1111 Loss: 1.443 Validation Loss: 1.339 Accuracy: 0.225 Validation Accuracy: 0.117:   3%|▎         | 1111/37094 [01:54<1:06:16,  9.05it/s]

Epoch: 0 Iteration: 1112 Loss: 1.458 Validation Loss: 1.339 Accuracy: 0.216 Validation Accuracy: 0.117:   3%|▎         | 1111/37094 [01:54<1:06:16,  9.05it/s]

Epoch: 0 Iteration: 1112 Loss: 1.458 Validation Loss: 1.339 Accuracy: 0.216 Validation Accuracy: 0.117:   3%|▎         | 1113/37094 [01:54<1:03:51,  9.39it/s]

Epoch: 0 Iteration: 1113 Loss: 1.431 Validation Loss: 1.339 Accuracy: 0.194 Validation Accuracy: 0.117:   3%|▎         | 1113/37094 [01:54<1:03:51,  9.39it/s]

Epoch: 0 Iteration: 1114 Loss: 1.428 Validation Loss: 1.339 Accuracy: 0.206 Validation Accuracy: 0.117:   3%|▎         | 1113/37094 [01:54<1:03:51,  9.39it/s]

Epoch: 0 Iteration: 1114 Loss: 1.428 Validation Loss: 1.339 Accuracy: 0.206 Validation Accuracy: 0.117:   3%|▎         | 1115/37094 [01:54<1:02:11,  9.64it/s]

Epoch: 0 Iteration: 1115 Loss: 1.439 Validation Loss: 1.339 Accuracy: 0.200 Validation Accuracy: 0.117:   3%|▎         | 1115/37094 [01:54<1:02:11,  9.64it/s]

Epoch: 0 Iteration: 1116 Loss: 1.464 Validation Loss: 1.339 Accuracy: 0.181 Validation Accuracy: 0.117:   3%|▎         | 1115/37094 [01:54<1:02:11,  9.64it/s]

Epoch: 0 Iteration: 1116 Loss: 1.464 Validation Loss: 1.339 Accuracy: 0.181 Validation Accuracy: 0.117:   3%|▎         | 1117/37094 [01:54<1:01:01,  9.83it/s]

Epoch: 0 Iteration: 1117 Loss: 1.473 Validation Loss: 1.339 Accuracy: 0.184 Validation Accuracy: 0.117:   3%|▎         | 1117/37094 [01:54<1:01:01,  9.83it/s]

Epoch: 0 Iteration: 1118 Loss: 1.445 Validation Loss: 1.339 Accuracy: 0.163 Validation Accuracy: 0.117:   3%|▎         | 1117/37094 [01:55<1:01:01,  9.83it/s]

Epoch: 0 Iteration: 1118 Loss: 1.445 Validation Loss: 1.339 Accuracy: 0.163 Validation Accuracy: 0.117:   3%|▎         | 1119/37094 [01:55<1:00:10,  9.96it/s]

Epoch: 0 Iteration: 1119 Loss: 1.457 Validation Loss: 1.339 Accuracy: 0.138 Validation Accuracy: 0.117:   3%|▎         | 1119/37094 [01:55<1:00:10,  9.96it/s]

Epoch: 0 Iteration: 1120 Loss: 1.508 Validation Loss: 1.339 Accuracy: 0.125 Validation Accuracy: 0.117:   3%|▎         | 1119/37094 [01:55<1:00:10,  9.96it/s]

Epoch: 0 Iteration: 1120 Loss: 1.508 Validation Loss: 1.339 Accuracy: 0.125 Validation Accuracy: 0.117:   3%|▎         | 1121/37094 [01:55<59:33, 10.07it/s]  

Epoch: 0 Iteration: 1121 Loss: 1.512 Validation Loss: 1.339 Accuracy: 0.113 Validation Accuracy: 0.117:   3%|▎         | 1121/37094 [01:55<59:33, 10.07it/s]

Epoch: 0 Iteration: 1122 Loss: 1.510 Validation Loss: 1.339 Accuracy: 0.103 Validation Accuracy: 0.117:   3%|▎         | 1121/37094 [01:55<59:33, 10.07it/s]

Epoch: 0 Iteration: 1122 Loss: 1.510 Validation Loss: 1.339 Accuracy: 0.103 Validation Accuracy: 0.117:   3%|▎         | 1123/37094 [01:55<59:08, 10.14it/s]

Epoch: 0 Iteration: 1123 Loss: 1.521 Validation Loss: 1.339 Accuracy: 0.119 Validation Accuracy: 0.117:   3%|▎         | 1123/37094 [01:55<59:08, 10.14it/s]

Epoch: 0 Iteration: 1124 Loss: 1.526 Validation Loss: 1.339 Accuracy: 0.119 Validation Accuracy: 0.117:   3%|▎         | 1123/37094 [01:55<59:08, 10.14it/s]

Epoch: 0 Iteration: 1124 Loss: 1.526 Validation Loss: 1.339 Accuracy: 0.119 Validation Accuracy: 0.117:   3%|▎         | 1125/37094 [01:55<58:52, 10.18it/s]

Epoch: 0 Iteration: 1125 Loss: 1.515 Validation Loss: 1.339 Accuracy: 0.125 Validation Accuracy: 0.117:   3%|▎         | 1125/37094 [01:55<58:52, 10.18it/s]

Epoch: 0 Iteration: 1126 Loss: 1.539 Validation Loss: 1.339 Accuracy: 0.141 Validation Accuracy: 0.117:   3%|▎         | 1125/37094 [01:55<58:52, 10.18it/s]

Epoch: 0 Iteration: 1126 Loss: 1.539 Validation Loss: 1.339 Accuracy: 0.141 Validation Accuracy: 0.117:   3%|▎         | 1127/37094 [01:55<58:38, 10.22it/s]

Epoch: 0 Iteration: 1127 Loss: 1.474 Validation Loss: 1.339 Accuracy: 0.147 Validation Accuracy: 0.117:   3%|▎         | 1127/37094 [01:55<58:38, 10.22it/s]

Epoch: 0 Iteration: 1128 Loss: 1.463 Validation Loss: 1.339 Accuracy: 0.156 Validation Accuracy: 0.117:   3%|▎         | 1127/37094 [01:55<58:38, 10.22it/s]

Epoch: 0 Iteration: 1128 Loss: 1.463 Validation Loss: 1.339 Accuracy: 0.156 Validation Accuracy: 0.117:   3%|▎         | 1129/37094 [01:55<58:28, 10.25it/s]

Epoch: 0 Iteration: 1129 Loss: 1.477 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1129/37094 [01:56<58:28, 10.25it/s]

Epoch: 0 Iteration: 1130 Loss: 1.494 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1129/37094 [01:56<58:28, 10.25it/s]

Epoch: 0 Iteration: 1130 Loss: 1.494 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1131/37094 [01:56<58:26, 10.26it/s]

Epoch: 0 Iteration: 1131 Loss: 1.465 Validation Loss: 1.339 Accuracy: 0.194 Validation Accuracy: 0.117:   3%|▎         | 1131/37094 [01:56<58:26, 10.26it/s]

Epoch: 0 Iteration: 1132 Loss: 1.471 Validation Loss: 1.339 Accuracy: 0.200 Validation Accuracy: 0.117:   3%|▎         | 1131/37094 [01:56<58:26, 10.26it/s]

Epoch: 0 Iteration: 1132 Loss: 1.471 Validation Loss: 1.339 Accuracy: 0.200 Validation Accuracy: 0.117:   3%|▎         | 1133/37094 [01:56<58:20, 10.27it/s]

Epoch: 0 Iteration: 1133 Loss: 1.473 Validation Loss: 1.339 Accuracy: 0.188 Validation Accuracy: 0.117:   3%|▎         | 1133/37094 [01:56<58:20, 10.27it/s]

Epoch: 0 Iteration: 1134 Loss: 1.490 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1133/37094 [01:56<58:20, 10.27it/s]

Epoch: 0 Iteration: 1134 Loss: 1.490 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1135/37094 [01:56<58:18, 10.28it/s]

Epoch: 0 Iteration: 1135 Loss: 1.479 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1135/37094 [01:56<58:18, 10.28it/s]

Epoch: 0 Iteration: 1136 Loss: 1.468 Validation Loss: 1.339 Accuracy: 0.159 Validation Accuracy: 0.117:   3%|▎         | 1135/37094 [01:56<58:18, 10.28it/s]

Epoch: 0 Iteration: 1136 Loss: 1.468 Validation Loss: 1.339 Accuracy: 0.159 Validation Accuracy: 0.117:   3%|▎         | 1137/37094 [01:56<58:15, 10.29it/s]

Epoch: 0 Iteration: 1137 Loss: 1.427 Validation Loss: 1.339 Accuracy: 0.166 Validation Accuracy: 0.117:   3%|▎         | 1137/37094 [01:56<58:15, 10.29it/s]

Epoch: 0 Iteration: 1138 Loss: 1.415 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1137/37094 [01:56<58:15, 10.29it/s]

Epoch: 0 Iteration: 1138 Loss: 1.415 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1139/37094 [01:56<58:13, 10.29it/s]

Epoch: 0 Iteration: 1139 Loss: 1.406 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1139/37094 [01:57<58:13, 10.29it/s]

Epoch: 0 Iteration: 1140 Loss: 1.388 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1139/37094 [01:57<58:13, 10.29it/s]

Epoch: 0 Iteration: 1140 Loss: 1.388 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1141/37094 [01:57<58:11, 10.30it/s]

Epoch: 0 Iteration: 1141 Loss: 1.390 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1141/37094 [01:57<58:11, 10.30it/s]

Epoch: 0 Iteration: 1142 Loss: 1.372 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1141/37094 [01:57<58:11, 10.30it/s]

Epoch: 0 Iteration: 1142 Loss: 1.372 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1143/37094 [01:57<58:09, 10.30it/s]

Epoch: 0 Iteration: 1143 Loss: 1.431 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1143/37094 [01:57<58:09, 10.30it/s]

Epoch: 0 Iteration: 1144 Loss: 1.393 Validation Loss: 1.339 Accuracy: 0.181 Validation Accuracy: 0.117:   3%|▎         | 1143/37094 [01:57<58:09, 10.30it/s]

Epoch: 0 Iteration: 1144 Loss: 1.393 Validation Loss: 1.339 Accuracy: 0.181 Validation Accuracy: 0.117:   3%|▎         | 1145/37094 [01:57<58:09, 10.30it/s]

Epoch: 0 Iteration: 1145 Loss: 1.423 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1145/37094 [01:57<58:09, 10.30it/s]

Epoch: 0 Iteration: 1146 Loss: 1.451 Validation Loss: 1.339 Accuracy: 0.184 Validation Accuracy: 0.117:   3%|▎         | 1145/37094 [01:57<58:09, 10.30it/s]

Epoch: 0 Iteration: 1146 Loss: 1.451 Validation Loss: 1.339 Accuracy: 0.184 Validation Accuracy: 0.117:   3%|▎         | 1147/37094 [01:57<58:08, 10.30it/s]

Epoch: 0 Iteration: 1147 Loss: 1.485 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1147/37094 [01:57<58:08, 10.30it/s]

Epoch: 0 Iteration: 1148 Loss: 1.517 Validation Loss: 1.339 Accuracy: 0.159 Validation Accuracy: 0.117:   3%|▎         | 1147/37094 [01:57<58:08, 10.30it/s]

Epoch: 0 Iteration: 1148 Loss: 1.517 Validation Loss: 1.339 Accuracy: 0.159 Validation Accuracy: 0.117:   3%|▎         | 1149/37094 [01:57<58:08, 10.30it/s]

Epoch: 0 Iteration: 1149 Loss: 1.469 Validation Loss: 1.339 Accuracy: 0.156 Validation Accuracy: 0.117:   3%|▎         | 1149/37094 [01:58<58:08, 10.30it/s]

Epoch: 0 Iteration: 1150 Loss: 1.464 Validation Loss: 1.339 Accuracy: 0.156 Validation Accuracy: 0.117:   3%|▎         | 1149/37094 [01:58<58:08, 10.30it/s]

Epoch: 0 Iteration: 1150 Loss: 1.464 Validation Loss: 1.339 Accuracy: 0.156 Validation Accuracy: 0.117:   3%|▎         | 1151/37094 [01:58<58:08, 10.30it/s]

Epoch: 0 Iteration: 1151 Loss: 1.464 Validation Loss: 1.339 Accuracy: 0.163 Validation Accuracy: 0.117:   3%|▎         | 1151/37094 [01:58<58:08, 10.30it/s]

Epoch: 0 Iteration: 1152 Loss: 1.478 Validation Loss: 1.339 Accuracy: 0.159 Validation Accuracy: 0.117:   3%|▎         | 1151/37094 [01:58<58:08, 10.30it/s]

Epoch: 0 Iteration: 1152 Loss: 1.478 Validation Loss: 1.339 Accuracy: 0.159 Validation Accuracy: 0.117:   3%|▎         | 1153/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1153 Loss: 1.484 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1153/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1154 Loss: 1.533 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1153/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1154 Loss: 1.533 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1155/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1155 Loss: 1.516 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1155/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1156 Loss: 1.528 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1155/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1156 Loss: 1.528 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1157/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1157 Loss: 1.538 Validation Loss: 1.339 Accuracy: 0.191 Validation Accuracy: 0.117:   3%|▎         | 1157/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1158 Loss: 1.553 Validation Loss: 1.339 Accuracy: 0.197 Validation Accuracy: 0.117:   3%|▎         | 1157/37094 [01:58<58:07, 10.31it/s]

Epoch: 0 Iteration: 1158 Loss: 1.553 Validation Loss: 1.339 Accuracy: 0.197 Validation Accuracy: 0.117:   3%|▎         | 1159/37094 [01:58<58:06, 10.31it/s]

Epoch: 0 Iteration: 1159 Loss: 1.557 Validation Loss: 1.339 Accuracy: 0.178 Validation Accuracy: 0.117:   3%|▎         | 1159/37094 [01:59<58:06, 10.31it/s]

Epoch: 0 Iteration: 1160 Loss: 1.520 Validation Loss: 1.339 Accuracy: 0.188 Validation Accuracy: 0.117:   3%|▎         | 1159/37094 [01:59<58:06, 10.31it/s]

Epoch: 0 Iteration: 1160 Loss: 1.520 Validation Loss: 1.339 Accuracy: 0.188 Validation Accuracy: 0.117:   3%|▎         | 1161/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1161 Loss: 1.525 Validation Loss: 1.339 Accuracy: 0.169 Validation Accuracy: 0.117:   3%|▎         | 1161/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1162 Loss: 1.531 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1161/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1162 Loss: 1.531 Validation Loss: 1.339 Accuracy: 0.172 Validation Accuracy: 0.117:   3%|▎         | 1163/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1163 Loss: 1.490 Validation Loss: 1.339 Accuracy: 0.150 Validation Accuracy: 0.117:   3%|▎         | 1163/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1164 Loss: 1.505 Validation Loss: 1.339 Accuracy: 0.138 Validation Accuracy: 0.117:   3%|▎         | 1163/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1164 Loss: 1.505 Validation Loss: 1.339 Accuracy: 0.138 Validation Accuracy: 0.117:   3%|▎         | 1165/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1165 Loss: 1.523 Validation Loss: 1.339 Accuracy: 0.122 Validation Accuracy: 0.117:   3%|▎         | 1165/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1166 Loss: 1.534 Validation Loss: 1.339 Accuracy: 0.119 Validation Accuracy: 0.117:   3%|▎         | 1165/37094 [01:59<58:05, 10.31it/s]

Epoch: 0 Iteration: 1166 Loss: 1.534 Validation Loss: 1.339 Accuracy: 0.119 Validation Accuracy: 0.117:   3%|▎         | 1167/37094 [01:59<58:10, 10.29it/s]

Epoch: 0 Iteration: 1167 Loss: 1.478 Validation Loss: 1.339 Accuracy: 0.106 Validation Accuracy: 0.117:   3%|▎         | 1167/37094 [01:59<58:10, 10.29it/s]

Epoch: 0 Iteration: 1168 Loss: 1.458 Validation Loss: 1.339 Accuracy: 0.113 Validation Accuracy: 0.117:   3%|▎         | 1167/37094 [01:59<58:10, 10.29it/s]

Epoch: 0 Iteration: 1168 Loss: 1.458 Validation Loss: 1.339 Accuracy: 0.113 Validation Accuracy: 0.117:   3%|▎         | 1169/37094 [01:59<58:12, 10.29it/s]

Epoch: 0 Iteration: 1169 Loss: 1.483 Validation Loss: 1.339 Accuracy: 0.128 Validation Accuracy: 0.117:   3%|▎         | 1169/37094 [01:59<58:12, 10.29it/s]

Epoch: 0 Iteration: 1170 Loss: 1.444 Validation Loss: 1.339 Accuracy: 0.134 Validation Accuracy: 0.117:   3%|▎         | 1169/37094 [02:00<58:12, 10.29it/s]

Epoch: 0 Iteration: 1170 Loss: 1.444 Validation Loss: 1.339 Accuracy: 0.134 Validation Accuracy: 0.117:   3%|▎         | 1171/37094 [02:00<58:09, 10.30it/s]

Epoch: 0 Iteration: 1171 Loss: 1.441 Validation Loss: 1.339 Accuracy: 0.131 Validation Accuracy: 0.117:   3%|▎         | 1171/37094 [02:00<58:09, 10.30it/s]

Epoch: 0 Iteration: 1172 Loss: 1.445 Validation Loss: 1.339 Accuracy: 0.138 Validation Accuracy: 0.117:   3%|▎         | 1171/37094 [02:00<58:09, 10.30it/s]

Epoch: 0 Iteration: 1172 Loss: 1.445 Validation Loss: 1.339 Accuracy: 0.138 Validation Accuracy: 0.117:   3%|▎         | 1173/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1173 Loss: 1.491 Validation Loss: 1.339 Accuracy: 0.153 Validation Accuracy: 0.117:   3%|▎         | 1173/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1174 Loss: 1.473 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1173/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1174 Loss: 1.473 Validation Loss: 1.339 Accuracy: 0.175 Validation Accuracy: 0.117:   3%|▎         | 1175/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1175 Loss: 1.503 Validation Loss: 1.339 Accuracy: 0.194 Validation Accuracy: 0.117:   3%|▎         | 1175/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1176 Loss: 1.459 Validation Loss: 1.339 Accuracy: 0.222 Validation Accuracy: 0.117:   3%|▎         | 1175/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1176 Loss: 1.459 Validation Loss: 1.339 Accuracy: 0.222 Validation Accuracy: 0.117:   3%|▎         | 1177/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1177 Loss: 1.473 Validation Loss: 1.339 Accuracy: 0.228 Validation Accuracy: 0.117:   3%|▎         | 1177/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1178 Loss: 1.469 Validation Loss: 1.339 Accuracy: 0.228 Validation Accuracy: 0.117:   3%|▎         | 1177/37094 [02:00<58:07, 10.30it/s]

Epoch: 0 Iteration: 1178 Loss: 1.469 Validation Loss: 1.339 Accuracy: 0.228 Validation Accuracy: 0.117:   3%|▎         | 1179/37094 [02:00<58:08, 10.30it/s]

Epoch: 0 Iteration: 1179 Loss: 1.513 Validation Loss: 1.339 Accuracy: 0.222 Validation Accuracy: 0.117:   3%|▎         | 1179/37094 [02:00<58:08, 10.30it/s]

Epoch: 0 Iteration: 1180 Loss: 1.504 Validation Loss: 1.339 Accuracy: 0.241 Validation Accuracy: 0.117:   3%|▎         | 1179/37094 [02:01<58:08, 10.30it/s]

Epoch: 0 Iteration: 1180 Loss: 1.504 Validation Loss: 1.339 Accuracy: 0.241 Validation Accuracy: 0.117:   3%|▎         | 1181/37094 [02:01<58:07, 10.30it/s]

Epoch: 0 Iteration: 1181 Loss: 1.519 Validation Loss: 1.339 Accuracy: 0.269 Validation Accuracy: 0.117:   3%|▎         | 1181/37094 [02:01<58:07, 10.30it/s]

Epoch: 0 Iteration: 1182 Loss: 1.543 Validation Loss: 1.339 Accuracy: 0.275 Validation Accuracy: 0.117:   3%|▎         | 1181/37094 [02:01<58:07, 10.30it/s]

Epoch: 0 Iteration: 1182 Loss: 1.543 Validation Loss: 1.339 Accuracy: 0.275 Validation Accuracy: 0.117:   3%|▎         | 1183/37094 [02:01<58:07, 10.30it/s]

Epoch: 0 Iteration: 1183 Loss: 1.559 Validation Loss: 1.339 Accuracy: 0.272 Validation Accuracy: 0.117:   3%|▎         | 1183/37094 [02:01<58:07, 10.30it/s]

Epoch: 0 Iteration: 1184 Loss: 1.552 Validation Loss: 1.339 Accuracy: 0.269 Validation Accuracy: 0.117:   3%|▎         | 1183/37094 [02:01<58:07, 10.30it/s]

Epoch: 0 Iteration: 1184 Loss: 1.552 Validation Loss: 1.339 Accuracy: 0.269 Validation Accuracy: 0.117:   3%|▎         | 1185/37094 [02:01<58:06, 10.30it/s]

Epoch: 0 Iteration: 1185 Loss: 1.525 Validation Loss: 1.339 Accuracy: 0.269 Validation Accuracy: 0.117:   3%|▎         | 1185/37094 [02:01<58:06, 10.30it/s]

Epoch: 0 Iteration: 1186 Loss: 1.459 Validation Loss: 1.339 Accuracy: 0.256 Validation Accuracy: 0.117:   3%|▎         | 1185/37094 [02:01<58:06, 10.30it/s]

Epoch: 0 Iteration: 1186 Loss: 1.459 Validation Loss: 1.339 Accuracy: 0.256 Validation Accuracy: 0.117:   3%|▎         | 1187/37094 [02:01<58:04, 10.30it/s]

Epoch: 0 Iteration: 1187 Loss: 1.472 Validation Loss: 1.339 Accuracy: 0.266 Validation Accuracy: 0.117:   3%|▎         | 1187/37094 [02:01<58:04, 10.30it/s]

Epoch: 0 Iteration: 1188 Loss: 1.452 Validation Loss: 1.339 Accuracy: 0.266 Validation Accuracy: 0.117:   3%|▎         | 1187/37094 [02:01<58:04, 10.30it/s]

Epoch: 0 Iteration: 1188 Loss: 1.452 Validation Loss: 1.339 Accuracy: 0.266 Validation Accuracy: 0.117:   3%|▎         | 1189/37094 [02:01<58:02, 10.31it/s]

Epoch: 0 Iteration: 1189 Loss: 1.458 Validation Loss: 1.339 Accuracy: 0.278 Validation Accuracy: 0.117:   3%|▎         | 1189/37094 [02:01<58:02, 10.31it/s]

Epoch: 0 Iteration: 1190 Loss: 1.483 Validation Loss: 1.339 Accuracy: 0.250 Validation Accuracy: 0.117:   3%|▎         | 1189/37094 [02:02<58:02, 10.31it/s]

Epoch: 0 Iteration: 1190 Loss: 1.483 Validation Loss: 1.339 Accuracy: 0.250 Validation Accuracy: 0.117:   3%|▎         | 1191/37094 [02:02<58:03, 10.31it/s]

Epoch: 0 Iteration: 1191 Loss: 1.508 Validation Loss: 1.339 Accuracy: 0.234 Validation Accuracy: 0.117:   3%|▎         | 1191/37094 [02:02<58:03, 10.31it/s]

Epoch: 0 Iteration: 1192 Loss: 1.470 Validation Loss: 1.339 Accuracy: 0.234 Validation Accuracy: 0.117:   3%|▎         | 1191/37094 [02:02<58:03, 10.31it/s]

Epoch: 0 Iteration: 1192 Loss: 1.470 Validation Loss: 1.339 Accuracy: 0.234 Validation Accuracy: 0.117:   3%|▎         | 1193/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1193 Loss: 1.432 Validation Loss: 1.339 Accuracy: 0.244 Validation Accuracy: 0.117:   3%|▎         | 1193/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1194 Loss: 1.514 Validation Loss: 1.339 Accuracy: 0.234 Validation Accuracy: 0.117:   3%|▎         | 1193/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1194 Loss: 1.514 Validation Loss: 1.339 Accuracy: 0.234 Validation Accuracy: 0.117:   3%|▎         | 1195/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1195 Loss: 1.456 Validation Loss: 1.339 Accuracy: 0.231 Validation Accuracy: 0.117:   3%|▎         | 1195/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1196 Loss: 1.502 Validation Loss: 1.339 Accuracy: 0.222 Validation Accuracy: 0.117:   3%|▎         | 1195/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1196 Loss: 1.502 Validation Loss: 1.339 Accuracy: 0.222 Validation Accuracy: 0.117:   3%|▎         | 1197/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1197 Loss: 1.469 Validation Loss: 1.339 Accuracy: 0.222 Validation Accuracy: 0.117:   3%|▎         | 1197/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1198 Loss: 1.438 Validation Loss: 1.339 Accuracy: 0.225 Validation Accuracy: 0.117:   3%|▎         | 1197/37094 [02:02<58:04, 10.30it/s]

Epoch: 0 Iteration: 1198 Loss: 1.438 Validation Loss: 1.339 Accuracy: 0.225 Validation Accuracy: 0.117:   3%|▎         | 1199/37094 [02:02<58:05, 10.30it/s]

Epoch: 0 Iteration: 1199 Loss: 1.381 Validation Loss: 1.339 Accuracy: 0.203 Validation Accuracy: 0.117:   3%|▎         | 1199/37094 [02:02<58:05, 10.30it/s]

Epoch: 0 Iteration: 1200 Loss: 1.421 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1199/37094 [02:03<58:05, 10.30it/s]

Epoch: 0 Iteration: 1200 Loss: 1.421 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1201/37094 [02:03<1:46:47,  5.60it/s]

Epoch: 0 Iteration: 1201 Loss: 1.401 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1201/37094 [02:03<1:46:47,  5.60it/s]

Epoch: 0 Iteration: 1202 Loss: 1.403 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1201/37094 [02:03<1:46:47,  5.60it/s]

Epoch: 0 Iteration: 1202 Loss: 1.403 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1203/37094 [02:03<1:31:24,  6.54it/s]

Epoch: 0 Iteration: 1203 Loss: 1.361 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1203/37094 [02:03<1:31:24,  6.54it/s]

Epoch: 0 Iteration: 1204 Loss: 1.359 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1203/37094 [02:03<1:31:24,  6.54it/s]

Epoch: 0 Iteration: 1204 Loss: 1.359 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1205/37094 [02:03<1:21:29,  7.34it/s]

Epoch: 0 Iteration: 1205 Loss: 1.322 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1205/37094 [02:04<1:21:29,  7.34it/s]

Epoch: 0 Iteration: 1206 Loss: 1.364 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1205/37094 [02:04<1:21:29,  7.34it/s]

Epoch: 0 Iteration: 1206 Loss: 1.364 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1207/37094 [02:04<1:14:28,  8.03it/s]

Epoch: 0 Iteration: 1207 Loss: 1.368 Validation Loss: 1.264 Accuracy: 0.181 Validation Accuracy: 0.123:   3%|▎         | 1207/37094 [02:04<1:14:28,  8.03it/s]

Epoch: 0 Iteration: 1208 Loss: 1.386 Validation Loss: 1.264 Accuracy: 0.172 Validation Accuracy: 0.123:   3%|▎         | 1207/37094 [02:04<1:14:28,  8.03it/s]

Epoch: 0 Iteration: 1208 Loss: 1.386 Validation Loss: 1.264 Accuracy: 0.172 Validation Accuracy: 0.123:   3%|▎         | 1209/37094 [02:04<1:09:33,  8.60it/s]

Epoch: 0 Iteration: 1209 Loss: 1.348 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1209/37094 [02:04<1:09:33,  8.60it/s]

Epoch: 0 Iteration: 1210 Loss: 1.344 Validation Loss: 1.264 Accuracy: 0.184 Validation Accuracy: 0.123:   3%|▎         | 1209/37094 [02:04<1:09:33,  8.60it/s]

Epoch: 0 Iteration: 1210 Loss: 1.344 Validation Loss: 1.264 Accuracy: 0.184 Validation Accuracy: 0.123:   3%|▎         | 1211/37094 [02:04<1:06:06,  9.05it/s]

Epoch: 0 Iteration: 1211 Loss: 1.308 Validation Loss: 1.264 Accuracy: 0.188 Validation Accuracy: 0.123:   3%|▎         | 1211/37094 [02:04<1:06:06,  9.05it/s]

Epoch: 0 Iteration: 1212 Loss: 1.335 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1211/37094 [02:04<1:06:06,  9.05it/s]

Epoch: 0 Iteration: 1212 Loss: 1.335 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1213/37094 [02:04<1:03:39,  9.39it/s]

Epoch: 0 Iteration: 1213 Loss: 1.343 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1213/37094 [02:04<1:03:39,  9.39it/s]

Epoch: 0 Iteration: 1214 Loss: 1.238 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1213/37094 [02:04<1:03:39,  9.39it/s]

Epoch: 0 Iteration: 1214 Loss: 1.238 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1215/37094 [02:04<1:02:04,  9.63it/s]

Epoch: 0 Iteration: 1215 Loss: 1.284 Validation Loss: 1.264 Accuracy: 0.184 Validation Accuracy: 0.123:   3%|▎         | 1215/37094 [02:04<1:02:04,  9.63it/s]

Epoch: 0 Iteration: 1216 Loss: 1.253 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1215/37094 [02:05<1:02:04,  9.63it/s]

Epoch: 0 Iteration: 1216 Loss: 1.253 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1217/37094 [02:05<1:00:53,  9.82it/s]

Epoch: 0 Iteration: 1217 Loss: 1.259 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1217/37094 [02:05<1:00:53,  9.82it/s]

Epoch: 0 Iteration: 1218 Loss: 1.310 Validation Loss: 1.264 Accuracy: 0.222 Validation Accuracy: 0.123:   3%|▎         | 1217/37094 [02:05<1:00:53,  9.82it/s]

Epoch: 0 Iteration: 1218 Loss: 1.310 Validation Loss: 1.264 Accuracy: 0.222 Validation Accuracy: 0.123:   3%|▎         | 1219/37094 [02:05<1:00:01,  9.96it/s]

Epoch: 0 Iteration: 1219 Loss: 1.338 Validation Loss: 1.264 Accuracy: 0.206 Validation Accuracy: 0.123:   3%|▎         | 1219/37094 [02:05<1:00:01,  9.96it/s]

Epoch: 0 Iteration: 1220 Loss: 1.365 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1219/37094 [02:05<1:00:01,  9.96it/s]

Epoch: 0 Iteration: 1220 Loss: 1.365 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1221/37094 [02:05<59:26, 10.06it/s]  

Epoch: 0 Iteration: 1221 Loss: 1.353 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1221/37094 [02:05<59:26, 10.06it/s]

Epoch: 0 Iteration: 1222 Loss: 1.333 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1221/37094 [02:05<59:26, 10.06it/s]

Epoch: 0 Iteration: 1222 Loss: 1.333 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1223/37094 [02:05<59:00, 10.13it/s]

Epoch: 0 Iteration: 1223 Loss: 1.354 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1223/37094 [02:05<59:00, 10.13it/s]

Epoch: 0 Iteration: 1224 Loss: 1.368 Validation Loss: 1.264 Accuracy: 0.178 Validation Accuracy: 0.123:   3%|▎         | 1223/37094 [02:05<59:00, 10.13it/s]

Epoch: 0 Iteration: 1224 Loss: 1.368 Validation Loss: 1.264 Accuracy: 0.178 Validation Accuracy: 0.123:   3%|▎         | 1225/37094 [02:05<58:43, 10.18it/s]

Epoch: 0 Iteration: 1225 Loss: 1.387 Validation Loss: 1.264 Accuracy: 0.181 Validation Accuracy: 0.123:   3%|▎         | 1225/37094 [02:05<58:43, 10.18it/s]

Epoch: 0 Iteration: 1226 Loss: 1.389 Validation Loss: 1.264 Accuracy: 0.169 Validation Accuracy: 0.123:   3%|▎         | 1225/37094 [02:06<58:43, 10.18it/s]

Epoch: 0 Iteration: 1226 Loss: 1.389 Validation Loss: 1.264 Accuracy: 0.169 Validation Accuracy: 0.123:   3%|▎         | 1227/37094 [02:06<58:30, 10.22it/s]

Epoch: 0 Iteration: 1227 Loss: 1.372 Validation Loss: 1.264 Accuracy: 0.159 Validation Accuracy: 0.123:   3%|▎         | 1227/37094 [02:06<58:30, 10.22it/s]

Epoch: 0 Iteration: 1228 Loss: 1.374 Validation Loss: 1.264 Accuracy: 0.166 Validation Accuracy: 0.123:   3%|▎         | 1227/37094 [02:06<58:30, 10.22it/s]

Epoch: 0 Iteration: 1228 Loss: 1.374 Validation Loss: 1.264 Accuracy: 0.166 Validation Accuracy: 0.123:   3%|▎         | 1229/37094 [02:06<58:23, 10.24it/s]

Epoch: 0 Iteration: 1229 Loss: 1.444 Validation Loss: 1.264 Accuracy: 0.169 Validation Accuracy: 0.123:   3%|▎         | 1229/37094 [02:06<58:23, 10.24it/s]

Epoch: 0 Iteration: 1230 Loss: 1.421 Validation Loss: 1.264 Accuracy: 0.166 Validation Accuracy: 0.123:   3%|▎         | 1229/37094 [02:06<58:23, 10.24it/s]

Epoch: 0 Iteration: 1230 Loss: 1.421 Validation Loss: 1.264 Accuracy: 0.166 Validation Accuracy: 0.123:   3%|▎         | 1231/37094 [02:06<58:16, 10.26it/s]

Epoch: 0 Iteration: 1231 Loss: 1.491 Validation Loss: 1.264 Accuracy: 0.178 Validation Accuracy: 0.123:   3%|▎         | 1231/37094 [02:06<58:16, 10.26it/s]

Epoch: 0 Iteration: 1232 Loss: 1.476 Validation Loss: 1.264 Accuracy: 0.206 Validation Accuracy: 0.123:   3%|▎         | 1231/37094 [02:06<58:16, 10.26it/s]

Epoch: 0 Iteration: 1232 Loss: 1.476 Validation Loss: 1.264 Accuracy: 0.206 Validation Accuracy: 0.123:   3%|▎         | 1233/37094 [02:06<58:11, 10.27it/s]

Epoch: 0 Iteration: 1233 Loss: 1.469 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1233/37094 [02:06<58:11, 10.27it/s]

Epoch: 0 Iteration: 1234 Loss: 1.510 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1233/37094 [02:06<58:11, 10.27it/s]

Epoch: 0 Iteration: 1234 Loss: 1.510 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1235/37094 [02:06<58:08, 10.28it/s]

Epoch: 0 Iteration: 1235 Loss: 1.471 Validation Loss: 1.264 Accuracy: 0.219 Validation Accuracy: 0.123:   3%|▎         | 1235/37094 [02:06<58:08, 10.28it/s]

Epoch: 0 Iteration: 1236 Loss: 1.474 Validation Loss: 1.264 Accuracy: 0.222 Validation Accuracy: 0.123:   3%|▎         | 1235/37094 [02:07<58:08, 10.28it/s]

Epoch: 0 Iteration: 1236 Loss: 1.474 Validation Loss: 1.264 Accuracy: 0.222 Validation Accuracy: 0.123:   3%|▎         | 1237/37094 [02:07<58:06, 10.28it/s]

Epoch: 0 Iteration: 1237 Loss: 1.476 Validation Loss: 1.264 Accuracy: 0.234 Validation Accuracy: 0.123:   3%|▎         | 1237/37094 [02:07<58:06, 10.28it/s]

Epoch: 0 Iteration: 1238 Loss: 1.462 Validation Loss: 1.264 Accuracy: 0.234 Validation Accuracy: 0.123:   3%|▎         | 1237/37094 [02:07<58:06, 10.28it/s]

Epoch: 0 Iteration: 1238 Loss: 1.462 Validation Loss: 1.264 Accuracy: 0.234 Validation Accuracy: 0.123:   3%|▎         | 1239/37094 [02:07<58:06, 10.28it/s]

Epoch: 0 Iteration: 1239 Loss: 1.418 Validation Loss: 1.264 Accuracy: 0.241 Validation Accuracy: 0.123:   3%|▎         | 1239/37094 [02:07<58:06, 10.28it/s]

Epoch: 0 Iteration: 1240 Loss: 1.390 Validation Loss: 1.264 Accuracy: 0.237 Validation Accuracy: 0.123:   3%|▎         | 1239/37094 [02:07<58:06, 10.28it/s]

Epoch: 0 Iteration: 1240 Loss: 1.390 Validation Loss: 1.264 Accuracy: 0.237 Validation Accuracy: 0.123:   3%|▎         | 1241/37094 [02:07<58:03, 10.29it/s]

Epoch: 0 Iteration: 1241 Loss: 1.421 Validation Loss: 1.264 Accuracy: 0.222 Validation Accuracy: 0.123:   3%|▎         | 1241/37094 [02:07<58:03, 10.29it/s]

Epoch: 0 Iteration: 1242 Loss: 1.434 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1241/37094 [02:07<58:03, 10.29it/s]

Epoch: 0 Iteration: 1242 Loss: 1.434 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1243/37094 [02:07<58:02, 10.29it/s]

Epoch: 0 Iteration: 1243 Loss: 1.426 Validation Loss: 1.264 Accuracy: 0.216 Validation Accuracy: 0.123:   3%|▎         | 1243/37094 [02:07<58:02, 10.29it/s]

Epoch: 0 Iteration: 1244 Loss: 1.401 Validation Loss: 1.264 Accuracy: 0.212 Validation Accuracy: 0.123:   3%|▎         | 1243/37094 [02:07<58:02, 10.29it/s]

Epoch: 0 Iteration: 1244 Loss: 1.401 Validation Loss: 1.264 Accuracy: 0.212 Validation Accuracy: 0.123:   3%|▎         | 1245/37094 [02:07<58:01, 10.30it/s]

Epoch: 0 Iteration: 1245 Loss: 1.403 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1245/37094 [02:07<58:01, 10.30it/s]

Epoch: 0 Iteration: 1246 Loss: 1.391 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1245/37094 [02:07<58:01, 10.30it/s]

Epoch: 0 Iteration: 1246 Loss: 1.391 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1247/37094 [02:07<58:02, 10.29it/s]

Epoch: 0 Iteration: 1247 Loss: 1.419 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1247/37094 [02:08<58:02, 10.29it/s]

Epoch: 0 Iteration: 1248 Loss: 1.391 Validation Loss: 1.264 Accuracy: 0.216 Validation Accuracy: 0.123:   3%|▎         | 1247/37094 [02:08<58:02, 10.29it/s]

Epoch: 0 Iteration: 1248 Loss: 1.391 Validation Loss: 1.264 Accuracy: 0.216 Validation Accuracy: 0.123:   3%|▎         | 1249/37094 [02:08<58:01, 10.29it/s]

Epoch: 0 Iteration: 1249 Loss: 1.338 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1249/37094 [02:08<58:01, 10.29it/s]

Epoch: 0 Iteration: 1250 Loss: 1.372 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1249/37094 [02:08<58:01, 10.29it/s]

Epoch: 0 Iteration: 1250 Loss: 1.372 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1251/37094 [02:08<58:01, 10.30it/s]

Epoch: 0 Iteration: 1251 Loss: 1.278 Validation Loss: 1.264 Accuracy: 0.244 Validation Accuracy: 0.123:   3%|▎         | 1251/37094 [02:08<58:01, 10.30it/s]

Epoch: 0 Iteration: 1252 Loss: 1.314 Validation Loss: 1.264 Accuracy: 0.237 Validation Accuracy: 0.123:   3%|▎         | 1251/37094 [02:08<58:01, 10.30it/s]

Epoch: 0 Iteration: 1252 Loss: 1.314 Validation Loss: 1.264 Accuracy: 0.237 Validation Accuracy: 0.123:   3%|▎         | 1253/37094 [02:08<57:59, 10.30it/s]

Epoch: 0 Iteration: 1253 Loss: 1.324 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1253/37094 [02:08<57:59, 10.30it/s]

Epoch: 0 Iteration: 1254 Loss: 1.281 Validation Loss: 1.264 Accuracy: 0.231 Validation Accuracy: 0.123:   3%|▎         | 1253/37094 [02:08<57:59, 10.30it/s]

Epoch: 0 Iteration: 1254 Loss: 1.281 Validation Loss: 1.264 Accuracy: 0.231 Validation Accuracy: 0.123:   3%|▎         | 1255/37094 [02:08<57:58, 10.30it/s]

Epoch: 0 Iteration: 1255 Loss: 1.381 Validation Loss: 1.264 Accuracy: 0.244 Validation Accuracy: 0.123:   3%|▎         | 1255/37094 [02:08<57:58, 10.30it/s]

Epoch: 0 Iteration: 1256 Loss: 1.389 Validation Loss: 1.264 Accuracy: 0.250 Validation Accuracy: 0.123:   3%|▎         | 1255/37094 [02:08<57:58, 10.30it/s]

Epoch: 0 Iteration: 1256 Loss: 1.389 Validation Loss: 1.264 Accuracy: 0.250 Validation Accuracy: 0.123:   3%|▎         | 1257/37094 [02:08<57:59, 10.30it/s]

Epoch: 0 Iteration: 1257 Loss: 1.400 Validation Loss: 1.264 Accuracy: 0.253 Validation Accuracy: 0.123:   3%|▎         | 1257/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1258 Loss: 1.399 Validation Loss: 1.264 Accuracy: 0.247 Validation Accuracy: 0.123:   3%|▎         | 1257/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1258 Loss: 1.399 Validation Loss: 1.264 Accuracy: 0.247 Validation Accuracy: 0.123:   3%|▎         | 1259/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1259 Loss: 1.393 Validation Loss: 1.264 Accuracy: 0.250 Validation Accuracy: 0.123:   3%|▎         | 1259/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1260 Loss: 1.399 Validation Loss: 1.264 Accuracy: 0.247 Validation Accuracy: 0.123:   3%|▎         | 1259/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1260 Loss: 1.399 Validation Loss: 1.264 Accuracy: 0.247 Validation Accuracy: 0.123:   3%|▎         | 1261/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1261 Loss: 1.370 Validation Loss: 1.264 Accuracy: 0.253 Validation Accuracy: 0.123:   3%|▎         | 1261/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1262 Loss: 1.327 Validation Loss: 1.264 Accuracy: 0.250 Validation Accuracy: 0.123:   3%|▎         | 1261/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1262 Loss: 1.327 Validation Loss: 1.264 Accuracy: 0.250 Validation Accuracy: 0.123:   3%|▎         | 1263/37094 [02:09<57:57, 10.30it/s]

Epoch: 0 Iteration: 1263 Loss: 1.369 Validation Loss: 1.264 Accuracy: 0.250 Validation Accuracy: 0.123:   3%|▎         | 1263/37094 [02:09<57:57, 10.30it/s]

Epoch: 0 Iteration: 1264 Loss: 1.354 Validation Loss: 1.264 Accuracy: 0.253 Validation Accuracy: 0.123:   3%|▎         | 1263/37094 [02:09<57:57, 10.30it/s]

Epoch: 0 Iteration: 1264 Loss: 1.354 Validation Loss: 1.264 Accuracy: 0.253 Validation Accuracy: 0.123:   3%|▎         | 1265/37094 [02:09<57:57, 10.30it/s]

Epoch: 0 Iteration: 1265 Loss: 1.360 Validation Loss: 1.264 Accuracy: 0.237 Validation Accuracy: 0.123:   3%|▎         | 1265/37094 [02:09<57:57, 10.30it/s]

Epoch: 0 Iteration: 1266 Loss: 1.374 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1265/37094 [02:09<57:57, 10.30it/s]

Epoch: 0 Iteration: 1266 Loss: 1.374 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1267/37094 [02:09<57:59, 10.30it/s]

Epoch: 0 Iteration: 1267 Loss: 1.352 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1267/37094 [02:10<57:59, 10.30it/s]

Epoch: 0 Iteration: 1268 Loss: 1.356 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1267/37094 [02:10<57:59, 10.30it/s]

Epoch: 0 Iteration: 1268 Loss: 1.356 Validation Loss: 1.264 Accuracy: 0.203 Validation Accuracy: 0.123:   3%|▎         | 1269/37094 [02:10<58:00, 10.29it/s]

Epoch: 0 Iteration: 1269 Loss: 1.386 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1269/37094 [02:10<58:00, 10.29it/s]

Epoch: 0 Iteration: 1270 Loss: 1.347 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1269/37094 [02:10<58:00, 10.29it/s]

Epoch: 0 Iteration: 1270 Loss: 1.347 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1271/37094 [02:10<58:01, 10.29it/s]

Epoch: 0 Iteration: 1271 Loss: 1.382 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1271/37094 [02:10<58:01, 10.29it/s]

Epoch: 0 Iteration: 1272 Loss: 1.372 Validation Loss: 1.264 Accuracy: 0.212 Validation Accuracy: 0.123:   3%|▎         | 1271/37094 [02:10<58:01, 10.29it/s]

Epoch: 0 Iteration: 1272 Loss: 1.372 Validation Loss: 1.264 Accuracy: 0.212 Validation Accuracy: 0.123:   3%|▎         | 1273/37094 [02:10<58:00, 10.29it/s]

Epoch: 0 Iteration: 1273 Loss: 1.377 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1273/37094 [02:10<58:00, 10.29it/s]

Epoch: 0 Iteration: 1274 Loss: 1.389 Validation Loss: 1.264 Accuracy: 0.216 Validation Accuracy: 0.123:   3%|▎         | 1273/37094 [02:10<58:00, 10.29it/s]

Epoch: 0 Iteration: 1274 Loss: 1.389 Validation Loss: 1.264 Accuracy: 0.216 Validation Accuracy: 0.123:   3%|▎         | 1275/37094 [02:10<57:59, 10.29it/s]

Epoch: 0 Iteration: 1275 Loss: 1.334 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1275/37094 [02:10<57:59, 10.29it/s]

Epoch: 0 Iteration: 1276 Loss: 1.362 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1275/37094 [02:10<57:59, 10.29it/s]

Epoch: 0 Iteration: 1276 Loss: 1.362 Validation Loss: 1.264 Accuracy: 0.225 Validation Accuracy: 0.123:   3%|▎         | 1277/37094 [02:10<57:58, 10.30it/s]

Epoch: 0 Iteration: 1277 Loss: 1.373 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1277/37094 [02:10<57:58, 10.30it/s]

Epoch: 0 Iteration: 1278 Loss: 1.406 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1277/37094 [02:11<57:58, 10.30it/s]

Epoch: 0 Iteration: 1278 Loss: 1.406 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1279/37094 [02:11<57:57, 10.30it/s]

Epoch: 0 Iteration: 1279 Loss: 1.434 Validation Loss: 1.264 Accuracy: 0.234 Validation Accuracy: 0.123:   3%|▎         | 1279/37094 [02:11<57:57, 10.30it/s]

Epoch: 0 Iteration: 1280 Loss: 1.425 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1279/37094 [02:11<57:57, 10.30it/s]

Epoch: 0 Iteration: 1280 Loss: 1.425 Validation Loss: 1.264 Accuracy: 0.228 Validation Accuracy: 0.123:   3%|▎         | 1281/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1281 Loss: 1.435 Validation Loss: 1.264 Accuracy: 0.222 Validation Accuracy: 0.123:   3%|▎         | 1281/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1282 Loss: 1.507 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1281/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1282 Loss: 1.507 Validation Loss: 1.264 Accuracy: 0.209 Validation Accuracy: 0.123:   3%|▎         | 1283/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1283 Loss: 1.480 Validation Loss: 1.264 Accuracy: 0.216 Validation Accuracy: 0.123:   3%|▎         | 1283/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1284 Loss: 1.526 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1283/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1284 Loss: 1.526 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1285/37094 [02:11<57:55, 10.30it/s]

Epoch: 0 Iteration: 1285 Loss: 1.519 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1285/37094 [02:11<57:55, 10.30it/s]

Epoch: 0 Iteration: 1286 Loss: 1.520 Validation Loss: 1.264 Accuracy: 0.188 Validation Accuracy: 0.123:   3%|▎         | 1285/37094 [02:11<57:55, 10.30it/s]

Epoch: 0 Iteration: 1286 Loss: 1.520 Validation Loss: 1.264 Accuracy: 0.188 Validation Accuracy: 0.123:   3%|▎         | 1287/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1287 Loss: 1.502 Validation Loss: 1.264 Accuracy: 0.181 Validation Accuracy: 0.123:   3%|▎         | 1287/37094 [02:11<57:56, 10.30it/s]

Epoch: 0 Iteration: 1288 Loss: 1.513 Validation Loss: 1.264 Accuracy: 0.172 Validation Accuracy: 0.123:   3%|▎         | 1287/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1288 Loss: 1.513 Validation Loss: 1.264 Accuracy: 0.172 Validation Accuracy: 0.123:   3%|▎         | 1289/37094 [02:12<57:54, 10.30it/s]

Epoch: 0 Iteration: 1289 Loss: 1.510 Validation Loss: 1.264 Accuracy: 0.169 Validation Accuracy: 0.123:   3%|▎         | 1289/37094 [02:12<57:54, 10.30it/s]

Epoch: 0 Iteration: 1290 Loss: 1.529 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1289/37094 [02:12<57:54, 10.30it/s]

Epoch: 0 Iteration: 1290 Loss: 1.529 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1291/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1291 Loss: 1.511 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1291/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1292 Loss: 1.529 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1291/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1292 Loss: 1.529 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1293/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1293 Loss: 1.490 Validation Loss: 1.264 Accuracy: 0.200 Validation Accuracy: 0.123:   3%|▎         | 1293/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1294 Loss: 1.513 Validation Loss: 1.264 Accuracy: 0.206 Validation Accuracy: 0.123:   3%|▎         | 1293/37094 [02:12<57:56, 10.30it/s]

Epoch: 0 Iteration: 1294 Loss: 1.513 Validation Loss: 1.264 Accuracy: 0.206 Validation Accuracy: 0.123:   3%|▎         | 1295/37094 [02:12<57:57, 10.29it/s]

Epoch: 0 Iteration: 1295 Loss: 1.497 Validation Loss: 1.264 Accuracy: 0.194 Validation Accuracy: 0.123:   3%|▎         | 1295/37094 [02:12<57:57, 10.29it/s]

Epoch: 0 Iteration: 1296 Loss: 1.499 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1295/37094 [02:12<57:57, 10.29it/s]

Epoch: 0 Iteration: 1296 Loss: 1.499 Validation Loss: 1.264 Accuracy: 0.197 Validation Accuracy: 0.123:   3%|▎         | 1297/37094 [02:12<57:59, 10.29it/s]

Epoch: 0 Iteration: 1297 Loss: 1.474 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1297/37094 [02:12<57:59, 10.29it/s]

Epoch: 0 Iteration: 1298 Loss: 1.472 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   3%|▎         | 1297/37094 [02:13<57:59, 10.29it/s]

Epoch: 0 Iteration: 1298 Loss: 1.472 Validation Loss: 1.264 Accuracy: 0.191 Validation Accuracy: 0.123:   4%|▎         | 1299/37094 [02:13<57:58, 10.29it/s]

Epoch: 0 Iteration: 1299 Loss: 1.489 Validation Loss: 1.264 Accuracy: 0.184 Validation Accuracy: 0.123:   4%|▎         | 1299/37094 [02:13<57:58, 10.29it/s]

Epoch: 0 Iteration: 1300 Loss: 1.559 Validation Loss: 1.310 Accuracy: 0.169 Validation Accuracy: 0.129:   4%|▎         | 1299/37094 [02:13<57:58, 10.29it/s]

Epoch: 0 Iteration: 1300 Loss: 1.559 Validation Loss: 1.310 Accuracy: 0.169 Validation Accuracy: 0.129:   4%|▎         | 1301/37094 [02:13<1:47:19,  5.56it/s]

Epoch: 0 Iteration: 1301 Loss: 1.587 Validation Loss: 1.310 Accuracy: 0.166 Validation Accuracy: 0.129:   4%|▎         | 1301/37094 [02:13<1:47:19,  5.56it/s]

Epoch: 0 Iteration: 1302 Loss: 1.515 Validation Loss: 1.310 Accuracy: 0.159 Validation Accuracy: 0.129:   4%|▎         | 1301/37094 [02:13<1:47:19,  5.56it/s]

Epoch: 0 Iteration: 1302 Loss: 1.515 Validation Loss: 1.310 Accuracy: 0.159 Validation Accuracy: 0.129:   4%|▎         | 1303/37094 [02:13<1:31:44,  6.50it/s]

Epoch: 0 Iteration: 1303 Loss: 1.463 Validation Loss: 1.310 Accuracy: 0.169 Validation Accuracy: 0.129:   4%|▎         | 1303/37094 [02:14<1:31:44,  6.50it/s]

Epoch: 0 Iteration: 1304 Loss: 1.455 Validation Loss: 1.310 Accuracy: 0.178 Validation Accuracy: 0.129:   4%|▎         | 1303/37094 [02:14<1:31:44,  6.50it/s]

Epoch: 0 Iteration: 1304 Loss: 1.455 Validation Loss: 1.310 Accuracy: 0.178 Validation Accuracy: 0.129:   4%|▎         | 1305/37094 [02:14<1:21:36,  7.31it/s]

Epoch: 0 Iteration: 1305 Loss: 1.439 Validation Loss: 1.310 Accuracy: 0.191 Validation Accuracy: 0.129:   4%|▎         | 1305/37094 [02:14<1:21:36,  7.31it/s]

Epoch: 0 Iteration: 1306 Loss: 1.451 Validation Loss: 1.310 Accuracy: 0.188 Validation Accuracy: 0.129:   4%|▎         | 1305/37094 [02:14<1:21:36,  7.31it/s]

Epoch: 0 Iteration: 1306 Loss: 1.451 Validation Loss: 1.310 Accuracy: 0.188 Validation Accuracy: 0.129:   4%|▎         | 1307/37094 [02:14<1:14:27,  8.01it/s]

Epoch: 0 Iteration: 1307 Loss: 1.482 Validation Loss: 1.310 Accuracy: 0.197 Validation Accuracy: 0.129:   4%|▎         | 1307/37094 [02:14<1:14:27,  8.01it/s]

Epoch: 0 Iteration: 1308 Loss: 1.499 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▎         | 1307/37094 [02:14<1:14:27,  8.01it/s]

Epoch: 0 Iteration: 1308 Loss: 1.499 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▎         | 1309/37094 [02:14<1:09:28,  8.58it/s]

Epoch: 0 Iteration: 1309 Loss: 1.493 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1309/37094 [02:14<1:09:28,  8.58it/s]

Epoch: 0 Iteration: 1310 Loss: 1.505 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1309/37094 [02:14<1:09:28,  8.58it/s]

Epoch: 0 Iteration: 1310 Loss: 1.505 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1311/37094 [02:14<1:05:59,  9.04it/s]

Epoch: 0 Iteration: 1311 Loss: 1.516 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1311/37094 [02:14<1:05:59,  9.04it/s]

Epoch: 0 Iteration: 1312 Loss: 1.506 Validation Loss: 1.310 Accuracy: 0.228 Validation Accuracy: 0.129:   4%|▎         | 1311/37094 [02:14<1:05:59,  9.04it/s]

Epoch: 0 Iteration: 1312 Loss: 1.506 Validation Loss: 1.310 Accuracy: 0.228 Validation Accuracy: 0.129:   4%|▎         | 1313/37094 [02:14<1:03:33,  9.38it/s]

Epoch: 0 Iteration: 1313 Loss: 1.519 Validation Loss: 1.310 Accuracy: 0.222 Validation Accuracy: 0.129:   4%|▎         | 1313/37094 [02:15<1:03:33,  9.38it/s]

Epoch: 0 Iteration: 1314 Loss: 1.504 Validation Loss: 1.310 Accuracy: 0.231 Validation Accuracy: 0.129:   4%|▎         | 1313/37094 [02:15<1:03:33,  9.38it/s]

Epoch: 0 Iteration: 1314 Loss: 1.504 Validation Loss: 1.310 Accuracy: 0.231 Validation Accuracy: 0.129:   4%|▎         | 1315/37094 [02:15<1:01:52,  9.64it/s]

Epoch: 0 Iteration: 1315 Loss: 1.489 Validation Loss: 1.310 Accuracy: 0.237 Validation Accuracy: 0.129:   4%|▎         | 1315/37094 [02:15<1:01:52,  9.64it/s]

Epoch: 0 Iteration: 1316 Loss: 1.456 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1315/37094 [02:15<1:01:52,  9.64it/s]

Epoch: 0 Iteration: 1316 Loss: 1.456 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1317/37094 [02:15<1:00:43,  9.82it/s]

Epoch: 0 Iteration: 1317 Loss: 1.416 Validation Loss: 1.310 Accuracy: 0.256 Validation Accuracy: 0.129:   4%|▎         | 1317/37094 [02:15<1:00:43,  9.82it/s]

Epoch: 0 Iteration: 1318 Loss: 1.376 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1317/37094 [02:15<1:00:43,  9.82it/s]

Epoch: 0 Iteration: 1318 Loss: 1.376 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1319/37094 [02:15<59:51,  9.96it/s]  

Epoch: 0 Iteration: 1319 Loss: 1.384 Validation Loss: 1.310 Accuracy: 0.234 Validation Accuracy: 0.129:   4%|▎         | 1319/37094 [02:15<59:51,  9.96it/s]

Epoch: 0 Iteration: 1320 Loss: 1.332 Validation Loss: 1.310 Accuracy: 0.234 Validation Accuracy: 0.129:   4%|▎         | 1319/37094 [02:15<59:51,  9.96it/s]

Epoch: 0 Iteration: 1320 Loss: 1.332 Validation Loss: 1.310 Accuracy: 0.234 Validation Accuracy: 0.129:   4%|▎         | 1321/37094 [02:15<59:16, 10.06it/s]

Epoch: 0 Iteration: 1321 Loss: 1.331 Validation Loss: 1.310 Accuracy: 0.228 Validation Accuracy: 0.129:   4%|▎         | 1321/37094 [02:15<59:16, 10.06it/s]

Epoch: 0 Iteration: 1322 Loss: 1.353 Validation Loss: 1.310 Accuracy: 0.225 Validation Accuracy: 0.129:   4%|▎         | 1321/37094 [02:15<59:16, 10.06it/s]

Epoch: 0 Iteration: 1322 Loss: 1.353 Validation Loss: 1.310 Accuracy: 0.225 Validation Accuracy: 0.129:   4%|▎         | 1323/37094 [02:15<58:50, 10.13it/s]

Epoch: 0 Iteration: 1323 Loss: 1.392 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1323/37094 [02:16<58:50, 10.13it/s]

Epoch: 0 Iteration: 1324 Loss: 1.364 Validation Loss: 1.310 Accuracy: 0.197 Validation Accuracy: 0.129:   4%|▎         | 1323/37094 [02:16<58:50, 10.13it/s]

Epoch: 0 Iteration: 1324 Loss: 1.364 Validation Loss: 1.310 Accuracy: 0.197 Validation Accuracy: 0.129:   4%|▎         | 1325/37094 [02:16<58:32, 10.18it/s]

Epoch: 0 Iteration: 1325 Loss: 1.380 Validation Loss: 1.310 Accuracy: 0.175 Validation Accuracy: 0.129:   4%|▎         | 1325/37094 [02:16<58:32, 10.18it/s]

Epoch: 0 Iteration: 1326 Loss: 1.337 Validation Loss: 1.310 Accuracy: 0.184 Validation Accuracy: 0.129:   4%|▎         | 1325/37094 [02:16<58:32, 10.18it/s]

Epoch: 0 Iteration: 1326 Loss: 1.337 Validation Loss: 1.310 Accuracy: 0.184 Validation Accuracy: 0.129:   4%|▎         | 1327/37094 [02:16<58:20, 10.22it/s]

Epoch: 0 Iteration: 1327 Loss: 1.330 Validation Loss: 1.310 Accuracy: 0.184 Validation Accuracy: 0.129:   4%|▎         | 1327/37094 [02:16<58:20, 10.22it/s]

Epoch: 0 Iteration: 1328 Loss: 1.317 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1327/37094 [02:16<58:20, 10.22it/s]

Epoch: 0 Iteration: 1328 Loss: 1.317 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1329/37094 [02:16<58:11, 10.24it/s]

Epoch: 0 Iteration: 1329 Loss: 1.290 Validation Loss: 1.310 Accuracy: 0.244 Validation Accuracy: 0.129:   4%|▎         | 1329/37094 [02:16<58:11, 10.24it/s]

Epoch: 0 Iteration: 1330 Loss: 1.258 Validation Loss: 1.310 Accuracy: 0.237 Validation Accuracy: 0.129:   4%|▎         | 1329/37094 [02:16<58:11, 10.24it/s]

Epoch: 0 Iteration: 1330 Loss: 1.258 Validation Loss: 1.310 Accuracy: 0.237 Validation Accuracy: 0.129:   4%|▎         | 1331/37094 [02:16<58:04, 10.26it/s]

Epoch: 0 Iteration: 1331 Loss: 1.266 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1331/37094 [02:16<58:04, 10.26it/s]

Epoch: 0 Iteration: 1332 Loss: 1.278 Validation Loss: 1.310 Accuracy: 0.241 Validation Accuracy: 0.129:   4%|▎         | 1331/37094 [02:16<58:04, 10.26it/s]

Epoch: 0 Iteration: 1332 Loss: 1.278 Validation Loss: 1.310 Accuracy: 0.241 Validation Accuracy: 0.129:   4%|▎         | 1333/37094 [02:16<58:01, 10.27it/s]

Epoch: 0 Iteration: 1333 Loss: 1.279 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1333/37094 [02:16<58:01, 10.27it/s]

Epoch: 0 Iteration: 1334 Loss: 1.239 Validation Loss: 1.310 Accuracy: 0.250 Validation Accuracy: 0.129:   4%|▎         | 1333/37094 [02:17<58:01, 10.27it/s]

Epoch: 0 Iteration: 1334 Loss: 1.239 Validation Loss: 1.310 Accuracy: 0.250 Validation Accuracy: 0.129:   4%|▎         | 1335/37094 [02:17<57:57, 10.28it/s]

Epoch: 0 Iteration: 1335 Loss: 1.241 Validation Loss: 1.310 Accuracy: 0.263 Validation Accuracy: 0.129:   4%|▎         | 1335/37094 [02:17<57:57, 10.28it/s]

Epoch: 0 Iteration: 1336 Loss: 1.256 Validation Loss: 1.310 Accuracy: 0.259 Validation Accuracy: 0.129:   4%|▎         | 1335/37094 [02:17<57:57, 10.28it/s]

Epoch: 0 Iteration: 1336 Loss: 1.256 Validation Loss: 1.310 Accuracy: 0.259 Validation Accuracy: 0.129:   4%|▎         | 1337/37094 [02:17<57:55, 10.29it/s]

Epoch: 0 Iteration: 1337 Loss: 1.276 Validation Loss: 1.310 Accuracy: 0.269 Validation Accuracy: 0.129:   4%|▎         | 1337/37094 [02:17<57:55, 10.29it/s]

Epoch: 0 Iteration: 1338 Loss: 1.319 Validation Loss: 1.310 Accuracy: 0.259 Validation Accuracy: 0.129:   4%|▎         | 1337/37094 [02:17<57:55, 10.29it/s]

Epoch: 0 Iteration: 1338 Loss: 1.319 Validation Loss: 1.310 Accuracy: 0.259 Validation Accuracy: 0.129:   4%|▎         | 1339/37094 [02:17<57:55, 10.29it/s]

Epoch: 0 Iteration: 1339 Loss: 1.280 Validation Loss: 1.310 Accuracy: 0.247 Validation Accuracy: 0.129:   4%|▎         | 1339/37094 [02:17<57:55, 10.29it/s]

Epoch: 0 Iteration: 1340 Loss: 1.229 Validation Loss: 1.310 Accuracy: 0.256 Validation Accuracy: 0.129:   4%|▎         | 1339/37094 [02:17<57:55, 10.29it/s]

Epoch: 0 Iteration: 1340 Loss: 1.229 Validation Loss: 1.310 Accuracy: 0.256 Validation Accuracy: 0.129:   4%|▎         | 1341/37094 [02:17<57:57, 10.28it/s]

Epoch: 0 Iteration: 1341 Loss: 1.201 Validation Loss: 1.310 Accuracy: 0.256 Validation Accuracy: 0.129:   4%|▎         | 1341/37094 [02:17<57:57, 10.28it/s]

Epoch: 0 Iteration: 1342 Loss: 1.201 Validation Loss: 1.310 Accuracy: 0.250 Validation Accuracy: 0.129:   4%|▎         | 1341/37094 [02:17<57:57, 10.28it/s]

Epoch: 0 Iteration: 1342 Loss: 1.201 Validation Loss: 1.310 Accuracy: 0.250 Validation Accuracy: 0.129:   4%|▎         | 1343/37094 [02:17<57:54, 10.29it/s]

Epoch: 0 Iteration: 1343 Loss: 1.223 Validation Loss: 1.310 Accuracy: 0.253 Validation Accuracy: 0.129:   4%|▎         | 1343/37094 [02:17<57:54, 10.29it/s]

Epoch: 0 Iteration: 1344 Loss: 1.224 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1343/37094 [02:18<57:54, 10.29it/s]

Epoch: 0 Iteration: 1344 Loss: 1.224 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1345/37094 [02:18<57:52, 10.30it/s]

Epoch: 0 Iteration: 1345 Loss: 1.198 Validation Loss: 1.310 Accuracy: 0.278 Validation Accuracy: 0.129:   4%|▎         | 1345/37094 [02:18<57:52, 10.30it/s]

Epoch: 0 Iteration: 1346 Loss: 1.241 Validation Loss: 1.310 Accuracy: 0.284 Validation Accuracy: 0.129:   4%|▎         | 1345/37094 [02:18<57:52, 10.30it/s]

Epoch: 0 Iteration: 1346 Loss: 1.241 Validation Loss: 1.310 Accuracy: 0.284 Validation Accuracy: 0.129:   4%|▎         | 1347/37094 [02:18<57:50, 10.30it/s]

Epoch: 0 Iteration: 1347 Loss: 1.192 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1347/37094 [02:18<57:50, 10.30it/s]

Epoch: 0 Iteration: 1348 Loss: 1.161 Validation Loss: 1.310 Accuracy: 0.281 Validation Accuracy: 0.129:   4%|▎         | 1347/37094 [02:18<57:50, 10.30it/s]

Epoch: 0 Iteration: 1348 Loss: 1.161 Validation Loss: 1.310 Accuracy: 0.281 Validation Accuracy: 0.129:   4%|▎         | 1349/37094 [02:18<57:50, 10.30it/s]

Epoch: 0 Iteration: 1349 Loss: 1.172 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1349/37094 [02:18<57:50, 10.30it/s]

Epoch: 0 Iteration: 1350 Loss: 1.181 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1349/37094 [02:18<57:50, 10.30it/s]

Epoch: 0 Iteration: 1350 Loss: 1.181 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1351/37094 [02:18<57:51, 10.30it/s]

Epoch: 0 Iteration: 1351 Loss: 1.187 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1351/37094 [02:18<57:51, 10.30it/s]

Epoch: 0 Iteration: 1352 Loss: 1.117 Validation Loss: 1.310 Accuracy: 0.287 Validation Accuracy: 0.129:   4%|▎         | 1351/37094 [02:18<57:51, 10.30it/s]

Epoch: 0 Iteration: 1352 Loss: 1.117 Validation Loss: 1.310 Accuracy: 0.287 Validation Accuracy: 0.129:   4%|▎         | 1353/37094 [02:18<57:52, 10.29it/s]

Epoch: 0 Iteration: 1353 Loss: 1.182 Validation Loss: 1.310 Accuracy: 0.284 Validation Accuracy: 0.129:   4%|▎         | 1353/37094 [02:18<57:52, 10.29it/s]

Epoch: 0 Iteration: 1354 Loss: 1.194 Validation Loss: 1.310 Accuracy: 0.284 Validation Accuracy: 0.129:   4%|▎         | 1353/37094 [02:19<57:52, 10.29it/s]

Epoch: 0 Iteration: 1354 Loss: 1.194 Validation Loss: 1.310 Accuracy: 0.284 Validation Accuracy: 0.129:   4%|▎         | 1355/37094 [02:19<57:51, 10.29it/s]

Epoch: 0 Iteration: 1355 Loss: 1.249 Validation Loss: 1.310 Accuracy: 0.275 Validation Accuracy: 0.129:   4%|▎         | 1355/37094 [02:19<57:51, 10.29it/s]

Epoch: 0 Iteration: 1356 Loss: 1.253 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1355/37094 [02:19<57:51, 10.29it/s]

Epoch: 0 Iteration: 1356 Loss: 1.253 Validation Loss: 1.310 Accuracy: 0.266 Validation Accuracy: 0.129:   4%|▎         | 1357/37094 [02:19<57:52, 10.29it/s]

Epoch: 0 Iteration: 1357 Loss: 1.298 Validation Loss: 1.310 Accuracy: 0.253 Validation Accuracy: 0.129:   4%|▎         | 1357/37094 [02:19<57:52, 10.29it/s]

Epoch: 0 Iteration: 1358 Loss: 1.266 Validation Loss: 1.310 Accuracy: 0.237 Validation Accuracy: 0.129:   4%|▎         | 1357/37094 [02:19<57:52, 10.29it/s]

Epoch: 0 Iteration: 1358 Loss: 1.266 Validation Loss: 1.310 Accuracy: 0.237 Validation Accuracy: 0.129:   4%|▎         | 1359/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1359 Loss: 1.274 Validation Loss: 1.310 Accuracy: 0.241 Validation Accuracy: 0.129:   4%|▎         | 1359/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1360 Loss: 1.291 Validation Loss: 1.310 Accuracy: 0.244 Validation Accuracy: 0.129:   4%|▎         | 1359/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1360 Loss: 1.291 Validation Loss: 1.310 Accuracy: 0.244 Validation Accuracy: 0.129:   4%|▎         | 1361/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1361 Loss: 1.302 Validation Loss: 1.310 Accuracy: 0.241 Validation Accuracy: 0.129:   4%|▎         | 1361/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1362 Loss: 1.312 Validation Loss: 1.310 Accuracy: 0.228 Validation Accuracy: 0.129:   4%|▎         | 1361/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1362 Loss: 1.312 Validation Loss: 1.310 Accuracy: 0.228 Validation Accuracy: 0.129:   4%|▎         | 1363/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1363 Loss: 1.286 Validation Loss: 1.310 Accuracy: 0.231 Validation Accuracy: 0.129:   4%|▎         | 1363/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1364 Loss: 1.299 Validation Loss: 1.310 Accuracy: 0.222 Validation Accuracy: 0.129:   4%|▎         | 1363/37094 [02:19<57:50, 10.30it/s]

Epoch: 0 Iteration: 1364 Loss: 1.299 Validation Loss: 1.310 Accuracy: 0.222 Validation Accuracy: 0.129:   4%|▎         | 1365/37094 [02:19<57:50, 10.29it/s]

Epoch: 0 Iteration: 1365 Loss: 1.357 Validation Loss: 1.310 Accuracy: 0.216 Validation Accuracy: 0.129:   4%|▎         | 1365/37094 [02:20<57:50, 10.29it/s]

Epoch: 0 Iteration: 1366 Loss: 1.339 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1365/37094 [02:20<57:50, 10.29it/s]

Epoch: 0 Iteration: 1366 Loss: 1.339 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1367/37094 [02:20<57:50, 10.29it/s]

Epoch: 0 Iteration: 1367 Loss: 1.350 Validation Loss: 1.310 Accuracy: 0.219 Validation Accuracy: 0.129:   4%|▎         | 1367/37094 [02:20<57:50, 10.29it/s]

Epoch: 0 Iteration: 1368 Loss: 1.393 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▎         | 1367/37094 [02:20<57:50, 10.29it/s]

Epoch: 0 Iteration: 1368 Loss: 1.393 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▎         | 1369/37094 [02:20<57:48, 10.30it/s]

Epoch: 0 Iteration: 1369 Loss: 1.406 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1369/37094 [02:20<57:48, 10.30it/s]

Epoch: 0 Iteration: 1370 Loss: 1.398 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▎         | 1369/37094 [02:20<57:48, 10.30it/s]

Epoch: 0 Iteration: 1370 Loss: 1.398 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▎         | 1371/37094 [02:20<57:46, 10.30it/s]

Epoch: 0 Iteration: 1371 Loss: 1.442 Validation Loss: 1.310 Accuracy: 0.191 Validation Accuracy: 0.129:   4%|▎         | 1371/37094 [02:20<57:46, 10.30it/s]

Epoch: 0 Iteration: 1372 Loss: 1.462 Validation Loss: 1.310 Accuracy: 0.184 Validation Accuracy: 0.129:   4%|▎         | 1371/37094 [02:20<57:46, 10.30it/s]

Epoch: 0 Iteration: 1372 Loss: 1.462 Validation Loss: 1.310 Accuracy: 0.184 Validation Accuracy: 0.129:   4%|▎         | 1373/37094 [02:20<57:47, 10.30it/s]

Epoch: 0 Iteration: 1373 Loss: 1.406 Validation Loss: 1.310 Accuracy: 0.175 Validation Accuracy: 0.129:   4%|▎         | 1373/37094 [02:20<57:47, 10.30it/s]

Epoch: 0 Iteration: 1374 Loss: 1.462 Validation Loss: 1.310 Accuracy: 0.175 Validation Accuracy: 0.129:   4%|▎         | 1373/37094 [02:20<57:47, 10.30it/s]

Epoch: 0 Iteration: 1374 Loss: 1.462 Validation Loss: 1.310 Accuracy: 0.175 Validation Accuracy: 0.129:   4%|▎         | 1375/37094 [02:20<57:47, 10.30it/s]

Epoch: 0 Iteration: 1375 Loss: 1.455 Validation Loss: 1.310 Accuracy: 0.175 Validation Accuracy: 0.129:   4%|▎         | 1375/37094 [02:21<57:47, 10.30it/s]

Epoch: 0 Iteration: 1376 Loss: 1.458 Validation Loss: 1.310 Accuracy: 0.178 Validation Accuracy: 0.129:   4%|▎         | 1375/37094 [02:21<57:47, 10.30it/s]

Epoch: 0 Iteration: 1376 Loss: 1.458 Validation Loss: 1.310 Accuracy: 0.178 Validation Accuracy: 0.129:   4%|▎         | 1377/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1377 Loss: 1.432 Validation Loss: 1.310 Accuracy: 0.188 Validation Accuracy: 0.129:   4%|▎         | 1377/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1378 Loss: 1.445 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1377/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1378 Loss: 1.445 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1379/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1379 Loss: 1.430 Validation Loss: 1.310 Accuracy: 0.225 Validation Accuracy: 0.129:   4%|▎         | 1379/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1380 Loss: 1.484 Validation Loss: 1.310 Accuracy: 0.219 Validation Accuracy: 0.129:   4%|▎         | 1379/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1380 Loss: 1.484 Validation Loss: 1.310 Accuracy: 0.219 Validation Accuracy: 0.129:   4%|▎         | 1381/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1381 Loss: 1.535 Validation Loss: 1.310 Accuracy: 0.219 Validation Accuracy: 0.129:   4%|▎         | 1381/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1382 Loss: 1.561 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1381/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1382 Loss: 1.561 Validation Loss: 1.310 Accuracy: 0.206 Validation Accuracy: 0.129:   4%|▎         | 1383/37094 [02:21<57:45, 10.30it/s]

Epoch: 0 Iteration: 1383 Loss: 1.552 Validation Loss: 1.310 Accuracy: 0.222 Validation Accuracy: 0.129:   4%|▎         | 1383/37094 [02:21<57:45, 10.30it/s]

Epoch: 0 Iteration: 1384 Loss: 1.558 Validation Loss: 1.310 Accuracy: 0.225 Validation Accuracy: 0.129:   4%|▎         | 1383/37094 [02:21<57:45, 10.30it/s]

Epoch: 0 Iteration: 1384 Loss: 1.558 Validation Loss: 1.310 Accuracy: 0.225 Validation Accuracy: 0.129:   4%|▎         | 1385/37094 [02:21<57:46, 10.30it/s]

Epoch: 0 Iteration: 1385 Loss: 1.554 Validation Loss: 1.310 Accuracy: 0.225 Validation Accuracy: 0.129:   4%|▎         | 1385/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1386 Loss: 1.558 Validation Loss: 1.310 Accuracy: 0.222 Validation Accuracy: 0.129:   4%|▎         | 1385/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1386 Loss: 1.558 Validation Loss: 1.310 Accuracy: 0.222 Validation Accuracy: 0.129:   4%|▎         | 1387/37094 [02:22<57:47, 10.30it/s]

Epoch: 0 Iteration: 1387 Loss: 1.609 Validation Loss: 1.310 Accuracy: 0.203 Validation Accuracy: 0.129:   4%|▎         | 1387/37094 [02:22<57:47, 10.30it/s]

Epoch: 0 Iteration: 1388 Loss: 1.611 Validation Loss: 1.310 Accuracy: 0.191 Validation Accuracy: 0.129:   4%|▎         | 1387/37094 [02:22<57:47, 10.30it/s]

Epoch: 0 Iteration: 1388 Loss: 1.611 Validation Loss: 1.310 Accuracy: 0.191 Validation Accuracy: 0.129:   4%|▎         | 1389/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1389 Loss: 1.579 Validation Loss: 1.310 Accuracy: 0.188 Validation Accuracy: 0.129:   4%|▎         | 1389/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1390 Loss: 1.583 Validation Loss: 1.310 Accuracy: 0.181 Validation Accuracy: 0.129:   4%|▎         | 1389/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1390 Loss: 1.583 Validation Loss: 1.310 Accuracy: 0.181 Validation Accuracy: 0.129:   4%|▎         | 1391/37094 [02:22<57:47, 10.30it/s]

Epoch: 0 Iteration: 1391 Loss: 1.525 Validation Loss: 1.310 Accuracy: 0.194 Validation Accuracy: 0.129:   4%|▎         | 1391/37094 [02:22<57:47, 10.30it/s]

Epoch: 0 Iteration: 1392 Loss: 1.541 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▎         | 1391/37094 [02:22<57:47, 10.30it/s]

Epoch: 0 Iteration: 1392 Loss: 1.541 Validation Loss: 1.310 Accuracy: 0.212 Validation Accuracy: 0.129:   4%|▍         | 1393/37094 [02:22<57:48, 10.29it/s]

Epoch: 0 Iteration: 1393 Loss: 1.524 Validation Loss: 1.310 Accuracy: 0.197 Validation Accuracy: 0.129:   4%|▍         | 1393/37094 [02:22<57:48, 10.29it/s]

Epoch: 0 Iteration: 1394 Loss: 1.486 Validation Loss: 1.310 Accuracy: 0.194 Validation Accuracy: 0.129:   4%|▍         | 1393/37094 [02:22<57:48, 10.29it/s]

Epoch: 0 Iteration: 1394 Loss: 1.486 Validation Loss: 1.310 Accuracy: 0.194 Validation Accuracy: 0.129:   4%|▍         | 1395/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1395 Loss: 1.446 Validation Loss: 1.310 Accuracy: 0.194 Validation Accuracy: 0.129:   4%|▍         | 1395/37094 [02:22<57:46, 10.30it/s]

Epoch: 0 Iteration: 1396 Loss: 1.426 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▍         | 1395/37094 [02:23<57:46, 10.30it/s]

Epoch: 0 Iteration: 1396 Loss: 1.426 Validation Loss: 1.310 Accuracy: 0.200 Validation Accuracy: 0.129:   4%|▍         | 1397/37094 [02:23<57:44, 10.30it/s]

Epoch: 0 Iteration: 1397 Loss: 1.424 Validation Loss: 1.310 Accuracy: 0.216 Validation Accuracy: 0.129:   4%|▍         | 1397/37094 [02:23<57:44, 10.30it/s]

Epoch: 0 Iteration: 1398 Loss: 1.424 Validation Loss: 1.310 Accuracy: 0.216 Validation Accuracy: 0.129:   4%|▍         | 1397/37094 [02:23<57:44, 10.30it/s]

Epoch: 0 Iteration: 1398 Loss: 1.424 Validation Loss: 1.310 Accuracy: 0.216 Validation Accuracy: 0.129:   4%|▍         | 1399/37094 [02:23<57:44, 10.30it/s]

Epoch: 0 Iteration: 1399 Loss: 1.477 Validation Loss: 1.310 Accuracy: 0.203 Validation Accuracy: 0.129:   4%|▍         | 1399/37094 [02:23<57:44, 10.30it/s]

Epoch: 0 Iteration: 1400 Loss: 1.413 Validation Loss: 1.285 Accuracy: 0.212 Validation Accuracy: 0.136:   4%|▍         | 1399/37094 [02:24<57:44, 10.30it/s]

Epoch: 0 Iteration: 1400 Loss: 1.413 Validation Loss: 1.285 Accuracy: 0.212 Validation Accuracy: 0.136:   4%|▍         | 1401/37094 [02:24<1:46:09,  5.60it/s]

Epoch: 0 Iteration: 1401 Loss: 1.364 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1401/37094 [02:24<1:46:09,  5.60it/s]

Epoch: 0 Iteration: 1402 Loss: 1.367 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1401/37094 [02:24<1:46:09,  5.60it/s]

Epoch: 0 Iteration: 1402 Loss: 1.367 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1403/37094 [02:24<1:30:53,  6.54it/s]

Epoch: 0 Iteration: 1403 Loss: 1.381 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1403/37094 [02:24<1:30:53,  6.54it/s]

Epoch: 0 Iteration: 1404 Loss: 1.382 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1403/37094 [02:24<1:30:53,  6.54it/s]

Epoch: 0 Iteration: 1404 Loss: 1.382 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1405/37094 [02:24<1:20:58,  7.35it/s]

Epoch: 0 Iteration: 1405 Loss: 1.320 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1405/37094 [02:24<1:20:58,  7.35it/s]

Epoch: 0 Iteration: 1406 Loss: 1.305 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1405/37094 [02:24<1:20:58,  7.35it/s]

Epoch: 0 Iteration: 1406 Loss: 1.305 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1407/37094 [02:24<1:13:59,  8.04it/s]

Epoch: 0 Iteration: 1407 Loss: 1.310 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1407/37094 [02:24<1:13:59,  8.04it/s]

Epoch: 0 Iteration: 1408 Loss: 1.292 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1407/37094 [02:24<1:13:59,  8.04it/s]

Epoch: 0 Iteration: 1408 Loss: 1.292 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1409/37094 [02:24<1:09:05,  8.61it/s]

Epoch: 0 Iteration: 1409 Loss: 1.300 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1409/37094 [02:24<1:09:05,  8.61it/s]

Epoch: 0 Iteration: 1410 Loss: 1.308 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1409/37094 [02:24<1:09:05,  8.61it/s]

Epoch: 0 Iteration: 1410 Loss: 1.308 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1411/37094 [02:24<1:05:40,  9.05it/s]

Epoch: 0 Iteration: 1411 Loss: 1.308 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1411/37094 [02:25<1:05:40,  9.05it/s]

Epoch: 0 Iteration: 1412 Loss: 1.299 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1411/37094 [02:25<1:05:40,  9.05it/s]

Epoch: 0 Iteration: 1412 Loss: 1.299 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1413/37094 [02:25<1:03:21,  9.39it/s]

Epoch: 0 Iteration: 1413 Loss: 1.308 Validation Loss: 1.285 Accuracy: 0.209 Validation Accuracy: 0.136:   4%|▍         | 1413/37094 [02:25<1:03:21,  9.39it/s]

Epoch: 0 Iteration: 1414 Loss: 1.343 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1413/37094 [02:25<1:03:21,  9.39it/s]

Epoch: 0 Iteration: 1414 Loss: 1.343 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1415/37094 [02:25<1:01:40,  9.64it/s]

Epoch: 0 Iteration: 1415 Loss: 1.337 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1415/37094 [02:25<1:01:40,  9.64it/s]

Epoch: 0 Iteration: 1416 Loss: 1.330 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1415/37094 [02:25<1:01:40,  9.64it/s]

Epoch: 0 Iteration: 1416 Loss: 1.330 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1417/37094 [02:25<1:00:31,  9.82it/s]

Epoch: 0 Iteration: 1417 Loss: 1.384 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1417/37094 [02:25<1:00:31,  9.82it/s]

Epoch: 0 Iteration: 1418 Loss: 1.357 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1417/37094 [02:25<1:00:31,  9.82it/s]

Epoch: 0 Iteration: 1418 Loss: 1.357 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1419/37094 [02:25<59:39,  9.97it/s]  

Epoch: 0 Iteration: 1419 Loss: 1.356 Validation Loss: 1.285 Accuracy: 0.181 Validation Accuracy: 0.136:   4%|▍         | 1419/37094 [02:25<59:39,  9.97it/s]

Epoch: 0 Iteration: 1420 Loss: 1.384 Validation Loss: 1.285 Accuracy: 0.175 Validation Accuracy: 0.136:   4%|▍         | 1419/37094 [02:25<59:39,  9.97it/s]

Epoch: 0 Iteration: 1420 Loss: 1.384 Validation Loss: 1.285 Accuracy: 0.175 Validation Accuracy: 0.136:   4%|▍         | 1421/37094 [02:25<59:03, 10.07it/s]

Epoch: 0 Iteration: 1421 Loss: 1.386 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1421/37094 [02:26<59:03, 10.07it/s]

Epoch: 0 Iteration: 1422 Loss: 1.337 Validation Loss: 1.285 Accuracy: 0.163 Validation Accuracy: 0.136:   4%|▍         | 1421/37094 [02:26<59:03, 10.07it/s]

Epoch: 0 Iteration: 1422 Loss: 1.337 Validation Loss: 1.285 Accuracy: 0.163 Validation Accuracy: 0.136:   4%|▍         | 1423/37094 [02:26<58:39, 10.14it/s]

Epoch: 0 Iteration: 1423 Loss: 1.380 Validation Loss: 1.285 Accuracy: 0.156 Validation Accuracy: 0.136:   4%|▍         | 1423/37094 [02:26<58:39, 10.14it/s]

Epoch: 0 Iteration: 1424 Loss: 1.362 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1423/37094 [02:26<58:39, 10.14it/s]

Epoch: 0 Iteration: 1424 Loss: 1.362 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1425/37094 [02:26<58:22, 10.18it/s]

Epoch: 0 Iteration: 1425 Loss: 1.415 Validation Loss: 1.285 Accuracy: 0.172 Validation Accuracy: 0.136:   4%|▍         | 1425/37094 [02:26<58:22, 10.18it/s]

Epoch: 0 Iteration: 1426 Loss: 1.382 Validation Loss: 1.285 Accuracy: 0.175 Validation Accuracy: 0.136:   4%|▍         | 1425/37094 [02:26<58:22, 10.18it/s]

Epoch: 0 Iteration: 1426 Loss: 1.382 Validation Loss: 1.285 Accuracy: 0.175 Validation Accuracy: 0.136:   4%|▍         | 1427/37094 [02:26<58:09, 10.22it/s]

Epoch: 0 Iteration: 1427 Loss: 1.339 Validation Loss: 1.285 Accuracy: 0.181 Validation Accuracy: 0.136:   4%|▍         | 1427/37094 [02:26<58:09, 10.22it/s]

Epoch: 0 Iteration: 1428 Loss: 1.342 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1427/37094 [02:26<58:09, 10.22it/s]

Epoch: 0 Iteration: 1428 Loss: 1.342 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1429/37094 [02:26<58:03, 10.24it/s]

Epoch: 0 Iteration: 1429 Loss: 1.390 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1429/37094 [02:26<58:03, 10.24it/s]

Epoch: 0 Iteration: 1430 Loss: 1.353 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1429/37094 [02:26<58:03, 10.24it/s]

Epoch: 0 Iteration: 1430 Loss: 1.353 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1431/37094 [02:26<57:55, 10.26it/s]

Epoch: 0 Iteration: 1431 Loss: 1.385 Validation Loss: 1.285 Accuracy: 0.212 Validation Accuracy: 0.136:   4%|▍         | 1431/37094 [02:27<57:55, 10.26it/s]

Epoch: 0 Iteration: 1432 Loss: 1.395 Validation Loss: 1.285 Accuracy: 0.219 Validation Accuracy: 0.136:   4%|▍         | 1431/37094 [02:27<57:55, 10.26it/s]

Epoch: 0 Iteration: 1432 Loss: 1.395 Validation Loss: 1.285 Accuracy: 0.219 Validation Accuracy: 0.136:   4%|▍         | 1433/37094 [02:27<57:52, 10.27it/s]

Epoch: 0 Iteration: 1433 Loss: 1.433 Validation Loss: 1.285 Accuracy: 0.222 Validation Accuracy: 0.136:   4%|▍         | 1433/37094 [02:27<57:52, 10.27it/s]

Epoch: 0 Iteration: 1434 Loss: 1.370 Validation Loss: 1.285 Accuracy: 0.228 Validation Accuracy: 0.136:   4%|▍         | 1433/37094 [02:27<57:52, 10.27it/s]

Epoch: 0 Iteration: 1434 Loss: 1.370 Validation Loss: 1.285 Accuracy: 0.228 Validation Accuracy: 0.136:   4%|▍         | 1435/37094 [02:27<57:48, 10.28it/s]

Epoch: 0 Iteration: 1435 Loss: 1.379 Validation Loss: 1.285 Accuracy: 0.231 Validation Accuracy: 0.136:   4%|▍         | 1435/37094 [02:27<57:48, 10.28it/s]

Epoch: 0 Iteration: 1436 Loss: 1.401 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1435/37094 [02:27<57:48, 10.28it/s]

Epoch: 0 Iteration: 1436 Loss: 1.401 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1437/37094 [02:27<57:47, 10.28it/s]

Epoch: 0 Iteration: 1437 Loss: 1.333 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1437/37094 [02:27<57:47, 10.28it/s]

Epoch: 0 Iteration: 1438 Loss: 1.341 Validation Loss: 1.285 Accuracy: 0.219 Validation Accuracy: 0.136:   4%|▍         | 1437/37094 [02:27<57:47, 10.28it/s]

Epoch: 0 Iteration: 1438 Loss: 1.341 Validation Loss: 1.285 Accuracy: 0.219 Validation Accuracy: 0.136:   4%|▍         | 1439/37094 [02:27<57:44, 10.29it/s]

Epoch: 0 Iteration: 1439 Loss: 1.323 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1439/37094 [02:27<57:44, 10.29it/s]

Epoch: 0 Iteration: 1440 Loss: 1.320 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1439/37094 [02:27<57:44, 10.29it/s]

Epoch: 0 Iteration: 1440 Loss: 1.320 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1441/37094 [02:27<57:44, 10.29it/s]

Epoch: 0 Iteration: 1441 Loss: 1.292 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1441/37094 [02:27<57:44, 10.29it/s]

Epoch: 0 Iteration: 1442 Loss: 1.296 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1441/37094 [02:28<57:44, 10.29it/s]

Epoch: 0 Iteration: 1442 Loss: 1.296 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1443/37094 [02:28<57:43, 10.29it/s]

Epoch: 0 Iteration: 1443 Loss: 1.221 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1443/37094 [02:28<57:43, 10.29it/s]

Epoch: 0 Iteration: 1444 Loss: 1.234 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1443/37094 [02:28<57:43, 10.29it/s]

Epoch: 0 Iteration: 1444 Loss: 1.234 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1445/37094 [02:28<57:45, 10.29it/s]

Epoch: 0 Iteration: 1445 Loss: 1.236 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1445/37094 [02:28<57:45, 10.29it/s]

Epoch: 0 Iteration: 1446 Loss: 1.240 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1445/37094 [02:28<57:45, 10.29it/s]

Epoch: 0 Iteration: 1446 Loss: 1.240 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1447/37094 [02:28<57:44, 10.29it/s]

Epoch: 0 Iteration: 1447 Loss: 1.292 Validation Loss: 1.285 Accuracy: 0.175 Validation Accuracy: 0.136:   4%|▍         | 1447/37094 [02:28<57:44, 10.29it/s]

Epoch: 0 Iteration: 1448 Loss: 1.295 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1447/37094 [02:28<57:44, 10.29it/s]

Epoch: 0 Iteration: 1448 Loss: 1.295 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1449/37094 [02:28<57:45, 10.29it/s]

Epoch: 0 Iteration: 1449 Loss: 1.241 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1449/37094 [02:28<57:45, 10.29it/s]

Epoch: 0 Iteration: 1450 Loss: 1.289 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1449/37094 [02:28<57:45, 10.29it/s]

Epoch: 0 Iteration: 1450 Loss: 1.289 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1451/37094 [02:28<57:44, 10.29it/s]

Epoch: 0 Iteration: 1451 Loss: 1.296 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1451/37094 [02:28<57:44, 10.29it/s]

Epoch: 0 Iteration: 1452 Loss: 1.301 Validation Loss: 1.285 Accuracy: 0.225 Validation Accuracy: 0.136:   4%|▍         | 1451/37094 [02:29<57:44, 10.29it/s]

Epoch: 0 Iteration: 1452 Loss: 1.301 Validation Loss: 1.285 Accuracy: 0.225 Validation Accuracy: 0.136:   4%|▍         | 1453/37094 [02:29<57:41, 10.30it/s]

Epoch: 0 Iteration: 1453 Loss: 1.324 Validation Loss: 1.285 Accuracy: 0.216 Validation Accuracy: 0.136:   4%|▍         | 1453/37094 [02:29<57:41, 10.30it/s]

Epoch: 0 Iteration: 1454 Loss: 1.395 Validation Loss: 1.285 Accuracy: 0.222 Validation Accuracy: 0.136:   4%|▍         | 1453/37094 [02:29<57:41, 10.30it/s]

Epoch: 0 Iteration: 1454 Loss: 1.395 Validation Loss: 1.285 Accuracy: 0.222 Validation Accuracy: 0.136:   4%|▍         | 1455/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1455 Loss: 1.362 Validation Loss: 1.285 Accuracy: 0.231 Validation Accuracy: 0.136:   4%|▍         | 1455/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1456 Loss: 1.410 Validation Loss: 1.285 Accuracy: 0.228 Validation Accuracy: 0.136:   4%|▍         | 1455/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1456 Loss: 1.410 Validation Loss: 1.285 Accuracy: 0.228 Validation Accuracy: 0.136:   4%|▍         | 1457/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1457 Loss: 1.437 Validation Loss: 1.285 Accuracy: 0.244 Validation Accuracy: 0.136:   4%|▍         | 1457/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1458 Loss: 1.460 Validation Loss: 1.285 Accuracy: 0.234 Validation Accuracy: 0.136:   4%|▍         | 1457/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1458 Loss: 1.460 Validation Loss: 1.285 Accuracy: 0.234 Validation Accuracy: 0.136:   4%|▍         | 1459/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1459 Loss: 1.444 Validation Loss: 1.285 Accuracy: 0.212 Validation Accuracy: 0.136:   4%|▍         | 1459/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1460 Loss: 1.462 Validation Loss: 1.285 Accuracy: 0.209 Validation Accuracy: 0.136:   4%|▍         | 1459/37094 [02:29<57:40, 10.30it/s]

Epoch: 0 Iteration: 1460 Loss: 1.462 Validation Loss: 1.285 Accuracy: 0.209 Validation Accuracy: 0.136:   4%|▍         | 1461/37094 [02:29<57:39, 10.30it/s]

Epoch: 0 Iteration: 1461 Loss: 1.518 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1461/37094 [02:29<57:39, 10.30it/s]

Epoch: 0 Iteration: 1462 Loss: 1.520 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1461/37094 [02:30<57:39, 10.30it/s]

Epoch: 0 Iteration: 1462 Loss: 1.520 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1463/37094 [02:30<57:38, 10.30it/s]

Epoch: 0 Iteration: 1463 Loss: 1.517 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1463/37094 [02:30<57:38, 10.30it/s]

Epoch: 0 Iteration: 1464 Loss: 1.506 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1463/37094 [02:30<57:38, 10.30it/s]

Epoch: 0 Iteration: 1464 Loss: 1.506 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1465/37094 [02:30<57:36, 10.31it/s]

Epoch: 0 Iteration: 1465 Loss: 1.527 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1465/37094 [02:30<57:36, 10.31it/s]

Epoch: 0 Iteration: 1466 Loss: 1.555 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1465/37094 [02:30<57:36, 10.31it/s]

Epoch: 0 Iteration: 1466 Loss: 1.555 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1467/37094 [02:30<57:38, 10.30it/s]

Epoch: 0 Iteration: 1467 Loss: 1.522 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1467/37094 [02:30<57:38, 10.30it/s]

Epoch: 0 Iteration: 1468 Loss: 1.529 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1467/37094 [02:30<57:38, 10.30it/s]

Epoch: 0 Iteration: 1468 Loss: 1.529 Validation Loss: 1.285 Accuracy: 0.200 Validation Accuracy: 0.136:   4%|▍         | 1469/37094 [02:30<57:37, 10.30it/s]

Epoch: 0 Iteration: 1469 Loss: 1.568 Validation Loss: 1.285 Accuracy: 0.203 Validation Accuracy: 0.136:   4%|▍         | 1469/37094 [02:30<57:37, 10.30it/s]

Epoch: 0 Iteration: 1470 Loss: 1.560 Validation Loss: 1.285 Accuracy: 0.209 Validation Accuracy: 0.136:   4%|▍         | 1469/37094 [02:30<57:37, 10.30it/s]

Epoch: 0 Iteration: 1470 Loss: 1.560 Validation Loss: 1.285 Accuracy: 0.209 Validation Accuracy: 0.136:   4%|▍         | 1471/37094 [02:30<57:36, 10.31it/s]

Epoch: 0 Iteration: 1471 Loss: 1.542 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1471/37094 [02:30<57:36, 10.31it/s]

Epoch: 0 Iteration: 1472 Loss: 1.553 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1471/37094 [02:31<57:36, 10.31it/s]

Epoch: 0 Iteration: 1472 Loss: 1.553 Validation Loss: 1.285 Accuracy: 0.206 Validation Accuracy: 0.136:   4%|▍         | 1473/37094 [02:31<57:36, 10.31it/s]

Epoch: 0 Iteration: 1473 Loss: 1.526 Validation Loss: 1.285 Accuracy: 0.188 Validation Accuracy: 0.136:   4%|▍         | 1473/37094 [02:31<57:36, 10.31it/s]

Epoch: 0 Iteration: 1474 Loss: 1.465 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1473/37094 [02:31<57:36, 10.31it/s]

Epoch: 0 Iteration: 1474 Loss: 1.465 Validation Loss: 1.285 Accuracy: 0.197 Validation Accuracy: 0.136:   4%|▍         | 1475/37094 [02:31<57:35, 10.31it/s]

Epoch: 0 Iteration: 1475 Loss: 1.516 Validation Loss: 1.285 Accuracy: 0.181 Validation Accuracy: 0.136:   4%|▍         | 1475/37094 [02:31<57:35, 10.31it/s]

Epoch: 0 Iteration: 1476 Loss: 1.470 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1475/37094 [02:31<57:35, 10.31it/s]

Epoch: 0 Iteration: 1476 Loss: 1.470 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1477/37094 [02:31<57:34, 10.31it/s]

Epoch: 0 Iteration: 1477 Loss: 1.455 Validation Loss: 1.285 Accuracy: 0.169 Validation Accuracy: 0.136:   4%|▍         | 1477/37094 [02:31<57:34, 10.31it/s]

Epoch: 0 Iteration: 1478 Loss: 1.476 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1477/37094 [02:31<57:34, 10.31it/s]

Epoch: 0 Iteration: 1478 Loss: 1.476 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1479/37094 [02:31<57:34, 10.31it/s]

Epoch: 0 Iteration: 1479 Loss: 1.471 Validation Loss: 1.285 Accuracy: 0.163 Validation Accuracy: 0.136:   4%|▍         | 1479/37094 [02:31<57:34, 10.31it/s]

Epoch: 0 Iteration: 1480 Loss: 1.460 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1479/37094 [02:31<57:34, 10.31it/s]

Epoch: 0 Iteration: 1480 Loss: 1.460 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1481/37094 [02:31<57:33, 10.31it/s]

Epoch: 0 Iteration: 1481 Loss: 1.418 Validation Loss: 1.285 Accuracy: 0.172 Validation Accuracy: 0.136:   4%|▍         | 1481/37094 [02:31<57:33, 10.31it/s]

Epoch: 0 Iteration: 1482 Loss: 1.438 Validation Loss: 1.285 Accuracy: 0.169 Validation Accuracy: 0.136:   4%|▍         | 1481/37094 [02:31<57:33, 10.31it/s]

Epoch: 0 Iteration: 1482 Loss: 1.438 Validation Loss: 1.285 Accuracy: 0.169 Validation Accuracy: 0.136:   4%|▍         | 1483/37094 [02:31<57:32, 10.31it/s]

Epoch: 0 Iteration: 1483 Loss: 1.487 Validation Loss: 1.285 Accuracy: 0.175 Validation Accuracy: 0.136:   4%|▍         | 1483/37094 [02:32<57:32, 10.31it/s]

Epoch: 0 Iteration: 1484 Loss: 1.490 Validation Loss: 1.285 Accuracy: 0.156 Validation Accuracy: 0.136:   4%|▍         | 1483/37094 [02:32<57:32, 10.31it/s]

Epoch: 0 Iteration: 1484 Loss: 1.490 Validation Loss: 1.285 Accuracy: 0.156 Validation Accuracy: 0.136:   4%|▍         | 1485/37094 [02:32<57:34, 10.31it/s]

Epoch: 0 Iteration: 1485 Loss: 1.433 Validation Loss: 1.285 Accuracy: 0.159 Validation Accuracy: 0.136:   4%|▍         | 1485/37094 [02:32<57:34, 10.31it/s]

Epoch: 0 Iteration: 1486 Loss: 1.412 Validation Loss: 1.285 Accuracy: 0.163 Validation Accuracy: 0.136:   4%|▍         | 1485/37094 [02:32<57:34, 10.31it/s]

Epoch: 0 Iteration: 1486 Loss: 1.412 Validation Loss: 1.285 Accuracy: 0.163 Validation Accuracy: 0.136:   4%|▍         | 1487/37094 [02:32<57:35, 10.30it/s]

Epoch: 0 Iteration: 1487 Loss: 1.421 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1487/37094 [02:32<57:35, 10.30it/s]

Epoch: 0 Iteration: 1488 Loss: 1.440 Validation Loss: 1.285 Accuracy: 0.153 Validation Accuracy: 0.136:   4%|▍         | 1487/37094 [02:32<57:35, 10.30it/s]

Epoch: 0 Iteration: 1488 Loss: 1.440 Validation Loss: 1.285 Accuracy: 0.153 Validation Accuracy: 0.136:   4%|▍         | 1489/37094 [02:32<57:34, 10.31it/s]

Epoch: 0 Iteration: 1489 Loss: 1.402 Validation Loss: 1.285 Accuracy: 0.172 Validation Accuracy: 0.136:   4%|▍         | 1489/37094 [02:32<57:34, 10.31it/s]

Epoch: 0 Iteration: 1490 Loss: 1.401 Validation Loss: 1.285 Accuracy: 0.169 Validation Accuracy: 0.136:   4%|▍         | 1489/37094 [02:32<57:34, 10.31it/s]

Epoch: 0 Iteration: 1490 Loss: 1.401 Validation Loss: 1.285 Accuracy: 0.169 Validation Accuracy: 0.136:   4%|▍         | 1491/37094 [02:32<57:35, 10.30it/s]

Epoch: 0 Iteration: 1491 Loss: 1.412 Validation Loss: 1.285 Accuracy: 0.166 Validation Accuracy: 0.136:   4%|▍         | 1491/37094 [02:32<57:35, 10.30it/s]

Epoch: 0 Iteration: 1492 Loss: 1.379 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1491/37094 [02:32<57:35, 10.30it/s]

Epoch: 0 Iteration: 1492 Loss: 1.379 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1493/37094 [02:32<57:33, 10.31it/s]

Epoch: 0 Iteration: 1493 Loss: 1.347 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1493/37094 [02:33<57:33, 10.31it/s]

Epoch: 0 Iteration: 1494 Loss: 1.367 Validation Loss: 1.285 Accuracy: 0.181 Validation Accuracy: 0.136:   4%|▍         | 1493/37094 [02:33<57:33, 10.31it/s]

Epoch: 0 Iteration: 1494 Loss: 1.367 Validation Loss: 1.285 Accuracy: 0.181 Validation Accuracy: 0.136:   4%|▍         | 1495/37094 [02:33<57:34, 10.31it/s]

Epoch: 0 Iteration: 1495 Loss: 1.375 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1495/37094 [02:33<57:34, 10.31it/s]

Epoch: 0 Iteration: 1496 Loss: 1.356 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1495/37094 [02:33<57:34, 10.31it/s]

Epoch: 0 Iteration: 1496 Loss: 1.356 Validation Loss: 1.285 Accuracy: 0.178 Validation Accuracy: 0.136:   4%|▍         | 1497/37094 [02:33<57:35, 10.30it/s]

Epoch: 0 Iteration: 1497 Loss: 1.370 Validation Loss: 1.285 Accuracy: 0.181 Validation Accuracy: 0.136:   4%|▍         | 1497/37094 [02:33<57:35, 10.30it/s]

Epoch: 0 Iteration: 1498 Loss: 1.375 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1497/37094 [02:33<57:35, 10.30it/s]

Epoch: 0 Iteration: 1498 Loss: 1.375 Validation Loss: 1.285 Accuracy: 0.194 Validation Accuracy: 0.136:   4%|▍         | 1499/37094 [02:33<57:34, 10.30it/s]

Epoch: 0 Iteration: 1499 Loss: 1.377 Validation Loss: 1.285 Accuracy: 0.184 Validation Accuracy: 0.136:   4%|▍         | 1499/37094 [02:33<57:34, 10.30it/s]

Epoch: 0 Iteration: 1500 Loss: 1.372 Validation Loss: 1.259 Accuracy: 0.175 Validation Accuracy: 0.147:   4%|▍         | 1499/37094 [02:34<57:34, 10.30it/s]

Epoch: 0 Iteration: 1500 Loss: 1.372 Validation Loss: 1.259 Accuracy: 0.175 Validation Accuracy: 0.147:   4%|▍         | 1501/37094 [02:34<1:45:59,  5.60it/s]

Epoch: 0 Iteration: 1501 Loss: 1.363 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1501/37094 [02:34<1:45:59,  5.60it/s]

Epoch: 0 Iteration: 1502 Loss: 1.375 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1501/37094 [02:34<1:45:59,  5.60it/s]

Epoch: 0 Iteration: 1502 Loss: 1.375 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1503/37094 [02:34<1:30:43,  6.54it/s]

Epoch: 0 Iteration: 1503 Loss: 1.401 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1503/37094 [02:34<1:30:43,  6.54it/s]

Epoch: 0 Iteration: 1504 Loss: 1.407 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1503/37094 [02:34<1:30:43,  6.54it/s]

Epoch: 0 Iteration: 1504 Loss: 1.407 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1505/37094 [02:34<1:20:47,  7.34it/s]

Epoch: 0 Iteration: 1505 Loss: 1.427 Validation Loss: 1.259 Accuracy: 0.188 Validation Accuracy: 0.147:   4%|▍         | 1505/37094 [02:34<1:20:47,  7.34it/s]

Epoch: 0 Iteration: 1506 Loss: 1.444 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1505/37094 [02:34<1:20:47,  7.34it/s]

Epoch: 0 Iteration: 1506 Loss: 1.444 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1507/37094 [02:34<1:13:47,  8.04it/s]

Epoch: 0 Iteration: 1507 Loss: 1.480 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1507/37094 [02:34<1:13:47,  8.04it/s]

Epoch: 0 Iteration: 1508 Loss: 1.488 Validation Loss: 1.259 Accuracy: 0.175 Validation Accuracy: 0.147:   4%|▍         | 1507/37094 [02:35<1:13:47,  8.04it/s]

Epoch: 0 Iteration: 1508 Loss: 1.488 Validation Loss: 1.259 Accuracy: 0.175 Validation Accuracy: 0.147:   4%|▍         | 1509/37094 [02:35<1:08:57,  8.60it/s]

Epoch: 0 Iteration: 1509 Loss: 1.514 Validation Loss: 1.259 Accuracy: 0.169 Validation Accuracy: 0.147:   4%|▍         | 1509/37094 [02:35<1:08:57,  8.60it/s]

Epoch: 0 Iteration: 1510 Loss: 1.546 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1509/37094 [02:35<1:08:57,  8.60it/s]

Epoch: 0 Iteration: 1510 Loss: 1.546 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1511/37094 [02:35<1:05:32,  9.05it/s]

Epoch: 0 Iteration: 1511 Loss: 1.602 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1511/37094 [02:35<1:05:32,  9.05it/s]

Epoch: 0 Iteration: 1512 Loss: 1.616 Validation Loss: 1.259 Accuracy: 0.156 Validation Accuracy: 0.147:   4%|▍         | 1511/37094 [02:35<1:05:32,  9.05it/s]

Epoch: 0 Iteration: 1512 Loss: 1.616 Validation Loss: 1.259 Accuracy: 0.156 Validation Accuracy: 0.147:   4%|▍         | 1513/37094 [02:35<1:03:08,  9.39it/s]

Epoch: 0 Iteration: 1513 Loss: 1.625 Validation Loss: 1.259 Accuracy: 0.153 Validation Accuracy: 0.147:   4%|▍         | 1513/37094 [02:35<1:03:08,  9.39it/s]

Epoch: 0 Iteration: 1514 Loss: 1.629 Validation Loss: 1.259 Accuracy: 0.153 Validation Accuracy: 0.147:   4%|▍         | 1513/37094 [02:35<1:03:08,  9.39it/s]

Epoch: 0 Iteration: 1514 Loss: 1.629 Validation Loss: 1.259 Accuracy: 0.153 Validation Accuracy: 0.147:   4%|▍         | 1515/37094 [02:35<1:01:28,  9.65it/s]

Epoch: 0 Iteration: 1515 Loss: 1.626 Validation Loss: 1.259 Accuracy: 0.159 Validation Accuracy: 0.147:   4%|▍         | 1515/37094 [02:35<1:01:28,  9.65it/s]

Epoch: 0 Iteration: 1516 Loss: 1.683 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1515/37094 [02:35<1:01:28,  9.65it/s]

Epoch: 0 Iteration: 1516 Loss: 1.683 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1517/37094 [02:35<1:00:21,  9.82it/s]

Epoch: 0 Iteration: 1517 Loss: 1.663 Validation Loss: 1.259 Accuracy: 0.163 Validation Accuracy: 0.147:   4%|▍         | 1517/37094 [02:35<1:00:21,  9.82it/s]

Epoch: 0 Iteration: 1518 Loss: 1.628 Validation Loss: 1.259 Accuracy: 0.159 Validation Accuracy: 0.147:   4%|▍         | 1517/37094 [02:36<1:00:21,  9.82it/s]

Epoch: 0 Iteration: 1518 Loss: 1.628 Validation Loss: 1.259 Accuracy: 0.159 Validation Accuracy: 0.147:   4%|▍         | 1519/37094 [02:36<59:32,  9.96it/s]  

Epoch: 0 Iteration: 1519 Loss: 1.610 Validation Loss: 1.259 Accuracy: 0.163 Validation Accuracy: 0.147:   4%|▍         | 1519/37094 [02:36<59:32,  9.96it/s]

Epoch: 0 Iteration: 1520 Loss: 1.580 Validation Loss: 1.259 Accuracy: 0.184 Validation Accuracy: 0.147:   4%|▍         | 1519/37094 [02:36<59:32,  9.96it/s]

Epoch: 0 Iteration: 1520 Loss: 1.580 Validation Loss: 1.259 Accuracy: 0.184 Validation Accuracy: 0.147:   4%|▍         | 1521/37094 [02:36<58:56, 10.06it/s]

Epoch: 0 Iteration: 1521 Loss: 1.631 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1521/37094 [02:36<58:56, 10.06it/s]

Epoch: 0 Iteration: 1522 Loss: 1.622 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1521/37094 [02:36<58:56, 10.06it/s]

Epoch: 0 Iteration: 1522 Loss: 1.622 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1523/37094 [02:36<58:30, 10.13it/s]

Epoch: 0 Iteration: 1523 Loss: 1.580 Validation Loss: 1.259 Accuracy: 0.206 Validation Accuracy: 0.147:   4%|▍         | 1523/37094 [02:36<58:30, 10.13it/s]

Epoch: 0 Iteration: 1524 Loss: 1.594 Validation Loss: 1.259 Accuracy: 0.206 Validation Accuracy: 0.147:   4%|▍         | 1523/37094 [02:36<58:30, 10.13it/s]

Epoch: 0 Iteration: 1524 Loss: 1.594 Validation Loss: 1.259 Accuracy: 0.206 Validation Accuracy: 0.147:   4%|▍         | 1525/37094 [02:36<58:14, 10.18it/s]

Epoch: 0 Iteration: 1525 Loss: 1.606 Validation Loss: 1.259 Accuracy: 0.197 Validation Accuracy: 0.147:   4%|▍         | 1525/37094 [02:36<58:14, 10.18it/s]

Epoch: 0 Iteration: 1526 Loss: 1.599 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1525/37094 [02:36<58:14, 10.18it/s]

Epoch: 0 Iteration: 1526 Loss: 1.599 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1527/37094 [02:36<58:01, 10.22it/s]

Epoch: 0 Iteration: 1527 Loss: 1.588 Validation Loss: 1.259 Accuracy: 0.197 Validation Accuracy: 0.147:   4%|▍         | 1527/37094 [02:36<58:01, 10.22it/s]

Epoch: 0 Iteration: 1528 Loss: 1.551 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1527/37094 [02:36<58:01, 10.22it/s]

Epoch: 0 Iteration: 1528 Loss: 1.551 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1529/37094 [02:36<57:51, 10.24it/s]

Epoch: 0 Iteration: 1529 Loss: 1.560 Validation Loss: 1.259 Accuracy: 0.203 Validation Accuracy: 0.147:   4%|▍         | 1529/37094 [02:37<57:51, 10.24it/s]

Epoch: 0 Iteration: 1530 Loss: 1.535 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1529/37094 [02:37<57:51, 10.24it/s]

Epoch: 0 Iteration: 1530 Loss: 1.535 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1531/37094 [02:37<57:44, 10.27it/s]

Epoch: 0 Iteration: 1531 Loss: 1.454 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1531/37094 [02:37<57:44, 10.27it/s]

Epoch: 0 Iteration: 1532 Loss: 1.458 Validation Loss: 1.259 Accuracy: 0.166 Validation Accuracy: 0.147:   4%|▍         | 1531/37094 [02:37<57:44, 10.27it/s]

Epoch: 0 Iteration: 1532 Loss: 1.458 Validation Loss: 1.259 Accuracy: 0.166 Validation Accuracy: 0.147:   4%|▍         | 1533/37094 [02:37<57:40, 10.28it/s]

Epoch: 0 Iteration: 1533 Loss: 1.470 Validation Loss: 1.259 Accuracy: 0.153 Validation Accuracy: 0.147:   4%|▍         | 1533/37094 [02:37<57:40, 10.28it/s]

Epoch: 0 Iteration: 1534 Loss: 1.447 Validation Loss: 1.259 Accuracy: 0.147 Validation Accuracy: 0.147:   4%|▍         | 1533/37094 [02:37<57:40, 10.28it/s]

Epoch: 0 Iteration: 1534 Loss: 1.447 Validation Loss: 1.259 Accuracy: 0.147 Validation Accuracy: 0.147:   4%|▍         | 1535/37094 [02:37<57:37, 10.28it/s]

Epoch: 0 Iteration: 1535 Loss: 1.431 Validation Loss: 1.259 Accuracy: 0.159 Validation Accuracy: 0.147:   4%|▍         | 1535/37094 [02:37<57:37, 10.28it/s]

Epoch: 0 Iteration: 1536 Loss: 1.361 Validation Loss: 1.259 Accuracy: 0.175 Validation Accuracy: 0.147:   4%|▍         | 1535/37094 [02:37<57:37, 10.28it/s]

Epoch: 0 Iteration: 1536 Loss: 1.361 Validation Loss: 1.259 Accuracy: 0.175 Validation Accuracy: 0.147:   4%|▍         | 1537/37094 [02:37<57:36, 10.29it/s]

Epoch: 0 Iteration: 1537 Loss: 1.364 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1537/37094 [02:37<57:36, 10.29it/s]

Epoch: 0 Iteration: 1538 Loss: 1.369 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1537/37094 [02:37<57:36, 10.29it/s]

Epoch: 0 Iteration: 1538 Loss: 1.369 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1539/37094 [02:37<57:37, 10.28it/s]

Epoch: 0 Iteration: 1539 Loss: 1.422 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1539/37094 [02:38<57:37, 10.28it/s]

Epoch: 0 Iteration: 1540 Loss: 1.441 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1539/37094 [02:38<57:37, 10.28it/s]

Epoch: 0 Iteration: 1540 Loss: 1.441 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1541/37094 [02:38<57:37, 10.28it/s]

Epoch: 0 Iteration: 1541 Loss: 1.379 Validation Loss: 1.259 Accuracy: 0.203 Validation Accuracy: 0.147:   4%|▍         | 1541/37094 [02:38<57:37, 10.28it/s]

Epoch: 0 Iteration: 1542 Loss: 1.347 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1541/37094 [02:38<57:37, 10.28it/s]

Epoch: 0 Iteration: 1542 Loss: 1.347 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1543/37094 [02:38<57:35, 10.29it/s]

Epoch: 0 Iteration: 1543 Loss: 1.381 Validation Loss: 1.259 Accuracy: 0.212 Validation Accuracy: 0.147:   4%|▍         | 1543/37094 [02:38<57:35, 10.29it/s]

Epoch: 0 Iteration: 1544 Loss: 1.391 Validation Loss: 1.259 Accuracy: 0.212 Validation Accuracy: 0.147:   4%|▍         | 1543/37094 [02:38<57:35, 10.29it/s]

Epoch: 0 Iteration: 1544 Loss: 1.391 Validation Loss: 1.259 Accuracy: 0.212 Validation Accuracy: 0.147:   4%|▍         | 1545/37094 [02:38<57:32, 10.30it/s]

Epoch: 0 Iteration: 1545 Loss: 1.370 Validation Loss: 1.259 Accuracy: 0.206 Validation Accuracy: 0.147:   4%|▍         | 1545/37094 [02:38<57:32, 10.30it/s]

Epoch: 0 Iteration: 1546 Loss: 1.384 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1545/37094 [02:38<57:32, 10.30it/s]

Epoch: 0 Iteration: 1546 Loss: 1.384 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1547/37094 [02:38<57:31, 10.30it/s]

Epoch: 0 Iteration: 1547 Loss: 1.336 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1547/37094 [02:38<57:31, 10.30it/s]

Epoch: 0 Iteration: 1548 Loss: 1.355 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1547/37094 [02:38<57:31, 10.30it/s]

Epoch: 0 Iteration: 1548 Loss: 1.355 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1549/37094 [02:38<57:30, 10.30it/s]

Epoch: 0 Iteration: 1549 Loss: 1.318 Validation Loss: 1.259 Accuracy: 0.194 Validation Accuracy: 0.147:   4%|▍         | 1549/37094 [02:39<57:30, 10.30it/s]

Epoch: 0 Iteration: 1550 Loss: 1.308 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1549/37094 [02:39<57:30, 10.30it/s]

Epoch: 0 Iteration: 1550 Loss: 1.308 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1551/37094 [02:39<57:30, 10.30it/s]

Epoch: 0 Iteration: 1551 Loss: 1.325 Validation Loss: 1.259 Accuracy: 0.197 Validation Accuracy: 0.147:   4%|▍         | 1551/37094 [02:39<57:30, 10.30it/s]

Epoch: 0 Iteration: 1552 Loss: 1.340 Validation Loss: 1.259 Accuracy: 0.194 Validation Accuracy: 0.147:   4%|▍         | 1551/37094 [02:39<57:30, 10.30it/s]

Epoch: 0 Iteration: 1552 Loss: 1.340 Validation Loss: 1.259 Accuracy: 0.194 Validation Accuracy: 0.147:   4%|▍         | 1553/37094 [02:39<57:28, 10.31it/s]

Epoch: 0 Iteration: 1553 Loss: 1.350 Validation Loss: 1.259 Accuracy: 0.197 Validation Accuracy: 0.147:   4%|▍         | 1553/37094 [02:39<57:28, 10.31it/s]

Epoch: 0 Iteration: 1554 Loss: 1.388 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1553/37094 [02:39<57:28, 10.31it/s]

Epoch: 0 Iteration: 1554 Loss: 1.388 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1555/37094 [02:39<57:27, 10.31it/s]

Epoch: 0 Iteration: 1555 Loss: 1.393 Validation Loss: 1.259 Accuracy: 0.222 Validation Accuracy: 0.147:   4%|▍         | 1555/37094 [02:39<57:27, 10.31it/s]

Epoch: 0 Iteration: 1556 Loss: 1.414 Validation Loss: 1.259 Accuracy: 0.225 Validation Accuracy: 0.147:   4%|▍         | 1555/37094 [02:39<57:27, 10.31it/s]

Epoch: 0 Iteration: 1556 Loss: 1.414 Validation Loss: 1.259 Accuracy: 0.225 Validation Accuracy: 0.147:   4%|▍         | 1557/37094 [02:39<57:28, 10.31it/s]

Epoch: 0 Iteration: 1557 Loss: 1.427 Validation Loss: 1.259 Accuracy: 0.231 Validation Accuracy: 0.147:   4%|▍         | 1557/37094 [02:39<57:28, 10.31it/s]

Epoch: 0 Iteration: 1558 Loss: 1.432 Validation Loss: 1.259 Accuracy: 0.244 Validation Accuracy: 0.147:   4%|▍         | 1557/37094 [02:39<57:28, 10.31it/s]

Epoch: 0 Iteration: 1558 Loss: 1.432 Validation Loss: 1.259 Accuracy: 0.244 Validation Accuracy: 0.147:   4%|▍         | 1559/37094 [02:39<57:29, 10.30it/s]

Epoch: 0 Iteration: 1559 Loss: 1.378 Validation Loss: 1.259 Accuracy: 0.250 Validation Accuracy: 0.147:   4%|▍         | 1559/37094 [02:39<57:29, 10.30it/s]

Epoch: 0 Iteration: 1560 Loss: 1.399 Validation Loss: 1.259 Accuracy: 0.244 Validation Accuracy: 0.147:   4%|▍         | 1559/37094 [02:40<57:29, 10.30it/s]

Epoch: 0 Iteration: 1560 Loss: 1.399 Validation Loss: 1.259 Accuracy: 0.244 Validation Accuracy: 0.147:   4%|▍         | 1561/37094 [02:40<57:28, 10.30it/s]

Epoch: 0 Iteration: 1561 Loss: 1.433 Validation Loss: 1.259 Accuracy: 0.244 Validation Accuracy: 0.147:   4%|▍         | 1561/37094 [02:40<57:28, 10.30it/s]

Epoch: 0 Iteration: 1562 Loss: 1.447 Validation Loss: 1.259 Accuracy: 0.263 Validation Accuracy: 0.147:   4%|▍         | 1561/37094 [02:40<57:28, 10.30it/s]

Epoch: 0 Iteration: 1562 Loss: 1.447 Validation Loss: 1.259 Accuracy: 0.263 Validation Accuracy: 0.147:   4%|▍         | 1563/37094 [02:40<57:27, 10.31it/s]

Epoch: 0 Iteration: 1563 Loss: 1.426 Validation Loss: 1.259 Accuracy: 0.263 Validation Accuracy: 0.147:   4%|▍         | 1563/37094 [02:40<57:27, 10.31it/s]

Epoch: 0 Iteration: 1564 Loss: 1.410 Validation Loss: 1.259 Accuracy: 0.253 Validation Accuracy: 0.147:   4%|▍         | 1563/37094 [02:40<57:27, 10.31it/s]

Epoch: 0 Iteration: 1564 Loss: 1.410 Validation Loss: 1.259 Accuracy: 0.253 Validation Accuracy: 0.147:   4%|▍         | 1565/37094 [02:40<57:26, 10.31it/s]

Epoch: 0 Iteration: 1565 Loss: 1.423 Validation Loss: 1.259 Accuracy: 0.231 Validation Accuracy: 0.147:   4%|▍         | 1565/37094 [02:40<57:26, 10.31it/s]

Epoch: 0 Iteration: 1566 Loss: 1.414 Validation Loss: 1.259 Accuracy: 0.228 Validation Accuracy: 0.147:   4%|▍         | 1565/37094 [02:40<57:26, 10.31it/s]

Epoch: 0 Iteration: 1566 Loss: 1.414 Validation Loss: 1.259 Accuracy: 0.228 Validation Accuracy: 0.147:   4%|▍         | 1567/37094 [02:40<57:29, 10.30it/s]

Epoch: 0 Iteration: 1567 Loss: 1.461 Validation Loss: 1.259 Accuracy: 0.225 Validation Accuracy: 0.147:   4%|▍         | 1567/37094 [02:40<57:29, 10.30it/s]

Epoch: 0 Iteration: 1568 Loss: 1.432 Validation Loss: 1.259 Accuracy: 0.234 Validation Accuracy: 0.147:   4%|▍         | 1567/37094 [02:40<57:29, 10.30it/s]

Epoch: 0 Iteration: 1568 Loss: 1.432 Validation Loss: 1.259 Accuracy: 0.234 Validation Accuracy: 0.147:   4%|▍         | 1569/37094 [02:40<57:27, 10.30it/s]

Epoch: 0 Iteration: 1569 Loss: 1.465 Validation Loss: 1.259 Accuracy: 0.231 Validation Accuracy: 0.147:   4%|▍         | 1569/37094 [02:40<57:27, 10.30it/s]

Epoch: 0 Iteration: 1570 Loss: 1.457 Validation Loss: 1.259 Accuracy: 0.241 Validation Accuracy: 0.147:   4%|▍         | 1569/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1570 Loss: 1.457 Validation Loss: 1.259 Accuracy: 0.241 Validation Accuracy: 0.147:   4%|▍         | 1571/37094 [02:41<57:26, 10.31it/s]

Epoch: 0 Iteration: 1571 Loss: 1.463 Validation Loss: 1.259 Accuracy: 0.231 Validation Accuracy: 0.147:   4%|▍         | 1571/37094 [02:41<57:26, 10.31it/s]

Epoch: 0 Iteration: 1572 Loss: 1.451 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1571/37094 [02:41<57:26, 10.31it/s]

Epoch: 0 Iteration: 1572 Loss: 1.451 Validation Loss: 1.259 Accuracy: 0.209 Validation Accuracy: 0.147:   4%|▍         | 1573/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1573 Loss: 1.414 Validation Loss: 1.259 Accuracy: 0.225 Validation Accuracy: 0.147:   4%|▍         | 1573/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1574 Loss: 1.385 Validation Loss: 1.259 Accuracy: 0.222 Validation Accuracy: 0.147:   4%|▍         | 1573/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1574 Loss: 1.385 Validation Loss: 1.259 Accuracy: 0.222 Validation Accuracy: 0.147:   4%|▍         | 1575/37094 [02:41<57:26, 10.30it/s]

Epoch: 0 Iteration: 1575 Loss: 1.367 Validation Loss: 1.259 Accuracy: 0.228 Validation Accuracy: 0.147:   4%|▍         | 1575/37094 [02:41<57:26, 10.30it/s]

Epoch: 0 Iteration: 1576 Loss: 1.385 Validation Loss: 1.259 Accuracy: 0.212 Validation Accuracy: 0.147:   4%|▍         | 1575/37094 [02:41<57:26, 10.30it/s]

Epoch: 0 Iteration: 1576 Loss: 1.385 Validation Loss: 1.259 Accuracy: 0.212 Validation Accuracy: 0.147:   4%|▍         | 1577/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1577 Loss: 1.369 Validation Loss: 1.259 Accuracy: 0.203 Validation Accuracy: 0.147:   4%|▍         | 1577/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1578 Loss: 1.329 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1577/37094 [02:41<57:27, 10.30it/s]

Epoch: 0 Iteration: 1578 Loss: 1.329 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1579/37094 [02:41<57:26, 10.30it/s]

Epoch: 0 Iteration: 1579 Loss: 1.389 Validation Loss: 1.259 Accuracy: 0.184 Validation Accuracy: 0.147:   4%|▍         | 1579/37094 [02:41<57:26, 10.30it/s]

Epoch: 0 Iteration: 1580 Loss: 1.352 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1579/37094 [02:42<57:26, 10.30it/s]

Epoch: 0 Iteration: 1580 Loss: 1.352 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1581/37094 [02:42<57:27, 10.30it/s]

Epoch: 0 Iteration: 1581 Loss: 1.341 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1581/37094 [02:42<57:27, 10.30it/s]

Epoch: 0 Iteration: 1582 Loss: 1.338 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1581/37094 [02:42<57:27, 10.30it/s]

Epoch: 0 Iteration: 1582 Loss: 1.338 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1583/37094 [02:42<57:26, 10.30it/s]

Epoch: 0 Iteration: 1583 Loss: 1.361 Validation Loss: 1.259 Accuracy: 0.169 Validation Accuracy: 0.147:   4%|▍         | 1583/37094 [02:42<57:26, 10.30it/s]

Epoch: 0 Iteration: 1584 Loss: 1.335 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1583/37094 [02:42<57:26, 10.30it/s]

Epoch: 0 Iteration: 1584 Loss: 1.335 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1585/37094 [02:42<57:25, 10.31it/s]

Epoch: 0 Iteration: 1585 Loss: 1.317 Validation Loss: 1.259 Accuracy: 0.166 Validation Accuracy: 0.147:   4%|▍         | 1585/37094 [02:42<57:25, 10.31it/s]

Epoch: 0 Iteration: 1586 Loss: 1.351 Validation Loss: 1.259 Accuracy: 0.169 Validation Accuracy: 0.147:   4%|▍         | 1585/37094 [02:42<57:25, 10.31it/s]

Epoch: 0 Iteration: 1586 Loss: 1.351 Validation Loss: 1.259 Accuracy: 0.169 Validation Accuracy: 0.147:   4%|▍         | 1587/37094 [02:42<57:25, 10.31it/s]

Epoch: 0 Iteration: 1587 Loss: 1.319 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1587/37094 [02:42<57:25, 10.31it/s]

Epoch: 0 Iteration: 1588 Loss: 1.292 Validation Loss: 1.259 Accuracy: 0.188 Validation Accuracy: 0.147:   4%|▍         | 1587/37094 [02:42<57:25, 10.31it/s]

Epoch: 0 Iteration: 1588 Loss: 1.292 Validation Loss: 1.259 Accuracy: 0.188 Validation Accuracy: 0.147:   4%|▍         | 1589/37094 [02:42<57:24, 10.31it/s]

Epoch: 0 Iteration: 1589 Loss: 1.301 Validation Loss: 1.259 Accuracy: 0.191 Validation Accuracy: 0.147:   4%|▍         | 1589/37094 [02:42<57:24, 10.31it/s]

Epoch: 0 Iteration: 1590 Loss: 1.314 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1589/37094 [02:42<57:24, 10.31it/s]

Epoch: 0 Iteration: 1590 Loss: 1.314 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1591/37094 [02:42<57:23, 10.31it/s]

Epoch: 0 Iteration: 1591 Loss: 1.353 Validation Loss: 1.259 Accuracy: 0.172 Validation Accuracy: 0.147:   4%|▍         | 1591/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1592 Loss: 1.324 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1591/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1592 Loss: 1.324 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1593/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1593 Loss: 1.333 Validation Loss: 1.259 Accuracy: 0.181 Validation Accuracy: 0.147:   4%|▍         | 1593/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1594 Loss: 1.363 Validation Loss: 1.259 Accuracy: 0.188 Validation Accuracy: 0.147:   4%|▍         | 1593/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1594 Loss: 1.363 Validation Loss: 1.259 Accuracy: 0.188 Validation Accuracy: 0.147:   4%|▍         | 1595/37094 [02:43<57:22, 10.31it/s]

Epoch: 0 Iteration: 1595 Loss: 1.352 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1595/37094 [02:43<57:22, 10.31it/s]

Epoch: 0 Iteration: 1596 Loss: 1.352 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1595/37094 [02:43<57:22, 10.31it/s]

Epoch: 0 Iteration: 1596 Loss: 1.352 Validation Loss: 1.259 Accuracy: 0.200 Validation Accuracy: 0.147:   4%|▍         | 1597/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1597 Loss: 1.361 Validation Loss: 1.259 Accuracy: 0.194 Validation Accuracy: 0.147:   4%|▍         | 1597/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1598 Loss: 1.410 Validation Loss: 1.259 Accuracy: 0.184 Validation Accuracy: 0.147:   4%|▍         | 1597/37094 [02:43<57:23, 10.31it/s]

Epoch: 0 Iteration: 1598 Loss: 1.410 Validation Loss: 1.259 Accuracy: 0.184 Validation Accuracy: 0.147:   4%|▍         | 1599/37094 [02:43<57:22, 10.31it/s]

Epoch: 0 Iteration: 1599 Loss: 1.402 Validation Loss: 1.259 Accuracy: 0.178 Validation Accuracy: 0.147:   4%|▍         | 1599/37094 [02:43<57:22, 10.31it/s]

Epoch: 0 Iteration: 1600 Loss: 1.450 Validation Loss: 1.247 Accuracy: 0.191 Validation Accuracy: 0.156:   4%|▍         | 1599/37094 [02:44<57:22, 10.31it/s]

Epoch: 0 Iteration: 1600 Loss: 1.450 Validation Loss: 1.247 Accuracy: 0.191 Validation Accuracy: 0.156:   4%|▍         | 1601/37094 [02:44<1:46:15,  5.57it/s]

Epoch: 0 Iteration: 1601 Loss: 1.447 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   4%|▍         | 1601/37094 [02:44<1:46:15,  5.57it/s]

Epoch: 0 Iteration: 1602 Loss: 1.428 Validation Loss: 1.247 Accuracy: 0.197 Validation Accuracy: 0.156:   4%|▍         | 1601/37094 [02:44<1:46:15,  5.57it/s]

Epoch: 0 Iteration: 1602 Loss: 1.428 Validation Loss: 1.247 Accuracy: 0.197 Validation Accuracy: 0.156:   4%|▍         | 1603/37094 [02:44<1:30:49,  6.51it/s]

Epoch: 0 Iteration: 1603 Loss: 1.378 Validation Loss: 1.247 Accuracy: 0.200 Validation Accuracy: 0.156:   4%|▍         | 1603/37094 [02:44<1:30:49,  6.51it/s]

Epoch: 0 Iteration: 1604 Loss: 1.391 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   4%|▍         | 1603/37094 [02:44<1:30:49,  6.51it/s]

Epoch: 0 Iteration: 1604 Loss: 1.391 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   4%|▍         | 1605/37094 [02:44<1:20:48,  7.32it/s]

Epoch: 0 Iteration: 1605 Loss: 1.435 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   4%|▍         | 1605/37094 [02:44<1:20:48,  7.32it/s]

Epoch: 0 Iteration: 1606 Loss: 1.435 Validation Loss: 1.247 Accuracy: 0.225 Validation Accuracy: 0.156:   4%|▍         | 1605/37094 [02:45<1:20:48,  7.32it/s]

Epoch: 0 Iteration: 1606 Loss: 1.435 Validation Loss: 1.247 Accuracy: 0.225 Validation Accuracy: 0.156:   4%|▍         | 1607/37094 [02:45<1:13:46,  8.02it/s]

Epoch: 0 Iteration: 1607 Loss: 1.405 Validation Loss: 1.247 Accuracy: 0.244 Validation Accuracy: 0.156:   4%|▍         | 1607/37094 [02:45<1:13:46,  8.02it/s]

Epoch: 0 Iteration: 1608 Loss: 1.477 Validation Loss: 1.247 Accuracy: 0.241 Validation Accuracy: 0.156:   4%|▍         | 1607/37094 [02:45<1:13:46,  8.02it/s]

Epoch: 0 Iteration: 1608 Loss: 1.477 Validation Loss: 1.247 Accuracy: 0.241 Validation Accuracy: 0.156:   4%|▍         | 1609/37094 [02:45<1:08:51,  8.59it/s]

Epoch: 0 Iteration: 1609 Loss: 1.480 Validation Loss: 1.247 Accuracy: 0.247 Validation Accuracy: 0.156:   4%|▍         | 1609/37094 [02:45<1:08:51,  8.59it/s]

Epoch: 0 Iteration: 1610 Loss: 1.464 Validation Loss: 1.247 Accuracy: 0.263 Validation Accuracy: 0.156:   4%|▍         | 1609/37094 [02:45<1:08:51,  8.59it/s]

Epoch: 0 Iteration: 1610 Loss: 1.464 Validation Loss: 1.247 Accuracy: 0.263 Validation Accuracy: 0.156:   4%|▍         | 1611/37094 [02:45<1:05:24,  9.04it/s]

Epoch: 0 Iteration: 1611 Loss: 1.410 Validation Loss: 1.247 Accuracy: 0.269 Validation Accuracy: 0.156:   4%|▍         | 1611/37094 [02:45<1:05:24,  9.04it/s]

Epoch: 0 Iteration: 1612 Loss: 1.424 Validation Loss: 1.247 Accuracy: 0.272 Validation Accuracy: 0.156:   4%|▍         | 1611/37094 [02:45<1:05:24,  9.04it/s]

Epoch: 0 Iteration: 1612 Loss: 1.424 Validation Loss: 1.247 Accuracy: 0.272 Validation Accuracy: 0.156:   4%|▍         | 1613/37094 [02:45<1:03:00,  9.39it/s]

Epoch: 0 Iteration: 1613 Loss: 1.412 Validation Loss: 1.247 Accuracy: 0.281 Validation Accuracy: 0.156:   4%|▍         | 1613/37094 [02:45<1:03:00,  9.39it/s]

Epoch: 0 Iteration: 1614 Loss: 1.402 Validation Loss: 1.247 Accuracy: 0.281 Validation Accuracy: 0.156:   4%|▍         | 1613/37094 [02:45<1:03:00,  9.39it/s]

Epoch: 0 Iteration: 1614 Loss: 1.402 Validation Loss: 1.247 Accuracy: 0.281 Validation Accuracy: 0.156:   4%|▍         | 1615/37094 [02:45<1:01:22,  9.64it/s]

Epoch: 0 Iteration: 1615 Loss: 1.441 Validation Loss: 1.247 Accuracy: 0.284 Validation Accuracy: 0.156:   4%|▍         | 1615/37094 [02:45<1:01:22,  9.64it/s]

Epoch: 0 Iteration: 1616 Loss: 1.434 Validation Loss: 1.247 Accuracy: 0.266 Validation Accuracy: 0.156:   4%|▍         | 1615/37094 [02:46<1:01:22,  9.64it/s]

Epoch: 0 Iteration: 1616 Loss: 1.434 Validation Loss: 1.247 Accuracy: 0.266 Validation Accuracy: 0.156:   4%|▍         | 1617/37094 [02:46<1:00:12,  9.82it/s]

Epoch: 0 Iteration: 1617 Loss: 1.433 Validation Loss: 1.247 Accuracy: 0.269 Validation Accuracy: 0.156:   4%|▍         | 1617/37094 [02:46<1:00:12,  9.82it/s]

Epoch: 0 Iteration: 1618 Loss: 1.421 Validation Loss: 1.247 Accuracy: 0.259 Validation Accuracy: 0.156:   4%|▍         | 1617/37094 [02:46<1:00:12,  9.82it/s]

Epoch: 0 Iteration: 1618 Loss: 1.421 Validation Loss: 1.247 Accuracy: 0.259 Validation Accuracy: 0.156:   4%|▍         | 1619/37094 [02:46<59:22,  9.96it/s]  

Epoch: 0 Iteration: 1619 Loss: 1.364 Validation Loss: 1.247 Accuracy: 0.253 Validation Accuracy: 0.156:   4%|▍         | 1619/37094 [02:46<59:22,  9.96it/s]

Epoch: 0 Iteration: 1620 Loss: 1.324 Validation Loss: 1.247 Accuracy: 0.244 Validation Accuracy: 0.156:   4%|▍         | 1619/37094 [02:46<59:22,  9.96it/s]

Epoch: 0 Iteration: 1620 Loss: 1.324 Validation Loss: 1.247 Accuracy: 0.244 Validation Accuracy: 0.156:   4%|▍         | 1621/37094 [02:46<58:46, 10.06it/s]

Epoch: 0 Iteration: 1621 Loss: 1.305 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   4%|▍         | 1621/37094 [02:46<58:46, 10.06it/s]

Epoch: 0 Iteration: 1622 Loss: 1.314 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   4%|▍         | 1621/37094 [02:46<58:46, 10.06it/s]

Epoch: 0 Iteration: 1622 Loss: 1.314 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   4%|▍         | 1623/37094 [02:46<58:21, 10.13it/s]

Epoch: 0 Iteration: 1623 Loss: 1.328 Validation Loss: 1.247 Accuracy: 0.228 Validation Accuracy: 0.156:   4%|▍         | 1623/37094 [02:46<58:21, 10.13it/s]

Epoch: 0 Iteration: 1624 Loss: 1.361 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   4%|▍         | 1623/37094 [02:46<58:21, 10.13it/s]

Epoch: 0 Iteration: 1624 Loss: 1.361 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   4%|▍         | 1625/37094 [02:46<58:04, 10.18it/s]

Epoch: 0 Iteration: 1625 Loss: 1.331 Validation Loss: 1.247 Accuracy: 0.206 Validation Accuracy: 0.156:   4%|▍         | 1625/37094 [02:46<58:04, 10.18it/s]

Epoch: 0 Iteration: 1626 Loss: 1.269 Validation Loss: 1.247 Accuracy: 0.228 Validation Accuracy: 0.156:   4%|▍         | 1625/37094 [02:47<58:04, 10.18it/s]

Epoch: 0 Iteration: 1626 Loss: 1.269 Validation Loss: 1.247 Accuracy: 0.228 Validation Accuracy: 0.156:   4%|▍         | 1627/37094 [02:47<57:51, 10.22it/s]

Epoch: 0 Iteration: 1627 Loss: 1.266 Validation Loss: 1.247 Accuracy: 0.216 Validation Accuracy: 0.156:   4%|▍         | 1627/37094 [02:47<57:51, 10.22it/s]

Epoch: 0 Iteration: 1628 Loss: 1.252 Validation Loss: 1.247 Accuracy: 0.231 Validation Accuracy: 0.156:   4%|▍         | 1627/37094 [02:47<57:51, 10.22it/s]

Epoch: 0 Iteration: 1628 Loss: 1.252 Validation Loss: 1.247 Accuracy: 0.231 Validation Accuracy: 0.156:   4%|▍         | 1629/37094 [02:47<57:42, 10.24it/s]

Epoch: 0 Iteration: 1629 Loss: 1.189 Validation Loss: 1.247 Accuracy: 0.253 Validation Accuracy: 0.156:   4%|▍         | 1629/37094 [02:47<57:42, 10.24it/s]

Epoch: 0 Iteration: 1630 Loss: 1.216 Validation Loss: 1.247 Accuracy: 0.263 Validation Accuracy: 0.156:   4%|▍         | 1629/37094 [02:47<57:42, 10.24it/s]

Epoch: 0 Iteration: 1630 Loss: 1.216 Validation Loss: 1.247 Accuracy: 0.263 Validation Accuracy: 0.156:   4%|▍         | 1631/37094 [02:47<57:35, 10.26it/s]

Epoch: 0 Iteration: 1631 Loss: 1.199 Validation Loss: 1.247 Accuracy: 0.272 Validation Accuracy: 0.156:   4%|▍         | 1631/37094 [02:47<57:35, 10.26it/s]

Epoch: 0 Iteration: 1632 Loss: 1.229 Validation Loss: 1.247 Accuracy: 0.259 Validation Accuracy: 0.156:   4%|▍         | 1631/37094 [02:47<57:35, 10.26it/s]

Epoch: 0 Iteration: 1632 Loss: 1.229 Validation Loss: 1.247 Accuracy: 0.259 Validation Accuracy: 0.156:   4%|▍         | 1633/37094 [02:47<57:30, 10.28it/s]

Epoch: 0 Iteration: 1633 Loss: 1.284 Validation Loss: 1.247 Accuracy: 0.275 Validation Accuracy: 0.156:   4%|▍         | 1633/37094 [02:47<57:30, 10.28it/s]

Epoch: 0 Iteration: 1634 Loss: 1.264 Validation Loss: 1.247 Accuracy: 0.284 Validation Accuracy: 0.156:   4%|▍         | 1633/37094 [02:47<57:30, 10.28it/s]

Epoch: 0 Iteration: 1634 Loss: 1.264 Validation Loss: 1.247 Accuracy: 0.284 Validation Accuracy: 0.156:   4%|▍         | 1635/37094 [02:47<57:26, 10.29it/s]

Epoch: 0 Iteration: 1635 Loss: 1.227 Validation Loss: 1.247 Accuracy: 0.306 Validation Accuracy: 0.156:   4%|▍         | 1635/37094 [02:47<57:26, 10.29it/s]

Epoch: 0 Iteration: 1636 Loss: 1.258 Validation Loss: 1.247 Accuracy: 0.300 Validation Accuracy: 0.156:   4%|▍         | 1635/37094 [02:48<57:26, 10.29it/s]

Epoch: 0 Iteration: 1636 Loss: 1.258 Validation Loss: 1.247 Accuracy: 0.300 Validation Accuracy: 0.156:   4%|▍         | 1637/37094 [02:48<57:23, 10.30it/s]

Epoch: 0 Iteration: 1637 Loss: 1.286 Validation Loss: 1.247 Accuracy: 0.297 Validation Accuracy: 0.156:   4%|▍         | 1637/37094 [02:48<57:23, 10.30it/s]

Epoch: 0 Iteration: 1638 Loss: 1.291 Validation Loss: 1.247 Accuracy: 0.294 Validation Accuracy: 0.156:   4%|▍         | 1637/37094 [02:48<57:23, 10.30it/s]

Epoch: 0 Iteration: 1638 Loss: 1.291 Validation Loss: 1.247 Accuracy: 0.294 Validation Accuracy: 0.156:   4%|▍         | 1639/37094 [02:48<57:24, 10.29it/s]

Epoch: 0 Iteration: 1639 Loss: 1.303 Validation Loss: 1.247 Accuracy: 0.269 Validation Accuracy: 0.156:   4%|▍         | 1639/37094 [02:48<57:24, 10.29it/s]

Epoch: 0 Iteration: 1640 Loss: 1.354 Validation Loss: 1.247 Accuracy: 0.244 Validation Accuracy: 0.156:   4%|▍         | 1639/37094 [02:48<57:24, 10.29it/s]

Epoch: 0 Iteration: 1640 Loss: 1.354 Validation Loss: 1.247 Accuracy: 0.244 Validation Accuracy: 0.156:   4%|▍         | 1641/37094 [02:48<57:22, 10.30it/s]

Epoch: 0 Iteration: 1641 Loss: 1.396 Validation Loss: 1.247 Accuracy: 0.228 Validation Accuracy: 0.156:   4%|▍         | 1641/37094 [02:48<57:22, 10.30it/s]

Epoch: 0 Iteration: 1642 Loss: 1.433 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   4%|▍         | 1641/37094 [02:48<57:22, 10.30it/s]

Epoch: 0 Iteration: 1642 Loss: 1.433 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   4%|▍         | 1643/37094 [02:48<57:21, 10.30it/s]

Epoch: 0 Iteration: 1643 Loss: 1.418 Validation Loss: 1.247 Accuracy: 0.234 Validation Accuracy: 0.156:   4%|▍         | 1643/37094 [02:48<57:21, 10.30it/s]

Epoch: 0 Iteration: 1644 Loss: 1.447 Validation Loss: 1.247 Accuracy: 0.228 Validation Accuracy: 0.156:   4%|▍         | 1643/37094 [02:48<57:21, 10.30it/s]

Epoch: 0 Iteration: 1644 Loss: 1.447 Validation Loss: 1.247 Accuracy: 0.228 Validation Accuracy: 0.156:   4%|▍         | 1645/37094 [02:48<57:21, 10.30it/s]

Epoch: 0 Iteration: 1645 Loss: 1.458 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   4%|▍         | 1645/37094 [02:48<57:21, 10.30it/s]

Epoch: 0 Iteration: 1646 Loss: 1.476 Validation Loss: 1.247 Accuracy: 0.184 Validation Accuracy: 0.156:   4%|▍         | 1645/37094 [02:48<57:21, 10.30it/s]

Epoch: 0 Iteration: 1646 Loss: 1.476 Validation Loss: 1.247 Accuracy: 0.184 Validation Accuracy: 0.156:   4%|▍         | 1647/37094 [02:48<57:22, 10.30it/s]

Epoch: 0 Iteration: 1647 Loss: 1.521 Validation Loss: 1.247 Accuracy: 0.181 Validation Accuracy: 0.156:   4%|▍         | 1647/37094 [02:49<57:22, 10.30it/s]

Epoch: 0 Iteration: 1648 Loss: 1.520 Validation Loss: 1.247 Accuracy: 0.181 Validation Accuracy: 0.156:   4%|▍         | 1647/37094 [02:49<57:22, 10.30it/s]

Epoch: 0 Iteration: 1648 Loss: 1.520 Validation Loss: 1.247 Accuracy: 0.181 Validation Accuracy: 0.156:   4%|▍         | 1649/37094 [02:49<57:22, 10.30it/s]

Epoch: 0 Iteration: 1649 Loss: 1.573 Validation Loss: 1.247 Accuracy: 0.184 Validation Accuracy: 0.156:   4%|▍         | 1649/37094 [02:49<57:22, 10.30it/s]

Epoch: 0 Iteration: 1650 Loss: 1.583 Validation Loss: 1.247 Accuracy: 0.194 Validation Accuracy: 0.156:   4%|▍         | 1649/37094 [02:49<57:22, 10.30it/s]

Epoch: 0 Iteration: 1650 Loss: 1.583 Validation Loss: 1.247 Accuracy: 0.194 Validation Accuracy: 0.156:   4%|▍         | 1651/37094 [02:49<57:20, 10.30it/s]

Epoch: 0 Iteration: 1651 Loss: 1.631 Validation Loss: 1.247 Accuracy: 0.194 Validation Accuracy: 0.156:   4%|▍         | 1651/37094 [02:49<57:20, 10.30it/s]

Epoch: 0 Iteration: 1652 Loss: 1.623 Validation Loss: 1.247 Accuracy: 0.175 Validation Accuracy: 0.156:   4%|▍         | 1651/37094 [02:49<57:20, 10.30it/s]

Epoch: 0 Iteration: 1652 Loss: 1.623 Validation Loss: 1.247 Accuracy: 0.175 Validation Accuracy: 0.156:   4%|▍         | 1653/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1653 Loss: 1.590 Validation Loss: 1.247 Accuracy: 0.169 Validation Accuracy: 0.156:   4%|▍         | 1653/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1654 Loss: 1.606 Validation Loss: 1.247 Accuracy: 0.166 Validation Accuracy: 0.156:   4%|▍         | 1653/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1654 Loss: 1.606 Validation Loss: 1.247 Accuracy: 0.166 Validation Accuracy: 0.156:   4%|▍         | 1655/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1655 Loss: 1.601 Validation Loss: 1.247 Accuracy: 0.172 Validation Accuracy: 0.156:   4%|▍         | 1655/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1656 Loss: 1.535 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   4%|▍         | 1655/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1656 Loss: 1.535 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   4%|▍         | 1657/37094 [02:49<57:18, 10.31it/s]

Epoch: 0 Iteration: 1657 Loss: 1.569 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   4%|▍         | 1657/37094 [02:50<57:18, 10.31it/s]

Epoch: 0 Iteration: 1658 Loss: 1.557 Validation Loss: 1.247 Accuracy: 0.147 Validation Accuracy: 0.156:   4%|▍         | 1657/37094 [02:50<57:18, 10.31it/s]

Epoch: 0 Iteration: 1658 Loss: 1.557 Validation Loss: 1.247 Accuracy: 0.147 Validation Accuracy: 0.156:   4%|▍         | 1659/37094 [02:50<57:17, 10.31it/s]

Epoch: 0 Iteration: 1659 Loss: 1.573 Validation Loss: 1.247 Accuracy: 0.144 Validation Accuracy: 0.156:   4%|▍         | 1659/37094 [02:50<57:17, 10.31it/s]

Epoch: 0 Iteration: 1660 Loss: 1.530 Validation Loss: 1.247 Accuracy: 0.138 Validation Accuracy: 0.156:   4%|▍         | 1659/37094 [02:50<57:17, 10.31it/s]

Epoch: 0 Iteration: 1660 Loss: 1.530 Validation Loss: 1.247 Accuracy: 0.138 Validation Accuracy: 0.156:   4%|▍         | 1661/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1661 Loss: 1.503 Validation Loss: 1.247 Accuracy: 0.144 Validation Accuracy: 0.156:   4%|▍         | 1661/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1662 Loss: 1.524 Validation Loss: 1.247 Accuracy: 0.156 Validation Accuracy: 0.156:   4%|▍         | 1661/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1662 Loss: 1.524 Validation Loss: 1.247 Accuracy: 0.156 Validation Accuracy: 0.156:   4%|▍         | 1663/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1663 Loss: 1.568 Validation Loss: 1.247 Accuracy: 0.144 Validation Accuracy: 0.156:   4%|▍         | 1663/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1664 Loss: 1.506 Validation Loss: 1.247 Accuracy: 0.141 Validation Accuracy: 0.156:   4%|▍         | 1663/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1664 Loss: 1.506 Validation Loss: 1.247 Accuracy: 0.141 Validation Accuracy: 0.156:   4%|▍         | 1665/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1665 Loss: 1.505 Validation Loss: 1.247 Accuracy: 0.134 Validation Accuracy: 0.156:   4%|▍         | 1665/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1666 Loss: 1.504 Validation Loss: 1.247 Accuracy: 0.144 Validation Accuracy: 0.156:   4%|▍         | 1665/37094 [02:50<57:16, 10.31it/s]

Epoch: 0 Iteration: 1666 Loss: 1.504 Validation Loss: 1.247 Accuracy: 0.144 Validation Accuracy: 0.156:   4%|▍         | 1667/37094 [02:50<57:21, 10.29it/s]

Epoch: 0 Iteration: 1667 Loss: 1.503 Validation Loss: 1.247 Accuracy: 0.144 Validation Accuracy: 0.156:   4%|▍         | 1667/37094 [02:51<57:21, 10.29it/s]

Epoch: 0 Iteration: 1668 Loss: 1.468 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   4%|▍         | 1667/37094 [02:51<57:21, 10.29it/s]

Epoch: 0 Iteration: 1668 Loss: 1.468 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   4%|▍         | 1669/37094 [02:51<57:19, 10.30it/s]

Epoch: 0 Iteration: 1669 Loss: 1.440 Validation Loss: 1.247 Accuracy: 0.166 Validation Accuracy: 0.156:   4%|▍         | 1669/37094 [02:51<57:19, 10.30it/s]

Epoch: 0 Iteration: 1670 Loss: 1.383 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   4%|▍         | 1669/37094 [02:51<57:19, 10.30it/s]

Epoch: 0 Iteration: 1670 Loss: 1.383 Validation Loss: 1.247 Accuracy: 0.163 Validation Accuracy: 0.156:   5%|▍         | 1671/37094 [02:51<57:18, 10.30it/s]

Epoch: 0 Iteration: 1671 Loss: 1.349 Validation Loss: 1.247 Accuracy: 0.175 Validation Accuracy: 0.156:   5%|▍         | 1671/37094 [02:51<57:18, 10.30it/s]

Epoch: 0 Iteration: 1672 Loss: 1.331 Validation Loss: 1.247 Accuracy: 0.169 Validation Accuracy: 0.156:   5%|▍         | 1671/37094 [02:51<57:18, 10.30it/s]

Epoch: 0 Iteration: 1672 Loss: 1.331 Validation Loss: 1.247 Accuracy: 0.169 Validation Accuracy: 0.156:   5%|▍         | 1673/37094 [02:51<57:16, 10.31it/s]

Epoch: 0 Iteration: 1673 Loss: 1.310 Validation Loss: 1.247 Accuracy: 0.191 Validation Accuracy: 0.156:   5%|▍         | 1673/37094 [02:51<57:16, 10.31it/s]

Epoch: 0 Iteration: 1674 Loss: 1.315 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   5%|▍         | 1673/37094 [02:51<57:16, 10.31it/s]

Epoch: 0 Iteration: 1674 Loss: 1.315 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   5%|▍         | 1675/37094 [02:51<57:16, 10.31it/s]

Epoch: 0 Iteration: 1675 Loss: 1.329 Validation Loss: 1.247 Accuracy: 0.209 Validation Accuracy: 0.156:   5%|▍         | 1675/37094 [02:51<57:16, 10.31it/s]

Epoch: 0 Iteration: 1676 Loss: 1.344 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   5%|▍         | 1675/37094 [02:51<57:16, 10.31it/s]

Epoch: 0 Iteration: 1676 Loss: 1.344 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   5%|▍         | 1677/37094 [02:51<57:14, 10.31it/s]

Epoch: 0 Iteration: 1677 Loss: 1.256 Validation Loss: 1.247 Accuracy: 0.225 Validation Accuracy: 0.156:   5%|▍         | 1677/37094 [02:51<57:14, 10.31it/s]

Epoch: 0 Iteration: 1678 Loss: 1.248 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   5%|▍         | 1677/37094 [02:52<57:14, 10.31it/s]

Epoch: 0 Iteration: 1678 Loss: 1.248 Validation Loss: 1.247 Accuracy: 0.237 Validation Accuracy: 0.156:   5%|▍         | 1679/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1679 Loss: 1.230 Validation Loss: 1.247 Accuracy: 0.231 Validation Accuracy: 0.156:   5%|▍         | 1679/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1680 Loss: 1.248 Validation Loss: 1.247 Accuracy: 0.250 Validation Accuracy: 0.156:   5%|▍         | 1679/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1680 Loss: 1.248 Validation Loss: 1.247 Accuracy: 0.250 Validation Accuracy: 0.156:   5%|▍         | 1681/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1681 Loss: 1.265 Validation Loss: 1.247 Accuracy: 0.234 Validation Accuracy: 0.156:   5%|▍         | 1681/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1682 Loss: 1.215 Validation Loss: 1.247 Accuracy: 0.234 Validation Accuracy: 0.156:   5%|▍         | 1681/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1682 Loss: 1.215 Validation Loss: 1.247 Accuracy: 0.234 Validation Accuracy: 0.156:   5%|▍         | 1683/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1683 Loss: 1.219 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   5%|▍         | 1683/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1684 Loss: 1.240 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   5%|▍         | 1683/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1684 Loss: 1.240 Validation Loss: 1.247 Accuracy: 0.219 Validation Accuracy: 0.156:   5%|▍         | 1685/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1685 Loss: 1.216 Validation Loss: 1.247 Accuracy: 0.225 Validation Accuracy: 0.156:   5%|▍         | 1685/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1686 Loss: 1.235 Validation Loss: 1.247 Accuracy: 0.222 Validation Accuracy: 0.156:   5%|▍         | 1685/37094 [02:52<57:12, 10.32it/s]

Epoch: 0 Iteration: 1686 Loss: 1.235 Validation Loss: 1.247 Accuracy: 0.222 Validation Accuracy: 0.156:   5%|▍         | 1687/37094 [02:52<57:13, 10.31it/s]

Epoch: 0 Iteration: 1687 Loss: 1.218 Validation Loss: 1.247 Accuracy: 0.216 Validation Accuracy: 0.156:   5%|▍         | 1687/37094 [02:52<57:13, 10.31it/s]

Epoch: 0 Iteration: 1688 Loss: 1.197 Validation Loss: 1.247 Accuracy: 0.200 Validation Accuracy: 0.156:   5%|▍         | 1687/37094 [02:53<57:13, 10.31it/s]

Epoch: 0 Iteration: 1688 Loss: 1.197 Validation Loss: 1.247 Accuracy: 0.200 Validation Accuracy: 0.156:   5%|▍         | 1689/37094 [02:53<57:13, 10.31it/s]

Epoch: 0 Iteration: 1689 Loss: 1.192 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   5%|▍         | 1689/37094 [02:53<57:13, 10.31it/s]

Epoch: 0 Iteration: 1690 Loss: 1.272 Validation Loss: 1.247 Accuracy: 0.197 Validation Accuracy: 0.156:   5%|▍         | 1689/37094 [02:53<57:13, 10.31it/s]

Epoch: 0 Iteration: 1690 Loss: 1.272 Validation Loss: 1.247 Accuracy: 0.197 Validation Accuracy: 0.156:   5%|▍         | 1691/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1691 Loss: 1.277 Validation Loss: 1.247 Accuracy: 0.216 Validation Accuracy: 0.156:   5%|▍         | 1691/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1692 Loss: 1.319 Validation Loss: 1.247 Accuracy: 0.206 Validation Accuracy: 0.156:   5%|▍         | 1691/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1692 Loss: 1.319 Validation Loss: 1.247 Accuracy: 0.206 Validation Accuracy: 0.156:   5%|▍         | 1693/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1693 Loss: 1.347 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   5%|▍         | 1693/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1694 Loss: 1.378 Validation Loss: 1.247 Accuracy: 0.197 Validation Accuracy: 0.156:   5%|▍         | 1693/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1694 Loss: 1.378 Validation Loss: 1.247 Accuracy: 0.197 Validation Accuracy: 0.156:   5%|▍         | 1695/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1695 Loss: 1.373 Validation Loss: 1.247 Accuracy: 0.200 Validation Accuracy: 0.156:   5%|▍         | 1695/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1696 Loss: 1.412 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   5%|▍         | 1695/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1696 Loss: 1.412 Validation Loss: 1.247 Accuracy: 0.203 Validation Accuracy: 0.156:   5%|▍         | 1697/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1697 Loss: 1.474 Validation Loss: 1.247 Accuracy: 0.206 Validation Accuracy: 0.156:   5%|▍         | 1697/37094 [02:53<57:12, 10.31it/s]

Epoch: 0 Iteration: 1698 Loss: 1.517 Validation Loss: 1.247 Accuracy: 0.209 Validation Accuracy: 0.156:   5%|▍         | 1697/37094 [02:54<57:12, 10.31it/s]

Epoch: 0 Iteration: 1698 Loss: 1.517 Validation Loss: 1.247 Accuracy: 0.209 Validation Accuracy: 0.156:   5%|▍         | 1699/37094 [02:54<57:13, 10.31it/s]

Epoch: 0 Iteration: 1699 Loss: 1.523 Validation Loss: 1.247 Accuracy: 0.212 Validation Accuracy: 0.156:   5%|▍         | 1699/37094 [02:54<57:13, 10.31it/s]

Epoch: 0 Iteration: 1700 Loss: 1.485 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1699/37094 [02:54<57:13, 10.31it/s]

Epoch: 0 Iteration: 1700 Loss: 1.485 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1701/37094 [02:54<1:44:59,  5.62it/s]

Epoch: 0 Iteration: 1701 Loss: 1.478 Validation Loss: 1.347 Accuracy: 0.216 Validation Accuracy: 0.164:   5%|▍         | 1701/37094 [02:54<1:44:59,  5.62it/s]

Epoch: 0 Iteration: 1702 Loss: 1.470 Validation Loss: 1.347 Accuracy: 0.231 Validation Accuracy: 0.164:   5%|▍         | 1701/37094 [02:54<1:44:59,  5.62it/s]

Epoch: 0 Iteration: 1702 Loss: 1.470 Validation Loss: 1.347 Accuracy: 0.231 Validation Accuracy: 0.164:   5%|▍         | 1703/37094 [02:54<1:29:56,  6.56it/s]

Epoch: 0 Iteration: 1703 Loss: 1.452 Validation Loss: 1.347 Accuracy: 0.237 Validation Accuracy: 0.164:   5%|▍         | 1703/37094 [02:55<1:29:56,  6.56it/s]

Epoch: 0 Iteration: 1704 Loss: 1.434 Validation Loss: 1.347 Accuracy: 0.241 Validation Accuracy: 0.164:   5%|▍         | 1703/37094 [02:55<1:29:56,  6.56it/s]

Epoch: 0 Iteration: 1704 Loss: 1.434 Validation Loss: 1.347 Accuracy: 0.241 Validation Accuracy: 0.164:   5%|▍         | 1705/37094 [02:55<1:20:08,  7.36it/s]

Epoch: 0 Iteration: 1705 Loss: 1.428 Validation Loss: 1.347 Accuracy: 0.234 Validation Accuracy: 0.164:   5%|▍         | 1705/37094 [02:55<1:20:08,  7.36it/s]

Epoch: 0 Iteration: 1706 Loss: 1.393 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1705/37094 [02:55<1:20:08,  7.36it/s]

Epoch: 0 Iteration: 1706 Loss: 1.393 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1707/37094 [02:55<1:13:15,  8.05it/s]

Epoch: 0 Iteration: 1707 Loss: 1.386 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1707/37094 [02:55<1:13:15,  8.05it/s]

Epoch: 0 Iteration: 1708 Loss: 1.495 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1707/37094 [02:55<1:13:15,  8.05it/s]

Epoch: 0 Iteration: 1708 Loss: 1.495 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1709/37094 [02:55<1:08:25,  8.62it/s]

Epoch: 0 Iteration: 1709 Loss: 1.523 Validation Loss: 1.347 Accuracy: 0.203 Validation Accuracy: 0.164:   5%|▍         | 1709/37094 [02:55<1:08:25,  8.62it/s]

Epoch: 0 Iteration: 1710 Loss: 1.498 Validation Loss: 1.347 Accuracy: 0.169 Validation Accuracy: 0.164:   5%|▍         | 1709/37094 [02:55<1:08:25,  8.62it/s]

Epoch: 0 Iteration: 1710 Loss: 1.498 Validation Loss: 1.347 Accuracy: 0.169 Validation Accuracy: 0.164:   5%|▍         | 1711/37094 [02:55<1:05:07,  9.06it/s]

Epoch: 0 Iteration: 1711 Loss: 1.493 Validation Loss: 1.347 Accuracy: 0.159 Validation Accuracy: 0.164:   5%|▍         | 1711/37094 [02:55<1:05:07,  9.06it/s]

Epoch: 0 Iteration: 1712 Loss: 1.443 Validation Loss: 1.347 Accuracy: 0.166 Validation Accuracy: 0.164:   5%|▍         | 1711/37094 [02:55<1:05:07,  9.06it/s]

Epoch: 0 Iteration: 1712 Loss: 1.443 Validation Loss: 1.347 Accuracy: 0.166 Validation Accuracy: 0.164:   5%|▍         | 1713/37094 [02:55<1:02:45,  9.40it/s]

Epoch: 0 Iteration: 1713 Loss: 1.439 Validation Loss: 1.347 Accuracy: 0.166 Validation Accuracy: 0.164:   5%|▍         | 1713/37094 [02:56<1:02:45,  9.40it/s]

Epoch: 0 Iteration: 1714 Loss: 1.401 Validation Loss: 1.347 Accuracy: 0.163 Validation Accuracy: 0.164:   5%|▍         | 1713/37094 [02:56<1:02:45,  9.40it/s]

Epoch: 0 Iteration: 1714 Loss: 1.401 Validation Loss: 1.347 Accuracy: 0.163 Validation Accuracy: 0.164:   5%|▍         | 1715/37094 [02:56<1:01:11,  9.64it/s]

Epoch: 0 Iteration: 1715 Loss: 1.417 Validation Loss: 1.347 Accuracy: 0.159 Validation Accuracy: 0.164:   5%|▍         | 1715/37094 [02:56<1:01:11,  9.64it/s]

Epoch: 0 Iteration: 1716 Loss: 1.434 Validation Loss: 1.347 Accuracy: 0.172 Validation Accuracy: 0.164:   5%|▍         | 1715/37094 [02:56<1:01:11,  9.64it/s]

Epoch: 0 Iteration: 1716 Loss: 1.434 Validation Loss: 1.347 Accuracy: 0.172 Validation Accuracy: 0.164:   5%|▍         | 1717/37094 [02:56<59:57,  9.83it/s]  

Epoch: 0 Iteration: 1717 Loss: 1.418 Validation Loss: 1.347 Accuracy: 0.169 Validation Accuracy: 0.164:   5%|▍         | 1717/37094 [02:56<59:57,  9.83it/s]

Epoch: 0 Iteration: 1718 Loss: 1.401 Validation Loss: 1.347 Accuracy: 0.178 Validation Accuracy: 0.164:   5%|▍         | 1717/37094 [02:56<59:57,  9.83it/s]

Epoch: 0 Iteration: 1718 Loss: 1.401 Validation Loss: 1.347 Accuracy: 0.178 Validation Accuracy: 0.164:   5%|▍         | 1719/37094 [02:56<59:10,  9.96it/s]

Epoch: 0 Iteration: 1719 Loss: 1.408 Validation Loss: 1.347 Accuracy: 0.191 Validation Accuracy: 0.164:   5%|▍         | 1719/37094 [02:56<59:10,  9.96it/s]

Epoch: 0 Iteration: 1720 Loss: 1.438 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1719/37094 [02:56<59:10,  9.96it/s]

Epoch: 0 Iteration: 1720 Loss: 1.438 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1721/37094 [02:56<58:34, 10.06it/s]

Epoch: 0 Iteration: 1721 Loss: 1.466 Validation Loss: 1.347 Accuracy: 0.222 Validation Accuracy: 0.164:   5%|▍         | 1721/37094 [02:56<58:34, 10.06it/s]

Epoch: 0 Iteration: 1722 Loss: 1.502 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1721/37094 [02:56<58:34, 10.06it/s]

Epoch: 0 Iteration: 1722 Loss: 1.502 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1723/37094 [02:56<58:13, 10.12it/s]

Epoch: 0 Iteration: 1723 Loss: 1.485 Validation Loss: 1.347 Accuracy: 0.203 Validation Accuracy: 0.164:   5%|▍         | 1723/37094 [02:56<58:13, 10.12it/s]

Epoch: 0 Iteration: 1724 Loss: 1.469 Validation Loss: 1.347 Accuracy: 0.197 Validation Accuracy: 0.164:   5%|▍         | 1723/37094 [02:57<58:13, 10.12it/s]

Epoch: 0 Iteration: 1724 Loss: 1.469 Validation Loss: 1.347 Accuracy: 0.197 Validation Accuracy: 0.164:   5%|▍         | 1725/37094 [02:57<57:54, 10.18it/s]

Epoch: 0 Iteration: 1725 Loss: 1.509 Validation Loss: 1.347 Accuracy: 0.200 Validation Accuracy: 0.164:   5%|▍         | 1725/37094 [02:57<57:54, 10.18it/s]

Epoch: 0 Iteration: 1726 Loss: 1.526 Validation Loss: 1.347 Accuracy: 0.188 Validation Accuracy: 0.164:   5%|▍         | 1725/37094 [02:57<57:54, 10.18it/s]

Epoch: 0 Iteration: 1726 Loss: 1.526 Validation Loss: 1.347 Accuracy: 0.188 Validation Accuracy: 0.164:   5%|▍         | 1727/37094 [02:57<57:42, 10.22it/s]

Epoch: 0 Iteration: 1727 Loss: 1.527 Validation Loss: 1.347 Accuracy: 0.203 Validation Accuracy: 0.164:   5%|▍         | 1727/37094 [02:57<57:42, 10.22it/s]

Epoch: 0 Iteration: 1728 Loss: 1.446 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1727/37094 [02:57<57:42, 10.22it/s]

Epoch: 0 Iteration: 1728 Loss: 1.446 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1729/37094 [02:57<57:31, 10.25it/s]

Epoch: 0 Iteration: 1729 Loss: 1.463 Validation Loss: 1.347 Accuracy: 0.216 Validation Accuracy: 0.164:   5%|▍         | 1729/37094 [02:57<57:31, 10.25it/s]

Epoch: 0 Iteration: 1730 Loss: 1.461 Validation Loss: 1.347 Accuracy: 0.206 Validation Accuracy: 0.164:   5%|▍         | 1729/37094 [02:57<57:31, 10.25it/s]

Epoch: 0 Iteration: 1730 Loss: 1.461 Validation Loss: 1.347 Accuracy: 0.206 Validation Accuracy: 0.164:   5%|▍         | 1731/37094 [02:57<57:25, 10.26it/s]

Epoch: 0 Iteration: 1731 Loss: 1.483 Validation Loss: 1.347 Accuracy: 0.194 Validation Accuracy: 0.164:   5%|▍         | 1731/37094 [02:57<57:25, 10.26it/s]

Epoch: 0 Iteration: 1732 Loss: 1.506 Validation Loss: 1.347 Accuracy: 0.200 Validation Accuracy: 0.164:   5%|▍         | 1731/37094 [02:57<57:25, 10.26it/s]

Epoch: 0 Iteration: 1732 Loss: 1.506 Validation Loss: 1.347 Accuracy: 0.200 Validation Accuracy: 0.164:   5%|▍         | 1733/37094 [02:57<57:20, 10.28it/s]

Epoch: 0 Iteration: 1733 Loss: 1.503 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1733/37094 [02:57<57:20, 10.28it/s]

Epoch: 0 Iteration: 1734 Loss: 1.509 Validation Loss: 1.347 Accuracy: 0.219 Validation Accuracy: 0.164:   5%|▍         | 1733/37094 [02:58<57:20, 10.28it/s]

Epoch: 0 Iteration: 1734 Loss: 1.509 Validation Loss: 1.347 Accuracy: 0.219 Validation Accuracy: 0.164:   5%|▍         | 1735/37094 [02:58<57:18, 10.28it/s]

Epoch: 0 Iteration: 1735 Loss: 1.502 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1735/37094 [02:58<57:18, 10.28it/s]

Epoch: 0 Iteration: 1736 Loss: 1.457 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1735/37094 [02:58<57:18, 10.28it/s]

Epoch: 0 Iteration: 1736 Loss: 1.457 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1737/37094 [02:58<57:16, 10.29it/s]

Epoch: 0 Iteration: 1737 Loss: 1.447 Validation Loss: 1.347 Accuracy: 0.200 Validation Accuracy: 0.164:   5%|▍         | 1737/37094 [02:58<57:16, 10.29it/s]

Epoch: 0 Iteration: 1738 Loss: 1.482 Validation Loss: 1.347 Accuracy: 0.181 Validation Accuracy: 0.164:   5%|▍         | 1737/37094 [02:58<57:16, 10.29it/s]

Epoch: 0 Iteration: 1738 Loss: 1.482 Validation Loss: 1.347 Accuracy: 0.181 Validation Accuracy: 0.164:   5%|▍         | 1739/37094 [02:58<57:14, 10.30it/s]

Epoch: 0 Iteration: 1739 Loss: 1.484 Validation Loss: 1.347 Accuracy: 0.175 Validation Accuracy: 0.164:   5%|▍         | 1739/37094 [02:58<57:14, 10.30it/s]

Epoch: 0 Iteration: 1740 Loss: 1.443 Validation Loss: 1.347 Accuracy: 0.188 Validation Accuracy: 0.164:   5%|▍         | 1739/37094 [02:58<57:14, 10.30it/s]

Epoch: 0 Iteration: 1740 Loss: 1.443 Validation Loss: 1.347 Accuracy: 0.188 Validation Accuracy: 0.164:   5%|▍         | 1741/37094 [02:58<57:11, 10.30it/s]

Epoch: 0 Iteration: 1741 Loss: 1.397 Validation Loss: 1.347 Accuracy: 0.194 Validation Accuracy: 0.164:   5%|▍         | 1741/37094 [02:58<57:11, 10.30it/s]

Epoch: 0 Iteration: 1742 Loss: 1.369 Validation Loss: 1.347 Accuracy: 0.194 Validation Accuracy: 0.164:   5%|▍         | 1741/37094 [02:58<57:11, 10.30it/s]

Epoch: 0 Iteration: 1742 Loss: 1.369 Validation Loss: 1.347 Accuracy: 0.194 Validation Accuracy: 0.164:   5%|▍         | 1743/37094 [02:58<57:15, 10.29it/s]

Epoch: 0 Iteration: 1743 Loss: 1.394 Validation Loss: 1.347 Accuracy: 0.194 Validation Accuracy: 0.164:   5%|▍         | 1743/37094 [02:58<57:15, 10.29it/s]

Epoch: 0 Iteration: 1744 Loss: 1.423 Validation Loss: 1.347 Accuracy: 0.188 Validation Accuracy: 0.164:   5%|▍         | 1743/37094 [02:59<57:15, 10.29it/s]

Epoch: 0 Iteration: 1744 Loss: 1.423 Validation Loss: 1.347 Accuracy: 0.188 Validation Accuracy: 0.164:   5%|▍         | 1745/37094 [02:59<57:14, 10.29it/s]

Epoch: 0 Iteration: 1745 Loss: 1.403 Validation Loss: 1.347 Accuracy: 0.197 Validation Accuracy: 0.164:   5%|▍         | 1745/37094 [02:59<57:14, 10.29it/s]

Epoch: 0 Iteration: 1746 Loss: 1.389 Validation Loss: 1.347 Accuracy: 0.191 Validation Accuracy: 0.164:   5%|▍         | 1745/37094 [02:59<57:14, 10.29it/s]

Epoch: 0 Iteration: 1746 Loss: 1.389 Validation Loss: 1.347 Accuracy: 0.191 Validation Accuracy: 0.164:   5%|▍         | 1747/37094 [02:59<57:11, 10.30it/s]

Epoch: 0 Iteration: 1747 Loss: 1.410 Validation Loss: 1.347 Accuracy: 0.197 Validation Accuracy: 0.164:   5%|▍         | 1747/37094 [02:59<57:11, 10.30it/s]

Epoch: 0 Iteration: 1748 Loss: 1.401 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1747/37094 [02:59<57:11, 10.30it/s]

Epoch: 0 Iteration: 1748 Loss: 1.401 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1749/37094 [02:59<57:10, 10.30it/s]

Epoch: 0 Iteration: 1749 Loss: 1.379 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1749/37094 [02:59<57:10, 10.30it/s]

Epoch: 0 Iteration: 1750 Loss: 1.362 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1749/37094 [02:59<57:10, 10.30it/s]

Epoch: 0 Iteration: 1750 Loss: 1.362 Validation Loss: 1.347 Accuracy: 0.212 Validation Accuracy: 0.164:   5%|▍         | 1751/37094 [02:59<57:08, 10.31it/s]

Epoch: 0 Iteration: 1751 Loss: 1.332 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1751/37094 [02:59<57:08, 10.31it/s]

Epoch: 0 Iteration: 1752 Loss: 1.339 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1751/37094 [02:59<57:08, 10.31it/s]

Epoch: 0 Iteration: 1752 Loss: 1.339 Validation Loss: 1.347 Accuracy: 0.209 Validation Accuracy: 0.164:   5%|▍         | 1753/37094 [02:59<57:08, 10.31it/s]

Epoch: 0 Iteration: 1753 Loss: 1.325 Validation Loss: 1.347 Accuracy: 0.216 Validation Accuracy: 0.164:   5%|▍         | 1753/37094 [02:59<57:08, 10.31it/s]

Epoch: 0 Iteration: 1754 Loss: 1.327 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1753/37094 [02:59<57:08, 10.31it/s]

Epoch: 0 Iteration: 1754 Loss: 1.327 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1755/37094 [02:59<57:09, 10.30it/s]

Epoch: 0 Iteration: 1755 Loss: 1.353 Validation Loss: 1.347 Accuracy: 0.237 Validation Accuracy: 0.164:   5%|▍         | 1755/37094 [03:00<57:09, 10.30it/s]

Epoch: 0 Iteration: 1756 Loss: 1.339 Validation Loss: 1.347 Accuracy: 0.250 Validation Accuracy: 0.164:   5%|▍         | 1755/37094 [03:00<57:09, 10.30it/s]

Epoch: 0 Iteration: 1756 Loss: 1.339 Validation Loss: 1.347 Accuracy: 0.250 Validation Accuracy: 0.164:   5%|▍         | 1757/37094 [03:00<57:09, 10.30it/s]

Epoch: 0 Iteration: 1757 Loss: 1.341 Validation Loss: 1.347 Accuracy: 0.259 Validation Accuracy: 0.164:   5%|▍         | 1757/37094 [03:00<57:09, 10.30it/s]

Epoch: 0 Iteration: 1758 Loss: 1.303 Validation Loss: 1.347 Accuracy: 0.256 Validation Accuracy: 0.164:   5%|▍         | 1757/37094 [03:00<57:09, 10.30it/s]

Epoch: 0 Iteration: 1758 Loss: 1.303 Validation Loss: 1.347 Accuracy: 0.256 Validation Accuracy: 0.164:   5%|▍         | 1759/37094 [03:00<57:07, 10.31it/s]

Epoch: 0 Iteration: 1759 Loss: 1.329 Validation Loss: 1.347 Accuracy: 0.263 Validation Accuracy: 0.164:   5%|▍         | 1759/37094 [03:00<57:07, 10.31it/s]

Epoch: 0 Iteration: 1760 Loss: 1.342 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1759/37094 [03:00<57:07, 10.31it/s]

Epoch: 0 Iteration: 1760 Loss: 1.342 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1761/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1761 Loss: 1.376 Validation Loss: 1.347 Accuracy: 0.250 Validation Accuracy: 0.164:   5%|▍         | 1761/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1762 Loss: 1.355 Validation Loss: 1.347 Accuracy: 0.269 Validation Accuracy: 0.164:   5%|▍         | 1761/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1762 Loss: 1.355 Validation Loss: 1.347 Accuracy: 0.269 Validation Accuracy: 0.164:   5%|▍         | 1763/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1763 Loss: 1.309 Validation Loss: 1.347 Accuracy: 0.259 Validation Accuracy: 0.164:   5%|▍         | 1763/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1764 Loss: 1.325 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1763/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1764 Loss: 1.325 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1765/37094 [03:00<57:06, 10.31it/s]

Epoch: 0 Iteration: 1765 Loss: 1.365 Validation Loss: 1.347 Accuracy: 0.247 Validation Accuracy: 0.164:   5%|▍         | 1765/37094 [03:01<57:06, 10.31it/s]

Epoch: 0 Iteration: 1766 Loss: 1.380 Validation Loss: 1.347 Accuracy: 0.244 Validation Accuracy: 0.164:   5%|▍         | 1765/37094 [03:01<57:06, 10.31it/s]

Epoch: 0 Iteration: 1766 Loss: 1.380 Validation Loss: 1.347 Accuracy: 0.244 Validation Accuracy: 0.164:   5%|▍         | 1767/37094 [03:01<57:11, 10.29it/s]

Epoch: 0 Iteration: 1767 Loss: 1.354 Validation Loss: 1.347 Accuracy: 0.259 Validation Accuracy: 0.164:   5%|▍         | 1767/37094 [03:01<57:11, 10.29it/s]

Epoch: 0 Iteration: 1768 Loss: 1.371 Validation Loss: 1.347 Accuracy: 0.266 Validation Accuracy: 0.164:   5%|▍         | 1767/37094 [03:01<57:11, 10.29it/s]

Epoch: 0 Iteration: 1768 Loss: 1.371 Validation Loss: 1.347 Accuracy: 0.266 Validation Accuracy: 0.164:   5%|▍         | 1769/37094 [03:01<57:10, 10.30it/s]

Epoch: 0 Iteration: 1769 Loss: 1.399 Validation Loss: 1.347 Accuracy: 0.256 Validation Accuracy: 0.164:   5%|▍         | 1769/37094 [03:01<57:10, 10.30it/s]

Epoch: 0 Iteration: 1770 Loss: 1.392 Validation Loss: 1.347 Accuracy: 0.256 Validation Accuracy: 0.164:   5%|▍         | 1769/37094 [03:01<57:10, 10.30it/s]

Epoch: 0 Iteration: 1770 Loss: 1.392 Validation Loss: 1.347 Accuracy: 0.256 Validation Accuracy: 0.164:   5%|▍         | 1771/37094 [03:01<57:09, 10.30it/s]

Epoch: 0 Iteration: 1771 Loss: 1.452 Validation Loss: 1.347 Accuracy: 0.266 Validation Accuracy: 0.164:   5%|▍         | 1771/37094 [03:01<57:09, 10.30it/s]

Epoch: 0 Iteration: 1772 Loss: 1.415 Validation Loss: 1.347 Accuracy: 0.259 Validation Accuracy: 0.164:   5%|▍         | 1771/37094 [03:01<57:09, 10.30it/s]

Epoch: 0 Iteration: 1772 Loss: 1.415 Validation Loss: 1.347 Accuracy: 0.259 Validation Accuracy: 0.164:   5%|▍         | 1773/37094 [03:01<57:07, 10.30it/s]

Epoch: 0 Iteration: 1773 Loss: 1.400 Validation Loss: 1.347 Accuracy: 0.266 Validation Accuracy: 0.164:   5%|▍         | 1773/37094 [03:01<57:07, 10.30it/s]

Epoch: 0 Iteration: 1774 Loss: 1.409 Validation Loss: 1.347 Accuracy: 0.263 Validation Accuracy: 0.164:   5%|▍         | 1773/37094 [03:01<57:07, 10.30it/s]

Epoch: 0 Iteration: 1774 Loss: 1.409 Validation Loss: 1.347 Accuracy: 0.263 Validation Accuracy: 0.164:   5%|▍         | 1775/37094 [03:01<57:07, 10.31it/s]

Epoch: 0 Iteration: 1775 Loss: 1.407 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1775/37094 [03:02<57:07, 10.31it/s]

Epoch: 0 Iteration: 1776 Loss: 1.418 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1775/37094 [03:02<57:07, 10.31it/s]

Epoch: 0 Iteration: 1776 Loss: 1.418 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1777/37094 [03:02<57:07, 10.30it/s]

Epoch: 0 Iteration: 1777 Loss: 1.440 Validation Loss: 1.347 Accuracy: 0.206 Validation Accuracy: 0.164:   5%|▍         | 1777/37094 [03:02<57:07, 10.30it/s]

Epoch: 0 Iteration: 1778 Loss: 1.434 Validation Loss: 1.347 Accuracy: 0.203 Validation Accuracy: 0.164:   5%|▍         | 1777/37094 [03:02<57:07, 10.30it/s]

Epoch: 0 Iteration: 1778 Loss: 1.434 Validation Loss: 1.347 Accuracy: 0.203 Validation Accuracy: 0.164:   5%|▍         | 1779/37094 [03:02<57:06, 10.31it/s]

Epoch: 0 Iteration: 1779 Loss: 1.398 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1779/37094 [03:02<57:06, 10.31it/s]

Epoch: 0 Iteration: 1780 Loss: 1.432 Validation Loss: 1.347 Accuracy: 0.234 Validation Accuracy: 0.164:   5%|▍         | 1779/37094 [03:02<57:06, 10.31it/s]

Epoch: 0 Iteration: 1780 Loss: 1.432 Validation Loss: 1.347 Accuracy: 0.234 Validation Accuracy: 0.164:   5%|▍         | 1781/37094 [03:02<57:05, 10.31it/s]

Epoch: 0 Iteration: 1781 Loss: 1.385 Validation Loss: 1.347 Accuracy: 0.241 Validation Accuracy: 0.164:   5%|▍         | 1781/37094 [03:02<57:05, 10.31it/s]

Epoch: 0 Iteration: 1782 Loss: 1.402 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1781/37094 [03:02<57:05, 10.31it/s]

Epoch: 0 Iteration: 1782 Loss: 1.402 Validation Loss: 1.347 Accuracy: 0.225 Validation Accuracy: 0.164:   5%|▍         | 1783/37094 [03:02<57:04, 10.31it/s]

Epoch: 0 Iteration: 1783 Loss: 1.431 Validation Loss: 1.347 Accuracy: 0.219 Validation Accuracy: 0.164:   5%|▍         | 1783/37094 [03:02<57:04, 10.31it/s]

Epoch: 0 Iteration: 1784 Loss: 1.406 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1783/37094 [03:02<57:04, 10.31it/s]

Epoch: 0 Iteration: 1784 Loss: 1.406 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1785/37094 [03:02<57:04, 10.31it/s]

Epoch: 0 Iteration: 1785 Loss: 1.388 Validation Loss: 1.347 Accuracy: 0.234 Validation Accuracy: 0.164:   5%|▍         | 1785/37094 [03:02<57:04, 10.31it/s]

Epoch: 0 Iteration: 1786 Loss: 1.375 Validation Loss: 1.347 Accuracy: 0.250 Validation Accuracy: 0.164:   5%|▍         | 1785/37094 [03:03<57:04, 10.31it/s]

Epoch: 0 Iteration: 1786 Loss: 1.375 Validation Loss: 1.347 Accuracy: 0.250 Validation Accuracy: 0.164:   5%|▍         | 1787/37094 [03:03<57:03, 10.31it/s]

Epoch: 0 Iteration: 1787 Loss: 1.369 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1787/37094 [03:03<57:03, 10.31it/s]

Epoch: 0 Iteration: 1788 Loss: 1.365 Validation Loss: 1.347 Accuracy: 0.244 Validation Accuracy: 0.164:   5%|▍         | 1787/37094 [03:03<57:03, 10.31it/s]

Epoch: 0 Iteration: 1788 Loss: 1.365 Validation Loss: 1.347 Accuracy: 0.244 Validation Accuracy: 0.164:   5%|▍         | 1789/37094 [03:03<57:03, 10.31it/s]

Epoch: 0 Iteration: 1789 Loss: 1.310 Validation Loss: 1.347 Accuracy: 0.244 Validation Accuracy: 0.164:   5%|▍         | 1789/37094 [03:03<57:03, 10.31it/s]

Epoch: 0 Iteration: 1790 Loss: 1.302 Validation Loss: 1.347 Accuracy: 0.241 Validation Accuracy: 0.164:   5%|▍         | 1789/37094 [03:03<57:03, 10.31it/s]

Epoch: 0 Iteration: 1790 Loss: 1.302 Validation Loss: 1.347 Accuracy: 0.241 Validation Accuracy: 0.164:   5%|▍         | 1791/37094 [03:03<57:02, 10.32it/s]

Epoch: 0 Iteration: 1791 Loss: 1.214 Validation Loss: 1.347 Accuracy: 0.244 Validation Accuracy: 0.164:   5%|▍         | 1791/37094 [03:03<57:02, 10.32it/s]

Epoch: 0 Iteration: 1792 Loss: 1.237 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1791/37094 [03:03<57:02, 10.32it/s]

Epoch: 0 Iteration: 1792 Loss: 1.237 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1793/37094 [03:03<57:02, 10.32it/s]

Epoch: 0 Iteration: 1793 Loss: 1.240 Validation Loss: 1.347 Accuracy: 0.231 Validation Accuracy: 0.164:   5%|▍         | 1793/37094 [03:03<57:02, 10.32it/s]

Epoch: 0 Iteration: 1794 Loss: 1.197 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1793/37094 [03:03<57:02, 10.32it/s]

Epoch: 0 Iteration: 1794 Loss: 1.197 Validation Loss: 1.347 Accuracy: 0.228 Validation Accuracy: 0.164:   5%|▍         | 1795/37094 [03:03<57:02, 10.31it/s]

Epoch: 0 Iteration: 1795 Loss: 1.180 Validation Loss: 1.347 Accuracy: 0.237 Validation Accuracy: 0.164:   5%|▍         | 1795/37094 [03:03<57:02, 10.31it/s]

Epoch: 0 Iteration: 1796 Loss: 1.150 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1795/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1796 Loss: 1.150 Validation Loss: 1.347 Accuracy: 0.253 Validation Accuracy: 0.164:   5%|▍         | 1797/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1797 Loss: 1.139 Validation Loss: 1.347 Accuracy: 0.263 Validation Accuracy: 0.164:   5%|▍         | 1797/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1798 Loss: 1.097 Validation Loss: 1.347 Accuracy: 0.269 Validation Accuracy: 0.164:   5%|▍         | 1797/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1798 Loss: 1.097 Validation Loss: 1.347 Accuracy: 0.269 Validation Accuracy: 0.164:   5%|▍         | 1799/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1799 Loss: 1.072 Validation Loss: 1.347 Accuracy: 0.269 Validation Accuracy: 0.164:   5%|▍         | 1799/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1800 Loss: 1.099 Validation Loss: 1.267 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▍         | 1799/37094 [03:04<57:02, 10.31it/s]

Epoch: 0 Iteration: 1800 Loss: 1.099 Validation Loss: 1.267 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▍         | 1801/37094 [03:04<1:44:49,  5.61it/s]

Epoch: 0 Iteration: 1801 Loss: 1.109 Validation Loss: 1.267 Accuracy: 0.256 Validation Accuracy: 0.179:   5%|▍         | 1801/37094 [03:05<1:44:49,  5.61it/s]

Epoch: 0 Iteration: 1802 Loss: 1.171 Validation Loss: 1.267 Accuracy: 0.278 Validation Accuracy: 0.179:   5%|▍         | 1801/37094 [03:05<1:44:49,  5.61it/s]

Epoch: 0 Iteration: 1802 Loss: 1.171 Validation Loss: 1.267 Accuracy: 0.278 Validation Accuracy: 0.179:   5%|▍         | 1803/37094 [03:05<1:29:45,  6.55it/s]

Epoch: 0 Iteration: 1803 Loss: 1.194 Validation Loss: 1.267 Accuracy: 0.269 Validation Accuracy: 0.179:   5%|▍         | 1803/37094 [03:05<1:29:45,  6.55it/s]

Epoch: 0 Iteration: 1804 Loss: 1.205 Validation Loss: 1.267 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▍         | 1803/37094 [03:05<1:29:45,  6.55it/s]

Epoch: 0 Iteration: 1804 Loss: 1.205 Validation Loss: 1.267 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▍         | 1805/37094 [03:05<1:19:57,  7.36it/s]

Epoch: 0 Iteration: 1805 Loss: 1.183 Validation Loss: 1.267 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▍         | 1805/37094 [03:05<1:19:57,  7.36it/s]

Epoch: 0 Iteration: 1806 Loss: 1.205 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▍         | 1805/37094 [03:05<1:19:57,  7.36it/s]

Epoch: 0 Iteration: 1806 Loss: 1.205 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▍         | 1807/37094 [03:05<1:13:05,  8.05it/s]

Epoch: 0 Iteration: 1807 Loss: 1.223 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▍         | 1807/37094 [03:05<1:13:05,  8.05it/s]

Epoch: 0 Iteration: 1808 Loss: 1.217 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▍         | 1807/37094 [03:05<1:13:05,  8.05it/s]

Epoch: 0 Iteration: 1808 Loss: 1.217 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▍         | 1809/37094 [03:05<1:08:15,  8.62it/s]

Epoch: 0 Iteration: 1809 Loss: 1.249 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▍         | 1809/37094 [03:05<1:08:15,  8.62it/s]

Epoch: 0 Iteration: 1810 Loss: 1.303 Validation Loss: 1.267 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▍         | 1809/37094 [03:05<1:08:15,  8.62it/s]

Epoch: 0 Iteration: 1810 Loss: 1.303 Validation Loss: 1.267 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▍         | 1811/37094 [03:05<1:04:52,  9.06it/s]

Epoch: 0 Iteration: 1811 Loss: 1.324 Validation Loss: 1.267 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▍         | 1811/37094 [03:06<1:04:52,  9.06it/s]

Epoch: 0 Iteration: 1812 Loss: 1.340 Validation Loss: 1.267 Accuracy: 0.256 Validation Accuracy: 0.179:   5%|▍         | 1811/37094 [03:06<1:04:52,  9.06it/s]

Epoch: 0 Iteration: 1812 Loss: 1.340 Validation Loss: 1.267 Accuracy: 0.256 Validation Accuracy: 0.179:   5%|▍         | 1813/37094 [03:06<1:02:30,  9.41it/s]

Epoch: 0 Iteration: 1813 Loss: 1.396 Validation Loss: 1.267 Accuracy: 0.275 Validation Accuracy: 0.179:   5%|▍         | 1813/37094 [03:06<1:02:30,  9.41it/s]

Epoch: 0 Iteration: 1814 Loss: 1.446 Validation Loss: 1.267 Accuracy: 0.272 Validation Accuracy: 0.179:   5%|▍         | 1813/37094 [03:06<1:02:30,  9.41it/s]

Epoch: 0 Iteration: 1814 Loss: 1.446 Validation Loss: 1.267 Accuracy: 0.272 Validation Accuracy: 0.179:   5%|▍         | 1815/37094 [03:06<1:00:54,  9.65it/s]

Epoch: 0 Iteration: 1815 Loss: 1.456 Validation Loss: 1.267 Accuracy: 0.284 Validation Accuracy: 0.179:   5%|▍         | 1815/37094 [03:06<1:00:54,  9.65it/s]

Epoch: 0 Iteration: 1816 Loss: 1.493 Validation Loss: 1.267 Accuracy: 0.281 Validation Accuracy: 0.179:   5%|▍         | 1815/37094 [03:06<1:00:54,  9.65it/s]

Epoch: 0 Iteration: 1816 Loss: 1.493 Validation Loss: 1.267 Accuracy: 0.281 Validation Accuracy: 0.179:   5%|▍         | 1817/37094 [03:06<59:42,  9.85it/s]  

Epoch: 0 Iteration: 1817 Loss: 1.475 Validation Loss: 1.267 Accuracy: 0.269 Validation Accuracy: 0.179:   5%|▍         | 1817/37094 [03:06<59:42,  9.85it/s]

Epoch: 0 Iteration: 1818 Loss: 1.489 Validation Loss: 1.267 Accuracy: 0.278 Validation Accuracy: 0.179:   5%|▍         | 1817/37094 [03:06<59:42,  9.85it/s]

Epoch: 0 Iteration: 1818 Loss: 1.489 Validation Loss: 1.267 Accuracy: 0.278 Validation Accuracy: 0.179:   5%|▍         | 1819/37094 [03:06<58:55,  9.98it/s]

Epoch: 0 Iteration: 1819 Loss: 1.518 Validation Loss: 1.267 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▍         | 1819/37094 [03:06<58:55,  9.98it/s]

Epoch: 0 Iteration: 1820 Loss: 1.500 Validation Loss: 1.267 Accuracy: 0.275 Validation Accuracy: 0.179:   5%|▍         | 1819/37094 [03:06<58:55,  9.98it/s]

Epoch: 0 Iteration: 1820 Loss: 1.500 Validation Loss: 1.267 Accuracy: 0.275 Validation Accuracy: 0.179:   5%|▍         | 1821/37094 [03:06<58:20, 10.08it/s]

Epoch: 0 Iteration: 1821 Loss: 1.524 Validation Loss: 1.267 Accuracy: 0.272 Validation Accuracy: 0.179:   5%|▍         | 1821/37094 [03:07<58:20, 10.08it/s]

Epoch: 0 Iteration: 1822 Loss: 1.517 Validation Loss: 1.267 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▍         | 1821/37094 [03:07<58:20, 10.08it/s]

Epoch: 0 Iteration: 1822 Loss: 1.517 Validation Loss: 1.267 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▍         | 1823/37094 [03:07<57:58, 10.14it/s]

Epoch: 0 Iteration: 1823 Loss: 1.547 Validation Loss: 1.267 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▍         | 1823/37094 [03:07<57:58, 10.14it/s]

Epoch: 0 Iteration: 1824 Loss: 1.535 Validation Loss: 1.267 Accuracy: 0.247 Validation Accuracy: 0.179:   5%|▍         | 1823/37094 [03:07<57:58, 10.14it/s]

Epoch: 0 Iteration: 1824 Loss: 1.535 Validation Loss: 1.267 Accuracy: 0.247 Validation Accuracy: 0.179:   5%|▍         | 1825/37094 [03:07<57:43, 10.18it/s]

Epoch: 0 Iteration: 1825 Loss: 1.539 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▍         | 1825/37094 [03:07<57:43, 10.18it/s]

Epoch: 0 Iteration: 1826 Loss: 1.550 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▍         | 1825/37094 [03:07<57:43, 10.18it/s]

Epoch: 0 Iteration: 1826 Loss: 1.550 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▍         | 1827/37094 [03:07<57:31, 10.22it/s]

Epoch: 0 Iteration: 1827 Loss: 1.524 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▍         | 1827/37094 [03:07<57:31, 10.22it/s]

Epoch: 0 Iteration: 1828 Loss: 1.579 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▍         | 1827/37094 [03:07<57:31, 10.22it/s]

Epoch: 0 Iteration: 1828 Loss: 1.579 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▍         | 1829/37094 [03:07<57:23, 10.24it/s]

Epoch: 0 Iteration: 1829 Loss: 1.573 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▍         | 1829/37094 [03:07<57:23, 10.24it/s]

Epoch: 0 Iteration: 1830 Loss: 1.516 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▍         | 1829/37094 [03:07<57:23, 10.24it/s]

Epoch: 0 Iteration: 1830 Loss: 1.516 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▍         | 1831/37094 [03:07<57:13, 10.27it/s]

Epoch: 0 Iteration: 1831 Loss: 1.560 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▍         | 1831/37094 [03:07<57:13, 10.27it/s]

Epoch: 0 Iteration: 1832 Loss: 1.571 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▍         | 1831/37094 [03:08<57:13, 10.27it/s]

Epoch: 0 Iteration: 1832 Loss: 1.571 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▍         | 1833/37094 [03:08<57:08, 10.29it/s]

Epoch: 0 Iteration: 1833 Loss: 1.561 Validation Loss: 1.267 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▍         | 1833/37094 [03:08<57:08, 10.29it/s]

Epoch: 0 Iteration: 1834 Loss: 1.511 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▍         | 1833/37094 [03:08<57:08, 10.29it/s]

Epoch: 0 Iteration: 1834 Loss: 1.511 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▍         | 1835/37094 [03:08<57:05, 10.29it/s]

Epoch: 0 Iteration: 1835 Loss: 1.520 Validation Loss: 1.267 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▍         | 1835/37094 [03:08<57:05, 10.29it/s]

Epoch: 0 Iteration: 1836 Loss: 1.510 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▍         | 1835/37094 [03:08<57:05, 10.29it/s]

Epoch: 0 Iteration: 1836 Loss: 1.510 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▍         | 1837/37094 [03:08<57:02, 10.30it/s]

Epoch: 0 Iteration: 1837 Loss: 1.549 Validation Loss: 1.267 Accuracy: 0.191 Validation Accuracy: 0.179:   5%|▍         | 1837/37094 [03:08<57:02, 10.30it/s]

Epoch: 0 Iteration: 1838 Loss: 1.582 Validation Loss: 1.267 Accuracy: 0.178 Validation Accuracy: 0.179:   5%|▍         | 1837/37094 [03:08<57:02, 10.30it/s]

Epoch: 0 Iteration: 1838 Loss: 1.582 Validation Loss: 1.267 Accuracy: 0.178 Validation Accuracy: 0.179:   5%|▍         | 1839/37094 [03:08<57:04, 10.30it/s]

Epoch: 0 Iteration: 1839 Loss: 1.591 Validation Loss: 1.267 Accuracy: 0.166 Validation Accuracy: 0.179:   5%|▍         | 1839/37094 [03:08<57:04, 10.30it/s]

Epoch: 0 Iteration: 1840 Loss: 1.565 Validation Loss: 1.267 Accuracy: 0.153 Validation Accuracy: 0.179:   5%|▍         | 1839/37094 [03:08<57:04, 10.30it/s]

Epoch: 0 Iteration: 1840 Loss: 1.565 Validation Loss: 1.267 Accuracy: 0.153 Validation Accuracy: 0.179:   5%|▍         | 1841/37094 [03:08<57:02, 10.30it/s]

Epoch: 0 Iteration: 1841 Loss: 1.572 Validation Loss: 1.267 Accuracy: 0.150 Validation Accuracy: 0.179:   5%|▍         | 1841/37094 [03:08<57:02, 10.30it/s]

Epoch: 0 Iteration: 1842 Loss: 1.532 Validation Loss: 1.267 Accuracy: 0.159 Validation Accuracy: 0.179:   5%|▍         | 1841/37094 [03:09<57:02, 10.30it/s]

Epoch: 0 Iteration: 1842 Loss: 1.532 Validation Loss: 1.267 Accuracy: 0.159 Validation Accuracy: 0.179:   5%|▍         | 1843/37094 [03:09<57:02, 10.30it/s]

Epoch: 0 Iteration: 1843 Loss: 1.445 Validation Loss: 1.267 Accuracy: 0.169 Validation Accuracy: 0.179:   5%|▍         | 1843/37094 [03:09<57:02, 10.30it/s]

Epoch: 0 Iteration: 1844 Loss: 1.435 Validation Loss: 1.267 Accuracy: 0.159 Validation Accuracy: 0.179:   5%|▍         | 1843/37094 [03:09<57:02, 10.30it/s]

Epoch: 0 Iteration: 1844 Loss: 1.435 Validation Loss: 1.267 Accuracy: 0.159 Validation Accuracy: 0.179:   5%|▍         | 1845/37094 [03:09<57:01, 10.30it/s]

Epoch: 0 Iteration: 1845 Loss: 1.456 Validation Loss: 1.267 Accuracy: 0.150 Validation Accuracy: 0.179:   5%|▍         | 1845/37094 [03:09<57:01, 10.30it/s]

Epoch: 0 Iteration: 1846 Loss: 1.441 Validation Loss: 1.267 Accuracy: 0.150 Validation Accuracy: 0.179:   5%|▍         | 1845/37094 [03:09<57:01, 10.30it/s]

Epoch: 0 Iteration: 1846 Loss: 1.441 Validation Loss: 1.267 Accuracy: 0.150 Validation Accuracy: 0.179:   5%|▍         | 1847/37094 [03:09<56:59, 10.31it/s]

Epoch: 0 Iteration: 1847 Loss: 1.477 Validation Loss: 1.267 Accuracy: 0.147 Validation Accuracy: 0.179:   5%|▍         | 1847/37094 [03:09<56:59, 10.31it/s]

Epoch: 0 Iteration: 1848 Loss: 1.486 Validation Loss: 1.267 Accuracy: 0.153 Validation Accuracy: 0.179:   5%|▍         | 1847/37094 [03:09<56:59, 10.31it/s]

Epoch: 0 Iteration: 1848 Loss: 1.486 Validation Loss: 1.267 Accuracy: 0.153 Validation Accuracy: 0.179:   5%|▍         | 1849/37094 [03:09<56:59, 10.31it/s]

Epoch: 0 Iteration: 1849 Loss: 1.486 Validation Loss: 1.267 Accuracy: 0.156 Validation Accuracy: 0.179:   5%|▍         | 1849/37094 [03:09<56:59, 10.31it/s]

Epoch: 0 Iteration: 1850 Loss: 1.505 Validation Loss: 1.267 Accuracy: 0.166 Validation Accuracy: 0.179:   5%|▍         | 1849/37094 [03:09<56:59, 10.31it/s]

Epoch: 0 Iteration: 1850 Loss: 1.505 Validation Loss: 1.267 Accuracy: 0.166 Validation Accuracy: 0.179:   5%|▍         | 1851/37094 [03:09<57:00, 10.30it/s]

Epoch: 0 Iteration: 1851 Loss: 1.476 Validation Loss: 1.267 Accuracy: 0.169 Validation Accuracy: 0.179:   5%|▍         | 1851/37094 [03:09<57:00, 10.30it/s]

Epoch: 0 Iteration: 1852 Loss: 1.475 Validation Loss: 1.267 Accuracy: 0.156 Validation Accuracy: 0.179:   5%|▍         | 1851/37094 [03:10<57:00, 10.30it/s]

Epoch: 0 Iteration: 1852 Loss: 1.475 Validation Loss: 1.267 Accuracy: 0.156 Validation Accuracy: 0.179:   5%|▍         | 1853/37094 [03:10<56:59, 10.31it/s]

Epoch: 0 Iteration: 1853 Loss: 1.455 Validation Loss: 1.267 Accuracy: 0.156 Validation Accuracy: 0.179:   5%|▍         | 1853/37094 [03:10<56:59, 10.31it/s]

Epoch: 0 Iteration: 1854 Loss: 1.505 Validation Loss: 1.267 Accuracy: 0.166 Validation Accuracy: 0.179:   5%|▍         | 1853/37094 [03:10<56:59, 10.31it/s]

Epoch: 0 Iteration: 1854 Loss: 1.505 Validation Loss: 1.267 Accuracy: 0.166 Validation Accuracy: 0.179:   5%|▌         | 1855/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1855 Loss: 1.482 Validation Loss: 1.267 Accuracy: 0.169 Validation Accuracy: 0.179:   5%|▌         | 1855/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1856 Loss: 1.516 Validation Loss: 1.267 Accuracy: 0.169 Validation Accuracy: 0.179:   5%|▌         | 1855/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1856 Loss: 1.516 Validation Loss: 1.267 Accuracy: 0.169 Validation Accuracy: 0.179:   5%|▌         | 1857/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1857 Loss: 1.467 Validation Loss: 1.267 Accuracy: 0.191 Validation Accuracy: 0.179:   5%|▌         | 1857/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1858 Loss: 1.472 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1857/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1858 Loss: 1.472 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1859/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1859 Loss: 1.464 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1859/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1860 Loss: 1.456 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1859/37094 [03:10<57:01, 10.30it/s]

Epoch: 0 Iteration: 1860 Loss: 1.456 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1861/37094 [03:10<56:59, 10.30it/s]

Epoch: 0 Iteration: 1861 Loss: 1.428 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1861/37094 [03:10<56:59, 10.30it/s]

Epoch: 0 Iteration: 1862 Loss: 1.436 Validation Loss: 1.267 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1861/37094 [03:10<56:59, 10.30it/s]

Epoch: 0 Iteration: 1862 Loss: 1.436 Validation Loss: 1.267 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1863/37094 [03:10<56:58, 10.31it/s]

Epoch: 0 Iteration: 1863 Loss: 1.461 Validation Loss: 1.267 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1863/37094 [03:11<56:58, 10.31it/s]

Epoch: 0 Iteration: 1864 Loss: 1.484 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1863/37094 [03:11<56:58, 10.31it/s]

Epoch: 0 Iteration: 1864 Loss: 1.484 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1865/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1865 Loss: 1.478 Validation Loss: 1.267 Accuracy: 0.241 Validation Accuracy: 0.179:   5%|▌         | 1865/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1866 Loss: 1.477 Validation Loss: 1.267 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1865/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1866 Loss: 1.477 Validation Loss: 1.267 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1867/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1867 Loss: 1.469 Validation Loss: 1.267 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1867/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1868 Loss: 1.409 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1867/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1868 Loss: 1.409 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1869/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1869 Loss: 1.455 Validation Loss: 1.267 Accuracy: 0.225 Validation Accuracy: 0.179:   5%|▌         | 1869/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1870 Loss: 1.477 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1869/37094 [03:11<57:00, 10.30it/s]

Epoch: 0 Iteration: 1870 Loss: 1.477 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1871/37094 [03:11<56:59, 10.30it/s]

Epoch: 0 Iteration: 1871 Loss: 1.520 Validation Loss: 1.267 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1871/37094 [03:11<56:59, 10.30it/s]

Epoch: 0 Iteration: 1872 Loss: 1.501 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1871/37094 [03:11<56:59, 10.30it/s]

Epoch: 0 Iteration: 1872 Loss: 1.501 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1873/37094 [03:11<56:58, 10.30it/s]

Epoch: 0 Iteration: 1873 Loss: 1.542 Validation Loss: 1.267 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1873/37094 [03:12<56:58, 10.30it/s]

Epoch: 0 Iteration: 1874 Loss: 1.514 Validation Loss: 1.267 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1873/37094 [03:12<56:58, 10.30it/s]

Epoch: 0 Iteration: 1874 Loss: 1.514 Validation Loss: 1.267 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1875/37094 [03:12<56:57, 10.31it/s]

Epoch: 0 Iteration: 1875 Loss: 1.548 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1875/37094 [03:12<56:57, 10.31it/s]

Epoch: 0 Iteration: 1876 Loss: 1.540 Validation Loss: 1.267 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1875/37094 [03:12<56:57, 10.31it/s]

Epoch: 0 Iteration: 1876 Loss: 1.540 Validation Loss: 1.267 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1877/37094 [03:12<56:56, 10.31it/s]

Epoch: 0 Iteration: 1877 Loss: 1.590 Validation Loss: 1.267 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1877/37094 [03:12<56:56, 10.31it/s]

Epoch: 0 Iteration: 1878 Loss: 1.531 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1877/37094 [03:12<56:56, 10.31it/s]

Epoch: 0 Iteration: 1878 Loss: 1.531 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1879/37094 [03:12<56:57, 10.31it/s]

Epoch: 0 Iteration: 1879 Loss: 1.521 Validation Loss: 1.267 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1879/37094 [03:12<56:57, 10.31it/s]

Epoch: 0 Iteration: 1880 Loss: 1.517 Validation Loss: 1.267 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1879/37094 [03:12<56:57, 10.31it/s]

Epoch: 0 Iteration: 1880 Loss: 1.517 Validation Loss: 1.267 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1881/37094 [03:12<56:56, 10.31it/s]

Epoch: 0 Iteration: 1881 Loss: 1.513 Validation Loss: 1.267 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1881/37094 [03:12<56:56, 10.31it/s]

Epoch: 0 Iteration: 1882 Loss: 1.544 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1881/37094 [03:12<56:56, 10.31it/s]

Epoch: 0 Iteration: 1882 Loss: 1.544 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1883/37094 [03:12<56:59, 10.30it/s]

Epoch: 0 Iteration: 1883 Loss: 1.551 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1883/37094 [03:13<56:59, 10.30it/s]

Epoch: 0 Iteration: 1884 Loss: 1.552 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1883/37094 [03:13<56:59, 10.30it/s]

Epoch: 0 Iteration: 1884 Loss: 1.552 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1885/37094 [03:13<56:57, 10.30it/s]

Epoch: 0 Iteration: 1885 Loss: 1.524 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1885/37094 [03:13<56:57, 10.30it/s]

Epoch: 0 Iteration: 1886 Loss: 1.545 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1885/37094 [03:13<56:57, 10.30it/s]

Epoch: 0 Iteration: 1886 Loss: 1.545 Validation Loss: 1.267 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1887/37094 [03:13<56:56, 10.31it/s]

Epoch: 0 Iteration: 1887 Loss: 1.542 Validation Loss: 1.267 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1887/37094 [03:13<56:56, 10.31it/s]

Epoch: 0 Iteration: 1888 Loss: 1.542 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1887/37094 [03:13<56:56, 10.31it/s]

Epoch: 0 Iteration: 1888 Loss: 1.542 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1889/37094 [03:13<56:55, 10.31it/s]

Epoch: 0 Iteration: 1889 Loss: 1.511 Validation Loss: 1.267 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1889/37094 [03:13<56:55, 10.31it/s]

Epoch: 0 Iteration: 1890 Loss: 1.475 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1889/37094 [03:13<56:55, 10.31it/s]

Epoch: 0 Iteration: 1890 Loss: 1.475 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1891/37094 [03:13<56:54, 10.31it/s]

Epoch: 0 Iteration: 1891 Loss: 1.439 Validation Loss: 1.267 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1891/37094 [03:13<56:54, 10.31it/s]

Epoch: 0 Iteration: 1892 Loss: 1.386 Validation Loss: 1.267 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1891/37094 [03:13<56:54, 10.31it/s]

Epoch: 0 Iteration: 1892 Loss: 1.386 Validation Loss: 1.267 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1893/37094 [03:13<56:54, 10.31it/s]

Epoch: 0 Iteration: 1893 Loss: 1.376 Validation Loss: 1.267 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1893/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1894 Loss: 1.332 Validation Loss: 1.267 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1893/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1894 Loss: 1.332 Validation Loss: 1.267 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1895/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1895 Loss: 1.288 Validation Loss: 1.267 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1895/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1896 Loss: 1.241 Validation Loss: 1.267 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1895/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1896 Loss: 1.241 Validation Loss: 1.267 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1897/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1897 Loss: 1.238 Validation Loss: 1.267 Accuracy: 0.225 Validation Accuracy: 0.179:   5%|▌         | 1897/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1898 Loss: 1.255 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1897/37094 [03:14<56:54, 10.31it/s]

Epoch: 0 Iteration: 1898 Loss: 1.255 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1899/37094 [03:14<56:53, 10.31it/s]

Epoch: 0 Iteration: 1899 Loss: 1.253 Validation Loss: 1.267 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1899/37094 [03:14<56:53, 10.31it/s]

Epoch: 0 Iteration: 1900 Loss: 1.237 Validation Loss: 1.261 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1899/37094 [03:15<56:53, 10.31it/s]

Epoch: 0 Iteration: 1900 Loss: 1.237 Validation Loss: 1.261 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1901/37094 [03:15<1:44:17,  5.62it/s]

Epoch: 0 Iteration: 1901 Loss: 1.223 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1901/37094 [03:15<1:44:17,  5.62it/s]

Epoch: 0 Iteration: 1902 Loss: 1.177 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1901/37094 [03:15<1:44:17,  5.62it/s]

Epoch: 0 Iteration: 1902 Loss: 1.177 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1903/37094 [03:15<1:29:18,  6.57it/s]

Epoch: 0 Iteration: 1903 Loss: 1.176 Validation Loss: 1.261 Accuracy: 0.225 Validation Accuracy: 0.179:   5%|▌         | 1903/37094 [03:15<1:29:18,  6.57it/s]

Epoch: 0 Iteration: 1904 Loss: 1.179 Validation Loss: 1.261 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1903/37094 [03:15<1:29:18,  6.57it/s]

Epoch: 0 Iteration: 1904 Loss: 1.179 Validation Loss: 1.261 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1905/37094 [03:15<1:19:36,  7.37it/s]

Epoch: 0 Iteration: 1905 Loss: 1.221 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1905/37094 [03:15<1:19:36,  7.37it/s]

Epoch: 0 Iteration: 1906 Loss: 1.186 Validation Loss: 1.261 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1905/37094 [03:15<1:19:36,  7.37it/s]

Epoch: 0 Iteration: 1906 Loss: 1.186 Validation Loss: 1.261 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1907/37094 [03:15<1:12:47,  8.06it/s]

Epoch: 0 Iteration: 1907 Loss: 1.185 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1907/37094 [03:15<1:12:47,  8.06it/s]

Epoch: 0 Iteration: 1908 Loss: 1.213 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1907/37094 [03:15<1:12:47,  8.06it/s]

Epoch: 0 Iteration: 1908 Loss: 1.213 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1909/37094 [03:15<1:08:00,  8.62it/s]

Epoch: 0 Iteration: 1909 Loss: 1.215 Validation Loss: 1.261 Accuracy: 0.263 Validation Accuracy: 0.179:   5%|▌         | 1909/37094 [03:16<1:08:00,  8.62it/s]

Epoch: 0 Iteration: 1910 Loss: 1.217 Validation Loss: 1.261 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▌         | 1909/37094 [03:16<1:08:00,  8.62it/s]

Epoch: 0 Iteration: 1910 Loss: 1.217 Validation Loss: 1.261 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▌         | 1911/37094 [03:16<1:04:39,  9.07it/s]

Epoch: 0 Iteration: 1911 Loss: 1.232 Validation Loss: 1.261 Accuracy: 0.256 Validation Accuracy: 0.179:   5%|▌         | 1911/37094 [03:16<1:04:39,  9.07it/s]

Epoch: 0 Iteration: 1912 Loss: 1.304 Validation Loss: 1.261 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1911/37094 [03:16<1:04:39,  9.07it/s]

Epoch: 0 Iteration: 1912 Loss: 1.304 Validation Loss: 1.261 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1913/37094 [03:16<1:02:19,  9.41it/s]

Epoch: 0 Iteration: 1913 Loss: 1.270 Validation Loss: 1.261 Accuracy: 0.247 Validation Accuracy: 0.179:   5%|▌         | 1913/37094 [03:16<1:02:19,  9.41it/s]

Epoch: 0 Iteration: 1914 Loss: 1.299 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1913/37094 [03:16<1:02:19,  9.41it/s]

Epoch: 0 Iteration: 1914 Loss: 1.299 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1915/37094 [03:16<1:00:44,  9.65it/s]

Epoch: 0 Iteration: 1915 Loss: 1.330 Validation Loss: 1.261 Accuracy: 0.247 Validation Accuracy: 0.179:   5%|▌         | 1915/37094 [03:16<1:00:44,  9.65it/s]

Epoch: 0 Iteration: 1916 Loss: 1.353 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1915/37094 [03:16<1:00:44,  9.65it/s]

Epoch: 0 Iteration: 1916 Loss: 1.353 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1917/37094 [03:16<59:36,  9.84it/s]  

Epoch: 0 Iteration: 1917 Loss: 1.325 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1917/37094 [03:16<59:36,  9.84it/s]

Epoch: 0 Iteration: 1918 Loss: 1.346 Validation Loss: 1.261 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1917/37094 [03:16<59:36,  9.84it/s]

Epoch: 0 Iteration: 1918 Loss: 1.346 Validation Loss: 1.261 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1919/37094 [03:16<58:47,  9.97it/s]

Epoch: 0 Iteration: 1919 Loss: 1.376 Validation Loss: 1.261 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1919/37094 [03:17<58:47,  9.97it/s]

Epoch: 0 Iteration: 1920 Loss: 1.402 Validation Loss: 1.261 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▌         | 1919/37094 [03:17<58:47,  9.97it/s]

Epoch: 0 Iteration: 1920 Loss: 1.402 Validation Loss: 1.261 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▌         | 1921/37094 [03:17<58:11, 10.07it/s]

Epoch: 0 Iteration: 1921 Loss: 1.396 Validation Loss: 1.261 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▌         | 1921/37094 [03:17<58:11, 10.07it/s]

Epoch: 0 Iteration: 1922 Loss: 1.407 Validation Loss: 1.261 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▌         | 1921/37094 [03:17<58:11, 10.07it/s]

Epoch: 0 Iteration: 1922 Loss: 1.407 Validation Loss: 1.261 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▌         | 1923/37094 [03:17<57:47, 10.14it/s]

Epoch: 0 Iteration: 1923 Loss: 1.406 Validation Loss: 1.261 Accuracy: 0.247 Validation Accuracy: 0.179:   5%|▌         | 1923/37094 [03:17<57:47, 10.14it/s]

Epoch: 0 Iteration: 1924 Loss: 1.361 Validation Loss: 1.261 Accuracy: 0.253 Validation Accuracy: 0.179:   5%|▌         | 1923/37094 [03:17<57:47, 10.14it/s]

Epoch: 0 Iteration: 1924 Loss: 1.361 Validation Loss: 1.261 Accuracy: 0.253 Validation Accuracy: 0.179:   5%|▌         | 1925/37094 [03:17<57:37, 10.17it/s]

Epoch: 0 Iteration: 1925 Loss: 1.298 Validation Loss: 1.261 Accuracy: 0.256 Validation Accuracy: 0.179:   5%|▌         | 1925/37094 [03:17<57:37, 10.17it/s]

Epoch: 0 Iteration: 1926 Loss: 1.296 Validation Loss: 1.261 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▌         | 1925/37094 [03:17<57:37, 10.17it/s]

Epoch: 0 Iteration: 1926 Loss: 1.296 Validation Loss: 1.261 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▌         | 1927/37094 [03:17<57:22, 10.22it/s]

Epoch: 0 Iteration: 1927 Loss: 1.333 Validation Loss: 1.261 Accuracy: 0.259 Validation Accuracy: 0.179:   5%|▌         | 1927/37094 [03:17<57:22, 10.22it/s]

Epoch: 0 Iteration: 1928 Loss: 1.322 Validation Loss: 1.261 Accuracy: 0.275 Validation Accuracy: 0.179:   5%|▌         | 1927/37094 [03:17<57:22, 10.22it/s]

Epoch: 0 Iteration: 1928 Loss: 1.322 Validation Loss: 1.261 Accuracy: 0.275 Validation Accuracy: 0.179:   5%|▌         | 1929/37094 [03:17<57:12, 10.25it/s]

Epoch: 0 Iteration: 1929 Loss: 1.286 Validation Loss: 1.261 Accuracy: 0.266 Validation Accuracy: 0.179:   5%|▌         | 1929/37094 [03:18<57:12, 10.25it/s]

Epoch: 0 Iteration: 1930 Loss: 1.326 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1929/37094 [03:18<57:12, 10.25it/s]

Epoch: 0 Iteration: 1930 Loss: 1.326 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1931/37094 [03:18<57:05, 10.26it/s]

Epoch: 0 Iteration: 1931 Loss: 1.317 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1931/37094 [03:18<57:05, 10.26it/s]

Epoch: 0 Iteration: 1932 Loss: 1.301 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1931/37094 [03:18<57:05, 10.26it/s]

Epoch: 0 Iteration: 1932 Loss: 1.301 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1933/37094 [03:18<57:00, 10.28it/s]

Epoch: 0 Iteration: 1933 Loss: 1.299 Validation Loss: 1.261 Accuracy: 0.253 Validation Accuracy: 0.179:   5%|▌         | 1933/37094 [03:18<57:00, 10.28it/s]

Epoch: 0 Iteration: 1934 Loss: 1.295 Validation Loss: 1.261 Accuracy: 0.241 Validation Accuracy: 0.179:   5%|▌         | 1933/37094 [03:18<57:00, 10.28it/s]

Epoch: 0 Iteration: 1934 Loss: 1.295 Validation Loss: 1.261 Accuracy: 0.241 Validation Accuracy: 0.179:   5%|▌         | 1935/37094 [03:18<56:57, 10.29it/s]

Epoch: 0 Iteration: 1935 Loss: 1.259 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1935/37094 [03:18<56:57, 10.29it/s]

Epoch: 0 Iteration: 1936 Loss: 1.261 Validation Loss: 1.261 Accuracy: 0.231 Validation Accuracy: 0.179:   5%|▌         | 1935/37094 [03:18<56:57, 10.29it/s]

Epoch: 0 Iteration: 1936 Loss: 1.261 Validation Loss: 1.261 Accuracy: 0.231 Validation Accuracy: 0.179:   5%|▌         | 1937/37094 [03:18<56:55, 10.29it/s]

Epoch: 0 Iteration: 1937 Loss: 1.278 Validation Loss: 1.261 Accuracy: 0.225 Validation Accuracy: 0.179:   5%|▌         | 1937/37094 [03:18<56:55, 10.29it/s]

Epoch: 0 Iteration: 1938 Loss: 1.264 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1937/37094 [03:18<56:55, 10.29it/s]

Epoch: 0 Iteration: 1938 Loss: 1.264 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1939/37094 [03:18<56:55, 10.29it/s]

Epoch: 0 Iteration: 1939 Loss: 1.252 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1939/37094 [03:19<56:55, 10.29it/s]

Epoch: 0 Iteration: 1940 Loss: 1.278 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1939/37094 [03:19<56:55, 10.29it/s]

Epoch: 0 Iteration: 1940 Loss: 1.278 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1941/37094 [03:19<56:53, 10.30it/s]

Epoch: 0 Iteration: 1941 Loss: 1.287 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1941/37094 [03:19<56:53, 10.30it/s]

Epoch: 0 Iteration: 1942 Loss: 1.302 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1941/37094 [03:19<56:53, 10.30it/s]

Epoch: 0 Iteration: 1942 Loss: 1.302 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1943/37094 [03:19<56:55, 10.29it/s]

Epoch: 0 Iteration: 1943 Loss: 1.279 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1943/37094 [03:19<56:55, 10.29it/s]

Epoch: 0 Iteration: 1944 Loss: 1.299 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1943/37094 [03:19<56:55, 10.29it/s]

Epoch: 0 Iteration: 1944 Loss: 1.299 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1945/37094 [03:19<56:52, 10.30it/s]

Epoch: 0 Iteration: 1945 Loss: 1.355 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1945/37094 [03:19<56:52, 10.30it/s]

Epoch: 0 Iteration: 1946 Loss: 1.399 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1945/37094 [03:19<56:52, 10.30it/s]

Epoch: 0 Iteration: 1946 Loss: 1.399 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1947/37094 [03:19<56:51, 10.30it/s]

Epoch: 0 Iteration: 1947 Loss: 1.370 Validation Loss: 1.261 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1947/37094 [03:19<56:51, 10.30it/s]

Epoch: 0 Iteration: 1948 Loss: 1.389 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1947/37094 [03:19<56:51, 10.30it/s]

Epoch: 0 Iteration: 1948 Loss: 1.389 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1949/37094 [03:19<56:52, 10.30it/s]

Epoch: 0 Iteration: 1949 Loss: 1.402 Validation Loss: 1.261 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1949/37094 [03:19<56:52, 10.30it/s]

Epoch: 0 Iteration: 1950 Loss: 1.362 Validation Loss: 1.261 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1949/37094 [03:20<56:52, 10.30it/s]

Epoch: 0 Iteration: 1950 Loss: 1.362 Validation Loss: 1.261 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1951/37094 [03:20<56:51, 10.30it/s]

Epoch: 0 Iteration: 1951 Loss: 1.365 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1951/37094 [03:20<56:51, 10.30it/s]

Epoch: 0 Iteration: 1952 Loss: 1.354 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1951/37094 [03:20<56:51, 10.30it/s]

Epoch: 0 Iteration: 1952 Loss: 1.354 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1953/37094 [03:20<56:48, 10.31it/s]

Epoch: 0 Iteration: 1953 Loss: 1.332 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1953/37094 [03:20<56:48, 10.31it/s]

Epoch: 0 Iteration: 1954 Loss: 1.339 Validation Loss: 1.261 Accuracy: 0.191 Validation Accuracy: 0.179:   5%|▌         | 1953/37094 [03:20<56:48, 10.31it/s]

Epoch: 0 Iteration: 1954 Loss: 1.339 Validation Loss: 1.261 Accuracy: 0.191 Validation Accuracy: 0.179:   5%|▌         | 1955/37094 [03:20<56:49, 10.31it/s]

Epoch: 0 Iteration: 1955 Loss: 1.342 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1955/37094 [03:20<56:49, 10.31it/s]

Epoch: 0 Iteration: 1956 Loss: 1.340 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1955/37094 [03:20<56:49, 10.31it/s]

Epoch: 0 Iteration: 1956 Loss: 1.340 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1957/37094 [03:20<56:50, 10.30it/s]

Epoch: 0 Iteration: 1957 Loss: 1.319 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1957/37094 [03:20<56:50, 10.30it/s]

Epoch: 0 Iteration: 1958 Loss: 1.339 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1957/37094 [03:20<56:50, 10.30it/s]

Epoch: 0 Iteration: 1958 Loss: 1.339 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1959/37094 [03:20<56:50, 10.30it/s]

Epoch: 0 Iteration: 1959 Loss: 1.361 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1959/37094 [03:20<56:50, 10.30it/s]

Epoch: 0 Iteration: 1960 Loss: 1.374 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1959/37094 [03:21<56:50, 10.30it/s]

Epoch: 0 Iteration: 1960 Loss: 1.374 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1961/37094 [03:21<56:49, 10.31it/s]

Epoch: 0 Iteration: 1961 Loss: 1.468 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1961/37094 [03:21<56:49, 10.31it/s]

Epoch: 0 Iteration: 1962 Loss: 1.463 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1961/37094 [03:21<56:49, 10.31it/s]

Epoch: 0 Iteration: 1962 Loss: 1.463 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1963/37094 [03:21<56:49, 10.30it/s]

Epoch: 0 Iteration: 1963 Loss: 1.466 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1963/37094 [03:21<56:49, 10.30it/s]

Epoch: 0 Iteration: 1964 Loss: 1.485 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1963/37094 [03:21<56:49, 10.30it/s]

Epoch: 0 Iteration: 1964 Loss: 1.485 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1965/37094 [03:21<56:47, 10.31it/s]

Epoch: 0 Iteration: 1965 Loss: 1.459 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1965/37094 [03:21<56:47, 10.31it/s]

Epoch: 0 Iteration: 1966 Loss: 1.440 Validation Loss: 1.261 Accuracy: 0.191 Validation Accuracy: 0.179:   5%|▌         | 1965/37094 [03:21<56:47, 10.31it/s]

Epoch: 0 Iteration: 1966 Loss: 1.440 Validation Loss: 1.261 Accuracy: 0.191 Validation Accuracy: 0.179:   5%|▌         | 1967/37094 [03:21<56:49, 10.30it/s]

Epoch: 0 Iteration: 1967 Loss: 1.450 Validation Loss: 1.261 Accuracy: 0.194 Validation Accuracy: 0.179:   5%|▌         | 1967/37094 [03:21<56:49, 10.30it/s]

Epoch: 0 Iteration: 1968 Loss: 1.446 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1967/37094 [03:21<56:49, 10.30it/s]

Epoch: 0 Iteration: 1968 Loss: 1.446 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1969/37094 [03:21<56:48, 10.30it/s]

Epoch: 0 Iteration: 1969 Loss: 1.461 Validation Loss: 1.261 Accuracy: 0.194 Validation Accuracy: 0.179:   5%|▌         | 1969/37094 [03:21<56:48, 10.30it/s]

Epoch: 0 Iteration: 1970 Loss: 1.470 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1969/37094 [03:22<56:48, 10.30it/s]

Epoch: 0 Iteration: 1970 Loss: 1.470 Validation Loss: 1.261 Accuracy: 0.200 Validation Accuracy: 0.179:   5%|▌         | 1971/37094 [03:22<56:48, 10.31it/s]

Epoch: 0 Iteration: 1971 Loss: 1.483 Validation Loss: 1.261 Accuracy: 0.194 Validation Accuracy: 0.179:   5%|▌         | 1971/37094 [03:22<56:48, 10.31it/s]

Epoch: 0 Iteration: 1972 Loss: 1.455 Validation Loss: 1.261 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1971/37094 [03:22<56:48, 10.31it/s]

Epoch: 0 Iteration: 1972 Loss: 1.455 Validation Loss: 1.261 Accuracy: 0.197 Validation Accuracy: 0.179:   5%|▌         | 1973/37094 [03:22<56:46, 10.31it/s]

Epoch: 0 Iteration: 1973 Loss: 1.465 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1973/37094 [03:22<56:46, 10.31it/s]

Epoch: 0 Iteration: 1974 Loss: 1.462 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1973/37094 [03:22<56:46, 10.31it/s]

Epoch: 0 Iteration: 1974 Loss: 1.462 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1975/37094 [03:22<56:46, 10.31it/s]

Epoch: 0 Iteration: 1975 Loss: 1.481 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1975/37094 [03:22<56:46, 10.31it/s]

Epoch: 0 Iteration: 1976 Loss: 1.477 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1975/37094 [03:22<56:46, 10.31it/s]

Epoch: 0 Iteration: 1976 Loss: 1.477 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1977/37094 [03:22<56:45, 10.31it/s]

Epoch: 0 Iteration: 1977 Loss: 1.453 Validation Loss: 1.261 Accuracy: 0.228 Validation Accuracy: 0.179:   5%|▌         | 1977/37094 [03:22<56:45, 10.31it/s]

Epoch: 0 Iteration: 1978 Loss: 1.435 Validation Loss: 1.261 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1977/37094 [03:22<56:45, 10.31it/s]

Epoch: 0 Iteration: 1978 Loss: 1.435 Validation Loss: 1.261 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1979/37094 [03:22<56:48, 10.30it/s]

Epoch: 0 Iteration: 1979 Loss: 1.398 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1979/37094 [03:22<56:48, 10.30it/s]

Epoch: 0 Iteration: 1980 Loss: 1.372 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1979/37094 [03:22<56:48, 10.30it/s]

Epoch: 0 Iteration: 1980 Loss: 1.372 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1981/37094 [03:22<56:47, 10.31it/s]

Epoch: 0 Iteration: 1981 Loss: 1.294 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1981/37094 [03:23<56:47, 10.31it/s]

Epoch: 0 Iteration: 1982 Loss: 1.289 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1981/37094 [03:23<56:47, 10.31it/s]

Epoch: 0 Iteration: 1982 Loss: 1.289 Validation Loss: 1.261 Accuracy: 0.219 Validation Accuracy: 0.179:   5%|▌         | 1983/37094 [03:23<56:46, 10.31it/s]

Epoch: 0 Iteration: 1983 Loss: 1.309 Validation Loss: 1.261 Accuracy: 0.216 Validation Accuracy: 0.179:   5%|▌         | 1983/37094 [03:23<56:46, 10.31it/s]

Epoch: 0 Iteration: 1984 Loss: 1.283 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1983/37094 [03:23<56:46, 10.31it/s]

Epoch: 0 Iteration: 1984 Loss: 1.283 Validation Loss: 1.261 Accuracy: 0.203 Validation Accuracy: 0.179:   5%|▌         | 1985/37094 [03:23<56:46, 10.31it/s]

Epoch: 0 Iteration: 1985 Loss: 1.269 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1985/37094 [03:23<56:46, 10.31it/s]

Epoch: 0 Iteration: 1986 Loss: 1.296 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1985/37094 [03:23<56:46, 10.31it/s]

Epoch: 0 Iteration: 1986 Loss: 1.296 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1987/37094 [03:23<56:44, 10.31it/s]

Epoch: 0 Iteration: 1987 Loss: 1.257 Validation Loss: 1.261 Accuracy: 0.209 Validation Accuracy: 0.179:   5%|▌         | 1987/37094 [03:23<56:44, 10.31it/s]

Epoch: 0 Iteration: 1988 Loss: 1.230 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1987/37094 [03:23<56:44, 10.31it/s]

Epoch: 0 Iteration: 1988 Loss: 1.230 Validation Loss: 1.261 Accuracy: 0.212 Validation Accuracy: 0.179:   5%|▌         | 1989/37094 [03:23<56:45, 10.31it/s]

Epoch: 0 Iteration: 1989 Loss: 1.186 Validation Loss: 1.261 Accuracy: 0.222 Validation Accuracy: 0.179:   5%|▌         | 1989/37094 [03:23<56:45, 10.31it/s]

Epoch: 0 Iteration: 1990 Loss: 1.177 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1989/37094 [03:23<56:45, 10.31it/s]

Epoch: 0 Iteration: 1990 Loss: 1.177 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1991/37094 [03:23<56:46, 10.30it/s]

Epoch: 0 Iteration: 1991 Loss: 1.116 Validation Loss: 1.261 Accuracy: 0.234 Validation Accuracy: 0.179:   5%|▌         | 1991/37094 [03:24<56:46, 10.30it/s]

Epoch: 0 Iteration: 1992 Loss: 1.138 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1991/37094 [03:24<56:46, 10.30it/s]

Epoch: 0 Iteration: 1992 Loss: 1.138 Validation Loss: 1.261 Accuracy: 0.250 Validation Accuracy: 0.179:   5%|▌         | 1993/37094 [03:24<56:45, 10.31it/s]

Epoch: 0 Iteration: 1993 Loss: 1.185 Validation Loss: 1.261 Accuracy: 0.247 Validation Accuracy: 0.179:   5%|▌         | 1993/37094 [03:24<56:45, 10.31it/s]

Epoch: 0 Iteration: 1994 Loss: 1.192 Validation Loss: 1.261 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1993/37094 [03:24<56:45, 10.31it/s]

Epoch: 0 Iteration: 1994 Loss: 1.192 Validation Loss: 1.261 Accuracy: 0.244 Validation Accuracy: 0.179:   5%|▌         | 1995/37094 [03:24<56:44, 10.31it/s]

Epoch: 0 Iteration: 1995 Loss: 1.215 Validation Loss: 1.261 Accuracy: 0.241 Validation Accuracy: 0.179:   5%|▌         | 1995/37094 [03:24<56:44, 10.31it/s]

Epoch: 0 Iteration: 1996 Loss: 1.229 Validation Loss: 1.261 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1995/37094 [03:24<56:44, 10.31it/s]

Epoch: 0 Iteration: 1996 Loss: 1.229 Validation Loss: 1.261 Accuracy: 0.237 Validation Accuracy: 0.179:   5%|▌         | 1997/37094 [03:24<56:43, 10.31it/s]

Epoch: 0 Iteration: 1997 Loss: 1.249 Validation Loss: 1.261 Accuracy: 0.231 Validation Accuracy: 0.179:   5%|▌         | 1997/37094 [03:24<56:43, 10.31it/s]

Epoch: 0 Iteration: 1998 Loss: 1.240 Validation Loss: 1.261 Accuracy: 0.231 Validation Accuracy: 0.179:   5%|▌         | 1997/37094 [03:24<56:43, 10.31it/s]

Epoch: 0 Iteration: 1998 Loss: 1.240 Validation Loss: 1.261 Accuracy: 0.231 Validation Accuracy: 0.179:   5%|▌         | 1999/37094 [03:24<56:43, 10.31it/s]

Epoch: 0 Iteration: 1999 Loss: 1.243 Validation Loss: 1.261 Accuracy: 0.206 Validation Accuracy: 0.179:   5%|▌         | 1999/37094 [03:24<56:43, 10.31it/s]

Epoch: 0 Iteration: 2000 Loss: 1.276 Validation Loss: 1.228 Accuracy: 0.188 Validation Accuracy: 0.184:   5%|▌         | 1999/37094 [03:25<56:43, 10.31it/s]

Epoch: 0 Iteration: 2000 Loss: 1.276 Validation Loss: 1.228 Accuracy: 0.188 Validation Accuracy: 0.184:   5%|▌         | 2001/37094 [03:25<1:44:36,  5.59it/s]

Epoch: 0 Iteration: 2001 Loss: 1.304 Validation Loss: 1.228 Accuracy: 0.181 Validation Accuracy: 0.184:   5%|▌         | 2001/37094 [03:25<1:44:36,  5.59it/s]

Epoch: 0 Iteration: 2002 Loss: 1.352 Validation Loss: 1.228 Accuracy: 0.166 Validation Accuracy: 0.184:   5%|▌         | 2001/37094 [03:25<1:44:36,  5.59it/s]

Epoch: 0 Iteration: 2002 Loss: 1.352 Validation Loss: 1.228 Accuracy: 0.166 Validation Accuracy: 0.184:   5%|▌         | 2003/37094 [03:25<1:29:29,  6.53it/s]

Epoch: 0 Iteration: 2003 Loss: 1.369 Validation Loss: 1.228 Accuracy: 0.166 Validation Accuracy: 0.184:   5%|▌         | 2003/37094 [03:25<1:29:29,  6.53it/s]

Epoch: 0 Iteration: 2004 Loss: 1.414 Validation Loss: 1.228 Accuracy: 0.166 Validation Accuracy: 0.184:   5%|▌         | 2003/37094 [03:25<1:29:29,  6.53it/s]

Epoch: 0 Iteration: 2004 Loss: 1.414 Validation Loss: 1.228 Accuracy: 0.166 Validation Accuracy: 0.184:   5%|▌         | 2005/37094 [03:25<1:19:41,  7.34it/s]

Epoch: 0 Iteration: 2005 Loss: 1.440 Validation Loss: 1.228 Accuracy: 0.181 Validation Accuracy: 0.184:   5%|▌         | 2005/37094 [03:25<1:19:41,  7.34it/s]

Epoch: 0 Iteration: 2006 Loss: 1.380 Validation Loss: 1.228 Accuracy: 0.197 Validation Accuracy: 0.184:   5%|▌         | 2005/37094 [03:26<1:19:41,  7.34it/s]

Epoch: 0 Iteration: 2006 Loss: 1.380 Validation Loss: 1.228 Accuracy: 0.197 Validation Accuracy: 0.184:   5%|▌         | 2007/37094 [03:26<1:12:47,  8.03it/s]

Epoch: 0 Iteration: 2007 Loss: 1.378 Validation Loss: 1.228 Accuracy: 0.200 Validation Accuracy: 0.184:   5%|▌         | 2007/37094 [03:26<1:12:47,  8.03it/s]

Epoch: 0 Iteration: 2008 Loss: 1.395 Validation Loss: 1.228 Accuracy: 0.197 Validation Accuracy: 0.184:   5%|▌         | 2007/37094 [03:26<1:12:47,  8.03it/s]

Epoch: 0 Iteration: 2008 Loss: 1.395 Validation Loss: 1.228 Accuracy: 0.197 Validation Accuracy: 0.184:   5%|▌         | 2009/37094 [03:26<1:08:03,  8.59it/s]

Epoch: 0 Iteration: 2009 Loss: 1.418 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   5%|▌         | 2009/37094 [03:26<1:08:03,  8.59it/s]

Epoch: 0 Iteration: 2010 Loss: 1.400 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   5%|▌         | 2009/37094 [03:26<1:08:03,  8.59it/s]

Epoch: 0 Iteration: 2010 Loss: 1.400 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   5%|▌         | 2011/37094 [03:26<1:04:38,  9.04it/s]

Epoch: 0 Iteration: 2011 Loss: 1.434 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   5%|▌         | 2011/37094 [03:26<1:04:38,  9.04it/s]

Epoch: 0 Iteration: 2012 Loss: 1.473 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   5%|▌         | 2011/37094 [03:26<1:04:38,  9.04it/s]

Epoch: 0 Iteration: 2012 Loss: 1.473 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   5%|▌         | 2013/37094 [03:26<1:02:15,  9.39it/s]

Epoch: 0 Iteration: 2013 Loss: 1.421 Validation Loss: 1.228 Accuracy: 0.219 Validation Accuracy: 0.184:   5%|▌         | 2013/37094 [03:26<1:02:15,  9.39it/s]

Epoch: 0 Iteration: 2014 Loss: 1.441 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   5%|▌         | 2013/37094 [03:26<1:02:15,  9.39it/s]

Epoch: 0 Iteration: 2014 Loss: 1.441 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   5%|▌         | 2015/37094 [03:26<1:00:38,  9.64it/s]

Epoch: 0 Iteration: 2015 Loss: 1.414 Validation Loss: 1.228 Accuracy: 0.222 Validation Accuracy: 0.184:   5%|▌         | 2015/37094 [03:26<1:00:38,  9.64it/s]

Epoch: 0 Iteration: 2016 Loss: 1.450 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   5%|▌         | 2015/37094 [03:27<1:00:38,  9.64it/s]

Epoch: 0 Iteration: 2016 Loss: 1.450 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   5%|▌         | 2017/37094 [03:27<59:30,  9.82it/s]  

Epoch: 0 Iteration: 2017 Loss: 1.452 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   5%|▌         | 2017/37094 [03:27<59:30,  9.82it/s]

Epoch: 0 Iteration: 2018 Loss: 1.473 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   5%|▌         | 2017/37094 [03:27<59:30,  9.82it/s]

Epoch: 0 Iteration: 2018 Loss: 1.473 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   5%|▌         | 2019/37094 [03:27<58:40,  9.96it/s]

Epoch: 0 Iteration: 2019 Loss: 1.471 Validation Loss: 1.228 Accuracy: 0.216 Validation Accuracy: 0.184:   5%|▌         | 2019/37094 [03:27<58:40,  9.96it/s]

Epoch: 0 Iteration: 2020 Loss: 1.451 Validation Loss: 1.228 Accuracy: 0.222 Validation Accuracy: 0.184:   5%|▌         | 2019/37094 [03:27<58:40,  9.96it/s]

Epoch: 0 Iteration: 2020 Loss: 1.451 Validation Loss: 1.228 Accuracy: 0.222 Validation Accuracy: 0.184:   5%|▌         | 2021/37094 [03:27<58:03, 10.07it/s]

Epoch: 0 Iteration: 2021 Loss: 1.423 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   5%|▌         | 2021/37094 [03:27<58:03, 10.07it/s]

Epoch: 0 Iteration: 2022 Loss: 1.343 Validation Loss: 1.228 Accuracy: 0.253 Validation Accuracy: 0.184:   5%|▌         | 2021/37094 [03:27<58:03, 10.07it/s]

Epoch: 0 Iteration: 2022 Loss: 1.343 Validation Loss: 1.228 Accuracy: 0.253 Validation Accuracy: 0.184:   5%|▌         | 2023/37094 [03:27<57:40, 10.13it/s]

Epoch: 0 Iteration: 2023 Loss: 1.324 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   5%|▌         | 2023/37094 [03:27<57:40, 10.13it/s]

Epoch: 0 Iteration: 2024 Loss: 1.271 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2023/37094 [03:27<57:40, 10.13it/s]

Epoch: 0 Iteration: 2024 Loss: 1.271 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2025/37094 [03:27<57:23, 10.18it/s]

Epoch: 0 Iteration: 2025 Loss: 1.280 Validation Loss: 1.228 Accuracy: 0.253 Validation Accuracy: 0.184:   5%|▌         | 2025/37094 [03:27<57:23, 10.18it/s]

Epoch: 0 Iteration: 2026 Loss: 1.284 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   5%|▌         | 2025/37094 [03:27<57:23, 10.18it/s]

Epoch: 0 Iteration: 2026 Loss: 1.284 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   5%|▌         | 2027/37094 [03:27<57:10, 10.22it/s]

Epoch: 0 Iteration: 2027 Loss: 1.300 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2027/37094 [03:28<57:10, 10.22it/s]

Epoch: 0 Iteration: 2028 Loss: 1.283 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   5%|▌         | 2027/37094 [03:28<57:10, 10.22it/s]

Epoch: 0 Iteration: 2028 Loss: 1.283 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   5%|▌         | 2029/37094 [03:28<57:01, 10.25it/s]

Epoch: 0 Iteration: 2029 Loss: 1.291 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   5%|▌         | 2029/37094 [03:28<57:01, 10.25it/s]

Epoch: 0 Iteration: 2030 Loss: 1.334 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2029/37094 [03:28<57:01, 10.25it/s]

Epoch: 0 Iteration: 2030 Loss: 1.334 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2031/37094 [03:28<56:54, 10.27it/s]

Epoch: 0 Iteration: 2031 Loss: 1.334 Validation Loss: 1.228 Accuracy: 0.263 Validation Accuracy: 0.184:   5%|▌         | 2031/37094 [03:28<56:54, 10.27it/s]

Epoch: 0 Iteration: 2032 Loss: 1.289 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2031/37094 [03:28<56:54, 10.27it/s]

Epoch: 0 Iteration: 2032 Loss: 1.289 Validation Loss: 1.228 Accuracy: 0.250 Validation Accuracy: 0.184:   5%|▌         | 2033/37094 [03:28<56:50, 10.28it/s]

Epoch: 0 Iteration: 2033 Loss: 1.276 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   5%|▌         | 2033/37094 [03:28<56:50, 10.28it/s]

Epoch: 0 Iteration: 2034 Loss: 1.240 Validation Loss: 1.228 Accuracy: 0.263 Validation Accuracy: 0.184:   5%|▌         | 2033/37094 [03:28<56:50, 10.28it/s]

Epoch: 0 Iteration: 2034 Loss: 1.240 Validation Loss: 1.228 Accuracy: 0.263 Validation Accuracy: 0.184:   5%|▌         | 2035/37094 [03:28<56:47, 10.29it/s]

Epoch: 0 Iteration: 2035 Loss: 1.227 Validation Loss: 1.228 Accuracy: 0.263 Validation Accuracy: 0.184:   5%|▌         | 2035/37094 [03:28<56:47, 10.29it/s]

Epoch: 0 Iteration: 2036 Loss: 1.175 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   5%|▌         | 2035/37094 [03:28<56:47, 10.29it/s]

Epoch: 0 Iteration: 2036 Loss: 1.175 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   5%|▌         | 2037/37094 [03:28<56:44, 10.30it/s]

Epoch: 0 Iteration: 2037 Loss: 1.224 Validation Loss: 1.228 Accuracy: 0.266 Validation Accuracy: 0.184:   5%|▌         | 2037/37094 [03:29<56:44, 10.30it/s]

Epoch: 0 Iteration: 2038 Loss: 1.206 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   5%|▌         | 2037/37094 [03:29<56:44, 10.30it/s]

Epoch: 0 Iteration: 2038 Loss: 1.206 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   5%|▌         | 2039/37094 [03:29<56:43, 10.30it/s]

Epoch: 0 Iteration: 2039 Loss: 1.231 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   5%|▌         | 2039/37094 [03:29<56:43, 10.30it/s]

Epoch: 0 Iteration: 2040 Loss: 1.181 Validation Loss: 1.228 Accuracy: 0.269 Validation Accuracy: 0.184:   5%|▌         | 2039/37094 [03:29<56:43, 10.30it/s]

Epoch: 0 Iteration: 2040 Loss: 1.181 Validation Loss: 1.228 Accuracy: 0.269 Validation Accuracy: 0.184:   6%|▌         | 2041/37094 [03:29<56:43, 10.30it/s]

Epoch: 0 Iteration: 2041 Loss: 1.163 Validation Loss: 1.228 Accuracy: 0.272 Validation Accuracy: 0.184:   6%|▌         | 2041/37094 [03:29<56:43, 10.30it/s]

Epoch: 0 Iteration: 2042 Loss: 1.185 Validation Loss: 1.228 Accuracy: 0.284 Validation Accuracy: 0.184:   6%|▌         | 2041/37094 [03:29<56:43, 10.30it/s]

Epoch: 0 Iteration: 2042 Loss: 1.185 Validation Loss: 1.228 Accuracy: 0.284 Validation Accuracy: 0.184:   6%|▌         | 2043/37094 [03:29<56:41, 10.30it/s]

Epoch: 0 Iteration: 2043 Loss: 1.201 Validation Loss: 1.228 Accuracy: 0.272 Validation Accuracy: 0.184:   6%|▌         | 2043/37094 [03:29<56:41, 10.30it/s]

Epoch: 0 Iteration: 2044 Loss: 1.253 Validation Loss: 1.228 Accuracy: 0.269 Validation Accuracy: 0.184:   6%|▌         | 2043/37094 [03:29<56:41, 10.30it/s]

Epoch: 0 Iteration: 2044 Loss: 1.253 Validation Loss: 1.228 Accuracy: 0.269 Validation Accuracy: 0.184:   6%|▌         | 2045/37094 [03:29<56:40, 10.31it/s]

Epoch: 0 Iteration: 2045 Loss: 1.270 Validation Loss: 1.228 Accuracy: 0.272 Validation Accuracy: 0.184:   6%|▌         | 2045/37094 [03:29<56:40, 10.31it/s]

Epoch: 0 Iteration: 2046 Loss: 1.315 Validation Loss: 1.228 Accuracy: 0.269 Validation Accuracy: 0.184:   6%|▌         | 2045/37094 [03:29<56:40, 10.31it/s]

Epoch: 0 Iteration: 2046 Loss: 1.315 Validation Loss: 1.228 Accuracy: 0.269 Validation Accuracy: 0.184:   6%|▌         | 2047/37094 [03:29<56:42, 10.30it/s]

Epoch: 0 Iteration: 2047 Loss: 1.333 Validation Loss: 1.228 Accuracy: 0.287 Validation Accuracy: 0.184:   6%|▌         | 2047/37094 [03:30<56:42, 10.30it/s]

Epoch: 0 Iteration: 2048 Loss: 1.331 Validation Loss: 1.228 Accuracy: 0.297 Validation Accuracy: 0.184:   6%|▌         | 2047/37094 [03:30<56:42, 10.30it/s]

Epoch: 0 Iteration: 2048 Loss: 1.331 Validation Loss: 1.228 Accuracy: 0.297 Validation Accuracy: 0.184:   6%|▌         | 2049/37094 [03:30<56:40, 10.31it/s]

Epoch: 0 Iteration: 2049 Loss: 1.341 Validation Loss: 1.228 Accuracy: 0.294 Validation Accuracy: 0.184:   6%|▌         | 2049/37094 [03:30<56:40, 10.31it/s]

Epoch: 0 Iteration: 2050 Loss: 1.375 Validation Loss: 1.228 Accuracy: 0.297 Validation Accuracy: 0.184:   6%|▌         | 2049/37094 [03:30<56:40, 10.31it/s]

Epoch: 0 Iteration: 2050 Loss: 1.375 Validation Loss: 1.228 Accuracy: 0.297 Validation Accuracy: 0.184:   6%|▌         | 2051/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2051 Loss: 1.346 Validation Loss: 1.228 Accuracy: 0.287 Validation Accuracy: 0.184:   6%|▌         | 2051/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2052 Loss: 1.335 Validation Loss: 1.228 Accuracy: 0.266 Validation Accuracy: 0.184:   6%|▌         | 2051/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2052 Loss: 1.335 Validation Loss: 1.228 Accuracy: 0.266 Validation Accuracy: 0.184:   6%|▌         | 2053/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2053 Loss: 1.356 Validation Loss: 1.228 Accuracy: 0.281 Validation Accuracy: 0.184:   6%|▌         | 2053/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2054 Loss: 1.357 Validation Loss: 1.228 Accuracy: 0.284 Validation Accuracy: 0.184:   6%|▌         | 2053/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2054 Loss: 1.357 Validation Loss: 1.228 Accuracy: 0.284 Validation Accuracy: 0.184:   6%|▌         | 2055/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2055 Loss: 1.390 Validation Loss: 1.228 Accuracy: 0.275 Validation Accuracy: 0.184:   6%|▌         | 2055/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2056 Loss: 1.384 Validation Loss: 1.228 Accuracy: 0.278 Validation Accuracy: 0.184:   6%|▌         | 2055/37094 [03:30<56:39, 10.31it/s]

Epoch: 0 Iteration: 2056 Loss: 1.384 Validation Loss: 1.228 Accuracy: 0.278 Validation Accuracy: 0.184:   6%|▌         | 2057/37094 [03:30<56:40, 10.30it/s]

Epoch: 0 Iteration: 2057 Loss: 1.317 Validation Loss: 1.228 Accuracy: 0.272 Validation Accuracy: 0.184:   6%|▌         | 2057/37094 [03:30<56:40, 10.30it/s]

Epoch: 0 Iteration: 2058 Loss: 1.323 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   6%|▌         | 2057/37094 [03:31<56:40, 10.30it/s]

Epoch: 0 Iteration: 2058 Loss: 1.323 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   6%|▌         | 2059/37094 [03:31<56:39, 10.31it/s]

Epoch: 0 Iteration: 2059 Loss: 1.291 Validation Loss: 1.228 Accuracy: 0.244 Validation Accuracy: 0.184:   6%|▌         | 2059/37094 [03:31<56:39, 10.31it/s]

Epoch: 0 Iteration: 2060 Loss: 1.329 Validation Loss: 1.228 Accuracy: 0.237 Validation Accuracy: 0.184:   6%|▌         | 2059/37094 [03:31<56:39, 10.31it/s]

Epoch: 0 Iteration: 2060 Loss: 1.329 Validation Loss: 1.228 Accuracy: 0.237 Validation Accuracy: 0.184:   6%|▌         | 2061/37094 [03:31<56:41, 10.30it/s]

Epoch: 0 Iteration: 2061 Loss: 1.363 Validation Loss: 1.228 Accuracy: 0.219 Validation Accuracy: 0.184:   6%|▌         | 2061/37094 [03:31<56:41, 10.30it/s]

Epoch: 0 Iteration: 2062 Loss: 1.393 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   6%|▌         | 2061/37094 [03:31<56:41, 10.30it/s]

Epoch: 0 Iteration: 2062 Loss: 1.393 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   6%|▌         | 2063/37094 [03:31<56:39, 10.30it/s]

Epoch: 0 Iteration: 2063 Loss: 1.404 Validation Loss: 1.228 Accuracy: 0.216 Validation Accuracy: 0.184:   6%|▌         | 2063/37094 [03:31<56:39, 10.30it/s]

Epoch: 0 Iteration: 2064 Loss: 1.415 Validation Loss: 1.228 Accuracy: 0.216 Validation Accuracy: 0.184:   6%|▌         | 2063/37094 [03:31<56:39, 10.30it/s]

Epoch: 0 Iteration: 2064 Loss: 1.415 Validation Loss: 1.228 Accuracy: 0.216 Validation Accuracy: 0.184:   6%|▌         | 2065/37094 [03:31<56:38, 10.31it/s]

Epoch: 0 Iteration: 2065 Loss: 1.378 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   6%|▌         | 2065/37094 [03:31<56:38, 10.31it/s]

Epoch: 0 Iteration: 2066 Loss: 1.368 Validation Loss: 1.228 Accuracy: 0.222 Validation Accuracy: 0.184:   6%|▌         | 2065/37094 [03:31<56:38, 10.31it/s]

Epoch: 0 Iteration: 2066 Loss: 1.368 Validation Loss: 1.228 Accuracy: 0.222 Validation Accuracy: 0.184:   6%|▌         | 2067/37094 [03:31<56:40, 10.30it/s]

Epoch: 0 Iteration: 2067 Loss: 1.324 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   6%|▌         | 2067/37094 [03:31<56:40, 10.30it/s]

Epoch: 0 Iteration: 2068 Loss: 1.309 Validation Loss: 1.228 Accuracy: 0.219 Validation Accuracy: 0.184:   6%|▌         | 2067/37094 [03:32<56:40, 10.30it/s]

Epoch: 0 Iteration: 2068 Loss: 1.309 Validation Loss: 1.228 Accuracy: 0.219 Validation Accuracy: 0.184:   6%|▌         | 2069/37094 [03:32<56:40, 10.30it/s]

Epoch: 0 Iteration: 2069 Loss: 1.283 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   6%|▌         | 2069/37094 [03:32<56:40, 10.30it/s]

Epoch: 0 Iteration: 2070 Loss: 1.213 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   6%|▌         | 2069/37094 [03:32<56:40, 10.30it/s]

Epoch: 0 Iteration: 2070 Loss: 1.213 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   6%|▌         | 2071/37094 [03:32<56:39, 10.30it/s]

Epoch: 0 Iteration: 2071 Loss: 1.267 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   6%|▌         | 2071/37094 [03:32<56:39, 10.30it/s]

Epoch: 0 Iteration: 2072 Loss: 1.315 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   6%|▌         | 2071/37094 [03:32<56:39, 10.30it/s]

Epoch: 0 Iteration: 2072 Loss: 1.315 Validation Loss: 1.228 Accuracy: 0.228 Validation Accuracy: 0.184:   6%|▌         | 2073/37094 [03:32<56:38, 10.31it/s]

Epoch: 0 Iteration: 2073 Loss: 1.306 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2073/37094 [03:32<56:38, 10.31it/s]

Epoch: 0 Iteration: 2074 Loss: 1.325 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2073/37094 [03:32<56:38, 10.31it/s]

Epoch: 0 Iteration: 2074 Loss: 1.325 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2075/37094 [03:32<56:38, 10.31it/s]

Epoch: 0 Iteration: 2075 Loss: 1.291 Validation Loss: 1.228 Accuracy: 0.237 Validation Accuracy: 0.184:   6%|▌         | 2075/37094 [03:32<56:38, 10.31it/s]

Epoch: 0 Iteration: 2076 Loss: 1.303 Validation Loss: 1.228 Accuracy: 0.241 Validation Accuracy: 0.184:   6%|▌         | 2075/37094 [03:32<56:38, 10.31it/s]

Epoch: 0 Iteration: 2076 Loss: 1.303 Validation Loss: 1.228 Accuracy: 0.241 Validation Accuracy: 0.184:   6%|▌         | 2077/37094 [03:32<56:39, 10.30it/s]

Epoch: 0 Iteration: 2077 Loss: 1.313 Validation Loss: 1.228 Accuracy: 0.244 Validation Accuracy: 0.184:   6%|▌         | 2077/37094 [03:32<56:39, 10.30it/s]

Epoch: 0 Iteration: 2078 Loss: 1.290 Validation Loss: 1.228 Accuracy: 0.253 Validation Accuracy: 0.184:   6%|▌         | 2077/37094 [03:33<56:39, 10.30it/s]

Epoch: 0 Iteration: 2078 Loss: 1.290 Validation Loss: 1.228 Accuracy: 0.253 Validation Accuracy: 0.184:   6%|▌         | 2079/37094 [03:33<56:37, 10.31it/s]

Epoch: 0 Iteration: 2079 Loss: 1.346 Validation Loss: 1.228 Accuracy: 0.241 Validation Accuracy: 0.184:   6%|▌         | 2079/37094 [03:33<56:37, 10.31it/s]

Epoch: 0 Iteration: 2080 Loss: 1.327 Validation Loss: 1.228 Accuracy: 0.234 Validation Accuracy: 0.184:   6%|▌         | 2079/37094 [03:33<56:37, 10.31it/s]

Epoch: 0 Iteration: 2080 Loss: 1.327 Validation Loss: 1.228 Accuracy: 0.234 Validation Accuracy: 0.184:   6%|▌         | 2081/37094 [03:33<56:36, 10.31it/s]

Epoch: 0 Iteration: 2081 Loss: 1.329 Validation Loss: 1.228 Accuracy: 0.241 Validation Accuracy: 0.184:   6%|▌         | 2081/37094 [03:33<56:36, 10.31it/s]

Epoch: 0 Iteration: 2082 Loss: 1.299 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   6%|▌         | 2081/37094 [03:33<56:36, 10.31it/s]

Epoch: 0 Iteration: 2082 Loss: 1.299 Validation Loss: 1.228 Accuracy: 0.259 Validation Accuracy: 0.184:   6%|▌         | 2083/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2083 Loss: 1.267 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2083/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2084 Loss: 1.248 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2083/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2084 Loss: 1.248 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2085/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2085 Loss: 1.305 Validation Loss: 1.228 Accuracy: 0.244 Validation Accuracy: 0.184:   6%|▌         | 2085/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2086 Loss: 1.336 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2085/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2086 Loss: 1.336 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2087/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2087 Loss: 1.356 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2087/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2088 Loss: 1.401 Validation Loss: 1.228 Accuracy: 0.241 Validation Accuracy: 0.184:   6%|▌         | 2087/37094 [03:33<56:35, 10.31it/s]

Epoch: 0 Iteration: 2088 Loss: 1.401 Validation Loss: 1.228 Accuracy: 0.241 Validation Accuracy: 0.184:   6%|▌         | 2089/37094 [03:33<56:36, 10.31it/s]

Epoch: 0 Iteration: 2089 Loss: 1.415 Validation Loss: 1.228 Accuracy: 0.244 Validation Accuracy: 0.184:   6%|▌         | 2089/37094 [03:34<56:36, 10.31it/s]

Epoch: 0 Iteration: 2090 Loss: 1.416 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   6%|▌         | 2089/37094 [03:34<56:36, 10.31it/s]

Epoch: 0 Iteration: 2090 Loss: 1.416 Validation Loss: 1.228 Accuracy: 0.256 Validation Accuracy: 0.184:   6%|▌         | 2091/37094 [03:34<56:36, 10.31it/s]

Epoch: 0 Iteration: 2091 Loss: 1.383 Validation Loss: 1.228 Accuracy: 0.247 Validation Accuracy: 0.184:   6%|▌         | 2091/37094 [03:34<56:36, 10.31it/s]

Epoch: 0 Iteration: 2092 Loss: 1.361 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   6%|▌         | 2091/37094 [03:34<56:36, 10.31it/s]

Epoch: 0 Iteration: 2092 Loss: 1.361 Validation Loss: 1.228 Accuracy: 0.225 Validation Accuracy: 0.184:   6%|▌         | 2093/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2093 Loss: 1.370 Validation Loss: 1.228 Accuracy: 0.219 Validation Accuracy: 0.184:   6%|▌         | 2093/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2094 Loss: 1.432 Validation Loss: 1.228 Accuracy: 0.216 Validation Accuracy: 0.184:   6%|▌         | 2093/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2094 Loss: 1.432 Validation Loss: 1.228 Accuracy: 0.216 Validation Accuracy: 0.184:   6%|▌         | 2095/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2095 Loss: 1.417 Validation Loss: 1.228 Accuracy: 0.219 Validation Accuracy: 0.184:   6%|▌         | 2095/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2096 Loss: 1.440 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   6%|▌         | 2095/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2096 Loss: 1.440 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   6%|▌         | 2097/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2097 Loss: 1.464 Validation Loss: 1.228 Accuracy: 0.206 Validation Accuracy: 0.184:   6%|▌         | 2097/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2098 Loss: 1.487 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   6%|▌         | 2097/37094 [03:34<56:35, 10.31it/s]

Epoch: 0 Iteration: 2098 Loss: 1.487 Validation Loss: 1.228 Accuracy: 0.209 Validation Accuracy: 0.184:   6%|▌         | 2099/37094 [03:34<56:34, 10.31it/s]

Epoch: 0 Iteration: 2099 Loss: 1.501 Validation Loss: 1.228 Accuracy: 0.212 Validation Accuracy: 0.184:   6%|▌         | 2099/37094 [03:35<56:34, 10.31it/s]

Epoch: 0 Iteration: 2100 Loss: 1.470 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2099/37094 [03:35<56:34, 10.31it/s]

Epoch: 0 Iteration: 2100 Loss: 1.470 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2101/37094 [03:35<1:44:19,  5.59it/s]

Epoch: 0 Iteration: 2101 Loss: 1.472 Validation Loss: 1.239 Accuracy: 0.222 Validation Accuracy: 0.188:   6%|▌         | 2101/37094 [03:35<1:44:19,  5.59it/s]

Epoch: 0 Iteration: 2102 Loss: 1.469 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2101/37094 [03:35<1:44:19,  5.59it/s]

Epoch: 0 Iteration: 2102 Loss: 1.469 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2103/37094 [03:35<1:29:16,  6.53it/s]

Epoch: 0 Iteration: 2103 Loss: 1.488 Validation Loss: 1.239 Accuracy: 0.212 Validation Accuracy: 0.188:   6%|▌         | 2103/37094 [03:35<1:29:16,  6.53it/s]

Epoch: 0 Iteration: 2104 Loss: 1.458 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2103/37094 [03:36<1:29:16,  6.53it/s]

Epoch: 0 Iteration: 2104 Loss: 1.458 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2105/37094 [03:36<1:19:29,  7.34it/s]

Epoch: 0 Iteration: 2105 Loss: 1.443 Validation Loss: 1.239 Accuracy: 0.222 Validation Accuracy: 0.188:   6%|▌         | 2105/37094 [03:36<1:19:29,  7.34it/s]

Epoch: 0 Iteration: 2106 Loss: 1.361 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2105/37094 [03:36<1:19:29,  7.34it/s]

Epoch: 0 Iteration: 2106 Loss: 1.361 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2107/37094 [03:36<1:12:37,  8.03it/s]

Epoch: 0 Iteration: 2107 Loss: 1.363 Validation Loss: 1.239 Accuracy: 0.237 Validation Accuracy: 0.188:   6%|▌         | 2107/37094 [03:36<1:12:37,  8.03it/s]

Epoch: 0 Iteration: 2108 Loss: 1.331 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2107/37094 [03:36<1:12:37,  8.03it/s]

Epoch: 0 Iteration: 2108 Loss: 1.331 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2109/37094 [03:36<1:07:47,  8.60it/s]

Epoch: 0 Iteration: 2109 Loss: 1.336 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2109/37094 [03:36<1:07:47,  8.60it/s]

Epoch: 0 Iteration: 2110 Loss: 1.365 Validation Loss: 1.239 Accuracy: 0.206 Validation Accuracy: 0.188:   6%|▌         | 2109/37094 [03:36<1:07:47,  8.60it/s]

Epoch: 0 Iteration: 2110 Loss: 1.365 Validation Loss: 1.239 Accuracy: 0.206 Validation Accuracy: 0.188:   6%|▌         | 2111/37094 [03:36<1:04:27,  9.04it/s]

Epoch: 0 Iteration: 2111 Loss: 1.387 Validation Loss: 1.239 Accuracy: 0.200 Validation Accuracy: 0.188:   6%|▌         | 2111/37094 [03:36<1:04:27,  9.04it/s]

Epoch: 0 Iteration: 2112 Loss: 1.376 Validation Loss: 1.239 Accuracy: 0.197 Validation Accuracy: 0.188:   6%|▌         | 2111/37094 [03:36<1:04:27,  9.04it/s]

Epoch: 0 Iteration: 2112 Loss: 1.376 Validation Loss: 1.239 Accuracy: 0.197 Validation Accuracy: 0.188:   6%|▌         | 2113/37094 [03:36<1:02:07,  9.39it/s]

Epoch: 0 Iteration: 2113 Loss: 1.375 Validation Loss: 1.239 Accuracy: 0.209 Validation Accuracy: 0.188:   6%|▌         | 2113/37094 [03:36<1:02:07,  9.39it/s]

Epoch: 0 Iteration: 2114 Loss: 1.285 Validation Loss: 1.239 Accuracy: 0.203 Validation Accuracy: 0.188:   6%|▌         | 2113/37094 [03:37<1:02:07,  9.39it/s]

Epoch: 0 Iteration: 2114 Loss: 1.285 Validation Loss: 1.239 Accuracy: 0.203 Validation Accuracy: 0.188:   6%|▌         | 2115/37094 [03:37<1:00:27,  9.64it/s]

Epoch: 0 Iteration: 2115 Loss: 1.360 Validation Loss: 1.239 Accuracy: 0.212 Validation Accuracy: 0.188:   6%|▌         | 2115/37094 [03:37<1:00:27,  9.64it/s]

Epoch: 0 Iteration: 2116 Loss: 1.356 Validation Loss: 1.239 Accuracy: 0.200 Validation Accuracy: 0.188:   6%|▌         | 2115/37094 [03:37<1:00:27,  9.64it/s]

Epoch: 0 Iteration: 2116 Loss: 1.356 Validation Loss: 1.239 Accuracy: 0.200 Validation Accuracy: 0.188:   6%|▌         | 2117/37094 [03:37<59:17,  9.83it/s]  

Epoch: 0 Iteration: 2117 Loss: 1.320 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2117/37094 [03:37<59:17,  9.83it/s]

Epoch: 0 Iteration: 2118 Loss: 1.326 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2117/37094 [03:37<59:17,  9.83it/s]

Epoch: 0 Iteration: 2118 Loss: 1.326 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2119/37094 [03:37<58:26,  9.97it/s]

Epoch: 0 Iteration: 2119 Loss: 1.256 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2119/37094 [03:37<58:26,  9.97it/s]

Epoch: 0 Iteration: 2120 Loss: 1.280 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2119/37094 [03:37<58:26,  9.97it/s]

Epoch: 0 Iteration: 2120 Loss: 1.280 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2121/37094 [03:37<57:52, 10.07it/s]

Epoch: 0 Iteration: 2121 Loss: 1.268 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2121/37094 [03:37<57:52, 10.07it/s]

Epoch: 0 Iteration: 2122 Loss: 1.270 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2121/37094 [03:37<57:52, 10.07it/s]

Epoch: 0 Iteration: 2122 Loss: 1.270 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2123/37094 [03:37<57:28, 10.14it/s]

Epoch: 0 Iteration: 2123 Loss: 1.260 Validation Loss: 1.239 Accuracy: 0.259 Validation Accuracy: 0.188:   6%|▌         | 2123/37094 [03:37<57:28, 10.14it/s]

Epoch: 0 Iteration: 2124 Loss: 1.245 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2123/37094 [03:38<57:28, 10.14it/s]

Epoch: 0 Iteration: 2124 Loss: 1.245 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2125/37094 [03:38<57:12, 10.19it/s]

Epoch: 0 Iteration: 2125 Loss: 1.217 Validation Loss: 1.239 Accuracy: 0.272 Validation Accuracy: 0.188:   6%|▌         | 2125/37094 [03:38<57:12, 10.19it/s]

Epoch: 0 Iteration: 2126 Loss: 1.274 Validation Loss: 1.239 Accuracy: 0.266 Validation Accuracy: 0.188:   6%|▌         | 2125/37094 [03:38<57:12, 10.19it/s]

Epoch: 0 Iteration: 2126 Loss: 1.274 Validation Loss: 1.239 Accuracy: 0.266 Validation Accuracy: 0.188:   6%|▌         | 2127/37094 [03:38<57:00, 10.22it/s]

Epoch: 0 Iteration: 2127 Loss: 1.323 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2127/37094 [03:38<57:00, 10.22it/s]

Epoch: 0 Iteration: 2128 Loss: 1.325 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2127/37094 [03:38<57:00, 10.22it/s]

Epoch: 0 Iteration: 2128 Loss: 1.325 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2129/37094 [03:38<56:51, 10.25it/s]

Epoch: 0 Iteration: 2129 Loss: 1.310 Validation Loss: 1.239 Accuracy: 0.250 Validation Accuracy: 0.188:   6%|▌         | 2129/37094 [03:38<56:51, 10.25it/s]

Epoch: 0 Iteration: 2130 Loss: 1.320 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2129/37094 [03:38<56:51, 10.25it/s]

Epoch: 0 Iteration: 2130 Loss: 1.320 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2131/37094 [03:38<56:45, 10.27it/s]

Epoch: 0 Iteration: 2131 Loss: 1.288 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2131/37094 [03:38<56:45, 10.27it/s]

Epoch: 0 Iteration: 2132 Loss: 1.285 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2131/37094 [03:38<56:45, 10.27it/s]

Epoch: 0 Iteration: 2132 Loss: 1.285 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2133/37094 [03:38<56:41, 10.28it/s]

Epoch: 0 Iteration: 2133 Loss: 1.296 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2133/37094 [03:38<56:41, 10.28it/s]

Epoch: 0 Iteration: 2134 Loss: 1.334 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2133/37094 [03:38<56:41, 10.28it/s]

Epoch: 0 Iteration: 2134 Loss: 1.334 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2135/37094 [03:38<56:38, 10.29it/s]

Epoch: 0 Iteration: 2135 Loss: 1.273 Validation Loss: 1.239 Accuracy: 0.219 Validation Accuracy: 0.188:   6%|▌         | 2135/37094 [03:39<56:38, 10.29it/s]

Epoch: 0 Iteration: 2136 Loss: 1.253 Validation Loss: 1.239 Accuracy: 0.222 Validation Accuracy: 0.188:   6%|▌         | 2135/37094 [03:39<56:38, 10.29it/s]

Epoch: 0 Iteration: 2136 Loss: 1.253 Validation Loss: 1.239 Accuracy: 0.222 Validation Accuracy: 0.188:   6%|▌         | 2137/37094 [03:39<56:36, 10.29it/s]

Epoch: 0 Iteration: 2137 Loss: 1.340 Validation Loss: 1.239 Accuracy: 0.219 Validation Accuracy: 0.188:   6%|▌         | 2137/37094 [03:39<56:36, 10.29it/s]

Epoch: 0 Iteration: 2138 Loss: 1.345 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2137/37094 [03:39<56:36, 10.29it/s]

Epoch: 0 Iteration: 2138 Loss: 1.345 Validation Loss: 1.239 Accuracy: 0.216 Validation Accuracy: 0.188:   6%|▌         | 2139/37094 [03:39<56:38, 10.29it/s]

Epoch: 0 Iteration: 2139 Loss: 1.351 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2139/37094 [03:39<56:38, 10.29it/s]

Epoch: 0 Iteration: 2140 Loss: 1.370 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2139/37094 [03:39<56:38, 10.29it/s]

Epoch: 0 Iteration: 2140 Loss: 1.370 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2141/37094 [03:39<56:35, 10.29it/s]

Epoch: 0 Iteration: 2141 Loss: 1.394 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2141/37094 [03:39<56:35, 10.29it/s]

Epoch: 0 Iteration: 2142 Loss: 1.394 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2141/37094 [03:39<56:35, 10.29it/s]

Epoch: 0 Iteration: 2142 Loss: 1.394 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2143/37094 [03:39<56:34, 10.30it/s]

Epoch: 0 Iteration: 2143 Loss: 1.392 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2143/37094 [03:39<56:34, 10.30it/s]

Epoch: 0 Iteration: 2144 Loss: 1.392 Validation Loss: 1.239 Accuracy: 0.253 Validation Accuracy: 0.188:   6%|▌         | 2143/37094 [03:39<56:34, 10.30it/s]

Epoch: 0 Iteration: 2144 Loss: 1.392 Validation Loss: 1.239 Accuracy: 0.253 Validation Accuracy: 0.188:   6%|▌         | 2145/37094 [03:39<56:33, 10.30it/s]

Epoch: 0 Iteration: 2145 Loss: 1.398 Validation Loss: 1.239 Accuracy: 0.250 Validation Accuracy: 0.188:   6%|▌         | 2145/37094 [03:40<56:33, 10.30it/s]

Epoch: 0 Iteration: 2146 Loss: 1.391 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2145/37094 [03:40<56:33, 10.30it/s]

Epoch: 0 Iteration: 2146 Loss: 1.391 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2147/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2147 Loss: 1.373 Validation Loss: 1.239 Accuracy: 0.241 Validation Accuracy: 0.188:   6%|▌         | 2147/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2148 Loss: 1.401 Validation Loss: 1.239 Accuracy: 0.250 Validation Accuracy: 0.188:   6%|▌         | 2147/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2148 Loss: 1.401 Validation Loss: 1.239 Accuracy: 0.250 Validation Accuracy: 0.188:   6%|▌         | 2149/37094 [03:40<56:35, 10.29it/s]

Epoch: 0 Iteration: 2149 Loss: 1.411 Validation Loss: 1.239 Accuracy: 0.250 Validation Accuracy: 0.188:   6%|▌         | 2149/37094 [03:40<56:35, 10.29it/s]

Epoch: 0 Iteration: 2150 Loss: 1.380 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2149/37094 [03:40<56:35, 10.29it/s]

Epoch: 0 Iteration: 2150 Loss: 1.380 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2151/37094 [03:40<56:33, 10.30it/s]

Epoch: 0 Iteration: 2151 Loss: 1.367 Validation Loss: 1.239 Accuracy: 0.250 Validation Accuracy: 0.188:   6%|▌         | 2151/37094 [03:40<56:33, 10.30it/s]

Epoch: 0 Iteration: 2152 Loss: 1.356 Validation Loss: 1.239 Accuracy: 0.272 Validation Accuracy: 0.188:   6%|▌         | 2151/37094 [03:40<56:33, 10.30it/s]

Epoch: 0 Iteration: 2152 Loss: 1.356 Validation Loss: 1.239 Accuracy: 0.272 Validation Accuracy: 0.188:   6%|▌         | 2153/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2153 Loss: 1.387 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2153/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2154 Loss: 1.344 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2153/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2154 Loss: 1.344 Validation Loss: 1.239 Accuracy: 0.256 Validation Accuracy: 0.188:   6%|▌         | 2155/37094 [03:40<56:32, 10.30it/s]

Epoch: 0 Iteration: 2155 Loss: 1.323 Validation Loss: 1.239 Accuracy: 0.269 Validation Accuracy: 0.188:   6%|▌         | 2155/37094 [03:41<56:32, 10.30it/s]

Epoch: 0 Iteration: 2156 Loss: 1.335 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2155/37094 [03:41<56:32, 10.30it/s]

Epoch: 0 Iteration: 2156 Loss: 1.335 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2157/37094 [03:41<56:32, 10.30it/s]

Epoch: 0 Iteration: 2157 Loss: 1.293 Validation Loss: 1.239 Accuracy: 0.278 Validation Accuracy: 0.188:   6%|▌         | 2157/37094 [03:41<56:32, 10.30it/s]

Epoch: 0 Iteration: 2158 Loss: 1.309 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2157/37094 [03:41<56:32, 10.30it/s]

Epoch: 0 Iteration: 2158 Loss: 1.309 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2159/37094 [03:41<56:31, 10.30it/s]

Epoch: 0 Iteration: 2159 Loss: 1.286 Validation Loss: 1.239 Accuracy: 0.269 Validation Accuracy: 0.188:   6%|▌         | 2159/37094 [03:41<56:31, 10.30it/s]

Epoch: 0 Iteration: 2160 Loss: 1.274 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2159/37094 [03:41<56:31, 10.30it/s]

Epoch: 0 Iteration: 2160 Loss: 1.274 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2161/37094 [03:41<56:29, 10.31it/s]

Epoch: 0 Iteration: 2161 Loss: 1.253 Validation Loss: 1.239 Accuracy: 0.266 Validation Accuracy: 0.188:   6%|▌         | 2161/37094 [03:41<56:29, 10.31it/s]

Epoch: 0 Iteration: 2162 Loss: 1.259 Validation Loss: 1.239 Accuracy: 0.241 Validation Accuracy: 0.188:   6%|▌         | 2161/37094 [03:41<56:29, 10.31it/s]

Epoch: 0 Iteration: 2162 Loss: 1.259 Validation Loss: 1.239 Accuracy: 0.241 Validation Accuracy: 0.188:   6%|▌         | 2163/37094 [03:41<56:29, 10.31it/s]

Epoch: 0 Iteration: 2163 Loss: 1.324 Validation Loss: 1.239 Accuracy: 0.241 Validation Accuracy: 0.188:   6%|▌         | 2163/37094 [03:41<56:29, 10.31it/s]

Epoch: 0 Iteration: 2164 Loss: 1.349 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2163/37094 [03:41<56:29, 10.31it/s]

Epoch: 0 Iteration: 2164 Loss: 1.349 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2165/37094 [03:41<56:29, 10.30it/s]

Epoch: 0 Iteration: 2165 Loss: 1.345 Validation Loss: 1.239 Accuracy: 0.219 Validation Accuracy: 0.188:   6%|▌         | 2165/37094 [03:42<56:29, 10.30it/s]

Epoch: 0 Iteration: 2166 Loss: 1.306 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2165/37094 [03:42<56:29, 10.30it/s]

Epoch: 0 Iteration: 2166 Loss: 1.306 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2167/37094 [03:42<56:30, 10.30it/s]

Epoch: 0 Iteration: 2167 Loss: 1.302 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2167/37094 [03:42<56:30, 10.30it/s]

Epoch: 0 Iteration: 2168 Loss: 1.284 Validation Loss: 1.239 Accuracy: 0.222 Validation Accuracy: 0.188:   6%|▌         | 2167/37094 [03:42<56:30, 10.30it/s]

Epoch: 0 Iteration: 2168 Loss: 1.284 Validation Loss: 1.239 Accuracy: 0.222 Validation Accuracy: 0.188:   6%|▌         | 2169/37094 [03:42<56:28, 10.31it/s]

Epoch: 0 Iteration: 2169 Loss: 1.258 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2169/37094 [03:42<56:28, 10.31it/s]

Epoch: 0 Iteration: 2170 Loss: 1.306 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2169/37094 [03:42<56:28, 10.31it/s]

Epoch: 0 Iteration: 2170 Loss: 1.306 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2171/37094 [03:42<56:29, 10.30it/s]

Epoch: 0 Iteration: 2171 Loss: 1.317 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2171/37094 [03:42<56:29, 10.30it/s]

Epoch: 0 Iteration: 2172 Loss: 1.328 Validation Loss: 1.239 Accuracy: 0.269 Validation Accuracy: 0.188:   6%|▌         | 2171/37094 [03:42<56:29, 10.30it/s]

Epoch: 0 Iteration: 2172 Loss: 1.328 Validation Loss: 1.239 Accuracy: 0.269 Validation Accuracy: 0.188:   6%|▌         | 2173/37094 [03:42<56:28, 10.31it/s]

Epoch: 0 Iteration: 2173 Loss: 1.313 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2173/37094 [03:42<56:28, 10.31it/s]

Epoch: 0 Iteration: 2174 Loss: 1.326 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2173/37094 [03:42<56:28, 10.31it/s]

Epoch: 0 Iteration: 2174 Loss: 1.326 Validation Loss: 1.239 Accuracy: 0.275 Validation Accuracy: 0.188:   6%|▌         | 2175/37094 [03:42<56:27, 10.31it/s]

Epoch: 0 Iteration: 2175 Loss: 1.369 Validation Loss: 1.239 Accuracy: 0.281 Validation Accuracy: 0.188:   6%|▌         | 2175/37094 [03:42<56:27, 10.31it/s]

Epoch: 0 Iteration: 2176 Loss: 1.359 Validation Loss: 1.239 Accuracy: 0.281 Validation Accuracy: 0.188:   6%|▌         | 2175/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2176 Loss: 1.359 Validation Loss: 1.239 Accuracy: 0.281 Validation Accuracy: 0.188:   6%|▌         | 2177/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2177 Loss: 1.339 Validation Loss: 1.239 Accuracy: 0.259 Validation Accuracy: 0.188:   6%|▌         | 2177/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2178 Loss: 1.315 Validation Loss: 1.239 Accuracy: 0.269 Validation Accuracy: 0.188:   6%|▌         | 2177/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2178 Loss: 1.315 Validation Loss: 1.239 Accuracy: 0.269 Validation Accuracy: 0.188:   6%|▌         | 2179/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2179 Loss: 1.402 Validation Loss: 1.239 Accuracy: 0.266 Validation Accuracy: 0.188:   6%|▌         | 2179/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2180 Loss: 1.405 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2179/37094 [03:43<56:27, 10.31it/s]

Epoch: 0 Iteration: 2180 Loss: 1.405 Validation Loss: 1.239 Accuracy: 0.263 Validation Accuracy: 0.188:   6%|▌         | 2181/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2181 Loss: 1.459 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2181/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2182 Loss: 1.444 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2181/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2182 Loss: 1.444 Validation Loss: 1.239 Accuracy: 0.225 Validation Accuracy: 0.188:   6%|▌         | 2183/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2183 Loss: 1.368 Validation Loss: 1.239 Accuracy: 0.237 Validation Accuracy: 0.188:   6%|▌         | 2183/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2184 Loss: 1.386 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2183/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2184 Loss: 1.386 Validation Loss: 1.239 Accuracy: 0.244 Validation Accuracy: 0.188:   6%|▌         | 2185/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2185 Loss: 1.375 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2185/37094 [03:43<56:26, 10.31it/s]

Epoch: 0 Iteration: 2186 Loss: 1.388 Validation Loss: 1.239 Accuracy: 0.241 Validation Accuracy: 0.188:   6%|▌         | 2185/37094 [03:44<56:26, 10.31it/s]

Epoch: 0 Iteration: 2186 Loss: 1.388 Validation Loss: 1.239 Accuracy: 0.241 Validation Accuracy: 0.188:   6%|▌         | 2187/37094 [03:44<56:27, 10.30it/s]

Epoch: 0 Iteration: 2187 Loss: 1.370 Validation Loss: 1.239 Accuracy: 0.259 Validation Accuracy: 0.188:   6%|▌         | 2187/37094 [03:44<56:27, 10.30it/s]

Epoch: 0 Iteration: 2188 Loss: 1.401 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2187/37094 [03:44<56:27, 10.30it/s]

Epoch: 0 Iteration: 2188 Loss: 1.401 Validation Loss: 1.239 Accuracy: 0.247 Validation Accuracy: 0.188:   6%|▌         | 2189/37094 [03:44<56:27, 10.31it/s]

Epoch: 0 Iteration: 2189 Loss: 1.463 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2189/37094 [03:44<56:27, 10.31it/s]

Epoch: 0 Iteration: 2190 Loss: 1.416 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2189/37094 [03:44<56:27, 10.31it/s]

Epoch: 0 Iteration: 2190 Loss: 1.416 Validation Loss: 1.239 Accuracy: 0.228 Validation Accuracy: 0.188:   6%|▌         | 2191/37094 [03:44<56:26, 10.31it/s]

Epoch: 0 Iteration: 2191 Loss: 1.465 Validation Loss: 1.239 Accuracy: 0.234 Validation Accuracy: 0.188:   6%|▌         | 2191/37094 [03:44<56:26, 10.31it/s]

Epoch: 0 Iteration: 2192 Loss: 1.483 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2191/37094 [03:44<56:26, 10.31it/s]

Epoch: 0 Iteration: 2192 Loss: 1.483 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2193/37094 [03:44<56:25, 10.31it/s]

Epoch: 0 Iteration: 2193 Loss: 1.443 Validation Loss: 1.239 Accuracy: 0.231 Validation Accuracy: 0.188:   6%|▌         | 2193/37094 [03:44<56:25, 10.31it/s]

Epoch: 0 Iteration: 2194 Loss: 1.475 Validation Loss: 1.239 Accuracy: 0.219 Validation Accuracy: 0.188:   6%|▌         | 2193/37094 [03:44<56:25, 10.31it/s]

Epoch: 0 Iteration: 2194 Loss: 1.475 Validation Loss: 1.239 Accuracy: 0.219 Validation Accuracy: 0.188:   6%|▌         | 2195/37094 [03:44<56:25, 10.31it/s]

Epoch: 0 Iteration: 2195 Loss: 1.456 Validation Loss: 1.239 Accuracy: 0.209 Validation Accuracy: 0.188:   6%|▌         | 2195/37094 [03:44<56:25, 10.31it/s]

Epoch: 0 Iteration: 2196 Loss: 1.465 Validation Loss: 1.239 Accuracy: 0.203 Validation Accuracy: 0.188:   6%|▌         | 2195/37094 [03:45<56:25, 10.31it/s]

Epoch: 0 Iteration: 2196 Loss: 1.465 Validation Loss: 1.239 Accuracy: 0.203 Validation Accuracy: 0.188:   6%|▌         | 2197/37094 [03:45<56:26, 10.31it/s]

Epoch: 0 Iteration: 2197 Loss: 1.509 Validation Loss: 1.239 Accuracy: 0.200 Validation Accuracy: 0.188:   6%|▌         | 2197/37094 [03:45<56:26, 10.31it/s]

Epoch: 0 Iteration: 2198 Loss: 1.514 Validation Loss: 1.239 Accuracy: 0.197 Validation Accuracy: 0.188:   6%|▌         | 2197/37094 [03:45<56:26, 10.31it/s]

Epoch: 0 Iteration: 2198 Loss: 1.514 Validation Loss: 1.239 Accuracy: 0.197 Validation Accuracy: 0.188:   6%|▌         | 2199/37094 [03:45<56:27, 10.30it/s]

Epoch: 0 Iteration: 2199 Loss: 1.446 Validation Loss: 1.239 Accuracy: 0.212 Validation Accuracy: 0.188:   6%|▌         | 2199/37094 [03:45<56:27, 10.30it/s]

Epoch: 0 Iteration: 2200 Loss: 1.450 Validation Loss: 1.289 Accuracy: 0.209 Validation Accuracy: 0.186:   6%|▌         | 2199/37094 [03:45<56:27, 10.30it/s]

Epoch: 0 Iteration: 2200 Loss: 1.450 Validation Loss: 1.289 Accuracy: 0.209 Validation Accuracy: 0.186:   6%|▌         | 2201/37094 [03:45<1:44:00,  5.59it/s]

Epoch: 0 Iteration: 2201 Loss: 1.421 Validation Loss: 1.289 Accuracy: 0.209 Validation Accuracy: 0.186:   6%|▌         | 2201/37094 [03:46<1:44:00,  5.59it/s]

Epoch: 0 Iteration: 2202 Loss: 1.437 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2201/37094 [03:46<1:44:00,  5.59it/s]

Epoch: 0 Iteration: 2202 Loss: 1.437 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2203/37094 [03:46<1:29:01,  6.53it/s]

Epoch: 0 Iteration: 2203 Loss: 1.432 Validation Loss: 1.289 Accuracy: 0.206 Validation Accuracy: 0.186:   6%|▌         | 2203/37094 [03:46<1:29:01,  6.53it/s]

Epoch: 0 Iteration: 2204 Loss: 1.448 Validation Loss: 1.289 Accuracy: 0.212 Validation Accuracy: 0.186:   6%|▌         | 2203/37094 [03:46<1:29:01,  6.53it/s]

Epoch: 0 Iteration: 2204 Loss: 1.448 Validation Loss: 1.289 Accuracy: 0.212 Validation Accuracy: 0.186:   6%|▌         | 2205/37094 [03:46<1:19:16,  7.34it/s]

Epoch: 0 Iteration: 2205 Loss: 1.418 Validation Loss: 1.289 Accuracy: 0.216 Validation Accuracy: 0.186:   6%|▌         | 2205/37094 [03:46<1:19:16,  7.34it/s]

Epoch: 0 Iteration: 2206 Loss: 1.421 Validation Loss: 1.289 Accuracy: 0.209 Validation Accuracy: 0.186:   6%|▌         | 2205/37094 [03:46<1:19:16,  7.34it/s]

Epoch: 0 Iteration: 2206 Loss: 1.421 Validation Loss: 1.289 Accuracy: 0.209 Validation Accuracy: 0.186:   6%|▌         | 2207/37094 [03:46<1:12:25,  8.03it/s]

Epoch: 0 Iteration: 2207 Loss: 1.398 Validation Loss: 1.289 Accuracy: 0.209 Validation Accuracy: 0.186:   6%|▌         | 2207/37094 [03:46<1:12:25,  8.03it/s]

Epoch: 0 Iteration: 2208 Loss: 1.355 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2207/37094 [03:46<1:12:25,  8.03it/s]

Epoch: 0 Iteration: 2208 Loss: 1.355 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2209/37094 [03:46<1:07:37,  8.60it/s]

Epoch: 0 Iteration: 2209 Loss: 1.342 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2209/37094 [03:46<1:07:37,  8.60it/s]

Epoch: 0 Iteration: 2210 Loss: 1.339 Validation Loss: 1.289 Accuracy: 0.241 Validation Accuracy: 0.186:   6%|▌         | 2209/37094 [03:46<1:07:37,  8.60it/s]

Epoch: 0 Iteration: 2210 Loss: 1.339 Validation Loss: 1.289 Accuracy: 0.241 Validation Accuracy: 0.186:   6%|▌         | 2211/37094 [03:46<1:04:21,  9.03it/s]

Epoch: 0 Iteration: 2211 Loss: 1.346 Validation Loss: 1.289 Accuracy: 0.228 Validation Accuracy: 0.186:   6%|▌         | 2211/37094 [03:47<1:04:21,  9.03it/s]

Epoch: 0 Iteration: 2212 Loss: 1.337 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2211/37094 [03:47<1:04:21,  9.03it/s]

Epoch: 0 Iteration: 2212 Loss: 1.337 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2213/37094 [03:47<1:01:58,  9.38it/s]

Epoch: 0 Iteration: 2213 Loss: 1.364 Validation Loss: 1.289 Accuracy: 0.231 Validation Accuracy: 0.186:   6%|▌         | 2213/37094 [03:47<1:01:58,  9.38it/s]

Epoch: 0 Iteration: 2214 Loss: 1.341 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2213/37094 [03:47<1:01:58,  9.38it/s]

Epoch: 0 Iteration: 2214 Loss: 1.341 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2215/37094 [03:47<1:00:20,  9.63it/s]

Epoch: 0 Iteration: 2215 Loss: 1.345 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2215/37094 [03:47<1:00:20,  9.63it/s]

Epoch: 0 Iteration: 2216 Loss: 1.316 Validation Loss: 1.289 Accuracy: 0.244 Validation Accuracy: 0.186:   6%|▌         | 2215/37094 [03:47<1:00:20,  9.63it/s]

Epoch: 0 Iteration: 2216 Loss: 1.316 Validation Loss: 1.289 Accuracy: 0.244 Validation Accuracy: 0.186:   6%|▌         | 2217/37094 [03:47<59:09,  9.83it/s]  

Epoch: 0 Iteration: 2217 Loss: 1.283 Validation Loss: 1.289 Accuracy: 0.234 Validation Accuracy: 0.186:   6%|▌         | 2217/37094 [03:47<59:09,  9.83it/s]

Epoch: 0 Iteration: 2218 Loss: 1.275 Validation Loss: 1.289 Accuracy: 0.231 Validation Accuracy: 0.186:   6%|▌         | 2217/37094 [03:47<59:09,  9.83it/s]

Epoch: 0 Iteration: 2218 Loss: 1.275 Validation Loss: 1.289 Accuracy: 0.231 Validation Accuracy: 0.186:   6%|▌         | 2219/37094 [03:47<58:20,  9.96it/s]

Epoch: 0 Iteration: 2219 Loss: 1.275 Validation Loss: 1.289 Accuracy: 0.228 Validation Accuracy: 0.186:   6%|▌         | 2219/37094 [03:47<58:20,  9.96it/s]

Epoch: 0 Iteration: 2220 Loss: 1.286 Validation Loss: 1.289 Accuracy: 0.212 Validation Accuracy: 0.186:   6%|▌         | 2219/37094 [03:47<58:20,  9.96it/s]

Epoch: 0 Iteration: 2220 Loss: 1.286 Validation Loss: 1.289 Accuracy: 0.212 Validation Accuracy: 0.186:   6%|▌         | 2221/37094 [03:47<57:45, 10.06it/s]

Epoch: 0 Iteration: 2221 Loss: 1.257 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2221/37094 [03:47<57:45, 10.06it/s]

Epoch: 0 Iteration: 2222 Loss: 1.251 Validation Loss: 1.289 Accuracy: 0.222 Validation Accuracy: 0.186:   6%|▌         | 2221/37094 [03:48<57:45, 10.06it/s]

Epoch: 0 Iteration: 2222 Loss: 1.251 Validation Loss: 1.289 Accuracy: 0.222 Validation Accuracy: 0.186:   6%|▌         | 2223/37094 [03:48<57:21, 10.13it/s]

Epoch: 0 Iteration: 2223 Loss: 1.292 Validation Loss: 1.289 Accuracy: 0.197 Validation Accuracy: 0.186:   6%|▌         | 2223/37094 [03:48<57:21, 10.13it/s]

Epoch: 0 Iteration: 2224 Loss: 1.257 Validation Loss: 1.289 Accuracy: 0.197 Validation Accuracy: 0.186:   6%|▌         | 2223/37094 [03:48<57:21, 10.13it/s]

Epoch: 0 Iteration: 2224 Loss: 1.257 Validation Loss: 1.289 Accuracy: 0.197 Validation Accuracy: 0.186:   6%|▌         | 2225/37094 [03:48<57:06, 10.18it/s]

Epoch: 0 Iteration: 2225 Loss: 1.295 Validation Loss: 1.289 Accuracy: 0.206 Validation Accuracy: 0.186:   6%|▌         | 2225/37094 [03:48<57:06, 10.18it/s]

Epoch: 0 Iteration: 2226 Loss: 1.283 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2225/37094 [03:48<57:06, 10.18it/s]

Epoch: 0 Iteration: 2226 Loss: 1.283 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2227/37094 [03:48<56:53, 10.21it/s]

Epoch: 0 Iteration: 2227 Loss: 1.295 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2227/37094 [03:48<56:53, 10.21it/s]

Epoch: 0 Iteration: 2228 Loss: 1.286 Validation Loss: 1.289 Accuracy: 0.216 Validation Accuracy: 0.186:   6%|▌         | 2227/37094 [03:48<56:53, 10.21it/s]

Epoch: 0 Iteration: 2228 Loss: 1.286 Validation Loss: 1.289 Accuracy: 0.216 Validation Accuracy: 0.186:   6%|▌         | 2229/37094 [03:48<56:45, 10.24it/s]

Epoch: 0 Iteration: 2229 Loss: 1.312 Validation Loss: 1.289 Accuracy: 0.222 Validation Accuracy: 0.186:   6%|▌         | 2229/37094 [03:48<56:45, 10.24it/s]

Epoch: 0 Iteration: 2230 Loss: 1.302 Validation Loss: 1.289 Accuracy: 0.234 Validation Accuracy: 0.186:   6%|▌         | 2229/37094 [03:48<56:45, 10.24it/s]

Epoch: 0 Iteration: 2230 Loss: 1.302 Validation Loss: 1.289 Accuracy: 0.234 Validation Accuracy: 0.186:   6%|▌         | 2231/37094 [03:48<56:38, 10.26it/s]

Epoch: 0 Iteration: 2231 Loss: 1.269 Validation Loss: 1.289 Accuracy: 0.244 Validation Accuracy: 0.186:   6%|▌         | 2231/37094 [03:48<56:38, 10.26it/s]

Epoch: 0 Iteration: 2232 Loss: 1.267 Validation Loss: 1.289 Accuracy: 0.244 Validation Accuracy: 0.186:   6%|▌         | 2231/37094 [03:49<56:38, 10.26it/s]

Epoch: 0 Iteration: 2232 Loss: 1.267 Validation Loss: 1.289 Accuracy: 0.244 Validation Accuracy: 0.186:   6%|▌         | 2233/37094 [03:49<56:34, 10.27it/s]

Epoch: 0 Iteration: 2233 Loss: 1.266 Validation Loss: 1.289 Accuracy: 0.256 Validation Accuracy: 0.186:   6%|▌         | 2233/37094 [03:49<56:34, 10.27it/s]

Epoch: 0 Iteration: 2234 Loss: 1.257 Validation Loss: 1.289 Accuracy: 0.253 Validation Accuracy: 0.186:   6%|▌         | 2233/37094 [03:49<56:34, 10.27it/s]

Epoch: 0 Iteration: 2234 Loss: 1.257 Validation Loss: 1.289 Accuracy: 0.253 Validation Accuracy: 0.186:   6%|▌         | 2235/37094 [03:49<56:31, 10.28it/s]

Epoch: 0 Iteration: 2235 Loss: 1.258 Validation Loss: 1.289 Accuracy: 0.259 Validation Accuracy: 0.186:   6%|▌         | 2235/37094 [03:49<56:31, 10.28it/s]

Epoch: 0 Iteration: 2236 Loss: 1.279 Validation Loss: 1.289 Accuracy: 0.241 Validation Accuracy: 0.186:   6%|▌         | 2235/37094 [03:49<56:31, 10.28it/s]

Epoch: 0 Iteration: 2236 Loss: 1.279 Validation Loss: 1.289 Accuracy: 0.241 Validation Accuracy: 0.186:   6%|▌         | 2237/37094 [03:49<56:31, 10.28it/s]

Epoch: 0 Iteration: 2237 Loss: 1.289 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2237/37094 [03:49<56:31, 10.28it/s]

Epoch: 0 Iteration: 2238 Loss: 1.304 Validation Loss: 1.289 Accuracy: 0.266 Validation Accuracy: 0.186:   6%|▌         | 2237/37094 [03:49<56:31, 10.28it/s]

Epoch: 0 Iteration: 2238 Loss: 1.304 Validation Loss: 1.289 Accuracy: 0.266 Validation Accuracy: 0.186:   6%|▌         | 2239/37094 [03:49<56:32, 10.27it/s]

Epoch: 0 Iteration: 2239 Loss: 1.331 Validation Loss: 1.289 Accuracy: 0.269 Validation Accuracy: 0.186:   6%|▌         | 2239/37094 [03:49<56:32, 10.27it/s]

Epoch: 0 Iteration: 2240 Loss: 1.295 Validation Loss: 1.289 Accuracy: 0.253 Validation Accuracy: 0.186:   6%|▌         | 2239/37094 [03:49<56:32, 10.27it/s]

Epoch: 0 Iteration: 2240 Loss: 1.295 Validation Loss: 1.289 Accuracy: 0.253 Validation Accuracy: 0.186:   6%|▌         | 2241/37094 [03:49<56:33, 10.27it/s]

Epoch: 0 Iteration: 2241 Loss: 1.304 Validation Loss: 1.289 Accuracy: 0.247 Validation Accuracy: 0.186:   6%|▌         | 2241/37094 [03:49<56:33, 10.27it/s]

Epoch: 0 Iteration: 2242 Loss: 1.321 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2241/37094 [03:50<56:33, 10.27it/s]

Epoch: 0 Iteration: 2242 Loss: 1.321 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2243/37094 [03:50<56:34, 10.27it/s]

Epoch: 0 Iteration: 2243 Loss: 1.308 Validation Loss: 1.289 Accuracy: 0.256 Validation Accuracy: 0.186:   6%|▌         | 2243/37094 [03:50<56:34, 10.27it/s]

Epoch: 0 Iteration: 2244 Loss: 1.359 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2243/37094 [03:50<56:34, 10.27it/s]

Epoch: 0 Iteration: 2244 Loss: 1.359 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2245/37094 [03:50<56:31, 10.28it/s]

Epoch: 0 Iteration: 2245 Loss: 1.351 Validation Loss: 1.289 Accuracy: 0.216 Validation Accuracy: 0.186:   6%|▌         | 2245/37094 [03:50<56:31, 10.28it/s]

Epoch: 0 Iteration: 2246 Loss: 1.355 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2245/37094 [03:50<56:31, 10.28it/s]

Epoch: 0 Iteration: 2246 Loss: 1.355 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2247/37094 [03:50<56:28, 10.28it/s]

Epoch: 0 Iteration: 2247 Loss: 1.390 Validation Loss: 1.289 Accuracy: 0.203 Validation Accuracy: 0.186:   6%|▌         | 2247/37094 [03:50<56:28, 10.28it/s]

Epoch: 0 Iteration: 2248 Loss: 1.402 Validation Loss: 1.289 Accuracy: 0.206 Validation Accuracy: 0.186:   6%|▌         | 2247/37094 [03:50<56:28, 10.28it/s]

Epoch: 0 Iteration: 2248 Loss: 1.402 Validation Loss: 1.289 Accuracy: 0.206 Validation Accuracy: 0.186:   6%|▌         | 2249/37094 [03:50<56:26, 10.29it/s]

Epoch: 0 Iteration: 2249 Loss: 1.360 Validation Loss: 1.289 Accuracy: 0.194 Validation Accuracy: 0.186:   6%|▌         | 2249/37094 [03:50<56:26, 10.29it/s]

Epoch: 0 Iteration: 2250 Loss: 1.349 Validation Loss: 1.289 Accuracy: 0.197 Validation Accuracy: 0.186:   6%|▌         | 2249/37094 [03:50<56:26, 10.29it/s]

Epoch: 0 Iteration: 2250 Loss: 1.349 Validation Loss: 1.289 Accuracy: 0.197 Validation Accuracy: 0.186:   6%|▌         | 2251/37094 [03:50<56:24, 10.29it/s]

Epoch: 0 Iteration: 2251 Loss: 1.372 Validation Loss: 1.289 Accuracy: 0.197 Validation Accuracy: 0.186:   6%|▌         | 2251/37094 [03:50<56:24, 10.29it/s]

Epoch: 0 Iteration: 2252 Loss: 1.369 Validation Loss: 1.289 Accuracy: 0.194 Validation Accuracy: 0.186:   6%|▌         | 2251/37094 [03:50<56:24, 10.29it/s]

Epoch: 0 Iteration: 2252 Loss: 1.369 Validation Loss: 1.289 Accuracy: 0.194 Validation Accuracy: 0.186:   6%|▌         | 2253/37094 [03:50<56:25, 10.29it/s]

Epoch: 0 Iteration: 2253 Loss: 1.352 Validation Loss: 1.289 Accuracy: 0.203 Validation Accuracy: 0.186:   6%|▌         | 2253/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2254 Loss: 1.354 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2253/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2254 Loss: 1.354 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2255/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2255 Loss: 1.400 Validation Loss: 1.289 Accuracy: 0.228 Validation Accuracy: 0.186:   6%|▌         | 2255/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2256 Loss: 1.409 Validation Loss: 1.289 Accuracy: 0.234 Validation Accuracy: 0.186:   6%|▌         | 2255/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2256 Loss: 1.409 Validation Loss: 1.289 Accuracy: 0.234 Validation Accuracy: 0.186:   6%|▌         | 2257/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2257 Loss: 1.370 Validation Loss: 1.289 Accuracy: 0.244 Validation Accuracy: 0.186:   6%|▌         | 2257/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2258 Loss: 1.361 Validation Loss: 1.289 Accuracy: 0.228 Validation Accuracy: 0.186:   6%|▌         | 2257/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2258 Loss: 1.361 Validation Loss: 1.289 Accuracy: 0.228 Validation Accuracy: 0.186:   6%|▌         | 2259/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2259 Loss: 1.344 Validation Loss: 1.289 Accuracy: 0.231 Validation Accuracy: 0.186:   6%|▌         | 2259/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2260 Loss: 1.381 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2259/37094 [03:51<56:25, 10.29it/s]

Epoch: 0 Iteration: 2260 Loss: 1.381 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2261/37094 [03:51<56:26, 10.29it/s]

Epoch: 0 Iteration: 2261 Loss: 1.379 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2261/37094 [03:51<56:26, 10.29it/s]

Epoch: 0 Iteration: 2262 Loss: 1.360 Validation Loss: 1.289 Accuracy: 0.212 Validation Accuracy: 0.186:   6%|▌         | 2261/37094 [03:51<56:26, 10.29it/s]

Epoch: 0 Iteration: 2262 Loss: 1.360 Validation Loss: 1.289 Accuracy: 0.212 Validation Accuracy: 0.186:   6%|▌         | 2263/37094 [03:51<56:24, 10.29it/s]

Epoch: 0 Iteration: 2263 Loss: 1.351 Validation Loss: 1.289 Accuracy: 0.203 Validation Accuracy: 0.186:   6%|▌         | 2263/37094 [03:52<56:24, 10.29it/s]

Epoch: 0 Iteration: 2264 Loss: 1.305 Validation Loss: 1.289 Accuracy: 0.206 Validation Accuracy: 0.186:   6%|▌         | 2263/37094 [03:52<56:24, 10.29it/s]

Epoch: 0 Iteration: 2264 Loss: 1.305 Validation Loss: 1.289 Accuracy: 0.206 Validation Accuracy: 0.186:   6%|▌         | 2265/37094 [03:52<56:25, 10.29it/s]

Epoch: 0 Iteration: 2265 Loss: 1.309 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2265/37094 [03:52<56:25, 10.29it/s]

Epoch: 0 Iteration: 2266 Loss: 1.336 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2265/37094 [03:52<56:25, 10.29it/s]

Epoch: 0 Iteration: 2266 Loss: 1.336 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2267/37094 [03:52<56:23, 10.29it/s]

Epoch: 0 Iteration: 2267 Loss: 1.312 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2267/37094 [03:52<56:23, 10.29it/s]

Epoch: 0 Iteration: 2268 Loss: 1.351 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2267/37094 [03:52<56:23, 10.29it/s]

Epoch: 0 Iteration: 2268 Loss: 1.351 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2269/37094 [03:52<56:22, 10.30it/s]

Epoch: 0 Iteration: 2269 Loss: 1.326 Validation Loss: 1.289 Accuracy: 0.275 Validation Accuracy: 0.186:   6%|▌         | 2269/37094 [03:52<56:22, 10.30it/s]

Epoch: 0 Iteration: 2270 Loss: 1.371 Validation Loss: 1.289 Accuracy: 0.284 Validation Accuracy: 0.186:   6%|▌         | 2269/37094 [03:52<56:22, 10.30it/s]

Epoch: 0 Iteration: 2270 Loss: 1.371 Validation Loss: 1.289 Accuracy: 0.284 Validation Accuracy: 0.186:   6%|▌         | 2271/37094 [03:52<56:22, 10.30it/s]

Epoch: 0 Iteration: 2271 Loss: 1.364 Validation Loss: 1.289 Accuracy: 0.303 Validation Accuracy: 0.186:   6%|▌         | 2271/37094 [03:52<56:22, 10.30it/s]

Epoch: 0 Iteration: 2272 Loss: 1.356 Validation Loss: 1.289 Accuracy: 0.309 Validation Accuracy: 0.186:   6%|▌         | 2271/37094 [03:52<56:22, 10.30it/s]

Epoch: 0 Iteration: 2272 Loss: 1.356 Validation Loss: 1.289 Accuracy: 0.309 Validation Accuracy: 0.186:   6%|▌         | 2273/37094 [03:52<56:21, 10.30it/s]

Epoch: 0 Iteration: 2273 Loss: 1.399 Validation Loss: 1.289 Accuracy: 0.322 Validation Accuracy: 0.186:   6%|▌         | 2273/37094 [03:53<56:21, 10.30it/s]

Epoch: 0 Iteration: 2274 Loss: 1.417 Validation Loss: 1.289 Accuracy: 0.325 Validation Accuracy: 0.186:   6%|▌         | 2273/37094 [03:53<56:21, 10.30it/s]

Epoch: 0 Iteration: 2274 Loss: 1.417 Validation Loss: 1.289 Accuracy: 0.325 Validation Accuracy: 0.186:   6%|▌         | 2275/37094 [03:53<56:20, 10.30it/s]

Epoch: 0 Iteration: 2275 Loss: 1.375 Validation Loss: 1.289 Accuracy: 0.319 Validation Accuracy: 0.186:   6%|▌         | 2275/37094 [03:53<56:20, 10.30it/s]

Epoch: 0 Iteration: 2276 Loss: 1.330 Validation Loss: 1.289 Accuracy: 0.294 Validation Accuracy: 0.186:   6%|▌         | 2275/37094 [03:53<56:20, 10.30it/s]

Epoch: 0 Iteration: 2276 Loss: 1.330 Validation Loss: 1.289 Accuracy: 0.294 Validation Accuracy: 0.186:   6%|▌         | 2277/37094 [03:53<56:18, 10.31it/s]

Epoch: 0 Iteration: 2277 Loss: 1.328 Validation Loss: 1.289 Accuracy: 0.278 Validation Accuracy: 0.186:   6%|▌         | 2277/37094 [03:53<56:18, 10.31it/s]

Epoch: 0 Iteration: 2278 Loss: 1.336 Validation Loss: 1.289 Accuracy: 0.250 Validation Accuracy: 0.186:   6%|▌         | 2277/37094 [03:53<56:18, 10.31it/s]

Epoch: 0 Iteration: 2278 Loss: 1.336 Validation Loss: 1.289 Accuracy: 0.250 Validation Accuracy: 0.186:   6%|▌         | 2279/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2279 Loss: 1.321 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2279/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2280 Loss: 1.292 Validation Loss: 1.289 Accuracy: 0.247 Validation Accuracy: 0.186:   6%|▌         | 2279/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2280 Loss: 1.292 Validation Loss: 1.289 Accuracy: 0.247 Validation Accuracy: 0.186:   6%|▌         | 2281/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2281 Loss: 1.324 Validation Loss: 1.289 Accuracy: 0.237 Validation Accuracy: 0.186:   6%|▌         | 2281/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2282 Loss: 1.348 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2281/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2282 Loss: 1.348 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2283/37094 [03:53<56:17, 10.31it/s]

Epoch: 0 Iteration: 2283 Loss: 1.325 Validation Loss: 1.289 Accuracy: 0.228 Validation Accuracy: 0.186:   6%|▌         | 2283/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2284 Loss: 1.330 Validation Loss: 1.289 Accuracy: 0.222 Validation Accuracy: 0.186:   6%|▌         | 2283/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2284 Loss: 1.330 Validation Loss: 1.289 Accuracy: 0.222 Validation Accuracy: 0.186:   6%|▌         | 2285/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2285 Loss: 1.366 Validation Loss: 1.289 Accuracy: 0.225 Validation Accuracy: 0.186:   6%|▌         | 2285/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2286 Loss: 1.366 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2285/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2286 Loss: 1.366 Validation Loss: 1.289 Accuracy: 0.219 Validation Accuracy: 0.186:   6%|▌         | 2287/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2287 Loss: 1.354 Validation Loss: 1.289 Accuracy: 0.256 Validation Accuracy: 0.186:   6%|▌         | 2287/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2288 Loss: 1.290 Validation Loss: 1.289 Accuracy: 0.272 Validation Accuracy: 0.186:   6%|▌         | 2287/37094 [03:54<56:17, 10.31it/s]

Epoch: 0 Iteration: 2288 Loss: 1.290 Validation Loss: 1.289 Accuracy: 0.272 Validation Accuracy: 0.186:   6%|▌         | 2289/37094 [03:54<56:18, 10.30it/s]

Epoch: 0 Iteration: 2289 Loss: 1.292 Validation Loss: 1.289 Accuracy: 0.275 Validation Accuracy: 0.186:   6%|▌         | 2289/37094 [03:54<56:18, 10.30it/s]

Epoch: 0 Iteration: 2290 Loss: 1.246 Validation Loss: 1.289 Accuracy: 0.259 Validation Accuracy: 0.186:   6%|▌         | 2289/37094 [03:54<56:18, 10.30it/s]

Epoch: 0 Iteration: 2290 Loss: 1.246 Validation Loss: 1.289 Accuracy: 0.259 Validation Accuracy: 0.186:   6%|▌         | 2291/37094 [03:54<56:16, 10.31it/s]

Epoch: 0 Iteration: 2291 Loss: 1.264 Validation Loss: 1.289 Accuracy: 0.253 Validation Accuracy: 0.186:   6%|▌         | 2291/37094 [03:54<56:16, 10.31it/s]

Epoch: 0 Iteration: 2292 Loss: 1.244 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2291/37094 [03:54<56:16, 10.31it/s]

Epoch: 0 Iteration: 2292 Loss: 1.244 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2293/37094 [03:54<56:16, 10.31it/s]

Epoch: 0 Iteration: 2293 Loss: 1.210 Validation Loss: 1.289 Accuracy: 0.266 Validation Accuracy: 0.186:   6%|▌         | 2293/37094 [03:54<56:16, 10.31it/s]

Epoch: 0 Iteration: 2294 Loss: 1.170 Validation Loss: 1.289 Accuracy: 0.278 Validation Accuracy: 0.186:   6%|▌         | 2293/37094 [03:55<56:16, 10.31it/s]

Epoch: 0 Iteration: 2294 Loss: 1.170 Validation Loss: 1.289 Accuracy: 0.278 Validation Accuracy: 0.186:   6%|▌         | 2295/37094 [03:55<56:17, 10.30it/s]

Epoch: 0 Iteration: 2295 Loss: 1.159 Validation Loss: 1.289 Accuracy: 0.278 Validation Accuracy: 0.186:   6%|▌         | 2295/37094 [03:55<56:17, 10.30it/s]

Epoch: 0 Iteration: 2296 Loss: 1.197 Validation Loss: 1.289 Accuracy: 0.297 Validation Accuracy: 0.186:   6%|▌         | 2295/37094 [03:55<56:17, 10.30it/s]

Epoch: 0 Iteration: 2296 Loss: 1.197 Validation Loss: 1.289 Accuracy: 0.297 Validation Accuracy: 0.186:   6%|▌         | 2297/37094 [03:55<56:17, 10.30it/s]

Epoch: 0 Iteration: 2297 Loss: 1.210 Validation Loss: 1.289 Accuracy: 0.263 Validation Accuracy: 0.186:   6%|▌         | 2297/37094 [03:55<56:17, 10.30it/s]

Epoch: 0 Iteration: 2298 Loss: 1.205 Validation Loss: 1.289 Accuracy: 0.259 Validation Accuracy: 0.186:   6%|▌         | 2297/37094 [03:55<56:17, 10.30it/s]

Epoch: 0 Iteration: 2298 Loss: 1.205 Validation Loss: 1.289 Accuracy: 0.259 Validation Accuracy: 0.186:   6%|▌         | 2299/37094 [03:55<56:18, 10.30it/s]

Epoch: 0 Iteration: 2299 Loss: 1.264 Validation Loss: 1.289 Accuracy: 0.256 Validation Accuracy: 0.186:   6%|▌         | 2299/37094 [03:55<56:18, 10.30it/s]

Epoch: 0 Iteration: 2300 Loss: 1.291 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▌         | 2299/37094 [03:56<56:18, 10.30it/s]

Epoch: 0 Iteration: 2300 Loss: 1.291 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▌         | 2301/37094 [03:56<1:42:21,  5.67it/s]

Epoch: 0 Iteration: 2301 Loss: 1.260 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▌         | 2301/37094 [03:56<1:42:21,  5.67it/s]

Epoch: 0 Iteration: 2302 Loss: 1.272 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▌         | 2301/37094 [03:56<1:42:21,  5.67it/s]

Epoch: 0 Iteration: 2302 Loss: 1.272 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▌         | 2303/37094 [03:56<1:27:54,  6.60it/s]

Epoch: 0 Iteration: 2303 Loss: 1.269 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▌         | 2303/37094 [03:56<1:27:54,  6.60it/s]

Epoch: 0 Iteration: 2304 Loss: 1.257 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▌         | 2303/37094 [03:56<1:27:54,  6.60it/s]

Epoch: 0 Iteration: 2304 Loss: 1.257 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▌         | 2305/37094 [03:56<1:18:33,  7.38it/s]

Epoch: 0 Iteration: 2305 Loss: 1.217 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▌         | 2305/37094 [03:56<1:18:33,  7.38it/s]

Epoch: 0 Iteration: 2306 Loss: 1.207 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▌         | 2305/37094 [03:56<1:18:33,  7.38it/s]

Epoch: 0 Iteration: 2306 Loss: 1.207 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▌         | 2307/37094 [03:56<1:11:55,  8.06it/s]

Epoch: 0 Iteration: 2307 Loss: 1.214 Validation Loss: 1.271 Accuracy: 0.222 Validation Accuracy: 0.190:   6%|▌         | 2307/37094 [03:56<1:11:55,  8.06it/s]

Epoch: 0 Iteration: 2308 Loss: 1.252 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▌         | 2307/37094 [03:56<1:11:55,  8.06it/s]

Epoch: 0 Iteration: 2308 Loss: 1.252 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▌         | 2309/37094 [03:56<1:07:17,  8.62it/s]

Epoch: 0 Iteration: 2309 Loss: 1.255 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▌         | 2309/37094 [03:57<1:07:17,  8.62it/s]

Epoch: 0 Iteration: 2310 Loss: 1.287 Validation Loss: 1.271 Accuracy: 0.228 Validation Accuracy: 0.190:   6%|▌         | 2309/37094 [03:57<1:07:17,  8.62it/s]

Epoch: 0 Iteration: 2310 Loss: 1.287 Validation Loss: 1.271 Accuracy: 0.228 Validation Accuracy: 0.190:   6%|▌         | 2311/37094 [03:57<1:04:03,  9.05it/s]

Epoch: 0 Iteration: 2311 Loss: 1.237 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▌         | 2311/37094 [03:57<1:04:03,  9.05it/s]

Epoch: 0 Iteration: 2312 Loss: 1.242 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▌         | 2311/37094 [03:57<1:04:03,  9.05it/s]

Epoch: 0 Iteration: 2312 Loss: 1.242 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▌         | 2313/37094 [03:57<1:01:46,  9.38it/s]

Epoch: 0 Iteration: 2313 Loss: 1.243 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▌         | 2313/37094 [03:57<1:01:46,  9.38it/s]

Epoch: 0 Iteration: 2314 Loss: 1.283 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▌         | 2313/37094 [03:57<1:01:46,  9.38it/s]

Epoch: 0 Iteration: 2314 Loss: 1.283 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▌         | 2315/37094 [03:57<1:00:11,  9.63it/s]

Epoch: 0 Iteration: 2315 Loss: 1.280 Validation Loss: 1.271 Accuracy: 0.256 Validation Accuracy: 0.190:   6%|▌         | 2315/37094 [03:57<1:00:11,  9.63it/s]

Epoch: 0 Iteration: 2316 Loss: 1.257 Validation Loss: 1.271 Accuracy: 0.250 Validation Accuracy: 0.190:   6%|▌         | 2315/37094 [03:57<1:00:11,  9.63it/s]

Epoch: 0 Iteration: 2316 Loss: 1.257 Validation Loss: 1.271 Accuracy: 0.250 Validation Accuracy: 0.190:   6%|▌         | 2317/37094 [03:57<59:02,  9.82it/s]  

Epoch: 0 Iteration: 2317 Loss: 1.257 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▌         | 2317/37094 [03:57<59:02,  9.82it/s]

Epoch: 0 Iteration: 2318 Loss: 1.291 Validation Loss: 1.271 Accuracy: 0.247 Validation Accuracy: 0.190:   6%|▌         | 2317/37094 [03:57<59:02,  9.82it/s]

Epoch: 0 Iteration: 2318 Loss: 1.291 Validation Loss: 1.271 Accuracy: 0.247 Validation Accuracy: 0.190:   6%|▋         | 2319/37094 [03:57<58:23,  9.93it/s]

Epoch: 0 Iteration: 2319 Loss: 1.250 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2319/37094 [03:58<58:23,  9.93it/s]

Epoch: 0 Iteration: 2320 Loss: 1.262 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2319/37094 [03:58<58:23,  9.93it/s]

Epoch: 0 Iteration: 2320 Loss: 1.262 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2321/37094 [03:58<57:50, 10.02it/s]

Epoch: 0 Iteration: 2321 Loss: 1.260 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2321/37094 [03:58<57:50, 10.02it/s]

Epoch: 0 Iteration: 2322 Loss: 1.193 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▋         | 2321/37094 [03:58<57:50, 10.02it/s]

Epoch: 0 Iteration: 2322 Loss: 1.193 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▋         | 2323/37094 [03:58<57:26, 10.09it/s]

Epoch: 0 Iteration: 2323 Loss: 1.181 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2323/37094 [03:58<57:26, 10.09it/s]

Epoch: 0 Iteration: 2324 Loss: 1.160 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▋         | 2323/37094 [03:58<57:26, 10.09it/s]

Epoch: 0 Iteration: 2324 Loss: 1.160 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▋         | 2325/37094 [03:58<57:08, 10.14it/s]

Epoch: 0 Iteration: 2325 Loss: 1.149 Validation Loss: 1.271 Accuracy: 0.256 Validation Accuracy: 0.190:   6%|▋         | 2325/37094 [03:58<57:08, 10.14it/s]

Epoch: 0 Iteration: 2326 Loss: 1.144 Validation Loss: 1.271 Accuracy: 0.266 Validation Accuracy: 0.190:   6%|▋         | 2325/37094 [03:58<57:08, 10.14it/s]

Epoch: 0 Iteration: 2326 Loss: 1.144 Validation Loss: 1.271 Accuracy: 0.266 Validation Accuracy: 0.190:   6%|▋         | 2327/37094 [03:58<56:57, 10.17it/s]

Epoch: 0 Iteration: 2327 Loss: 1.210 Validation Loss: 1.271 Accuracy: 0.256 Validation Accuracy: 0.190:   6%|▋         | 2327/37094 [03:58<56:57, 10.17it/s]

Epoch: 0 Iteration: 2328 Loss: 1.196 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▋         | 2327/37094 [03:58<56:57, 10.17it/s]

Epoch: 0 Iteration: 2328 Loss: 1.196 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▋         | 2329/37094 [03:58<56:46, 10.20it/s]

Epoch: 0 Iteration: 2329 Loss: 1.225 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2329/37094 [03:59<56:46, 10.20it/s]

Epoch: 0 Iteration: 2330 Loss: 1.223 Validation Loss: 1.271 Accuracy: 0.275 Validation Accuracy: 0.190:   6%|▋         | 2329/37094 [03:59<56:46, 10.20it/s]

Epoch: 0 Iteration: 2330 Loss: 1.223 Validation Loss: 1.271 Accuracy: 0.275 Validation Accuracy: 0.190:   6%|▋         | 2331/37094 [03:59<56:39, 10.22it/s]

Epoch: 0 Iteration: 2331 Loss: 1.255 Validation Loss: 1.271 Accuracy: 0.294 Validation Accuracy: 0.190:   6%|▋         | 2331/37094 [03:59<56:39, 10.22it/s]

Epoch: 0 Iteration: 2332 Loss: 1.283 Validation Loss: 1.271 Accuracy: 0.284 Validation Accuracy: 0.190:   6%|▋         | 2331/37094 [03:59<56:39, 10.22it/s]

Epoch: 0 Iteration: 2332 Loss: 1.283 Validation Loss: 1.271 Accuracy: 0.284 Validation Accuracy: 0.190:   6%|▋         | 2333/37094 [03:59<56:35, 10.24it/s]

Epoch: 0 Iteration: 2333 Loss: 1.276 Validation Loss: 1.271 Accuracy: 0.278 Validation Accuracy: 0.190:   6%|▋         | 2333/37094 [03:59<56:35, 10.24it/s]

Epoch: 0 Iteration: 2334 Loss: 1.259 Validation Loss: 1.271 Accuracy: 0.287 Validation Accuracy: 0.190:   6%|▋         | 2333/37094 [03:59<56:35, 10.24it/s]

Epoch: 0 Iteration: 2334 Loss: 1.259 Validation Loss: 1.271 Accuracy: 0.287 Validation Accuracy: 0.190:   6%|▋         | 2335/37094 [03:59<56:32, 10.25it/s]

Epoch: 0 Iteration: 2335 Loss: 1.308 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2335/37094 [03:59<56:32, 10.25it/s]

Epoch: 0 Iteration: 2336 Loss: 1.304 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▋         | 2335/37094 [03:59<56:32, 10.25it/s]

Epoch: 0 Iteration: 2336 Loss: 1.304 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▋         | 2337/37094 [03:59<56:30, 10.25it/s]

Epoch: 0 Iteration: 2337 Loss: 1.350 Validation Loss: 1.271 Accuracy: 0.272 Validation Accuracy: 0.190:   6%|▋         | 2337/37094 [03:59<56:30, 10.25it/s]

Epoch: 0 Iteration: 2338 Loss: 1.322 Validation Loss: 1.271 Accuracy: 0.284 Validation Accuracy: 0.190:   6%|▋         | 2337/37094 [03:59<56:30, 10.25it/s]

Epoch: 0 Iteration: 2338 Loss: 1.322 Validation Loss: 1.271 Accuracy: 0.284 Validation Accuracy: 0.190:   6%|▋         | 2339/37094 [03:59<56:28, 10.26it/s]

Epoch: 0 Iteration: 2339 Loss: 1.340 Validation Loss: 1.271 Accuracy: 0.278 Validation Accuracy: 0.190:   6%|▋         | 2339/37094 [03:59<56:28, 10.26it/s]

Epoch: 0 Iteration: 2340 Loss: 1.364 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2339/37094 [04:00<56:28, 10.26it/s]

Epoch: 0 Iteration: 2340 Loss: 1.364 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2341/37094 [04:00<56:32, 10.24it/s]

Epoch: 0 Iteration: 2341 Loss: 1.358 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2341/37094 [04:00<56:32, 10.24it/s]

Epoch: 0 Iteration: 2342 Loss: 1.390 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▋         | 2341/37094 [04:00<56:32, 10.24it/s]

Epoch: 0 Iteration: 2342 Loss: 1.390 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▋         | 2343/37094 [04:00<56:29, 10.25it/s]

Epoch: 0 Iteration: 2343 Loss: 1.406 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2343/37094 [04:00<56:29, 10.25it/s]

Epoch: 0 Iteration: 2344 Loss: 1.426 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▋         | 2343/37094 [04:00<56:29, 10.25it/s]

Epoch: 0 Iteration: 2344 Loss: 1.426 Validation Loss: 1.271 Accuracy: 0.231 Validation Accuracy: 0.190:   6%|▋         | 2345/37094 [04:00<56:27, 10.26it/s]

Epoch: 0 Iteration: 2345 Loss: 1.433 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2345/37094 [04:00<56:27, 10.26it/s]

Epoch: 0 Iteration: 2346 Loss: 1.451 Validation Loss: 1.271 Accuracy: 0.228 Validation Accuracy: 0.190:   6%|▋         | 2345/37094 [04:00<56:27, 10.26it/s]

Epoch: 0 Iteration: 2346 Loss: 1.451 Validation Loss: 1.271 Accuracy: 0.228 Validation Accuracy: 0.190:   6%|▋         | 2347/37094 [04:00<56:28, 10.25it/s]

Epoch: 0 Iteration: 2347 Loss: 1.364 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▋         | 2347/37094 [04:00<56:28, 10.25it/s]

Epoch: 0 Iteration: 2348 Loss: 1.369 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2347/37094 [04:00<56:28, 10.25it/s]

Epoch: 0 Iteration: 2348 Loss: 1.369 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2349/37094 [04:00<56:26, 10.26it/s]

Epoch: 0 Iteration: 2349 Loss: 1.336 Validation Loss: 1.271 Accuracy: 0.228 Validation Accuracy: 0.190:   6%|▋         | 2349/37094 [04:00<56:26, 10.26it/s]

Epoch: 0 Iteration: 2350 Loss: 1.322 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2349/37094 [04:01<56:26, 10.26it/s]

Epoch: 0 Iteration: 2350 Loss: 1.322 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2351/37094 [04:01<56:25, 10.26it/s]

Epoch: 0 Iteration: 2351 Loss: 1.310 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2351/37094 [04:01<56:25, 10.26it/s]

Epoch: 0 Iteration: 2352 Loss: 1.286 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2351/37094 [04:01<56:25, 10.26it/s]

Epoch: 0 Iteration: 2352 Loss: 1.286 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2353/37094 [04:01<56:24, 10.26it/s]

Epoch: 0 Iteration: 2353 Loss: 1.318 Validation Loss: 1.271 Accuracy: 0.256 Validation Accuracy: 0.190:   6%|▋         | 2353/37094 [04:01<56:24, 10.26it/s]

Epoch: 0 Iteration: 2354 Loss: 1.325 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2353/37094 [04:01<56:24, 10.26it/s]

Epoch: 0 Iteration: 2354 Loss: 1.325 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2355/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2355 Loss: 1.340 Validation Loss: 1.271 Accuracy: 0.247 Validation Accuracy: 0.190:   6%|▋         | 2355/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2356 Loss: 1.362 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2355/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2356 Loss: 1.362 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2357/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2357 Loss: 1.314 Validation Loss: 1.271 Accuracy: 0.244 Validation Accuracy: 0.190:   6%|▋         | 2357/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2358 Loss: 1.293 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2357/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2358 Loss: 1.293 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2359/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2359 Loss: 1.281 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2359/37094 [04:01<56:23, 10.27it/s]

Epoch: 0 Iteration: 2360 Loss: 1.254 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2359/37094 [04:02<56:23, 10.27it/s]

Epoch: 0 Iteration: 2360 Loss: 1.254 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2361/37094 [04:02<56:22, 10.27it/s]

Epoch: 0 Iteration: 2361 Loss: 1.254 Validation Loss: 1.271 Accuracy: 0.256 Validation Accuracy: 0.190:   6%|▋         | 2361/37094 [04:02<56:22, 10.27it/s]

Epoch: 0 Iteration: 2362 Loss: 1.266 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2361/37094 [04:02<56:22, 10.27it/s]

Epoch: 0 Iteration: 2362 Loss: 1.266 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2363/37094 [04:02<56:28, 10.25it/s]

Epoch: 0 Iteration: 2363 Loss: 1.289 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2363/37094 [04:02<56:28, 10.25it/s]

Epoch: 0 Iteration: 2364 Loss: 1.320 Validation Loss: 1.271 Accuracy: 0.275 Validation Accuracy: 0.190:   6%|▋         | 2363/37094 [04:02<56:28, 10.25it/s]

Epoch: 0 Iteration: 2364 Loss: 1.320 Validation Loss: 1.271 Accuracy: 0.275 Validation Accuracy: 0.190:   6%|▋         | 2365/37094 [04:02<56:25, 10.26it/s]

Epoch: 0 Iteration: 2365 Loss: 1.294 Validation Loss: 1.271 Accuracy: 0.278 Validation Accuracy: 0.190:   6%|▋         | 2365/37094 [04:02<56:25, 10.26it/s]

Epoch: 0 Iteration: 2366 Loss: 1.268 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2365/37094 [04:02<56:25, 10.26it/s]

Epoch: 0 Iteration: 2366 Loss: 1.268 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2367/37094 [04:02<56:25, 10.26it/s]

Epoch: 0 Iteration: 2367 Loss: 1.290 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2367/37094 [04:02<56:25, 10.26it/s]

Epoch: 0 Iteration: 2368 Loss: 1.308 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2367/37094 [04:02<56:25, 10.26it/s]

Epoch: 0 Iteration: 2368 Loss: 1.308 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2369/37094 [04:02<56:23, 10.26it/s]

Epoch: 0 Iteration: 2369 Loss: 1.350 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2369/37094 [04:02<56:23, 10.26it/s]

Epoch: 0 Iteration: 2370 Loss: 1.331 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▋         | 2369/37094 [04:02<56:23, 10.26it/s]

Epoch: 0 Iteration: 2370 Loss: 1.331 Validation Loss: 1.271 Accuracy: 0.269 Validation Accuracy: 0.190:   6%|▋         | 2371/37094 [04:02<56:22, 10.27it/s]

Epoch: 0 Iteration: 2371 Loss: 1.296 Validation Loss: 1.271 Accuracy: 0.272 Validation Accuracy: 0.190:   6%|▋         | 2371/37094 [04:03<56:22, 10.27it/s]

Epoch: 0 Iteration: 2372 Loss: 1.327 Validation Loss: 1.271 Accuracy: 0.272 Validation Accuracy: 0.190:   6%|▋         | 2371/37094 [04:03<56:22, 10.27it/s]

Epoch: 0 Iteration: 2372 Loss: 1.327 Validation Loss: 1.271 Accuracy: 0.272 Validation Accuracy: 0.190:   6%|▋         | 2373/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2373 Loss: 1.318 Validation Loss: 1.271 Accuracy: 0.266 Validation Accuracy: 0.190:   6%|▋         | 2373/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2374 Loss: 1.318 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2373/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2374 Loss: 1.318 Validation Loss: 1.271 Accuracy: 0.263 Validation Accuracy: 0.190:   6%|▋         | 2375/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2375 Loss: 1.295 Validation Loss: 1.271 Accuracy: 0.259 Validation Accuracy: 0.190:   6%|▋         | 2375/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2376 Loss: 1.294 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2375/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2376 Loss: 1.294 Validation Loss: 1.271 Accuracy: 0.253 Validation Accuracy: 0.190:   6%|▋         | 2377/37094 [04:03<56:20, 10.27it/s]

Epoch: 0 Iteration: 2377 Loss: 1.329 Validation Loss: 1.271 Accuracy: 0.256 Validation Accuracy: 0.190:   6%|▋         | 2377/37094 [04:03<56:20, 10.27it/s]

Epoch: 0 Iteration: 2378 Loss: 1.322 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2377/37094 [04:03<56:20, 10.27it/s]

Epoch: 0 Iteration: 2378 Loss: 1.322 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2379/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2379 Loss: 1.386 Validation Loss: 1.271 Accuracy: 0.241 Validation Accuracy: 0.190:   6%|▋         | 2379/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2380 Loss: 1.371 Validation Loss: 1.271 Accuracy: 0.234 Validation Accuracy: 0.190:   6%|▋         | 2379/37094 [04:03<56:21, 10.27it/s]

Epoch: 0 Iteration: 2380 Loss: 1.371 Validation Loss: 1.271 Accuracy: 0.234 Validation Accuracy: 0.190:   6%|▋         | 2381/37094 [04:03<56:20, 10.27it/s]

Epoch: 0 Iteration: 2381 Loss: 1.375 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▋         | 2381/37094 [04:04<56:20, 10.27it/s]

Epoch: 0 Iteration: 2382 Loss: 1.351 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2381/37094 [04:04<56:20, 10.27it/s]

Epoch: 0 Iteration: 2382 Loss: 1.351 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2383/37094 [04:04<56:14, 10.29it/s]

Epoch: 0 Iteration: 2383 Loss: 1.348 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2383/37094 [04:04<56:14, 10.29it/s]

Epoch: 0 Iteration: 2384 Loss: 1.343 Validation Loss: 1.271 Accuracy: 0.194 Validation Accuracy: 0.190:   6%|▋         | 2383/37094 [04:04<56:14, 10.29it/s]

Epoch: 0 Iteration: 2384 Loss: 1.343 Validation Loss: 1.271 Accuracy: 0.194 Validation Accuracy: 0.190:   6%|▋         | 2385/37094 [04:04<56:13, 10.29it/s]

Epoch: 0 Iteration: 2385 Loss: 1.382 Validation Loss: 1.271 Accuracy: 0.206 Validation Accuracy: 0.190:   6%|▋         | 2385/37094 [04:04<56:13, 10.29it/s]

Epoch: 0 Iteration: 2386 Loss: 1.388 Validation Loss: 1.271 Accuracy: 0.209 Validation Accuracy: 0.190:   6%|▋         | 2385/37094 [04:04<56:13, 10.29it/s]

Epoch: 0 Iteration: 2386 Loss: 1.388 Validation Loss: 1.271 Accuracy: 0.209 Validation Accuracy: 0.190:   6%|▋         | 2387/37094 [04:04<56:09, 10.30it/s]

Epoch: 0 Iteration: 2387 Loss: 1.384 Validation Loss: 1.271 Accuracy: 0.209 Validation Accuracy: 0.190:   6%|▋         | 2387/37094 [04:04<56:09, 10.30it/s]

Epoch: 0 Iteration: 2388 Loss: 1.362 Validation Loss: 1.271 Accuracy: 0.225 Validation Accuracy: 0.190:   6%|▋         | 2387/37094 [04:04<56:09, 10.30it/s]

Epoch: 0 Iteration: 2388 Loss: 1.362 Validation Loss: 1.271 Accuracy: 0.225 Validation Accuracy: 0.190:   6%|▋         | 2389/37094 [04:04<56:06, 10.31it/s]

Epoch: 0 Iteration: 2389 Loss: 1.343 Validation Loss: 1.271 Accuracy: 0.234 Validation Accuracy: 0.190:   6%|▋         | 2389/37094 [04:04<56:06, 10.31it/s]

Epoch: 0 Iteration: 2390 Loss: 1.400 Validation Loss: 1.271 Accuracy: 0.222 Validation Accuracy: 0.190:   6%|▋         | 2389/37094 [04:04<56:06, 10.31it/s]

Epoch: 0 Iteration: 2390 Loss: 1.400 Validation Loss: 1.271 Accuracy: 0.222 Validation Accuracy: 0.190:   6%|▋         | 2391/37094 [04:04<56:05, 10.31it/s]

Epoch: 0 Iteration: 2391 Loss: 1.425 Validation Loss: 1.271 Accuracy: 0.219 Validation Accuracy: 0.190:   6%|▋         | 2391/37094 [04:05<56:05, 10.31it/s]

Epoch: 0 Iteration: 2392 Loss: 1.413 Validation Loss: 1.271 Accuracy: 0.209 Validation Accuracy: 0.190:   6%|▋         | 2391/37094 [04:05<56:05, 10.31it/s]

Epoch: 0 Iteration: 2392 Loss: 1.413 Validation Loss: 1.271 Accuracy: 0.209 Validation Accuracy: 0.190:   6%|▋         | 2393/37094 [04:05<56:04, 10.32it/s]

Epoch: 0 Iteration: 2393 Loss: 1.424 Validation Loss: 1.271 Accuracy: 0.206 Validation Accuracy: 0.190:   6%|▋         | 2393/37094 [04:05<56:04, 10.32it/s]

Epoch: 0 Iteration: 2394 Loss: 1.447 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2393/37094 [04:05<56:04, 10.32it/s]

Epoch: 0 Iteration: 2394 Loss: 1.447 Validation Loss: 1.271 Accuracy: 0.216 Validation Accuracy: 0.190:   6%|▋         | 2395/37094 [04:05<56:03, 10.32it/s]

Epoch: 0 Iteration: 2395 Loss: 1.434 Validation Loss: 1.271 Accuracy: 0.203 Validation Accuracy: 0.190:   6%|▋         | 2395/37094 [04:05<56:03, 10.32it/s]

Epoch: 0 Iteration: 2396 Loss: 1.463 Validation Loss: 1.271 Accuracy: 0.206 Validation Accuracy: 0.190:   6%|▋         | 2395/37094 [04:05<56:03, 10.32it/s]

Epoch: 0 Iteration: 2396 Loss: 1.463 Validation Loss: 1.271 Accuracy: 0.206 Validation Accuracy: 0.190:   6%|▋         | 2397/37094 [04:05<56:02, 10.32it/s]

Epoch: 0 Iteration: 2397 Loss: 1.406 Validation Loss: 1.271 Accuracy: 0.212 Validation Accuracy: 0.190:   6%|▋         | 2397/37094 [04:05<56:02, 10.32it/s]

Epoch: 0 Iteration: 2398 Loss: 1.442 Validation Loss: 1.271 Accuracy: 0.200 Validation Accuracy: 0.190:   6%|▋         | 2397/37094 [04:05<56:02, 10.32it/s]

Epoch: 0 Iteration: 2398 Loss: 1.442 Validation Loss: 1.271 Accuracy: 0.200 Validation Accuracy: 0.190:   6%|▋         | 2399/37094 [04:05<56:01, 10.32it/s]

Epoch: 0 Iteration: 2399 Loss: 1.386 Validation Loss: 1.271 Accuracy: 0.181 Validation Accuracy: 0.190:   6%|▋         | 2399/37094 [04:05<56:01, 10.32it/s]

Epoch: 0 Iteration: 2400 Loss: 1.387 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   6%|▋         | 2399/37094 [04:06<56:01, 10.32it/s]

Epoch: 0 Iteration: 2400 Loss: 1.387 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   6%|▋         | 2401/37094 [04:06<1:43:19,  5.60it/s]

Epoch: 0 Iteration: 2401 Loss: 1.392 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   6%|▋         | 2401/37094 [04:06<1:43:19,  5.60it/s]

Epoch: 0 Iteration: 2402 Loss: 1.421 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   6%|▋         | 2401/37094 [04:06<1:43:19,  5.60it/s]

Epoch: 0 Iteration: 2402 Loss: 1.421 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   6%|▋         | 2403/37094 [04:06<1:28:25,  6.54it/s]

Epoch: 0 Iteration: 2403 Loss: 1.455 Validation Loss: 1.332 Accuracy: 0.197 Validation Accuracy: 0.196:   6%|▋         | 2403/37094 [04:06<1:28:25,  6.54it/s]

Epoch: 0 Iteration: 2404 Loss: 1.443 Validation Loss: 1.332 Accuracy: 0.184 Validation Accuracy: 0.196:   6%|▋         | 2403/37094 [04:06<1:28:25,  6.54it/s]

Epoch: 0 Iteration: 2404 Loss: 1.443 Validation Loss: 1.332 Accuracy: 0.184 Validation Accuracy: 0.196:   6%|▋         | 2405/37094 [04:06<1:18:42,  7.34it/s]

Epoch: 0 Iteration: 2405 Loss: 1.458 Validation Loss: 1.332 Accuracy: 0.203 Validation Accuracy: 0.196:   6%|▋         | 2405/37094 [04:06<1:18:42,  7.34it/s]

Epoch: 0 Iteration: 2406 Loss: 1.500 Validation Loss: 1.332 Accuracy: 0.216 Validation Accuracy: 0.196:   6%|▋         | 2405/37094 [04:07<1:18:42,  7.34it/s]

Epoch: 0 Iteration: 2406 Loss: 1.500 Validation Loss: 1.332 Accuracy: 0.216 Validation Accuracy: 0.196:   6%|▋         | 2407/37094 [04:07<1:11:55,  8.04it/s]

Epoch: 0 Iteration: 2407 Loss: 1.501 Validation Loss: 1.332 Accuracy: 0.228 Validation Accuracy: 0.196:   6%|▋         | 2407/37094 [04:07<1:11:55,  8.04it/s]

Epoch: 0 Iteration: 2408 Loss: 1.491 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   6%|▋         | 2407/37094 [04:07<1:11:55,  8.04it/s]

Epoch: 0 Iteration: 2408 Loss: 1.491 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   6%|▋         | 2409/37094 [04:07<1:07:14,  8.60it/s]

Epoch: 0 Iteration: 2409 Loss: 1.499 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   6%|▋         | 2409/37094 [04:07<1:07:14,  8.60it/s]

Epoch: 0 Iteration: 2410 Loss: 1.474 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   6%|▋         | 2409/37094 [04:07<1:07:14,  8.60it/s]

Epoch: 0 Iteration: 2410 Loss: 1.474 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   6%|▋         | 2411/37094 [04:07<1:03:51,  9.05it/s]

Epoch: 0 Iteration: 2411 Loss: 1.532 Validation Loss: 1.332 Accuracy: 0.263 Validation Accuracy: 0.196:   6%|▋         | 2411/37094 [04:07<1:03:51,  9.05it/s]

Epoch: 0 Iteration: 2412 Loss: 1.538 Validation Loss: 1.332 Accuracy: 0.256 Validation Accuracy: 0.196:   6%|▋         | 2411/37094 [04:07<1:03:51,  9.05it/s]

Epoch: 0 Iteration: 2412 Loss: 1.538 Validation Loss: 1.332 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2413/37094 [04:07<1:01:31,  9.40it/s]

Epoch: 0 Iteration: 2413 Loss: 1.507 Validation Loss: 1.332 Accuracy: 0.259 Validation Accuracy: 0.196:   7%|▋         | 2413/37094 [04:07<1:01:31,  9.40it/s]

Epoch: 0 Iteration: 2414 Loss: 1.552 Validation Loss: 1.332 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2413/37094 [04:07<1:01:31,  9.40it/s]

Epoch: 0 Iteration: 2414 Loss: 1.552 Validation Loss: 1.332 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2415/37094 [04:07<59:51,  9.66it/s]  

Epoch: 0 Iteration: 2415 Loss: 1.547 Validation Loss: 1.332 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2415/37094 [04:07<59:51,  9.66it/s]

Epoch: 0 Iteration: 2416 Loss: 1.539 Validation Loss: 1.332 Accuracy: 0.228 Validation Accuracy: 0.196:   7%|▋         | 2415/37094 [04:08<59:51,  9.66it/s]

Epoch: 0 Iteration: 2416 Loss: 1.539 Validation Loss: 1.332 Accuracy: 0.228 Validation Accuracy: 0.196:   7%|▋         | 2417/37094 [04:08<58:42,  9.85it/s]

Epoch: 0 Iteration: 2417 Loss: 1.579 Validation Loss: 1.332 Accuracy: 0.219 Validation Accuracy: 0.196:   7%|▋         | 2417/37094 [04:08<58:42,  9.85it/s]

Epoch: 0 Iteration: 2418 Loss: 1.587 Validation Loss: 1.332 Accuracy: 0.203 Validation Accuracy: 0.196:   7%|▋         | 2417/37094 [04:08<58:42,  9.85it/s]

Epoch: 0 Iteration: 2418 Loss: 1.587 Validation Loss: 1.332 Accuracy: 0.203 Validation Accuracy: 0.196:   7%|▋         | 2419/37094 [04:08<57:52,  9.98it/s]

Epoch: 0 Iteration: 2419 Loss: 1.600 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2419/37094 [04:08<57:52,  9.98it/s]

Epoch: 0 Iteration: 2420 Loss: 1.603 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2419/37094 [04:08<57:52,  9.98it/s]

Epoch: 0 Iteration: 2420 Loss: 1.603 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2421/37094 [04:08<57:19, 10.08it/s]

Epoch: 0 Iteration: 2421 Loss: 1.618 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2421/37094 [04:08<57:19, 10.08it/s]

Epoch: 0 Iteration: 2422 Loss: 1.595 Validation Loss: 1.332 Accuracy: 0.163 Validation Accuracy: 0.196:   7%|▋         | 2421/37094 [04:08<57:19, 10.08it/s]

Epoch: 0 Iteration: 2422 Loss: 1.595 Validation Loss: 1.332 Accuracy: 0.163 Validation Accuracy: 0.196:   7%|▋         | 2423/37094 [04:08<56:56, 10.15it/s]

Epoch: 0 Iteration: 2423 Loss: 1.545 Validation Loss: 1.332 Accuracy: 0.153 Validation Accuracy: 0.196:   7%|▋         | 2423/37094 [04:08<56:56, 10.15it/s]

Epoch: 0 Iteration: 2424 Loss: 1.541 Validation Loss: 1.332 Accuracy: 0.159 Validation Accuracy: 0.196:   7%|▋         | 2423/37094 [04:08<56:56, 10.15it/s]

Epoch: 0 Iteration: 2424 Loss: 1.541 Validation Loss: 1.332 Accuracy: 0.159 Validation Accuracy: 0.196:   7%|▋         | 2425/37094 [04:08<56:39, 10.20it/s]

Epoch: 0 Iteration: 2425 Loss: 1.494 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2425/37094 [04:08<56:39, 10.20it/s]

Epoch: 0 Iteration: 2426 Loss: 1.476 Validation Loss: 1.332 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2425/37094 [04:08<56:39, 10.20it/s]

Epoch: 0 Iteration: 2426 Loss: 1.476 Validation Loss: 1.332 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2427/37094 [04:08<56:27, 10.23it/s]

Epoch: 0 Iteration: 2427 Loss: 1.468 Validation Loss: 1.332 Accuracy: 0.159 Validation Accuracy: 0.196:   7%|▋         | 2427/37094 [04:09<56:27, 10.23it/s]

Epoch: 0 Iteration: 2428 Loss: 1.476 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2427/37094 [04:09<56:27, 10.23it/s]

Epoch: 0 Iteration: 2428 Loss: 1.476 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2429/37094 [04:09<56:18, 10.26it/s]

Epoch: 0 Iteration: 2429 Loss: 1.438 Validation Loss: 1.332 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2429/37094 [04:09<56:18, 10.26it/s]

Epoch: 0 Iteration: 2430 Loss: 1.467 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2429/37094 [04:09<56:18, 10.26it/s]

Epoch: 0 Iteration: 2430 Loss: 1.467 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2431/37094 [04:09<56:11, 10.28it/s]

Epoch: 0 Iteration: 2431 Loss: 1.397 Validation Loss: 1.332 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2431/37094 [04:09<56:11, 10.28it/s]

Epoch: 0 Iteration: 2432 Loss: 1.418 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2431/37094 [04:09<56:11, 10.28it/s]

Epoch: 0 Iteration: 2432 Loss: 1.418 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2433/37094 [04:09<56:07, 10.29it/s]

Epoch: 0 Iteration: 2433 Loss: 1.439 Validation Loss: 1.332 Accuracy: 0.197 Validation Accuracy: 0.196:   7%|▋         | 2433/37094 [04:09<56:07, 10.29it/s]

Epoch: 0 Iteration: 2434 Loss: 1.368 Validation Loss: 1.332 Accuracy: 0.206 Validation Accuracy: 0.196:   7%|▋         | 2433/37094 [04:09<56:07, 10.29it/s]

Epoch: 0 Iteration: 2434 Loss: 1.368 Validation Loss: 1.332 Accuracy: 0.206 Validation Accuracy: 0.196:   7%|▋         | 2435/37094 [04:09<56:04, 10.30it/s]

Epoch: 0 Iteration: 2435 Loss: 1.349 Validation Loss: 1.332 Accuracy: 0.184 Validation Accuracy: 0.196:   7%|▋         | 2435/37094 [04:09<56:04, 10.30it/s]

Epoch: 0 Iteration: 2436 Loss: 1.309 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2435/37094 [04:09<56:04, 10.30it/s]

Epoch: 0 Iteration: 2436 Loss: 1.309 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2437/37094 [04:09<56:02, 10.31it/s]

Epoch: 0 Iteration: 2437 Loss: 1.311 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2437/37094 [04:10<56:02, 10.31it/s]

Epoch: 0 Iteration: 2438 Loss: 1.260 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2437/37094 [04:10<56:02, 10.31it/s]

Epoch: 0 Iteration: 2438 Loss: 1.260 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2439/37094 [04:10<56:01, 10.31it/s]

Epoch: 0 Iteration: 2439 Loss: 1.298 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2439/37094 [04:10<56:01, 10.31it/s]

Epoch: 0 Iteration: 2440 Loss: 1.289 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2439/37094 [04:10<56:01, 10.31it/s]

Epoch: 0 Iteration: 2440 Loss: 1.289 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2441/37094 [04:10<56:00, 10.31it/s]

Epoch: 0 Iteration: 2441 Loss: 1.274 Validation Loss: 1.332 Accuracy: 0.197 Validation Accuracy: 0.196:   7%|▋         | 2441/37094 [04:10<56:00, 10.31it/s]

Epoch: 0 Iteration: 2442 Loss: 1.251 Validation Loss: 1.332 Accuracy: 0.203 Validation Accuracy: 0.196:   7%|▋         | 2441/37094 [04:10<56:00, 10.31it/s]

Epoch: 0 Iteration: 2442 Loss: 1.251 Validation Loss: 1.332 Accuracy: 0.203 Validation Accuracy: 0.196:   7%|▋         | 2443/37094 [04:10<56:02, 10.30it/s]

Epoch: 0 Iteration: 2443 Loss: 1.281 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2443/37094 [04:10<56:02, 10.30it/s]

Epoch: 0 Iteration: 2444 Loss: 1.313 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2443/37094 [04:10<56:02, 10.30it/s]

Epoch: 0 Iteration: 2444 Loss: 1.313 Validation Loss: 1.332 Accuracy: 0.166 Validation Accuracy: 0.196:   7%|▋         | 2445/37094 [04:10<56:00, 10.31it/s]

Epoch: 0 Iteration: 2445 Loss: 1.326 Validation Loss: 1.332 Accuracy: 0.178 Validation Accuracy: 0.196:   7%|▋         | 2445/37094 [04:10<56:00, 10.31it/s]

Epoch: 0 Iteration: 2446 Loss: 1.299 Validation Loss: 1.332 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2445/37094 [04:10<56:00, 10.31it/s]

Epoch: 0 Iteration: 2446 Loss: 1.299 Validation Loss: 1.332 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2447/37094 [04:10<56:01, 10.31it/s]

Epoch: 0 Iteration: 2447 Loss: 1.316 Validation Loss: 1.332 Accuracy: 0.178 Validation Accuracy: 0.196:   7%|▋         | 2447/37094 [04:11<56:01, 10.31it/s]

Epoch: 0 Iteration: 2448 Loss: 1.295 Validation Loss: 1.332 Accuracy: 0.175 Validation Accuracy: 0.196:   7%|▋         | 2447/37094 [04:11<56:01, 10.31it/s]

Epoch: 0 Iteration: 2448 Loss: 1.295 Validation Loss: 1.332 Accuracy: 0.175 Validation Accuracy: 0.196:   7%|▋         | 2449/37094 [04:11<55:59, 10.31it/s]

Epoch: 0 Iteration: 2449 Loss: 1.292 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2449/37094 [04:11<55:59, 10.31it/s]

Epoch: 0 Iteration: 2450 Loss: 1.257 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2449/37094 [04:11<55:59, 10.31it/s]

Epoch: 0 Iteration: 2450 Loss: 1.257 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2451/37094 [04:11<55:57, 10.32it/s]

Epoch: 0 Iteration: 2451 Loss: 1.280 Validation Loss: 1.332 Accuracy: 0.188 Validation Accuracy: 0.196:   7%|▋         | 2451/37094 [04:11<55:57, 10.32it/s]

Epoch: 0 Iteration: 2452 Loss: 1.284 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2451/37094 [04:11<55:57, 10.32it/s]

Epoch: 0 Iteration: 2452 Loss: 1.284 Validation Loss: 1.332 Accuracy: 0.194 Validation Accuracy: 0.196:   7%|▋         | 2453/37094 [04:11<55:57, 10.32it/s]

Epoch: 0 Iteration: 2453 Loss: 1.297 Validation Loss: 1.332 Accuracy: 0.216 Validation Accuracy: 0.196:   7%|▋         | 2453/37094 [04:11<55:57, 10.32it/s]

Epoch: 0 Iteration: 2454 Loss: 1.322 Validation Loss: 1.332 Accuracy: 0.231 Validation Accuracy: 0.196:   7%|▋         | 2453/37094 [04:11<55:57, 10.32it/s]

Epoch: 0 Iteration: 2454 Loss: 1.322 Validation Loss: 1.332 Accuracy: 0.231 Validation Accuracy: 0.196:   7%|▋         | 2455/37094 [04:11<55:55, 10.32it/s]

Epoch: 0 Iteration: 2455 Loss: 1.339 Validation Loss: 1.332 Accuracy: 0.219 Validation Accuracy: 0.196:   7%|▋         | 2455/37094 [04:11<55:55, 10.32it/s]

Epoch: 0 Iteration: 2456 Loss: 1.357 Validation Loss: 1.332 Accuracy: 0.225 Validation Accuracy: 0.196:   7%|▋         | 2455/37094 [04:11<55:55, 10.32it/s]

Epoch: 0 Iteration: 2456 Loss: 1.357 Validation Loss: 1.332 Accuracy: 0.225 Validation Accuracy: 0.196:   7%|▋         | 2457/37094 [04:11<55:55, 10.32it/s]

Epoch: 0 Iteration: 2457 Loss: 1.350 Validation Loss: 1.332 Accuracy: 0.225 Validation Accuracy: 0.196:   7%|▋         | 2457/37094 [04:11<55:55, 10.32it/s]

Epoch: 0 Iteration: 2458 Loss: 1.358 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2457/37094 [04:12<55:55, 10.32it/s]

Epoch: 0 Iteration: 2458 Loss: 1.358 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2459/37094 [04:12<55:54, 10.32it/s]

Epoch: 0 Iteration: 2459 Loss: 1.311 Validation Loss: 1.332 Accuracy: 0.231 Validation Accuracy: 0.196:   7%|▋         | 2459/37094 [04:12<55:54, 10.32it/s]

Epoch: 0 Iteration: 2460 Loss: 1.284 Validation Loss: 1.332 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2459/37094 [04:12<55:54, 10.32it/s]

Epoch: 0 Iteration: 2460 Loss: 1.284 Validation Loss: 1.332 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2461/37094 [04:12<55:54, 10.32it/s]

Epoch: 0 Iteration: 2461 Loss: 1.309 Validation Loss: 1.332 Accuracy: 0.244 Validation Accuracy: 0.196:   7%|▋         | 2461/37094 [04:12<55:54, 10.32it/s]

Epoch: 0 Iteration: 2462 Loss: 1.349 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2461/37094 [04:12<55:54, 10.32it/s]

Epoch: 0 Iteration: 2462 Loss: 1.349 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2463/37094 [04:12<55:55, 10.32it/s]

Epoch: 0 Iteration: 2463 Loss: 1.337 Validation Loss: 1.332 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2463/37094 [04:12<55:55, 10.32it/s]

Epoch: 0 Iteration: 2464 Loss: 1.321 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2463/37094 [04:12<55:55, 10.32it/s]

Epoch: 0 Iteration: 2464 Loss: 1.321 Validation Loss: 1.332 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2465/37094 [04:12<55:57, 10.31it/s]

Epoch: 0 Iteration: 2465 Loss: 1.364 Validation Loss: 1.332 Accuracy: 0.250 Validation Accuracy: 0.196:   7%|▋         | 2465/37094 [04:12<55:57, 10.31it/s]

Epoch: 0 Iteration: 2466 Loss: 1.365 Validation Loss: 1.332 Accuracy: 0.263 Validation Accuracy: 0.196:   7%|▋         | 2465/37094 [04:12<55:57, 10.31it/s]

Epoch: 0 Iteration: 2466 Loss: 1.365 Validation Loss: 1.332 Accuracy: 0.263 Validation Accuracy: 0.196:   7%|▋         | 2467/37094 [04:12<55:55, 10.32it/s]

Epoch: 0 Iteration: 2467 Loss: 1.371 Validation Loss: 1.332 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2467/37094 [04:12<55:55, 10.32it/s]

Epoch: 0 Iteration: 2468 Loss: 1.421 Validation Loss: 1.332 Accuracy: 0.259 Validation Accuracy: 0.196:   7%|▋         | 2467/37094 [04:13<55:55, 10.32it/s]

Epoch: 0 Iteration: 2468 Loss: 1.421 Validation Loss: 1.332 Accuracy: 0.259 Validation Accuracy: 0.196:   7%|▋         | 2469/37094 [04:13<55:55, 10.32it/s]

Epoch: 0 Iteration: 2469 Loss: 1.464 Validation Loss: 1.332 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2469/37094 [04:13<55:55, 10.32it/s]

Epoch: 0 Iteration: 2470 Loss: 1.480 Validation Loss: 1.332 Accuracy: 0.269 Validation Accuracy: 0.196:   7%|▋         | 2469/37094 [04:13<55:55, 10.32it/s]

Epoch: 0 Iteration: 2470 Loss: 1.480 Validation Loss: 1.332 Accuracy: 0.269 Validation Accuracy: 0.196:   7%|▋         | 2471/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2471 Loss: 1.462 Validation Loss: 1.332 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2471/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2472 Loss: 1.471 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2471/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2472 Loss: 1.471 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2473/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2473 Loss: 1.420 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2473/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2474 Loss: 1.382 Validation Loss: 1.332 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2473/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2474 Loss: 1.382 Validation Loss: 1.332 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2475/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2475 Loss: 1.403 Validation Loss: 1.332 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2475/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2476 Loss: 1.375 Validation Loss: 1.332 Accuracy: 0.225 Validation Accuracy: 0.196:   7%|▋         | 2475/37094 [04:13<55:54, 10.32it/s]

Epoch: 0 Iteration: 2476 Loss: 1.375 Validation Loss: 1.332 Accuracy: 0.225 Validation Accuracy: 0.196:   7%|▋         | 2477/37094 [04:13<55:53, 10.32it/s]

Epoch: 0 Iteration: 2477 Loss: 1.373 Validation Loss: 1.332 Accuracy: 0.212 Validation Accuracy: 0.196:   7%|▋         | 2477/37094 [04:13<55:53, 10.32it/s]

Epoch: 0 Iteration: 2478 Loss: 1.398 Validation Loss: 1.332 Accuracy: 0.206 Validation Accuracy: 0.196:   7%|▋         | 2477/37094 [04:14<55:53, 10.32it/s]

Epoch: 0 Iteration: 2478 Loss: 1.398 Validation Loss: 1.332 Accuracy: 0.206 Validation Accuracy: 0.196:   7%|▋         | 2479/37094 [04:14<55:55, 10.32it/s]

Epoch: 0 Iteration: 2479 Loss: 1.371 Validation Loss: 1.332 Accuracy: 0.181 Validation Accuracy: 0.196:   7%|▋         | 2479/37094 [04:14<55:55, 10.32it/s]

Epoch: 0 Iteration: 2480 Loss: 1.437 Validation Loss: 1.332 Accuracy: 0.184 Validation Accuracy: 0.196:   7%|▋         | 2479/37094 [04:14<55:55, 10.32it/s]

Epoch: 0 Iteration: 2480 Loss: 1.437 Validation Loss: 1.332 Accuracy: 0.184 Validation Accuracy: 0.196:   7%|▋         | 2481/37094 [04:14<55:55, 10.32it/s]

Epoch: 0 Iteration: 2481 Loss: 1.414 Validation Loss: 1.332 Accuracy: 0.188 Validation Accuracy: 0.196:   7%|▋         | 2481/37094 [04:14<55:55, 10.32it/s]

Epoch: 0 Iteration: 2482 Loss: 1.407 Validation Loss: 1.332 Accuracy: 0.197 Validation Accuracy: 0.196:   7%|▋         | 2481/37094 [04:14<55:55, 10.32it/s]

Epoch: 0 Iteration: 2482 Loss: 1.407 Validation Loss: 1.332 Accuracy: 0.197 Validation Accuracy: 0.196:   7%|▋         | 2483/37094 [04:14<55:55, 10.31it/s]

Epoch: 0 Iteration: 2483 Loss: 1.428 Validation Loss: 1.332 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2483/37094 [04:14<55:55, 10.31it/s]

Epoch: 0 Iteration: 2484 Loss: 1.385 Validation Loss: 1.332 Accuracy: 0.209 Validation Accuracy: 0.196:   7%|▋         | 2483/37094 [04:14<55:55, 10.31it/s]

Epoch: 0 Iteration: 2484 Loss: 1.385 Validation Loss: 1.332 Accuracy: 0.209 Validation Accuracy: 0.196:   7%|▋         | 2485/37094 [04:14<55:57, 10.31it/s]

Epoch: 0 Iteration: 2485 Loss: 1.356 Validation Loss: 1.332 Accuracy: 0.222 Validation Accuracy: 0.196:   7%|▋         | 2485/37094 [04:14<55:57, 10.31it/s]

Epoch: 0 Iteration: 2486 Loss: 1.334 Validation Loss: 1.332 Accuracy: 0.212 Validation Accuracy: 0.196:   7%|▋         | 2485/37094 [04:14<55:57, 10.31it/s]

Epoch: 0 Iteration: 2486 Loss: 1.334 Validation Loss: 1.332 Accuracy: 0.212 Validation Accuracy: 0.196:   7%|▋         | 2487/37094 [04:14<56:01, 10.30it/s]

Epoch: 0 Iteration: 2487 Loss: 1.302 Validation Loss: 1.332 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2487/37094 [04:14<56:01, 10.30it/s]

Epoch: 0 Iteration: 2488 Loss: 1.277 Validation Loss: 1.332 Accuracy: 0.241 Validation Accuracy: 0.196:   7%|▋         | 2487/37094 [04:14<56:01, 10.30it/s]

Epoch: 0 Iteration: 2488 Loss: 1.277 Validation Loss: 1.332 Accuracy: 0.241 Validation Accuracy: 0.196:   7%|▋         | 2489/37094 [04:14<56:00, 10.30it/s]

Epoch: 0 Iteration: 2489 Loss: 1.248 Validation Loss: 1.332 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2489/37094 [04:15<56:00, 10.30it/s]

Epoch: 0 Iteration: 2490 Loss: 1.281 Validation Loss: 1.332 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2489/37094 [04:15<56:00, 10.30it/s]

Epoch: 0 Iteration: 2490 Loss: 1.281 Validation Loss: 1.332 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2491/37094 [04:15<56:00, 10.30it/s]

Epoch: 0 Iteration: 2491 Loss: 1.304 Validation Loss: 1.332 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2491/37094 [04:15<56:00, 10.30it/s]

Epoch: 0 Iteration: 2492 Loss: 1.298 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2491/37094 [04:15<56:00, 10.30it/s]

Epoch: 0 Iteration: 2492 Loss: 1.298 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2493/37094 [04:15<55:58, 10.30it/s]

Epoch: 0 Iteration: 2493 Loss: 1.332 Validation Loss: 1.332 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2493/37094 [04:15<55:58, 10.30it/s]

Epoch: 0 Iteration: 2494 Loss: 1.345 Validation Loss: 1.332 Accuracy: 0.250 Validation Accuracy: 0.196:   7%|▋         | 2493/37094 [04:15<55:58, 10.30it/s]

Epoch: 0 Iteration: 2494 Loss: 1.345 Validation Loss: 1.332 Accuracy: 0.250 Validation Accuracy: 0.196:   7%|▋         | 2495/37094 [04:15<55:57, 10.30it/s]

Epoch: 0 Iteration: 2495 Loss: 1.309 Validation Loss: 1.332 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2495/37094 [04:15<55:57, 10.30it/s]

Epoch: 0 Iteration: 2496 Loss: 1.331 Validation Loss: 1.332 Accuracy: 0.272 Validation Accuracy: 0.196:   7%|▋         | 2495/37094 [04:15<55:57, 10.30it/s]

Epoch: 0 Iteration: 2496 Loss: 1.331 Validation Loss: 1.332 Accuracy: 0.272 Validation Accuracy: 0.196:   7%|▋         | 2497/37094 [04:15<55:56, 10.31it/s]

Epoch: 0 Iteration: 2497 Loss: 1.308 Validation Loss: 1.332 Accuracy: 0.269 Validation Accuracy: 0.196:   7%|▋         | 2497/37094 [04:15<55:56, 10.31it/s]

Epoch: 0 Iteration: 2498 Loss: 1.370 Validation Loss: 1.332 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2497/37094 [04:15<55:56, 10.31it/s]

Epoch: 0 Iteration: 2498 Loss: 1.370 Validation Loss: 1.332 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2499/37094 [04:15<55:57, 10.30it/s]

Epoch: 0 Iteration: 2499 Loss: 1.386 Validation Loss: 1.332 Accuracy: 0.259 Validation Accuracy: 0.196:   7%|▋         | 2499/37094 [04:16<55:57, 10.30it/s]

Epoch: 0 Iteration: 2500 Loss: 1.320 Validation Loss: 1.201 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2499/37094 [04:16<55:57, 10.30it/s]

Epoch: 0 Iteration: 2500 Loss: 1.320 Validation Loss: 1.201 Accuracy: 0.266 Validation Accuracy: 0.196:   7%|▋         | 2501/37094 [04:16<1:43:26,  5.57it/s]

Epoch: 0 Iteration: 2501 Loss: 1.287 Validation Loss: 1.201 Accuracy: 0.281 Validation Accuracy: 0.196:   7%|▋         | 2501/37094 [04:16<1:43:26,  5.57it/s]

Epoch: 0 Iteration: 2502 Loss: 1.259 Validation Loss: 1.201 Accuracy: 0.303 Validation Accuracy: 0.196:   7%|▋         | 2501/37094 [04:16<1:43:26,  5.57it/s]

Epoch: 0 Iteration: 2502 Loss: 1.259 Validation Loss: 1.201 Accuracy: 0.303 Validation Accuracy: 0.196:   7%|▋         | 2503/37094 [04:16<1:28:36,  6.51it/s]

Epoch: 0 Iteration: 2503 Loss: 1.224 Validation Loss: 1.201 Accuracy: 0.312 Validation Accuracy: 0.196:   7%|▋         | 2503/37094 [04:16<1:28:36,  6.51it/s]

Epoch: 0 Iteration: 2504 Loss: 1.233 Validation Loss: 1.201 Accuracy: 0.297 Validation Accuracy: 0.196:   7%|▋         | 2503/37094 [04:17<1:28:36,  6.51it/s]

Epoch: 0 Iteration: 2504 Loss: 1.233 Validation Loss: 1.201 Accuracy: 0.297 Validation Accuracy: 0.196:   7%|▋         | 2505/37094 [04:17<1:18:52,  7.31it/s]

Epoch: 0 Iteration: 2505 Loss: 1.207 Validation Loss: 1.201 Accuracy: 0.287 Validation Accuracy: 0.196:   7%|▋         | 2505/37094 [04:17<1:18:52,  7.31it/s]

Epoch: 0 Iteration: 2506 Loss: 1.231 Validation Loss: 1.201 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2505/37094 [04:17<1:18:52,  7.31it/s]

Epoch: 0 Iteration: 2506 Loss: 1.231 Validation Loss: 1.201 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2507/37094 [04:17<1:12:04,  8.00it/s]

Epoch: 0 Iteration: 2507 Loss: 1.225 Validation Loss: 1.201 Accuracy: 0.287 Validation Accuracy: 0.196:   7%|▋         | 2507/37094 [04:17<1:12:04,  8.00it/s]

Epoch: 0 Iteration: 2508 Loss: 1.216 Validation Loss: 1.201 Accuracy: 0.284 Validation Accuracy: 0.196:   7%|▋         | 2507/37094 [04:17<1:12:04,  8.00it/s]

Epoch: 0 Iteration: 2508 Loss: 1.216 Validation Loss: 1.201 Accuracy: 0.284 Validation Accuracy: 0.196:   7%|▋         | 2509/37094 [04:17<1:07:17,  8.57it/s]

Epoch: 0 Iteration: 2509 Loss: 1.224 Validation Loss: 1.201 Accuracy: 0.281 Validation Accuracy: 0.196:   7%|▋         | 2509/37094 [04:17<1:07:17,  8.57it/s]

Epoch: 0 Iteration: 2510 Loss: 1.181 Validation Loss: 1.201 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2509/37094 [04:17<1:07:17,  8.57it/s]

Epoch: 0 Iteration: 2510 Loss: 1.181 Validation Loss: 1.201 Accuracy: 0.278 Validation Accuracy: 0.196:   7%|▋         | 2511/37094 [04:17<1:03:55,  9.02it/s]

Epoch: 0 Iteration: 2511 Loss: 1.155 Validation Loss: 1.201 Accuracy: 0.269 Validation Accuracy: 0.196:   7%|▋         | 2511/37094 [04:17<1:03:55,  9.02it/s]

Epoch: 0 Iteration: 2512 Loss: 1.119 Validation Loss: 1.201 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2511/37094 [04:17<1:03:55,  9.02it/s]

Epoch: 0 Iteration: 2512 Loss: 1.119 Validation Loss: 1.201 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2513/37094 [04:17<1:01:37,  9.35it/s]

Epoch: 0 Iteration: 2513 Loss: 1.116 Validation Loss: 1.201 Accuracy: 0.241 Validation Accuracy: 0.196:   7%|▋         | 2513/37094 [04:17<1:01:37,  9.35it/s]

Epoch: 0 Iteration: 2514 Loss: 1.095 Validation Loss: 1.201 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2513/37094 [04:18<1:01:37,  9.35it/s]

Epoch: 0 Iteration: 2514 Loss: 1.095 Validation Loss: 1.201 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2515/37094 [04:18<59:58,  9.61it/s]  

Epoch: 0 Iteration: 2515 Loss: 1.096 Validation Loss: 1.201 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2515/37094 [04:18<59:58,  9.61it/s]

Epoch: 0 Iteration: 2516 Loss: 1.099 Validation Loss: 1.201 Accuracy: 0.244 Validation Accuracy: 0.196:   7%|▋         | 2515/37094 [04:18<59:58,  9.61it/s]

Epoch: 0 Iteration: 2516 Loss: 1.099 Validation Loss: 1.201 Accuracy: 0.244 Validation Accuracy: 0.196:   7%|▋         | 2517/37094 [04:18<58:48,  9.80it/s]

Epoch: 0 Iteration: 2517 Loss: 1.125 Validation Loss: 1.201 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2517/37094 [04:18<58:48,  9.80it/s]

Epoch: 0 Iteration: 2518 Loss: 1.090 Validation Loss: 1.201 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2517/37094 [04:18<58:48,  9.80it/s]

Epoch: 0 Iteration: 2518 Loss: 1.090 Validation Loss: 1.201 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2519/37094 [04:18<57:59,  9.94it/s]

Epoch: 0 Iteration: 2519 Loss: 1.110 Validation Loss: 1.201 Accuracy: 0.244 Validation Accuracy: 0.196:   7%|▋         | 2519/37094 [04:18<57:59,  9.94it/s]

Epoch: 0 Iteration: 2520 Loss: 1.123 Validation Loss: 1.201 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2519/37094 [04:18<57:59,  9.94it/s]

Epoch: 0 Iteration: 2520 Loss: 1.123 Validation Loss: 1.201 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2521/37094 [04:18<57:24, 10.04it/s]

Epoch: 0 Iteration: 2521 Loss: 1.160 Validation Loss: 1.201 Accuracy: 0.256 Validation Accuracy: 0.196:   7%|▋         | 2521/37094 [04:18<57:24, 10.04it/s]

Epoch: 0 Iteration: 2522 Loss: 1.184 Validation Loss: 1.201 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2521/37094 [04:18<57:24, 10.04it/s]

Epoch: 0 Iteration: 2522 Loss: 1.184 Validation Loss: 1.201 Accuracy: 0.253 Validation Accuracy: 0.196:   7%|▋         | 2523/37094 [04:18<57:03, 10.10it/s]

Epoch: 0 Iteration: 2523 Loss: 1.202 Validation Loss: 1.201 Accuracy: 0.237 Validation Accuracy: 0.196:   7%|▋         | 2523/37094 [04:18<57:03, 10.10it/s]

Epoch: 0 Iteration: 2524 Loss: 1.237 Validation Loss: 1.201 Accuracy: 0.219 Validation Accuracy: 0.196:   7%|▋         | 2523/37094 [04:19<57:03, 10.10it/s]

Epoch: 0 Iteration: 2524 Loss: 1.237 Validation Loss: 1.201 Accuracy: 0.219 Validation Accuracy: 0.196:   7%|▋         | 2525/37094 [04:19<56:45, 10.15it/s]

Epoch: 0 Iteration: 2525 Loss: 1.256 Validation Loss: 1.201 Accuracy: 0.212 Validation Accuracy: 0.196:   7%|▋         | 2525/37094 [04:19<56:45, 10.15it/s]

Epoch: 0 Iteration: 2526 Loss: 1.259 Validation Loss: 1.201 Accuracy: 0.206 Validation Accuracy: 0.196:   7%|▋         | 2525/37094 [04:19<56:45, 10.15it/s]

Epoch: 0 Iteration: 2526 Loss: 1.259 Validation Loss: 1.201 Accuracy: 0.206 Validation Accuracy: 0.196:   7%|▋         | 2527/37094 [04:19<56:33, 10.19it/s]

Epoch: 0 Iteration: 2527 Loss: 1.258 Validation Loss: 1.201 Accuracy: 0.197 Validation Accuracy: 0.196:   7%|▋         | 2527/37094 [04:19<56:33, 10.19it/s]

Epoch: 0 Iteration: 2528 Loss: 1.265 Validation Loss: 1.201 Accuracy: 0.184 Validation Accuracy: 0.196:   7%|▋         | 2527/37094 [04:19<56:33, 10.19it/s]

Epoch: 0 Iteration: 2528 Loss: 1.265 Validation Loss: 1.201 Accuracy: 0.184 Validation Accuracy: 0.196:   7%|▋         | 2529/37094 [04:19<56:24, 10.21it/s]

Epoch: 0 Iteration: 2529 Loss: 1.267 Validation Loss: 1.201 Accuracy: 0.178 Validation Accuracy: 0.196:   7%|▋         | 2529/37094 [04:19<56:24, 10.21it/s]

Epoch: 0 Iteration: 2530 Loss: 1.253 Validation Loss: 1.201 Accuracy: 0.163 Validation Accuracy: 0.196:   7%|▋         | 2529/37094 [04:19<56:24, 10.21it/s]

Epoch: 0 Iteration: 2530 Loss: 1.253 Validation Loss: 1.201 Accuracy: 0.163 Validation Accuracy: 0.196:   7%|▋         | 2531/37094 [04:19<56:17, 10.23it/s]

Epoch: 0 Iteration: 2531 Loss: 1.310 Validation Loss: 1.201 Accuracy: 0.163 Validation Accuracy: 0.196:   7%|▋         | 2531/37094 [04:19<56:17, 10.23it/s]

Epoch: 0 Iteration: 2532 Loss: 1.335 Validation Loss: 1.201 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2531/37094 [04:19<56:17, 10.23it/s]

Epoch: 0 Iteration: 2532 Loss: 1.335 Validation Loss: 1.201 Accuracy: 0.172 Validation Accuracy: 0.196:   7%|▋         | 2533/37094 [04:19<56:14, 10.24it/s]

Epoch: 0 Iteration: 2533 Loss: 1.320 Validation Loss: 1.201 Accuracy: 0.191 Validation Accuracy: 0.196:   7%|▋         | 2533/37094 [04:19<56:14, 10.24it/s]

Epoch: 0 Iteration: 2534 Loss: 1.349 Validation Loss: 1.201 Accuracy: 0.219 Validation Accuracy: 0.196:   7%|▋         | 2533/37094 [04:19<56:14, 10.24it/s]

Epoch: 0 Iteration: 2534 Loss: 1.349 Validation Loss: 1.201 Accuracy: 0.219 Validation Accuracy: 0.196:   7%|▋         | 2535/37094 [04:19<56:12, 10.25it/s]

Epoch: 0 Iteration: 2535 Loss: 1.335 Validation Loss: 1.201 Accuracy: 0.234 Validation Accuracy: 0.196:   7%|▋         | 2535/37094 [04:20<56:12, 10.25it/s]

Epoch: 0 Iteration: 2536 Loss: 1.344 Validation Loss: 1.201 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2535/37094 [04:20<56:12, 10.25it/s]

Epoch: 0 Iteration: 2536 Loss: 1.344 Validation Loss: 1.201 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2537/37094 [04:20<56:09, 10.26it/s]

Epoch: 0 Iteration: 2537 Loss: 1.310 Validation Loss: 1.201 Accuracy: 0.247 Validation Accuracy: 0.196:   7%|▋         | 2537/37094 [04:20<56:09, 10.26it/s]

Epoch: 0 Iteration: 2538 Loss: 1.263 Validation Loss: 1.201 Accuracy: 0.259 Validation Accuracy: 0.196:   7%|▋         | 2537/37094 [04:20<56:09, 10.26it/s]

Epoch: 0 Iteration: 2538 Loss: 1.263 Validation Loss: 1.201 Accuracy: 0.259 Validation Accuracy: 0.196:   7%|▋         | 2539/37094 [04:20<56:07, 10.26it/s]

Epoch: 0 Iteration: 2539 Loss: 1.247 Validation Loss: 1.201 Accuracy: 0.263 Validation Accuracy: 0.196:   7%|▋         | 2539/37094 [04:20<56:07, 10.26it/s]

Epoch: 0 Iteration: 2540 Loss: 1.261 Validation Loss: 1.201 Accuracy: 0.269 Validation Accuracy: 0.196:   7%|▋         | 2539/37094 [04:20<56:07, 10.26it/s]

Epoch: 0 Iteration: 2540 Loss: 1.261 Validation Loss: 1.201 Accuracy: 0.269 Validation Accuracy: 0.196:   7%|▋         | 2541/37094 [04:20<56:07, 10.26it/s]

Epoch: 0 Iteration: 2541 Loss: 1.264 Validation Loss: 1.201 Accuracy: 0.272 Validation Accuracy: 0.196:   7%|▋         | 2541/37094 [04:20<56:07, 10.26it/s]

In [None]:
import matplotlib.pyplot as plt
# Fine-tuning accuracy curves: the training series is drawn against its own
# index, one x-unit per recorded value.
plt.plot(
    range(len(training_accuracies_finetuning)),
    training_accuracies_finetuning,
    label="Training Accuracy",
)
# The validation series is stretched so that its i-th value sits at x = i * 100.
# NOTE(review): presumably validation is recorded once every 100 training
# iterations -- confirm against the training loop before relying on the x-axis.
plt.plot(
    list(range(0, 100 * len(validation_accuracies_finetuning), 100)),
    validation_accuracies_finetuning,
    label="Validation Accuracy",
)
plt.xlabel("Training Iteration")
plt.ylabel("Accuracy")
plt.legend()

In [None]:
import torch as th
import numpy as np

# Move the embedding model to `device`: get_embeds() below sends its token
# tensors to the same device before calling embedding_bert.bert on them.
embedding_bert.to(device)

def get_embeds(x_data):
    """Embed every document as a fixed-length sequence of per-sentence vectors.

    Each text is split into sentences with `nlp`; the first `num_sentences`
    sentences are tokenized (truncated / padded to `max_tokenized_length`) and
    sent through `embedding_bert.bert` as one batch under `th.no_grad()`.
    One extra trailing column is appended to every row as a padding flag:
    0 for a real sentence, 1 for a padding row. Documents with fewer than
    `num_sentences` sentences are filled up with padding rows (all zeros
    except the flag), so every document ends up with `num_sentences` rows.

    Documents that contain no sentence at all are represented by padding rows
    only, instead of handing an empty tensor to the model.

    Args:
        x_data: iterable of document texts, each accepted by `nlp`.

    Returns:
        np.ndarray with one entry per document, each of shape
        (num_sentences, width + 1), where `width` is the second dimension of
        the model output selected below. An empty `x_data` yields an empty
        array.

    Raises:
        ValueError: if documents were given but none of them contained a
            sentence, so the embedding width cannot be determined.
    """
    pad_value = None   # padding row; built once the embedding width is known
    per_doc_rows = []  # one list of sentence rows per document, not yet padded

    for text in tqdm(x_data):
        sents = list(nlp(text).sents)
        all_tokenized = [
            tokenizer(str(sentence), truncation=True, padding="max_length", max_length=max_tokenized_length)[0].ids
            for sentence in sents[:num_sentences]
        ]

        rows = []
        # With no sentences, th.tensor([]) would be a 1-D float tensor that the
        # model cannot consume; skip the forward pass for such documents.
        if all_tokenized:
            with th.no_grad():
                sentence_tensor = th.tensor(all_tokenized).to(device)
                concatted = np.concatenate([
                    # Index 1 of the model output; per the original note this is
                    # the output corresponding to CLS (presumably the pooled
                    # output -- TODO confirm against the model class).
                    embedding_bert.bert(sentence_tensor, output_hidden_states=True, return_dict=True)[1].cpu().numpy(),
                    # Padding-flag column: 0 marks a real sentence.
                    np.zeros((len(all_tokenized), 1))
                ], axis=1)

            if pad_value is None:
                pad_value = np.zeros(concatted.shape[1])
                pad_value[-1] = 1  # padding-flag column: 1 marks a padding row

            rows = list(concatted)

        per_doc_rows.append(rows)

    if pad_value is None:
        if per_doc_rows:
            raise ValueError(
                "get_embeds: no document contained a sentence, cannot determine the embedding width"
            )
        # No documents at all: same empty array the loop would have produced.
        return np.array(per_doc_rows)

    # Padding is applied after the loop so that sentence-less documents seen
    # before the first real embedding can still be filled in.
    return np.array([
        rows + [pad_value] * (num_sentences - len(rows))
        for rows in per_doc_rows
    ])

def _embed_train_and_valid():
    """Compute the sentence embeddings for the training split, then the validation split."""
    return get_embeds(train_x), get_embeds(valid_x)


# Cache key for the embeddings; it encodes the token limit, the data split and
# the fine-tuned model.
# NOTE(review): `num_sentences` is not part of the key although get_embeds()
# output depends on it -- a cached file may be stale if that value changes.
main_data_key = f"cache-core/training-data-main-max-tokens-{max_tokenized_length}-split-{split_hash}-finetuned-{finetuning_model_hash}"

# Presumably memo_load only invokes the callable when nothing is cached under
# the key -- confirm in utils.
train_x_embeds, valid_x_embeds = memo_load(_embed_train_and_valid, main_data_key)

# Hash of the file at `<key>.hkl` (presumably where memo_load stores its result);
# the hash is used further down to key the main-model cache.
main_data_hash = hash_file(main_data_key + ".hkl")

In [None]:
from model_sentence_lstm import ReviewPredictionModel
import torch as th
import torch.optim as optim

def train_main():
    """Train a fresh ReviewPredictionModel on the precomputed sentence embeddings.

    The model is sized from the last dimension of `train_x_embeds` and
    `lstm_hidden_size`, moved to `device`, and optimized with Adam at
    `main_model_lr` through `run_training_loop`.

    Returns:
        Tuple (model, training_accuracies, validation_accuracies), where the
        two accuracy values are exactly what `run_training_loop` returned.
    """
    feature_width = train_x_embeds.shape[2]
    net = ReviewPredictionModel(feature_width, lstm_hidden_size)
    net.to(device)
    adam = optim.Adam(net.parameters(), lr=main_model_lr)

    train_labels = np.array(train_y)
    valid_labels = np.array(valid_y)

    # The two `None` arguments fill positional slots of run_training_loop that
    # this model does not use (their meaning is defined in training_utils).
    train_history, valid_history = run_training_loop(
        net, adam, device,
        batch_size, epochs,
        train_x_embeds, None, train_labels,
        valid_x_embeds, None, valid_labels,
        model_id=experiment_id,
        max_validation_examples=512,
    )

    return net, train_history, valid_history

def store_main(tup, folder):
    """Write a (model, training_accuracies, validation_accuracies) triple into `folder`.

    The model's state dict goes to `model.pt`; the two accuracy values are
    dumped together to `accuracies.hkl`, overwriting an existing file.

    Args:
        tup: 3-tuple as returned by `train_main`.
        folder: directory path to write into.
    """
    net, train_history, valid_history = tup
    weights = net.state_dict()
    th.save(weights, f"{folder}/model.pt")
    history = (train_history, valid_history)
    hickle.dump(history, f"{folder}/accuracies.hkl", mode="w")

def load_main(folder):
    """Restore the main model and its accuracy histories from `folder`.

    Counterpart of `store_main`: reads the state dict from `model.pt` and the
    accuracy pair from `accuracies.hkl`. The model is rebuilt with the same
    constructor arguments as in `train_main`, switched to eval mode and moved
    to `device`.

    Args:
        folder: directory path containing `model.pt` and `accuracies.hkl`.

    Returns:
        Tuple (model, training_accuracies, validation_accuracies).
    """
    model_to_train = ReviewPredictionModel(train_x_embeds.shape[2], lstm_hidden_size)
    # Load the tensors onto the CPU first: without map_location, th.load tries
    # to restore them onto the device they were saved from, which fails when
    # that device (e.g. a GPU) is not available here. The model is moved to
    # `device` right after the weights are in place.
    state_dict = th.load(f"{folder}/model.pt", map_location="cpu")
    model_to_train.load_state_dict(state_dict)
    model_to_train.eval()
    model_to_train.to(device)
    training_accuracies, validation_accuracies = hickle.load(f"{folder}/accuracies.hkl")
    return model_to_train, training_accuracies, validation_accuracies

In [None]:
from training_utils import run_training_loop

# Cache key for the main model: hidden size, learning rate, batch size, epoch
# count and the hash of the embedding data it is trained on.
main_model_key = f"cache-core/main-model-lstm-{lstm_hidden_size}-lr-{main_model_lr}-batch-size-{batch_size}-epochs-{epochs}-data-{main_data_hash}"

# manual_memo receives the train / store / load callbacks together with the key
# and yields the (model, training accuracies, validation accuracies) triple.
main_model, training_accuracies, validation_accuracies = manual_memo(
    train_main,
    store_main,
    load_main,
    main_model_key,
)

# Also keep a copy of the weights and accuracy histories in this experiment's
# own directory.
th.save(
    main_model.state_dict(),
    f"{experiment_dir}/main-model.pt",
)
hickle.dump(
    (training_accuracies, validation_accuracies),
    f"{experiment_dir}/main-accuracies.hkl",
    mode="w",
)

In [None]:
import matplotlib.pyplot as plt
# Main-model accuracy curves: the training series is drawn against its own
# index, one x-unit per recorded value.
plt.plot(
    range(len(training_accuracies)),
    training_accuracies,
    label="Training Accuracy",
)
# The validation series is stretched so that its i-th value sits at x = i * 100.
# NOTE(review): presumably validation is recorded once every 100 training
# iterations -- confirm against run_training_loop before relying on the x-axis.
plt.plot(
    list(range(0, 100 * len(validation_accuracies), 100)),
    validation_accuracies,
    label="Validation Accuracy",
)
plt.xlabel("Training Iteration")
plt.ylabel("Accuracy")
plt.legend()