In [3]:
import requests
import warnings
import time
from tqdm import tqdm
import pandas as pd
from datasets import Dataset
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch
from unidecode import unidecode
import re      #  để làm việc với biểu thức chính quy (regular expressions), để xử lý,thao tác chuỗi theo các pattern cụ thể
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import pandas as pd

In [5]:
a = pd.read_csv("./dataset.txt", on_bad_lines='skip', header=None)

In [6]:
a.head(50)

Unnamed: 0,0
0,nước ép dưa hấu siêu ngọt khổng lồ||smoothie d...
1,nuoc ep dua hau sieu ngot khong lo||smoothie d...
2,nước ép dưa hấu siêu ngọt khổng lồ||cà phê sữa...
3,nuoc ep dua hau sieu ngot khong lo||cà phê sữa...
4,nước ép dưa hấu siêu ngọt khổng lồ||nước ép th...
5,nuoc ep dua hau sieu ngot khong lo||nước ép th...
6,nước ép dưa hấu siêu ngọt khổng lồ||khay dưa h...
7,nuoc ep dua hau sieu ngot khong lo||khay dưa h...
8,nước ép dưa hấu siêu ngọt khổng lồ||cơm gà xối...
9,nuoc ep dua hau sieu ngot khong lo||cơm gà xối...


In [7]:
df = pd.read_csv("./dataset.txt", sep='|', on_bad_lines='skip', header=None)
df = df[[0,2,4]]
df.columns = ['description','name','labels']
df['labels'] = df['labels'].astype(int)

In [8]:
pd.options.display.max_colwidth = 2000

In [9]:
df

Unnamed: 0,description,name,labels
0,nước ép dưa hấu siêu ngọt khổng lồ,smoothie dưa hấu,1
1,nuoc ep dua hau sieu ngot khong lo,smoothie dưa hấu,1
2,nước ép dưa hấu siêu ngọt khổng lồ,cà phê sữa gấu,0
3,nuoc ep dua hau sieu ngot khong lo,cà phê sữa gấu,0
4,nước ép dưa hấu siêu ngọt khổng lồ,nước ép thơm nguyên chất chai,1
...,...,...,...
1356855,rtc canh ga sot ro ti khay,bánh tráng cuốn sốt me bơ,0
1356856,súp rau củ trứng cút,trứng cút rim bơ tỏi,1
1356857,sup rau cu trung cut,trứng cút rim bơ tỏi,1
1356858,súp rau củ trứng cút,chả cả thêm,0


In [10]:
from sklearn.model_selection import train_test_split
df, remove = train_test_split(df, test_size=0.95, random_state=42)

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
device

device(type='cuda')

In [13]:
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

In [24]:
# Custom Dataset class
class VietnameseDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Tokenization function
def preprocess_function(descriptions, names):
    combined_texts = descriptions + " [SEP] " + names
    return tokenizer(combined_texts.tolist(), padding="max_length", truncation=True, max_length=64, return_tensors="pt")

# Split the dataset into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df[["description", "name"]], df["labels"], test_size=0.2, random_state=42, stratify=df["labels"].values.tolist()
)

# Tokenize training and validation data
train_encodings = preprocess_function(train_texts["description"], train_texts["name"])
val_encodings = preprocess_function(val_texts["description"], val_texts["name"])

# Create Dataset objects
train_dataset = VietnameseDataset(train_encodings, train_labels.tolist())
val_dataset = VietnameseDataset(val_encodings, val_labels.tolist())

In [15]:
from transformers import RobertaModel, AutoTokenizer, AdamW
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score

from tqdm import tqdm
import time

from tqdm import tqdm
import time

class PhoBERTWithClassification(torch.nn.Module):
    def __init__(self):
        super(PhoBERTWithClassification, self).__init__()
        self.phobert = RobertaModel.from_pretrained("vinai/phobert-base")
        self.linear = torch.nn.Linear(768, 768)
        self.activation = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 2)

    def forward(self, input_ids, attention_mask):
        # PhoBERT does not use `token_type_ids`
        output_with_pooling = self.phobert(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_with_pooling[0]
        pooler = hidden_state[:, 0]
        pooler = self.linear(pooler)
        pooler = self.activation(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        return output

class PhoBERTTrainer:
    def __init__(self, model, tokenizer, train_dataset, val_dataset, batch_size=16, lr=5e-5, device='cuda'):
        self.model = model.to(device)
        self.tokenizer = tokenizer
        self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        self.val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
        self.optimizer = AdamW(model.parameters(), lr=lr)
        self.loss_fn = torch.nn.CrossEntropyLoss()
        self.device = device

    def train_epoch(self, epoch, total_epochs):
        self.model.train()
        total_loss = 0

        # Initialize tqdm progress bar (dynamic update frequency)
        progress_bar = tqdm(
            self.train_dataloader,
            desc=f"Epoch {epoch+1}/{total_epochs} - Training",
            unit="batch",
            leave=False,
            miniters=50,  # Update bar every 10 iterations (reduces CPU load)
        )
        start_time = time.time()
        total_batches = len(self.train_dataloader)

        for batch_idx, batch in enumerate(self.train_dataloader):
            input_ids = batch['input_ids'].to(self.device, non_blocking=True)
            attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
            labels = batch['labels'].to(self.device, non_blocking=True)

            self.optimizer.zero_grad()
            outputs = self.model(input_ids, attention_mask)
            loss = self.loss_fn(outputs, labels)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()

            # Update progress bar only every `miniters`
            if batch_idx % progress_bar.miniters == 0 or batch_idx == total_batches - 1:
                elapsed_time = time.time() - start_time
                batches_done = batch_idx + 1
                batches_left = total_batches - batches_done
                time_left = elapsed_time / batches_done * batches_left

                progress_bar.set_postfix(
                    {
                        "Batch Loss": f"{loss.item():.4f}",
                        "Avg Loss": f"{total_loss / batches_done:.4f}",
                        "Time Left": f"{time_left / 60:.2f} min",
                    }
                )
                progress_bar.update(progress_bar.miniters)  # Advance the progress bar by `miniters`
        return total_loss / len(self.train_dataloader)

    def evaluate(self, epoch, total_epochs):
        self.model.eval()
        total_loss = 0
        preds, true_labels = [], []

        # Initialize tqdm progress bar for evaluation
        progress_bar = tqdm(
            self.val_dataloader,
            desc=f"Epoch {epoch+1}/{total_epochs} - Evaluating",
            unit="batch",
            leave=False,
        )

        with torch.no_grad():
            for batch in self.val_dataloader:
                input_ids = batch['input_ids'].to(self.device, non_blocking=True)
                attention_mask = batch['attention_mask'].to(self.device, non_blocking=True)
                labels = batch['labels'].to(self.device, non_blocking=True)

                outputs = self.model(input_ids, attention_mask)
                loss = self.loss_fn(outputs, labels)
                total_loss += loss.item()

                preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
                true_labels.extend(labels.cpu().numpy())

        accuracy = accuracy_score(true_labels, preds)
        return total_loss / len(self.val_dataloader), accuracy

    def fine_tune(self, epochs=3):
        for epoch in range(epochs):
            print(f"Starting Epoch {epoch+1}/{epochs}")
            
            train_loss = self.train_epoch(epoch, epochs)
            val_loss, val_accuracy = self.evaluate(epoch, epochs)

            print(f"\nEpoch {epoch+1}/{epochs} Results:")
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}\n")

In [16]:
model = PhoBERTWithClassification()

In [125]:
# Initialize the PhoBERTTrainer
trainer = PhoBERTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    batch_size=16,
    lr=2e-5,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

# Fine-tune the model
trainer.fine_tune(epochs=3)

# Save the fine-tuned model
model.phobert.save_pretrained("./phobert-finetuned-vietnamese")
tokenizer.save_pretrained("./phobert-finetuned-vietnamese")



Starting Epoch 1/3



  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

Epoch 1/3 - Training:   0%|       | 0/3393 [00:00<?, ?batch/s, Batch Loss=0.7031, Avg Loss=0.7031, Time Left=30.21 min][A
Epoch 1/3 - Training:   3%| | 100/3393 [00:00<00:17, 187.15batch/s, Batch Loss=0.7031, Avg Loss=0.7031, Time Left=30.21[A
Epoch 1/3 - Training:   3%| | 100/3393 [00:11<00:17, 187.15batch/s, Batch Loss=0.7031, Avg Loss=0.7031, Time Left=30.21[A
Epoch 1/3 - Training:   3%| | 100/3393 [00:11<00:17, 187.15batch/s, Batch Loss=0.6931, Avg Loss=0.6948, Time Left=25.18[A
Epoch 1/3 - Training:   3%| | 101/3393 [00:11<08:32,  6.42batch/s, Batch Loss=0.6931, Avg Loss=0.6948, Time Left=25.18 [A
Epoch 1/3 - Training:   3%| | 101/3393 [00:11<08:32,  6.42batch/s, Batch Loss=0.6816, Avg Loss=0.6943, Time Left=25.24 [A
Epoch 1/3 - Training:   3%| | 102/3393 [00:11<08:53,  6.17batch/s, Batch Loss=0.6816, Avg Loss=0.6943, Time Left=25.24 [A
Epoch 1/3 - Training:   3%| | 102/3393 [00:12<08:53,  6.17b

Epoch 1/3 - Training:   4%| | 130/3393 [00:25<26:28,  2.05batch/s, Batch Loss=0.6965, Avg Loss=0.6921, Time Left=25.90 [A
Epoch 1/3 - Training:   4%| | 130/3393 [00:25<26:28,  2.05batch/s, Batch Loss=0.6528, Avg Loss=0.6914, Time Left=25.89 [A
Epoch 1/3 - Training:   4%| | 131/3393 [00:25<26:11,  2.08batch/s, Batch Loss=0.6528, Avg Loss=0.6914, Time Left=25.89 [A
Epoch 1/3 - Training:   4%| | 131/3393 [00:26<26:11,  2.08batch/s, Batch Loss=0.6710, Avg Loss=0.6910, Time Left=25.93 [A
Epoch 1/3 - Training:   4%| | 132/3393 [00:26<26:40,  2.04batch/s, Batch Loss=0.6710, Avg Loss=0.6910, Time Left=25.93 [A
Epoch 1/3 - Training:   4%| | 132/3393 [00:26<26:40,  2.04batch/s, Batch Loss=0.6814, Avg Loss=0.6908, Time Left=25.93 [A
Epoch 1/3 - Training:   4%| | 133/3393 [00:26<26:21,  2.06batch/s, Batch Loss=0.6814, Avg Loss=0.6908, Time Left=25.93 [A
Epoch 1/3 - Training:   4%| | 133/3393 [00:27<26:21,  2.06batch/s, Batch Loss=0.6709, Avg Loss=0.6905, Time Left=25.96 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:   5%| | 163/3393 [00:41<26:01,  2.07batch/s, Batch Loss=0.6688, Avg Loss=0.6852, Time Left=26.43 [A
Epoch 1/3 - Training:   5%| | 163/3393 [00:42<26:01,  2.07batch/s, Batch Loss=0.6218, Avg Loss=0.6845, Time Left=26.45 [A
Epoch 1/3 - Training:   5%| | 164/3393 [00:42<26:33,  2.03batch/s, Batch Loss=0.6218, Avg Loss=0.6845, Time Left=26.45 [A
Epoch 1/3 - Training:   5%| | 164/3393 [00:42<26:33,  2.03batch/s, Batch Loss=0.6264, Avg Loss=0.6838, Time Left=26.43 [A
Epoch 1/3 - Training:   5%| | 165/3393 [00:42<26:19,  2.04batch/s, Batch Loss=0.6264, Avg Loss=0.6838, Time Left=26.43 [A
Epoch 1/3 - Training:   5%| | 165/3393 [00:43<26:19,  2.04batch/s, Batch Loss=0.6349, Avg Loss=0.6833, Time Left=26.42 [A
Epoch 1/3 - Training:   5%| | 166/3393 [00:43<25:43,  2.09batch/s, Batch Loss=0.6349, Avg Loss=0.6833, Time Left=26.42 [A
Epoch 1/3 - Training:   5%| | 166/3393 [00:43<25:43,  2.09batch/s, Batch Loss=0.6096, Avg Loss=0.6825, Time Left=26.43 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:   6%| | 196/3393 [00:57<25:30,  2.09batch/s, Batch Loss=0.2866, Avg Loss=0.6588, Time Left=26.33 [A
Epoch 1/3 - Training:   6%| | 196/3393 [00:58<25:30,  2.09batch/s, Batch Loss=0.4623, Avg Loss=0.6571, Time Left=26.33 [A
Epoch 1/3 - Training:   6%| | 197/3393 [00:58<25:51,  2.06batch/s, Batch Loss=0.4623, Avg Loss=0.6571, Time Left=26.33 [A
Epoch 1/3 - Training:   6%| | 197/3393 [00:58<25:51,  2.06batch/s, Batch Loss=0.6178, Avg Loss=0.6568, Time Left=26.33 [A
Epoch 1/3 - Training:   6%| | 198/3393 [00:58<25:48,  2.06batch/s, Batch Loss=0.6178, Avg Loss=0.6568, Time Left=26.33 [A
Epoch 1/3 - Training:   6%| | 198/3393 [00:59<25:48,  2.06batch/s, Batch Loss=0.3994, Avg Loss=0.6547, Time Left=26.31 [A
Epoch 1/3 - Training:   6%| | 199/3393 [00:59<25:31,  2.09batch/s, Batch Loss=0.3994, Avg Loss=0.6547, Time Left=26.31 [A
Epoch 1/3 - Training:   6%| | 199/3393 [00:59<25:31,  2.09batch/s, Batch Loss=0.4241, Avg Loss=0.6529, Time Left=26.32 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:   7%| | 229/3393 [01:14<25:20,  2.08batch/s, Batch Loss=0.3709, Avg Loss=0.6173, Time Left=26.15 [A
Epoch 1/3 - Training:   7%| | 229/3393 [01:14<25:20,  2.08batch/s, Batch Loss=0.4393, Avg Loss=0.6162, Time Left=26.15 [A
Epoch 1/3 - Training:   7%| | 230/3393 [01:14<25:24,  2.08batch/s, Batch Loss=0.4393, Avg Loss=0.6162, Time Left=26.15 [A
Epoch 1/3 - Training:   7%| | 230/3393 [01:15<25:24,  2.08batch/s, Batch Loss=0.3010, Avg Loss=0.6141, Time Left=26.13 [A
Epoch 1/3 - Training:   7%| | 231/3393 [01:15<25:11,  2.09batch/s, Batch Loss=0.3010, Avg Loss=0.6141, Time Left=26.13 [A
Epoch 1/3 - Training:   7%| | 231/3393 [01:15<25:11,  2.09batch/s, Batch Loss=0.3672, Avg Loss=0.6125, Time Left=26.13 [A
Epoch 1/3 - Training:   7%| | 232/3393 [01:15<25:15,  2.09batch/s, Batch Loss=0.3672, Avg Loss=0.6125, Time Left=26.13 [A
Epoch 1/3 - Training:   7%| | 232/3393 [01:16<25:15,  2.09batch/s, Batch Loss=0.3015, Avg Loss=0.6106, Time Left=26.13 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:   8%| | 262/3393 [01:30<25:06,  2.08batch/s, Batch Loss=0.4053, Avg Loss=0.5793, Time Left=25.87 [A
Epoch 1/3 - Training:   8%| | 262/3393 [01:30<25:06,  2.08batch/s, Batch Loss=0.3227, Avg Loss=0.5779, Time Left=25.86 [A
Epoch 1/3 - Training:   8%| | 263/3393 [01:30<25:10,  2.07batch/s, Batch Loss=0.3227, Avg Loss=0.5779, Time Left=25.86 [A
Epoch 1/3 - Training:   8%| | 263/3393 [01:31<25:10,  2.07batch/s, Batch Loss=0.6084, Avg Loss=0.5781, Time Left=25.86 [A
Epoch 1/3 - Training:   8%| | 264/3393 [01:31<25:22,  2.05batch/s, Batch Loss=0.6084, Avg Loss=0.5781, Time Left=25.86 [A
Epoch 1/3 - Training:   8%| | 264/3393 [01:31<25:22,  2.05batch/s, Batch Loss=0.4612, Avg Loss=0.5775, Time Left=25.85 [A
Epoch 1/3 - Training:   8%| | 265/3393 [01:31<25:22,  2.05batch/s, Batch Loss=0.4612, Avg Loss=0.5775, Time Left=25.85 [A
Epoch 1/3 - Training:   8%| | 265/3393 [01:31<25:22,  2.05batch/s, Batch Loss=0.5824, Avg Loss=0.5775, Time Left=25.85 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:   9%| | 295/3393 [01:45<24:51,  2.08batch/s, Batch Loss=0.5873, Avg Loss=0.5513, Time Left=25.59 [A
Epoch 1/3 - Training:   9%| | 295/3393 [01:46<24:51,  2.08batch/s, Batch Loss=0.3735, Avg Loss=0.5505, Time Left=25.58 [A
Epoch 1/3 - Training:   9%| | 296/3393 [01:46<24:53,  2.07batch/s, Batch Loss=0.3735, Avg Loss=0.5505, Time Left=25.58 [A
Epoch 1/3 - Training:   9%| | 296/3393 [01:46<24:53,  2.07batch/s, Batch Loss=0.2693, Avg Loss=0.5492, Time Left=25.57 [A
Epoch 1/3 - Training:   9%| | 297/3393 [01:46<24:40,  2.09batch/s, Batch Loss=0.2693, Avg Loss=0.5492, Time Left=25.57 [A
Epoch 1/3 - Training:   9%| | 297/3393 [01:47<24:40,  2.09batch/s, Batch Loss=0.4224, Avg Loss=0.5487, Time Left=25.57 [A
Epoch 1/3 - Training:   9%| | 298/3393 [01:47<25:12,  2.05batch/s, Batch Loss=0.4224, Avg Loss=0.5487, Time Left=25.57 [A
Epoch 1/3 - Training:   9%| | 298/3393 [01:47<25:12,  2.05batch/s, Batch Loss=0.1604, Avg Loss=0.5469, Time Left=25.56 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  10%| | 328/3393 [02:02<27:27,  1.86batch/s, Batch Loss=0.2582, Avg Loss=0.5282, Time Left=25.52 [A
Epoch 1/3 - Training:  10%| | 328/3393 [02:03<27:27,  1.86batch/s, Batch Loss=0.3931, Avg Loss=0.5276, Time Left=25.50 [A
Epoch 1/3 - Training:  10%| | 329/3393 [02:03<26:21,  1.94batch/s, Batch Loss=0.3931, Avg Loss=0.5276, Time Left=25.50 [A
Epoch 1/3 - Training:  10%| | 329/3393 [02:03<26:21,  1.94batch/s, Batch Loss=0.4167, Avg Loss=0.5272, Time Left=25.50 [A
Epoch 1/3 - Training:  10%| | 330/3393 [02:03<26:01,  1.96batch/s, Batch Loss=0.4167, Avg Loss=0.5272, Time Left=25.50 [A
Epoch 1/3 - Training:  10%| | 330/3393 [02:04<26:01,  1.96batch/s, Batch Loss=0.5252, Avg Loss=0.5272, Time Left=25.48 [A
Epoch 1/3 - Training:  10%| | 331/3393 [02:04<25:22,  2.01batch/s, Batch Loss=0.5252, Avg Loss=0.5272, Time Left=25.48 [A
Epoch 1/3 - Training:  10%| | 331/3393 [02:04<25:22,  2.01batch/s, Batch Loss=0.1433, Avg Loss=0.5257, Time Left=25.48 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  11%| | 361/3393 [02:19<25:05,  2.01batch/s, Batch Loss=0.2679, Avg Loss=0.5092, Time Left=25.28 [A
Epoch 1/3 - Training:  11%| | 361/3393 [02:19<25:05,  2.01batch/s, Batch Loss=0.1822, Avg Loss=0.5081, Time Left=25.27 [A
Epoch 1/3 - Training:  11%| | 362/3393 [02:19<24:39,  2.05batch/s, Batch Loss=0.1822, Avg Loss=0.5081, Time Left=25.27 [A
Epoch 1/3 - Training:  11%| | 362/3393 [02:20<24:39,  2.05batch/s, Batch Loss=0.5021, Avg Loss=0.5080, Time Left=25.26 [A
Epoch 1/3 - Training:  11%| | 363/3393 [02:20<24:22,  2.07batch/s, Batch Loss=0.5021, Avg Loss=0.5080, Time Left=25.26 [A
Epoch 1/3 - Training:  11%| | 363/3393 [02:20<24:22,  2.07batch/s, Batch Loss=0.3476, Avg Loss=0.5075, Time Left=25.26 [A
Epoch 1/3 - Training:  11%| | 364/3393 [02:20<24:49,  2.03batch/s, Batch Loss=0.3476, Avg Loss=0.5075, Time Left=25.26 [A
Epoch 1/3 - Training:  11%| | 364/3393 [02:21<24:49,  2.03batch/s, Batch Loss=0.2606, Avg Loss=0.5066, Time Left=25.25 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  12%| | 394/3393 [02:35<24:19,  2.06batch/s, Batch Loss=0.7908, Avg Loss=0.4989, Time Left=25.01 [A
Epoch 1/3 - Training:  12%| | 394/3393 [02:35<24:19,  2.06batch/s, Batch Loss=0.4700, Avg Loss=0.4988, Time Left=24.99 [A
Epoch 1/3 - Training:  12%| | 395/3393 [02:35<24:01,  2.08batch/s, Batch Loss=0.4700, Avg Loss=0.4988, Time Left=24.99 [A
Epoch 1/3 - Training:  12%| | 395/3393 [02:36<24:01,  2.08batch/s, Batch Loss=0.3841, Avg Loss=0.4984, Time Left=24.99 [A
Epoch 1/3 - Training:  12%| | 396/3393 [02:36<24:46,  2.02batch/s, Batch Loss=0.3841, Avg Loss=0.4984, Time Left=24.99 [A
Epoch 1/3 - Training:  12%| | 396/3393 [02:36<24:46,  2.02batch/s, Batch Loss=0.5131, Avg Loss=0.4985, Time Left=24.99 [A
Epoch 1/3 - Training:  12%| | 397/3393 [02:36<25:01,  2.00batch/s, Batch Loss=0.5131, Avg Loss=0.4985, Time Left=24.99 [A
Epoch 1/3 - Training:  12%| | 397/3393 [02:37<25:01,  2.00batch/s, Batch Loss=0.2324, Avg Loss=0.4976, Time Left=24.99 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  13%|▏| 427/3393 [02:51<24:29,  2.02batch/s, Batch Loss=0.5562, Avg Loss=0.4872, Time Left=24.84 [A
Epoch 1/3 - Training:  13%|▏| 427/3393 [02:52<24:29,  2.02batch/s, Batch Loss=0.4205, Avg Loss=0.4870, Time Left=24.85 [A
Epoch 1/3 - Training:  13%|▏| 428/3393 [02:52<25:42,  1.92batch/s, Batch Loss=0.4205, Avg Loss=0.4870, Time Left=24.85 [A
Epoch 1/3 - Training:  13%|▏| 428/3393 [02:53<25:42,  1.92batch/s, Batch Loss=0.3808, Avg Loss=0.4867, Time Left=24.84 [A
Epoch 1/3 - Training:  13%|▏| 429/3393 [02:53<25:35,  1.93batch/s, Batch Loss=0.3808, Avg Loss=0.4867, Time Left=24.84 [A
Epoch 1/3 - Training:  13%|▏| 429/3393 [02:53<25:35,  1.93batch/s, Batch Loss=0.2365, Avg Loss=0.4860, Time Left=24.84 [A
Epoch 1/3 - Training:  13%|▏| 430/3393 [02:53<26:00,  1.90batch/s, Batch Loss=0.2365, Avg Loss=0.4860, Time Left=24.84 [A
Epoch 1/3 - Training:  13%|▏| 430/3393 [02:54<26:00,  1.90batch/s, Batch Loss=0.4237, Avg Loss=0.4858, Time Left=24.84 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  14%|▏| 460/3393 [03:08<25:18,  1.93batch/s, Batch Loss=0.3017, Avg Loss=0.4833, Time Left=24.64 [A
Epoch 1/3 - Training:  14%|▏| 460/3393 [03:09<25:18,  1.93batch/s, Batch Loss=0.3784, Avg Loss=0.4831, Time Left=24.62 [A
Epoch 1/3 - Training:  14%|▏| 461/3393 [03:09<24:32,  1.99batch/s, Batch Loss=0.3784, Avg Loss=0.4831, Time Left=24.62 [A
Epoch 1/3 - Training:  14%|▏| 461/3393 [03:09<24:32,  1.99batch/s, Batch Loss=0.2477, Avg Loss=0.4825, Time Left=24.62 [A
Epoch 1/3 - Training:  14%|▏| 462/3393 [03:09<25:11,  1.94batch/s, Batch Loss=0.2477, Avg Loss=0.4825, Time Left=24.62 [A
Epoch 1/3 - Training:  14%|▏| 462/3393 [03:10<25:11,  1.94batch/s, Batch Loss=0.3143, Avg Loss=0.4820, Time Left=24.62 [A
Epoch 1/3 - Training:  14%|▏| 463/3393 [03:10<25:10,  1.94batch/s, Batch Loss=0.3143, Avg Loss=0.4820, Time Left=24.62 [A
Epoch 1/3 - Training:  14%|▏| 463/3393 [03:10<25:10,  1.94batch/s, Batch Loss=0.3142, Avg Loss=0.4816, Time Left=24.61 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  15%|▏| 493/3393 [03:24<23:21,  2.07batch/s, Batch Loss=0.2841, Avg Loss=0.4689, Time Left=24.38 [A
Epoch 1/3 - Training:  15%|▏| 493/3393 [03:25<23:21,  2.07batch/s, Batch Loss=0.1255, Avg Loss=0.4681, Time Left=24.37 [A
Epoch 1/3 - Training:  15%|▏| 494/3393 [03:25<23:49,  2.03batch/s, Batch Loss=0.1255, Avg Loss=0.4681, Time Left=24.37 [A
Epoch 1/3 - Training:  15%|▏| 494/3393 [03:25<23:49,  2.03batch/s, Batch Loss=0.5250, Avg Loss=0.4682, Time Left=24.36 [A
Epoch 1/3 - Training:  15%|▏| 495/3393 [03:25<23:43,  2.04batch/s, Batch Loss=0.5250, Avg Loss=0.4682, Time Left=24.36 [A
Epoch 1/3 - Training:  15%|▏| 495/3393 [03:26<23:43,  2.04batch/s, Batch Loss=0.1500, Avg Loss=0.4675, Time Left=24.36 [A
Epoch 1/3 - Training:  15%|▏| 496/3393 [03:26<24:34,  1.97batch/s, Batch Loss=0.1500, Avg Loss=0.4675, Time Left=24.36 [A
Epoch 1/3 - Training:  15%|▏| 496/3393 [03:27<24:34,  1.97batch/s, Batch Loss=0.1773, Avg Loss=0.4668, Time Left=24.37 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  16%|▏| 526/3393 [03:42<24:18,  1.97batch/s, Batch Loss=0.1700, Avg Loss=0.4567, Time Left=24.21 [A
Epoch 1/3 - Training:  16%|▏| 526/3393 [03:42<24:18,  1.97batch/s, Batch Loss=0.1671, Avg Loss=0.4561, Time Left=24.20 [A
Epoch 1/3 - Training:  16%|▏| 527/3393 [03:42<23:57,  1.99batch/s, Batch Loss=0.1671, Avg Loss=0.4561, Time Left=24.20 [A
Epoch 1/3 - Training:  16%|▏| 527/3393 [03:43<23:57,  1.99batch/s, Batch Loss=0.2233, Avg Loss=0.4556, Time Left=24.19 [A
Epoch 1/3 - Training:  16%|▏| 528/3393 [03:43<24:07,  1.98batch/s, Batch Loss=0.2233, Avg Loss=0.4556, Time Left=24.19 [A
Epoch 1/3 - Training:  16%|▏| 528/3393 [03:43<24:07,  1.98batch/s, Batch Loss=0.3432, Avg Loss=0.4553, Time Left=24.18 [A
Epoch 1/3 - Training:  16%|▏| 529/3393 [03:43<23:40,  2.02batch/s, Batch Loss=0.3432, Avg Loss=0.4553, Time Left=24.18 [A
Epoch 1/3 - Training:  16%|▏| 529/3393 [03:44<23:40,  2.02batch/s, Batch Loss=0.2980, Avg Loss=0.4550, Time Left=24.17 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  16%|▏| 559/3393 [03:58<23:08,  2.04batch/s, Batch Loss=0.3562, Avg Loss=0.4444, Time Left=23.94 [A
Epoch 1/3 - Training:  16%|▏| 559/3393 [03:58<23:08,  2.04batch/s, Batch Loss=0.4437, Avg Loss=0.4444, Time Left=23.94 [A
Epoch 1/3 - Training:  17%|▏| 560/3393 [03:58<23:04,  2.05batch/s, Batch Loss=0.4437, Avg Loss=0.4444, Time Left=23.94 [A
Epoch 1/3 - Training:  17%|▏| 560/3393 [03:59<23:04,  2.05batch/s, Batch Loss=0.4488, Avg Loss=0.4444, Time Left=23.93 [A
Epoch 1/3 - Training:  17%|▏| 561/3393 [03:59<23:27,  2.01batch/s, Batch Loss=0.4488, Avg Loss=0.4444, Time Left=23.93 [A
Epoch 1/3 - Training:  17%|▏| 561/3393 [03:59<23:27,  2.01batch/s, Batch Loss=0.2921, Avg Loss=0.4441, Time Left=23.92 [A
Epoch 1/3 - Training:  17%|▏| 562/3393 [03:59<23:18,  2.02batch/s, Batch Loss=0.2921, Avg Loss=0.4441, Time Left=23.92 [A
Epoch 1/3 - Training:  17%|▏| 562/3393 [04:00<23:18,  2.02batch/s, Batch Loss=0.0808, Avg Loss=0.4434, Time Left=23.92 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  17%|▏| 592/3393 [04:14<22:51,  2.04batch/s, Batch Loss=0.2340, Avg Loss=0.4451, Time Left=23.67 [A
Epoch 1/3 - Training:  17%|▏| 592/3393 [04:15<22:51,  2.04batch/s, Batch Loss=0.5674, Avg Loss=0.4454, Time Left=23.66 [A
Epoch 1/3 - Training:  17%|▏| 593/3393 [04:15<23:01,  2.03batch/s, Batch Loss=0.5674, Avg Loss=0.4454, Time Left=23.66 [A
Epoch 1/3 - Training:  17%|▏| 593/3393 [04:15<23:01,  2.03batch/s, Batch Loss=0.3822, Avg Loss=0.4453, Time Left=23.66 [A
Epoch 1/3 - Training:  18%|▏| 594/3393 [04:15<22:55,  2.03batch/s, Batch Loss=0.3822, Avg Loss=0.4453, Time Left=23.66 [A
Epoch 1/3 - Training:  18%|▏| 594/3393 [04:16<22:55,  2.03batch/s, Batch Loss=0.2974, Avg Loss=0.4450, Time Left=23.65 [A
Epoch 1/3 - Training:  18%|▏| 595/3393 [04:16<23:05,  2.02batch/s, Batch Loss=0.2974, Avg Loss=0.4450, Time Left=23.65 [A
Epoch 1/3 - Training:  18%|▏| 595/3393 [04:16<23:05,  2.02batch/s, Batch Loss=0.1401, Avg Loss=0.4444, Time Left=23.64 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  18%|▏| 625/3393 [04:31<23:05,  2.00batch/s, Batch Loss=0.2889, Avg Loss=0.4374, Time Left=23.40 [A
Epoch 1/3 - Training:  18%|▏| 625/3393 [04:31<23:05,  2.00batch/s, Batch Loss=0.2940, Avg Loss=0.4371, Time Left=23.40 [A
Epoch 1/3 - Training:  18%|▏| 626/3393 [04:31<23:17,  1.98batch/s, Batch Loss=0.2940, Avg Loss=0.4371, Time Left=23.40 [A
Epoch 1/3 - Training:  18%|▏| 626/3393 [04:32<23:17,  1.98batch/s, Batch Loss=0.3532, Avg Loss=0.4370, Time Left=23.39 [A
Epoch 1/3 - Training:  18%|▏| 627/3393 [04:32<22:48,  2.02batch/s, Batch Loss=0.3532, Avg Loss=0.4370, Time Left=23.39 [A
Epoch 1/3 - Training:  18%|▏| 627/3393 [04:32<22:48,  2.02batch/s, Batch Loss=0.2113, Avg Loss=0.4366, Time Left=23.38 [A
Epoch 1/3 - Training:  19%|▏| 628/3393 [04:32<22:30,  2.05batch/s, Batch Loss=0.2113, Avg Loss=0.4366, Time Left=23.38 [A
Epoch 1/3 - Training:  19%|▏| 628/3393 [04:33<22:30,  2.05batch/s, Batch Loss=0.7097, Avg Loss=0.4371, Time Left=23.37 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  19%|▏| 658/3393 [04:47<22:28,  2.03batch/s, Batch Loss=0.2868, Avg Loss=0.4283, Time Left=23.12 [A
Epoch 1/3 - Training:  19%|▏| 658/3393 [04:47<22:28,  2.03batch/s, Batch Loss=0.2713, Avg Loss=0.4280, Time Left=23.11 [A
Epoch 1/3 - Training:  19%|▏| 659/3393 [04:47<22:10,  2.06batch/s, Batch Loss=0.2713, Avg Loss=0.4280, Time Left=23.11 [A
Epoch 1/3 - Training:  19%|▏| 659/3393 [04:48<22:10,  2.06batch/s, Batch Loss=0.2796, Avg Loss=0.4277, Time Left=23.11 [A
Epoch 1/3 - Training:  19%|▏| 660/3393 [04:48<22:25,  2.03batch/s, Batch Loss=0.2796, Avg Loss=0.4277, Time Left=23.11 [A
Epoch 1/3 - Training:  19%|▏| 660/3393 [04:48<22:25,  2.03batch/s, Batch Loss=0.1558, Avg Loss=0.4273, Time Left=23.10 [A
Epoch 1/3 - Training:  19%|▏| 661/3393 [04:48<22:16,  2.04batch/s, Batch Loss=0.1558, Avg Loss=0.4273, Time Left=23.10 [A
Epoch 1/3 - Training:  19%|▏| 661/3393 [04:49<22:16,  2.04batch/s, Batch Loss=0.3068, Avg Loss=0.4271, Time Left=23.09 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  20%|▏| 691/3393 [05:03<22:00,  2.05batch/s, Batch Loss=0.4556, Avg Loss=0.4236, Time Left=22.87 [A
Epoch 1/3 - Training:  20%|▏| 691/3393 [05:04<22:00,  2.05batch/s, Batch Loss=0.1698, Avg Loss=0.4232, Time Left=22.86 [A
Epoch 1/3 - Training:  20%|▏| 692/3393 [05:04<21:48,  2.06batch/s, Batch Loss=0.1698, Avg Loss=0.4232, Time Left=22.86 [A
Epoch 1/3 - Training:  20%|▏| 692/3393 [05:04<21:48,  2.06batch/s, Batch Loss=0.2183, Avg Loss=0.4229, Time Left=22.86 [A
Epoch 1/3 - Training:  20%|▏| 693/3393 [05:04<22:13,  2.02batch/s, Batch Loss=0.2183, Avg Loss=0.4229, Time Left=22.86 [A
Epoch 1/3 - Training:  20%|▏| 693/3393 [05:05<22:13,  2.02batch/s, Batch Loss=0.2592, Avg Loss=0.4226, Time Left=22.85 [A
Epoch 1/3 - Training:  20%|▏| 694/3393 [05:05<21:51,  2.06batch/s, Batch Loss=0.2592, Avg Loss=0.4226, Time Left=22.85 [A
Epoch 1/3 - Training:  20%|▏| 694/3393 [05:05<21:51,  2.06batch/s, Batch Loss=0.2463, Avg Loss=0.4223, Time Left=22.84 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  21%|▏| 724/3393 [05:20<24:48,  1.79batch/s, Batch Loss=0.3994, Avg Loss=0.4158, Time Left=22.62 [A
Epoch 1/3 - Training:  21%|▏| 724/3393 [05:20<24:48,  1.79batch/s, Batch Loss=0.1488, Avg Loss=0.4154, Time Left=22.61 [A
Epoch 1/3 - Training:  21%|▏| 725/3393 [05:20<23:39,  1.88batch/s, Batch Loss=0.1488, Avg Loss=0.4154, Time Left=22.61 [A
Epoch 1/3 - Training:  21%|▏| 725/3393 [05:21<23:39,  1.88batch/s, Batch Loss=0.2272, Avg Loss=0.4151, Time Left=22.61 [A
Epoch 1/3 - Training:  21%|▏| 726/3393 [05:21<23:57,  1.86batch/s, Batch Loss=0.2272, Avg Loss=0.4151, Time Left=22.61 [A
Epoch 1/3 - Training:  21%|▏| 726/3393 [05:21<23:57,  1.86batch/s, Batch Loss=0.4194, Avg Loss=0.4151, Time Left=22.60 [A
Epoch 1/3 - Training:  21%|▏| 727/3393 [05:21<23:44,  1.87batch/s, Batch Loss=0.4194, Avg Loss=0.4151, Time Left=22.60 [A
Epoch 1/3 - Training:  21%|▏| 727/3393 [05:22<23:44,  1.87batch/s, Batch Loss=0.3660, Avg Loss=0.4151, Time Left=22.60 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  22%|▏| 757/3393 [05:37<21:26,  2.05batch/s, Batch Loss=0.1187, Avg Loss=0.4092, Time Left=22.37 [A
Epoch 1/3 - Training:  22%|▏| 757/3393 [05:37<21:26,  2.05batch/s, Batch Loss=0.1878, Avg Loss=0.4089, Time Left=22.37 [A
Epoch 1/3 - Training:  22%|▏| 758/3393 [05:37<21:57,  2.00batch/s, Batch Loss=0.1878, Avg Loss=0.4089, Time Left=22.37 [A
Epoch 1/3 - Training:  22%|▏| 758/3393 [05:38<21:57,  2.00batch/s, Batch Loss=0.3112, Avg Loss=0.4087, Time Left=22.36 [A
Epoch 1/3 - Training:  22%|▏| 759/3393 [05:38<21:43,  2.02batch/s, Batch Loss=0.3112, Avg Loss=0.4087, Time Left=22.36 [A
Epoch 1/3 - Training:  22%|▏| 759/3393 [05:38<21:43,  2.02batch/s, Batch Loss=0.0699, Avg Loss=0.4083, Time Left=22.35 [A
Epoch 1/3 - Training:  22%|▏| 760/3393 [05:38<22:01,  1.99batch/s, Batch Loss=0.0699, Avg Loss=0.4083, Time Left=22.35 [A
Epoch 1/3 - Training:  22%|▏| 760/3393 [05:39<22:01,  1.99batch/s, Batch Loss=0.3724, Avg Loss=0.4082, Time Left=22.34 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  23%|▏| 790/3393 [05:53<21:41,  2.00batch/s, Batch Loss=0.3659, Avg Loss=0.4037, Time Left=22.12 [A
Epoch 1/3 - Training:  23%|▏| 790/3393 [05:54<21:41,  2.00batch/s, Batch Loss=0.1521, Avg Loss=0.4034, Time Left=22.11 [A
Epoch 1/3 - Training:  23%|▏| 791/3393 [05:54<21:28,  2.02batch/s, Batch Loss=0.1521, Avg Loss=0.4034, Time Left=22.11 [A
Epoch 1/3 - Training:  23%|▏| 791/3393 [05:54<21:28,  2.02batch/s, Batch Loss=0.2772, Avg Loss=0.4032, Time Left=22.10 [A
Epoch 1/3 - Training:  23%|▏| 792/3393 [05:54<21:20,  2.03batch/s, Batch Loss=0.2772, Avg Loss=0.4032, Time Left=22.10 [A
Epoch 1/3 - Training:  23%|▏| 792/3393 [05:55<21:20,  2.03batch/s, Batch Loss=0.3457, Avg Loss=0.4031, Time Left=22.10 [A
Epoch 1/3 - Training:  23%|▏| 793/3393 [05:55<21:27,  2.02batch/s, Batch Loss=0.3457, Avg Loss=0.4031, Time Left=22.10 [A
Epoch 1/3 - Training:  23%|▏| 793/3393 [05:55<21:27,  2.02batch/s, Batch Loss=0.2316, Avg Loss=0.4029, Time Left=22.09 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  24%|▏| 823/3393 [06:10<20:53,  2.05batch/s, Batch Loss=0.3542, Avg Loss=0.3964, Time Left=21.85 [A
Epoch 1/3 - Training:  24%|▏| 823/3393 [06:10<20:53,  2.05batch/s, Batch Loss=0.0941, Avg Loss=0.3960, Time Left=21.84 [A
Epoch 1/3 - Training:  24%|▏| 824/3393 [06:10<21:05,  2.03batch/s, Batch Loss=0.0941, Avg Loss=0.3960, Time Left=21.84 [A
Epoch 1/3 - Training:  24%|▏| 824/3393 [06:11<21:05,  2.03batch/s, Batch Loss=0.0869, Avg Loss=0.3956, Time Left=21.83 [A
Epoch 1/3 - Training:  24%|▏| 825/3393 [06:11<20:48,  2.06batch/s, Batch Loss=0.0869, Avg Loss=0.3956, Time Left=21.83 [A
Epoch 1/3 - Training:  24%|▏| 825/3393 [06:11<20:48,  2.06batch/s, Batch Loss=0.3029, Avg Loss=0.3954, Time Left=21.82 [A
Epoch 1/3 - Training:  24%|▏| 826/3393 [06:11<21:13,  2.02batch/s, Batch Loss=0.3029, Avg Loss=0.3954, Time Left=21.82 [A
Epoch 1/3 - Training:  24%|▏| 826/3393 [06:12<21:13,  2.02batch/s, Batch Loss=0.2694, Avg Loss=0.3953, Time Left=21.81 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  25%|▎| 856/3393 [06:26<20:28,  2.07batch/s, Batch Loss=0.7838, Avg Loss=0.3891, Time Left=21.56 [A
Epoch 1/3 - Training:  25%|▎| 856/3393 [06:26<20:28,  2.07batch/s, Batch Loss=0.4209, Avg Loss=0.3891, Time Left=21.56 [A
Epoch 1/3 - Training:  25%|▎| 857/3393 [06:26<20:53,  2.02batch/s, Batch Loss=0.4209, Avg Loss=0.3891, Time Left=21.56 [A
Epoch 1/3 - Training:  25%|▎| 857/3393 [06:27<20:53,  2.02batch/s, Batch Loss=0.1628, Avg Loss=0.3888, Time Left=21.55 [A
Epoch 1/3 - Training:  25%|▎| 858/3393 [06:27<20:43,  2.04batch/s, Batch Loss=0.1628, Avg Loss=0.3888, Time Left=21.55 [A
Epoch 1/3 - Training:  25%|▎| 858/3393 [06:27<20:43,  2.04batch/s, Batch Loss=0.1352, Avg Loss=0.3885, Time Left=21.54 [A
Epoch 1/3 - Training:  25%|▎| 859/3393 [06:27<20:45,  2.03batch/s, Batch Loss=0.1352, Avg Loss=0.3885, Time Left=21.54 [A
Epoch 1/3 - Training:  25%|▎| 859/3393 [06:28<20:45,  2.03batch/s, Batch Loss=0.1676, Avg Loss=0.3882, Time Left=21.53 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  26%|▎| 889/3393 [06:42<20:24,  2.04batch/s, Batch Loss=0.2715, Avg Loss=0.3826, Time Left=21.28 [A
Epoch 1/3 - Training:  26%|▎| 889/3393 [06:42<20:24,  2.04batch/s, Batch Loss=0.0707, Avg Loss=0.3822, Time Left=21.27 [A
Epoch 1/3 - Training:  26%|▎| 890/3393 [06:42<20:45,  2.01batch/s, Batch Loss=0.0707, Avg Loss=0.3822, Time Left=21.27 [A
Epoch 1/3 - Training:  26%|▎| 890/3393 [06:43<20:45,  2.01batch/s, Batch Loss=0.1148, Avg Loss=0.3819, Time Left=21.27 [A
Epoch 1/3 - Training:  26%|▎| 891/3393 [06:43<20:25,  2.04batch/s, Batch Loss=0.1148, Avg Loss=0.3819, Time Left=21.27 [A
Epoch 1/3 - Training:  26%|▎| 891/3393 [06:43<20:25,  2.04batch/s, Batch Loss=0.1540, Avg Loss=0.3816, Time Left=21.26 [A
Epoch 1/3 - Training:  26%|▎| 892/3393 [06:43<20:21,  2.05batch/s, Batch Loss=0.1540, Avg Loss=0.3816, Time Left=21.26 [A
Epoch 1/3 - Training:  26%|▎| 892/3393 [06:44<20:21,  2.05batch/s, Batch Loss=0.7289, Avg Loss=0.3820, Time Left=21.25 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  27%|▎| 922/3393 [06:58<21:17,  1.93batch/s, Batch Loss=0.2903, Avg Loss=0.3779, Time Left=21.02 [A
Epoch 1/3 - Training:  27%|▎| 922/3393 [06:59<21:17,  1.93batch/s, Batch Loss=0.4753, Avg Loss=0.3780, Time Left=21.02 [A
Epoch 1/3 - Training:  27%|▎| 923/3393 [06:59<21:47,  1.89batch/s, Batch Loss=0.4753, Avg Loss=0.3780, Time Left=21.02 [A
Epoch 1/3 - Training:  27%|▎| 923/3393 [07:00<21:47,  1.89batch/s, Batch Loss=0.3972, Avg Loss=0.3780, Time Left=21.01 [A
Epoch 1/3 - Training:  27%|▎| 924/3393 [07:00<22:47,  1.81batch/s, Batch Loss=0.3972, Avg Loss=0.3780, Time Left=21.01 [A
Epoch 1/3 - Training:  27%|▎| 924/3393 [07:00<22:47,  1.81batch/s, Batch Loss=0.3403, Avg Loss=0.3780, Time Left=21.01 [A
Epoch 1/3 - Training:  27%|▎| 925/3393 [07:00<22:46,  1.81batch/s, Batch Loss=0.3403, Avg Loss=0.3780, Time Left=21.01 [A
Epoch 1/3 - Training:  27%|▎| 925/3393 [07:01<22:46,  1.81batch/s, Batch Loss=0.2909, Avg Loss=0.3779, Time Left=21.00 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  28%|▎| 955/3393 [07:16<20:31,  1.98batch/s, Batch Loss=0.2256, Avg Loss=0.3738, Time Left=20.80 [A
Epoch 1/3 - Training:  28%|▎| 955/3393 [07:16<20:31,  1.98batch/s, Batch Loss=0.1562, Avg Loss=0.3736, Time Left=20.79 [A
Epoch 1/3 - Training:  28%|▎| 956/3393 [07:16<20:20,  2.00batch/s, Batch Loss=0.1562, Avg Loss=0.3736, Time Left=20.79 [A
Epoch 1/3 - Training:  28%|▎| 956/3393 [07:17<20:20,  2.00batch/s, Batch Loss=0.3044, Avg Loss=0.3735, Time Left=20.78 [A
Epoch 1/3 - Training:  28%|▎| 957/3393 [07:17<20:21,  1.99batch/s, Batch Loss=0.3044, Avg Loss=0.3735, Time Left=20.78 [A
Epoch 1/3 - Training:  28%|▎| 957/3393 [07:17<20:21,  1.99batch/s, Batch Loss=0.2609, Avg Loss=0.3734, Time Left=20.77 [A
Epoch 1/3 - Training:  28%|▎| 958/3393 [07:17<19:59,  2.03batch/s, Batch Loss=0.2609, Avg Loss=0.3734, Time Left=20.77 [A
Epoch 1/3 - Training:  28%|▎| 958/3393 [07:18<19:59,  2.03batch/s, Batch Loss=0.1542, Avg Loss=0.3731, Time Left=20.77 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  29%|▎| 988/3393 [07:32<19:50,  2.02batch/s, Batch Loss=0.7142, Avg Loss=0.3696, Time Left=20.51 [A
Epoch 1/3 - Training:  29%|▎| 988/3393 [07:32<19:50,  2.02batch/s, Batch Loss=0.2512, Avg Loss=0.3694, Time Left=20.50 [A
Epoch 1/3 - Training:  29%|▎| 989/3393 [07:32<19:43,  2.03batch/s, Batch Loss=0.2512, Avg Loss=0.3694, Time Left=20.50 [A
Epoch 1/3 - Training:  29%|▎| 989/3393 [07:33<19:43,  2.03batch/s, Batch Loss=0.2756, Avg Loss=0.3693, Time Left=20.50 [A
Epoch 1/3 - Training:  29%|▎| 990/3393 [07:33<19:38,  2.04batch/s, Batch Loss=0.2756, Avg Loss=0.3693, Time Left=20.50 [A
Epoch 1/3 - Training:  29%|▎| 990/3393 [07:33<19:38,  2.04batch/s, Batch Loss=0.1749, Avg Loss=0.3691, Time Left=20.49 [A
Epoch 1/3 - Training:  29%|▎| 991/3393 [07:33<19:34,  2.04batch/s, Batch Loss=0.1749, Avg Loss=0.3691, Time Left=20.49 [A
Epoch 1/3 - Training:  29%|▎| 991/3393 [07:34<19:34,  2.04batch/s, Batch Loss=0.2411, Avg Loss=0.3690, Time Left=20.48 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  30%|▎| 1021/3393 [07:48<19:48,  2.00batch/s, Batch Loss=0.1031, Avg Loss=0.3652, Time Left=20.24[A
Epoch 1/3 - Training:  30%|▎| 1021/3393 [07:49<19:48,  2.00batch/s, Batch Loss=0.2453, Avg Loss=0.3651, Time Left=20.23[A
Epoch 1/3 - Training:  30%|▎| 1022/3393 [07:49<20:00,  1.98batch/s, Batch Loss=0.2453, Avg Loss=0.3651, Time Left=20.23[A
Epoch 1/3 - Training:  30%|▎| 1022/3393 [07:49<20:00,  1.98batch/s, Batch Loss=0.0945, Avg Loss=0.3648, Time Left=20.22[A
Epoch 1/3 - Training:  30%|▎| 1023/3393 [07:49<19:57,  1.98batch/s, Batch Loss=0.0945, Avg Loss=0.3648, Time Left=20.22[A
Epoch 1/3 - Training:  30%|▎| 1023/3393 [07:50<19:57,  1.98batch/s, Batch Loss=0.2206, Avg Loss=0.3646, Time Left=20.21[A
Epoch 1/3 - Training:  30%|▎| 1024/3393 [07:50<19:32,  2.02batch/s, Batch Loss=0.2206, Avg Loss=0.3646, Time Left=20.21[A
Epoch 1/3 - Training:  30%|▎| 1024/3393 [07:50<19:32,  2.02batch/s, Batch Loss=0.3663, Avg Loss=0.3646, Time Left=20.20[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  31%|▎| 1054/3393 [08:05<19:16,  2.02batch/s, Batch Loss=0.2091, Avg Loss=0.3609, Time Left=19.96[A
Epoch 1/3 - Training:  31%|▎| 1054/3393 [08:05<19:16,  2.02batch/s, Batch Loss=0.1274, Avg Loss=0.3607, Time Left=19.95[A
Epoch 1/3 - Training:  31%|▎| 1055/3393 [08:05<18:58,  2.05batch/s, Batch Loss=0.1274, Avg Loss=0.3607, Time Left=19.95[A
Epoch 1/3 - Training:  31%|▎| 1055/3393 [08:06<18:58,  2.05batch/s, Batch Loss=0.0621, Avg Loss=0.3604, Time Left=19.95[A
Epoch 1/3 - Training:  31%|▎| 1056/3393 [08:06<19:09,  2.03batch/s, Batch Loss=0.0621, Avg Loss=0.3604, Time Left=19.95[A
Epoch 1/3 - Training:  31%|▎| 1056/3393 [08:06<19:09,  2.03batch/s, Batch Loss=0.2647, Avg Loss=0.3603, Time Left=19.94[A
Epoch 1/3 - Training:  31%|▎| 1057/3393 [08:06<18:54,  2.06batch/s, Batch Loss=0.2647, Avg Loss=0.3603, Time Left=19.94[A
Epoch 1/3 - Training:  31%|▎| 1057/3393 [08:06<18:54,  2.06batch/s, Batch Loss=0.3620, Avg Loss=0.3603, Time Left=19.93[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  32%|▎| 1087/3393 [08:21<19:10,  2.00batch/s, Batch Loss=0.1798, Avg Loss=0.3577, Time Left=19.68[A
Epoch 1/3 - Training:  32%|▎| 1087/3393 [08:21<19:10,  2.00batch/s, Batch Loss=0.1057, Avg Loss=0.3575, Time Left=19.67[A
Epoch 1/3 - Training:  32%|▎| 1088/3393 [08:21<18:51,  2.04batch/s, Batch Loss=0.1057, Avg Loss=0.3575, Time Left=19.67[A
Epoch 1/3 - Training:  32%|▎| 1088/3393 [08:22<18:51,  2.04batch/s, Batch Loss=0.1582, Avg Loss=0.3573, Time Left=19.66[A
Epoch 1/3 - Training:  32%|▎| 1089/3393 [08:22<18:48,  2.04batch/s, Batch Loss=0.1582, Avg Loss=0.3573, Time Left=19.66[A
Epoch 1/3 - Training:  32%|▎| 1089/3393 [08:22<18:48,  2.04batch/s, Batch Loss=0.3487, Avg Loss=0.3573, Time Left=19.65[A
Epoch 1/3 - Training:  32%|▎| 1090/3393 [08:22<18:56,  2.03batch/s, Batch Loss=0.3487, Avg Loss=0.3573, Time Left=19.65[A
Epoch 1/3 - Training:  32%|▎| 1090/3393 [08:23<18:56,  2.03batch/s, Batch Loss=0.5097, Avg Loss=0.3574, Time Left=19.64[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  33%|▎| 1120/3393 [08:37<18:48,  2.01batch/s, Batch Loss=0.2666, Avg Loss=0.3538, Time Left=19.40[A
Epoch 1/3 - Training:  33%|▎| 1120/3393 [08:37<18:48,  2.01batch/s, Batch Loss=0.2525, Avg Loss=0.3537, Time Left=19.39[A
Epoch 1/3 - Training:  33%|▎| 1121/3393 [08:37<18:31,  2.04batch/s, Batch Loss=0.2525, Avg Loss=0.3537, Time Left=19.39[A
Epoch 1/3 - Training:  33%|▎| 1121/3393 [08:38<18:31,  2.04batch/s, Batch Loss=0.1142, Avg Loss=0.3535, Time Left=19.38[A
Epoch 1/3 - Training:  33%|▎| 1122/3393 [08:38<18:39,  2.03batch/s, Batch Loss=0.1142, Avg Loss=0.3535, Time Left=19.38[A
Epoch 1/3 - Training:  33%|▎| 1122/3393 [08:38<18:39,  2.03batch/s, Batch Loss=0.2188, Avg Loss=0.3534, Time Left=19.38[A
Epoch 1/3 - Training:  33%|▎| 1123/3393 [08:38<19:05,  1.98batch/s, Batch Loss=0.2188, Avg Loss=0.3534, Time Left=19.38[A
Epoch 1/3 - Training:  33%|▎| 1123/3393 [08:39<19:05,  1.98batch/s, Batch Loss=0.1870, Avg Loss=0.3532, Time Left=19.37[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  34%|▎| 1153/3393 [08:53<18:48,  1.99batch/s, Batch Loss=0.0399, Avg Loss=0.3485, Time Left=19.13[A
Epoch 1/3 - Training:  34%|▎| 1153/3393 [08:54<18:48,  1.99batch/s, Batch Loss=0.4765, Avg Loss=0.3486, Time Left=19.13[A
Epoch 1/3 - Training:  34%|▎| 1154/3393 [08:54<18:40,  2.00batch/s, Batch Loss=0.4765, Avg Loss=0.3486, Time Left=19.13[A
Epoch 1/3 - Training:  34%|▎| 1154/3393 [08:54<18:40,  2.00batch/s, Batch Loss=0.1738, Avg Loss=0.3485, Time Left=19.12[A
Epoch 1/3 - Training:  34%|▎| 1155/3393 [08:54<18:31,  2.01batch/s, Batch Loss=0.1738, Avg Loss=0.3485, Time Left=19.12[A
Epoch 1/3 - Training:  34%|▎| 1155/3393 [08:55<18:31,  2.01batch/s, Batch Loss=0.1129, Avg Loss=0.3483, Time Left=19.11[A
Epoch 1/3 - Training:  34%|▎| 1156/3393 [08:55<18:55,  1.97batch/s, Batch Loss=0.1129, Avg Loss=0.3483, Time Left=19.11[A
Epoch 1/3 - Training:  34%|▎| 1156/3393 [08:55<18:55,  1.97batch/s, Batch Loss=0.1300, Avg Loss=0.3481, Time Left=19.10[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  35%|▎| 1186/3393 [09:10<17:55,  2.05batch/s, Batch Loss=0.1735, Avg Loss=0.3449, Time Left=18.86[A
Epoch 1/3 - Training:  35%|▎| 1186/3393 [09:10<17:55,  2.05batch/s, Batch Loss=0.3725, Avg Loss=0.3450, Time Left=18.85[A
Epoch 1/3 - Training:  35%|▎| 1187/3393 [09:10<17:55,  2.05batch/s, Batch Loss=0.3725, Avg Loss=0.3450, Time Left=18.85[A
Epoch 1/3 - Training:  35%|▎| 1187/3393 [09:11<17:55,  2.05batch/s, Batch Loss=0.4922, Avg Loss=0.3451, Time Left=18.85[A
Epoch 1/3 - Training:  35%|▎| 1188/3393 [09:11<17:34,  2.09batch/s, Batch Loss=0.4922, Avg Loss=0.3451, Time Left=18.85[A
Epoch 1/3 - Training:  35%|▎| 1188/3393 [09:11<17:34,  2.09batch/s, Batch Loss=0.2582, Avg Loss=0.3450, Time Left=18.84[A
Epoch 1/3 - Training:  35%|▎| 1189/3393 [09:11<17:44,  2.07batch/s, Batch Loss=0.2582, Avg Loss=0.3450, Time Left=18.84[A
Epoch 1/3 - Training:  35%|▎| 1189/3393 [09:12<17:44,  2.07batch/s, Batch Loss=0.1677, Avg Loss=0.3449, Time Left=18.83[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  36%|▎| 1219/3393 [09:26<17:42,  2.05batch/s, Batch Loss=0.1962, Avg Loss=0.3411, Time Left=18.59[A
Epoch 1/3 - Training:  36%|▎| 1219/3393 [09:27<17:42,  2.05batch/s, Batch Loss=0.2074, Avg Loss=0.3410, Time Left=18.58[A
Epoch 1/3 - Training:  36%|▎| 1220/3393 [09:27<17:51,  2.03batch/s, Batch Loss=0.2074, Avg Loss=0.3410, Time Left=18.58[A
Epoch 1/3 - Training:  36%|▎| 1220/3393 [09:27<17:51,  2.03batch/s, Batch Loss=0.2271, Avg Loss=0.3409, Time Left=18.57[A
Epoch 1/3 - Training:  36%|▎| 1221/3393 [09:27<17:56,  2.02batch/s, Batch Loss=0.2271, Avg Loss=0.3409, Time Left=18.57[A
Epoch 1/3 - Training:  36%|▎| 1221/3393 [09:28<17:56,  2.02batch/s, Batch Loss=0.1592, Avg Loss=0.3407, Time Left=18.56[A
Epoch 1/3 - Training:  36%|▎| 1222/3393 [09:28<17:56,  2.02batch/s, Batch Loss=0.1592, Avg Loss=0.3407, Time Left=18.56[A
Epoch 1/3 - Training:  36%|▎| 1222/3393 [09:28<17:56,  2.02batch/s, Batch Loss=0.4469, Avg Loss=0.3408, Time Left=18.55[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  37%|▎| 1252/3393 [09:42<17:28,  2.04batch/s, Batch Loss=0.1365, Avg Loss=0.3381, Time Left=18.31[A
Epoch 1/3 - Training:  37%|▎| 1252/3393 [09:43<17:28,  2.04batch/s, Batch Loss=0.3557, Avg Loss=0.3381, Time Left=18.30[A
Epoch 1/3 - Training:  37%|▎| 1253/3393 [09:43<17:22,  2.05batch/s, Batch Loss=0.3557, Avg Loss=0.3381, Time Left=18.30[A
Epoch 1/3 - Training:  37%|▎| 1253/3393 [09:43<17:22,  2.05batch/s, Batch Loss=0.2742, Avg Loss=0.3381, Time Left=18.29[A
Epoch 1/3 - Training:  37%|▎| 1254/3393 [09:43<17:40,  2.02batch/s, Batch Loss=0.2742, Avg Loss=0.3381, Time Left=18.29[A
Epoch 1/3 - Training:  37%|▎| 1254/3393 [09:44<17:40,  2.02batch/s, Batch Loss=0.1689, Avg Loss=0.3379, Time Left=18.28[A
Epoch 1/3 - Training:  37%|▎| 1255/3393 [09:44<17:46,  2.01batch/s, Batch Loss=0.1689, Avg Loss=0.3379, Time Left=18.28[A
Epoch 1/3 - Training:  37%|▎| 1255/3393 [09:44<17:46,  2.01batch/s, Batch Loss=0.3624, Avg Loss=0.3379, Time Left=18.28[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  38%|▍| 1285/3393 [09:59<17:23,  2.02batch/s, Batch Loss=0.2999, Avg Loss=0.3355, Time Left=18.04[A
Epoch 1/3 - Training:  38%|▍| 1285/3393 [09:59<17:23,  2.02batch/s, Batch Loss=0.0534, Avg Loss=0.3352, Time Left=18.03[A
Epoch 1/3 - Training:  38%|▍| 1286/3393 [09:59<17:22,  2.02batch/s, Batch Loss=0.0534, Avg Loss=0.3352, Time Left=18.03[A
Epoch 1/3 - Training:  38%|▍| 1286/3393 [10:00<17:22,  2.02batch/s, Batch Loss=0.2254, Avg Loss=0.3351, Time Left=18.02[A
Epoch 1/3 - Training:  38%|▍| 1287/3393 [10:00<17:08,  2.05batch/s, Batch Loss=0.2254, Avg Loss=0.3351, Time Left=18.02[A
Epoch 1/3 - Training:  38%|▍| 1287/3393 [10:00<17:08,  2.05batch/s, Batch Loss=0.0849, Avg Loss=0.3349, Time Left=18.01[A
Epoch 1/3 - Training:  38%|▍| 1288/3393 [10:00<17:29,  2.01batch/s, Batch Loss=0.0849, Avg Loss=0.3349, Time Left=18.01[A
Epoch 1/3 - Training:  38%|▍| 1288/3393 [10:01<17:29,  2.01batch/s, Batch Loss=0.1361, Avg Loss=0.3348, Time Left=18.00[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  39%|▍| 1318/3393 [10:15<16:34,  2.09batch/s, Batch Loss=0.2969, Avg Loss=0.3325, Time Left=17.76[A
Epoch 1/3 - Training:  39%|▍| 1318/3393 [10:15<16:34,  2.09batch/s, Batch Loss=0.1715, Avg Loss=0.3323, Time Left=17.75[A
Epoch 1/3 - Training:  39%|▍| 1319/3393 [10:15<16:57,  2.04batch/s, Batch Loss=0.1715, Avg Loss=0.3323, Time Left=17.75[A
Epoch 1/3 - Training:  39%|▍| 1319/3393 [10:16<16:57,  2.04batch/s, Batch Loss=0.2609, Avg Loss=0.3323, Time Left=17.74[A
Epoch 1/3 - Training:  39%|▍| 1320/3393 [10:16<16:54,  2.04batch/s, Batch Loss=0.2609, Avg Loss=0.3323, Time Left=17.74[A
Epoch 1/3 - Training:  39%|▍| 1320/3393 [10:16<16:54,  2.04batch/s, Batch Loss=0.0984, Avg Loss=0.3321, Time Left=17.73[A
Epoch 1/3 - Training:  39%|▍| 1321/3393 [10:16<17:11,  2.01batch/s, Batch Loss=0.0984, Avg Loss=0.3321, Time Left=17.73[A
Epoch 1/3 - Training:  39%|▍| 1321/3393 [10:17<17:11,  2.01batch/s, Batch Loss=0.0926, Avg Loss=0.3319, Time Left=17.72[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  40%|▍| 1351/3393 [10:31<17:01,  2.00batch/s, Batch Loss=0.1741, Avg Loss=0.3287, Time Left=17.48[A
Epoch 1/3 - Training:  40%|▍| 1351/3393 [10:31<17:01,  2.00batch/s, Batch Loss=0.2742, Avg Loss=0.3287, Time Left=17.47[A
Epoch 1/3 - Training:  40%|▍| 1352/3393 [10:31<17:11,  1.98batch/s, Batch Loss=0.2742, Avg Loss=0.3287, Time Left=17.47[A
Epoch 1/3 - Training:  40%|▍| 1352/3393 [10:32<17:11,  1.98batch/s, Batch Loss=0.1605, Avg Loss=0.3286, Time Left=17.46[A
Epoch 1/3 - Training:  40%|▍| 1353/3393 [10:32<16:50,  2.02batch/s, Batch Loss=0.1605, Avg Loss=0.3286, Time Left=17.46[A
Epoch 1/3 - Training:  40%|▍| 1353/3393 [10:32<16:50,  2.02batch/s, Batch Loss=0.0580, Avg Loss=0.3283, Time Left=17.46[A
Epoch 1/3 - Training:  40%|▍| 1354/3393 [10:32<17:03,  1.99batch/s, Batch Loss=0.0580, Avg Loss=0.3283, Time Left=17.46[A
Epoch 1/3 - Training:  40%|▍| 1354/3393 [10:33<17:03,  1.99batch/s, Batch Loss=0.1935, Avg Loss=0.3282, Time Left=17.45[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  41%|▍| 1384/3393 [10:47<16:40,  2.01batch/s, Batch Loss=0.1724, Avg Loss=0.3257, Time Left=17.21[A
Epoch 1/3 - Training:  41%|▍| 1384/3393 [10:48<16:40,  2.01batch/s, Batch Loss=0.2026, Avg Loss=0.3256, Time Left=17.20[A
Epoch 1/3 - Training:  41%|▍| 1385/3393 [10:48<16:33,  2.02batch/s, Batch Loss=0.2026, Avg Loss=0.3256, Time Left=17.20[A
Epoch 1/3 - Training:  41%|▍| 1385/3393 [10:48<16:33,  2.02batch/s, Batch Loss=0.0813, Avg Loss=0.3254, Time Left=17.19[A
Epoch 1/3 - Training:  41%|▍| 1386/3393 [10:48<16:25,  2.04batch/s, Batch Loss=0.0813, Avg Loss=0.3254, Time Left=17.19[A
Epoch 1/3 - Training:  41%|▍| 1386/3393 [10:49<16:25,  2.04batch/s, Batch Loss=0.2471, Avg Loss=0.3253, Time Left=17.18[A
Epoch 1/3 - Training:  41%|▍| 1387/3393 [10:49<16:15,  2.06batch/s, Batch Loss=0.2471, Avg Loss=0.3253, Time Left=17.18[A
Epoch 1/3 - Training:  41%|▍| 1387/3393 [10:49<16:15,  2.06batch/s, Batch Loss=0.1655, Avg Loss=0.3252, Time Left=17.17[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  42%|▍| 1417/3393 [11:04<16:17,  2.02batch/s, Batch Loss=0.2107, Avg Loss=0.3227, Time Left=16.93[A
Epoch 1/3 - Training:  42%|▍| 1417/3393 [11:04<16:17,  2.02batch/s, Batch Loss=0.1334, Avg Loss=0.3225, Time Left=16.93[A
Epoch 1/3 - Training:  42%|▍| 1418/3393 [11:04<16:24,  2.01batch/s, Batch Loss=0.1334, Avg Loss=0.3225, Time Left=16.93[A
Epoch 1/3 - Training:  42%|▍| 1418/3393 [11:04<16:24,  2.01batch/s, Batch Loss=0.0562, Avg Loss=0.3223, Time Left=16.92[A
Epoch 1/3 - Training:  42%|▍| 1419/3393 [11:04<16:05,  2.04batch/s, Batch Loss=0.0562, Avg Loss=0.3223, Time Left=16.92[A
Epoch 1/3 - Training:  42%|▍| 1419/3393 [11:05<16:05,  2.04batch/s, Batch Loss=0.1331, Avg Loss=0.3222, Time Left=16.91[A
Epoch 1/3 - Training:  42%|▍| 1420/3393 [11:05<15:53,  2.07batch/s, Batch Loss=0.1331, Avg Loss=0.3222, Time Left=16.91[A
Epoch 1/3 - Training:  42%|▍| 1420/3393 [11:05<15:53,  2.07batch/s, Batch Loss=0.1141, Avg Loss=0.3220, Time Left=16.90[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  43%|▍| 1450/3393 [11:20<16:02,  2.02batch/s, Batch Loss=0.1206, Avg Loss=0.3200, Time Left=16.66[A
Epoch 1/3 - Training:  43%|▍| 1450/3393 [11:20<16:02,  2.02batch/s, Batch Loss=0.0803, Avg Loss=0.3199, Time Left=16.65[A
Epoch 1/3 - Training:  43%|▍| 1451/3393 [11:20<16:07,  2.01batch/s, Batch Loss=0.0803, Avg Loss=0.3199, Time Left=16.65[A
Epoch 1/3 - Training:  43%|▍| 1451/3393 [11:21<16:07,  2.01batch/s, Batch Loss=0.3431, Avg Loss=0.3199, Time Left=16.64[A
Epoch 1/3 - Training:  43%|▍| 1452/3393 [11:21<16:26,  1.97batch/s, Batch Loss=0.3431, Avg Loss=0.3199, Time Left=16.64[A
Epoch 1/3 - Training:  43%|▍| 1452/3393 [11:21<16:26,  1.97batch/s, Batch Loss=0.2014, Avg Loss=0.3198, Time Left=16.63[A
Epoch 1/3 - Training:  43%|▍| 1453/3393 [11:21<16:14,  1.99batch/s, Batch Loss=0.2014, Avg Loss=0.3198, Time Left=16.63[A
Epoch 1/3 - Training:  43%|▍| 1453/3393 [11:22<16:14,  1.99batch/s, Batch Loss=0.2011, Avg Loss=0.3197, Time Left=16.63[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  44%|▍| 1483/3393 [11:36<15:44,  2.02batch/s, Batch Loss=0.1010, Avg Loss=0.3173, Time Left=16.39[A
Epoch 1/3 - Training:  44%|▍| 1483/3393 [11:37<15:44,  2.02batch/s, Batch Loss=0.2582, Avg Loss=0.3173, Time Left=16.38[A
Epoch 1/3 - Training:  44%|▍| 1484/3393 [11:37<15:39,  2.03batch/s, Batch Loss=0.2582, Avg Loss=0.3173, Time Left=16.38[A
Epoch 1/3 - Training:  44%|▍| 1484/3393 [11:37<15:39,  2.03batch/s, Batch Loss=0.1634, Avg Loss=0.3172, Time Left=16.37[A
Epoch 1/3 - Training:  44%|▍| 1485/3393 [11:37<16:02,  1.98batch/s, Batch Loss=0.1634, Avg Loss=0.3172, Time Left=16.37[A
Epoch 1/3 - Training:  44%|▍| 1485/3393 [11:38<16:02,  1.98batch/s, Batch Loss=0.2249, Avg Loss=0.3171, Time Left=16.36[A
Epoch 1/3 - Training:  44%|▍| 1486/3393 [11:38<15:52,  2.00batch/s, Batch Loss=0.2249, Avg Loss=0.3171, Time Left=16.36[A
Epoch 1/3 - Training:  44%|▍| 1486/3393 [11:38<15:52,  2.00batch/s, Batch Loss=0.1100, Avg Loss=0.3170, Time Left=16.36[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  45%|▍| 1516/3393 [11:53<15:39,  2.00batch/s, Batch Loss=0.0915, Avg Loss=0.3152, Time Left=16.12[A
Epoch 1/3 - Training:  45%|▍| 1516/3393 [11:53<15:39,  2.00batch/s, Batch Loss=0.1119, Avg Loss=0.3151, Time Left=16.11[A
Epoch 1/3 - Training:  45%|▍| 1517/3393 [11:53<15:57,  1.96batch/s, Batch Loss=0.1119, Avg Loss=0.3151, Time Left=16.11[A
Epoch 1/3 - Training:  45%|▍| 1517/3393 [11:54<15:57,  1.96batch/s, Batch Loss=0.1891, Avg Loss=0.3150, Time Left=16.10[A
Epoch 1/3 - Training:  45%|▍| 1518/3393 [11:54<15:43,  1.99batch/s, Batch Loss=0.1891, Avg Loss=0.3150, Time Left=16.10[A
Epoch 1/3 - Training:  45%|▍| 1518/3393 [11:54<15:43,  1.99batch/s, Batch Loss=0.1898, Avg Loss=0.3149, Time Left=16.09[A
Epoch 1/3 - Training:  45%|▍| 1519/3393 [11:54<15:51,  1.97batch/s, Batch Loss=0.1898, Avg Loss=0.3149, Time Left=16.09[A
Epoch 1/3 - Training:  45%|▍| 1519/3393 [11:55<15:51,  1.97batch/s, Batch Loss=0.3080, Avg Loss=0.3149, Time Left=16.09[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  46%|▍| 1549/3393 [12:09<15:16,  2.01batch/s, Batch Loss=0.1980, Avg Loss=0.3123, Time Left=15.85[A
Epoch 1/3 - Training:  46%|▍| 1549/3393 [12:10<15:16,  2.01batch/s, Batch Loss=0.2989, Avg Loss=0.3123, Time Left=15.84[A
Epoch 1/3 - Training:  46%|▍| 1550/3393 [12:10<14:52,  2.07batch/s, Batch Loss=0.2989, Avg Loss=0.3123, Time Left=15.84[A
Epoch 1/3 - Training:  46%|▍| 1550/3393 [12:10<14:52,  2.07batch/s, Batch Loss=0.1605, Avg Loss=0.3122, Time Left=15.83[A
Epoch 1/3 - Training:  46%|▍| 1551/3393 [12:10<15:02,  2.04batch/s, Batch Loss=0.1605, Avg Loss=0.3122, Time Left=15.83[A
Epoch 1/3 - Training:  46%|▍| 1551/3393 [12:10<15:02,  2.04batch/s, Batch Loss=0.3554, Avg Loss=0.3123, Time Left=15.82[A
Epoch 1/3 - Training:  46%|▍| 1552/3393 [12:11<15:09,  2.03batch/s, Batch Loss=0.3554, Avg Loss=0.3123, Time Left=15.82[A
Epoch 1/3 - Training:  46%|▍| 1552/3393 [12:11<15:09,  2.03batch/s, Batch Loss=0.2373, Avg Loss=0.3122, Time Left=15.82[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  47%|▍| 1582/3393 [12:25<14:56,  2.02batch/s, Batch Loss=0.1920, Avg Loss=0.3105, Time Left=15.57[A
Epoch 1/3 - Training:  47%|▍| 1582/3393 [12:26<14:56,  2.02batch/s, Batch Loss=0.2979, Avg Loss=0.3105, Time Left=15.56[A
Epoch 1/3 - Training:  47%|▍| 1583/3393 [12:26<14:51,  2.03batch/s, Batch Loss=0.2979, Avg Loss=0.3105, Time Left=15.56[A
Epoch 1/3 - Training:  47%|▍| 1583/3393 [12:26<14:51,  2.03batch/s, Batch Loss=0.4106, Avg Loss=0.3106, Time Left=15.56[A
Epoch 1/3 - Training:  47%|▍| 1584/3393 [12:26<15:04,  2.00batch/s, Batch Loss=0.4106, Avg Loss=0.3106, Time Left=15.56[A
Epoch 1/3 - Training:  47%|▍| 1584/3393 [12:27<15:04,  2.00batch/s, Batch Loss=0.1728, Avg Loss=0.3105, Time Left=15.55[A
Epoch 1/3 - Training:  47%|▍| 1585/3393 [12:27<14:54,  2.02batch/s, Batch Loss=0.1728, Avg Loss=0.3105, Time Left=15.55[A
Epoch 1/3 - Training:  47%|▍| 1585/3393 [12:27<14:54,  2.02batch/s, Batch Loss=0.0837, Avg Loss=0.3103, Time Left=15.54[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  48%|▍| 1615/3393 [12:41<14:20,  2.07batch/s, Batch Loss=0.2594, Avg Loss=0.3088, Time Left=15.29[A
Epoch 1/3 - Training:  48%|▍| 1615/3393 [12:42<14:20,  2.07batch/s, Batch Loss=0.1060, Avg Loss=0.3087, Time Left=15.29[A
Epoch 1/3 - Training:  48%|▍| 1616/3393 [12:42<14:21,  2.06batch/s, Batch Loss=0.1060, Avg Loss=0.3087, Time Left=15.29[A
Epoch 1/3 - Training:  48%|▍| 1616/3393 [12:42<14:21,  2.06batch/s, Batch Loss=0.1848, Avg Loss=0.3086, Time Left=15.28[A
Epoch 1/3 - Training:  48%|▍| 1617/3393 [12:42<14:38,  2.02batch/s, Batch Loss=0.1848, Avg Loss=0.3086, Time Left=15.28[A
Epoch 1/3 - Training:  48%|▍| 1617/3393 [12:43<14:38,  2.02batch/s, Batch Loss=0.1196, Avg Loss=0.3085, Time Left=15.27[A
Epoch 1/3 - Training:  48%|▍| 1618/3393 [12:43<14:33,  2.03batch/s, Batch Loss=0.1196, Avg Loss=0.3085, Time Left=15.27[A
Epoch 1/3 - Training:  48%|▍| 1618/3393 [12:43<14:33,  2.03batch/s, Batch Loss=0.3417, Avg Loss=0.3085, Time Left=15.26[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  49%|▍| 1648/3393 [12:58<14:16,  2.04batch/s, Batch Loss=0.3213, Avg Loss=0.3066, Time Left=15.02[A
Epoch 1/3 - Training:  49%|▍| 1648/3393 [12:58<14:16,  2.04batch/s, Batch Loss=0.0968, Avg Loss=0.3065, Time Left=15.02[A
Epoch 1/3 - Training:  49%|▍| 1649/3393 [12:58<14:29,  2.01batch/s, Batch Loss=0.0968, Avg Loss=0.3065, Time Left=15.02[A
Epoch 1/3 - Training:  49%|▍| 1649/3393 [12:59<14:29,  2.01batch/s, Batch Loss=0.2570, Avg Loss=0.3064, Time Left=15.01[A
Epoch 1/3 - Training:  49%|▍| 1650/3393 [12:59<14:23,  2.02batch/s, Batch Loss=0.2570, Avg Loss=0.3064, Time Left=15.01[A
Epoch 1/3 - Training:  49%|▍| 1650/3393 [12:59<14:23,  2.02batch/s, Batch Loss=0.1632, Avg Loss=0.3063, Time Left=15.00[A
Epoch 1/3 - Training:  49%|▍| 1651/3393 [12:59<14:35,  1.99batch/s, Batch Loss=0.1632, Avg Loss=0.3063, Time Left=15.00[A
Epoch 1/3 - Training:  49%|▍| 1651/3393 [13:00<14:35,  1.99batch/s, Batch Loss=0.0425, Avg Loss=0.3062, Time Left=14.99[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  50%|▍| 1681/3393 [13:14<14:15,  2.00batch/s, Batch Loss=0.1243, Avg Loss=0.3042, Time Left=14.75[A
Epoch 1/3 - Training:  50%|▍| 1681/3393 [13:15<14:15,  2.00batch/s, Batch Loss=0.0795, Avg Loss=0.3040, Time Left=14.74[A
Epoch 1/3 - Training:  50%|▍| 1682/3393 [13:15<14:08,  2.02batch/s, Batch Loss=0.0795, Avg Loss=0.3040, Time Left=14.74[A
Epoch 1/3 - Training:  50%|▍| 1682/3393 [13:15<14:08,  2.02batch/s, Batch Loss=0.1924, Avg Loss=0.3040, Time Left=14.74[A
Epoch 1/3 - Training:  50%|▍| 1683/3393 [13:15<13:54,  2.05batch/s, Batch Loss=0.1924, Avg Loss=0.3040, Time Left=14.74[A
Epoch 1/3 - Training:  50%|▍| 1683/3393 [13:16<13:54,  2.05batch/s, Batch Loss=0.1409, Avg Loss=0.3039, Time Left=14.73[A
Epoch 1/3 - Training:  50%|▍| 1684/3393 [13:16<14:02,  2.03batch/s, Batch Loss=0.1409, Avg Loss=0.3039, Time Left=14.73[A
Epoch 1/3 - Training:  50%|▍| 1684/3393 [13:16<14:02,  2.03batch/s, Batch Loss=0.3789, Avg Loss=0.3039, Time Left=14.72[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  51%|▌| 1714/3393 [13:30<13:55,  2.01batch/s, Batch Loss=0.3553, Avg Loss=0.3017, Time Left=14.48[A
Epoch 1/3 - Training:  51%|▌| 1714/3393 [13:31<13:55,  2.01batch/s, Batch Loss=0.1950, Avg Loss=0.3016, Time Left=14.47[A
Epoch 1/3 - Training:  51%|▌| 1715/3393 [13:31<13:56,  2.01batch/s, Batch Loss=0.1950, Avg Loss=0.3016, Time Left=14.47[A
Epoch 1/3 - Training:  51%|▌| 1715/3393 [13:31<13:56,  2.01batch/s, Batch Loss=0.1508, Avg Loss=0.3016, Time Left=14.46[A
Epoch 1/3 - Training:  51%|▌| 1716/3393 [13:31<14:05,  1.98batch/s, Batch Loss=0.1508, Avg Loss=0.3016, Time Left=14.46[A
Epoch 1/3 - Training:  51%|▌| 1716/3393 [13:32<14:05,  1.98batch/s, Batch Loss=0.1463, Avg Loss=0.3015, Time Left=14.46[A
Epoch 1/3 - Training:  51%|▌| 1717/3393 [13:32<14:07,  1.98batch/s, Batch Loss=0.1463, Avg Loss=0.3015, Time Left=14.46[A
Epoch 1/3 - Training:  51%|▌| 1717/3393 [13:32<14:07,  1.98batch/s, Batch Loss=0.1138, Avg Loss=0.3013, Time Left=14.45[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  51%|▌| 1747/3393 [13:47<13:27,  2.04batch/s, Batch Loss=0.2167, Avg Loss=0.2993, Time Left=14.21[A
Epoch 1/3 - Training:  51%|▌| 1747/3393 [13:47<13:27,  2.04batch/s, Batch Loss=0.3012, Avg Loss=0.2993, Time Left=14.20[A
Epoch 1/3 - Training:  52%|▌| 1748/3393 [13:47<13:38,  2.01batch/s, Batch Loss=0.3012, Avg Loss=0.2993, Time Left=14.20[A
Epoch 1/3 - Training:  52%|▌| 1748/3393 [13:48<13:38,  2.01batch/s, Batch Loss=0.1407, Avg Loss=0.2992, Time Left=14.19[A
Epoch 1/3 - Training:  52%|▌| 1749/3393 [13:48<13:33,  2.02batch/s, Batch Loss=0.1407, Avg Loss=0.2992, Time Left=14.19[A
Epoch 1/3 - Training:  52%|▌| 1749/3393 [13:48<13:33,  2.02batch/s, Batch Loss=0.0818, Avg Loss=0.2991, Time Left=14.18[A
Epoch 1/3 - Training:  52%|▌| 1750/3393 [13:48<13:50,  1.98batch/s, Batch Loss=0.0818, Avg Loss=0.2991, Time Left=14.18[A
Epoch 1/3 - Training:  52%|▌| 1750/3393 [13:49<13:50,  1.98batch/s, Batch Loss=0.3135, Avg Loss=0.2991, Time Left=14.17[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  52%|▌| 1780/3393 [14:03<13:20,  2.01batch/s, Batch Loss=0.1812, Avg Loss=0.2980, Time Left=13.94[A
Epoch 1/3 - Training:  52%|▌| 1780/3393 [14:04<13:20,  2.01batch/s, Batch Loss=0.1368, Avg Loss=0.2979, Time Left=13.93[A
Epoch 1/3 - Training:  52%|▌| 1781/3393 [14:04<13:15,  2.03batch/s, Batch Loss=0.1368, Avg Loss=0.2979, Time Left=13.93[A
Epoch 1/3 - Training:  52%|▌| 1781/3393 [14:04<13:15,  2.03batch/s, Batch Loss=0.1518, Avg Loss=0.2978, Time Left=13.92[A
Epoch 1/3 - Training:  53%|▌| 1782/3393 [14:04<13:51,  1.94batch/s, Batch Loss=0.1518, Avg Loss=0.2978, Time Left=13.92[A
Epoch 1/3 - Training:  53%|▌| 1782/3393 [14:05<13:51,  1.94batch/s, Batch Loss=0.2420, Avg Loss=0.2978, Time Left=13.92[A
Epoch 1/3 - Training:  53%|▌| 1783/3393 [14:05<14:07,  1.90batch/s, Batch Loss=0.2420, Avg Loss=0.2978, Time Left=13.92[A
Epoch 1/3 - Training:  53%|▌| 1783/3393 [14:05<14:07,  1.90batch/s, Batch Loss=0.3332, Avg Loss=0.2978, Time Left=13.91[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  53%|▌| 1813/3393 [14:20<13:26,  1.96batch/s, Batch Loss=0.1327, Avg Loss=0.2959, Time Left=13.67[A
Epoch 1/3 - Training:  53%|▌| 1813/3393 [14:21<13:26,  1.96batch/s, Batch Loss=0.1183, Avg Loss=0.2958, Time Left=13.67[A
Epoch 1/3 - Training:  53%|▌| 1814/3393 [14:21<13:21,  1.97batch/s, Batch Loss=0.1183, Avg Loss=0.2958, Time Left=13.67[A
Epoch 1/3 - Training:  53%|▌| 1814/3393 [14:21<13:21,  1.97batch/s, Batch Loss=0.0269, Avg Loss=0.2956, Time Left=13.66[A
Epoch 1/3 - Training:  53%|▌| 1815/3393 [14:21<13:24,  1.96batch/s, Batch Loss=0.0269, Avg Loss=0.2956, Time Left=13.66[A
Epoch 1/3 - Training:  53%|▌| 1815/3393 [14:22<13:24,  1.96batch/s, Batch Loss=0.0973, Avg Loss=0.2955, Time Left=13.65[A
Epoch 1/3 - Training:  54%|▌| 1816/3393 [14:22<12:57,  2.03batch/s, Batch Loss=0.0973, Avg Loss=0.2955, Time Left=13.65[A
Epoch 1/3 - Training:  54%|▌| 1816/3393 [14:22<12:57,  2.03batch/s, Batch Loss=0.0751, Avg Loss=0.2954, Time Left=13.64[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  54%|▌| 1846/3393 [14:37<12:52,  2.00batch/s, Batch Loss=0.0404, Avg Loss=0.2929, Time Left=13.40[A
Epoch 1/3 - Training:  54%|▌| 1846/3393 [14:37<12:52,  2.00batch/s, Batch Loss=0.1062, Avg Loss=0.2928, Time Left=13.40[A
Epoch 1/3 - Training:  54%|▌| 1847/3393 [14:37<13:06,  1.97batch/s, Batch Loss=0.1062, Avg Loss=0.2928, Time Left=13.40[A
Epoch 1/3 - Training:  54%|▌| 1847/3393 [14:38<13:06,  1.97batch/s, Batch Loss=0.4161, Avg Loss=0.2929, Time Left=13.39[A
Epoch 1/3 - Training:  54%|▌| 1848/3393 [14:38<12:55,  1.99batch/s, Batch Loss=0.4161, Avg Loss=0.2929, Time Left=13.39[A
Epoch 1/3 - Training:  54%|▌| 1848/3393 [14:38<12:55,  1.99batch/s, Batch Loss=0.1825, Avg Loss=0.2928, Time Left=13.38[A
Epoch 1/3 - Training:  54%|▌| 1849/3393 [14:38<13:01,  1.98batch/s, Batch Loss=0.1825, Avg Loss=0.2928, Time Left=13.38[A
Epoch 1/3 - Training:  54%|▌| 1849/3393 [14:39<13:01,  1.98batch/s, Batch Loss=0.0575, Avg Loss=0.2927, Time Left=13.37[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  55%|▌| 1879/3393 [14:53<12:50,  1.97batch/s, Batch Loss=0.0770, Avg Loss=0.2910, Time Left=13.13[A
Epoch 1/3 - Training:  55%|▌| 1879/3393 [14:54<12:50,  1.97batch/s, Batch Loss=0.2051, Avg Loss=0.2909, Time Left=13.13[A
Epoch 1/3 - Training:  55%|▌| 1880/3393 [14:54<12:46,  1.97batch/s, Batch Loss=0.2051, Avg Loss=0.2909, Time Left=13.13[A
Epoch 1/3 - Training:  55%|▌| 1880/3393 [14:54<12:46,  1.97batch/s, Batch Loss=0.0995, Avg Loss=0.2908, Time Left=13.12[A
Epoch 1/3 - Training:  55%|▌| 1881/3393 [14:54<12:50,  1.96batch/s, Batch Loss=0.0995, Avg Loss=0.2908, Time Left=13.12[A
Epoch 1/3 - Training:  55%|▌| 1881/3393 [14:55<12:50,  1.96batch/s, Batch Loss=0.0743, Avg Loss=0.2907, Time Left=13.11[A
Epoch 1/3 - Training:  55%|▌| 1882/3393 [14:55<12:32,  2.01batch/s, Batch Loss=0.0743, Avg Loss=0.2907, Time Left=13.11[A
Epoch 1/3 - Training:  55%|▌| 1882/3393 [14:55<12:32,  2.01batch/s, Batch Loss=0.1634, Avg Loss=0.2906, Time Left=13.10[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  56%|▌| 1912/3393 [15:09<12:06,  2.04batch/s, Batch Loss=0.1529, Avg Loss=0.2884, Time Left=12.86[A
Epoch 1/3 - Training:  56%|▌| 1912/3393 [15:10<12:06,  2.04batch/s, Batch Loss=0.2576, Avg Loss=0.2883, Time Left=12.85[A
Epoch 1/3 - Training:  56%|▌| 1913/3393 [15:10<12:03,  2.05batch/s, Batch Loss=0.2576, Avg Loss=0.2883, Time Left=12.85[A
Epoch 1/3 - Training:  56%|▌| 1913/3393 [15:10<12:03,  2.05batch/s, Batch Loss=0.1376, Avg Loss=0.2883, Time Left=12.84[A
Epoch 1/3 - Training:  56%|▌| 1914/3393 [15:10<12:14,  2.01batch/s, Batch Loss=0.1376, Avg Loss=0.2883, Time Left=12.84[A
Epoch 1/3 - Training:  56%|▌| 1914/3393 [15:11<12:14,  2.01batch/s, Batch Loss=0.2047, Avg Loss=0.2882, Time Left=12.83[A
Epoch 1/3 - Training:  56%|▌| 1915/3393 [15:11<12:09,  2.03batch/s, Batch Loss=0.2047, Avg Loss=0.2882, Time Left=12.83[A
Epoch 1/3 - Training:  56%|▌| 1915/3393 [15:11<12:09,  2.03batch/s, Batch Loss=0.1644, Avg Loss=0.2882, Time Left=12.83[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  57%|▌| 1945/3393 [15:25<11:38,  2.07batch/s, Batch Loss=0.1112, Avg Loss=0.2868, Time Left=12.58[A
Epoch 1/3 - Training:  57%|▌| 1945/3393 [15:26<11:38,  2.07batch/s, Batch Loss=0.2970, Avg Loss=0.2868, Time Left=12.58[A
Epoch 1/3 - Training:  57%|▌| 1946/3393 [15:26<11:37,  2.08batch/s, Batch Loss=0.2970, Avg Loss=0.2868, Time Left=12.58[A
Epoch 1/3 - Training:  57%|▌| 1946/3393 [15:26<11:37,  2.08batch/s, Batch Loss=0.1867, Avg Loss=0.2867, Time Left=12.57[A
Epoch 1/3 - Training:  57%|▌| 1947/3393 [15:26<11:30,  2.10batch/s, Batch Loss=0.1867, Avg Loss=0.2867, Time Left=12.57[A
Epoch 1/3 - Training:  57%|▌| 1947/3393 [15:27<11:30,  2.10batch/s, Batch Loss=0.2223, Avg Loss=0.2867, Time Left=12.56[A
Epoch 1/3 - Training:  57%|▌| 1948/3393 [15:27<11:45,  2.05batch/s, Batch Loss=0.2223, Avg Loss=0.2867, Time Left=12.56[A
Epoch 1/3 - Training:  57%|▌| 1948/3393 [15:27<11:45,  2.05batch/s, Batch Loss=0.1675, Avg Loss=0.2866, Time Left=12.55[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  58%|▌| 1978/3393 [15:41<11:26,  2.06batch/s, Batch Loss=0.0752, Avg Loss=0.2846, Time Left=12.31[A
Epoch 1/3 - Training:  58%|▌| 1978/3393 [15:42<11:26,  2.06batch/s, Batch Loss=0.2819, Avg Loss=0.2846, Time Left=12.30[A
Epoch 1/3 - Training:  58%|▌| 1979/3393 [15:42<11:45,  2.00batch/s, Batch Loss=0.2819, Avg Loss=0.2846, Time Left=12.30[A
Epoch 1/3 - Training:  58%|▌| 1979/3393 [15:43<11:45,  2.00batch/s, Batch Loss=0.1638, Avg Loss=0.2845, Time Left=12.29[A
Epoch 1/3 - Training:  58%|▌| 1980/3393 [15:43<11:39,  2.02batch/s, Batch Loss=0.1638, Avg Loss=0.2845, Time Left=12.29[A
Epoch 1/3 - Training:  58%|▌| 1980/3393 [15:43<11:39,  2.02batch/s, Batch Loss=0.3089, Avg Loss=0.2846, Time Left=12.28[A
Epoch 1/3 - Training:  58%|▌| 1981/3393 [15:43<11:27,  2.05batch/s, Batch Loss=0.3089, Avg Loss=0.2846, Time Left=12.28[A
Epoch 1/3 - Training:  58%|▌| 1981/3393 [15:43<11:27,  2.05batch/s, Batch Loss=0.0983, Avg Loss=0.2845, Time Left=12.27[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  59%|▌| 2011/3393 [15:58<11:48,  1.95batch/s, Batch Loss=0.1263, Avg Loss=0.2838, Time Left=12.03[A
Epoch 1/3 - Training:  59%|▌| 2011/3393 [15:58<11:48,  1.95batch/s, Batch Loss=0.1365, Avg Loss=0.2838, Time Left=12.03[A
Epoch 1/3 - Training:  59%|▌| 2012/3393 [15:58<11:43,  1.96batch/s, Batch Loss=0.1365, Avg Loss=0.2838, Time Left=12.03[A
Epoch 1/3 - Training:  59%|▌| 2012/3393 [15:59<11:43,  1.96batch/s, Batch Loss=0.2730, Avg Loss=0.2838, Time Left=12.02[A
Epoch 1/3 - Training:  59%|▌| 2013/3393 [15:59<11:44,  1.96batch/s, Batch Loss=0.2730, Avg Loss=0.2838, Time Left=12.02[A
Epoch 1/3 - Training:  59%|▌| 2013/3393 [15:59<11:44,  1.96batch/s, Batch Loss=0.2130, Avg Loss=0.2837, Time Left=12.01[A
Epoch 1/3 - Training:  59%|▌| 2014/3393 [15:59<11:34,  1.99batch/s, Batch Loss=0.2130, Avg Loss=0.2837, Time Left=12.01[A
Epoch 1/3 - Training:  59%|▌| 2014/3393 [16:00<11:34,  1.99batch/s, Batch Loss=0.1137, Avg Loss=0.2836, Time Left=12.00[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  60%|▌| 2044/3393 [16:14<10:51,  2.07batch/s, Batch Loss=0.0191, Avg Loss=0.2824, Time Left=11.76[A
Epoch 1/3 - Training:  60%|▌| 2044/3393 [16:14<10:51,  2.07batch/s, Batch Loss=0.1779, Avg Loss=0.2823, Time Left=11.75[A
Epoch 1/3 - Training:  60%|▌| 2045/3393 [16:14<10:51,  2.07batch/s, Batch Loss=0.1779, Avg Loss=0.2823, Time Left=11.75[A
Epoch 1/3 - Training:  60%|▌| 2045/3393 [16:15<10:51,  2.07batch/s, Batch Loss=0.4458, Avg Loss=0.2824, Time Left=11.74[A
Epoch 1/3 - Training:  60%|▌| 2046/3393 [16:15<10:45,  2.09batch/s, Batch Loss=0.4458, Avg Loss=0.2824, Time Left=11.74[A
Epoch 1/3 - Training:  60%|▌| 2046/3393 [16:15<10:45,  2.09batch/s, Batch Loss=0.1742, Avg Loss=0.2823, Time Left=11.73[A
Epoch 1/3 - Training:  60%|▌| 2047/3393 [16:15<10:39,  2.11batch/s, Batch Loss=0.1742, Avg Loss=0.2823, Time Left=11.73[A
Epoch 1/3 - Training:  60%|▌| 2047/3393 [16:16<10:39,  2.11batch/s, Batch Loss=0.1503, Avg Loss=0.2823, Time Left=11.73[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  61%|▌| 2077/3393 [16:30<11:04,  1.98batch/s, Batch Loss=0.0276, Avg Loss=0.2809, Time Left=11.49[A
Epoch 1/3 - Training:  61%|▌| 2077/3393 [16:31<11:04,  1.98batch/s, Batch Loss=0.0434, Avg Loss=0.2808, Time Left=11.48[A
Epoch 1/3 - Training:  61%|▌| 2078/3393 [16:31<11:03,  1.98batch/s, Batch Loss=0.0434, Avg Loss=0.2808, Time Left=11.48[A
Epoch 1/3 - Training:  61%|▌| 2078/3393 [16:31<11:03,  1.98batch/s, Batch Loss=0.2658, Avg Loss=0.2808, Time Left=11.47[A
Epoch 1/3 - Training:  61%|▌| 2079/3393 [16:31<11:27,  1.91batch/s, Batch Loss=0.2658, Avg Loss=0.2808, Time Left=11.47[A
Epoch 1/3 - Training:  61%|▌| 2079/3393 [16:32<11:27,  1.91batch/s, Batch Loss=0.0178, Avg Loss=0.2806, Time Left=11.47[A
Epoch 1/3 - Training:  61%|▌| 2080/3393 [16:32<11:48,  1.85batch/s, Batch Loss=0.0178, Avg Loss=0.2806, Time Left=11.47[A
Epoch 1/3 - Training:  61%|▌| 2080/3393 [16:32<11:48,  1.85batch/s, Batch Loss=0.0580, Avg Loss=0.2805, Time Left=11.46[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  62%|▌| 2110/3393 [16:47<10:39,  2.01batch/s, Batch Loss=0.0237, Avg Loss=0.2793, Time Left=11.22[A
Epoch 1/3 - Training:  62%|▌| 2110/3393 [16:48<10:39,  2.01batch/s, Batch Loss=0.1635, Avg Loss=0.2793, Time Left=11.21[A
Epoch 1/3 - Training:  62%|▌| 2111/3393 [16:48<10:26,  2.05batch/s, Batch Loss=0.1635, Avg Loss=0.2793, Time Left=11.21[A
Epoch 1/3 - Training:  62%|▌| 2111/3393 [16:48<10:26,  2.05batch/s, Batch Loss=0.1063, Avg Loss=0.2792, Time Left=11.21[A
Epoch 1/3 - Training:  62%|▌| 2112/3393 [16:48<10:25,  2.05batch/s, Batch Loss=0.1063, Avg Loss=0.2792, Time Left=11.21[A
Epoch 1/3 - Training:  62%|▌| 2112/3393 [16:49<10:25,  2.05batch/s, Batch Loss=0.2509, Avg Loss=0.2792, Time Left=11.20[A
Epoch 1/3 - Training:  62%|▌| 2113/3393 [16:49<10:33,  2.02batch/s, Batch Loss=0.2509, Avg Loss=0.2792, Time Left=11.20[A
Epoch 1/3 - Training:  62%|▌| 2113/3393 [16:49<10:33,  2.02batch/s, Batch Loss=0.0904, Avg Loss=0.2791, Time Left=11.19[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  63%|▋| 2143/3393 [17:04<10:28,  1.99batch/s, Batch Loss=0.0714, Avg Loss=0.2773, Time Left=10.95[A
Epoch 1/3 - Training:  63%|▋| 2143/3393 [17:04<10:28,  1.99batch/s, Batch Loss=0.0798, Avg Loss=0.2772, Time Left=10.94[A
Epoch 1/3 - Training:  63%|▋| 2144/3393 [17:04<10:38,  1.96batch/s, Batch Loss=0.0798, Avg Loss=0.2772, Time Left=10.94[A
Epoch 1/3 - Training:  63%|▋| 2144/3393 [17:05<10:38,  1.96batch/s, Batch Loss=0.1088, Avg Loss=0.2772, Time Left=10.93[A
Epoch 1/3 - Training:  63%|▋| 2145/3393 [17:05<10:28,  1.99batch/s, Batch Loss=0.1088, Avg Loss=0.2772, Time Left=10.93[A
Epoch 1/3 - Training:  63%|▋| 2145/3393 [17:05<10:28,  1.99batch/s, Batch Loss=0.0670, Avg Loss=0.2771, Time Left=10.93[A
Epoch 1/3 - Training:  63%|▋| 2146/3393 [17:05<10:20,  2.01batch/s, Batch Loss=0.0670, Avg Loss=0.2771, Time Left=10.93[A
Epoch 1/3 - Training:  63%|▋| 2146/3393 [17:06<10:20,  2.01batch/s, Batch Loss=0.0601, Avg Loss=0.2770, Time Left=10.92[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  64%|▋| 2176/3393 [17:20<09:55,  2.04batch/s, Batch Loss=0.0947, Avg Loss=0.2754, Time Left=10.68[A
Epoch 1/3 - Training:  64%|▋| 2176/3393 [17:20<09:55,  2.04batch/s, Batch Loss=0.1119, Avg Loss=0.2753, Time Left=10.67[A
Epoch 1/3 - Training:  64%|▋| 2177/3393 [17:20<10:04,  2.01batch/s, Batch Loss=0.1119, Avg Loss=0.2753, Time Left=10.67[A
Epoch 1/3 - Training:  64%|▋| 2177/3393 [17:21<10:04,  2.01batch/s, Batch Loss=0.1428, Avg Loss=0.2752, Time Left=10.66[A
Epoch 1/3 - Training:  64%|▋| 2178/3393 [17:21<10:00,  2.02batch/s, Batch Loss=0.1428, Avg Loss=0.2752, Time Left=10.66[A
Epoch 1/3 - Training:  64%|▋| 2178/3393 [17:21<10:00,  2.02batch/s, Batch Loss=0.2474, Avg Loss=0.2752, Time Left=10.65[A
Epoch 1/3 - Training:  64%|▋| 2179/3393 [17:21<09:56,  2.04batch/s, Batch Loss=0.2474, Avg Loss=0.2752, Time Left=10.65[A
Epoch 1/3 - Training:  64%|▋| 2179/3393 [17:22<09:56,  2.04batch/s, Batch Loss=0.1128, Avg Loss=0.2752, Time Left=10.64[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  65%|▋| 2209/3393 [17:36<09:46,  2.02batch/s, Batch Loss=0.1237, Avg Loss=0.2739, Time Left=10.40[A
Epoch 1/3 - Training:  65%|▋| 2209/3393 [17:37<09:46,  2.02batch/s, Batch Loss=0.3559, Avg Loss=0.2739, Time Left=10.39[A
Epoch 1/3 - Training:  65%|▋| 2210/3393 [17:37<09:36,  2.05batch/s, Batch Loss=0.3559, Avg Loss=0.2739, Time Left=10.39[A
Epoch 1/3 - Training:  65%|▋| 2210/3393 [17:37<09:36,  2.05batch/s, Batch Loss=0.0566, Avg Loss=0.2738, Time Left=10.39[A
Epoch 1/3 - Training:  65%|▋| 2211/3393 [17:37<09:39,  2.04batch/s, Batch Loss=0.0566, Avg Loss=0.2738, Time Left=10.39[A
Epoch 1/3 - Training:  65%|▋| 2211/3393 [17:38<09:39,  2.04batch/s, Batch Loss=0.0244, Avg Loss=0.2737, Time Left=10.38[A
Epoch 1/3 - Training:  65%|▋| 2212/3393 [17:38<09:36,  2.05batch/s, Batch Loss=0.0244, Avg Loss=0.2737, Time Left=10.38[A
Epoch 1/3 - Training:  65%|▋| 2212/3393 [17:38<09:36,  2.05batch/s, Batch Loss=0.0913, Avg Loss=0.2736, Time Left=10.37[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  66%|▋| 2242/3393 [17:52<09:18,  2.06batch/s, Batch Loss=0.3040, Avg Loss=0.2724, Time Left=10.13[A
Epoch 1/3 - Training:  66%|▋| 2242/3393 [17:53<09:18,  2.06batch/s, Batch Loss=0.4077, Avg Loss=0.2725, Time Left=10.12[A
Epoch 1/3 - Training:  66%|▋| 2243/3393 [17:53<09:12,  2.08batch/s, Batch Loss=0.4077, Avg Loss=0.2725, Time Left=10.12[A
Epoch 1/3 - Training:  66%|▋| 2243/3393 [17:53<09:12,  2.08batch/s, Batch Loss=0.1845, Avg Loss=0.2724, Time Left=10.11[A
Epoch 1/3 - Training:  66%|▋| 2244/3393 [17:53<09:06,  2.10batch/s, Batch Loss=0.1845, Avg Loss=0.2724, Time Left=10.11[A
Epoch 1/3 - Training:  66%|▋| 2244/3393 [17:54<09:06,  2.10batch/s, Batch Loss=0.1281, Avg Loss=0.2724, Time Left=10.10[A
Epoch 1/3 - Training:  66%|▋| 2245/3393 [17:54<09:13,  2.07batch/s, Batch Loss=0.1281, Avg Loss=0.2724, Time Left=10.10[A
Epoch 1/3 - Training:  66%|▋| 2245/3393 [17:54<09:13,  2.07batch/s, Batch Loss=0.0515, Avg Loss=0.2722, Time Left=10.09[A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  67%|▋| 2275/3393 [18:08<09:10,  2.03batch/s, Batch Loss=0.2182, Avg Loss=0.2708, Time Left=9.85 [A
Epoch 1/3 - Training:  67%|▋| 2275/3393 [18:09<09:10,  2.03batch/s, Batch Loss=0.1600, Avg Loss=0.2708, Time Left=9.84 [A
Epoch 1/3 - Training:  67%|▋| 2276/3393 [18:09<09:12,  2.02batch/s, Batch Loss=0.1600, Avg Loss=0.2708, Time Left=9.84 [A
Epoch 1/3 - Training:  67%|▋| 2276/3393 [18:09<09:12,  2.02batch/s, Batch Loss=0.1212, Avg Loss=0.2707, Time Left=9.84 [A
Epoch 1/3 - Training:  67%|▋| 2277/3393 [18:09<09:14,  2.01batch/s, Batch Loss=0.1212, Avg Loss=0.2707, Time Left=9.84 [A
Epoch 1/3 - Training:  67%|▋| 2277/3393 [18:10<09:14,  2.01batch/s, Batch Loss=0.2227, Avg Loss=0.2707, Time Left=9.83 [A
Epoch 1/3 - Training:  67%|▋| 2278/3393 [18:10<09:15,  2.01batch/s, Batch Loss=0.2227, Avg Loss=0.2707, Time Left=9.83 [A
Epoch 1/3 - Training:  67%|▋| 2278/3393 [18:10<09:15,  2.01batch/s, Batch Loss=0.0499, Avg Loss=0.2706, Time Left=9.82 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  68%|▋| 2308/3393 [18:24<08:52,  2.04batch/s, Batch Loss=0.1349, Avg Loss=0.2687, Time Left=9.58 [A
Epoch 1/3 - Training:  68%|▋| 2308/3393 [18:25<08:52,  2.04batch/s, Batch Loss=0.3372, Avg Loss=0.2687, Time Left=9.57 [A
Epoch 1/3 - Training:  68%|▋| 2309/3393 [18:25<08:49,  2.05batch/s, Batch Loss=0.3372, Avg Loss=0.2687, Time Left=9.57 [A
Epoch 1/3 - Training:  68%|▋| 2309/3393 [18:25<08:49,  2.05batch/s, Batch Loss=0.1568, Avg Loss=0.2687, Time Left=9.56 [A
Epoch 1/3 - Training:  68%|▋| 2310/3393 [18:25<08:47,  2.05batch/s, Batch Loss=0.1568, Avg Loss=0.2687, Time Left=9.56 [A
Epoch 1/3 - Training:  68%|▋| 2310/3393 [18:26<08:47,  2.05batch/s, Batch Loss=0.2810, Avg Loss=0.2687, Time Left=9.55 [A
Epoch 1/3 - Training:  68%|▋| 2311/3393 [18:26<08:40,  2.08batch/s, Batch Loss=0.2810, Avg Loss=0.2687, Time Left=9.55 [A
Epoch 1/3 - Training:  68%|▋| 2311/3393 [18:26<08:40,  2.08batch/s, Batch Loss=0.2344, Avg Loss=0.2687, Time Left=9.54 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  69%|▋| 2341/3393 [18:40<08:27,  2.07batch/s, Batch Loss=0.0766, Avg Loss=0.2672, Time Left=9.30 [A
Epoch 1/3 - Training:  69%|▋| 2341/3393 [18:41<08:27,  2.07batch/s, Batch Loss=0.1361, Avg Loss=0.2672, Time Left=9.30 [A
Epoch 1/3 - Training:  69%|▋| 2342/3393 [18:41<08:26,  2.07batch/s, Batch Loss=0.1361, Avg Loss=0.2672, Time Left=9.30 [A
Epoch 1/3 - Training:  69%|▋| 2342/3393 [18:41<08:26,  2.07batch/s, Batch Loss=0.1642, Avg Loss=0.2671, Time Left=9.29 [A
Epoch 1/3 - Training:  69%|▋| 2343/3393 [18:41<08:21,  2.09batch/s, Batch Loss=0.1642, Avg Loss=0.2671, Time Left=9.29 [A
Epoch 1/3 - Training:  69%|▋| 2343/3393 [18:42<08:21,  2.09batch/s, Batch Loss=0.1886, Avg Loss=0.2671, Time Left=9.28 [A
Epoch 1/3 - Training:  69%|▋| 2344/3393 [18:42<08:22,  2.09batch/s, Batch Loss=0.1886, Avg Loss=0.2671, Time Left=9.28 [A
Epoch 1/3 - Training:  69%|▋| 2344/3393 [18:42<08:22,  2.09batch/s, Batch Loss=0.5013, Avg Loss=0.2672, Time Left=9.27 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  70%|▋| 2374/3393 [18:57<08:13,  2.06batch/s, Batch Loss=0.0354, Avg Loss=0.2660, Time Left=9.03 [A
Epoch 1/3 - Training:  70%|▋| 2374/3393 [18:57<08:13,  2.06batch/s, Batch Loss=0.1456, Avg Loss=0.2659, Time Left=9.02 [A
Epoch 1/3 - Training:  70%|▋| 2375/3393 [18:57<08:13,  2.06batch/s, Batch Loss=0.1456, Avg Loss=0.2659, Time Left=9.02 [A
Epoch 1/3 - Training:  70%|▋| 2375/3393 [18:58<08:13,  2.06batch/s, Batch Loss=0.1876, Avg Loss=0.2659, Time Left=9.01 [A
Epoch 1/3 - Training:  70%|▋| 2376/3393 [18:58<08:07,  2.09batch/s, Batch Loss=0.1876, Avg Loss=0.2659, Time Left=9.01 [A
Epoch 1/3 - Training:  70%|▋| 2376/3393 [18:58<08:07,  2.09batch/s, Batch Loss=0.1564, Avg Loss=0.2659, Time Left=9.01 [A
Epoch 1/3 - Training:  70%|▋| 2377/3393 [18:58<08:08,  2.08batch/s, Batch Loss=0.1564, Avg Loss=0.2659, Time Left=9.01 [A
Epoch 1/3 - Training:  70%|▋| 2377/3393 [18:59<08:08,  2.08batch/s, Batch Loss=0.1185, Avg Loss=0.2658, Time Left=9.00 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  71%|▋| 2407/3393 [19:13<07:56,  2.07batch/s, Batch Loss=0.0311, Avg Loss=0.2645, Time Left=8.76 [A
Epoch 1/3 - Training:  71%|▋| 2407/3393 [19:13<07:56,  2.07batch/s, Batch Loss=0.1938, Avg Loss=0.2644, Time Left=8.75 [A
Epoch 1/3 - Training:  71%|▋| 2408/3393 [19:13<07:55,  2.07batch/s, Batch Loss=0.1938, Avg Loss=0.2644, Time Left=8.75 [A
Epoch 1/3 - Training:  71%|▋| 2408/3393 [19:14<07:55,  2.07batch/s, Batch Loss=0.2826, Avg Loss=0.2644, Time Left=8.74 [A
Epoch 1/3 - Training:  71%|▋| 2409/3393 [19:14<08:13,  1.99batch/s, Batch Loss=0.2826, Avg Loss=0.2644, Time Left=8.74 [A
Epoch 1/3 - Training:  71%|▋| 2409/3393 [19:14<08:13,  1.99batch/s, Batch Loss=0.4388, Avg Loss=0.2645, Time Left=8.73 [A
Epoch 1/3 - Training:  71%|▋| 2410/3393 [19:14<08:07,  2.02batch/s, Batch Loss=0.4388, Avg Loss=0.2645, Time Left=8.73 [A
Epoch 1/3 - Training:  71%|▋| 2410/3393 [19:15<08:07,  2.02batch/s, Batch Loss=0.5497, Avg Loss=0.2646, Time Left=8.72 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  72%|▋| 2440/3393 [19:29<07:55,  2.00batch/s, Batch Loss=0.1549, Avg Loss=0.2634, Time Left=8.48 [A
Epoch 1/3 - Training:  72%|▋| 2440/3393 [19:30<07:55,  2.00batch/s, Batch Loss=0.0169, Avg Loss=0.2633, Time Left=8.48 [A
Epoch 1/3 - Training:  72%|▋| 2441/3393 [19:30<08:04,  1.97batch/s, Batch Loss=0.0169, Avg Loss=0.2633, Time Left=8.48 [A
Epoch 1/3 - Training:  72%|▋| 2441/3393 [19:30<08:04,  1.97batch/s, Batch Loss=0.2989, Avg Loss=0.2633, Time Left=8.47 [A
Epoch 1/3 - Training:  72%|▋| 2442/3393 [19:30<07:52,  2.01batch/s, Batch Loss=0.2989, Avg Loss=0.2633, Time Left=8.47 [A
Epoch 1/3 - Training:  72%|▋| 2442/3393 [19:30<07:52,  2.01batch/s, Batch Loss=0.1194, Avg Loss=0.2632, Time Left=8.46 [A
Epoch 1/3 - Training:  72%|▋| 2443/3393 [19:30<07:48,  2.03batch/s, Batch Loss=0.1194, Avg Loss=0.2632, Time Left=8.46 [A
Epoch 1/3 - Training:  72%|▋| 2443/3393 [19:31<07:48,  2.03batch/s, Batch Loss=0.0983, Avg Loss=0.2631, Time Left=8.45 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  73%|▋| 2473/3393 [19:45<07:34,  2.03batch/s, Batch Loss=0.1670, Avg Loss=0.2628, Time Left=8.21 [A
Epoch 1/3 - Training:  73%|▋| 2473/3393 [19:46<07:34,  2.03batch/s, Batch Loss=0.0319, Avg Loss=0.2627, Time Left=8.20 [A
Epoch 1/3 - Training:  73%|▋| 2474/3393 [19:46<07:26,  2.06batch/s, Batch Loss=0.0319, Avg Loss=0.2627, Time Left=8.20 [A
Epoch 1/3 - Training:  73%|▋| 2474/3393 [19:46<07:26,  2.06batch/s, Batch Loss=0.1052, Avg Loss=0.2626, Time Left=8.19 [A
Epoch 1/3 - Training:  73%|▋| 2475/3393 [19:46<07:25,  2.06batch/s, Batch Loss=0.1052, Avg Loss=0.2626, Time Left=8.19 [A
Epoch 1/3 - Training:  73%|▋| 2475/3393 [19:47<07:25,  2.06batch/s, Batch Loss=0.2846, Avg Loss=0.2626, Time Left=8.19 [A
Epoch 1/3 - Training:  73%|▋| 2476/3393 [19:47<07:20,  2.08batch/s, Batch Loss=0.2846, Avg Loss=0.2626, Time Left=8.19 [A
Epoch 1/3 - Training:  73%|▋| 2476/3393 [19:47<07:20,  2.08batch/s, Batch Loss=0.0382, Avg Loss=0.2625, Time Left=8.18 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  74%|▋| 2506/3393 [20:01<07:13,  2.05batch/s, Batch Loss=0.1196, Avg Loss=0.2614, Time Left=7.94 [A
Epoch 1/3 - Training:  74%|▋| 2506/3393 [20:02<07:13,  2.05batch/s, Batch Loss=0.3095, Avg Loss=0.2614, Time Left=7.93 [A
Epoch 1/3 - Training:  74%|▋| 2507/3393 [20:02<07:11,  2.05batch/s, Batch Loss=0.3095, Avg Loss=0.2614, Time Left=7.93 [A
Epoch 1/3 - Training:  74%|▋| 2507/3393 [20:02<07:11,  2.05batch/s, Batch Loss=0.1862, Avg Loss=0.2614, Time Left=7.92 [A
Epoch 1/3 - Training:  74%|▋| 2508/3393 [20:02<07:10,  2.06batch/s, Batch Loss=0.1862, Avg Loss=0.2614, Time Left=7.92 [A
Epoch 1/3 - Training:  74%|▋| 2508/3393 [20:03<07:10,  2.06batch/s, Batch Loss=0.0416, Avg Loss=0.2613, Time Left=7.91 [A
Epoch 1/3 - Training:  74%|▋| 2509/3393 [20:03<07:00,  2.10batch/s, Batch Loss=0.0416, Avg Loss=0.2613, Time Left=7.91 [A
Epoch 1/3 - Training:  74%|▋| 2509/3393 [20:03<07:00,  2.10batch/s, Batch Loss=0.2344, Avg Loss=0.2613, Time Left=7.90 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  75%|▋| 2539/3393 [20:17<06:54,  2.06batch/s, Batch Loss=0.2752, Avg Loss=0.2599, Time Left=7.66 [A
Epoch 1/3 - Training:  75%|▋| 2539/3393 [20:18<06:54,  2.06batch/s, Batch Loss=0.0347, Avg Loss=0.2599, Time Left=7.66 [A
Epoch 1/3 - Training:  75%|▋| 2540/3393 [20:18<06:50,  2.08batch/s, Batch Loss=0.0347, Avg Loss=0.2599, Time Left=7.66 [A
Epoch 1/3 - Training:  75%|▋| 2540/3393 [20:18<06:50,  2.08batch/s, Batch Loss=0.5744, Avg Loss=0.2600, Time Left=7.65 [A
Epoch 1/3 - Training:  75%|▋| 2541/3393 [20:18<06:51,  2.07batch/s, Batch Loss=0.5744, Avg Loss=0.2600, Time Left=7.65 [A
Epoch 1/3 - Training:  75%|▋| 2541/3393 [20:19<06:51,  2.07batch/s, Batch Loss=0.0227, Avg Loss=0.2599, Time Left=7.64 [A
Epoch 1/3 - Training:  75%|▋| 2542/3393 [20:19<06:49,  2.08batch/s, Batch Loss=0.0227, Avg Loss=0.2599, Time Left=7.64 [A
Epoch 1/3 - Training:  75%|▋| 2542/3393 [20:19<06:49,  2.08batch/s, Batch Loss=0.0364, Avg Loss=0.2598, Time Left=7.63 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  76%|▊| 2572/3393 [20:34<06:50,  2.00batch/s, Batch Loss=0.1570, Avg Loss=0.2587, Time Left=7.39 [A
Epoch 1/3 - Training:  76%|▊| 2572/3393 [20:34<06:50,  2.00batch/s, Batch Loss=0.2557, Avg Loss=0.2587, Time Left=7.38 [A
Epoch 1/3 - Training:  76%|▊| 2573/3393 [20:34<06:45,  2.02batch/s, Batch Loss=0.2557, Avg Loss=0.2587, Time Left=7.38 [A
Epoch 1/3 - Training:  76%|▊| 2573/3393 [20:35<06:45,  2.02batch/s, Batch Loss=0.2678, Avg Loss=0.2587, Time Left=7.38 [A
Epoch 1/3 - Training:  76%|▊| 2574/3393 [20:35<06:43,  2.03batch/s, Batch Loss=0.2678, Avg Loss=0.2587, Time Left=7.38 [A
Epoch 1/3 - Training:  76%|▊| 2574/3393 [20:35<06:43,  2.03batch/s, Batch Loss=0.2613, Avg Loss=0.2587, Time Left=7.37 [A
Epoch 1/3 - Training:  76%|▊| 2575/3393 [20:35<06:40,  2.04batch/s, Batch Loss=0.2613, Avg Loss=0.2587, Time Left=7.37 [A
Epoch 1/3 - Training:  76%|▊| 2575/3393 [20:36<06:40,  2.04batch/s, Batch Loss=0.0487, Avg Loss=0.2586, Time Left=7.36 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  77%|▊| 2605/3393 [20:50<06:21,  2.06batch/s, Batch Loss=0.1081, Avg Loss=0.2575, Time Left=7.12 [A
Epoch 1/3 - Training:  77%|▊| 2605/3393 [20:50<06:21,  2.06batch/s, Batch Loss=0.1395, Avg Loss=0.2575, Time Left=7.11 [A
Epoch 1/3 - Training:  77%|▊| 2606/3393 [20:50<06:21,  2.06batch/s, Batch Loss=0.1395, Avg Loss=0.2575, Time Left=7.11 [A
Epoch 1/3 - Training:  77%|▊| 2606/3393 [20:51<06:21,  2.06batch/s, Batch Loss=0.4027, Avg Loss=0.2575, Time Left=7.10 [A
Epoch 1/3 - Training:  77%|▊| 2607/3393 [20:51<06:31,  2.01batch/s, Batch Loss=0.4027, Avg Loss=0.2575, Time Left=7.10 [A
Epoch 1/3 - Training:  77%|▊| 2607/3393 [20:51<06:31,  2.01batch/s, Batch Loss=0.2362, Avg Loss=0.2575, Time Left=7.09 [A
Epoch 1/3 - Training:  77%|▊| 2608/3393 [20:51<06:28,  2.02batch/s, Batch Loss=0.2362, Avg Loss=0.2575, Time Left=7.09 [A
Epoch 1/3 - Training:  77%|▊| 2608/3393 [20:52<06:28,  2.02batch/s, Batch Loss=0.0777, Avg Loss=0.2574, Time Left=7.09 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  78%|▊| 2638/3393 [21:06<06:11,  2.03batch/s, Batch Loss=0.3153, Avg Loss=0.2560, Time Left=6.85 [A
Epoch 1/3 - Training:  78%|▊| 2638/3393 [21:07<06:11,  2.03batch/s, Batch Loss=0.3295, Avg Loss=0.2561, Time Left=6.84 [A
Epoch 1/3 - Training:  78%|▊| 2639/3393 [21:07<06:12,  2.03batch/s, Batch Loss=0.3295, Avg Loss=0.2561, Time Left=6.84 [A
Epoch 1/3 - Training:  78%|▊| 2639/3393 [21:07<06:12,  2.03batch/s, Batch Loss=0.0651, Avg Loss=0.2560, Time Left=6.83 [A
Epoch 1/3 - Training:  78%|▊| 2640/3393 [21:07<06:13,  2.02batch/s, Batch Loss=0.0651, Avg Loss=0.2560, Time Left=6.83 [A
Epoch 1/3 - Training:  78%|▊| 2640/3393 [21:08<06:13,  2.02batch/s, Batch Loss=0.2042, Avg Loss=0.2560, Time Left=6.82 [A
Epoch 1/3 - Training:  78%|▊| 2641/3393 [21:08<06:06,  2.05batch/s, Batch Loss=0.2042, Avg Loss=0.2560, Time Left=6.82 [A
Epoch 1/3 - Training:  78%|▊| 2641/3393 [21:08<06:06,  2.05batch/s, Batch Loss=0.0303, Avg Loss=0.2559, Time Left=6.81 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  79%|▊| 2671/3393 [21:22<05:49,  2.07batch/s, Batch Loss=0.0355, Avg Loss=0.2549, Time Left=6.57 [A
Epoch 1/3 - Training:  79%|▊| 2671/3393 [21:23<05:49,  2.07batch/s, Batch Loss=0.1199, Avg Loss=0.2549, Time Left=6.57 [A
Epoch 1/3 - Training:  79%|▊| 2672/3393 [21:23<05:49,  2.06batch/s, Batch Loss=0.1199, Avg Loss=0.2549, Time Left=6.57 [A
Epoch 1/3 - Training:  79%|▊| 2672/3393 [21:23<05:49,  2.06batch/s, Batch Loss=0.1624, Avg Loss=0.2549, Time Left=6.56 [A
Epoch 1/3 - Training:  79%|▊| 2673/3393 [21:23<05:48,  2.07batch/s, Batch Loss=0.1624, Avg Loss=0.2549, Time Left=6.56 [A
Epoch 1/3 - Training:  79%|▊| 2673/3393 [21:24<05:48,  2.07batch/s, Batch Loss=0.0445, Avg Loss=0.2548, Time Left=6.55 [A
Epoch 1/3 - Training:  79%|▊| 2674/3393 [21:24<06:00,  1.99batch/s, Batch Loss=0.0445, Avg Loss=0.2548, Time Left=6.55 [A
Epoch 1/3 - Training:  79%|▊| 2674/3393 [21:24<06:00,  1.99batch/s, Batch Loss=0.0647, Avg Loss=0.2547, Time Left=6.54 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  80%|▊| 2704/3393 [21:38<05:30,  2.09batch/s, Batch Loss=0.3035, Avg Loss=0.2537, Time Left=6.30 [A
Epoch 1/3 - Training:  80%|▊| 2704/3393 [21:39<05:30,  2.09batch/s, Batch Loss=0.2003, Avg Loss=0.2537, Time Left=6.29 [A
Epoch 1/3 - Training:  80%|▊| 2705/3393 [21:39<05:36,  2.04batch/s, Batch Loss=0.2003, Avg Loss=0.2537, Time Left=6.29 [A
Epoch 1/3 - Training:  80%|▊| 2705/3393 [21:39<05:36,  2.04batch/s, Batch Loss=0.0441, Avg Loss=0.2536, Time Left=6.28 [A
Epoch 1/3 - Training:  80%|▊| 2706/3393 [21:39<05:34,  2.05batch/s, Batch Loss=0.0441, Avg Loss=0.2536, Time Left=6.28 [A
Epoch 1/3 - Training:  80%|▊| 2706/3393 [21:40<05:34,  2.05batch/s, Batch Loss=0.2670, Avg Loss=0.2536, Time Left=6.28 [A
Epoch 1/3 - Training:  80%|▊| 2707/3393 [21:40<05:40,  2.02batch/s, Batch Loss=0.2670, Avg Loss=0.2536, Time Left=6.28 [A
Epoch 1/3 - Training:  80%|▊| 2707/3393 [21:40<05:40,  2.02batch/s, Batch Loss=0.0477, Avg Loss=0.2535, Time Left=6.27 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  81%|▊| 2737/3393 [21:55<05:21,  2.04batch/s, Batch Loss=0.2031, Avg Loss=0.2525, Time Left=6.03 [A
Epoch 1/3 - Training:  81%|▊| 2737/3393 [21:55<05:21,  2.04batch/s, Batch Loss=0.4890, Avg Loss=0.2526, Time Left=6.02 [A
Epoch 1/3 - Training:  81%|▊| 2738/3393 [21:55<05:26,  2.01batch/s, Batch Loss=0.4890, Avg Loss=0.2526, Time Left=6.02 [A
Epoch 1/3 - Training:  81%|▊| 2738/3393 [21:56<05:26,  2.01batch/s, Batch Loss=0.0321, Avg Loss=0.2525, Time Left=6.01 [A
Epoch 1/3 - Training:  81%|▊| 2739/3393 [21:56<05:23,  2.02batch/s, Batch Loss=0.0321, Avg Loss=0.2525, Time Left=6.01 [A
Epoch 1/3 - Training:  81%|▊| 2739/3393 [21:56<05:23,  2.02batch/s, Batch Loss=0.3603, Avg Loss=0.2525, Time Left=6.01 [A
Epoch 1/3 - Training:  81%|▊| 2740/3393 [21:56<05:29,  1.98batch/s, Batch Loss=0.3603, Avg Loss=0.2525, Time Left=6.01 [A
Epoch 1/3 - Training:  81%|▊| 2740/3393 [21:57<05:29,  1.98batch/s, Batch Loss=0.1993, Avg Loss=0.2525, Time Left=6.00 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  82%|▊| 2770/3393 [22:11<05:07,  2.02batch/s, Batch Loss=0.0265, Avg Loss=0.2514, Time Left=5.76 [A
Epoch 1/3 - Training:  82%|▊| 2770/3393 [22:12<05:07,  2.02batch/s, Batch Loss=0.0428, Avg Loss=0.2514, Time Left=5.75 [A
Epoch 1/3 - Training:  82%|▊| 2771/3393 [22:12<05:08,  2.02batch/s, Batch Loss=0.0428, Avg Loss=0.2514, Time Left=5.75 [A
Epoch 1/3 - Training:  82%|▊| 2771/3393 [22:12<05:08,  2.02batch/s, Batch Loss=0.1179, Avg Loss=0.2513, Time Left=5.74 [A
Epoch 1/3 - Training:  82%|▊| 2772/3393 [22:12<05:02,  2.05batch/s, Batch Loss=0.1179, Avg Loss=0.2513, Time Left=5.74 [A
Epoch 1/3 - Training:  82%|▊| 2772/3393 [22:13<05:02,  2.05batch/s, Batch Loss=0.1721, Avg Loss=0.2513, Time Left=5.73 [A
Epoch 1/3 - Training:  82%|▊| 2773/3393 [22:13<04:58,  2.08batch/s, Batch Loss=0.1721, Avg Loss=0.2513, Time Left=5.73 [A
Epoch 1/3 - Training:  82%|▊| 2773/3393 [22:13<04:58,  2.08batch/s, Batch Loss=0.1649, Avg Loss=0.2513, Time Left=5.73 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  83%|▊| 2803/3393 [22:28<04:44,  2.07batch/s, Batch Loss=0.0779, Avg Loss=0.2500, Time Left=5.49 [A
Epoch 1/3 - Training:  83%|▊| 2803/3393 [22:28<04:44,  2.07batch/s, Batch Loss=0.1493, Avg Loss=0.2500, Time Left=5.48 [A
Epoch 1/3 - Training:  83%|▊| 2804/3393 [22:28<04:41,  2.09batch/s, Batch Loss=0.1493, Avg Loss=0.2500, Time Left=5.48 [A
Epoch 1/3 - Training:  83%|▊| 2804/3393 [22:28<04:41,  2.09batch/s, Batch Loss=0.0603, Avg Loss=0.2499, Time Left=5.47 [A
Epoch 1/3 - Training:  83%|▊| 2805/3393 [22:28<04:41,  2.09batch/s, Batch Loss=0.0603, Avg Loss=0.2499, Time Left=5.47 [A
Epoch 1/3 - Training:  83%|▊| 2805/3393 [22:29<04:41,  2.09batch/s, Batch Loss=0.0823, Avg Loss=0.2498, Time Left=5.46 [A
Epoch 1/3 - Training:  83%|▊| 2806/3393 [22:29<04:44,  2.06batch/s, Batch Loss=0.0823, Avg Loss=0.2498, Time Left=5.46 [A
Epoch 1/3 - Training:  83%|▊| 2806/3393 [22:29<04:44,  2.06batch/s, Batch Loss=0.0454, Avg Loss=0.2498, Time Left=5.45 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  84%|▊| 2836/3393 [22:44<04:35,  2.02batch/s, Batch Loss=0.0499, Avg Loss=0.2489, Time Left=5.21 [A
Epoch 1/3 - Training:  84%|▊| 2836/3393 [22:44<04:35,  2.02batch/s, Batch Loss=0.1749, Avg Loss=0.2489, Time Left=5.21 [A
Epoch 1/3 - Training:  84%|▊| 2837/3393 [22:44<04:35,  2.02batch/s, Batch Loss=0.1749, Avg Loss=0.2489, Time Left=5.21 [A
Epoch 1/3 - Training:  84%|▊| 2837/3393 [22:45<04:35,  2.02batch/s, Batch Loss=0.1636, Avg Loss=0.2489, Time Left=5.20 [A
Epoch 1/3 - Training:  84%|▊| 2838/3393 [22:45<04:38,  1.99batch/s, Batch Loss=0.1636, Avg Loss=0.2489, Time Left=5.20 [A
Epoch 1/3 - Training:  84%|▊| 2838/3393 [22:45<04:38,  1.99batch/s, Batch Loss=0.2470, Avg Loss=0.2489, Time Left=5.19 [A
Epoch 1/3 - Training:  84%|▊| 2839/3393 [22:45<04:35,  2.01batch/s, Batch Loss=0.2470, Avg Loss=0.2489, Time Left=5.19 [A
Epoch 1/3 - Training:  84%|▊| 2839/3393 [22:46<04:35,  2.01batch/s, Batch Loss=0.1995, Avg Loss=0.2489, Time Left=5.18 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  85%|▊| 2869/3393 [23:00<04:25,  1.98batch/s, Batch Loss=0.0305, Avg Loss=0.2480, Time Left=4.94 [A
Epoch 1/3 - Training:  85%|▊| 2869/3393 [23:01<04:25,  1.98batch/s, Batch Loss=0.3452, Avg Loss=0.2481, Time Left=4.94 [A
Epoch 1/3 - Training:  85%|▊| 2870/3393 [23:01<04:26,  1.96batch/s, Batch Loss=0.3452, Avg Loss=0.2481, Time Left=4.94 [A
Epoch 1/3 - Training:  85%|▊| 2870/3393 [23:01<04:26,  1.96batch/s, Batch Loss=0.0886, Avg Loss=0.2480, Time Left=4.93 [A
Epoch 1/3 - Training:  85%|▊| 2871/3393 [23:01<04:24,  1.97batch/s, Batch Loss=0.0886, Avg Loss=0.2480, Time Left=4.93 [A
Epoch 1/3 - Training:  85%|▊| 2871/3393 [23:02<04:24,  1.97batch/s, Batch Loss=0.1535, Avg Loss=0.2480, Time Left=4.92 [A
Epoch 1/3 - Training:  85%|▊| 2872/3393 [23:02<04:20,  2.00batch/s, Batch Loss=0.1535, Avg Loss=0.2480, Time Left=4.92 [A
Epoch 1/3 - Training:  85%|▊| 2872/3393 [23:02<04:20,  2.00batch/s, Batch Loss=0.2750, Avg Loss=0.2480, Time Left=4.91 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  86%|▊| 2902/3393 [23:16<03:55,  2.08batch/s, Batch Loss=0.1377, Avg Loss=0.2468, Time Left=4.67 [A
Epoch 1/3 - Training:  86%|▊| 2902/3393 [23:17<03:55,  2.08batch/s, Batch Loss=0.1671, Avg Loss=0.2467, Time Left=4.66 [A
Epoch 1/3 - Training:  86%|▊| 2903/3393 [23:17<03:55,  2.08batch/s, Batch Loss=0.1671, Avg Loss=0.2467, Time Left=4.66 [A
Epoch 1/3 - Training:  86%|▊| 2903/3393 [23:17<03:55,  2.08batch/s, Batch Loss=0.2917, Avg Loss=0.2467, Time Left=4.65 [A
Epoch 1/3 - Training:  86%|▊| 2904/3393 [23:17<03:55,  2.08batch/s, Batch Loss=0.2917, Avg Loss=0.2467, Time Left=4.65 [A
Epoch 1/3 - Training:  86%|▊| 2904/3393 [23:18<03:55,  2.08batch/s, Batch Loss=0.1051, Avg Loss=0.2467, Time Left=4.65 [A
Epoch 1/3 - Training:  86%|▊| 2905/3393 [23:18<03:50,  2.12batch/s, Batch Loss=0.1051, Avg Loss=0.2467, Time Left=4.65 [A
Epoch 1/3 - Training:  86%|▊| 2905/3393 [23:18<03:50,  2.12batch/s, Batch Loss=0.0175, Avg Loss=0.2466, Time Left=4.64 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  87%|▊| 2935/3393 [23:33<03:48,  2.01batch/s, Batch Loss=0.0187, Avg Loss=0.2456, Time Left=4.40 [A
Epoch 1/3 - Training:  87%|▊| 2935/3393 [23:33<03:48,  2.01batch/s, Batch Loss=0.0312, Avg Loss=0.2455, Time Left=4.39 [A
Epoch 1/3 - Training:  87%|▊| 2936/3393 [23:33<03:43,  2.04batch/s, Batch Loss=0.0312, Avg Loss=0.2455, Time Left=4.39 [A
Epoch 1/3 - Training:  87%|▊| 2936/3393 [23:33<03:43,  2.04batch/s, Batch Loss=0.0801, Avg Loss=0.2454, Time Left=4.38 [A
Epoch 1/3 - Training:  87%|▊| 2937/3393 [23:33<03:46,  2.01batch/s, Batch Loss=0.0801, Avg Loss=0.2454, Time Left=4.38 [A
Epoch 1/3 - Training:  87%|▊| 2937/3393 [23:34<03:46,  2.01batch/s, Batch Loss=0.0560, Avg Loss=0.2454, Time Left=4.37 [A
Epoch 1/3 - Training:  87%|▊| 2938/3393 [23:34<03:46,  2.01batch/s, Batch Loss=0.0560, Avg Loss=0.2454, Time Left=4.37 [A
Epoch 1/3 - Training:  87%|▊| 2938/3393 [23:34<03:46,  2.01batch/s, Batch Loss=0.0291, Avg Loss=0.2453, Time Left=4.37 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  87%|▊| 2968/3393 [23:49<03:32,  2.00batch/s, Batch Loss=0.0232, Avg Loss=0.2442, Time Left=4.13 [A
Epoch 1/3 - Training:  87%|▊| 2968/3393 [23:49<03:32,  2.00batch/s, Batch Loss=0.2771, Avg Loss=0.2442, Time Left=4.12 [A
Epoch 1/3 - Training:  88%|▉| 2969/3393 [23:49<03:27,  2.04batch/s, Batch Loss=0.2771, Avg Loss=0.2442, Time Left=4.12 [A
Epoch 1/3 - Training:  88%|▉| 2969/3393 [23:50<03:27,  2.04batch/s, Batch Loss=0.3018, Avg Loss=0.2442, Time Left=4.11 [A
Epoch 1/3 - Training:  88%|▉| 2970/3393 [23:50<03:32,  1.99batch/s, Batch Loss=0.3018, Avg Loss=0.2442, Time Left=4.11 [A
Epoch 1/3 - Training:  88%|▉| 2970/3393 [23:50<03:32,  1.99batch/s, Batch Loss=0.0108, Avg Loss=0.2441, Time Left=4.10 [A
Epoch 1/3 - Training:  88%|▉| 2971/3393 [23:50<03:32,  1.99batch/s, Batch Loss=0.0108, Avg Loss=0.2441, Time Left=4.10 [A
Epoch 1/3 - Training:  88%|▉| 2971/3393 [23:51<03:32,  1.99batch/s, Batch Loss=0.0768, Avg Loss=0.2441, Time Left=4.09 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  88%|▉| 3001/3393 [24:05<03:11,  2.05batch/s, Batch Loss=0.1132, Avg Loss=0.2433, Time Left=3.86 [A
Epoch 1/3 - Training:  88%|▉| 3001/3393 [24:06<03:11,  2.05batch/s, Batch Loss=0.1285, Avg Loss=0.2433, Time Left=3.85 [A
Epoch 1/3 - Training:  88%|▉| 3002/3393 [24:06<03:08,  2.07batch/s, Batch Loss=0.1285, Avg Loss=0.2433, Time Left=3.85 [A
Epoch 1/3 - Training:  88%|▉| 3002/3393 [24:06<03:08,  2.07batch/s, Batch Loss=0.0455, Avg Loss=0.2432, Time Left=3.84 [A
Epoch 1/3 - Training:  89%|▉| 3003/3393 [24:06<03:06,  2.10batch/s, Batch Loss=0.0455, Avg Loss=0.2432, Time Left=3.84 [A
Epoch 1/3 - Training:  89%|▉| 3003/3393 [24:07<03:06,  2.10batch/s, Batch Loss=0.0979, Avg Loss=0.2432, Time Left=3.83 [A
Epoch 1/3 - Training:  89%|▉| 3004/3393 [24:07<03:09,  2.05batch/s, Batch Loss=0.0979, Avg Loss=0.2432, Time Left=3.83 [A
Epoch 1/3 - Training:  89%|▉| 3004/3393 [24:07<03:09,  2.05batch/s, Batch Loss=0.1088, Avg Loss=0.2431, Time Left=3.82 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  89%|▉| 3034/3393 [24:21<02:54,  2.05batch/s, Batch Loss=0.1371, Avg Loss=0.2426, Time Left=3.58 [A
Epoch 1/3 - Training:  89%|▉| 3034/3393 [24:22<02:54,  2.05batch/s, Batch Loss=0.1943, Avg Loss=0.2425, Time Left=3.57 [A
Epoch 1/3 - Training:  89%|▉| 3035/3393 [24:22<02:55,  2.04batch/s, Batch Loss=0.1943, Avg Loss=0.2425, Time Left=3.57 [A
Epoch 1/3 - Training:  89%|▉| 3035/3393 [24:22<02:55,  2.04batch/s, Batch Loss=0.1378, Avg Loss=0.2425, Time Left=3.57 [A
Epoch 1/3 - Training:  89%|▉| 3036/3393 [24:22<02:52,  2.07batch/s, Batch Loss=0.1378, Avg Loss=0.2425, Time Left=3.57 [A
Epoch 1/3 - Training:  89%|▉| 3036/3393 [24:23<02:52,  2.07batch/s, Batch Loss=0.2013, Avg Loss=0.2425, Time Left=3.56 [A
Epoch 1/3 - Training:  90%|▉| 3037/3393 [24:23<02:50,  2.09batch/s, Batch Loss=0.2013, Avg Loss=0.2425, Time Left=3.56 [A
Epoch 1/3 - Training:  90%|▉| 3037/3393 [24:23<02:50,  2.09batch/s, Batch Loss=0.0816, Avg Loss=0.2424, Time Left=3.55 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  90%|▉| 3067/3393 [24:38<02:40,  2.03batch/s, Batch Loss=0.2017, Avg Loss=0.2414, Time Left=3.31 [A
Epoch 1/3 - Training:  90%|▉| 3067/3393 [24:38<02:40,  2.03batch/s, Batch Loss=0.0783, Avg Loss=0.2414, Time Left=3.30 [A
Epoch 1/3 - Training:  90%|▉| 3068/3393 [24:38<02:39,  2.04batch/s, Batch Loss=0.0783, Avg Loss=0.2414, Time Left=3.30 [A
Epoch 1/3 - Training:  90%|▉| 3068/3393 [24:39<02:39,  2.04batch/s, Batch Loss=0.1752, Avg Loss=0.2414, Time Left=3.29 [A
Epoch 1/3 - Training:  90%|▉| 3069/3393 [24:39<02:40,  2.01batch/s, Batch Loss=0.1752, Avg Loss=0.2414, Time Left=3.29 [A
Epoch 1/3 - Training:  90%|▉| 3069/3393 [24:39<02:40,  2.01batch/s, Batch Loss=0.2195, Avg Loss=0.2414, Time Left=3.29 [A
Epoch 1/3 - Training:  90%|▉| 3070/3393 [24:39<02:39,  2.03batch/s, Batch Loss=0.2195, Avg Loss=0.2414, Time Left=3.29 [A
Epoch 1/3 - Training:  90%|▉| 3070/3393 [24:40<02:39,  2.03batch/s, Batch Loss=0.1315, Avg Loss=0.2413, Time Left=3.28 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  91%|▉| 3100/3393 [24:54<02:22,  2.05batch/s, Batch Loss=0.4099, Avg Loss=0.2404, Time Left=3.04 [A
Epoch 1/3 - Training:  91%|▉| 3100/3393 [24:54<02:22,  2.05batch/s, Batch Loss=0.3419, Avg Loss=0.2404, Time Left=3.03 [A
Epoch 1/3 - Training:  91%|▉| 3101/3393 [24:54<02:22,  2.06batch/s, Batch Loss=0.3419, Avg Loss=0.2404, Time Left=3.03 [A
Epoch 1/3 - Training:  91%|▉| 3101/3393 [24:55<02:22,  2.06batch/s, Batch Loss=0.1081, Avg Loss=0.2403, Time Left=3.02 [A
Epoch 1/3 - Training:  91%|▉| 3102/3393 [24:55<02:21,  2.06batch/s, Batch Loss=0.1081, Avg Loss=0.2403, Time Left=3.02 [A
Epoch 1/3 - Training:  91%|▉| 3102/3393 [24:55<02:21,  2.06batch/s, Batch Loss=0.0385, Avg Loss=0.2403, Time Left=3.01 [A
Epoch 1/3 - Training:  91%|▉| 3103/3393 [24:55<02:23,  2.02batch/s, Batch Loss=0.0385, Avg Loss=0.2403, Time Left=3.01 [A
Epoch 1/3 - Training:  91%|▉| 3103/3393 [24:56<02:23,  2.02batch/s, Batch Loss=0.0128, Avg Loss=0.2402, Time Left=3.01 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  92%|▉| 3133/3393 [25:10<02:09,  2.01batch/s, Batch Loss=0.3364, Avg Loss=0.2392, Time Left=2.77 [A
Epoch 1/3 - Training:  92%|▉| 3133/3393 [25:10<02:09,  2.01batch/s, Batch Loss=0.2244, Avg Loss=0.2392, Time Left=2.76 [A
Epoch 1/3 - Training:  92%|▉| 3134/3393 [25:10<02:07,  2.03batch/s, Batch Loss=0.2244, Avg Loss=0.2392, Time Left=2.76 [A
Epoch 1/3 - Training:  92%|▉| 3134/3393 [25:11<02:07,  2.03batch/s, Batch Loss=0.1500, Avg Loss=0.2392, Time Left=2.75 [A
Epoch 1/3 - Training:  92%|▉| 3135/3393 [25:11<02:05,  2.06batch/s, Batch Loss=0.1500, Avg Loss=0.2392, Time Left=2.75 [A
Epoch 1/3 - Training:  92%|▉| 3135/3393 [25:11<02:05,  2.06batch/s, Batch Loss=0.1577, Avg Loss=0.2392, Time Left=2.74 [A
Epoch 1/3 - Training:  92%|▉| 3136/3393 [25:11<02:04,  2.06batch/s, Batch Loss=0.1577, Avg Loss=0.2392, Time Left=2.74 [A
Epoch 1/3 - Training:  92%|▉| 3136/3393 [25:12<02:04,  2.06batch/s, Batch Loss=0.2105, Avg Loss=0.2392, Time Left=2.73 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  93%|▉| 3166/3393 [25:26<01:49,  2.07batch/s, Batch Loss=0.1924, Avg Loss=0.2383, Time Left=2.49 [A
Epoch 1/3 - Training:  93%|▉| 3166/3393 [25:26<01:49,  2.07batch/s, Batch Loss=0.0477, Avg Loss=0.2382, Time Left=2.49 [A
Epoch 1/3 - Training:  93%|▉| 3167/3393 [25:26<01:47,  2.09batch/s, Batch Loss=0.0477, Avg Loss=0.2382, Time Left=2.49 [A
Epoch 1/3 - Training:  93%|▉| 3167/3393 [25:27<01:47,  2.09batch/s, Batch Loss=0.0591, Avg Loss=0.2382, Time Left=2.48 [A
Epoch 1/3 - Training:  93%|▉| 3168/3393 [25:27<01:48,  2.06batch/s, Batch Loss=0.0591, Avg Loss=0.2382, Time Left=2.48 [A
Epoch 1/3 - Training:  93%|▉| 3168/3393 [25:27<01:48,  2.06batch/s, Batch Loss=0.0862, Avg Loss=0.2381, Time Left=2.47 [A
Epoch 1/3 - Training:  93%|▉| 3169/3393 [25:27<01:48,  2.06batch/s, Batch Loss=0.0862, Avg Loss=0.2381, Time Left=2.47 [A
Epoch 1/3 - Training:  93%|▉| 3169/3393 [25:28<01:48,  2.06batch/s, Batch Loss=0.2611, Avg Loss=0.2381, Time Left=2.46 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  94%|▉| 3199/3393 [25:42<01:35,  2.04batch/s, Batch Loss=0.0183, Avg Loss=0.2371, Time Left=2.22 [A
Epoch 1/3 - Training:  94%|▉| 3199/3393 [25:43<01:35,  2.04batch/s, Batch Loss=0.0543, Avg Loss=0.2370, Time Left=2.21 [A
Epoch 1/3 - Training:  94%|▉| 3200/3393 [25:43<01:32,  2.09batch/s, Batch Loss=0.0543, Avg Loss=0.2370, Time Left=2.21 [A
Epoch 1/3 - Training:  94%|▉| 3200/3393 [25:43<01:32,  2.09batch/s, Batch Loss=0.0521, Avg Loss=0.2370, Time Left=2.21 [A
Epoch 1/3 - Training:  94%|▉| 3201/3393 [25:43<01:32,  2.08batch/s, Batch Loss=0.0521, Avg Loss=0.2370, Time Left=2.21 [A
Epoch 1/3 - Training:  94%|▉| 3201/3393 [25:44<01:32,  2.08batch/s, Batch Loss=0.0213, Avg Loss=0.2369, Time Left=2.20 [A
Epoch 1/3 - Training:  94%|▉| 3202/3393 [25:44<01:32,  2.06batch/s, Batch Loss=0.0213, Avg Loss=0.2369, Time Left=2.20 [A
Epoch 1/3 - Training:  94%|▉| 3202/3393 [25:44<01:32,  2.06batch/s, Batch Loss=0.2157, Avg Loss=0.2369, Time Left=2.19 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  95%|▉| 3232/3393 [25:58<01:18,  2.06batch/s, Batch Loss=0.0796, Avg Loss=0.2360, Time Left=1.95 [A
Epoch 1/3 - Training:  95%|▉| 3232/3393 [25:59<01:18,  2.06batch/s, Batch Loss=0.0895, Avg Loss=0.2360, Time Left=1.94 [A
Epoch 1/3 - Training:  95%|▉| 3233/3393 [25:59<01:18,  2.03batch/s, Batch Loss=0.0895, Avg Loss=0.2360, Time Left=1.94 [A
Epoch 1/3 - Training:  95%|▉| 3233/3393 [25:59<01:18,  2.03batch/s, Batch Loss=0.1216, Avg Loss=0.2359, Time Left=1.93 [A
Epoch 1/3 - Training:  95%|▉| 3234/3393 [25:59<01:18,  2.03batch/s, Batch Loss=0.1216, Avg Loss=0.2359, Time Left=1.93 [A
Epoch 1/3 - Training:  95%|▉| 3234/3393 [26:00<01:18,  2.03batch/s, Batch Loss=0.0382, Avg Loss=0.2359, Time Left=1.93 [A
Epoch 1/3 - Training:  95%|▉| 3235/3393 [26:00<01:17,  2.04batch/s, Batch Loss=0.0382, Avg Loss=0.2359, Time Left=1.93 [A
Epoch 1/3 - Training:  95%|▉| 3235/3393 [26:00<01:17,  2.04batch/s, Batch Loss=0.0422, Avg Loss=0.2358, Time Left=1.92 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  96%|▉| 3265/3393 [26:14<01:02,  2.05batch/s, Batch Loss=0.0953, Avg Loss=0.2348, Time Left=1.68 [A
Epoch 1/3 - Training:  96%|▉| 3265/3393 [26:15<01:02,  2.05batch/s, Batch Loss=0.0474, Avg Loss=0.2348, Time Left=1.67 [A
Epoch 1/3 - Training:  96%|▉| 3266/3393 [26:15<01:02,  2.05batch/s, Batch Loss=0.0474, Avg Loss=0.2348, Time Left=1.67 [A
Epoch 1/3 - Training:  96%|▉| 3266/3393 [26:15<01:02,  2.05batch/s, Batch Loss=0.2093, Avg Loss=0.2348, Time Left=1.66 [A
Epoch 1/3 - Training:  96%|▉| 3267/3393 [26:15<01:01,  2.04batch/s, Batch Loss=0.2093, Avg Loss=0.2348, Time Left=1.66 [A
Epoch 1/3 - Training:  96%|▉| 3267/3393 [26:16<01:01,  2.04batch/s, Batch Loss=0.0400, Avg Loss=0.2347, Time Left=1.65 [A
Epoch 1/3 - Training:  96%|▉| 3268/3393 [26:16<01:00,  2.05batch/s, Batch Loss=0.0400, Avg Loss=0.2347, Time Left=1.65 [A
Epoch 1/3 - Training:  96%|▉| 3268/3393 [26:16<01:00,  2.05batch/s, Batch Loss=0.1377, Avg Loss=0.2347, Time Left=1.65 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  97%|▉| 3298/3393 [26:30<00:46,  2.02batch/s, Batch Loss=0.0967, Avg Loss=0.2337, Time Left=1.41 [A
Epoch 1/3 - Training:  97%|▉| 3298/3393 [26:31<00:46,  2.02batch/s, Batch Loss=0.0460, Avg Loss=0.2337, Time Left=1.40 [A
Epoch 1/3 - Training:  97%|▉| 3299/3393 [26:31<00:46,  2.02batch/s, Batch Loss=0.0460, Avg Loss=0.2337, Time Left=1.40 [A
Epoch 1/3 - Training:  97%|▉| 3299/3393 [26:31<00:46,  2.02batch/s, Batch Loss=0.1356, Avg Loss=0.2336, Time Left=1.39 [A
Epoch 1/3 - Training:  97%|▉| 3300/3393 [26:31<00:45,  2.03batch/s, Batch Loss=0.1356, Avg Loss=0.2336, Time Left=1.39 [A
Epoch 1/3 - Training:  97%|▉| 3300/3393 [26:32<00:45,  2.03batch/s, Batch Loss=0.3295, Avg Loss=0.2337, Time Left=1.38 [A
Epoch 1/3 - Training:  97%|▉| 3301/3393 [26:32<00:44,  2.06batch/s, Batch Loss=0.3295, Avg Loss=0.2337, Time Left=1.38 [A
Epoch 1/3 - Training:  97%|▉| 3301/3393 [26:32<00:44,  2.06batch/s, Batch Loss=0.0343, Avg Loss=0.2336, Time Left=1.37 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  98%|▉| 3331/3393 [26:47<00:30,  2.01batch/s, Batch Loss=0.3372, Avg Loss=0.2329, Time Left=1.14 [A
Epoch 1/3 - Training:  98%|▉| 3331/3393 [26:47<00:30,  2.01batch/s, Batch Loss=0.0135, Avg Loss=0.2329, Time Left=1.13 [A
Epoch 1/3 - Training:  98%|▉| 3332/3393 [26:47<00:30,  2.00batch/s, Batch Loss=0.0135, Avg Loss=0.2329, Time Left=1.13 [A
Epoch 1/3 - Training:  98%|▉| 3332/3393 [26:48<00:30,  2.00batch/s, Batch Loss=0.2798, Avg Loss=0.2329, Time Left=1.12 [A
Epoch 1/3 - Training:  98%|▉| 3333/3393 [26:48<00:29,  2.04batch/s, Batch Loss=0.2798, Avg Loss=0.2329, Time Left=1.12 [A
Epoch 1/3 - Training:  98%|▉| 3333/3393 [26:48<00:29,  2.04batch/s, Batch Loss=0.0346, Avg Loss=0.2328, Time Left=1.11 [A
Epoch 1/3 - Training:  98%|▉| 3334/3393 [26:48<00:28,  2.05batch/s, Batch Loss=0.0346, Avg Loss=0.2328, Time Left=1.11 [A
Epoch 1/3 - Training:  98%|▉| 3334/3393 [26:48<00:28,  2.05batch/s, Batch Loss=0.0351, Avg Loss=0.2328, Time Left=1.10 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training:  99%|▉| 3364/3393 [27:03<00:14,  2.06batch/s, Batch Loss=0.0192, Avg Loss=0.2318, Time Left=0.86 [A
Epoch 1/3 - Training:  99%|▉| 3364/3393 [27:03<00:14,  2.06batch/s, Batch Loss=0.0258, Avg Loss=0.2317, Time Left=0.86 [A
Epoch 1/3 - Training:  99%|▉| 3365/3393 [27:03<00:13,  2.04batch/s, Batch Loss=0.0258, Avg Loss=0.2317, Time Left=0.86 [A
Epoch 1/3 - Training:  99%|▉| 3365/3393 [27:04<00:13,  2.04batch/s, Batch Loss=0.0195, Avg Loss=0.2316, Time Left=0.85 [A
Epoch 1/3 - Training:  99%|▉| 3366/3393 [27:04<00:13,  2.07batch/s, Batch Loss=0.0195, Avg Loss=0.2316, Time Left=0.85 [A
Epoch 1/3 - Training:  99%|▉| 3366/3393 [27:04<00:13,  2.07batch/s, Batch Loss=0.2318, Avg Loss=0.2316, Time Left=0.84 [A
Epoch 1/3 - Training:  99%|▉| 3367/3393 [27:04<00:12,  2.09batch/s, Batch Loss=0.2318, Avg Loss=0.2316, Time Left=0.84 [A
Epoch 1/3 - Training:  99%|▉| 3367/3393 [27:05<00:12,  2.09batch/s, Batch Loss=0.0219, Avg Loss=0.2316, Time Left=0.83 [A
Epoch 1/3 - Trai

Epoch 1/3 - Training: 3397batch [27:19,  2.10batch/s, Batch Loss=0.1394, Avg Loss=0.2304, Time Left=0.58 min][A
Epoch 1/3 - Training: 3398batch [27:19,  2.09batch/s, Batch Loss=0.1394, Avg Loss=0.2304, Time Left=0.58 min][A
Epoch 1/3 - Training: 3398batch [27:20,  2.09batch/s, Batch Loss=0.2626, Avg Loss=0.2304, Time Left=0.58 min][A
Epoch 1/3 - Training: 3399batch [27:20,  2.04batch/s, Batch Loss=0.2626, Avg Loss=0.2304, Time Left=0.58 min][A
Epoch 1/3 - Training: 3399batch [27:20,  2.04batch/s, Batch Loss=0.3020, Avg Loss=0.2304, Time Left=0.57 min][A
Epoch 1/3 - Training: 3400batch [27:20,  2.05batch/s, Batch Loss=0.3020, Avg Loss=0.2304, Time Left=0.57 min][A
Epoch 1/3 - Training: 3400batch [27:21,  2.05batch/s, Batch Loss=0.0360, Avg Loss=0.2303, Time Left=0.56 min][A
Epoch 1/3 - Training: 3401batch [27:21,  2.04batch/s, Batch Loss=0.0360, Avg Loss=0.2303, Time Left=0.56 min][A
Epoch 1/3 - Training: 3401batch [27:21,  2.04batch/s, Batch Loss=0.1552, Avg Loss=0.2303, Time L

Epoch 1/3 - Training: 3433batch [27:37,  2.07batch/s, Batch Loss=0.1761, Avg Loss=0.2302, Time Left=0.29 min][A
Epoch 1/3 - Training: 3434batch [27:37,  2.07batch/s, Batch Loss=0.1761, Avg Loss=0.2302, Time Left=0.29 min][A
Epoch 1/3 - Training: 3434batch [27:37,  2.07batch/s, Batch Loss=0.2078, Avg Loss=0.2302, Time Left=0.28 min][A
Epoch 1/3 - Training: 3435batch [27:37,  2.07batch/s, Batch Loss=0.2078, Avg Loss=0.2302, Time Left=0.28 min][A
Epoch 1/3 - Training: 3435batch [27:38,  2.07batch/s, Batch Loss=0.0949, Avg Loss=0.2302, Time Left=0.27 min][A
Epoch 1/3 - Training: 3436batch [27:38,  2.07batch/s, Batch Loss=0.0949, Avg Loss=0.2302, Time Left=0.27 min][A
Epoch 1/3 - Training: 3436batch [27:38,  2.07batch/s, Batch Loss=0.0974, Avg Loss=0.2301, Time Left=0.26 min][A
Epoch 1/3 - Training: 3437batch [27:38,  2.03batch/s, Batch Loss=0.0974, Avg Loss=0.2301, Time Left=0.26 min][A
Epoch 1/3 - Training: 3437batch [27:39,  2.03batch/s, Batch Loss=0.1278, Avg Loss=0.2301, Time L

                                                                                                             [A
Epoch 1/3 - Evaluating:   0%|                                                               | 0/849 [00:00<?, ?batch/s][A
                                                                                                                       [A


Epoch 1/3 Results:
Train Loss: 0.2294
Validation Loss: 0.1239, Accuracy: 0.9559

Starting Epoch 2/3



  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

Epoch 2/3 - Training:   0%|       | 0/3393 [00:00<?, ?batch/s, Batch Loss=0.0764, Avg Loss=0.0764, Time Left=29.75 min][A
Epoch 2/3 - Training:   3%| | 100/3393 [00:00<00:17, 190.04batch/s, Batch Loss=0.0764, Avg Loss=0.0764, Time Left=29.75[A
Epoch 2/3 - Training:   3%| | 100/3393 [00:13<00:17, 190.04batch/s, Batch Loss=0.0764, Avg Loss=0.0764, Time Left=29.75[A
Epoch 2/3 - Training:   3%| | 100/3393 [00:13<00:17, 190.04batch/s, Batch Loss=0.0254, Avg Loss=0.1215, Time Left=26.59[A
Epoch 2/3 - Training:   3%| | 101/3393 [00:13<10:30,  5.22batch/s, Batch Loss=0.0254, Avg Loss=0.1215, Time Left=26.59 [A
Epoch 2/3 - Training:   3%| | 101/3393 [00:14<10:30,  5.22batch/s, Batch Loss=0.1509, Avg Loss=0.1225, Time Left=26.65 [A
Epoch 2/3 - Training:   3%| | 102/3393 [00:14<10:51,  5.05batch/s, Batch Loss=0.1509, Avg Loss=0.1225, Time Left=26.65 [A
Epoch 2/3 - Training:   3%| | 102/3393 [00:14<10:51,  5.05b

Epoch 2/3 - Training:   4%| | 130/3393 [00:27<27:01,  2.01batch/s, Batch Loss=0.0492, Avg Loss=0.1307, Time Left=26.81 [A
Epoch 2/3 - Training:   4%| | 130/3393 [00:28<27:01,  2.01batch/s, Batch Loss=0.0400, Avg Loss=0.1291, Time Left=26.82 [A
Epoch 2/3 - Training:   4%| | 131/3393 [00:28<27:02,  2.01batch/s, Batch Loss=0.0400, Avg Loss=0.1291, Time Left=26.82 [A
Epoch 2/3 - Training:   4%| | 131/3393 [00:28<27:02,  2.01batch/s, Batch Loss=0.0785, Avg Loss=0.1283, Time Left=26.84 [A
Epoch 2/3 - Training:   4%| | 132/3393 [00:28<27:20,  1.99batch/s, Batch Loss=0.0785, Avg Loss=0.1283, Time Left=26.84 [A
Epoch 2/3 - Training:   4%| | 132/3393 [00:29<27:20,  1.99batch/s, Batch Loss=0.0329, Avg Loss=0.1267, Time Left=26.84 [A
Epoch 2/3 - Training:   4%| | 133/3393 [00:29<27:04,  2.01batch/s, Batch Loss=0.0329, Avg Loss=0.1267, Time Left=26.84 [A
Epoch 2/3 - Training:   4%| | 133/3393 [00:29<27:04,  2.01batch/s, Batch Loss=0.1514, Avg Loss=0.1271, Time Left=26.83 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:   5%| | 163/3393 [00:44<25:42,  2.09batch/s, Batch Loss=0.2567, Avg Loss=0.1329, Time Left=26.65 [A
Epoch 2/3 - Training:   5%| | 163/3393 [00:44<25:42,  2.09batch/s, Batch Loss=0.0617, Avg Loss=0.1321, Time Left=26.64 [A
Epoch 2/3 - Training:   5%| | 164/3393 [00:44<25:47,  2.09batch/s, Batch Loss=0.0617, Avg Loss=0.1321, Time Left=26.64 [A
Epoch 2/3 - Training:   5%| | 164/3393 [00:45<25:47,  2.09batch/s, Batch Loss=0.3221, Avg Loss=0.1341, Time Left=26.63 [A
Epoch 2/3 - Training:   5%| | 165/3393 [00:45<25:47,  2.09batch/s, Batch Loss=0.3221, Avg Loss=0.1341, Time Left=26.63 [A
Epoch 2/3 - Training:   5%| | 165/3393 [00:45<25:47,  2.09batch/s, Batch Loss=0.1231, Avg Loss=0.1340, Time Left=26.62 [A
Epoch 2/3 - Training:   5%| | 166/3393 [00:45<25:54,  2.08batch/s, Batch Loss=0.1231, Avg Loss=0.1340, Time Left=26.62 [A
Epoch 2/3 - Training:   5%| | 166/3393 [00:46<25:54,  2.08batch/s, Batch Loss=0.0394, Avg Loss=0.1330, Time Left=26.62 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:   6%| | 196/3393 [01:00<26:45,  1.99batch/s, Batch Loss=0.2643, Avg Loss=0.1311, Time Left=26.50 [A
Epoch 2/3 - Training:   6%| | 196/3393 [01:00<26:45,  1.99batch/s, Batch Loss=0.0823, Avg Loss=0.1307, Time Left=26.49 [A
Epoch 2/3 - Training:   6%| | 197/3393 [01:00<26:14,  2.03batch/s, Batch Loss=0.0823, Avg Loss=0.1307, Time Left=26.49 [A
Epoch 2/3 - Training:   6%| | 197/3393 [01:01<26:14,  2.03batch/s, Batch Loss=0.0777, Avg Loss=0.1303, Time Left=26.48 [A
Epoch 2/3 - Training:   6%| | 198/3393 [01:01<26:05,  2.04batch/s, Batch Loss=0.0777, Avg Loss=0.1303, Time Left=26.48 [A
Epoch 2/3 - Training:   6%| | 198/3393 [01:01<26:05,  2.04batch/s, Batch Loss=0.0695, Avg Loss=0.1298, Time Left=26.47 [A
Epoch 2/3 - Training:   6%| | 199/3393 [01:01<25:58,  2.05batch/s, Batch Loss=0.0695, Avg Loss=0.1298, Time Left=26.47 [A
Epoch 2/3 - Training:   6%| | 199/3393 [01:02<25:58,  2.05batch/s, Batch Loss=0.0713, Avg Loss=0.1294, Time Left=26.47 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:   7%| | 229/3393 [01:16<25:44,  2.05batch/s, Batch Loss=0.3139, Avg Loss=0.1218, Time Left=26.24 [A
Epoch 2/3 - Training:   7%| | 229/3393 [01:16<25:44,  2.05batch/s, Batch Loss=0.0849, Avg Loss=0.1216, Time Left=26.25 [A
Epoch 2/3 - Training:   7%| | 230/3393 [01:16<26:09,  2.02batch/s, Batch Loss=0.0849, Avg Loss=0.1216, Time Left=26.25 [A
Epoch 2/3 - Training:   7%| | 230/3393 [01:17<26:09,  2.02batch/s, Batch Loss=0.5479, Avg Loss=0.1243, Time Left=26.24 [A
Epoch 2/3 - Training:   7%| | 231/3393 [01:17<26:14,  2.01batch/s, Batch Loss=0.5479, Avg Loss=0.1243, Time Left=26.24 [A
Epoch 2/3 - Training:   7%| | 231/3393 [01:17<26:14,  2.01batch/s, Batch Loss=0.0229, Avg Loss=0.1236, Time Left=26.25 [A
Epoch 2/3 - Training:   7%| | 232/3393 [01:17<26:43,  1.97batch/s, Batch Loss=0.0229, Avg Loss=0.1236, Time Left=26.25 [A
Epoch 2/3 - Training:   7%| | 232/3393 [01:18<26:43,  1.97batch/s, Batch Loss=0.3341, Avg Loss=0.1249, Time Left=26.24 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:   8%| | 262/3393 [01:32<25:07,  2.08batch/s, Batch Loss=0.1452, Avg Loss=0.1316, Time Left=26.02 [A
Epoch 2/3 - Training:   8%| | 262/3393 [01:33<25:07,  2.08batch/s, Batch Loss=0.0247, Avg Loss=0.1310, Time Left=26.01 [A
Epoch 2/3 - Training:   8%| | 263/3393 [01:33<24:55,  2.09batch/s, Batch Loss=0.0247, Avg Loss=0.1310, Time Left=26.01 [A
Epoch 2/3 - Training:   8%| | 263/3393 [01:33<24:55,  2.09batch/s, Batch Loss=0.0643, Avg Loss=0.1307, Time Left=26.00 [A
Epoch 2/3 - Training:   8%| | 264/3393 [01:33<24:45,  2.11batch/s, Batch Loss=0.0643, Avg Loss=0.1307, Time Left=26.00 [A
Epoch 2/3 - Training:   8%| | 264/3393 [01:34<24:45,  2.11batch/s, Batch Loss=0.0796, Avg Loss=0.1304, Time Left=25.99 [A
Epoch 2/3 - Training:   8%| | 265/3393 [01:34<25:16,  2.06batch/s, Batch Loss=0.0796, Avg Loss=0.1304, Time Left=25.99 [A
Epoch 2/3 - Training:   8%| | 265/3393 [01:34<25:16,  2.06batch/s, Batch Loss=0.0550, Avg Loss=0.1300, Time Left=25.98 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:   9%| | 295/3393 [01:48<25:36,  2.02batch/s, Batch Loss=0.0946, Avg Loss=0.1336, Time Left=25.79 [A
Epoch 2/3 - Training:   9%| | 295/3393 [01:49<25:36,  2.02batch/s, Batch Loss=0.1066, Avg Loss=0.1335, Time Left=25.78 [A
Epoch 2/3 - Training:   9%| | 296/3393 [01:49<25:25,  2.03batch/s, Batch Loss=0.1066, Avg Loss=0.1335, Time Left=25.78 [A
Epoch 2/3 - Training:   9%| | 296/3393 [01:49<25:25,  2.03batch/s, Batch Loss=0.0281, Avg Loss=0.1330, Time Left=25.77 [A
Epoch 2/3 - Training:   9%| | 297/3393 [01:49<25:17,  2.04batch/s, Batch Loss=0.0281, Avg Loss=0.1330, Time Left=25.77 [A
Epoch 2/3 - Training:   9%| | 297/3393 [01:50<25:17,  2.04batch/s, Batch Loss=0.0249, Avg Loss=0.1325, Time Left=25.77 [A
Epoch 2/3 - Training:   9%| | 298/3393 [01:50<25:24,  2.03batch/s, Batch Loss=0.0249, Avg Loss=0.1325, Time Left=25.77 [A
Epoch 2/3 - Training:   9%| | 298/3393 [01:50<25:24,  2.03batch/s, Batch Loss=0.0149, Avg Loss=0.1320, Time Left=25.75 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  10%| | 328/3393 [02:04<24:34,  2.08batch/s, Batch Loss=0.2157, Avg Loss=0.1415, Time Left=25.51 [A
Epoch 2/3 - Training:  10%| | 328/3393 [02:05<24:34,  2.08batch/s, Batch Loss=0.1438, Avg Loss=0.1415, Time Left=25.50 [A
Epoch 2/3 - Training:  10%| | 329/3393 [02:05<24:28,  2.09batch/s, Batch Loss=0.1438, Avg Loss=0.1415, Time Left=25.50 [A
Epoch 2/3 - Training:  10%| | 329/3393 [02:05<24:28,  2.09batch/s, Batch Loss=0.1516, Avg Loss=0.1415, Time Left=25.49 [A
Epoch 2/3 - Training:  10%| | 330/3393 [02:05<24:35,  2.08batch/s, Batch Loss=0.1516, Avg Loss=0.1415, Time Left=25.49 [A
Epoch 2/3 - Training:  10%| | 330/3393 [02:06<24:35,  2.08batch/s, Batch Loss=0.0155, Avg Loss=0.1411, Time Left=25.48 [A
Epoch 2/3 - Training:  10%| | 331/3393 [02:06<24:16,  2.10batch/s, Batch Loss=0.0155, Avg Loss=0.1411, Time Left=25.48 [A
Epoch 2/3 - Training:  10%| | 331/3393 [02:06<24:16,  2.10batch/s, Batch Loss=0.0348, Avg Loss=0.1406, Time Left=25.47 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  11%| | 361/3393 [02:20<24:27,  2.07batch/s, Batch Loss=0.1491, Avg Loss=0.1391, Time Left=25.21 [A
Epoch 2/3 - Training:  11%| | 361/3393 [02:21<24:27,  2.07batch/s, Batch Loss=0.0191, Avg Loss=0.1387, Time Left=25.20 [A
Epoch 2/3 - Training:  11%| | 362/3393 [02:21<24:26,  2.07batch/s, Batch Loss=0.0191, Avg Loss=0.1387, Time Left=25.20 [A
Epoch 2/3 - Training:  11%| | 362/3393 [02:21<24:26,  2.07batch/s, Batch Loss=0.0437, Avg Loss=0.1384, Time Left=25.20 [A
Epoch 2/3 - Training:  11%| | 363/3393 [02:21<24:53,  2.03batch/s, Batch Loss=0.0437, Avg Loss=0.1384, Time Left=25.20 [A
Epoch 2/3 - Training:  11%| | 363/3393 [02:22<24:53,  2.03batch/s, Batch Loss=0.0462, Avg Loss=0.1381, Time Left=25.19 [A
Epoch 2/3 - Training:  11%| | 364/3393 [02:22<24:45,  2.04batch/s, Batch Loss=0.0462, Avg Loss=0.1381, Time Left=25.19 [A
Epoch 2/3 - Training:  11%| | 364/3393 [02:22<24:45,  2.04batch/s, Batch Loss=0.2810, Avg Loss=0.1386, Time Left=25.18 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  12%| | 394/3393 [02:37<24:18,  2.06batch/s, Batch Loss=0.1064, Avg Loss=0.1357, Time Left=24.97 [A
Epoch 2/3 - Training:  12%| | 394/3393 [02:37<24:18,  2.06batch/s, Batch Loss=0.1604, Avg Loss=0.1358, Time Left=24.96 [A
Epoch 2/3 - Training:  12%| | 395/3393 [02:37<24:57,  2.00batch/s, Batch Loss=0.1604, Avg Loss=0.1358, Time Left=24.96 [A
Epoch 2/3 - Training:  12%| | 395/3393 [02:38<24:57,  2.00batch/s, Batch Loss=0.0643, Avg Loss=0.1356, Time Left=24.96 [A
Epoch 2/3 - Training:  12%| | 396/3393 [02:38<24:44,  2.02batch/s, Batch Loss=0.0643, Avg Loss=0.1356, Time Left=24.96 [A
Epoch 2/3 - Training:  12%| | 396/3393 [02:38<24:44,  2.02batch/s, Batch Loss=0.0040, Avg Loss=0.1352, Time Left=24.95 [A
Epoch 2/3 - Training:  12%| | 397/3393 [02:38<25:02,  1.99batch/s, Batch Loss=0.0040, Avg Loss=0.1352, Time Left=24.95 [A
Epoch 2/3 - Training:  12%| | 397/3393 [02:39<25:02,  1.99batch/s, Batch Loss=0.0700, Avg Loss=0.1350, Time Left=24.95 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  13%|▏| 427/3393 [02:53<24:21,  2.03batch/s, Batch Loss=0.1573, Avg Loss=0.1316, Time Left=24.77 [A
Epoch 2/3 - Training:  13%|▏| 427/3393 [02:54<24:21,  2.03batch/s, Batch Loss=0.1422, Avg Loss=0.1316, Time Left=24.76 [A
Epoch 2/3 - Training:  13%|▏| 428/3393 [02:54<24:28,  2.02batch/s, Batch Loss=0.1422, Avg Loss=0.1316, Time Left=24.76 [A
Epoch 2/3 - Training:  13%|▏| 428/3393 [02:54<24:28,  2.02batch/s, Batch Loss=0.2007, Avg Loss=0.1318, Time Left=24.75 [A
Epoch 2/3 - Training:  13%|▏| 429/3393 [02:54<24:10,  2.04batch/s, Batch Loss=0.2007, Avg Loss=0.1318, Time Left=24.75 [A
Epoch 2/3 - Training:  13%|▏| 429/3393 [02:55<24:10,  2.04batch/s, Batch Loss=0.1053, Avg Loss=0.1317, Time Left=24.74 [A
Epoch 2/3 - Training:  13%|▏| 430/3393 [02:55<24:11,  2.04batch/s, Batch Loss=0.1053, Avg Loss=0.1317, Time Left=24.74 [A
Epoch 2/3 - Training:  13%|▏| 430/3393 [02:55<24:11,  2.04batch/s, Batch Loss=0.1066, Avg Loss=0.1316, Time Left=24.74 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  14%|▏| 460/3393 [03:09<24:12,  2.02batch/s, Batch Loss=0.0281, Avg Loss=0.1298, Time Left=24.51 [A
Epoch 2/3 - Training:  14%|▏| 460/3393 [03:10<24:12,  2.02batch/s, Batch Loss=0.0097, Avg Loss=0.1294, Time Left=24.50 [A
Epoch 2/3 - Training:  14%|▏| 461/3393 [03:10<24:04,  2.03batch/s, Batch Loss=0.0097, Avg Loss=0.1294, Time Left=24.50 [A
Epoch 2/3 - Training:  14%|▏| 461/3393 [03:10<24:04,  2.03batch/s, Batch Loss=0.0565, Avg Loss=0.1293, Time Left=24.49 [A
Epoch 2/3 - Training:  14%|▏| 462/3393 [03:10<23:38,  2.07batch/s, Batch Loss=0.0565, Avg Loss=0.1293, Time Left=24.49 [A
Epoch 2/3 - Training:  14%|▏| 462/3393 [03:11<23:38,  2.07batch/s, Batch Loss=0.0383, Avg Loss=0.1290, Time Left=24.49 [A
Epoch 2/3 - Training:  14%|▏| 463/3393 [03:11<23:50,  2.05batch/s, Batch Loss=0.0383, Avg Loss=0.1290, Time Left=24.49 [A
Epoch 2/3 - Training:  14%|▏| 463/3393 [03:11<23:50,  2.05batch/s, Batch Loss=0.0548, Avg Loss=0.1288, Time Left=24.48 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  15%|▏| 493/3393 [03:26<24:06,  2.00batch/s, Batch Loss=0.0169, Avg Loss=0.1267, Time Left=24.25 [A
Epoch 2/3 - Training:  15%|▏| 493/3393 [03:26<24:06,  2.00batch/s, Batch Loss=0.0780, Avg Loss=0.1266, Time Left=24.24 [A
Epoch 2/3 - Training:  15%|▏| 494/3393 [03:26<23:39,  2.04batch/s, Batch Loss=0.0780, Avg Loss=0.1266, Time Left=24.24 [A
Epoch 2/3 - Training:  15%|▏| 494/3393 [03:27<23:39,  2.04batch/s, Batch Loss=0.0717, Avg Loss=0.1264, Time Left=24.23 [A
Epoch 2/3 - Training:  15%|▏| 495/3393 [03:27<23:33,  2.05batch/s, Batch Loss=0.0717, Avg Loss=0.1264, Time Left=24.23 [A
Epoch 2/3 - Training:  15%|▏| 495/3393 [03:27<23:33,  2.05batch/s, Batch Loss=0.0041, Avg Loss=0.1262, Time Left=24.22 [A
Epoch 2/3 - Training:  15%|▏| 496/3393 [03:27<23:55,  2.02batch/s, Batch Loss=0.0041, Avg Loss=0.1262, Time Left=24.22 [A
Epoch 2/3 - Training:  15%|▏| 496/3393 [03:28<23:55,  2.02batch/s, Batch Loss=0.1337, Avg Loss=0.1262, Time Left=24.22 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  16%|▏| 526/3393 [03:42<24:03,  1.99batch/s, Batch Loss=0.1898, Avg Loss=0.1273, Time Left=23.99 [A
Epoch 2/3 - Training:  16%|▏| 526/3393 [03:42<24:03,  1.99batch/s, Batch Loss=0.0121, Avg Loss=0.1271, Time Left=23.98 [A
Epoch 2/3 - Training:  16%|▏| 527/3393 [03:42<23:48,  2.01batch/s, Batch Loss=0.0121, Avg Loss=0.1271, Time Left=23.98 [A
Epoch 2/3 - Training:  16%|▏| 527/3393 [03:43<23:48,  2.01batch/s, Batch Loss=0.0390, Avg Loss=0.1269, Time Left=23.98 [A
Epoch 2/3 - Training:  16%|▏| 528/3393 [03:43<24:01,  1.99batch/s, Batch Loss=0.0390, Avg Loss=0.1269, Time Left=23.98 [A
Epoch 2/3 - Training:  16%|▏| 528/3393 [03:43<24:01,  1.99batch/s, Batch Loss=0.0306, Avg Loss=0.1267, Time Left=23.97 [A
Epoch 2/3 - Training:  16%|▏| 529/3393 [03:43<23:33,  2.03batch/s, Batch Loss=0.0306, Avg Loss=0.1267, Time Left=23.97 [A
Epoch 2/3 - Training:  16%|▏| 529/3393 [03:44<23:33,  2.03batch/s, Batch Loss=0.0155, Avg Loss=0.1264, Time Left=23.96 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  16%|▏| 559/3393 [03:58<23:14,  2.03batch/s, Batch Loss=0.0845, Avg Loss=0.1271, Time Left=23.75 [A
Epoch 2/3 - Training:  16%|▏| 559/3393 [03:59<23:14,  2.03batch/s, Batch Loss=0.0771, Avg Loss=0.1270, Time Left=23.74 [A
Epoch 2/3 - Training:  17%|▏| 560/3393 [03:59<23:33,  2.00batch/s, Batch Loss=0.0771, Avg Loss=0.1270, Time Left=23.74 [A
Epoch 2/3 - Training:  17%|▏| 560/3393 [03:59<23:33,  2.00batch/s, Batch Loss=0.0278, Avg Loss=0.1268, Time Left=23.73 [A
Epoch 2/3 - Training:  17%|▏| 561/3393 [03:59<23:08,  2.04batch/s, Batch Loss=0.0278, Avg Loss=0.1268, Time Left=23.73 [A
Epoch 2/3 - Training:  17%|▏| 561/3393 [04:00<23:08,  2.04batch/s, Batch Loss=0.0905, Avg Loss=0.1267, Time Left=23.72 [A
Epoch 2/3 - Training:  17%|▏| 562/3393 [04:00<23:02,  2.05batch/s, Batch Loss=0.0905, Avg Loss=0.1267, Time Left=23.72 [A
Epoch 2/3 - Training:  17%|▏| 562/3393 [04:00<23:02,  2.05batch/s, Batch Loss=0.1177, Avg Loss=0.1267, Time Left=23.71 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  17%|▏| 592/3393 [04:14<22:13,  2.10batch/s, Batch Loss=0.2306, Avg Loss=0.1280, Time Left=23.46 [A
Epoch 2/3 - Training:  17%|▏| 592/3393 [04:15<22:13,  2.10batch/s, Batch Loss=0.0237, Avg Loss=0.1278, Time Left=23.46 [A
Epoch 2/3 - Training:  17%|▏| 593/3393 [04:15<22:35,  2.07batch/s, Batch Loss=0.0237, Avg Loss=0.1278, Time Left=23.46 [A
Epoch 2/3 - Training:  17%|▏| 593/3393 [04:15<22:35,  2.07batch/s, Batch Loss=0.0842, Avg Loss=0.1278, Time Left=23.45 [A
Epoch 2/3 - Training:  18%|▏| 594/3393 [04:15<22:31,  2.07batch/s, Batch Loss=0.0842, Avg Loss=0.1278, Time Left=23.45 [A
Epoch 2/3 - Training:  18%|▏| 594/3393 [04:16<22:31,  2.07batch/s, Batch Loss=0.1625, Avg Loss=0.1278, Time Left=23.44 [A
Epoch 2/3 - Training:  18%|▏| 595/3393 [04:16<22:18,  2.09batch/s, Batch Loss=0.1625, Avg Loss=0.1278, Time Left=23.44 [A
Epoch 2/3 - Training:  18%|▏| 595/3393 [04:16<22:18,  2.09batch/s, Batch Loss=0.0982, Avg Loss=0.1278, Time Left=23.43 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  18%|▏| 625/3393 [04:31<22:43,  2.03batch/s, Batch Loss=0.2175, Avg Loss=0.1279, Time Left=23.19 [A
Epoch 2/3 - Training:  18%|▏| 625/3393 [04:31<22:43,  2.03batch/s, Batch Loss=0.0762, Avg Loss=0.1278, Time Left=23.19 [A
Epoch 2/3 - Training:  18%|▏| 626/3393 [04:31<22:22,  2.06batch/s, Batch Loss=0.0762, Avg Loss=0.1278, Time Left=23.19 [A
Epoch 2/3 - Training:  18%|▏| 626/3393 [04:31<22:22,  2.06batch/s, Batch Loss=0.0428, Avg Loss=0.1276, Time Left=23.17 [A
Epoch 2/3 - Training:  18%|▏| 627/3393 [04:31<22:07,  2.08batch/s, Batch Loss=0.0428, Avg Loss=0.1276, Time Left=23.17 [A
Epoch 2/3 - Training:  18%|▏| 627/3393 [04:32<22:07,  2.08batch/s, Batch Loss=0.1532, Avg Loss=0.1277, Time Left=23.17 [A
Epoch 2/3 - Training:  19%|▏| 628/3393 [04:32<21:55,  2.10batch/s, Batch Loss=0.1532, Avg Loss=0.1277, Time Left=23.17 [A
Epoch 2/3 - Training:  19%|▏| 628/3393 [04:32<21:55,  2.10batch/s, Batch Loss=0.0866, Avg Loss=0.1276, Time Left=23.16 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  19%|▏| 658/3393 [04:47<23:02,  1.98batch/s, Batch Loss=0.1465, Avg Loss=0.1261, Time Left=22.94 [A
Epoch 2/3 - Training:  19%|▏| 658/3393 [04:47<23:02,  1.98batch/s, Batch Loss=0.0446, Avg Loss=0.1259, Time Left=22.93 [A
Epoch 2/3 - Training:  19%|▏| 659/3393 [04:47<22:34,  2.02batch/s, Batch Loss=0.0446, Avg Loss=0.1259, Time Left=22.93 [A
Epoch 2/3 - Training:  19%|▏| 659/3393 [04:48<22:34,  2.02batch/s, Batch Loss=0.0663, Avg Loss=0.1258, Time Left=22.92 [A
Epoch 2/3 - Training:  19%|▏| 660/3393 [04:48<22:34,  2.02batch/s, Batch Loss=0.0663, Avg Loss=0.1258, Time Left=22.92 [A
Epoch 2/3 - Training:  19%|▏| 660/3393 [04:48<22:34,  2.02batch/s, Batch Loss=0.2727, Avg Loss=0.1261, Time Left=22.91 [A
Epoch 2/3 - Training:  19%|▏| 661/3393 [04:48<22:26,  2.03batch/s, Batch Loss=0.2727, Avg Loss=0.1261, Time Left=22.91 [A
Epoch 2/3 - Training:  19%|▏| 661/3393 [04:49<22:26,  2.03batch/s, Batch Loss=0.3131, Avg Loss=0.1264, Time Left=22.91 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  20%|▏| 691/3393 [05:03<22:20,  2.02batch/s, Batch Loss=0.6176, Avg Loss=0.1261, Time Left=22.68 [A
Epoch 2/3 - Training:  20%|▏| 691/3393 [05:04<22:20,  2.02batch/s, Batch Loss=0.5837, Avg Loss=0.1269, Time Left=22.67 [A
Epoch 2/3 - Training:  20%|▏| 692/3393 [05:04<22:10,  2.03batch/s, Batch Loss=0.5837, Avg Loss=0.1269, Time Left=22.67 [A
Epoch 2/3 - Training:  20%|▏| 692/3393 [05:04<22:10,  2.03batch/s, Batch Loss=0.0693, Avg Loss=0.1268, Time Left=22.66 [A
Epoch 2/3 - Training:  20%|▏| 693/3393 [05:04<22:02,  2.04batch/s, Batch Loss=0.0693, Avg Loss=0.1268, Time Left=22.66 [A
Epoch 2/3 - Training:  20%|▏| 693/3393 [05:05<22:02,  2.04batch/s, Batch Loss=0.0489, Avg Loss=0.1267, Time Left=22.65 [A
Epoch 2/3 - Training:  20%|▏| 694/3393 [05:05<22:08,  2.03batch/s, Batch Loss=0.0489, Avg Loss=0.1267, Time Left=22.65 [A
Epoch 2/3 - Training:  20%|▏| 694/3393 [05:05<22:08,  2.03batch/s, Batch Loss=0.1008, Avg Loss=0.1266, Time Left=22.64 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  21%|▏| 724/3393 [05:19<22:27,  1.98batch/s, Batch Loss=0.0230, Avg Loss=0.1271, Time Left=22.42 [A
Epoch 2/3 - Training:  21%|▏| 724/3393 [05:20<22:27,  1.98batch/s, Batch Loss=0.0387, Avg Loss=0.1270, Time Left=22.41 [A
Epoch 2/3 - Training:  21%|▏| 725/3393 [05:20<22:23,  1.99batch/s, Batch Loss=0.0387, Avg Loss=0.1270, Time Left=22.41 [A
Epoch 2/3 - Training:  21%|▏| 725/3393 [05:20<22:23,  1.99batch/s, Batch Loss=0.2680, Avg Loss=0.1272, Time Left=22.40 [A
Epoch 2/3 - Training:  21%|▏| 726/3393 [05:20<22:11,  2.00batch/s, Batch Loss=0.2680, Avg Loss=0.1272, Time Left=22.40 [A
Epoch 2/3 - Training:  21%|▏| 726/3393 [05:21<22:11,  2.00batch/s, Batch Loss=0.0100, Avg Loss=0.1270, Time Left=22.40 [A
Epoch 2/3 - Training:  21%|▏| 727/3393 [05:21<21:54,  2.03batch/s, Batch Loss=0.0100, Avg Loss=0.1270, Time Left=22.40 [A
Epoch 2/3 - Training:  21%|▏| 727/3393 [05:21<21:54,  2.03batch/s, Batch Loss=0.0194, Avg Loss=0.1268, Time Left=22.39 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  22%|▏| 757/3393 [05:36<21:18,  2.06batch/s, Batch Loss=0.0815, Avg Loss=0.1273, Time Left=22.15 [A
Epoch 2/3 - Training:  22%|▏| 757/3393 [05:36<21:18,  2.06batch/s, Batch Loss=0.1896, Avg Loss=0.1274, Time Left=22.14 [A
Epoch 2/3 - Training:  22%|▏| 758/3393 [05:36<21:08,  2.08batch/s, Batch Loss=0.1896, Avg Loss=0.1274, Time Left=22.14 [A
Epoch 2/3 - Training:  22%|▏| 758/3393 [05:37<21:08,  2.08batch/s, Batch Loss=0.1648, Avg Loss=0.1274, Time Left=22.14 [A
Epoch 2/3 - Training:  22%|▏| 759/3393 [05:37<21:42,  2.02batch/s, Batch Loss=0.1648, Avg Loss=0.1274, Time Left=22.14 [A
Epoch 2/3 - Training:  22%|▏| 759/3393 [05:37<21:42,  2.02batch/s, Batch Loss=0.0753, Avg Loss=0.1274, Time Left=22.13 [A
Epoch 2/3 - Training:  22%|▏| 760/3393 [05:37<21:35,  2.03batch/s, Batch Loss=0.0753, Avg Loss=0.1274, Time Left=22.13 [A
Epoch 2/3 - Training:  22%|▏| 760/3393 [05:38<21:35,  2.03batch/s, Batch Loss=0.0863, Avg Loss=0.1273, Time Left=22.12 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  23%|▏| 790/3393 [05:52<21:43,  2.00batch/s, Batch Loss=0.0109, Avg Loss=0.1274, Time Left=21.89 [A
Epoch 2/3 - Training:  23%|▏| 790/3393 [05:52<21:43,  2.00batch/s, Batch Loss=0.0589, Avg Loss=0.1273, Time Left=21.88 [A
Epoch 2/3 - Training:  23%|▏| 791/3393 [05:52<21:41,  2.00batch/s, Batch Loss=0.0589, Avg Loss=0.1273, Time Left=21.88 [A
Epoch 2/3 - Training:  23%|▏| 791/3393 [05:53<21:41,  2.00batch/s, Batch Loss=0.0904, Avg Loss=0.1273, Time Left=21.87 [A
Epoch 2/3 - Training:  23%|▏| 792/3393 [05:53<21:29,  2.02batch/s, Batch Loss=0.0904, Avg Loss=0.1273, Time Left=21.87 [A
Epoch 2/3 - Training:  23%|▏| 792/3393 [05:54<21:29,  2.02batch/s, Batch Loss=0.1199, Avg Loss=0.1273, Time Left=21.87 [A
Epoch 2/3 - Training:  23%|▏| 793/3393 [05:54<22:03,  1.96batch/s, Batch Loss=0.1199, Avg Loss=0.1273, Time Left=21.87 [A
Epoch 2/3 - Training:  23%|▏| 793/3393 [05:54<22:03,  1.96batch/s, Batch Loss=0.2663, Avg Loss=0.1274, Time Left=21.86 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  24%|▏| 823/3393 [06:08<21:03,  2.03batch/s, Batch Loss=0.0865, Avg Loss=0.1273, Time Left=21.62 [A
Epoch 2/3 - Training:  24%|▏| 823/3393 [06:09<21:03,  2.03batch/s, Batch Loss=0.0630, Avg Loss=0.1272, Time Left=21.61 [A
Epoch 2/3 - Training:  24%|▏| 824/3393 [06:09<20:46,  2.06batch/s, Batch Loss=0.0630, Avg Loss=0.1272, Time Left=21.61 [A
Epoch 2/3 - Training:  24%|▏| 824/3393 [06:09<20:46,  2.06batch/s, Batch Loss=0.3429, Avg Loss=0.1275, Time Left=21.61 [A
Epoch 2/3 - Training:  24%|▏| 825/3393 [06:09<21:22,  2.00batch/s, Batch Loss=0.3429, Avg Loss=0.1275, Time Left=21.61 [A
Epoch 2/3 - Training:  24%|▏| 825/3393 [06:10<21:22,  2.00batch/s, Batch Loss=0.0277, Avg Loss=0.1274, Time Left=21.60 [A
Epoch 2/3 - Training:  24%|▏| 826/3393 [06:10<21:07,  2.02batch/s, Batch Loss=0.0277, Avg Loss=0.1274, Time Left=21.60 [A
Epoch 2/3 - Training:  24%|▏| 826/3393 [06:10<21:07,  2.02batch/s, Batch Loss=0.0854, Avg Loss=0.1273, Time Left=21.59 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  25%|▎| 856/3393 [06:25<20:45,  2.04batch/s, Batch Loss=0.0982, Avg Loss=0.1271, Time Left=21.35 [A
Epoch 2/3 - Training:  25%|▎| 856/3393 [06:25<20:45,  2.04batch/s, Batch Loss=0.1698, Avg Loss=0.1271, Time Left=21.35 [A
Epoch 2/3 - Training:  25%|▎| 857/3393 [06:25<20:51,  2.03batch/s, Batch Loss=0.1698, Avg Loss=0.1271, Time Left=21.35 [A
Epoch 2/3 - Training:  25%|▎| 857/3393 [06:25<20:51,  2.03batch/s, Batch Loss=0.3366, Avg Loss=0.1274, Time Left=21.34 [A
Epoch 2/3 - Training:  25%|▎| 858/3393 [06:25<20:33,  2.06batch/s, Batch Loss=0.3366, Avg Loss=0.1274, Time Left=21.34 [A
Epoch 2/3 - Training:  25%|▎| 858/3393 [06:26<20:33,  2.06batch/s, Batch Loss=0.0843, Avg Loss=0.1273, Time Left=21.33 [A
Epoch 2/3 - Training:  25%|▎| 859/3393 [06:26<20:40,  2.04batch/s, Batch Loss=0.0843, Avg Loss=0.1273, Time Left=21.33 [A
Epoch 2/3 - Training:  25%|▎| 859/3393 [06:26<20:40,  2.04batch/s, Batch Loss=0.0901, Avg Loss=0.1273, Time Left=21.32 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  26%|▎| 889/3393 [06:41<21:00,  1.99batch/s, Batch Loss=0.1376, Avg Loss=0.1266, Time Left=21.08 [A
Epoch 2/3 - Training:  26%|▎| 889/3393 [06:41<21:00,  1.99batch/s, Batch Loss=0.2143, Avg Loss=0.1267, Time Left=21.07 [A
Epoch 2/3 - Training:  26%|▎| 890/3393 [06:41<20:46,  2.01batch/s, Batch Loss=0.2143, Avg Loss=0.1267, Time Left=21.07 [A
Epoch 2/3 - Training:  26%|▎| 890/3393 [06:42<20:46,  2.01batch/s, Batch Loss=0.0732, Avg Loss=0.1267, Time Left=21.06 [A
Epoch 2/3 - Training:  26%|▎| 891/3393 [06:42<20:52,  2.00batch/s, Batch Loss=0.0732, Avg Loss=0.1267, Time Left=21.06 [A
Epoch 2/3 - Training:  26%|▎| 891/3393 [06:42<20:52,  2.00batch/s, Batch Loss=0.1588, Avg Loss=0.1267, Time Left=21.05 [A
Epoch 2/3 - Training:  26%|▎| 892/3393 [06:42<20:11,  2.06batch/s, Batch Loss=0.1588, Avg Loss=0.1267, Time Left=21.05 [A
Epoch 2/3 - Training:  26%|▎| 892/3393 [06:43<20:11,  2.06batch/s, Batch Loss=0.3260, Avg Loss=0.1270, Time Left=21.04 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  27%|▎| 922/3393 [06:57<19:58,  2.06batch/s, Batch Loss=0.3592, Avg Loss=0.1265, Time Left=20.80 [A
Epoch 2/3 - Training:  27%|▎| 922/3393 [06:57<19:58,  2.06batch/s, Batch Loss=0.2426, Avg Loss=0.1266, Time Left=20.79 [A
Epoch 2/3 - Training:  27%|▎| 923/3393 [06:57<19:58,  2.06batch/s, Batch Loss=0.2426, Avg Loss=0.1266, Time Left=20.79 [A
Epoch 2/3 - Training:  27%|▎| 923/3393 [06:58<19:58,  2.06batch/s, Batch Loss=0.0844, Avg Loss=0.1266, Time Left=20.79 [A
Epoch 2/3 - Training:  27%|▎| 924/3393 [06:58<19:43,  2.09batch/s, Batch Loss=0.0844, Avg Loss=0.1266, Time Left=20.79 [A
Epoch 2/3 - Training:  27%|▎| 924/3393 [06:58<19:43,  2.09batch/s, Batch Loss=0.0508, Avg Loss=0.1265, Time Left=20.78 [A
Epoch 2/3 - Training:  27%|▎| 925/3393 [06:58<19:46,  2.08batch/s, Batch Loss=0.0508, Avg Loss=0.1265, Time Left=20.78 [A
Epoch 2/3 - Training:  27%|▎| 925/3393 [06:59<19:46,  2.08batch/s, Batch Loss=0.0140, Avg Loss=0.1264, Time Left=20.77 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  28%|▎| 955/3393 [07:13<19:25,  2.09batch/s, Batch Loss=0.1272, Avg Loss=0.1249, Time Left=20.53 [A
Epoch 2/3 - Training:  28%|▎| 955/3393 [07:13<19:25,  2.09batch/s, Batch Loss=0.0199, Avg Loss=0.1248, Time Left=20.52 [A
Epoch 2/3 - Training:  28%|▎| 956/3393 [07:13<19:40,  2.06batch/s, Batch Loss=0.0199, Avg Loss=0.1248, Time Left=20.52 [A
Epoch 2/3 - Training:  28%|▎| 956/3393 [07:14<19:40,  2.06batch/s, Batch Loss=0.5064, Avg Loss=0.1252, Time Left=20.51 [A
Epoch 2/3 - Training:  28%|▎| 957/3393 [07:14<19:16,  2.11batch/s, Batch Loss=0.5064, Avg Loss=0.1252, Time Left=20.51 [A
Epoch 2/3 - Training:  28%|▎| 957/3393 [07:14<19:16,  2.11batch/s, Batch Loss=0.0657, Avg Loss=0.1252, Time Left=20.51 [A
Epoch 2/3 - Training:  28%|▎| 958/3393 [07:14<19:21,  2.10batch/s, Batch Loss=0.0657, Avg Loss=0.1252, Time Left=20.51 [A
Epoch 2/3 - Training:  28%|▎| 958/3393 [07:15<19:21,  2.10batch/s, Batch Loss=0.0040, Avg Loss=0.1250, Time Left=20.50 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  29%|▎| 988/3393 [07:29<19:56,  2.01batch/s, Batch Loss=0.0673, Avg Loss=0.1249, Time Left=20.26 [A
Epoch 2/3 - Training:  29%|▎| 988/3393 [07:30<19:56,  2.01batch/s, Batch Loss=0.0256, Avg Loss=0.1248, Time Left=20.26 [A
Epoch 2/3 - Training:  29%|▎| 989/3393 [07:30<19:47,  2.02batch/s, Batch Loss=0.0256, Avg Loss=0.1248, Time Left=20.26 [A
Epoch 2/3 - Training:  29%|▎| 989/3393 [07:30<19:47,  2.02batch/s, Batch Loss=0.0533, Avg Loss=0.1247, Time Left=20.25 [A
Epoch 2/3 - Training:  29%|▎| 990/3393 [07:30<20:02,  2.00batch/s, Batch Loss=0.0533, Avg Loss=0.1247, Time Left=20.25 [A
Epoch 2/3 - Training:  29%|▎| 990/3393 [07:31<20:02,  2.00batch/s, Batch Loss=0.1838, Avg Loss=0.1248, Time Left=20.24 [A
Epoch 2/3 - Training:  29%|▎| 991/3393 [07:31<19:41,  2.03batch/s, Batch Loss=0.1838, Avg Loss=0.1248, Time Left=20.24 [A
Epoch 2/3 - Training:  29%|▎| 991/3393 [07:31<19:41,  2.03batch/s, Batch Loss=0.0327, Avg Loss=0.1247, Time Left=20.23 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  30%|▎| 1021/3393 [07:45<18:57,  2.08batch/s, Batch Loss=0.0237, Avg Loss=0.1245, Time Left=19.99[A
Epoch 2/3 - Training:  30%|▎| 1021/3393 [07:46<18:57,  2.08batch/s, Batch Loss=0.1004, Avg Loss=0.1245, Time Left=19.98[A
Epoch 2/3 - Training:  30%|▎| 1022/3393 [07:46<19:33,  2.02batch/s, Batch Loss=0.1004, Avg Loss=0.1245, Time Left=19.98[A
Epoch 2/3 - Training:  30%|▎| 1022/3393 [07:46<19:33,  2.02batch/s, Batch Loss=0.0943, Avg Loss=0.1244, Time Left=19.98[A
Epoch 2/3 - Training:  30%|▎| 1023/3393 [07:46<19:14,  2.05batch/s, Batch Loss=0.0943, Avg Loss=0.1244, Time Left=19.98[A
Epoch 2/3 - Training:  30%|▎| 1023/3393 [07:47<19:14,  2.05batch/s, Batch Loss=0.1556, Avg Loss=0.1245, Time Left=19.97[A
Epoch 2/3 - Training:  30%|▎| 1024/3393 [07:47<19:22,  2.04batch/s, Batch Loss=0.1556, Avg Loss=0.1245, Time Left=19.97[A
Epoch 2/3 - Training:  30%|▎| 1024/3393 [07:47<19:22,  2.04batch/s, Batch Loss=0.0553, Avg Loss=0.1244, Time Left=19.96[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  31%|▎| 1054/3393 [08:02<19:04,  2.04batch/s, Batch Loss=0.0143, Avg Loss=0.1237, Time Left=19.74[A
Epoch 2/3 - Training:  31%|▎| 1054/3393 [08:02<19:04,  2.04batch/s, Batch Loss=0.0672, Avg Loss=0.1236, Time Left=19.73[A
Epoch 2/3 - Training:  31%|▎| 1055/3393 [08:02<19:00,  2.05batch/s, Batch Loss=0.0672, Avg Loss=0.1236, Time Left=19.73[A
Epoch 2/3 - Training:  31%|▎| 1055/3393 [08:03<19:00,  2.05batch/s, Batch Loss=0.1480, Avg Loss=0.1237, Time Left=19.72[A
Epoch 2/3 - Training:  31%|▎| 1056/3393 [08:03<19:08,  2.04batch/s, Batch Loss=0.1480, Avg Loss=0.1237, Time Left=19.72[A
Epoch 2/3 - Training:  31%|▎| 1056/3393 [08:03<19:08,  2.04batch/s, Batch Loss=0.0520, Avg Loss=0.1236, Time Left=19.71[A
Epoch 2/3 - Training:  31%|▎| 1057/3393 [08:03<19:24,  2.01batch/s, Batch Loss=0.0520, Avg Loss=0.1236, Time Left=19.71[A
Epoch 2/3 - Training:  31%|▎| 1057/3393 [08:04<19:24,  2.01batch/s, Batch Loss=0.0743, Avg Loss=0.1235, Time Left=19.71[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  32%|▎| 1087/3393 [08:18<18:36,  2.07batch/s, Batch Loss=0.0952, Avg Loss=0.1239, Time Left=19.46[A
Epoch 2/3 - Training:  32%|▎| 1087/3393 [08:18<18:36,  2.07batch/s, Batch Loss=0.0236, Avg Loss=0.1238, Time Left=19.45[A
Epoch 2/3 - Training:  32%|▎| 1088/3393 [08:18<18:36,  2.06batch/s, Batch Loss=0.0236, Avg Loss=0.1238, Time Left=19.45[A
Epoch 2/3 - Training:  32%|▎| 1088/3393 [08:19<18:36,  2.06batch/s, Batch Loss=0.1232, Avg Loss=0.1238, Time Left=19.45[A
Epoch 2/3 - Training:  32%|▎| 1089/3393 [08:19<18:56,  2.03batch/s, Batch Loss=0.1232, Avg Loss=0.1238, Time Left=19.45[A
Epoch 2/3 - Training:  32%|▎| 1089/3393 [08:19<18:56,  2.03batch/s, Batch Loss=0.2079, Avg Loss=0.1239, Time Left=19.44[A
Epoch 2/3 - Training:  32%|▎| 1090/3393 [08:19<18:57,  2.02batch/s, Batch Loss=0.2079, Avg Loss=0.1239, Time Left=19.44[A
Epoch 2/3 - Training:  32%|▎| 1090/3393 [08:20<18:57,  2.02batch/s, Batch Loss=0.2213, Avg Loss=0.1240, Time Left=19.43[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  33%|▎| 1120/3393 [08:34<19:18,  1.96batch/s, Batch Loss=0.0581, Avg Loss=0.1234, Time Left=19.20[A
Epoch 2/3 - Training:  33%|▎| 1120/3393 [08:35<19:18,  1.96batch/s, Batch Loss=0.1422, Avg Loss=0.1234, Time Left=19.19[A
Epoch 2/3 - Training:  33%|▎| 1121/3393 [08:35<19:10,  1.97batch/s, Batch Loss=0.1422, Avg Loss=0.1234, Time Left=19.19[A
Epoch 2/3 - Training:  33%|▎| 1121/3393 [08:35<19:10,  1.97batch/s, Batch Loss=0.0727, Avg Loss=0.1234, Time Left=19.18[A
Epoch 2/3 - Training:  33%|▎| 1122/3393 [08:35<19:03,  1.99batch/s, Batch Loss=0.0727, Avg Loss=0.1234, Time Left=19.18[A
Epoch 2/3 - Training:  33%|▎| 1122/3393 [08:36<19:03,  1.99batch/s, Batch Loss=0.0934, Avg Loss=0.1233, Time Left=19.17[A
Epoch 2/3 - Training:  33%|▎| 1123/3393 [08:36<18:51,  2.01batch/s, Batch Loss=0.0934, Avg Loss=0.1233, Time Left=19.17[A
Epoch 2/3 - Training:  33%|▎| 1123/3393 [08:36<18:51,  2.01batch/s, Batch Loss=0.0283, Avg Loss=0.1232, Time Left=19.16[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  34%|▎| 1153/3393 [08:50<17:52,  2.09batch/s, Batch Loss=0.4464, Avg Loss=0.1237, Time Left=18.92[A
Epoch 2/3 - Training:  34%|▎| 1153/3393 [08:51<17:52,  2.09batch/s, Batch Loss=0.0728, Avg Loss=0.1236, Time Left=18.91[A
Epoch 2/3 - Training:  34%|▎| 1154/3393 [08:51<18:10,  2.05batch/s, Batch Loss=0.0728, Avg Loss=0.1236, Time Left=18.91[A
Epoch 2/3 - Training:  34%|▎| 1154/3393 [08:51<18:10,  2.05batch/s, Batch Loss=0.1259, Avg Loss=0.1236, Time Left=18.91[A
Epoch 2/3 - Training:  34%|▎| 1155/3393 [08:51<18:03,  2.07batch/s, Batch Loss=0.1259, Avg Loss=0.1236, Time Left=18.91[A
Epoch 2/3 - Training:  34%|▎| 1155/3393 [08:52<18:03,  2.07batch/s, Batch Loss=0.3751, Avg Loss=0.1239, Time Left=18.90[A
Epoch 2/3 - Training:  34%|▎| 1156/3393 [08:52<18:23,  2.03batch/s, Batch Loss=0.3751, Avg Loss=0.1239, Time Left=18.90[A
Epoch 2/3 - Training:  34%|▎| 1156/3393 [08:52<18:23,  2.03batch/s, Batch Loss=0.0288, Avg Loss=0.1238, Time Left=18.89[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  35%|▎| 1186/3393 [09:07<17:46,  2.07batch/s, Batch Loss=0.2096, Avg Loss=0.1240, Time Left=18.65[A
Epoch 2/3 - Training:  35%|▎| 1186/3393 [09:07<17:46,  2.07batch/s, Batch Loss=0.1216, Avg Loss=0.1240, Time Left=18.64[A
Epoch 2/3 - Training:  35%|▎| 1187/3393 [09:07<18:17,  2.01batch/s, Batch Loss=0.1216, Avg Loss=0.1240, Time Left=18.64[A
Epoch 2/3 - Training:  35%|▎| 1187/3393 [09:07<18:17,  2.01batch/s, Batch Loss=0.0387, Avg Loss=0.1239, Time Left=18.63[A
Epoch 2/3 - Training:  35%|▎| 1188/3393 [09:08<17:49,  2.06batch/s, Batch Loss=0.0387, Avg Loss=0.1239, Time Left=18.63[A
Epoch 2/3 - Training:  35%|▎| 1188/3393 [09:08<17:49,  2.06batch/s, Batch Loss=0.1250, Avg Loss=0.1239, Time Left=18.63[A
Epoch 2/3 - Training:  35%|▎| 1189/3393 [09:08<17:57,  2.05batch/s, Batch Loss=0.1250, Avg Loss=0.1239, Time Left=18.63[A
Epoch 2/3 - Training:  35%|▎| 1189/3393 [09:08<17:57,  2.05batch/s, Batch Loss=0.0219, Avg Loss=0.1238, Time Left=18.62[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  36%|▎| 1219/3393 [09:23<18:03,  2.01batch/s, Batch Loss=0.0256, Avg Loss=0.1231, Time Left=18.38[A
Epoch 2/3 - Training:  36%|▎| 1219/3393 [09:23<18:03,  2.01batch/s, Batch Loss=0.0155, Avg Loss=0.1230, Time Left=18.38[A
Epoch 2/3 - Training:  36%|▎| 1220/3393 [09:23<17:55,  2.02batch/s, Batch Loss=0.0155, Avg Loss=0.1230, Time Left=18.38[A
Epoch 2/3 - Training:  36%|▎| 1220/3393 [09:24<17:55,  2.02batch/s, Batch Loss=0.0170, Avg Loss=0.1229, Time Left=18.37[A
Epoch 2/3 - Training:  36%|▎| 1221/3393 [09:24<17:36,  2.06batch/s, Batch Loss=0.0170, Avg Loss=0.1229, Time Left=18.37[A
Epoch 2/3 - Training:  36%|▎| 1221/3393 [09:24<17:36,  2.06batch/s, Batch Loss=0.2838, Avg Loss=0.1230, Time Left=18.36[A
Epoch 2/3 - Training:  36%|▎| 1222/3393 [09:24<17:35,  2.06batch/s, Batch Loss=0.2838, Avg Loss=0.1230, Time Left=18.36[A
Epoch 2/3 - Training:  36%|▎| 1222/3393 [09:25<17:35,  2.06batch/s, Batch Loss=0.0363, Avg Loss=0.1229, Time Left=18.35[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  37%|▎| 1252/3393 [09:39<17:34,  2.03batch/s, Batch Loss=0.2664, Avg Loss=0.1229, Time Left=18.11[A
Epoch 2/3 - Training:  37%|▎| 1252/3393 [09:39<17:34,  2.03batch/s, Batch Loss=0.0438, Avg Loss=0.1228, Time Left=18.10[A
Epoch 2/3 - Training:  37%|▎| 1253/3393 [09:39<17:38,  2.02batch/s, Batch Loss=0.0438, Avg Loss=0.1228, Time Left=18.10[A
Epoch 2/3 - Training:  37%|▎| 1253/3393 [09:40<17:38,  2.02batch/s, Batch Loss=0.3341, Avg Loss=0.1230, Time Left=18.09[A
Epoch 2/3 - Training:  37%|▎| 1254/3393 [09:40<17:41,  2.01batch/s, Batch Loss=0.3341, Avg Loss=0.1230, Time Left=18.09[A
Epoch 2/3 - Training:  37%|▎| 1254/3393 [09:40<17:41,  2.01batch/s, Batch Loss=0.0595, Avg Loss=0.1229, Time Left=18.08[A
Epoch 2/3 - Training:  37%|▎| 1255/3393 [09:40<17:33,  2.03batch/s, Batch Loss=0.0595, Avg Loss=0.1229, Time Left=18.08[A
Epoch 2/3 - Training:  37%|▎| 1255/3393 [09:41<17:33,  2.03batch/s, Batch Loss=0.2956, Avg Loss=0.1231, Time Left=18.08[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  38%|▍| 1285/3393 [09:55<17:17,  2.03batch/s, Batch Loss=0.3502, Avg Loss=0.1229, Time Left=17.84[A
Epoch 2/3 - Training:  38%|▍| 1285/3393 [09:56<17:17,  2.03batch/s, Batch Loss=0.2719, Avg Loss=0.1231, Time Left=17.84[A
Epoch 2/3 - Training:  38%|▍| 1286/3393 [09:56<17:10,  2.04batch/s, Batch Loss=0.2719, Avg Loss=0.1231, Time Left=17.84[A
Epoch 2/3 - Training:  38%|▍| 1286/3393 [09:56<17:10,  2.04batch/s, Batch Loss=0.2258, Avg Loss=0.1232, Time Left=17.83[A
Epoch 2/3 - Training:  38%|▍| 1287/3393 [09:56<17:07,  2.05batch/s, Batch Loss=0.2258, Avg Loss=0.1232, Time Left=17.83[A
Epoch 2/3 - Training:  38%|▍| 1287/3393 [09:57<17:07,  2.05batch/s, Batch Loss=0.0541, Avg Loss=0.1231, Time Left=17.82[A
Epoch 2/3 - Training:  38%|▍| 1288/3393 [09:57<16:53,  2.08batch/s, Batch Loss=0.0541, Avg Loss=0.1231, Time Left=17.82[A
Epoch 2/3 - Training:  38%|▍| 1288/3393 [09:57<16:53,  2.08batch/s, Batch Loss=0.0854, Avg Loss=0.1231, Time Left=17.81[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  39%|▍| 1318/3393 [10:11<16:50,  2.05batch/s, Batch Loss=0.1224, Avg Loss=0.1227, Time Left=17.57[A
Epoch 2/3 - Training:  39%|▍| 1318/3393 [10:12<16:50,  2.05batch/s, Batch Loss=0.1860, Avg Loss=0.1228, Time Left=17.56[A
Epoch 2/3 - Training:  39%|▍| 1319/3393 [10:12<16:39,  2.07batch/s, Batch Loss=0.1860, Avg Loss=0.1228, Time Left=17.56[A
Epoch 2/3 - Training:  39%|▍| 1319/3393 [10:12<16:39,  2.07batch/s, Batch Loss=0.0100, Avg Loss=0.1227, Time Left=17.55[A
Epoch 2/3 - Training:  39%|▍| 1320/3393 [10:12<16:39,  2.07batch/s, Batch Loss=0.0100, Avg Loss=0.1227, Time Left=17.55[A
Epoch 2/3 - Training:  39%|▍| 1320/3393 [10:13<16:39,  2.07batch/s, Batch Loss=0.0975, Avg Loss=0.1227, Time Left=17.55[A
Epoch 2/3 - Training:  39%|▍| 1321/3393 [10:13<16:49,  2.05batch/s, Batch Loss=0.0975, Avg Loss=0.1227, Time Left=17.55[A
Epoch 2/3 - Training:  39%|▍| 1321/3393 [10:13<16:49,  2.05batch/s, Batch Loss=0.0163, Avg Loss=0.1226, Time Left=17.54[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  40%|▍| 1351/3393 [10:27<16:49,  2.02batch/s, Batch Loss=0.2610, Avg Loss=0.1228, Time Left=17.30[A
Epoch 2/3 - Training:  40%|▍| 1351/3393 [10:28<16:49,  2.02batch/s, Batch Loss=0.0591, Avg Loss=0.1227, Time Left=17.29[A
Epoch 2/3 - Training:  40%|▍| 1352/3393 [10:28<16:43,  2.03batch/s, Batch Loss=0.0591, Avg Loss=0.1227, Time Left=17.29[A
Epoch 2/3 - Training:  40%|▍| 1352/3393 [10:28<16:43,  2.03batch/s, Batch Loss=0.0522, Avg Loss=0.1227, Time Left=17.28[A
Epoch 2/3 - Training:  40%|▍| 1353/3393 [10:28<16:18,  2.08batch/s, Batch Loss=0.0522, Avg Loss=0.1227, Time Left=17.28[A
Epoch 2/3 - Training:  40%|▍| 1353/3393 [10:29<16:18,  2.08batch/s, Batch Loss=0.4082, Avg Loss=0.1229, Time Left=17.27[A
Epoch 2/3 - Training:  40%|▍| 1354/3393 [10:29<16:28,  2.06batch/s, Batch Loss=0.4082, Avg Loss=0.1229, Time Left=17.27[A
Epoch 2/3 - Training:  40%|▍| 1354/3393 [10:29<16:28,  2.06batch/s, Batch Loss=0.2269, Avg Loss=0.1230, Time Left=17.26[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  41%|▍| 1384/3393 [10:43<16:19,  2.05batch/s, Batch Loss=0.0132, Avg Loss=0.1228, Time Left=17.02[A
Epoch 2/3 - Training:  41%|▍| 1384/3393 [10:44<16:19,  2.05batch/s, Batch Loss=0.0030, Avg Loss=0.1228, Time Left=17.01[A
Epoch 2/3 - Training:  41%|▍| 1385/3393 [10:44<16:16,  2.06batch/s, Batch Loss=0.0030, Avg Loss=0.1228, Time Left=17.01[A
Epoch 2/3 - Training:  41%|▍| 1385/3393 [10:44<16:16,  2.06batch/s, Batch Loss=0.1309, Avg Loss=0.1228, Time Left=17.01[A
Epoch 2/3 - Training:  41%|▍| 1386/3393 [10:44<16:05,  2.08batch/s, Batch Loss=0.1309, Avg Loss=0.1228, Time Left=17.01[A
Epoch 2/3 - Training:  41%|▍| 1386/3393 [10:45<16:05,  2.08batch/s, Batch Loss=0.0559, Avg Loss=0.1227, Time Left=17.00[A
Epoch 2/3 - Training:  41%|▍| 1387/3393 [10:45<16:15,  2.06batch/s, Batch Loss=0.0559, Avg Loss=0.1227, Time Left=17.00[A
Epoch 2/3 - Training:  41%|▍| 1387/3393 [10:45<16:15,  2.06batch/s, Batch Loss=0.0436, Avg Loss=0.1226, Time Left=16.99[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  42%|▍| 1417/3393 [10:59<15:40,  2.10batch/s, Batch Loss=0.0107, Avg Loss=0.1211, Time Left=16.75[A
Epoch 2/3 - Training:  42%|▍| 1417/3393 [11:00<15:40,  2.10batch/s, Batch Loss=0.0873, Avg Loss=0.1211, Time Left=16.74[A
Epoch 2/3 - Training:  42%|▍| 1418/3393 [11:00<15:34,  2.11batch/s, Batch Loss=0.0873, Avg Loss=0.1211, Time Left=16.74[A
Epoch 2/3 - Training:  42%|▍| 1418/3393 [11:00<15:34,  2.11batch/s, Batch Loss=0.0384, Avg Loss=0.1210, Time Left=16.73[A
Epoch 2/3 - Training:  42%|▍| 1419/3393 [11:00<15:48,  2.08batch/s, Batch Loss=0.0384, Avg Loss=0.1210, Time Left=16.73[A
Epoch 2/3 - Training:  42%|▍| 1419/3393 [11:01<15:48,  2.08batch/s, Batch Loss=0.1343, Avg Loss=0.1211, Time Left=16.72[A
Epoch 2/3 - Training:  42%|▍| 1420/3393 [11:01<15:51,  2.07batch/s, Batch Loss=0.1343, Avg Loss=0.1211, Time Left=16.72[A
Epoch 2/3 - Training:  42%|▍| 1420/3393 [11:01<15:51,  2.07batch/s, Batch Loss=0.2449, Avg Loss=0.1211, Time Left=16.72[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  43%|▍| 1450/3393 [11:16<15:57,  2.03batch/s, Batch Loss=0.0347, Avg Loss=0.1204, Time Left=16.48[A
Epoch 2/3 - Training:  43%|▍| 1450/3393 [11:16<15:57,  2.03batch/s, Batch Loss=0.3048, Avg Loss=0.1205, Time Left=16.47[A
Epoch 2/3 - Training:  43%|▍| 1451/3393 [11:16<15:43,  2.06batch/s, Batch Loss=0.3048, Avg Loss=0.1205, Time Left=16.47[A
Epoch 2/3 - Training:  43%|▍| 1451/3393 [11:17<15:43,  2.06batch/s, Batch Loss=0.1342, Avg Loss=0.1205, Time Left=16.46[A
Epoch 2/3 - Training:  43%|▍| 1452/3393 [11:17<16:08,  2.00batch/s, Batch Loss=0.1342, Avg Loss=0.1205, Time Left=16.46[A
Epoch 2/3 - Training:  43%|▍| 1452/3393 [11:17<16:08,  2.00batch/s, Batch Loss=0.0374, Avg Loss=0.1205, Time Left=16.45[A
Epoch 2/3 - Training:  43%|▍| 1453/3393 [11:17<15:51,  2.04batch/s, Batch Loss=0.0374, Avg Loss=0.1205, Time Left=16.45[A
Epoch 2/3 - Training:  43%|▍| 1453/3393 [11:18<15:51,  2.04batch/s, Batch Loss=0.0876, Avg Loss=0.1204, Time Left=16.45[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  44%|▍| 1483/3393 [11:32<15:25,  2.06batch/s, Batch Loss=0.1047, Avg Loss=0.1203, Time Left=16.21[A
Epoch 2/3 - Training:  44%|▍| 1483/3393 [11:32<15:25,  2.06batch/s, Batch Loss=0.1258, Avg Loss=0.1203, Time Left=16.20[A
Epoch 2/3 - Training:  44%|▍| 1484/3393 [11:32<15:23,  2.07batch/s, Batch Loss=0.1258, Avg Loss=0.1203, Time Left=16.20[A
Epoch 2/3 - Training:  44%|▍| 1484/3393 [11:33<15:23,  2.07batch/s, Batch Loss=0.0790, Avg Loss=0.1203, Time Left=16.19[A
Epoch 2/3 - Training:  44%|▍| 1485/3393 [11:33<15:14,  2.09batch/s, Batch Loss=0.0790, Avg Loss=0.1203, Time Left=16.19[A
Epoch 2/3 - Training:  44%|▍| 1485/3393 [11:33<15:14,  2.09batch/s, Batch Loss=0.1139, Avg Loss=0.1202, Time Left=16.18[A
Epoch 2/3 - Training:  44%|▍| 1486/3393 [11:33<15:41,  2.03batch/s, Batch Loss=0.1139, Avg Loss=0.1202, Time Left=16.18[A
Epoch 2/3 - Training:  44%|▍| 1486/3393 [11:34<15:41,  2.03batch/s, Batch Loss=0.0635, Avg Loss=0.1202, Time Left=16.18[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  45%|▍| 1516/3393 [11:48<15:24,  2.03batch/s, Batch Loss=0.2837, Avg Loss=0.1205, Time Left=15.93[A
Epoch 2/3 - Training:  45%|▍| 1516/3393 [11:48<15:24,  2.03batch/s, Batch Loss=0.0233, Avg Loss=0.1204, Time Left=15.93[A
Epoch 2/3 - Training:  45%|▍| 1517/3393 [11:48<15:27,  2.02batch/s, Batch Loss=0.0233, Avg Loss=0.1204, Time Left=15.93[A
Epoch 2/3 - Training:  45%|▍| 1517/3393 [11:49<15:27,  2.02batch/s, Batch Loss=0.1954, Avg Loss=0.1205, Time Left=15.92[A
Epoch 2/3 - Training:  45%|▍| 1518/3393 [11:49<15:12,  2.06batch/s, Batch Loss=0.1954, Avg Loss=0.1205, Time Left=15.92[A
Epoch 2/3 - Training:  45%|▍| 1518/3393 [11:49<15:12,  2.06batch/s, Batch Loss=0.3110, Avg Loss=0.1206, Time Left=15.91[A
Epoch 2/3 - Training:  45%|▍| 1519/3393 [11:49<15:10,  2.06batch/s, Batch Loss=0.3110, Avg Loss=0.1206, Time Left=15.91[A
Epoch 2/3 - Training:  45%|▍| 1519/3393 [11:50<15:10,  2.06batch/s, Batch Loss=0.0758, Avg Loss=0.1206, Time Left=15.90[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  46%|▍| 1549/3393 [12:04<15:00,  2.05batch/s, Batch Loss=0.1144, Avg Loss=0.1203, Time Left=15.67[A
Epoch 2/3 - Training:  46%|▍| 1549/3393 [12:05<15:00,  2.05batch/s, Batch Loss=0.2049, Avg Loss=0.1204, Time Left=15.66[A
Epoch 2/3 - Training:  46%|▍| 1550/3393 [12:05<14:49,  2.07batch/s, Batch Loss=0.2049, Avg Loss=0.1204, Time Left=15.66[A
Epoch 2/3 - Training:  46%|▍| 1550/3393 [12:05<14:49,  2.07batch/s, Batch Loss=0.1406, Avg Loss=0.1204, Time Left=15.65[A
Epoch 2/3 - Training:  46%|▍| 1551/3393 [12:05<14:48,  2.07batch/s, Batch Loss=0.1406, Avg Loss=0.1204, Time Left=15.65[A
Epoch 2/3 - Training:  46%|▍| 1551/3393 [12:06<14:48,  2.07batch/s, Batch Loss=0.3121, Avg Loss=0.1205, Time Left=15.64[A
Epoch 2/3 - Training:  46%|▍| 1552/3393 [12:06<14:39,  2.09batch/s, Batch Loss=0.3121, Avg Loss=0.1205, Time Left=15.64[A
Epoch 2/3 - Training:  46%|▍| 1552/3393 [12:06<14:39,  2.09batch/s, Batch Loss=0.1237, Avg Loss=0.1205, Time Left=15.63[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  47%|▍| 1582/3393 [12:20<14:48,  2.04batch/s, Batch Loss=0.0516, Avg Loss=0.1205, Time Left=15.39[A
Epoch 2/3 - Training:  47%|▍| 1582/3393 [12:21<14:48,  2.04batch/s, Batch Loss=0.1452, Avg Loss=0.1205, Time Left=15.38[A
Epoch 2/3 - Training:  47%|▍| 1583/3393 [12:21<14:36,  2.07batch/s, Batch Loss=0.1452, Avg Loss=0.1205, Time Left=15.38[A
Epoch 2/3 - Training:  47%|▍| 1583/3393 [12:21<14:36,  2.07batch/s, Batch Loss=0.0085, Avg Loss=0.1205, Time Left=15.38[A
Epoch 2/3 - Training:  47%|▍| 1584/3393 [12:21<14:34,  2.07batch/s, Batch Loss=0.0085, Avg Loss=0.1205, Time Left=15.38[A
Epoch 2/3 - Training:  47%|▍| 1584/3393 [12:22<14:34,  2.07batch/s, Batch Loss=0.0499, Avg Loss=0.1204, Time Left=15.37[A
Epoch 2/3 - Training:  47%|▍| 1585/3393 [12:22<14:42,  2.05batch/s, Batch Loss=0.0499, Avg Loss=0.1204, Time Left=15.37[A
Epoch 2/3 - Training:  47%|▍| 1585/3393 [12:22<14:42,  2.05batch/s, Batch Loss=0.1084, Avg Loss=0.1204, Time Left=15.36[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  48%|▍| 1615/3393 [12:36<14:35,  2.03batch/s, Batch Loss=0.1945, Avg Loss=0.1197, Time Left=15.12[A
Epoch 2/3 - Training:  48%|▍| 1615/3393 [12:37<14:35,  2.03batch/s, Batch Loss=0.0258, Avg Loss=0.1196, Time Left=15.11[A
Epoch 2/3 - Training:  48%|▍| 1616/3393 [12:37<14:29,  2.04batch/s, Batch Loss=0.0258, Avg Loss=0.1196, Time Left=15.11[A
Epoch 2/3 - Training:  48%|▍| 1616/3393 [12:37<14:29,  2.04batch/s, Batch Loss=0.0427, Avg Loss=0.1196, Time Left=15.11[A
Epoch 2/3 - Training:  48%|▍| 1617/3393 [12:37<14:17,  2.07batch/s, Batch Loss=0.0427, Avg Loss=0.1196, Time Left=15.11[A
Epoch 2/3 - Training:  48%|▍| 1617/3393 [12:38<14:17,  2.07batch/s, Batch Loss=0.4492, Avg Loss=0.1198, Time Left=15.10[A
Epoch 2/3 - Training:  48%|▍| 1618/3393 [12:38<14:11,  2.08batch/s, Batch Loss=0.4492, Avg Loss=0.1198, Time Left=15.10[A
Epoch 2/3 - Training:  48%|▍| 1618/3393 [12:38<14:11,  2.08batch/s, Batch Loss=0.1809, Avg Loss=0.1198, Time Left=15.09[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  49%|▍| 1648/3393 [12:52<14:06,  2.06batch/s, Batch Loss=0.2597, Avg Loss=0.1197, Time Left=14.85[A
Epoch 2/3 - Training:  49%|▍| 1648/3393 [12:53<14:06,  2.06batch/s, Batch Loss=0.0595, Avg Loss=0.1197, Time Left=14.84[A
Epoch 2/3 - Training:  49%|▍| 1649/3393 [12:53<14:06,  2.06batch/s, Batch Loss=0.0595, Avg Loss=0.1197, Time Left=14.84[A
Epoch 2/3 - Training:  49%|▍| 1649/3393 [12:53<14:06,  2.06batch/s, Batch Loss=0.0840, Avg Loss=0.1196, Time Left=14.84[A
Epoch 2/3 - Training:  49%|▍| 1650/3393 [12:53<13:55,  2.09batch/s, Batch Loss=0.0840, Avg Loss=0.1196, Time Left=14.84[A
Epoch 2/3 - Training:  49%|▍| 1650/3393 [12:54<13:55,  2.09batch/s, Batch Loss=0.0111, Avg Loss=0.1196, Time Left=14.83[A
Epoch 2/3 - Training:  49%|▍| 1651/3393 [12:54<13:57,  2.08batch/s, Batch Loss=0.0111, Avg Loss=0.1196, Time Left=14.83[A
Epoch 2/3 - Training:  49%|▍| 1651/3393 [12:54<13:57,  2.08batch/s, Batch Loss=0.0100, Avg Loss=0.1195, Time Left=14.82[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  50%|▍| 1681/3393 [13:09<14:01,  2.04batch/s, Batch Loss=0.2452, Avg Loss=0.1190, Time Left=14.58[A
Epoch 2/3 - Training:  50%|▍| 1681/3393 [13:09<14:01,  2.04batch/s, Batch Loss=0.3821, Avg Loss=0.1191, Time Left=14.57[A
Epoch 2/3 - Training:  50%|▍| 1682/3393 [13:09<14:04,  2.03batch/s, Batch Loss=0.3821, Avg Loss=0.1191, Time Left=14.57[A
Epoch 2/3 - Training:  50%|▍| 1682/3393 [13:10<14:04,  2.03batch/s, Batch Loss=0.0261, Avg Loss=0.1191, Time Left=14.57[A
Epoch 2/3 - Training:  50%|▍| 1683/3393 [13:10<13:52,  2.05batch/s, Batch Loss=0.0261, Avg Loss=0.1191, Time Left=14.57[A
Epoch 2/3 - Training:  50%|▍| 1683/3393 [13:10<13:52,  2.05batch/s, Batch Loss=0.0268, Avg Loss=0.1190, Time Left=14.56[A
Epoch 2/3 - Training:  50%|▍| 1684/3393 [13:10<13:41,  2.08batch/s, Batch Loss=0.0268, Avg Loss=0.1190, Time Left=14.56[A
Epoch 2/3 - Training:  50%|▍| 1684/3393 [13:10<13:41,  2.08batch/s, Batch Loss=0.0102, Avg Loss=0.1189, Time Left=14.55[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  51%|▌| 1714/3393 [13:25<13:42,  2.04batch/s, Batch Loss=0.2209, Avg Loss=0.1189, Time Left=14.31[A
Epoch 2/3 - Training:  51%|▌| 1714/3393 [13:25<13:42,  2.04batch/s, Batch Loss=0.0984, Avg Loss=0.1189, Time Left=14.30[A
Epoch 2/3 - Training:  51%|▌| 1715/3393 [13:25<13:39,  2.05batch/s, Batch Loss=0.0984, Avg Loss=0.1189, Time Left=14.30[A
Epoch 2/3 - Training:  51%|▌| 1715/3393 [13:26<13:39,  2.05batch/s, Batch Loss=0.0339, Avg Loss=0.1188, Time Left=14.30[A
Epoch 2/3 - Training:  51%|▌| 1716/3393 [13:26<13:36,  2.05batch/s, Batch Loss=0.0339, Avg Loss=0.1188, Time Left=14.30[A
Epoch 2/3 - Training:  51%|▌| 1716/3393 [13:26<13:36,  2.05batch/s, Batch Loss=0.1133, Avg Loss=0.1188, Time Left=14.29[A
Epoch 2/3 - Training:  51%|▌| 1717/3393 [13:26<13:33,  2.06batch/s, Batch Loss=0.1133, Avg Loss=0.1188, Time Left=14.29[A
Epoch 2/3 - Training:  51%|▌| 1717/3393 [13:27<13:33,  2.06batch/s, Batch Loss=0.3047, Avg Loss=0.1190, Time Left=14.28[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  51%|▌| 1747/3393 [13:41<13:19,  2.06batch/s, Batch Loss=0.3752, Avg Loss=0.1196, Time Left=14.04[A
Epoch 2/3 - Training:  51%|▌| 1747/3393 [13:41<13:19,  2.06batch/s, Batch Loss=0.0462, Avg Loss=0.1195, Time Left=14.03[A
Epoch 2/3 - Training:  52%|▌| 1748/3393 [13:41<13:19,  2.06batch/s, Batch Loss=0.0462, Avg Loss=0.1195, Time Left=14.03[A
Epoch 2/3 - Training:  52%|▌| 1748/3393 [13:42<13:19,  2.06batch/s, Batch Loss=0.0748, Avg Loss=0.1195, Time Left=14.02[A
Epoch 2/3 - Training:  52%|▌| 1749/3393 [13:42<13:08,  2.08batch/s, Batch Loss=0.0748, Avg Loss=0.1195, Time Left=14.02[A
Epoch 2/3 - Training:  52%|▌| 1749/3393 [13:42<13:08,  2.08batch/s, Batch Loss=0.0392, Avg Loss=0.1195, Time Left=14.02[A
Epoch 2/3 - Training:  52%|▌| 1750/3393 [13:42<13:33,  2.02batch/s, Batch Loss=0.0392, Avg Loss=0.1195, Time Left=14.02[A
Epoch 2/3 - Training:  52%|▌| 1750/3393 [13:43<13:33,  2.02batch/s, Batch Loss=0.0924, Avg Loss=0.1195, Time Left=14.01[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  52%|▌| 1780/3393 [13:57<12:53,  2.09batch/s, Batch Loss=0.0402, Avg Loss=0.1193, Time Left=13.77[A
Epoch 2/3 - Training:  52%|▌| 1780/3393 [13:58<12:53,  2.09batch/s, Batch Loss=0.0452, Avg Loss=0.1193, Time Left=13.76[A
Epoch 2/3 - Training:  52%|▌| 1781/3393 [13:58<13:18,  2.02batch/s, Batch Loss=0.0452, Avg Loss=0.1193, Time Left=13.76[A
Epoch 2/3 - Training:  52%|▌| 1781/3393 [13:58<13:18,  2.02batch/s, Batch Loss=0.0305, Avg Loss=0.1193, Time Left=13.76[A
Epoch 2/3 - Training:  53%|▌| 1782/3393 [13:58<13:12,  2.03batch/s, Batch Loss=0.0305, Avg Loss=0.1193, Time Left=13.76[A
Epoch 2/3 - Training:  53%|▌| 1782/3393 [13:59<13:12,  2.03batch/s, Batch Loss=0.0514, Avg Loss=0.1192, Time Left=13.75[A
Epoch 2/3 - Training:  53%|▌| 1783/3393 [13:59<13:15,  2.02batch/s, Batch Loss=0.0514, Avg Loss=0.1192, Time Left=13.75[A
Epoch 2/3 - Training:  53%|▌| 1783/3393 [13:59<13:15,  2.02batch/s, Batch Loss=0.1042, Avg Loss=0.1192, Time Left=13.74[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  53%|▌| 1813/3393 [14:13<12:52,  2.05batch/s, Batch Loss=0.2376, Avg Loss=0.1188, Time Left=13.50[A
Epoch 2/3 - Training:  53%|▌| 1813/3393 [14:14<12:52,  2.05batch/s, Batch Loss=0.0198, Avg Loss=0.1188, Time Left=13.49[A
Epoch 2/3 - Training:  53%|▌| 1814/3393 [14:14<12:49,  2.05batch/s, Batch Loss=0.0198, Avg Loss=0.1188, Time Left=13.49[A
Epoch 2/3 - Training:  53%|▌| 1814/3393 [14:14<12:49,  2.05batch/s, Batch Loss=0.0290, Avg Loss=0.1187, Time Left=13.48[A
Epoch 2/3 - Training:  53%|▌| 1815/3393 [14:14<12:47,  2.06batch/s, Batch Loss=0.0290, Avg Loss=0.1187, Time Left=13.48[A
Epoch 2/3 - Training:  53%|▌| 1815/3393 [14:15<12:47,  2.06batch/s, Batch Loss=0.1079, Avg Loss=0.1187, Time Left=13.48[A
Epoch 2/3 - Training:  54%|▌| 1816/3393 [14:15<12:45,  2.06batch/s, Batch Loss=0.1079, Avg Loss=0.1187, Time Left=13.48[A
Epoch 2/3 - Training:  54%|▌| 1816/3393 [14:15<12:45,  2.06batch/s, Batch Loss=0.0532, Avg Loss=0.1187, Time Left=13.47[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  54%|▌| 1846/3393 [14:29<12:28,  2.07batch/s, Batch Loss=0.1390, Avg Loss=0.1183, Time Left=13.23[A
Epoch 2/3 - Training:  54%|▌| 1846/3393 [14:30<12:28,  2.07batch/s, Batch Loss=0.0638, Avg Loss=0.1183, Time Left=13.22[A
Epoch 2/3 - Training:  54%|▌| 1847/3393 [14:30<12:33,  2.05batch/s, Batch Loss=0.0638, Avg Loss=0.1183, Time Left=13.22[A
Epoch 2/3 - Training:  54%|▌| 1847/3393 [14:30<12:33,  2.05batch/s, Batch Loss=0.0731, Avg Loss=0.1182, Time Left=13.21[A
Epoch 2/3 - Training:  54%|▌| 1848/3393 [14:30<12:32,  2.05batch/s, Batch Loss=0.0731, Avg Loss=0.1182, Time Left=13.21[A
Epoch 2/3 - Training:  54%|▌| 1848/3393 [14:31<12:32,  2.05batch/s, Batch Loss=0.1522, Avg Loss=0.1183, Time Left=13.20[A
Epoch 2/3 - Training:  54%|▌| 1849/3393 [14:31<12:23,  2.08batch/s, Batch Loss=0.1522, Avg Loss=0.1183, Time Left=13.20[A
Epoch 2/3 - Training:  54%|▌| 1849/3393 [14:31<12:23,  2.08batch/s, Batch Loss=0.1787, Avg Loss=0.1183, Time Left=13.20[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  55%|▌| 1879/3393 [14:45<12:29,  2.02batch/s, Batch Loss=0.3198, Avg Loss=0.1181, Time Left=12.96[A
Epoch 2/3 - Training:  55%|▌| 1879/3393 [14:46<12:29,  2.02batch/s, Batch Loss=0.2579, Avg Loss=0.1181, Time Left=12.95[A
Epoch 2/3 - Training:  55%|▌| 1880/3393 [14:46<12:24,  2.03batch/s, Batch Loss=0.2579, Avg Loss=0.1181, Time Left=12.95[A
Epoch 2/3 - Training:  55%|▌| 1880/3393 [14:46<12:24,  2.03batch/s, Batch Loss=0.0059, Avg Loss=0.1181, Time Left=12.94[A
Epoch 2/3 - Training:  55%|▌| 1881/3393 [14:46<12:20,  2.04batch/s, Batch Loss=0.0059, Avg Loss=0.1181, Time Left=12.94[A
Epoch 2/3 - Training:  55%|▌| 1881/3393 [14:47<12:20,  2.04batch/s, Batch Loss=0.1530, Avg Loss=0.1181, Time Left=12.93[A
Epoch 2/3 - Training:  55%|▌| 1882/3393 [14:47<12:24,  2.03batch/s, Batch Loss=0.1530, Avg Loss=0.1181, Time Left=12.93[A
Epoch 2/3 - Training:  55%|▌| 1882/3393 [14:47<12:24,  2.03batch/s, Batch Loss=0.0978, Avg Loss=0.1181, Time Left=12.92[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  56%|▌| 1912/3393 [15:01<12:08,  2.03batch/s, Batch Loss=0.0232, Avg Loss=0.1183, Time Left=12.69[A
Epoch 2/3 - Training:  56%|▌| 1912/3393 [15:02<12:08,  2.03batch/s, Batch Loss=0.0761, Avg Loss=0.1182, Time Left=12.68[A
Epoch 2/3 - Training:  56%|▌| 1913/3393 [15:02<12:18,  2.00batch/s, Batch Loss=0.0761, Avg Loss=0.1182, Time Left=12.68[A
Epoch 2/3 - Training:  56%|▌| 1913/3393 [15:02<12:18,  2.00batch/s, Batch Loss=0.0353, Avg Loss=0.1182, Time Left=12.67[A
Epoch 2/3 - Training:  56%|▌| 1914/3393 [15:02<12:11,  2.02batch/s, Batch Loss=0.0353, Avg Loss=0.1182, Time Left=12.67[A
Epoch 2/3 - Training:  56%|▌| 1914/3393 [15:03<12:11,  2.02batch/s, Batch Loss=0.0345, Avg Loss=0.1181, Time Left=12.66[A
Epoch 2/3 - Training:  56%|▌| 1915/3393 [15:03<12:20,  2.00batch/s, Batch Loss=0.0345, Avg Loss=0.1181, Time Left=12.66[A
Epoch 2/3 - Training:  56%|▌| 1915/3393 [15:03<12:20,  2.00batch/s, Batch Loss=0.2065, Avg Loss=0.1182, Time Left=12.66[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  57%|▌| 1945/3393 [15:18<11:50,  2.04batch/s, Batch Loss=0.0519, Avg Loss=0.1180, Time Left=12.42[A
Epoch 2/3 - Training:  57%|▌| 1945/3393 [15:18<11:50,  2.04batch/s, Batch Loss=0.0080, Avg Loss=0.1179, Time Left=12.41[A
Epoch 2/3 - Training:  57%|▌| 1946/3393 [15:18<11:53,  2.03batch/s, Batch Loss=0.0080, Avg Loss=0.1179, Time Left=12.41[A
Epoch 2/3 - Training:  57%|▌| 1946/3393 [15:19<11:53,  2.03batch/s, Batch Loss=0.3505, Avg Loss=0.1181, Time Left=12.40[A
Epoch 2/3 - Training:  57%|▌| 1947/3393 [15:19<11:36,  2.08batch/s, Batch Loss=0.3505, Avg Loss=0.1181, Time Left=12.40[A
Epoch 2/3 - Training:  57%|▌| 1947/3393 [15:19<11:36,  2.08batch/s, Batch Loss=0.0083, Avg Loss=0.1180, Time Left=12.40[A
Epoch 2/3 - Training:  57%|▌| 1948/3393 [15:19<11:35,  2.08batch/s, Batch Loss=0.0083, Avg Loss=0.1180, Time Left=12.40[A
Epoch 2/3 - Training:  57%|▌| 1948/3393 [15:20<11:35,  2.08batch/s, Batch Loss=0.0343, Avg Loss=0.1180, Time Left=12.39[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  58%|▌| 1978/3393 [15:34<11:26,  2.06batch/s, Batch Loss=0.0931, Avg Loss=0.1175, Time Left=12.15[A
Epoch 2/3 - Training:  58%|▌| 1978/3393 [15:34<11:26,  2.06batch/s, Batch Loss=0.0754, Avg Loss=0.1175, Time Left=12.14[A
Epoch 2/3 - Training:  58%|▌| 1979/3393 [15:34<11:19,  2.08batch/s, Batch Loss=0.0754, Avg Loss=0.1175, Time Left=12.14[A
Epoch 2/3 - Training:  58%|▌| 1979/3393 [15:35<11:19,  2.08batch/s, Batch Loss=0.0813, Avg Loss=0.1175, Time Left=12.13[A
Epoch 2/3 - Training:  58%|▌| 1980/3393 [15:35<11:39,  2.02batch/s, Batch Loss=0.0813, Avg Loss=0.1175, Time Left=12.13[A
Epoch 2/3 - Training:  58%|▌| 1980/3393 [15:35<11:39,  2.02batch/s, Batch Loss=0.0362, Avg Loss=0.1174, Time Left=12.12[A
Epoch 2/3 - Training:  58%|▌| 1981/3393 [15:35<11:26,  2.06batch/s, Batch Loss=0.0362, Avg Loss=0.1174, Time Left=12.12[A
Epoch 2/3 - Training:  58%|▌| 1981/3393 [15:36<11:26,  2.06batch/s, Batch Loss=0.0697, Avg Loss=0.1174, Time Left=12.12[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  59%|▌| 2011/3393 [15:50<11:05,  2.08batch/s, Batch Loss=0.0628, Avg Loss=0.1176, Time Left=11.88[A
Epoch 2/3 - Training:  59%|▌| 2011/3393 [15:51<11:05,  2.08batch/s, Batch Loss=0.0821, Avg Loss=0.1175, Time Left=11.87[A
Epoch 2/3 - Training:  59%|▌| 2012/3393 [15:51<11:26,  2.01batch/s, Batch Loss=0.0821, Avg Loss=0.1175, Time Left=11.87[A
Epoch 2/3 - Training:  59%|▌| 2012/3393 [15:51<11:26,  2.01batch/s, Batch Loss=0.0596, Avg Loss=0.1175, Time Left=11.86[A
Epoch 2/3 - Training:  59%|▌| 2013/3393 [15:51<11:20,  2.03batch/s, Batch Loss=0.0596, Avg Loss=0.1175, Time Left=11.86[A
Epoch 2/3 - Training:  59%|▌| 2013/3393 [15:52<11:20,  2.03batch/s, Batch Loss=0.0924, Avg Loss=0.1175, Time Left=11.86[A
Epoch 2/3 - Training:  59%|▌| 2014/3393 [15:52<11:34,  1.99batch/s, Batch Loss=0.0924, Avg Loss=0.1175, Time Left=11.86[A
Epoch 2/3 - Training:  59%|▌| 2014/3393 [15:52<11:34,  1.99batch/s, Batch Loss=0.0902, Avg Loss=0.1175, Time Left=11.85[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  60%|▌| 2044/3393 [16:06<10:57,  2.05batch/s, Batch Loss=0.0463, Avg Loss=0.1171, Time Left=11.61[A
Epoch 2/3 - Training:  60%|▌| 2044/3393 [16:07<10:57,  2.05batch/s, Batch Loss=0.0298, Avg Loss=0.1170, Time Left=11.60[A
Epoch 2/3 - Training:  60%|▌| 2045/3393 [16:07<10:55,  2.06batch/s, Batch Loss=0.0298, Avg Loss=0.1170, Time Left=11.60[A
Epoch 2/3 - Training:  60%|▌| 2045/3393 [16:07<10:55,  2.06batch/s, Batch Loss=0.0189, Avg Loss=0.1170, Time Left=11.59[A
Epoch 2/3 - Training:  60%|▌| 2046/3393 [16:07<10:53,  2.06batch/s, Batch Loss=0.0189, Avg Loss=0.1170, Time Left=11.59[A
Epoch 2/3 - Training:  60%|▌| 2046/3393 [16:08<10:53,  2.06batch/s, Batch Loss=0.0116, Avg Loss=0.1169, Time Left=11.58[A
Epoch 2/3 - Training:  60%|▌| 2047/3393 [16:08<10:53,  2.06batch/s, Batch Loss=0.0116, Avg Loss=0.1169, Time Left=11.58[A
Epoch 2/3 - Training:  60%|▌| 2047/3393 [16:08<10:53,  2.06batch/s, Batch Loss=0.0086, Avg Loss=0.1169, Time Left=11.58[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  61%|▌| 2077/3393 [16:22<10:34,  2.07batch/s, Batch Loss=0.0363, Avg Loss=0.1163, Time Left=11.34[A
Epoch 2/3 - Training:  61%|▌| 2077/3393 [16:23<10:34,  2.07batch/s, Batch Loss=0.0953, Avg Loss=0.1163, Time Left=11.33[A
Epoch 2/3 - Training:  61%|▌| 2078/3393 [16:23<10:33,  2.07batch/s, Batch Loss=0.0953, Avg Loss=0.1163, Time Left=11.33[A
Epoch 2/3 - Training:  61%|▌| 2078/3393 [16:23<10:33,  2.07batch/s, Batch Loss=0.0032, Avg Loss=0.1163, Time Left=11.32[A
Epoch 2/3 - Training:  61%|▌| 2079/3393 [16:23<10:40,  2.05batch/s, Batch Loss=0.0032, Avg Loss=0.1163, Time Left=11.32[A
Epoch 2/3 - Training:  61%|▌| 2079/3393 [16:24<10:40,  2.05batch/s, Batch Loss=0.0055, Avg Loss=0.1162, Time Left=11.31[A
Epoch 2/3 - Training:  61%|▌| 2080/3393 [16:24<10:39,  2.05batch/s, Batch Loss=0.0055, Avg Loss=0.1162, Time Left=11.31[A
Epoch 2/3 - Training:  61%|▌| 2080/3393 [16:24<10:39,  2.05batch/s, Batch Loss=0.2008, Avg Loss=0.1162, Time Left=11.30[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  62%|▌| 2110/3393 [16:38<10:25,  2.05batch/s, Batch Loss=0.0687, Avg Loss=0.1158, Time Left=11.07[A
Epoch 2/3 - Training:  62%|▌| 2110/3393 [16:39<10:25,  2.05batch/s, Batch Loss=0.0714, Avg Loss=0.1157, Time Left=11.06[A
Epoch 2/3 - Training:  62%|▌| 2111/3393 [16:39<10:23,  2.06batch/s, Batch Loss=0.0714, Avg Loss=0.1157, Time Left=11.06[A
Epoch 2/3 - Training:  62%|▌| 2111/3393 [16:39<10:23,  2.06batch/s, Batch Loss=0.1352, Avg Loss=0.1157, Time Left=11.05[A
Epoch 2/3 - Training:  62%|▌| 2112/3393 [16:39<10:27,  2.04batch/s, Batch Loss=0.1352, Avg Loss=0.1157, Time Left=11.05[A
Epoch 2/3 - Training:  62%|▌| 2112/3393 [16:40<10:27,  2.04batch/s, Batch Loss=0.0832, Avg Loss=0.1157, Time Left=11.04[A
Epoch 2/3 - Training:  62%|▌| 2113/3393 [16:40<10:19,  2.07batch/s, Batch Loss=0.0832, Avg Loss=0.1157, Time Left=11.04[A
Epoch 2/3 - Training:  62%|▌| 2113/3393 [16:40<10:19,  2.07batch/s, Batch Loss=0.1486, Avg Loss=0.1157, Time Left=11.03[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  63%|▋| 2143/3393 [16:54<10:18,  2.02batch/s, Batch Loss=0.2619, Avg Loss=0.1155, Time Left=10.80[A
Epoch 2/3 - Training:  63%|▋| 2143/3393 [16:55<10:18,  2.02batch/s, Batch Loss=0.1187, Avg Loss=0.1155, Time Left=10.79[A
Epoch 2/3 - Training:  63%|▋| 2144/3393 [16:55<10:13,  2.04batch/s, Batch Loss=0.1187, Avg Loss=0.1155, Time Left=10.79[A
Epoch 2/3 - Training:  63%|▋| 2144/3393 [16:55<10:13,  2.04batch/s, Batch Loss=0.0682, Avg Loss=0.1155, Time Left=10.78[A
Epoch 2/3 - Training:  63%|▋| 2145/3393 [16:55<10:10,  2.04batch/s, Batch Loss=0.0682, Avg Loss=0.1155, Time Left=10.78[A
Epoch 2/3 - Training:  63%|▋| 2145/3393 [16:56<10:10,  2.04batch/s, Batch Loss=0.1484, Avg Loss=0.1155, Time Left=10.77[A
Epoch 2/3 - Training:  63%|▋| 2146/3393 [16:56<10:26,  1.99batch/s, Batch Loss=0.1484, Avg Loss=0.1155, Time Left=10.77[A
Epoch 2/3 - Training:  63%|▋| 2146/3393 [16:56<10:26,  1.99batch/s, Batch Loss=0.0825, Avg Loss=0.1155, Time Left=10.76[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  64%|▋| 2176/3393 [17:10<09:51,  2.06batch/s, Batch Loss=0.1821, Avg Loss=0.1151, Time Left=10.53[A
Epoch 2/3 - Training:  64%|▋| 2176/3393 [17:11<09:51,  2.06batch/s, Batch Loss=0.0238, Avg Loss=0.1150, Time Left=10.52[A
Epoch 2/3 - Training:  64%|▋| 2177/3393 [17:11<09:50,  2.06batch/s, Batch Loss=0.0238, Avg Loss=0.1150, Time Left=10.52[A
Epoch 2/3 - Training:  64%|▋| 2177/3393 [17:11<09:50,  2.06batch/s, Batch Loss=0.3232, Avg Loss=0.1151, Time Left=10.51[A
Epoch 2/3 - Training:  64%|▋| 2178/3393 [17:11<10:00,  2.02batch/s, Batch Loss=0.3232, Avg Loss=0.1151, Time Left=10.51[A
Epoch 2/3 - Training:  64%|▋| 2178/3393 [17:12<10:00,  2.02batch/s, Batch Loss=0.0093, Avg Loss=0.1151, Time Left=10.50[A
Epoch 2/3 - Training:  64%|▋| 2179/3393 [17:12<10:03,  2.01batch/s, Batch Loss=0.0093, Avg Loss=0.1151, Time Left=10.50[A
Epoch 2/3 - Training:  64%|▋| 2179/3393 [17:12<10:03,  2.01batch/s, Batch Loss=0.0473, Avg Loss=0.1150, Time Left=10.49[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  65%|▋| 2209/3393 [17:27<09:35,  2.06batch/s, Batch Loss=0.1757, Avg Loss=0.1152, Time Left=10.26[A
Epoch 2/3 - Training:  65%|▋| 2209/3393 [17:27<09:35,  2.06batch/s, Batch Loss=0.2412, Avg Loss=0.1153, Time Left=10.25[A
Epoch 2/3 - Training:  65%|▋| 2210/3393 [17:27<09:30,  2.07batch/s, Batch Loss=0.2412, Avg Loss=0.1153, Time Left=10.25[A
Epoch 2/3 - Training:  65%|▋| 2210/3393 [17:27<09:30,  2.07batch/s, Batch Loss=0.0067, Avg Loss=0.1152, Time Left=10.24[A
Epoch 2/3 - Training:  65%|▋| 2211/3393 [17:27<09:30,  2.07batch/s, Batch Loss=0.0067, Avg Loss=0.1152, Time Left=10.24[A
Epoch 2/3 - Training:  65%|▋| 2211/3393 [17:28<09:30,  2.07batch/s, Batch Loss=0.0961, Avg Loss=0.1152, Time Left=10.23[A
Epoch 2/3 - Training:  65%|▋| 2212/3393 [17:28<09:23,  2.09batch/s, Batch Loss=0.0961, Avg Loss=0.1152, Time Left=10.23[A
Epoch 2/3 - Training:  65%|▋| 2212/3393 [17:28<09:23,  2.09batch/s, Batch Loss=0.0625, Avg Loss=0.1152, Time Left=10.22[A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  66%|▋| 2242/3393 [17:43<09:25,  2.03batch/s, Batch Loss=0.0212, Avg Loss=0.1147, Time Left=9.99 [A
Epoch 2/3 - Training:  66%|▋| 2242/3393 [17:43<09:25,  2.03batch/s, Batch Loss=0.3026, Avg Loss=0.1148, Time Left=9.98 [A
Epoch 2/3 - Training:  66%|▋| 2243/3393 [17:43<09:17,  2.06batch/s, Batch Loss=0.3026, Avg Loss=0.1148, Time Left=9.98 [A
Epoch 2/3 - Training:  66%|▋| 2243/3393 [17:44<09:17,  2.06batch/s, Batch Loss=0.2175, Avg Loss=0.1149, Time Left=9.97 [A
Epoch 2/3 - Training:  66%|▋| 2244/3393 [17:44<09:11,  2.08batch/s, Batch Loss=0.2175, Avg Loss=0.1149, Time Left=9.97 [A
Epoch 2/3 - Training:  66%|▋| 2244/3393 [17:44<09:11,  2.08batch/s, Batch Loss=0.0183, Avg Loss=0.1148, Time Left=9.96 [A
Epoch 2/3 - Training:  66%|▋| 2245/3393 [17:44<09:17,  2.06batch/s, Batch Loss=0.0183, Avg Loss=0.1148, Time Left=9.96 [A
Epoch 2/3 - Training:  66%|▋| 2245/3393 [17:45<09:17,  2.06batch/s, Batch Loss=0.0095, Avg Loss=0.1148, Time Left=9.95 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  67%|▋| 2275/3393 [17:59<08:57,  2.08batch/s, Batch Loss=0.0797, Avg Loss=0.1144, Time Left=9.72 [A
Epoch 2/3 - Training:  67%|▋| 2275/3393 [17:59<08:57,  2.08batch/s, Batch Loss=0.0059, Avg Loss=0.1144, Time Left=9.71 [A
Epoch 2/3 - Training:  67%|▋| 2276/3393 [17:59<08:52,  2.10batch/s, Batch Loss=0.0059, Avg Loss=0.1144, Time Left=9.71 [A
Epoch 2/3 - Training:  67%|▋| 2276/3393 [18:00<08:52,  2.10batch/s, Batch Loss=0.1339, Avg Loss=0.1144, Time Left=9.70 [A
Epoch 2/3 - Training:  67%|▋| 2277/3393 [18:00<08:53,  2.09batch/s, Batch Loss=0.1339, Avg Loss=0.1144, Time Left=9.70 [A
Epoch 2/3 - Training:  67%|▋| 2277/3393 [18:00<08:53,  2.09batch/s, Batch Loss=0.0013, Avg Loss=0.1143, Time Left=9.69 [A
Epoch 2/3 - Training:  67%|▋| 2278/3393 [18:00<08:55,  2.08batch/s, Batch Loss=0.0013, Avg Loss=0.1143, Time Left=9.69 [A
Epoch 2/3 - Training:  67%|▋| 2278/3393 [18:01<08:55,  2.08batch/s, Batch Loss=0.3613, Avg Loss=0.1144, Time Left=9.68 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  68%|▋| 2308/3393 [18:15<08:47,  2.06batch/s, Batch Loss=0.1023, Avg Loss=0.1144, Time Left=9.45 [A
Epoch 2/3 - Training:  68%|▋| 2308/3393 [18:15<08:47,  2.06batch/s, Batch Loss=0.1791, Avg Loss=0.1145, Time Left=9.44 [A
Epoch 2/3 - Training:  68%|▋| 2309/3393 [18:15<08:46,  2.06batch/s, Batch Loss=0.1791, Avg Loss=0.1145, Time Left=9.44 [A
Epoch 2/3 - Training:  68%|▋| 2309/3393 [18:16<08:46,  2.06batch/s, Batch Loss=0.0679, Avg Loss=0.1144, Time Left=9.43 [A
Epoch 2/3 - Training:  68%|▋| 2310/3393 [18:16<08:47,  2.05batch/s, Batch Loss=0.0679, Avg Loss=0.1144, Time Left=9.43 [A
Epoch 2/3 - Training:  68%|▋| 2310/3393 [18:16<08:47,  2.05batch/s, Batch Loss=0.0063, Avg Loss=0.1144, Time Left=9.42 [A
Epoch 2/3 - Training:  68%|▋| 2311/3393 [18:16<08:43,  2.07batch/s, Batch Loss=0.0063, Avg Loss=0.1144, Time Left=9.42 [A
Epoch 2/3 - Training:  68%|▋| 2311/3393 [18:17<08:43,  2.07batch/s, Batch Loss=0.0102, Avg Loss=0.1144, Time Left=9.41 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  69%|▋| 2341/3393 [18:31<08:26,  2.08batch/s, Batch Loss=0.0682, Avg Loss=0.1140, Time Left=9.18 [A
Epoch 2/3 - Training:  69%|▋| 2341/3393 [18:31<08:26,  2.08batch/s, Batch Loss=0.4494, Avg Loss=0.1141, Time Left=9.17 [A
Epoch 2/3 - Training:  69%|▋| 2342/3393 [18:31<08:31,  2.05batch/s, Batch Loss=0.4494, Avg Loss=0.1141, Time Left=9.17 [A
Epoch 2/3 - Training:  69%|▋| 2342/3393 [18:32<08:31,  2.05batch/s, Batch Loss=0.0014, Avg Loss=0.1141, Time Left=9.16 [A
Epoch 2/3 - Training:  69%|▋| 2343/3393 [18:32<08:34,  2.04batch/s, Batch Loss=0.0014, Avg Loss=0.1141, Time Left=9.16 [A
Epoch 2/3 - Training:  69%|▋| 2343/3393 [18:32<08:34,  2.04batch/s, Batch Loss=0.1542, Avg Loss=0.1141, Time Left=9.15 [A
Epoch 2/3 - Training:  69%|▋| 2344/3393 [18:32<08:33,  2.04batch/s, Batch Loss=0.1542, Avg Loss=0.1141, Time Left=9.15 [A
Epoch 2/3 - Training:  69%|▋| 2344/3393 [18:33<08:33,  2.04batch/s, Batch Loss=0.1148, Avg Loss=0.1141, Time Left=9.14 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  70%|▋| 2374/3393 [18:47<08:07,  2.09batch/s, Batch Loss=0.2079, Avg Loss=0.1139, Time Left=8.90 [A
Epoch 2/3 - Training:  70%|▋| 2374/3393 [18:47<08:07,  2.09batch/s, Batch Loss=0.0118, Avg Loss=0.1138, Time Left=8.90 [A
Epoch 2/3 - Training:  70%|▋| 2375/3393 [18:47<08:13,  2.06batch/s, Batch Loss=0.0118, Avg Loss=0.1138, Time Left=8.90 [A
Epoch 2/3 - Training:  70%|▋| 2375/3393 [18:48<08:13,  2.06batch/s, Batch Loss=0.2403, Avg Loss=0.1139, Time Left=8.89 [A
Epoch 2/3 - Training:  70%|▋| 2376/3393 [18:48<08:20,  2.03batch/s, Batch Loss=0.2403, Avg Loss=0.1139, Time Left=8.89 [A
Epoch 2/3 - Training:  70%|▋| 2376/3393 [18:48<08:20,  2.03batch/s, Batch Loss=0.1497, Avg Loss=0.1139, Time Left=8.88 [A
Epoch 2/3 - Training:  70%|▋| 2377/3393 [18:48<08:19,  2.03batch/s, Batch Loss=0.1497, Avg Loss=0.1139, Time Left=8.88 [A
Epoch 2/3 - Training:  70%|▋| 2377/3393 [18:49<08:19,  2.03batch/s, Batch Loss=0.1196, Avg Loss=0.1139, Time Left=8.87 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  71%|▋| 2407/3393 [19:03<08:00,  2.05batch/s, Batch Loss=0.1561, Avg Loss=0.1135, Time Left=8.63 [A
Epoch 2/3 - Training:  71%|▋| 2407/3393 [19:03<08:00,  2.05batch/s, Batch Loss=0.1344, Avg Loss=0.1135, Time Left=8.63 [A
Epoch 2/3 - Training:  71%|▋| 2408/3393 [19:03<07:59,  2.05batch/s, Batch Loss=0.1344, Avg Loss=0.1135, Time Left=8.63 [A
Epoch 2/3 - Training:  71%|▋| 2408/3393 [19:04<07:59,  2.05batch/s, Batch Loss=0.2065, Avg Loss=0.1135, Time Left=8.62 [A
Epoch 2/3 - Training:  71%|▋| 2409/3393 [19:04<07:57,  2.06batch/s, Batch Loss=0.2065, Avg Loss=0.1135, Time Left=8.62 [A
Epoch 2/3 - Training:  71%|▋| 2409/3393 [19:04<07:57,  2.06batch/s, Batch Loss=0.1185, Avg Loss=0.1135, Time Left=8.61 [A
Epoch 2/3 - Training:  71%|▋| 2410/3393 [19:04<08:01,  2.04batch/s, Batch Loss=0.1185, Avg Loss=0.1135, Time Left=8.61 [A
Epoch 2/3 - Training:  71%|▋| 2410/3393 [19:05<08:01,  2.04batch/s, Batch Loss=0.0050, Avg Loss=0.1135, Time Left=8.60 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  72%|▋| 2440/3393 [19:19<07:49,  2.03batch/s, Batch Loss=0.0379, Avg Loss=0.1130, Time Left=8.36 [A
Epoch 2/3 - Training:  72%|▋| 2440/3393 [19:19<07:49,  2.03batch/s, Batch Loss=0.0502, Avg Loss=0.1130, Time Left=8.36 [A
Epoch 2/3 - Training:  72%|▋| 2441/3393 [19:19<07:40,  2.07batch/s, Batch Loss=0.0502, Avg Loss=0.1130, Time Left=8.36 [A
Epoch 2/3 - Training:  72%|▋| 2441/3393 [19:20<07:40,  2.07batch/s, Batch Loss=0.1566, Avg Loss=0.1130, Time Left=8.35 [A
Epoch 2/3 - Training:  72%|▋| 2442/3393 [19:20<07:49,  2.03batch/s, Batch Loss=0.1566, Avg Loss=0.1130, Time Left=8.35 [A
Epoch 2/3 - Training:  72%|▋| 2442/3393 [19:20<07:49,  2.03batch/s, Batch Loss=0.0240, Avg Loss=0.1130, Time Left=8.34 [A
Epoch 2/3 - Training:  72%|▋| 2443/3393 [19:20<07:37,  2.08batch/s, Batch Loss=0.0240, Avg Loss=0.1130, Time Left=8.34 [A
Epoch 2/3 - Training:  72%|▋| 2443/3393 [19:21<07:37,  2.08batch/s, Batch Loss=0.1181, Avg Loss=0.1130, Time Left=8.33 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  73%|▋| 2473/3393 [19:35<07:23,  2.08batch/s, Batch Loss=0.4240, Avg Loss=0.1134, Time Left=8.09 [A
Epoch 2/3 - Training:  73%|▋| 2473/3393 [19:35<07:23,  2.08batch/s, Batch Loss=0.0036, Avg Loss=0.1133, Time Left=8.09 [A
Epoch 2/3 - Training:  73%|▋| 2474/3393 [19:35<07:19,  2.09batch/s, Batch Loss=0.0036, Avg Loss=0.1133, Time Left=8.09 [A
Epoch 2/3 - Training:  73%|▋| 2474/3393 [19:36<07:19,  2.09batch/s, Batch Loss=0.4143, Avg Loss=0.1135, Time Left=8.08 [A
Epoch 2/3 - Training:  73%|▋| 2475/3393 [19:36<07:15,  2.11batch/s, Batch Loss=0.4143, Avg Loss=0.1135, Time Left=8.08 [A
Epoch 2/3 - Training:  73%|▋| 2475/3393 [19:36<07:15,  2.11batch/s, Batch Loss=0.0568, Avg Loss=0.1134, Time Left=8.07 [A
Epoch 2/3 - Training:  73%|▋| 2476/3393 [19:36<07:12,  2.12batch/s, Batch Loss=0.0568, Avg Loss=0.1134, Time Left=8.07 [A
Epoch 2/3 - Training:  73%|▋| 2476/3393 [19:37<07:12,  2.12batch/s, Batch Loss=0.0457, Avg Loss=0.1134, Time Left=8.06 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  74%|▋| 2506/3393 [19:51<07:10,  2.06batch/s, Batch Loss=0.0142, Avg Loss=0.1134, Time Left=7.82 [A
Epoch 2/3 - Training:  74%|▋| 2506/3393 [19:52<07:10,  2.06batch/s, Batch Loss=0.0683, Avg Loss=0.1134, Time Left=7.82 [A
Epoch 2/3 - Training:  74%|▋| 2507/3393 [19:52<07:23,  2.00batch/s, Batch Loss=0.0683, Avg Loss=0.1134, Time Left=7.82 [A
Epoch 2/3 - Training:  74%|▋| 2507/3393 [19:52<07:23,  2.00batch/s, Batch Loss=0.0100, Avg Loss=0.1134, Time Left=7.81 [A
Epoch 2/3 - Training:  74%|▋| 2508/3393 [19:52<07:41,  1.92batch/s, Batch Loss=0.0100, Avg Loss=0.1134, Time Left=7.81 [A
Epoch 2/3 - Training:  74%|▋| 2508/3393 [19:53<07:41,  1.92batch/s, Batch Loss=0.1746, Avg Loss=0.1134, Time Left=7.80 [A
Epoch 2/3 - Training:  74%|▋| 2509/3393 [19:53<07:33,  1.95batch/s, Batch Loss=0.1746, Avg Loss=0.1134, Time Left=7.80 [A
Epoch 2/3 - Training:  74%|▋| 2509/3393 [19:53<07:33,  1.95batch/s, Batch Loss=0.0290, Avg Loss=0.1134, Time Left=7.79 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  75%|▋| 2539/3393 [20:07<06:48,  2.09batch/s, Batch Loss=0.0297, Avg Loss=0.1135, Time Left=7.56 [A
Epoch 2/3 - Training:  75%|▋| 2539/3393 [20:08<06:48,  2.09batch/s, Batch Loss=0.1198, Avg Loss=0.1135, Time Left=7.55 [A
Epoch 2/3 - Training:  75%|▋| 2540/3393 [20:08<06:52,  2.07batch/s, Batch Loss=0.1198, Avg Loss=0.1135, Time Left=7.55 [A
Epoch 2/3 - Training:  75%|▋| 2540/3393 [20:08<06:52,  2.07batch/s, Batch Loss=0.1610, Avg Loss=0.1135, Time Left=7.54 [A
Epoch 2/3 - Training:  75%|▋| 2541/3393 [20:08<06:52,  2.06batch/s, Batch Loss=0.1610, Avg Loss=0.1135, Time Left=7.54 [A
Epoch 2/3 - Training:  75%|▋| 2541/3393 [20:09<06:52,  2.06batch/s, Batch Loss=0.0350, Avg Loss=0.1135, Time Left=7.53 [A
Epoch 2/3 - Training:  75%|▋| 2542/3393 [20:09<07:00,  2.02batch/s, Batch Loss=0.0350, Avg Loss=0.1135, Time Left=7.53 [A
Epoch 2/3 - Training:  75%|▋| 2542/3393 [20:09<07:00,  2.02batch/s, Batch Loss=0.1323, Avg Loss=0.1135, Time Left=7.52 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  76%|▊| 2572/3393 [20:23<06:53,  1.98batch/s, Batch Loss=0.0875, Avg Loss=0.1137, Time Left=7.29 [A
Epoch 2/3 - Training:  76%|▊| 2572/3393 [20:24<06:53,  1.98batch/s, Batch Loss=0.1186, Avg Loss=0.1137, Time Left=7.28 [A
Epoch 2/3 - Training:  76%|▊| 2573/3393 [20:24<06:52,  1.99batch/s, Batch Loss=0.1186, Avg Loss=0.1137, Time Left=7.28 [A
Epoch 2/3 - Training:  76%|▊| 2573/3393 [20:24<06:52,  1.99batch/s, Batch Loss=0.1233, Avg Loss=0.1137, Time Left=7.27 [A
Epoch 2/3 - Training:  76%|▊| 2574/3393 [20:24<06:43,  2.03batch/s, Batch Loss=0.1233, Avg Loss=0.1137, Time Left=7.27 [A
Epoch 2/3 - Training:  76%|▊| 2574/3393 [20:25<06:43,  2.03batch/s, Batch Loss=0.0710, Avg Loss=0.1137, Time Left=7.26 [A
Epoch 2/3 - Training:  76%|▊| 2575/3393 [20:25<06:44,  2.02batch/s, Batch Loss=0.0710, Avg Loss=0.1137, Time Left=7.26 [A
Epoch 2/3 - Training:  76%|▊| 2575/3393 [20:25<06:44,  2.02batch/s, Batch Loss=0.0436, Avg Loss=0.1136, Time Left=7.25 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  77%|▊| 2605/3393 [20:40<06:17,  2.09batch/s, Batch Loss=0.0057, Avg Loss=0.1133, Time Left=7.02 [A
Epoch 2/3 - Training:  77%|▊| 2605/3393 [20:40<06:17,  2.09batch/s, Batch Loss=0.0165, Avg Loss=0.1132, Time Left=7.01 [A
Epoch 2/3 - Training:  77%|▊| 2606/3393 [20:40<06:18,  2.08batch/s, Batch Loss=0.0165, Avg Loss=0.1132, Time Left=7.01 [A
Epoch 2/3 - Training:  77%|▊| 2606/3393 [20:40<06:18,  2.08batch/s, Batch Loss=0.1143, Avg Loss=0.1132, Time Left=7.00 [A
Epoch 2/3 - Training:  77%|▊| 2607/3393 [20:40<06:14,  2.10batch/s, Batch Loss=0.1143, Avg Loss=0.1132, Time Left=7.00 [A
Epoch 2/3 - Training:  77%|▊| 2607/3393 [20:41<06:14,  2.10batch/s, Batch Loss=0.0195, Avg Loss=0.1132, Time Left=6.99 [A
Epoch 2/3 - Training:  77%|▊| 2608/3393 [20:41<06:14,  2.10batch/s, Batch Loss=0.0195, Avg Loss=0.1132, Time Left=6.99 [A
Epoch 2/3 - Training:  77%|▊| 2608/3393 [20:41<06:14,  2.10batch/s, Batch Loss=0.0246, Avg Loss=0.1132, Time Left=6.98 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  78%|▊| 2638/3393 [20:56<06:20,  1.98batch/s, Batch Loss=0.2209, Avg Loss=0.1133, Time Left=6.75 [A
Epoch 2/3 - Training:  78%|▊| 2638/3393 [20:56<06:20,  1.98batch/s, Batch Loss=0.0185, Avg Loss=0.1133, Time Left=6.74 [A
Epoch 2/3 - Training:  78%|▊| 2639/3393 [20:56<06:16,  2.00batch/s, Batch Loss=0.0185, Avg Loss=0.1133, Time Left=6.74 [A
Epoch 2/3 - Training:  78%|▊| 2639/3393 [20:57<06:16,  2.00batch/s, Batch Loss=0.0487, Avg Loss=0.1133, Time Left=6.73 [A
Epoch 2/3 - Training:  78%|▊| 2640/3393 [20:57<06:12,  2.02batch/s, Batch Loss=0.0487, Avg Loss=0.1133, Time Left=6.73 [A
Epoch 2/3 - Training:  78%|▊| 2640/3393 [20:57<06:12,  2.02batch/s, Batch Loss=0.0475, Avg Loss=0.1132, Time Left=6.72 [A
Epoch 2/3 - Training:  78%|▊| 2641/3393 [20:57<06:06,  2.05batch/s, Batch Loss=0.0475, Avg Loss=0.1132, Time Left=6.72 [A
Epoch 2/3 - Training:  78%|▊| 2641/3393 [20:58<06:06,  2.05batch/s, Batch Loss=0.0154, Avg Loss=0.1132, Time Left=6.71 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  79%|▊| 2671/3393 [21:11<05:49,  2.07batch/s, Batch Loss=0.0767, Avg Loss=0.1132, Time Left=6.48 [A
Epoch 2/3 - Training:  79%|▊| 2671/3393 [21:12<05:49,  2.07batch/s, Batch Loss=0.2942, Avg Loss=0.1132, Time Left=6.47 [A
Epoch 2/3 - Training:  79%|▊| 2672/3393 [21:12<05:59,  2.01batch/s, Batch Loss=0.2942, Avg Loss=0.1132, Time Left=6.47 [A
Epoch 2/3 - Training:  79%|▊| 2672/3393 [21:12<05:59,  2.01batch/s, Batch Loss=0.0299, Avg Loss=0.1132, Time Left=6.46 [A
Epoch 2/3 - Training:  79%|▊| 2673/3393 [21:12<05:56,  2.02batch/s, Batch Loss=0.0299, Avg Loss=0.1132, Time Left=6.46 [A
Epoch 2/3 - Training:  79%|▊| 2673/3393 [21:13<05:56,  2.02batch/s, Batch Loss=0.1218, Avg Loss=0.1132, Time Left=6.45 [A
Epoch 2/3 - Training:  79%|▊| 2674/3393 [21:13<05:53,  2.03batch/s, Batch Loss=0.1218, Avg Loss=0.1132, Time Left=6.45 [A
Epoch 2/3 - Training:  79%|▊| 2674/3393 [21:13<05:53,  2.03batch/s, Batch Loss=0.1330, Avg Loss=0.1132, Time Left=6.44 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  80%|▊| 2704/3393 [21:27<05:29,  2.09batch/s, Batch Loss=0.0029, Avg Loss=0.1126, Time Left=6.21 [A
Epoch 2/3 - Training:  80%|▊| 2704/3393 [21:28<05:29,  2.09batch/s, Batch Loss=0.0667, Avg Loss=0.1126, Time Left=6.20 [A
Epoch 2/3 - Training:  80%|▊| 2705/3393 [21:28<05:29,  2.09batch/s, Batch Loss=0.0667, Avg Loss=0.1126, Time Left=6.20 [A
Epoch 2/3 - Training:  80%|▊| 2705/3393 [21:28<05:29,  2.09batch/s, Batch Loss=0.1463, Avg Loss=0.1126, Time Left=6.19 [A
Epoch 2/3 - Training:  80%|▊| 2706/3393 [21:28<05:30,  2.08batch/s, Batch Loss=0.1463, Avg Loss=0.1126, Time Left=6.19 [A
Epoch 2/3 - Training:  80%|▊| 2706/3393 [21:29<05:30,  2.08batch/s, Batch Loss=0.0031, Avg Loss=0.1125, Time Left=6.18 [A
Epoch 2/3 - Training:  80%|▊| 2707/3393 [21:29<05:27,  2.10batch/s, Batch Loss=0.0031, Avg Loss=0.1125, Time Left=6.18 [A
Epoch 2/3 - Training:  80%|▊| 2707/3393 [21:29<05:27,  2.10batch/s, Batch Loss=0.2236, Avg Loss=0.1126, Time Left=6.17 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  81%|▊| 2737/3393 [21:43<05:10,  2.11batch/s, Batch Loss=0.0467, Avg Loss=0.1121, Time Left=5.94 [A
Epoch 2/3 - Training:  81%|▊| 2737/3393 [21:44<05:10,  2.11batch/s, Batch Loss=0.0029, Avg Loss=0.1121, Time Left=5.93 [A
Epoch 2/3 - Training:  81%|▊| 2738/3393 [21:44<05:21,  2.04batch/s, Batch Loss=0.0029, Avg Loss=0.1121, Time Left=5.93 [A
Epoch 2/3 - Training:  81%|▊| 2738/3393 [21:44<05:21,  2.04batch/s, Batch Loss=0.0681, Avg Loss=0.1121, Time Left=5.92 [A
Epoch 2/3 - Training:  81%|▊| 2739/3393 [21:44<05:16,  2.07batch/s, Batch Loss=0.0681, Avg Loss=0.1121, Time Left=5.92 [A
Epoch 2/3 - Training:  81%|▊| 2739/3393 [21:45<05:16,  2.07batch/s, Batch Loss=0.0366, Avg Loss=0.1121, Time Left=5.91 [A
Epoch 2/3 - Training:  81%|▊| 2740/3393 [21:45<05:22,  2.03batch/s, Batch Loss=0.0366, Avg Loss=0.1121, Time Left=5.91 [A
Epoch 2/3 - Training:  81%|▊| 2740/3393 [21:45<05:22,  2.03batch/s, Batch Loss=0.5951, Avg Loss=0.1122, Time Left=5.90 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  82%|▊| 2770/3393 [21:59<04:59,  2.08batch/s, Batch Loss=0.1058, Avg Loss=0.1127, Time Left=5.67 [A
Epoch 2/3 - Training:  82%|▊| 2770/3393 [22:00<04:59,  2.08batch/s, Batch Loss=0.0706, Avg Loss=0.1127, Time Left=5.66 [A
Epoch 2/3 - Training:  82%|▊| 2771/3393 [22:00<04:57,  2.09batch/s, Batch Loss=0.0706, Avg Loss=0.1127, Time Left=5.66 [A
Epoch 2/3 - Training:  82%|▊| 2771/3393 [22:00<04:57,  2.09batch/s, Batch Loss=0.0399, Avg Loss=0.1127, Time Left=5.65 [A
Epoch 2/3 - Training:  82%|▊| 2772/3393 [22:00<04:57,  2.09batch/s, Batch Loss=0.0399, Avg Loss=0.1127, Time Left=5.65 [A
Epoch 2/3 - Training:  82%|▊| 2772/3393 [22:01<04:57,  2.09batch/s, Batch Loss=0.0274, Avg Loss=0.1126, Time Left=5.64 [A
Epoch 2/3 - Training:  82%|▊| 2773/3393 [22:01<04:54,  2.10batch/s, Batch Loss=0.0274, Avg Loss=0.1126, Time Left=5.64 [A
Epoch 2/3 - Training:  82%|▊| 2773/3393 [22:01<04:54,  2.10batch/s, Batch Loss=0.0575, Avg Loss=0.1126, Time Left=5.63 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  83%|▊| 2803/3393 [22:15<04:43,  2.08batch/s, Batch Loss=0.3459, Avg Loss=0.1127, Time Left=5.40 [A
Epoch 2/3 - Training:  83%|▊| 2803/3393 [22:16<04:43,  2.08batch/s, Batch Loss=0.2816, Avg Loss=0.1127, Time Left=5.39 [A
Epoch 2/3 - Training:  83%|▊| 2804/3393 [22:16<04:46,  2.06batch/s, Batch Loss=0.2816, Avg Loss=0.1127, Time Left=5.39 [A
Epoch 2/3 - Training:  83%|▊| 2804/3393 [22:16<04:46,  2.06batch/s, Batch Loss=0.0731, Avg Loss=0.1127, Time Left=5.38 [A
Epoch 2/3 - Training:  83%|▊| 2805/3393 [22:16<04:45,  2.06batch/s, Batch Loss=0.0731, Avg Loss=0.1127, Time Left=5.38 [A
Epoch 2/3 - Training:  83%|▊| 2805/3393 [22:17<04:45,  2.06batch/s, Batch Loss=0.1376, Avg Loss=0.1127, Time Left=5.37 [A
Epoch 2/3 - Training:  83%|▊| 2806/3393 [22:17<04:41,  2.08batch/s, Batch Loss=0.1376, Avg Loss=0.1127, Time Left=5.37 [A
Epoch 2/3 - Training:  83%|▊| 2806/3393 [22:17<04:41,  2.08batch/s, Batch Loss=0.1114, Avg Loss=0.1127, Time Left=5.36 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  84%|▊| 2836/3393 [22:31<04:35,  2.02batch/s, Batch Loss=0.0265, Avg Loss=0.1126, Time Left=5.13 [A
Epoch 2/3 - Training:  84%|▊| 2836/3393 [22:32<04:35,  2.02batch/s, Batch Loss=0.0100, Avg Loss=0.1126, Time Left=5.12 [A
Epoch 2/3 - Training:  84%|▊| 2837/3393 [22:32<04:37,  2.00batch/s, Batch Loss=0.0100, Avg Loss=0.1126, Time Left=5.12 [A
Epoch 2/3 - Training:  84%|▊| 2837/3393 [22:32<04:37,  2.00batch/s, Batch Loss=0.1986, Avg Loss=0.1126, Time Left=5.11 [A
Epoch 2/3 - Training:  84%|▊| 2838/3393 [22:32<04:31,  2.04batch/s, Batch Loss=0.1986, Avg Loss=0.1126, Time Left=5.11 [A
Epoch 2/3 - Training:  84%|▊| 2838/3393 [22:33<04:31,  2.04batch/s, Batch Loss=0.0667, Avg Loss=0.1126, Time Left=5.10 [A
Epoch 2/3 - Training:  84%|▊| 2839/3393 [22:33<04:31,  2.04batch/s, Batch Loss=0.0667, Avg Loss=0.1126, Time Left=5.10 [A
Epoch 2/3 - Training:  84%|▊| 2839/3393 [22:33<04:31,  2.04batch/s, Batch Loss=0.0231, Avg Loss=0.1126, Time Left=5.09 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  85%|▊| 2869/3393 [22:48<04:17,  2.03batch/s, Batch Loss=0.0276, Avg Loss=0.1123, Time Left=4.86 [A
Epoch 2/3 - Training:  85%|▊| 2869/3393 [22:48<04:17,  2.03batch/s, Batch Loss=0.0285, Avg Loss=0.1123, Time Left=4.85 [A
Epoch 2/3 - Training:  85%|▊| 2870/3393 [22:48<04:13,  2.06batch/s, Batch Loss=0.0285, Avg Loss=0.1123, Time Left=4.85 [A
Epoch 2/3 - Training:  85%|▊| 2870/3393 [22:49<04:13,  2.06batch/s, Batch Loss=0.0773, Avg Loss=0.1123, Time Left=4.84 [A
Epoch 2/3 - Training:  85%|▊| 2871/3393 [22:49<04:12,  2.06batch/s, Batch Loss=0.0773, Avg Loss=0.1123, Time Left=4.84 [A
Epoch 2/3 - Training:  85%|▊| 2871/3393 [22:49<04:12,  2.06batch/s, Batch Loss=0.1497, Avg Loss=0.1123, Time Left=4.83 [A
Epoch 2/3 - Training:  85%|▊| 2872/3393 [22:49<04:16,  2.03batch/s, Batch Loss=0.1497, Avg Loss=0.1123, Time Left=4.83 [A
Epoch 2/3 - Training:  85%|▊| 2872/3393 [22:50<04:16,  2.03batch/s, Batch Loss=0.0408, Avg Loss=0.1123, Time Left=4.83 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  86%|▊| 2902/3393 [23:04<03:55,  2.08batch/s, Batch Loss=0.0350, Avg Loss=0.1120, Time Left=4.59 [A
Epoch 2/3 - Training:  86%|▊| 2902/3393 [23:04<03:55,  2.08batch/s, Batch Loss=0.2116, Avg Loss=0.1120, Time Left=4.58 [A
Epoch 2/3 - Training:  86%|▊| 2903/3393 [23:04<03:55,  2.08batch/s, Batch Loss=0.2116, Avg Loss=0.1120, Time Left=4.58 [A
Epoch 2/3 - Training:  86%|▊| 2903/3393 [23:05<03:55,  2.08batch/s, Batch Loss=0.0524, Avg Loss=0.1120, Time Left=4.57 [A
Epoch 2/3 - Training:  86%|▊| 2904/3393 [23:05<03:53,  2.10batch/s, Batch Loss=0.0524, Avg Loss=0.1120, Time Left=4.57 [A
Epoch 2/3 - Training:  86%|▊| 2904/3393 [23:05<03:53,  2.10batch/s, Batch Loss=0.1421, Avg Loss=0.1120, Time Left=4.56 [A
Epoch 2/3 - Training:  86%|▊| 2905/3393 [23:05<03:51,  2.11batch/s, Batch Loss=0.1421, Avg Loss=0.1120, Time Left=4.56 [A
Epoch 2/3 - Training:  86%|▊| 2905/3393 [23:06<03:51,  2.11batch/s, Batch Loss=0.1210, Avg Loss=0.1120, Time Left=4.56 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  87%|▊| 2935/3393 [23:20<03:44,  2.04batch/s, Batch Loss=0.0068, Avg Loss=0.1115, Time Left=4.32 [A
Epoch 2/3 - Training:  87%|▊| 2935/3393 [23:20<03:44,  2.04batch/s, Batch Loss=0.0082, Avg Loss=0.1115, Time Left=4.31 [A
Epoch 2/3 - Training:  87%|▊| 2936/3393 [23:20<03:43,  2.05batch/s, Batch Loss=0.0082, Avg Loss=0.1115, Time Left=4.31 [A
Epoch 2/3 - Training:  87%|▊| 2936/3393 [23:21<03:43,  2.05batch/s, Batch Loss=0.2667, Avg Loss=0.1116, Time Left=4.30 [A
Epoch 2/3 - Training:  87%|▊| 2937/3393 [23:21<03:44,  2.03batch/s, Batch Loss=0.2667, Avg Loss=0.1116, Time Left=4.30 [A
Epoch 2/3 - Training:  87%|▊| 2937/3393 [23:21<03:44,  2.03batch/s, Batch Loss=0.0018, Avg Loss=0.1115, Time Left=4.30 [A
Epoch 2/3 - Training:  87%|▊| 2938/3393 [23:21<03:40,  2.06batch/s, Batch Loss=0.0018, Avg Loss=0.1115, Time Left=4.30 [A
Epoch 2/3 - Training:  87%|▊| 2938/3393 [23:22<03:40,  2.06batch/s, Batch Loss=0.2187, Avg Loss=0.1116, Time Left=4.29 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  87%|▊| 2968/3393 [23:36<03:28,  2.04batch/s, Batch Loss=0.0085, Avg Loss=0.1112, Time Left=4.05 [A
Epoch 2/3 - Training:  87%|▊| 2968/3393 [23:36<03:28,  2.04batch/s, Batch Loss=0.1040, Avg Loss=0.1112, Time Left=4.04 [A
Epoch 2/3 - Training:  88%|▉| 2969/3393 [23:36<03:23,  2.09batch/s, Batch Loss=0.1040, Avg Loss=0.1112, Time Left=4.04 [A
Epoch 2/3 - Training:  88%|▉| 2969/3393 [23:36<03:23,  2.09batch/s, Batch Loss=0.0320, Avg Loss=0.1112, Time Left=4.03 [A
Epoch 2/3 - Training:  88%|▉| 2970/3393 [23:36<03:24,  2.06batch/s, Batch Loss=0.0320, Avg Loss=0.1112, Time Left=4.03 [A
Epoch 2/3 - Training:  88%|▉| 2970/3393 [23:37<03:24,  2.06batch/s, Batch Loss=0.1259, Avg Loss=0.1112, Time Left=4.03 [A
Epoch 2/3 - Training:  88%|▉| 2971/3393 [23:37<03:22,  2.09batch/s, Batch Loss=0.1259, Avg Loss=0.1112, Time Left=4.03 [A
Epoch 2/3 - Training:  88%|▉| 2971/3393 [23:37<03:22,  2.09batch/s, Batch Loss=0.0871, Avg Loss=0.1112, Time Left=4.02 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  88%|▉| 3001/3393 [23:51<03:06,  2.10batch/s, Batch Loss=0.0147, Avg Loss=0.1114, Time Left=3.78 [A
Epoch 2/3 - Training:  88%|▉| 3001/3393 [23:52<03:06,  2.10batch/s, Batch Loss=0.0995, Avg Loss=0.1114, Time Left=3.77 [A
Epoch 2/3 - Training:  88%|▉| 3002/3393 [23:52<03:11,  2.04batch/s, Batch Loss=0.0995, Avg Loss=0.1114, Time Left=3.77 [A
Epoch 2/3 - Training:  88%|▉| 3002/3393 [23:52<03:11,  2.04batch/s, Batch Loss=0.0934, Avg Loss=0.1114, Time Left=3.76 [A
Epoch 2/3 - Training:  89%|▉| 3003/3393 [23:52<03:11,  2.04batch/s, Batch Loss=0.0934, Avg Loss=0.1114, Time Left=3.76 [A
Epoch 2/3 - Training:  89%|▉| 3003/3393 [23:53<03:11,  2.04batch/s, Batch Loss=0.0945, Avg Loss=0.1114, Time Left=3.76 [A
Epoch 2/3 - Training:  89%|▉| 3004/3393 [23:53<03:10,  2.05batch/s, Batch Loss=0.0945, Avg Loss=0.1114, Time Left=3.76 [A
Epoch 2/3 - Training:  89%|▉| 3004/3393 [23:53<03:10,  2.05batch/s, Batch Loss=0.1846, Avg Loss=0.1114, Time Left=3.75 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  89%|▉| 3034/3393 [24:07<02:52,  2.08batch/s, Batch Loss=0.0043, Avg Loss=0.1111, Time Left=3.51 [A
Epoch 2/3 - Training:  89%|▉| 3034/3393 [24:08<02:52,  2.08batch/s, Batch Loss=0.0739, Avg Loss=0.1111, Time Left=3.50 [A
Epoch 2/3 - Training:  89%|▉| 3035/3393 [24:08<02:50,  2.10batch/s, Batch Loss=0.0739, Avg Loss=0.1111, Time Left=3.50 [A
Epoch 2/3 - Training:  89%|▉| 3035/3393 [24:08<02:50,  2.10batch/s, Batch Loss=0.1039, Avg Loss=0.1111, Time Left=3.50 [A
Epoch 2/3 - Training:  89%|▉| 3036/3393 [24:08<02:50,  2.09batch/s, Batch Loss=0.1039, Avg Loss=0.1111, Time Left=3.50 [A
Epoch 2/3 - Training:  89%|▉| 3036/3393 [24:09<02:50,  2.09batch/s, Batch Loss=0.0355, Avg Loss=0.1111, Time Left=3.49 [A
Epoch 2/3 - Training:  90%|▉| 3037/3393 [24:09<02:50,  2.08batch/s, Batch Loss=0.0355, Avg Loss=0.1111, Time Left=3.49 [A
Epoch 2/3 - Training:  90%|▉| 3037/3393 [24:09<02:50,  2.08batch/s, Batch Loss=0.0135, Avg Loss=0.1111, Time Left=3.48 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  90%|▉| 3067/3393 [24:24<02:35,  2.09batch/s, Batch Loss=0.3536, Avg Loss=0.1112, Time Left=3.24 [A
Epoch 2/3 - Training:  90%|▉| 3067/3393 [24:24<02:35,  2.09batch/s, Batch Loss=0.0276, Avg Loss=0.1111, Time Left=3.23 [A
Epoch 2/3 - Training:  90%|▉| 3068/3393 [24:24<02:37,  2.07batch/s, Batch Loss=0.0276, Avg Loss=0.1111, Time Left=3.23 [A
Epoch 2/3 - Training:  90%|▉| 3068/3393 [24:24<02:37,  2.07batch/s, Batch Loss=0.1303, Avg Loss=0.1112, Time Left=3.23 [A
Epoch 2/3 - Training:  90%|▉| 3069/3393 [24:24<02:36,  2.07batch/s, Batch Loss=0.1303, Avg Loss=0.1112, Time Left=3.23 [A
Epoch 2/3 - Training:  90%|▉| 3069/3393 [24:25<02:36,  2.07batch/s, Batch Loss=0.0570, Avg Loss=0.1111, Time Left=3.22 [A
Epoch 2/3 - Training:  90%|▉| 3070/3393 [24:25<02:34,  2.09batch/s, Batch Loss=0.0570, Avg Loss=0.1111, Time Left=3.22 [A
Epoch 2/3 - Training:  90%|▉| 3070/3393 [24:25<02:34,  2.09batch/s, Batch Loss=0.0487, Avg Loss=0.1111, Time Left=3.21 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  91%|▉| 3100/3393 [24:39<02:22,  2.06batch/s, Batch Loss=0.0744, Avg Loss=0.1109, Time Left=2.97 [A
Epoch 2/3 - Training:  91%|▉| 3100/3393 [24:40<02:22,  2.06batch/s, Batch Loss=0.2027, Avg Loss=0.1109, Time Left=2.96 [A
Epoch 2/3 - Training:  91%|▉| 3101/3393 [24:40<02:24,  2.02batch/s, Batch Loss=0.2027, Avg Loss=0.1109, Time Left=2.96 [A
Epoch 2/3 - Training:  91%|▉| 3101/3393 [24:40<02:24,  2.02batch/s, Batch Loss=0.1465, Avg Loss=0.1109, Time Left=2.96 [A
Epoch 2/3 - Training:  91%|▉| 3102/3393 [24:40<02:23,  2.03batch/s, Batch Loss=0.1465, Avg Loss=0.1109, Time Left=2.96 [A
Epoch 2/3 - Training:  91%|▉| 3102/3393 [24:41<02:23,  2.03batch/s, Batch Loss=0.0102, Avg Loss=0.1109, Time Left=2.95 [A
Epoch 2/3 - Training:  91%|▉| 3103/3393 [24:41<02:23,  2.02batch/s, Batch Loss=0.0102, Avg Loss=0.1109, Time Left=2.95 [A
Epoch 2/3 - Training:  91%|▉| 3103/3393 [24:41<02:23,  2.02batch/s, Batch Loss=0.1255, Avg Loss=0.1109, Time Left=2.94 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  92%|▉| 3133/3393 [24:55<02:10,  1.99batch/s, Batch Loss=0.0201, Avg Loss=0.1108, Time Left=2.70 [A
Epoch 2/3 - Training:  92%|▉| 3133/3393 [24:56<02:10,  1.99batch/s, Batch Loss=0.1049, Avg Loss=0.1108, Time Left=2.70 [A
Epoch 2/3 - Training:  92%|▉| 3134/3393 [24:56<02:10,  1.99batch/s, Batch Loss=0.1049, Avg Loss=0.1108, Time Left=2.70 [A
Epoch 2/3 - Training:  92%|▉| 3134/3393 [24:56<02:10,  1.99batch/s, Batch Loss=0.0626, Avg Loss=0.1108, Time Left=2.69 [A
Epoch 2/3 - Training:  92%|▉| 3135/3393 [24:56<02:08,  2.01batch/s, Batch Loss=0.0626, Avg Loss=0.1108, Time Left=2.69 [A
Epoch 2/3 - Training:  92%|▉| 3135/3393 [24:57<02:08,  2.01batch/s, Batch Loss=0.0581, Avg Loss=0.1108, Time Left=2.68 [A
Epoch 2/3 - Training:  92%|▉| 3136/3393 [24:57<02:05,  2.05batch/s, Batch Loss=0.0581, Avg Loss=0.1108, Time Left=2.68 [A
Epoch 2/3 - Training:  92%|▉| 3136/3393 [24:57<02:05,  2.05batch/s, Batch Loss=0.0233, Avg Loss=0.1108, Time Left=2.67 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  93%|▉| 3166/3393 [25:11<01:53,  2.00batch/s, Batch Loss=0.0523, Avg Loss=0.1104, Time Left=2.44 [A
Epoch 2/3 - Training:  93%|▉| 3166/3393 [25:12<01:53,  2.00batch/s, Batch Loss=0.0319, Avg Loss=0.1104, Time Left=2.43 [A
Epoch 2/3 - Training:  93%|▉| 3167/3393 [25:12<01:51,  2.02batch/s, Batch Loss=0.0319, Avg Loss=0.1104, Time Left=2.43 [A
Epoch 2/3 - Training:  93%|▉| 3167/3393 [25:12<01:51,  2.02batch/s, Batch Loss=0.3885, Avg Loss=0.1105, Time Left=2.42 [A
Epoch 2/3 - Training:  93%|▉| 3168/3393 [25:12<01:51,  2.01batch/s, Batch Loss=0.3885, Avg Loss=0.1105, Time Left=2.42 [A
Epoch 2/3 - Training:  93%|▉| 3168/3393 [25:13<01:51,  2.01batch/s, Batch Loss=0.0106, Avg Loss=0.1104, Time Left=2.41 [A
Epoch 2/3 - Training:  93%|▉| 3169/3393 [25:13<01:51,  2.01batch/s, Batch Loss=0.0106, Avg Loss=0.1104, Time Left=2.41 [A
Epoch 2/3 - Training:  93%|▉| 3169/3393 [25:13<01:51,  2.01batch/s, Batch Loss=0.0201, Avg Loss=0.1104, Time Left=2.40 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  94%|▉| 3199/3393 [25:27<01:33,  2.08batch/s, Batch Loss=0.0157, Avg Loss=0.1106, Time Left=2.17 [A
Epoch 2/3 - Training:  94%|▉| 3199/3393 [25:28<01:33,  2.08batch/s, Batch Loss=0.0693, Avg Loss=0.1106, Time Left=2.16 [A
Epoch 2/3 - Training:  94%|▉| 3200/3393 [25:28<01:36,  2.00batch/s, Batch Loss=0.0693, Avg Loss=0.1106, Time Left=2.16 [A
Epoch 2/3 - Training:  94%|▉| 3200/3393 [25:28<01:36,  2.00batch/s, Batch Loss=0.0355, Avg Loss=0.1105, Time Left=2.15 [A
Epoch 2/3 - Training:  94%|▉| 3201/3393 [25:28<01:35,  2.01batch/s, Batch Loss=0.0355, Avg Loss=0.1105, Time Left=2.15 [A
Epoch 2/3 - Training:  94%|▉| 3201/3393 [25:29<01:35,  2.01batch/s, Batch Loss=0.0256, Avg Loss=0.1105, Time Left=2.14 [A
Epoch 2/3 - Training:  94%|▉| 3202/3393 [25:29<01:35,  2.01batch/s, Batch Loss=0.0256, Avg Loss=0.1105, Time Left=2.14 [A
Epoch 2/3 - Training:  94%|▉| 3202/3393 [25:29<01:35,  2.01batch/s, Batch Loss=0.0097, Avg Loss=0.1105, Time Left=2.13 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  95%|▉| 3232/3393 [25:44<01:21,  1.98batch/s, Batch Loss=0.0696, Avg Loss=0.1105, Time Left=1.90 [A
Epoch 2/3 - Training:  95%|▉| 3232/3393 [25:44<01:21,  1.98batch/s, Batch Loss=0.0842, Avg Loss=0.1105, Time Left=1.89 [A
Epoch 2/3 - Training:  95%|▉| 3233/3393 [25:44<01:19,  2.02batch/s, Batch Loss=0.0842, Avg Loss=0.1105, Time Left=1.89 [A
Epoch 2/3 - Training:  95%|▉| 3233/3393 [25:45<01:19,  2.02batch/s, Batch Loss=0.0274, Avg Loss=0.1105, Time Left=1.88 [A
Epoch 2/3 - Training:  95%|▉| 3234/3393 [25:45<01:17,  2.06batch/s, Batch Loss=0.0274, Avg Loss=0.1105, Time Left=1.88 [A
Epoch 2/3 - Training:  95%|▉| 3234/3393 [25:45<01:17,  2.06batch/s, Batch Loss=0.1099, Avg Loss=0.1105, Time Left=1.87 [A
Epoch 2/3 - Training:  95%|▉| 3235/3393 [25:45<01:15,  2.10batch/s, Batch Loss=0.1099, Avg Loss=0.1105, Time Left=1.87 [A
Epoch 2/3 - Training:  95%|▉| 3235/3393 [25:46<01:15,  2.10batch/s, Batch Loss=0.0131, Avg Loss=0.1104, Time Left=1.87 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  96%|▉| 3265/3393 [26:00<01:01,  2.08batch/s, Batch Loss=0.0930, Avg Loss=0.1102, Time Left=1.63 [A
Epoch 2/3 - Training:  96%|▉| 3265/3393 [26:00<01:01,  2.08batch/s, Batch Loss=0.1866, Avg Loss=0.1102, Time Left=1.62 [A
Epoch 2/3 - Training:  96%|▉| 3266/3393 [26:00<01:01,  2.07batch/s, Batch Loss=0.1866, Avg Loss=0.1102, Time Left=1.62 [A
Epoch 2/3 - Training:  96%|▉| 3266/3393 [26:01<01:01,  2.07batch/s, Batch Loss=0.0048, Avg Loss=0.1102, Time Left=1.61 [A
Epoch 2/3 - Training:  96%|▉| 3267/3393 [26:01<01:00,  2.09batch/s, Batch Loss=0.0048, Avg Loss=0.1102, Time Left=1.61 [A
Epoch 2/3 - Training:  96%|▉| 3267/3393 [26:01<01:00,  2.09batch/s, Batch Loss=0.4739, Avg Loss=0.1103, Time Left=1.60 [A
Epoch 2/3 - Training:  96%|▉| 3268/3393 [26:01<00:59,  2.08batch/s, Batch Loss=0.4739, Avg Loss=0.1103, Time Left=1.60 [A
Epoch 2/3 - Training:  96%|▉| 3268/3393 [26:02<00:59,  2.08batch/s, Batch Loss=0.0951, Avg Loss=0.1103, Time Left=1.60 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  97%|▉| 3298/3393 [26:16<00:45,  2.08batch/s, Batch Loss=0.0368, Avg Loss=0.1100, Time Left=1.36 [A
Epoch 2/3 - Training:  97%|▉| 3298/3393 [26:16<00:45,  2.08batch/s, Batch Loss=0.1389, Avg Loss=0.1100, Time Left=1.35 [A
Epoch 2/3 - Training:  97%|▉| 3299/3393 [26:16<00:45,  2.08batch/s, Batch Loss=0.1389, Avg Loss=0.1100, Time Left=1.35 [A
Epoch 2/3 - Training:  97%|▉| 3299/3393 [26:17<00:45,  2.08batch/s, Batch Loss=0.0933, Avg Loss=0.1100, Time Left=1.34 [A
Epoch 2/3 - Training:  97%|▉| 3300/3393 [26:17<00:44,  2.09batch/s, Batch Loss=0.0933, Avg Loss=0.1100, Time Left=1.34 [A
Epoch 2/3 - Training:  97%|▉| 3300/3393 [26:17<00:44,  2.09batch/s, Batch Loss=0.0689, Avg Loss=0.1100, Time Left=1.34 [A
Epoch 2/3 - Training:  97%|▉| 3301/3393 [26:17<00:44,  2.08batch/s, Batch Loss=0.0689, Avg Loss=0.1100, Time Left=1.34 [A
Epoch 2/3 - Training:  97%|▉| 3301/3393 [26:18<00:44,  2.08batch/s, Batch Loss=0.0144, Avg Loss=0.1100, Time Left=1.33 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  98%|▉| 3331/3393 [26:32<00:30,  2.05batch/s, Batch Loss=0.0408, Avg Loss=0.1097, Time Left=1.09 [A
Epoch 2/3 - Training:  98%|▉| 3331/3393 [26:32<00:30,  2.05batch/s, Batch Loss=0.0525, Avg Loss=0.1097, Time Left=1.08 [A
Epoch 2/3 - Training:  98%|▉| 3332/3393 [26:32<00:30,  2.03batch/s, Batch Loss=0.0525, Avg Loss=0.1097, Time Left=1.08 [A
Epoch 2/3 - Training:  98%|▉| 3332/3393 [26:33<00:30,  2.03batch/s, Batch Loss=0.0815, Avg Loss=0.1097, Time Left=1.07 [A
Epoch 2/3 - Training:  98%|▉| 3333/3393 [26:33<00:29,  2.06batch/s, Batch Loss=0.0815, Avg Loss=0.1097, Time Left=1.07 [A
Epoch 2/3 - Training:  98%|▉| 3333/3393 [26:33<00:29,  2.06batch/s, Batch Loss=0.0061, Avg Loss=0.1097, Time Left=1.07 [A
Epoch 2/3 - Training:  98%|▉| 3334/3393 [26:33<00:28,  2.07batch/s, Batch Loss=0.0061, Avg Loss=0.1097, Time Left=1.07 [A
Epoch 2/3 - Training:  98%|▉| 3334/3393 [26:34<00:28,  2.07batch/s, Batch Loss=0.0387, Avg Loss=0.1096, Time Left=1.06 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training:  99%|▉| 3364/3393 [26:48<00:14,  2.00batch/s, Batch Loss=0.0996, Avg Loss=0.1093, Time Left=0.82 [A
Epoch 2/3 - Training:  99%|▉| 3364/3393 [26:48<00:14,  2.00batch/s, Batch Loss=0.0138, Avg Loss=0.1093, Time Left=0.81 [A
Epoch 2/3 - Training:  99%|▉| 3365/3393 [26:48<00:13,  2.02batch/s, Batch Loss=0.0138, Avg Loss=0.1093, Time Left=0.81 [A
Epoch 2/3 - Training:  99%|▉| 3365/3393 [26:49<00:13,  2.02batch/s, Batch Loss=0.0144, Avg Loss=0.1092, Time Left=0.81 [A
Epoch 2/3 - Training:  99%|▉| 3366/3393 [26:49<00:13,  1.98batch/s, Batch Loss=0.0144, Avg Loss=0.1092, Time Left=0.81 [A
Epoch 2/3 - Training:  99%|▉| 3366/3393 [26:49<00:13,  1.98batch/s, Batch Loss=0.0201, Avg Loss=0.1092, Time Left=0.80 [A
Epoch 2/3 - Training:  99%|▉| 3367/3393 [26:49<00:12,  2.00batch/s, Batch Loss=0.0201, Avg Loss=0.1092, Time Left=0.80 [A
Epoch 2/3 - Training:  99%|▉| 3367/3393 [26:50<00:12,  2.00batch/s, Batch Loss=0.0382, Avg Loss=0.1092, Time Left=0.79 [A
Epoch 2/3 - Trai

Epoch 2/3 - Training: 3397batch [27:04,  2.01batch/s, Batch Loss=0.0644, Avg Loss=0.1089, Time Left=0.55 min][A
Epoch 2/3 - Training: 3398batch [27:04,  1.99batch/s, Batch Loss=0.0644, Avg Loss=0.1089, Time Left=0.55 min][A
Epoch 2/3 - Training: 3398batch [27:05,  1.99batch/s, Batch Loss=0.1289, Avg Loss=0.1089, Time Left=0.54 min][A
Epoch 2/3 - Training: 3399batch [27:05,  1.99batch/s, Batch Loss=0.1289, Avg Loss=0.1089, Time Left=0.54 min][A
Epoch 2/3 - Training: 3399batch [27:05,  1.99batch/s, Batch Loss=0.1253, Avg Loss=0.1089, Time Left=0.53 min][A
Epoch 2/3 - Training: 3400batch [27:05,  1.97batch/s, Batch Loss=0.1253, Avg Loss=0.1089, Time Left=0.53 min][A
Epoch 2/3 - Training: 3400batch [27:06,  1.97batch/s, Batch Loss=0.4285, Avg Loss=0.1090, Time Left=0.52 min][A
Epoch 2/3 - Training: 3401batch [27:06,  1.98batch/s, Batch Loss=0.4285, Avg Loss=0.1090, Time Left=0.52 min][A
Epoch 2/3 - Training: 3401batch [27:06,  1.98batch/s, Batch Loss=0.1613, Avg Loss=0.1090, Time L

Epoch 2/3 - Training: 3433batch [27:22,  2.07batch/s, Batch Loss=0.0378, Avg Loss=0.1091, Time Left=0.25 min][A
Epoch 2/3 - Training: 3434batch [27:22,  2.05batch/s, Batch Loss=0.0378, Avg Loss=0.1091, Time Left=0.25 min][A
Epoch 2/3 - Training: 3434batch [27:22,  2.05batch/s, Batch Loss=0.1832, Avg Loss=0.1091, Time Left=0.24 min][A
Epoch 2/3 - Training: 3435batch [27:22,  2.07batch/s, Batch Loss=0.1832, Avg Loss=0.1091, Time Left=0.24 min][A
Epoch 2/3 - Training: 3435batch [27:23,  2.07batch/s, Batch Loss=0.0415, Avg Loss=0.1091, Time Left=0.24 min][A
Epoch 2/3 - Training: 3436batch [27:23,  2.09batch/s, Batch Loss=0.0415, Avg Loss=0.1091, Time Left=0.24 min][A
Epoch 2/3 - Training: 3436batch [27:23,  2.09batch/s, Batch Loss=0.0422, Avg Loss=0.1091, Time Left=0.23 min][A
Epoch 2/3 - Training: 3437batch [27:23,  2.08batch/s, Batch Loss=0.0422, Avg Loss=0.1091, Time Left=0.23 min][A
Epoch 2/3 - Training: 3437batch [27:24,  2.08batch/s, Batch Loss=0.0212, Avg Loss=0.1090, Time L


Epoch 2/3 Results:
Train Loss: 0.1088
Validation Loss: 0.0997, Accuracy: 0.9614

Starting Epoch 3/3



  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

Epoch 3/3 - Training:   0%|       | 0/3393 [00:00<?, ?batch/s, Batch Loss=0.1308, Avg Loss=0.1308, Time Left=30.41 min][A
Epoch 3/3 - Training:   3%| | 100/3393 [00:00<00:17, 185.93batch/s, Batch Loss=0.1308, Avg Loss=0.1308, Time Left=30.41[A
Epoch 3/3 - Training:   3%| | 100/3393 [00:16<00:17, 185.93batch/s, Batch Loss=0.1308, Avg Loss=0.1308, Time Left=30.41[A
Epoch 3/3 - Training:   3%| | 100/3393 [00:16<00:17, 185.93batch/s, Batch Loss=0.0069, Avg Loss=0.0694, Time Left=27.04[A
Epoch 3/3 - Training:   3%| | 101/3393 [00:16<12:57,  4.24batch/s, Batch Loss=0.0069, Avg Loss=0.0694, Time Left=27.04 [A
Epoch 3/3 - Training:   3%| | 101/3393 [00:17<12:57,  4.24batch/s, Batch Loss=0.0763, Avg Loss=0.0696, Time Left=27.03 [A
Epoch 3/3 - Training:   3%| | 102/3393 [00:17<13:12,  4.15batch/s, Batch Loss=0.0763, Avg Loss=0.0696, Time Left=27.03 [A
Epoch 3/3 - Training:   3%| | 102/3393 [00:17<13:12,  4.15b

Epoch 3/3 - Training:   4%| | 130/3393 [00:31<26:34,  2.05batch/s, Batch Loss=0.0046, Avg Loss=0.0705, Time Left=26.88 [A
Epoch 3/3 - Training:   4%| | 130/3393 [00:31<26:34,  2.05batch/s, Batch Loss=0.0054, Avg Loss=0.0695, Time Left=26.84 [A
Epoch 3/3 - Training:   4%| | 131/3393 [00:31<25:58,  2.09batch/s, Batch Loss=0.0054, Avg Loss=0.0695, Time Left=26.84 [A
Epoch 3/3 - Training:   4%| | 131/3393 [00:31<25:58,  2.09batch/s, Batch Loss=0.0093, Avg Loss=0.0686, Time Left=26.85 [A
Epoch 3/3 - Training:   4%| | 132/3393 [00:31<26:16,  2.07batch/s, Batch Loss=0.0093, Avg Loss=0.0686, Time Left=26.85 [A
Epoch 3/3 - Training:   4%| | 132/3393 [00:32<26:16,  2.07batch/s, Batch Loss=0.0169, Avg Loss=0.0679, Time Left=26.84 [A
Epoch 3/3 - Training:   4%| | 133/3393 [00:32<26:18,  2.07batch/s, Batch Loss=0.0169, Avg Loss=0.0679, Time Left=26.84 [A
Epoch 3/3 - Training:   4%| | 133/3393 [00:32<26:18,  2.07batch/s, Batch Loss=0.0061, Avg Loss=0.0669, Time Left=26.82 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:   5%| | 163/3393 [00:47<26:01,  2.07batch/s, Batch Loss=0.1909, Avg Loss=0.0805, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 163/3393 [00:47<26:01,  2.07batch/s, Batch Loss=0.0553, Avg Loss=0.0802, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 164/3393 [00:47<26:31,  2.03batch/s, Batch Loss=0.0553, Avg Loss=0.0802, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 164/3393 [00:48<26:31,  2.03batch/s, Batch Loss=0.0257, Avg Loss=0.0797, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 165/3393 [00:48<26:39,  2.02batch/s, Batch Loss=0.0257, Avg Loss=0.0797, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 165/3393 [00:48<26:39,  2.02batch/s, Batch Loss=0.1840, Avg Loss=0.0807, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 166/3393 [00:48<26:29,  2.03batch/s, Batch Loss=0.1840, Avg Loss=0.0807, Time Left=26.67 [A
Epoch 3/3 - Training:   5%| | 166/3393 [00:49<26:29,  2.03batch/s, Batch Loss=0.1461, Avg Loss=0.0814, Time Left=26.66 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:   6%| | 196/3393 [01:03<25:53,  2.06batch/s, Batch Loss=0.0023, Avg Loss=0.0794, Time Left=26.44 [A
Epoch 3/3 - Training:   6%| | 196/3393 [01:03<25:53,  2.06batch/s, Batch Loss=0.0100, Avg Loss=0.0789, Time Left=26.43 [A
Epoch 3/3 - Training:   6%| | 197/3393 [01:03<25:48,  2.06batch/s, Batch Loss=0.0100, Avg Loss=0.0789, Time Left=26.43 [A
Epoch 3/3 - Training:   6%| | 197/3393 [01:04<25:48,  2.06batch/s, Batch Loss=0.0921, Avg Loss=0.0790, Time Left=26.43 [A
Epoch 3/3 - Training:   6%| | 198/3393 [01:04<26:01,  2.05batch/s, Batch Loss=0.0921, Avg Loss=0.0790, Time Left=26.43 [A
Epoch 3/3 - Training:   6%| | 198/3393 [01:04<26:01,  2.05batch/s, Batch Loss=0.0107, Avg Loss=0.0785, Time Left=26.41 [A
Epoch 3/3 - Training:   6%| | 199/3393 [01:04<25:42,  2.07batch/s, Batch Loss=0.0107, Avg Loss=0.0785, Time Left=26.41 [A
Epoch 3/3 - Training:   6%| | 199/3393 [01:05<25:42,  2.07batch/s, Batch Loss=0.0015, Avg Loss=0.0779, Time Left=26.40 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:   7%| | 229/3393 [01:19<25:15,  2.09batch/s, Batch Loss=0.0772, Avg Loss=0.0750, Time Left=26.21 [A
Epoch 3/3 - Training:   7%| | 229/3393 [01:19<25:15,  2.09batch/s, Batch Loss=0.2372, Avg Loss=0.0760, Time Left=26.20 [A
Epoch 3/3 - Training:   7%| | 230/3393 [01:19<25:19,  2.08batch/s, Batch Loss=0.2372, Avg Loss=0.0760, Time Left=26.20 [A
Epoch 3/3 - Training:   7%| | 230/3393 [01:20<25:19,  2.08batch/s, Batch Loss=0.1815, Avg Loss=0.0766, Time Left=26.19 [A
Epoch 3/3 - Training:   7%| | 231/3393 [01:20<25:36,  2.06batch/s, Batch Loss=0.1815, Avg Loss=0.0766, Time Left=26.19 [A
Epoch 3/3 - Training:   7%| | 231/3393 [01:20<25:36,  2.06batch/s, Batch Loss=0.0556, Avg Loss=0.0765, Time Left=26.18 [A
Epoch 3/3 - Training:   7%| | 232/3393 [01:20<25:19,  2.08batch/s, Batch Loss=0.0556, Avg Loss=0.0765, Time Left=26.18 [A
Epoch 3/3 - Training:   7%| | 232/3393 [01:21<25:19,  2.08batch/s, Batch Loss=0.0015, Avg Loss=0.0761, Time Left=26.16 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:   8%| | 262/3393 [01:35<25:48,  2.02batch/s, Batch Loss=0.0705, Avg Loss=0.0771, Time Left=25.96 [A
Epoch 3/3 - Training:   8%| | 262/3393 [01:35<25:48,  2.02batch/s, Batch Loss=0.0397, Avg Loss=0.0769, Time Left=25.95 [A
Epoch 3/3 - Training:   8%| | 263/3393 [01:35<25:22,  2.06batch/s, Batch Loss=0.0397, Avg Loss=0.0769, Time Left=25.95 [A
Epoch 3/3 - Training:   8%| | 263/3393 [01:36<25:22,  2.06batch/s, Batch Loss=0.0693, Avg Loss=0.0769, Time Left=25.93 [A
Epoch 3/3 - Training:   8%| | 264/3393 [01:36<24:49,  2.10batch/s, Batch Loss=0.0693, Avg Loss=0.0769, Time Left=25.93 [A
Epoch 3/3 - Training:   8%| | 264/3393 [01:36<24:49,  2.10batch/s, Batch Loss=0.0347, Avg Loss=0.0767, Time Left=25.92 [A
Epoch 3/3 - Training:   8%| | 265/3393 [01:36<24:55,  2.09batch/s, Batch Loss=0.0347, Avg Loss=0.0767, Time Left=25.92 [A
Epoch 3/3 - Training:   8%| | 265/3393 [01:37<24:55,  2.09batch/s, Batch Loss=0.0026, Avg Loss=0.0763, Time Left=25.91 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:   9%| | 295/3393 [01:51<25:11,  2.05batch/s, Batch Loss=0.0339, Avg Loss=0.0783, Time Left=25.69 [A
Epoch 3/3 - Training:   9%| | 295/3393 [01:52<25:11,  2.05batch/s, Batch Loss=0.0177, Avg Loss=0.0780, Time Left=25.69 [A
Epoch 3/3 - Training:   9%| | 296/3393 [01:52<25:16,  2.04batch/s, Batch Loss=0.0177, Avg Loss=0.0780, Time Left=25.69 [A
Epoch 3/3 - Training:   9%| | 296/3393 [01:52<25:16,  2.04batch/s, Batch Loss=0.1311, Avg Loss=0.0783, Time Left=25.68 [A
Epoch 3/3 - Training:   9%| | 297/3393 [01:52<25:17,  2.04batch/s, Batch Loss=0.1311, Avg Loss=0.0783, Time Left=25.68 [A
Epoch 3/3 - Training:   9%| | 297/3393 [01:53<25:17,  2.04batch/s, Batch Loss=0.0609, Avg Loss=0.0782, Time Left=25.67 [A
Epoch 3/3 - Training:   9%| | 298/3393 [01:53<25:04,  2.06batch/s, Batch Loss=0.0609, Avg Loss=0.0782, Time Left=25.67 [A
Epoch 3/3 - Training:   9%| | 298/3393 [01:53<25:04,  2.06batch/s, Batch Loss=0.1445, Avg Loss=0.0785, Time Left=25.67 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  10%| | 328/3393 [02:07<25:21,  2.01batch/s, Batch Loss=0.0072, Avg Loss=0.0789, Time Left=25.44 [A
Epoch 3/3 - Training:  10%| | 328/3393 [02:08<25:21,  2.01batch/s, Batch Loss=0.0466, Avg Loss=0.0788, Time Left=25.44 [A
Epoch 3/3 - Training:  10%| | 329/3393 [02:08<25:08,  2.03batch/s, Batch Loss=0.0466, Avg Loss=0.0788, Time Left=25.44 [A
Epoch 3/3 - Training:  10%| | 329/3393 [02:08<25:08,  2.03batch/s, Batch Loss=0.0042, Avg Loss=0.0785, Time Left=25.42 [A
Epoch 3/3 - Training:  10%| | 330/3393 [02:08<24:46,  2.06batch/s, Batch Loss=0.0042, Avg Loss=0.0785, Time Left=25.42 [A
Epoch 3/3 - Training:  10%| | 330/3393 [02:09<24:46,  2.06batch/s, Batch Loss=0.0354, Avg Loss=0.0783, Time Left=25.42 [A
Epoch 3/3 - Training:  10%| | 331/3393 [02:09<24:57,  2.05batch/s, Batch Loss=0.0354, Avg Loss=0.0783, Time Left=25.42 [A
Epoch 3/3 - Training:  10%| | 331/3393 [02:09<24:57,  2.05batch/s, Batch Loss=0.1151, Avg Loss=0.0785, Time Left=25.41 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  11%| | 361/3393 [02:23<24:16,  2.08batch/s, Batch Loss=0.2990, Avg Loss=0.0764, Time Left=25.14 [A
Epoch 3/3 - Training:  11%| | 361/3393 [02:24<24:16,  2.08batch/s, Batch Loss=0.0032, Avg Loss=0.0761, Time Left=25.13 [A
Epoch 3/3 - Training:  11%| | 362/3393 [02:24<24:03,  2.10batch/s, Batch Loss=0.0032, Avg Loss=0.0761, Time Left=25.13 [A
Epoch 3/3 - Training:  11%| | 362/3393 [02:24<24:03,  2.10batch/s, Batch Loss=0.0043, Avg Loss=0.0759, Time Left=25.12 [A
Epoch 3/3 - Training:  11%| | 363/3393 [02:24<24:09,  2.09batch/s, Batch Loss=0.0043, Avg Loss=0.0759, Time Left=25.12 [A
Epoch 3/3 - Training:  11%| | 363/3393 [02:25<24:09,  2.09batch/s, Batch Loss=0.1034, Avg Loss=0.0760, Time Left=25.12 [A
Epoch 3/3 - Training:  11%| | 364/3393 [02:25<24:14,  2.08batch/s, Batch Loss=0.1034, Avg Loss=0.0760, Time Left=25.12 [A
Epoch 3/3 - Training:  11%| | 364/3393 [02:25<24:14,  2.08batch/s, Batch Loss=0.0520, Avg Loss=0.0759, Time Left=25.10 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  12%| | 394/3393 [02:39<24:25,  2.05batch/s, Batch Loss=0.0744, Avg Loss=0.0769, Time Left=24.87 [A
Epoch 3/3 - Training:  12%| | 394/3393 [02:40<24:25,  2.05batch/s, Batch Loss=0.0968, Avg Loss=0.0770, Time Left=24.87 [A
Epoch 3/3 - Training:  12%| | 395/3393 [02:40<24:36,  2.03batch/s, Batch Loss=0.0968, Avg Loss=0.0770, Time Left=24.87 [A
Epoch 3/3 - Training:  12%| | 395/3393 [02:40<24:36,  2.03batch/s, Batch Loss=0.2776, Avg Loss=0.0776, Time Left=24.86 [A
Epoch 3/3 - Training:  12%| | 396/3393 [02:40<24:28,  2.04batch/s, Batch Loss=0.2776, Avg Loss=0.0776, Time Left=24.86 [A
Epoch 3/3 - Training:  12%| | 396/3393 [02:41<24:28,  2.04batch/s, Batch Loss=0.0246, Avg Loss=0.0774, Time Left=24.85 [A
Epoch 3/3 - Training:  12%| | 397/3393 [02:41<24:24,  2.05batch/s, Batch Loss=0.0246, Avg Loss=0.0774, Time Left=24.85 [A
Epoch 3/3 - Training:  12%| | 397/3393 [02:41<24:24,  2.05batch/s, Batch Loss=0.0727, Avg Loss=0.0774, Time Left=24.85 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  13%|▏| 427/3393 [02:56<25:59,  1.90batch/s, Batch Loss=0.0136, Avg Loss=0.0780, Time Left=24.66 [A
Epoch 3/3 - Training:  13%|▏| 427/3393 [02:56<25:59,  1.90batch/s, Batch Loss=0.0912, Avg Loss=0.0780, Time Left=24.67 [A
Epoch 3/3 - Training:  13%|▏| 428/3393 [02:56<26:31,  1.86batch/s, Batch Loss=0.0912, Avg Loss=0.0780, Time Left=24.67 [A
Epoch 3/3 - Training:  13%|▏| 428/3393 [02:57<26:31,  1.86batch/s, Batch Loss=0.0178, Avg Loss=0.0778, Time Left=24.67 [A
Epoch 3/3 - Training:  13%|▏| 429/3393 [02:57<26:32,  1.86batch/s, Batch Loss=0.0178, Avg Loss=0.0778, Time Left=24.67 [A
Epoch 3/3 - Training:  13%|▏| 429/3393 [02:57<26:32,  1.86batch/s, Batch Loss=0.0298, Avg Loss=0.0777, Time Left=24.66 [A
Epoch 3/3 - Training:  13%|▏| 430/3393 [02:57<25:30,  1.94batch/s, Batch Loss=0.0298, Avg Loss=0.0777, Time Left=24.66 [A
Epoch 3/3 - Training:  13%|▏| 430/3393 [02:58<25:30,  1.94batch/s, Batch Loss=0.0312, Avg Loss=0.0776, Time Left=24.66 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  14%|▏| 460/3393 [03:13<25:33,  1.91batch/s, Batch Loss=0.0786, Avg Loss=0.0778, Time Left=24.49 [A
Epoch 3/3 - Training:  14%|▏| 460/3393 [03:13<25:33,  1.91batch/s, Batch Loss=0.0139, Avg Loss=0.0777, Time Left=24.49 [A
Epoch 3/3 - Training:  14%|▏| 461/3393 [03:13<26:11,  1.87batch/s, Batch Loss=0.0139, Avg Loss=0.0777, Time Left=24.49 [A
Epoch 3/3 - Training:  14%|▏| 461/3393 [03:14<26:11,  1.87batch/s, Batch Loss=0.2676, Avg Loss=0.0781, Time Left=24.48 [A
Epoch 3/3 - Training:  14%|▏| 462/3393 [03:14<25:07,  1.94batch/s, Batch Loss=0.2676, Avg Loss=0.0781, Time Left=24.48 [A
Epoch 3/3 - Training:  14%|▏| 462/3393 [03:14<25:07,  1.94batch/s, Batch Loss=0.0332, Avg Loss=0.0780, Time Left=24.48 [A
Epoch 3/3 - Training:  14%|▏| 463/3393 [03:14<25:28,  1.92batch/s, Batch Loss=0.0332, Avg Loss=0.0780, Time Left=24.48 [A
Epoch 3/3 - Training:  14%|▏| 463/3393 [03:15<25:28,  1.92batch/s, Batch Loss=0.2284, Avg Loss=0.0784, Time Left=24.47 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  15%|▏| 493/3393 [03:29<24:00,  2.01batch/s, Batch Loss=0.1130, Avg Loss=0.0769, Time Left=24.27 [A
Epoch 3/3 - Training:  15%|▏| 493/3393 [03:30<24:00,  2.01batch/s, Batch Loss=0.0869, Avg Loss=0.0769, Time Left=24.26 [A
Epoch 3/3 - Training:  15%|▏| 494/3393 [03:30<23:54,  2.02batch/s, Batch Loss=0.0869, Avg Loss=0.0769, Time Left=24.26 [A
Epoch 3/3 - Training:  15%|▏| 494/3393 [03:30<23:54,  2.02batch/s, Batch Loss=0.1066, Avg Loss=0.0770, Time Left=24.26 [A
Epoch 3/3 - Training:  15%|▏| 495/3393 [03:30<24:43,  1.95batch/s, Batch Loss=0.1066, Avg Loss=0.0770, Time Left=24.26 [A
Epoch 3/3 - Training:  15%|▏| 495/3393 [03:31<24:43,  1.95batch/s, Batch Loss=0.0014, Avg Loss=0.0768, Time Left=24.27 [A
Epoch 3/3 - Training:  15%|▏| 496/3393 [03:31<27:05,  1.78batch/s, Batch Loss=0.0014, Avg Loss=0.0768, Time Left=24.27 [A
Epoch 3/3 - Training:  15%|▏| 496/3393 [03:31<27:05,  1.78batch/s, Batch Loss=0.0024, Avg Loss=0.0767, Time Left=24.28 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  16%|▏| 526/3393 [03:46<22:33,  2.12batch/s, Batch Loss=0.0643, Avg Loss=0.0792, Time Left=24.05 [A
Epoch 3/3 - Training:  16%|▏| 526/3393 [03:46<22:33,  2.12batch/s, Batch Loss=0.0197, Avg Loss=0.0791, Time Left=24.04 [A
Epoch 3/3 - Training:  16%|▏| 527/3393 [03:46<23:00,  2.08batch/s, Batch Loss=0.0197, Avg Loss=0.0791, Time Left=24.04 [A
Epoch 3/3 - Training:  16%|▏| 527/3393 [03:47<23:00,  2.08batch/s, Batch Loss=0.0226, Avg Loss=0.0790, Time Left=24.03 [A
Epoch 3/3 - Training:  16%|▏| 528/3393 [03:47<22:50,  2.09batch/s, Batch Loss=0.0226, Avg Loss=0.0790, Time Left=24.03 [A
Epoch 3/3 - Training:  16%|▏| 528/3393 [03:47<22:50,  2.09batch/s, Batch Loss=0.0279, Avg Loss=0.0789, Time Left=24.02 [A
Epoch 3/3 - Training:  16%|▏| 529/3393 [03:47<22:42,  2.10batch/s, Batch Loss=0.0279, Avg Loss=0.0789, Time Left=24.02 [A
Epoch 3/3 - Training:  16%|▏| 529/3393 [03:48<22:42,  2.10batch/s, Batch Loss=0.0201, Avg Loss=0.0787, Time Left=24.01 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  16%|▏| 559/3393 [04:02<22:03,  2.14batch/s, Batch Loss=0.1980, Avg Loss=0.0796, Time Left=23.80 [A
Epoch 3/3 - Training:  16%|▏| 559/3393 [04:03<22:03,  2.14batch/s, Batch Loss=0.0356, Avg Loss=0.0795, Time Left=23.80 [A
Epoch 3/3 - Training:  17%|▏| 560/3393 [04:03<23:11,  2.04batch/s, Batch Loss=0.0356, Avg Loss=0.0795, Time Left=23.80 [A
Epoch 3/3 - Training:  17%|▏| 560/3393 [04:03<23:11,  2.04batch/s, Batch Loss=0.0663, Avg Loss=0.0795, Time Left=23.79 [A
Epoch 3/3 - Training:  17%|▏| 561/3393 [04:03<23:21,  2.02batch/s, Batch Loss=0.0663, Avg Loss=0.0795, Time Left=23.79 [A
Epoch 3/3 - Training:  17%|▏| 561/3393 [04:04<23:21,  2.02batch/s, Batch Loss=0.0748, Avg Loss=0.0795, Time Left=23.78 [A
Epoch 3/3 - Training:  17%|▏| 562/3393 [04:04<23:06,  2.04batch/s, Batch Loss=0.0748, Avg Loss=0.0795, Time Left=23.78 [A
Epoch 3/3 - Training:  17%|▏| 562/3393 [04:04<23:06,  2.04batch/s, Batch Loss=0.0816, Avg Loss=0.0795, Time Left=23.78 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  17%|▏| 592/3393 [04:19<23:08,  2.02batch/s, Batch Loss=0.0063, Avg Loss=0.0789, Time Left=23.60 [A
Epoch 3/3 - Training:  17%|▏| 592/3393 [04:20<23:08,  2.02batch/s, Batch Loss=0.0288, Avg Loss=0.0788, Time Left=23.60 [A
Epoch 3/3 - Training:  17%|▏| 593/3393 [04:20<23:44,  1.97batch/s, Batch Loss=0.0288, Avg Loss=0.0788, Time Left=23.60 [A
Epoch 3/3 - Training:  17%|▏| 593/3393 [04:20<23:44,  1.97batch/s, Batch Loss=0.0306, Avg Loss=0.0787, Time Left=23.59 [A
Epoch 3/3 - Training:  18%|▏| 594/3393 [04:20<24:13,  1.93batch/s, Batch Loss=0.0306, Avg Loss=0.0787, Time Left=23.59 [A
Epoch 3/3 - Training:  18%|▏| 594/3393 [04:21<24:13,  1.93batch/s, Batch Loss=0.2252, Avg Loss=0.0790, Time Left=23.59 [A
Epoch 3/3 - Training:  18%|▏| 595/3393 [04:21<25:04,  1.86batch/s, Batch Loss=0.2252, Avg Loss=0.0790, Time Left=23.59 [A
Epoch 3/3 - Training:  18%|▏| 595/3393 [04:21<25:04,  1.86batch/s, Batch Loss=0.0432, Avg Loss=0.0789, Time Left=23.58 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  18%|▏| 625/3393 [04:36<21:54,  2.11batch/s, Batch Loss=0.0047, Avg Loss=0.0793, Time Left=23.34 [A
Epoch 3/3 - Training:  18%|▏| 625/3393 [04:36<21:54,  2.11batch/s, Batch Loss=0.2143, Avg Loss=0.0795, Time Left=23.33 [A
Epoch 3/3 - Training:  18%|▏| 626/3393 [04:36<21:48,  2.11batch/s, Batch Loss=0.2143, Avg Loss=0.0795, Time Left=23.33 [A
Epoch 3/3 - Training:  18%|▏| 626/3393 [04:37<21:48,  2.11batch/s, Batch Loss=0.0014, Avg Loss=0.0794, Time Left=23.32 [A
Epoch 3/3 - Training:  18%|▏| 627/3393 [04:37<22:10,  2.08batch/s, Batch Loss=0.0014, Avg Loss=0.0794, Time Left=23.32 [A
Epoch 3/3 - Training:  18%|▏| 627/3393 [04:37<22:10,  2.08batch/s, Batch Loss=0.0250, Avg Loss=0.0793, Time Left=23.31 [A
Epoch 3/3 - Training:  19%|▏| 628/3393 [04:37<21:59,  2.09batch/s, Batch Loss=0.0250, Avg Loss=0.0793, Time Left=23.31 [A
Epoch 3/3 - Training:  19%|▏| 628/3393 [04:38<21:59,  2.09batch/s, Batch Loss=0.0341, Avg Loss=0.0792, Time Left=23.30 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  19%|▏| 658/3393 [04:52<22:43,  2.01batch/s, Batch Loss=0.0120, Avg Loss=0.0788, Time Left=23.10 [A
Epoch 3/3 - Training:  19%|▏| 658/3393 [04:53<22:43,  2.01batch/s, Batch Loss=0.2100, Avg Loss=0.0791, Time Left=23.09 [A
Epoch 3/3 - Training:  19%|▏| 659/3393 [04:53<22:49,  2.00batch/s, Batch Loss=0.2100, Avg Loss=0.0791, Time Left=23.09 [A
Epoch 3/3 - Training:  19%|▏| 659/3393 [04:53<22:49,  2.00batch/s, Batch Loss=0.0061, Avg Loss=0.0789, Time Left=23.08 [A
Epoch 3/3 - Training:  19%|▏| 660/3393 [04:53<23:13,  1.96batch/s, Batch Loss=0.0061, Avg Loss=0.0789, Time Left=23.08 [A
Epoch 3/3 - Training:  19%|▏| 660/3393 [04:54<23:13,  1.96batch/s, Batch Loss=0.1134, Avg Loss=0.0790, Time Left=23.08 [A
Epoch 3/3 - Training:  19%|▏| 661/3393 [04:54<23:03,  1.97batch/s, Batch Loss=0.1134, Avg Loss=0.0790, Time Left=23.08 [A
Epoch 3/3 - Training:  19%|▏| 661/3393 [04:55<23:03,  1.97batch/s, Batch Loss=0.1210, Avg Loss=0.0791, Time Left=23.08 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  20%|▏| 691/3393 [05:19<1:40:24,  2.23s/batch, Batch Loss=0.0078, Avg Loss=0.0782, Time Left=23.5[A
Epoch 3/3 - Training:  20%|▏| 691/3393 [05:19<1:40:24,  2.23s/batch, Batch Loss=0.0123, Avg Loss=0.0781, Time Left=23.5[A
Epoch 3/3 - Training:  20%|▏| 692/3393 [05:19<1:20:26,  1.79s/batch, Batch Loss=0.0123, Avg Loss=0.0781, Time Left=23.5[A
Epoch 3/3 - Training:  20%|▏| 692/3393 [05:20<1:20:26,  1.79s/batch, Batch Loss=0.0634, Avg Loss=0.0781, Time Left=23.5[A
Epoch 3/3 - Training:  20%|▏| 693/3393 [05:20<1:02:32,  1.39s/batch, Batch Loss=0.0634, Avg Loss=0.0781, Time Left=23.5[A
Epoch 3/3 - Training:  20%|▏| 693/3393 [05:20<1:02:32,  1.39s/batch, Batch Loss=0.0137, Avg Loss=0.0780, Time Left=23.5[A
Epoch 3/3 - Training:  20%|▏| 694/3393 [05:20<50:20,  1.12s/batch, Batch Loss=0.0137, Avg Loss=0.0780, Time Left=23.54 [A
Epoch 3/3 - Training:  20%|▏| 694/3393 [05:21<50:20,  1.12s/batch, Batch Loss=0.1121, Avg Loss=0.0780, Time Left=23.53 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  21%|▏| 724/3393 [05:41<33:33,  1.33batch/s, Batch Loss=0.0351, Avg Loss=0.0790, Time Left=23.69 [A
Epoch 3/3 - Training:  21%|▏| 724/3393 [05:42<33:33,  1.33batch/s, Batch Loss=0.0091, Avg Loss=0.0788, Time Left=23.70 [A
Epoch 3/3 - Training:  21%|▏| 725/3393 [05:42<34:15,  1.30batch/s, Batch Loss=0.0091, Avg Loss=0.0788, Time Left=23.70 [A
Epoch 3/3 - Training:  21%|▏| 725/3393 [05:43<34:15,  1.30batch/s, Batch Loss=0.1950, Avg Loss=0.0790, Time Left=23.70 [A
Epoch 3/3 - Training:  21%|▏| 726/3393 [05:43<32:24,  1.37batch/s, Batch Loss=0.1950, Avg Loss=0.0790, Time Left=23.70 [A
Epoch 3/3 - Training:  21%|▏| 726/3393 [05:44<32:24,  1.37batch/s, Batch Loss=0.0380, Avg Loss=0.0790, Time Left=23.70 [A
Epoch 3/3 - Training:  21%|▏| 727/3393 [05:44<31:12,  1.42batch/s, Batch Loss=0.0380, Avg Loss=0.0790, Time Left=23.70 [A
Epoch 3/3 - Training:  21%|▏| 727/3393 [05:44<31:12,  1.42batch/s, Batch Loss=0.0316, Avg Loss=0.0789, Time Left=23.70 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  22%|▏| 757/3393 [06:00<21:34,  2.04batch/s, Batch Loss=0.0197, Avg Loss=0.0794, Time Left=23.47 [A
Epoch 3/3 - Training:  22%|▏| 757/3393 [06:00<21:34,  2.04batch/s, Batch Loss=0.0408, Avg Loss=0.0793, Time Left=23.46 [A
Epoch 3/3 - Training:  22%|▏| 758/3393 [06:00<21:28,  2.04batch/s, Batch Loss=0.0408, Avg Loss=0.0793, Time Left=23.46 [A
Epoch 3/3 - Training:  22%|▏| 758/3393 [06:01<21:28,  2.04batch/s, Batch Loss=0.0046, Avg Loss=0.0792, Time Left=23.45 [A
Epoch 3/3 - Training:  22%|▏| 759/3393 [06:01<21:36,  2.03batch/s, Batch Loss=0.0046, Avg Loss=0.0792, Time Left=23.45 [A
Epoch 3/3 - Training:  22%|▏| 759/3393 [06:01<21:36,  2.03batch/s, Batch Loss=0.0208, Avg Loss=0.0791, Time Left=23.44 [A
Epoch 3/3 - Training:  22%|▏| 760/3393 [06:01<21:30,  2.04batch/s, Batch Loss=0.0208, Avg Loss=0.0791, Time Left=23.44 [A
Epoch 3/3 - Training:  22%|▏| 760/3393 [06:02<21:30,  2.04batch/s, Batch Loss=0.0789, Avg Loss=0.0791, Time Left=23.43 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  23%|▏| 790/3393 [06:16<23:41,  1.83batch/s, Batch Loss=0.4394, Avg Loss=0.0816, Time Left=23.15 [A
Epoch 3/3 - Training:  23%|▏| 790/3393 [06:17<23:41,  1.83batch/s, Batch Loss=0.0715, Avg Loss=0.0815, Time Left=23.14 [A
Epoch 3/3 - Training:  23%|▏| 791/3393 [06:17<24:38,  1.76batch/s, Batch Loss=0.0715, Avg Loss=0.0815, Time Left=23.14 [A
Epoch 3/3 - Training:  23%|▏| 791/3393 [06:17<24:38,  1.76batch/s, Batch Loss=0.1384, Avg Loss=0.0816, Time Left=23.13 [A
Epoch 3/3 - Training:  23%|▏| 792/3393 [06:17<23:01,  1.88batch/s, Batch Loss=0.1384, Avg Loss=0.0816, Time Left=23.13 [A
Epoch 3/3 - Training:  23%|▏| 792/3393 [06:18<23:01,  1.88batch/s, Batch Loss=0.2341, Avg Loss=0.0818, Time Left=23.12 [A
Epoch 3/3 - Training:  23%|▏| 793/3393 [06:18<22:34,  1.92batch/s, Batch Loss=0.2341, Avg Loss=0.0818, Time Left=23.12 [A
Epoch 3/3 - Training:  23%|▏| 793/3393 [06:18<22:34,  1.92batch/s, Batch Loss=0.1602, Avg Loss=0.0819, Time Left=23.11 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  24%|▏| 823/3393 [06:34<28:00,  1.53batch/s, Batch Loss=0.0465, Avg Loss=0.0822, Time Left=22.88 [A
Epoch 3/3 - Training:  24%|▏| 823/3393 [06:34<28:00,  1.53batch/s, Batch Loss=0.1734, Avg Loss=0.0824, Time Left=22.88 [A
Epoch 3/3 - Training:  24%|▏| 824/3393 [06:34<27:21,  1.56batch/s, Batch Loss=0.1734, Avg Loss=0.0824, Time Left=22.88 [A
Epoch 3/3 - Training:  24%|▏| 824/3393 [06:35<27:21,  1.56batch/s, Batch Loss=0.1343, Avg Loss=0.0824, Time Left=22.88 [A
Epoch 3/3 - Training:  24%|▏| 825/3393 [06:35<28:40,  1.49batch/s, Batch Loss=0.1343, Avg Loss=0.0824, Time Left=22.88 [A
Epoch 3/3 - Training:  24%|▏| 825/3393 [06:36<28:40,  1.49batch/s, Batch Loss=0.0879, Avg Loss=0.0824, Time Left=22.89 [A
Epoch 3/3 - Training:  24%|▏| 826/3393 [06:36<30:51,  1.39batch/s, Batch Loss=0.0879, Avg Loss=0.0824, Time Left=22.89 [A
Epoch 3/3 - Training:  24%|▏| 826/3393 [06:36<30:51,  1.39batch/s, Batch Loss=0.0416, Avg Loss=0.0824, Time Left=22.88 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  25%|▎| 856/3393 [06:53<23:58,  1.76batch/s, Batch Loss=0.0663, Avg Loss=0.0819, Time Left=22.73 [A
Epoch 3/3 - Training:  25%|▎| 856/3393 [06:54<23:58,  1.76batch/s, Batch Loss=0.2253, Avg Loss=0.0821, Time Left=22.72 [A
Epoch 3/3 - Training:  25%|▎| 857/3393 [06:54<23:06,  1.83batch/s, Batch Loss=0.2253, Avg Loss=0.0821, Time Left=22.72 [A
Epoch 3/3 - Training:  25%|▎| 857/3393 [06:54<23:06,  1.83batch/s, Batch Loss=0.0985, Avg Loss=0.0821, Time Left=22.71 [A
Epoch 3/3 - Training:  25%|▎| 858/3393 [06:54<22:29,  1.88batch/s, Batch Loss=0.0985, Avg Loss=0.0821, Time Left=22.71 [A
Epoch 3/3 - Training:  25%|▎| 858/3393 [06:55<22:29,  1.88batch/s, Batch Loss=0.0911, Avg Loss=0.0821, Time Left=22.71 [A
Epoch 3/3 - Training:  25%|▎| 859/3393 [06:55<23:34,  1.79batch/s, Batch Loss=0.0911, Avg Loss=0.0821, Time Left=22.71 [A
Epoch 3/3 - Training:  25%|▎| 859/3393 [06:56<23:34,  1.79batch/s, Batch Loss=0.0064, Avg Loss=0.0821, Time Left=22.70 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  26%|▎| 889/3393 [07:12<21:41,  1.92batch/s, Batch Loss=0.0308, Avg Loss=0.0822, Time Left=22.50 [A
Epoch 3/3 - Training:  26%|▎| 889/3393 [07:12<21:41,  1.92batch/s, Batch Loss=0.0173, Avg Loss=0.0821, Time Left=22.49 [A
Epoch 3/3 - Training:  26%|▎| 890/3393 [07:12<21:16,  1.96batch/s, Batch Loss=0.0173, Avg Loss=0.0821, Time Left=22.49 [A
Epoch 3/3 - Training:  26%|▎| 890/3393 [07:13<21:16,  1.96batch/s, Batch Loss=0.0382, Avg Loss=0.0820, Time Left=22.48 [A
Epoch 3/3 - Training:  26%|▎| 891/3393 [07:13<21:32,  1.94batch/s, Batch Loss=0.0382, Avg Loss=0.0820, Time Left=22.48 [A
Epoch 3/3 - Training:  26%|▎| 891/3393 [07:13<21:32,  1.94batch/s, Batch Loss=0.0155, Avg Loss=0.0819, Time Left=22.47 [A
Epoch 3/3 - Training:  26%|▎| 892/3393 [07:13<21:09,  1.97batch/s, Batch Loss=0.0155, Avg Loss=0.0819, Time Left=22.47 [A
Epoch 3/3 - Training:  26%|▎| 892/3393 [07:14<21:09,  1.97batch/s, Batch Loss=0.0286, Avg Loss=0.0819, Time Left=22.46 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  27%|▎| 922/3393 [07:28<20:49,  1.98batch/s, Batch Loss=0.0388, Avg Loss=0.0822, Time Left=22.17 [A
Epoch 3/3 - Training:  27%|▎| 922/3393 [07:29<20:49,  1.98batch/s, Batch Loss=0.0046, Avg Loss=0.0821, Time Left=22.16 [A
Epoch 3/3 - Training:  27%|▎| 923/3393 [07:29<20:35,  2.00batch/s, Batch Loss=0.0046, Avg Loss=0.0821, Time Left=22.16 [A
Epoch 3/3 - Training:  27%|▎| 923/3393 [07:29<20:35,  2.00batch/s, Batch Loss=0.0236, Avg Loss=0.0821, Time Left=22.15 [A
Epoch 3/3 - Training:  27%|▎| 924/3393 [07:29<20:56,  1.96batch/s, Batch Loss=0.0236, Avg Loss=0.0821, Time Left=22.15 [A
Epoch 3/3 - Training:  27%|▎| 924/3393 [07:30<20:56,  1.96batch/s, Batch Loss=0.0728, Avg Loss=0.0821, Time Left=22.14 [A
Epoch 3/3 - Training:  27%|▎| 925/3393 [07:30<20:40,  1.99batch/s, Batch Loss=0.0728, Avg Loss=0.0821, Time Left=22.14 [A
Epoch 3/3 - Training:  27%|▎| 925/3393 [07:30<20:40,  1.99batch/s, Batch Loss=0.3683, Avg Loss=0.0824, Time Left=22.13 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  28%|▎| 955/3393 [07:45<20:42,  1.96batch/s, Batch Loss=0.4619, Avg Loss=0.0816, Time Left=21.86 [A
Epoch 3/3 - Training:  28%|▎| 955/3393 [07:46<20:42,  1.96batch/s, Batch Loss=0.0061, Avg Loss=0.0815, Time Left=21.85 [A
Epoch 3/3 - Training:  28%|▎| 956/3393 [07:46<20:46,  1.95batch/s, Batch Loss=0.0061, Avg Loss=0.0815, Time Left=21.85 [A
Epoch 3/3 - Training:  28%|▎| 956/3393 [07:46<20:46,  1.95batch/s, Batch Loss=0.0264, Avg Loss=0.0815, Time Left=21.84 [A
Epoch 3/3 - Training:  28%|▎| 957/3393 [07:46<20:38,  1.97batch/s, Batch Loss=0.0264, Avg Loss=0.0815, Time Left=21.84 [A
Epoch 3/3 - Training:  28%|▎| 957/3393 [07:47<20:38,  1.97batch/s, Batch Loss=0.0192, Avg Loss=0.0814, Time Left=21.83 [A
Epoch 3/3 - Training:  28%|▎| 958/3393 [07:47<20:32,  1.98batch/s, Batch Loss=0.0192, Avg Loss=0.0814, Time Left=21.83 [A
Epoch 3/3 - Training:  28%|▎| 958/3393 [07:47<20:32,  1.98batch/s, Batch Loss=0.0082, Avg Loss=0.0813, Time Left=21.82 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  29%|▎| 988/3393 [08:02<19:59,  2.00batch/s, Batch Loss=0.0585, Avg Loss=0.0804, Time Left=21.54 [A
Epoch 3/3 - Training:  29%|▎| 988/3393 [08:02<19:59,  2.00batch/s, Batch Loss=0.0712, Avg Loss=0.0804, Time Left=21.53 [A
Epoch 3/3 - Training:  29%|▎| 989/3393 [08:02<20:34,  1.95batch/s, Batch Loss=0.0712, Avg Loss=0.0804, Time Left=21.53 [A
Epoch 3/3 - Training:  29%|▎| 989/3393 [08:03<20:34,  1.95batch/s, Batch Loss=0.0032, Avg Loss=0.0803, Time Left=21.52 [A
Epoch 3/3 - Training:  29%|▎| 990/3393 [08:03<20:14,  1.98batch/s, Batch Loss=0.0032, Avg Loss=0.0803, Time Left=21.52 [A
Epoch 3/3 - Training:  29%|▎| 990/3393 [08:03<20:14,  1.98batch/s, Batch Loss=0.3897, Avg Loss=0.0806, Time Left=21.52 [A
Epoch 3/3 - Training:  29%|▎| 991/3393 [08:03<21:00,  1.91batch/s, Batch Loss=0.3897, Avg Loss=0.0806, Time Left=21.52 [A
Epoch 3/3 - Training:  29%|▎| 991/3393 [08:04<21:00,  1.91batch/s, Batch Loss=0.0012, Avg Loss=0.0805, Time Left=21.51 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  30%|▎| 1021/3393 [08:18<20:15,  1.95batch/s, Batch Loss=0.0500, Avg Loss=0.0800, Time Left=21.23[A
Epoch 3/3 - Training:  30%|▎| 1021/3393 [08:19<20:15,  1.95batch/s, Batch Loss=0.0104, Avg Loss=0.0799, Time Left=21.22[A
Epoch 3/3 - Training:  30%|▎| 1022/3393 [08:19<20:08,  1.96batch/s, Batch Loss=0.0104, Avg Loss=0.0799, Time Left=21.22[A
Epoch 3/3 - Training:  30%|▎| 1022/3393 [08:19<20:08,  1.96batch/s, Batch Loss=0.0222, Avg Loss=0.0799, Time Left=21.21[A
Epoch 3/3 - Training:  30%|▎| 1023/3393 [08:19<20:11,  1.96batch/s, Batch Loss=0.0222, Avg Loss=0.0799, Time Left=21.21[A
Epoch 3/3 - Training:  30%|▎| 1023/3393 [08:20<20:11,  1.96batch/s, Batch Loss=0.0588, Avg Loss=0.0798, Time Left=21.20[A
Epoch 3/3 - Training:  30%|▎| 1024/3393 [08:20<19:30,  2.02batch/s, Batch Loss=0.0588, Avg Loss=0.0798, Time Left=21.20[A
Epoch 3/3 - Training:  30%|▎| 1024/3393 [08:20<19:30,  2.02batch/s, Batch Loss=0.0068, Avg Loss=0.0798, Time Left=21.19[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  31%|▎| 1054/3393 [08:35<19:58,  1.95batch/s, Batch Loss=0.1695, Avg Loss=0.0789, Time Left=20.92[A
Epoch 3/3 - Training:  31%|▎| 1054/3393 [08:36<19:58,  1.95batch/s, Batch Loss=0.0248, Avg Loss=0.0788, Time Left=20.91[A
Epoch 3/3 - Training:  31%|▎| 1055/3393 [08:36<19:51,  1.96batch/s, Batch Loss=0.0248, Avg Loss=0.0788, Time Left=20.91[A
Epoch 3/3 - Training:  31%|▎| 1055/3393 [08:36<19:51,  1.96batch/s, Batch Loss=0.0335, Avg Loss=0.0788, Time Left=20.90[A
Epoch 3/3 - Training:  31%|▎| 1056/3393 [08:36<19:43,  1.97batch/s, Batch Loss=0.0335, Avg Loss=0.0788, Time Left=20.90[A
Epoch 3/3 - Training:  31%|▎| 1056/3393 [08:37<19:43,  1.97batch/s, Batch Loss=0.0044, Avg Loss=0.0787, Time Left=20.89[A
Epoch 3/3 - Training:  31%|▎| 1057/3393 [08:37<19:27,  2.00batch/s, Batch Loss=0.0044, Avg Loss=0.0787, Time Left=20.89[A
Epoch 3/3 - Training:  31%|▎| 1057/3393 [08:37<19:27,  2.00batch/s, Batch Loss=0.0211, Avg Loss=0.0787, Time Left=20.88[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  32%|▎| 1087/3393 [08:52<19:35,  1.96batch/s, Batch Loss=0.1690, Avg Loss=0.0785, Time Left=20.60[A
Epoch 3/3 - Training:  32%|▎| 1087/3393 [08:52<19:35,  1.96batch/s, Batch Loss=0.1090, Avg Loss=0.0786, Time Left=20.59[A
Epoch 3/3 - Training:  32%|▎| 1088/3393 [08:52<19:28,  1.97batch/s, Batch Loss=0.1090, Avg Loss=0.0786, Time Left=20.59[A
Epoch 3/3 - Training:  32%|▎| 1088/3393 [08:53<19:28,  1.97batch/s, Batch Loss=0.0719, Avg Loss=0.0785, Time Left=20.59[A
Epoch 3/3 - Training:  32%|▎| 1089/3393 [08:53<19:34,  1.96batch/s, Batch Loss=0.0719, Avg Loss=0.0785, Time Left=20.59[A
Epoch 3/3 - Training:  32%|▎| 1089/3393 [08:53<19:34,  1.96batch/s, Batch Loss=0.0084, Avg Loss=0.0785, Time Left=20.58[A
Epoch 3/3 - Training:  32%|▎| 1090/3393 [08:53<19:06,  2.01batch/s, Batch Loss=0.0084, Avg Loss=0.0785, Time Left=20.58[A
Epoch 3/3 - Training:  32%|▎| 1090/3393 [08:54<19:06,  2.01batch/s, Batch Loss=0.0265, Avg Loss=0.0784, Time Left=20.57[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  33%|▎| 1120/3393 [09:08<19:00,  1.99batch/s, Batch Loss=0.1845, Avg Loss=0.0794, Time Left=20.30[A
Epoch 3/3 - Training:  33%|▎| 1120/3393 [09:09<19:00,  1.99batch/s, Batch Loss=0.1097, Avg Loss=0.0794, Time Left=20.29[A
Epoch 3/3 - Training:  33%|▎| 1121/3393 [09:09<19:09,  1.98batch/s, Batch Loss=0.1097, Avg Loss=0.0794, Time Left=20.29[A
Epoch 3/3 - Training:  33%|▎| 1121/3393 [09:09<19:09,  1.98batch/s, Batch Loss=0.0500, Avg Loss=0.0794, Time Left=20.28[A
Epoch 3/3 - Training:  33%|▎| 1122/3393 [09:09<19:18,  1.96batch/s, Batch Loss=0.0500, Avg Loss=0.0794, Time Left=20.28[A
Epoch 3/3 - Training:  33%|▎| 1122/3393 [09:10<19:18,  1.96batch/s, Batch Loss=0.0838, Avg Loss=0.0794, Time Left=20.27[A
Epoch 3/3 - Training:  33%|▎| 1123/3393 [09:10<19:33,  1.93batch/s, Batch Loss=0.0838, Avg Loss=0.0794, Time Left=20.27[A
Epoch 3/3 - Training:  33%|▎| 1123/3393 [09:10<19:33,  1.93batch/s, Batch Loss=0.0357, Avg Loss=0.0794, Time Left=20.26[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  34%|▎| 1153/3393 [09:25<19:01,  1.96batch/s, Batch Loss=0.2057, Avg Loss=0.0798, Time Left=20.00[A
Epoch 3/3 - Training:  34%|▎| 1153/3393 [09:26<19:01,  1.96batch/s, Batch Loss=0.0212, Avg Loss=0.0797, Time Left=19.99[A
Epoch 3/3 - Training:  34%|▎| 1154/3393 [09:26<19:07,  1.95batch/s, Batch Loss=0.0212, Avg Loss=0.0797, Time Left=19.99[A
Epoch 3/3 - Training:  34%|▎| 1154/3393 [09:26<19:07,  1.95batch/s, Batch Loss=0.4567, Avg Loss=0.0800, Time Left=19.98[A
Epoch 3/3 - Training:  34%|▎| 1155/3393 [09:26<18:56,  1.97batch/s, Batch Loss=0.4567, Avg Loss=0.0800, Time Left=19.98[A
Epoch 3/3 - Training:  34%|▎| 1155/3393 [09:27<18:56,  1.97batch/s, Batch Loss=0.0376, Avg Loss=0.0800, Time Left=19.97[A
Epoch 3/3 - Training:  34%|▎| 1156/3393 [09:27<18:39,  2.00batch/s, Batch Loss=0.0376, Avg Loss=0.0800, Time Left=19.97[A
Epoch 3/3 - Training:  34%|▎| 1156/3393 [09:27<18:39,  2.00batch/s, Batch Loss=0.0077, Avg Loss=0.0799, Time Left=19.96[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  35%|▎| 1186/3393 [09:42<18:51,  1.95batch/s, Batch Loss=0.1266, Avg Loss=0.0794, Time Left=19.69[A
Epoch 3/3 - Training:  35%|▎| 1186/3393 [09:42<18:51,  1.95batch/s, Batch Loss=0.0251, Avg Loss=0.0793, Time Left=19.68[A
Epoch 3/3 - Training:  35%|▎| 1187/3393 [09:42<18:31,  1.98batch/s, Batch Loss=0.0251, Avg Loss=0.0793, Time Left=19.68[A
Epoch 3/3 - Training:  35%|▎| 1187/3393 [09:43<18:31,  1.98batch/s, Batch Loss=0.0055, Avg Loss=0.0793, Time Left=19.67[A
Epoch 3/3 - Training:  35%|▎| 1188/3393 [09:43<18:28,  1.99batch/s, Batch Loss=0.0055, Avg Loss=0.0793, Time Left=19.67[A
Epoch 3/3 - Training:  35%|▎| 1188/3393 [09:43<18:28,  1.99batch/s, Batch Loss=0.0275, Avg Loss=0.0792, Time Left=19.66[A
Epoch 3/3 - Training:  35%|▎| 1189/3393 [09:43<18:15,  2.01batch/s, Batch Loss=0.0275, Avg Loss=0.0792, Time Left=19.66[A
Epoch 3/3 - Training:  35%|▎| 1189/3393 [09:44<18:15,  2.01batch/s, Batch Loss=0.0462, Avg Loss=0.0792, Time Left=19.66[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  36%|▎| 1219/3393 [09:58<18:42,  1.94batch/s, Batch Loss=0.0034, Avg Loss=0.0784, Time Left=19.39[A
Epoch 3/3 - Training:  36%|▎| 1219/3393 [09:59<18:42,  1.94batch/s, Batch Loss=0.1692, Avg Loss=0.0785, Time Left=19.38[A
Epoch 3/3 - Training:  36%|▎| 1220/3393 [09:59<18:22,  1.97batch/s, Batch Loss=0.1692, Avg Loss=0.0785, Time Left=19.38[A
Epoch 3/3 - Training:  36%|▎| 1220/3393 [09:59<18:22,  1.97batch/s, Batch Loss=0.0671, Avg Loss=0.0785, Time Left=19.37[A
Epoch 3/3 - Training:  36%|▎| 1221/3393 [09:59<18:38,  1.94batch/s, Batch Loss=0.0671, Avg Loss=0.0785, Time Left=19.37[A
Epoch 3/3 - Training:  36%|▎| 1221/3393 [10:00<18:38,  1.94batch/s, Batch Loss=0.1384, Avg Loss=0.0785, Time Left=19.36[A
Epoch 3/3 - Training:  36%|▎| 1222/3393 [10:00<18:18,  1.98batch/s, Batch Loss=0.1384, Avg Loss=0.0785, Time Left=19.36[A
Epoch 3/3 - Training:  36%|▎| 1222/3393 [10:00<18:18,  1.98batch/s, Batch Loss=0.0283, Avg Loss=0.0785, Time Left=19.36[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  37%|▎| 1252/3393 [10:15<17:40,  2.02batch/s, Batch Loss=0.1020, Avg Loss=0.0780, Time Left=19.10[A
Epoch 3/3 - Training:  37%|▎| 1252/3393 [10:16<17:40,  2.02batch/s, Batch Loss=0.0555, Avg Loss=0.0780, Time Left=19.09[A
Epoch 3/3 - Training:  37%|▎| 1253/3393 [10:16<17:42,  2.01batch/s, Batch Loss=0.0555, Avg Loss=0.0780, Time Left=19.09[A
Epoch 3/3 - Training:  37%|▎| 1253/3393 [10:16<17:42,  2.01batch/s, Batch Loss=0.1169, Avg Loss=0.0780, Time Left=19.08[A
Epoch 3/3 - Training:  37%|▎| 1254/3393 [10:16<17:44,  2.01batch/s, Batch Loss=0.1169, Avg Loss=0.0780, Time Left=19.08[A
Epoch 3/3 - Training:  37%|▎| 1254/3393 [10:17<17:44,  2.01batch/s, Batch Loss=0.0091, Avg Loss=0.0780, Time Left=19.07[A
Epoch 3/3 - Training:  37%|▎| 1255/3393 [10:17<17:46,  2.00batch/s, Batch Loss=0.0091, Avg Loss=0.0780, Time Left=19.07[A
Epoch 3/3 - Training:  37%|▎| 1255/3393 [10:17<17:46,  2.00batch/s, Batch Loss=0.0198, Avg Loss=0.0779, Time Left=19.06[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  38%|▍| 1285/3393 [10:32<17:37,  1.99batch/s, Batch Loss=0.0473, Avg Loss=0.0789, Time Left=18.79[A
Epoch 3/3 - Training:  38%|▍| 1285/3393 [10:32<17:37,  1.99batch/s, Batch Loss=0.0696, Avg Loss=0.0789, Time Left=18.78[A
Epoch 3/3 - Training:  38%|▍| 1286/3393 [10:32<17:43,  1.98batch/s, Batch Loss=0.0696, Avg Loss=0.0789, Time Left=18.78[A
Epoch 3/3 - Training:  38%|▍| 1286/3393 [10:33<17:43,  1.98batch/s, Batch Loss=0.1522, Avg Loss=0.0790, Time Left=18.77[A
Epoch 3/3 - Training:  38%|▍| 1287/3393 [10:33<17:40,  1.99batch/s, Batch Loss=0.1522, Avg Loss=0.0790, Time Left=18.77[A
Epoch 3/3 - Training:  38%|▍| 1287/3393 [10:33<17:40,  1.99batch/s, Batch Loss=0.2421, Avg Loss=0.0791, Time Left=18.76[A
Epoch 3/3 - Training:  38%|▍| 1288/3393 [10:33<17:37,  1.99batch/s, Batch Loss=0.2421, Avg Loss=0.0791, Time Left=18.76[A
Epoch 3/3 - Training:  38%|▍| 1288/3393 [10:34<17:37,  1.99batch/s, Batch Loss=0.1363, Avg Loss=0.0791, Time Left=18.76[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  39%|▍| 1318/3393 [10:49<18:09,  1.90batch/s, Batch Loss=0.0028, Avg Loss=0.0785, Time Left=18.50[A
Epoch 3/3 - Training:  39%|▍| 1318/3393 [10:49<18:09,  1.90batch/s, Batch Loss=0.1937, Avg Loss=0.0786, Time Left=18.49[A
Epoch 3/3 - Training:  39%|▍| 1319/3393 [10:49<17:44,  1.95batch/s, Batch Loss=0.1937, Avg Loss=0.0786, Time Left=18.49[A
Epoch 3/3 - Training:  39%|▍| 1319/3393 [10:50<17:44,  1.95batch/s, Batch Loss=0.0132, Avg Loss=0.0785, Time Left=18.48[A
Epoch 3/3 - Training:  39%|▍| 1320/3393 [10:50<17:45,  1.95batch/s, Batch Loss=0.0132, Avg Loss=0.0785, Time Left=18.48[A
Epoch 3/3 - Training:  39%|▍| 1320/3393 [10:50<17:45,  1.95batch/s, Batch Loss=0.0035, Avg Loss=0.0785, Time Left=18.47[A
Epoch 3/3 - Training:  39%|▍| 1321/3393 [10:50<17:47,  1.94batch/s, Batch Loss=0.0035, Avg Loss=0.0785, Time Left=18.47[A
Epoch 3/3 - Training:  39%|▍| 1321/3393 [10:51<17:47,  1.94batch/s, Batch Loss=0.0183, Avg Loss=0.0784, Time Left=18.47[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  40%|▍| 1351/3393 [11:05<16:58,  2.00batch/s, Batch Loss=0.0150, Avg Loss=0.0780, Time Left=18.20[A
Epoch 3/3 - Training:  40%|▍| 1351/3393 [11:06<16:58,  2.00batch/s, Batch Loss=0.0315, Avg Loss=0.0780, Time Left=18.19[A
Epoch 3/3 - Training:  40%|▍| 1352/3393 [11:06<16:55,  2.01batch/s, Batch Loss=0.0315, Avg Loss=0.0780, Time Left=18.19[A
Epoch 3/3 - Training:  40%|▍| 1352/3393 [11:06<16:55,  2.01batch/s, Batch Loss=0.0769, Avg Loss=0.0780, Time Left=18.18[A
Epoch 3/3 - Training:  40%|▍| 1353/3393 [11:06<16:46,  2.03batch/s, Batch Loss=0.0769, Avg Loss=0.0780, Time Left=18.18[A
Epoch 3/3 - Training:  40%|▍| 1353/3393 [11:07<16:46,  2.03batch/s, Batch Loss=0.0052, Avg Loss=0.0779, Time Left=18.17[A
Epoch 3/3 - Training:  40%|▍| 1354/3393 [11:07<16:39,  2.04batch/s, Batch Loss=0.0052, Avg Loss=0.0779, Time Left=18.17[A
Epoch 3/3 - Training:  40%|▍| 1354/3393 [11:07<16:39,  2.04batch/s, Batch Loss=0.0031, Avg Loss=0.0778, Time Left=18.17[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  41%|▍| 1384/3393 [11:22<17:06,  1.96batch/s, Batch Loss=0.0059, Avg Loss=0.0778, Time Left=17.90[A
Epoch 3/3 - Training:  41%|▍| 1384/3393 [11:22<17:06,  1.96batch/s, Batch Loss=0.0903, Avg Loss=0.0778, Time Left=17.89[A
Epoch 3/3 - Training:  41%|▍| 1385/3393 [11:22<17:00,  1.97batch/s, Batch Loss=0.0903, Avg Loss=0.0778, Time Left=17.89[A
Epoch 3/3 - Training:  41%|▍| 1385/3393 [11:23<17:00,  1.97batch/s, Batch Loss=0.0214, Avg Loss=0.0778, Time Left=17.88[A
Epoch 3/3 - Training:  41%|▍| 1386/3393 [11:23<17:14,  1.94batch/s, Batch Loss=0.0214, Avg Loss=0.0778, Time Left=17.88[A
Epoch 3/3 - Training:  41%|▍| 1386/3393 [11:23<17:14,  1.94batch/s, Batch Loss=0.0585, Avg Loss=0.0777, Time Left=17.88[A
Epoch 3/3 - Training:  41%|▍| 1387/3393 [11:23<17:05,  1.96batch/s, Batch Loss=0.0585, Avg Loss=0.0777, Time Left=17.88[A
Epoch 3/3 - Training:  41%|▍| 1387/3393 [11:24<17:05,  1.96batch/s, Batch Loss=0.0242, Avg Loss=0.0777, Time Left=17.87[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  42%|▍| 1417/3393 [11:38<16:29,  2.00batch/s, Batch Loss=0.0170, Avg Loss=0.0783, Time Left=17.60[A
Epoch 3/3 - Training:  42%|▍| 1417/3393 [11:39<16:29,  2.00batch/s, Batch Loss=0.0340, Avg Loss=0.0783, Time Left=17.59[A
Epoch 3/3 - Training:  42%|▍| 1418/3393 [11:39<16:37,  1.98batch/s, Batch Loss=0.0340, Avg Loss=0.0783, Time Left=17.59[A
Epoch 3/3 - Training:  42%|▍| 1418/3393 [11:39<16:37,  1.98batch/s, Batch Loss=0.1619, Avg Loss=0.0784, Time Left=17.58[A
Epoch 3/3 - Training:  42%|▍| 1419/3393 [11:39<16:25,  2.00batch/s, Batch Loss=0.1619, Avg Loss=0.0784, Time Left=17.58[A
Epoch 3/3 - Training:  42%|▍| 1419/3393 [11:40<16:25,  2.00batch/s, Batch Loss=0.1092, Avg Loss=0.0784, Time Left=17.57[A
Epoch 3/3 - Training:  42%|▍| 1420/3393 [11:40<16:25,  2.00batch/s, Batch Loss=0.1092, Avg Loss=0.0784, Time Left=17.57[A
Epoch 3/3 - Training:  42%|▍| 1420/3393 [11:40<16:25,  2.00batch/s, Batch Loss=0.1130, Avg Loss=0.0784, Time Left=17.57[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  43%|▍| 1450/3393 [11:55<16:18,  1.99batch/s, Batch Loss=0.1040, Avg Loss=0.0787, Time Left=17.30[A
Epoch 3/3 - Training:  43%|▍| 1450/3393 [11:55<16:18,  1.99batch/s, Batch Loss=0.5378, Avg Loss=0.0791, Time Left=17.30[A
Epoch 3/3 - Training:  43%|▍| 1451/3393 [11:55<16:16,  1.99batch/s, Batch Loss=0.5378, Avg Loss=0.0791, Time Left=17.30[A
Epoch 3/3 - Training:  43%|▍| 1451/3393 [11:56<16:16,  1.99batch/s, Batch Loss=0.0789, Avg Loss=0.0791, Time Left=17.29[A
Epoch 3/3 - Training:  43%|▍| 1452/3393 [11:56<16:23,  1.97batch/s, Batch Loss=0.0789, Avg Loss=0.0791, Time Left=17.29[A
Epoch 3/3 - Training:  43%|▍| 1452/3393 [11:56<16:23,  1.97batch/s, Batch Loss=0.1335, Avg Loss=0.0791, Time Left=17.28[A
Epoch 3/3 - Training:  43%|▍| 1453/3393 [11:56<16:20,  1.98batch/s, Batch Loss=0.1335, Avg Loss=0.0791, Time Left=17.28[A
Epoch 3/3 - Training:  43%|▍| 1453/3393 [11:57<16:20,  1.98batch/s, Batch Loss=0.1487, Avg Loss=0.0791, Time Left=17.27[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  44%|▍| 1483/3393 [12:12<15:45,  2.02batch/s, Batch Loss=0.4071, Avg Loss=0.0802, Time Left=17.02[A
Epoch 3/3 - Training:  44%|▍| 1483/3393 [12:12<15:45,  2.02batch/s, Batch Loss=0.1103, Avg Loss=0.0803, Time Left=17.01[A
Epoch 3/3 - Training:  44%|▍| 1484/3393 [12:12<15:57,  1.99batch/s, Batch Loss=0.1103, Avg Loss=0.0803, Time Left=17.01[A
Epoch 3/3 - Training:  44%|▍| 1484/3393 [12:13<15:57,  1.99batch/s, Batch Loss=0.0548, Avg Loss=0.0802, Time Left=17.00[A
Epoch 3/3 - Training:  44%|▍| 1485/3393 [12:13<15:56,  2.00batch/s, Batch Loss=0.0548, Avg Loss=0.0802, Time Left=17.00[A
Epoch 3/3 - Training:  44%|▍| 1485/3393 [12:13<15:56,  2.00batch/s, Batch Loss=0.0658, Avg Loss=0.0802, Time Left=16.99[A
Epoch 3/3 - Training:  44%|▍| 1486/3393 [12:13<16:04,  1.98batch/s, Batch Loss=0.0658, Avg Loss=0.0802, Time Left=16.99[A
Epoch 3/3 - Training:  44%|▍| 1486/3393 [12:14<16:04,  1.98batch/s, Batch Loss=0.0158, Avg Loss=0.0802, Time Left=16.98[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  45%|▍| 1516/3393 [12:28<15:48,  1.98batch/s, Batch Loss=0.0642, Avg Loss=0.0802, Time Left=16.73[A
Epoch 3/3 - Training:  45%|▍| 1516/3393 [12:29<15:48,  1.98batch/s, Batch Loss=0.1493, Avg Loss=0.0803, Time Left=16.72[A
Epoch 3/3 - Training:  45%|▍| 1517/3393 [12:29<16:02,  1.95batch/s, Batch Loss=0.1493, Avg Loss=0.0803, Time Left=16.72[A
Epoch 3/3 - Training:  45%|▍| 1517/3393 [12:29<16:02,  1.95batch/s, Batch Loss=0.0224, Avg Loss=0.0802, Time Left=16.71[A
Epoch 3/3 - Training:  45%|▍| 1518/3393 [12:29<15:47,  1.98batch/s, Batch Loss=0.0224, Avg Loss=0.0802, Time Left=16.71[A
Epoch 3/3 - Training:  45%|▍| 1518/3393 [12:30<15:47,  1.98batch/s, Batch Loss=0.1809, Avg Loss=0.0803, Time Left=16.70[A
Epoch 3/3 - Training:  45%|▍| 1519/3393 [12:30<16:01,  1.95batch/s, Batch Loss=0.1809, Avg Loss=0.0803, Time Left=16.70[A
Epoch 3/3 - Training:  45%|▍| 1519/3393 [12:30<16:01,  1.95batch/s, Batch Loss=0.0992, Avg Loss=0.0803, Time Left=16.69[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  46%|▍| 1549/3393 [12:45<15:36,  1.97batch/s, Batch Loss=0.0304, Avg Loss=0.0799, Time Left=16.44[A
Epoch 3/3 - Training:  46%|▍| 1549/3393 [12:46<15:36,  1.97batch/s, Batch Loss=0.0260, Avg Loss=0.0799, Time Left=16.43[A
Epoch 3/3 - Training:  46%|▍| 1550/3393 [12:46<15:57,  1.92batch/s, Batch Loss=0.0260, Avg Loss=0.0799, Time Left=16.43[A
Epoch 3/3 - Training:  46%|▍| 1550/3393 [12:46<15:57,  1.92batch/s, Batch Loss=0.0871, Avg Loss=0.0799, Time Left=16.42[A
Epoch 3/3 - Training:  46%|▍| 1551/3393 [12:46<15:30,  1.98batch/s, Batch Loss=0.0871, Avg Loss=0.0799, Time Left=16.42[A
Epoch 3/3 - Training:  46%|▍| 1551/3393 [12:47<15:30,  1.98batch/s, Batch Loss=0.3873, Avg Loss=0.0801, Time Left=16.41[A
Epoch 3/3 - Training:  46%|▍| 1552/3393 [12:47<15:26,  1.99batch/s, Batch Loss=0.3873, Avg Loss=0.0801, Time Left=16.41[A
Epoch 3/3 - Training:  46%|▍| 1552/3393 [12:47<15:26,  1.99batch/s, Batch Loss=0.0037, Avg Loss=0.0800, Time Left=16.40[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  47%|▍| 1582/3393 [13:02<15:25,  1.96batch/s, Batch Loss=0.0810, Avg Loss=0.0799, Time Left=16.15[A
Epoch 3/3 - Training:  47%|▍| 1582/3393 [13:02<15:25,  1.96batch/s, Batch Loss=0.0473, Avg Loss=0.0799, Time Left=16.14[A
Epoch 3/3 - Training:  47%|▍| 1583/3393 [13:02<15:19,  1.97batch/s, Batch Loss=0.0473, Avg Loss=0.0799, Time Left=16.14[A
Epoch 3/3 - Training:  47%|▍| 1583/3393 [13:03<15:19,  1.97batch/s, Batch Loss=0.0512, Avg Loss=0.0799, Time Left=16.13[A
Epoch 3/3 - Training:  47%|▍| 1584/3393 [13:03<15:15,  1.97batch/s, Batch Loss=0.0512, Avg Loss=0.0799, Time Left=16.13[A
Epoch 3/3 - Training:  47%|▍| 1584/3393 [13:03<15:15,  1.97batch/s, Batch Loss=0.1048, Avg Loss=0.0799, Time Left=16.12[A
Epoch 3/3 - Training:  47%|▍| 1585/3393 [13:03<15:03,  2.00batch/s, Batch Loss=0.1048, Avg Loss=0.0799, Time Left=16.12[A
Epoch 3/3 - Training:  47%|▍| 1585/3393 [13:04<15:03,  2.00batch/s, Batch Loss=0.0247, Avg Loss=0.0799, Time Left=16.11[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  48%|▍| 1615/3393 [13:19<15:00,  1.97batch/s, Batch Loss=0.0103, Avg Loss=0.0797, Time Left=15.85[A
Epoch 3/3 - Training:  48%|▍| 1615/3393 [13:19<15:00,  1.97batch/s, Batch Loss=0.0406, Avg Loss=0.0796, Time Left=15.84[A
Epoch 3/3 - Training:  48%|▍| 1616/3393 [13:19<14:58,  1.98batch/s, Batch Loss=0.0406, Avg Loss=0.0796, Time Left=15.84[A
Epoch 3/3 - Training:  48%|▍| 1616/3393 [13:20<14:58,  1.98batch/s, Batch Loss=0.0835, Avg Loss=0.0796, Time Left=15.84[A
Epoch 3/3 - Training:  48%|▍| 1617/3393 [13:20<15:11,  1.95batch/s, Batch Loss=0.0835, Avg Loss=0.0796, Time Left=15.84[A
Epoch 3/3 - Training:  48%|▍| 1617/3393 [13:20<15:11,  1.95batch/s, Batch Loss=0.2015, Avg Loss=0.0797, Time Left=15.83[A
Epoch 3/3 - Training:  48%|▍| 1618/3393 [13:20<14:56,  1.98batch/s, Batch Loss=0.2015, Avg Loss=0.0797, Time Left=15.83[A
Epoch 3/3 - Training:  48%|▍| 1618/3393 [13:21<14:56,  1.98batch/s, Batch Loss=0.0038, Avg Loss=0.0797, Time Left=15.82[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  49%|▍| 1648/3393 [13:35<15:03,  1.93batch/s, Batch Loss=0.0663, Avg Loss=0.0801, Time Left=15.57[A
Epoch 3/3 - Training:  49%|▍| 1648/3393 [13:36<15:03,  1.93batch/s, Batch Loss=0.0735, Avg Loss=0.0801, Time Left=15.56[A
Epoch 3/3 - Training:  49%|▍| 1649/3393 [13:36<14:38,  1.99batch/s, Batch Loss=0.0735, Avg Loss=0.0801, Time Left=15.56[A
Epoch 3/3 - Training:  49%|▍| 1649/3393 [13:36<14:38,  1.99batch/s, Batch Loss=0.0095, Avg Loss=0.0801, Time Left=15.55[A
Epoch 3/3 - Training:  49%|▍| 1650/3393 [13:36<14:43,  1.97batch/s, Batch Loss=0.0095, Avg Loss=0.0801, Time Left=15.55[A
Epoch 3/3 - Training:  49%|▍| 1650/3393 [13:37<14:43,  1.97batch/s, Batch Loss=0.1541, Avg Loss=0.0801, Time Left=15.54[A
Epoch 3/3 - Training:  49%|▍| 1651/3393 [13:37<14:40,  1.98batch/s, Batch Loss=0.1541, Avg Loss=0.0801, Time Left=15.54[A
Epoch 3/3 - Training:  49%|▍| 1651/3393 [13:37<14:40,  1.98batch/s, Batch Loss=0.0695, Avg Loss=0.0801, Time Left=15.53[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  50%|▍| 1681/3393 [13:52<14:31,  1.96batch/s, Batch Loss=0.0167, Avg Loss=0.0807, Time Left=15.27[A
Epoch 3/3 - Training:  50%|▍| 1681/3393 [13:52<14:31,  1.96batch/s, Batch Loss=0.0702, Avg Loss=0.0807, Time Left=15.26[A
Epoch 3/3 - Training:  50%|▍| 1682/3393 [13:52<14:28,  1.97batch/s, Batch Loss=0.0702, Avg Loss=0.0807, Time Left=15.26[A
Epoch 3/3 - Training:  50%|▍| 1682/3393 [13:53<14:28,  1.97batch/s, Batch Loss=0.1273, Avg Loss=0.0807, Time Left=15.26[A
Epoch 3/3 - Training:  50%|▍| 1683/3393 [13:53<14:32,  1.96batch/s, Batch Loss=0.1273, Avg Loss=0.0807, Time Left=15.26[A
Epoch 3/3 - Training:  50%|▍| 1683/3393 [13:53<14:32,  1.96batch/s, Batch Loss=0.0336, Avg Loss=0.0807, Time Left=15.25[A
Epoch 3/3 - Training:  50%|▍| 1684/3393 [13:53<14:27,  1.97batch/s, Batch Loss=0.0336, Avg Loss=0.0807, Time Left=15.25[A
Epoch 3/3 - Training:  50%|▍| 1684/3393 [13:54<14:27,  1.97batch/s, Batch Loss=0.0925, Avg Loss=0.0807, Time Left=15.24[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  51%|▌| 1714/3393 [14:08<14:19,  1.95batch/s, Batch Loss=0.4776, Avg Loss=0.0803, Time Left=14.98[A
Epoch 3/3 - Training:  51%|▌| 1714/3393 [14:09<14:19,  1.95batch/s, Batch Loss=0.0413, Avg Loss=0.0803, Time Left=14.97[A
Epoch 3/3 - Training:  51%|▌| 1715/3393 [14:09<14:14,  1.96batch/s, Batch Loss=0.0413, Avg Loss=0.0803, Time Left=14.97[A
Epoch 3/3 - Training:  51%|▌| 1715/3393 [14:10<14:14,  1.96batch/s, Batch Loss=0.0218, Avg Loss=0.0802, Time Left=14.97[A
Epoch 3/3 - Training:  51%|▌| 1716/3393 [14:10<14:24,  1.94batch/s, Batch Loss=0.0218, Avg Loss=0.0802, Time Left=14.97[A
Epoch 3/3 - Training:  51%|▌| 1716/3393 [14:10<14:24,  1.94batch/s, Batch Loss=0.0683, Avg Loss=0.0802, Time Left=14.96[A
Epoch 3/3 - Training:  51%|▌| 1717/3393 [14:10<14:09,  1.97batch/s, Batch Loss=0.0683, Avg Loss=0.0802, Time Left=14.96[A
Epoch 3/3 - Training:  51%|▌| 1717/3393 [14:11<14:09,  1.97batch/s, Batch Loss=0.0913, Avg Loss=0.0802, Time Left=14.95[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  51%|▌| 1747/3393 [14:25<13:28,  2.04batch/s, Batch Loss=0.2687, Avg Loss=0.0809, Time Left=14.69[A
Epoch 3/3 - Training:  51%|▌| 1747/3393 [14:25<13:28,  2.04batch/s, Batch Loss=0.0420, Avg Loss=0.0809, Time Left=14.68[A
Epoch 3/3 - Training:  52%|▌| 1748/3393 [14:25<13:48,  1.99batch/s, Batch Loss=0.0420, Avg Loss=0.0809, Time Left=14.68[A
Epoch 3/3 - Training:  52%|▌| 1748/3393 [14:26<13:48,  1.99batch/s, Batch Loss=0.0200, Avg Loss=0.0809, Time Left=14.67[A
Epoch 3/3 - Training:  52%|▌| 1749/3393 [14:26<13:47,  1.99batch/s, Batch Loss=0.0200, Avg Loss=0.0809, Time Left=14.67[A
Epoch 3/3 - Training:  52%|▌| 1749/3393 [14:26<13:47,  1.99batch/s, Batch Loss=0.0274, Avg Loss=0.0808, Time Left=14.66[A
Epoch 3/3 - Training:  52%|▌| 1750/3393 [14:27<14:00,  1.95batch/s, Batch Loss=0.0274, Avg Loss=0.0808, Time Left=14.66[A
Epoch 3/3 - Training:  52%|▌| 1750/3393 [14:27<14:00,  1.95batch/s, Batch Loss=0.3211, Avg Loss=0.0810, Time Left=14.66[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  52%|▌| 1780/3393 [14:42<13:24,  2.01batch/s, Batch Loss=0.0389, Avg Loss=0.0808, Time Left=14.40[A
Epoch 3/3 - Training:  52%|▌| 1780/3393 [14:42<13:24,  2.01batch/s, Batch Loss=0.0468, Avg Loss=0.0808, Time Left=14.39[A
Epoch 3/3 - Training:  52%|▌| 1781/3393 [14:42<13:40,  1.96batch/s, Batch Loss=0.0468, Avg Loss=0.0808, Time Left=14.39[A
Epoch 3/3 - Training:  52%|▌| 1781/3393 [14:43<13:40,  1.96batch/s, Batch Loss=0.2919, Avg Loss=0.0809, Time Left=14.39[A
Epoch 3/3 - Training:  53%|▌| 1782/3393 [14:43<13:25,  2.00batch/s, Batch Loss=0.2919, Avg Loss=0.0809, Time Left=14.39[A
Epoch 3/3 - Training:  53%|▌| 1782/3393 [14:43<13:25,  2.00batch/s, Batch Loss=0.0343, Avg Loss=0.0809, Time Left=14.38[A
Epoch 3/3 - Training:  53%|▌| 1783/3393 [14:43<13:19,  2.01batch/s, Batch Loss=0.0343, Avg Loss=0.0809, Time Left=14.38[A
Epoch 3/3 - Training:  53%|▌| 1783/3393 [14:44<13:19,  2.01batch/s, Batch Loss=0.0595, Avg Loss=0.0809, Time Left=14.37[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  53%|▌| 1813/3393 [14:58<13:35,  1.94batch/s, Batch Loss=0.0860, Avg Loss=0.0806, Time Left=14.12[A
Epoch 3/3 - Training:  53%|▌| 1813/3393 [14:59<13:35,  1.94batch/s, Batch Loss=0.1585, Avg Loss=0.0806, Time Left=14.11[A
Epoch 3/3 - Training:  53%|▌| 1814/3393 [14:59<13:28,  1.95batch/s, Batch Loss=0.1585, Avg Loss=0.0806, Time Left=14.11[A
Epoch 3/3 - Training:  53%|▌| 1814/3393 [14:59<13:28,  1.95batch/s, Batch Loss=0.0120, Avg Loss=0.0806, Time Left=14.10[A
Epoch 3/3 - Training:  53%|▌| 1815/3393 [14:59<13:29,  1.95batch/s, Batch Loss=0.0120, Avg Loss=0.0806, Time Left=14.10[A
Epoch 3/3 - Training:  53%|▌| 1815/3393 [15:00<13:29,  1.95batch/s, Batch Loss=0.0149, Avg Loss=0.0805, Time Left=14.09[A
Epoch 3/3 - Training:  54%|▌| 1816/3393 [15:00<13:24,  1.96batch/s, Batch Loss=0.0149, Avg Loss=0.0805, Time Left=14.09[A
Epoch 3/3 - Training:  54%|▌| 1816/3393 [15:00<13:24,  1.96batch/s, Batch Loss=0.0721, Avg Loss=0.0805, Time Left=14.08[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  54%|▌| 1846/3393 [15:15<13:10,  1.96batch/s, Batch Loss=0.0486, Avg Loss=0.0802, Time Left=13.83[A
Epoch 3/3 - Training:  54%|▌| 1846/3393 [15:16<13:10,  1.96batch/s, Batch Loss=0.0375, Avg Loss=0.0802, Time Left=13.82[A
Epoch 3/3 - Training:  54%|▌| 1847/3393 [15:16<13:19,  1.93batch/s, Batch Loss=0.0375, Avg Loss=0.0802, Time Left=13.82[A
Epoch 3/3 - Training:  54%|▌| 1847/3393 [15:16<13:19,  1.93batch/s, Batch Loss=0.0016, Avg Loss=0.0802, Time Left=13.81[A
Epoch 3/3 - Training:  54%|▌| 1848/3393 [15:16<13:11,  1.95batch/s, Batch Loss=0.0016, Avg Loss=0.0802, Time Left=13.81[A
Epoch 3/3 - Training:  54%|▌| 1848/3393 [15:17<13:11,  1.95batch/s, Batch Loss=0.0052, Avg Loss=0.0801, Time Left=13.81[A
Epoch 3/3 - Training:  54%|▌| 1849/3393 [15:17<13:20,  1.93batch/s, Batch Loss=0.0052, Avg Loss=0.0801, Time Left=13.81[A
Epoch 3/3 - Training:  54%|▌| 1849/3393 [15:17<13:20,  1.93batch/s, Batch Loss=0.0019, Avg Loss=0.0801, Time Left=13.80[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  55%|▌| 1879/3393 [15:32<12:48,  1.97batch/s, Batch Loss=0.0571, Avg Loss=0.0804, Time Left=13.55[A
Epoch 3/3 - Training:  55%|▌| 1879/3393 [15:33<12:48,  1.97batch/s, Batch Loss=0.0759, Avg Loss=0.0804, Time Left=13.54[A
Epoch 3/3 - Training:  55%|▌| 1880/3393 [15:33<12:37,  2.00batch/s, Batch Loss=0.0759, Avg Loss=0.0804, Time Left=13.54[A
Epoch 3/3 - Training:  55%|▌| 1880/3393 [15:33<12:37,  2.00batch/s, Batch Loss=0.0136, Avg Loss=0.0804, Time Left=13.53[A
Epoch 3/3 - Training:  55%|▌| 1881/3393 [15:33<12:49,  1.96batch/s, Batch Loss=0.0136, Avg Loss=0.0804, Time Left=13.53[A
Epoch 3/3 - Training:  55%|▌| 1881/3393 [15:34<12:49,  1.96batch/s, Batch Loss=0.0073, Avg Loss=0.0803, Time Left=13.52[A
Epoch 3/3 - Training:  55%|▌| 1882/3393 [15:34<12:39,  1.99batch/s, Batch Loss=0.0073, Avg Loss=0.0803, Time Left=13.52[A
Epoch 3/3 - Training:  55%|▌| 1882/3393 [15:34<12:39,  1.99batch/s, Batch Loss=0.0825, Avg Loss=0.0803, Time Left=13.51[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  56%|▌| 1912/3393 [15:49<12:32,  1.97batch/s, Batch Loss=0.0989, Avg Loss=0.0802, Time Left=13.26[A
Epoch 3/3 - Training:  56%|▌| 1912/3393 [15:50<12:32,  1.97batch/s, Batch Loss=0.0091, Avg Loss=0.0801, Time Left=13.25[A
Epoch 3/3 - Training:  56%|▌| 1913/3393 [15:50<12:28,  1.98batch/s, Batch Loss=0.0091, Avg Loss=0.0801, Time Left=13.25[A
Epoch 3/3 - Training:  56%|▌| 1913/3393 [15:50<12:28,  1.98batch/s, Batch Loss=0.1191, Avg Loss=0.0801, Time Left=13.25[A
Epoch 3/3 - Training:  56%|▌| 1914/3393 [15:50<12:46,  1.93batch/s, Batch Loss=0.1191, Avg Loss=0.0801, Time Left=13.25[A
Epoch 3/3 - Training:  56%|▌| 1914/3393 [15:51<12:46,  1.93batch/s, Batch Loss=0.0129, Avg Loss=0.0801, Time Left=13.24[A
Epoch 3/3 - Training:  56%|▌| 1915/3393 [15:51<12:36,  1.95batch/s, Batch Loss=0.0129, Avg Loss=0.0801, Time Left=13.24[A
Epoch 3/3 - Training:  56%|▌| 1915/3393 [15:51<12:36,  1.95batch/s, Batch Loss=0.3447, Avg Loss=0.0803, Time Left=13.23[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  57%|▌| 1945/3393 [16:06<12:36,  1.91batch/s, Batch Loss=0.0326, Avg Loss=0.0799, Time Left=12.98[A
Epoch 3/3 - Training:  57%|▌| 1945/3393 [16:07<12:36,  1.91batch/s, Batch Loss=0.2861, Avg Loss=0.0800, Time Left=12.97[A
Epoch 3/3 - Training:  57%|▌| 1946/3393 [16:07<12:32,  1.92batch/s, Batch Loss=0.2861, Avg Loss=0.0800, Time Left=12.97[A
Epoch 3/3 - Training:  57%|▌| 1946/3393 [16:07<12:32,  1.92batch/s, Batch Loss=0.0034, Avg Loss=0.0800, Time Left=12.96[A
Epoch 3/3 - Training:  57%|▌| 1947/3393 [16:07<12:23,  1.94batch/s, Batch Loss=0.0034, Avg Loss=0.0800, Time Left=12.96[A
Epoch 3/3 - Training:  57%|▌| 1947/3393 [16:08<12:23,  1.94batch/s, Batch Loss=0.1028, Avg Loss=0.0800, Time Left=12.95[A
Epoch 3/3 - Training:  57%|▌| 1948/3393 [16:08<12:17,  1.96batch/s, Batch Loss=0.1028, Avg Loss=0.0800, Time Left=12.95[A
Epoch 3/3 - Training:  57%|▌| 1948/3393 [16:08<12:17,  1.96batch/s, Batch Loss=0.0240, Avg Loss=0.0800, Time Left=12.95[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  58%|▌| 1978/3393 [16:23<12:06,  1.95batch/s, Batch Loss=0.0102, Avg Loss=0.0797, Time Left=12.69[A
Epoch 3/3 - Training:  58%|▌| 1978/3393 [16:23<12:06,  1.95batch/s, Batch Loss=0.1740, Avg Loss=0.0798, Time Left=12.69[A
Epoch 3/3 - Training:  58%|▌| 1979/3393 [16:23<11:53,  1.98batch/s, Batch Loss=0.1740, Avg Loss=0.0798, Time Left=12.69[A
Epoch 3/3 - Training:  58%|▌| 1979/3393 [16:24<11:53,  1.98batch/s, Batch Loss=0.0119, Avg Loss=0.0797, Time Left=12.68[A
Epoch 3/3 - Training:  58%|▌| 1980/3393 [16:24<12:04,  1.95batch/s, Batch Loss=0.0119, Avg Loss=0.0797, Time Left=12.68[A
Epoch 3/3 - Training:  58%|▌| 1980/3393 [16:24<12:04,  1.95batch/s, Batch Loss=0.0113, Avg Loss=0.0797, Time Left=12.67[A
Epoch 3/3 - Training:  58%|▌| 1981/3393 [16:24<11:59,  1.96batch/s, Batch Loss=0.0113, Avg Loss=0.0797, Time Left=12.67[A
Epoch 3/3 - Training:  58%|▌| 1981/3393 [16:25<11:59,  1.96batch/s, Batch Loss=0.0777, Avg Loss=0.0797, Time Left=12.66[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  59%|▌| 2011/3393 [16:40<11:45,  1.96batch/s, Batch Loss=0.0033, Avg Loss=0.0793, Time Left=12.41[A
Epoch 3/3 - Training:  59%|▌| 2011/3393 [16:40<11:45,  1.96batch/s, Batch Loss=0.0038, Avg Loss=0.0792, Time Left=12.40[A
Epoch 3/3 - Training:  59%|▌| 2012/3393 [16:40<11:54,  1.93batch/s, Batch Loss=0.0038, Avg Loss=0.0792, Time Left=12.40[A
Epoch 3/3 - Training:  59%|▌| 2012/3393 [16:41<11:54,  1.93batch/s, Batch Loss=0.0079, Avg Loss=0.0792, Time Left=12.39[A
Epoch 3/3 - Training:  59%|▌| 2013/3393 [16:41<11:47,  1.95batch/s, Batch Loss=0.0079, Avg Loss=0.0792, Time Left=12.39[A
Epoch 3/3 - Training:  59%|▌| 2013/3393 [16:41<11:47,  1.95batch/s, Batch Loss=0.0067, Avg Loss=0.0791, Time Left=12.38[A
Epoch 3/3 - Training:  59%|▌| 2014/3393 [16:41<11:48,  1.95batch/s, Batch Loss=0.0067, Avg Loss=0.0791, Time Left=12.38[A
Epoch 3/3 - Training:  59%|▌| 2014/3393 [16:42<11:48,  1.95batch/s, Batch Loss=0.0054, Avg Loss=0.0791, Time Left=12.37[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  60%|▌| 2044/3393 [16:56<11:32,  1.95batch/s, Batch Loss=0.5939, Avg Loss=0.0794, Time Left=12.12[A
Epoch 3/3 - Training:  60%|▌| 2044/3393 [16:57<11:32,  1.95batch/s, Batch Loss=0.0439, Avg Loss=0.0794, Time Left=12.12[A
Epoch 3/3 - Training:  60%|▌| 2045/3393 [16:57<11:25,  1.97batch/s, Batch Loss=0.0439, Avg Loss=0.0794, Time Left=12.12[A
Epoch 3/3 - Training:  60%|▌| 2045/3393 [16:57<11:25,  1.97batch/s, Batch Loss=0.1076, Avg Loss=0.0794, Time Left=12.11[A
Epoch 3/3 - Training:  60%|▌| 2046/3393 [16:57<11:22,  1.97batch/s, Batch Loss=0.1076, Avg Loss=0.0794, Time Left=12.11[A
Epoch 3/3 - Training:  60%|▌| 2046/3393 [16:58<11:22,  1.97batch/s, Batch Loss=0.0635, Avg Loss=0.0794, Time Left=12.10[A
Epoch 3/3 - Training:  60%|▌| 2047/3393 [16:58<11:13,  2.00batch/s, Batch Loss=0.0635, Avg Loss=0.0794, Time Left=12.10[A
Epoch 3/3 - Training:  60%|▌| 2047/3393 [16:58<11:13,  2.00batch/s, Batch Loss=0.1317, Avg Loss=0.0794, Time Left=12.09[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  61%|▌| 2077/3393 [17:13<11:06,  1.97batch/s, Batch Loss=0.0944, Avg Loss=0.0795, Time Left=11.84[A
Epoch 3/3 - Training:  61%|▌| 2077/3393 [17:14<11:06,  1.97batch/s, Batch Loss=0.1941, Avg Loss=0.0795, Time Left=11.83[A
Epoch 3/3 - Training:  61%|▌| 2078/3393 [17:14<11:22,  1.93batch/s, Batch Loss=0.1941, Avg Loss=0.0795, Time Left=11.83[A
Epoch 3/3 - Training:  61%|▌| 2078/3393 [17:14<11:22,  1.93batch/s, Batch Loss=0.0861, Avg Loss=0.0795, Time Left=11.82[A
Epoch 3/3 - Training:  61%|▌| 2079/3393 [17:14<11:09,  1.96batch/s, Batch Loss=0.0861, Avg Loss=0.0795, Time Left=11.82[A
Epoch 3/3 - Training:  61%|▌| 2079/3393 [17:15<11:09,  1.96batch/s, Batch Loss=0.0307, Avg Loss=0.0795, Time Left=11.81[A
Epoch 3/3 - Training:  61%|▌| 2080/3393 [17:15<11:11,  1.96batch/s, Batch Loss=0.0307, Avg Loss=0.0795, Time Left=11.81[A
Epoch 3/3 - Training:  61%|▌| 2080/3393 [17:15<11:11,  1.96batch/s, Batch Loss=0.0081, Avg Loss=0.0795, Time Left=11.80[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  62%|▌| 2110/3393 [17:30<10:35,  2.02batch/s, Batch Loss=0.0582, Avg Loss=0.0796, Time Left=11.55[A
Epoch 3/3 - Training:  62%|▌| 2110/3393 [17:30<10:35,  2.02batch/s, Batch Loss=0.0056, Avg Loss=0.0795, Time Left=11.54[A
Epoch 3/3 - Training:  62%|▌| 2111/3393 [17:30<10:24,  2.05batch/s, Batch Loss=0.0056, Avg Loss=0.0795, Time Left=11.54[A
Epoch 3/3 - Training:  62%|▌| 2111/3393 [17:31<10:24,  2.05batch/s, Batch Loss=0.1987, Avg Loss=0.0796, Time Left=11.53[A
Epoch 3/3 - Training:  62%|▌| 2112/3393 [17:31<10:27,  2.04batch/s, Batch Loss=0.1987, Avg Loss=0.0796, Time Left=11.53[A
Epoch 3/3 - Training:  62%|▌| 2112/3393 [17:31<10:27,  2.04batch/s, Batch Loss=0.1161, Avg Loss=0.0796, Time Left=11.53[A
Epoch 3/3 - Training:  62%|▌| 2113/3393 [17:31<10:43,  1.99batch/s, Batch Loss=0.1161, Avg Loss=0.0796, Time Left=11.53[A
Epoch 3/3 - Training:  62%|▌| 2113/3393 [17:32<10:43,  1.99batch/s, Batch Loss=0.0180, Avg Loss=0.0796, Time Left=11.52[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  63%|▋| 2143/3393 [17:46<10:41,  1.95batch/s, Batch Loss=0.1722, Avg Loss=0.0791, Time Left=11.27[A
Epoch 3/3 - Training:  63%|▋| 2143/3393 [17:47<10:41,  1.95batch/s, Batch Loss=0.0019, Avg Loss=0.0791, Time Left=11.26[A
Epoch 3/3 - Training:  63%|▋| 2144/3393 [17:47<10:40,  1.95batch/s, Batch Loss=0.0019, Avg Loss=0.0791, Time Left=11.26[A
Epoch 3/3 - Training:  63%|▋| 2144/3393 [17:47<10:40,  1.95batch/s, Batch Loss=0.0321, Avg Loss=0.0791, Time Left=11.25[A
Epoch 3/3 - Training:  63%|▋| 2145/3393 [17:47<10:37,  1.96batch/s, Batch Loss=0.0321, Avg Loss=0.0791, Time Left=11.25[A
Epoch 3/3 - Training:  63%|▋| 2145/3393 [17:48<10:37,  1.96batch/s, Batch Loss=0.0020, Avg Loss=0.0790, Time Left=11.24[A
Epoch 3/3 - Training:  63%|▋| 2146/3393 [17:48<10:27,  1.99batch/s, Batch Loss=0.0020, Avg Loss=0.0790, Time Left=11.24[A
Epoch 3/3 - Training:  63%|▋| 2146/3393 [17:48<10:27,  1.99batch/s, Batch Loss=0.0918, Avg Loss=0.0790, Time Left=11.23[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  64%|▋| 2176/3393 [18:03<10:25,  1.95batch/s, Batch Loss=0.1510, Avg Loss=0.0793, Time Left=10.98[A
Epoch 3/3 - Training:  64%|▋| 2176/3393 [18:04<10:25,  1.95batch/s, Batch Loss=0.0218, Avg Loss=0.0792, Time Left=10.97[A
Epoch 3/3 - Training:  64%|▋| 2177/3393 [18:04<10:27,  1.94batch/s, Batch Loss=0.0218, Avg Loss=0.0792, Time Left=10.97[A
Epoch 3/3 - Training:  64%|▋| 2177/3393 [18:04<10:27,  1.94batch/s, Batch Loss=0.0469, Avg Loss=0.0792, Time Left=10.97[A
Epoch 3/3 - Training:  64%|▋| 2178/3393 [18:04<10:33,  1.92batch/s, Batch Loss=0.0469, Avg Loss=0.0792, Time Left=10.97[A
Epoch 3/3 - Training:  64%|▋| 2178/3393 [18:05<10:33,  1.92batch/s, Batch Loss=0.2303, Avg Loss=0.0793, Time Left=10.96[A
Epoch 3/3 - Training:  64%|▋| 2179/3393 [18:05<10:25,  1.94batch/s, Batch Loss=0.2303, Avg Loss=0.0793, Time Left=10.96[A
Epoch 3/3 - Training:  64%|▋| 2179/3393 [18:05<10:25,  1.94batch/s, Batch Loss=0.0070, Avg Loss=0.0793, Time Left=10.95[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  65%|▋| 2209/3393 [18:20<10:00,  1.97batch/s, Batch Loss=0.0440, Avg Loss=0.0794, Time Left=10.70[A
Epoch 3/3 - Training:  65%|▋| 2209/3393 [18:20<10:00,  1.97batch/s, Batch Loss=0.0410, Avg Loss=0.0794, Time Left=10.69[A
Epoch 3/3 - Training:  65%|▋| 2210/3393 [18:20<09:58,  1.98batch/s, Batch Loss=0.0410, Avg Loss=0.0794, Time Left=10.69[A
Epoch 3/3 - Training:  65%|▋| 2210/3393 [18:21<09:58,  1.98batch/s, Batch Loss=0.0920, Avg Loss=0.0794, Time Left=10.68[A
Epoch 3/3 - Training:  65%|▋| 2211/3393 [18:21<10:02,  1.96batch/s, Batch Loss=0.0920, Avg Loss=0.0794, Time Left=10.68[A
Epoch 3/3 - Training:  65%|▋| 2211/3393 [18:21<10:02,  1.96batch/s, Batch Loss=0.1182, Avg Loss=0.0794, Time Left=10.67[A
Epoch 3/3 - Training:  65%|▋| 2212/3393 [18:21<09:58,  1.97batch/s, Batch Loss=0.1182, Avg Loss=0.0794, Time Left=10.67[A
Epoch 3/3 - Training:  65%|▋| 2212/3393 [18:22<09:58,  1.97batch/s, Batch Loss=0.1831, Avg Loss=0.0794, Time Left=10.66[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  66%|▋| 2242/3393 [18:36<09:36,  2.00batch/s, Batch Loss=0.4830, Avg Loss=0.0792, Time Left=10.41[A
Epoch 3/3 - Training:  66%|▋| 2242/3393 [18:37<09:36,  2.00batch/s, Batch Loss=0.0021, Avg Loss=0.0792, Time Left=10.40[A
Epoch 3/3 - Training:  66%|▋| 2243/3393 [18:37<09:30,  2.02batch/s, Batch Loss=0.0021, Avg Loss=0.0792, Time Left=10.40[A
Epoch 3/3 - Training:  66%|▋| 2243/3393 [18:37<09:30,  2.02batch/s, Batch Loss=0.1001, Avg Loss=0.0792, Time Left=10.39[A
Epoch 3/3 - Training:  66%|▋| 2244/3393 [18:37<09:31,  2.01batch/s, Batch Loss=0.1001, Avg Loss=0.0792, Time Left=10.39[A
Epoch 3/3 - Training:  66%|▋| 2244/3393 [18:38<09:31,  2.01batch/s, Batch Loss=0.0869, Avg Loss=0.0792, Time Left=10.38[A
Epoch 3/3 - Training:  66%|▋| 2245/3393 [18:38<09:26,  2.03batch/s, Batch Loss=0.0869, Avg Loss=0.0792, Time Left=10.38[A
Epoch 3/3 - Training:  66%|▋| 2245/3393 [18:38<09:26,  2.03batch/s, Batch Loss=0.0238, Avg Loss=0.0792, Time Left=10.38[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  67%|▋| 2275/3393 [18:53<09:28,  1.97batch/s, Batch Loss=0.1000, Avg Loss=0.0788, Time Left=10.12[A
Epoch 3/3 - Training:  67%|▋| 2275/3393 [18:53<09:28,  1.97batch/s, Batch Loss=0.0013, Avg Loss=0.0788, Time Left=10.11[A
Epoch 3/3 - Training:  67%|▋| 2276/3393 [18:53<09:21,  1.99batch/s, Batch Loss=0.0013, Avg Loss=0.0788, Time Left=10.11[A
Epoch 3/3 - Training:  67%|▋| 2276/3393 [18:54<09:21,  1.99batch/s, Batch Loss=0.0093, Avg Loss=0.0788, Time Left=10.11[A
Epoch 3/3 - Training:  67%|▋| 2277/3393 [18:54<09:39,  1.93batch/s, Batch Loss=0.0093, Avg Loss=0.0788, Time Left=10.11[A
Epoch 3/3 - Training:  67%|▋| 2277/3393 [18:54<09:39,  1.93batch/s, Batch Loss=0.2767, Avg Loss=0.0788, Time Left=10.10[A
Epoch 3/3 - Training:  67%|▋| 2278/3393 [18:54<09:30,  1.96batch/s, Batch Loss=0.2767, Avg Loss=0.0788, Time Left=10.10[A
Epoch 3/3 - Training:  67%|▋| 2278/3393 [18:55<09:30,  1.96batch/s, Batch Loss=0.0036, Avg Loss=0.0788, Time Left=10.09[A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  68%|▋| 2308/3393 [19:09<09:14,  1.96batch/s, Batch Loss=0.1669, Avg Loss=0.0790, Time Left=9.84 [A
Epoch 3/3 - Training:  68%|▋| 2308/3393 [19:10<09:14,  1.96batch/s, Batch Loss=0.0972, Avg Loss=0.0790, Time Left=9.83 [A
Epoch 3/3 - Training:  68%|▋| 2309/3393 [19:10<09:11,  1.97batch/s, Batch Loss=0.0972, Avg Loss=0.0790, Time Left=9.83 [A
Epoch 3/3 - Training:  68%|▋| 2309/3393 [19:10<09:11,  1.97batch/s, Batch Loss=0.0111, Avg Loss=0.0789, Time Left=9.82 [A
Epoch 3/3 - Training:  68%|▋| 2310/3393 [19:10<09:13,  1.96batch/s, Batch Loss=0.0111, Avg Loss=0.0789, Time Left=9.82 [A
Epoch 3/3 - Training:  68%|▋| 2310/3393 [19:11<09:13,  1.96batch/s, Batch Loss=0.0179, Avg Loss=0.0789, Time Left=9.81 [A
Epoch 3/3 - Training:  68%|▋| 2311/3393 [19:11<09:09,  1.97batch/s, Batch Loss=0.0179, Avg Loss=0.0789, Time Left=9.81 [A
Epoch 3/3 - Training:  68%|▋| 2311/3393 [19:11<09:09,  1.97batch/s, Batch Loss=0.0552, Avg Loss=0.0789, Time Left=9.80 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  69%|▋| 2341/3393 [19:26<08:57,  1.96batch/s, Batch Loss=0.0086, Avg Loss=0.0787, Time Left=9.55 [A
Epoch 3/3 - Training:  69%|▋| 2341/3393 [19:27<08:57,  1.96batch/s, Batch Loss=0.0125, Avg Loss=0.0787, Time Left=9.55 [A
Epoch 3/3 - Training:  69%|▋| 2342/3393 [19:27<08:53,  1.97batch/s, Batch Loss=0.0125, Avg Loss=0.0787, Time Left=9.55 [A
Epoch 3/3 - Training:  69%|▋| 2342/3393 [19:27<08:53,  1.97batch/s, Batch Loss=0.0277, Avg Loss=0.0786, Time Left=9.54 [A
Epoch 3/3 - Training:  69%|▋| 2343/3393 [19:27<09:06,  1.92batch/s, Batch Loss=0.0277, Avg Loss=0.0786, Time Left=9.54 [A
Epoch 3/3 - Training:  69%|▋| 2343/3393 [19:28<09:06,  1.92batch/s, Batch Loss=0.0481, Avg Loss=0.0786, Time Left=9.53 [A
Epoch 3/3 - Training:  69%|▋| 2344/3393 [19:28<08:54,  1.96batch/s, Batch Loss=0.0481, Avg Loss=0.0786, Time Left=9.53 [A
Epoch 3/3 - Training:  69%|▋| 2344/3393 [19:28<08:54,  1.96batch/s, Batch Loss=0.0011, Avg Loss=0.0786, Time Left=9.52 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  70%|▋| 2374/3393 [19:43<08:32,  1.99batch/s, Batch Loss=0.0295, Avg Loss=0.0786, Time Left=9.27 [A
Epoch 3/3 - Training:  70%|▋| 2374/3393 [19:43<08:32,  1.99batch/s, Batch Loss=0.0233, Avg Loss=0.0786, Time Left=9.26 [A
Epoch 3/3 - Training:  70%|▋| 2375/3393 [19:43<08:31,  1.99batch/s, Batch Loss=0.0233, Avg Loss=0.0786, Time Left=9.26 [A
Epoch 3/3 - Training:  70%|▋| 2375/3393 [19:44<08:31,  1.99batch/s, Batch Loss=0.1350, Avg Loss=0.0786, Time Left=9.25 [A
Epoch 3/3 - Training:  70%|▋| 2376/3393 [19:44<08:25,  2.01batch/s, Batch Loss=0.1350, Avg Loss=0.0786, Time Left=9.25 [A
Epoch 3/3 - Training:  70%|▋| 2376/3393 [19:44<08:25,  2.01batch/s, Batch Loss=0.0140, Avg Loss=0.0786, Time Left=9.24 [A
Epoch 3/3 - Training:  70%|▋| 2377/3393 [19:44<08:25,  2.01batch/s, Batch Loss=0.0140, Avg Loss=0.0786, Time Left=9.24 [A
Epoch 3/3 - Training:  70%|▋| 2377/3393 [19:45<08:25,  2.01batch/s, Batch Loss=0.0072, Avg Loss=0.0785, Time Left=9.24 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  71%|▋| 2407/3393 [19:59<08:14,  2.00batch/s, Batch Loss=0.0171, Avg Loss=0.0783, Time Left=8.99 [A
Epoch 3/3 - Training:  71%|▋| 2407/3393 [20:00<08:14,  2.00batch/s, Batch Loss=0.1150, Avg Loss=0.0783, Time Left=8.98 [A
Epoch 3/3 - Training:  71%|▋| 2408/3393 [20:00<08:04,  2.03batch/s, Batch Loss=0.1150, Avg Loss=0.0783, Time Left=8.98 [A
Epoch 3/3 - Training:  71%|▋| 2408/3393 [20:00<08:04,  2.03batch/s, Batch Loss=0.1361, Avg Loss=0.0784, Time Left=8.97 [A
Epoch 3/3 - Training:  71%|▋| 2409/3393 [20:00<08:10,  2.01batch/s, Batch Loss=0.1361, Avg Loss=0.0784, Time Left=8.97 [A
Epoch 3/3 - Training:  71%|▋| 2409/3393 [20:01<08:10,  2.01batch/s, Batch Loss=0.0649, Avg Loss=0.0784, Time Left=8.96 [A
Epoch 3/3 - Training:  71%|▋| 2410/3393 [20:01<08:05,  2.02batch/s, Batch Loss=0.0649, Avg Loss=0.0784, Time Left=8.96 [A
Epoch 3/3 - Training:  71%|▋| 2410/3393 [20:01<08:05,  2.02batch/s, Batch Loss=0.0062, Avg Loss=0.0783, Time Left=8.95 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  72%|▋| 2440/3393 [20:16<07:58,  1.99batch/s, Batch Loss=0.1598, Avg Loss=0.0780, Time Left=8.70 [A
Epoch 3/3 - Training:  72%|▋| 2440/3393 [20:17<07:58,  1.99batch/s, Batch Loss=0.0015, Avg Loss=0.0780, Time Left=8.69 [A
Epoch 3/3 - Training:  72%|▋| 2441/3393 [20:17<08:02,  1.97batch/s, Batch Loss=0.0015, Avg Loss=0.0780, Time Left=8.69 [A
Epoch 3/3 - Training:  72%|▋| 2441/3393 [20:17<08:02,  1.97batch/s, Batch Loss=0.0838, Avg Loss=0.0780, Time Left=8.69 [A
Epoch 3/3 - Training:  72%|▋| 2442/3393 [20:17<08:08,  1.95batch/s, Batch Loss=0.0838, Avg Loss=0.0780, Time Left=8.69 [A
Epoch 3/3 - Training:  72%|▋| 2442/3393 [20:18<08:08,  1.95batch/s, Batch Loss=0.0852, Avg Loss=0.0780, Time Left=8.68 [A
Epoch 3/3 - Training:  72%|▋| 2443/3393 [20:18<08:05,  1.96batch/s, Batch Loss=0.0852, Avg Loss=0.0780, Time Left=8.68 [A
Epoch 3/3 - Training:  72%|▋| 2443/3393 [20:18<08:05,  1.96batch/s, Batch Loss=0.0574, Avg Loss=0.0780, Time Left=8.67 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  73%|▋| 2473/3393 [20:33<07:47,  1.97batch/s, Batch Loss=0.0407, Avg Loss=0.0779, Time Left=8.42 [A
Epoch 3/3 - Training:  73%|▋| 2473/3393 [20:33<07:47,  1.97batch/s, Batch Loss=0.0722, Avg Loss=0.0779, Time Left=8.41 [A
Epoch 3/3 - Training:  73%|▋| 2474/3393 [20:33<07:36,  2.01batch/s, Batch Loss=0.0722, Avg Loss=0.0779, Time Left=8.41 [A
Epoch 3/3 - Training:  73%|▋| 2474/3393 [20:34<07:36,  2.01batch/s, Batch Loss=0.0315, Avg Loss=0.0778, Time Left=8.40 [A
Epoch 3/3 - Training:  73%|▋| 2475/3393 [20:34<07:36,  2.01batch/s, Batch Loss=0.0315, Avg Loss=0.0778, Time Left=8.40 [A
Epoch 3/3 - Training:  73%|▋| 2475/3393 [20:34<07:36,  2.01batch/s, Batch Loss=0.1018, Avg Loss=0.0778, Time Left=8.39 [A
Epoch 3/3 - Training:  73%|▋| 2476/3393 [20:34<07:41,  1.99batch/s, Batch Loss=0.1018, Avg Loss=0.0778, Time Left=8.39 [A
Epoch 3/3 - Training:  73%|▋| 2476/3393 [20:35<07:41,  1.99batch/s, Batch Loss=0.0522, Avg Loss=0.0778, Time Left=8.39 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  74%|▋| 2506/3393 [20:50<07:31,  1.96batch/s, Batch Loss=0.0322, Avg Loss=0.0780, Time Left=8.14 [A
Epoch 3/3 - Training:  74%|▋| 2506/3393 [20:50<07:31,  1.96batch/s, Batch Loss=0.0108, Avg Loss=0.0779, Time Left=8.13 [A
Epoch 3/3 - Training:  74%|▋| 2507/3393 [20:50<07:37,  1.94batch/s, Batch Loss=0.0108, Avg Loss=0.0779, Time Left=8.13 [A
Epoch 3/3 - Training:  74%|▋| 2507/3393 [20:51<07:37,  1.94batch/s, Batch Loss=0.1302, Avg Loss=0.0780, Time Left=8.12 [A
Epoch 3/3 - Training:  74%|▋| 2508/3393 [20:51<07:33,  1.95batch/s, Batch Loss=0.1302, Avg Loss=0.0780, Time Left=8.12 [A
Epoch 3/3 - Training:  74%|▋| 2508/3393 [20:51<07:33,  1.95batch/s, Batch Loss=0.0086, Avg Loss=0.0779, Time Left=8.11 [A
Epoch 3/3 - Training:  74%|▋| 2509/3393 [20:51<07:37,  1.93batch/s, Batch Loss=0.0086, Avg Loss=0.0779, Time Left=8.11 [A
Epoch 3/3 - Training:  74%|▋| 2509/3393 [20:52<07:37,  1.93batch/s, Batch Loss=0.1679, Avg Loss=0.0780, Time Left=8.10 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  75%|▋| 2539/3393 [21:06<07:21,  1.93batch/s, Batch Loss=0.0282, Avg Loss=0.0779, Time Left=7.85 [A
Epoch 3/3 - Training:  75%|▋| 2539/3393 [21:07<07:21,  1.93batch/s, Batch Loss=0.1111, Avg Loss=0.0779, Time Left=7.85 [A
Epoch 3/3 - Training:  75%|▋| 2540/3393 [21:07<07:13,  1.97batch/s, Batch Loss=0.1111, Avg Loss=0.0779, Time Left=7.85 [A
Epoch 3/3 - Training:  75%|▋| 2540/3393 [21:07<07:13,  1.97batch/s, Batch Loss=0.0386, Avg Loss=0.0779, Time Left=7.84 [A
Epoch 3/3 - Training:  75%|▋| 2541/3393 [21:07<07:17,  1.95batch/s, Batch Loss=0.0386, Avg Loss=0.0779, Time Left=7.84 [A
Epoch 3/3 - Training:  75%|▋| 2541/3393 [21:08<07:17,  1.95batch/s, Batch Loss=0.0348, Avg Loss=0.0779, Time Left=7.83 [A
Epoch 3/3 - Training:  75%|▋| 2542/3393 [21:08<07:15,  1.96batch/s, Batch Loss=0.0348, Avg Loss=0.0779, Time Left=7.83 [A
Epoch 3/3 - Training:  75%|▋| 2542/3393 [21:08<07:15,  1.96batch/s, Batch Loss=0.0216, Avg Loss=0.0779, Time Left=7.82 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  76%|▊| 2572/3393 [21:23<06:49,  2.00batch/s, Batch Loss=0.0680, Avg Loss=0.0773, Time Left=7.57 [A
Epoch 3/3 - Training:  76%|▊| 2572/3393 [21:23<06:49,  2.00batch/s, Batch Loss=0.2671, Avg Loss=0.0774, Time Left=7.56 [A
Epoch 3/3 - Training:  76%|▊| 2573/3393 [21:23<06:50,  2.00batch/s, Batch Loss=0.2671, Avg Loss=0.0774, Time Left=7.56 [A
Epoch 3/3 - Training:  76%|▊| 2573/3393 [21:24<06:50,  2.00batch/s, Batch Loss=0.0189, Avg Loss=0.0774, Time Left=7.55 [A
Epoch 3/3 - Training:  76%|▊| 2574/3393 [21:24<06:43,  2.03batch/s, Batch Loss=0.0189, Avg Loss=0.0774, Time Left=7.55 [A
Epoch 3/3 - Training:  76%|▊| 2574/3393 [21:24<06:43,  2.03batch/s, Batch Loss=0.0047, Avg Loss=0.0774, Time Left=7.54 [A
Epoch 3/3 - Training:  76%|▊| 2575/3393 [21:24<06:48,  2.00batch/s, Batch Loss=0.0047, Avg Loss=0.0774, Time Left=7.54 [A
Epoch 3/3 - Training:  76%|▊| 2575/3393 [21:25<06:48,  2.00batch/s, Batch Loss=0.0062, Avg Loss=0.0773, Time Left=7.54 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  77%|▊| 2605/3393 [21:40<06:41,  1.96batch/s, Batch Loss=0.0410, Avg Loss=0.0774, Time Left=7.29 [A
Epoch 3/3 - Training:  77%|▊| 2605/3393 [21:40<06:41,  1.96batch/s, Batch Loss=0.0453, Avg Loss=0.0773, Time Left=7.28 [A
Epoch 3/3 - Training:  77%|▊| 2606/3393 [21:40<06:42,  1.95batch/s, Batch Loss=0.0453, Avg Loss=0.0773, Time Left=7.28 [A
Epoch 3/3 - Training:  77%|▊| 2606/3393 [21:41<06:42,  1.95batch/s, Batch Loss=0.0735, Avg Loss=0.0773, Time Left=7.27 [A
Epoch 3/3 - Training:  77%|▊| 2607/3393 [21:41<06:39,  1.97batch/s, Batch Loss=0.0735, Avg Loss=0.0773, Time Left=7.27 [A
Epoch 3/3 - Training:  77%|▊| 2607/3393 [21:41<06:39,  1.97batch/s, Batch Loss=0.0402, Avg Loss=0.0773, Time Left=7.26 [A
Epoch 3/3 - Training:  77%|▊| 2608/3393 [21:41<06:44,  1.94batch/s, Batch Loss=0.0402, Avg Loss=0.0773, Time Left=7.26 [A
Epoch 3/3 - Training:  77%|▊| 2608/3393 [21:42<06:44,  1.94batch/s, Batch Loss=0.0910, Avg Loss=0.0773, Time Left=7.25 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  78%|▊| 2638/3393 [21:56<06:29,  1.94batch/s, Batch Loss=0.0013, Avg Loss=0.0772, Time Left=7.01 [A
Epoch 3/3 - Training:  78%|▊| 2638/3393 [21:57<06:29,  1.94batch/s, Batch Loss=0.0019, Avg Loss=0.0772, Time Left=7.00 [A
Epoch 3/3 - Training:  78%|▊| 2639/3393 [21:57<06:18,  1.99batch/s, Batch Loss=0.0019, Avg Loss=0.0772, Time Left=7.00 [A
Epoch 3/3 - Training:  78%|▊| 2639/3393 [21:57<06:18,  1.99batch/s, Batch Loss=0.0150, Avg Loss=0.0771, Time Left=6.99 [A
Epoch 3/3 - Training:  78%|▊| 2640/3393 [21:57<06:17,  1.99batch/s, Batch Loss=0.0150, Avg Loss=0.0771, Time Left=6.99 [A
Epoch 3/3 - Training:  78%|▊| 2640/3393 [21:58<06:17,  1.99batch/s, Batch Loss=0.0280, Avg Loss=0.0771, Time Left=6.98 [A
Epoch 3/3 - Training:  78%|▊| 2641/3393 [21:58<06:06,  2.05batch/s, Batch Loss=0.0280, Avg Loss=0.0771, Time Left=6.98 [A
Epoch 3/3 - Training:  78%|▊| 2641/3393 [21:58<06:06,  2.05batch/s, Batch Loss=0.0053, Avg Loss=0.0771, Time Left=6.97 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  79%|▊| 2671/3393 [22:13<06:06,  1.97batch/s, Batch Loss=0.0491, Avg Loss=0.0770, Time Left=6.72 [A
Epoch 3/3 - Training:  79%|▊| 2671/3393 [22:14<06:06,  1.97batch/s, Batch Loss=0.0082, Avg Loss=0.0770, Time Left=6.71 [A
Epoch 3/3 - Training:  79%|▊| 2672/3393 [22:14<05:59,  2.00batch/s, Batch Loss=0.0082, Avg Loss=0.0770, Time Left=6.71 [A
Epoch 3/3 - Training:  79%|▊| 2672/3393 [22:14<05:59,  2.00batch/s, Batch Loss=0.0659, Avg Loss=0.0770, Time Left=6.71 [A
Epoch 3/3 - Training:  79%|▊| 2673/3393 [22:14<06:06,  1.97batch/s, Batch Loss=0.0659, Avg Loss=0.0770, Time Left=6.71 [A
Epoch 3/3 - Training:  79%|▊| 2673/3393 [22:15<06:06,  1.97batch/s, Batch Loss=0.0302, Avg Loss=0.0770, Time Left=6.70 [A
Epoch 3/3 - Training:  79%|▊| 2674/3393 [22:15<06:00,  1.99batch/s, Batch Loss=0.0302, Avg Loss=0.0770, Time Left=6.70 [A
Epoch 3/3 - Training:  79%|▊| 2674/3393 [22:15<06:00,  1.99batch/s, Batch Loss=0.1458, Avg Loss=0.0770, Time Left=6.69 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  80%|▊| 2704/3393 [22:30<05:58,  1.92batch/s, Batch Loss=0.0379, Avg Loss=0.0767, Time Left=6.44 [A
Epoch 3/3 - Training:  80%|▊| 2704/3393 [22:30<05:58,  1.92batch/s, Batch Loss=0.8930, Avg Loss=0.0770, Time Left=6.43 [A
Epoch 3/3 - Training:  80%|▊| 2705/3393 [22:30<05:54,  1.94batch/s, Batch Loss=0.8930, Avg Loss=0.0770, Time Left=6.43 [A
Epoch 3/3 - Training:  80%|▊| 2705/3393 [22:31<05:54,  1.94batch/s, Batch Loss=0.0182, Avg Loss=0.0770, Time Left=6.42 [A
Epoch 3/3 - Training:  80%|▊| 2706/3393 [22:31<05:51,  1.96batch/s, Batch Loss=0.0182, Avg Loss=0.0770, Time Left=6.42 [A
Epoch 3/3 - Training:  80%|▊| 2706/3393 [22:31<05:51,  1.96batch/s, Batch Loss=0.0056, Avg Loss=0.0770, Time Left=6.42 [A
Epoch 3/3 - Training:  80%|▊| 2707/3393 [22:31<05:47,  1.97batch/s, Batch Loss=0.0056, Avg Loss=0.0770, Time Left=6.42 [A
Epoch 3/3 - Training:  80%|▊| 2707/3393 [22:32<05:47,  1.97batch/s, Batch Loss=0.0033, Avg Loss=0.0769, Time Left=6.41 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  81%|▊| 2737/3393 [22:47<05:35,  1.96batch/s, Batch Loss=0.3736, Avg Loss=0.0767, Time Left=6.16 [A
Epoch 3/3 - Training:  81%|▊| 2737/3393 [22:47<05:35,  1.96batch/s, Batch Loss=0.0025, Avg Loss=0.0767, Time Left=6.15 [A
Epoch 3/3 - Training:  81%|▊| 2738/3393 [22:47<05:35,  1.95batch/s, Batch Loss=0.0025, Avg Loss=0.0767, Time Left=6.15 [A
Epoch 3/3 - Training:  81%|▊| 2738/3393 [22:48<05:35,  1.95batch/s, Batch Loss=0.3519, Avg Loss=0.0768, Time Left=6.14 [A
Epoch 3/3 - Training:  81%|▊| 2739/3393 [22:48<05:33,  1.96batch/s, Batch Loss=0.3519, Avg Loss=0.0768, Time Left=6.14 [A
Epoch 3/3 - Training:  81%|▊| 2739/3393 [22:48<05:33,  1.96batch/s, Batch Loss=0.0057, Avg Loss=0.0768, Time Left=6.13 [A
Epoch 3/3 - Training:  81%|▊| 2740/3393 [22:48<05:33,  1.96batch/s, Batch Loss=0.0057, Avg Loss=0.0768, Time Left=6.13 [A
Epoch 3/3 - Training:  81%|▊| 2740/3393 [22:49<05:33,  1.96batch/s, Batch Loss=0.0602, Avg Loss=0.0768, Time Left=6.12 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  82%|▊| 2770/3393 [23:03<05:22,  1.93batch/s, Batch Loss=0.2103, Avg Loss=0.0767, Time Left=5.88 [A
Epoch 3/3 - Training:  82%|▊| 2770/3393 [23:04<05:22,  1.93batch/s, Batch Loss=0.0008, Avg Loss=0.0767, Time Left=5.87 [A
Epoch 3/3 - Training:  82%|▊| 2771/3393 [23:04<05:15,  1.97batch/s, Batch Loss=0.0008, Avg Loss=0.0767, Time Left=5.87 [A
Epoch 3/3 - Training:  82%|▊| 2771/3393 [23:04<05:15,  1.97batch/s, Batch Loss=0.0778, Avg Loss=0.0767, Time Left=5.86 [A
Epoch 3/3 - Training:  82%|▊| 2772/3393 [23:04<05:19,  1.94batch/s, Batch Loss=0.0778, Avg Loss=0.0767, Time Left=5.86 [A
Epoch 3/3 - Training:  82%|▊| 2772/3393 [23:05<05:19,  1.94batch/s, Batch Loss=0.0180, Avg Loss=0.0767, Time Left=5.85 [A
Epoch 3/3 - Training:  82%|▊| 2773/3393 [23:05<05:19,  1.94batch/s, Batch Loss=0.0180, Avg Loss=0.0767, Time Left=5.85 [A
Epoch 3/3 - Training:  82%|▊| 2773/3393 [23:05<05:19,  1.94batch/s, Batch Loss=0.0388, Avg Loss=0.0767, Time Left=5.84 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  83%|▊| 2803/3393 [23:20<05:03,  1.94batch/s, Batch Loss=0.2619, Avg Loss=0.0767, Time Left=5.60 [A
Epoch 3/3 - Training:  83%|▊| 2803/3393 [23:21<05:03,  1.94batch/s, Batch Loss=0.0248, Avg Loss=0.0767, Time Left=5.59 [A
Epoch 3/3 - Training:  83%|▊| 2804/3393 [23:21<05:01,  1.96batch/s, Batch Loss=0.0248, Avg Loss=0.0767, Time Left=5.59 [A
Epoch 3/3 - Training:  83%|▊| 2804/3393 [23:21<05:01,  1.96batch/s, Batch Loss=0.1369, Avg Loss=0.0767, Time Left=5.58 [A
Epoch 3/3 - Training:  83%|▊| 2805/3393 [23:21<05:04,  1.93batch/s, Batch Loss=0.1369, Avg Loss=0.0767, Time Left=5.58 [A
Epoch 3/3 - Training:  83%|▊| 2805/3393 [23:22<05:04,  1.93batch/s, Batch Loss=0.0564, Avg Loss=0.0767, Time Left=5.57 [A
Epoch 3/3 - Training:  83%|▊| 2806/3393 [23:22<05:01,  1.95batch/s, Batch Loss=0.0564, Avg Loss=0.0767, Time Left=5.57 [A
Epoch 3/3 - Training:  83%|▊| 2806/3393 [23:22<05:01,  1.95batch/s, Batch Loss=0.0174, Avg Loss=0.0767, Time Left=5.56 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  84%|▊| 2836/3393 [23:37<04:46,  1.95batch/s, Batch Loss=0.1356, Avg Loss=0.0765, Time Left=5.31 [A
Epoch 3/3 - Training:  84%|▊| 2836/3393 [23:37<04:46,  1.95batch/s, Batch Loss=0.0387, Avg Loss=0.0765, Time Left=5.30 [A
Epoch 3/3 - Training:  84%|▊| 2837/3393 [23:37<04:43,  1.96batch/s, Batch Loss=0.0387, Avg Loss=0.0765, Time Left=5.30 [A
Epoch 3/3 - Training:  84%|▊| 2837/3393 [23:38<04:43,  1.96batch/s, Batch Loss=0.0388, Avg Loss=0.0764, Time Left=5.30 [A
Epoch 3/3 - Training:  84%|▊| 2838/3393 [23:38<04:46,  1.94batch/s, Batch Loss=0.0388, Avg Loss=0.0764, Time Left=5.30 [A
Epoch 3/3 - Training:  84%|▊| 2838/3393 [23:38<04:46,  1.94batch/s, Batch Loss=0.0008, Avg Loss=0.0764, Time Left=5.29 [A
Epoch 3/3 - Training:  84%|▊| 2839/3393 [23:38<04:41,  1.97batch/s, Batch Loss=0.0008, Avg Loss=0.0764, Time Left=5.29 [A
Epoch 3/3 - Training:  84%|▊| 2839/3393 [23:39<04:41,  1.97batch/s, Batch Loss=0.0390, Avg Loss=0.0764, Time Left=5.28 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  85%|▊| 2869/3393 [23:54<04:25,  1.98batch/s, Batch Loss=0.1157, Avg Loss=0.0764, Time Left=5.03 [A
Epoch 3/3 - Training:  85%|▊| 2869/3393 [23:54<04:25,  1.98batch/s, Batch Loss=0.0965, Avg Loss=0.0765, Time Left=5.02 [A
Epoch 3/3 - Training:  85%|▊| 2870/3393 [23:54<04:28,  1.95batch/s, Batch Loss=0.0965, Avg Loss=0.0765, Time Left=5.02 [A
Epoch 3/3 - Training:  85%|▊| 2870/3393 [23:55<04:28,  1.95batch/s, Batch Loss=0.0211, Avg Loss=0.0764, Time Left=5.01 [A
Epoch 3/3 - Training:  85%|▊| 2871/3393 [23:55<04:23,  1.98batch/s, Batch Loss=0.0211, Avg Loss=0.0764, Time Left=5.01 [A
Epoch 3/3 - Training:  85%|▊| 2871/3393 [23:55<04:23,  1.98batch/s, Batch Loss=0.0083, Avg Loss=0.0764, Time Left=5.01 [A
Epoch 3/3 - Training:  85%|▊| 2872/3393 [23:55<04:27,  1.95batch/s, Batch Loss=0.0083, Avg Loss=0.0764, Time Left=5.01 [A
Epoch 3/3 - Training:  85%|▊| 2872/3393 [23:56<04:27,  1.95batch/s, Batch Loss=0.0236, Avg Loss=0.0764, Time Left=5.00 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  86%|▊| 2902/3393 [24:11<04:11,  1.95batch/s, Batch Loss=0.0272, Avg Loss=0.0764, Time Left=4.75 [A
Epoch 3/3 - Training:  86%|▊| 2902/3393 [24:11<04:11,  1.95batch/s, Batch Loss=0.0331, Avg Loss=0.0764, Time Left=4.74 [A
Epoch 3/3 - Training:  86%|▊| 2903/3393 [24:11<04:09,  1.96batch/s, Batch Loss=0.0331, Avg Loss=0.0764, Time Left=4.74 [A
Epoch 3/3 - Training:  86%|▊| 2903/3393 [24:12<04:09,  1.96batch/s, Batch Loss=0.0505, Avg Loss=0.0763, Time Left=4.73 [A
Epoch 3/3 - Training:  86%|▊| 2904/3393 [24:12<04:05,  1.99batch/s, Batch Loss=0.0505, Avg Loss=0.0763, Time Left=4.73 [A
Epoch 3/3 - Training:  86%|▊| 2904/3393 [24:12<04:05,  1.99batch/s, Batch Loss=0.1490, Avg Loss=0.0764, Time Left=4.72 [A
Epoch 3/3 - Training:  86%|▊| 2905/3393 [24:12<04:00,  2.03batch/s, Batch Loss=0.1490, Avg Loss=0.0764, Time Left=4.72 [A
Epoch 3/3 - Training:  86%|▊| 2905/3393 [24:13<04:00,  2.03batch/s, Batch Loss=0.0171, Avg Loss=0.0763, Time Left=4.72 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  87%|▊| 2935/3393 [24:27<03:51,  1.98batch/s, Batch Loss=0.0426, Avg Loss=0.0767, Time Left=4.47 [A
Epoch 3/3 - Training:  87%|▊| 2935/3393 [24:28<03:51,  1.98batch/s, Batch Loss=0.0290, Avg Loss=0.0766, Time Left=4.46 [A
Epoch 3/3 - Training:  87%|▊| 2936/3393 [24:28<03:54,  1.95batch/s, Batch Loss=0.0290, Avg Loss=0.0766, Time Left=4.46 [A
Epoch 3/3 - Training:  87%|▊| 2936/3393 [24:28<03:54,  1.95batch/s, Batch Loss=0.0489, Avg Loss=0.0766, Time Left=4.45 [A
Epoch 3/3 - Training:  87%|▊| 2937/3393 [24:28<03:52,  1.96batch/s, Batch Loss=0.0489, Avg Loss=0.0766, Time Left=4.45 [A
Epoch 3/3 - Training:  87%|▊| 2937/3393 [24:29<03:52,  1.96batch/s, Batch Loss=0.0176, Avg Loss=0.0766, Time Left=4.44 [A
Epoch 3/3 - Training:  87%|▊| 2938/3393 [24:29<03:54,  1.94batch/s, Batch Loss=0.0176, Avg Loss=0.0766, Time Left=4.44 [A
Epoch 3/3 - Training:  87%|▊| 2938/3393 [24:29<03:54,  1.94batch/s, Batch Loss=0.0350, Avg Loss=0.0766, Time Left=4.43 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  87%|▊| 2968/3393 [24:44<03:37,  1.95batch/s, Batch Loss=0.0713, Avg Loss=0.0766, Time Left=4.19 [A
Epoch 3/3 - Training:  87%|▊| 2968/3393 [24:45<03:37,  1.95batch/s, Batch Loss=0.0786, Avg Loss=0.0766, Time Left=4.18 [A
Epoch 3/3 - Training:  88%|▉| 2969/3393 [24:45<03:33,  1.98batch/s, Batch Loss=0.0786, Avg Loss=0.0766, Time Left=4.18 [A
Epoch 3/3 - Training:  88%|▉| 2969/3393 [24:45<03:33,  1.98batch/s, Batch Loss=0.0260, Avg Loss=0.0766, Time Left=4.17 [A
Epoch 3/3 - Training:  88%|▉| 2970/3393 [24:45<03:32,  1.99batch/s, Batch Loss=0.0260, Avg Loss=0.0766, Time Left=4.17 [A
Epoch 3/3 - Training:  88%|▉| 2970/3393 [24:46<03:32,  1.99batch/s, Batch Loss=0.0076, Avg Loss=0.0766, Time Left=4.16 [A
Epoch 3/3 - Training:  88%|▉| 2971/3393 [24:46<03:33,  1.98batch/s, Batch Loss=0.0076, Avg Loss=0.0766, Time Left=4.16 [A
Epoch 3/3 - Training:  88%|▉| 2971/3393 [24:46<03:33,  1.98batch/s, Batch Loss=0.0021, Avg Loss=0.0766, Time Left=4.15 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  88%|▉| 3001/3393 [25:01<03:14,  2.01batch/s, Batch Loss=0.0054, Avg Loss=0.0766, Time Left=3.90 [A
Epoch 3/3 - Training:  88%|▉| 3001/3393 [25:01<03:14,  2.01batch/s, Batch Loss=0.0151, Avg Loss=0.0765, Time Left=3.90 [A
Epoch 3/3 - Training:  88%|▉| 3002/3393 [25:01<03:12,  2.03batch/s, Batch Loss=0.0151, Avg Loss=0.0765, Time Left=3.90 [A
Epoch 3/3 - Training:  88%|▉| 3002/3393 [25:02<03:12,  2.03batch/s, Batch Loss=0.0152, Avg Loss=0.0765, Time Left=3.89 [A
Epoch 3/3 - Training:  89%|▉| 3003/3393 [25:02<03:14,  2.00batch/s, Batch Loss=0.0152, Avg Loss=0.0765, Time Left=3.89 [A
Epoch 3/3 - Training:  89%|▉| 3003/3393 [25:02<03:14,  2.00batch/s, Batch Loss=0.0010, Avg Loss=0.0765, Time Left=3.88 [A
Epoch 3/3 - Training:  89%|▉| 3004/3393 [25:02<03:14,  2.00batch/s, Batch Loss=0.0010, Avg Loss=0.0765, Time Left=3.88 [A
Epoch 3/3 - Training:  89%|▉| 3004/3393 [25:03<03:14,  2.00batch/s, Batch Loss=0.0215, Avg Loss=0.0765, Time Left=3.87 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  89%|▉| 3034/3393 [25:18<03:04,  1.94batch/s, Batch Loss=0.2092, Avg Loss=0.0763, Time Left=3.62 [A
Epoch 3/3 - Training:  89%|▉| 3034/3393 [25:18<03:04,  1.94batch/s, Batch Loss=0.0361, Avg Loss=0.0763, Time Left=3.61 [A
Epoch 3/3 - Training:  89%|▉| 3035/3393 [25:18<03:03,  1.95batch/s, Batch Loss=0.0361, Avg Loss=0.0763, Time Left=3.61 [A
Epoch 3/3 - Training:  89%|▉| 3035/3393 [25:19<03:03,  1.95batch/s, Batch Loss=0.1069, Avg Loss=0.0763, Time Left=3.61 [A
Epoch 3/3 - Training:  89%|▉| 3036/3393 [25:19<02:58,  2.00batch/s, Batch Loss=0.1069, Avg Loss=0.0763, Time Left=3.61 [A
Epoch 3/3 - Training:  89%|▉| 3036/3393 [25:19<02:58,  2.00batch/s, Batch Loss=0.0385, Avg Loss=0.0763, Time Left=3.60 [A
Epoch 3/3 - Training:  90%|▉| 3037/3393 [25:19<02:57,  2.00batch/s, Batch Loss=0.0385, Avg Loss=0.0763, Time Left=3.60 [A
Epoch 3/3 - Training:  90%|▉| 3037/3393 [25:20<02:57,  2.00batch/s, Batch Loss=0.0139, Avg Loss=0.0763, Time Left=3.59 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  90%|▉| 3067/3393 [25:34<02:46,  1.96batch/s, Batch Loss=0.0787, Avg Loss=0.0761, Time Left=3.34 [A
Epoch 3/3 - Training:  90%|▉| 3067/3393 [25:35<02:46,  1.96batch/s, Batch Loss=0.0103, Avg Loss=0.0761, Time Left=3.33 [A
Epoch 3/3 - Training:  90%|▉| 3068/3393 [25:35<02:45,  1.97batch/s, Batch Loss=0.0103, Avg Loss=0.0761, Time Left=3.33 [A
Epoch 3/3 - Training:  90%|▉| 3068/3393 [25:35<02:45,  1.97batch/s, Batch Loss=0.0006, Avg Loss=0.0760, Time Left=3.32 [A
Epoch 3/3 - Training:  90%|▉| 3069/3393 [25:35<02:46,  1.94batch/s, Batch Loss=0.0006, Avg Loss=0.0760, Time Left=3.32 [A
Epoch 3/3 - Training:  90%|▉| 3069/3393 [25:36<02:46,  1.94batch/s, Batch Loss=0.0621, Avg Loss=0.0760, Time Left=3.32 [A
Epoch 3/3 - Training:  90%|▉| 3070/3393 [25:36<02:45,  1.95batch/s, Batch Loss=0.0621, Avg Loss=0.0760, Time Left=3.32 [A
Epoch 3/3 - Training:  90%|▉| 3070/3393 [25:36<02:45,  1.95batch/s, Batch Loss=0.0527, Avg Loss=0.0760, Time Left=3.31 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  91%|▉| 3100/3393 [25:51<02:29,  1.96batch/s, Batch Loss=0.0117, Avg Loss=0.0757, Time Left=3.06 [A
Epoch 3/3 - Training:  91%|▉| 3100/3393 [25:52<02:29,  1.96batch/s, Batch Loss=0.1772, Avg Loss=0.0757, Time Left=3.05 [A
Epoch 3/3 - Training:  91%|▉| 3101/3393 [25:52<02:31,  1.93batch/s, Batch Loss=0.1772, Avg Loss=0.0757, Time Left=3.05 [A
Epoch 3/3 - Training:  91%|▉| 3101/3393 [25:52<02:31,  1.93batch/s, Batch Loss=0.0689, Avg Loss=0.0757, Time Left=3.04 [A
Epoch 3/3 - Training:  91%|▉| 3102/3393 [25:52<02:29,  1.95batch/s, Batch Loss=0.0689, Avg Loss=0.0757, Time Left=3.04 [A
Epoch 3/3 - Training:  91%|▉| 3102/3393 [25:53<02:29,  1.95batch/s, Batch Loss=0.0006, Avg Loss=0.0757, Time Left=3.03 [A
Epoch 3/3 - Training:  91%|▉| 3103/3393 [25:53<02:30,  1.93batch/s, Batch Loss=0.0006, Avg Loss=0.0757, Time Left=3.03 [A
Epoch 3/3 - Training:  91%|▉| 3103/3393 [25:53<02:30,  1.93batch/s, Batch Loss=0.1222, Avg Loss=0.0757, Time Left=3.03 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  92%|▉| 3133/3393 [26:08<02:13,  1.95batch/s, Batch Loss=0.1509, Avg Loss=0.0754, Time Left=2.78 [A
Epoch 3/3 - Training:  92%|▉| 3133/3393 [26:09<02:13,  1.95batch/s, Batch Loss=0.0454, Avg Loss=0.0754, Time Left=2.77 [A
Epoch 3/3 - Training:  92%|▉| 3134/3393 [26:09<02:14,  1.93batch/s, Batch Loss=0.0454, Avg Loss=0.0754, Time Left=2.77 [A
Epoch 3/3 - Training:  92%|▉| 3134/3393 [26:09<02:14,  1.93batch/s, Batch Loss=0.0006, Avg Loss=0.0753, Time Left=2.76 [A
Epoch 3/3 - Training:  92%|▉| 3135/3393 [26:09<02:12,  1.95batch/s, Batch Loss=0.0006, Avg Loss=0.0753, Time Left=2.76 [A
Epoch 3/3 - Training:  92%|▉| 3135/3393 [26:10<02:12,  1.95batch/s, Batch Loss=0.0118, Avg Loss=0.0753, Time Left=2.75 [A
Epoch 3/3 - Training:  92%|▉| 3136/3393 [26:10<02:12,  1.94batch/s, Batch Loss=0.0118, Avg Loss=0.0753, Time Left=2.75 [A
Epoch 3/3 - Training:  92%|▉| 3136/3393 [26:10<02:12,  1.94batch/s, Batch Loss=0.0859, Avg Loss=0.0753, Time Left=2.74 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  93%|▉| 3166/3393 [26:25<01:56,  1.95batch/s, Batch Loss=0.1115, Avg Loss=0.0752, Time Left=2.50 [A
Epoch 3/3 - Training:  93%|▉| 3166/3393 [26:25<01:56,  1.95batch/s, Batch Loss=0.0088, Avg Loss=0.0752, Time Left=2.49 [A
Epoch 3/3 - Training:  93%|▉| 3167/3393 [26:25<01:57,  1.93batch/s, Batch Loss=0.0088, Avg Loss=0.0752, Time Left=2.49 [A
Epoch 3/3 - Training:  93%|▉| 3167/3393 [26:26<01:57,  1.93batch/s, Batch Loss=0.0410, Avg Loss=0.0752, Time Left=2.48 [A
Epoch 3/3 - Training:  93%|▉| 3168/3393 [26:26<01:54,  1.96batch/s, Batch Loss=0.0410, Avg Loss=0.0752, Time Left=2.48 [A
Epoch 3/3 - Training:  93%|▉| 3168/3393 [26:26<01:54,  1.96batch/s, Batch Loss=0.0128, Avg Loss=0.0752, Time Left=2.47 [A
Epoch 3/3 - Training:  93%|▉| 3169/3393 [26:26<01:52,  2.00batch/s, Batch Loss=0.0128, Avg Loss=0.0752, Time Left=2.47 [A
Epoch 3/3 - Training:  93%|▉| 3169/3393 [26:27<01:52,  2.00batch/s, Batch Loss=0.0009, Avg Loss=0.0751, Time Left=2.46 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  94%|▉| 3199/3393 [26:42<01:39,  1.95batch/s, Batch Loss=0.1050, Avg Loss=0.0750, Time Left=2.22 [A
Epoch 3/3 - Training:  94%|▉| 3199/3393 [26:42<01:39,  1.95batch/s, Batch Loss=0.0415, Avg Loss=0.0750, Time Left=2.21 [A
Epoch 3/3 - Training:  94%|▉| 3200/3393 [26:42<01:40,  1.93batch/s, Batch Loss=0.0415, Avg Loss=0.0750, Time Left=2.21 [A
Epoch 3/3 - Training:  94%|▉| 3200/3393 [26:43<01:40,  1.93batch/s, Batch Loss=0.0287, Avg Loss=0.0749, Time Left=2.20 [A
Epoch 3/3 - Training:  94%|▉| 3201/3393 [26:43<01:38,  1.94batch/s, Batch Loss=0.0287, Avg Loss=0.0749, Time Left=2.20 [A
Epoch 3/3 - Training:  94%|▉| 3201/3393 [26:43<01:38,  1.94batch/s, Batch Loss=0.1172, Avg Loss=0.0750, Time Left=2.19 [A
Epoch 3/3 - Training:  94%|▉| 3202/3393 [26:43<01:36,  1.98batch/s, Batch Loss=0.1172, Avg Loss=0.0750, Time Left=2.19 [A
Epoch 3/3 - Training:  94%|▉| 3202/3393 [26:44<01:36,  1.98batch/s, Batch Loss=0.0084, Avg Loss=0.0749, Time Left=2.18 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  95%|▉| 3232/3393 [26:58<01:22,  1.96batch/s, Batch Loss=0.1485, Avg Loss=0.0749, Time Left=1.93 [A
Epoch 3/3 - Training:  95%|▉| 3232/3393 [26:59<01:22,  1.96batch/s, Batch Loss=0.0022, Avg Loss=0.0749, Time Left=1.93 [A
Epoch 3/3 - Training:  95%|▉| 3233/3393 [26:59<01:22,  1.94batch/s, Batch Loss=0.0022, Avg Loss=0.0749, Time Left=1.93 [A
Epoch 3/3 - Training:  95%|▉| 3233/3393 [26:59<01:22,  1.94batch/s, Batch Loss=0.0039, Avg Loss=0.0749, Time Left=1.92 [A
Epoch 3/3 - Training:  95%|▉| 3234/3393 [26:59<01:21,  1.95batch/s, Batch Loss=0.0039, Avg Loss=0.0749, Time Left=1.92 [A
Epoch 3/3 - Training:  95%|▉| 3234/3393 [27:00<01:21,  1.95batch/s, Batch Loss=0.2066, Avg Loss=0.0749, Time Left=1.91 [A
Epoch 3/3 - Training:  95%|▉| 3235/3393 [27:00<01:20,  1.97batch/s, Batch Loss=0.2066, Avg Loss=0.0749, Time Left=1.91 [A
Epoch 3/3 - Training:  95%|▉| 3235/3393 [27:00<01:20,  1.97batch/s, Batch Loss=0.0112, Avg Loss=0.0749, Time Left=1.90 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  96%|▉| 3265/3393 [27:15<01:05,  1.97batch/s, Batch Loss=0.0010, Avg Loss=0.0745, Time Left=1.65 [A
Epoch 3/3 - Training:  96%|▉| 3265/3393 [27:16<01:05,  1.97batch/s, Batch Loss=0.0734, Avg Loss=0.0745, Time Left=1.64 [A
Epoch 3/3 - Training:  96%|▉| 3266/3393 [27:16<01:04,  1.97batch/s, Batch Loss=0.0734, Avg Loss=0.0745, Time Left=1.64 [A
Epoch 3/3 - Training:  96%|▉| 3266/3393 [27:16<01:04,  1.97batch/s, Batch Loss=0.0035, Avg Loss=0.0745, Time Left=1.64 [A
Epoch 3/3 - Training:  96%|▉| 3267/3393 [27:16<01:04,  1.96batch/s, Batch Loss=0.0035, Avg Loss=0.0745, Time Left=1.64 [A
Epoch 3/3 - Training:  96%|▉| 3267/3393 [27:17<01:04,  1.96batch/s, Batch Loss=0.0008, Avg Loss=0.0745, Time Left=1.63 [A
Epoch 3/3 - Training:  96%|▉| 3268/3393 [27:17<01:03,  1.97batch/s, Batch Loss=0.0008, Avg Loss=0.0745, Time Left=1.63 [A
Epoch 3/3 - Training:  96%|▉| 3268/3393 [27:17<01:03,  1.97batch/s, Batch Loss=0.0158, Avg Loss=0.0745, Time Left=1.62 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  97%|▉| 3298/3393 [27:32<00:46,  2.03batch/s, Batch Loss=0.0877, Avg Loss=0.0741, Time Left=1.37 [A
Epoch 3/3 - Training:  97%|▉| 3298/3393 [27:32<00:46,  2.03batch/s, Batch Loss=0.1382, Avg Loss=0.0741, Time Left=1.36 [A
Epoch 3/3 - Training:  97%|▉| 3299/3393 [27:32<00:46,  2.00batch/s, Batch Loss=0.1382, Avg Loss=0.0741, Time Left=1.36 [A
Epoch 3/3 - Training:  97%|▉| 3299/3393 [27:33<00:46,  2.00batch/s, Batch Loss=0.0507, Avg Loss=0.0741, Time Left=1.35 [A
Epoch 3/3 - Training:  97%|▉| 3300/3393 [27:33<00:46,  2.00batch/s, Batch Loss=0.0507, Avg Loss=0.0741, Time Left=1.35 [A
Epoch 3/3 - Training:  97%|▉| 3300/3393 [27:33<00:46,  2.00batch/s, Batch Loss=0.1346, Avg Loss=0.0741, Time Left=1.35 [A
Epoch 3/3 - Training:  97%|▉| 3301/3393 [27:33<00:45,  2.00batch/s, Batch Loss=0.1346, Avg Loss=0.0741, Time Left=1.35 [A
Epoch 3/3 - Training:  97%|▉| 3301/3393 [27:34<00:45,  2.00batch/s, Batch Loss=0.0026, Avg Loss=0.0741, Time Left=1.34 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  98%|▉| 3331/3393 [27:49<00:31,  1.98batch/s, Batch Loss=0.1012, Avg Loss=0.0740, Time Left=1.09 [A
Epoch 3/3 - Training:  98%|▉| 3331/3393 [27:49<00:31,  1.98batch/s, Batch Loss=0.0082, Avg Loss=0.0740, Time Left=1.08 [A
Epoch 3/3 - Training:  98%|▉| 3332/3393 [27:49<00:30,  2.00batch/s, Batch Loss=0.0082, Avg Loss=0.0740, Time Left=1.08 [A
Epoch 3/3 - Training:  98%|▉| 3332/3393 [27:50<00:30,  2.00batch/s, Batch Loss=0.0333, Avg Loss=0.0740, Time Left=1.07 [A
Epoch 3/3 - Training:  98%|▉| 3333/3393 [27:50<00:29,  2.02batch/s, Batch Loss=0.0333, Avg Loss=0.0740, Time Left=1.07 [A
Epoch 3/3 - Training:  98%|▉| 3333/3393 [27:50<00:29,  2.02batch/s, Batch Loss=0.0175, Avg Loss=0.0739, Time Left=1.07 [A
Epoch 3/3 - Training:  98%|▉| 3334/3393 [27:50<00:29,  2.03batch/s, Batch Loss=0.0175, Avg Loss=0.0739, Time Left=1.07 [A
Epoch 3/3 - Training:  98%|▉| 3334/3393 [27:51<00:29,  2.03batch/s, Batch Loss=0.1302, Avg Loss=0.0740, Time Left=1.06 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training:  99%|▉| 3364/3393 [28:05<00:15,  1.87batch/s, Batch Loss=0.2126, Avg Loss=0.0741, Time Left=0.81 [A
Epoch 3/3 - Training:  99%|▉| 3364/3393 [28:06<00:15,  1.87batch/s, Batch Loss=0.0703, Avg Loss=0.0741, Time Left=0.80 [A
Epoch 3/3 - Training:  99%|▉| 3365/3393 [28:06<00:14,  1.95batch/s, Batch Loss=0.0703, Avg Loss=0.0741, Time Left=0.80 [A
Epoch 3/3 - Training:  99%|▉| 3365/3393 [28:06<00:14,  1.95batch/s, Batch Loss=0.1216, Avg Loss=0.0741, Time Left=0.79 [A
Epoch 3/3 - Training:  99%|▉| 3366/3393 [28:06<00:13,  1.98batch/s, Batch Loss=0.1216, Avg Loss=0.0741, Time Left=0.79 [A
Epoch 3/3 - Training:  99%|▉| 3366/3393 [28:07<00:13,  1.98batch/s, Batch Loss=0.0323, Avg Loss=0.0741, Time Left=0.78 [A
Epoch 3/3 - Training:  99%|▉| 3367/3393 [28:07<00:13,  1.97batch/s, Batch Loss=0.0323, Avg Loss=0.0741, Time Left=0.78 [A
Epoch 3/3 - Training:  99%|▉| 3367/3393 [28:07<00:13,  1.97batch/s, Batch Loss=0.0838, Avg Loss=0.0741, Time Left=0.78 [A
Epoch 3/3 - Trai

Epoch 3/3 - Training: 3397batch [28:22,  2.00batch/s, Batch Loss=0.1044, Avg Loss=0.0741, Time Left=0.52 min][A
Epoch 3/3 - Training: 3398batch [28:22,  2.00batch/s, Batch Loss=0.1044, Avg Loss=0.0741, Time Left=0.52 min][A
Epoch 3/3 - Training: 3398batch [28:23,  2.00batch/s, Batch Loss=0.1128, Avg Loss=0.0741, Time Left=0.51 min][A
Epoch 3/3 - Training: 3399batch [28:23,  1.98batch/s, Batch Loss=0.1128, Avg Loss=0.0741, Time Left=0.51 min][A
Epoch 3/3 - Training: 3399batch [28:24,  1.98batch/s, Batch Loss=0.0020, Avg Loss=0.0741, Time Left=0.50 min][A
Epoch 3/3 - Training: 3400batch [28:24,  1.97batch/s, Batch Loss=0.0020, Avg Loss=0.0741, Time Left=0.50 min][A
Epoch 3/3 - Training: 3400batch [28:24,  1.97batch/s, Batch Loss=0.2448, Avg Loss=0.0741, Time Left=0.49 min][A
Epoch 3/3 - Training: 3401batch [28:24,  2.00batch/s, Batch Loss=0.2448, Avg Loss=0.0741, Time Left=0.49 min][A
Epoch 3/3 - Training: 3401batch [28:24,  2.00batch/s, Batch Loss=0.0574, Avg Loss=0.0741, Time L

Epoch 3/3 - Training: 3433batch [28:41,  1.96batch/s, Batch Loss=0.0038, Avg Loss=0.0739, Time Left=0.21 min][A
Epoch 3/3 - Training: 3434batch [28:41,  1.97batch/s, Batch Loss=0.0038, Avg Loss=0.0739, Time Left=0.21 min][A
Epoch 3/3 - Training: 3434batch [28:41,  1.97batch/s, Batch Loss=0.0010, Avg Loss=0.0738, Time Left=0.20 min][A
Epoch 3/3 - Training: 3435batch [28:41,  2.00batch/s, Batch Loss=0.0010, Avg Loss=0.0738, Time Left=0.20 min][A
Epoch 3/3 - Training: 3435batch [28:42,  2.00batch/s, Batch Loss=0.0066, Avg Loss=0.0738, Time Left=0.20 min][A
Epoch 3/3 - Training: 3436batch [28:42,  1.94batch/s, Batch Loss=0.0066, Avg Loss=0.0738, Time Left=0.20 min][A
Epoch 3/3 - Training: 3436batch [28:42,  1.94batch/s, Batch Loss=0.0284, Avg Loss=0.0738, Time Left=0.19 min][A
Epoch 3/3 - Training: 3437batch [28:42,  1.98batch/s, Batch Loss=0.0284, Avg Loss=0.0738, Time Left=0.19 min][A
Epoch 3/3 - Training: 3437batch [28:43,  1.98batch/s, Batch Loss=0.0005, Avg Loss=0.0738, Time L


Epoch 3/3 Results:
Train Loss: 0.0738
Validation Loss: 0.0757, Accuracy: 0.9740



('./phobert-finetuned-vietnamese\\tokenizer_config.json',
 './phobert-finetuned-vietnamese\\special_tokens_map.json',
 './phobert-finetuned-vietnamese\\vocab.txt',
 './phobert-finetuned-vietnamese\\bpe.codes',
 './phobert-finetuned-vietnamese\\added_tokens.json')

In [161]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel


# Average pooling function
def average_pool(last_hidden_states: torch.Tensor,
                 attention_mask: torch.Tensor) -> torch.Tensor:
    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
    return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]

# Function to get embeddings using saved tokenizer and model
def get_embeddings2(texts, model_dir):  # `texts` is a list of sentences
    # Load the tokenizer and model from the local directory
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModel.from_pretrained(model_dir)

    # Tokenize the input texts
    batch_dict = tokenizer(texts, max_length=512, padding=True, truncation=True, return_tensors='pt')

    with torch.no_grad():
        outputs = model(**batch_dict)

    # Average pooling
    last_hidden_states = outputs.last_hidden_state
    attention_mask = batch_dict['attention_mask']
    embeddings = average_pool(last_hidden_states, attention_mask)

    # Normalize embeddings
    embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

    return embeddings.numpy()

# Function to precompute and save embeddings for a list of food names
def precompute_embeddings(food_list, model_dir, save_path):
    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModel.from_pretrained(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize and compute embeddings in batches
    batch_size = 64  # Adjust batch size for memory usage
    all_embeddings = []
    for i in range(0, len(food_list), batch_size):
        batch_texts = food_list[i:i + batch_size]
        batch_dict = tokenizer(batch_texts, max_length=64, padding=True, truncation=True, return_tensors='pt')
        batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
        
        with torch.no_grad():
            outputs = model(**batch_dict)
        
        # Average pooling
        last_hidden_states = outputs.last_hidden_state
        attention_mask = batch_dict['attention_mask']
        embeddings = average_pool(last_hidden_states, attention_mask)
        
        # Normalize embeddings
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        all_embeddings.append(embeddings.cpu().numpy())
    
    # Concatenate all embeddings and save to file
    all_embeddings = np.vstack(all_embeddings)
    np.save(save_path, all_embeddings)  # Save embeddings as .npy file
    return all_embeddings

df = pd.read_csv("./dataset.txt", sep='|', on_bad_lines='skip', header=None)
df = df[[0,2,4]]
df.columns = ['description','name','labels']
df['labels'] = df['labels'].astype(int)

# Example: Precompute and save embeddings
model_dir = './phobert-finetuned-vietnamese'
food_list = df['name'].unique().tolist()  # Extract unique food names
save_path = './food_embeddings.npy'
precompute_embeddings(food_list, model_dir, save_path)

array([[-0.01935733,  0.01708665,  0.03139011, ..., -0.01061608,
         0.02952815,  0.05609883],
       [-0.0293919 ,  0.01535334,  0.0241307 , ..., -0.00761916,
         0.04555051,  0.0392148 ],
       [-0.04319261,  0.01367596,  0.01487689, ..., -0.0075201 ,
         0.03285275,  0.01059048],
       ...,
       [-0.03481395,  0.0213473 ,  0.0120395 , ..., -0.01107377,
         0.05397564,  0.02819412],
       [-0.02788912,  0.01280272,  0.02456283, ..., -0.00767387,
         0.04115546,  0.04625097],
       [-0.0420851 ,  0.03776307, -0.00973398, ...,  0.02489198,
         0.0119226 ,  0.014562  ]], dtype=float32)

In [162]:
from sklearn.metrics.pairwise import cosine_similarity


# Function to find similar food names using precomputed embeddings
def get_similar_vietnamese_food_fast(food_name, food_list, embeddings_path, model_dir, threshold=0.6, limit=10):
    # Load precomputed embeddings
    food_embeddings = np.load(embeddings_path)
    
    # Compute embedding for the input food name
    input_embedding = get_embeddings2([food_name], model_dir)
    
    # Compute cosine similarity
    similarities = cosine_similarity(input_embedding, food_embeddings)[0]
    
    # Pair similarities with corresponding food names
    indexed_scores = [(i, score) for i, score in enumerate(similarities) if score > threshold]
    
    # Sort scores in descending order
    sorted_scores = sorted(indexed_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top `limit` most similar items
    top_results = sorted_scores[:limit]
    
    # Retrieve the corresponding food names
    results = [(food_list[i], score) for i, score in top_results]
    
    return results


In [169]:
# Test function
def test_local_model_fast():
    # Define the saved model directory and embeddings path
    model_dir = './phobert-finetuned-vietnamese'
    embeddings_path = './food_embeddings.npy'
    
    # Example food descriptions
    food_names = ["nước ép cam tươi nguyên chất",
                  "nuoc ep cam nguyen chat",
                  "nuoc ep",
                  "nước cam",
                  "nuoc cam",
                  "cơm đùi gà nướng",
                  "cơm dui ga",
                  "tra sữa",
                  "trà sữa phúc long"]

    # Similarity threshold
    threshold = 0.6

    for food_name in food_names:
        # Get similar food names
        similar_foods = get_similar_vietnamese_food_fast(food_name, df['name'].unique().tolist(), embeddings_path, model_dir, threshold, limit=10)

        # Print results
        print(f"Input Food Description: {food_name}")
        print("Top Similar Foods:")
        for food, score in similar_foods:
            print(f"+ {food} (Similarity: {score:.4f})")
        print()
test_local_model_fast()

Input Food Description: nước ép cam tươi nguyên chất
Top Similar Foods:
+ nước ép cam tươi nguyên chất (Similarity: 1.0000)
+ nước ép lê táo nguyên chất (Similarity: 0.9998)
+ nước ép táo dứa nguyên chất (Similarity: 0.9997)
+ nước ép cam nguyên chất (Similarity: 0.9997)
+ nước chanh tươi nguyên chất (Similarity: 0.9996)
+ nước ép lựu đỏ nguyên chất (Similarity: 0.9995)
+ nước ép thơm nguyên chất (Similarity: 0.9995)
+ nước ép cam nguyên chất chai (Similarity: 0.9993)
+ nước ép cà rốt nguyên chất (Similarity: 0.9993)
+ nước ép chai dưa lưới (Similarity: 0.9993)

Input Food Description: nuoc ep cam nguyen chat
Top Similar Foods:
+ combo sôt măm toi vưa (Similarity: 0.9951)
+ twister hoac fanta lon (Similarity: 0.9927)
+ mega combo (Similarity: 0.9927)
+ rice meal (Similarity: 0.9926)
+ tra tăc chém gió (Similarity: 0.9922)
+ sprite lon (Similarity: 0.9921)
+ almond tuile (Similarity: 0.9921)
+ chicken sandwich (Similarity: 0.9920)
+ cold whisk matcha latte (Similarity: 0.9917)
+ olong s

In [17]:
from transformers import RobertaModel
import torch
from functools import partial
import math

class LoRALayer(torch.nn.Module):
    def __init__(self, in_dim, out_dim, r, alpha):
        super().__init__()
        self.r = r
        self.alpha = alpha

        # Initialize A to kaiming uniform following code: https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
        self.A = torch.nn.Parameter(torch.empty(r, in_dim))
        # Initialize B to zeros.
        self.B = torch.nn.Parameter(torch.empty(out_dim, r))
        torch.nn.init.kaiming_uniform_(self.A, a=math.sqrt(5))
        torch.nn.init.zeros_(self.B)

        self.scaling = self.alpha / self.r

    def forward(self, x):
        x = self.scaling * (x @ self.A.transpose(0, 1) @ self.B.transpose(0, 1))
        return x

class LinearWithLoRA(torch.nn.Module):
    def __init__(self, linear, r, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features, linear.out_features, r, alpha
        )

    def forward(self, x):
        return self.linear(x) + self.lora(x)

class RobertaWithLoRA(torch.nn.Module):
    def __init__(self, base_model_name, r=16, alpha=32, num_labels=2):
        super().__init__()
        self.base_model = RobertaModel.from_pretrained(base_model_name)
        self.dropout = torch.nn.Dropout(0.1)
        self.classifier = torch.nn.Linear(self.base_model.config.hidden_size, num_labels)

        # Apply LoRA to the attention and intermediate layers
        self.apply_lora(self.base_model, r, alpha)

    def apply_lora(self, model, r, alpha):
        assign_lora = partial(LinearWithLoRA, r=r, alpha=alpha)

        for name, module in model.named_modules():
            if isinstance(module, torch.nn.Linear):
                # Replace the linear layer with a LoRA-enhanced version
                setattr(
                    module,
                    "forward",
                    assign_lora(module).forward,
                )

    def forward(self, input_ids, attention_mask):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]  # Use [CLS] token representation
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits


In [21]:
from transformers import AutoTokenizer

# Load tokenizer and model from the local directory
model_dir = "vinai/phobert-base"
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Initialize the LoRA-enhanced model
num_labels = 2  # Change according to your task
lora_r = 8
lora_alpha = lora_r * 2
lora_model = RobertaWithLoRA(base_model_name=model_dir, r=lora_r, alpha=lora_alpha, num_labels=num_labels)


In [22]:
def count_trainable_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Trainable parameters without LoRA:", count_trainable_params(model))
print("Trainable parameters with LoRA:", count_trainable_params(lora_model))


Trainable parameters without LoRA: 135590402
Trainable parameters with LoRA: 134999810


In [None]:
# Train the LoRA-enhanced model
trainer = PhoBERTTrainer(
    model=lora_model, 
    tokenizer=tokenizer, 
    train_dataset=train_dataset, 
    val_dataset=val_dataset, 
    batch_size=8, 
    lr=5e-5,
    device="cuda" if torch.cuda.is_available() else "cpu"
)
trainer.fine_tune(epochs=3)

# Save the fine-tuned model
lora_model.phobert.save_pretrained("./phobert-finetuned-lora-vietnamese")
tokenizer.save_pretrained("./phobert-finetuned-lora-vietnamese")



Starting Epoch 1/3



Epoch 1/3 - Training:   0%|                                                                | 0/6785 [00:00<?, ?batch/s][A