In [13]:
import torch

from Modules import LoadingModule
from Modules import Features_encoder
from Modules import quantizationModule
from Modules import wav2vec_transformer
from Modules import ContrastiveLoss

from Modules import TempLibriSpeech

In [14]:
"""

#data loader module init
StandardScalerTransform = LoadingModule.StandardScalerTransform
LargeDataModule = LoadingModule.LargeDataModule("./data/Librispeech", batch_size=16, num_workers=1, transform=StandardScalerTransform)
"""

'\n\n#data loader module init\nStandardScalerTransform = LoadingModule.StandardScalerTransform\nLargeDataModule = LoadingModule.LargeDataModule("./data/Librispeech", batch_size=16, num_workers=1, transform=StandardScalerTransform)\n'

In [3]:
# Temporary manual data loading.
# TODO: make this compatible with PyTorch Lightning once the GPU is available;
# until then the dataset and loader are built by hand.
from torch.utils.data import DataLoader

# Pick the device at runtime instead of hard-coding 'cuda', so the cell does not
# crash on a machine without a GPU.
# NOTE(review): assumes TempLibriSpeech.LibriSpeech accepts device="cpu" — confirm.
DATA_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

dataset = TempLibriSpeech.LibriSpeech(split="train-clean-100", target_length=48000, device=DATA_DEVICE)
# Use the DataLoader name imported above rather than the fully qualified path.
data_loader = DataLoader(dataset, batch_size=8, shuffle=True)

In [4]:
# Sanity check: show the audio tensor shape and the transcripts of the first two batches.
# zip() with range(2) stops after two batches without pulling a third one from the loader.
for i, (audio, text) in zip(range(2), data_loader):
    print(
        f"Exemple {i+1}",
        f"Audio shape: {audio.shape}",
        f"Texte: {text}",
        "-" * 50,
        sep="\n",
    )

Exemple 1
Audio shape: torch.Size([8, 48000])
Texte: ('INTO THESE THE BLOOD OF NECESSITY FLOWS FROM THE HOLLOW VEIN INTO THE RIGHT AND FROM THE VENOUS ARTERY INTO THE LEFT BECAUSE THESE TWO VESSELS ARE ALWAYS FULL OF BLOOD AND THEIR ORIFICES WHICH ARE TURNED TOWARDS THE HEART CANNOT THEN BE CLOSED', 'OTHERS SINGING OTHERS PLAYING THE VARIOUS INSTRUMENTS ALREADY MENTIONED', 'STILL I WAS SORRY FOR THE POOR LITTLE OLD LADY I WISH SOMEHOW SHE COULD HAVE THAT HUNDRED DOLLARS IT WAS THE MAN WHO SAID THIS NOT THE COLLECTOR SO DO I REJOINED BILLY DOLEFULLY', "WE OUGHT TO BE ABLE TO GET A GOOD DINNER ZVERKOV OF COURSE WON'T PAY OF COURSE NOT SINCE WE ARE INVITING HIM SIMONOV DECIDED CAN YOU IMAGINE FERFITCHKIN INTERRUPTED HOTLY AND CONCEITEDLY", 'ONE CARRIAGE DELAYED SUFFICED TO PARALYZE THE WHOLE LINE THEN THEY SET OUT AGAIN ON THE MARCH THE WEDDING CARRIAGES WERE IN THE FILE PROCEEDING TOWARDS THE BASTILLE AND SKIRTING THE RIGHT SIDE OF THE BOULEVARD', 'AND FORGOT HER OWN GRIEF IN SOLACING TH

In [5]:
### Model dev ###

In [9]:
import torch
import torch.nn as nn

class Model_W2V(nn.Module):
    """Wav2vec-style model wiring together the project sub-modules.

    The forward pass runs, in order: feature encoder -> quantization of the
    encoder output -> masking of the encoder output -> one transformer block
    on the masked features -> contrastive loss between the transformer output
    and the quantized features.

    Args:
        embed_size: Stored on the instance; not used by the code in this class.
        num_heads: Passed to the transformer block.
        dropout: Passed to the transformer block.
        forward_expansion: Passed to the transformer block.
        kernel_size: Stored on the instance; not used by the code in this class.
        groups: Stored on the instance; not used by the code in this class.
        d_model: Passed to the transformer block.
        num_layers: Stored on the instance; not used by the code in this class.
        max_relative_position: Stored on the instance; not used by the code in this class.
        batch_size: Batch size exposed as ``self.batch_size`` (read by the
            training loop to build its DataLoader). Previously this value was
            read from a notebook-level global named ``batch_size``; it is now
            an explicit, optional argument whose default matches the notebook
            configuration.
    """

    def __init__(self, embed_size, num_heads, dropout, forward_expansion, kernel_size, groups, d_model, num_layers, max_relative_position, batch_size=8):
        # Initialise nn.Module before touching any attribute: this is the
        # documented order and is required as soon as a sub-module is assigned.
        super(Model_W2V, self).__init__()

        # Explicit argument instead of a hidden dependency on a notebook global.
        self.batch_size = batch_size
        self.embed_size = embed_size

        # Masking settings forwarded to the masking module in forward().
        self.mask_prob = 0.50
        self.mask_length = 1

        # Transformer settings.
        self.num_heads = num_heads
        self.dropout = dropout
        self.forward_expansion = forward_expansion
        self.kernel_size = kernel_size
        self.groups = groups
        self.d_model = d_model
        # NOTE(review): num_layers is stored but only a single transformer
        # block is instantiated below — confirm whether a stack was intended.
        self.num_layers = num_layers

        # Quantization module settings.
        self.num_codebooks = 2
        self.num_codes = 320
        self.code_dim = 256
        self.output_dim = 512
        self.temperature = 0.07

        self.max_relative_position = max_relative_position

        self.FeaturesEncoder = Features_encoder.FeatureEncoder(input_channels=1, feature_dim=512)
        self.masking = wav2vec_transformer.MaskingWithLearnableEmbedding()
        # TransformerBlockW(d_model, num_heads, dropout, forward_expansion).
        # The attribute name (including its spelling) is kept so existing
        # checkpoints / state_dict keys stay valid.
        self.TranformerBlock = wav2vec_transformer.TransformerBlockW(self.d_model, self.num_heads, self.dropout, self.forward_expansion)
        self.quantization = quantizationModule.QuantizationModule(self.num_codebooks, self.num_codes, self.code_dim, self.output_dim, self.temperature)
        self.LossItem = ContrastiveLoss.LossW2V(20)

    def forward(self, x):
        """Run the full pipeline on a batch of waveforms.

        Args:
            x: Batch of raw audio without a channel axis; the notebook's data
                loader yields tensors of shape ``[8, 48000]``.

        Returns:
            A tuple ``(features, contextualized_reps, loss)`` where ``features``
            is the feature-encoder output, ``contextualized_reps`` is the
            transformer-block output and ``loss`` is the value returned by the
            contrastive loss module.
        """
        # Move the input to wherever the model parameters live.
        x = x.to(next(self.parameters()).device)
        # Use the real size of this batch for the loss rather than the
        # configured one, so a smaller final batch is described correctly.
        current_batch_size = x.size(0)
        # Insert the single input channel expected by the feature encoder.
        x = x.unsqueeze(1)

        x = self.FeaturesEncoder(x)

        # Targets are quantized from the unmasked encoder output.
        quantized_repr = self.quantization(x)

        # masking(x, mask_prob, mask_length) -> (masked features, mask)
        masked_reps, mask = self.masking(x, self.mask_prob, self.mask_length)

        # TransformerBlockW(value, key, query, mask=None)
        # NOTE(review): the pretraining mask is also handed to the transformer
        # as its `mask` argument — confirm this is the intended semantics.
        contextualized_reps = self.TranformerBlock(masked_reps, masked_reps, masked_reps, mask)

        loss = self.LossItem.compute_loss(contextualized_reps, quantized_repr, mask, current_batch_size)

        return x, contextualized_reps, loss
    

In [10]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm

def train_model(model, dataset, epochs, learning_rate, device):
    """Train ``model`` on ``dataset`` with Adam and log the loss.

    Args:
        model: Module exposing a ``batch_size`` attribute and whose forward
            pass returns a 3-tuple with the loss as its last element.
        dataset: Dataset yielding ``(inputs, target)`` pairs; the target is ignored.
        epochs: Number of passes over the dataset.
        learning_rate: Learning rate given to the Adam optimizer.
        device: Device the model is moved to before training.

    Returns:
        None. Progress and losses are written to stdout.
    """
    # drop_last=True discards the incomplete final batch, replacing the former
    # manual "stop before the last iteration" check.
    dataloader = DataLoader(dataset, batch_size=model.batch_size, shuffle=True, drop_last=True)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()

    num_batches = len(dataloader)

    for epoch in range(epochs):
        total_loss = 0.0

        # Iterate over the loader built from the `dataset` argument — not over
        # a notebook-level loader that happens to exist in the kernel.
        for batch_idx, (inputs, _) in enumerate(tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")):
            optimizer.zero_grad()

            _, _, loss = model(inputs)

            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            total_loss += batch_loss

            if batch_idx % 10 == 0:
                # tqdm.write prints the message without breaking the progress bar.
                tqdm.write(f"Epoch [{epoch+1}/{epochs}], Step [{batch_idx+1}/{num_batches}], Loss: {batch_loss:.4f}")

        # Guard against datasets smaller than one full batch.
        avg_loss = total_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{epochs}] Average Loss: {avg_loss:.4f}")


In [12]:
# --- Hyperparameters ---
batch_size = 8
seq_length = 151
embed_size = 512
num_heads = 8
dropout = 0.1
forward_expansion = 4
kernel_size = 7
groups = 2
d_model = 512
num_layers = 12

max_relative_position = 128

# Autograd anomaly detection is a debugging aid that slows training down;
# keep it off for real runs and flip the flag only while hunting a bad gradient.
# The call is made explicitly so a value left over from an earlier run is reset.
DETECT_AUTOGRAD_ANOMALY = False
torch.autograd.set_detect_anomaly(DETECT_AUTOGRAD_ANOMALY)

# Use the GPU when present, otherwise fall back to the CPU instead of crashing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Model_W2V(embed_size, num_heads, dropout, forward_expansion, kernel_size, groups, d_model, num_layers, max_relative_position).to(device)


train_model(model, dataset, epochs=10, learning_rate=1e-4, device=device)


Epoch 1/10:   0%|          | 1/3568 [00:00<53:05,  1.12it/s]

Epoch [1/10], Step [1/3567], Loss: 2.9579


Epoch 1/10:   0%|          | 11/3568 [00:07<39:22,  1.51it/s]

Epoch [1/10], Step [11/3567], Loss: 2.9446


Epoch 1/10:   1%|          | 21/3568 [00:13<37:09,  1.59it/s]

Epoch [1/10], Step [21/3567], Loss: 2.9446


Epoch 1/10:   1%|          | 31/3568 [00:20<36:51,  1.60it/s]

Epoch [1/10], Step [31/3567], Loss: 2.9446


Epoch 1/10:   1%|          | 41/3568 [00:26<37:40,  1.56it/s]

Epoch [1/10], Step [41/3567], Loss: 2.9446


Epoch 1/10:   1%|▏         | 51/3568 [00:32<36:52,  1.59it/s]

Epoch [1/10], Step [51/3567], Loss: 2.9446


Epoch 1/10:   2%|▏         | 61/3568 [00:38<36:26,  1.60it/s]

Epoch [1/10], Step [61/3567], Loss: 2.9446


Epoch 1/10:   2%|▏         | 71/3568 [00:45<36:47,  1.58it/s]

Epoch [1/10], Step [71/3567], Loss: 2.9446


Epoch 1/10:   2%|▏         | 81/3568 [00:51<36:53,  1.58it/s]

Epoch [1/10], Step [81/3567], Loss: 2.9446


Epoch 1/10:   3%|▎         | 91/3568 [00:57<36:07,  1.60it/s]

Epoch [1/10], Step [91/3567], Loss: 2.9446


Epoch 1/10:   3%|▎         | 101/3568 [01:04<36:12,  1.60it/s]

Epoch [1/10], Step [101/3567], Loss: 2.9446


Epoch 1/10:   3%|▎         | 111/3568 [01:10<36:22,  1.58it/s]

Epoch [1/10], Step [111/3567], Loss: 2.9446


Epoch 1/10:   3%|▎         | 121/3568 [01:16<35:18,  1.63it/s]

Epoch [1/10], Step [121/3567], Loss: 2.9446


Epoch 1/10:   4%|▎         | 131/3568 [01:22<35:46,  1.60it/s]

Epoch [1/10], Step [131/3567], Loss: 2.9446


Epoch 1/10:   4%|▍         | 141/3568 [01:29<35:17,  1.62it/s]

Epoch [1/10], Step [141/3567], Loss: 2.9446


Epoch 1/10:   4%|▍         | 151/3568 [01:35<35:55,  1.59it/s]

Epoch [1/10], Step [151/3567], Loss: 2.9446


Epoch 1/10:   5%|▍         | 161/3568 [01:41<35:35,  1.60it/s]

Epoch [1/10], Step [161/3567], Loss: 2.9446


Epoch 1/10:   5%|▍         | 171/3568 [01:47<34:47,  1.63it/s]

Epoch [1/10], Step [171/3567], Loss: 2.9446


Epoch 1/10:   5%|▌         | 181/3568 [01:54<35:41,  1.58it/s]

Epoch [1/10], Step [181/3567], Loss: 2.9446


Epoch 1/10:   5%|▌         | 191/3568 [02:00<36:03,  1.56it/s]

Epoch [1/10], Step [191/3567], Loss: 2.9446


Epoch 1/10:   6%|▌         | 201/3568 [02:06<35:41,  1.57it/s]

Epoch [1/10], Step [201/3567], Loss: 2.9446


Epoch 1/10:   6%|▌         | 211/3568 [02:13<35:10,  1.59it/s]

Epoch [1/10], Step [211/3567], Loss: 2.9446


Epoch 1/10:   6%|▌         | 221/3568 [02:19<34:33,  1.61it/s]

Epoch [1/10], Step [221/3567], Loss: 2.9446


Epoch 1/10:   6%|▋         | 231/3568 [02:25<34:34,  1.61it/s]

Epoch [1/10], Step [231/3567], Loss: 2.9446


Epoch 1/10:   7%|▋         | 241/3568 [02:31<34:34,  1.60it/s]

Epoch [1/10], Step [241/3567], Loss: 2.9446


Epoch 1/10:   7%|▋         | 251/3568 [02:38<34:43,  1.59it/s]

Epoch [1/10], Step [251/3567], Loss: 2.9446


Epoch 1/10:   7%|▋         | 261/3568 [02:44<34:42,  1.59it/s]

Epoch [1/10], Step [261/3567], Loss: 2.9446


Epoch 1/10:   8%|▊         | 271/3568 [02:50<34:28,  1.59it/s]

Epoch [1/10], Step [271/3567], Loss: 2.9446


Epoch 1/10:   8%|▊         | 281/3568 [02:57<34:39,  1.58it/s]

Epoch [1/10], Step [281/3567], Loss: 2.9446


Epoch 1/10:   8%|▊         | 291/3568 [03:03<34:09,  1.60it/s]

Epoch [1/10], Step [291/3567], Loss: 2.9446


Epoch 1/10:   8%|▊         | 301/3568 [03:09<33:59,  1.60it/s]

Epoch [1/10], Step [301/3567], Loss: 2.9446


Epoch 1/10:   9%|▊         | 311/3568 [03:15<34:47,  1.56it/s]

Epoch [1/10], Step [311/3567], Loss: 2.9446


Epoch 1/10:   9%|▉         | 321/3568 [03:22<34:02,  1.59it/s]

Epoch [1/10], Step [321/3567], Loss: 2.9446


Epoch 1/10:   9%|▉         | 331/3568 [03:28<33:46,  1.60it/s]

Epoch [1/10], Step [331/3567], Loss: 2.9446


Epoch 1/10:  10%|▉         | 341/3568 [03:34<34:03,  1.58it/s]

Epoch [1/10], Step [341/3567], Loss: 2.9446


Epoch 1/10:  10%|▉         | 351/3568 [03:41<34:27,  1.56it/s]

Epoch [1/10], Step [351/3567], Loss: 2.9446


Epoch 1/10:  10%|█         | 361/3568 [03:47<33:17,  1.61it/s]

Epoch [1/10], Step [361/3567], Loss: 2.9446


Epoch 1/10:  10%|█         | 371/3568 [03:53<33:55,  1.57it/s]

Epoch [1/10], Step [371/3567], Loss: 2.9446


Epoch 1/10:  11%|█         | 381/3568 [03:59<33:11,  1.60it/s]

Epoch [1/10], Step [381/3567], Loss: 2.9446


Epoch 1/10:  11%|█         | 391/3568 [04:06<33:05,  1.60it/s]

Epoch [1/10], Step [391/3567], Loss: 2.9446


Epoch 1/10:  11%|█         | 401/3568 [04:12<32:56,  1.60it/s]

Epoch [1/10], Step [401/3567], Loss: 2.9446


Epoch 1/10:  12%|█▏        | 411/3568 [04:18<32:24,  1.62it/s]

Epoch [1/10], Step [411/3567], Loss: 2.9446


Epoch 1/10:  12%|█▏        | 421/3568 [04:24<32:35,  1.61it/s]

Epoch [1/10], Step [421/3567], Loss: 2.9446


Epoch 1/10:  12%|█▏        | 431/3568 [04:31<33:29,  1.56it/s]

Epoch [1/10], Step [431/3567], Loss: 2.9446


Epoch 1/10:  12%|█▏        | 441/3568 [04:37<32:30,  1.60it/s]

Epoch [1/10], Step [441/3567], Loss: 2.9446


Epoch 1/10:  13%|█▎        | 451/3568 [04:44<33:33,  1.55it/s]

Epoch [1/10], Step [451/3567], Loss: 2.9446


Epoch 1/10:  13%|█▎        | 461/3568 [04:50<33:37,  1.54it/s]

Epoch [1/10], Step [461/3567], Loss: 2.9446


Epoch 1/10:  13%|█▎        | 471/3568 [04:56<32:01,  1.61it/s]

Epoch [1/10], Step [471/3567], Loss: 2.9446


Epoch 1/10:  13%|█▎        | 481/3568 [05:02<32:21,  1.59it/s]

Epoch [1/10], Step [481/3567], Loss: 2.9446


Epoch 1/10:  14%|█▍        | 491/3568 [05:09<31:46,  1.61it/s]

Epoch [1/10], Step [491/3567], Loss: 2.9446


Epoch 1/10:  14%|█▍        | 501/3568 [05:15<31:17,  1.63it/s]

Epoch [1/10], Step [501/3567], Loss: 2.9446


Epoch 1/10:  14%|█▍        | 511/3568 [05:21<37:30,  1.36it/s]

Epoch [1/10], Step [511/3567], Loss: 2.9446


Epoch 1/10:  15%|█▍        | 521/3568 [05:30<42:29,  1.20it/s]

Epoch [1/10], Step [521/3567], Loss: 2.9446


Epoch 1/10:  15%|█▍        | 531/3568 [05:39<43:30,  1.16it/s]

Epoch [1/10], Step [531/3567], Loss: 2.9446


Epoch 1/10:  15%|█▌        | 541/3568 [05:47<41:32,  1.21it/s]

Epoch [1/10], Step [541/3567], Loss: 2.9446


Epoch 1/10:  15%|█▌        | 551/3568 [05:54<34:24,  1.46it/s]

Epoch [1/10], Step [551/3567], Loss: 2.9446


Epoch 1/10:  16%|█▌        | 561/3568 [06:01<32:20,  1.55it/s]

Epoch [1/10], Step [561/3567], Loss: 2.9446


Epoch 1/10:  16%|█▌        | 571/3568 [06:07<31:35,  1.58it/s]

Epoch [1/10], Step [571/3567], Loss: 2.9446


Epoch 1/10:  16%|█▋        | 581/3568 [06:13<30:58,  1.61it/s]

Epoch [1/10], Step [581/3567], Loss: 2.9446


Epoch 1/10:  17%|█▋        | 591/3568 [06:20<31:53,  1.56it/s]

Epoch [1/10], Step [591/3567], Loss: 2.9446


Epoch 1/10:  17%|█▋        | 601/3568 [06:26<31:16,  1.58it/s]

Epoch [1/10], Step [601/3567], Loss: 2.9446


Epoch 1/10:  17%|█▋        | 611/3568 [06:32<31:46,  1.55it/s]

Epoch [1/10], Step [611/3567], Loss: 2.9446


Epoch 1/10:  17%|█▋        | 621/3568 [06:39<33:30,  1.47it/s]

Epoch [1/10], Step [621/3567], Loss: 2.9446


Epoch 1/10:  18%|█▊        | 631/3568 [06:45<29:52,  1.64it/s]

Epoch [1/10], Step [631/3567], Loss: 2.9446


Epoch 1/10:  18%|█▊        | 641/3568 [06:52<30:36,  1.59it/s]

Epoch [1/10], Step [641/3567], Loss: 2.9446


Epoch 1/10:  18%|█▊        | 651/3568 [06:58<30:51,  1.58it/s]

Epoch [1/10], Step [651/3567], Loss: 2.9446


Epoch 1/10:  19%|█▊        | 661/3568 [07:04<30:24,  1.59it/s]

Epoch [1/10], Step [661/3567], Loss: 2.9446


Epoch 1/10:  19%|█▉        | 671/3568 [07:10<30:53,  1.56it/s]

Epoch [1/10], Step [671/3567], Loss: 2.9446


Epoch 1/10:  19%|█▉        | 681/3568 [07:17<30:34,  1.57it/s]

Epoch [1/10], Step [681/3567], Loss: 2.9446


Epoch 1/10:  19%|█▉        | 691/3568 [07:23<29:57,  1.60it/s]

Epoch [1/10], Step [691/3567], Loss: 2.9446


Epoch 1/10:  20%|█▉        | 701/3568 [07:29<29:45,  1.61it/s]

Epoch [1/10], Step [701/3567], Loss: 2.9446


Epoch 1/10:  20%|█▉        | 711/3568 [07:36<29:48,  1.60it/s]

Epoch [1/10], Step [711/3567], Loss: 2.9446


Epoch 1/10:  20%|██        | 721/3568 [07:42<30:12,  1.57it/s]

Epoch [1/10], Step [721/3567], Loss: 2.9446


Epoch 1/10:  20%|██        | 731/3568 [07:48<29:22,  1.61it/s]

Epoch [1/10], Step [731/3567], Loss: 2.9446


Epoch 1/10:  21%|██        | 741/3568 [07:55<29:55,  1.57it/s]

Epoch [1/10], Step [741/3567], Loss: 2.9446


Epoch 1/10:  21%|██        | 751/3568 [08:01<29:20,  1.60it/s]

Epoch [1/10], Step [751/3567], Loss: 2.9446


Epoch 1/10:  21%|██▏       | 761/3568 [08:07<28:59,  1.61it/s]

Epoch [1/10], Step [761/3567], Loss: 2.9446


Epoch 1/10:  22%|██▏       | 771/3568 [08:13<29:47,  1.56it/s]

Epoch [1/10], Step [771/3567], Loss: 2.9446


Epoch 1/10:  22%|██▏       | 781/3568 [08:20<29:51,  1.56it/s]

Epoch [1/10], Step [781/3567], Loss: 2.9446


Epoch 1/10:  22%|██▏       | 791/3568 [08:26<29:12,  1.58it/s]

Epoch [1/10], Step [791/3567], Loss: 2.9446


Epoch 1/10:  22%|██▏       | 801/3568 [08:32<28:50,  1.60it/s]

Epoch [1/10], Step [801/3567], Loss: 2.9446


Epoch 1/10:  23%|██▎       | 811/3568 [08:39<28:58,  1.59it/s]

Epoch [1/10], Step [811/3567], Loss: 2.9446


Epoch 1/10:  23%|██▎       | 821/3568 [08:45<28:27,  1.61it/s]

Epoch [1/10], Step [821/3567], Loss: 2.9446


Epoch 1/10:  23%|██▎       | 831/3568 [08:51<28:26,  1.60it/s]

Epoch [1/10], Step [831/3567], Loss: 2.9446


Epoch 1/10:  24%|██▎       | 841/3568 [08:58<28:11,  1.61it/s]

Epoch [1/10], Step [841/3567], Loss: 2.9446


Epoch 1/10:  24%|██▍       | 851/3568 [09:04<27:58,  1.62it/s]

Epoch [1/10], Step [851/3567], Loss: 2.9446


Epoch 1/10:  24%|██▍       | 861/3568 [09:10<27:53,  1.62it/s]

Epoch [1/10], Step [861/3567], Loss: 2.9446


Epoch 1/10:  24%|██▍       | 871/3568 [09:16<27:57,  1.61it/s]

Epoch [1/10], Step [871/3567], Loss: 2.9446


Epoch 1/10:  25%|██▍       | 881/3568 [09:23<29:36,  1.51it/s]

Epoch [1/10], Step [881/3567], Loss: 2.9446


Epoch 1/10:  25%|██▍       | 891/3568 [09:29<30:13,  1.48it/s]

Epoch [1/10], Step [891/3567], Loss: 2.9446


Epoch 1/10:  25%|██▌       | 901/3568 [09:36<28:18,  1.57it/s]

Epoch [1/10], Step [901/3567], Loss: 2.9446


Epoch 1/10:  26%|██▌       | 911/3568 [09:42<28:12,  1.57it/s]

Epoch [1/10], Step [911/3567], Loss: 2.9446


Epoch 1/10:  26%|██▌       | 921/3568 [09:48<27:35,  1.60it/s]

Epoch [1/10], Step [921/3567], Loss: 2.9446


Epoch 1/10:  26%|██▌       | 931/3568 [09:55<27:36,  1.59it/s]

Epoch [1/10], Step [931/3567], Loss: 2.9446


Epoch 1/10:  26%|██▋       | 941/3568 [10:01<27:57,  1.57it/s]

Epoch [1/10], Step [941/3567], Loss: 2.9446


Epoch 1/10:  27%|██▋       | 951/3568 [10:07<26:59,  1.62it/s]

Epoch [1/10], Step [951/3567], Loss: 2.9446


Epoch 1/10:  27%|██▋       | 961/3568 [10:14<27:26,  1.58it/s]

Epoch [1/10], Step [961/3567], Loss: 2.9446


Epoch 1/10:  27%|██▋       | 971/3568 [10:20<26:53,  1.61it/s]

Epoch [1/10], Step [971/3567], Loss: 2.9446


Epoch 1/10:  27%|██▋       | 981/3568 [10:26<26:55,  1.60it/s]

Epoch [1/10], Step [981/3567], Loss: 2.9446


Epoch 1/10:  28%|██▊       | 991/3568 [10:32<26:51,  1.60it/s]

Epoch [1/10], Step [991/3567], Loss: 2.9446


Epoch 1/10:  28%|██▊       | 1001/3568 [10:39<26:27,  1.62it/s]

Epoch [1/10], Step [1001/3567], Loss: 2.9446


Epoch 1/10:  28%|██▊       | 1011/3568 [10:45<26:42,  1.60it/s]

Epoch [1/10], Step [1011/3567], Loss: 2.9446


Epoch 1/10:  29%|██▊       | 1021/3568 [10:51<26:40,  1.59it/s]

Epoch [1/10], Step [1021/3567], Loss: 2.9446


Epoch 1/10:  29%|██▉       | 1031/3568 [10:57<26:33,  1.59it/s]

Epoch [1/10], Step [1031/3567], Loss: 2.9446


Epoch 1/10:  29%|██▉       | 1041/3568 [11:04<26:56,  1.56it/s]

Epoch [1/10], Step [1041/3567], Loss: 2.9446


Epoch 1/10:  29%|██▉       | 1051/3568 [11:10<26:20,  1.59it/s]

Epoch [1/10], Step [1051/3567], Loss: 2.9446


Epoch 1/10:  30%|██▉       | 1061/3568 [11:16<26:09,  1.60it/s]

Epoch [1/10], Step [1061/3567], Loss: 2.9446


Epoch 1/10:  30%|███       | 1071/3568 [11:23<26:10,  1.59it/s]

Epoch [1/10], Step [1071/3567], Loss: 2.9446


Epoch 1/10:  30%|███       | 1081/3568 [11:29<25:45,  1.61it/s]

Epoch [1/10], Step [1081/3567], Loss: 2.9446


Epoch 1/10:  31%|███       | 1091/3568 [11:35<25:31,  1.62it/s]

Epoch [1/10], Step [1091/3567], Loss: 2.9446


Epoch 1/10:  31%|███       | 1101/3568 [11:41<25:27,  1.62it/s]

Epoch [1/10], Step [1101/3567], Loss: 2.9446


Epoch 1/10:  31%|███       | 1111/3568 [11:48<26:08,  1.57it/s]

Epoch [1/10], Step [1111/3567], Loss: 2.9446


Epoch 1/10:  31%|███▏      | 1121/3568 [11:54<25:12,  1.62it/s]

Epoch [1/10], Step [1121/3567], Loss: 2.9446


Epoch 1/10:  32%|███▏      | 1131/3568 [12:00<25:07,  1.62it/s]

Epoch [1/10], Step [1131/3567], Loss: 2.9446


Epoch 1/10:  32%|███▏      | 1141/3568 [12:06<25:06,  1.61it/s]

Epoch [1/10], Step [1141/3567], Loss: 2.9446


Epoch 1/10:  32%|███▏      | 1151/3568 [12:13<24:57,  1.61it/s]

Epoch [1/10], Step [1151/3567], Loss: 2.9446


Epoch 1/10:  33%|███▎      | 1161/3568 [12:19<24:57,  1.61it/s]

Epoch [1/10], Step [1161/3567], Loss: 2.9446


Epoch 1/10:  33%|███▎      | 1171/3568 [12:26<31:32,  1.27it/s]

Epoch [1/10], Step [1171/3567], Loss: 2.9446


Epoch 1/10:  33%|███▎      | 1181/3568 [12:34<33:43,  1.18it/s]

Epoch [1/10], Step [1181/3567], Loss: 2.9446


Epoch 1/10:  33%|███▎      | 1191/3568 [12:43<33:15,  1.19it/s]

Epoch [1/10], Step [1191/3567], Loss: 2.9446


Epoch 1/10:  34%|███▎      | 1201/3568 [12:51<32:57,  1.20it/s]

Epoch [1/10], Step [1201/3567], Loss: 2.9446


Epoch 1/10:  34%|███▍      | 1211/3568 [12:58<27:21,  1.44it/s]

Epoch [1/10], Step [1211/3567], Loss: 2.9446


Epoch 1/10:  34%|███▍      | 1221/3568 [13:05<24:26,  1.60it/s]

Epoch [1/10], Step [1221/3567], Loss: 2.9446


Epoch 1/10:  35%|███▍      | 1231/3568 [13:11<24:06,  1.62it/s]

Epoch [1/10], Step [1231/3567], Loss: 2.9446


Epoch 1/10:  35%|███▍      | 1241/3568 [13:17<24:27,  1.59it/s]

Epoch [1/10], Step [1241/3567], Loss: 2.9446


Epoch 1/10:  35%|███▌      | 1251/3568 [13:23<24:35,  1.57it/s]

Epoch [1/10], Step [1251/3567], Loss: 2.9446


Epoch 1/10:  35%|███▌      | 1261/3568 [13:30<23:59,  1.60it/s]

Epoch [1/10], Step [1261/3567], Loss: 2.9446


Epoch 1/10:  36%|███▌      | 1271/3568 [13:36<24:04,  1.59it/s]

Epoch [1/10], Step [1271/3567], Loss: 2.9446


Epoch 1/10:  36%|███▌      | 1281/3568 [13:44<31:53,  1.20it/s]

Epoch [1/10], Step [1281/3567], Loss: 2.9446


Epoch 1/10:  36%|███▌      | 1291/3568 [13:52<31:48,  1.19it/s]

Epoch [1/10], Step [1291/3567], Loss: 2.9446


Epoch 1/10:  36%|███▋      | 1301/3568 [14:01<31:50,  1.19it/s]

Epoch [1/10], Step [1301/3567], Loss: 2.9446


Epoch 1/10:  37%|███▋      | 1311/3568 [14:09<31:09,  1.21it/s]

Epoch [1/10], Step [1311/3567], Loss: 2.9446


Epoch 1/10:  37%|███▋      | 1321/3568 [14:15<24:29,  1.53it/s]

Epoch [1/10], Step [1321/3567], Loss: 2.9446


Epoch 1/10:  37%|███▋      | 1331/3568 [14:22<24:55,  1.50it/s]

Epoch [1/10], Step [1331/3567], Loss: 2.9446


Epoch 1/10:  38%|███▊      | 1341/3568 [14:28<23:31,  1.58it/s]

Epoch [1/10], Step [1341/3567], Loss: 2.9446


Epoch 1/10:  38%|███▊      | 1351/3568 [14:34<23:40,  1.56it/s]

Epoch [1/10], Step [1351/3567], Loss: 2.9446


Epoch 1/10:  38%|███▊      | 1361/3568 [14:41<23:00,  1.60it/s]

Epoch [1/10], Step [1361/3567], Loss: 2.9446


Epoch 1/10:  38%|███▊      | 1371/3568 [14:47<22:53,  1.60it/s]

Epoch [1/10], Step [1371/3567], Loss: 2.9446


Epoch 1/10:  39%|███▊      | 1381/3568 [14:53<22:42,  1.60it/s]

Epoch [1/10], Step [1381/3567], Loss: 2.9446


Epoch 1/10:  39%|███▉      | 1391/3568 [15:01<29:05,  1.25it/s]

Epoch [1/10], Step [1391/3567], Loss: 2.9446


Epoch 1/10:  39%|███▉      | 1401/3568 [15:09<30:45,  1.17it/s]

Epoch [1/10], Step [1401/3567], Loss: 2.9446


Epoch 1/10:  40%|███▉      | 1411/3568 [15:17<30:13,  1.19it/s]

Epoch [1/10], Step [1411/3567], Loss: 2.9446


Epoch 1/10:  40%|███▉      | 1421/3568 [15:26<29:27,  1.21it/s]

Epoch [1/10], Step [1421/3567], Loss: 2.9446


Epoch 1/10:  40%|████      | 1431/3568 [15:32<22:51,  1.56it/s]

Epoch [1/10], Step [1431/3567], Loss: 2.9446


Epoch 1/10:  40%|████      | 1441/3568 [15:39<22:13,  1.60it/s]

Epoch [1/10], Step [1441/3567], Loss: 2.9446


Epoch 1/10:  41%|████      | 1451/3568 [15:45<21:47,  1.62it/s]

Epoch [1/10], Step [1451/3567], Loss: 2.9446


Epoch 1/10:  41%|████      | 1461/3568 [15:51<21:49,  1.61it/s]

Epoch [1/10], Step [1461/3567], Loss: 2.9446


Epoch 1/10:  41%|████      | 1471/3568 [15:58<21:59,  1.59it/s]

Epoch [1/10], Step [1471/3567], Loss: 2.9446


Epoch 1/10:  42%|████▏     | 1481/3568 [16:04<21:44,  1.60it/s]

Epoch [1/10], Step [1481/3567], Loss: 2.9446


Epoch 1/10:  42%|████▏     | 1491/3568 [16:10<21:20,  1.62it/s]

Epoch [1/10], Step [1491/3567], Loss: 2.9446


Epoch 1/10:  42%|████▏     | 1501/3568 [16:16<21:58,  1.57it/s]

Epoch [1/10], Step [1501/3567], Loss: 2.9446


Epoch 1/10:  42%|████▏     | 1511/3568 [16:23<21:48,  1.57it/s]

Epoch [1/10], Step [1511/3567], Loss: 2.9446


Epoch 1/10:  43%|████▎     | 1521/3568 [16:29<21:35,  1.58it/s]

Epoch [1/10], Step [1521/3567], Loss: 2.9446


Epoch 1/10:  43%|████▎     | 1531/3568 [16:35<21:26,  1.58it/s]

Epoch [1/10], Step [1531/3567], Loss: 2.9446


Epoch 1/10:  43%|████▎     | 1541/3568 [16:41<21:03,  1.60it/s]

Epoch [1/10], Step [1541/3567], Loss: 2.9446


Epoch 1/10:  43%|████▎     | 1551/3568 [16:48<20:50,  1.61it/s]

Epoch [1/10], Step [1551/3567], Loss: 2.9446


Epoch 1/10:  44%|████▍     | 1561/3568 [16:54<20:36,  1.62it/s]

Epoch [1/10], Step [1561/3567], Loss: 2.9446


Epoch 1/10:  44%|████▍     | 1571/3568 [17:00<21:11,  1.57it/s]

Epoch [1/10], Step [1571/3567], Loss: 2.9446


Epoch 1/10:  44%|████▍     | 1581/3568 [17:07<22:48,  1.45it/s]

Epoch [1/10], Step [1581/3567], Loss: 2.9446


Epoch 1/10:  45%|████▍     | 1591/3568 [17:15<27:11,  1.21it/s]

Epoch [1/10], Step [1591/3567], Loss: 2.9446


Epoch 1/10:  45%|████▍     | 1601/3568 [17:23<26:38,  1.23it/s]

Epoch [1/10], Step [1601/3567], Loss: 2.9446


Epoch 1/10:  45%|████▌     | 1611/3568 [17:32<26:33,  1.23it/s]

Epoch [1/10], Step [1611/3567], Loss: 2.9446


Epoch 1/10:  45%|████▌     | 1621/3568 [17:40<24:43,  1.31it/s]

Epoch [1/10], Step [1621/3567], Loss: 2.9446


Epoch 1/10:  46%|████▌     | 1631/3568 [17:46<21:07,  1.53it/s]

Epoch [1/10], Step [1631/3567], Loss: 2.9446


Epoch 1/10:  46%|████▌     | 1641/3568 [17:52<19:44,  1.63it/s]

Epoch [1/10], Step [1641/3567], Loss: 2.9446


Epoch 1/10:  46%|████▋     | 1651/3568 [17:59<20:09,  1.59it/s]

Epoch [1/10], Step [1651/3567], Loss: 2.9446


Epoch 1/10:  47%|████▋     | 1661/3568 [18:05<20:27,  1.55it/s]

Epoch [1/10], Step [1661/3567], Loss: 2.9446


Epoch 1/10:  47%|████▋     | 1671/3568 [18:11<20:03,  1.58it/s]

Epoch [1/10], Step [1671/3567], Loss: 2.9446


Epoch 1/10:  47%|████▋     | 1681/3568 [18:18<19:48,  1.59it/s]

Epoch [1/10], Step [1681/3567], Loss: 2.9446


Epoch 1/10:  47%|████▋     | 1691/3568 [18:24<19:35,  1.60it/s]

Epoch [1/10], Step [1691/3567], Loss: 2.9446


Epoch 1/10:  48%|████▊     | 1701/3568 [18:30<19:26,  1.60it/s]

Epoch [1/10], Step [1701/3567], Loss: 2.9446


Epoch 1/10:  48%|████▊     | 1711/3568 [18:36<19:14,  1.61it/s]

Epoch [1/10], Step [1711/3567], Loss: 2.9446


Epoch 1/10:  48%|████▊     | 1721/3568 [18:43<19:20,  1.59it/s]

Epoch [1/10], Step [1721/3567], Loss: 2.9446


Epoch 1/10:  49%|████▊     | 1731/3568 [18:49<18:52,  1.62it/s]

Epoch [1/10], Step [1731/3567], Loss: 2.9446


Epoch 1/10:  49%|████▉     | 1741/3568 [18:55<19:30,  1.56it/s]

Epoch [1/10], Step [1741/3567], Loss: 2.9446


Epoch 1/10:  49%|████▉     | 1751/3568 [19:02<19:06,  1.58it/s]

Epoch [1/10], Step [1751/3567], Loss: 2.9446


Epoch 1/10:  49%|████▉     | 1761/3568 [19:08<18:50,  1.60it/s]

Epoch [1/10], Step [1761/3567], Loss: 2.9446


Epoch 1/10:  50%|████▉     | 1771/3568 [19:14<19:17,  1.55it/s]

Epoch [1/10], Step [1771/3567], Loss: 2.9446


Epoch 1/10:  50%|████▉     | 1781/3568 [19:21<18:37,  1.60it/s]

Epoch [1/10], Step [1781/3567], Loss: 2.9446


Epoch 1/10:  50%|█████     | 1791/3568 [19:27<18:23,  1.61it/s]

Epoch [1/10], Step [1791/3567], Loss: 2.9446


Epoch 1/10:  50%|█████     | 1801/3568 [19:33<18:20,  1.60it/s]

Epoch [1/10], Step [1801/3567], Loss: 2.9446


Epoch 1/10:  51%|█████     | 1811/3568 [19:39<18:30,  1.58it/s]

Epoch [1/10], Step [1811/3567], Loss: 2.9446


Epoch 1/10:  51%|█████     | 1821/3568 [19:46<18:37,  1.56it/s]

Epoch [1/10], Step [1821/3567], Loss: 2.9446


Epoch 1/10:  51%|█████▏    | 1831/3568 [19:52<18:52,  1.53it/s]

Epoch [1/10], Step [1831/3567], Loss: 2.9446


Epoch 1/10:  52%|█████▏    | 1841/3568 [19:58<17:53,  1.61it/s]

Epoch [1/10], Step [1841/3567], Loss: 2.9446


Epoch 1/10:  52%|█████▏    | 1851/3568 [20:05<18:01,  1.59it/s]

Epoch [1/10], Step [1851/3567], Loss: 2.9446


Epoch 1/10:  52%|█████▏    | 1861/3568 [20:11<17:43,  1.61it/s]

Epoch [1/10], Step [1861/3567], Loss: 2.9446


Epoch 1/10:  52%|█████▏    | 1871/3568 [20:17<17:32,  1.61it/s]

Epoch [1/10], Step [1871/3567], Loss: 2.9446


Epoch 1/10:  53%|█████▎    | 1881/3568 [20:23<17:40,  1.59it/s]

Epoch [1/10], Step [1881/3567], Loss: 2.9446


Epoch 1/10:  53%|█████▎    | 1891/3568 [20:29<17:24,  1.61it/s]

Epoch [1/10], Step [1891/3567], Loss: 2.9446


Epoch 1/10:  53%|█████▎    | 1901/3568 [20:36<17:31,  1.59it/s]

Epoch [1/10], Step [1901/3567], Loss: 2.9446


Epoch 1/10:  54%|█████▎    | 1911/3568 [20:42<17:19,  1.59it/s]

Epoch [1/10], Step [1911/3567], Loss: 2.9446


Epoch 1/10:  54%|█████▍    | 1921/3568 [20:48<17:12,  1.59it/s]

Epoch [1/10], Step [1921/3567], Loss: 2.9446


Epoch 1/10:  54%|█████▍    | 1931/3568 [20:55<17:16,  1.58it/s]

Epoch [1/10], Step [1931/3567], Loss: 2.9446


Epoch 1/10:  54%|█████▍    | 1941/3568 [21:01<16:45,  1.62it/s]

Epoch [1/10], Step [1941/3567], Loss: 2.9446


Epoch 1/10:  55%|█████▍    | 1951/3568 [21:07<16:50,  1.60it/s]

Epoch [1/10], Step [1951/3567], Loss: 2.9446


Epoch 1/10:  55%|█████▍    | 1961/3568 [21:14<16:38,  1.61it/s]

Epoch [1/10], Step [1961/3567], Loss: 2.9446


Epoch 1/10:  55%|█████▌    | 1971/3568 [21:20<16:42,  1.59it/s]

Epoch [1/10], Step [1971/3567], Loss: 2.9446


Epoch 1/10:  56%|█████▌    | 1981/3568 [21:27<20:59,  1.26it/s]

Epoch [1/10], Step [1981/3567], Loss: 2.9446


Epoch 1/10:  56%|█████▌    | 1991/3568 [21:36<22:31,  1.17it/s]

Epoch [1/10], Step [1991/3567], Loss: 2.9446


Epoch 1/10:  56%|█████▌    | 2001/3568 [21:44<22:37,  1.15it/s]

Epoch [1/10], Step [2001/3567], Loss: 2.9446


Epoch 1/10:  56%|█████▋    | 2011/3568 [21:53<21:48,  1.19it/s]

Epoch [1/10], Step [2011/3567], Loss: 2.9446


Epoch 1/10:  57%|█████▋    | 2021/3568 [22:00<16:46,  1.54it/s]

Epoch [1/10], Step [2021/3567], Loss: 2.9446


Epoch 1/10:  57%|█████▋    | 2031/3568 [22:06<16:51,  1.52it/s]

Epoch [1/10], Step [2031/3567], Loss: 2.9446


Epoch 1/10:  57%|█████▋    | 2041/3568 [22:12<16:07,  1.58it/s]

Epoch [1/10], Step [2041/3567], Loss: 2.9446


Epoch 1/10:  57%|█████▋    | 2051/3568 [22:19<16:02,  1.58it/s]

Epoch [1/10], Step [2051/3567], Loss: 2.9446


Epoch 1/10:  58%|█████▊    | 2061/3568 [22:25<16:18,  1.54it/s]

Epoch [1/10], Step [2061/3567], Loss: 2.9446


Epoch 1/10:  58%|█████▊    | 2071/3568 [22:32<15:41,  1.59it/s]

Epoch [1/10], Step [2071/3567], Loss: 2.9446


Epoch 1/10:  58%|█████▊    | 2081/3568 [22:38<15:27,  1.60it/s]

Epoch [1/10], Step [2081/3567], Loss: 2.9446


Epoch 1/10:  59%|█████▊    | 2091/3568 [22:44<15:17,  1.61it/s]

Epoch [1/10], Step [2091/3567], Loss: 2.9446


Epoch 1/10:  59%|█████▉    | 2101/3568 [22:50<15:32,  1.57it/s]

Epoch [1/10], Step [2101/3567], Loss: 2.9446


Epoch 1/10:  59%|█████▉    | 2111/3568 [22:57<15:20,  1.58it/s]

Epoch [1/10], Step [2111/3567], Loss: 2.9446


Epoch 1/10:  59%|█████▉    | 2121/3568 [23:03<14:50,  1.63it/s]

Epoch [1/10], Step [2121/3567], Loss: 2.9446


Epoch 1/10:  60%|█████▉    | 2131/3568 [23:09<14:59,  1.60it/s]

Epoch [1/10], Step [2131/3567], Loss: 2.9446


Epoch 1/10:  60%|██████    | 2141/3568 [23:15<15:01,  1.58it/s]

Epoch [1/10], Step [2141/3567], Loss: 2.9446


Epoch 1/10:  60%|██████    | 2151/3568 [23:22<14:46,  1.60it/s]

Epoch [1/10], Step [2151/3567], Loss: 2.9446


Epoch 1/10:  61%|██████    | 2161/3568 [23:28<15:01,  1.56it/s]

Epoch [1/10], Step [2161/3567], Loss: 2.9446


Epoch 1/10:  61%|██████    | 2171/3568 [23:34<14:34,  1.60it/s]

Epoch [1/10], Step [2171/3567], Loss: 2.9446


Epoch 1/10:  61%|██████    | 2181/3568 [23:41<14:30,  1.59it/s]

Epoch [1/10], Step [2181/3567], Loss: 2.9446


Epoch 1/10:  61%|██████▏   | 2191/3568 [23:47<14:23,  1.59it/s]

Epoch [1/10], Step [2191/3567], Loss: 2.9446


Epoch 1/10:  62%|██████▏   | 2201/3568 [23:53<14:19,  1.59it/s]

Epoch [1/10], Step [2201/3567], Loss: 2.9446


Epoch 1/10:  62%|██████▏   | 2211/3568 [24:00<14:12,  1.59it/s]

Epoch [1/10], Step [2211/3567], Loss: 2.9446


Epoch 1/10:  62%|██████▏   | 2221/3568 [24:06<14:02,  1.60it/s]

Epoch [1/10], Step [2221/3567], Loss: 2.9446


Epoch 1/10:  63%|██████▎   | 2231/3568 [24:12<13:54,  1.60it/s]

Epoch [1/10], Step [2231/3567], Loss: 2.9446


Epoch 1/10:  63%|██████▎   | 2241/3568 [24:18<14:03,  1.57it/s]

Epoch [1/10], Step [2241/3567], Loss: 2.9446


Epoch 1/10:  63%|██████▎   | 2251/3568 [24:25<13:45,  1.59it/s]

Epoch [1/10], Step [2251/3567], Loss: 2.9446


Epoch 1/10:  63%|██████▎   | 2261/3568 [24:31<13:36,  1.60it/s]

Epoch [1/10], Step [2261/3567], Loss: 2.9446


Epoch 1/10:  64%|██████▎   | 2271/3568 [24:37<13:37,  1.59it/s]

Epoch [1/10], Step [2271/3567], Loss: 2.9446


Epoch 1/10:  64%|██████▍   | 2281/3568 [24:44<15:05,  1.42it/s]

Epoch [1/10], Step [2281/3567], Loss: 2.9446


Epoch 1/10:  64%|██████▍   | 2291/3568 [24:50<13:24,  1.59it/s]

Epoch [1/10], Step [2291/3567], Loss: 2.9446


Epoch 1/10:  64%|██████▍   | 2301/3568 [24:57<17:09,  1.23it/s]

Epoch [1/10], Step [2301/3567], Loss: 2.9446


Epoch 1/10:  65%|██████▍   | 2311/3568 [25:06<16:55,  1.24it/s]

Epoch [1/10], Step [2311/3567], Loss: 2.9446


Epoch 1/10:  65%|██████▌   | 2321/3568 [25:14<17:02,  1.22it/s]

Epoch [1/10], Step [2321/3567], Loss: 2.9446


Epoch 1/10:  65%|██████▌   | 2331/3568 [25:22<16:46,  1.23it/s]

Epoch [1/10], Step [2331/3567], Loss: 2.9446


Epoch 1/10:  66%|██████▌   | 2341/3568 [25:29<13:18,  1.54it/s]

Epoch [1/10], Step [2341/3567], Loss: 2.9446


Epoch 1/10:  66%|██████▌   | 2351/3568 [25:36<12:37,  1.61it/s]

Epoch [1/10], Step [2351/3567], Loss: 2.9446


Epoch 1/10:  66%|██████▌   | 2361/3568 [25:42<12:51,  1.57it/s]

Epoch [1/10], Step [2361/3567], Loss: 2.9446


Epoch 1/10:  66%|██████▋   | 2371/3568 [25:48<12:32,  1.59it/s]

Epoch [1/10], Step [2371/3567], Loss: 2.9446


Epoch 1/10:  67%|██████▋   | 2381/3568 [25:55<12:13,  1.62it/s]

Epoch [1/10], Step [2381/3567], Loss: 2.9446


Epoch 1/10:  67%|██████▋   | 2391/3568 [26:01<12:24,  1.58it/s]

Epoch [1/10], Step [2391/3567], Loss: 2.9446


Epoch 1/10:  67%|██████▋   | 2401/3568 [26:07<12:04,  1.61it/s]

Epoch [1/10], Step [2401/3567], Loss: 2.9446


Epoch 1/10:  68%|██████▊   | 2411/3568 [26:13<11:47,  1.64it/s]

Epoch [1/10], Step [2411/3567], Loss: 2.9446


Epoch 1/10:  68%|██████▊   | 2421/3568 [26:20<11:59,  1.59it/s]

Epoch [1/10], Step [2421/3567], Loss: 2.9446


Epoch 1/10:  68%|██████▊   | 2431/3568 [26:26<11:50,  1.60it/s]

Epoch [1/10], Step [2431/3567], Loss: 2.9446


Epoch 1/10:  68%|██████▊   | 2441/3568 [26:32<12:11,  1.54it/s]

Epoch [1/10], Step [2441/3567], Loss: 2.9446


Epoch 1/10:  69%|██████▊   | 2451/3568 [26:38<11:40,  1.59it/s]

Epoch [1/10], Step [2451/3567], Loss: 2.9446


Epoch 1/10:  69%|██████▉   | 2461/3568 [26:45<11:46,  1.57it/s]

Epoch [1/10], Step [2461/3567], Loss: 2.9446


Epoch 1/10:  69%|██████▉   | 2471/3568 [26:51<11:24,  1.60it/s]

Epoch [1/10], Step [2471/3567], Loss: 2.9446


Epoch 1/10:  70%|██████▉   | 2481/3568 [26:57<11:21,  1.60it/s]

Epoch [1/10], Step [2481/3567], Loss: 2.9446


Epoch 1/10:  70%|██████▉   | 2491/3568 [27:04<11:35,  1.55it/s]

Epoch [1/10], Step [2491/3567], Loss: 2.9446


Epoch 1/10:  70%|███████   | 2501/3568 [27:10<11:01,  1.61it/s]

Epoch [1/10], Step [2501/3567], Loss: 2.9446


Epoch 1/10:  70%|███████   | 2511/3568 [27:16<11:25,  1.54it/s]

Epoch [1/10], Step [2511/3567], Loss: 2.9446


Epoch 1/10:  71%|███████   | 2521/3568 [27:23<10:59,  1.59it/s]

Epoch [1/10], Step [2521/3567], Loss: 2.9446


Epoch 1/10:  71%|███████   | 2531/3568 [27:29<10:49,  1.60it/s]

Epoch [1/10], Step [2531/3567], Loss: 2.9446


Epoch 1/10:  71%|███████   | 2541/3568 [27:35<10:55,  1.57it/s]

Epoch [1/10], Step [2541/3567], Loss: 2.9446


Epoch 1/10:  71%|███████▏  | 2551/3568 [27:41<10:39,  1.59it/s]

Epoch [1/10], Step [2551/3567], Loss: 2.9446


Epoch 1/10:  72%|███████▏  | 2561/3568 [27:48<10:36,  1.58it/s]

Epoch [1/10], Step [2561/3567], Loss: 2.9446


Epoch 1/10:  72%|███████▏  | 2571/3568 [27:54<10:26,  1.59it/s]

Epoch [1/10], Step [2571/3567], Loss: 2.9446


Epoch 1/10:  72%|███████▏  | 2581/3568 [28:00<10:26,  1.57it/s]

Epoch [1/10], Step [2581/3567], Loss: 2.9446


Epoch 1/10:  73%|███████▎  | 2591/3568 [28:07<10:12,  1.60it/s]

Epoch [1/10], Step [2591/3567], Loss: 2.9446


Epoch 1/10:  73%|███████▎  | 2601/3568 [28:13<10:09,  1.59it/s]

Epoch [1/10], Step [2601/3567], Loss: 2.9446


Epoch 1/10:  73%|███████▎  | 2611/3568 [28:19<10:04,  1.58it/s]

Epoch [1/10], Step [2611/3567], Loss: 2.9446


Epoch 1/10:  73%|███████▎  | 2621/3568 [28:26<09:49,  1.61it/s]

Epoch [1/10], Step [2621/3567], Loss: 2.9446


Epoch 1/10:  74%|███████▎  | 2631/3568 [28:32<09:56,  1.57it/s]

Epoch [1/10], Step [2631/3567], Loss: 2.9446


Epoch 1/10:  74%|███████▍  | 2641/3568 [28:38<09:45,  1.58it/s]

Epoch [1/10], Step [2641/3567], Loss: 2.9446


Epoch 1/10:  74%|███████▍  | 2651/3568 [28:45<09:43,  1.57it/s]

Epoch [1/10], Step [2651/3567], Loss: 2.9446


Epoch 1/10:  75%|███████▍  | 2661/3568 [28:51<09:38,  1.57it/s]

Epoch [1/10], Step [2661/3567], Loss: 2.9446


Epoch 1/10:  75%|███████▍  | 2671/3568 [28:57<09:12,  1.62it/s]

Epoch [1/10], Step [2671/3567], Loss: 2.9446


Epoch 1/10:  75%|███████▌  | 2681/3568 [29:03<09:17,  1.59it/s]

Epoch [1/10], Step [2681/3567], Loss: 2.9446


Epoch 1/10:  75%|███████▌  | 2691/3568 [29:09<09:06,  1.60it/s]

Epoch [1/10], Step [2691/3567], Loss: 2.9446


Epoch 1/10:  76%|███████▌  | 2701/3568 [29:17<11:36,  1.24it/s]

Epoch [1/10], Step [2701/3567], Loss: 2.9446


Epoch 1/10:  76%|███████▌  | 2711/3568 [29:25<11:32,  1.24it/s]

Epoch [1/10], Step [2711/3567], Loss: 2.9446


Epoch 1/10:  76%|███████▋  | 2721/3568 [29:33<11:38,  1.21it/s]

Epoch [1/10], Step [2721/3567], Loss: 2.9446


Epoch 1/10:  77%|███████▋  | 2731/3568 [29:42<11:21,  1.23it/s]

Epoch [1/10], Step [2731/3567], Loss: 2.9446


Epoch 1/10:  77%|███████▋  | 2741/3568 [29:49<09:08,  1.51it/s]

Epoch [1/10], Step [2741/3567], Loss: 2.9446


Epoch 1/10:  77%|███████▋  | 2751/3568 [29:55<08:32,  1.60it/s]

Epoch [1/10], Step [2751/3567], Loss: 2.9446


Epoch 1/10:  77%|███████▋  | 2761/3568 [30:01<08:32,  1.58it/s]

Epoch [1/10], Step [2761/3567], Loss: 2.9446


Epoch 1/10:  78%|███████▊  | 2771/3568 [30:08<08:19,  1.60it/s]

Epoch [1/10], Step [2771/3567], Loss: 2.9446


Epoch 1/10:  78%|███████▊  | 2781/3568 [30:14<08:13,  1.59it/s]

Epoch [1/10], Step [2781/3567], Loss: 2.9446


Epoch 1/10:  78%|███████▊  | 2791/3568 [30:20<08:14,  1.57it/s]

Epoch [1/10], Step [2791/3567], Loss: 2.9446


Epoch 1/10:  79%|███████▊  | 2801/3568 [30:26<08:04,  1.58it/s]

Epoch [1/10], Step [2801/3567], Loss: 2.9446


Epoch 1/10:  79%|███████▉  | 2811/3568 [30:33<07:50,  1.61it/s]

Epoch [1/10], Step [2811/3567], Loss: 2.9446


Epoch 1/10:  79%|███████▉  | 2821/3568 [30:39<07:42,  1.62it/s]

Epoch [1/10], Step [2821/3567], Loss: 2.9446


Epoch 1/10:  79%|███████▉  | 2831/3568 [30:45<07:44,  1.59it/s]

Epoch [1/10], Step [2831/3567], Loss: 2.9446


Epoch 1/10:  80%|███████▉  | 2841/3568 [30:52<07:50,  1.55it/s]

Epoch [1/10], Step [2841/3567], Loss: 2.9446


Epoch 1/10:  80%|███████▉  | 2851/3568 [30:59<09:56,  1.20it/s]

Epoch [1/10], Step [2851/3567], Loss: 2.9446


Epoch 1/10:  80%|████████  | 2861/3568 [31:08<09:48,  1.20it/s]

Epoch [1/10], Step [2861/3567], Loss: 2.9446


Epoch 1/10:  80%|████████  | 2871/3568 [31:16<09:45,  1.19it/s]

Epoch [1/10], Step [2871/3567], Loss: 2.9446


Epoch 1/10:  81%|████████  | 2881/3568 [31:24<09:31,  1.20it/s]

Epoch [1/10], Step [2881/3567], Loss: 2.9446


Epoch 1/10:  81%|████████  | 2891/3568 [31:31<07:17,  1.55it/s]

Epoch [1/10], Step [2891/3567], Loss: 2.9446


Epoch 1/10:  81%|████████▏ | 2901/3568 [31:37<07:00,  1.58it/s]

Epoch [1/10], Step [2901/3567], Loss: 2.9446


Epoch 1/10:  82%|████████▏ | 2911/3568 [31:43<06:57,  1.57it/s]

Epoch [1/10], Step [2911/3567], Loss: 2.9446


Epoch 1/10:  82%|████████▏ | 2921/3568 [31:50<06:43,  1.61it/s]

Epoch [1/10], Step [2921/3567], Loss: 2.9446


Epoch 1/10:  82%|████████▏ | 2931/3568 [31:56<06:38,  1.60it/s]

Epoch [1/10], Step [2931/3567], Loss: 2.9446


Epoch 1/10:  82%|████████▏ | 2941/3568 [32:02<06:45,  1.55it/s]

Epoch [1/10], Step [2941/3567], Loss: 2.9446


Epoch 1/10:  83%|████████▎ | 2951/3568 [32:09<06:26,  1.59it/s]

Epoch [1/10], Step [2951/3567], Loss: 2.9446


Epoch 1/10:  83%|████████▎ | 2961/3568 [32:15<06:18,  1.60it/s]

Epoch [1/10], Step [2961/3567], Loss: 2.9446


Epoch 1/10:  83%|████████▎ | 2971/3568 [32:21<06:08,  1.62it/s]

Epoch [1/10], Step [2971/3567], Loss: 2.9446


Epoch 1/10:  84%|████████▎ | 2981/3568 [32:28<06:08,  1.59it/s]

Epoch [1/10], Step [2981/3567], Loss: 2.9446


Epoch 1/10:  84%|████████▍ | 2991/3568 [32:34<06:01,  1.60it/s]

Epoch [1/10], Step [2991/3567], Loss: 2.9446


Epoch 1/10:  84%|████████▍ | 3001/3568 [32:40<06:02,  1.57it/s]

Epoch [1/10], Step [3001/3567], Loss: 2.9446


Epoch 1/10:  84%|████████▍ | 3011/3568 [32:47<05:50,  1.59it/s]

Epoch [1/10], Step [3011/3567], Loss: 2.9446


Epoch 1/10:  85%|████████▍ | 3021/3568 [32:53<05:39,  1.61it/s]

Epoch [1/10], Step [3021/3567], Loss: 2.9446


Epoch 1/10:  85%|████████▍ | 3031/3568 [32:59<05:38,  1.59it/s]

Epoch [1/10], Step [3031/3567], Loss: 2.9446


Epoch 1/10:  85%|████████▌ | 3041/3568 [33:06<05:41,  1.54it/s]

Epoch [1/10], Step [3041/3567], Loss: 2.9446


Epoch 1/10:  86%|████████▌ | 3051/3568 [33:12<05:23,  1.60it/s]

Epoch [1/10], Step [3051/3567], Loss: 2.9446


Epoch 1/10:  86%|████████▌ | 3061/3568 [33:18<05:11,  1.63it/s]

Epoch [1/10], Step [3061/3567], Loss: 2.9446


Epoch 1/10:  86%|████████▌ | 3071/3568 [33:24<05:21,  1.55it/s]

Epoch [1/10], Step [3071/3567], Loss: 2.9446


Epoch 1/10:  86%|████████▋ | 3081/3568 [33:31<05:13,  1.55it/s]

Epoch [1/10], Step [3081/3567], Loss: 2.9446


Epoch 1/10:  87%|████████▋ | 3091/3568 [33:37<04:58,  1.60it/s]

Epoch [1/10], Step [3091/3567], Loss: 2.9446


Epoch 1/10:  87%|████████▋ | 3101/3568 [33:43<04:55,  1.58it/s]

Epoch [1/10], Step [3101/3567], Loss: 2.9446


Epoch 1/10:  87%|████████▋ | 3111/3568 [33:49<04:43,  1.61it/s]

Epoch [1/10], Step [3111/3567], Loss: 2.9446


Epoch 1/10:  87%|████████▋ | 3121/3568 [33:55<04:31,  1.65it/s]

Epoch [1/10], Step [3121/3567], Loss: 2.9446


Epoch 1/10:  88%|████████▊ | 3131/3568 [34:02<04:44,  1.54it/s]

Epoch [1/10], Step [3131/3567], Loss: 2.9446


Epoch 1/10:  88%|████████▊ | 3141/3568 [34:10<05:56,  1.20it/s]

Epoch [1/10], Step [3141/3567], Loss: 2.9446


Epoch 1/10:  88%|████████▊ | 3151/3568 [34:19<06:03,  1.15it/s]

Epoch [1/10], Step [3151/3567], Loss: 2.9446


Epoch 1/10:  89%|████████▊ | 3161/3568 [34:27<05:42,  1.19it/s]

Epoch [1/10], Step [3161/3567], Loss: 2.9446


Epoch 1/10:  89%|████████▉ | 3171/3568 [34:35<05:34,  1.19it/s]

Epoch [1/10], Step [3171/3567], Loss: 2.9446


Epoch 1/10:  89%|████████▉ | 3181/3568 [34:42<04:06,  1.57it/s]

Epoch [1/10], Step [3181/3567], Loss: 2.9446


Epoch 1/10:  89%|████████▉ | 3191/3568 [34:49<04:02,  1.56it/s]

Epoch [1/10], Step [3191/3567], Loss: 2.9446


Epoch 1/10:  90%|████████▉ | 3201/3568 [34:55<03:47,  1.61it/s]

Epoch [1/10], Step [3201/3567], Loss: 2.9446


Epoch 1/10:  90%|████████▉ | 3211/3568 [35:01<03:43,  1.59it/s]

Epoch [1/10], Step [3211/3567], Loss: 2.9446


Epoch 1/10:  90%|█████████ | 3221/3568 [35:07<03:35,  1.61it/s]

Epoch [1/10], Step [3221/3567], Loss: 2.9446


Epoch 1/10:  91%|█████████ | 3231/3568 [35:14<03:29,  1.61it/s]

Epoch [1/10], Step [3231/3567], Loss: 2.9446


Epoch 1/10:  91%|█████████ | 3241/3568 [35:20<03:22,  1.61it/s]

Epoch [1/10], Step [3241/3567], Loss: 2.9446


Epoch 1/10:  91%|█████████ | 3251/3568 [35:26<03:19,  1.59it/s]

Epoch [1/10], Step [3251/3567], Loss: 2.9446


Epoch 1/10:  91%|█████████▏| 3261/3568 [35:33<03:13,  1.58it/s]

Epoch [1/10], Step [3261/3567], Loss: 2.9446


Epoch 1/10:  92%|█████████▏| 3271/3568 [35:39<03:06,  1.59it/s]

Epoch [1/10], Step [3271/3567], Loss: 2.9446


Epoch 1/10:  92%|█████████▏| 3281/3568 [35:45<02:54,  1.64it/s]

Epoch [1/10], Step [3281/3567], Loss: 2.9446


Epoch 1/10:  92%|█████████▏| 3291/3568 [35:51<02:53,  1.60it/s]

Epoch [1/10], Step [3291/3567], Loss: 2.9446


Epoch 1/10:  93%|█████████▎| 3301/3568 [35:57<02:45,  1.61it/s]

Epoch [1/10], Step [3301/3567], Loss: 2.9446


Epoch 1/10:  93%|█████████▎| 3311/3568 [36:04<02:39,  1.61it/s]

Epoch [1/10], Step [3311/3567], Loss: 2.9446


Epoch 1/10:  93%|█████████▎| 3321/3568 [36:10<02:35,  1.59it/s]

Epoch [1/10], Step [3321/3567], Loss: 2.9446


Epoch 1/10:  93%|█████████▎| 3331/3568 [36:16<02:29,  1.59it/s]

Epoch [1/10], Step [3331/3567], Loss: 2.9446


Epoch 1/10:  94%|█████████▎| 3341/3568 [36:23<02:27,  1.54it/s]

Epoch [1/10], Step [3341/3567], Loss: 2.9446


Epoch 1/10:  94%|█████████▍| 3351/3568 [36:29<02:17,  1.58it/s]

Epoch [1/10], Step [3351/3567], Loss: 2.9446


Epoch 1/10:  94%|█████████▍| 3361/3568 [36:35<02:10,  1.58it/s]

Epoch [1/10], Step [3361/3567], Loss: 2.9446


Epoch 1/10:  94%|█████████▍| 3371/3568 [36:42<02:04,  1.58it/s]

Epoch [1/10], Step [3371/3567], Loss: 2.9446


Epoch 1/10:  95%|█████████▍| 3381/3568 [36:48<02:00,  1.55it/s]

Epoch [1/10], Step [3381/3567], Loss: 2.9446


Epoch 1/10:  95%|█████████▌| 3391/3568 [36:54<01:51,  1.59it/s]

Epoch [1/10], Step [3391/3567], Loss: 2.9446


Epoch 1/10:  95%|█████████▌| 3401/3568 [37:01<01:44,  1.59it/s]

Epoch [1/10], Step [3401/3567], Loss: 2.9446


Epoch 1/10:  96%|█████████▌| 3411/3568 [37:07<01:43,  1.51it/s]

Epoch [1/10], Step [3411/3567], Loss: 2.9446


Epoch 1/10:  96%|█████████▌| 3421/3568 [37:13<01:32,  1.59it/s]

Epoch [1/10], Step [3421/3567], Loss: 2.9446


Epoch 1/10:  96%|█████████▌| 3431/3568 [37:20<01:25,  1.60it/s]

Epoch [1/10], Step [3431/3567], Loss: 2.9446


Epoch 1/10:  96%|█████████▋| 3441/3568 [37:26<01:19,  1.60it/s]

Epoch [1/10], Step [3441/3567], Loss: 2.9446


Epoch 1/10:  97%|█████████▋| 3451/3568 [37:32<01:14,  1.57it/s]

Epoch [1/10], Step [3451/3567], Loss: 2.9446


Epoch 1/10:  97%|█████████▋| 3461/3568 [37:39<01:07,  1.58it/s]

Epoch [1/10], Step [3461/3567], Loss: 2.9446


Epoch 1/10:  97%|█████████▋| 3471/3568 [37:45<00:59,  1.62it/s]

Epoch [1/10], Step [3471/3567], Loss: 2.9446


Epoch 1/10:  98%|█████████▊| 3481/3568 [37:51<00:53,  1.62it/s]

Epoch [1/10], Step [3481/3567], Loss: 2.9446


Epoch 1/10:  98%|█████████▊| 3491/3568 [37:57<00:47,  1.61it/s]

Epoch [1/10], Step [3491/3567], Loss: 2.9446


Epoch 1/10:  98%|█████████▊| 3501/3568 [38:04<00:41,  1.61it/s]

Epoch [1/10], Step [3501/3567], Loss: 2.9446


Epoch 1/10:  98%|█████████▊| 3511/3568 [38:10<00:36,  1.55it/s]

Epoch [1/10], Step [3511/3567], Loss: 2.9446


Epoch 1/10:  99%|█████████▊| 3521/3568 [38:16<00:30,  1.56it/s]

Epoch [1/10], Step [3521/3567], Loss: 2.9446


Epoch 1/10:  99%|█████████▉| 3531/3568 [38:23<00:23,  1.55it/s]

Epoch [1/10], Step [3531/3567], Loss: 2.9446


Epoch 1/10:  99%|█████████▉| 3541/3568 [38:29<00:18,  1.50it/s]

Epoch [1/10], Step [3541/3567], Loss: 2.9446


Epoch 1/10: 100%|█████████▉| 3551/3568 [38:38<00:14,  1.20it/s]

Epoch [1/10], Step [3551/3567], Loss: 2.9446


Epoch 1/10: 100%|█████████▉| 3561/3568 [38:46<00:05,  1.23it/s]

Epoch [1/10], Step [3561/3567], Loss: 2.9446


Epoch 1/10: 100%|█████████▉| 3567/3568 [38:51<00:00,  1.53it/s]


Epoch [1/10] Average Loss: 2.9446


Epoch 2/10:   0%|          | 1/3568 [00:00<48:53,  1.22it/s]

Epoch [2/10], Step [1/3567], Loss: 2.9446


Epoch 2/10:   0%|          | 11/3568 [00:08<47:32,  1.25it/s]

Epoch [2/10], Step [11/3567], Loss: 2.9446


Epoch 2/10:   1%|          | 21/3568 [00:16<38:00,  1.56it/s]

Epoch [2/10], Step [21/3567], Loss: 2.9446


Epoch 2/10:   1%|          | 31/3568 [00:22<37:18,  1.58it/s]

Epoch [2/10], Step [31/3567], Loss: 2.9446


Epoch 2/10:   1%|          | 41/3568 [00:28<36:26,  1.61it/s]

Epoch [2/10], Step [41/3567], Loss: 2.9446


Epoch 2/10:   1%|▏         | 51/3568 [00:34<35:59,  1.63it/s]

Epoch [2/10], Step [51/3567], Loss: 2.9446


Epoch 2/10:   2%|▏         | 61/3568 [00:41<35:59,  1.62it/s]

Epoch [2/10], Step [61/3567], Loss: 2.9446


Epoch 2/10:   2%|▏         | 71/3568 [00:47<36:45,  1.59it/s]

Epoch [2/10], Step [71/3567], Loss: 2.9446


Epoch 2/10:   2%|▏         | 81/3568 [00:53<36:37,  1.59it/s]

Epoch [2/10], Step [81/3567], Loss: 2.9446


Epoch 2/10:   3%|▎         | 91/3568 [00:59<35:41,  1.62it/s]

Epoch [2/10], Step [91/3567], Loss: 2.9446


Epoch 2/10:   3%|▎         | 101/3568 [01:05<35:42,  1.62it/s]

Epoch [2/10], Step [101/3567], Loss: 2.9446


Epoch 2/10:   3%|▎         | 111/3568 [01:12<35:58,  1.60it/s]

Epoch [2/10], Step [111/3567], Loss: 2.9446


Epoch 2/10:   3%|▎         | 121/3568 [01:18<35:36,  1.61it/s]

Epoch [2/10], Step [121/3567], Loss: 2.9446


Epoch 2/10:   4%|▎         | 131/3568 [01:24<35:47,  1.60it/s]

Epoch [2/10], Step [131/3567], Loss: 2.9446


Epoch 2/10:   4%|▍         | 141/3568 [01:30<35:03,  1.63it/s]

Epoch [2/10], Step [141/3567], Loss: 2.9446


Epoch 2/10:   4%|▍         | 151/3568 [01:36<34:42,  1.64it/s]

Epoch [2/10], Step [151/3567], Loss: 2.9446


Epoch 2/10:   5%|▍         | 161/3568 [01:43<34:18,  1.66it/s]

Epoch [2/10], Step [161/3567], Loss: 2.9446


Epoch 2/10:   5%|▍         | 171/3568 [01:49<34:49,  1.63it/s]

Epoch [2/10], Step [171/3567], Loss: 2.9446


Epoch 2/10:   5%|▌         | 181/3568 [01:55<34:28,  1.64it/s]

Epoch [2/10], Step [181/3567], Loss: 2.9446


Epoch 2/10:   5%|▌         | 191/3568 [02:01<33:44,  1.67it/s]

Epoch [2/10], Step [191/3567], Loss: 2.9446


Epoch 2/10:   6%|▌         | 201/3568 [02:07<34:46,  1.61it/s]

Epoch [2/10], Step [201/3567], Loss: 2.9446


Epoch 2/10:   6%|▌         | 211/3568 [02:13<34:23,  1.63it/s]

Epoch [2/10], Step [211/3567], Loss: 2.9446


Epoch 2/10:   6%|▌         | 221/3568 [02:19<34:26,  1.62it/s]

Epoch [2/10], Step [221/3567], Loss: 2.9446


Epoch 2/10:   6%|▋         | 231/3568 [02:26<34:17,  1.62it/s]

Epoch [2/10], Step [231/3567], Loss: 2.9446


Epoch 2/10:   7%|▋         | 241/3568 [02:32<34:31,  1.61it/s]

Epoch [2/10], Step [241/3567], Loss: 2.9446


Epoch 2/10:   7%|▋         | 251/3568 [02:38<33:27,  1.65it/s]

Epoch [2/10], Step [251/3567], Loss: 2.9446


Epoch 2/10:   7%|▋         | 261/3568 [02:44<32:52,  1.68it/s]

Epoch [2/10], Step [261/3567], Loss: 2.9446


Epoch 2/10:   8%|▊         | 271/3568 [02:50<33:11,  1.66it/s]

Epoch [2/10], Step [271/3567], Loss: 2.9446


Epoch 2/10:   8%|▊         | 281/3568 [02:56<33:27,  1.64it/s]

Epoch [2/10], Step [281/3567], Loss: 2.9446


Epoch 2/10:   8%|▊         | 291/3568 [03:02<33:15,  1.64it/s]

Epoch [2/10], Step [291/3567], Loss: 2.9446


Epoch 2/10:   8%|▊         | 301/3568 [03:08<32:40,  1.67it/s]

Epoch [2/10], Step [301/3567], Loss: 2.9446


Epoch 2/10:   9%|▊         | 311/3568 [03:14<33:28,  1.62it/s]

Epoch [2/10], Step [311/3567], Loss: 2.9446


Epoch 2/10:   9%|▉         | 321/3568 [03:21<33:16,  1.63it/s]

Epoch [2/10], Step [321/3567], Loss: 2.9446


Epoch 2/10:   9%|▉         | 331/3568 [03:27<32:35,  1.66it/s]

Epoch [2/10], Step [331/3567], Loss: 2.9446


Epoch 2/10:  10%|▉         | 341/3568 [03:33<32:52,  1.64it/s]

Epoch [2/10], Step [341/3567], Loss: 2.9446


Epoch 2/10:  10%|▉         | 351/3568 [03:39<32:47,  1.64it/s]

Epoch [2/10], Step [351/3567], Loss: 2.9446


Epoch 2/10:  10%|█         | 361/3568 [03:45<33:00,  1.62it/s]

Epoch [2/10], Step [361/3567], Loss: 2.9446


Epoch 2/10:  10%|█         | 371/3568 [03:51<32:15,  1.65it/s]

Epoch [2/10], Step [371/3567], Loss: 2.9446


Epoch 2/10:  11%|█         | 381/3568 [03:57<32:43,  1.62it/s]

Epoch [2/10], Step [381/3567], Loss: 2.9446


Epoch 2/10:  11%|█         | 391/3568 [04:03<32:18,  1.64it/s]

Epoch [2/10], Step [391/3567], Loss: 2.9446


Epoch 2/10:  11%|█         | 401/3568 [04:09<32:14,  1.64it/s]

Epoch [2/10], Step [401/3567], Loss: 2.9446


Epoch 2/10:  12%|█▏        | 411/3568 [04:16<32:13,  1.63it/s]

Epoch [2/10], Step [411/3567], Loss: 2.9446


Epoch 2/10:  12%|█▏        | 421/3568 [04:22<31:52,  1.65it/s]

Epoch [2/10], Step [421/3567], Loss: 2.9446


Epoch 2/10:  12%|█▏        | 431/3568 [04:28<31:26,  1.66it/s]

Epoch [2/10], Step [431/3567], Loss: 2.9446


Epoch 2/10:  12%|█▏        | 441/3568 [04:34<32:07,  1.62it/s]

Epoch [2/10], Step [441/3567], Loss: 2.9446


Epoch 2/10:  13%|█▎        | 451/3568 [04:40<31:59,  1.62it/s]

Epoch [2/10], Step [451/3567], Loss: 2.9446


Epoch 2/10:  13%|█▎        | 461/3568 [04:46<31:57,  1.62it/s]

Epoch [2/10], Step [461/3567], Loss: 2.9446


Epoch 2/10:  13%|█▎        | 471/3568 [04:52<32:16,  1.60it/s]

Epoch [2/10], Step [471/3567], Loss: 2.9446


Epoch 2/10:  13%|█▎        | 481/3568 [04:58<31:07,  1.65it/s]

Epoch [2/10], Step [481/3567], Loss: 2.9446


Epoch 2/10:  14%|█▍        | 491/3568 [05:04<31:14,  1.64it/s]

Epoch [2/10], Step [491/3567], Loss: 2.9446


Epoch 2/10:  14%|█▍        | 501/3568 [05:11<31:41,  1.61it/s]

Epoch [2/10], Step [501/3567], Loss: 2.9446


Epoch 2/10:  14%|█▍        | 511/3568 [05:17<31:30,  1.62it/s]

Epoch [2/10], Step [511/3567], Loss: 2.9446


Epoch 2/10:  15%|█▍        | 521/3568 [05:23<31:05,  1.63it/s]

Epoch [2/10], Step [521/3567], Loss: 2.9446


Epoch 2/10:  15%|█▍        | 531/3568 [05:29<30:53,  1.64it/s]

Epoch [2/10], Step [531/3567], Loss: 2.9446


Epoch 2/10:  15%|█▌        | 541/3568 [05:35<32:27,  1.55it/s]

Epoch [2/10], Step [541/3567], Loss: 2.9446


Epoch 2/10:  15%|█▌        | 551/3568 [05:42<30:42,  1.64it/s]

Epoch [2/10], Step [551/3567], Loss: 2.9446


Epoch 2/10:  16%|█▌        | 561/3568 [05:48<31:14,  1.60it/s]

Epoch [2/10], Step [561/3567], Loss: 2.9446


Epoch 2/10:  16%|█▌        | 571/3568 [05:54<30:34,  1.63it/s]

Epoch [2/10], Step [571/3567], Loss: 2.9446


Epoch 2/10:  16%|█▋        | 581/3568 [06:00<29:57,  1.66it/s]

Epoch [2/10], Step [581/3567], Loss: 2.9446


Epoch 2/10:  17%|█▋        | 591/3568 [06:06<30:19,  1.64it/s]

Epoch [2/10], Step [591/3567], Loss: 2.9446


Epoch 2/10:  17%|█▋        | 601/3568 [06:12<30:08,  1.64it/s]

Epoch [2/10], Step [601/3567], Loss: 2.9446


Epoch 2/10:  17%|█▋        | 611/3568 [06:18<30:40,  1.61it/s]

Epoch [2/10], Step [611/3567], Loss: 2.9446


Epoch 2/10:  17%|█▋        | 621/3568 [06:24<29:35,  1.66it/s]

Epoch [2/10], Step [621/3567], Loss: 2.9446


Epoch 2/10:  18%|█▊        | 631/3568 [06:30<29:52,  1.64it/s]

Epoch [2/10], Step [631/3567], Loss: 2.9446


Epoch 2/10:  18%|█▊        | 641/3568 [06:37<30:47,  1.58it/s]

Epoch [2/10], Step [641/3567], Loss: 2.9446


Epoch 2/10:  18%|█▊        | 651/3568 [06:43<29:24,  1.65it/s]

Epoch [2/10], Step [651/3567], Loss: 2.9446


Epoch 2/10:  19%|█▊        | 661/3568 [06:49<29:21,  1.65it/s]

Epoch [2/10], Step [661/3567], Loss: 2.9446


Epoch 2/10:  19%|█▉        | 671/3568 [06:55<29:39,  1.63it/s]

Epoch [2/10], Step [671/3567], Loss: 2.9446


Epoch 2/10:  19%|█▉        | 681/3568 [07:01<29:33,  1.63it/s]

Epoch [2/10], Step [681/3567], Loss: 2.9446


Epoch 2/10:  19%|█▉        | 691/3568 [07:07<29:44,  1.61it/s]

Epoch [2/10], Step [691/3567], Loss: 2.9446


Epoch 2/10:  20%|█▉        | 701/3568 [07:14<29:35,  1.62it/s]

Epoch [2/10], Step [701/3567], Loss: 2.9446


Epoch 2/10:  20%|█▉        | 711/3568 [07:20<29:30,  1.61it/s]

Epoch [2/10], Step [711/3567], Loss: 2.9446


Epoch 2/10:  20%|██        | 721/3568 [07:26<35:01,  1.35it/s]

Epoch [2/10], Step [721/3567], Loss: 2.9446


Epoch 2/10:  20%|██        | 731/3568 [07:35<38:36,  1.22it/s]

Epoch [2/10], Step [731/3567], Loss: 2.9446


Epoch 2/10:  21%|██        | 741/3568 [07:43<38:05,  1.24it/s]

Epoch [2/10], Step [741/3567], Loss: 2.9446


Epoch 2/10:  21%|██        | 751/3568 [07:51<38:09,  1.23it/s]

Epoch [2/10], Step [751/3567], Loss: 2.9446


Epoch 2/10:  21%|██▏       | 761/3568 [07:59<35:42,  1.31it/s]

Epoch [2/10], Step [761/3567], Loss: 2.9446


Epoch 2/10:  22%|██▏       | 771/3568 [08:05<29:30,  1.58it/s]

Epoch [2/10], Step [771/3567], Loss: 2.9446


Epoch 2/10:  22%|██▏       | 781/3568 [08:11<28:17,  1.64it/s]

Epoch [2/10], Step [781/3567], Loss: 2.9446


Epoch 2/10:  22%|██▏       | 791/3568 [08:18<27:55,  1.66it/s]

Epoch [2/10], Step [791/3567], Loss: 2.9446


Epoch 2/10:  22%|██▏       | 801/3568 [08:24<27:39,  1.67it/s]

Epoch [2/10], Step [801/3567], Loss: 2.9446


Epoch 2/10:  23%|██▎       | 811/3568 [08:30<28:37,  1.61it/s]

Epoch [2/10], Step [811/3567], Loss: 2.9446


Epoch 2/10:  23%|██▎       | 821/3568 [08:36<28:11,  1.62it/s]

Epoch [2/10], Step [821/3567], Loss: 2.9446


Epoch 2/10:  23%|██▎       | 831/3568 [08:42<28:33,  1.60it/s]

Epoch [2/10], Step [831/3567], Loss: 2.9446


Epoch 2/10:  24%|██▎       | 841/3568 [08:49<28:06,  1.62it/s]

Epoch [2/10], Step [841/3567], Loss: 2.9446


Epoch 2/10:  24%|██▍       | 851/3568 [08:55<27:13,  1.66it/s]

Epoch [2/10], Step [851/3567], Loss: 2.9446


Epoch 2/10:  24%|██▍       | 861/3568 [09:01<27:21,  1.65it/s]

Epoch [2/10], Step [861/3567], Loss: 2.9446


Epoch 2/10:  24%|██▍       | 871/3568 [09:07<27:32,  1.63it/s]

Epoch [2/10], Step [871/3567], Loss: 2.9446


Epoch 2/10:  25%|██▍       | 881/3568 [09:13<27:18,  1.64it/s]

Epoch [2/10], Step [881/3567], Loss: 2.9446


Epoch 2/10:  25%|██▍       | 891/3568 [09:19<27:05,  1.65it/s]

Epoch [2/10], Step [891/3567], Loss: 2.9446


Epoch 2/10:  25%|██▌       | 901/3568 [09:25<27:16,  1.63it/s]

Epoch [2/10], Step [901/3567], Loss: 2.9446


Epoch 2/10:  26%|██▌       | 911/3568 [09:31<26:59,  1.64it/s]

Epoch [2/10], Step [911/3567], Loss: 2.9446


Epoch 2/10:  26%|██▌       | 921/3568 [09:38<33:16,  1.33it/s]

Epoch [2/10], Step [921/3567], Loss: 2.9446


Epoch 2/10:  26%|██▌       | 931/3568 [09:46<36:04,  1.22it/s]

Epoch [2/10], Step [931/3567], Loss: 2.9446


Epoch 2/10:  26%|██▋       | 941/3568 [09:55<36:20,  1.20it/s]

Epoch [2/10], Step [941/3567], Loss: 2.9446


Epoch 2/10:  27%|██▋       | 951/3568 [10:03<36:25,  1.20it/s]

Epoch [2/10], Step [951/3567], Loss: 2.9446


Epoch 2/10:  27%|██▋       | 961/3568 [10:11<32:33,  1.33it/s]

Epoch [2/10], Step [961/3567], Loss: 2.9446


Epoch 2/10:  27%|██▋       | 971/3568 [10:17<26:32,  1.63it/s]

Epoch [2/10], Step [971/3567], Loss: 2.9446


Epoch 2/10:  27%|██▋       | 981/3568 [10:23<26:06,  1.65it/s]

Epoch [2/10], Step [981/3567], Loss: 2.9446


Epoch 2/10:  28%|██▊       | 991/3568 [10:29<26:29,  1.62it/s]

Epoch [2/10], Step [991/3567], Loss: 2.9446


Epoch 2/10:  28%|██▊       | 1001/3568 [10:35<26:12,  1.63it/s]

Epoch [2/10], Step [1001/3567], Loss: 2.9446


Epoch 2/10:  28%|██▊       | 1011/3568 [10:42<26:19,  1.62it/s]

Epoch [2/10], Step [1011/3567], Loss: 2.9446


Epoch 2/10:  29%|██▊       | 1021/3568 [10:48<26:10,  1.62it/s]

Epoch [2/10], Step [1021/3567], Loss: 2.9446


Epoch 2/10:  29%|██▉       | 1031/3568 [10:54<25:51,  1.64it/s]

Epoch [2/10], Step [1031/3567], Loss: 2.9446


Epoch 2/10:  29%|██▉       | 1041/3568 [11:00<25:58,  1.62it/s]

Epoch [2/10], Step [1041/3567], Loss: 2.9446


Epoch 2/10:  29%|██▉       | 1051/3568 [11:06<26:28,  1.58it/s]

Epoch [2/10], Step [1051/3567], Loss: 2.9446


Epoch 2/10:  30%|██▉       | 1061/3568 [11:12<25:37,  1.63it/s]

Epoch [2/10], Step [1061/3567], Loss: 2.9446


Epoch 2/10:  30%|███       | 1071/3568 [11:19<25:21,  1.64it/s]

Epoch [2/10], Step [1071/3567], Loss: 2.9446


Epoch 2/10:  30%|███       | 1081/3568 [11:25<25:07,  1.65it/s]

Epoch [2/10], Step [1081/3567], Loss: 2.9446


Epoch 2/10:  31%|███       | 1091/3568 [11:31<25:38,  1.61it/s]

Epoch [2/10], Step [1091/3567], Loss: 2.9446


Epoch 2/10:  31%|███       | 1101/3568 [11:37<25:06,  1.64it/s]

Epoch [2/10], Step [1101/3567], Loss: 2.9446


Epoch 2/10:  31%|███       | 1111/3568 [11:43<24:57,  1.64it/s]

Epoch [2/10], Step [1111/3567], Loss: 2.9446


Epoch 2/10:  31%|███▏      | 1121/3568 [11:49<24:50,  1.64it/s]

Epoch [2/10], Step [1121/3567], Loss: 2.9446


Epoch 2/10:  32%|███▏      | 1131/3568 [11:55<24:34,  1.65it/s]

Epoch [2/10], Step [1131/3567], Loss: 2.9446


Epoch 2/10:  32%|███▏      | 1141/3568 [12:02<24:36,  1.64it/s]

Epoch [2/10], Step [1141/3567], Loss: 2.9446


Epoch 2/10:  32%|███▏      | 1151/3568 [12:08<24:41,  1.63it/s]

Epoch [2/10], Step [1151/3567], Loss: 2.9446


Epoch 2/10:  33%|███▎      | 1161/3568 [12:15<31:48,  1.26it/s]

Epoch [2/10], Step [1161/3567], Loss: 2.9446


Epoch 2/10:  33%|███▎      | 1171/3568 [12:23<32:20,  1.24it/s]

Epoch [2/10], Step [1171/3567], Loss: 2.9446


Epoch 2/10:  33%|███▎      | 1181/3568 [12:32<33:32,  1.19it/s]

Epoch [2/10], Step [1181/3567], Loss: 2.9446


Epoch 2/10:  33%|███▎      | 1191/3568 [12:40<32:28,  1.22it/s]

Epoch [2/10], Step [1191/3567], Loss: 2.9446


Epoch 2/10:  34%|███▎      | 1201/3568 [12:47<27:17,  1.45it/s]

Epoch [2/10], Step [1201/3567], Loss: 2.9446


Epoch 2/10:  34%|███▍      | 1211/3568 [12:53<24:13,  1.62it/s]

Epoch [2/10], Step [1211/3567], Loss: 2.9446


Epoch 2/10:  34%|███▍      | 1221/3568 [13:00<23:49,  1.64it/s]

Epoch [2/10], Step [1221/3567], Loss: 2.9446


Epoch 2/10:  35%|███▍      | 1231/3568 [13:06<23:27,  1.66it/s]

Epoch [2/10], Step [1231/3567], Loss: 2.9446


Epoch 2/10:  35%|███▍      | 1241/3568 [13:12<24:00,  1.62it/s]

Epoch [2/10], Step [1241/3567], Loss: 2.9446


Epoch 2/10:  35%|███▌      | 1251/3568 [13:18<23:32,  1.64it/s]

Epoch [2/10], Step [1251/3567], Loss: 2.9446


Epoch 2/10:  35%|███▌      | 1261/3568 [13:24<23:18,  1.65it/s]

Epoch [2/10], Step [1261/3567], Loss: 2.9446


Epoch 2/10:  36%|███▌      | 1271/3568 [13:30<23:27,  1.63it/s]

Epoch [2/10], Step [1271/3567], Loss: 2.9446


Epoch 2/10:  36%|███▌      | 1281/3568 [13:36<23:40,  1.61it/s]

Epoch [2/10], Step [1281/3567], Loss: 2.9446


Epoch 2/10:  36%|███▌      | 1291/3568 [13:43<23:46,  1.60it/s]

Epoch [2/10], Step [1291/3567], Loss: 2.9446


Epoch 2/10:  36%|███▋      | 1301/3568 [13:49<23:45,  1.59it/s]

Epoch [2/10], Step [1301/3567], Loss: 2.9446


Epoch 2/10:  37%|███▋      | 1311/3568 [13:55<23:34,  1.60it/s]

Epoch [2/10], Step [1311/3567], Loss: 2.9446


Epoch 2/10:  37%|███▋      | 1321/3568 [14:01<23:09,  1.62it/s]

Epoch [2/10], Step [1321/3567], Loss: 2.9446


Epoch 2/10:  37%|███▋      | 1331/3568 [14:08<23:01,  1.62it/s]

Epoch [2/10], Step [1331/3567], Loss: 2.9446


Epoch 2/10:  38%|███▊      | 1341/3568 [14:14<22:40,  1.64it/s]

Epoch [2/10], Step [1341/3567], Loss: 2.9446


Epoch 2/10:  38%|███▊      | 1351/3568 [14:20<22:53,  1.61it/s]

Epoch [2/10], Step [1351/3567], Loss: 2.9446


Epoch 2/10:  38%|███▊      | 1361/3568 [14:26<22:24,  1.64it/s]

Epoch [2/10], Step [1361/3567], Loss: 2.9446


Epoch 2/10:  38%|███▊      | 1371/3568 [14:32<22:09,  1.65it/s]

Epoch [2/10], Step [1371/3567], Loss: 2.9446


Epoch 2/10:  39%|███▊      | 1381/3568 [14:38<22:23,  1.63it/s]

Epoch [2/10], Step [1381/3567], Loss: 2.9446


Epoch 2/10:  39%|███▉      | 1391/3568 [14:44<22:26,  1.62it/s]

Epoch [2/10], Step [1391/3567], Loss: 2.9446


Epoch 2/10:  39%|███▉      | 1401/3568 [14:50<22:14,  1.62it/s]

Epoch [2/10], Step [1401/3567], Loss: 2.9446


Epoch 2/10:  40%|███▉      | 1411/3568 [14:57<21:57,  1.64it/s]

Epoch [2/10], Step [1411/3567], Loss: 2.9446


Epoch 2/10:  40%|███▉      | 1421/3568 [15:03<21:53,  1.63it/s]

Epoch [2/10], Step [1421/3567], Loss: 2.9446


Epoch 2/10:  40%|████      | 1431/3568 [15:09<21:34,  1.65it/s]

Epoch [2/10], Step [1431/3567], Loss: 2.9446


Epoch 2/10:  40%|████      | 1441/3568 [15:15<25:18,  1.40it/s]

Epoch [2/10], Step [1441/3567], Loss: 2.9446


Epoch 2/10:  41%|████      | 1451/3568 [15:23<28:23,  1.24it/s]

Epoch [2/10], Step [1451/3567], Loss: 2.9446


Epoch 2/10:  41%|████      | 1461/3568 [15:31<28:26,  1.23it/s]

Epoch [2/10], Step [1461/3567], Loss: 2.9446


Epoch 2/10:  41%|████      | 1471/3568 [15:40<28:35,  1.22it/s]

Epoch [2/10], Step [1471/3567], Loss: 2.9446


Epoch 2/10:  42%|████▏     | 1481/3568 [15:48<27:51,  1.25it/s]

Epoch [2/10], Step [1481/3567], Loss: 2.9446


Epoch 2/10:  42%|████▏     | 1491/3568 [15:54<21:46,  1.59it/s]

Epoch [2/10], Step [1491/3567], Loss: 2.9446


Epoch 2/10:  42%|████▏     | 1501/3568 [16:01<23:09,  1.49it/s]

Epoch [2/10], Step [1501/3567], Loss: 2.9446


Epoch 2/10:  42%|████▏     | 1511/3568 [16:07<21:25,  1.60it/s]

Epoch [2/10], Step [1511/3567], Loss: 2.9446


Epoch 2/10:  43%|████▎     | 1521/3568 [16:13<20:40,  1.65it/s]

Epoch [2/10], Step [1521/3567], Loss: 2.9446


Epoch 2/10:  43%|████▎     | 1531/3568 [16:19<20:54,  1.62it/s]

Epoch [2/10], Step [1531/3567], Loss: 2.9446


Epoch 2/10:  43%|████▎     | 1541/3568 [16:25<20:40,  1.63it/s]

Epoch [2/10], Step [1541/3567], Loss: 2.9446


Epoch 2/10:  43%|████▎     | 1551/3568 [16:31<20:44,  1.62it/s]

Epoch [2/10], Step [1551/3567], Loss: 2.9446


Epoch 2/10:  44%|████▍     | 1561/3568 [16:37<20:37,  1.62it/s]

Epoch [2/10], Step [1561/3567], Loss: 2.9446


Epoch 2/10:  44%|████▍     | 1571/3568 [16:43<20:07,  1.65it/s]

Epoch [2/10], Step [1571/3567], Loss: 2.9446


Epoch 2/10:  44%|████▍     | 1581/3568 [16:50<20:18,  1.63it/s]

Epoch [2/10], Step [1581/3567], Loss: 2.9446


Epoch 2/10:  45%|████▍     | 1591/3568 [16:56<20:20,  1.62it/s]

Epoch [2/10], Step [1591/3567], Loss: 2.9446


Epoch 2/10:  45%|████▍     | 1601/3568 [17:02<19:57,  1.64it/s]

Epoch [2/10], Step [1601/3567], Loss: 2.9446


Epoch 2/10:  45%|████▌     | 1611/3568 [17:08<19:41,  1.66it/s]

Epoch [2/10], Step [1611/3567], Loss: 2.9446


Epoch 2/10:  45%|████▌     | 1621/3568 [17:14<19:32,  1.66it/s]

Epoch [2/10], Step [1621/3567], Loss: 2.9446


Epoch 2/10:  46%|████▌     | 1631/3568 [17:20<19:44,  1.63it/s]

Epoch [2/10], Step [1631/3567], Loss: 2.9446


Epoch 2/10:  46%|████▌     | 1641/3568 [17:27<23:04,  1.39it/s]

Epoch [2/10], Step [1641/3567], Loss: 2.9446


Epoch 2/10:  46%|████▋     | 1651/3568 [17:35<26:25,  1.21it/s]

Epoch [2/10], Step [1651/3567], Loss: 2.9446


Epoch 2/10:  47%|████▋     | 1661/3568 [17:43<25:32,  1.24it/s]

Epoch [2/10], Step [1661/3567], Loss: 2.9446


Epoch 2/10:  47%|████▋     | 1671/3568 [17:51<25:38,  1.23it/s]

Epoch [2/10], Step [1671/3567], Loss: 2.9446


Epoch 2/10:  47%|████▋     | 1681/3568 [17:59<25:36,  1.23it/s]

Epoch [2/10], Step [1681/3567], Loss: 2.9446


Epoch 2/10:  47%|████▋     | 1691/3568 [18:06<19:19,  1.62it/s]

Epoch [2/10], Step [1691/3567], Loss: 2.9446


Epoch 2/10:  48%|████▊     | 1701/3568 [18:12<18:58,  1.64it/s]

Epoch [2/10], Step [1701/3567], Loss: 2.9446


Epoch 2/10:  48%|████▊     | 1711/3568 [18:18<18:51,  1.64it/s]

Epoch [2/10], Step [1711/3567], Loss: 2.9446


Epoch 2/10:  48%|████▊     | 1721/3568 [18:24<18:52,  1.63it/s]

Epoch [2/10], Step [1721/3567], Loss: 2.9446


Epoch 2/10:  49%|████▊     | 1731/3568 [18:30<19:01,  1.61it/s]

Epoch [2/10], Step [1731/3567], Loss: 2.9446


Epoch 2/10:  49%|████▉     | 1741/3568 [18:37<18:29,  1.65it/s]

Epoch [2/10], Step [1741/3567], Loss: 2.9446


Epoch 2/10:  49%|████▉     | 1751/3568 [18:43<19:13,  1.58it/s]

Epoch [2/10], Step [1751/3567], Loss: 2.9446


Epoch 2/10:  49%|████▉     | 1761/3568 [18:49<18:20,  1.64it/s]

Epoch [2/10], Step [1761/3567], Loss: 2.9446


Epoch 2/10:  50%|████▉     | 1771/3568 [18:55<18:15,  1.64it/s]

Epoch [2/10], Step [1771/3567], Loss: 2.9446


Epoch 2/10:  50%|████▉     | 1781/3568 [19:01<18:11,  1.64it/s]

Epoch [2/10], Step [1781/3567], Loss: 2.9446


Epoch 2/10:  50%|█████     | 1791/3568 [19:07<17:53,  1.66it/s]

Epoch [2/10], Step [1791/3567], Loss: 2.9446


Epoch 2/10:  50%|█████     | 1801/3568 [19:13<18:03,  1.63it/s]

Epoch [2/10], Step [1801/3567], Loss: 2.9446


Epoch 2/10:  51%|█████     | 1811/3568 [19:20<17:55,  1.63it/s]

Epoch [2/10], Step [1811/3567], Loss: 2.9446


Epoch 2/10:  51%|█████     | 1821/3568 [19:26<17:53,  1.63it/s]

Epoch [2/10], Step [1821/3567], Loss: 2.9446


Epoch 2/10:  51%|█████▏    | 1831/3568 [19:32<17:22,  1.67it/s]

Epoch [2/10], Step [1831/3567], Loss: 2.9446


Epoch 2/10:  52%|█████▏    | 1841/3568 [19:38<17:27,  1.65it/s]

Epoch [2/10], Step [1841/3567], Loss: 2.9446


Epoch 2/10:  52%|█████▏    | 1851/3568 [19:44<17:10,  1.67it/s]

Epoch [2/10], Step [1851/3567], Loss: 2.9446


Epoch 2/10:  52%|█████▏    | 1861/3568 [19:50<17:40,  1.61it/s]

Epoch [2/10], Step [1861/3567], Loss: 2.9446


Epoch 2/10:  52%|█████▏    | 1871/3568 [19:56<17:06,  1.65it/s]

Epoch [2/10], Step [1871/3567], Loss: 2.9446


Epoch 2/10:  53%|█████▎    | 1881/3568 [20:02<17:03,  1.65it/s]

Epoch [2/10], Step [1881/3567], Loss: 2.9446


Epoch 2/10:  53%|█████▎    | 1891/3568 [20:08<16:56,  1.65it/s]

Epoch [2/10], Step [1891/3567], Loss: 2.9446


Epoch 2/10:  53%|█████▎    | 1901/3568 [20:14<16:44,  1.66it/s]

Epoch [2/10], Step [1901/3567], Loss: 2.9446


Epoch 2/10:  54%|█████▎    | 1911/3568 [20:20<16:58,  1.63it/s]

Epoch [2/10], Step [1911/3567], Loss: 2.9446


Epoch 2/10:  54%|█████▍    | 1921/3568 [20:26<16:34,  1.66it/s]

Epoch [2/10], Step [1921/3567], Loss: 2.9446


Epoch 2/10:  54%|█████▍    | 1931/3568 [20:33<16:38,  1.64it/s]

Epoch [2/10], Step [1931/3567], Loss: 2.9446


Epoch 2/10:  54%|█████▍    | 1941/3568 [20:39<17:05,  1.59it/s]

Epoch [2/10], Step [1941/3567], Loss: 2.9446


Epoch 2/10:  55%|█████▍    | 1951/3568 [20:45<16:34,  1.63it/s]

Epoch [2/10], Step [1951/3567], Loss: 2.9446


Epoch 2/10:  55%|█████▍    | 1961/3568 [20:51<16:22,  1.63it/s]

Epoch [2/10], Step [1961/3567], Loss: 2.9446


Epoch 2/10:  55%|█████▌    | 1971/3568 [20:58<16:19,  1.63it/s]

Epoch [2/10], Step [1971/3567], Loss: 2.9446


Epoch 2/10:  56%|█████▌    | 1981/3568 [21:04<16:09,  1.64it/s]

Epoch [2/10], Step [1981/3567], Loss: 2.9446


Epoch 2/10:  56%|█████▌    | 1991/3568 [21:10<16:04,  1.63it/s]

Epoch [2/10], Step [1991/3567], Loss: 2.9446


Epoch 2/10:  56%|█████▌    | 2001/3568 [21:16<16:04,  1.62it/s]

Epoch [2/10], Step [2001/3567], Loss: 2.9446


Epoch 2/10:  56%|█████▋    | 2011/3568 [21:22<16:01,  1.62it/s]

Epoch [2/10], Step [2011/3567], Loss: 2.9446


Epoch 2/10:  57%|█████▋    | 2021/3568 [21:28<15:41,  1.64it/s]

Epoch [2/10], Step [2021/3567], Loss: 2.9446


Epoch 2/10:  57%|█████▋    | 2031/3568 [21:35<15:40,  1.63it/s]

Epoch [2/10], Step [2031/3567], Loss: 2.9446


Epoch 2/10:  57%|█████▋    | 2041/3568 [21:42<20:35,  1.24it/s]

Epoch [2/10], Step [2041/3567], Loss: 2.9446


Epoch 2/10:  57%|█████▋    | 2051/3568 [21:50<21:02,  1.20it/s]

Epoch [2/10], Step [2051/3567], Loss: 2.9446


Epoch 2/10:  58%|█████▊    | 2061/3568 [21:59<20:52,  1.20it/s]

Epoch [2/10], Step [2061/3567], Loss: 2.9446


Epoch 2/10:  58%|█████▊    | 2071/3568 [22:07<19:54,  1.25it/s]

Epoch [2/10], Step [2071/3567], Loss: 2.9446


Epoch 2/10:  58%|█████▊    | 2081/3568 [22:15<20:13,  1.23it/s]

Epoch [2/10], Step [2081/3567], Loss: 2.9446


Epoch 2/10:  59%|█████▊    | 2091/3568 [22:21<15:24,  1.60it/s]

Epoch [2/10], Step [2091/3567], Loss: 2.9446


Epoch 2/10:  59%|█████▉    | 2101/3568 [22:27<15:00,  1.63it/s]

Epoch [2/10], Step [2101/3567], Loss: 2.9446


Epoch 2/10:  59%|█████▉    | 2111/3568 [22:34<15:05,  1.61it/s]

Epoch [2/10], Step [2111/3567], Loss: 2.9446


Epoch 2/10:  59%|█████▉    | 2121/3568 [22:40<14:42,  1.64it/s]

Epoch [2/10], Step [2121/3567], Loss: 2.9446


Epoch 2/10:  60%|█████▉    | 2131/3568 [22:46<14:44,  1.62it/s]

Epoch [2/10], Step [2131/3567], Loss: 2.9446


Epoch 2/10:  60%|██████    | 2141/3568 [22:52<14:44,  1.61it/s]

Epoch [2/10], Step [2141/3567], Loss: 2.9446


Epoch 2/10:  60%|██████    | 2151/3568 [22:58<14:32,  1.62it/s]

Epoch [2/10], Step [2151/3567], Loss: 2.9446


Epoch 2/10:  61%|██████    | 2161/3568 [23:05<14:26,  1.62it/s]

Epoch [2/10], Step [2161/3567], Loss: 2.9446


Epoch 2/10:  61%|██████    | 2171/3568 [23:11<14:09,  1.64it/s]

Epoch [2/10], Step [2171/3567], Loss: 2.9446


Epoch 2/10:  61%|██████    | 2181/3568 [23:17<14:06,  1.64it/s]

Epoch [2/10], Step [2181/3567], Loss: 2.9446


Epoch 2/10:  61%|██████▏   | 2191/3568 [23:23<14:19,  1.60it/s]

Epoch [2/10], Step [2191/3567], Loss: 2.9446


Epoch 2/10:  62%|██████▏   | 2201/3568 [23:29<14:27,  1.58it/s]

Epoch [2/10], Step [2201/3567], Loss: 2.9446


Epoch 2/10:  62%|██████▏   | 2211/3568 [23:35<13:47,  1.64it/s]

Epoch [2/10], Step [2211/3567], Loss: 2.9446


Epoch 2/10:  62%|██████▏   | 2221/3568 [23:41<13:41,  1.64it/s]

Epoch [2/10], Step [2221/3567], Loss: 2.9446


Epoch 2/10:  63%|██████▎   | 2231/3568 [23:48<13:37,  1.64it/s]

Epoch [2/10], Step [2231/3567], Loss: 2.9446


Epoch 2/10:  63%|██████▎   | 2241/3568 [23:55<16:44,  1.32it/s]

Epoch [2/10], Step [2241/3567], Loss: 2.9446


Epoch 2/10:  63%|██████▎   | 2251/3568 [24:03<18:10,  1.21it/s]

Epoch [2/10], Step [2251/3567], Loss: 2.9446


Epoch 2/10:  63%|██████▎   | 2261/3568 [24:11<18:05,  1.20it/s]

Epoch [2/10], Step [2261/3567], Loss: 2.9446


Epoch 2/10:  64%|██████▎   | 2271/3568 [24:19<17:21,  1.25it/s]

Epoch [2/10], Step [2271/3567], Loss: 2.9446


Epoch 2/10:  64%|██████▍   | 2281/3568 [24:27<17:30,  1.23it/s]

Epoch [2/10], Step [2281/3567], Loss: 2.9446


Epoch 2/10:  64%|██████▍   | 2291/3568 [24:34<13:22,  1.59it/s]

Epoch [2/10], Step [2291/3567], Loss: 2.9446


Epoch 2/10:  64%|██████▍   | 2301/3568 [24:40<13:01,  1.62it/s]

Epoch [2/10], Step [2301/3567], Loss: 2.9446


Epoch 2/10:  65%|██████▍   | 2311/3568 [24:46<12:49,  1.63it/s]

Epoch [2/10], Step [2311/3567], Loss: 2.9446


Epoch 2/10:  65%|██████▌   | 2321/3568 [24:53<12:49,  1.62it/s]

Epoch [2/10], Step [2321/3567], Loss: 2.9446


Epoch 2/10:  65%|██████▌   | 2331/3568 [24:59<12:48,  1.61it/s]

Epoch [2/10], Step [2331/3567], Loss: 2.9446


Epoch 2/10:  66%|██████▌   | 2341/3568 [25:05<12:34,  1.63it/s]

Epoch [2/10], Step [2341/3567], Loss: 2.9446


Epoch 2/10:  66%|██████▌   | 2351/3568 [25:11<12:22,  1.64it/s]

Epoch [2/10], Step [2351/3567], Loss: 2.9446


Epoch 2/10:  66%|██████▌   | 2361/3568 [25:17<12:26,  1.62it/s]

Epoch [2/10], Step [2361/3567], Loss: 2.9446


Epoch 2/10:  66%|██████▋   | 2371/3568 [25:23<12:04,  1.65it/s]

Epoch [2/10], Step [2371/3567], Loss: 2.9446


Epoch 2/10:  67%|██████▋   | 2381/3568 [25:29<12:08,  1.63it/s]

Epoch [2/10], Step [2381/3567], Loss: 2.9446


Epoch 2/10:  67%|██████▋   | 2391/3568 [25:35<11:52,  1.65it/s]

Epoch [2/10], Step [2391/3567], Loss: 2.9446


Epoch 2/10:  67%|██████▋   | 2401/3568 [25:41<11:47,  1.65it/s]

Epoch [2/10], Step [2401/3567], Loss: 2.9446


Epoch 2/10:  68%|██████▊   | 2411/3568 [25:48<11:46,  1.64it/s]

Epoch [2/10], Step [2411/3567], Loss: 2.9446


Epoch 2/10:  68%|██████▊   | 2421/3568 [25:54<12:12,  1.57it/s]

Epoch [2/10], Step [2421/3567], Loss: 2.9446


Epoch 2/10:  68%|██████▊   | 2431/3568 [26:00<11:40,  1.62it/s]

Epoch [2/10], Step [2431/3567], Loss: 2.9446


Epoch 2/10:  68%|██████▊   | 2441/3568 [26:06<11:29,  1.63it/s]

Epoch [2/10], Step [2441/3567], Loss: 2.9446


Epoch 2/10:  69%|██████▊   | 2451/3568 [26:12<11:24,  1.63it/s]

Epoch [2/10], Step [2451/3567], Loss: 2.9446


Epoch 2/10:  69%|██████▉   | 2461/3568 [26:19<12:25,  1.48it/s]

Epoch [2/10], Step [2461/3567], Loss: 2.9446


Epoch 2/10:  69%|██████▉   | 2471/3568 [26:27<15:01,  1.22it/s]

Epoch [2/10], Step [2471/3567], Loss: 2.9446


Epoch 2/10:  70%|██████▉   | 2481/3568 [26:35<14:45,  1.23it/s]

Epoch [2/10], Step [2481/3567], Loss: 2.9446


Epoch 2/10:  70%|██████▉   | 2491/3568 [26:43<14:29,  1.24it/s]

Epoch [2/10], Step [2491/3567], Loss: 2.9446


Epoch 2/10:  70%|███████   | 2501/3568 [26:51<14:31,  1.22it/s]

Epoch [2/10], Step [2501/3567], Loss: 2.9446


Epoch 2/10:  70%|███████   | 2511/3568 [26:59<11:58,  1.47it/s]

Epoch [2/10], Step [2511/3567], Loss: 2.9446


Epoch 2/10:  71%|███████   | 2521/3568 [27:05<10:40,  1.63it/s]

Epoch [2/10], Step [2521/3567], Loss: 2.9446


Epoch 2/10:  71%|███████   | 2531/3568 [27:11<10:37,  1.63it/s]

Epoch [2/10], Step [2531/3567], Loss: 2.9446


Epoch 2/10:  71%|███████   | 2541/3568 [27:17<10:28,  1.63it/s]

Epoch [2/10], Step [2541/3567], Loss: 2.9446


Epoch 2/10:  71%|███████▏  | 2551/3568 [27:23<10:24,  1.63it/s]

Epoch [2/10], Step [2551/3567], Loss: 2.9446


Epoch 2/10:  72%|███████▏  | 2561/3568 [27:29<10:31,  1.59it/s]

Epoch [2/10], Step [2561/3567], Loss: 2.9446


Epoch 2/10:  72%|███████▏  | 2571/3568 [27:36<10:40,  1.56it/s]

Epoch [2/10], Step [2571/3567], Loss: 2.9446


Epoch 2/10:  72%|███████▏  | 2581/3568 [27:42<10:01,  1.64it/s]

Epoch [2/10], Step [2581/3567], Loss: 2.9446


Epoch 2/10:  73%|███████▎  | 2591/3568 [27:48<10:02,  1.62it/s]

Epoch [2/10], Step [2591/3567], Loss: 2.9446


Epoch 2/10:  73%|███████▎  | 2601/3568 [27:54<09:49,  1.64it/s]

Epoch [2/10], Step [2601/3567], Loss: 2.9446


Epoch 2/10:  73%|███████▎  | 2611/3568 [28:00<10:18,  1.55it/s]

Epoch [2/10], Step [2611/3567], Loss: 2.9446


Epoch 2/10:  73%|███████▎  | 2621/3568 [28:07<10:02,  1.57it/s]

Epoch [2/10], Step [2621/3567], Loss: 2.9446


Epoch 2/10:  74%|███████▎  | 2631/3568 [28:13<11:02,  1.41it/s]

Epoch [2/10], Step [2631/3567], Loss: 2.9446


Epoch 2/10:  74%|███████▍  | 2641/3568 [28:25<17:55,  1.16s/it]

Epoch [2/10], Step [2641/3567], Loss: 2.9446


Epoch 2/10:  74%|███████▍  | 2651/3568 [28:35<12:47,  1.19it/s]

Epoch [2/10], Step [2651/3567], Loss: 2.9446


Epoch 2/10:  75%|███████▍  | 2661/3568 [28:41<09:18,  1.63it/s]

Epoch [2/10], Step [2661/3567], Loss: 2.9446


Epoch 2/10:  75%|███████▍  | 2671/3568 [28:47<09:03,  1.65it/s]

Epoch [2/10], Step [2671/3567], Loss: 2.9446


Epoch 2/10:  75%|███████▌  | 2681/3568 [28:53<09:17,  1.59it/s]

Epoch [2/10], Step [2681/3567], Loss: 2.9446


Epoch 2/10:  75%|███████▌  | 2691/3568 [28:59<08:52,  1.65it/s]

Epoch [2/10], Step [2691/3567], Loss: 2.9446


Epoch 2/10:  76%|███████▌  | 2701/3568 [29:05<08:48,  1.64it/s]

Epoch [2/10], Step [2701/3567], Loss: 2.9446


Epoch 2/10:  76%|███████▌  | 2711/3568 [29:11<08:38,  1.65it/s]

Epoch [2/10], Step [2711/3567], Loss: 2.9446


Epoch 2/10:  76%|███████▋  | 2721/3568 [29:18<08:35,  1.64it/s]

Epoch [2/10], Step [2721/3567], Loss: 2.9446


Epoch 2/10:  77%|███████▋  | 2731/3568 [29:24<08:36,  1.62it/s]

Epoch [2/10], Step [2731/3567], Loss: 2.9446


Epoch 2/10:  77%|███████▋  | 2741/3568 [29:30<08:24,  1.64it/s]

Epoch [2/10], Step [2741/3567], Loss: 2.9446


Epoch 2/10:  77%|███████▋  | 2751/3568 [29:36<08:24,  1.62it/s]

Epoch [2/10], Step [2751/3567], Loss: 2.9446


Epoch 2/10:  77%|███████▋  | 2761/3568 [29:42<08:22,  1.61it/s]

Epoch [2/10], Step [2761/3567], Loss: 2.9446


Epoch 2/10:  78%|███████▊  | 2771/3568 [29:48<08:07,  1.63it/s]

Epoch [2/10], Step [2771/3567], Loss: 2.9446


Epoch 2/10:  78%|███████▊  | 2781/3568 [29:54<08:00,  1.64it/s]

Epoch [2/10], Step [2781/3567], Loss: 2.9446


Epoch 2/10:  78%|███████▊  | 2791/3568 [30:01<08:01,  1.61it/s]

Epoch [2/10], Step [2791/3567], Loss: 2.9446


Epoch 2/10:  79%|███████▊  | 2801/3568 [30:07<07:58,  1.60it/s]

Epoch [2/10], Step [2801/3567], Loss: 2.9446


Epoch 2/10:  79%|███████▉  | 2811/3568 [30:13<08:14,  1.53it/s]

Epoch [2/10], Step [2811/3567], Loss: 2.9446


Epoch 2/10:  79%|███████▉  | 2821/3568 [30:19<07:38,  1.63it/s]

Epoch [2/10], Step [2821/3567], Loss: 2.9446


Epoch 2/10:  79%|███████▉  | 2831/3568 [30:26<07:38,  1.61it/s]

Epoch [2/10], Step [2831/3567], Loss: 2.9446


Epoch 2/10:  80%|███████▉  | 2841/3568 [30:33<09:32,  1.27it/s]

Epoch [2/10], Step [2841/3567], Loss: 2.9446


Epoch 2/10:  80%|███████▉  | 2851/3568 [30:41<09:37,  1.24it/s]

Epoch [2/10], Step [2851/3567], Loss: 2.9446


Epoch 2/10:  80%|████████  | 2861/3568 [30:49<09:49,  1.20it/s]

Epoch [2/10], Step [2861/3567], Loss: 2.9446


Epoch 2/10:  80%|████████  | 2871/3568 [30:57<09:41,  1.20it/s]

Epoch [2/10], Step [2871/3567], Loss: 2.9446


Epoch 2/10:  81%|████████  | 2881/3568 [31:06<09:16,  1.23it/s]

Epoch [2/10], Step [2881/3567], Loss: 2.9446


Epoch 2/10:  81%|████████  | 2891/3568 [31:12<07:04,  1.60it/s]

Epoch [2/10], Step [2891/3567], Loss: 2.9446


Epoch 2/10:  81%|████████▏ | 2901/3568 [31:18<06:49,  1.63it/s]

Epoch [2/10], Step [2901/3567], Loss: 2.9446


Epoch 2/10:  82%|████████▏ | 2911/3568 [31:24<06:43,  1.63it/s]

Epoch [2/10], Step [2911/3567], Loss: 2.9446


Epoch 2/10:  82%|████████▏ | 2921/3568 [31:30<06:38,  1.62it/s]

Epoch [2/10], Step [2921/3567], Loss: 2.9446


Epoch 2/10:  82%|████████▏ | 2931/3568 [31:36<06:25,  1.65it/s]

Epoch [2/10], Step [2931/3567], Loss: 2.9446


Epoch 2/10:  82%|████████▏ | 2941/3568 [31:43<06:32,  1.60it/s]

Epoch [2/10], Step [2941/3567], Loss: 2.9446


Epoch 2/10:  83%|████████▎ | 2951/3568 [31:49<06:18,  1.63it/s]

Epoch [2/10], Step [2951/3567], Loss: 2.9446


Epoch 2/10:  83%|████████▎ | 2961/3568 [31:55<06:18,  1.60it/s]

Epoch [2/10], Step [2961/3567], Loss: 2.9446


Epoch 2/10:  83%|████████▎ | 2971/3568 [32:01<06:05,  1.63it/s]

Epoch [2/10], Step [2971/3567], Loss: 2.9446


Epoch 2/10:  84%|████████▎ | 2981/3568 [32:07<05:53,  1.66it/s]

Epoch [2/10], Step [2981/3567], Loss: 2.9446


Epoch 2/10:  84%|████████▍ | 2991/3568 [32:13<05:50,  1.65it/s]

Epoch [2/10], Step [2991/3567], Loss: 2.9446


Epoch 2/10:  84%|████████▍ | 3001/3568 [32:20<05:42,  1.65it/s]

Epoch [2/10], Step [3001/3567], Loss: 2.9446


Epoch 2/10:  84%|████████▍ | 3011/3568 [32:26<05:45,  1.61it/s]

Epoch [2/10], Step [3011/3567], Loss: 2.9446


Epoch 2/10:  85%|████████▍ | 3021/3568 [32:32<05:50,  1.56it/s]

Epoch [2/10], Step [3021/3567], Loss: 2.9446


Epoch 2/10:  85%|████████▍ | 3031/3568 [32:38<05:30,  1.62it/s]

Epoch [2/10], Step [3031/3567], Loss: 2.9446


Epoch 2/10:  85%|████████▌ | 3041/3568 [32:44<05:31,  1.59it/s]

Epoch [2/10], Step [3041/3567], Loss: 2.9446


Epoch 2/10:  86%|████████▌ | 3051/3568 [32:51<05:20,  1.61it/s]

Epoch [2/10], Step [3051/3567], Loss: 2.9446


Epoch 2/10:  86%|████████▌ | 3061/3568 [32:57<05:15,  1.61it/s]

Epoch [2/10], Step [3061/3567], Loss: 2.9446


Epoch 2/10:  86%|████████▌ | 3071/3568 [33:03<05:08,  1.61it/s]

Epoch [2/10], Step [3071/3567], Loss: 2.9446


Epoch 2/10:  86%|████████▋ | 3081/3568 [33:09<04:57,  1.64it/s]

Epoch [2/10], Step [3081/3567], Loss: 2.9446


Epoch 2/10:  87%|████████▋ | 3091/3568 [33:15<04:51,  1.64it/s]

Epoch [2/10], Step [3091/3567], Loss: 2.9446


Epoch 2/10:  87%|████████▋ | 3101/3568 [33:22<04:47,  1.62it/s]

Epoch [2/10], Step [3101/3567], Loss: 2.9446


Epoch 2/10:  87%|████████▋ | 3111/3568 [33:28<04:39,  1.63it/s]

Epoch [2/10], Step [3111/3567], Loss: 2.9446


Epoch 2/10:  87%|████████▋ | 3121/3568 [33:34<04:36,  1.62it/s]

Epoch [2/10], Step [3121/3567], Loss: 2.9446


Epoch 2/10:  88%|████████▊ | 3131/3568 [33:40<04:24,  1.65it/s]

Epoch [2/10], Step [3131/3567], Loss: 2.9446


Epoch 2/10:  88%|████████▊ | 3141/3568 [33:46<04:21,  1.64it/s]

Epoch [2/10], Step [3141/3567], Loss: 2.9446


Epoch 2/10:  88%|████████▊ | 3151/3568 [33:52<04:15,  1.63it/s]

Epoch [2/10], Step [3151/3567], Loss: 2.9446


Epoch 2/10:  89%|████████▊ | 3161/3568 [33:59<04:27,  1.52it/s]

Epoch [2/10], Step [3161/3567], Loss: 2.9446


Epoch 2/10:  89%|████████▉ | 3171/3568 [34:05<04:11,  1.58it/s]

Epoch [2/10], Step [3171/3567], Loss: 2.9446


Epoch 2/10:  89%|████████▉ | 3181/3568 [34:11<03:59,  1.62it/s]

Epoch [2/10], Step [3181/3567], Loss: 2.9446


Epoch 2/10:  89%|████████▉ | 3191/3568 [34:17<03:50,  1.63it/s]

Epoch [2/10], Step [3191/3567], Loss: 2.9446


Epoch 2/10:  90%|████████▉ | 3201/3568 [34:24<03:46,  1.62it/s]

Epoch [2/10], Step [3201/3567], Loss: 2.9446


Epoch 2/10:  90%|████████▉ | 3211/3568 [34:30<03:36,  1.65it/s]

Epoch [2/10], Step [3211/3567], Loss: 2.9446


Epoch 2/10:  90%|█████████ | 3221/3568 [34:36<03:35,  1.61it/s]

Epoch [2/10], Step [3221/3567], Loss: 2.9446


Epoch 2/10:  91%|█████████ | 3231/3568 [34:42<03:23,  1.65it/s]

Epoch [2/10], Step [3231/3567], Loss: 2.9446


Epoch 2/10:  91%|█████████ | 3241/3568 [34:48<03:20,  1.63it/s]

Epoch [2/10], Step [3241/3567], Loss: 2.9446


Epoch 2/10:  91%|█████████ | 3251/3568 [34:54<03:17,  1.61it/s]

Epoch [2/10], Step [3251/3567], Loss: 2.9446


Epoch 2/10:  91%|█████████▏| 3261/3568 [35:00<03:09,  1.62it/s]

Epoch [2/10], Step [3261/3567], Loss: 2.9446


Epoch 2/10:  92%|█████████▏| 3271/3568 [35:07<03:00,  1.64it/s]

Epoch [2/10], Step [3271/3567], Loss: 2.9446


Epoch 2/10:  92%|█████████▏| 3281/3568 [35:13<02:55,  1.63it/s]

Epoch [2/10], Step [3281/3567], Loss: 2.9446


Epoch 2/10:  92%|█████████▏| 3291/3568 [35:19<02:50,  1.63it/s]

Epoch [2/10], Step [3291/3567], Loss: 2.9446


Epoch 2/10:  93%|█████████▎| 3301/3568 [35:25<02:46,  1.60it/s]

Epoch [2/10], Step [3301/3567], Loss: 2.9446


Epoch 2/10:  93%|█████████▎| 3311/3568 [35:31<02:40,  1.60it/s]

Epoch [2/10], Step [3311/3567], Loss: 2.9446


Epoch 2/10:  93%|█████████▎| 3321/3568 [35:38<02:32,  1.62it/s]

Epoch [2/10], Step [3321/3567], Loss: 2.9446


Epoch 2/10:  93%|█████████▎| 3331/3568 [35:44<02:29,  1.59it/s]

Epoch [2/10], Step [3331/3567], Loss: 2.9446


Epoch 2/10:  94%|█████████▎| 3341/3568 [35:50<02:19,  1.63it/s]

Epoch [2/10], Step [3341/3567], Loss: 2.9446


Epoch 2/10:  94%|█████████▍| 3351/3568 [35:56<02:13,  1.63it/s]

Epoch [2/10], Step [3351/3567], Loss: 2.9446


Epoch 2/10:  94%|█████████▍| 3361/3568 [36:02<02:07,  1.62it/s]

Epoch [2/10], Step [3361/3567], Loss: 2.9446


Epoch 2/10:  94%|█████████▍| 3371/3568 [36:10<02:38,  1.24it/s]

Epoch [2/10], Step [3371/3567], Loss: 2.9446


Epoch 2/10:  95%|█████████▍| 3381/3568 [36:18<02:32,  1.23it/s]

Epoch [2/10], Step [3381/3567], Loss: 2.9446


Epoch 2/10:  95%|█████████▌| 3391/3568 [36:26<02:25,  1.22it/s]

Epoch [2/10], Step [3391/3567], Loss: 2.9446


Epoch 2/10:  95%|█████████▌| 3401/3568 [36:34<02:17,  1.22it/s]

Epoch [2/10], Step [3401/3567], Loss: 2.9446


Epoch 2/10:  96%|█████████▌| 3411/3568 [36:43<02:08,  1.22it/s]

Epoch [2/10], Step [3411/3567], Loss: 2.9446


Epoch 2/10:  96%|█████████▌| 3421/3568 [36:49<01:31,  1.61it/s]

Epoch [2/10], Step [3421/3567], Loss: 2.9446


Epoch 2/10:  96%|█████████▌| 3431/3568 [36:55<01:24,  1.63it/s]

Epoch [2/10], Step [3431/3567], Loss: 2.9446


Epoch 2/10:  96%|█████████▋| 3441/3568 [37:01<01:17,  1.64it/s]

Epoch [2/10], Step [3441/3567], Loss: 2.9446


Epoch 2/10:  97%|█████████▋| 3451/3568 [37:07<01:11,  1.64it/s]

Epoch [2/10], Step [3451/3567], Loss: 2.9446


Epoch 2/10:  97%|█████████▋| 3461/3568 [37:13<01:04,  1.65it/s]

Epoch [2/10], Step [3461/3567], Loss: 2.9446


Epoch 2/10:  97%|█████████▋| 3471/3568 [37:20<01:00,  1.60it/s]

Epoch [2/10], Step [3471/3567], Loss: 2.9446


Epoch 2/10:  98%|█████████▊| 3481/3568 [37:26<00:53,  1.63it/s]

Epoch [2/10], Step [3481/3567], Loss: 2.9446


Epoch 2/10:  98%|█████████▊| 3491/3568 [37:32<00:48,  1.60it/s]

Epoch [2/10], Step [3491/3567], Loss: 2.9446


Epoch 2/10:  98%|█████████▊| 3501/3568 [37:38<00:40,  1.64it/s]

Epoch [2/10], Step [3501/3567], Loss: 2.9446


Epoch 2/10:  98%|█████████▊| 3511/3568 [37:44<00:34,  1.65it/s]

Epoch [2/10], Step [3511/3567], Loss: 2.9446


Epoch 2/10:  99%|█████████▊| 3521/3568 [37:50<00:28,  1.64it/s]

Epoch [2/10], Step [3521/3567], Loss: 2.9446


Epoch 2/10:  99%|█████████▉| 3531/3568 [37:57<00:23,  1.61it/s]

Epoch [2/10], Step [3531/3567], Loss: 2.9446


Epoch 2/10:  99%|█████████▉| 3541/3568 [38:03<00:16,  1.63it/s]

Epoch [2/10], Step [3541/3567], Loss: 2.9446


Epoch 2/10: 100%|█████████▉| 3551/3568 [38:09<00:10,  1.63it/s]

Epoch [2/10], Step [3551/3567], Loss: 2.9446


Epoch 2/10: 100%|█████████▉| 3561/3568 [38:15<00:04,  1.60it/s]

Epoch [2/10], Step [3561/3567], Loss: 2.9446


Epoch 2/10: 100%|█████████▉| 3567/3568 [38:19<00:00,  1.55it/s]


Epoch [2/10] Average Loss: 2.9446


Epoch 3/10:   0%|          | 1/3568 [00:00<36:45,  1.62it/s]

Epoch [3/10], Step [1/3567], Loss: 2.9446


Epoch 3/10:   0%|          | 11/3568 [00:06<36:32,  1.62it/s]

Epoch [3/10], Step [11/3567], Loss: 2.9446


Epoch 3/10:   1%|          | 21/3568 [00:12<36:16,  1.63it/s]

Epoch [3/10], Step [21/3567], Loss: 2.9446


Epoch 3/10:   1%|          | 31/3568 [00:19<36:14,  1.63it/s]

Epoch [3/10], Step [31/3567], Loss: 2.9446


Epoch 3/10:   1%|          | 41/3568 [00:25<35:41,  1.65it/s]

Epoch [3/10], Step [41/3567], Loss: 2.9446


Epoch 3/10:   1%|▏         | 51/3568 [00:31<35:50,  1.64it/s]

Epoch [3/10], Step [51/3567], Loss: 2.9446


Epoch 3/10:   2%|▏         | 61/3568 [00:37<35:51,  1.63it/s]

Epoch [3/10], Step [61/3567], Loss: 2.9446


Epoch 3/10:   2%|▏         | 71/3568 [00:43<35:54,  1.62it/s]

Epoch [3/10], Step [71/3567], Loss: 2.9446


Epoch 3/10:   2%|▏         | 81/3568 [00:50<35:16,  1.65it/s]

Epoch [3/10], Step [81/3567], Loss: 2.9446


Epoch 3/10:   3%|▎         | 91/3568 [00:56<36:03,  1.61it/s]

Epoch [3/10], Step [91/3567], Loss: 2.9446


Epoch 3/10:   3%|▎         | 101/3568 [01:02<35:00,  1.65it/s]

Epoch [3/10], Step [101/3567], Loss: 2.9446


Epoch 3/10:   3%|▎         | 111/3568 [01:08<35:36,  1.62it/s]

Epoch [3/10], Step [111/3567], Loss: 2.9446


Epoch 3/10:   3%|▎         | 121/3568 [01:14<34:45,  1.65it/s]

Epoch [3/10], Step [121/3567], Loss: 2.9446


Epoch 3/10:   4%|▎         | 131/3568 [01:20<34:59,  1.64it/s]

Epoch [3/10], Step [131/3567], Loss: 2.9446


Epoch 3/10:   4%|▍         | 141/3568 [01:26<36:22,  1.57it/s]

Epoch [3/10], Step [141/3567], Loss: 2.9446


Epoch 3/10:   4%|▍         | 151/3568 [01:33<35:04,  1.62it/s]

Epoch [3/10], Step [151/3567], Loss: 2.9446


Epoch 3/10:   5%|▍         | 161/3568 [01:39<34:39,  1.64it/s]

Epoch [3/10], Step [161/3567], Loss: 2.9446


Epoch 3/10:   5%|▍         | 171/3568 [01:45<34:26,  1.64it/s]

Epoch [3/10], Step [171/3567], Loss: 2.9446


Epoch 3/10:   5%|▌         | 181/3568 [01:51<34:40,  1.63it/s]

Epoch [3/10], Step [181/3567], Loss: 2.9446


Epoch 3/10:   5%|▌         | 191/3568 [01:57<34:13,  1.64it/s]

Epoch [3/10], Step [191/3567], Loss: 2.9446


Epoch 3/10:   6%|▌         | 201/3568 [02:03<34:32,  1.62it/s]

Epoch [3/10], Step [201/3567], Loss: 2.9446


Epoch 3/10:   6%|▌         | 211/3568 [02:09<33:55,  1.65it/s]

Epoch [3/10], Step [211/3567], Loss: 2.9446


Epoch 3/10:   6%|▌         | 221/3568 [02:15<34:20,  1.62it/s]

Epoch [3/10], Step [221/3567], Loss: 2.9446


Epoch 3/10:   6%|▋         | 231/3568 [02:21<33:41,  1.65it/s]

Epoch [3/10], Step [231/3567], Loss: 2.9446


Epoch 3/10:   7%|▋         | 241/3568 [02:27<33:58,  1.63it/s]

Epoch [3/10], Step [241/3567], Loss: 2.9446


Epoch 3/10:   7%|▋         | 251/3568 [02:34<33:28,  1.65it/s]

Epoch [3/10], Step [251/3567], Loss: 2.9446


Epoch 3/10:   7%|▋         | 261/3568 [02:40<34:23,  1.60it/s]

Epoch [3/10], Step [261/3567], Loss: 2.9446


Epoch 3/10:   8%|▊         | 271/3568 [02:46<33:15,  1.65it/s]

Epoch [3/10], Step [271/3567], Loss: 2.9446


Epoch 3/10:   8%|▊         | 281/3568 [02:52<33:25,  1.64it/s]

Epoch [3/10], Step [281/3567], Loss: 2.9446


Epoch 3/10:   8%|▊         | 291/3568 [02:58<33:07,  1.65it/s]

Epoch [3/10], Step [291/3567], Loss: 2.9446


Epoch 3/10:   8%|▊         | 301/3568 [03:04<33:33,  1.62it/s]

Epoch [3/10], Step [301/3567], Loss: 2.9446


Epoch 3/10:   9%|▊         | 311/3568 [03:10<33:20,  1.63it/s]

Epoch [3/10], Step [311/3567], Loss: 2.9446


Epoch 3/10:   9%|▉         | 321/3568 [03:16<33:22,  1.62it/s]

Epoch [3/10], Step [321/3567], Loss: 2.9446


Epoch 3/10:   9%|▉         | 331/3568 [03:22<32:40,  1.65it/s]

Epoch [3/10], Step [331/3567], Loss: 2.9446


Epoch 3/10:  10%|▉         | 341/3568 [03:29<32:56,  1.63it/s]

Epoch [3/10], Step [341/3567], Loss: 2.9446


Epoch 3/10:  10%|▉         | 351/3568 [03:35<32:17,  1.66it/s]

Epoch [3/10], Step [351/3567], Loss: 2.9446


Epoch 3/10:  10%|█         | 361/3568 [03:41<33:23,  1.60it/s]

Epoch [3/10], Step [361/3567], Loss: 2.9446


Epoch 3/10:  10%|█         | 371/3568 [03:47<32:39,  1.63it/s]

Epoch [3/10], Step [371/3567], Loss: 2.9446


Epoch 3/10:  11%|█         | 381/3568 [03:53<32:03,  1.66it/s]

Epoch [3/10], Step [381/3567], Loss: 2.9446


Epoch 3/10:  11%|█         | 391/3568 [03:59<32:07,  1.65it/s]

Epoch [3/10], Step [391/3567], Loss: 2.9446


Epoch 3/10:  11%|█         | 401/3568 [04:05<32:09,  1.64it/s]

Epoch [3/10], Step [401/3567], Loss: 2.9446


Epoch 3/10:  12%|█▏        | 411/3568 [04:11<31:46,  1.66it/s]

Epoch [3/10], Step [411/3567], Loss: 2.9446


Epoch 3/10:  12%|█▏        | 421/3568 [04:18<32:28,  1.62it/s]

Epoch [3/10], Step [421/3567], Loss: 2.9446


Epoch 3/10:  12%|█▏        | 431/3568 [04:24<31:29,  1.66it/s]

Epoch [3/10], Step [431/3567], Loss: 2.9446


Epoch 3/10:  12%|█▏        | 441/3568 [04:32<42:47,  1.22it/s]

Epoch [3/10], Step [441/3567], Loss: 2.9446


Epoch 3/10:  13%|█▎        | 451/3568 [04:40<43:30,  1.19it/s]

Epoch [3/10], Step [451/3567], Loss: 2.9446


Epoch 3/10:  13%|█▎        | 461/3568 [04:48<42:32,  1.22it/s]

Epoch [3/10], Step [461/3567], Loss: 2.9446


Epoch 3/10:  13%|█▎        | 471/3568 [04:56<42:49,  1.21it/s]

Epoch [3/10], Step [471/3567], Loss: 2.9446


Epoch 3/10:  13%|█▎        | 481/3568 [05:04<38:10,  1.35it/s]

Epoch [3/10], Step [481/3567], Loss: 2.9446


Epoch 3/10:  14%|█▍        | 491/3568 [05:10<31:17,  1.64it/s]

Epoch [3/10], Step [491/3567], Loss: 2.9446


Epoch 3/10:  14%|█▍        | 501/3568 [05:16<30:44,  1.66it/s]

Epoch [3/10], Step [501/3567], Loss: 2.9446


Epoch 3/10:  14%|█▍        | 511/3568 [05:22<31:12,  1.63it/s]

Epoch [3/10], Step [511/3567], Loss: 2.9446


Epoch 3/10:  15%|█▍        | 521/3568 [05:29<30:53,  1.64it/s]

Epoch [3/10], Step [521/3567], Loss: 2.9446


Epoch 3/10:  15%|█▍        | 531/3568 [05:35<30:32,  1.66it/s]

Epoch [3/10], Step [531/3567], Loss: 2.9446


Epoch 3/10:  15%|█▌        | 541/3568 [05:41<30:27,  1.66it/s]

Epoch [3/10], Step [541/3567], Loss: 2.9446


Epoch 3/10:  15%|█▌        | 551/3568 [05:47<30:26,  1.65it/s]

Epoch [3/10], Step [551/3567], Loss: 2.9446


Epoch 3/10:  16%|█▌        | 561/3568 [05:53<30:48,  1.63it/s]

Epoch [3/10], Step [561/3567], Loss: 2.9446


Epoch 3/10:  16%|█▌        | 571/3568 [05:59<29:50,  1.67it/s]

Epoch [3/10], Step [571/3567], Loss: 2.9446


Epoch 3/10:  16%|█▋        | 581/3568 [06:05<30:31,  1.63it/s]

Epoch [3/10], Step [581/3567], Loss: 2.9446


Epoch 3/10:  17%|█▋        | 591/3568 [06:11<30:05,  1.65it/s]

Epoch [3/10], Step [591/3567], Loss: 2.9446


Epoch 3/10:  17%|█▋        | 601/3568 [06:18<30:11,  1.64it/s]

Epoch [3/10], Step [601/3567], Loss: 2.9446


Epoch 3/10:  17%|█▋        | 611/3568 [06:24<30:34,  1.61it/s]

Epoch [3/10], Step [611/3567], Loss: 2.9446


Epoch 3/10:  17%|█▋        | 621/3568 [06:30<29:44,  1.65it/s]

Epoch [3/10], Step [621/3567], Loss: 2.9446


Epoch 3/10:  18%|█▊        | 631/3568 [06:36<30:00,  1.63it/s]

Epoch [3/10], Step [631/3567], Loss: 2.9446


Epoch 3/10:  18%|█▊        | 641/3568 [06:42<30:07,  1.62it/s]

Epoch [3/10], Step [641/3567], Loss: 2.9446


Epoch 3/10:  18%|█▊        | 651/3568 [06:48<29:47,  1.63it/s]

Epoch [3/10], Step [651/3567], Loss: 2.9446


Epoch 3/10:  19%|█▊        | 661/3568 [06:54<29:38,  1.63it/s]

Epoch [3/10], Step [661/3567], Loss: 2.9446


Epoch 3/10:  19%|█▉        | 671/3568 [07:00<29:36,  1.63it/s]

Epoch [3/10], Step [671/3567], Loss: 2.9446


Epoch 3/10:  19%|█▉        | 681/3568 [07:06<29:35,  1.63it/s]

Epoch [3/10], Step [681/3567], Loss: 2.9446


Epoch 3/10:  19%|█▉        | 691/3568 [07:13<30:04,  1.59it/s]

Epoch [3/10], Step [691/3567], Loss: 2.9446


Epoch 3/10:  20%|█▉        | 701/3568 [07:19<29:27,  1.62it/s]

Epoch [3/10], Step [701/3567], Loss: 2.9446


Epoch 3/10:  20%|█▉        | 711/3568 [07:25<29:48,  1.60it/s]

Epoch [3/10], Step [711/3567], Loss: 2.9446


Epoch 3/10:  20%|██        | 721/3568 [07:31<29:02,  1.63it/s]

Epoch [3/10], Step [721/3567], Loss: 2.9446


Epoch 3/10:  20%|██        | 731/3568 [07:37<28:58,  1.63it/s]

Epoch [3/10], Step [731/3567], Loss: 2.9446


Epoch 3/10:  21%|██        | 741/3568 [07:43<28:57,  1.63it/s]

Epoch [3/10], Step [741/3567], Loss: 2.9446


Epoch 3/10:  21%|██        | 751/3568 [07:50<28:23,  1.65it/s]

Epoch [3/10], Step [751/3567], Loss: 2.9446


Epoch 3/10:  21%|██▏       | 761/3568 [07:56<28:33,  1.64it/s]

Epoch [3/10], Step [761/3567], Loss: 2.9446


Epoch 3/10:  22%|██▏       | 771/3568 [08:02<28:33,  1.63it/s]

Epoch [3/10], Step [771/3567], Loss: 2.9446


Epoch 3/10:  22%|██▏       | 781/3568 [08:08<28:36,  1.62it/s]

Epoch [3/10], Step [781/3567], Loss: 2.9446


Epoch 3/10:  22%|██▏       | 791/3568 [08:14<28:38,  1.62it/s]

Epoch [3/10], Step [791/3567], Loss: 2.9446


Epoch 3/10:  22%|██▏       | 801/3568 [08:20<27:54,  1.65it/s]

Epoch [3/10], Step [801/3567], Loss: 2.9446


Epoch 3/10:  23%|██▎       | 811/3568 [08:26<27:55,  1.65it/s]

Epoch [3/10], Step [811/3567], Loss: 2.9446


Epoch 3/10:  23%|██▎       | 821/3568 [08:33<27:43,  1.65it/s]

Epoch [3/10], Step [821/3567], Loss: 2.9446


Epoch 3/10:  23%|██▎       | 831/3568 [08:39<28:10,  1.62it/s]

Epoch [3/10], Step [831/3567], Loss: 2.9446


Epoch 3/10:  24%|██▎       | 841/3568 [08:45<28:05,  1.62it/s]

Epoch [3/10], Step [841/3567], Loss: 2.9446


Epoch 3/10:  24%|██▍       | 851/3568 [08:51<27:46,  1.63it/s]

Epoch [3/10], Step [851/3567], Loss: 2.9446


Epoch 3/10:  24%|██▍       | 861/3568 [08:57<29:01,  1.55it/s]

Epoch [3/10], Step [861/3567], Loss: 2.9446


Epoch 3/10:  24%|██▍       | 871/3568 [09:03<27:38,  1.63it/s]

Epoch [3/10], Step [871/3567], Loss: 2.9446


Epoch 3/10:  25%|██▍       | 881/3568 [09:09<27:16,  1.64it/s]

Epoch [3/10], Step [881/3567], Loss: 2.9446


Epoch 3/10:  25%|██▍       | 891/3568 [09:16<27:11,  1.64it/s]

Epoch [3/10], Step [891/3567], Loss: 2.9446


Epoch 3/10:  25%|██▌       | 901/3568 [09:22<27:15,  1.63it/s]

Epoch [3/10], Step [901/3567], Loss: 2.9446


Epoch 3/10:  26%|██▌       | 911/3568 [09:28<27:37,  1.60it/s]

Epoch [3/10], Step [911/3567], Loss: 2.9446


Epoch 3/10:  26%|██▌       | 921/3568 [09:34<27:08,  1.63it/s]

Epoch [3/10], Step [921/3567], Loss: 2.9446


Epoch 3/10:  26%|██▌       | 931/3568 [09:40<28:40,  1.53it/s]

Epoch [3/10], Step [931/3567], Loss: 2.9446


Epoch 3/10:  26%|██▋       | 941/3568 [09:47<26:54,  1.63it/s]

Epoch [3/10], Step [941/3567], Loss: 2.9446


Epoch 3/10:  27%|██▋       | 951/3568 [09:53<26:56,  1.62it/s]

Epoch [3/10], Step [951/3567], Loss: 2.9446


Epoch 3/10:  27%|██▋       | 961/3568 [09:59<26:28,  1.64it/s]

Epoch [3/10], Step [961/3567], Loss: 2.9446


Epoch 3/10:  27%|██▋       | 971/3568 [10:05<26:28,  1.63it/s]

Epoch [3/10], Step [971/3567], Loss: 2.9446


Epoch 3/10:  27%|██▋       | 981/3568 [10:11<26:17,  1.64it/s]

Epoch [3/10], Step [981/3567], Loss: 2.9446


Epoch 3/10:  28%|██▊       | 991/3568 [10:17<26:27,  1.62it/s]

Epoch [3/10], Step [991/3567], Loss: 2.9446


Epoch 3/10:  28%|██▊       | 1001/3568 [10:23<25:45,  1.66it/s]

Epoch [3/10], Step [1001/3567], Loss: 2.9446


Epoch 3/10:  28%|██▊       | 1011/3568 [10:30<25:57,  1.64it/s]

Epoch [3/10], Step [1011/3567], Loss: 2.9446


Epoch 3/10:  29%|██▊       | 1021/3568 [10:36<25:57,  1.64it/s]

Epoch [3/10], Step [1021/3567], Loss: 2.9446


Epoch 3/10:  29%|██▉       | 1031/3568 [10:42<25:40,  1.65it/s]

Epoch [3/10], Step [1031/3567], Loss: 2.9446


Epoch 3/10:  29%|██▉       | 1041/3568 [10:48<25:54,  1.63it/s]

Epoch [3/10], Step [1041/3567], Loss: 2.9446


Epoch 3/10:  29%|██▉       | 1051/3568 [10:54<25:40,  1.63it/s]

Epoch [3/10], Step [1051/3567], Loss: 2.9446


Epoch 3/10:  30%|██▉       | 1061/3568 [11:00<25:10,  1.66it/s]

Epoch [3/10], Step [1061/3567], Loss: 2.9446


Epoch 3/10:  30%|███       | 1071/3568 [11:06<25:24,  1.64it/s]

Epoch [3/10], Step [1071/3567], Loss: 2.9446


Epoch 3/10:  30%|███       | 1081/3568 [11:12<24:56,  1.66it/s]

Epoch [3/10], Step [1081/3567], Loss: 2.9446


Epoch 3/10:  31%|███       | 1091/3568 [11:18<24:56,  1.65it/s]

Epoch [3/10], Step [1091/3567], Loss: 2.9446


Epoch 3/10:  31%|███       | 1101/3568 [11:25<25:16,  1.63it/s]

Epoch [3/10], Step [1101/3567], Loss: 2.9446


Epoch 3/10:  31%|███       | 1111/3568 [11:31<24:37,  1.66it/s]

Epoch [3/10], Step [1111/3567], Loss: 2.9446


Epoch 3/10:  31%|███▏      | 1121/3568 [11:37<24:57,  1.63it/s]

Epoch [3/10], Step [1121/3567], Loss: 2.9446


Epoch 3/10:  32%|███▏      | 1131/3568 [11:43<24:42,  1.64it/s]

Epoch [3/10], Step [1131/3567], Loss: 2.9446


Epoch 3/10:  32%|███▏      | 1141/3568 [11:49<24:29,  1.65it/s]

Epoch [3/10], Step [1141/3567], Loss: 2.9446


Epoch 3/10:  32%|███▏      | 1151/3568 [11:55<24:32,  1.64it/s]

Epoch [3/10], Step [1151/3567], Loss: 2.9446


Epoch 3/10:  33%|███▎      | 1161/3568 [12:01<24:32,  1.64it/s]

Epoch [3/10], Step [1161/3567], Loss: 2.9446


Epoch 3/10:  33%|███▎      | 1171/3568 [12:07<24:22,  1.64it/s]

Epoch [3/10], Step [1171/3567], Loss: 2.9446


Epoch 3/10:  33%|███▎      | 1181/3568 [12:13<23:53,  1.66it/s]

Epoch [3/10], Step [1181/3567], Loss: 2.9446


Epoch 3/10:  33%|███▎      | 1191/3568 [12:20<24:23,  1.62it/s]

Epoch [3/10], Step [1191/3567], Loss: 2.9446


Epoch 3/10:  34%|███▎      | 1201/3568 [12:26<24:19,  1.62it/s]

Epoch [3/10], Step [1201/3567], Loss: 2.9446


Epoch 3/10:  34%|███▍      | 1211/3568 [12:32<23:56,  1.64it/s]

Epoch [3/10], Step [1211/3567], Loss: 2.9446


Epoch 3/10:  34%|███▍      | 1221/3568 [12:38<23:46,  1.64it/s]

Epoch [3/10], Step [1221/3567], Loss: 2.9446


Epoch 3/10:  35%|███▍      | 1231/3568 [12:44<23:48,  1.64it/s]

Epoch [3/10], Step [1231/3567], Loss: 2.9446


Epoch 3/10:  35%|███▍      | 1241/3568 [12:50<23:17,  1.66it/s]

Epoch [3/10], Step [1241/3567], Loss: 2.9446


Epoch 3/10:  35%|███▌      | 1251/3568 [12:56<23:28,  1.65it/s]

Epoch [3/10], Step [1251/3567], Loss: 2.9446


Epoch 3/10:  35%|███▌      | 1261/3568 [13:03<23:10,  1.66it/s]

Epoch [3/10], Step [1261/3567], Loss: 2.9446


Epoch 3/10:  36%|███▌      | 1271/3568 [13:09<23:10,  1.65it/s]

Epoch [3/10], Step [1271/3567], Loss: 2.9446


Epoch 3/10:  36%|███▌      | 1281/3568 [13:15<23:33,  1.62it/s]

Epoch [3/10], Step [1281/3567], Loss: 2.9446


Epoch 3/10:  36%|███▌      | 1291/3568 [13:21<23:15,  1.63it/s]

Epoch [3/10], Step [1291/3567], Loss: 2.9446


Epoch 3/10:  36%|███▋      | 1301/3568 [13:27<22:44,  1.66it/s]

Epoch [3/10], Step [1301/3567], Loss: 2.9446


Epoch 3/10:  37%|███▋      | 1311/3568 [13:33<22:43,  1.65it/s]

Epoch [3/10], Step [1311/3567], Loss: 2.9446


Epoch 3/10:  37%|███▋      | 1321/3568 [13:39<22:49,  1.64it/s]

Epoch [3/10], Step [1321/3567], Loss: 2.9446


Epoch 3/10:  37%|███▋      | 1331/3568 [13:45<22:50,  1.63it/s]

Epoch [3/10], Step [1331/3567], Loss: 2.9446


Epoch 3/10:  38%|███▊      | 1341/3568 [13:52<23:39,  1.57it/s]

Epoch [3/10], Step [1341/3567], Loss: 2.9446


Epoch 3/10:  38%|███▊      | 1351/3568 [13:58<22:52,  1.62it/s]

Epoch [3/10], Step [1351/3567], Loss: 2.9446


Epoch 3/10:  38%|███▊      | 1361/3568 [14:04<22:29,  1.64it/s]

Epoch [3/10], Step [1361/3567], Loss: 2.9446


Epoch 3/10:  38%|███▊      | 1371/3568 [14:10<22:28,  1.63it/s]

Epoch [3/10], Step [1371/3567], Loss: 2.9446


Epoch 3/10:  39%|███▊      | 1381/3568 [14:16<22:32,  1.62it/s]

Epoch [3/10], Step [1381/3567], Loss: 2.9446


Epoch 3/10:  39%|███▉      | 1391/3568 [14:22<22:56,  1.58it/s]

Epoch [3/10], Step [1391/3567], Loss: 2.9446


Epoch 3/10:  39%|███▉      | 1401/3568 [14:28<21:59,  1.64it/s]

Epoch [3/10], Step [1401/3567], Loss: 2.9446


Epoch 3/10:  40%|███▉      | 1411/3568 [14:35<22:22,  1.61it/s]

Epoch [3/10], Step [1411/3567], Loss: 2.9446


Epoch 3/10:  40%|███▉      | 1421/3568 [14:41<23:02,  1.55it/s]

Epoch [3/10], Step [1421/3567], Loss: 2.9446


Epoch 3/10:  40%|████      | 1431/3568 [14:47<21:39,  1.64it/s]

Epoch [3/10], Step [1431/3567], Loss: 2.9446


Epoch 3/10:  40%|████      | 1441/3568 [14:53<21:47,  1.63it/s]

Epoch [3/10], Step [1441/3567], Loss: 2.9446


Epoch 3/10:  41%|████      | 1451/3568 [14:59<21:46,  1.62it/s]

Epoch [3/10], Step [1451/3567], Loss: 2.9446


Epoch 3/10:  41%|████      | 1461/3568 [15:05<22:21,  1.57it/s]

Epoch [3/10], Step [1461/3567], Loss: 2.9446


Epoch 3/10:  41%|████      | 1471/3568 [15:12<21:21,  1.64it/s]

Epoch [3/10], Step [1471/3567], Loss: 2.9446


Epoch 3/10:  42%|████▏     | 1481/3568 [15:18<21:14,  1.64it/s]

Epoch [3/10], Step [1481/3567], Loss: 2.9446


Epoch 3/10:  42%|████▏     | 1491/3568 [15:24<21:12,  1.63it/s]

Epoch [3/10], Step [1491/3567], Loss: 2.9446


Epoch 3/10:  42%|████▏     | 1501/3568 [15:30<21:12,  1.62it/s]

Epoch [3/10], Step [1501/3567], Loss: 2.9446


Epoch 3/10:  42%|████▏     | 1511/3568 [15:36<21:02,  1.63it/s]

Epoch [3/10], Step [1511/3567], Loss: 2.9446


Epoch 3/10:  43%|████▎     | 1521/3568 [15:42<21:19,  1.60it/s]

Epoch [3/10], Step [1521/3567], Loss: 2.9446


Epoch 3/10:  43%|████▎     | 1531/3568 [15:48<20:47,  1.63it/s]

Epoch [3/10], Step [1531/3567], Loss: 2.9446


Epoch 3/10:  43%|████▎     | 1541/3568 [15:55<20:38,  1.64it/s]

Epoch [3/10], Step [1541/3567], Loss: 2.9446


Epoch 3/10:  43%|████▎     | 1551/3568 [16:01<20:31,  1.64it/s]

Epoch [3/10], Step [1551/3567], Loss: 2.9446


Epoch 3/10:  44%|████▍     | 1561/3568 [16:07<20:16,  1.65it/s]

Epoch [3/10], Step [1561/3567], Loss: 2.9446


Epoch 3/10:  44%|████▍     | 1571/3568 [16:13<20:19,  1.64it/s]

Epoch [3/10], Step [1571/3567], Loss: 2.9446


Epoch 3/10:  44%|████▍     | 1581/3568 [16:19<20:17,  1.63it/s]

Epoch [3/10], Step [1581/3567], Loss: 2.9446


Epoch 3/10:  45%|████▍     | 1591/3568 [16:25<19:47,  1.67it/s]

Epoch [3/10], Step [1591/3567], Loss: 2.9446


Epoch 3/10:  45%|████▍     | 1601/3568 [16:31<19:53,  1.65it/s]

Epoch [3/10], Step [1601/3567], Loss: 2.9446


Epoch 3/10:  45%|████▌     | 1611/3568 [16:37<19:36,  1.66it/s]

Epoch [3/10], Step [1611/3567], Loss: 2.9446


Epoch 3/10:  45%|████▌     | 1621/3568 [16:44<20:44,  1.56it/s]

Epoch [3/10], Step [1621/3567], Loss: 2.9446


Epoch 3/10:  46%|████▌     | 1631/3568 [16:50<19:30,  1.66it/s]

Epoch [3/10], Step [1631/3567], Loss: 2.9446


Epoch 3/10:  46%|████▌     | 1641/3568 [16:56<19:29,  1.65it/s]

Epoch [3/10], Step [1641/3567], Loss: 2.9446


Epoch 3/10:  46%|████▋     | 1651/3568 [17:02<19:44,  1.62it/s]

Epoch [3/10], Step [1651/3567], Loss: 2.9446


Epoch 3/10:  47%|████▋     | 1661/3568 [17:08<19:26,  1.63it/s]

Epoch [3/10], Step [1661/3567], Loss: 2.9446


Epoch 3/10:  47%|████▋     | 1671/3568 [17:14<19:31,  1.62it/s]

Epoch [3/10], Step [1671/3567], Loss: 2.9446


Epoch 3/10:  47%|████▋     | 1681/3568 [17:20<19:21,  1.62it/s]

Epoch [3/10], Step [1681/3567], Loss: 2.9446


Epoch 3/10:  47%|████▋     | 1691/3568 [17:27<18:47,  1.66it/s]

Epoch [3/10], Step [1691/3567], Loss: 2.9446


Epoch 3/10:  48%|████▊     | 1701/3568 [17:33<19:06,  1.63it/s]

Epoch [3/10], Step [1701/3567], Loss: 2.9446


Epoch 3/10:  48%|████▊     | 1711/3568 [17:39<19:01,  1.63it/s]

Epoch [3/10], Step [1711/3567], Loss: 2.9446


Epoch 3/10:  48%|████▊     | 1721/3568 [17:45<18:57,  1.62it/s]

Epoch [3/10], Step [1721/3567], Loss: 2.9446


Epoch 3/10:  49%|████▊     | 1731/3568 [17:51<18:26,  1.66it/s]

Epoch [3/10], Step [1731/3567], Loss: 2.9446


Epoch 3/10:  49%|████▉     | 1741/3568 [17:57<18:40,  1.63it/s]

Epoch [3/10], Step [1741/3567], Loss: 2.9446


Epoch 3/10:  49%|████▉     | 1751/3568 [18:03<18:29,  1.64it/s]

Epoch [3/10], Step [1751/3567], Loss: 2.9446


Epoch 3/10:  49%|████▉     | 1761/3568 [18:09<18:20,  1.64it/s]

Epoch [3/10], Step [1761/3567], Loss: 2.9446


Epoch 3/10:  50%|████▉     | 1771/3568 [18:16<18:35,  1.61it/s]

Epoch [3/10], Step [1771/3567], Loss: 2.9446


Epoch 3/10:  50%|████▉     | 1781/3568 [18:22<18:19,  1.63it/s]

Epoch [3/10], Step [1781/3567], Loss: 2.9446


Epoch 3/10:  50%|█████     | 1791/3568 [18:28<18:00,  1.64it/s]

Epoch [3/10], Step [1791/3567], Loss: 2.9446


Epoch 3/10:  50%|█████     | 1801/3568 [18:34<18:10,  1.62it/s]

Epoch [3/10], Step [1801/3567], Loss: 2.9446


Epoch 3/10:  51%|█████     | 1811/3568 [18:40<17:42,  1.65it/s]

Epoch [3/10], Step [1811/3567], Loss: 2.9446


Epoch 3/10:  51%|█████     | 1821/3568 [18:46<18:43,  1.55it/s]

Epoch [3/10], Step [1821/3567], Loss: 2.9446


Epoch 3/10:  51%|█████▏    | 1831/3568 [18:53<17:47,  1.63it/s]

Epoch [3/10], Step [1831/3567], Loss: 2.9446


Epoch 3/10:  52%|█████▏    | 1841/3568 [18:59<17:26,  1.65it/s]

Epoch [3/10], Step [1841/3567], Loss: 2.9446


Epoch 3/10:  52%|█████▏    | 1851/3568 [19:05<19:17,  1.48it/s]

Epoch [3/10], Step [1851/3567], Loss: 2.9446


Epoch 3/10:  52%|█████▏    | 1861/3568 [19:11<17:18,  1.64it/s]

Epoch [3/10], Step [1861/3567], Loss: 2.9446


Epoch 3/10:  52%|█████▏    | 1871/3568 [19:17<17:02,  1.66it/s]

Epoch [3/10], Step [1871/3567], Loss: 2.9446


Epoch 3/10:  53%|█████▎    | 1881/3568 [19:23<17:04,  1.65it/s]

Epoch [3/10], Step [1881/3567], Loss: 2.9446


Epoch 3/10:  53%|█████▎    | 1891/3568 [19:29<17:08,  1.63it/s]

Epoch [3/10], Step [1891/3567], Loss: 2.9446


Epoch 3/10:  53%|█████▎    | 1901/3568 [19:36<17:21,  1.60it/s]

Epoch [3/10], Step [1901/3567], Loss: 2.9446


Epoch 3/10:  54%|█████▎    | 1911/3568 [19:42<16:57,  1.63it/s]

Epoch [3/10], Step [1911/3567], Loss: 2.9446


Epoch 3/10:  54%|█████▍    | 1921/3568 [19:48<16:50,  1.63it/s]

Epoch [3/10], Step [1921/3567], Loss: 2.9446


Epoch 3/10:  54%|█████▍    | 1931/3568 [19:54<16:42,  1.63it/s]

Epoch [3/10], Step [1931/3567], Loss: 2.9446


Epoch 3/10:  54%|█████▍    | 1941/3568 [20:00<16:56,  1.60it/s]

Epoch [3/10], Step [1941/3567], Loss: 2.9446


Epoch 3/10:  55%|█████▍    | 1951/3568 [20:06<16:38,  1.62it/s]

Epoch [3/10], Step [1951/3567], Loss: 2.9446


Epoch 3/10:  55%|█████▍    | 1961/3568 [20:12<16:21,  1.64it/s]

Epoch [3/10], Step [1961/3567], Loss: 2.9446


Epoch 3/10:  55%|█████▌    | 1971/3568 [20:19<16:22,  1.62it/s]

Epoch [3/10], Step [1971/3567], Loss: 2.9446


Epoch 3/10:  56%|█████▌    | 1981/3568 [20:25<16:08,  1.64it/s]

Epoch [3/10], Step [1981/3567], Loss: 2.9446


Epoch 3/10:  56%|█████▌    | 1991/3568 [20:31<15:59,  1.64it/s]

Epoch [3/10], Step [1991/3567], Loss: 2.9446


Epoch 3/10:  56%|█████▌    | 2001/3568 [20:37<15:38,  1.67it/s]

Epoch [3/10], Step [2001/3567], Loss: 2.9446


Epoch 3/10:  56%|█████▋    | 2011/3568 [20:43<16:05,  1.61it/s]

Epoch [3/10], Step [2011/3567], Loss: 2.9446


Epoch 3/10:  57%|█████▋    | 2021/3568 [20:49<15:57,  1.62it/s]

Epoch [3/10], Step [2021/3567], Loss: 2.9446


Epoch 3/10:  57%|█████▋    | 2031/3568 [20:56<15:42,  1.63it/s]

Epoch [3/10], Step [2031/3567], Loss: 2.9446


Epoch 3/10:  57%|█████▋    | 2041/3568 [21:02<15:50,  1.61it/s]

Epoch [3/10], Step [2041/3567], Loss: 2.9446


Epoch 3/10:  57%|█████▋    | 2051/3568 [21:08<15:28,  1.63it/s]

Epoch [3/10], Step [2051/3567], Loss: 2.9446


Epoch 3/10:  58%|█████▊    | 2061/3568 [21:14<15:27,  1.62it/s]

Epoch [3/10], Step [2061/3567], Loss: 2.9446


Epoch 3/10:  58%|█████▊    | 2071/3568 [21:20<15:49,  1.58it/s]

Epoch [3/10], Step [2071/3567], Loss: 2.9446


Epoch 3/10:  58%|█████▊    | 2081/3568 [21:27<15:29,  1.60it/s]

Epoch [3/10], Step [2081/3567], Loss: 2.9446


Epoch 3/10:  59%|█████▊    | 2091/3568 [21:33<15:10,  1.62it/s]

Epoch [3/10], Step [2091/3567], Loss: 2.9446


Epoch 3/10:  59%|█████▉    | 2101/3568 [21:39<14:52,  1.64it/s]

Epoch [3/10], Step [2101/3567], Loss: 2.9446


Epoch 3/10:  59%|█████▉    | 2111/3568 [21:45<14:47,  1.64it/s]

Epoch [3/10], Step [2111/3567], Loss: 2.9446


Epoch 3/10:  59%|█████▉    | 2121/3568 [21:51<14:59,  1.61it/s]

Epoch [3/10], Step [2121/3567], Loss: 2.9446


Epoch 3/10:  60%|█████▉    | 2131/3568 [21:57<14:53,  1.61it/s]

Epoch [3/10], Step [2131/3567], Loss: 2.9446


Epoch 3/10:  60%|██████    | 2141/3568 [22:03<14:26,  1.65it/s]

Epoch [3/10], Step [2141/3567], Loss: 2.9446


Epoch 3/10:  60%|██████    | 2151/3568 [22:10<14:57,  1.58it/s]

Epoch [3/10], Step [2151/3567], Loss: 2.9446


Epoch 3/10:  61%|██████    | 2161/3568 [22:16<14:10,  1.65it/s]

Epoch [3/10], Step [2161/3567], Loss: 2.9446


Epoch 3/10:  61%|██████    | 2171/3568 [22:22<14:25,  1.61it/s]

Epoch [3/10], Step [2171/3567], Loss: 2.9446


Epoch 3/10:  61%|██████    | 2181/3568 [22:28<14:07,  1.64it/s]

Epoch [3/10], Step [2181/3567], Loss: 2.9446


Epoch 3/10:  61%|██████▏   | 2191/3568 [22:34<13:46,  1.67it/s]

Epoch [3/10], Step [2191/3567], Loss: 2.9446


Epoch 3/10:  62%|██████▏   | 2201/3568 [22:41<15:09,  1.50it/s]

Epoch [3/10], Step [2201/3567], Loss: 2.9446


Epoch 3/10:  62%|██████▏   | 2211/3568 [22:47<13:58,  1.62it/s]

Epoch [3/10], Step [2211/3567], Loss: 2.9446


Epoch 3/10:  62%|██████▏   | 2221/3568 [22:53<13:45,  1.63it/s]

Epoch [3/10], Step [2221/3567], Loss: 2.9446


Epoch 3/10:  63%|██████▎   | 2231/3568 [22:59<13:45,  1.62it/s]

Epoch [3/10], Step [2231/3567], Loss: 2.9446


Epoch 3/10:  63%|██████▎   | 2241/3568 [23:05<13:25,  1.65it/s]

Epoch [3/10], Step [2241/3567], Loss: 2.9446


Epoch 3/10:  63%|██████▎   | 2251/3568 [23:11<13:17,  1.65it/s]

Epoch [3/10], Step [2251/3567], Loss: 2.9446


Epoch 3/10:  63%|██████▎   | 2261/3568 [23:17<13:15,  1.64it/s]

Epoch [3/10], Step [2261/3567], Loss: 2.9446


Epoch 3/10:  64%|██████▎   | 2271/3568 [23:23<13:19,  1.62it/s]

Epoch [3/10], Step [2271/3567], Loss: 2.9446


Epoch 3/10:  64%|██████▍   | 2281/3568 [23:30<13:18,  1.61it/s]

Epoch [3/10], Step [2281/3567], Loss: 2.9446


Epoch 3/10:  64%|██████▍   | 2291/3568 [23:36<13:06,  1.62it/s]

Epoch [3/10], Step [2291/3567], Loss: 2.9446


Epoch 3/10:  64%|██████▍   | 2301/3568 [23:42<12:50,  1.65it/s]

Epoch [3/10], Step [2301/3567], Loss: 2.9446


Epoch 3/10:  65%|██████▍   | 2311/3568 [23:48<13:00,  1.61it/s]

Epoch [3/10], Step [2311/3567], Loss: 2.9446


Epoch 3/10:  65%|██████▌   | 2321/3568 [23:54<12:40,  1.64it/s]

Epoch [3/10], Step [2321/3567], Loss: 2.9446


Epoch 3/10:  65%|██████▌   | 2331/3568 [24:00<12:39,  1.63it/s]

Epoch [3/10], Step [2331/3567], Loss: 2.9446


Epoch 3/10:  66%|██████▌   | 2341/3568 [24:06<12:27,  1.64it/s]

Epoch [3/10], Step [2341/3567], Loss: 2.9446


Epoch 3/10:  66%|██████▌   | 2351/3568 [24:13<12:56,  1.57it/s]

Epoch [3/10], Step [2351/3567], Loss: 2.9446


Epoch 3/10:  66%|██████▌   | 2361/3568 [24:19<12:15,  1.64it/s]

Epoch [3/10], Step [2361/3567], Loss: 2.9446


Epoch 3/10:  66%|██████▋   | 2371/3568 [24:25<12:14,  1.63it/s]

Epoch [3/10], Step [2371/3567], Loss: 2.9446


Epoch 3/10:  67%|██████▋   | 2381/3568 [24:31<12:05,  1.64it/s]

Epoch [3/10], Step [2381/3567], Loss: 2.9446


Epoch 3/10:  67%|██████▋   | 2391/3568 [24:37<11:56,  1.64it/s]

Epoch [3/10], Step [2391/3567], Loss: 2.9446


Epoch 3/10:  67%|██████▋   | 2401/3568 [24:43<12:10,  1.60it/s]

Epoch [3/10], Step [2401/3567], Loss: 2.9446


Epoch 3/10:  68%|██████▊   | 2411/3568 [24:49<11:42,  1.65it/s]

Epoch [3/10], Step [2411/3567], Loss: 2.9446


Epoch 3/10:  68%|██████▊   | 2421/3568 [24:56<11:42,  1.63it/s]

Epoch [3/10], Step [2421/3567], Loss: 2.9446


Epoch 3/10:  68%|██████▊   | 2431/3568 [25:02<11:41,  1.62it/s]

Epoch [3/10], Step [2431/3567], Loss: 2.9446


Epoch 3/10:  68%|██████▊   | 2441/3568 [25:08<11:57,  1.57it/s]

Epoch [3/10], Step [2441/3567], Loss: 2.9446


Epoch 3/10:  69%|██████▊   | 2451/3568 [25:14<11:18,  1.65it/s]

Epoch [3/10], Step [2451/3567], Loss: 2.9446


Epoch 3/10:  69%|██████▉   | 2461/3568 [25:20<11:26,  1.61it/s]

Epoch [3/10], Step [2461/3567], Loss: 2.9446


Epoch 3/10:  69%|██████▉   | 2471/3568 [25:26<11:02,  1.65it/s]

Epoch [3/10], Step [2471/3567], Loss: 2.9446


Epoch 3/10:  70%|██████▉   | 2481/3568 [25:32<11:06,  1.63it/s]

Epoch [3/10], Step [2481/3567], Loss: 2.9446


Epoch 3/10:  70%|██████▉   | 2491/3568 [25:38<10:49,  1.66it/s]

Epoch [3/10], Step [2491/3567], Loss: 2.9446


Epoch 3/10:  70%|███████   | 2501/3568 [25:44<10:49,  1.64it/s]

Epoch [3/10], Step [2501/3567], Loss: 2.9446


Epoch 3/10:  70%|███████   | 2511/3568 [25:51<10:42,  1.65it/s]

Epoch [3/10], Step [2511/3567], Loss: 2.9446


Epoch 3/10:  71%|███████   | 2521/3568 [25:57<10:31,  1.66it/s]

Epoch [3/10], Step [2521/3567], Loss: 2.9446


Epoch 3/10:  71%|███████   | 2531/3568 [26:03<10:58,  1.58it/s]

Epoch [3/10], Step [2531/3567], Loss: 2.9446


Epoch 3/10:  71%|███████   | 2541/3568 [26:09<10:21,  1.65it/s]

Epoch [3/10], Step [2541/3567], Loss: 2.9446


Epoch 3/10:  71%|███████▏  | 2551/3568 [26:15<10:47,  1.57it/s]

Epoch [3/10], Step [2551/3567], Loss: 2.9446


Epoch 3/10:  72%|███████▏  | 2561/3568 [26:21<10:08,  1.65it/s]

Epoch [3/10], Step [2561/3567], Loss: 2.9446


Epoch 3/10:  72%|███████▏  | 2571/3568 [26:28<10:24,  1.60it/s]

Epoch [3/10], Step [2571/3567], Loss: 2.9446


Epoch 3/10:  72%|███████▏  | 2581/3568 [26:34<10:12,  1.61it/s]

Epoch [3/10], Step [2581/3567], Loss: 2.9446


Epoch 3/10:  73%|███████▎  | 2591/3568 [26:40<09:51,  1.65it/s]

Epoch [3/10], Step [2591/3567], Loss: 2.9446


Epoch 3/10:  73%|███████▎  | 2601/3568 [26:46<10:03,  1.60it/s]

Epoch [3/10], Step [2601/3567], Loss: 2.9446


Epoch 3/10:  73%|███████▎  | 2611/3568 [26:52<09:43,  1.64it/s]

Epoch [3/10], Step [2611/3567], Loss: 2.9446


Epoch 3/10:  73%|███████▎  | 2621/3568 [26:58<09:39,  1.64it/s]

Epoch [3/10], Step [2621/3567], Loss: 2.9446


Epoch 3/10:  74%|███████▎  | 2631/3568 [27:04<09:33,  1.63it/s]

Epoch [3/10], Step [2631/3567], Loss: 2.9446


Epoch 3/10:  74%|███████▍  | 2641/3568 [27:11<09:30,  1.63it/s]

Epoch [3/10], Step [2641/3567], Loss: 2.9446


Epoch 3/10:  74%|███████▍  | 2651/3568 [27:17<09:15,  1.65it/s]

Epoch [3/10], Step [2651/3567], Loss: 2.9446


Epoch 3/10:  75%|███████▍  | 2661/3568 [27:23<09:20,  1.62it/s]

Epoch [3/10], Step [2661/3567], Loss: 2.9446


Epoch 3/10:  75%|███████▍  | 2671/3568 [27:29<09:13,  1.62it/s]

Epoch [3/10], Step [2671/3567], Loss: 2.9446


Epoch 3/10:  75%|███████▌  | 2681/3568 [27:35<09:08,  1.62it/s]

Epoch [3/10], Step [2681/3567], Loss: 2.9446


Epoch 3/10:  75%|███████▌  | 2691/3568 [27:41<08:55,  1.64it/s]

Epoch [3/10], Step [2691/3567], Loss: 2.9446


Epoch 3/10:  76%|███████▌  | 2701/3568 [27:47<08:51,  1.63it/s]

Epoch [3/10], Step [2701/3567], Loss: 2.9446


Epoch 3/10:  76%|███████▌  | 2711/3568 [27:54<08:45,  1.63it/s]

Epoch [3/10], Step [2711/3567], Loss: 2.9446


Epoch 3/10:  76%|███████▋  | 2721/3568 [28:00<08:40,  1.63it/s]

Epoch [3/10], Step [2721/3567], Loss: 2.9446


Epoch 3/10:  77%|███████▋  | 2731/3568 [28:06<08:33,  1.63it/s]

Epoch [3/10], Step [2731/3567], Loss: 2.9446


Epoch 3/10:  77%|███████▋  | 2741/3568 [28:12<08:25,  1.64it/s]

Epoch [3/10], Step [2741/3567], Loss: 2.9446


Epoch 3/10:  77%|███████▋  | 2751/3568 [28:18<08:38,  1.58it/s]

Epoch [3/10], Step [2751/3567], Loss: 2.9446


Epoch 3/10:  77%|███████▋  | 2761/3568 [28:24<08:26,  1.59it/s]

Epoch [3/10], Step [2761/3567], Loss: 2.9446


Epoch 3/10:  78%|███████▊  | 2771/3568 [28:31<08:06,  1.64it/s]

Epoch [3/10], Step [2771/3567], Loss: 2.9446


Epoch 3/10:  78%|███████▊  | 2781/3568 [28:37<07:53,  1.66it/s]

Epoch [3/10], Step [2781/3567], Loss: 2.9446


Epoch 3/10:  78%|███████▊  | 2791/3568 [28:43<07:48,  1.66it/s]

Epoch [3/10], Step [2791/3567], Loss: 2.9446


Epoch 3/10:  79%|███████▊  | 2801/3568 [28:49<07:53,  1.62it/s]

Epoch [3/10], Step [2801/3567], Loss: 2.9446


Epoch 3/10:  79%|███████▉  | 2811/3568 [28:55<07:48,  1.61it/s]

Epoch [3/10], Step [2811/3567], Loss: 2.9446


Epoch 3/10:  79%|███████▉  | 2821/3568 [29:01<07:44,  1.61it/s]

Epoch [3/10], Step [2821/3567], Loss: 2.9446


Epoch 3/10:  79%|███████▉  | 2831/3568 [29:08<07:35,  1.62it/s]

Epoch [3/10], Step [2831/3567], Loss: 2.9446


Epoch 3/10:  80%|███████▉  | 2841/3568 [29:14<07:36,  1.59it/s]

Epoch [3/10], Step [2841/3567], Loss: 2.9446


Epoch 3/10:  80%|███████▉  | 2851/3568 [29:20<07:24,  1.61it/s]

Epoch [3/10], Step [2851/3567], Loss: 2.9446


Epoch 3/10:  80%|████████  | 2861/3568 [29:26<07:13,  1.63it/s]

Epoch [3/10], Step [2861/3567], Loss: 2.9446


Epoch 3/10:  80%|████████  | 2871/3568 [29:32<07:04,  1.64it/s]

Epoch [3/10], Step [2871/3567], Loss: 2.9446


Epoch 3/10:  81%|████████  | 2881/3568 [29:38<07:00,  1.63it/s]

Epoch [3/10], Step [2881/3567], Loss: 2.9446


Epoch 3/10:  81%|████████  | 2891/3568 [29:45<07:00,  1.61it/s]

Epoch [3/10], Step [2891/3567], Loss: 2.9446


Epoch 3/10:  81%|████████▏ | 2901/3568 [29:51<06:47,  1.63it/s]

Epoch [3/10], Step [2901/3567], Loss: 2.9446


Epoch 3/10:  82%|████████▏ | 2911/3568 [29:57<06:56,  1.58it/s]

Epoch [3/10], Step [2911/3567], Loss: 2.9446


Epoch 3/10:  82%|████████▏ | 2921/3568 [30:03<06:36,  1.63it/s]

Epoch [3/10], Step [2921/3567], Loss: 2.9446


Epoch 3/10:  82%|████████▏ | 2931/3568 [30:09<06:29,  1.63it/s]

Epoch [3/10], Step [2931/3567], Loss: 2.9446


Epoch 3/10:  82%|████████▏ | 2941/3568 [30:16<06:44,  1.55it/s]

Epoch [3/10], Step [2941/3567], Loss: 2.9446


Epoch 3/10:  83%|████████▎ | 2951/3568 [30:22<06:22,  1.61it/s]

Epoch [3/10], Step [2951/3567], Loss: 2.9446


Epoch 3/10:  83%|████████▎ | 2961/3568 [30:28<06:05,  1.66it/s]

Epoch [3/10], Step [2961/3567], Loss: 2.9446


Epoch 3/10:  83%|████████▎ | 2971/3568 [30:34<06:04,  1.64it/s]

Epoch [3/10], Step [2971/3567], Loss: 2.9446


Epoch 3/10:  84%|████████▎ | 2981/3568 [30:40<06:09,  1.59it/s]

Epoch [3/10], Step [2981/3567], Loss: 2.9446


Epoch 3/10:  84%|████████▍ | 2991/3568 [30:46<05:51,  1.64it/s]

Epoch [3/10], Step [2991/3567], Loss: 2.9446


Epoch 3/10:  84%|████████▍ | 3001/3568 [30:52<05:42,  1.66it/s]

Epoch [3/10], Step [3001/3567], Loss: 2.9446


Epoch 3/10:  84%|████████▍ | 3011/3568 [30:59<05:59,  1.55it/s]

Epoch [3/10], Step [3011/3567], Loss: 2.9446


Epoch 3/10:  85%|████████▍ | 3021/3568 [31:05<05:38,  1.62it/s]

Epoch [3/10], Step [3021/3567], Loss: 2.9446


Epoch 3/10:  85%|████████▍ | 3031/3568 [31:11<05:23,  1.66it/s]

Epoch [3/10], Step [3031/3567], Loss: 2.9446


Epoch 3/10:  85%|████████▌ | 3041/3568 [31:17<05:15,  1.67it/s]

Epoch [3/10], Step [3041/3567], Loss: 2.9446


Epoch 3/10:  86%|████████▌ | 3051/3568 [31:23<05:16,  1.63it/s]

Epoch [3/10], Step [3051/3567], Loss: 2.9446


Epoch 3/10:  86%|████████▌ | 3061/3568 [31:29<05:14,  1.61it/s]

Epoch [3/10], Step [3061/3567], Loss: 2.9446


Epoch 3/10:  86%|████████▌ | 3071/3568 [31:35<05:04,  1.63it/s]

Epoch [3/10], Step [3071/3567], Loss: 2.9446


Epoch 3/10:  86%|████████▋ | 3081/3568 [31:41<04:53,  1.66it/s]

Epoch [3/10], Step [3081/3567], Loss: 2.9446


Epoch 3/10:  87%|████████▋ | 3091/3568 [31:48<04:58,  1.60it/s]

Epoch [3/10], Step [3091/3567], Loss: 2.9446


Epoch 3/10:  87%|████████▋ | 3101/3568 [31:54<04:41,  1.66it/s]

Epoch [3/10], Step [3101/3567], Loss: 2.9446


Epoch 3/10:  87%|████████▋ | 3111/3568 [32:00<04:40,  1.63it/s]

Epoch [3/10], Step [3111/3567], Loss: 2.9446


Epoch 3/10:  87%|████████▋ | 3121/3568 [32:06<04:47,  1.55it/s]

Epoch [3/10], Step [3121/3567], Loss: 2.9446


Epoch 3/10:  88%|████████▊ | 3131/3568 [32:12<04:35,  1.59it/s]

Epoch [3/10], Step [3131/3567], Loss: 2.9446


Epoch 3/10:  88%|████████▊ | 3141/3568 [32:19<04:41,  1.52it/s]

Epoch [3/10], Step [3141/3567], Loss: 2.9446


Epoch 3/10:  88%|████████▊ | 3151/3568 [32:25<04:17,  1.62it/s]

Epoch [3/10], Step [3151/3567], Loss: 2.9446


Epoch 3/10:  89%|████████▊ | 3161/3568 [32:31<04:06,  1.65it/s]

Epoch [3/10], Step [3161/3567], Loss: 2.9446


Epoch 3/10:  89%|████████▉ | 3171/3568 [32:37<04:05,  1.61it/s]

Epoch [3/10], Step [3171/3567], Loss: 2.9446


Epoch 3/10:  89%|████████▉ | 3181/3568 [32:43<04:00,  1.61it/s]

Epoch [3/10], Step [3181/3567], Loss: 2.9446


Epoch 3/10:  89%|████████▉ | 3191/3568 [32:49<03:51,  1.63it/s]

Epoch [3/10], Step [3191/3567], Loss: 2.9446


Epoch 3/10:  90%|████████▉ | 3201/3568 [32:55<03:47,  1.62it/s]

Epoch [3/10], Step [3201/3567], Loss: 2.9446


Epoch 3/10:  90%|████████▉ | 3211/3568 [33:02<03:35,  1.66it/s]

Epoch [3/10], Step [3211/3567], Loss: 2.9446


Epoch 3/10:  90%|█████████ | 3221/3568 [33:08<03:31,  1.64it/s]

Epoch [3/10], Step [3221/3567], Loss: 2.9446


Epoch 3/10:  91%|█████████ | 3231/3568 [33:14<03:24,  1.64it/s]

Epoch [3/10], Step [3231/3567], Loss: 2.9446


Epoch 3/10:  91%|█████████ | 3241/3568 [33:20<03:19,  1.64it/s]

Epoch [3/10], Step [3241/3567], Loss: 2.9446


Epoch 3/10:  91%|█████████ | 3251/3568 [33:26<03:13,  1.64it/s]

Epoch [3/10], Step [3251/3567], Loss: 2.9446


Epoch 3/10:  91%|█████████▏| 3261/3568 [33:32<03:06,  1.65it/s]

Epoch [3/10], Step [3261/3567], Loss: 2.9446


Epoch 3/10:  92%|█████████▏| 3271/3568 [33:38<03:00,  1.65it/s]

Epoch [3/10], Step [3271/3567], Loss: 2.9446


Epoch 3/10:  92%|█████████▏| 3281/3568 [33:45<03:06,  1.54it/s]

Epoch [3/10], Step [3281/3567], Loss: 2.9446


Epoch 3/10:  92%|█████████▏| 3291/3568 [33:51<02:56,  1.57it/s]

Epoch [3/10], Step [3291/3567], Loss: 2.9446


Epoch 3/10:  93%|█████████▎| 3301/3568 [33:57<02:44,  1.62it/s]

Epoch [3/10], Step [3301/3567], Loss: 2.9446


Epoch 3/10:  93%|█████████▎| 3311/3568 [34:03<02:35,  1.65it/s]

Epoch [3/10], Step [3311/3567], Loss: 2.9446


Epoch 3/10:  93%|█████████▎| 3321/3568 [34:10<02:34,  1.60it/s]

Epoch [3/10], Step [3321/3567], Loss: 2.9446


Epoch 3/10:  93%|█████████▎| 3331/3568 [34:16<02:24,  1.64it/s]

Epoch [3/10], Step [3331/3567], Loss: 2.9446


Epoch 3/10:  94%|█████████▎| 3341/3568 [34:22<02:25,  1.56it/s]

Epoch [3/10], Step [3341/3567], Loss: 2.9446


Epoch 3/10:  94%|█████████▍| 3351/3568 [34:28<02:10,  1.66it/s]

Epoch [3/10], Step [3351/3567], Loss: 2.9446


Epoch 3/10:  94%|█████████▍| 3361/3568 [34:34<02:05,  1.65it/s]

Epoch [3/10], Step [3361/3567], Loss: 2.9446


Epoch 3/10:  94%|█████████▍| 3371/3568 [34:40<02:00,  1.63it/s]

Epoch [3/10], Step [3371/3567], Loss: 2.9446


Epoch 3/10:  95%|█████████▍| 3381/3568 [34:46<01:55,  1.62it/s]

Epoch [3/10], Step [3381/3567], Loss: 2.9446


Epoch 3/10:  95%|█████████▌| 3391/3568 [34:53<01:48,  1.63it/s]

Epoch [3/10], Step [3391/3567], Loss: 2.9446


Epoch 3/10:  95%|█████████▌| 3401/3568 [34:59<01:40,  1.66it/s]

Epoch [3/10], Step [3401/3567], Loss: 2.9446


Epoch 3/10:  96%|█████████▌| 3411/3568 [35:05<01:35,  1.64it/s]

Epoch [3/10], Step [3411/3567], Loss: 2.9446


Epoch 3/10:  96%|█████████▌| 3421/3568 [35:11<01:29,  1.65it/s]

Epoch [3/10], Step [3421/3567], Loss: 2.9446


Epoch 3/10:  96%|█████████▌| 3431/3568 [35:17<01:23,  1.63it/s]

Epoch [3/10], Step [3431/3567], Loss: 2.9446


Epoch 3/10:  96%|█████████▋| 3441/3568 [35:23<01:18,  1.61it/s]

Epoch [3/10], Step [3441/3567], Loss: 2.9446


Epoch 3/10:  97%|█████████▋| 3451/3568 [35:29<01:10,  1.65it/s]

Epoch [3/10], Step [3451/3567], Loss: 2.9446


Epoch 3/10:  97%|█████████▋| 3461/3568 [35:35<01:06,  1.61it/s]

Epoch [3/10], Step [3461/3567], Loss: 2.9446


Epoch 3/10:  97%|█████████▋| 3471/3568 [35:41<00:59,  1.63it/s]

Epoch [3/10], Step [3471/3567], Loss: 2.9446


Epoch 3/10:  98%|█████████▊| 3481/3568 [35:48<00:52,  1.65it/s]

Epoch [3/10], Step [3481/3567], Loss: 2.9446


Epoch 3/10:  98%|█████████▊| 3491/3568 [35:54<00:47,  1.63it/s]

Epoch [3/10], Step [3491/3567], Loss: 2.9446


Epoch 3/10:  98%|█████████▊| 3501/3568 [36:00<00:43,  1.55it/s]

Epoch [3/10], Step [3501/3567], Loss: 2.9446


Epoch 3/10:  98%|█████████▊| 3511/3568 [36:06<00:37,  1.53it/s]

Epoch [3/10], Step [3511/3567], Loss: 2.9446


Epoch 3/10:  99%|█████████▊| 3521/3568 [36:13<00:29,  1.61it/s]

Epoch [3/10], Step [3521/3567], Loss: 2.9446


Epoch 3/10:  99%|█████████▉| 3531/3568 [36:19<00:22,  1.64it/s]

Epoch [3/10], Step [3531/3567], Loss: 2.9446


Epoch 3/10:  99%|█████████▉| 3541/3568 [36:25<00:16,  1.65it/s]

Epoch [3/10], Step [3541/3567], Loss: 2.9446


Epoch 3/10: 100%|█████████▉| 3551/3568 [36:31<00:10,  1.61it/s]

Epoch [3/10], Step [3551/3567], Loss: 2.9446


Epoch 3/10: 100%|█████████▉| 3561/3568 [36:37<00:04,  1.64it/s]

Epoch [3/10], Step [3561/3567], Loss: 2.9446


Epoch 3/10: 100%|█████████▉| 3567/3568 [36:41<00:00,  1.62it/s]


Epoch [3/10] Average Loss: 2.9446


Epoch 4/10:   0%|          | 1/3568 [00:00<37:43,  1.58it/s]

Epoch [4/10], Step [1/3567], Loss: 2.9446


Epoch 4/10:   0%|          | 11/3568 [00:06<36:58,  1.60it/s]

Epoch [4/10], Step [11/3567], Loss: 2.9446


Epoch 4/10:   1%|          | 21/3568 [00:12<35:26,  1.67it/s]

Epoch [4/10], Step [21/3567], Loss: 2.9446


Epoch 4/10:   1%|          | 31/3568 [00:19<36:17,  1.62it/s]

Epoch [4/10], Step [31/3567], Loss: 2.9446


Epoch 4/10:   1%|          | 41/3568 [00:25<36:01,  1.63it/s]

Epoch [4/10], Step [41/3567], Loss: 2.9446


Epoch 4/10:   1%|▏         | 51/3568 [00:31<36:12,  1.62it/s]

Epoch [4/10], Step [51/3567], Loss: 2.9446


Epoch 4/10:   2%|▏         | 61/3568 [00:37<35:47,  1.63it/s]

Epoch [4/10], Step [61/3567], Loss: 2.9446


Epoch 4/10:   2%|▏         | 71/3568 [00:43<35:39,  1.63it/s]

Epoch [4/10], Step [71/3567], Loss: 2.9446


Epoch 4/10:   2%|▏         | 81/3568 [00:49<35:56,  1.62it/s]

Epoch [4/10], Step [81/3567], Loss: 2.9446


Epoch 4/10:   3%|▎         | 91/3568 [00:55<36:11,  1.60it/s]

Epoch [4/10], Step [91/3567], Loss: 2.9446


Epoch 4/10:   3%|▎         | 101/3568 [01:02<35:16,  1.64it/s]

Epoch [4/10], Step [101/3567], Loss: 2.9446


Epoch 4/10:   3%|▎         | 111/3568 [01:08<34:43,  1.66it/s]

Epoch [4/10], Step [111/3567], Loss: 2.9446


Epoch 4/10:   3%|▎         | 121/3568 [01:14<35:09,  1.63it/s]

Epoch [4/10], Step [121/3567], Loss: 2.9446


Epoch 4/10:   4%|▎         | 131/3568 [01:20<35:14,  1.63it/s]

Epoch [4/10], Step [131/3567], Loss: 2.9446


Epoch 4/10:   4%|▍         | 141/3568 [01:26<34:29,  1.66it/s]

Epoch [4/10], Step [141/3567], Loss: 2.9446


Epoch 4/10:   4%|▍         | 151/3568 [01:32<35:23,  1.61it/s]

Epoch [4/10], Step [151/3567], Loss: 2.9446


Epoch 4/10:   5%|▍         | 161/3568 [01:38<34:46,  1.63it/s]

Epoch [4/10], Step [161/3567], Loss: 2.9446


Epoch 4/10:   5%|▍         | 171/3568 [01:44<34:13,  1.65it/s]

Epoch [4/10], Step [171/3567], Loss: 2.9446


Epoch 4/10:   5%|▌         | 181/3568 [01:50<34:19,  1.64it/s]

Epoch [4/10], Step [181/3567], Loss: 2.9446


Epoch 4/10:   5%|▌         | 191/3568 [01:57<34:23,  1.64it/s]

Epoch [4/10], Step [191/3567], Loss: 2.9446


Epoch 4/10:   6%|▌         | 201/3568 [02:03<33:55,  1.65it/s]

Epoch [4/10], Step [201/3567], Loss: 2.9446


Epoch 4/10:   6%|▌         | 211/3568 [02:09<34:46,  1.61it/s]

Epoch [4/10], Step [211/3567], Loss: 2.9446


Epoch 4/10:   6%|▌         | 221/3568 [02:15<34:14,  1.63it/s]

Epoch [4/10], Step [221/3567], Loss: 2.9446


Epoch 4/10:   6%|▋         | 231/3568 [02:21<34:31,  1.61it/s]

Epoch [4/10], Step [231/3567], Loss: 2.9446


Epoch 4/10:   7%|▋         | 241/3568 [02:27<33:59,  1.63it/s]

Epoch [4/10], Step [241/3567], Loss: 2.9446


Epoch 4/10:   7%|▋         | 251/3568 [02:33<34:40,  1.59it/s]

Epoch [4/10], Step [251/3567], Loss: 2.9446


Epoch 4/10:   7%|▋         | 261/3568 [02:39<33:25,  1.65it/s]

Epoch [4/10], Step [261/3567], Loss: 2.9446


Epoch 4/10:   8%|▊         | 271/3568 [02:46<34:00,  1.62it/s]

Epoch [4/10], Step [271/3567], Loss: 2.9446


Epoch 4/10:   8%|▊         | 281/3568 [02:52<33:22,  1.64it/s]

Epoch [4/10], Step [281/3567], Loss: 2.9446


Epoch 4/10:   8%|▊         | 291/3568 [02:58<33:28,  1.63it/s]

Epoch [4/10], Step [291/3567], Loss: 2.9446


Epoch 4/10:   8%|▊         | 301/3568 [03:04<32:54,  1.65it/s]

Epoch [4/10], Step [301/3567], Loss: 2.9446


Epoch 4/10:   9%|▊         | 311/3568 [03:10<32:46,  1.66it/s]

Epoch [4/10], Step [311/3567], Loss: 2.9446


Epoch 4/10:   9%|▉         | 321/3568 [03:16<32:57,  1.64it/s]

Epoch [4/10], Step [321/3567], Loss: 2.9446


Epoch 4/10:   9%|▉         | 331/3568 [03:22<33:04,  1.63it/s]

Epoch [4/10], Step [331/3567], Loss: 2.9446


Epoch 4/10:  10%|▉         | 341/3568 [03:29<32:36,  1.65it/s]

Epoch [4/10], Step [341/3567], Loss: 2.9446


Epoch 4/10:  10%|▉         | 351/3568 [03:35<33:08,  1.62it/s]

Epoch [4/10], Step [351/3567], Loss: 2.9446


Epoch 4/10:  10%|█         | 361/3568 [03:41<32:48,  1.63it/s]

Epoch [4/10], Step [361/3567], Loss: 2.9446


Epoch 4/10:  10%|█         | 371/3568 [03:47<32:09,  1.66it/s]

Epoch [4/10], Step [371/3567], Loss: 2.9446


Epoch 4/10:  11%|█         | 381/3568 [03:53<32:43,  1.62it/s]

Epoch [4/10], Step [381/3567], Loss: 2.9446


Epoch 4/10:  11%|█         | 391/3568 [03:59<32:13,  1.64it/s]

Epoch [4/10], Step [391/3567], Loss: 2.9446


Epoch 4/10:  11%|█         | 401/3568 [04:06<32:50,  1.61it/s]

Epoch [4/10], Step [401/3567], Loss: 2.9446


Epoch 4/10:  12%|█▏        | 411/3568 [04:12<32:28,  1.62it/s]

Epoch [4/10], Step [411/3567], Loss: 2.9446


Epoch 4/10:  12%|█▏        | 421/3568 [04:18<32:07,  1.63it/s]

Epoch [4/10], Step [421/3567], Loss: 2.9446


Epoch 4/10:  12%|█▏        | 431/3568 [04:24<32:06,  1.63it/s]

Epoch [4/10], Step [431/3567], Loss: 2.9446


Epoch 4/10:  12%|█▏        | 441/3568 [04:30<32:04,  1.62it/s]

Epoch [4/10], Step [441/3567], Loss: 2.9446


Epoch 4/10:  13%|█▎        | 451/3568 [04:36<32:11,  1.61it/s]

Epoch [4/10], Step [451/3567], Loss: 2.9446


Epoch 4/10:  13%|█▎        | 461/3568 [04:43<31:38,  1.64it/s]

Epoch [4/10], Step [461/3567], Loss: 2.9446


Epoch 4/10:  13%|█▎        | 471/3568 [04:49<31:41,  1.63it/s]

Epoch [4/10], Step [471/3567], Loss: 2.9446


Epoch 4/10:  13%|█▎        | 481/3568 [04:55<31:27,  1.64it/s]

Epoch [4/10], Step [481/3567], Loss: 2.9446


Epoch 4/10:  14%|█▍        | 491/3568 [05:01<30:39,  1.67it/s]

Epoch [4/10], Step [491/3567], Loss: 2.9446


Epoch 4/10:  14%|█▍        | 501/3568 [05:07<32:51,  1.56it/s]

Epoch [4/10], Step [501/3567], Loss: 2.9446


Epoch 4/10:  14%|█▍        | 511/3568 [05:13<31:07,  1.64it/s]

Epoch [4/10], Step [511/3567], Loss: 2.9446


Epoch 4/10:  15%|█▍        | 521/3568 [05:19<31:28,  1.61it/s]

Epoch [4/10], Step [521/3567], Loss: 2.9446


Epoch 4/10:  15%|█▍        | 531/3568 [05:26<30:47,  1.64it/s]

Epoch [4/10], Step [531/3567], Loss: 2.9446


Epoch 4/10:  15%|█▌        | 541/3568 [05:32<30:36,  1.65it/s]

Epoch [4/10], Step [541/3567], Loss: 2.9446


Epoch 4/10:  15%|█▌        | 551/3568 [05:38<30:52,  1.63it/s]

Epoch [4/10], Step [551/3567], Loss: 2.9446


Epoch 4/10:  16%|█▌        | 561/3568 [05:44<31:05,  1.61it/s]

Epoch [4/10], Step [561/3567], Loss: 2.9446


Epoch 4/10:  16%|█▌        | 571/3568 [05:50<30:39,  1.63it/s]

Epoch [4/10], Step [571/3567], Loss: 2.9446


Epoch 4/10:  16%|█▋        | 581/3568 [05:57<32:03,  1.55it/s]

Epoch [4/10], Step [581/3567], Loss: 2.9446


Epoch 4/10:  17%|█▋        | 591/3568 [06:03<29:59,  1.65it/s]

Epoch [4/10], Step [591/3567], Loss: 2.9446


Epoch 4/10:  17%|█▋        | 601/3568 [06:09<30:10,  1.64it/s]

Epoch [4/10], Step [601/3567], Loss: 2.9446


Epoch 4/10:  17%|█▋        | 611/3568 [06:15<29:55,  1.65it/s]

Epoch [4/10], Step [611/3567], Loss: 2.9446


Epoch 4/10:  17%|█▋        | 621/3568 [06:21<30:27,  1.61it/s]

Epoch [4/10], Step [621/3567], Loss: 2.9446


Epoch 4/10:  18%|█▊        | 631/3568 [06:27<29:43,  1.65it/s]

Epoch [4/10], Step [631/3567], Loss: 2.9446


Epoch 4/10:  18%|█▊        | 641/3568 [06:33<31:33,  1.55it/s]

Epoch [4/10], Step [641/3567], Loss: 2.9446


Epoch 4/10:  18%|█▊        | 651/3568 [06:40<29:42,  1.64it/s]

Epoch [4/10], Step [651/3567], Loss: 2.9446


Epoch 4/10:  19%|█▊        | 661/3568 [06:46<29:50,  1.62it/s]

Epoch [4/10], Step [661/3567], Loss: 2.9446


Epoch 4/10:  19%|█▉        | 671/3568 [06:52<29:58,  1.61it/s]

Epoch [4/10], Step [671/3567], Loss: 2.9446


Epoch 4/10:  19%|█▉        | 681/3568 [06:58<29:20,  1.64it/s]

Epoch [4/10], Step [681/3567], Loss: 2.9446


Epoch 4/10:  19%|█▉        | 691/3568 [07:04<29:55,  1.60it/s]

Epoch [4/10], Step [691/3567], Loss: 2.9446


Epoch 4/10:  20%|█▉        | 701/3568 [07:10<29:36,  1.61it/s]

Epoch [4/10], Step [701/3567], Loss: 2.9446


Epoch 4/10:  20%|█▉        | 711/3568 [07:17<29:27,  1.62it/s]

Epoch [4/10], Step [711/3567], Loss: 2.9446


Epoch 4/10:  20%|██        | 721/3568 [07:23<28:54,  1.64it/s]

Epoch [4/10], Step [721/3567], Loss: 2.9446


Epoch 4/10:  20%|██        | 731/3568 [07:29<28:47,  1.64it/s]

Epoch [4/10], Step [731/3567], Loss: 2.9446


Epoch 4/10:  21%|██        | 741/3568 [07:35<28:34,  1.65it/s]

Epoch [4/10], Step [741/3567], Loss: 2.9446


Epoch 4/10:  21%|██        | 751/3568 [07:41<28:12,  1.66it/s]

Epoch [4/10], Step [751/3567], Loss: 2.9446


Epoch 4/10:  21%|██▏       | 761/3568 [07:47<29:31,  1.58it/s]

Epoch [4/10], Step [761/3567], Loss: 2.9446


Epoch 4/10:  22%|██▏       | 771/3568 [07:53<29:06,  1.60it/s]

Epoch [4/10], Step [771/3567], Loss: 2.9446


Epoch 4/10:  22%|██▏       | 781/3568 [07:59<28:33,  1.63it/s]

Epoch [4/10], Step [781/3567], Loss: 2.9446


Epoch 4/10:  22%|██▏       | 791/3568 [08:06<28:30,  1.62it/s]

Epoch [4/10], Step [791/3567], Loss: 2.9446


Epoch 4/10:  22%|██▏       | 801/3568 [08:12<28:06,  1.64it/s]

Epoch [4/10], Step [801/3567], Loss: 2.9446


Epoch 4/10:  23%|██▎       | 811/3568 [08:18<27:50,  1.65it/s]

Epoch [4/10], Step [811/3567], Loss: 2.9446


Epoch 4/10:  23%|██▎       | 821/3568 [08:24<28:09,  1.63it/s]

Epoch [4/10], Step [821/3567], Loss: 2.9446


Epoch 4/10:  23%|██▎       | 831/3568 [08:30<28:04,  1.62it/s]

Epoch [4/10], Step [831/3567], Loss: 2.9446


Epoch 4/10:  24%|██▎       | 841/3568 [08:36<27:53,  1.63it/s]

Epoch [4/10], Step [841/3567], Loss: 2.9446


Epoch 4/10:  24%|██▍       | 851/3568 [08:42<27:42,  1.63it/s]

Epoch [4/10], Step [851/3567], Loss: 2.9446


Epoch 4/10:  24%|██▍       | 861/3568 [08:48<27:19,  1.65it/s]

Epoch [4/10], Step [861/3567], Loss: 2.9446


Epoch 4/10:  24%|██▍       | 871/3568 [08:55<27:54,  1.61it/s]

Epoch [4/10], Step [871/3567], Loss: 2.9446


Epoch 4/10:  25%|██▍       | 881/3568 [09:01<27:29,  1.63it/s]

Epoch [4/10], Step [881/3567], Loss: 2.9446


Epoch 4/10:  25%|██▍       | 891/3568 [09:07<27:37,  1.62it/s]

Epoch [4/10], Step [891/3567], Loss: 2.9446


Epoch 4/10:  25%|██▌       | 901/3568 [09:13<27:32,  1.61it/s]

Epoch [4/10], Step [901/3567], Loss: 2.9446


Epoch 4/10:  26%|██▌       | 911/3568 [09:19<27:06,  1.63it/s]

Epoch [4/10], Step [911/3567], Loss: 2.9446


Epoch 4/10:  26%|██▌       | 921/3568 [09:25<26:49,  1.64it/s]

Epoch [4/10], Step [921/3567], Loss: 2.9446


Epoch 4/10:  26%|██▌       | 931/3568 [09:32<27:37,  1.59it/s]

Epoch [4/10], Step [931/3567], Loss: 2.9446


Epoch 4/10:  26%|██▋       | 941/3568 [09:38<26:22,  1.66it/s]

Epoch [4/10], Step [941/3567], Loss: 2.9446


Epoch 4/10:  27%|██▋       | 951/3568 [09:44<26:15,  1.66it/s]

Epoch [4/10], Step [951/3567], Loss: 2.9446


Epoch 4/10:  27%|██▋       | 961/3568 [09:50<27:05,  1.60it/s]

Epoch [4/10], Step [961/3567], Loss: 2.9446


Epoch 4/10:  27%|██▋       | 971/3568 [09:56<26:33,  1.63it/s]

Epoch [4/10], Step [971/3567], Loss: 2.9446


Epoch 4/10:  27%|██▋       | 981/3568 [10:02<26:53,  1.60it/s]

Epoch [4/10], Step [981/3567], Loss: 2.9446


Epoch 4/10:  28%|██▊       | 991/3568 [10:08<26:45,  1.61it/s]

Epoch [4/10], Step [991/3567], Loss: 2.9446


Epoch 4/10:  28%|██▊       | 1001/3568 [10:15<25:56,  1.65it/s]

Epoch [4/10], Step [1001/3567], Loss: 2.9446


Epoch 4/10:  28%|██▊       | 1011/3568 [10:21<26:22,  1.62it/s]

Epoch [4/10], Step [1011/3567], Loss: 2.9446


Epoch 4/10:  29%|██▊       | 1021/3568 [10:27<26:06,  1.63it/s]

Epoch [4/10], Step [1021/3567], Loss: 2.9446


Epoch 4/10:  29%|██▉       | 1031/3568 [10:33<26:28,  1.60it/s]

Epoch [4/10], Step [1031/3567], Loss: 2.9446


Epoch 4/10:  29%|██▉       | 1041/3568 [10:39<26:42,  1.58it/s]

Epoch [4/10], Step [1041/3567], Loss: 2.9446


Epoch 4/10:  29%|██▉       | 1051/3568 [10:46<25:57,  1.62it/s]

Epoch [4/10], Step [1051/3567], Loss: 2.9446


Epoch 4/10:  30%|██▉       | 1061/3568 [10:52<26:23,  1.58it/s]

Epoch [4/10], Step [1061/3567], Loss: 2.9446


Epoch 4/10:  30%|███       | 1071/3568 [10:58<25:26,  1.64it/s]

Epoch [4/10], Step [1071/3567], Loss: 2.9446


Epoch 4/10:  30%|███       | 1081/3568 [11:04<26:06,  1.59it/s]

Epoch [4/10], Step [1081/3567], Loss: 2.9446


Epoch 4/10:  31%|███       | 1091/3568 [11:11<25:22,  1.63it/s]

Epoch [4/10], Step [1091/3567], Loss: 2.9446


Epoch 4/10:  31%|███       | 1101/3568 [11:17<25:58,  1.58it/s]

Epoch [4/10], Step [1101/3567], Loss: 2.9446


Epoch 4/10:  31%|███       | 1111/3568 [11:23<24:46,  1.65it/s]

Epoch [4/10], Step [1111/3567], Loss: 2.9446


Epoch 4/10:  31%|███▏      | 1121/3568 [11:29<25:02,  1.63it/s]

Epoch [4/10], Step [1121/3567], Loss: 2.9446


Epoch 4/10:  32%|███▏      | 1131/3568 [11:35<25:10,  1.61it/s]

Epoch [4/10], Step [1131/3567], Loss: 2.9446


Epoch 4/10:  32%|███▏      | 1141/3568 [11:42<25:15,  1.60it/s]

Epoch [4/10], Step [1141/3567], Loss: 2.9446


Epoch 4/10:  32%|███▏      | 1151/3568 [11:48<24:05,  1.67it/s]

Epoch [4/10], Step [1151/3567], Loss: 2.9446


Epoch 4/10:  33%|███▎      | 1161/3568 [11:54<24:32,  1.64it/s]

Epoch [4/10], Step [1161/3567], Loss: 2.9446


Epoch 4/10:  33%|███▎      | 1171/3568 [12:00<24:32,  1.63it/s]

Epoch [4/10], Step [1171/3567], Loss: 2.9446


Epoch 4/10:  33%|███▎      | 1181/3568 [12:06<24:22,  1.63it/s]

Epoch [4/10], Step [1181/3567], Loss: 2.9446


Epoch 4/10:  33%|███▎      | 1191/3568 [12:12<24:10,  1.64it/s]

Epoch [4/10], Step [1191/3567], Loss: 2.9446


Epoch 4/10:  34%|███▎      | 1201/3568 [12:18<24:17,  1.62it/s]

Epoch [4/10], Step [1201/3567], Loss: 2.9446


Epoch 4/10:  34%|███▍      | 1211/3568 [12:25<24:10,  1.63it/s]

Epoch [4/10], Step [1211/3567], Loss: 2.9446


Epoch 4/10:  34%|███▍      | 1221/3568 [12:31<24:09,  1.62it/s]

Epoch [4/10], Step [1221/3567], Loss: 2.9446


Epoch 4/10:  35%|███▍      | 1231/3568 [12:37<24:48,  1.57it/s]

Epoch [4/10], Step [1231/3567], Loss: 2.9446


Epoch 4/10:  35%|███▍      | 1241/3568 [12:43<23:29,  1.65it/s]

Epoch [4/10], Step [1241/3567], Loss: 2.9446


Epoch 4/10:  35%|███▌      | 1251/3568 [12:49<24:11,  1.60it/s]

Epoch [4/10], Step [1251/3567], Loss: 2.9446


Epoch 4/10:  35%|███▌      | 1261/3568 [12:56<23:56,  1.61it/s]

Epoch [4/10], Step [1261/3567], Loss: 2.9446


Epoch 4/10:  36%|███▌      | 1271/3568 [13:02<23:37,  1.62it/s]

Epoch [4/10], Step [1271/3567], Loss: 2.9446


Epoch 4/10:  36%|███▌      | 1281/3568 [13:08<23:24,  1.63it/s]

Epoch [4/10], Step [1281/3567], Loss: 2.9446


Epoch 4/10:  36%|███▌      | 1291/3568 [13:14<23:24,  1.62it/s]

Epoch [4/10], Step [1291/3567], Loss: 2.9446


Epoch 4/10:  36%|███▋      | 1301/3568 [13:20<23:14,  1.63it/s]

Epoch [4/10], Step [1301/3567], Loss: 2.9446


Epoch 4/10:  37%|███▋      | 1311/3568 [13:26<22:59,  1.64it/s]

Epoch [4/10], Step [1311/3567], Loss: 2.9446


Epoch 4/10:  37%|███▋      | 1321/3568 [13:33<22:59,  1.63it/s]

Epoch [4/10], Step [1321/3567], Loss: 2.9446


Epoch 4/10:  37%|███▋      | 1331/3568 [13:39<22:45,  1.64it/s]

Epoch [4/10], Step [1331/3567], Loss: 2.9446


Epoch 4/10:  38%|███▊      | 1341/3568 [13:45<23:27,  1.58it/s]

Epoch [4/10], Step [1341/3567], Loss: 2.9446


Epoch 4/10:  38%|███▊      | 1351/3568 [13:51<22:08,  1.67it/s]

Epoch [4/10], Step [1351/3567], Loss: 2.9446


Epoch 4/10:  38%|███▊      | 1361/3568 [13:57<22:31,  1.63it/s]

Epoch [4/10], Step [1361/3567], Loss: 2.9446


Epoch 4/10:  38%|███▊      | 1371/3568 [14:03<22:25,  1.63it/s]

Epoch [4/10], Step [1371/3567], Loss: 2.9446


Epoch 4/10:  39%|███▊      | 1381/3568 [14:09<22:16,  1.64it/s]

Epoch [4/10], Step [1381/3567], Loss: 2.9446


Epoch 4/10:  39%|███▉      | 1391/3568 [14:16<22:02,  1.65it/s]

Epoch [4/10], Step [1391/3567], Loss: 2.9446


Epoch 4/10:  39%|███▉      | 1401/3568 [14:22<22:17,  1.62it/s]

Epoch [4/10], Step [1401/3567], Loss: 2.9446


Epoch 4/10:  40%|███▉      | 1411/3568 [14:28<21:57,  1.64it/s]

Epoch [4/10], Step [1411/3567], Loss: 2.9446


Epoch 4/10:  40%|███▉      | 1421/3568 [14:34<21:44,  1.65it/s]

Epoch [4/10], Step [1421/3567], Loss: 2.9446


Epoch 4/10:  40%|████      | 1431/3568 [14:40<21:43,  1.64it/s]

Epoch [4/10], Step [1431/3567], Loss: 2.9446


Epoch 4/10:  40%|████      | 1441/3568 [14:46<21:30,  1.65it/s]

Epoch [4/10], Step [1441/3567], Loss: 2.9446


Epoch 4/10:  41%|████      | 1451/3568 [14:52<21:32,  1.64it/s]

Epoch [4/10], Step [1451/3567], Loss: 2.9446


Epoch 4/10:  41%|████      | 1461/3568 [14:59<21:46,  1.61it/s]

Epoch [4/10], Step [1461/3567], Loss: 2.9446


Epoch 4/10:  41%|████      | 1471/3568 [15:05<21:56,  1.59it/s]

Epoch [4/10], Step [1471/3567], Loss: 2.9446


Epoch 4/10:  42%|████▏     | 1481/3568 [15:11<21:36,  1.61it/s]

Epoch [4/10], Step [1481/3567], Loss: 2.9446


Epoch 4/10:  42%|████▏     | 1491/3568 [15:17<21:27,  1.61it/s]

Epoch [4/10], Step [1491/3567], Loss: 2.9446


Epoch 4/10:  42%|████▏     | 1501/3568 [15:23<21:17,  1.62it/s]

Epoch [4/10], Step [1501/3567], Loss: 2.9446


Epoch 4/10:  42%|████▏     | 1511/3568 [15:30<20:52,  1.64it/s]

Epoch [4/10], Step [1511/3567], Loss: 2.9446


Epoch 4/10:  43%|████▎     | 1521/3568 [15:36<20:47,  1.64it/s]

Epoch [4/10], Step [1521/3567], Loss: 2.9446


Epoch 4/10:  43%|████▎     | 1531/3568 [15:42<20:30,  1.66it/s]

Epoch [4/10], Step [1531/3567], Loss: 2.9446


Epoch 4/10:  43%|████▎     | 1541/3568 [15:48<20:39,  1.64it/s]

Epoch [4/10], Step [1541/3567], Loss: 2.9446


Epoch 4/10:  43%|████▎     | 1551/3568 [15:54<20:57,  1.60it/s]

Epoch [4/10], Step [1551/3567], Loss: 2.9446


Epoch 4/10:  44%|████▍     | 1561/3568 [16:00<20:40,  1.62it/s]

Epoch [4/10], Step [1561/3567], Loss: 2.9446


Epoch 4/10:  44%|████▍     | 1571/3568 [16:06<20:10,  1.65it/s]

Epoch [4/10], Step [1571/3567], Loss: 2.9446


Epoch 4/10:  44%|████▍     | 1581/3568 [16:12<21:07,  1.57it/s]

Epoch [4/10], Step [1581/3567], Loss: 2.9446


Epoch 4/10:  45%|████▍     | 1591/3568 [16:19<20:58,  1.57it/s]

Epoch [4/10], Step [1591/3567], Loss: 2.9446


Epoch 4/10:  45%|████▍     | 1601/3568 [16:25<19:56,  1.64it/s]

Epoch [4/10], Step [1601/3567], Loss: 2.9446


Epoch 4/10:  45%|████▌     | 1611/3568 [16:31<19:44,  1.65it/s]

Epoch [4/10], Step [1611/3567], Loss: 2.9446


Epoch 4/10:  45%|████▌     | 1621/3568 [16:37<20:51,  1.56it/s]

Epoch [4/10], Step [1621/3567], Loss: 2.9446


Epoch 4/10:  46%|████▌     | 1631/3568 [16:43<20:33,  1.57it/s]

Epoch [4/10], Step [1631/3567], Loss: 2.9446


Epoch 4/10:  46%|████▌     | 1641/3568 [16:49<19:25,  1.65it/s]

Epoch [4/10], Step [1641/3567], Loss: 2.9446


Epoch 4/10:  46%|████▋     | 1651/3568 [16:56<19:30,  1.64it/s]

Epoch [4/10], Step [1651/3567], Loss: 2.9446


Epoch 4/10:  47%|████▋     | 1661/3568 [17:02<19:22,  1.64it/s]

Epoch [4/10], Step [1661/3567], Loss: 2.9446


Epoch 4/10:  47%|████▋     | 1671/3568 [17:08<19:35,  1.61it/s]

Epoch [4/10], Step [1671/3567], Loss: 2.9446


Epoch 4/10:  47%|████▋     | 1681/3568 [17:14<19:02,  1.65it/s]

Epoch [4/10], Step [1681/3567], Loss: 2.9446


Epoch 4/10:  47%|████▋     | 1691/3568 [17:20<18:47,  1.66it/s]

Epoch [4/10], Step [1691/3567], Loss: 2.9446


Epoch 4/10:  48%|████▊     | 1701/3568 [17:26<19:18,  1.61it/s]

Epoch [4/10], Step [1701/3567], Loss: 2.9446


Epoch 4/10:  48%|████▊     | 1711/3568 [17:32<19:04,  1.62it/s]

Epoch [4/10], Step [1711/3567], Loss: 2.9446


Epoch 4/10:  48%|████▊     | 1721/3568 [17:38<18:40,  1.65it/s]

Epoch [4/10], Step [1721/3567], Loss: 2.9446


Epoch 4/10:  49%|████▊     | 1731/3568 [17:45<18:58,  1.61it/s]

Epoch [4/10], Step [1731/3567], Loss: 2.9446


Epoch 4/10:  49%|████▉     | 1741/3568 [17:51<18:51,  1.61it/s]

Epoch [4/10], Step [1741/3567], Loss: 2.9446


Epoch 4/10:  49%|████▉     | 1751/3568 [17:57<18:33,  1.63it/s]

Epoch [4/10], Step [1751/3567], Loss: 2.9446


Epoch 4/10:  49%|████▉     | 1761/3568 [18:03<18:41,  1.61it/s]

Epoch [4/10], Step [1761/3567], Loss: 2.9446


Epoch 4/10:  50%|████▉     | 1771/3568 [18:09<19:09,  1.56it/s]

Epoch [4/10], Step [1771/3567], Loss: 2.9446


Epoch 4/10:  50%|████▉     | 1781/3568 [18:16<18:10,  1.64it/s]

Epoch [4/10], Step [1781/3567], Loss: 2.9446


Epoch 4/10:  50%|█████     | 1791/3568 [18:22<19:18,  1.53it/s]

Epoch [4/10], Step [1791/3567], Loss: 2.9446


Epoch 4/10:  50%|█████     | 1801/3568 [18:28<18:08,  1.62it/s]

Epoch [4/10], Step [1801/3567], Loss: 2.9446


Epoch 4/10:  51%|█████     | 1811/3568 [18:34<18:02,  1.62it/s]

Epoch [4/10], Step [1811/3567], Loss: 2.9446


Epoch 4/10:  51%|█████     | 1821/3568 [18:40<17:42,  1.64it/s]

Epoch [4/10], Step [1821/3567], Loss: 2.9446


Epoch 4/10:  51%|█████▏    | 1831/3568 [18:47<17:56,  1.61it/s]

Epoch [4/10], Step [1831/3567], Loss: 2.9446


Epoch 4/10:  52%|█████▏    | 1841/3568 [18:53<17:36,  1.64it/s]

Epoch [4/10], Step [1841/3567], Loss: 2.9446


Epoch 4/10:  52%|█████▏    | 1851/3568 [18:59<17:31,  1.63it/s]

Epoch [4/10], Step [1851/3567], Loss: 2.9446


Epoch 4/10:  52%|█████▏    | 1861/3568 [19:05<17:27,  1.63it/s]

Epoch [4/10], Step [1861/3567], Loss: 2.9446


Epoch 4/10:  52%|█████▏    | 1871/3568 [19:11<17:12,  1.64it/s]

Epoch [4/10], Step [1871/3567], Loss: 2.9446


Epoch 4/10:  53%|█████▎    | 1881/3568 [19:17<17:27,  1.61it/s]

Epoch [4/10], Step [1881/3567], Loss: 2.9446


Epoch 4/10:  53%|█████▎    | 1891/3568 [19:23<17:01,  1.64it/s]

Epoch [4/10], Step [1891/3567], Loss: 2.9446


Epoch 4/10:  53%|█████▎    | 1901/3568 [19:29<17:07,  1.62it/s]

Epoch [4/10], Step [1901/3567], Loss: 2.9446


Epoch 4/10:  54%|█████▎    | 1911/3568 [19:36<17:04,  1.62it/s]

Epoch [4/10], Step [1911/3567], Loss: 2.9446


Epoch 4/10:  54%|█████▍    | 1921/3568 [19:42<16:46,  1.64it/s]

Epoch [4/10], Step [1921/3567], Loss: 2.9446


Epoch 4/10:  54%|█████▍    | 1931/3568 [19:48<16:40,  1.64it/s]

Epoch [4/10], Step [1931/3567], Loss: 2.9446


Epoch 4/10:  54%|█████▍    | 1941/3568 [19:54<16:38,  1.63it/s]

Epoch [4/10], Step [1941/3567], Loss: 2.9446


Epoch 4/10:  55%|█████▍    | 1951/3568 [20:00<16:23,  1.64it/s]

Epoch [4/10], Step [1951/3567], Loss: 2.9446


Epoch 4/10:  55%|█████▍    | 1961/3568 [20:06<16:25,  1.63it/s]

Epoch [4/10], Step [1961/3567], Loss: 2.9446


Epoch 4/10:  55%|█████▌    | 1971/3568 [20:12<16:23,  1.62it/s]

Epoch [4/10], Step [1971/3567], Loss: 2.9446


Epoch 4/10:  56%|█████▌    | 1981/3568 [20:18<16:05,  1.64it/s]

Epoch [4/10], Step [1981/3567], Loss: 2.9446


Epoch 4/10:  56%|█████▌    | 1991/3568 [20:25<15:56,  1.65it/s]

Epoch [4/10], Step [1991/3567], Loss: 2.9446


Epoch 4/10:  56%|█████▌    | 2001/3568 [20:31<15:59,  1.63it/s]

Epoch [4/10], Step [2001/3567], Loss: 2.9446


Epoch 4/10:  56%|█████▋    | 2011/3568 [20:37<16:30,  1.57it/s]

Epoch [4/10], Step [2011/3567], Loss: 2.9446


Epoch 4/10:  57%|█████▋    | 2021/3568 [20:43<15:37,  1.65it/s]

Epoch [4/10], Step [2021/3567], Loss: 2.9446


Epoch 4/10:  57%|█████▋    | 2031/3568 [20:49<15:44,  1.63it/s]

Epoch [4/10], Step [2031/3567], Loss: 2.9446


Epoch 4/10:  57%|█████▋    | 2041/3568 [20:55<15:26,  1.65it/s]

Epoch [4/10], Step [2041/3567], Loss: 2.9446


Epoch 4/10:  57%|█████▋    | 2051/3568 [21:01<15:29,  1.63it/s]

Epoch [4/10], Step [2051/3567], Loss: 2.9446


Epoch 4/10:  58%|█████▊    | 2061/3568 [21:08<15:15,  1.65it/s]

Epoch [4/10], Step [2061/3567], Loss: 2.9446


Epoch 4/10:  58%|█████▊    | 2071/3568 [21:14<15:16,  1.63it/s]

Epoch [4/10], Step [2071/3567], Loss: 2.9446


Epoch 4/10:  58%|█████▊    | 2081/3568 [21:20<15:22,  1.61it/s]

Epoch [4/10], Step [2081/3567], Loss: 2.9446


Epoch 4/10:  59%|█████▊    | 2091/3568 [21:26<15:04,  1.63it/s]

Epoch [4/10], Step [2091/3567], Loss: 2.9446


Epoch 4/10:  59%|█████▉    | 2101/3568 [21:32<15:04,  1.62it/s]

Epoch [4/10], Step [2101/3567], Loss: 2.9446


Epoch 4/10:  59%|█████▉    | 2111/3568 [21:38<14:36,  1.66it/s]

Epoch [4/10], Step [2111/3567], Loss: 2.9446


Epoch 4/10:  59%|█████▉    | 2121/3568 [21:44<14:58,  1.61it/s]

Epoch [4/10], Step [2121/3567], Loss: 2.9446


Epoch 4/10:  60%|█████▉    | 2131/3568 [21:51<14:27,  1.66it/s]

Epoch [4/10], Step [2131/3567], Loss: 2.9446


Epoch 4/10:  60%|██████    | 2141/3568 [21:57<14:31,  1.64it/s]

Epoch [4/10], Step [2141/3567], Loss: 2.9446


Epoch 4/10:  60%|██████    | 2151/3568 [22:03<14:30,  1.63it/s]

Epoch [4/10], Step [2151/3567], Loss: 2.9446


Epoch 4/10:  61%|██████    | 2161/3568 [22:09<14:21,  1.63it/s]

Epoch [4/10], Step [2161/3567], Loss: 2.9446


Epoch 4/10:  61%|██████    | 2171/3568 [22:15<14:02,  1.66it/s]

Epoch [4/10], Step [2171/3567], Loss: 2.9446


Epoch 4/10:  61%|██████    | 2181/3568 [22:21<14:10,  1.63it/s]

Epoch [4/10], Step [2181/3567], Loss: 2.9446


Epoch 4/10:  61%|██████▏   | 2191/3568 [22:27<13:57,  1.65it/s]

Epoch [4/10], Step [2191/3567], Loss: 2.9446


Epoch 4/10:  62%|██████▏   | 2201/3568 [22:33<13:58,  1.63it/s]

Epoch [4/10], Step [2201/3567], Loss: 2.9446


Epoch 4/10:  62%|██████▏   | 2211/3568 [22:40<13:46,  1.64it/s]

Epoch [4/10], Step [2211/3567], Loss: 2.9446


Epoch 4/10:  62%|██████▏   | 2221/3568 [22:46<13:47,  1.63it/s]

Epoch [4/10], Step [2221/3567], Loss: 2.9446


Epoch 4/10:  63%|██████▎   | 2231/3568 [22:52<13:34,  1.64it/s]

Epoch [4/10], Step [2231/3567], Loss: 2.9446


Epoch 4/10:  63%|██████▎   | 2241/3568 [22:58<13:24,  1.65it/s]

Epoch [4/10], Step [2241/3567], Loss: 2.9446


Epoch 4/10:  63%|██████▎   | 2251/3568 [23:04<13:14,  1.66it/s]

Epoch [4/10], Step [2251/3567], Loss: 2.9446


Epoch 4/10:  63%|██████▎   | 2261/3568 [23:10<13:27,  1.62it/s]

Epoch [4/10], Step [2261/3567], Loss: 2.9446


Epoch 4/10:  64%|██████▎   | 2271/3568 [23:16<13:23,  1.61it/s]

Epoch [4/10], Step [2271/3567], Loss: 2.9446


Epoch 4/10:  64%|██████▍   | 2281/3568 [23:23<13:06,  1.64it/s]

Epoch [4/10], Step [2281/3567], Loss: 2.9446


Epoch 4/10:  64%|██████▍   | 2291/3568 [23:29<13:05,  1.63it/s]

Epoch [4/10], Step [2291/3567], Loss: 2.9446


Epoch 4/10:  64%|██████▍   | 2301/3568 [23:35<12:56,  1.63it/s]

Epoch [4/10], Step [2301/3567], Loss: 2.9446


Epoch 4/10:  65%|██████▍   | 2311/3568 [23:41<12:49,  1.63it/s]

Epoch [4/10], Step [2311/3567], Loss: 2.9446


Epoch 4/10:  65%|██████▌   | 2321/3568 [23:47<12:38,  1.64it/s]

Epoch [4/10], Step [2321/3567], Loss: 2.9446


Epoch 4/10:  65%|██████▌   | 2331/3568 [23:53<12:29,  1.65it/s]

Epoch [4/10], Step [2331/3567], Loss: 2.9446


Epoch 4/10:  66%|██████▌   | 2341/3568 [23:59<12:31,  1.63it/s]

Epoch [4/10], Step [2341/3567], Loss: 2.9446


Epoch 4/10:  66%|██████▌   | 2351/3568 [24:06<12:29,  1.62it/s]

Epoch [4/10], Step [2351/3567], Loss: 2.9446


Epoch 4/10:  66%|██████▌   | 2361/3568 [24:12<12:30,  1.61it/s]

Epoch [4/10], Step [2361/3567], Loss: 2.9446


Epoch 4/10:  66%|██████▋   | 2371/3568 [24:18<12:14,  1.63it/s]

Epoch [4/10], Step [2371/3567], Loss: 2.9446


Epoch 4/10:  67%|██████▋   | 2381/3568 [24:24<12:20,  1.60it/s]

Epoch [4/10], Step [2381/3567], Loss: 2.9446


Epoch 4/10:  67%|██████▋   | 2391/3568 [24:31<11:58,  1.64it/s]

Epoch [4/10], Step [2391/3567], Loss: 2.9446


Epoch 4/10:  67%|██████▋   | 2401/3568 [24:37<11:54,  1.63it/s]

Epoch [4/10], Step [2401/3567], Loss: 2.9446


Epoch 4/10:  68%|██████▊   | 2411/3568 [24:43<11:41,  1.65it/s]

Epoch [4/10], Step [2411/3567], Loss: 2.9446


Epoch 4/10:  68%|██████▊   | 2421/3568 [24:49<11:46,  1.62it/s]

Epoch [4/10], Step [2421/3567], Loss: 2.9446


Epoch 4/10:  68%|██████▊   | 2431/3568 [24:55<11:31,  1.65it/s]

Epoch [4/10], Step [2431/3567], Loss: 2.9446


Epoch 4/10:  68%|██████▊   | 2441/3568 [25:01<11:28,  1.64it/s]

Epoch [4/10], Step [2441/3567], Loss: 2.9446


Epoch 4/10:  69%|██████▊   | 2451/3568 [25:07<11:31,  1.61it/s]

Epoch [4/10], Step [2451/3567], Loss: 2.9446


Epoch 4/10:  69%|██████▉   | 2461/3568 [25:13<11:25,  1.61it/s]

Epoch [4/10], Step [2461/3567], Loss: 2.9446


Epoch 4/10:  69%|██████▉   | 2471/3568 [25:20<11:13,  1.63it/s]

Epoch [4/10], Step [2471/3567], Loss: 2.9446


Epoch 4/10:  70%|██████▉   | 2481/3568 [25:26<12:08,  1.49it/s]

Epoch [4/10], Step [2481/3567], Loss: 2.9446


Epoch 4/10:  70%|██████▉   | 2491/3568 [25:32<11:12,  1.60it/s]

Epoch [4/10], Step [2491/3567], Loss: 2.9446


Epoch 4/10:  70%|███████   | 2501/3568 [25:38<11:00,  1.62it/s]

Epoch [4/10], Step [2501/3567], Loss: 2.9446


Epoch 4/10:  70%|███████   | 2511/3568 [25:45<10:48,  1.63it/s]

Epoch [4/10], Step [2511/3567], Loss: 2.9446


Epoch 4/10:  71%|███████   | 2521/3568 [25:51<10:55,  1.60it/s]

Epoch [4/10], Step [2521/3567], Loss: 2.9446


Epoch 4/10:  71%|███████   | 2531/3568 [25:57<10:28,  1.65it/s]

Epoch [4/10], Step [2531/3567], Loss: 2.9446


Epoch 4/10:  71%|███████   | 2541/3568 [26:03<10:34,  1.62it/s]

Epoch [4/10], Step [2541/3567], Loss: 2.9446


Epoch 4/10:  71%|███████▏  | 2551/3568 [26:09<10:35,  1.60it/s]

Epoch [4/10], Step [2551/3567], Loss: 2.9446


Epoch 4/10:  72%|███████▏  | 2561/3568 [26:15<10:22,  1.62it/s]

Epoch [4/10], Step [2561/3567], Loss: 2.9446


Epoch 4/10:  72%|███████▏  | 2571/3568 [26:21<10:08,  1.64it/s]

Epoch [4/10], Step [2571/3567], Loss: 2.9446


Epoch 4/10:  72%|███████▏  | 2581/3568 [26:27<10:04,  1.63it/s]

Epoch [4/10], Step [2581/3567], Loss: 2.9446


Epoch 4/10:  73%|███████▎  | 2591/3568 [26:34<09:52,  1.65it/s]

Epoch [4/10], Step [2591/3567], Loss: 2.9446


Epoch 4/10:  73%|███████▎  | 2601/3568 [26:40<09:52,  1.63it/s]

Epoch [4/10], Step [2601/3567], Loss: 2.9446


Epoch 4/10:  73%|███████▎  | 2611/3568 [26:46<09:42,  1.64it/s]

Epoch [4/10], Step [2611/3567], Loss: 2.9446


Epoch 4/10:  73%|███████▎  | 2621/3568 [26:52<09:46,  1.61it/s]

Epoch [4/10], Step [2621/3567], Loss: 2.9446


Epoch 4/10:  74%|███████▎  | 2631/3568 [26:58<09:35,  1.63it/s]

Epoch [4/10], Step [2631/3567], Loss: 2.9446


Epoch 4/10:  74%|███████▍  | 2641/3568 [27:04<09:21,  1.65it/s]

Epoch [4/10], Step [2641/3567], Loss: 2.9446


Epoch 4/10:  74%|███████▍  | 2651/3568 [27:10<09:20,  1.64it/s]

Epoch [4/10], Step [2651/3567], Loss: 2.9446


Epoch 4/10:  75%|███████▍  | 2661/3568 [27:17<09:16,  1.63it/s]

Epoch [4/10], Step [2661/3567], Loss: 2.9446


Epoch 4/10:  75%|███████▍  | 2671/3568 [27:23<09:19,  1.60it/s]

Epoch [4/10], Step [2671/3567], Loss: 2.9446


Epoch 4/10:  75%|███████▌  | 2681/3568 [27:29<09:10,  1.61it/s]

Epoch [4/10], Step [2681/3567], Loss: 2.9446


Epoch 4/10:  75%|███████▌  | 2691/3568 [27:35<09:10,  1.59it/s]

Epoch [4/10], Step [2691/3567], Loss: 2.9446


Epoch 4/10:  76%|███████▌  | 2701/3568 [27:41<08:43,  1.66it/s]

Epoch [4/10], Step [2701/3567], Loss: 2.9446


Epoch 4/10:  76%|███████▌  | 2711/3568 [27:47<08:41,  1.64it/s]

Epoch [4/10], Step [2711/3567], Loss: 2.9446


Epoch 4/10:  76%|███████▋  | 2721/3568 [27:53<08:45,  1.61it/s]

Epoch [4/10], Step [2721/3567], Loss: 2.9446


Epoch 4/10:  77%|███████▋  | 2731/3568 [28:00<09:13,  1.51it/s]

Epoch [4/10], Step [2731/3567], Loss: 2.9446


Epoch 4/10:  77%|███████▋  | 2741/3568 [28:06<08:35,  1.60it/s]

Epoch [4/10], Step [2741/3567], Loss: 2.9446


Epoch 4/10:  77%|███████▋  | 2751/3568 [28:12<08:23,  1.62it/s]

Epoch [4/10], Step [2751/3567], Loss: 2.9446


Epoch 4/10:  77%|███████▋  | 2761/3568 [28:19<08:22,  1.61it/s]

Epoch [4/10], Step [2761/3567], Loss: 2.9446


Epoch 4/10:  78%|███████▊  | 2771/3568 [28:25<08:33,  1.55it/s]

Epoch [4/10], Step [2771/3567], Loss: 2.9446


Epoch 4/10:  78%|███████▊  | 2781/3568 [28:31<08:08,  1.61it/s]

Epoch [4/10], Step [2781/3567], Loss: 2.9446


Epoch 4/10:  78%|███████▊  | 2791/3568 [28:37<07:57,  1.63it/s]

Epoch [4/10], Step [2791/3567], Loss: 2.9446


Epoch 4/10:  79%|███████▊  | 2801/3568 [28:43<07:49,  1.63it/s]

Epoch [4/10], Step [2801/3567], Loss: 2.9446


Epoch 4/10:  79%|███████▉  | 2811/3568 [28:49<07:43,  1.63it/s]

Epoch [4/10], Step [2811/3567], Loss: 2.9446


Epoch 4/10:  79%|███████▉  | 2821/3568 [28:56<07:37,  1.63it/s]

Epoch [4/10], Step [2821/3567], Loss: 2.9446


Epoch 4/10:  79%|███████▉  | 2831/3568 [29:02<07:21,  1.67it/s]

Epoch [4/10], Step [2831/3567], Loss: 2.9446


Epoch 4/10:  80%|███████▉  | 2841/3568 [29:08<07:34,  1.60it/s]

Epoch [4/10], Step [2841/3567], Loss: 2.9446


Epoch 4/10:  80%|███████▉  | 2851/3568 [29:14<07:45,  1.54it/s]

Epoch [4/10], Step [2851/3567], Loss: 2.9446


Epoch 4/10:  80%|████████  | 2861/3568 [29:20<07:08,  1.65it/s]

Epoch [4/10], Step [2861/3567], Loss: 2.9446


Epoch 4/10:  80%|████████  | 2871/3568 [29:26<07:10,  1.62it/s]

Epoch [4/10], Step [2871/3567], Loss: 2.9446


Epoch 4/10:  81%|████████  | 2881/3568 [29:32<07:03,  1.62it/s]

Epoch [4/10], Step [2881/3567], Loss: 2.9446


Epoch 4/10:  81%|████████  | 2891/3568 [29:38<06:54,  1.63it/s]

Epoch [4/10], Step [2891/3567], Loss: 2.9446


Epoch 4/10:  81%|████████▏ | 2901/3568 [29:45<06:47,  1.64it/s]

Epoch [4/10], Step [2901/3567], Loss: 2.9446


Epoch 4/10:  82%|████████▏ | 2911/3568 [29:51<06:47,  1.61it/s]

Epoch [4/10], Step [2911/3567], Loss: 2.9446


Epoch 4/10:  82%|████████▏ | 2921/3568 [29:57<06:39,  1.62it/s]

Epoch [4/10], Step [2921/3567], Loss: 2.9446


Epoch 4/10:  82%|████████▏ | 2931/3568 [30:03<06:30,  1.63it/s]

Epoch [4/10], Step [2931/3567], Loss: 2.9446


Epoch 4/10:  82%|████████▏ | 2941/3568 [30:09<06:28,  1.61it/s]

Epoch [4/10], Step [2941/3567], Loss: 2.9446


Epoch 4/10:  83%|████████▎ | 2951/3568 [30:15<06:16,  1.64it/s]

Epoch [4/10], Step [2951/3567], Loss: 2.9446


Epoch 4/10:  83%|████████▎ | 2961/3568 [30:21<06:03,  1.67it/s]

Epoch [4/10], Step [2961/3567], Loss: 2.9446


Epoch 4/10:  83%|████████▎ | 2971/3568 [30:28<06:09,  1.61it/s]

Epoch [4/10], Step [2971/3567], Loss: 2.9446


Epoch 4/10:  84%|████████▎ | 2981/3568 [30:34<06:00,  1.63it/s]

Epoch [4/10], Step [2981/3567], Loss: 2.9446


Epoch 4/10:  84%|████████▍ | 2991/3568 [30:40<05:50,  1.65it/s]

Epoch [4/10], Step [2991/3567], Loss: 2.9446


Epoch 4/10:  84%|████████▍ | 3001/3568 [30:46<05:50,  1.62it/s]

Epoch [4/10], Step [3001/3567], Loss: 2.9446


Epoch 4/10:  84%|████████▍ | 3011/3568 [30:52<05:43,  1.62it/s]

Epoch [4/10], Step [3011/3567], Loss: 2.9446


Epoch 4/10:  85%|████████▍ | 3021/3568 [30:58<05:29,  1.66it/s]

Epoch [4/10], Step [3021/3567], Loss: 2.9446


Epoch 4/10:  85%|████████▍ | 3031/3568 [31:04<05:33,  1.61it/s]

Epoch [4/10], Step [3031/3567], Loss: 2.9446


Epoch 4/10:  85%|████████▌ | 3041/3568 [31:10<05:29,  1.60it/s]

Epoch [4/10], Step [3041/3567], Loss: 2.9446


Epoch 4/10:  86%|████████▌ | 3051/3568 [31:17<05:20,  1.61it/s]

Epoch [4/10], Step [3051/3567], Loss: 2.9446


Epoch 4/10:  86%|████████▌ | 3061/3568 [31:23<05:08,  1.64it/s]

Epoch [4/10], Step [3061/3567], Loss: 2.9446


Epoch 4/10:  86%|████████▌ | 3071/3568 [31:29<05:00,  1.65it/s]

Epoch [4/10], Step [3071/3567], Loss: 2.9446


Epoch 4/10:  86%|████████▋ | 3081/3568 [31:35<05:00,  1.62it/s]

Epoch [4/10], Step [3081/3567], Loss: 2.9446


Epoch 4/10:  87%|████████▋ | 3091/3568 [31:41<05:02,  1.58it/s]

Epoch [4/10], Step [3091/3567], Loss: 2.9446


Epoch 4/10:  87%|████████▋ | 3101/3568 [31:47<04:51,  1.60it/s]

Epoch [4/10], Step [3101/3567], Loss: 2.9446


Epoch 4/10:  87%|████████▋ | 3111/3568 [31:54<04:40,  1.63it/s]

Epoch [4/10], Step [3111/3567], Loss: 2.9446


Epoch 4/10:  87%|████████▋ | 3121/3568 [32:00<04:32,  1.64it/s]

Epoch [4/10], Step [3121/3567], Loss: 2.9446


Epoch 4/10:  88%|████████▊ | 3131/3568 [32:06<04:30,  1.61it/s]

Epoch [4/10], Step [3131/3567], Loss: 2.9446


Epoch 4/10:  88%|████████▊ | 3141/3568 [32:12<04:23,  1.62it/s]

Epoch [4/10], Step [3141/3567], Loss: 2.9446


Epoch 4/10:  88%|████████▊ | 3151/3568 [32:18<04:14,  1.64it/s]

Epoch [4/10], Step [3151/3567], Loss: 2.9446


Epoch 4/10:  89%|████████▊ | 3161/3568 [32:24<04:14,  1.60it/s]

Epoch [4/10], Step [3161/3567], Loss: 2.9446


Epoch 4/10:  89%|████████▉ | 3171/3568 [32:31<04:02,  1.64it/s]

Epoch [4/10], Step [3171/3567], Loss: 2.9446


Epoch 4/10:  89%|████████▉ | 3181/3568 [32:37<03:58,  1.62it/s]

Epoch [4/10], Step [3181/3567], Loss: 2.9446


Epoch 4/10:  89%|████████▉ | 3191/3568 [32:43<03:54,  1.61it/s]

Epoch [4/10], Step [3191/3567], Loss: 2.9446


Epoch 4/10:  90%|████████▉ | 3201/3568 [32:49<03:44,  1.63it/s]

Epoch [4/10], Step [3201/3567], Loss: 2.9446


Epoch 4/10:  90%|████████▉ | 3211/3568 [32:55<03:38,  1.64it/s]

Epoch [4/10], Step [3211/3567], Loss: 2.9446


Epoch 4/10:  90%|█████████ | 3221/3568 [33:01<03:31,  1.64it/s]

Epoch [4/10], Step [3221/3567], Loss: 2.9446


Epoch 4/10:  91%|█████████ | 3231/3568 [33:08<03:26,  1.63it/s]

Epoch [4/10], Step [3231/3567], Loss: 2.9446


Epoch 4/10:  91%|█████████ | 3241/3568 [33:14<03:26,  1.58it/s]

Epoch [4/10], Step [3241/3567], Loss: 2.9446


Epoch 4/10:  91%|█████████ | 3251/3568 [33:20<03:13,  1.64it/s]

Epoch [4/10], Step [3251/3567], Loss: 2.9446


Epoch 4/10:  91%|█████████▏| 3261/3568 [33:26<03:10,  1.61it/s]

Epoch [4/10], Step [3261/3567], Loss: 2.9446


Epoch 4/10:  92%|█████████▏| 3271/3568 [33:32<03:02,  1.63it/s]

Epoch [4/10], Step [3271/3567], Loss: 2.9446


Epoch 4/10:  92%|█████████▏| 3281/3568 [33:38<02:56,  1.63it/s]

Epoch [4/10], Step [3281/3567], Loss: 2.9446


Epoch 4/10:  92%|█████████▏| 3291/3568 [33:44<02:48,  1.64it/s]

Epoch [4/10], Step [3291/3567], Loss: 2.9446


Epoch 4/10:  93%|█████████▎| 3301/3568 [33:51<02:46,  1.60it/s]

Epoch [4/10], Step [3301/3567], Loss: 2.9446


Epoch 4/10:  93%|█████████▎| 3311/3568 [33:57<02:38,  1.63it/s]

Epoch [4/10], Step [3311/3567], Loss: 2.9446


Epoch 4/10:  93%|█████████▎| 3321/3568 [34:03<02:30,  1.64it/s]

Epoch [4/10], Step [3321/3567], Loss: 2.9446


Epoch 4/10:  93%|█████████▎| 3331/3568 [34:09<02:27,  1.61it/s]

Epoch [4/10], Step [3331/3567], Loss: 2.9446


Epoch 4/10:  94%|█████████▎| 3341/3568 [34:15<02:16,  1.66it/s]

Epoch [4/10], Step [3341/3567], Loss: 2.9446


Epoch 4/10:  94%|█████████▍| 3351/3568 [34:22<02:16,  1.58it/s]

Epoch [4/10], Step [3351/3567], Loss: 2.9446


Epoch 4/10:  94%|█████████▍| 3361/3568 [34:28<02:05,  1.65it/s]

Epoch [4/10], Step [3361/3567], Loss: 2.9446


Epoch 4/10:  94%|█████████▍| 3371/3568 [34:34<01:58,  1.66it/s]

Epoch [4/10], Step [3371/3567], Loss: 2.9446


Epoch 4/10:  95%|█████████▍| 3381/3568 [34:40<01:53,  1.64it/s]

Epoch [4/10], Step [3381/3567], Loss: 2.9446


Epoch 4/10:  95%|█████████▌| 3391/3568 [34:46<01:49,  1.61it/s]

Epoch [4/10], Step [3391/3567], Loss: 2.9446


Epoch 4/10:  95%|█████████▌| 3401/3568 [34:52<01:43,  1.61it/s]

Epoch [4/10], Step [3401/3567], Loss: 2.9446


Epoch 4/10:  96%|█████████▌| 3411/3568 [34:58<01:35,  1.64it/s]

Epoch [4/10], Step [3411/3567], Loss: 2.9446


Epoch 4/10:  96%|█████████▌| 3421/3568 [35:05<01:29,  1.64it/s]

Epoch [4/10], Step [3421/3567], Loss: 2.9446


Epoch 4/10:  96%|█████████▌| 3431/3568 [35:11<01:25,  1.61it/s]

Epoch [4/10], Step [3431/3567], Loss: 2.9446


Epoch 4/10:  96%|█████████▋| 3441/3568 [35:17<01:16,  1.66it/s]

Epoch [4/10], Step [3441/3567], Loss: 2.9446


Epoch 4/10:  97%|█████████▋| 3451/3568 [35:23<01:11,  1.63it/s]

Epoch [4/10], Step [3451/3567], Loss: 2.9446


Epoch 4/10:  97%|█████████▋| 3461/3568 [35:29<01:05,  1.64it/s]

Epoch [4/10], Step [3461/3567], Loss: 2.9446


Epoch 4/10:  97%|█████████▋| 3471/3568 [35:35<00:58,  1.65it/s]

Epoch [4/10], Step [3471/3567], Loss: 2.9446


Epoch 4/10:  98%|█████████▊| 3481/3568 [35:42<00:54,  1.61it/s]

Epoch [4/10], Step [3481/3567], Loss: 2.9446


Epoch 4/10:  98%|█████████▊| 3491/3568 [35:48<00:46,  1.64it/s]

Epoch [4/10], Step [3491/3567], Loss: 2.9446


Epoch 4/10:  98%|█████████▊| 3501/3568 [35:54<00:40,  1.65it/s]

Epoch [4/10], Step [3501/3567], Loss: 2.9446


Epoch 4/10:  98%|█████████▊| 3511/3568 [36:00<00:34,  1.63it/s]

Epoch [4/10], Step [3511/3567], Loss: 2.9446


Epoch 4/10:  99%|█████████▊| 3521/3568 [36:06<00:28,  1.63it/s]

Epoch [4/10], Step [3521/3567], Loss: 2.9446


Epoch 4/10:  99%|█████████▉| 3531/3568 [36:12<00:22,  1.65it/s]

Epoch [4/10], Step [3531/3567], Loss: 2.9446


Epoch 4/10:  99%|█████████▉| 3541/3568 [36:18<00:16,  1.63it/s]

Epoch [4/10], Step [3541/3567], Loss: 2.9446


Epoch 4/10: 100%|█████████▉| 3551/3568 [36:24<00:10,  1.63it/s]

Epoch [4/10], Step [3551/3567], Loss: 2.9446


Epoch 4/10: 100%|█████████▉| 3561/3568 [36:30<00:04,  1.63it/s]

Epoch [4/10], Step [3561/3567], Loss: 2.9446


Epoch 4/10: 100%|█████████▉| 3567/3568 [36:34<00:00,  1.63it/s]


Epoch [4/10] Average Loss: 2.9446


Epoch 5/10:   0%|          | 1/3568 [00:00<37:04,  1.60it/s]

Epoch [5/10], Step [1/3567], Loss: 2.9446


Epoch 5/10:   0%|          | 11/3568 [00:06<36:42,  1.61it/s]

Epoch [5/10], Step [11/3567], Loss: 2.9446


Epoch 5/10:   1%|          | 21/3568 [00:13<37:46,  1.57it/s]

Epoch [5/10], Step [21/3567], Loss: 2.9446


Epoch 5/10:   1%|          | 31/3568 [00:19<35:31,  1.66it/s]

Epoch [5/10], Step [31/3567], Loss: 2.9446


Epoch 5/10:   1%|          | 41/3568 [00:25<35:43,  1.65it/s]

Epoch [5/10], Step [41/3567], Loss: 2.9446


Epoch 5/10:   1%|▏         | 51/3568 [00:31<35:39,  1.64it/s]

Epoch [5/10], Step [51/3567], Loss: 2.9446


Epoch 5/10:   2%|▏         | 61/3568 [00:37<35:48,  1.63it/s]

Epoch [5/10], Step [61/3567], Loss: 2.9446


Epoch 5/10:   2%|▏         | 71/3568 [00:43<35:25,  1.65it/s]

Epoch [5/10], Step [71/3567], Loss: 2.9446


Epoch 5/10:   2%|▏         | 81/3568 [00:49<36:28,  1.59it/s]

Epoch [5/10], Step [81/3567], Loss: 2.9446


Epoch 5/10:   3%|▎         | 91/3568 [00:55<35:13,  1.64it/s]

Epoch [5/10], Step [91/3567], Loss: 2.9446


Epoch 5/10:   3%|▎         | 101/3568 [01:02<35:28,  1.63it/s]

Epoch [5/10], Step [101/3567], Loss: 2.9446


Epoch 5/10:   3%|▎         | 111/3568 [01:08<35:25,  1.63it/s]

Epoch [5/10], Step [111/3567], Loss: 2.9446


Epoch 5/10:   3%|▎         | 121/3568 [01:14<35:38,  1.61it/s]

Epoch [5/10], Step [121/3567], Loss: 2.9446


Epoch 5/10:   4%|▎         | 131/3568 [01:20<35:01,  1.64it/s]

Epoch [5/10], Step [131/3567], Loss: 2.9446


Epoch 5/10:   4%|▍         | 141/3568 [01:26<35:23,  1.61it/s]

Epoch [5/10], Step [141/3567], Loss: 2.9446


Epoch 5/10:   4%|▍         | 151/3568 [01:32<34:48,  1.64it/s]

Epoch [5/10], Step [151/3567], Loss: 2.9446


Epoch 5/10:   5%|▍         | 161/3568 [01:38<34:28,  1.65it/s]

Epoch [5/10], Step [161/3567], Loss: 2.9446


Epoch 5/10:   5%|▍         | 171/3568 [01:45<34:49,  1.63it/s]

Epoch [5/10], Step [171/3567], Loss: 2.9446


Epoch 5/10:   5%|▌         | 181/3568 [01:51<35:17,  1.60it/s]

Epoch [5/10], Step [181/3567], Loss: 2.9446


Epoch 5/10:   5%|▌         | 191/3568 [01:57<35:28,  1.59it/s]

Epoch [5/10], Step [191/3567], Loss: 2.9446


Epoch 5/10:   6%|▌         | 201/3568 [02:03<34:02,  1.65it/s]

Epoch [5/10], Step [201/3567], Loss: 2.9446


Epoch 5/10:   6%|▌         | 211/3568 [02:09<34:28,  1.62it/s]

Epoch [5/10], Step [211/3567], Loss: 2.9446


Epoch 5/10:   6%|▌         | 221/3568 [02:15<34:25,  1.62it/s]

Epoch [5/10], Step [221/3567], Loss: 2.9446


Epoch 5/10:   6%|▋         | 231/3568 [02:21<34:41,  1.60it/s]

Epoch [5/10], Step [231/3567], Loss: 2.9446


Epoch 5/10:   7%|▋         | 241/3568 [02:28<34:00,  1.63it/s]

Epoch [5/10], Step [241/3567], Loss: 2.9446


Epoch 5/10:   7%|▋         | 251/3568 [02:34<33:45,  1.64it/s]

Epoch [5/10], Step [251/3567], Loss: 2.9446


Epoch 5/10:   7%|▋         | 261/3568 [02:40<34:19,  1.61it/s]

Epoch [5/10], Step [261/3567], Loss: 2.9446


Epoch 5/10:   8%|▊         | 271/3568 [02:46<33:37,  1.63it/s]

Epoch [5/10], Step [271/3567], Loss: 2.9446


Epoch 5/10:   8%|▊         | 281/3568 [02:52<33:45,  1.62it/s]

Epoch [5/10], Step [281/3567], Loss: 2.9446


Epoch 5/10:   8%|▊         | 291/3568 [02:58<32:46,  1.67it/s]

Epoch [5/10], Step [291/3567], Loss: 2.9446


Epoch 5/10:   8%|▊         | 301/3568 [03:04<33:14,  1.64it/s]

Epoch [5/10], Step [301/3567], Loss: 2.9446


Epoch 5/10:   9%|▊         | 311/3568 [03:11<33:11,  1.64it/s]

Epoch [5/10], Step [311/3567], Loss: 2.9446


Epoch 5/10:   9%|▉         | 321/3568 [03:17<33:20,  1.62it/s]

Epoch [5/10], Step [321/3567], Loss: 2.9446


Epoch 5/10:   9%|▉         | 331/3568 [03:23<34:15,  1.57it/s]

Epoch [5/10], Step [331/3567], Loss: 2.9446


Epoch 5/10:  10%|▉         | 341/3568 [03:29<33:11,  1.62it/s]

Epoch [5/10], Step [341/3567], Loss: 2.9446


Epoch 5/10:  10%|▉         | 351/3568 [03:35<33:28,  1.60it/s]

Epoch [5/10], Step [351/3567], Loss: 2.9446


Epoch 5/10:  10%|█         | 361/3568 [03:41<32:15,  1.66it/s]

Epoch [5/10], Step [361/3567], Loss: 2.9446


Epoch 5/10:  10%|█         | 371/3568 [03:47<32:15,  1.65it/s]

Epoch [5/10], Step [371/3567], Loss: 2.9446


Epoch 5/10:  11%|█         | 381/3568 [03:54<32:02,  1.66it/s]

Epoch [5/10], Step [381/3567], Loss: 2.9446


Epoch 5/10:  11%|█         | 391/3568 [04:00<31:49,  1.66it/s]

Epoch [5/10], Step [391/3567], Loss: 2.9446


Epoch 5/10:  11%|█         | 401/3568 [04:06<33:06,  1.59it/s]

Epoch [5/10], Step [401/3567], Loss: 2.9446


Epoch 5/10:  12%|█▏        | 411/3568 [04:12<32:38,  1.61it/s]

Epoch [5/10], Step [411/3567], Loss: 2.9446


Epoch 5/10:  12%|█▏        | 421/3568 [04:18<32:29,  1.61it/s]

Epoch [5/10], Step [421/3567], Loss: 2.9446


Epoch 5/10:  12%|█▏        | 431/3568 [04:24<31:51,  1.64it/s]

Epoch [5/10], Step [431/3567], Loss: 2.9446


Epoch 5/10:  12%|█▏        | 441/3568 [04:30<31:15,  1.67it/s]

Epoch [5/10], Step [441/3567], Loss: 2.9446


Epoch 5/10:  13%|█▎        | 451/3568 [04:36<32:02,  1.62it/s]

Epoch [5/10], Step [451/3567], Loss: 2.9446


Epoch 5/10:  13%|█▎        | 461/3568 [04:43<32:12,  1.61it/s]

Epoch [5/10], Step [461/3567], Loss: 2.9446


Epoch 5/10:  13%|█▎        | 471/3568 [04:49<31:56,  1.62it/s]

Epoch [5/10], Step [471/3567], Loss: 2.9446


Epoch 5/10:  13%|█▎        | 481/3568 [04:55<31:47,  1.62it/s]

Epoch [5/10], Step [481/3567], Loss: 2.9446


Epoch 5/10:  14%|█▍        | 491/3568 [05:01<31:28,  1.63it/s]

Epoch [5/10], Step [491/3567], Loss: 2.9446


Epoch 5/10:  14%|█▍        | 501/3568 [05:07<31:08,  1.64it/s]

Epoch [5/10], Step [501/3567], Loss: 2.9446


Epoch 5/10:  14%|█▍        | 511/3568 [05:13<31:15,  1.63it/s]

Epoch [5/10], Step [511/3567], Loss: 2.9446


Epoch 5/10:  15%|█▍        | 521/3568 [05:19<31:03,  1.63it/s]

Epoch [5/10], Step [521/3567], Loss: 2.9446


Epoch 5/10:  15%|█▍        | 531/3568 [05:26<30:50,  1.64it/s]

Epoch [5/10], Step [531/3567], Loss: 2.9446


Epoch 5/10:  15%|█▌        | 541/3568 [05:32<30:53,  1.63it/s]

Epoch [5/10], Step [541/3567], Loss: 2.9446


Epoch 5/10:  15%|█▌        | 551/3568 [05:38<30:45,  1.63it/s]

Epoch [5/10], Step [551/3567], Loss: 2.9446


Epoch 5/10:  16%|█▌        | 561/3568 [05:44<31:53,  1.57it/s]

Epoch [5/10], Step [561/3567], Loss: 2.9446


Epoch 5/10:  16%|█▌        | 571/3568 [05:50<31:43,  1.57it/s]

Epoch [5/10], Step [571/3567], Loss: 2.9446


Epoch 5/10:  16%|█▋        | 581/3568 [05:57<30:26,  1.64it/s]

Epoch [5/10], Step [581/3567], Loss: 2.9446


Epoch 5/10:  17%|█▋        | 591/3568 [06:03<30:26,  1.63it/s]

Epoch [5/10], Step [591/3567], Loss: 2.9446


Epoch 5/10:  17%|█▋        | 601/3568 [06:09<30:31,  1.62it/s]

Epoch [5/10], Step [601/3567], Loss: 2.9446


Epoch 5/10:  17%|█▋        | 611/3568 [06:15<30:13,  1.63it/s]

Epoch [5/10], Step [611/3567], Loss: 2.9446


Epoch 5/10:  17%|█▋        | 621/3568 [06:21<30:02,  1.64it/s]

Epoch [5/10], Step [621/3567], Loss: 2.9446


Epoch 5/10:  18%|█▊        | 631/3568 [06:27<30:15,  1.62it/s]

Epoch [5/10], Step [631/3567], Loss: 2.9446


Epoch 5/10:  18%|█▊        | 641/3568 [06:33<29:38,  1.65it/s]

Epoch [5/10], Step [641/3567], Loss: 2.9446


Epoch 5/10:  18%|█▊        | 651/3568 [06:40<30:49,  1.58it/s]

Epoch [5/10], Step [651/3567], Loss: 2.9446


Epoch 5/10:  19%|█▊        | 661/3568 [06:46<30:23,  1.59it/s]

Epoch [5/10], Step [661/3567], Loss: 2.9446


Epoch 5/10:  19%|█▉        | 671/3568 [06:52<29:47,  1.62it/s]

Epoch [5/10], Step [671/3567], Loss: 2.9446


Epoch 5/10:  19%|█▉        | 681/3568 [06:58<29:30,  1.63it/s]

Epoch [5/10], Step [681/3567], Loss: 2.9446


Epoch 5/10:  19%|█▉        | 691/3568 [07:04<29:28,  1.63it/s]

Epoch [5/10], Step [691/3567], Loss: 2.9446


Epoch 5/10:  20%|█▉        | 701/3568 [07:10<29:30,  1.62it/s]

Epoch [5/10], Step [701/3567], Loss: 2.9446


Epoch 5/10:  20%|█▉        | 711/3568 [07:17<28:53,  1.65it/s]

Epoch [5/10], Step [711/3567], Loss: 2.9446


Epoch 5/10:  20%|██        | 721/3568 [07:23<29:25,  1.61it/s]

Epoch [5/10], Step [721/3567], Loss: 2.9446


Epoch 5/10:  20%|██        | 731/3568 [07:29<29:07,  1.62it/s]

Epoch [5/10], Step [731/3567], Loss: 2.9446


Epoch 5/10:  21%|██        | 741/3568 [07:35<28:55,  1.63it/s]

Epoch [5/10], Step [741/3567], Loss: 2.9446


Epoch 5/10:  21%|██        | 751/3568 [07:41<28:19,  1.66it/s]

Epoch [5/10], Step [751/3567], Loss: 2.9446


Epoch 5/10:  21%|██▏       | 761/3568 [07:47<28:35,  1.64it/s]

Epoch [5/10], Step [761/3567], Loss: 2.9446


Epoch 5/10:  22%|██▏       | 771/3568 [07:53<28:15,  1.65it/s]

Epoch [5/10], Step [771/3567], Loss: 2.9446


Epoch 5/10:  22%|██▏       | 781/3568 [07:59<28:30,  1.63it/s]

Epoch [5/10], Step [781/3567], Loss: 2.9446


Epoch 5/10:  22%|██▏       | 791/3568 [08:06<28:42,  1.61it/s]

Epoch [5/10], Step [791/3567], Loss: 2.9446


Epoch 5/10:  22%|██▏       | 801/3568 [08:12<28:35,  1.61it/s]

Epoch [5/10], Step [801/3567], Loss: 2.9446


Epoch 5/10:  23%|██▎       | 811/3568 [08:18<28:14,  1.63it/s]

Epoch [5/10], Step [811/3567], Loss: 2.9446


Epoch 5/10:  23%|██▎       | 821/3568 [08:24<29:07,  1.57it/s]

Epoch [5/10], Step [821/3567], Loss: 2.9446


Epoch 5/10:  23%|██▎       | 831/3568 [08:30<27:43,  1.65it/s]

Epoch [5/10], Step [831/3567], Loss: 2.9446


Epoch 5/10:  24%|██▎       | 841/3568 [08:36<27:40,  1.64it/s]

Epoch [5/10], Step [841/3567], Loss: 2.9446


Epoch 5/10:  24%|██▍       | 851/3568 [08:43<27:23,  1.65it/s]

Epoch [5/10], Step [851/3567], Loss: 2.9446


Epoch 5/10:  24%|██▍       | 861/3568 [08:49<27:02,  1.67it/s]

Epoch [5/10], Step [861/3567], Loss: 2.9446


Epoch 5/10:  24%|██▍       | 871/3568 [08:55<27:13,  1.65it/s]

Epoch [5/10], Step [871/3567], Loss: 2.9446


Epoch 5/10:  25%|██▍       | 881/3568 [09:01<28:11,  1.59it/s]

Epoch [5/10], Step [881/3567], Loss: 2.9446


Epoch 5/10:  25%|██▍       | 891/3568 [09:07<27:28,  1.62it/s]

Epoch [5/10], Step [891/3567], Loss: 2.9446


Epoch 5/10:  25%|██▌       | 901/3568 [09:13<27:16,  1.63it/s]

Epoch [5/10], Step [901/3567], Loss: 2.9446


Epoch 5/10:  26%|██▌       | 911/3568 [09:19<27:03,  1.64it/s]

Epoch [5/10], Step [911/3567], Loss: 2.9446


Epoch 5/10:  26%|██▌       | 921/3568 [09:25<26:53,  1.64it/s]

Epoch [5/10], Step [921/3567], Loss: 2.9446


Epoch 5/10:  26%|██▌       | 931/3568 [09:32<27:11,  1.62it/s]

Epoch [5/10], Step [931/3567], Loss: 2.9446


Epoch 5/10:  26%|██▋       | 941/3568 [09:38<27:59,  1.56it/s]

Epoch [5/10], Step [941/3567], Loss: 2.9446


Epoch 5/10:  27%|██▋       | 951/3568 [09:44<26:51,  1.62it/s]

Epoch [5/10], Step [951/3567], Loss: 2.9446


Epoch 5/10:  27%|██▋       | 961/3568 [09:50<26:28,  1.64it/s]

Epoch [5/10], Step [961/3567], Loss: 2.9446


Epoch 5/10:  27%|██▋       | 971/3568 [09:56<26:50,  1.61it/s]

Epoch [5/10], Step [971/3567], Loss: 2.9446


Epoch 5/10:  27%|██▋       | 981/3568 [10:03<26:17,  1.64it/s]

Epoch [5/10], Step [981/3567], Loss: 2.9446


Epoch 5/10:  28%|██▊       | 991/3568 [10:09<27:44,  1.55it/s]

Epoch [5/10], Step [991/3567], Loss: 2.9446


Epoch 5/10:  28%|██▊       | 1001/3568 [10:15<26:37,  1.61it/s]

Epoch [5/10], Step [1001/3567], Loss: 2.9446


Epoch 5/10:  28%|██▊       | 1011/3568 [10:21<26:06,  1.63it/s]

Epoch [5/10], Step [1011/3567], Loss: 2.9446


Epoch 5/10:  29%|██▊       | 1021/3568 [10:27<25:40,  1.65it/s]

Epoch [5/10], Step [1021/3567], Loss: 2.9446


Epoch 5/10:  29%|██▉       | 1031/3568 [10:34<25:41,  1.65it/s]

Epoch [5/10], Step [1031/3567], Loss: 2.9446


Epoch 5/10:  29%|██▉       | 1041/3568 [10:40<25:11,  1.67it/s]

Epoch [5/10], Step [1041/3567], Loss: 2.9446


Epoch 5/10:  29%|██▉       | 1051/3568 [10:46<25:15,  1.66it/s]

Epoch [5/10], Step [1051/3567], Loss: 2.9446


Epoch 5/10:  30%|██▉       | 1061/3568 [10:52<25:21,  1.65it/s]

Epoch [5/10], Step [1061/3567], Loss: 2.9446


Epoch 5/10:  30%|███       | 1071/3568 [10:58<25:16,  1.65it/s]

Epoch [5/10], Step [1071/3567], Loss: 2.9446


Epoch 5/10:  30%|███       | 1081/3568 [11:04<25:48,  1.61it/s]

Epoch [5/10], Step [1081/3567], Loss: 2.9446


Epoch 5/10:  31%|███       | 1091/3568 [11:10<25:26,  1.62it/s]

Epoch [5/10], Step [1091/3567], Loss: 2.9446


Epoch 5/10:  31%|███       | 1101/3568 [11:16<25:12,  1.63it/s]

Epoch [5/10], Step [1101/3567], Loss: 2.9446


Epoch 5/10:  31%|███       | 1111/3568 [11:23<24:47,  1.65it/s]

Epoch [5/10], Step [1111/3567], Loss: 2.9446


Epoch 5/10:  31%|███▏      | 1121/3568 [11:29<26:07,  1.56it/s]

Epoch [5/10], Step [1121/3567], Loss: 2.9446


Epoch 5/10:  32%|███▏      | 1131/3568 [11:35<24:37,  1.65it/s]

Epoch [5/10], Step [1131/3567], Loss: 2.9446


Epoch 5/10:  32%|███▏      | 1141/3568 [11:41<24:46,  1.63it/s]

Epoch [5/10], Step [1141/3567], Loss: 2.9446


Epoch 5/10:  32%|███▏      | 1151/3568 [11:47<24:53,  1.62it/s]

Epoch [5/10], Step [1151/3567], Loss: 2.9446


Epoch 5/10:  33%|███▎      | 1161/3568 [11:53<24:51,  1.61it/s]

Epoch [5/10], Step [1161/3567], Loss: 2.9446


Epoch 5/10:  33%|███▎      | 1171/3568 [11:59<24:30,  1.63it/s]

Epoch [5/10], Step [1171/3567], Loss: 2.9446


Epoch 5/10:  33%|███▎      | 1181/3568 [12:06<24:24,  1.63it/s]

Epoch [5/10], Step [1181/3567], Loss: 2.9446


Epoch 5/10:  33%|███▎      | 1191/3568 [12:12<24:04,  1.65it/s]

Epoch [5/10], Step [1191/3567], Loss: 2.9446


Epoch 5/10:  34%|███▎      | 1201/3568 [12:18<24:12,  1.63it/s]

Epoch [5/10], Step [1201/3567], Loss: 2.9446


Epoch 5/10:  34%|███▍      | 1211/3568 [12:24<23:51,  1.65it/s]

Epoch [5/10], Step [1211/3567], Loss: 2.9446


Epoch 5/10:  34%|███▍      | 1221/3568 [12:30<23:46,  1.64it/s]

Epoch [5/10], Step [1221/3567], Loss: 2.9446


Epoch 5/10:  35%|███▍      | 1231/3568 [12:36<23:39,  1.65it/s]

Epoch [5/10], Step [1231/3567], Loss: 2.9446


Epoch 5/10:  35%|███▍      | 1241/3568 [12:42<23:26,  1.65it/s]

Epoch [5/10], Step [1241/3567], Loss: 2.9446


Epoch 5/10:  35%|███▌      | 1251/3568 [12:48<23:09,  1.67it/s]

Epoch [5/10], Step [1251/3567], Loss: 2.9446


Epoch 5/10:  35%|███▌      | 1261/3568 [12:55<23:47,  1.62it/s]

Epoch [5/10], Step [1261/3567], Loss: 2.9446


Epoch 5/10:  36%|███▌      | 1271/3568 [13:01<23:41,  1.62it/s]

Epoch [5/10], Step [1271/3567], Loss: 2.9446


Epoch 5/10:  36%|███▌      | 1281/3568 [13:07<23:01,  1.66it/s]

Epoch [5/10], Step [1281/3567], Loss: 2.9446


Epoch 5/10:  36%|███▌      | 1291/3568 [13:13<23:39,  1.60it/s]

Epoch [5/10], Step [1291/3567], Loss: 2.9446


Epoch 5/10:  36%|███▋      | 1301/3568 [13:19<24:01,  1.57it/s]

Epoch [5/10], Step [1301/3567], Loss: 2.9446


Epoch 5/10:  37%|███▋      | 1311/3568 [13:25<23:36,  1.59it/s]

Epoch [5/10], Step [1311/3567], Loss: 2.9446


Epoch 5/10:  37%|███▋      | 1321/3568 [13:32<22:50,  1.64it/s]

Epoch [5/10], Step [1321/3567], Loss: 2.9446


Epoch 5/10:  37%|███▋      | 1331/3568 [13:38<23:10,  1.61it/s]

Epoch [5/10], Step [1331/3567], Loss: 2.9446


Epoch 5/10:  38%|███▊      | 1341/3568 [13:44<23:01,  1.61it/s]

Epoch [5/10], Step [1341/3567], Loss: 2.9446


Epoch 5/10:  38%|███▊      | 1351/3568 [13:50<22:47,  1.62it/s]

Epoch [5/10], Step [1351/3567], Loss: 2.9446


Epoch 5/10:  38%|███▊      | 1361/3568 [13:57<22:50,  1.61it/s]

Epoch [5/10], Step [1361/3567], Loss: 2.9446


Epoch 5/10:  38%|███▊      | 1371/3568 [14:03<22:27,  1.63it/s]

Epoch [5/10], Step [1371/3567], Loss: 2.9446


Epoch 5/10:  39%|███▊      | 1381/3568 [14:09<22:24,  1.63it/s]

Epoch [5/10], Step [1381/3567], Loss: 2.9446


Epoch 5/10:  39%|███▉      | 1391/3568 [14:15<23:30,  1.54it/s]

Epoch [5/10], Step [1391/3567], Loss: 2.9446


Epoch 5/10:  39%|███▉      | 1401/3568 [14:21<22:18,  1.62it/s]

Epoch [5/10], Step [1401/3567], Loss: 2.9446


Epoch 5/10:  40%|███▉      | 1411/3568 [14:27<21:45,  1.65it/s]

Epoch [5/10], Step [1411/3567], Loss: 2.9446


Epoch 5/10:  40%|███▉      | 1421/3568 [14:34<22:06,  1.62it/s]

Epoch [5/10], Step [1421/3567], Loss: 2.9446


Epoch 5/10:  40%|████      | 1431/3568 [14:40<21:42,  1.64it/s]

Epoch [5/10], Step [1431/3567], Loss: 2.9446


Epoch 5/10:  40%|████      | 1441/3568 [14:46<22:01,  1.61it/s]

Epoch [5/10], Step [1441/3567], Loss: 2.9446


Epoch 5/10:  41%|████      | 1451/3568 [14:52<21:21,  1.65it/s]

Epoch [5/10], Step [1451/3567], Loss: 2.9446


Epoch 5/10:  41%|████      | 1461/3568 [14:58<21:34,  1.63it/s]

Epoch [5/10], Step [1461/3567], Loss: 2.9446


Epoch 5/10:  41%|████      | 1471/3568 [15:04<21:32,  1.62it/s]

Epoch [5/10], Step [1471/3567], Loss: 2.9446


Epoch 5/10:  42%|████▏     | 1481/3568 [15:10<21:21,  1.63it/s]

Epoch [5/10], Step [1481/3567], Loss: 2.9446


Epoch 5/10:  42%|████▏     | 1491/3568 [15:17<21:08,  1.64it/s]

Epoch [5/10], Step [1491/3567], Loss: 2.9446


Epoch 5/10:  42%|████▏     | 1501/3568 [15:23<21:42,  1.59it/s]

Epoch [5/10], Step [1501/3567], Loss: 2.9446


Epoch 5/10:  42%|████▏     | 1511/3568 [15:29<20:44,  1.65it/s]

Epoch [5/10], Step [1511/3567], Loss: 2.9446


Epoch 5/10:  43%|████▎     | 1521/3568 [15:35<20:58,  1.63it/s]

Epoch [5/10], Step [1521/3567], Loss: 2.9446


Epoch 5/10:  43%|████▎     | 1531/3568 [15:41<21:19,  1.59it/s]

Epoch [5/10], Step [1531/3567], Loss: 2.9446


Epoch 5/10:  43%|████▎     | 1541/3568 [15:47<20:34,  1.64it/s]

Epoch [5/10], Step [1541/3567], Loss: 2.9446


Epoch 5/10:  43%|████▎     | 1551/3568 [15:54<20:17,  1.66it/s]

Epoch [5/10], Step [1551/3567], Loss: 2.9446


Epoch 5/10:  44%|████▍     | 1561/3568 [16:00<20:34,  1.63it/s]

Epoch [5/10], Step [1561/3567], Loss: 2.9446


Epoch 5/10:  44%|████▍     | 1571/3568 [16:06<20:33,  1.62it/s]

Epoch [5/10], Step [1571/3567], Loss: 2.9446


Epoch 5/10:  44%|████▍     | 1581/3568 [16:12<20:09,  1.64it/s]

Epoch [5/10], Step [1581/3567], Loss: 2.9446


Epoch 5/10:  45%|████▍     | 1591/3568 [16:18<19:59,  1.65it/s]

Epoch [5/10], Step [1591/3567], Loss: 2.9446


Epoch 5/10:  45%|████▍     | 1601/3568 [16:24<20:08,  1.63it/s]

Epoch [5/10], Step [1601/3567], Loss: 2.9446


Epoch 5/10:  45%|████▌     | 1611/3568 [16:30<19:44,  1.65it/s]

Epoch [5/10], Step [1611/3567], Loss: 2.9446


Epoch 5/10:  45%|████▌     | 1621/3568 [16:37<19:47,  1.64it/s]

Epoch [5/10], Step [1621/3567], Loss: 2.9446


Epoch 5/10:  46%|████▌     | 1631/3568 [16:43<20:06,  1.61it/s]

Epoch [5/10], Step [1631/3567], Loss: 2.9446


Epoch 5/10:  46%|████▌     | 1641/3568 [16:49<19:29,  1.65it/s]

Epoch [5/10], Step [1641/3567], Loss: 2.9446


Epoch 5/10:  46%|████▋     | 1651/3568 [16:55<19:38,  1.63it/s]

Epoch [5/10], Step [1651/3567], Loss: 2.9446


Epoch 5/10:  47%|████▋     | 1661/3568 [17:01<19:31,  1.63it/s]

Epoch [5/10], Step [1661/3567], Loss: 2.9446


Epoch 5/10:  47%|████▋     | 1671/3568 [17:07<19:15,  1.64it/s]

Epoch [5/10], Step [1671/3567], Loss: 2.9446


Epoch 5/10:  47%|████▋     | 1681/3568 [17:13<19:18,  1.63it/s]

Epoch [5/10], Step [1681/3567], Loss: 2.9446


Epoch 5/10:  47%|████▋     | 1691/3568 [17:20<19:39,  1.59it/s]

Epoch [5/10], Step [1691/3567], Loss: 2.9446


Epoch 5/10:  48%|████▊     | 1701/3568 [17:26<18:55,  1.64it/s]

Epoch [5/10], Step [1701/3567], Loss: 2.9446


Epoch 5/10:  48%|████▊     | 1711/3568 [17:32<19:03,  1.62it/s]

Epoch [5/10], Step [1711/3567], Loss: 2.9446


Epoch 5/10:  48%|████▊     | 1721/3568 [17:38<19:05,  1.61it/s]

Epoch [5/10], Step [1721/3567], Loss: 2.9446


Epoch 5/10:  49%|████▊     | 1731/3568 [17:44<18:44,  1.63it/s]

Epoch [5/10], Step [1731/3567], Loss: 2.9446


Epoch 5/10:  49%|████▉     | 1741/3568 [17:51<18:28,  1.65it/s]

Epoch [5/10], Step [1741/3567], Loss: 2.9446


Epoch 5/10:  49%|████▉     | 1751/3568 [17:57<18:19,  1.65it/s]

Epoch [5/10], Step [1751/3567], Loss: 2.9446


Epoch 5/10:  49%|████▉     | 1761/3568 [18:03<18:20,  1.64it/s]

Epoch [5/10], Step [1761/3567], Loss: 2.9446


Epoch 5/10:  50%|████▉     | 1771/3568 [18:09<18:41,  1.60it/s]

Epoch [5/10], Step [1771/3567], Loss: 2.9446


Epoch 5/10:  50%|████▉     | 1781/3568 [18:15<18:07,  1.64it/s]

Epoch [5/10], Step [1781/3567], Loss: 2.9446


Epoch 5/10:  50%|█████     | 1791/3568 [18:21<17:56,  1.65it/s]

Epoch [5/10], Step [1791/3567], Loss: 2.9446


Epoch 5/10:  50%|█████     | 1801/3568 [18:27<17:58,  1.64it/s]

Epoch [5/10], Step [1801/3567], Loss: 2.9446


Epoch 5/10:  51%|█████     | 1811/3568 [18:34<18:26,  1.59it/s]

Epoch [5/10], Step [1811/3567], Loss: 2.9446


Epoch 5/10:  51%|█████     | 1821/3568 [18:40<18:01,  1.62it/s]

Epoch [5/10], Step [1821/3567], Loss: 2.9446


Epoch 5/10:  51%|█████▏    | 1831/3568 [18:46<17:48,  1.63it/s]

Epoch [5/10], Step [1831/3567], Loss: 2.9446


Epoch 5/10:  52%|█████▏    | 1841/3568 [18:52<17:49,  1.62it/s]

Epoch [5/10], Step [1841/3567], Loss: 2.9446


Epoch 5/10:  52%|█████▏    | 1851/3568 [18:58<17:27,  1.64it/s]

Epoch [5/10], Step [1851/3567], Loss: 2.9446


Epoch 5/10:  52%|█████▏    | 1861/3568 [19:05<17:49,  1.60it/s]

Epoch [5/10], Step [1861/3567], Loss: 2.9446


Epoch 5/10:  52%|█████▏    | 1871/3568 [19:11<17:19,  1.63it/s]

Epoch [5/10], Step [1871/3567], Loss: 2.9446


Epoch 5/10:  53%|█████▎    | 1881/3568 [19:17<17:26,  1.61it/s]

Epoch [5/10], Step [1881/3567], Loss: 2.9446


Epoch 5/10:  53%|█████▎    | 1891/3568 [19:23<17:15,  1.62it/s]

Epoch [5/10], Step [1891/3567], Loss: 2.9446


Epoch 5/10:  53%|█████▎    | 1901/3568 [19:29<17:58,  1.55it/s]

Epoch [5/10], Step [1901/3567], Loss: 2.9446


Epoch 5/10:  54%|█████▎    | 1911/3568 [19:36<17:24,  1.59it/s]

Epoch [5/10], Step [1911/3567], Loss: 2.9446


Epoch 5/10:  54%|█████▍    | 1921/3568 [19:42<16:44,  1.64it/s]

Epoch [5/10], Step [1921/3567], Loss: 2.9446


Epoch 5/10:  54%|█████▍    | 1931/3568 [19:48<16:55,  1.61it/s]

Epoch [5/10], Step [1931/3567], Loss: 2.9446


Epoch 5/10:  54%|█████▍    | 1941/3568 [19:54<16:31,  1.64it/s]

Epoch [5/10], Step [1941/3567], Loss: 2.9446


Epoch 5/10:  55%|█████▍    | 1951/3568 [20:00<16:26,  1.64it/s]

Epoch [5/10], Step [1951/3567], Loss: 2.9446


Epoch 5/10:  55%|█████▍    | 1961/3568 [20:07<16:24,  1.63it/s]

Epoch [5/10], Step [1961/3567], Loss: 2.9446


Epoch 5/10:  55%|█████▌    | 1971/3568 [20:13<16:20,  1.63it/s]

Epoch [5/10], Step [1971/3567], Loss: 2.9446


Epoch 5/10:  56%|█████▌    | 1981/3568 [20:19<16:18,  1.62it/s]

Epoch [5/10], Step [1981/3567], Loss: 2.9446


Epoch 5/10:  56%|█████▌    | 1991/3568 [20:25<16:24,  1.60it/s]

Epoch [5/10], Step [1991/3567], Loss: 2.9446


Epoch 5/10:  56%|█████▌    | 2001/3568 [20:31<16:05,  1.62it/s]

Epoch [5/10], Step [2001/3567], Loss: 2.9446


Epoch 5/10:  56%|█████▋    | 2011/3568 [20:38<17:04,  1.52it/s]

Epoch [5/10], Step [2011/3567], Loss: 2.9446


Epoch 5/10:  57%|█████▋    | 2021/3568 [20:44<15:53,  1.62it/s]

Epoch [5/10], Step [2021/3567], Loss: 2.9446


Epoch 5/10:  57%|█████▋    | 2031/3568 [20:50<15:38,  1.64it/s]

Epoch [5/10], Step [2031/3567], Loss: 2.9446


Epoch 5/10:  57%|█████▋    | 2041/3568 [20:56<15:43,  1.62it/s]

Epoch [5/10], Step [2041/3567], Loss: 2.9446


Epoch 5/10:  57%|█████▋    | 2051/3568 [21:02<15:30,  1.63it/s]

Epoch [5/10], Step [2051/3567], Loss: 2.9446


Epoch 5/10:  58%|█████▊    | 2061/3568 [21:08<15:37,  1.61it/s]

Epoch [5/10], Step [2061/3567], Loss: 2.9446


Epoch 5/10:  58%|█████▊    | 2071/3568 [21:15<15:27,  1.61it/s]

Epoch [5/10], Step [2071/3567], Loss: 2.9446


Epoch 5/10:  58%|█████▊    | 2081/3568 [21:21<15:23,  1.61it/s]

Epoch [5/10], Step [2081/3567], Loss: 2.9446


Epoch 5/10:  59%|█████▊    | 2091/3568 [21:27<15:05,  1.63it/s]

Epoch [5/10], Step [2091/3567], Loss: 2.9446


Epoch 5/10:  59%|█████▉    | 2101/3568 [21:33<15:29,  1.58it/s]

Epoch [5/10], Step [2101/3567], Loss: 2.9446


Epoch 5/10:  59%|█████▉    | 2111/3568 [21:39<14:38,  1.66it/s]

Epoch [5/10], Step [2111/3567], Loss: 2.9446


Epoch 5/10:  59%|█████▉    | 2121/3568 [21:45<14:54,  1.62it/s]

Epoch [5/10], Step [2121/3567], Loss: 2.9446


Epoch 5/10:  60%|█████▉    | 2131/3568 [21:52<14:46,  1.62it/s]

Epoch [5/10], Step [2131/3567], Loss: 2.9446


Epoch 5/10:  60%|██████    | 2141/3568 [21:58<14:27,  1.65it/s]

Epoch [5/10], Step [2141/3567], Loss: 2.9446


Epoch 5/10:  60%|██████    | 2151/3568 [22:04<14:21,  1.64it/s]

Epoch [5/10], Step [2151/3567], Loss: 2.9446


Epoch 5/10:  61%|██████    | 2161/3568 [22:10<14:27,  1.62it/s]

Epoch [5/10], Step [2161/3567], Loss: 2.9446


Epoch 5/10:  61%|██████    | 2171/3568 [22:16<13:55,  1.67it/s]

Epoch [5/10], Step [2171/3567], Loss: 2.9446


Epoch 5/10:  61%|██████    | 2181/3568 [22:22<14:08,  1.63it/s]

Epoch [5/10], Step [2181/3567], Loss: 2.9446


Epoch 5/10:  61%|██████▏   | 2191/3568 [22:28<14:17,  1.61it/s]

Epoch [5/10], Step [2191/3567], Loss: 2.9446


Epoch 5/10:  62%|██████▏   | 2201/3568 [22:34<13:58,  1.63it/s]

Epoch [5/10], Step [2201/3567], Loss: 2.9446


Epoch 5/10:  62%|██████▏   | 2211/3568 [22:40<13:43,  1.65it/s]

Epoch [5/10], Step [2211/3567], Loss: 2.9446


Epoch 5/10:  62%|██████▏   | 2221/3568 [22:46<13:35,  1.65it/s]

Epoch [5/10], Step [2221/3567], Loss: 2.9446


Epoch 5/10:  63%|██████▎   | 2231/3568 [22:53<13:46,  1.62it/s]

Epoch [5/10], Step [2231/3567], Loss: 2.9446


Epoch 5/10:  63%|██████▎   | 2241/3568 [22:59<13:28,  1.64it/s]

Epoch [5/10], Step [2241/3567], Loss: 2.9446


Epoch 5/10:  63%|██████▎   | 2251/3568 [23:05<13:45,  1.60it/s]

Epoch [5/10], Step [2251/3567], Loss: 2.9446


Epoch 5/10:  63%|██████▎   | 2261/3568 [23:11<13:15,  1.64it/s]

Epoch [5/10], Step [2261/3567], Loss: 2.9446


Epoch 5/10:  64%|██████▎   | 2271/3568 [23:17<13:51,  1.56it/s]

Epoch [5/10], Step [2271/3567], Loss: 2.9446


Epoch 5/10:  64%|██████▍   | 2281/3568 [23:24<13:28,  1.59it/s]

Epoch [5/10], Step [2281/3567], Loss: 2.9446


Epoch 5/10:  64%|██████▍   | 2291/3568 [23:30<12:58,  1.64it/s]

Epoch [5/10], Step [2291/3567], Loss: 2.9446


Epoch 5/10:  64%|██████▍   | 2301/3568 [23:36<13:00,  1.62it/s]

Epoch [5/10], Step [2301/3567], Loss: 2.9446


Epoch 5/10:  65%|██████▍   | 2311/3568 [23:42<12:39,  1.65it/s]

Epoch [5/10], Step [2311/3567], Loss: 2.9446


Epoch 5/10:  65%|██████▌   | 2321/3568 [23:48<12:47,  1.63it/s]

Epoch [5/10], Step [2321/3567], Loss: 2.9446


Epoch 5/10:  65%|██████▌   | 2331/3568 [23:54<12:33,  1.64it/s]

Epoch [5/10], Step [2331/3567], Loss: 2.9446


Epoch 5/10:  66%|██████▌   | 2341/3568 [24:00<13:05,  1.56it/s]

Epoch [5/10], Step [2341/3567], Loss: 2.9446


Epoch 5/10:  66%|██████▌   | 2351/3568 [24:06<12:12,  1.66it/s]

Epoch [5/10], Step [2351/3567], Loss: 2.9446


Epoch 5/10:  66%|██████▌   | 2361/3568 [24:13<12:20,  1.63it/s]

Epoch [5/10], Step [2361/3567], Loss: 2.9446


Epoch 5/10:  66%|██████▋   | 2371/3568 [24:19<12:22,  1.61it/s]

Epoch [5/10], Step [2371/3567], Loss: 2.9446


Epoch 5/10:  67%|██████▋   | 2381/3568 [24:25<12:19,  1.61it/s]

Epoch [5/10], Step [2381/3567], Loss: 2.9446


Epoch 5/10:  67%|██████▋   | 2391/3568 [24:31<12:13,  1.60it/s]

Epoch [5/10], Step [2391/3567], Loss: 2.9446


Epoch 5/10:  67%|██████▋   | 2401/3568 [24:37<11:40,  1.67it/s]

Epoch [5/10], Step [2401/3567], Loss: 2.9446


Epoch 5/10:  68%|██████▊   | 2411/3568 [24:43<11:41,  1.65it/s]

Epoch [5/10], Step [2411/3567], Loss: 2.9446


Epoch 5/10:  68%|██████▊   | 2421/3568 [24:49<11:45,  1.63it/s]

Epoch [5/10], Step [2421/3567], Loss: 2.9446


Epoch 5/10:  68%|██████▊   | 2431/3568 [24:55<11:38,  1.63it/s]

Epoch [5/10], Step [2431/3567], Loss: 2.9446


Epoch 5/10:  68%|██████▊   | 2441/3568 [25:02<11:40,  1.61it/s]

Epoch [5/10], Step [2441/3567], Loss: 2.9446


Epoch 5/10:  69%|██████▊   | 2451/3568 [25:08<11:11,  1.66it/s]

Epoch [5/10], Step [2451/3567], Loss: 2.9446


Epoch 5/10:  69%|██████▉   | 2461/3568 [25:14<11:23,  1.62it/s]

Epoch [5/10], Step [2461/3567], Loss: 2.9446


Epoch 5/10:  69%|██████▉   | 2471/3568 [25:20<11:03,  1.65it/s]

Epoch [5/10], Step [2471/3567], Loss: 2.9446


Epoch 5/10:  70%|██████▉   | 2481/3568 [25:26<10:54,  1.66it/s]

Epoch [5/10], Step [2481/3567], Loss: 2.9446


Epoch 5/10:  70%|██████▉   | 2491/3568 [25:32<11:04,  1.62it/s]

Epoch [5/10], Step [2491/3567], Loss: 2.9446


Epoch 5/10:  70%|███████   | 2501/3568 [25:38<10:47,  1.65it/s]

Epoch [5/10], Step [2501/3567], Loss: 2.9446


Epoch 5/10:  70%|███████   | 2511/3568 [25:44<10:48,  1.63it/s]

Epoch [5/10], Step [2511/3567], Loss: 2.9446


Epoch 5/10:  71%|███████   | 2521/3568 [25:50<10:26,  1.67it/s]

Epoch [5/10], Step [2521/3567], Loss: 2.9446


Epoch 5/10:  71%|███████   | 2531/3568 [25:56<10:35,  1.63it/s]

Epoch [5/10], Step [2531/3567], Loss: 2.9446


Epoch 5/10:  71%|███████   | 2541/3568 [26:03<10:54,  1.57it/s]

Epoch [5/10], Step [2541/3567], Loss: 2.9446


Epoch 5/10:  71%|███████▏  | 2551/3568 [26:09<10:25,  1.63it/s]

Epoch [5/10], Step [2551/3567], Loss: 2.9446


Epoch 5/10:  72%|███████▏  | 2561/3568 [26:15<10:22,  1.62it/s]

Epoch [5/10], Step [2561/3567], Loss: 2.9446


Epoch 5/10:  72%|███████▏  | 2571/3568 [26:21<10:07,  1.64it/s]

Epoch [5/10], Step [2571/3567], Loss: 2.9446


Epoch 5/10:  72%|███████▏  | 2581/3568 [26:27<09:56,  1.66it/s]

Epoch [5/10], Step [2581/3567], Loss: 2.9446


Epoch 5/10:  73%|███████▎  | 2591/3568 [26:33<09:44,  1.67it/s]

Epoch [5/10], Step [2591/3567], Loss: 2.9446


Epoch 5/10:  73%|███████▎  | 2601/3568 [26:39<09:49,  1.64it/s]

Epoch [5/10], Step [2601/3567], Loss: 2.9446


Epoch 5/10:  73%|███████▎  | 2611/3568 [26:45<09:45,  1.63it/s]

Epoch [5/10], Step [2611/3567], Loss: 2.9446


Epoch 5/10:  73%|███████▎  | 2621/3568 [26:51<09:33,  1.65it/s]

Epoch [5/10], Step [2621/3567], Loss: 2.9446


Epoch 5/10:  74%|███████▎  | 2631/3568 [26:57<09:30,  1.64it/s]

Epoch [5/10], Step [2631/3567], Loss: 2.9446


Epoch 5/10:  74%|███████▍  | 2641/3568 [27:04<09:50,  1.57it/s]

Epoch [5/10], Step [2641/3567], Loss: 2.9446


Epoch 5/10:  74%|███████▍  | 2651/3568 [27:10<09:41,  1.58it/s]

Epoch [5/10], Step [2651/3567], Loss: 2.9446


Epoch 5/10:  75%|███████▍  | 2661/3568 [27:16<09:18,  1.62it/s]

Epoch [5/10], Step [2661/3567], Loss: 2.9446


Epoch 5/10:  75%|███████▍  | 2671/3568 [27:22<09:00,  1.66it/s]

Epoch [5/10], Step [2671/3567], Loss: 2.9446


Epoch 5/10:  75%|███████▌  | 2681/3568 [27:28<08:53,  1.66it/s]

Epoch [5/10], Step [2681/3567], Loss: 2.9446


Epoch 5/10:  75%|███████▌  | 2691/3568 [27:34<09:09,  1.59it/s]

Epoch [5/10], Step [2691/3567], Loss: 2.9446


Epoch 5/10:  76%|███████▌  | 2701/3568 [27:41<09:16,  1.56it/s]

Epoch [5/10], Step [2701/3567], Loss: 2.9446


Epoch 5/10:  76%|███████▌  | 2711/3568 [27:47<08:53,  1.61it/s]

Epoch [5/10], Step [2711/3567], Loss: 2.9446


Epoch 5/10:  76%|███████▋  | 2721/3568 [27:53<08:43,  1.62it/s]

Epoch [5/10], Step [2721/3567], Loss: 2.9446


Epoch 5/10:  77%|███████▋  | 2731/3568 [28:00<09:01,  1.55it/s]

Epoch [5/10], Step [2731/3567], Loss: 2.9446


Epoch 5/10:  77%|███████▋  | 2741/3568 [28:06<08:24,  1.64it/s]

Epoch [5/10], Step [2741/3567], Loss: 2.9446


Epoch 5/10:  77%|███████▋  | 2751/3568 [28:12<08:16,  1.65it/s]

Epoch [5/10], Step [2751/3567], Loss: 2.9446


Epoch 5/10:  77%|███████▋  | 2761/3568 [28:18<08:23,  1.60it/s]

Epoch [5/10], Step [2761/3567], Loss: 2.9446


Epoch 5/10:  78%|███████▊  | 2771/3568 [28:24<08:11,  1.62it/s]

Epoch [5/10], Step [2771/3567], Loss: 2.9446


Epoch 5/10:  78%|███████▊  | 2781/3568 [28:31<08:16,  1.58it/s]

Epoch [5/10], Step [2781/3567], Loss: 2.9446


Epoch 5/10:  78%|███████▊  | 2791/3568 [28:37<08:00,  1.62it/s]

Epoch [5/10], Step [2791/3567], Loss: 2.9446


Epoch 5/10:  79%|███████▊  | 2801/3568 [28:43<07:45,  1.65it/s]

Epoch [5/10], Step [2801/3567], Loss: 2.9446


Epoch 5/10:  79%|███████▉  | 2811/3568 [28:49<07:38,  1.65it/s]

Epoch [5/10], Step [2811/3567], Loss: 2.9446


Epoch 5/10:  79%|███████▉  | 2821/3568 [28:55<07:31,  1.65it/s]

Epoch [5/10], Step [2821/3567], Loss: 2.9446


Epoch 5/10:  79%|███████▉  | 2831/3568 [29:01<07:36,  1.62it/s]

Epoch [5/10], Step [2831/3567], Loss: 2.9446


Epoch 5/10:  80%|███████▉  | 2841/3568 [29:07<07:49,  1.55it/s]

Epoch [5/10], Step [2841/3567], Loss: 2.9446


Epoch 5/10:  80%|███████▉  | 2851/3568 [29:13<07:13,  1.65it/s]

Epoch [5/10], Step [2851/3567], Loss: 2.9446


Epoch 5/10:  80%|████████  | 2861/3568 [29:20<07:09,  1.64it/s]

Epoch [5/10], Step [2861/3567], Loss: 2.9446


Epoch 5/10:  80%|████████  | 2871/3568 [29:26<07:04,  1.64it/s]

Epoch [5/10], Step [2871/3567], Loss: 2.9446


Epoch 5/10:  81%|████████  | 2881/3568 [29:32<07:01,  1.63it/s]

Epoch [5/10], Step [2881/3567], Loss: 2.9446


Epoch 5/10:  81%|████████  | 2891/3568 [29:38<06:56,  1.62it/s]

Epoch [5/10], Step [2891/3567], Loss: 2.9446


Epoch 5/10:  81%|████████▏ | 2901/3568 [29:44<06:46,  1.64it/s]

Epoch [5/10], Step [2901/3567], Loss: 2.9446


Epoch 5/10:  82%|████████▏ | 2911/3568 [29:50<06:41,  1.63it/s]

Epoch [5/10], Step [2911/3567], Loss: 2.9446


Epoch 5/10:  82%|████████▏ | 2921/3568 [29:56<06:35,  1.64it/s]

Epoch [5/10], Step [2921/3567], Loss: 2.9446


Epoch 5/10:  82%|████████▏ | 2931/3568 [30:03<06:28,  1.64it/s]

Epoch [5/10], Step [2931/3567], Loss: 2.9446


Epoch 5/10:  82%|████████▏ | 2941/3568 [30:09<06:28,  1.62it/s]

Epoch [5/10], Step [2941/3567], Loss: 2.9446


Epoch 5/10:  83%|████████▎ | 2951/3568 [30:15<06:26,  1.60it/s]

Epoch [5/10], Step [2951/3567], Loss: 2.9446


Epoch 5/10:  83%|████████▎ | 2961/3568 [30:21<06:22,  1.59it/s]

Epoch [5/10], Step [2961/3567], Loss: 2.9446


Epoch 5/10:  83%|████████▎ | 2971/3568 [30:27<06:11,  1.61it/s]

Epoch [5/10], Step [2971/3567], Loss: 2.9446


Epoch 5/10:  84%|████████▎ | 2981/3568 [30:34<06:03,  1.62it/s]

Epoch [5/10], Step [2981/3567], Loss: 2.9446


Epoch 5/10:  84%|████████▍ | 2991/3568 [30:40<05:53,  1.63it/s]

Epoch [5/10], Step [2991/3567], Loss: 2.9446


Epoch 5/10:  84%|████████▍ | 3001/3568 [30:46<05:42,  1.65it/s]

Epoch [5/10], Step [3001/3567], Loss: 2.9446


Epoch 5/10:  84%|████████▍ | 3011/3568 [30:52<05:33,  1.67it/s]

Epoch [5/10], Step [3011/3567], Loss: 2.9446


Epoch 5/10:  85%|████████▍ | 3021/3568 [30:58<05:35,  1.63it/s]

Epoch [5/10], Step [3021/3567], Loss: 2.9446


Epoch 5/10:  85%|████████▍ | 3031/3568 [31:04<05:27,  1.64it/s]

Epoch [5/10], Step [3031/3567], Loss: 2.9446


Epoch 5/10:  85%|████████▌ | 3041/3568 [31:10<05:20,  1.64it/s]

Epoch [5/10], Step [3041/3567], Loss: 2.9446


Epoch 5/10:  86%|████████▌ | 3051/3568 [31:16<05:12,  1.65it/s]

Epoch [5/10], Step [3051/3567], Loss: 2.9446


Epoch 5/10:  86%|████████▌ | 3061/3568 [31:22<05:10,  1.63it/s]

Epoch [5/10], Step [3061/3567], Loss: 2.9446


Epoch 5/10:  86%|████████▌ | 3071/3568 [31:28<05:09,  1.61it/s]

Epoch [5/10], Step [3071/3567], Loss: 2.9446


Epoch 5/10:  86%|████████▋ | 3081/3568 [31:35<05:24,  1.50it/s]

Epoch [5/10], Step [3081/3567], Loss: 2.9446


Epoch 5/10:  87%|████████▋ | 3091/3568 [31:41<04:54,  1.62it/s]

Epoch [5/10], Step [3091/3567], Loss: 2.9446


Epoch 5/10:  87%|████████▋ | 3101/3568 [31:47<04:41,  1.66it/s]

Epoch [5/10], Step [3101/3567], Loss: 2.9446


Epoch 5/10:  87%|████████▋ | 3111/3568 [31:53<04:34,  1.66it/s]

Epoch [5/10], Step [3111/3567], Loss: 2.9446


Epoch 5/10:  87%|████████▋ | 3121/3568 [31:59<04:32,  1.64it/s]

Epoch [5/10], Step [3121/3567], Loss: 2.9446


Epoch 5/10:  88%|████████▊ | 3131/3568 [32:05<04:29,  1.62it/s]

Epoch [5/10], Step [3131/3567], Loss: 2.9446


Epoch 5/10:  88%|████████▊ | 3141/3568 [32:12<04:32,  1.57it/s]

Epoch [5/10], Step [3141/3567], Loss: 2.9446


Epoch 5/10:  88%|████████▊ | 3151/3568 [32:18<04:17,  1.62it/s]

Epoch [5/10], Step [3151/3567], Loss: 2.9446


Epoch 5/10:  89%|████████▊ | 3161/3568 [32:24<04:07,  1.64it/s]

Epoch [5/10], Step [3161/3567], Loss: 2.9446


Epoch 5/10:  89%|████████▉ | 3171/3568 [32:30<04:00,  1.65it/s]

Epoch [5/10], Step [3171/3567], Loss: 2.9446


Epoch 5/10:  89%|████████▉ | 3181/3568 [32:36<03:56,  1.63it/s]

Epoch [5/10], Step [3181/3567], Loss: 2.9446


Epoch 5/10:  89%|████████▉ | 3191/3568 [32:42<03:48,  1.65it/s]

Epoch [5/10], Step [3191/3567], Loss: 2.9446


Epoch 5/10:  90%|████████▉ | 3201/3568 [32:48<03:48,  1.61it/s]

Epoch [5/10], Step [3201/3567], Loss: 2.9446


Epoch 5/10:  90%|████████▉ | 3211/3568 [32:54<03:39,  1.63it/s]

Epoch [5/10], Step [3211/3567], Loss: 2.9446


Epoch 5/10:  90%|█████████ | 3221/3568 [33:01<03:33,  1.62it/s]

Epoch [5/10], Step [3221/3567], Loss: 2.9446


Epoch 5/10:  91%|█████████ | 3231/3568 [33:07<03:36,  1.55it/s]

Epoch [5/10], Step [3231/3567], Loss: 2.9446


Epoch 5/10:  91%|█████████ | 3241/3568 [33:13<03:19,  1.64it/s]

Epoch [5/10], Step [3241/3567], Loss: 2.9446


Epoch 5/10:  91%|█████████ | 3251/3568 [33:19<03:15,  1.62it/s]

Epoch [5/10], Step [3251/3567], Loss: 2.9446


Epoch 5/10:  91%|█████████▏| 3261/3568 [33:25<03:06,  1.65it/s]

Epoch [5/10], Step [3261/3567], Loss: 2.9446


Epoch 5/10:  92%|█████████▏| 3271/3568 [33:32<03:05,  1.60it/s]

Epoch [5/10], Step [3271/3567], Loss: 2.9446


Epoch 5/10:  92%|█████████▏| 3281/3568 [33:38<02:54,  1.64it/s]

Epoch [5/10], Step [3281/3567], Loss: 2.9446


Epoch 5/10:  92%|█████████▏| 3291/3568 [33:44<02:50,  1.62it/s]

Epoch [5/10], Step [3291/3567], Loss: 2.9446


Epoch 5/10:  93%|█████████▎| 3301/3568 [33:50<02:45,  1.62it/s]

Epoch [5/10], Step [3301/3567], Loss: 2.9446


Epoch 5/10:  93%|█████████▎| 3311/3568 [33:56<02:37,  1.64it/s]

Epoch [5/10], Step [3311/3567], Loss: 2.9446


Epoch 5/10:  93%|█████████▎| 3321/3568 [34:02<02:30,  1.64it/s]

Epoch [5/10], Step [3321/3567], Loss: 2.9446


Epoch 5/10:  93%|█████████▎| 3331/3568 [34:08<02:26,  1.62it/s]

Epoch [5/10], Step [3331/3567], Loss: 2.9446


Epoch 5/10:  94%|█████████▎| 3341/3568 [34:15<02:23,  1.58it/s]

Epoch [5/10], Step [3341/3567], Loss: 2.9446


Epoch 5/10:  94%|█████████▍| 3351/3568 [34:21<02:13,  1.62it/s]

Epoch [5/10], Step [3351/3567], Loss: 2.9446


Epoch 5/10:  94%|█████████▍| 3361/3568 [34:27<02:07,  1.62it/s]

Epoch [5/10], Step [3361/3567], Loss: 2.9446


Epoch 5/10:  94%|█████████▍| 3371/3568 [34:33<02:00,  1.63it/s]

Epoch [5/10], Step [3371/3567], Loss: 2.9446


Epoch 5/10:  95%|█████████▍| 3381/3568 [34:39<01:56,  1.61it/s]

Epoch [5/10], Step [3381/3567], Loss: 2.9446


Epoch 5/10:  95%|█████████▌| 3391/3568 [34:46<01:51,  1.58it/s]

Epoch [5/10], Step [3391/3567], Loss: 2.9446


Epoch 5/10:  95%|█████████▌| 3401/3568 [34:52<01:44,  1.59it/s]

Epoch [5/10], Step [3401/3567], Loss: 2.9446


Epoch 5/10:  96%|█████████▌| 3411/3568 [34:58<01:37,  1.62it/s]

Epoch [5/10], Step [3411/3567], Loss: 2.9446


Epoch 5/10:  96%|█████████▌| 3421/3568 [35:04<01:30,  1.63it/s]

Epoch [5/10], Step [3421/3567], Loss: 2.9446


Epoch 5/10:  96%|█████████▌| 3431/3568 [35:10<01:23,  1.63it/s]

Epoch [5/10], Step [3431/3567], Loss: 2.9446


Epoch 5/10:  96%|█████████▋| 3441/3568 [35:17<01:18,  1.62it/s]

Epoch [5/10], Step [3441/3567], Loss: 2.9446


Epoch 5/10:  97%|█████████▋| 3451/3568 [35:23<01:11,  1.63it/s]

Epoch [5/10], Step [3451/3567], Loss: 2.9446


Epoch 5/10:  97%|█████████▋| 3461/3568 [35:29<01:06,  1.62it/s]

Epoch [5/10], Step [3461/3567], Loss: 2.9446


Epoch 5/10:  97%|█████████▋| 3471/3568 [35:35<00:58,  1.66it/s]

Epoch [5/10], Step [3471/3567], Loss: 2.9446


Epoch 5/10:  98%|█████████▊| 3481/3568 [35:41<00:53,  1.62it/s]

Epoch [5/10], Step [3481/3567], Loss: 2.9446


Epoch 5/10:  98%|█████████▊| 3491/3568 [35:47<00:47,  1.63it/s]

Epoch [5/10], Step [3491/3567], Loss: 2.9446


Epoch 5/10:  98%|█████████▊| 3501/3568 [35:53<00:41,  1.62it/s]

Epoch [5/10], Step [3501/3567], Loss: 2.9446


Epoch 5/10:  98%|█████████▊| 3511/3568 [35:59<00:35,  1.62it/s]

Epoch [5/10], Step [3511/3567], Loss: 2.9446


Epoch 5/10:  99%|█████████▊| 3521/3568 [36:06<00:29,  1.61it/s]

Epoch [5/10], Step [3521/3567], Loss: 2.9446


Epoch 5/10:  99%|█████████▉| 3531/3568 [36:12<00:22,  1.65it/s]

Epoch [5/10], Step [3531/3567], Loss: 2.9446


Epoch 5/10:  99%|█████████▉| 3541/3568 [36:18<00:16,  1.63it/s]

Epoch [5/10], Step [3541/3567], Loss: 2.9446


Epoch 5/10: 100%|█████████▉| 3551/3568 [36:24<00:10,  1.64it/s]

Epoch [5/10], Step [3551/3567], Loss: 2.9446


Epoch 5/10: 100%|█████████▉| 3561/3568 [36:30<00:04,  1.65it/s]

Epoch [5/10], Step [3561/3567], Loss: 2.9446


Epoch 5/10: 100%|█████████▉| 3567/3568 [36:34<00:00,  1.63it/s]


Epoch [5/10] Average Loss: 2.9446


Epoch 6/10:   0%|          | 1/3568 [00:00<35:49,  1.66it/s]

Epoch [6/10], Step [1/3567], Loss: 2.9446


Epoch 6/10:   0%|          | 11/3568 [00:06<36:34,  1.62it/s]

Epoch [6/10], Step [11/3567], Loss: 2.9446


Epoch 6/10:   1%|          | 21/3568 [00:12<36:03,  1.64it/s]

Epoch [6/10], Step [21/3567], Loss: 2.9446


Epoch 6/10:   1%|          | 31/3568 [00:18<35:45,  1.65it/s]

Epoch [6/10], Step [31/3567], Loss: 2.9446


Epoch 6/10:   1%|          | 41/3568 [00:25<36:37,  1.61it/s]

Epoch [6/10], Step [41/3567], Loss: 2.9446


Epoch 6/10:   1%|▏         | 51/3568 [00:31<37:03,  1.58it/s]

Epoch [6/10], Step [51/3567], Loss: 2.9446


Epoch 6/10:   2%|▏         | 61/3568 [00:37<36:18,  1.61it/s]

Epoch [6/10], Step [61/3567], Loss: 2.9446


Epoch 6/10:   2%|▏         | 71/3568 [00:43<35:53,  1.62it/s]

Epoch [6/10], Step [71/3567], Loss: 2.9446


Epoch 6/10:   2%|▏         | 81/3568 [00:49<35:31,  1.64it/s]

Epoch [6/10], Step [81/3567], Loss: 2.9446


Epoch 6/10:   3%|▎         | 91/3568 [00:55<35:03,  1.65it/s]

Epoch [6/10], Step [91/3567], Loss: 2.9446


Epoch 6/10:   3%|▎         | 101/3568 [01:01<35:22,  1.63it/s]

Epoch [6/10], Step [101/3567], Loss: 2.9446


Epoch 6/10:   6%|▌         | 201/3568 [02:03<34:52,  1.61it/s]

Epoch [6/10], Step [201/3567], Loss: 2.9446


Epoch 6/10:   6%|▌         | 211/3568 [02:09<36:48,  1.52it/s]

Epoch [6/10], Step [211/3567], Loss: 2.9446


Epoch 6/10:   6%|▌         | 221/3568 [02:15<33:58,  1.64it/s]

Epoch [6/10], Step [221/3567], Loss: 2.9446


Epoch 6/10:   6%|▋         | 231/3568 [02:21<34:15,  1.62it/s]

Epoch [6/10], Step [231/3567], Loss: 2.9446


Epoch 6/10:   7%|▋         | 241/3568 [02:27<34:24,  1.61it/s]

Epoch [6/10], Step [241/3567], Loss: 2.9446


Epoch 6/10:   7%|▋         | 251/3568 [02:33<33:52,  1.63it/s]

Epoch [6/10], Step [251/3567], Loss: 2.9446


Epoch 6/10:   7%|▋         | 261/3568 [02:40<35:22,  1.56it/s]

Epoch [6/10], Step [261/3567], Loss: 2.9446


Epoch 6/10:   8%|▊         | 271/3568 [02:46<33:19,  1.65it/s]

Epoch [6/10], Step [271/3567], Loss: 2.9446


Epoch 6/10:   8%|▊         | 281/3568 [02:52<33:59,  1.61it/s]

Epoch [6/10], Step [281/3567], Loss: 2.9446


Epoch 6/10:   8%|▊         | 291/3568 [02:58<33:55,  1.61it/s]

Epoch [6/10], Step [291/3567], Loss: 2.9446


Epoch 6/10:   8%|▊         | 301/3568 [03:04<33:15,  1.64it/s]

Epoch [6/10], Step [301/3567], Loss: 2.9446


Epoch 6/10:   9%|▊         | 311/3568 [03:11<33:35,  1.62it/s]

Epoch [6/10], Step [311/3567], Loss: 2.9446


Epoch 6/10:   9%|▉         | 321/3568 [03:17<33:28,  1.62it/s]

Epoch [6/10], Step [321/3567], Loss: 2.9446


Epoch 6/10:   9%|▉         | 331/3568 [03:23<32:34,  1.66it/s]

Epoch [6/10], Step [331/3567], Loss: 2.9446


Epoch 6/10:  10%|▉         | 341/3568 [03:29<33:03,  1.63it/s]

Epoch [6/10], Step [341/3567], Loss: 2.9446


Epoch 6/10:  10%|▉         | 351/3568 [03:35<32:44,  1.64it/s]

Epoch [6/10], Step [351/3567], Loss: 2.9446


Epoch 6/10:  10%|█         | 361/3568 [03:41<33:13,  1.61it/s]

Epoch [6/10], Step [361/3567], Loss: 2.9446


Epoch 6/10:  10%|█         | 371/3568 [03:48<33:52,  1.57it/s]

Epoch [6/10], Step [371/3567], Loss: 2.9446


Epoch 6/10:  11%|█         | 381/3568 [03:54<32:59,  1.61it/s]

Epoch [6/10], Step [381/3567], Loss: 2.9446


Epoch 6/10:  11%|█         | 391/3568 [04:00<32:25,  1.63it/s]

Epoch [6/10], Step [391/3567], Loss: 2.9446


Epoch 6/10:  11%|█         | 401/3568 [04:06<32:01,  1.65it/s]

Epoch [6/10], Step [401/3567], Loss: 2.9446


Epoch 6/10:  12%|█▏        | 411/3568 [04:12<32:21,  1.63it/s]

Epoch [6/10], Step [411/3567], Loss: 2.9446


Epoch 6/10:  12%|█▏        | 421/3568 [04:18<32:44,  1.60it/s]

Epoch [6/10], Step [421/3567], Loss: 2.9446


Epoch 6/10:  12%|█▏        | 431/3568 [04:25<32:29,  1.61it/s]

Epoch [6/10], Step [431/3567], Loss: 2.9446


Epoch 6/10:  12%|█▏        | 441/3568 [04:31<32:29,  1.60it/s]

Epoch [6/10], Step [441/3567], Loss: 2.9446


Epoch 6/10:  13%|█▎        | 451/3568 [04:37<31:26,  1.65it/s]

Epoch [6/10], Step [451/3567], Loss: 2.9446


Epoch 6/10:  13%|█▎        | 461/3568 [04:43<32:22,  1.60it/s]

Epoch [6/10], Step [461/3567], Loss: 2.9446


Epoch 6/10:  13%|█▎        | 471/3568 [04:49<31:33,  1.64it/s]

Epoch [6/10], Step [471/3567], Loss: 2.9446


Epoch 6/10:  13%|█▎        | 481/3568 [04:56<32:09,  1.60it/s]

Epoch [6/10], Step [481/3567], Loss: 2.9446


Epoch 6/10:  14%|█▍        | 491/3568 [05:02<31:51,  1.61it/s]

Epoch [6/10], Step [491/3567], Loss: 2.9446


Epoch 6/10:  14%|█▍        | 501/3568 [05:08<31:22,  1.63it/s]

Epoch [6/10], Step [501/3567], Loss: 2.9446


Epoch 6/10:  14%|█▍        | 511/3568 [05:14<31:13,  1.63it/s]

Epoch [6/10], Step [511/3567], Loss: 2.9446


Epoch 6/10:  15%|█▍        | 521/3568 [05:20<30:48,  1.65it/s]

Epoch [6/10], Step [521/3567], Loss: 2.9446


Epoch 6/10:  15%|█▍        | 531/3568 [05:27<30:46,  1.64it/s]

Epoch [6/10], Step [531/3567], Loss: 2.9446


Epoch 6/10:  15%|█▌        | 541/3568 [05:33<30:48,  1.64it/s]

Epoch [6/10], Step [541/3567], Loss: 2.9446


Epoch 6/10:  15%|█▌        | 551/3568 [05:39<30:25,  1.65it/s]

Epoch [6/10], Step [551/3567], Loss: 2.9446


Epoch 6/10:  16%|█▌        | 561/3568 [05:45<30:48,  1.63it/s]

Epoch [6/10], Step [561/3567], Loss: 2.9446


Epoch 6/10:  16%|█▌        | 571/3568 [05:51<30:52,  1.62it/s]

Epoch [6/10], Step [571/3567], Loss: 2.9446


Epoch 6/10:  16%|█▋        | 581/3568 [05:57<31:27,  1.58it/s]

Epoch [6/10], Step [581/3567], Loss: 2.9446


Epoch 6/10:  17%|█▋        | 591/3568 [06:03<30:33,  1.62it/s]

Epoch [6/10], Step [591/3567], Loss: 2.9446


Epoch 6/10:  17%|█▋        | 601/3568 [06:10<30:50,  1.60it/s]

Epoch [6/10], Step [601/3567], Loss: 2.9446


Epoch 6/10:  17%|█▋        | 611/3568 [06:16<31:28,  1.57it/s]

Epoch [6/10], Step [611/3567], Loss: 2.9446


Epoch 6/10:  17%|█▋        | 621/3568 [06:22<30:43,  1.60it/s]

Epoch [6/10], Step [621/3567], Loss: 2.9446


Epoch 6/10:  18%|█▊        | 631/3568 [06:28<30:06,  1.63it/s]

Epoch [6/10], Step [631/3567], Loss: 2.9446


Epoch 6/10:  18%|█▊        | 641/3568 [06:34<29:36,  1.65it/s]

Epoch [6/10], Step [641/3567], Loss: 2.9446


Epoch 6/10:  18%|█▊        | 651/3568 [06:40<29:31,  1.65it/s]

Epoch [6/10], Step [651/3567], Loss: 2.9446


Epoch 6/10:  19%|█▊        | 661/3568 [06:46<29:28,  1.64it/s]

Epoch [6/10], Step [661/3567], Loss: 2.9446


Epoch 6/10:  19%|█▉        | 671/3568 [06:53<29:46,  1.62it/s]

Epoch [6/10], Step [671/3567], Loss: 2.9446


Epoch 6/10:  19%|█▉        | 681/3568 [06:59<29:08,  1.65it/s]

Epoch [6/10], Step [681/3567], Loss: 2.9446


Epoch 6/10:  19%|█▉        | 691/3568 [07:05<29:04,  1.65it/s]

Epoch [6/10], Step [691/3567], Loss: 2.9446


Epoch 6/10:  20%|█▉        | 701/3568 [07:11<29:48,  1.60it/s]

Epoch [6/10], Step [701/3567], Loss: 2.9446


Epoch 6/10:  20%|█▉        | 711/3568 [07:17<29:28,  1.62it/s]

Epoch [6/10], Step [711/3567], Loss: 2.9446


Epoch 6/10:  20%|██        | 721/3568 [07:24<29:30,  1.61it/s]

Epoch [6/10], Step [721/3567], Loss: 2.9446


Epoch 6/10:  20%|██        | 731/3568 [07:30<29:06,  1.62it/s]

Epoch [6/10], Step [731/3567], Loss: 2.9446


Epoch 6/10:  21%|██        | 741/3568 [07:36<28:48,  1.64it/s]

Epoch [6/10], Step [741/3567], Loss: 2.9446


Epoch 6/10:  21%|██        | 751/3568 [07:42<28:34,  1.64it/s]

Epoch [6/10], Step [751/3567], Loss: 2.9446


Epoch 6/10:  21%|██▏       | 761/3568 [07:48<29:23,  1.59it/s]

Epoch [6/10], Step [761/3567], Loss: 2.9446


Epoch 6/10:  22%|██▏       | 771/3568 [07:54<28:34,  1.63it/s]

Epoch [6/10], Step [771/3567], Loss: 2.9446


Epoch 6/10:  22%|██▏       | 781/3568 [08:00<28:23,  1.64it/s]

Epoch [6/10], Step [781/3567], Loss: 2.9446


Epoch 6/10:  22%|██▏       | 791/3568 [08:07<27:56,  1.66it/s]

Epoch [6/10], Step [791/3567], Loss: 2.9446


Epoch 6/10:  22%|██▏       | 801/3568 [08:13<28:25,  1.62it/s]

Epoch [6/10], Step [801/3567], Loss: 2.9446


Epoch 6/10:  23%|██▎       | 811/3568 [08:19<28:33,  1.61it/s]

Epoch [6/10], Step [811/3567], Loss: 2.9446


Epoch 6/10:  23%|██▎       | 821/3568 [08:25<28:20,  1.62it/s]

Epoch [6/10], Step [821/3567], Loss: 2.9446


Epoch 6/10:  23%|██▎       | 831/3568 [08:31<28:15,  1.61it/s]

Epoch [6/10], Step [831/3567], Loss: 2.9446


Epoch 6/10:  24%|██▎       | 841/3568 [08:38<29:56,  1.52it/s]

Epoch [6/10], Step [841/3567], Loss: 2.9446


Epoch 6/10:  24%|██▍       | 851/3568 [08:44<27:31,  1.65it/s]

Epoch [6/10], Step [851/3567], Loss: 2.9446


Epoch 6/10:  24%|██▍       | 861/3568 [08:50<27:34,  1.64it/s]

Epoch [6/10], Step [861/3567], Loss: 2.9446


Epoch 6/10:  24%|██▍       | 871/3568 [08:56<27:28,  1.64it/s]

Epoch [6/10], Step [871/3567], Loss: 2.9446


Epoch 6/10:  25%|██▍       | 881/3568 [09:02<27:44,  1.61it/s]

Epoch [6/10], Step [881/3567], Loss: 2.9446


Epoch 6/10:  25%|██▍       | 891/3568 [09:08<27:21,  1.63it/s]

Epoch [6/10], Step [891/3567], Loss: 2.9446


Epoch 6/10:  25%|██▌       | 901/3568 [09:14<27:12,  1.63it/s]

Epoch [6/10], Step [901/3567], Loss: 2.9446


Epoch 6/10:  26%|██▌       | 911/3568 [09:21<27:43,  1.60it/s]

Epoch [6/10], Step [911/3567], Loss: 2.9446


Epoch 6/10:  26%|██▌       | 921/3568 [09:27<27:26,  1.61it/s]

Epoch [6/10], Step [921/3567], Loss: 2.9446


Epoch 6/10:  26%|██▌       | 931/3568 [09:33<26:39,  1.65it/s]

Epoch [6/10], Step [931/3567], Loss: 2.9446


Epoch 6/10:  26%|██▋       | 941/3568 [09:39<26:31,  1.65it/s]

Epoch [6/10], Step [941/3567], Loss: 2.9446


Epoch 6/10:  27%|██▋       | 951/3568 [09:45<26:44,  1.63it/s]

Epoch [6/10], Step [951/3567], Loss: 2.9446


Epoch 6/10:  27%|██▋       | 961/3568 [09:51<26:27,  1.64it/s]

Epoch [6/10], Step [961/3567], Loss: 2.9446


Epoch 6/10:  27%|██▋       | 971/3568 [09:58<26:37,  1.63it/s]

Epoch [6/10], Step [971/3567], Loss: 2.9446


Epoch 6/10:  27%|██▋       | 981/3568 [10:04<26:20,  1.64it/s]

Epoch [6/10], Step [981/3567], Loss: 2.9446


Epoch 6/10:  28%|██▊       | 991/3568 [10:10<26:06,  1.64it/s]

Epoch [6/10], Step [991/3567], Loss: 2.9446


Epoch 6/10:  28%|██▊       | 1001/3568 [10:16<26:19,  1.62it/s]

Epoch [6/10], Step [1001/3567], Loss: 2.9446


Epoch 6/10:  28%|██▊       | 1011/3568 [10:22<25:46,  1.65it/s]

Epoch [6/10], Step [1011/3567], Loss: 2.9446


Epoch 6/10:  29%|██▊       | 1021/3568 [10:28<25:52,  1.64it/s]

Epoch [6/10], Step [1021/3567], Loss: 2.9446


Epoch 6/10:  29%|██▉       | 1031/3568 [10:34<25:40,  1.65it/s]

Epoch [6/10], Step [1031/3567], Loss: 2.9446


Epoch 6/10:  29%|██▉       | 1041/3568 [10:40<25:42,  1.64it/s]

Epoch [6/10], Step [1041/3567], Loss: 2.9446


Epoch 6/10:  29%|██▉       | 1051/3568 [10:46<25:49,  1.62it/s]

Epoch [6/10], Step [1051/3567], Loss: 2.9446


Epoch 6/10:  30%|██▉       | 1061/3568 [10:52<25:13,  1.66it/s]

Epoch [6/10], Step [1061/3567], Loss: 2.9446


Epoch 6/10:  30%|███       | 1071/3568 [10:58<24:57,  1.67it/s]

Epoch [6/10], Step [1071/3567], Loss: 2.9446


Epoch 6/10:  30%|███       | 1081/3568 [11:05<25:08,  1.65it/s]

Epoch [6/10], Step [1081/3567], Loss: 2.9446


Epoch 6/10:  31%|███       | 1091/3568 [11:11<25:19,  1.63it/s]

Epoch [6/10], Step [1091/3567], Loss: 2.9446


Epoch 6/10:  31%|███       | 1101/3568 [11:17<25:27,  1.62it/s]

Epoch [6/10], Step [1101/3567], Loss: 2.9446


Epoch 6/10:  31%|███       | 1111/3568 [11:23<24:38,  1.66it/s]

Epoch [6/10], Step [1111/3567], Loss: 2.9446


Epoch 6/10:  31%|███▏      | 1121/3568 [11:29<24:34,  1.66it/s]

Epoch [6/10], Step [1121/3567], Loss: 2.9446


Epoch 6/10:  32%|███▏      | 1131/3568 [11:35<25:02,  1.62it/s]

Epoch [6/10], Step [1131/3567], Loss: 2.9446


Epoch 6/10:  32%|███▏      | 1141/3568 [11:41<24:33,  1.65it/s]

Epoch [6/10], Step [1141/3567], Loss: 2.9446


Epoch 6/10:  32%|███▏      | 1151/3568 [11:47<24:14,  1.66it/s]

Epoch [6/10], Step [1151/3567], Loss: 2.9446


Epoch 6/10:  33%|███▎      | 1161/3568 [11:53<24:08,  1.66it/s]

Epoch [6/10], Step [1161/3567], Loss: 2.9446


Epoch 6/10:  33%|███▎      | 1171/3568 [12:00<24:40,  1.62it/s]

Epoch [6/10], Step [1171/3567], Loss: 2.9446


Epoch 6/10:  33%|███▎      | 1181/3568 [12:06<24:08,  1.65it/s]

Epoch [6/10], Step [1181/3567], Loss: 2.9446


Epoch 6/10:  33%|███▎      | 1191/3568 [12:12<24:39,  1.61it/s]

Epoch [6/10], Step [1191/3567], Loss: 2.9446


Epoch 6/10:  34%|███▎      | 1201/3568 [12:18<24:23,  1.62it/s]

Epoch [6/10], Step [1201/3567], Loss: 2.9446


Epoch 6/10:  34%|███▍      | 1211/3568 [12:24<24:08,  1.63it/s]

Epoch [6/10], Step [1211/3567], Loss: 2.9446


Epoch 6/10:  34%|███▍      | 1221/3568 [12:31<23:58,  1.63it/s]

Epoch [6/10], Step [1221/3567], Loss: 2.9446


Epoch 6/10:  35%|███▍      | 1231/3568 [12:37<25:01,  1.56it/s]

Epoch [6/10], Step [1231/3567], Loss: 2.9446


Epoch 6/10:  35%|███▍      | 1241/3568 [12:43<23:42,  1.64it/s]

Epoch [6/10], Step [1241/3567], Loss: 2.9446


Epoch 6/10:  35%|███▌      | 1251/3568 [12:49<23:44,  1.63it/s]

Epoch [6/10], Step [1251/3567], Loss: 2.9446


Epoch 6/10:  35%|███▌      | 1261/3568 [12:55<23:08,  1.66it/s]

Epoch [6/10], Step [1261/3567], Loss: 2.9446


Epoch 6/10:  36%|███▌      | 1271/3568 [13:01<23:58,  1.60it/s]

Epoch [6/10], Step [1271/3567], Loss: 2.9446


Epoch 6/10:  36%|███▌      | 1281/3568 [13:07<23:13,  1.64it/s]

Epoch [6/10], Step [1281/3567], Loss: 2.9446


Epoch 6/10:  36%|███▌      | 1291/3568 [13:13<23:36,  1.61it/s]

Epoch [6/10], Step [1291/3567], Loss: 2.9446


Epoch 6/10:  36%|███▋      | 1301/3568 [13:20<22:52,  1.65it/s]

Epoch [6/10], Step [1301/3567], Loss: 2.9446


Epoch 6/10:  37%|███▋      | 1311/3568 [13:26<22:46,  1.65it/s]

Epoch [6/10], Step [1311/3567], Loss: 2.9446


Epoch 6/10:  37%|███▋      | 1321/3568 [13:32<23:00,  1.63it/s]

Epoch [6/10], Step [1321/3567], Loss: 2.9446


Epoch 6/10:  37%|███▋      | 1331/3568 [13:38<23:21,  1.60it/s]

Epoch [6/10], Step [1331/3567], Loss: 2.9446


Epoch 6/10:  38%|███▊      | 1341/3568 [13:44<22:49,  1.63it/s]

Epoch [6/10], Step [1341/3567], Loss: 2.9446


Epoch 6/10:  38%|███▊      | 1351/3568 [13:50<22:49,  1.62it/s]

Epoch [6/10], Step [1351/3567], Loss: 2.9446


Epoch 6/10:  38%|███▊      | 1361/3568 [13:56<22:29,  1.64it/s]

Epoch [6/10], Step [1361/3567], Loss: 2.9446


Epoch 6/10:  38%|███▊      | 1371/3568 [14:03<22:30,  1.63it/s]

Epoch [6/10], Step [1371/3567], Loss: 2.9446


Epoch 6/10:  39%|███▊      | 1381/3568 [14:09<23:34,  1.55it/s]

Epoch [6/10], Step [1381/3567], Loss: 2.9446


Epoch 6/10:  39%|███▉      | 1391/3568 [14:15<22:31,  1.61it/s]

Epoch [6/10], Step [1391/3567], Loss: 2.9446


Epoch 6/10:  39%|███▉      | 1401/3568 [14:21<22:10,  1.63it/s]

Epoch [6/10], Step [1401/3567], Loss: 2.9446


Epoch 6/10:  40%|███▉      | 1411/3568 [14:28<22:10,  1.62it/s]

Epoch [6/10], Step [1411/3567], Loss: 2.9446


Epoch 6/10:  40%|███▉      | 1421/3568 [14:34<21:59,  1.63it/s]

Epoch [6/10], Step [1421/3567], Loss: 2.9446


Epoch 6/10:  40%|████      | 1431/3568 [14:40<22:26,  1.59it/s]

Epoch [6/10], Step [1431/3567], Loss: 2.9446


Epoch 6/10:  40%|████      | 1441/3568 [14:46<21:22,  1.66it/s]

Epoch [6/10], Step [1441/3567], Loss: 2.9446


Epoch 6/10:  41%|████      | 1451/3568 [14:52<21:40,  1.63it/s]

Epoch [6/10], Step [1451/3567], Loss: 2.9446


Epoch 6/10:  41%|████      | 1461/3568 [14:58<21:24,  1.64it/s]

Epoch [6/10], Step [1461/3567], Loss: 2.9446


Epoch 6/10:  41%|████      | 1471/3568 [15:05<22:01,  1.59it/s]

Epoch [6/10], Step [1471/3567], Loss: 2.9446


Epoch 6/10:  42%|████▏     | 1481/3568 [15:11<21:22,  1.63it/s]

Epoch [6/10], Step [1481/3567], Loss: 2.9446


Epoch 6/10:  42%|████▏     | 1491/3568 [15:17<21:14,  1.63it/s]

Epoch [6/10], Step [1491/3567], Loss: 2.9446


Epoch 6/10:  42%|████▏     | 1501/3568 [15:23<20:58,  1.64it/s]

Epoch [6/10], Step [1501/3567], Loss: 2.9446


Epoch 6/10:  42%|████▏     | 1511/3568 [15:29<21:29,  1.59it/s]

Epoch [6/10], Step [1511/3567], Loss: 2.9446


Epoch 6/10:  43%|████▎     | 1521/3568 [15:35<20:37,  1.65it/s]

Epoch [6/10], Step [1521/3567], Loss: 2.9446


Epoch 6/10:  43%|████▎     | 1531/3568 [15:41<20:50,  1.63it/s]

Epoch [6/10], Step [1531/3567], Loss: 2.9446


Epoch 6/10:  43%|████▎     | 1541/3568 [15:48<20:45,  1.63it/s]

Epoch [6/10], Step [1541/3567], Loss: 2.9446


Epoch 6/10:  43%|████▎     | 1551/3568 [15:54<20:51,  1.61it/s]

Epoch [6/10], Step [1551/3567], Loss: 2.9446


Epoch 6/10:  44%|████▍     | 1561/3568 [16:00<20:00,  1.67it/s]

Epoch [6/10], Step [1561/3567], Loss: 2.9446


Epoch 6/10:  44%|████▍     | 1571/3568 [16:06<20:28,  1.63it/s]

Epoch [6/10], Step [1571/3567], Loss: 2.9446


Epoch 6/10:  44%|████▍     | 1581/3568 [16:12<20:30,  1.61it/s]

Epoch [6/10], Step [1581/3567], Loss: 2.9446


Epoch 6/10:  45%|████▍     | 1591/3568 [16:18<20:22,  1.62it/s]

Epoch [6/10], Step [1591/3567], Loss: 2.9446


Epoch 6/10:  45%|████▍     | 1601/3568 [16:24<20:04,  1.63it/s]

Epoch [6/10], Step [1601/3567], Loss: 2.9446


Epoch 6/10:  45%|████▌     | 1611/3568 [16:31<20:20,  1.60it/s]

Epoch [6/10], Step [1611/3567], Loss: 2.9446


Epoch 6/10:  45%|████▌     | 1621/3568 [16:37<20:02,  1.62it/s]

Epoch [6/10], Step [1621/3567], Loss: 2.9446


Epoch 6/10:  46%|████▌     | 1631/3568 [16:43<20:26,  1.58it/s]

Epoch [6/10], Step [1631/3567], Loss: 2.9446


Epoch 6/10:  46%|████▌     | 1641/3568 [16:49<19:49,  1.62it/s]

Epoch [6/10], Step [1641/3567], Loss: 2.9446


Epoch 6/10:  46%|████▋     | 1651/3568 [16:55<19:29,  1.64it/s]

Epoch [6/10], Step [1651/3567], Loss: 2.9446


Epoch 6/10:  47%|████▋     | 1661/3568 [17:01<19:19,  1.65it/s]

Epoch [6/10], Step [1661/3567], Loss: 2.9446


Epoch 6/10:  47%|████▋     | 1671/3568 [17:08<19:37,  1.61it/s]

Epoch [6/10], Step [1671/3567], Loss: 2.9446


Epoch 6/10:  47%|████▋     | 1681/3568 [17:14<19:17,  1.63it/s]

Epoch [6/10], Step [1681/3567], Loss: 2.9446


Epoch 6/10:  47%|████▋     | 1691/3568 [17:20<19:03,  1.64it/s]

Epoch [6/10], Step [1691/3567], Loss: 2.9446


Epoch 6/10:  48%|████▊     | 1701/3568 [17:26<19:07,  1.63it/s]

Epoch [6/10], Step [1701/3567], Loss: 2.9446


Epoch 6/10:  48%|████▊     | 1711/3568 [17:32<18:58,  1.63it/s]

Epoch [6/10], Step [1711/3567], Loss: 2.9446


Epoch 6/10:  48%|████▊     | 1721/3568 [17:38<18:54,  1.63it/s]

Epoch [6/10], Step [1721/3567], Loss: 2.9446


Epoch 6/10:  49%|████▊     | 1731/3568 [17:44<18:48,  1.63it/s]

Epoch [6/10], Step [1731/3567], Loss: 2.9446


Epoch 6/10:  49%|████▉     | 1741/3568 [17:50<18:58,  1.61it/s]

Epoch [6/10], Step [1741/3567], Loss: 2.9446


Epoch 6/10:  49%|████▉     | 1751/3568 [17:57<18:16,  1.66it/s]

Epoch [6/10], Step [1751/3567], Loss: 2.9446


Epoch 6/10:  49%|████▉     | 1761/3568 [18:03<18:29,  1.63it/s]

Epoch [6/10], Step [1761/3567], Loss: 2.9446


Epoch 6/10:  50%|████▉     | 1771/3568 [18:09<18:12,  1.65it/s]

Epoch [6/10], Step [1771/3567], Loss: 2.9446


Epoch 6/10:  50%|████▉     | 1781/3568 [18:15<18:05,  1.65it/s]

Epoch [6/10], Step [1781/3567], Loss: 2.9446


Epoch 6/10:  50%|█████     | 1791/3568 [18:21<18:12,  1.63it/s]

Epoch [6/10], Step [1791/3567], Loss: 2.9446


Epoch 6/10:  50%|█████     | 1801/3568 [18:27<18:40,  1.58it/s]

Epoch [6/10], Step [1801/3567], Loss: 2.9446


Epoch 6/10:  51%|█████     | 1811/3568 [18:33<17:53,  1.64it/s]

Epoch [6/10], Step [1811/3567], Loss: 2.9446


Epoch 6/10:  51%|█████     | 1821/3568 [18:40<18:56,  1.54it/s]

Epoch [6/10], Step [1821/3567], Loss: 2.9446


Epoch 6/10:  51%|█████▏    | 1831/3568 [18:46<17:39,  1.64it/s]

Epoch [6/10], Step [1831/3567], Loss: 2.9446


Epoch 6/10:  52%|█████▏    | 1841/3568 [18:52<17:59,  1.60it/s]

Epoch [6/10], Step [1841/3567], Loss: 2.9446


Epoch 6/10:  52%|█████▏    | 1851/3568 [18:58<17:41,  1.62it/s]

Epoch [6/10], Step [1851/3567], Loss: 2.9446


Epoch 6/10:  52%|█████▏    | 1861/3568 [19:04<17:37,  1.61it/s]

Epoch [6/10], Step [1861/3567], Loss: 2.9446


Epoch 6/10:  52%|█████▏    | 1871/3568 [19:10<18:20,  1.54it/s]

Epoch [6/10], Step [1871/3567], Loss: 2.9446


Epoch 6/10:  53%|█████▎    | 1881/3568 [19:16<16:57,  1.66it/s]

Epoch [6/10], Step [1881/3567], Loss: 2.9446


Epoch 6/10:  53%|█████▎    | 1891/3568 [19:22<17:02,  1.64it/s]

Epoch [6/10], Step [1891/3567], Loss: 2.9446


Epoch 6/10:  53%|█████▎    | 1901/3568 [19:29<17:01,  1.63it/s]

Epoch [6/10], Step [1901/3567], Loss: 2.9446


Epoch 6/10:  54%|█████▎    | 1911/3568 [19:35<16:52,  1.64it/s]

Epoch [6/10], Step [1911/3567], Loss: 2.9446


Epoch 6/10:  54%|█████▍    | 1921/3568 [19:41<16:54,  1.62it/s]

Epoch [6/10], Step [1921/3567], Loss: 2.9446


Epoch 6/10:  54%|█████▍    | 1931/3568 [19:47<16:43,  1.63it/s]

Epoch [6/10], Step [1931/3567], Loss: 2.9446


Epoch 6/10:  54%|█████▍    | 1941/3568 [19:53<16:50,  1.61it/s]

Epoch [6/10], Step [1941/3567], Loss: 2.9446


Epoch 6/10:  55%|█████▍    | 1951/3568 [20:00<16:27,  1.64it/s]

Epoch [6/10], Step [1951/3567], Loss: 2.9446


Epoch 6/10:  55%|█████▍    | 1961/3568 [20:06<16:15,  1.65it/s]

Epoch [6/10], Step [1961/3567], Loss: 2.9446


Epoch 6/10:  55%|█████▌    | 1971/3568 [20:12<16:27,  1.62it/s]

Epoch [6/10], Step [1971/3567], Loss: 2.9446


Epoch 6/10:  56%|█████▌    | 1981/3568 [20:18<16:05,  1.64it/s]

Epoch [6/10], Step [1981/3567], Loss: 2.9446


Epoch 6/10:  56%|█████▌    | 1991/3568 [20:24<15:52,  1.66it/s]

Epoch [6/10], Step [1991/3567], Loss: 2.9446


Epoch 6/10:  56%|█████▌    | 2001/3568 [20:30<15:49,  1.65it/s]

Epoch [6/10], Step [2001/3567], Loss: 2.9446


Epoch 6/10:  56%|█████▋    | 2011/3568 [20:36<16:00,  1.62it/s]

Epoch [6/10], Step [2011/3567], Loss: 2.9446


Epoch 6/10:  57%|█████▋    | 2021/3568 [20:42<15:35,  1.65it/s]

Epoch [6/10], Step [2021/3567], Loss: 2.9446


Epoch 6/10:  57%|█████▋    | 2031/3568 [20:48<15:22,  1.67it/s]

Epoch [6/10], Step [2031/3567], Loss: 2.9446


Epoch 6/10:  57%|█████▋    | 2041/3568 [20:55<15:41,  1.62it/s]

Epoch [6/10], Step [2041/3567], Loss: 2.9446


Epoch 6/10:  57%|█████▋    | 2051/3568 [21:01<15:25,  1.64it/s]

Epoch [6/10], Step [2051/3567], Loss: 2.9446


Epoch 6/10:  58%|█████▊    | 2061/3568 [21:07<15:22,  1.63it/s]

Epoch [6/10], Step [2061/3567], Loss: 2.9446


Epoch 6/10:  58%|█████▊    | 2071/3568 [21:13<15:28,  1.61it/s]

Epoch [6/10], Step [2071/3567], Loss: 2.9446


Epoch 6/10:  58%|█████▊    | 2081/3568 [21:19<15:21,  1.61it/s]

Epoch [6/10], Step [2081/3567], Loss: 2.9446


Epoch 6/10:  59%|█████▊    | 2091/3568 [21:26<15:22,  1.60it/s]

Epoch [6/10], Step [2091/3567], Loss: 2.9446


Epoch 6/10:  59%|█████▉    | 2101/3568 [21:32<14:53,  1.64it/s]

Epoch [6/10], Step [2101/3567], Loss: 2.9446


Epoch 6/10:  59%|█████▉    | 2111/3568 [21:38<14:55,  1.63it/s]

Epoch [6/10], Step [2111/3567], Loss: 2.9446


Epoch 6/10:  59%|█████▉    | 2121/3568 [21:44<14:46,  1.63it/s]

Epoch [6/10], Step [2121/3567], Loss: 2.9446


Epoch 6/10:  60%|█████▉    | 2131/3568 [21:50<14:44,  1.62it/s]

Epoch [6/10], Step [2131/3567], Loss: 2.9446


Epoch 6/10:  60%|██████    | 2141/3568 [21:56<14:27,  1.65it/s]

Epoch [6/10], Step [2141/3567], Loss: 2.9446


Epoch 6/10:  60%|██████    | 2151/3568 [22:02<14:29,  1.63it/s]

Epoch [6/10], Step [2151/3567], Loss: 2.9446


Epoch 6/10:  61%|██████    | 2161/3568 [22:08<14:18,  1.64it/s]

Epoch [6/10], Step [2161/3567], Loss: 2.9446


Epoch 6/10:  61%|██████    | 2171/3568 [22:15<14:19,  1.63it/s]

Epoch [6/10], Step [2171/3567], Loss: 2.9446


Epoch 6/10:  61%|██████    | 2181/3568 [22:21<14:02,  1.65it/s]

Epoch [6/10], Step [2181/3567], Loss: 2.9446


Epoch 6/10:  61%|██████▏   | 2191/3568 [22:27<13:59,  1.64it/s]

Epoch [6/10], Step [2191/3567], Loss: 2.9446


Epoch 6/10:  62%|██████▏   | 2201/3568 [22:33<13:49,  1.65it/s]

Epoch [6/10], Step [2201/3567], Loss: 2.9446


Epoch 6/10:  62%|██████▏   | 2211/3568 [22:39<13:48,  1.64it/s]

Epoch [6/10], Step [2211/3567], Loss: 2.9446


Epoch 6/10:  62%|██████▏   | 2221/3568 [22:45<13:49,  1.62it/s]

Epoch [6/10], Step [2221/3567], Loss: 2.9446


Epoch 6/10:  63%|██████▎   | 2231/3568 [22:51<13:35,  1.64it/s]

Epoch [6/10], Step [2231/3567], Loss: 2.9446


Epoch 6/10:  63%|██████▎   | 2241/3568 [22:57<13:41,  1.61it/s]

Epoch [6/10], Step [2241/3567], Loss: 2.9446


Epoch 6/10:  63%|██████▎   | 2251/3568 [23:04<13:27,  1.63it/s]

Epoch [6/10], Step [2251/3567], Loss: 2.9446


Epoch 6/10:  63%|██████▎   | 2261/3568 [23:10<13:19,  1.64it/s]

Epoch [6/10], Step [2261/3567], Loss: 2.9446


Epoch 6/10:  64%|██████▎   | 2271/3568 [23:16<13:25,  1.61it/s]

Epoch [6/10], Step [2271/3567], Loss: 2.9446


Epoch 6/10:  64%|██████▍   | 2281/3568 [23:22<13:10,  1.63it/s]

Epoch [6/10], Step [2281/3567], Loss: 2.9446


Epoch 6/10:  64%|██████▍   | 2291/3568 [23:28<13:33,  1.57it/s]

Epoch [6/10], Step [2291/3567], Loss: 2.9446


Epoch 6/10:  64%|██████▍   | 2301/3568 [23:34<13:00,  1.62it/s]

Epoch [6/10], Step [2301/3567], Loss: 2.9446


Epoch 6/10:  65%|██████▍   | 2311/3568 [23:41<12:46,  1.64it/s]

Epoch [6/10], Step [2311/3567], Loss: 2.9446


Epoch 6/10:  65%|██████▌   | 2321/3568 [23:47<13:02,  1.59it/s]

Epoch [6/10], Step [2321/3567], Loss: 2.9446


Epoch 6/10:  65%|██████▌   | 2331/3568 [23:53<12:33,  1.64it/s]

Epoch [6/10], Step [2331/3567], Loss: 2.9446


Epoch 6/10:  66%|██████▌   | 2341/3568 [23:59<12:39,  1.62it/s]

Epoch [6/10], Step [2341/3567], Loss: 2.9446


Epoch 6/10:  66%|██████▌   | 2351/3568 [24:05<12:21,  1.64it/s]

Epoch [6/10], Step [2351/3567], Loss: 2.9446


Epoch 6/10:  66%|██████▌   | 2361/3568 [24:11<12:14,  1.64it/s]

Epoch [6/10], Step [2361/3567], Loss: 2.9446


Epoch 6/10:  66%|██████▋   | 2371/3568 [24:17<12:05,  1.65it/s]

Epoch [6/10], Step [2371/3567], Loss: 2.9446


Epoch 6/10:  67%|██████▋   | 2381/3568 [24:24<12:18,  1.61it/s]

Epoch [6/10], Step [2381/3567], Loss: 2.9446


Epoch 6/10:  67%|██████▋   | 2391/3568 [24:30<11:50,  1.66it/s]

Epoch [6/10], Step [2391/3567], Loss: 2.9446


Epoch 6/10:  67%|██████▋   | 2401/3568 [24:36<11:44,  1.66it/s]

Epoch [6/10], Step [2401/3567], Loss: 2.9446


Epoch 6/10:  68%|██████▊   | 2411/3568 [24:42<11:45,  1.64it/s]

Epoch [6/10], Step [2411/3567], Loss: 2.9446


Epoch 6/10:  68%|██████▊   | 2421/3568 [24:48<11:36,  1.65it/s]

Epoch [6/10], Step [2421/3567], Loss: 2.9446


Epoch 6/10:  68%|██████▊   | 2431/3568 [24:54<11:36,  1.63it/s]

Epoch [6/10], Step [2431/3567], Loss: 2.9446


Epoch 6/10:  68%|██████▊   | 2441/3568 [25:00<11:29,  1.63it/s]

Epoch [6/10], Step [2441/3567], Loss: 2.9446


Epoch 6/10:  69%|██████▊   | 2451/3568 [25:06<11:32,  1.61it/s]

Epoch [6/10], Step [2451/3567], Loss: 2.9446


Epoch 6/10:  69%|██████▉   | 2461/3568 [25:12<11:19,  1.63it/s]

Epoch [6/10], Step [2461/3567], Loss: 2.9446


Epoch 6/10:  69%|██████▉   | 2471/3568 [25:19<11:08,  1.64it/s]

Epoch [6/10], Step [2471/3567], Loss: 2.9446


Epoch 6/10:  70%|██████▉   | 2481/3568 [25:25<11:03,  1.64it/s]

Epoch [6/10], Step [2481/3567], Loss: 2.9446


Epoch 6/10:  70%|██████▉   | 2491/3568 [25:31<11:01,  1.63it/s]

Epoch [6/10], Step [2491/3567], Loss: 2.9446


Epoch 6/10:  70%|███████   | 2501/3568 [25:37<10:54,  1.63it/s]

Epoch [6/10], Step [2501/3567], Loss: 2.9446


Epoch 6/10:  70%|███████   | 2511/3568 [25:43<10:40,  1.65it/s]

Epoch [6/10], Step [2511/3567], Loss: 2.9446


Epoch 6/10:  71%|███████   | 2521/3568 [25:49<11:12,  1.56it/s]

Epoch [6/10], Step [2521/3567], Loss: 2.9446


Epoch 6/10:  71%|███████   | 2531/3568 [25:55<10:29,  1.65it/s]

Epoch [6/10], Step [2531/3567], Loss: 2.9446


Epoch 6/10:  71%|███████   | 2541/3568 [26:02<10:25,  1.64it/s]

Epoch [6/10], Step [2541/3567], Loss: 2.9446


Epoch 6/10:  71%|███████▏  | 2551/3568 [26:08<10:24,  1.63it/s]

Epoch [6/10], Step [2551/3567], Loss: 2.9446


Epoch 6/10:  72%|███████▏  | 2561/3568 [26:14<10:24,  1.61it/s]

Epoch [6/10], Step [2561/3567], Loss: 2.9446


Epoch 6/10:  72%|███████▏  | 2571/3568 [26:20<10:25,  1.59it/s]

Epoch [6/10], Step [2571/3567], Loss: 2.9446


Epoch 6/10:  72%|███████▏  | 2581/3568 [26:26<10:08,  1.62it/s]

Epoch [6/10], Step [2581/3567], Loss: 2.9446


Epoch 6/10:  73%|███████▎  | 2591/3568 [26:33<10:01,  1.62it/s]

Epoch [6/10], Step [2591/3567], Loss: 2.9446


Epoch 6/10:  73%|███████▎  | 2601/3568 [26:39<10:12,  1.58it/s]

Epoch [6/10], Step [2601/3567], Loss: 2.9446


Epoch 6/10:  73%|███████▎  | 2611/3568 [26:45<09:51,  1.62it/s]

Epoch [6/10], Step [2611/3567], Loss: 2.9446


Epoch 6/10:  73%|███████▎  | 2621/3568 [26:51<09:33,  1.65it/s]

Epoch [6/10], Step [2621/3567], Loss: 2.9446


Epoch 6/10:  74%|███████▎  | 2631/3568 [26:57<10:02,  1.56it/s]

Epoch [6/10], Step [2631/3567], Loss: 2.9446


Epoch 6/10:  74%|███████▍  | 2641/3568 [27:04<09:34,  1.61it/s]

Epoch [6/10], Step [2641/3567], Loss: 2.9446


Epoch 6/10:  74%|███████▍  | 2651/3568 [27:10<09:16,  1.65it/s]

Epoch [6/10], Step [2651/3567], Loss: 2.9446


Epoch 6/10:  75%|███████▍  | 2661/3568 [27:16<09:13,  1.64it/s]

Epoch [6/10], Step [2661/3567], Loss: 2.9446


Epoch 6/10:  75%|███████▍  | 2671/3568 [27:22<09:03,  1.65it/s]

Epoch [6/10], Step [2671/3567], Loss: 2.9446


Epoch 6/10:  75%|███████▌  | 2681/3568 [27:28<09:00,  1.64it/s]

Epoch [6/10], Step [2681/3567], Loss: 2.9446


Epoch 6/10:  75%|███████▌  | 2691/3568 [27:34<08:58,  1.63it/s]

Epoch [6/10], Step [2691/3567], Loss: 2.9446


Epoch 6/10:  76%|███████▌  | 2701/3568 [27:40<08:59,  1.61it/s]

Epoch [6/10], Step [2701/3567], Loss: 2.9446


Epoch 6/10:  76%|███████▌  | 2711/3568 [27:46<08:47,  1.62it/s]

Epoch [6/10], Step [2711/3567], Loss: 2.9446


Epoch 6/10:  76%|███████▋  | 2721/3568 [27:53<08:35,  1.64it/s]

Epoch [6/10], Step [2721/3567], Loss: 2.9446


Epoch 6/10:  77%|███████▋  | 2731/3568 [27:59<08:33,  1.63it/s]

Epoch [6/10], Step [2731/3567], Loss: 2.9446


Epoch 6/10:  77%|███████▋  | 2741/3568 [28:05<08:26,  1.63it/s]

Epoch [6/10], Step [2741/3567], Loss: 2.9446


Epoch 6/10:  77%|███████▋  | 2751/3568 [28:11<08:27,  1.61it/s]

Epoch [6/10], Step [2751/3567], Loss: 2.9446


Epoch 6/10:  77%|███████▋  | 2761/3568 [28:17<08:19,  1.62it/s]

Epoch [6/10], Step [2761/3567], Loss: 2.9446


Epoch 6/10:  78%|███████▊  | 2771/3568 [28:23<08:04,  1.64it/s]

Epoch [6/10], Step [2771/3567], Loss: 2.9446


Epoch 6/10:  78%|███████▊  | 2781/3568 [28:29<08:11,  1.60it/s]

Epoch [6/10], Step [2781/3567], Loss: 2.9446


Epoch 6/10:  78%|███████▊  | 2791/3568 [28:36<07:54,  1.64it/s]

Epoch [6/10], Step [2791/3567], Loss: 2.9446


Epoch 6/10:  79%|███████▊  | 2801/3568 [28:42<07:55,  1.61it/s]

Epoch [6/10], Step [2801/3567], Loss: 2.9446


Epoch 6/10:  79%|███████▉  | 2811/3568 [28:48<07:40,  1.65it/s]

Epoch [6/10], Step [2811/3567], Loss: 2.9446


Epoch 6/10:  79%|███████▉  | 2821/3568 [28:54<07:39,  1.63it/s]

Epoch [6/10], Step [2821/3567], Loss: 2.9446


Epoch 6/10:  79%|███████▉  | 2831/3568 [29:00<07:32,  1.63it/s]

Epoch [6/10], Step [2831/3567], Loss: 2.9446


Epoch 6/10:  80%|███████▉  | 2841/3568 [29:06<07:28,  1.62it/s]

Epoch [6/10], Step [2841/3567], Loss: 2.9446


Epoch 6/10:  80%|███████▉  | 2851/3568 [29:13<07:26,  1.61it/s]

Epoch [6/10], Step [2851/3567], Loss: 2.9446


Epoch 6/10:  80%|████████  | 2861/3568 [29:19<07:09,  1.65it/s]

Epoch [6/10], Step [2861/3567], Loss: 2.9446


Epoch 6/10:  80%|████████  | 2871/3568 [29:25<07:13,  1.61it/s]

Epoch [6/10], Step [2871/3567], Loss: 2.9446


Epoch 6/10:  81%|████████  | 2881/3568 [29:31<06:55,  1.65it/s]

Epoch [6/10], Step [2881/3567], Loss: 2.9446


Epoch 6/10:  81%|████████  | 2891/3568 [29:37<06:48,  1.66it/s]

Epoch [6/10], Step [2891/3567], Loss: 2.9446


Epoch 6/10:  81%|████████▏ | 2901/3568 [29:43<06:57,  1.60it/s]

Epoch [6/10], Step [2901/3567], Loss: 2.9446


Epoch 6/10:  82%|████████▏ | 2911/3568 [29:49<06:42,  1.63it/s]

Epoch [6/10], Step [2911/3567], Loss: 2.9446


Epoch 6/10:  82%|████████▏ | 2921/3568 [29:55<06:40,  1.62it/s]

Epoch [6/10], Step [2921/3567], Loss: 2.9446


Epoch 6/10:  82%|████████▏ | 2923/3568 [29:57<06:35,  1.63it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Epoch 8/10:  78%|███████▊  | 2791/3568 [28:33<08:04,  1.60it/s]

Epoch [8/10], Step [2791/3567], Loss: 2.9446


Epoch 8/10:  79%|███████▊  | 2801/3568 [28:40<07:50,  1.63it/s]

Epoch [8/10], Step [2801/3567], Loss: 2.9446


Epoch 8/10:  79%|███████▉  | 2811/3568 [28:46<07:45,  1.63it/s]

Epoch [8/10], Step [2811/3567], Loss: 2.9446


Epoch 8/10:  79%|███████▉  | 2821/3568 [28:52<07:44,  1.61it/s]

Epoch [8/10], Step [2821/3567], Loss: 2.9446


Epoch 8/10:  79%|███████▉  | 2829/3568 [28:57<07:33,  1.63it/s]


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Evaluate the model on the LibriSpeech "test-clean" split: run every batch
# through the model without gradients, keep the quantized and contextualized
# representations (as NumPy arrays, one entry per batch), and report the mean
# per-batch loss (sum of batch losses divided by the number of batches).
model.eval()  # put the model in evaluation mode

# Use the device the model already lives on instead of hard-coding 'cuda' twice.
device = next(model.parameters()).device

# Build a DataLoader for the test set.
# NOTE(review): the training cell used target_length=48000; 480000 here is 10x
# longer -- confirm this is intentional and not a typo.
dataset = TempLibriSpeech.LibriSpeech(split="test-clean", target_length=480000, device=device.type)
data_loader = DataLoader(dataset, batch_size=16, shuffle=False)

all_quantized_reps = []
all_contextualized_reps = []

# Running sum of the per-batch losses returned by the model.
total_loss = 0
with torch.no_grad():
    # The transcripts are not needed here. They are bound to a throwaway name
    # other than `_`, because assigning `_` in a notebook shadows IPython's
    # "previous output" variable for the rest of the session.
    for inputs, _transcripts in data_loader:
        # Make sure the batch is on the same device as the model.
        inputs = inputs.to(device)

        # Forward pass.
        quantized_repr, contextualized_reps, loss = model(inputs)

        # Move the representations to host memory before storing them.
        all_quantized_reps.append(quantized_repr.cpu().numpy())
        all_contextualized_reps.append(contextualized_reps.cpu().numpy())

        # Accumulate the loss.
        total_loss += loss.item()

# Mean loss over the batches; NaN (rather than ZeroDivisionError) if the loader is empty.
num_batches = len(data_loader)
average_loss = total_loss / num_batches if num_batches else float("nan")

# Report the result.
print(f"Average Loss: {average_loss:.4f}")


In [None]:
# Mean per-batch loss: the accumulated `total_loss` divided by the number of
# batches in `data_loader` (both are expected to exist from an earlier cell).
num_batches = len(data_loader)
average_loss = total_loss / num_batches

# Report the result.
print(f"Average Loss: {average_loss:.4f}")

In [None]:
# Debug inspection: display whatever batch tensor the most recently executed
# loop left bound to `inputs` (presumably the last evaluation batch -- verify
# against execution order).
inputs