In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import math
import copy
from tqdm import tqdm

In [None]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The inputs are projected with three linear layers, split into
    ``num_heads`` slices of width ``d_k = d_model // num_heads``, attended
    per head, merged back to width ``d_model`` and passed through an output
    projection.
    """

    def __init__(self, d_model, num_heads):
        """Create the four ``d_model -> d_model`` projections.

        Raises:
            AssertionError: if ``d_model`` is not a multiple of ``num_heads``.
        """
        super().__init__()
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"

        self.d_model = d_model
        self.num_heads = num_heads
        # Width of a single head.
        self.d_k = d_model // num_heads

        # Query / key / value input projections followed by the output projection.
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)

    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        """Return ``softmax(Q K^T / sqrt(d_k)) V``.

        When ``mask`` is given, every score whose mask entry equals 0 is
        replaced by ``-1e9`` before the softmax, so it receives (numerically)
        zero weight.
        """
        scale = math.sqrt(self.d_k)
        scores = torch.matmul(Q, K.transpose(-2, -1)) / scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        weights = torch.softmax(scores, dim=-1)
        return torch.matmul(weights, V)

    def split_heads(self, x):
        """Reshape ``(batch, seq, d_model)`` into ``(batch, num_heads, seq, d_k)``."""
        batch_size, seq_length, _ = x.size()
        per_head = x.view(batch_size, seq_length, self.num_heads, self.d_k)
        return per_head.transpose(1, 2)

    def combine_heads(self, x):
        """Inverse of ``split_heads``: ``(batch, heads, seq, d_k)`` -> ``(batch, seq, d_model)``."""
        batch_size, _, seq_length, _ = x.size()
        # transpose() yields a non-contiguous tensor; view() needs contiguous memory.
        merged = x.transpose(1, 2).contiguous()
        return merged.view(batch_size, seq_length, self.d_model)

    def forward(self, Q, K, V, mask=None):
        """Project the inputs, attend per head, merge the heads and project the result."""
        q_heads = self.split_heads(self.W_q(Q))
        k_heads = self.split_heads(self.W_k(K))
        v_heads = self.split_heads(self.W_v(V))

        context = self.scaled_dot_product_attention(q_heads, k_heads, v_heads, mask)
        return self.W_o(self.combine_heads(context))

In [None]:
class PositionWiseFeedForward(nn.Module):
    """Two-layer feed-forward network applied along the last dimension.

    Computes ``fc2(relu(fc1(x)))`` where ``fc1`` maps ``d_model -> d_ff`` and
    ``fc2`` maps ``d_ff -> d_model``.
    """

    def __init__(self, d_model, d_ff):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Expand to ``d_ff``, apply ReLU, then project back to ``d_model``."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        return self.fc2(activated)

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sine/cosine position encodings to a batch of embeddings.

    A ``(1, max_seq_length, d_model)`` table is precomputed once and stored as
    the non-trainable buffer ``pe``. Even feature columns hold
    ``sin(pos * div_term)`` and odd columns hold ``cos(pos * div_term)``,
    where ``div_term[i] = exp(-2i * ln(10000) / d_model)``.

    Args:
        d_model: Width of the embeddings (may be even or odd).
        max_seq_length: Number of positions to precompute.
    """

    def __init__(self, d_model, max_seq_length):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        # (max_seq_length, ceil(d_model / 2)) table of sine/cosine arguments.
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so the cosine table is trimmed to floor(d_model / 2) columns. For an
        # even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Registered as a buffer so it follows .to(device) and is saved in the
        # state dict without being a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

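## Ошибка в методе `forward` (`EncoderLayer`): feed-forward вычисляется от исходного `x`, до self-attention, а не от выхода `norm1`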
    def forward(self, x, mask):
        # NOTE(review): this looks like the deliberately incorrect variant announced by the
        # "error in the method" heading; the EncoderLayer cell that follows holds the
        # corrected order. Kept unchanged for illustration.
        # Problem: the feed-forward branch is computed from the layer input, before
        # self-attention has been applied.
        ff_output = self.feed_forward(x)
        attn_output = self.self_attn(x, x, x, mask)
        x = self.norm1(x + self.dropout(attn_output))
        # The residual added here is the feed-forward of the original input, not of the
        # norm1 output computed on the previous line.
        x = self.norm2(x + self.dropout(ff_output))
        return x

In [None]:
class EncoderLayer(nn.Module):
    """One encoder block: self-attention, then a position-wise feed-forward network.

    Each sub-layer output goes through dropout, is added to the sub-layer
    input, and the sum is layer-normalised.
    """

    def __init__(self, d_model, num_heads, d_ff, dropout):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionWiseFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        # A single dropout module is shared by both residual branches.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        """Apply self-attention (with ``mask``) and the feed-forward network to ``x``."""
        # Sub-layer 1: queries, keys and values all come from x.
        attended = self.dropout(self.self_attn(x, x, x, mask))
        x = self.norm1(x + attended)

        # Sub-layer 2: the feed-forward network sees the normalised attention result.
        transformed = self.dropout(self.feed_forward(x))
        return self.norm2(x + transformed)

## Ошибка в методе `forward` (`DecoderLayer`): пропущен шаг cross-attention по `enc_output` и нормализация `norm2`
    def forward(self, x, enc_output, src_mask, tgt_mask):
        # NOTE(review): this looks like the deliberately incorrect variant announced by the
        # "error in the method" heading; the DecoderLayer cell that follows holds the
        # corrected version. Kept unchanged for illustration.
        attn_output = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout(attn_output))
        # Problem: enc_output and src_mask are never used here, so there is no attention
        # over the encoder output and no norm2 step between norm1 and the feed-forward.
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout(ff_output))
        return x

In [None]:
class DecoderLayer(nn.Module):
    """One decoder block: self-attention, attention over the encoder output, feed-forward.

    Each of the three sub-layer outputs goes through dropout, is added to the
    sub-layer input, and the sum is layer-normalised (``norm1`` .. ``norm3``).
    """

    def __init__(self, d_model, num_heads, d_ff, dropout):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionWiseFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        # A single dropout module is shared by all three residual branches.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_output, src_mask, tgt_mask):
        """Run the three sub-layers on ``x``.

        ``tgt_mask`` is passed to the self-attention; ``src_mask`` is passed
        to the attention whose keys and values are ``enc_output``.
        """
        # Sub-layer 1: self-attention over the decoder input.
        self_attended = self.dropout(self.self_attn(x, x, x, tgt_mask))
        x = self.norm1(x + self_attended)

        # Sub-layer 2: queries from the decoder, keys/values from the encoder output.
        cross_attended = self.dropout(self.cross_attn(x, enc_output, enc_output, src_mask))
        x = self.norm2(x + cross_attended)

        # Sub-layer 3: position-wise feed-forward network.
        transformed = self.dropout(self.feed_forward(x))
        return self.norm3(x + transformed)

In [None]:
class Transformer(nn.Module):
    """Encoder-decoder Transformer over integer token sequences.

    Source and target tokens are embedded, combined with positional
    encodings, passed through ``num_layers`` encoder layers and
    ``num_layers`` decoder layers, and projected to ``tgt_vocab_size`` logits.

    Args:
        src_vocab_size: Number of rows in the source embedding table.
        tgt_vocab_size: Number of rows in the target embedding table and
            width of the output logits.
        d_model: Embedding / hidden width.
        num_heads: Attention heads per attention module.
        num_layers: Number of encoder layers and of decoder layers.
        d_ff: Hidden width of the position-wise feed-forward networks.
        max_seq_length: Number of positions in the positional-encoding table.
        dropout: Dropout probability used throughout.
    """

    def __init__(self, src_vocab_size, tgt_vocab_size, d_model, num_heads, num_layers, d_ff, max_seq_length, dropout):
        super(Transformer, self).__init__()
        self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
        self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_seq_length)

        self.encoder_layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.decoder_layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])

        self.fc = nn.Linear(d_model, tgt_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def generate_mask(self, src, tgt):
        """Build boolean attention masks from 2-D token-id tensors.

        Token id 0 is treated as "masked out" (presumably padding).

        Returns:
            ``(src_mask, tgt_mask)`` where ``src_mask`` has shape
            ``(batch, 1, 1, src_len)`` and ``tgt_mask`` has shape
            ``(batch, 1, tgt_len, tgt_len)``. ``tgt_mask`` additionally
            forbids attending to later positions.
        """
        src_mask = (src != 0).unsqueeze(1).unsqueeze(2)
        # NOTE(review): unsqueeze(3) broadcasts the target token mask along the
        # key axis, so it blanks whole rows (queries at id 0) rather than
        # hiding id-0 keys. Kept as in the original; confirm this is intended.
        tgt_mask = (tgt != 0).unsqueeze(1).unsqueeze(3)
        seq_length = tgt.size(1)
        # Lower-triangular (incl. diagonal) "no peek" mask, created directly as
        # bool on the target's device so no float CPU tensor has to be built
        # and copied on every forward pass.
        nopeak_mask = torch.tril(
            torch.ones(1, seq_length, seq_length, dtype=torch.bool, device=tgt.device)
        )
        tgt_mask = tgt_mask & nopeak_mask
        return src_mask, tgt_mask

    def forward(self, src, tgt):
        """Return logits of shape ``(batch, tgt_len, tgt_vocab_size)`` for ``tgt`` given ``src``."""
        src_mask, tgt_mask = self.generate_mask(src, tgt)
        src_embedded = self.dropout(self.positional_encoding(self.encoder_embedding(src)))
        tgt_embedded = self.dropout(self.positional_encoding(self.decoder_embedding(tgt)))

        enc_output = src_embedded
        for enc_layer in self.encoder_layers:
            enc_output = enc_layer(enc_output, src_mask)

        # Every decoder layer attends to the output of the final encoder layer.
        dec_output = tgt_embedded
        for dec_layer in self.decoder_layers:
            dec_output = dec_layer(dec_output, enc_output, src_mask, tgt_mask)

        output = self.fc(dec_output)
        return output

In [None]:
# Vocabulary sizes: rows of the source / target embedding tables.
src_vocab_size = tgt_vocab_size = 1000

# Model shape.
d_model = 512          # embedding / hidden width
num_heads = 8          # attention heads (must divide d_model)
num_layers = 6         # encoder layers and decoder layers
d_ff = 2048            # hidden width of the feed-forward networks
max_seq_length = 50    # positions precomputed by the positional encoding
dropout = 0.1          # dropout probability

transformer = Transformer(
    src_vocab_size=src_vocab_size,
    tgt_vocab_size=tgt_vocab_size,
    d_model=d_model,
    num_heads=num_heads,
    num_layers=num_layers,
    d_ff=d_ff,
    max_seq_length=max_seq_length,
    dropout=dropout,
)

In [None]:
from random import randint

In [None]:
def generate_sample(seq_length: int = 50, max_start: int = 944):
    """Create one synthetic (encoder input, decoder input, target) triple.

    Each sequence is a run of ``seq_length`` consecutive integers. The encoder
    and decoder runs start at independent random values drawn uniformly from
    ``[1, max_start]``; the target is the decoder run shifted up by one.

    Args:
        seq_length: Number of tokens in every returned sequence.
        max_start: Largest allowed first token. The largest token produced is
            ``max_start + seq_length`` (in the target), so the caller must
            keep that below the model's vocabulary size.

    Returns:
        Tuple ``(enc_src, dec_src, trg)`` of 1-D ``torch.int64`` tensors, each
        of length ``seq_length``.
    """
    enc_src_start = randint(1, max_start)
    dec_src_start = randint(1, max_start)

    enc_src = torch.arange(enc_src_start, enc_src_start + seq_length, dtype=torch.int64)
    dec_src = torch.arange(dec_src_start, dec_src_start + seq_length, dtype=torch.int64)

    # Target sequence: the decoder input advanced by one token at every position.
    trg = torch.arange(dec_src_start + 1, dec_src_start + seq_length + 1, dtype=torch.int64)

    return enc_src, dec_src, trg

In [None]:
def generate_batch(batch_size: int = 128):
    """Draw ``batch_size`` samples from ``generate_sample`` and stack them row-wise.

    Returns:
        Tuple ``(src_batch, dec_batch, trg_batch)``; each tensor has one row
        per sample.
    """
    src_rows, dec_rows, trg_rows = [], [], []

    produced = 0
    while produced < batch_size:
        src_sample, dec_sample, trg_sample = generate_sample()
        # Add a leading batch axis so the rows can be concatenated along dim 0.
        src_rows.append(src_sample.unsqueeze(0))
        dec_rows.append(dec_sample.unsqueeze(0))
        trg_rows.append(trg_sample.unsqueeze(0))
        produced += 1

    src_batch = torch.cat(src_rows, dim=0)
    dec_batch = torch.cat(dec_rows, dim=0)
    trg_batch = torch.cat(trg_rows, dim=0)
    return src_batch, dec_batch, trg_batch

In [None]:
# Move the model's parameters and buffers onto the selected device.
transformer = transformer.to(device=device)

In [None]:
# Cross-entropy over every (sample, position) pair; Adam with the original settings.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

transformer.train()

# Printing on every step interleaves with the tqdm bar and floods the cell
# output, so the current loss is shown on the bar itself and a persistent line
# is written only every `log_every` steps.
log_every = 100
progress = tqdm(range(1000))

for step in progress:
    # Fresh synthetic batch each step, moved to the same device as the model.
    src_batch, dec_batch, trg_batch = (t.to(device) for t in generate_batch(64))

    optimizer.zero_grad()
    output = transformer(src_batch, dec_batch)
    # Flatten logits to (batch * seq, vocab) and targets to (batch * seq,).
    loss = criterion(output.view(-1, output.size(-1)), trg_batch.view(-1))
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    progress.set_postfix(loss=f"{loss_value:.4f}")
    if (step + 1) % log_every == 0:
        # tqdm.write prints without breaking the progress bar.
        tqdm.write(f"Step: {step+1}, Loss: {loss_value}")

  0%|          | 1/1000 [00:01<28:07,  1.69s/it]

Step: 1, Loss: 7.090574264526367


  0%|          | 2/1000 [00:01<14:08,  1.18it/s]

Step: 2, Loss: 7.012030124664307


  0%|          | 3/1000 [00:02<09:41,  1.71it/s]

Step: 3, Loss: 6.900892734527588


  0%|          | 4/1000 [00:02<07:36,  2.18it/s]

Step: 4, Loss: 6.831659317016602


  0%|          | 5/1000 [00:02<06:38,  2.49it/s]

Step: 5, Loss: 6.735196590423584


  1%|          | 6/1000 [00:03<05:53,  2.81it/s]

Step: 6, Loss: 6.6589508056640625


  1%|          | 7/1000 [00:03<05:24,  3.06it/s]

Step: 7, Loss: 6.452132701873779


  1%|          | 8/1000 [00:03<05:11,  3.19it/s]

Step: 8, Loss: 6.263952255249023


  1%|          | 9/1000 [00:03<05:00,  3.30it/s]

Step: 9, Loss: 6.118674278259277


  1%|          | 10/1000 [00:04<04:50,  3.41it/s]

Step: 10, Loss: 6.062994956970215


  1%|          | 11/1000 [00:04<04:42,  3.50it/s]

Step: 11, Loss: 5.947001934051514


  1%|          | 12/1000 [00:04<04:42,  3.49it/s]

Step: 12, Loss: 5.698605537414551


  1%|▏         | 13/1000 [00:04<04:39,  3.53it/s]

Step: 13, Loss: 5.679849624633789


  1%|▏         | 14/1000 [00:05<04:36,  3.57it/s]

Step: 14, Loss: 5.522597789764404


  2%|▏         | 15/1000 [00:05<04:34,  3.59it/s]

Step: 15, Loss: 5.3294243812561035


  2%|▏         | 16/1000 [00:05<04:36,  3.56it/s]

Step: 16, Loss: 5.182381629943848


  2%|▏         | 17/1000 [00:06<04:35,  3.57it/s]

Step: 17, Loss: 4.992959976196289


  2%|▏         | 18/1000 [00:06<04:33,  3.59it/s]

Step: 18, Loss: 4.8849287033081055


  2%|▏         | 19/1000 [00:06<04:33,  3.59it/s]

Step: 19, Loss: 4.7567925453186035


  2%|▏         | 20/1000 [00:06<04:34,  3.57it/s]

Step: 20, Loss: 4.684217929840088


  2%|▏         | 21/1000 [00:07<04:31,  3.60it/s]

Step: 21, Loss: 4.516933441162109


  2%|▏         | 22/1000 [00:07<04:31,  3.60it/s]

Step: 22, Loss: 4.404354572296143


  2%|▏         | 23/1000 [00:07<04:33,  3.58it/s]

Step: 23, Loss: 4.242887496948242


  2%|▏         | 24/1000 [00:08<04:32,  3.58it/s]

Step: 24, Loss: 4.04670524597168


  2%|▎         | 25/1000 [00:08<04:32,  3.58it/s]

Step: 25, Loss: 3.7867298126220703


  3%|▎         | 26/1000 [00:08<04:32,  3.57it/s]

Step: 26, Loss: 3.7445285320281982


  3%|▎         | 27/1000 [00:08<04:32,  3.57it/s]

Step: 27, Loss: 3.5507919788360596


  3%|▎         | 28/1000 [00:09<04:30,  3.59it/s]

Step: 28, Loss: 3.4906258583068848


  3%|▎         | 29/1000 [00:09<04:29,  3.60it/s]

Step: 29, Loss: 3.325805425643921


  3%|▎         | 30/1000 [00:09<04:30,  3.58it/s]

Step: 30, Loss: 3.1469345092773438


  3%|▎         | 31/1000 [00:10<04:30,  3.58it/s]

Step: 31, Loss: 3.0634067058563232


  3%|▎         | 32/1000 [00:10<04:30,  3.58it/s]

Step: 32, Loss: 2.9913482666015625


  3%|▎         | 33/1000 [00:10<04:30,  3.58it/s]

Step: 33, Loss: 2.88606595993042


  3%|▎         | 34/1000 [00:10<04:30,  3.58it/s]

Step: 34, Loss: 2.723935842514038


  4%|▎         | 35/1000 [00:11<04:28,  3.59it/s]

Step: 35, Loss: 2.6324198246002197


  4%|▎         | 36/1000 [00:11<04:28,  3.59it/s]

Step: 36, Loss: 2.48125958442688


  4%|▎         | 37/1000 [00:11<04:28,  3.58it/s]

Step: 37, Loss: 2.4589273929595947


  4%|▍         | 38/1000 [00:11<04:28,  3.58it/s]

Step: 38, Loss: 2.28779673576355


  4%|▍         | 39/1000 [00:12<04:27,  3.59it/s]

Step: 39, Loss: 2.18212628364563


  4%|▍         | 40/1000 [00:12<04:28,  3.58it/s]

Step: 40, Loss: 2.110452651977539


  4%|▍         | 41/1000 [00:12<04:28,  3.57it/s]

Step: 41, Loss: 2.0145885944366455


  4%|▍         | 42/1000 [00:13<04:28,  3.57it/s]

Step: 42, Loss: 1.9530344009399414


  4%|▍         | 43/1000 [00:13<04:27,  3.57it/s]

Step: 43, Loss: 1.8064852952957153


  4%|▍         | 44/1000 [00:13<04:28,  3.57it/s]

Step: 44, Loss: 1.7071278095245361


  4%|▍         | 45/1000 [00:13<04:28,  3.56it/s]

Step: 45, Loss: 1.5810967683792114


  5%|▍         | 46/1000 [00:14<04:26,  3.58it/s]

Step: 46, Loss: 1.585921287536621


  5%|▍         | 47/1000 [00:14<04:27,  3.56it/s]

Step: 47, Loss: 1.4620901346206665


  5%|▍         | 48/1000 [00:14<04:28,  3.55it/s]

Step: 48, Loss: 1.507847785949707


  5%|▍         | 49/1000 [00:15<04:27,  3.55it/s]

Step: 49, Loss: 1.4065392017364502


  5%|▌         | 50/1000 [00:15<04:27,  3.55it/s]

Step: 50, Loss: 1.3127121925354004


  5%|▌         | 51/1000 [00:15<04:28,  3.53it/s]

Step: 51, Loss: 1.2267513275146484


  5%|▌         | 52/1000 [00:15<04:28,  3.53it/s]

Step: 52, Loss: 1.1661396026611328


  5%|▌         | 53/1000 [00:16<04:27,  3.54it/s]

Step: 53, Loss: 1.1261248588562012


  5%|▌         | 54/1000 [00:16<04:27,  3.54it/s]

Step: 54, Loss: 1.0794203281402588


  6%|▌         | 55/1000 [00:16<04:28,  3.52it/s]

Step: 55, Loss: 0.9573220610618591


  6%|▌         | 56/1000 [00:17<04:27,  3.53it/s]

Step: 56, Loss: 0.9280182719230652


  6%|▌         | 57/1000 [00:17<04:26,  3.53it/s]

Step: 57, Loss: 0.8650563955307007


  6%|▌         | 58/1000 [00:17<04:26,  3.53it/s]

Step: 58, Loss: 0.8941268920898438


  6%|▌         | 59/1000 [00:17<04:27,  3.52it/s]

Step: 59, Loss: 0.843384861946106


  6%|▌         | 60/1000 [00:18<04:26,  3.52it/s]

Step: 60, Loss: 0.8544557690620422


  6%|▌         | 61/1000 [00:18<04:26,  3.53it/s]

Step: 61, Loss: 0.7614619731903076


  6%|▌         | 62/1000 [00:18<04:27,  3.51it/s]

Step: 62, Loss: 0.687542200088501


  6%|▋         | 63/1000 [00:19<04:26,  3.52it/s]

Step: 63, Loss: 0.6940793395042419


  6%|▋         | 64/1000 [00:19<04:25,  3.53it/s]

Step: 64, Loss: 0.6524271965026855


  6%|▋         | 65/1000 [00:19<04:26,  3.52it/s]

Step: 65, Loss: 0.6388392448425293


  7%|▋         | 66/1000 [00:19<04:25,  3.51it/s]

Step: 66, Loss: 0.5853393077850342


  7%|▋         | 67/1000 [00:20<04:25,  3.52it/s]

Step: 67, Loss: 0.6378238201141357


  7%|▋         | 68/1000 [00:20<04:24,  3.52it/s]

Step: 68, Loss: 0.5462587475776672


  7%|▋         | 69/1000 [00:20<04:26,  3.49it/s]

Step: 69, Loss: 0.5044734477996826


  7%|▋         | 70/1000 [00:21<04:25,  3.50it/s]

Step: 70, Loss: 0.48824232816696167


  7%|▋         | 71/1000 [00:21<04:25,  3.50it/s]

Step: 71, Loss: 0.5294193029403687


  7%|▋         | 72/1000 [00:21<04:25,  3.49it/s]

Step: 72, Loss: 0.4515480697154999


  7%|▋         | 73/1000 [00:21<04:24,  3.50it/s]

Step: 73, Loss: 0.4073456823825836


  7%|▋         | 74/1000 [00:22<04:24,  3.50it/s]

Step: 74, Loss: 0.425948828458786


  8%|▊         | 75/1000 [00:22<04:23,  3.51it/s]

Step: 75, Loss: 0.3851086497306824


  8%|▊         | 76/1000 [00:22<04:23,  3.51it/s]

Step: 76, Loss: 0.404204398393631


  8%|▊         | 77/1000 [00:23<04:22,  3.52it/s]

Step: 77, Loss: 0.3572736084461212


  8%|▊         | 78/1000 [00:23<04:21,  3.52it/s]

Step: 78, Loss: 0.35979002714157104


  8%|▊         | 79/1000 [00:23<04:21,  3.52it/s]

Step: 79, Loss: 0.349821001291275


  8%|▊         | 80/1000 [00:23<04:20,  3.53it/s]

Step: 80, Loss: 0.31213095784187317


  8%|▊         | 81/1000 [00:24<04:20,  3.53it/s]

Step: 81, Loss: 0.3261975049972534


  8%|▊         | 82/1000 [00:24<04:19,  3.53it/s]

Step: 82, Loss: 0.3072591722011566


  8%|▊         | 83/1000 [00:24<04:20,  3.52it/s]

Step: 83, Loss: 0.31613004207611084


  8%|▊         | 84/1000 [00:24<04:20,  3.52it/s]

Step: 84, Loss: 0.3066028356552124


  8%|▊         | 85/1000 [00:25<04:19,  3.52it/s]

Step: 85, Loss: 0.28958484530448914


  9%|▊         | 86/1000 [00:25<04:19,  3.52it/s]

Step: 86, Loss: 0.24546532332897186


  9%|▊         | 87/1000 [00:25<04:20,  3.51it/s]

Step: 87, Loss: 0.27096158266067505


  9%|▉         | 88/1000 [00:26<04:20,  3.50it/s]

Step: 88, Loss: 0.2415064424276352


  9%|▉         | 89/1000 [00:26<04:20,  3.50it/s]

Step: 89, Loss: 0.24044936895370483


  9%|▉         | 90/1000 [00:26<04:20,  3.49it/s]

Step: 90, Loss: 0.24047251045703888


  9%|▉         | 91/1000 [00:26<04:19,  3.50it/s]

Step: 91, Loss: 0.21542534232139587


  9%|▉         | 92/1000 [00:27<04:18,  3.51it/s]

Step: 92, Loss: 0.22329111397266388


  9%|▉         | 93/1000 [00:27<04:19,  3.50it/s]

Step: 93, Loss: 0.21057161688804626


  9%|▉         | 94/1000 [00:27<04:18,  3.50it/s]

Step: 94, Loss: 0.1956491619348526


 10%|▉         | 95/1000 [00:28<04:18,  3.50it/s]

Step: 95, Loss: 0.220535010099411


 10%|▉         | 96/1000 [00:28<04:18,  3.50it/s]

Step: 96, Loss: 0.18276207149028778


 10%|▉         | 97/1000 [00:28<04:19,  3.48it/s]

Step: 97, Loss: 0.198796346783638


 10%|▉         | 98/1000 [00:28<04:18,  3.49it/s]

Step: 98, Loss: 0.206277996301651


 10%|▉         | 99/1000 [00:29<04:18,  3.49it/s]

Step: 99, Loss: 0.22072380781173706


 10%|█         | 100/1000 [00:29<04:18,  3.48it/s]

Step: 100, Loss: 0.18635261058807373


 10%|█         | 101/1000 [00:29<04:18,  3.47it/s]

Step: 101, Loss: 0.1975289285182953


 10%|█         | 102/1000 [00:30<04:18,  3.47it/s]

Step: 102, Loss: 0.17087779939174652


 10%|█         | 103/1000 [00:30<04:17,  3.48it/s]

Step: 103, Loss: 0.15376658737659454


 10%|█         | 104/1000 [00:30<04:18,  3.47it/s]

Step: 104, Loss: 0.178072988986969


 10%|█         | 105/1000 [00:31<04:17,  3.48it/s]

Step: 105, Loss: 0.16177348792552948


 11%|█         | 106/1000 [00:31<04:17,  3.47it/s]

Step: 106, Loss: 0.15753960609436035


 11%|█         | 107/1000 [00:31<04:17,  3.47it/s]

Step: 107, Loss: 0.15094813704490662


 11%|█         | 108/1000 [00:31<04:17,  3.47it/s]

Step: 108, Loss: 0.16049480438232422


 11%|█         | 109/1000 [00:32<04:17,  3.45it/s]

Step: 109, Loss: 0.14102301001548767


 11%|█         | 110/1000 [00:32<04:17,  3.45it/s]

Step: 110, Loss: 0.1348346471786499


 11%|█         | 111/1000 [00:32<04:19,  3.43it/s]

Step: 111, Loss: 0.13556577265262604


 11%|█         | 112/1000 [00:33<04:18,  3.43it/s]

Step: 112, Loss: 0.14444701373577118


 11%|█▏        | 113/1000 [00:33<04:19,  3.42it/s]

Step: 113, Loss: 0.12537015974521637


 11%|█▏        | 114/1000 [00:33<04:19,  3.42it/s]

Step: 114, Loss: 0.13268069922924042


 12%|█▏        | 115/1000 [00:33<04:19,  3.41it/s]

Step: 115, Loss: 0.15341061353683472


 12%|█▏        | 116/1000 [00:34<04:19,  3.41it/s]

Step: 116, Loss: 0.11987408995628357


 12%|█▏        | 117/1000 [00:34<04:19,  3.40it/s]

Step: 117, Loss: 0.13261868059635162


 12%|█▏        | 118/1000 [00:34<04:18,  3.41it/s]

Step: 118, Loss: 0.1290505975484848


 12%|█▏        | 119/1000 [00:35<04:18,  3.41it/s]

Step: 119, Loss: 0.11762931197881699


 12%|█▏        | 120/1000 [00:35<04:17,  3.42it/s]

Step: 120, Loss: 0.1254623681306839


 12%|█▏        | 121/1000 [00:35<04:17,  3.41it/s]

Step: 121, Loss: 0.10816508531570435


 12%|█▏        | 122/1000 [00:35<04:16,  3.42it/s]

Step: 122, Loss: 0.10446486622095108


 12%|█▏        | 123/1000 [00:36<04:17,  3.41it/s]

Step: 123, Loss: 0.1365938037633896


 12%|█▏        | 124/1000 [00:36<04:16,  3.41it/s]

Step: 124, Loss: 0.10394743829965591


 12%|█▎        | 125/1000 [00:36<04:17,  3.40it/s]

Step: 125, Loss: 0.11894096434116364


 13%|█▎        | 126/1000 [00:37<04:17,  3.40it/s]

Step: 126, Loss: 0.11525924503803253


 13%|█▎        | 127/1000 [00:37<04:17,  3.39it/s]

Step: 127, Loss: 0.1009521484375


 13%|█▎        | 128/1000 [00:37<04:17,  3.39it/s]

Step: 128, Loss: 0.09377758204936981


 13%|█▎        | 129/1000 [00:38<04:16,  3.39it/s]

Step: 129, Loss: 0.09230484813451767


 13%|█▎        | 130/1000 [00:38<04:16,  3.39it/s]

Step: 130, Loss: 0.0983743965625763


 13%|█▎        | 131/1000 [00:38<04:16,  3.39it/s]

Step: 131, Loss: 0.10416319966316223


 13%|█▎        | 132/1000 [00:38<04:16,  3.38it/s]

Step: 132, Loss: 0.10555493086576462


 13%|█▎        | 133/1000 [00:39<04:16,  3.38it/s]

Step: 133, Loss: 0.09145798534154892


 13%|█▎        | 134/1000 [00:39<04:15,  3.38it/s]

Step: 134, Loss: 0.08578703552484512


 14%|█▎        | 135/1000 [00:39<04:16,  3.38it/s]

Step: 135, Loss: 0.08722972124814987


 14%|█▎        | 136/1000 [00:40<04:16,  3.37it/s]

Step: 136, Loss: 0.08728434890508652


 14%|█▎        | 137/1000 [00:40<04:15,  3.37it/s]

Step: 137, Loss: 0.0799567773938179


 14%|█▍        | 138/1000 [00:40<04:15,  3.38it/s]

Step: 138, Loss: 0.07964460551738739


 14%|█▍        | 139/1000 [00:41<04:15,  3.37it/s]

Step: 139, Loss: 0.08058217912912369


 14%|█▍        | 140/1000 [00:41<04:16,  3.36it/s]

Step: 140, Loss: 0.07946673035621643


 14%|█▍        | 141/1000 [00:41<04:15,  3.36it/s]

Step: 141, Loss: 0.07675917446613312


 14%|█▍        | 142/1000 [00:41<04:16,  3.35it/s]

Step: 142, Loss: 0.08650916814804077


 14%|█▍        | 143/1000 [00:42<04:15,  3.35it/s]

Step: 143, Loss: 0.07963185757398605


 14%|█▍        | 144/1000 [00:42<04:16,  3.34it/s]

Step: 144, Loss: 0.07633180916309357


 14%|█▍        | 145/1000 [00:42<04:16,  3.33it/s]

Step: 145, Loss: 0.09233062714338303


 15%|█▍        | 146/1000 [00:43<04:16,  3.32it/s]

Step: 146, Loss: 0.07441215962171555


 15%|█▍        | 147/1000 [00:43<04:17,  3.32it/s]

Step: 147, Loss: 0.07143202424049377


 15%|█▍        | 148/1000 [00:43<04:16,  3.32it/s]

Step: 148, Loss: 0.06588348746299744


 15%|█▍        | 149/1000 [00:44<04:17,  3.31it/s]

Step: 149, Loss: 0.07225089520215988


 15%|█▌        | 150/1000 [00:44<04:17,  3.30it/s]

Step: 150, Loss: 0.07177888602018356


 15%|█▌        | 151/1000 [00:44<04:16,  3.31it/s]

Step: 151, Loss: 0.07815524935722351


 15%|█▌        | 152/1000 [00:44<04:16,  3.30it/s]

Step: 152, Loss: 0.06532523036003113


 15%|█▌        | 153/1000 [00:45<04:17,  3.29it/s]

Step: 153, Loss: 0.07105783373117447


 15%|█▌        | 154/1000 [00:45<04:17,  3.29it/s]

Step: 154, Loss: 0.06242934241890907


 16%|█▌        | 155/1000 [00:45<04:16,  3.29it/s]

Step: 155, Loss: 0.062164898961782455


 16%|█▌        | 156/1000 [00:46<04:17,  3.28it/s]

Step: 156, Loss: 0.06958822906017303


 16%|█▌        | 157/1000 [00:46<04:16,  3.29it/s]

Step: 157, Loss: 0.06405612081289291


 16%|█▌        | 158/1000 [00:46<04:16,  3.28it/s]

Step: 158, Loss: 0.05838547646999359


 16%|█▌        | 159/1000 [00:47<04:16,  3.28it/s]

Step: 159, Loss: 0.056970227509737015


 16%|█▌        | 160/1000 [00:47<04:15,  3.28it/s]

Step: 160, Loss: 0.0633600652217865


 16%|█▌        | 161/1000 [00:47<04:15,  3.29it/s]

Step: 161, Loss: 0.06864672899246216


 16%|█▌        | 162/1000 [00:47<04:15,  3.29it/s]

Step: 162, Loss: 0.056407712399959564


 16%|█▋        | 163/1000 [00:48<04:15,  3.28it/s]

Step: 163, Loss: 0.05637852102518082


 16%|█▋        | 164/1000 [00:48<04:15,  3.27it/s]

Step: 164, Loss: 0.05354315787553787


 16%|█▋        | 165/1000 [00:48<04:16,  3.25it/s]

Step: 165, Loss: 0.056687287986278534


 17%|█▋        | 166/1000 [00:49<04:15,  3.26it/s]

Step: 166, Loss: 0.056037601083517075


 17%|█▋        | 167/1000 [00:49<04:15,  3.27it/s]

Step: 167, Loss: 0.05770968273282051


 17%|█▋        | 168/1000 [00:49<04:15,  3.26it/s]

Step: 168, Loss: 0.06485699862241745


 17%|█▋        | 169/1000 [00:50<04:15,  3.25it/s]

Step: 169, Loss: 0.05264484882354736


 17%|█▋        | 170/1000 [00:50<04:15,  3.25it/s]

Step: 170, Loss: 0.05056287720799446


 17%|█▋        | 171/1000 [00:50<04:15,  3.25it/s]

Step: 171, Loss: 0.04790985956788063


 17%|█▋        | 172/1000 [00:51<04:15,  3.24it/s]

Step: 172, Loss: 0.05518954247236252


 17%|█▋        | 173/1000 [00:51<04:15,  3.24it/s]

Step: 173, Loss: 0.051067426800727844


 17%|█▋        | 174/1000 [00:51<04:15,  3.23it/s]

Step: 174, Loss: 0.04754631593823433


 18%|█▊        | 175/1000 [00:51<04:13,  3.26it/s]

Step: 175, Loss: 0.04767542704939842


 18%|█▊        | 176/1000 [00:52<04:13,  3.25it/s]

Step: 176, Loss: 0.0474926121532917


 18%|█▊        | 177/1000 [00:52<04:14,  3.23it/s]

Step: 177, Loss: 0.04377414286136627


 18%|█▊        | 178/1000 [00:52<04:16,  3.20it/s]

Step: 178, Loss: 0.0437658317387104


 18%|█▊        | 179/1000 [00:53<04:16,  3.20it/s]

Step: 179, Loss: 0.04499809816479683


 18%|█▊        | 180/1000 [00:53<04:20,  3.14it/s]

Step: 180, Loss: 0.047389425337314606


 18%|█▊        | 181/1000 [00:53<04:18,  3.17it/s]

Step: 181, Loss: 0.043228745460510254


 18%|█▊        | 182/1000 [00:54<04:19,  3.15it/s]

Step: 182, Loss: 0.048669084906578064


 18%|█▊        | 183/1000 [00:54<04:17,  3.17it/s]

Step: 183, Loss: 0.042423173785209656


 18%|█▊        | 184/1000 [00:54<04:15,  3.19it/s]

Step: 184, Loss: 0.041847120970487595


 18%|█▊        | 185/1000 [00:55<04:16,  3.18it/s]

Step: 185, Loss: 0.04710199311375618


 19%|█▊        | 186/1000 [00:55<04:15,  3.18it/s]

Step: 186, Loss: 0.0415695384144783


 19%|█▊        | 187/1000 [00:55<04:15,  3.18it/s]

Step: 187, Loss: 0.04318034648895264


 19%|█▉        | 188/1000 [00:56<04:15,  3.18it/s]

Step: 188, Loss: 0.039793021976947784


 19%|█▉        | 189/1000 [00:56<04:16,  3.17it/s]

Step: 189, Loss: 0.0430186428129673


 19%|█▉        | 190/1000 [00:56<04:16,  3.16it/s]

Step: 190, Loss: 0.0384257435798645


 19%|█▉        | 191/1000 [00:57<04:15,  3.16it/s]

Step: 191, Loss: 0.040581732988357544


 19%|█▉        | 192/1000 [00:57<04:16,  3.15it/s]

Step: 192, Loss: 0.03983263298869133


 19%|█▉        | 193/1000 [00:57<04:16,  3.14it/s]

Step: 193, Loss: 0.03908960893750191


 19%|█▉        | 194/1000 [00:57<04:15,  3.15it/s]

Step: 194, Loss: 0.036705926060676575


 20%|█▉        | 195/1000 [00:58<04:15,  3.16it/s]

Step: 195, Loss: 0.0352754220366478


 20%|█▉        | 196/1000 [00:58<04:15,  3.15it/s]

Step: 196, Loss: 0.04163765534758568


 20%|█▉        | 197/1000 [00:58<04:15,  3.14it/s]

Step: 197, Loss: 0.033924080431461334


 20%|█▉        | 198/1000 [00:59<04:14,  3.15it/s]

Step: 198, Loss: 0.03941257297992706


 20%|█▉        | 199/1000 [00:59<04:14,  3.15it/s]

Step: 199, Loss: 0.03442283719778061


 20%|██        | 200/1000 [00:59<04:15,  3.13it/s]

Step: 200, Loss: 0.0365510918200016


 20%|██        | 201/1000 [01:00<04:15,  3.13it/s]

Step: 201, Loss: 0.03412080183625221


 20%|██        | 202/1000 [01:00<04:14,  3.13it/s]

Step: 202, Loss: 0.03259977325797081


 20%|██        | 203/1000 [01:00<04:14,  3.14it/s]

Step: 203, Loss: 0.03862522915005684


 20%|██        | 204/1000 [01:01<04:13,  3.14it/s]

Step: 204, Loss: 0.03258370980620384


 20%|██        | 205/1000 [01:01<04:13,  3.13it/s]

Step: 205, Loss: 0.03359611704945564


 21%|██        | 206/1000 [01:01<04:14,  3.13it/s]

Step: 206, Loss: 0.031260740011930466


 21%|██        | 207/1000 [01:02<04:13,  3.13it/s]

Step: 207, Loss: 0.03215505927801132


 21%|██        | 208/1000 [01:02<04:12,  3.14it/s]

Step: 208, Loss: 0.03382299095392227


 21%|██        | 209/1000 [01:02<04:12,  3.13it/s]

Step: 209, Loss: 0.029432430863380432


 21%|██        | 210/1000 [01:03<04:12,  3.12it/s]

Step: 210, Loss: 0.03202706202864647


 21%|██        | 211/1000 [01:03<04:12,  3.13it/s]

Step: 211, Loss: 0.02992129884660244


 21%|██        | 212/1000 [01:03<04:12,  3.12it/s]

Step: 212, Loss: 0.029754245653748512


 21%|██▏       | 213/1000 [01:04<04:10,  3.14it/s]

Step: 213, Loss: 0.02838933654129505


 21%|██▏       | 214/1000 [01:04<04:10,  3.13it/s]

Step: 214, Loss: 0.028616230934858322


 22%|██▏       | 215/1000 [01:04<04:11,  3.12it/s]

Step: 215, Loss: 0.027473170310258865


 22%|██▏       | 216/1000 [01:05<04:10,  3.13it/s]

Step: 216, Loss: 0.02760632522404194


 22%|██▏       | 217/1000 [01:05<04:08,  3.15it/s]

Step: 217, Loss: 0.02668222039937973


 22%|██▏       | 218/1000 [01:05<04:09,  3.14it/s]

Step: 218, Loss: 0.026795964688062668


 22%|██▏       | 219/1000 [01:05<04:09,  3.14it/s]

Step: 219, Loss: 0.026265334337949753


 22%|██▏       | 220/1000 [01:06<04:08,  3.14it/s]

Step: 220, Loss: 0.026976412162184715


 22%|██▏       | 221/1000 [01:06<04:07,  3.15it/s]

Step: 221, Loss: 0.025784609839320183


 22%|██▏       | 222/1000 [01:06<04:07,  3.15it/s]

Step: 222, Loss: 0.026530008763074875


 22%|██▏       | 223/1000 [01:07<04:07,  3.14it/s]

Step: 223, Loss: 0.025801848620176315


 22%|██▏       | 224/1000 [01:07<04:05,  3.16it/s]

Step: 224, Loss: 0.02484317496418953


 22%|██▎       | 225/1000 [01:07<04:05,  3.16it/s]

Step: 225, Loss: 0.024120302870869637


 23%|██▎       | 226/1000 [01:08<04:05,  3.16it/s]

Step: 226, Loss: 0.024092748761177063


 23%|██▎       | 227/1000 [01:08<04:03,  3.17it/s]

Step: 227, Loss: 0.025844182819128036


 23%|██▎       | 228/1000 [01:08<04:04,  3.16it/s]

Step: 228, Loss: 0.023567985743284225


 23%|██▎       | 229/1000 [01:09<04:03,  3.17it/s]

Step: 229, Loss: 0.022892432287335396


 23%|██▎       | 230/1000 [01:09<04:02,  3.17it/s]

Step: 230, Loss: 0.023085329681634903


 23%|██▎       | 231/1000 [01:09<04:00,  3.19it/s]

Step: 231, Loss: 0.022883381694555283


 23%|██▎       | 232/1000 [01:10<04:01,  3.18it/s]

Step: 232, Loss: 0.023258641362190247


 23%|██▎       | 233/1000 [01:10<04:00,  3.18it/s]

Step: 233, Loss: 0.024090444669127464


 23%|██▎       | 234/1000 [01:10<03:59,  3.19it/s]

Step: 234, Loss: 0.021579334512352943


 24%|██▎       | 235/1000 [01:11<03:59,  3.19it/s]

Step: 235, Loss: 0.02164928987622261


 24%|██▎       | 236/1000 [01:11<03:59,  3.19it/s]

Step: 236, Loss: 0.021701274439692497


 24%|██▎       | 237/1000 [01:11<03:58,  3.20it/s]

Step: 237, Loss: 0.022274291142821312


 24%|██▍       | 238/1000 [01:11<03:57,  3.21it/s]

Step: 238, Loss: 0.02195839211344719


 24%|██▍       | 239/1000 [01:12<03:57,  3.21it/s]

Step: 239, Loss: 0.020679976791143417


 24%|██▍       | 240/1000 [01:12<03:56,  3.22it/s]

Step: 240, Loss: 0.021276403218507767


 24%|██▍       | 241/1000 [01:12<03:55,  3.22it/s]

Step: 241, Loss: 0.021564627066254616


 24%|██▍       | 242/1000 [01:13<03:55,  3.22it/s]

Step: 242, Loss: 0.020031020045280457


 24%|██▍       | 243/1000 [01:13<03:54,  3.23it/s]

Step: 243, Loss: 0.020746838301420212


 24%|██▍       | 244/1000 [01:13<03:53,  3.24it/s]

Step: 244, Loss: 0.020918089896440506


 24%|██▍       | 245/1000 [01:14<03:52,  3.25it/s]

Step: 245, Loss: 0.019801555201411247


 25%|██▍       | 246/1000 [01:14<03:53,  3.23it/s]

Step: 246, Loss: 0.019390027970075607


 25%|██▍       | 247/1000 [01:14<03:52,  3.24it/s]

Step: 247, Loss: 0.019617188721895218


 25%|██▍       | 248/1000 [01:15<03:51,  3.25it/s]

Step: 248, Loss: 0.018689000979065895


 25%|██▍       | 249/1000 [01:15<03:51,  3.25it/s]

Step: 249, Loss: 0.01986234076321125


 25%|██▌       | 250/1000 [01:15<03:50,  3.25it/s]

Step: 250, Loss: 0.01910185068845749


 25%|██▌       | 251/1000 [01:15<03:50,  3.25it/s]

Step: 251, Loss: 0.01863650046288967


 25%|██▌       | 252/1000 [01:16<03:49,  3.25it/s]

Step: 252, Loss: 0.018778812140226364


 25%|██▌       | 253/1000 [01:16<03:48,  3.26it/s]

Step: 253, Loss: 0.02366943284869194


 25%|██▌       | 254/1000 [01:16<03:48,  3.27it/s]

Step: 254, Loss: 0.017470916733145714


 26%|██▌       | 255/1000 [01:17<03:48,  3.27it/s]

Step: 255, Loss: 0.017397210001945496


 26%|██▌       | 256/1000 [01:17<03:47,  3.27it/s]

Step: 256, Loss: 0.019218778237700462


 26%|██▌       | 257/1000 [01:17<03:47,  3.26it/s]

Step: 257, Loss: 0.018953146412968636


 26%|██▌       | 258/1000 [01:18<03:47,  3.26it/s]

Step: 258, Loss: 0.017135851085186005


 26%|██▌       | 259/1000 [01:18<03:46,  3.27it/s]

Step: 259, Loss: 0.01668257638812065


 26%|██▌       | 260/1000 [01:18<03:45,  3.28it/s]

Step: 260, Loss: 0.016464663669466972


 26%|██▌       | 261/1000 [01:19<03:45,  3.27it/s]

Step: 261, Loss: 0.016079651191830635


 26%|██▌       | 262/1000 [01:19<03:44,  3.28it/s]

Step: 262, Loss: 0.01618027128279209


 26%|██▋       | 263/1000 [01:19<03:43,  3.30it/s]

Step: 263, Loss: 0.015784138813614845


 26%|██▋       | 264/1000 [01:19<03:44,  3.29it/s]

Step: 264, Loss: 0.016369029879570007


 26%|██▋       | 265/1000 [01:20<03:43,  3.28it/s]

Step: 265, Loss: 0.01612391509115696


 27%|██▋       | 266/1000 [01:20<03:42,  3.30it/s]

Step: 266, Loss: 0.01535138487815857


 27%|██▋       | 267/1000 [01:20<03:42,  3.29it/s]

Step: 267, Loss: 0.017364967614412308


 27%|██▋       | 268/1000 [01:21<03:42,  3.29it/s]

Step: 268, Loss: 0.016242925077676773


 27%|██▋       | 269/1000 [01:21<03:41,  3.30it/s]

Step: 269, Loss: 0.015038523823022842


 27%|██▋       | 270/1000 [01:21<03:41,  3.30it/s]

Step: 270, Loss: 0.014568991959095001


 27%|██▋       | 271/1000 [01:22<03:41,  3.29it/s]

Step: 271, Loss: 0.0163307785987854


 27%|██▋       | 272/1000 [01:22<03:40,  3.30it/s]

Step: 272, Loss: 0.014655286446213722


 27%|██▋       | 273/1000 [01:22<03:39,  3.32it/s]

Step: 273, Loss: 0.014690590091049671


 27%|██▋       | 274/1000 [01:22<03:39,  3.30it/s]

Step: 274, Loss: 0.015271016396582127


 28%|██▊       | 275/1000 [01:23<03:39,  3.30it/s]

Step: 275, Loss: 0.013621327467262745


 28%|██▊       | 276/1000 [01:23<03:39,  3.31it/s]

Step: 276, Loss: 0.01383326854556799


 28%|██▊       | 277/1000 [01:23<03:39,  3.30it/s]

Step: 277, Loss: 0.015625428408384323


 28%|██▊       | 278/1000 [01:24<03:39,  3.29it/s]

Step: 278, Loss: 0.013227883726358414


 28%|██▊       | 279/1000 [01:24<03:38,  3.29it/s]

Step: 279, Loss: 0.013918411917984486


 28%|██▊       | 280/1000 [01:24<03:37,  3.30it/s]

Step: 280, Loss: 0.013215259648859501


 28%|██▊       | 281/1000 [01:25<03:38,  3.29it/s]

Step: 281, Loss: 0.01339542306959629


 28%|██▊       | 282/1000 [01:25<03:37,  3.30it/s]

Step: 282, Loss: 0.012901692651212215


 28%|██▊       | 283/1000 [01:25<03:36,  3.30it/s]

Step: 283, Loss: 0.013029281981289387


 28%|██▊       | 284/1000 [01:25<03:36,  3.31it/s]

Step: 284, Loss: 0.012700839899480343


 28%|██▊       | 285/1000 [01:26<03:35,  3.32it/s]

Step: 285, Loss: 0.0127948559820652


 29%|██▊       | 286/1000 [01:26<03:34,  3.32it/s]

Step: 286, Loss: 0.012545866891741753


 29%|██▊       | 287/1000 [01:26<03:34,  3.32it/s]

Step: 287, Loss: 0.012579314410686493


 29%|██▉       | 288/1000 [01:27<03:34,  3.32it/s]

Step: 288, Loss: 0.011923556216061115


 29%|██▉       | 289/1000 [01:27<03:34,  3.32it/s]

Step: 289, Loss: 0.012092974036931992


 29%|██▉       | 290/1000 [01:27<03:33,  3.32it/s]

Step: 290, Loss: 0.012742986902594566


 29%|██▉       | 291/1000 [01:28<03:33,  3.33it/s]

Step: 291, Loss: 0.01163543201982975


 29%|██▉       | 292/1000 [01:28<03:32,  3.33it/s]

Step: 292, Loss: 0.011373991146683693


 29%|██▉       | 293/1000 [01:28<03:31,  3.34it/s]

Step: 293, Loss: 0.011941847391426563


 29%|██▉       | 294/1000 [01:28<03:31,  3.34it/s]

Step: 294, Loss: 0.011795413680374622


 30%|██▉       | 295/1000 [01:29<03:31,  3.34it/s]

Step: 295, Loss: 0.011640784330666065


 30%|██▉       | 296/1000 [01:29<03:30,  3.35it/s]

Step: 296, Loss: 0.011171967722475529


 30%|██▉       | 297/1000 [01:29<03:29,  3.35it/s]

Step: 297, Loss: 0.011775675229728222


 30%|██▉       | 298/1000 [01:30<03:29,  3.35it/s]

Step: 298, Loss: 0.01065696869045496


 30%|██▉       | 299/1000 [01:30<03:29,  3.35it/s]

Step: 299, Loss: 0.010946474969387054


 30%|███       | 300/1000 [01:30<03:28,  3.35it/s]

Step: 300, Loss: 0.010516396723687649


 30%|███       | 301/1000 [01:31<03:28,  3.35it/s]

Step: 301, Loss: 0.012122702784836292


 30%|███       | 302/1000 [01:31<03:28,  3.35it/s]

Step: 302, Loss: 0.010220101103186607


 30%|███       | 303/1000 [01:31<03:28,  3.35it/s]

Step: 303, Loss: 0.010690595023334026


 30%|███       | 304/1000 [01:31<03:27,  3.36it/s]

Step: 304, Loss: 0.011347564868628979


 30%|███       | 305/1000 [01:32<03:26,  3.36it/s]

Step: 305, Loss: 0.010232863016426563


 31%|███       | 306/1000 [01:32<03:26,  3.36it/s]

Step: 306, Loss: 0.00993893388658762


 31%|███       | 307/1000 [01:32<03:26,  3.36it/s]

Step: 307, Loss: 0.010072808712720871


 31%|███       | 308/1000 [01:33<03:26,  3.36it/s]

Step: 308, Loss: 0.00960505474358797


 31%|███       | 309/1000 [01:33<03:26,  3.35it/s]

Step: 309, Loss: 0.009635159745812416


 31%|███       | 310/1000 [01:33<03:25,  3.36it/s]

Step: 310, Loss: 0.010160751640796661


 31%|███       | 311/1000 [01:34<03:25,  3.36it/s]

Step: 311, Loss: 0.009808039292693138


 31%|███       | 312/1000 [01:34<03:25,  3.36it/s]

Step: 312, Loss: 0.009323587641119957


 31%|███▏      | 313/1000 [01:34<03:24,  3.35it/s]

Step: 313, Loss: 0.009297782555222511


 31%|███▏      | 314/1000 [01:34<03:23,  3.36it/s]

Step: 314, Loss: 0.009162850677967072


 32%|███▏      | 315/1000 [01:35<03:23,  3.37it/s]

Step: 315, Loss: 0.00896149780601263


 32%|███▏      | 316/1000 [01:35<03:23,  3.35it/s]

Step: 316, Loss: 0.009000362828373909


 32%|███▏      | 317/1000 [01:35<03:23,  3.36it/s]

Step: 317, Loss: 0.009419801644980907


 32%|███▏      | 318/1000 [01:36<03:23,  3.35it/s]

Step: 318, Loss: 0.008697031065821648


 32%|███▏      | 319/1000 [01:36<03:22,  3.36it/s]

Step: 319, Loss: 0.009348277933895588


 32%|███▏      | 320/1000 [01:36<03:22,  3.35it/s]

Step: 320, Loss: 0.008878419175744057


 32%|███▏      | 321/1000 [01:37<03:22,  3.36it/s]

Step: 321, Loss: 0.008466762490570545


 32%|███▏      | 322/1000 [01:37<03:21,  3.37it/s]

Step: 322, Loss: 0.009212290868163109


 32%|███▏      | 323/1000 [01:37<03:21,  3.35it/s]

Step: 323, Loss: 0.00858467910438776


 32%|███▏      | 324/1000 [01:37<03:21,  3.35it/s]

Step: 324, Loss: 0.0081839794293046


 32%|███▎      | 325/1000 [01:38<03:19,  3.38it/s]

Step: 325, Loss: 0.00847280491143465


 33%|███▎      | 326/1000 [01:38<03:20,  3.37it/s]

Step: 326, Loss: 0.008317681029438972


 33%|███▎      | 327/1000 [01:38<03:19,  3.37it/s]

Step: 327, Loss: 0.00844266265630722


 33%|███▎      | 328/1000 [01:39<03:19,  3.37it/s]

Step: 328, Loss: 0.008202141150832176


 33%|███▎      | 329/1000 [01:39<03:18,  3.38it/s]

Step: 329, Loss: 0.007859611883759499


 33%|███▎      | 330/1000 [01:39<03:18,  3.37it/s]

Step: 330, Loss: 0.007934324443340302


 33%|███▎      | 331/1000 [01:39<03:18,  3.37it/s]

Step: 331, Loss: 0.00760703394189477


 33%|███▎      | 332/1000 [01:40<03:17,  3.37it/s]

Step: 332, Loss: 0.007809941656887531


 33%|███▎      | 333/1000 [01:40<03:17,  3.38it/s]

Step: 333, Loss: 0.00752867991104722


 33%|███▎      | 334/1000 [01:40<03:17,  3.37it/s]

Step: 334, Loss: 0.007594451308250427


 34%|███▎      | 335/1000 [01:41<03:17,  3.37it/s]

Step: 335, Loss: 0.007217717822641134


 34%|███▎      | 336/1000 [01:41<03:16,  3.37it/s]

Step: 336, Loss: 0.007782333996146917


 34%|███▎      | 337/1000 [01:41<03:16,  3.37it/s]

Step: 337, Loss: 0.007517191581428051


 34%|███▍      | 338/1000 [01:42<03:16,  3.37it/s]

Step: 338, Loss: 0.007528072688728571


 34%|███▍      | 339/1000 [01:42<03:15,  3.38it/s]

Step: 339, Loss: 0.006973138079047203


 34%|███▍      | 340/1000 [01:42<03:16,  3.36it/s]

Step: 340, Loss: 0.00685746967792511


 34%|███▍      | 341/1000 [01:42<03:15,  3.37it/s]

Step: 341, Loss: 0.007200128398835659


 34%|███▍      | 342/1000 [01:43<03:14,  3.39it/s]

Step: 342, Loss: 0.00702128279954195


 34%|███▍      | 343/1000 [01:43<03:14,  3.37it/s]

Step: 343, Loss: 0.006816377863287926


 34%|███▍      | 344/1000 [01:43<03:15,  3.36it/s]

Step: 344, Loss: 0.0066123236902058125


 34%|███▍      | 345/1000 [01:44<03:14,  3.37it/s]

Step: 345, Loss: 0.0065689655020833015


 35%|███▍      | 346/1000 [01:44<03:13,  3.38it/s]

Step: 346, Loss: 0.006733207497745752


 35%|███▍      | 347/1000 [01:44<03:14,  3.36it/s]

Step: 347, Loss: 0.006527651101350784


 35%|███▍      | 348/1000 [01:45<03:14,  3.36it/s]

Step: 348, Loss: 0.006340933497995138


 35%|███▍      | 349/1000 [01:45<03:13,  3.37it/s]

Step: 349, Loss: 0.006893424782902002


 35%|███▌      | 350/1000 [01:45<03:13,  3.36it/s]

Step: 350, Loss: 0.00629991153255105


 35%|███▌      | 351/1000 [01:45<03:13,  3.36it/s]

Step: 351, Loss: 0.006487589329481125


 35%|███▌      | 352/1000 [01:46<03:13,  3.36it/s]

Step: 352, Loss: 0.006145182065665722


 35%|███▌      | 353/1000 [01:46<03:12,  3.36it/s]

Step: 353, Loss: 0.006584881339222193


 35%|███▌      | 354/1000 [01:46<03:12,  3.36it/s]

Step: 354, Loss: 0.00629795016720891


 36%|███▌      | 355/1000 [01:47<03:11,  3.36it/s]

Step: 355, Loss: 0.006602579262107611


 36%|███▌      | 356/1000 [01:47<03:11,  3.37it/s]

Step: 356, Loss: 0.0060205948539078236


 36%|███▌      | 357/1000 [01:47<03:11,  3.36it/s]

Step: 357, Loss: 0.005829604342579842


 36%|███▌      | 358/1000 [01:48<03:11,  3.36it/s]

Step: 358, Loss: 0.006068023853003979


 36%|███▌      | 359/1000 [01:48<03:11,  3.35it/s]

Step: 359, Loss: 0.005709782242774963


 36%|███▌      | 360/1000 [01:48<03:09,  3.37it/s]

Step: 360, Loss: 0.005592529661953449


 36%|███▌      | 361/1000 [01:48<03:09,  3.37it/s]

Step: 361, Loss: 0.005871102213859558


 36%|███▌      | 362/1000 [01:49<03:10,  3.36it/s]

Step: 362, Loss: 0.005505058914422989


 36%|███▋      | 363/1000 [01:49<03:09,  3.36it/s]

Step: 363, Loss: 0.00602657301351428


 36%|███▋      | 364/1000 [01:49<03:08,  3.37it/s]

Step: 364, Loss: 0.005546998232603073


 36%|███▋      | 365/1000 [01:50<03:09,  3.36it/s]

Step: 365, Loss: 0.0054028453305363655


 37%|███▋      | 366/1000 [01:50<03:08,  3.36it/s]

Step: 366, Loss: 0.0052852267399430275


 37%|███▋      | 367/1000 [01:50<03:08,  3.36it/s]

Step: 367, Loss: 0.005363506264984608


 37%|███▋      | 368/1000 [01:50<03:07,  3.36it/s]

Step: 368, Loss: 0.005853137467056513


 37%|███▋      | 369/1000 [01:51<03:08,  3.35it/s]

Step: 369, Loss: 0.00554557703435421


 37%|███▋      | 370/1000 [01:51<03:08,  3.35it/s]

Step: 370, Loss: 0.00518053537234664


 37%|███▋      | 371/1000 [01:51<03:06,  3.36it/s]

Step: 371, Loss: 0.004992228001356125


 37%|███▋      | 372/1000 [01:52<03:06,  3.36it/s]

Step: 372, Loss: 0.005139768123626709


 37%|███▋      | 373/1000 [01:52<03:06,  3.36it/s]

Step: 373, Loss: 0.005093976855278015


 37%|███▋      | 374/1000 [01:52<03:06,  3.36it/s]

Step: 374, Loss: 0.005033062770962715


 38%|███▊      | 375/1000 [01:53<03:06,  3.36it/s]

Step: 375, Loss: 0.005027002654969692


 38%|███▊      | 376/1000 [01:53<03:06,  3.35it/s]

Step: 376, Loss: 0.005137957166880369


 38%|███▊      | 377/1000 [01:53<03:06,  3.34it/s]

Step: 377, Loss: 0.0048974668607115746


 38%|███▊      | 378/1000 [01:53<03:04,  3.36it/s]

Step: 378, Loss: 0.00488010048866272


 38%|███▊      | 379/1000 [01:54<03:06,  3.33it/s]

Step: 379, Loss: 0.004817971959710121


 38%|███▊      | 380/1000 [01:54<03:09,  3.27it/s]

Step: 380, Loss: 0.004631522577255964


 38%|███▊      | 381/1000 [01:54<03:08,  3.28it/s]

Step: 381, Loss: 0.0046422709710896015


 38%|███▊      | 382/1000 [01:55<03:07,  3.29it/s]

Step: 382, Loss: 0.004588000476360321


 38%|███▊      | 383/1000 [01:55<03:06,  3.31it/s]

Step: 383, Loss: 0.0045323907397687435


 38%|███▊      | 384/1000 [01:55<03:05,  3.32it/s]

Step: 384, Loss: 0.004451709799468517


 38%|███▊      | 385/1000 [01:56<03:04,  3.34it/s]

Step: 385, Loss: 0.004656144417822361


 39%|███▊      | 386/1000 [01:56<03:03,  3.34it/s]

Step: 386, Loss: 0.004483821801841259


 39%|███▊      | 387/1000 [01:56<03:04,  3.32it/s]

Step: 387, Loss: 0.004354511853307486


 39%|███▉      | 388/1000 [01:56<03:04,  3.31it/s]

Step: 388, Loss: 0.004566955380141735


 39%|███▉      | 389/1000 [01:57<03:04,  3.32it/s]

Step: 389, Loss: 0.004203524440526962


 39%|███▉      | 390/1000 [01:57<03:04,  3.31it/s]

Step: 390, Loss: 0.004218362271785736


 39%|███▉      | 391/1000 [01:57<03:04,  3.31it/s]

Step: 391, Loss: 0.004099064972251654


 39%|███▉      | 392/1000 [01:58<03:02,  3.33it/s]

Step: 392, Loss: 0.004112789407372475


 39%|███▉      | 393/1000 [01:58<03:01,  3.34it/s]

Step: 393, Loss: 0.004330798052251339


 39%|███▉      | 394/1000 [01:58<03:02,  3.33it/s]

Step: 394, Loss: 0.004187938757240772


 40%|███▉      | 395/1000 [01:59<03:01,  3.34it/s]

Step: 395, Loss: 0.004076511133462191


 40%|███▉      | 396/1000 [01:59<03:01,  3.34it/s]

Step: 396, Loss: 0.004016232211142778


 40%|███▉      | 397/1000 [01:59<03:01,  3.33it/s]

Step: 397, Loss: 0.003973362036049366


 40%|███▉      | 398/1000 [01:59<03:00,  3.34it/s]

Step: 398, Loss: 0.0038761196192353964


 40%|███▉      | 399/1000 [02:00<03:00,  3.33it/s]

Step: 399, Loss: 0.003989559598267078


 40%|████      | 400/1000 [02:00<03:00,  3.33it/s]

Step: 400, Loss: 0.003890999360010028


 40%|████      | 401/1000 [02:00<02:59,  3.33it/s]

Step: 401, Loss: 0.003710235469043255


 40%|████      | 402/1000 [02:01<02:59,  3.32it/s]

Step: 402, Loss: 0.003921602852642536


 40%|████      | 403/1000 [02:01<02:59,  3.32it/s]

Step: 403, Loss: 0.00358435302041471


 40%|████      | 404/1000 [02:01<02:59,  3.32it/s]

Step: 404, Loss: 0.003692423924803734


 40%|████      | 405/1000 [02:02<02:58,  3.33it/s]

Step: 405, Loss: 0.00399574963375926


 41%|████      | 406/1000 [02:02<02:58,  3.33it/s]

Step: 406, Loss: 0.0035788544919341803


 41%|████      | 407/1000 [02:02<02:57,  3.33it/s]

Step: 407, Loss: 0.003772385185584426


 41%|████      | 408/1000 [02:02<02:57,  3.33it/s]

Step: 408, Loss: 0.0035054958425462246


 41%|████      | 409/1000 [02:03<02:58,  3.32it/s]

Step: 409, Loss: 0.0034706455189734697


 41%|████      | 410/1000 [02:03<02:57,  3.32it/s]

Step: 410, Loss: 0.003481632797047496


 41%|████      | 411/1000 [02:03<02:57,  3.31it/s]

Step: 411, Loss: 0.003491935320198536


 41%|████      | 412/1000 [02:04<02:57,  3.31it/s]

Step: 412, Loss: 0.0034195089247077703


 41%|████▏     | 413/1000 [02:04<02:57,  3.31it/s]

Step: 413, Loss: 0.0034686997532844543


 41%|████▏     | 414/1000 [02:04<02:57,  3.31it/s]

Step: 414, Loss: 0.003268944099545479


 42%|████▏     | 415/1000 [02:05<02:57,  3.30it/s]

Step: 415, Loss: 0.0035527669824659824


 42%|████▏     | 416/1000 [02:05<02:56,  3.31it/s]

Step: 416, Loss: 0.003235245356336236


 42%|████▏     | 417/1000 [02:05<02:56,  3.31it/s]

Step: 417, Loss: 0.0033655345905572176


 42%|████▏     | 418/1000 [02:06<02:55,  3.31it/s]

Step: 418, Loss: 0.0032037729397416115


 42%|████▏     | 419/1000 [02:06<02:55,  3.31it/s]

Step: 419, Loss: 0.003282044315710664


 42%|████▏     | 420/1000 [02:06<02:54,  3.32it/s]

Step: 420, Loss: 0.0033238944597542286


 42%|████▏     | 421/1000 [02:06<02:54,  3.32it/s]

Step: 421, Loss: 0.003222357016056776


 42%|████▏     | 422/1000 [02:07<02:54,  3.31it/s]

Step: 422, Loss: 0.003071183804422617


 42%|████▏     | 423/1000 [02:07<02:54,  3.31it/s]

Step: 423, Loss: 0.003058113856241107


 42%|████▏     | 424/1000 [02:07<02:54,  3.31it/s]

Step: 424, Loss: 0.002935218857601285


 42%|████▎     | 425/1000 [02:08<02:53,  3.30it/s]

Step: 425, Loss: 0.002929229522123933


 43%|████▎     | 426/1000 [02:08<02:54,  3.30it/s]

Step: 426, Loss: 0.002975157694891095


 43%|████▎     | 427/1000 [02:08<02:53,  3.30it/s]

Step: 427, Loss: 0.0029929159209132195


 43%|████▎     | 428/1000 [02:09<02:53,  3.30it/s]

Step: 428, Loss: 0.002941092476248741


 43%|████▎     | 429/1000 [02:09<02:52,  3.30it/s]

Step: 429, Loss: 0.002869351301342249


 43%|████▎     | 430/1000 [02:09<02:53,  3.29it/s]

Step: 430, Loss: 0.002846221439540386


 43%|████▎     | 431/1000 [02:09<02:52,  3.29it/s]

Step: 431, Loss: 0.002765093697234988


 43%|████▎     | 432/1000 [02:10<02:52,  3.29it/s]

Step: 432, Loss: 0.0027398401871323586


 43%|████▎     | 433/1000 [02:10<02:51,  3.30it/s]

Step: 433, Loss: 0.002924908185377717


 43%|████▎     | 434/1000 [02:10<02:51,  3.30it/s]

Step: 434, Loss: 0.002843610243871808


 44%|████▎     | 435/1000 [02:11<02:51,  3.29it/s]

Step: 435, Loss: 0.0026425316464155912


 44%|████▎     | 436/1000 [02:11<02:51,  3.30it/s]

Step: 436, Loss: 0.002689954824745655


 44%|████▎     | 437/1000 [02:11<02:50,  3.29it/s]

Step: 437, Loss: 0.0027693831361830235


 44%|████▍     | 438/1000 [02:12<02:50,  3.29it/s]

Step: 438, Loss: 0.0026250218506902456


 44%|████▍     | 439/1000 [02:12<02:50,  3.29it/s]

Step: 439, Loss: 0.0025555850006639957


 44%|████▍     | 440/1000 [02:12<02:50,  3.29it/s]

Step: 440, Loss: 0.0027219378389418125


 44%|████▍     | 441/1000 [02:12<02:49,  3.29it/s]

Step: 441, Loss: 0.002610609168186784


 44%|████▍     | 442/1000 [02:13<02:49,  3.30it/s]

Step: 442, Loss: 0.002488008700311184


 44%|████▍     | 443/1000 [02:13<02:49,  3.30it/s]

Step: 443, Loss: 0.0024727354757487774


 44%|████▍     | 444/1000 [02:13<02:48,  3.29it/s]

Step: 444, Loss: 0.0026326500810682774


 44%|████▍     | 445/1000 [02:14<02:48,  3.30it/s]

Step: 445, Loss: 0.002550765173509717


 45%|████▍     | 446/1000 [02:14<02:48,  3.30it/s]

Step: 446, Loss: 0.002545975148677826


 45%|████▍     | 447/1000 [02:14<02:48,  3.29it/s]

Step: 447, Loss: 0.002429276704788208


 45%|████▍     | 448/1000 [02:15<02:47,  3.29it/s]

Step: 448, Loss: 0.0023842069786041975


 45%|████▍     | 449/1000 [02:15<02:47,  3.29it/s]

Step: 449, Loss: 0.002359389327466488


 45%|████▌     | 450/1000 [02:15<02:46,  3.30it/s]

Step: 450, Loss: 0.002279955195263028


 45%|████▌     | 451/1000 [02:16<02:46,  3.29it/s]

Step: 451, Loss: 0.0023587998002767563


 45%|████▌     | 452/1000 [02:16<02:46,  3.29it/s]

Step: 452, Loss: 0.002320702653378248


 45%|████▌     | 453/1000 [02:16<02:46,  3.29it/s]

Step: 453, Loss: 0.002370874397456646


 45%|████▌     | 454/1000 [02:16<02:45,  3.30it/s]

Step: 454, Loss: 0.0022911967243999243


 46%|████▌     | 455/1000 [02:17<02:45,  3.30it/s]

Step: 455, Loss: 0.002247293945401907


 46%|████▌     | 456/1000 [02:17<02:45,  3.30it/s]

Step: 456, Loss: 0.002260304754599929


 46%|████▌     | 457/1000 [02:17<02:44,  3.30it/s]

Step: 457, Loss: 0.0022380950395017862


 46%|████▌     | 458/1000 [02:18<02:44,  3.29it/s]

Step: 458, Loss: 0.0022090421989560127


 46%|████▌     | 459/1000 [02:18<02:44,  3.29it/s]

Step: 459, Loss: 0.002122771693393588


 46%|████▌     | 460/1000 [02:18<02:44,  3.29it/s]

Step: 460, Loss: 0.0021933671087026596


 46%|████▌     | 461/1000 [02:19<02:43,  3.29it/s]

Step: 461, Loss: 0.0020957202650606632


 46%|████▌     | 462/1000 [02:19<02:43,  3.29it/s]

Step: 462, Loss: 0.002126030158251524


 46%|████▋     | 463/1000 [02:19<02:42,  3.30it/s]

Step: 463, Loss: 0.002097693970426917


 46%|████▋     | 464/1000 [02:19<02:42,  3.29it/s]

Step: 464, Loss: 0.001999284140765667


 46%|████▋     | 465/1000 [02:20<02:42,  3.29it/s]

Step: 465, Loss: 0.002081463811919093


 47%|████▋     | 466/1000 [02:20<02:41,  3.30it/s]

Step: 466, Loss: 0.0020301295444369316


 47%|████▋     | 467/1000 [02:20<02:42,  3.29it/s]

Step: 467, Loss: 0.001984793459996581


 47%|████▋     | 468/1000 [02:21<02:41,  3.30it/s]

Step: 468, Loss: 0.0019611846655607224


 47%|████▋     | 469/1000 [02:21<02:40,  3.30it/s]

Step: 469, Loss: 0.0020376048050820827


 47%|████▋     | 470/1000 [02:21<02:41,  3.29it/s]

Step: 470, Loss: 0.0020856009796261787


 47%|████▋     | 471/1000 [02:22<02:41,  3.28it/s]

Step: 471, Loss: 0.001881876029074192


 47%|████▋     | 472/1000 [02:22<02:40,  3.29it/s]

Step: 472, Loss: 0.001840369077399373


 47%|████▋     | 473/1000 [02:22<02:40,  3.29it/s]

Step: 473, Loss: 0.0018598997266963124


 47%|████▋     | 474/1000 [02:23<02:40,  3.28it/s]

Step: 474, Loss: 0.0018295487388968468


 48%|████▊     | 475/1000 [02:23<02:40,  3.28it/s]

Step: 475, Loss: 0.0019150807056576014


 48%|████▊     | 476/1000 [02:23<02:39,  3.28it/s]

Step: 476, Loss: 0.0017874492332339287


 48%|████▊     | 477/1000 [02:23<02:39,  3.27it/s]

Step: 477, Loss: 0.001884367666207254


 48%|████▊     | 478/1000 [02:24<02:39,  3.28it/s]

Step: 478, Loss: 0.001758485333994031


 48%|████▊     | 479/1000 [02:24<02:38,  3.28it/s]

Step: 479, Loss: 0.0017749122343957424


 48%|████▊     | 480/1000 [02:24<02:38,  3.28it/s]

Step: 480, Loss: 0.0017452031606808305


 48%|████▊     | 481/1000 [02:25<02:38,  3.28it/s]

Step: 481, Loss: 0.001719632651656866


 48%|████▊     | 482/1000 [02:25<02:37,  3.29it/s]

Step: 482, Loss: 0.0018156010191887617


 48%|████▊     | 483/1000 [02:25<02:37,  3.29it/s]

Step: 483, Loss: 0.0016484340885654092


 48%|████▊     | 484/1000 [02:26<02:36,  3.29it/s]

Step: 484, Loss: 0.0017175092361867428


 48%|████▊     | 485/1000 [02:26<02:36,  3.29it/s]

Step: 485, Loss: 0.0016362587921321392


 49%|████▊     | 486/1000 [02:26<02:36,  3.29it/s]

Step: 486, Loss: 0.001784171792678535


 49%|████▊     | 487/1000 [02:26<02:36,  3.28it/s]

Step: 487, Loss: 0.001704965252429247


 49%|████▉     | 488/1000 [02:27<02:35,  3.29it/s]

Step: 488, Loss: 0.0016311460640281439


 49%|████▉     | 489/1000 [02:27<02:34,  3.30it/s]

Step: 489, Loss: 0.0015479594003409147


 49%|████▉     | 490/1000 [02:27<02:34,  3.29it/s]

Step: 490, Loss: 0.0015748386504128575


 49%|████▉     | 491/1000 [02:28<02:34,  3.30it/s]

Step: 491, Loss: 0.0016040641348809004


 49%|████▉     | 492/1000 [02:28<02:33,  3.30it/s]

Step: 492, Loss: 0.0015320952516049147


 49%|████▉     | 493/1000 [02:28<02:33,  3.30it/s]

Step: 493, Loss: 0.0015853670192882419


 49%|████▉     | 494/1000 [02:29<02:33,  3.29it/s]

Step: 494, Loss: 0.0015020781429484487


 50%|████▉     | 495/1000 [02:29<02:33,  3.29it/s]

Step: 495, Loss: 0.0015209333505481482


 50%|████▉     | 496/1000 [02:29<02:33,  3.29it/s]

Step: 496, Loss: 0.0014729861868545413


 50%|████▉     | 497/1000 [02:30<02:33,  3.28it/s]

Step: 497, Loss: 0.0015517601277679205


 50%|████▉     | 498/1000 [02:30<02:32,  3.29it/s]

Step: 498, Loss: 0.0014637878630310297


 50%|████▉     | 499/1000 [02:30<02:32,  3.29it/s]

Step: 499, Loss: 0.0015069880755618215


 50%|█████     | 500/1000 [02:30<02:32,  3.29it/s]

Step: 500, Loss: 0.0014727230882272124


 50%|█████     | 501/1000 [02:31<02:31,  3.29it/s]

Step: 501, Loss: 0.0014458326622843742


 50%|█████     | 502/1000 [02:31<02:31,  3.30it/s]

Step: 502, Loss: 0.0014640855370089412


 50%|█████     | 503/1000 [02:31<02:31,  3.29it/s]

Step: 503, Loss: 0.0015116684371605515


 50%|█████     | 504/1000 [02:32<02:30,  3.29it/s]

Step: 504, Loss: 0.0015183740761131048


 50%|█████     | 505/1000 [02:32<02:30,  3.30it/s]

Step: 505, Loss: 0.0013546266127377748


 51%|█████     | 506/1000 [02:32<02:30,  3.29it/s]

Step: 506, Loss: 0.0013186135329306126


 51%|█████     | 507/1000 [02:33<02:29,  3.29it/s]

Step: 507, Loss: 0.0013900581980124116


 51%|█████     | 508/1000 [02:33<02:29,  3.29it/s]

Step: 508, Loss: 0.0012899789726361632


 51%|█████     | 509/1000 [02:33<02:29,  3.28it/s]

Step: 509, Loss: 0.001341922557912767


 51%|█████     | 510/1000 [02:33<02:29,  3.28it/s]

Step: 510, Loss: 0.0013340931618586183


 51%|█████     | 511/1000 [02:34<02:29,  3.27it/s]

Step: 511, Loss: 0.0012639533961191773


 51%|█████     | 512/1000 [02:34<02:28,  3.28it/s]

Step: 512, Loss: 0.0013182060793042183


 51%|█████▏    | 513/1000 [02:34<02:28,  3.28it/s]

Step: 513, Loss: 0.0012911875965073705


 51%|█████▏    | 514/1000 [02:35<02:28,  3.28it/s]

Step: 514, Loss: 0.0013330683577805758


 52%|█████▏    | 515/1000 [02:35<02:28,  3.28it/s]

Step: 515, Loss: 0.0012457818957045674


 52%|█████▏    | 516/1000 [02:35<02:27,  3.28it/s]

Step: 516, Loss: 0.0011958678951486945


 52%|█████▏    | 517/1000 [02:36<02:27,  3.28it/s]

Step: 517, Loss: 0.0012975804274901748


 52%|█████▏    | 518/1000 [02:36<02:26,  3.29it/s]

Step: 518, Loss: 0.0012588688405230641


 52%|█████▏    | 519/1000 [02:36<02:26,  3.29it/s]

Step: 519, Loss: 0.0011967745376750827


 52%|█████▏    | 520/1000 [02:37<02:26,  3.28it/s]

Step: 520, Loss: 0.0012269153958186507


 52%|█████▏    | 521/1000 [02:37<02:25,  3.28it/s]

Step: 521, Loss: 0.001157634425908327


 52%|█████▏    | 522/1000 [02:37<02:25,  3.29it/s]

Step: 522, Loss: 0.001182962441816926


 52%|█████▏    | 523/1000 [02:37<02:24,  3.29it/s]

Step: 523, Loss: 0.001148940296843648


 52%|█████▏    | 524/1000 [02:38<02:24,  3.29it/s]

Step: 524, Loss: 0.001131957396864891


 52%|█████▎    | 525/1000 [02:38<02:24,  3.29it/s]

Step: 525, Loss: 0.0011094931978732347


 53%|█████▎    | 526/1000 [02:38<02:23,  3.30it/s]

Step: 526, Loss: 0.0010812547989189625


 53%|█████▎    | 527/1000 [02:39<02:23,  3.30it/s]

Step: 527, Loss: 0.0011201975867152214


 53%|█████▎    | 528/1000 [02:39<02:23,  3.29it/s]

Step: 528, Loss: 0.0010858688037842512


 53%|█████▎    | 529/1000 [02:39<02:22,  3.30it/s]

Step: 529, Loss: 0.0011167823104187846


 53%|█████▎    | 530/1000 [02:40<02:22,  3.30it/s]

Step: 530, Loss: 0.0010598290245980024


 53%|█████▎    | 531/1000 [02:40<02:22,  3.30it/s]

Step: 531, Loss: 0.001119363121688366


 53%|█████▎    | 532/1000 [02:40<02:21,  3.30it/s]

Step: 532, Loss: 0.001047136727720499


 53%|█████▎    | 533/1000 [02:40<02:21,  3.30it/s]

Step: 533, Loss: 0.0010683442233130336


 53%|█████▎    | 534/1000 [02:41<02:21,  3.29it/s]

Step: 534, Loss: 0.0010268689366057515


 54%|█████▎    | 535/1000 [02:41<02:20,  3.30it/s]

Step: 535, Loss: 0.0010260080453008413


 54%|█████▎    | 536/1000 [02:41<02:20,  3.30it/s]

Step: 536, Loss: 0.0010685308370739222


 54%|█████▎    | 537/1000 [02:42<02:20,  3.29it/s]

Step: 537, Loss: 0.0011009406298398972


 54%|█████▍    | 538/1000 [02:42<02:20,  3.29it/s]

Step: 538, Loss: 0.0009705405100248754


 54%|█████▍    | 539/1000 [02:42<02:19,  3.29it/s]

Step: 539, Loss: 0.0009966254001483321


 54%|█████▍    | 540/1000 [02:43<02:19,  3.29it/s]

Step: 540, Loss: 0.0009667232516221702


 54%|█████▍    | 541/1000 [02:43<02:19,  3.29it/s]

Step: 541, Loss: 0.0009359947871416807


 54%|█████▍    | 542/1000 [02:43<02:18,  3.30it/s]

Step: 542, Loss: 0.0009876582771539688


 54%|█████▍    | 543/1000 [02:43<02:18,  3.29it/s]

Step: 543, Loss: 0.0009580344194546342


 54%|█████▍    | 544/1000 [02:44<02:18,  3.30it/s]

Step: 544, Loss: 0.0009056158596649766


 55%|█████▍    | 545/1000 [02:44<02:18,  3.29it/s]

Step: 545, Loss: 0.000925453205127269


 55%|█████▍    | 546/1000 [02:44<02:18,  3.29it/s]

Step: 546, Loss: 0.0009005037136375904


 55%|█████▍    | 547/1000 [02:45<02:17,  3.29it/s]

Step: 547, Loss: 0.0008938411483541131


 55%|█████▍    | 548/1000 [02:45<02:17,  3.28it/s]

Step: 548, Loss: 0.0008995693642646074


 55%|█████▍    | 549/1000 [02:45<02:17,  3.29it/s]

Step: 549, Loss: 0.0009019265417009592


 55%|█████▌    | 550/1000 [02:46<02:16,  3.29it/s]

Step: 550, Loss: 0.0008941918495111167


 55%|█████▌    | 551/1000 [02:46<02:16,  3.30it/s]

Step: 551, Loss: 0.0008681008475832641


 55%|█████▌    | 552/1000 [02:46<02:15,  3.30it/s]

Step: 552, Loss: 0.0008462840924039483


 55%|█████▌    | 553/1000 [02:47<02:15,  3.30it/s]

Step: 553, Loss: 0.0008720610057935119


 55%|█████▌    | 554/1000 [02:47<02:14,  3.30it/s]

Step: 554, Loss: 0.0008519350667484105


 56%|█████▌    | 555/1000 [02:47<02:15,  3.30it/s]

Step: 555, Loss: 0.0008414385956712067


 56%|█████▌    | 556/1000 [02:47<02:14,  3.30it/s]

Step: 556, Loss: 0.0008126229513436556


 56%|█████▌    | 557/1000 [02:48<02:14,  3.30it/s]

Step: 557, Loss: 0.000829181750304997


 56%|█████▌    | 558/1000 [02:48<02:13,  3.30it/s]

Step: 558, Loss: 0.000807433039881289


 56%|█████▌    | 559/1000 [02:48<02:13,  3.30it/s]

Step: 559, Loss: 0.0008069269824773073


 56%|█████▌    | 560/1000 [02:49<02:13,  3.30it/s]

Step: 560, Loss: 0.0008047630544751883


 56%|█████▌    | 561/1000 [02:49<02:12,  3.30it/s]

Step: 561, Loss: 0.0007999776280485094


 56%|█████▌    | 562/1000 [02:49<02:12,  3.30it/s]

Step: 562, Loss: 0.0007772350800223649


 56%|█████▋    | 563/1000 [02:50<02:12,  3.30it/s]

Step: 563, Loss: 0.0007948122220113873


 56%|█████▋    | 564/1000 [02:50<02:11,  3.31it/s]

Step: 564, Loss: 0.0007899695774540305


 56%|█████▋    | 565/1000 [02:50<02:11,  3.31it/s]

Step: 565, Loss: 0.0007551842136308551


 57%|█████▋    | 566/1000 [02:50<02:11,  3.30it/s]

Step: 566, Loss: 0.0007499728817492723


 57%|█████▋    | 567/1000 [02:51<02:11,  3.30it/s]

Step: 567, Loss: 0.0007452061981894076


 57%|█████▋    | 568/1000 [02:51<02:10,  3.30it/s]

Step: 568, Loss: 0.0007373309344984591


 57%|█████▋    | 569/1000 [02:51<02:10,  3.30it/s]

Step: 569, Loss: 0.0007154635968618095


 57%|█████▋    | 570/1000 [02:52<02:10,  3.30it/s]

Step: 570, Loss: 0.0007064062519930303


 57%|█████▋    | 571/1000 [02:52<02:10,  3.30it/s]

Step: 571, Loss: 0.0007106389384716749


 57%|█████▋    | 572/1000 [02:52<02:09,  3.30it/s]

Step: 572, Loss: 0.0007076922920532525


 57%|█████▋    | 573/1000 [02:53<02:09,  3.30it/s]

Step: 573, Loss: 0.0007244220469146967


 57%|█████▋    | 574/1000 [02:53<02:09,  3.29it/s]

Step: 574, Loss: 0.0007406420772895217


 57%|█████▊    | 575/1000 [02:53<02:08,  3.30it/s]

Step: 575, Loss: 0.0006967123481445014


 58%|█████▊    | 576/1000 [02:53<02:08,  3.29it/s]

Step: 576, Loss: 0.0007113468600437045


 58%|█████▊    | 577/1000 [02:54<02:08,  3.29it/s]

Step: 577, Loss: 0.0007231333293020725


 58%|█████▊    | 578/1000 [02:54<02:08,  3.29it/s]

Step: 578, Loss: 0.0006869912031106651


 58%|█████▊    | 579/1000 [02:54<02:07,  3.29it/s]

Step: 579, Loss: 0.000677935779094696


 58%|█████▊    | 580/1000 [02:55<02:07,  3.29it/s]

Step: 580, Loss: 0.0006632672739215195


 58%|█████▊    | 581/1000 [02:55<02:07,  3.29it/s]

Step: 581, Loss: 0.0006528896046802402


 58%|█████▊    | 582/1000 [02:55<02:06,  3.30it/s]

Step: 582, Loss: 0.0006551394471898675


 58%|█████▊    | 583/1000 [02:56<02:06,  3.30it/s]

Step: 583, Loss: 0.0006440350553020835


 58%|█████▊    | 584/1000 [02:56<02:06,  3.30it/s]

Step: 584, Loss: 0.0006253814790397882


 58%|█████▊    | 585/1000 [02:56<02:05,  3.30it/s]

Step: 585, Loss: 0.0006232501473277807


 59%|█████▊    | 586/1000 [02:57<02:05,  3.30it/s]

Step: 586, Loss: 0.0006118433666415513


 59%|█████▊    | 587/1000 [02:57<02:05,  3.30it/s]

Step: 587, Loss: 0.0006549490499310195


 59%|█████▉    | 588/1000 [02:57<02:05,  3.29it/s]

Step: 588, Loss: 0.0005927561433054507


 59%|█████▉    | 589/1000 [02:57<02:05,  3.29it/s]

Step: 589, Loss: 0.0005884992424398661


 59%|█████▉    | 590/1000 [02:58<02:04,  3.29it/s]

Step: 590, Loss: 0.0005978186381980777


 59%|█████▉    | 591/1000 [02:58<02:03,  3.30it/s]

Step: 591, Loss: 0.0006130692781880498


 59%|█████▉    | 592/1000 [02:58<02:03,  3.30it/s]

Step: 592, Loss: 0.0005869395099580288


 59%|█████▉    | 593/1000 [02:59<02:03,  3.30it/s]

Step: 593, Loss: 0.000606026325840503


 59%|█████▉    | 594/1000 [02:59<02:02,  3.30it/s]

Step: 594, Loss: 0.000564323621802032


 60%|█████▉    | 595/1000 [02:59<02:02,  3.30it/s]

Step: 595, Loss: 0.0005502185667864978


 60%|█████▉    | 596/1000 [03:00<02:02,  3.30it/s]

Step: 596, Loss: 0.0005616409471258521


 60%|█████▉    | 597/1000 [03:00<02:02,  3.30it/s]

Step: 597, Loss: 0.0005444701528176665


 60%|█████▉    | 598/1000 [03:00<02:01,  3.31it/s]

Step: 598, Loss: 0.0005431136814877391


 60%|█████▉    | 599/1000 [03:00<02:01,  3.31it/s]

Step: 599, Loss: 0.0005449355230666697


 60%|██████    | 600/1000 [03:01<02:00,  3.31it/s]

Step: 600, Loss: 0.0005484652938321233


 60%|██████    | 601/1000 [03:01<02:00,  3.32it/s]

Step: 601, Loss: 0.0005684791249223053


 60%|██████    | 602/1000 [03:01<01:59,  3.32it/s]

Step: 602, Loss: 0.0005654582637362182


 60%|██████    | 603/1000 [03:02<01:59,  3.32it/s]

Step: 603, Loss: 0.0005221968167461455


 60%|██████    | 604/1000 [03:02<01:59,  3.32it/s]

Step: 604, Loss: 0.0005147179472260177


 60%|██████    | 605/1000 [03:02<01:58,  3.32it/s]

Step: 605, Loss: 0.0005267010419629514


 61%|██████    | 606/1000 [03:03<01:58,  3.31it/s]

Step: 606, Loss: 0.0005061402334831655


 61%|██████    | 607/1000 [03:03<01:58,  3.31it/s]

Step: 607, Loss: 0.0005111495265737176


 61%|██████    | 608/1000 [03:03<01:58,  3.31it/s]

Step: 608, Loss: 0.0005057064699940383


 61%|██████    | 609/1000 [03:03<01:58,  3.31it/s]

Step: 609, Loss: 0.0004948151763528585


 61%|██████    | 610/1000 [03:04<01:57,  3.31it/s]

Step: 610, Loss: 0.0004975993069820106


 61%|██████    | 611/1000 [03:04<01:57,  3.31it/s]

Step: 611, Loss: 0.0005071384366601706


 61%|██████    | 612/1000 [03:04<01:56,  3.32it/s]

Step: 612, Loss: 0.0005098507972434163


 61%|██████▏   | 613/1000 [03:05<01:56,  3.32it/s]

Step: 613, Loss: 0.00047251966316252947


 61%|██████▏   | 614/1000 [03:05<01:56,  3.32it/s]

Step: 614, Loss: 0.00046454454422928393


 62%|██████▏   | 615/1000 [03:05<01:55,  3.33it/s]

Step: 615, Loss: 0.00045953094377182424


 62%|██████▏   | 616/1000 [03:06<01:55,  3.33it/s]

Step: 616, Loss: 0.00046237700735218823


 62%|██████▏   | 617/1000 [03:06<01:55,  3.33it/s]

Step: 617, Loss: 0.00045872811460867524


 62%|██████▏   | 618/1000 [03:06<01:54,  3.32it/s]

Step: 618, Loss: 0.00046040478628128767


 62%|██████▏   | 619/1000 [03:06<01:54,  3.33it/s]

Step: 619, Loss: 0.00044923098175786436


 62%|██████▏   | 620/1000 [03:07<01:54,  3.32it/s]

Step: 620, Loss: 0.0004349792725406587


 62%|██████▏   | 621/1000 [03:07<01:53,  3.33it/s]

Step: 621, Loss: 0.0004335929697845131


 62%|██████▏   | 622/1000 [03:07<01:53,  3.33it/s]

Step: 622, Loss: 0.0004451559216249734


 62%|██████▏   | 623/1000 [03:08<01:53,  3.32it/s]

Step: 623, Loss: 0.0004306515911594033


 62%|██████▏   | 624/1000 [03:08<01:53,  3.32it/s]

Step: 624, Loss: 0.00042139843571931124


 62%|██████▎   | 625/1000 [03:08<01:52,  3.32it/s]

Step: 625, Loss: 0.00041373010026291013


 63%|██████▎   | 626/1000 [03:09<01:53,  3.31it/s]

Step: 626, Loss: 0.00043691135942935944


 63%|██████▎   | 627/1000 [03:09<01:52,  3.30it/s]

Step: 627, Loss: 0.00041177100501954556


 63%|██████▎   | 628/1000 [03:09<01:52,  3.31it/s]

Step: 628, Loss: 0.0004067554837092757


 63%|██████▎   | 629/1000 [03:10<01:52,  3.31it/s]

Step: 629, Loss: 0.0004203153366688639


 63%|██████▎   | 630/1000 [03:10<01:51,  3.30it/s]

Step: 630, Loss: 0.00041054413304664195


 63%|██████▎   | 631/1000 [03:10<01:51,  3.31it/s]

Step: 631, Loss: 0.00039474281948059797


 63%|██████▎   | 632/1000 [03:10<01:51,  3.31it/s]

Step: 632, Loss: 0.0003930252860300243


 63%|██████▎   | 633/1000 [03:11<01:50,  3.32it/s]

Step: 633, Loss: 0.0003853403322864324


 63%|██████▎   | 634/1000 [03:11<01:49,  3.33it/s]

Step: 634, Loss: 0.0003900035808328539


 64%|██████▎   | 635/1000 [03:11<01:49,  3.33it/s]

Step: 635, Loss: 0.00039248436223715544


 64%|██████▎   | 636/1000 [03:12<01:49,  3.33it/s]

Step: 636, Loss: 0.00038050106377340853


 64%|██████▎   | 637/1000 [03:12<01:49,  3.32it/s]

Step: 637, Loss: 0.00037529607652686536


 64%|██████▍   | 638/1000 [03:12<01:49,  3.31it/s]

Step: 638, Loss: 0.00040561065543442965


 64%|██████▍   | 639/1000 [03:13<01:49,  3.31it/s]

Step: 639, Loss: 0.0003645550459623337


 64%|██████▍   | 640/1000 [03:13<01:48,  3.31it/s]

Step: 640, Loss: 0.0003686236450448632


 64%|██████▍   | 641/1000 [03:13<01:48,  3.32it/s]

Step: 641, Loss: 0.0003644841490313411


 64%|██████▍   | 642/1000 [03:13<01:47,  3.32it/s]

Step: 642, Loss: 0.00036632962292060256


 64%|██████▍   | 643/1000 [03:14<01:47,  3.33it/s]

Step: 643, Loss: 0.0003583853249438107


 64%|██████▍   | 644/1000 [03:14<01:46,  3.33it/s]

Step: 644, Loss: 0.00036367762368172407


 64%|██████▍   | 645/1000 [03:14<01:46,  3.33it/s]

Step: 645, Loss: 0.00034882070031017065


 65%|██████▍   | 646/1000 [03:15<01:46,  3.32it/s]

Step: 646, Loss: 0.00034386946936137974


 65%|██████▍   | 647/1000 [03:15<01:46,  3.32it/s]

Step: 647, Loss: 0.000339189893566072


 65%|██████▍   | 648/1000 [03:15<01:45,  3.32it/s]

Step: 648, Loss: 0.0003329889150336385


 65%|██████▍   | 649/1000 [03:16<01:45,  3.32it/s]

Step: 649, Loss: 0.0003271392488386482


 65%|██████▌   | 650/1000 [03:16<01:45,  3.33it/s]

Step: 650, Loss: 0.0003350093902554363


 65%|██████▌   | 651/1000 [03:16<01:44,  3.34it/s]

Step: 651, Loss: 0.0003282911202404648


 65%|██████▌   | 652/1000 [03:16<01:44,  3.33it/s]

Step: 652, Loss: 0.00034688482992351055


 65%|██████▌   | 653/1000 [03:17<01:44,  3.33it/s]

Step: 653, Loss: 0.0003227837441954762


 65%|██████▌   | 654/1000 [03:17<01:43,  3.33it/s]

Step: 654, Loss: 0.0003099751193076372


 66%|██████▌   | 655/1000 [03:17<01:43,  3.33it/s]

Step: 655, Loss: 0.0003207558474969119


 66%|██████▌   | 656/1000 [03:18<01:43,  3.33it/s]

Step: 656, Loss: 0.0003270007437095046


 66%|██████▌   | 657/1000 [03:18<01:42,  3.33it/s]

Step: 657, Loss: 0.00032227474730461836


 66%|██████▌   | 658/1000 [03:18<01:42,  3.33it/s]

Step: 658, Loss: 0.0003037727437913418


 66%|██████▌   | 659/1000 [03:19<01:42,  3.33it/s]

Step: 659, Loss: 0.00030903282458893955


 66%|██████▌   | 660/1000 [03:19<01:42,  3.33it/s]

Step: 660, Loss: 0.00029752831324003637


 66%|██████▌   | 661/1000 [03:19<01:41,  3.33it/s]

Step: 661, Loss: 0.00030468543991446495


 66%|██████▌   | 662/1000 [03:19<01:41,  3.32it/s]

Step: 662, Loss: 0.00029595845262520015


 66%|██████▋   | 663/1000 [03:20<01:41,  3.32it/s]

Step: 663, Loss: 0.0002935691154561937


 66%|██████▋   | 664/1000 [03:20<01:41,  3.33it/s]

Step: 664, Loss: 0.0002933964424300939


 66%|██████▋   | 665/1000 [03:20<01:40,  3.32it/s]

Step: 665, Loss: 0.0002990734647028148


 67%|██████▋   | 666/1000 [03:21<01:40,  3.32it/s]

Step: 666, Loss: 0.0002925543813034892


 67%|██████▋   | 667/1000 [03:21<01:39,  3.34it/s]

Step: 667, Loss: 0.00029748454107902944


 67%|██████▋   | 668/1000 [03:21<01:39,  3.33it/s]

Step: 668, Loss: 0.0002720159536693245


 67%|██████▋   | 669/1000 [03:22<01:39,  3.32it/s]

Step: 669, Loss: 0.0002727715182118118


 67%|██████▋   | 670/1000 [03:22<01:39,  3.32it/s]

Step: 670, Loss: 0.00027310263249091804


 67%|██████▋   | 671/1000 [03:22<01:38,  3.33it/s]

Step: 671, Loss: 0.0002802381932269782


 67%|██████▋   | 672/1000 [03:22<01:38,  3.33it/s]

Step: 672, Loss: 0.0002700932964216918


 67%|██████▋   | 673/1000 [03:23<01:38,  3.33it/s]

Step: 673, Loss: 0.00028303597355261445


 67%|██████▋   | 674/1000 [03:23<01:38,  3.32it/s]

Step: 674, Loss: 0.0002767100522760302


 68%|██████▊   | 675/1000 [03:23<01:37,  3.32it/s]

Step: 675, Loss: 0.00025510633713565767


 68%|██████▊   | 676/1000 [03:24<01:37,  3.33it/s]

Step: 676, Loss: 0.0002568133932072669


 68%|██████▊   | 677/1000 [03:24<01:37,  3.32it/s]

Step: 677, Loss: 0.0002617246937006712


 68%|██████▊   | 678/1000 [03:24<01:37,  3.31it/s]

Step: 678, Loss: 0.00025249135796912014


 68%|██████▊   | 679/1000 [03:25<01:37,  3.29it/s]

Step: 679, Loss: 0.0002544709714129567


 68%|██████▊   | 680/1000 [03:25<01:37,  3.30it/s]

Step: 680, Loss: 0.00025231883046217263


 68%|██████▊   | 681/1000 [03:25<01:36,  3.29it/s]

Step: 681, Loss: 0.0002508500183466822


 68%|██████▊   | 682/1000 [03:25<01:36,  3.29it/s]

Step: 682, Loss: 0.0002438242663629353


 68%|██████▊   | 683/1000 [03:26<01:35,  3.31it/s]

Step: 683, Loss: 0.00024486854090355337


 68%|██████▊   | 684/1000 [03:26<01:35,  3.32it/s]

Step: 684, Loss: 0.000248282914981246


 68%|██████▊   | 685/1000 [03:26<01:35,  3.32it/s]

Step: 685, Loss: 0.00024152213882189244


 69%|██████▊   | 686/1000 [03:27<01:34,  3.32it/s]

Step: 686, Loss: 0.00023127561144065112


 69%|██████▊   | 687/1000 [03:27<01:33,  3.34it/s]

Step: 687, Loss: 0.00023266002244781703


 69%|██████▉   | 688/1000 [03:27<01:33,  3.32it/s]

Step: 688, Loss: 0.00023337945458479226


 69%|██████▉   | 689/1000 [03:28<01:33,  3.32it/s]

Step: 689, Loss: 0.0002250705729238689


 69%|██████▉   | 690/1000 [03:28<01:33,  3.33it/s]

Step: 690, Loss: 0.00022893610002938658


 69%|██████▉   | 691/1000 [03:28<01:32,  3.32it/s]

Step: 691, Loss: 0.0002330253046238795


 69%|██████▉   | 692/1000 [03:28<01:32,  3.32it/s]

Step: 692, Loss: 0.0002249680255772546


 69%|██████▉   | 693/1000 [03:29<01:32,  3.34it/s]

Step: 693, Loss: 0.00021620403276756406


 69%|██████▉   | 694/1000 [03:29<01:31,  3.34it/s]

Step: 694, Loss: 0.00021820308757014573


 70%|██████▉   | 695/1000 [03:29<01:31,  3.33it/s]

Step: 695, Loss: 0.00020901000243611634


 70%|██████▉   | 696/1000 [03:30<01:31,  3.33it/s]

Step: 696, Loss: 0.00020919775124639273


 70%|██████▉   | 697/1000 [03:30<01:31,  3.33it/s]

Step: 697, Loss: 0.000215984953683801


 70%|██████▉   | 698/1000 [03:30<01:30,  3.33it/s]

Step: 698, Loss: 0.0002135945251211524


 70%|██████▉   | 699/1000 [03:31<01:30,  3.32it/s]

Step: 699, Loss: 0.00021370926697272807


 70%|███████   | 700/1000 [03:31<01:30,  3.33it/s]

Step: 700, Loss: 0.00020507491717580706


 70%|███████   | 701/1000 [03:31<01:29,  3.33it/s]

Step: 701, Loss: 0.00021525853662751615


 70%|███████   | 702/1000 [03:31<01:29,  3.32it/s]

Step: 702, Loss: 0.0002080652047879994


 70%|███████   | 703/1000 [03:32<01:29,  3.32it/s]

Step: 703, Loss: 0.00019543740199878812


 70%|███████   | 704/1000 [03:32<01:29,  3.32it/s]

Step: 704, Loss: 0.00021135908900760114


 70%|███████   | 705/1000 [03:32<01:28,  3.32it/s]

Step: 705, Loss: 0.00019472060375846922


 71%|███████   | 706/1000 [03:33<01:28,  3.31it/s]

Step: 706, Loss: 0.00019846571376547217


 71%|███████   | 707/1000 [03:33<01:28,  3.31it/s]

Step: 707, Loss: 0.00019654937204904854


 71%|███████   | 708/1000 [03:33<01:28,  3.30it/s]

Step: 708, Loss: 0.0001982912071980536


 71%|███████   | 709/1000 [03:34<01:28,  3.30it/s]

Step: 709, Loss: 0.00019291303760837764


 71%|███████   | 710/1000 [03:34<01:27,  3.30it/s]

Step: 710, Loss: 0.000191939776414074


 71%|███████   | 711/1000 [03:34<01:27,  3.30it/s]

Step: 711, Loss: 0.00019723169680219144


 71%|███████   | 712/1000 [03:35<01:27,  3.30it/s]

Step: 712, Loss: 0.00018003079458139837


 71%|███████▏  | 713/1000 [03:35<01:26,  3.31it/s]

Step: 713, Loss: 0.00018473011732567102


 71%|███████▏  | 714/1000 [03:35<01:26,  3.32it/s]

Step: 714, Loss: 0.00019903974316548556


 72%|███████▏  | 715/1000 [03:35<01:25,  3.32it/s]

Step: 715, Loss: 0.00018223024380858988


 72%|███████▏  | 716/1000 [03:36<01:25,  3.32it/s]

Step: 716, Loss: 0.00017900147940963507


 72%|███████▏  | 717/1000 [03:36<01:25,  3.32it/s]

Step: 717, Loss: 0.0001762041647452861


 72%|███████▏  | 718/1000 [03:36<01:24,  3.32it/s]

Step: 718, Loss: 0.0001837024319684133


 72%|███████▏  | 719/1000 [03:37<01:24,  3.31it/s]

Step: 719, Loss: 0.0001705579343251884


 72%|███████▏  | 720/1000 [03:37<01:24,  3.31it/s]

Step: 720, Loss: 0.0001679982669884339


 72%|███████▏  | 721/1000 [03:37<01:23,  3.32it/s]

Step: 721, Loss: 0.00017514241335447878


 72%|███████▏  | 722/1000 [03:38<01:23,  3.33it/s]

Step: 722, Loss: 0.00016208873421419412


 72%|███████▏  | 723/1000 [03:38<01:23,  3.33it/s]

Step: 723, Loss: 0.00017484738782513887


 72%|███████▏  | 724/1000 [03:38<01:22,  3.33it/s]

Step: 724, Loss: 0.00017019287042785436


 72%|███████▎  | 725/1000 [03:38<01:22,  3.33it/s]

Step: 725, Loss: 0.00016518407210242003


 73%|███████▎  | 726/1000 [03:39<01:22,  3.34it/s]

Step: 726, Loss: 0.00016300186689477414


 73%|███████▎  | 727/1000 [03:39<01:21,  3.34it/s]

Step: 727, Loss: 0.00016364619659725577


 73%|███████▎  | 728/1000 [03:39<01:21,  3.33it/s]

Step: 728, Loss: 0.00016058348410297185


 73%|███████▎  | 729/1000 [03:40<01:21,  3.33it/s]

Step: 729, Loss: 0.00015378498937934637


 73%|███████▎  | 730/1000 [03:40<01:21,  3.32it/s]

Step: 730, Loss: 0.0001547711290186271


 73%|███████▎  | 731/1000 [03:40<01:21,  3.31it/s]

Step: 731, Loss: 0.00014717176964040846


 73%|███████▎  | 732/1000 [03:41<01:20,  3.32it/s]

Step: 732, Loss: 0.00016130706353578717


 73%|███████▎  | 733/1000 [03:41<01:20,  3.32it/s]

Step: 733, Loss: 0.00014628033386543393


 73%|███████▎  | 734/1000 [03:41<01:20,  3.32it/s]

Step: 734, Loss: 0.0001447504764655605


 74%|███████▎  | 735/1000 [03:41<01:19,  3.33it/s]

Step: 735, Loss: 0.0001468329573981464


 74%|███████▎  | 736/1000 [03:42<01:19,  3.33it/s]

Step: 736, Loss: 0.000147145168739371


 74%|███████▎  | 737/1000 [03:42<01:19,  3.32it/s]

Step: 737, Loss: 0.00014475418720394373


 74%|███████▍  | 738/1000 [03:42<01:18,  3.32it/s]

Step: 738, Loss: 0.00014614767860621214


 74%|███████▍  | 739/1000 [03:43<01:18,  3.32it/s]

Step: 739, Loss: 0.0001408308744430542


 74%|███████▍  | 740/1000 [03:43<01:18,  3.32it/s]

Step: 740, Loss: 0.00014160864520817995


 74%|███████▍  | 741/1000 [03:43<01:18,  3.31it/s]

Step: 741, Loss: 0.00013691262574866414


 74%|███████▍  | 742/1000 [03:44<01:17,  3.32it/s]

Step: 742, Loss: 0.00013864973152521998


 74%|███████▍  | 743/1000 [03:44<01:17,  3.33it/s]

Step: 743, Loss: 0.0001363008195767179


 74%|███████▍  | 744/1000 [03:44<01:16,  3.33it/s]

Step: 744, Loss: 0.00013064849190413952


 74%|███████▍  | 745/1000 [03:44<01:16,  3.32it/s]

Step: 745, Loss: 0.00013197962834965438


 75%|███████▍  | 746/1000 [03:45<01:16,  3.31it/s]

Step: 746, Loss: 0.00013050298730377108


 75%|███████▍  | 747/1000 [03:45<01:16,  3.31it/s]

Step: 747, Loss: 0.00012844987213611603


 75%|███████▍  | 748/1000 [03:45<01:16,  3.30it/s]

Step: 748, Loss: 0.00013341562589630485


 75%|███████▍  | 749/1000 [03:46<01:16,  3.30it/s]

Step: 749, Loss: 0.0001279466669075191


 75%|███████▌  | 750/1000 [03:46<01:15,  3.30it/s]

Step: 750, Loss: 0.00012810509360861033


 75%|███████▌  | 751/1000 [03:46<01:15,  3.30it/s]

Step: 751, Loss: 0.0001239887351403013


 75%|███████▌  | 752/1000 [03:47<01:15,  3.30it/s]

Step: 752, Loss: 0.00012369199248496443


 75%|███████▌  | 753/1000 [03:47<01:14,  3.30it/s]

Step: 753, Loss: 0.00012407632311806083


 75%|███████▌  | 754/1000 [03:47<01:14,  3.30it/s]

Step: 754, Loss: 0.0001255015522474423


 76%|███████▌  | 755/1000 [03:47<01:14,  3.30it/s]

Step: 755, Loss: 0.00011965383600909263


 76%|███████▌  | 756/1000 [03:48<01:13,  3.30it/s]

Step: 756, Loss: 0.00011577654368011281


 76%|███████▌  | 757/1000 [03:48<01:13,  3.30it/s]

Step: 757, Loss: 0.00012111211981391534


 76%|███████▌  | 758/1000 [03:48<01:13,  3.31it/s]

Step: 758, Loss: 0.00011806811380665749


 76%|███████▌  | 759/1000 [03:49<01:12,  3.32it/s]

Step: 759, Loss: 0.00011452301259851083


 76%|███████▌  | 760/1000 [03:49<01:12,  3.32it/s]

Step: 760, Loss: 0.00011510005424497649


 76%|███████▌  | 761/1000 [03:49<01:12,  3.32it/s]

Step: 761, Loss: 0.00011664086196105927


 76%|███████▌  | 762/1000 [03:50<01:11,  3.32it/s]

Step: 762, Loss: 0.00011306088708806783


 76%|███████▋  | 763/1000 [03:50<01:11,  3.32it/s]

Step: 763, Loss: 0.00011421038652770221


 76%|███████▋  | 764/1000 [03:50<01:11,  3.31it/s]

Step: 764, Loss: 0.00010911661229329184


 76%|███████▋  | 765/1000 [03:50<01:10,  3.31it/s]

Step: 765, Loss: 0.00011199621803825721


 77%|███████▋  | 766/1000 [03:51<01:10,  3.32it/s]

Step: 766, Loss: 0.00010770090739242733


 77%|███████▋  | 767/1000 [03:51<01:10,  3.32it/s]

Step: 767, Loss: 0.00011140582500956953


 77%|███████▋  | 768/1000 [03:51<01:09,  3.32it/s]

Step: 768, Loss: 0.00010537695197854191


 77%|███████▋  | 769/1000 [03:52<01:09,  3.32it/s]

Step: 769, Loss: 0.00010639808169798926


 77%|███████▋  | 770/1000 [03:52<01:09,  3.32it/s]

Step: 770, Loss: 0.00010769635264296085


 77%|███████▋  | 771/1000 [03:52<01:08,  3.32it/s]

Step: 771, Loss: 0.00010078459308715537


 77%|███████▋  | 772/1000 [03:53<01:08,  3.32it/s]

Step: 772, Loss: 0.00010377840226283297


 77%|███████▋  | 773/1000 [03:53<01:08,  3.31it/s]

Step: 773, Loss: 0.00010315334657207131


 77%|███████▋  | 774/1000 [03:53<01:08,  3.31it/s]

Step: 774, Loss: 9.984385542338714e-05


 78%|███████▊  | 775/1000 [03:53<01:08,  3.30it/s]

Step: 775, Loss: 0.00010360816668253392


 78%|███████▊  | 776/1000 [03:54<01:07,  3.31it/s]

Step: 776, Loss: 0.00011605230247369036


 78%|███████▊  | 777/1000 [03:54<01:07,  3.30it/s]

Step: 777, Loss: 9.796288213692605e-05


 78%|███████▊  | 778/1000 [03:54<01:07,  3.30it/s]

Step: 778, Loss: 9.83278951025568e-05


 78%|███████▊  | 779/1000 [03:55<01:06,  3.30it/s]

Step: 779, Loss: 9.643202793085948e-05


 78%|███████▊  | 780/1000 [03:55<01:06,  3.31it/s]

Step: 780, Loss: 9.37885488383472e-05


 78%|███████▊  | 781/1000 [03:55<01:06,  3.31it/s]

Step: 781, Loss: 9.265854896511883e-05


 78%|███████▊  | 782/1000 [03:56<01:05,  3.32it/s]

Step: 782, Loss: 9.759133536135778e-05


 78%|███████▊  | 783/1000 [03:56<01:05,  3.32it/s]

Step: 783, Loss: 9.331344335805625e-05


 78%|███████▊  | 784/1000 [03:56<01:05,  3.32it/s]

Step: 784, Loss: 9.1541551228147e-05


 78%|███████▊  | 785/1000 [03:57<01:04,  3.32it/s]

Step: 785, Loss: 9.508125367574394e-05


 79%|███████▊  | 786/1000 [03:57<01:04,  3.31it/s]

Step: 786, Loss: 9.113948181038722e-05


 79%|███████▊  | 787/1000 [03:57<01:04,  3.31it/s]

Step: 787, Loss: 9.112363477470353e-05


 79%|███████▉  | 788/1000 [03:57<01:04,  3.31it/s]

Step: 788, Loss: 9.243725071428344e-05


 79%|███████▉  | 789/1000 [03:58<01:03,  3.30it/s]

Step: 789, Loss: 8.515825174981728e-05


 79%|███████▉  | 790/1000 [03:58<01:03,  3.30it/s]

Step: 790, Loss: 8.826885459711775e-05


 79%|███████▉  | 791/1000 [03:58<01:03,  3.30it/s]

Step: 791, Loss: 8.718519529793411e-05


 79%|███████▉  | 792/1000 [03:59<01:02,  3.30it/s]

Step: 792, Loss: 8.451683243038133e-05


 79%|███████▉  | 793/1000 [03:59<01:02,  3.31it/s]

Step: 793, Loss: 8.298418833874166e-05


 79%|███████▉  | 794/1000 [03:59<01:02,  3.30it/s]

Step: 794, Loss: 8.648130460642278e-05


 80%|███████▉  | 795/1000 [04:00<01:01,  3.31it/s]

Step: 795, Loss: 8.738000906305388e-05


 80%|███████▉  | 796/1000 [04:00<01:01,  3.30it/s]

Step: 796, Loss: 8.119233825709671e-05


 80%|███████▉  | 797/1000 [04:00<01:01,  3.31it/s]

Step: 797, Loss: 8.076461381278932e-05


 80%|███████▉  | 798/1000 [04:00<01:00,  3.32it/s]

Step: 798, Loss: 8.226502541219816e-05


 80%|███████▉  | 799/1000 [04:01<01:00,  3.32it/s]

Step: 799, Loss: 7.914110756246373e-05


 80%|████████  | 800/1000 [04:01<01:00,  3.32it/s]

Step: 800, Loss: 7.68678291933611e-05


 80%|████████  | 801/1000 [04:01<00:59,  3.32it/s]

Step: 801, Loss: 7.84510193625465e-05


 80%|████████  | 802/1000 [04:02<00:59,  3.32it/s]

Step: 802, Loss: 7.557886419817805e-05


 80%|████████  | 803/1000 [04:02<00:59,  3.32it/s]

Step: 803, Loss: 8.04278752184473e-05


 80%|████████  | 804/1000 [04:02<00:59,  3.31it/s]

Step: 804, Loss: 7.592088513774797e-05


 80%|████████  | 805/1000 [04:03<00:58,  3.31it/s]

Step: 805, Loss: 7.438362808898091e-05


 81%|████████  | 806/1000 [04:03<00:58,  3.33it/s]

Step: 806, Loss: 7.367520447587594e-05


 81%|████████  | 807/1000 [04:03<00:57,  3.33it/s]

Step: 807, Loss: 7.400102185783908e-05


 81%|████████  | 808/1000 [04:03<00:57,  3.33it/s]

Step: 808, Loss: 7.116160850273445e-05


 81%|████████  | 809/1000 [04:04<00:57,  3.32it/s]

Step: 809, Loss: 7.455371087417006e-05


 81%|████████  | 810/1000 [04:04<00:57,  3.32it/s]

Step: 810, Loss: 7.115939661161974e-05


 81%|████████  | 811/1000 [04:04<00:57,  3.31it/s]

Step: 811, Loss: 7.115049083949998e-05


 81%|████████  | 812/1000 [04:05<00:56,  3.30it/s]

Step: 812, Loss: 7.073883898556232e-05


 81%|████████▏ | 813/1000 [04:05<00:56,  3.30it/s]

Step: 813, Loss: 7.006279338384047e-05


 81%|████████▏ | 814/1000 [04:05<00:56,  3.31it/s]

Step: 814, Loss: 7.493879093090072e-05


 82%|████████▏ | 815/1000 [04:06<00:56,  3.30it/s]

Step: 815, Loss: 7.010414265096188e-05


 82%|████████▏ | 816/1000 [04:06<00:55,  3.30it/s]

Step: 816, Loss: 6.62084567011334e-05


 82%|████████▏ | 817/1000 [04:06<00:55,  3.30it/s]

Step: 817, Loss: 6.87516076141037e-05


 82%|████████▏ | 818/1000 [04:06<00:55,  3.30it/s]

Step: 818, Loss: 6.524824857478961e-05


 82%|████████▏ | 819/1000 [04:07<00:54,  3.30it/s]

Step: 819, Loss: 6.513996777357534e-05


 82%|████████▏ | 820/1000 [04:07<00:54,  3.30it/s]

Step: 820, Loss: 6.406340980902314e-05


 82%|████████▏ | 821/1000 [04:07<00:54,  3.30it/s]

Step: 821, Loss: 6.652692536590621e-05


 82%|████████▏ | 822/1000 [04:08<00:53,  3.30it/s]

Step: 822, Loss: 6.67400672682561e-05


 82%|████████▏ | 823/1000 [04:08<00:53,  3.31it/s]

Step: 823, Loss: 6.499111623270437e-05


 82%|████████▏ | 824/1000 [04:08<00:53,  3.32it/s]

Step: 824, Loss: 6.226982804946601e-05


 82%|████████▎ | 825/1000 [04:09<00:52,  3.32it/s]

Step: 825, Loss: 6.575474253622815e-05


 83%|████████▎ | 826/1000 [04:09<00:52,  3.33it/s]

Step: 826, Loss: 6.16105753579177e-05


 83%|████████▎ | 827/1000 [04:09<00:51,  3.33it/s]

Step: 827, Loss: 6.027458584867418e-05


 83%|████████▎ | 828/1000 [04:09<00:51,  3.32it/s]

Step: 828, Loss: 5.743527799495496e-05


 83%|████████▎ | 829/1000 [04:10<00:51,  3.31it/s]

Step: 829, Loss: 5.884545316803269e-05


 83%|████████▎ | 830/1000 [04:10<00:51,  3.31it/s]

Step: 830, Loss: 5.921996125834994e-05


 83%|████████▎ | 831/1000 [04:10<00:51,  3.30it/s]

Step: 831, Loss: 6.23268133495003e-05


 83%|████████▎ | 832/1000 [04:11<00:50,  3.30it/s]

Step: 832, Loss: 6.0027461586287245e-05


 83%|████████▎ | 833/1000 [04:11<00:50,  3.31it/s]

Step: 833, Loss: 5.976864122203551e-05


 83%|████████▎ | 834/1000 [04:11<00:50,  3.31it/s]

Step: 834, Loss: 5.6598768424009904e-05


 84%|████████▎ | 835/1000 [04:12<00:49,  3.31it/s]

Step: 835, Loss: 5.42816414963454e-05


 84%|████████▎ | 836/1000 [04:12<00:49,  3.32it/s]

Step: 836, Loss: 5.471878102980554e-05


 84%|████████▎ | 837/1000 [04:12<00:49,  3.32it/s]

Step: 837, Loss: 5.4002572142053396e-05


 84%|████████▍ | 838/1000 [04:13<00:48,  3.33it/s]

Step: 838, Loss: 5.5357158998958766e-05


 84%|████████▍ | 839/1000 [04:13<00:48,  3.32it/s]

Step: 839, Loss: 5.403013346949592e-05


 84%|████████▍ | 840/1000 [04:13<00:48,  3.32it/s]

Step: 840, Loss: 5.3176878282101825e-05


 84%|████████▍ | 841/1000 [04:13<00:47,  3.31it/s]

Step: 841, Loss: 5.282509300741367e-05


 84%|████████▍ | 842/1000 [04:14<00:47,  3.31it/s]

Step: 842, Loss: 5.274578870739788e-05


 84%|████████▍ | 843/1000 [04:14<00:47,  3.32it/s]

Step: 843, Loss: 5.2049090299988165e-05


 84%|████████▍ | 844/1000 [04:14<00:47,  3.31it/s]

Step: 844, Loss: 5.261691694613546e-05


 84%|████████▍ | 845/1000 [04:15<00:46,  3.31it/s]

Step: 845, Loss: 5.146273542777635e-05


 85%|████████▍ | 846/1000 [04:15<00:46,  3.30it/s]

Step: 846, Loss: 5.0236249080626294e-05


 85%|████████▍ | 847/1000 [04:15<00:46,  3.30it/s]

Step: 847, Loss: 4.89777339680586e-05


 85%|████████▍ | 848/1000 [04:16<00:45,  3.30it/s]

Step: 848, Loss: 4.912056101602502e-05


 85%|████████▍ | 849/1000 [04:16<00:45,  3.30it/s]

Step: 849, Loss: 4.946529952576384e-05


 85%|████████▌ | 850/1000 [04:16<00:45,  3.31it/s]

Step: 850, Loss: 4.827846714761108e-05


 85%|████████▌ | 851/1000 [04:16<00:45,  3.30it/s]

Step: 851, Loss: 4.7845536755630746e-05


 85%|████████▌ | 852/1000 [04:17<00:44,  3.31it/s]

Step: 852, Loss: 4.7221677959896624e-05


 85%|████████▌ | 853/1000 [04:17<00:44,  3.32it/s]

Step: 853, Loss: 4.6405613829847425e-05


 85%|████████▌ | 854/1000 [04:17<00:43,  3.32it/s]

Step: 854, Loss: 4.700342833530158e-05


 86%|████████▌ | 855/1000 [04:18<00:43,  3.33it/s]

Step: 855, Loss: 4.7685298341093585e-05


 86%|████████▌ | 856/1000 [04:18<00:43,  3.33it/s]

Step: 856, Loss: 4.583773988997564e-05


 86%|████████▌ | 857/1000 [04:18<00:43,  3.32it/s]

Step: 857, Loss: 4.51666783192195e-05


 86%|████████▌ | 858/1000 [04:19<00:42,  3.31it/s]

Step: 858, Loss: 4.3577951146289706e-05


 86%|████████▌ | 859/1000 [04:19<00:42,  3.31it/s]

Step: 859, Loss: 4.435221489984542e-05


 86%|████████▌ | 860/1000 [04:19<00:42,  3.31it/s]

Step: 860, Loss: 4.38306997239124e-05


 86%|████████▌ | 861/1000 [04:19<00:41,  3.31it/s]

Step: 861, Loss: 4.311544034862891e-05


 86%|████████▌ | 862/1000 [04:20<00:41,  3.31it/s]

Step: 862, Loss: 4.343803084339015e-05


 86%|████████▋ | 863/1000 [04:20<00:41,  3.31it/s]

Step: 863, Loss: 4.3926014768658206e-05


 86%|████████▋ | 864/1000 [04:20<00:41,  3.31it/s]

Step: 864, Loss: 4.452306166058406e-05


 86%|████████▋ | 865/1000 [04:21<00:40,  3.31it/s]

Step: 865, Loss: 4.131121386308223e-05


 87%|████████▋ | 866/1000 [04:21<00:40,  3.30it/s]

Step: 866, Loss: 4.0236671338789165e-05


 87%|████████▋ | 867/1000 [04:21<00:40,  3.31it/s]

Step: 867, Loss: 4.577814252115786e-05


 87%|████████▋ | 868/1000 [04:22<00:39,  3.32it/s]

Step: 868, Loss: 4.124231054447591e-05


 87%|████████▋ | 869/1000 [04:22<00:39,  3.32it/s]

Step: 869, Loss: 4.1204883018508554e-05


 87%|████████▋ | 870/1000 [04:22<00:39,  3.31it/s]

Step: 870, Loss: 4.592802361003123e-05


 87%|████████▋ | 871/1000 [04:22<00:38,  3.31it/s]

Step: 871, Loss: 3.980625842814334e-05


 87%|████████▋ | 872/1000 [04:23<00:38,  3.31it/s]

Step: 872, Loss: 4.194642315269448e-05


 87%|████████▋ | 873/1000 [04:23<00:38,  3.31it/s]

Step: 873, Loss: 4.096484190085903e-05


 87%|████████▋ | 874/1000 [04:23<00:38,  3.30it/s]

Step: 874, Loss: 3.902205571648665e-05


 88%|████████▊ | 875/1000 [04:24<00:37,  3.30it/s]

Step: 875, Loss: 3.726970317075029e-05


 88%|████████▊ | 876/1000 [04:24<00:37,  3.31it/s]

Step: 876, Loss: 3.929374724975787e-05


 88%|████████▊ | 877/1000 [04:24<00:37,  3.30it/s]

Step: 877, Loss: 3.6339588405098766e-05


 88%|████████▊ | 878/1000 [04:25<00:36,  3.30it/s]

Step: 878, Loss: 3.7252222682582214e-05


 88%|████████▊ | 879/1000 [04:25<00:36,  3.29it/s]

Step: 879, Loss: 3.610011117416434e-05


 88%|████████▊ | 880/1000 [04:25<00:36,  3.30it/s]

Step: 880, Loss: 3.6563102185027674e-05


 88%|████████▊ | 881/1000 [04:26<00:36,  3.30it/s]

Step: 881, Loss: 3.612854197854176e-05


 88%|████████▊ | 882/1000 [04:26<00:35,  3.30it/s]

Step: 882, Loss: 3.720019230968319e-05


 88%|████████▊ | 883/1000 [04:26<00:35,  3.30it/s]

Step: 883, Loss: 3.7342673749662936e-05


 88%|████████▊ | 884/1000 [04:26<00:35,  3.30it/s]

Step: 884, Loss: 3.399084380362183e-05


 88%|████████▊ | 885/1000 [04:27<00:34,  3.30it/s]

Step: 885, Loss: 3.426515468163416e-05


 89%|████████▊ | 886/1000 [04:27<00:34,  3.31it/s]

Step: 886, Loss: 3.4381555451545864e-05


 89%|████████▊ | 887/1000 [04:27<00:34,  3.30it/s]

Step: 887, Loss: 3.5271219530841336e-05


 89%|████████▉ | 888/1000 [04:28<00:33,  3.30it/s]

Step: 888, Loss: 3.32523813995067e-05


 89%|████████▉ | 889/1000 [04:28<00:33,  3.30it/s]

Step: 889, Loss: 3.6373126931721345e-05


 89%|████████▉ | 890/1000 [04:28<00:33,  3.30it/s]

Step: 890, Loss: 3.4110846172552556e-05


 89%|████████▉ | 891/1000 [04:29<00:33,  3.30it/s]

Step: 891, Loss: 3.2492920581717044e-05


 89%|████████▉ | 892/1000 [04:29<00:32,  3.30it/s]

Step: 892, Loss: 3.156623643008061e-05


 89%|████████▉ | 893/1000 [04:29<00:32,  3.30it/s]

Step: 893, Loss: 3.4167798730777577e-05


 89%|████████▉ | 894/1000 [04:29<00:32,  3.30it/s]

Step: 894, Loss: 3.2311589166056365e-05


 90%|████████▉ | 895/1000 [04:30<00:31,  3.30it/s]

Step: 895, Loss: 3.6557285056915134e-05


 90%|████████▉ | 896/1000 [04:30<00:31,  3.30it/s]

Step: 896, Loss: 3.0257126127253287e-05


 90%|████████▉ | 897/1000 [04:30<00:31,  3.30it/s]

Step: 897, Loss: 3.203344385838136e-05


 90%|████████▉ | 898/1000 [04:31<00:30,  3.30it/s]

Step: 898, Loss: 3.167886097799055e-05


 90%|████████▉ | 899/1000 [04:31<00:30,  3.30it/s]

Step: 899, Loss: 3.0446130040218122e-05


 90%|█████████ | 900/1000 [04:31<00:30,  3.30it/s]

Step: 900, Loss: 3.057122739846818e-05


 90%|█████████ | 901/1000 [04:32<00:30,  3.30it/s]

Step: 901, Loss: 3.2381052733398974e-05


 90%|█████████ | 902/1000 [04:32<00:29,  3.29it/s]

Step: 902, Loss: 3.0302509912871756e-05


 90%|█████████ | 903/1000 [04:32<00:29,  3.30it/s]

Step: 903, Loss: 2.9460357836796902e-05


 90%|█████████ | 904/1000 [04:32<00:29,  3.30it/s]

Step: 904, Loss: 2.9223156161606312e-05


 90%|█████████ | 905/1000 [04:33<00:28,  3.29it/s]

Step: 905, Loss: 2.9207823899923824e-05


 91%|█████████ | 906/1000 [04:33<00:28,  3.30it/s]

Step: 906, Loss: 2.7903464797418565e-05


 91%|█████████ | 907/1000 [04:33<00:28,  3.29it/s]

Step: 907, Loss: 2.8416239729267545e-05


 91%|█████████ | 908/1000 [04:34<00:27,  3.29it/s]

Step: 908, Loss: 2.8420474336599e-05


 91%|█████████ | 909/1000 [04:34<00:27,  3.30it/s]

Step: 909, Loss: 2.794485590129625e-05


 91%|█████████ | 910/1000 [04:34<00:27,  3.29it/s]

Step: 910, Loss: 2.829160075634718e-05


 91%|█████████ | 911/1000 [04:35<00:27,  3.29it/s]

Step: 911, Loss: 2.711214983719401e-05


 91%|█████████ | 912/1000 [04:35<00:26,  3.29it/s]

Step: 912, Loss: 2.654071431607008e-05


 91%|█████████▏| 913/1000 [04:35<00:26,  3.29it/s]

Step: 913, Loss: 2.7742620659410022e-05


 91%|█████████▏| 914/1000 [04:36<00:26,  3.30it/s]

Step: 914, Loss: 2.592001146695111e-05


 92%|█████████▏| 915/1000 [04:36<00:25,  3.30it/s]

Step: 915, Loss: 2.6015091862063855e-05


 92%|█████████▏| 916/1000 [04:36<00:25,  3.30it/s]

Step: 916, Loss: 2.6618676201906055e-05


 92%|█████████▏| 917/1000 [04:36<00:25,  3.29it/s]

Step: 917, Loss: 2.68584699369967e-05


 92%|█████████▏| 918/1000 [04:37<00:24,  3.30it/s]

Step: 918, Loss: 2.542191396059934e-05


 92%|█████████▏| 919/1000 [04:37<00:24,  3.30it/s]

Step: 919, Loss: 2.5054778234334663e-05


 92%|█████████▏| 920/1000 [04:37<00:24,  3.30it/s]

Step: 920, Loss: 2.5317014660686255e-05


 92%|█████████▏| 921/1000 [04:38<00:23,  3.30it/s]

Step: 921, Loss: 2.3852124286349863e-05


 92%|█████████▏| 922/1000 [04:38<00:23,  3.31it/s]

Step: 922, Loss: 2.3746371880406514e-05


 92%|█████████▏| 923/1000 [04:38<00:23,  3.32it/s]

Step: 923, Loss: 2.389883593423292e-05


 92%|█████████▏| 924/1000 [04:39<00:22,  3.32it/s]

Step: 924, Loss: 2.4601942641311325e-05


 92%|█████████▎| 925/1000 [04:39<00:22,  3.33it/s]

Step: 925, Loss: 2.467139893269632e-05


 93%|█████████▎| 926/1000 [04:39<00:22,  3.32it/s]

Step: 926, Loss: 2.3232447347254492e-05


 93%|█████████▎| 927/1000 [04:39<00:22,  3.31it/s]

Step: 927, Loss: 2.2750806238036603e-05


 93%|█████████▎| 928/1000 [04:40<00:21,  3.32it/s]

Step: 928, Loss: 2.249355020467192e-05


 93%|█████████▎| 929/1000 [04:40<00:21,  3.31it/s]

Step: 929, Loss: 2.1995552742737345e-05


 93%|█████████▎| 930/1000 [04:40<00:21,  3.30it/s]

Step: 930, Loss: 2.29769830184523e-05


 93%|█████████▎| 931/1000 [04:41<00:20,  3.30it/s]

Step: 931, Loss: 2.2576852643396705e-05


 93%|█████████▎| 932/1000 [04:41<00:20,  3.31it/s]

Step: 932, Loss: 2.1442619981826283e-05


 93%|█████████▎| 933/1000 [04:41<00:20,  3.31it/s]

Step: 933, Loss: 2.2910588086233474e-05


 93%|█████████▎| 934/1000 [04:42<00:19,  3.30it/s]

Step: 934, Loss: 2.3039681764203124e-05


 94%|█████████▎| 935/1000 [04:42<00:19,  3.30it/s]

Step: 935, Loss: 2.0926860088366084e-05


 94%|█████████▎| 936/1000 [04:42<00:19,  3.31it/s]

Step: 936, Loss: 2.089725421683397e-05


 94%|█████████▎| 937/1000 [04:42<00:19,  3.31it/s]

Step: 937, Loss: 2.073724681395106e-05


 94%|█████████▍| 938/1000 [04:43<00:18,  3.32it/s]

Step: 938, Loss: 2.051547562587075e-05


 94%|█████████▍| 939/1000 [04:43<00:18,  3.32it/s]

Step: 939, Loss: 2.182140997319948e-05


 94%|█████████▍| 940/1000 [04:43<00:18,  3.33it/s]

Step: 940, Loss: 2.0081319235032424e-05


 94%|█████████▍| 941/1000 [04:44<00:17,  3.31it/s]

Step: 941, Loss: 2.0516088625299744e-05


 94%|█████████▍| 942/1000 [04:44<00:17,  3.31it/s]

Step: 942, Loss: 2.1644966182066128e-05


 94%|█████████▍| 943/1000 [04:44<00:17,  3.31it/s]

Step: 943, Loss: 1.9608747606980614e-05


 94%|█████████▍| 944/1000 [04:45<00:16,  3.31it/s]

Step: 944, Loss: 1.9803648683591746e-05


 94%|█████████▍| 945/1000 [04:45<00:16,  3.30it/s]

Step: 945, Loss: 1.9294469893793575e-05


 95%|█████████▍| 946/1000 [04:45<00:16,  3.30it/s]

Step: 946, Loss: 1.901503128465265e-05


 95%|█████████▍| 947/1000 [04:45<00:16,  3.30it/s]

Step: 947, Loss: 1.9273073121439666e-05


 95%|█████████▍| 948/1000 [04:46<00:15,  3.30it/s]

Step: 948, Loss: 1.8434222511132248e-05


 95%|█████████▍| 949/1000 [04:46<00:15,  3.28it/s]

Step: 949, Loss: 1.838959360611625e-05


 95%|█████████▌| 950/1000 [04:46<00:15,  3.28it/s]

Step: 950, Loss: 1.8752265532384627e-05


 95%|█████████▌| 951/1000 [04:47<00:14,  3.29it/s]

Step: 951, Loss: 1.899664493976161e-05


 95%|█████████▌| 952/1000 [04:47<00:14,  3.29it/s]

Step: 952, Loss: 1.766368222888559e-05


 95%|█████████▌| 953/1000 [04:47<00:14,  3.30it/s]

Step: 953, Loss: 1.8019292838289402e-05


 95%|█████████▌| 954/1000 [04:48<00:13,  3.30it/s]

Step: 954, Loss: 1.8426086171530187e-05


 96%|█████████▌| 955/1000 [04:48<00:13,  3.30it/s]

Step: 955, Loss: 1.7191692677442916e-05


 96%|█████████▌| 956/1000 [04:48<00:13,  3.31it/s]

Step: 956, Loss: 1.8533079128246754e-05


 96%|█████████▌| 957/1000 [04:49<00:12,  3.32it/s]

Step: 957, Loss: 1.727910239424091e-05


 96%|█████████▌| 958/1000 [04:49<00:12,  3.31it/s]

Step: 958, Loss: 1.7212534658028744e-05


 96%|█████████▌| 959/1000 [04:49<00:12,  3.32it/s]

Step: 959, Loss: 1.6995361875160597e-05


 96%|█████████▌| 960/1000 [04:49<00:12,  3.31it/s]

Step: 960, Loss: 1.7259413652936928e-05


 96%|█████████▌| 961/1000 [04:50<00:11,  3.31it/s]

Step: 961, Loss: 1.751239142322447e-05


 96%|█████████▌| 962/1000 [04:50<00:11,  3.31it/s]

Step: 962, Loss: 1.6428728486062028e-05


 96%|█████████▋| 963/1000 [04:50<00:11,  3.31it/s]

Step: 963, Loss: 1.677878390182741e-05


 96%|█████████▋| 964/1000 [04:51<00:10,  3.31it/s]

Step: 964, Loss: 1.6211784895858727e-05


 96%|█████████▋| 965/1000 [04:51<00:10,  3.30it/s]

Step: 965, Loss: 1.5702646123827435e-05


 97%|█████████▋| 966/1000 [04:51<00:10,  3.30it/s]

Step: 966, Loss: 1.6700099877198227e-05


 97%|█████████▋| 967/1000 [04:52<00:09,  3.30it/s]

Step: 967, Loss: 1.5601721315761097e-05


 97%|█████████▋| 968/1000 [04:52<00:09,  3.30it/s]

Step: 968, Loss: 1.638739377085585e-05


 97%|█████████▋| 969/1000 [04:52<00:09,  3.30it/s]

Step: 969, Loss: 1.6552547094761394e-05


 97%|█████████▋| 970/1000 [04:52<00:09,  3.31it/s]

Step: 970, Loss: 1.5242693734762724e-05


 97%|█████████▋| 971/1000 [04:53<00:08,  3.32it/s]

Step: 971, Loss: 1.4951315279176924e-05


 97%|█████████▋| 972/1000 [04:53<00:08,  3.31it/s]

Step: 972, Loss: 1.5686244296375662e-05


 97%|█████████▋| 973/1000 [04:53<00:08,  3.32it/s]

Step: 973, Loss: 1.4850240404484794e-05


 97%|█████████▋| 974/1000 [04:54<00:07,  3.32it/s]

Step: 974, Loss: 1.4117166756477673e-05


 98%|█████████▊| 975/1000 [04:54<00:07,  3.31it/s]

Step: 975, Loss: 1.4807232219027355e-05


 98%|█████████▊| 976/1000 [04:54<00:07,  3.31it/s]

Step: 976, Loss: 1.4190334695740603e-05


 98%|█████████▊| 977/1000 [04:55<00:06,  3.32it/s]

Step: 977, Loss: 1.4327046301332302e-05


 98%|█████████▊| 978/1000 [04:55<00:06,  3.32it/s]

Step: 978, Loss: 1.397966389049543e-05


 98%|█████████▊| 979/1000 [04:55<00:06,  3.32it/s]

Step: 979, Loss: 1.4307214769360144e-05


 98%|█████████▊| 980/1000 [04:55<00:06,  3.32it/s]

Step: 980, Loss: 1.370034533465514e-05


 98%|█████████▊| 981/1000 [04:56<00:05,  3.32it/s]

Step: 981, Loss: 1.3779502296529245e-05


 98%|█████████▊| 982/1000 [04:56<00:05,  3.32it/s]

Step: 982, Loss: 1.3391563697950915e-05


 98%|█████████▊| 983/1000 [04:56<00:05,  3.32it/s]

Step: 983, Loss: 1.4346836906042881e-05


 98%|█████████▊| 984/1000 [04:57<00:04,  3.31it/s]

Step: 984, Loss: 1.4158467820379883e-05


 98%|█████████▊| 985/1000 [04:57<00:04,  3.31it/s]

Step: 985, Loss: 1.3309864698385354e-05


 99%|█████████▊| 986/1000 [04:57<00:04,  3.32it/s]

Step: 986, Loss: 1.3805859452986624e-05


 99%|█████████▊| 987/1000 [04:58<00:03,  3.30it/s]

Step: 987, Loss: 1.3092685549054295e-05


 99%|█████████▉| 988/1000 [04:58<00:03,  3.29it/s]

Step: 988, Loss: 1.328112466580933e-05


 99%|█████████▉| 989/1000 [04:58<00:03,  3.29it/s]

Step: 989, Loss: 1.2995699762541335e-05


 99%|█████████▉| 990/1000 [04:58<00:03,  3.29it/s]

Step: 990, Loss: 1.3388536899583414e-05


 99%|█████████▉| 991/1000 [04:59<00:02,  3.28it/s]

Step: 991, Loss: 1.2387923561618663e-05


 99%|█████████▉| 992/1000 [04:59<00:02,  3.28it/s]

Step: 992, Loss: 1.2374139259918593e-05


 99%|█████████▉| 993/1000 [04:59<00:02,  3.30it/s]

Step: 993, Loss: 1.2373382560326718e-05


 99%|█████████▉| 994/1000 [05:00<00:01,  3.30it/s]

Step: 994, Loss: 1.2059212167514488e-05


100%|█████████▉| 995/1000 [05:00<00:01,  3.30it/s]

Step: 995, Loss: 1.1544240805960726e-05


100%|█████████▉| 996/1000 [05:00<00:01,  3.30it/s]

Step: 996, Loss: 1.1742372407752555e-05


100%|█████████▉| 997/1000 [05:01<00:00,  3.29it/s]

Step: 997, Loss: 1.3008901078137569e-05


100%|█████████▉| 998/1000 [05:01<00:00,  3.30it/s]

Step: 998, Loss: 1.1642241588560864e-05


100%|█████████▉| 999/1000 [05:01<00:00,  3.30it/s]

Step: 999, Loss: 1.2179474651929922e-05


100%|██████████| 1000/1000 [05:02<00:00,  3.31it/s]

Step: 1000, Loss: 1.1905620340257883e-05





In [None]:
# Put the model into evaluation mode before the manual checks below.
transformer.eval()

# Hand-made 50-token sample: ids 833..842 followed by forty zeros
# (zeros are presumably the padding id -- confirm against the model's mask logic).
src_sample = torch.cat(
    (
        torch.arange(833, 843, dtype=torch.int64),
        torch.zeros(40, dtype=torch.int64),
    )
)

In [None]:
# Preview the sample with a leading batch axis added (1 x 50).
torch.unsqueeze(src_sample, 0)

tensor([[833, 834, 835, 836, 837, 838, 839, 840, 841, 842,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0]])

In [None]:
# Feed the same single-sequence batch as both the encoder input and the decoder input.
# The batch is built and moved to the device once, and autograd is switched off
# because this is inference only (no graph is needed, so none is kept in memory).
src_batch = src_sample.unsqueeze(0).to(device)
with torch.no_grad():
    res = transformer(src_batch, src_batch)

In [None]:
# Predicted token id per position: drop the size-1 axes, then take the argmax
# along dim 1 (presumably the per-token vocabulary scores -- confirm against the model output).
torch.argmax(res.squeeze(), dim=1)

tensor([834, 835, 836, 837, 838, 839, 840, 841, 842, 843,  47,  47,  47,  47,
         47,  47,  47,  47,  47,  47,  47,  47,  47,  47,  47,  47,  47,  47,
         47,  47,  47,  47,  47,  47, 250, 250, 250, 250, 250, 250, 250, 250,
        250, 250,  47,  47,  47, 250, 250, 250], device='cuda:0')

In [None]:
# Helper that builds an integer sequence from the given parameters
def generate_sequence(start, length):
    """Return a 1-D int64 tensor holding `length` consecutive integers beginning at `start`."""
    stop = start + length  # exclusive upper bound for arange
    return torch.arange(start, stop, dtype=torch.int64)

def evaluate_single_sequence(transformer, seq_length=50, enc_start=1, dec_start=50):
    """Run the model once on a hand-built consecutive-integer sequence and print the result.

    The encoder receives ``enc_start .. enc_start + seq_length - 1`` and the decoder
    receives ``dec_start .. dec_start + seq_length - 1``. The printed target is the
    decoder input shifted one step ahead. With the defaults this reproduces the
    original hard-coded check (encoder 1..50, decoder 50..99, target 51..100).

    Args:
        transformer: the model; called as ``transformer(enc_batch, dec_batch)``.
        seq_length: number of tokens in each sequence.
        enc_start: first token id of the encoder input.
        dec_start: first token id of the decoder input.

    Returns:
        None. The inputs, the prediction and the target are printed.
    """
    enc_src = generate_sequence(enc_start, seq_length)
    dec_src = generate_sequence(dec_start, seq_length)
    # Build the target: the decoder input shifted by one position
    trg = generate_sequence(dec_start + 1, seq_length)

    # Add the batch dimension and move the inputs to the module-level `device`
    enc_src_tensor = enc_src.unsqueeze(0).to(device)
    dec_src_tensor = dec_src.unsqueeze(0).to(device)

    # Evaluate in eval mode and without autograd, regardless of what earlier cells
    # did; the previous train/eval mode is restored afterwards.
    was_training = transformer.training
    transformer.eval()
    try:
        with torch.no_grad():
            # Pass the input sequences through the model
            output = transformer(enc_src_tensor, dec_src_tensor)
    finally:
        transformer.train(was_training)

    # Convert the output tensor into a sequence of token ids. Only the batch axis
    # is squeezed, so a length-1 sequence still yields a list rather than a bare int.
    predicted_sequence = torch.argmax(output, dim=-1).squeeze(0).tolist()

    # Print the input, predicted and target sequences
    print("Входная последовательность (enc_src):", enc_src)
    print("Входная последовательность (dec_src):", dec_src)
    print("Предсказанная последовательность:", predicted_sequence)
    print("Целевая последовательность:", trg)
    print("=" * 50)

# Run the evaluation on a single hand-built sequence
evaluate_single_sequence(transformer)


Входная последовательность (enc_src): tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])
Входная последовательность (dec_src): tensor([50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
        86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
Предсказанная последовательность: [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
Целевая последовательность: tensor([ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
         79,  80,  81,  82,  83,  84,  85,  86,  87,  