<a href="https://colab.research.google.com/github/NiloyPurkait/GSoC-2020/blob/master/RDF2Txt_GraphEncoder_RNNDecoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install sentencepiece



In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals


import pickle
import math
import os
import re
import unicodedata
from functools import reduce
import numpy as np
from tqdm import tqdm
import time
import sentencepiece as spm

import tensorflow as tf
tf.compat.v1.enable_eager_execution()
#import tensorflow.contrib.slim as slim


In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:

#tiny number
_NEG_INF = -1e9

## Preprocessing Helper functions 


In [0]:



def _tensorize(vocab, text):
    """
    Function to convert texts into number sequences first, and then
    add padding. Basically, tensorising them.
    :param vocab: The vocab which is used to lookup ids
    :type vocab: tf.tokenizer obj
    :param text: A list of sentences or a text file
    :type text: list
    :return: tensorised text data
    :rtype: tf.tensor
    """
    tensor = vocab.texts_to_sequences(text)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                           padding='post')

    return tensor


In [0]:
def max_length(tensor):
    return max(len(t) for t in tensor)

In [0]:

def padding(tensor, max_length):
    """
    Pads the given tensor to a maximum sequence length along
    axis 1.
    for ex -
    let the tensor be [1,2,3,4] if th given max_length is 5
    the tensor becomes [1,2,34,0]
    Mostly used to pad the target sentences of the multilingual
    model and the node_list of all models,

    :param tensor:A tf tensor
    :type tensor:tf.tensor
    :param max_length:Dimension along axis 1, of the new tensor
    :type max_length:int
    :return:The padded tensor
    :rtype:tf tensor.
    """

    padding = tf.constant([[0, 0], [0, max_length - tensor.shape[1]]])
    padded_tensor = tf.pad(tensor, padding, mode='CONSTANT')

    return padded_tensor

In [0]:
# from src.DataLoader imports

## Dataset Loading Functions

In [0]:
def LoadGatDataset(train_path, eval_path, test_path, srv_vocab,
                   tgt_vocab, opt, sentencepiece, lang, num_examples=None):
    train_ = {}
    eval_ = {}
    test_ = {}

    if opt == 'reif':
        # load the train and eval datasets
        with open(train_path, 'rb') as f:
            train_set = pickle.load(f)
        with open(eval_path, 'rb') as f:
            eval_set = pickle.load(f)
        with open(test_path, 'rb') as f:
            test_set = pickle.load(f)

        # load vocab
        if sentencepiece == 'True':
            sp = spm.SentencePieceProcessor()
            sp.load(tgt_vocab)
        with open(srv_vocab, 'rb') as f:
            src_vocab = pickle.load(f)

        train_input, train_tgt = zip(*train_set)
        eval_input, eval_tgt = zip(*eval_set)
        (train_nodes, train_labels, train_node1, train_node2) = zip(*train_input)
        (eval_nodes, eval_labels, eval_node1, eval_node2) = zip(*eval_input)
        (test_nodes, test_labels, test_node1, test_node2) = zip(*test_set)

        train_["train_node_tensor"] = _tensorize(src_vocab, train_nodes)
        train_["train_label_tensor"] = _tensorize(src_vocab, train_labels)
        train_["train_node1_tensor"] = _tensorize(src_vocab, train_node1)
        train_["train_node2_tensor"] = _tensorize(src_vocab, train_node2)

        eval_["eval_node_tensor"] = _tensorize(src_vocab, eval_nodes)
        eval_["eval_label_tensor"] = _tensorize(src_vocab, eval_labels)
        eval_["eval_node1_tensor"] = _tensorize(src_vocab, eval_node1)
        eval_["eval_node2_tensor"] = _tensorize(src_vocab, eval_node2)

        test_["test_node_tensor"] = _tensorize(src_vocab, test_nodes)
        test_["test_label_tensor"] = _tensorize(src_vocab, test_labels)
        test_["test_node1_tensor"] = _tensorize(src_vocab, test_node1)
        test_["test_node2_tensor"] = _tensorize(src_vocab, test_node1)

        #######exp######
        if sentencepiece == 'True':
            train_tgt_tensor = [sp.encode_as_ids(w) for w in train_tgt]
            train_["train_tgt_tensor"] = tf.keras.preprocessing.sequence.pad_sequences(train_tgt_tensor, padding='post')
            eval_tgt_tensor = [sp.encode_as_ids(w) for w in eval_tgt]
            eval_["eval_tgt_tensor"] = tf.keras.preprocessing.sequence.pad_sequences(eval_tgt_tensor, padding='post')
            target_vocab = sp
        else:
            train_tgt_tensor = src_vocab.texts_to_sequences(train_tgt)
            train_["train_tgt_tensor"] = tf.keras.preprocessing.sequence.pad_sequences(train_tgt_tensor, padding='post')
            eval_tgt_tensor = src_vocab.texts_to_sequences(eval_tgt)
            eval_["eval_tgt_tensor"] = tf.keras.preprocessing.sequence.pad_sequences(eval_tgt_tensor, padding='post')
            target_vocab = src_vocab

        return (train_, eval_, test_, src_vocab, target_vocab, max_length(train_tgt_tensor))

    else:
        # load the train and eval datasets
        with open(train_path, 'rb') as f:
            train_set = pickle.loads(f)
        with open(eval_path, 'rb') as f:
            eval_set = pickle.loads(f)
        # load vocab
        with open(srv_vocab, 'rb') as f:
            src_vocab = pickle.loads(f)

        train_input, train_tgt = zip(*train_set)
        eval_input, eval_tgt = zip(*eval_set)
        (train_adj, train_nodes, train_roles, train_edges) = zip(*train_input)
        (eval_adj, eval_nodes, eval_roles, eval_edges) = zip(*eval_input)

        train_node_tensor = _tensorize(src_vocab, train_nodes)
        train_role_tensor = _tensorize(src_vocab, train_roles)
        train_edges_tensor = _tensorize(src_vocab, train_edges)
        train_tgt_tensor = _tensorize(src_vocab, train_tgt)

        eval_node_tensor = _tensorize(src_vocab, eval_nodes)
        eval_role_tensor = _tensorize(src_vocab, eval_roles)
        eval_edges_tensor = _tensorize(src_vocab, eval_edges)

        return (train_adj, train_node_tensor, train_role_tensor, train_edges_tensor, train_tgt_tensor, eval_adj,
                eval_node_tensor, eval_role_tensor, eval_edges_tensor, src_vocab, max_length(train_tgt_tensor))




In [0]:
def GetGATDataset(train_path, eval_path,
                  test_path, src_vocab,
                  tgt_vocab, opt,
                  sentencepiece, lang,
                  set=None):
  
    if opt == 'reif':
        (train, eval, test, src_vocab, tgt_vocab, max_length_targ) = LoadGatDataset(train_path,
                                                                                    eval_path,
                                                                                    test_path, src_vocab,
                                                                                    tgt_vocab, opt,
                                                                                    sentencepiece, lang)

        node_tensor = padding(train["train_node_tensor"], 16)
        label_tensor = padding(train["train_label_tensor"], 16)
        node1_tensor = padding(train["train_node1_tensor"], 16)
        node2_tensor = padding(train["train_node2_tensor"], 16)

        eval_nodes = padding(eval["eval_node_tensor"], 16)
        eval_labels = padding(eval["eval_label_tensor"], 16)
        eval_node1 = padding(eval["eval_node1_tensor"], 16)
        eval_node2 = padding(eval["eval_node2_tensor"], 16)

        test_nodes = padding(test["test_node_tensor"], 16)
        test_labels = padding(test["test_label_tensor"], 16)
        test_node1 = padding(test["test_node1_tensor"], 16)
        test_node2 = padding(test["test_node2_tensor"], 16)

        print('\nTrain Tensor shapes (nodes, labels, node1, node2, target) : ')
        print(node_tensor.shape, label_tensor.shape, node1_tensor.shape, node2_tensor.shape, train["train_tgt_tensor"].shape)
        print('\nEval Tensor shapes (nodes, labes, node1, node2) : ')
        print(eval_nodes.shape, eval_labels.shape, eval_node1.shape, eval_node2.shape, eval["eval_tgt_tensor"].shape)
        print('\nTest Tensor shapes (nodes, labes, node1, node2) : ')
        print(test_nodes.shape, test_labels.shape, test_node1.shape, test_node2.shape)

        TRAIN_BUFFER_SIZE = len(train["train_tgt_tensor"])
        EVAL_BUFFER_SIZE = len(eval["eval_tgt_tensor"])
        BATCH_SIZE = batch_size
        steps_per_epoch = len(train["train_tgt_tensor"]) // BATCH_SIZE
        src_vocab_size = len(src_vocab.word_index) + 1
        if sentencepiece == 'True':
            tgt_vocab_size = tgt_vocab.get_piece_size()
        else:
            tgt_vocab_size = len(tgt_vocab.word_index) + 1

        dataset_size = train["train_tgt_tensor"].shape[0]

        dataset = tf.data.Dataset.from_tensor_slices((node_tensor, label_tensor,
                                                      node1_tensor, node2_tensor, train["train_tgt_tensor"])).shuffle(TRAIN_BUFFER_SIZE)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

        eval_set = tf.data.Dataset.from_tensor_slices((eval_nodes, eval_labels,
                                                       eval_node1, eval_node2, eval["eval_tgt_tensor"])).shuffle(EVAL_BUFFER_SIZE)
        eval_set = eval_set.batch(BATCH_SIZE, drop_remainder=True)

        test_set = tf.data.Dataset.from_tensor_slices((test_nodes, test_labels,
                                                       test_node1, test_node2))
        test_set = test_set.batch(BATCH_SIZE, drop_remainder=True)

        if set == None:
            return (dataset, eval_set, test_set, TRAIN_BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
                    src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab,
                    max_length_targ, dataset_size)
        elif set == 'test':
            return (test_set, TRAIN_BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
                    src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab)

  



## Load processed Dataset

In [0]:

train_path = '/content/gdrive/My Drive/data/processed_graphs/eng/gat/reif_train'
eval_path = '/content/gdrive/My Drive/data/processed_graphs/eng/gat/reif_eval'
test_path = '/content/gdrive/My Drive/data/processed_graphs/eng/gat/reif_test'
src_vocab = '/content/gdrive/My Drive/data/processed_graphs/eng/gat/reif_src_vocab'
tgt_vocab =  '/content/gdrive/My Drive/data/processed_graphs/eng/gat/train_vocab.model'

opt = 'reif'
sentencepiece = True
lang = 'eng'
batch_size = 16

In [12]:
(dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab, max_length_targ, dataset_size) = GetGATDataset(train_path, eval_path,
                                                                                                     test_path, src_vocab,
                                                                                                      tgt_vocab, opt,
                                                                                                      sentencepiece, lang)


Train Tensor shapes (nodes, labels, node1, node2, target) : 
(34352, 16) (34352, 16) (34352, 16) (34352, 16) (34352, 82)

Eval Tensor shapes (nodes, labes, node1, node2) : 
(34352, 16) (34352, 16) (34352, 16) (34352, 16) (34352, 82)

Test Tensor shapes (nodes, labes, node1, node2) : 
(1600, 16) (1600, 16) (1600, 16) (1600, 16)


## Probe the loaded dataset

In [0]:
print('Training batches have %d tensors'%len(next(iter(dataset))))

In [14]:
tgt_vocab_size

(10013, 10013)

In [15]:
 dataset_size, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch

(34352, 34352, 16, 2147)

In [17]:
def least_frequent(word_count_dic, fq=200):
   return sorted([(i,word_count_dic[i]) for i in word_count_dic.keys() if word_count_dic[i] <= fq],
       key = lambda x : x[1],
       reverse=True)[:5]

def most_frequent(word_count_dic, fq=1000):
  return sorted([(i,word_count_dic[i]) for i in word_count_dic.keys() if word_count_dic[i] >= fq],
       key = lambda x : x[1],
       reverse=True)[:5]


[('3800.0', 200),
 ('raúl_fernando_sendic_rodríguez', 200),
 ('"100305.0"(minutes)', 200),
 ('david_scott', 200),
 ('rashid_rakhimov', 200)]

In [0]:

least_frequent(tgt_vocab.word_counts, fq=200)

In [18]:


most_frequent(tgt_vocab.word_counts, fq=1000)

[('a_zero', 203362),
 ('a_one', 203362),
 ('.', 125116),
 ('the', 102384),
 ('is', 71868)]

## Implement Encoder Module Layers:
To assemble the encoder modules, we must first define the following layers:
- Shared Embedding Layer
- Graph Attention Layer
- Feed Forward Layer


### Define Embedding Layer

In [0]:

class EmbeddingSharedWeights(tf.keras.layers.Layer):
    """Calculates input embeddings and pre-softmax linear with shared weights."""

    def __init__(self, vocab_size, hidden_size):
        """Specify characteristic parameters of embedding layer.

        Args:
          vocab_size: Number of tokens in the embedding. (Typically ~32,000)
          hidden_size: Dimensionality of the embedding. (Typically 512 or 1024)
        """
        super(EmbeddingSharedWeights, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size

    def build(self, input_shape):
        """Build embedding layer."""
        with tf.name_scope("embedding_and_softmax"):
            # Create and initialize weights. The random normal initializer was chosen
            # arbitrarily, and works well.
            self.shared_weights = self.add_weight(
                "weights",
                shape=[self.vocab_size, self.hidden_size],
                dtype="float32",
                initializer=tf.random_normal_initializer(
                    mean=0., stddev=self.hidden_size ** -0.5))
        super(EmbeddingSharedWeights, self).build(input_shape)

    def get_config(self):
        return {
            "vocab_size": self.vocab_size,
            "hidden_size": self.hidden_size,
        }

    def call(self, inputs, mode="embedding"):
        """Get token embeddings of inputs.

        Args:
          inputs: An int64 tensor with shape [batch_size, length]
          mode: string, a valid value is one of "embedding" and "linear".
        Returns:
          outputs: (1) If mode == "embedding", output embedding tensor, float32 with
            shape [batch_size, length, embedding_size]; (2) mode == "linear", output
            linear tensor, float32 with shape [batch_size, length, vocab_size].
        Raises:
          ValueError: if mode is not valid.
        """
        if mode == "embedding":
            return self._embedding(inputs)
        elif mode == "linear":
            return self._linear(inputs)
        else:
            raise ValueError("mode {} is not valid.".format(mode))

    def _embedding(self, inputs):
        """Applies embedding based on inputs tensor."""
        with tf.name_scope("embedding"):
            # Create binary mask of size [batch_size, length]
            mask = tf.cast(tf.not_equal(inputs, 0), tf.float32)
            embeddings = tf.gather(self.shared_weights, inputs)
            embeddings *= tf.expand_dims(mask, -1)
            # Scale embedding by the sqrt of the hidden size
            embeddings *= self.hidden_size ** 0.5

            return embeddings

    def _linear(self, inputs):
        """Computes logits by running inputs through a linear layer.

        Args:
          inputs: A float32 tensor with shape [batch_size, length, hidden_size]
        Returns:
          float32 tensor with shape [batch_size, length, vocab_size].
        """
        with tf.name_scope("presoftmax_linear"):
            batch_size = tf.shape(inputs)[0]
            length = tf.shape(inputs)[1]

            x = tf.reshape(inputs, [-1, self.hidden_size])
            logits = tf.matmul(x, self.shared_weights, transpose_b=True)

            return tf.reshape(logits, [batch_size, length, self.vocab_size])



Define Graph Attention Layer


In [0]:
# Encoder layers = Embedding shared weights + GA Layer + FFN Layer


class GraphAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, num_heads, reg_scale=0.001, rate=0.1):
        """
        Graph Attention Network Layer, takes input and returns embedded
        node features with self attention applied on the feature matrix
        """
        super(GraphAttentionLayer, self).__init__()
        self.in_dim = d_model
        self.out_dim = dff
        self.num_heads = num_heads
        self.dropout_rate = rate
        self.kernels = []
        self.biases = []
        self.attn_kernels = []

        self.lrelu = tf.keras.layers.LeakyReLU()
        self.dropout = tf.keras.layers.Dropout(rate)
        self.reg = tf.keras.regularizers.l2(l=reg_scale)

        for head in range(self.num_heads):
            kernel = self.add_weight(shape=(self.in_dim, self.out_dim),
                                     initializer='glorot_uniform',
                                     regularizer=self.reg,
                                     name='kernel_{}'.format(head))
            bias = self.add_weight(shape=(self.out_dim,),
                                   initializer='glorot_uniform',
                                   regularizer=self.reg,
                                   name='bias_{}'.format(head))
            self.kernels.append([kernel, bias])
            # Attention kernels
            attn_kernel_self = self.add_weight(shape=(self.out_dim, 1),
                                               initializer='glorot_uniform',
                                               regularizer=self.reg,
                                               name='attn_kernel_self_{}'.format(head))
            attn_kernel_neighs = self.add_weight(shape=(self.out_dim, 1),
                                                 initializer='glorot_uniform',
                                                 regularizer=self.reg,
                                                 name='attn_kernel_neigh_{}'.format(head))
            self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs])

    def call(self, nodes):
        inputs = nodes

        outputs = []
        for head in range(self.num_heads):
            kernel = self.kernels[head]
            attention_kernel = self.attn_kernels[head]
            features = tf.keras.backend.dot(inputs, kernel[0])
            features = tf.add(features, kernel[1])
            attn_for_self = tf.keras.backend.dot(features, attention_kernel[0])
            attn_for_neighs = tf.keras.backend.dot(features, attention_kernel[1])
            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = tf.matmul(attn_for_self, attn_for_neighs, transpose_b=True)
            dense = self.lrelu(dense)

            # Mask values before activation (Vaswani et al., 2017)
            # mask_local = -10e9 * (1.0 - adj)
            # dense += mask_local

            # Apply softmax to get attention coefficients
            dense = tf.math.softmax(dense)  # (N x N)

            # Apply dropout to features and attention coefficients
            if self.trainable:
                dense = self.dropout(dense)  # (N x N)
                features = self.dropout(features)  # (N x F')

            # Linear combination with neighbors' features
            node_features = tf.matmul(dense, features)  # (N x F')
            outputs.append(node_features)

        output = tf.reduce_mean(tf.stack(outputs), axis=0)  # N x F')
        output = tf.nn.relu(output)

        return output



### Define Feed-Forward Layer

In [0]:
# Feed forward layer

class FeedForwardNetwork(tf.keras.layers.Layer):
    """Fully connected feedforward network."""

    def __init__(self, hidden_size, filter_size, relu_dropout):
        """Initialize FeedForwardNetwork.

        Args:
          hidden_size: int, output dim of hidden layer.
          filter_size: int, filter size for the inner (first) dense layer.
          relu_dropout: float, dropout rate for training.
        """
        super(FeedForwardNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.filter_size = filter_size
        self.relu_dropout = relu_dropout

    def build(self, input_shape):
        self.filter_dense_layer = tf.keras.layers.Dense(
            self.filter_size,
            use_bias=True,
            activation=tf.nn.relu,
            name="filter_layer")
        self.output_dense_layer = tf.keras.layers.Dense(
            self.hidden_size, use_bias=True, name="output_layer")
        super(FeedForwardNetwork, self).build(input_shape)

    def get_config(self):
        return {
            "hidden_size": self.hidden_size,
            "filter_size": self.filter_size,
            "relu_dropout": self.relu_dropout,
        }

    def call(self, x, training):
        """Return outputs of the feedforward network.

        Args:
          x: tensor with shape [batch_size, length, hidden_size]
          training: boolean, whether in training mode or not.

        Returns:
          Output of the feedforward network.
          tensor with shape [batch_size, length, hidden_size]
        """
        # Retrieve dynamically known shapes
        batch_size = tf.shape(x)[0]
        length = tf.shape(x)[1]

        output = self.filter_dense_layer(x)
        if training:
            output = tf.nn.dropout(output, rate=self.relu_dropout)
        output = self.output_dense_layer(output)

        return output



## Assemble Graph Encoder module
We put together the encoder module using the defined layers above

In [0]:
# Graph Encoder Layer

class GraphEncoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, trainable, dff,
                 filter_size, reg_scale=0.001, rate=0.1):

        super(GraphEncoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers

        self.node_role_layer = tf.keras.layers.Dense(self.d_model, input_shape=(2 * d_model,))
        self.enc_layers = []
        for _ in range(num_layers):
            gat_layer = GraphAttentionLayer(d_model, dff, num_heads,
                                            reg_scale=reg_scale, rate=rate)
            ffn_layer = FeedForwardNetwork(dff, filter_size, rate)
            self.enc_layers.append([gat_layer, ffn_layer])

        self.dropout = tf.keras.layers.Dropout(rate)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.edge_layer = tf.keras.layers.Dense(self.d_model)
        self.trainable = trainable

    def call(self, node_tensor, label_tensor, node1_tensor, node2_tensor):
        # adding embedding and position encoding.

        edge_tensor = tf.concat([node1_tensor, node2_tensor], 2)
        edge_tensor = tf.cast(self.node_role_layer(edge_tensor), dtype=tf.float32)
        # node_tensor = tf.add(node_tensor, role_tensor)
        node_tensor *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        edge_tensor *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        # node_tensor += self.node_pos_enc[:, :node_seq_len, :]
        edges = self.edge_layer(tf.add(edge_tensor, label_tensor))


        for i, layer in enumerate(self.enc_layers):
            if i == 0:
                x = self.enc_layers[i][0](node_tensor)
                x = self.enc_layers[i][1](x, self.trainable)
                x += edges 
            else:
                shortcut = x
                x = self.enc_layers[i][0](x)
                x = self.enc_layers[i][1](x, self.trainable)
                x += edges
                x += shortcut

        return self.layernorm(x)

## Implement RNN Decoder Module Layers
To assemble the Decoder module, we will use the following layers:
- Embedding Layer
- Bidirectional GRU
- Bahanadu Attention Layer

In [0]:
# RNN Decoder =  embedding + birirect_GRU + BahanaduAttention

### Define Bahdanau Attention Layer

In [0]:
# Bahanadu Attention layer

class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

### Assemble RNN decoder Module
We combine all the decoder layers under one class

In [0]:
# RNN Decoder
class RNNDecoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(RNNDecoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.forward_gru = tf.keras.layers.GRU(self.dec_units,
                                                    return_sequences=True,
                                                    return_state=True,
                                                    go_backwards=False,
                                                    recurrent_initializer='glorot_uniform')
        self.backward_gru = tf.keras.layers.GRU(self.dec_units,
                                                     return_sequences=True,
                                                     return_state=True,
                                                     go_backwards=True,
                                                     recurrent_initializer='glorot_uniform')
        self.gru = tf.keras.layers.Bidirectional(self.forward_gru, backward_layer=self.backward_gru,
                                                 merge_mode='ave')
        self.fc = tf.keras.layers.Dense(vocab_size)

        # used for attention
        self.attention = BahdanauAttention(self.dec_units)
        self.layernorm = tf.keras.layers.LayerNormalization()

    def call(self, x, hidden, enc_output):
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(hidden, enc_output)

        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the GRU
        output = self.gru(x)
        output, state = output[0], output[2]
        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))
        output = self.layernorm(output)

        # output shape == (batch_size, vocab)
        x = self.fc(output)

        return x, state, attention_weights



## Implement GAT Model
We put together the encoder and decoder modules from above to construct the Graph Attention model:

In [0]:
# GAT Model 

class GATModel(tf.keras.Model):
    """
    Model that uses Graph Attention encoder and RNN decoder (for now)
    """

    def __init__(self, 
                 enc_layers, enc_units,  emb_dim, num_heads,
                 hidden_size, filter_size, batch_size,  reg_scale,
                 dropout, src_vocab_size, tgt_vocab_size,
                 target_lang):
      
        super(GATModel, self).__init__()

        self.trainable = True
        self.emb_layer = EmbeddingSharedWeights(
            src_vocab_size, emb_dim)

        self.tgt_emb_layer = EmbeddingSharedWeights(
            tgt_vocab_size, emb_dim)

        self.encoder = GraphEncoder(enc_layers, emb_dim, num_heads, self.trainable,  hidden_size, # trainable=True
                                    filter_size, reg_scale=reg_scale, rate=dropout)
        self.decoder = RNNDecoder(tgt_vocab_size, emb_dim, enc_units, batch_size)
        self.vocab_tgt_size = tgt_vocab_size
        self.batch_size=batch_size
        self.num_heads = num_heads
        self.target_lang = target_lang
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
        self.hidden = tf.keras.layers.Dense(hidden_size)

    def __call__(self, nodes, labels, node1, node2, targ):
        """
        Puts the tensors through encoders and decoders
        :param adj: Adjacency matrices of input example
        :type adj: tf.tensor
        :param nodes: node features
        :type nodes: tf.tensor
        :param targ: target sequences
        :type targ: tf.tensor
        :return: output probability distribution
        :rtype: tf.tensor
        """
        node_tensor = tf.cast(self.emb_layer(nodes), dtype=tf.float32)
        label_tensor = tf.cast(self.emb_layer(labels), dtype=tf.float32)
        node1_tensor = tf.cast(self.emb_layer(node1), dtype=tf.float32)
        node2_tensor = tf.cast(self.emb_layer(node2), dtype=tf.float32)

        enc_output = self.encoder(node_tensor, label_tensor, node1_tensor, node2_tensor) # self.num_heads, self.encoder.trainable
        batch = enc_output.shape[0]
        self.enc_output_hidden = tf.reshape(enc_output, shape=[batch, -1])
        enc_hidden = self.hidden(self.enc_output_hidden)

        dec_input = tf.expand_dims([self.target_lang.word_index['<start>']] * self.batch_size, 1)
        loss = 0

        for t in range(1, targ.shape[1]):
            # pass encoder output to decoder
            predictions, dec_hidden, _ = self.decoder(dec_input, enc_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions, self.loss_object)

            # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)

        return predictions, dec_hidden, loss


## Define Loss function

In [0]:
def loss_function(real, pred, loss_object):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)


## Instantiate Model

In [0]:
'''
GATModel args: 

    enc_layers, enc_units,  emb_dim, num_heads,
    hidden_size, filter_size, batch_size,  reg_scale,
    dropout, src_vocab_size, tgt_vocab_size,
    target_lang

'''

model = GATModel(2, 64,  64, 2,
                 64, 16, 16,0.0,
                 0.2, src_vocab_size, tgt_vocab_size,
                 tgt_vocab)



## Test on sample output

In [32]:
#Example inputs and target batch for testing
nodes_, labels_, node1_, node2_, target_ = next(iter(dataset))

predictions, dec_hidden, loss = model(nodes_, labels_, node1_, node2_, target_)


for i in [predictions, dec_hidden]:
  print(i.shape) 

print(loss)


(16, 10013)
(16, 64)
tf.Tensor(225.53134, shape=(), dtype=float32)


## Define Train Step

In [0]:

def train_step(nodes, labels, node1, node2, targ):
    with tf.GradientTape() as tape:
        predictions, dec_hidden, loss = model(nodes, labels, node1, node2, targ)
        reg_loss = tf.reduce_sum(model.losses)
        loss += reg_loss
    batch_loss = (loss / int(targ.shape[1]))
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)

    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

## Define Eval step and helper functions for rogue_n score

In [0]:
def _get_ngrams(n, text):
    """Calculates n-grams.
    Args:
      n: which n-grams to calculate
      text: An array of tokens
    Returns:
      A set of n-grams
    """
    ngram_set = set()
    text_length = len(text)
    max_index_ngram_start = text_length - n
    for i in range(max_index_ngram_start + 1):
        ngram_set.add(tuple(text[i:i + n]))
    return ngram_set

In [0]:
def rouge_n(eval_sentences, ref_sentences, n=2):
    """Computes ROUGE-N f1 score of two text collections of sentences.
    Source: https://www.microsoft.com/en-us/research/publication/
    rouge-a-package-for-automatic-evaluation-of-summaries/
    Args:
      eval_sentences: The sentences that have been picked by the summarizer
      ref_sentences: The sentences from the reference set
      n: Size of ngram.  Defaults to 2.
    Returns:
      f1 score for ROUGE-N
    """

    f1_scores = []
    for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences):
        eval_ngrams = _get_ngrams(n, eval_sentence)
        ref_ngrams = _get_ngrams(n, ref_sentence)
        ref_count = len(ref_ngrams)
        eval_count = len(eval_ngrams)

        # Gets the overlapping ngrams between evaluated and reference
        overlapping_ngrams = eval_ngrams.intersection(ref_ngrams)
        overlapping_count = len(overlapping_ngrams)

        # Handle edge case. This isn't mathematically correct, but it's good enough
        if eval_count == 0:
            precision = 0.0
        else:
            precision = overlapping_count / eval_count

        if ref_count == 0:
            recall = 0.0
        else:
            recall = overlapping_count / ref_count

        f1_scores.append(2.0 * ((precision * recall) / (precision + recall + 1e-8)))

    # return overlapping_count / reference_count
    return np.mean(f1_scores, dtype=np.float32)

In [0]:
 def render(target):
    '''
    args:
      target_ : target tensor batch
      n = nth element in batch 

    usage:
      Takes in a tensor of (batch_size, vocab_size) and
      converts a batch instance into its string equivalent
    '''
    return [[tf.reduce_sum(i).numpy() for i in target_] for target_ in target]



In [0]:
def eval_step(EvalResultsFile, steps=5):

    reference = open(eval_path, 'r', encoding='ascii')
    model.trainable = False
    results = []
    ref_target = []
    eval_results = open(EvalResultsFile, 'w+')
    if steps is None:
        dev_set = eval_set
    else:
        dev_set = eval_set.take(steps)

    for (batch, (nodes, labels, node1, node2, targets)) in tqdm(enumerate(dev_set)):
        predictions = model(nodes, labels, node1,
                            node2, targets)
        pred = [(predictions[0].numpy().tolist())]


        for i in pred:
            sentences = tgt_vocab.sequences_to_texts(i)
            sentence = [j.partition("<start>")[2].partition("<end>")[0] for j in sentences]
            for w in sentence:
                eval_results.write((w + '\n'))
                ref_target.append(reference.readline())
                results.append(w)

    rogue = (rouge_n(results, ref_target))
    eval_results.close()
    model.trainable = True

    return rogue


## Set Model variables

In [0]:
epochs = 10
batch_size = 16

optimizer = tf.keras.optimizers.Adam()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

## Setup checkpoint manager

In [101]:

checkpoint_dir = './gdrive/My Drive/RDF_GAT_TF/training_checkpoints'

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

checkpoint = tf.train.Checkpoint(optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)




## Define Loss and accuracy objects

In [0]:
train_loss = tf.keras.metrics.Mean(name='train_loss')

train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

In [0]:
step=0
steps = epochs * steps_per_epoch
total_loss = 0
eval_file = './gdrive/My Drive/RDF_GAT_TF/eval_file.txt'

## Define training loop

In [0]:



for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()

    if ckpt_manager.latest_checkpoint:
        checkpoint.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    # saving (checkpoint) the model every 2 epochs
    if (epoch + 1) % 2 == 0:
        print('- > Saving progress...')
        checkpoint.save(file_prefix = checkpoint_prefix)

    with tqdm(total=(38668 // batch_size)) as pbar:
        for (batch, (node_, label_, node1_, node2_, targ_)) in tqdm(enumerate(dataset)):
            start = time.time()
            step += 1

            batch_loss = train_step(node_, label_, node1_, node2_, targ_)
            print('Epoch {} Batch {} Batch Loss {:.4f} '.format(epoch, batch,
                                                                    batch_loss.numpy()))

            print('Time {} \n'.format(time.time() - start))
            pbar.update(1)

            #if batch % 5 ==0:
              #eval_step(eval_file)








  0%|          | 0/2416 [00:00<?, ?it/s][A[A[A[A[A





0it [00:00, ?it/s][A[A[A[A[A[A

Latest checkpoint restored!!







  0%|          | 1/2416 [00:02<2:00:09,  2.99s/it][A[A[A[A[A





1it [00:02,  2.96s/it][A[A[A[A[A[A

Epoch 0 Batch 0 Batch Loss 1.2630 
Time 2.89756441116333 








  0%|          | 2/2416 [00:05<1:54:37,  2.85s/it][A[A[A[A[A





2it [00:05,  2.83s/it][A[A[A[A[A[A

Epoch 0 Batch 1 Batch Loss 1.3270 
Time 2.5188369750976562 








  0%|          | 3/2416 [00:08<1:51:16,  2.77s/it][A[A[A[A[A





3it [00:08,  2.75s/it][A[A[A[A[A[A

Epoch 0 Batch 2 Batch Loss 1.3551 
Time 2.560530185699463 








  0%|          | 4/2416 [00:10<1:48:30,  2.70s/it][A[A[A[A[A





4it [00:10,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 3 Batch Loss 1.1326 
Time 2.5327465534210205 








  0%|          | 5/2416 [00:13<1:47:06,  2.67s/it][A[A[A[A[A





5it [00:13,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 4 Batch Loss 1.4066 
Time 2.5719799995422363 








  0%|          | 6/2416 [00:15<1:46:12,  2.64s/it][A[A[A[A[A





6it [00:15,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 5 Batch Loss 1.2804 
Time 2.5847809314727783 








  0%|          | 7/2416 [00:18<1:44:53,  2.61s/it][A[A[A[A[A





7it [00:18,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 6 Batch Loss 1.3745 
Time 2.5296897888183594 








  0%|          | 8/2416 [00:20<1:44:13,  2.60s/it][A[A[A[A[A





8it [00:20,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 7 Batch Loss 1.4135 
Time 2.546293020248413 








  0%|          | 9/2416 [00:24<1:50:22,  2.75s/it][A[A[A[A[A





9it [00:24,  2.75s/it][A[A[A[A[A[A

Epoch 0 Batch 8 Batch Loss 1.4128 
Time 3.09462308883667 








  0%|          | 10/2416 [00:26<1:49:14,  2.72s/it][A[A[A[A[A





10it [00:26,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 9 Batch Loss 1.5145 
Time 2.6505227088928223 








  0%|          | 11/2416 [00:29<1:48:43,  2.71s/it][A[A[A[A[A





11it [00:29,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 10 Batch Loss 1.5721 
Time 2.6682419776916504 








  0%|          | 12/2416 [00:31<1:47:33,  2.68s/it][A[A[A[A[A





12it [00:31,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 11 Batch Loss 1.0943 
Time 2.6000137329101562 








  1%|          | 13/2416 [00:34<1:46:48,  2.67s/it][A[A[A[A[A





13it [00:34,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 12 Batch Loss 1.4570 
Time 2.6193604469299316 








  1%|          | 14/2416 [00:37<1:45:40,  2.64s/it][A[A[A[A[A





14it [00:37,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 13 Batch Loss 1.5444 
Time 2.570020914077759 








  1%|          | 15/2416 [00:39<1:45:01,  2.62s/it][A[A[A[A[A





15it [00:39,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 14 Batch Loss 1.2399 
Time 2.577866554260254 








  1%|          | 16/2416 [00:42<1:44:20,  2.61s/it][A[A[A[A[A





16it [00:42,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 15 Batch Loss 1.3136 
Time 2.5559439659118652 








  1%|          | 17/2416 [00:44<1:44:25,  2.61s/it][A[A[A[A[A





17it [00:44,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 16 Batch Loss 1.0801 
Time 2.604288101196289 








  1%|          | 18/2416 [00:47<1:44:05,  2.60s/it][A[A[A[A[A





18it [00:47,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 17 Batch Loss 1.0141 
Time 2.5761542320251465 








  1%|          | 19/2416 [00:50<1:49:10,  2.73s/it][A[A[A[A[A





19it [00:50,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 18 Batch Loss 1.3009 
Time 3.016226291656494 








  1%|          | 20/2416 [00:53<1:47:10,  2.68s/it][A[A[A[A[A





20it [00:53,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 19 Batch Loss 1.3409 
Time 2.5561914443969727 








  1%|          | 21/2416 [00:55<1:46:08,  2.66s/it][A[A[A[A[A





21it [00:55,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 20 Batch Loss 1.1996 
Time 2.5873963832855225 








  1%|          | 22/2416 [00:58<1:44:53,  2.63s/it][A[A[A[A[A





22it [00:58,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 21 Batch Loss 1.6438 
Time 2.544529676437378 








  1%|          | 23/2416 [01:00<1:44:05,  2.61s/it][A[A[A[A[A





23it [01:00,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 22 Batch Loss 1.3737 
Time 2.555171251296997 








  1%|          | 24/2416 [01:03<1:43:34,  2.60s/it][A[A[A[A[A





24it [01:03,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 23 Batch Loss 1.1693 
Time 2.564314603805542 








  1%|          | 25/2416 [01:06<1:43:28,  2.60s/it][A[A[A[A[A





25it [01:06,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 24 Batch Loss 1.4110 
Time 2.5779976844787598 








  1%|          | 26/2416 [01:08<1:42:40,  2.58s/it][A[A[A[A[A





26it [01:08,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 25 Batch Loss 1.1846 
Time 2.52213191986084 








  1%|          | 27/2416 [01:11<1:42:31,  2.57s/it][A[A[A[A[A





27it [01:11,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 26 Batch Loss 1.1920 
Time 2.5550448894500732 








  1%|          | 28/2416 [01:13<1:42:29,  2.58s/it][A[A[A[A[A





28it [01:13,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 27 Batch Loss 1.4964 
Time 2.5585570335388184 








  1%|          | 29/2416 [01:16<1:42:47,  2.58s/it][A[A[A[A[A





29it [01:16,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 28 Batch Loss 1.3118 
Time 2.58463454246521 








  1%|          | 30/2416 [01:19<1:48:09,  2.72s/it][A[A[A[A[A





30it [01:19,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 29 Batch Loss 1.2354 
Time 3.025479555130005 








  1%|▏         | 31/2416 [01:21<1:46:17,  2.67s/it][A[A[A[A[A





31it [01:21,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 30 Batch Loss 1.3967 
Time 2.555852174758911 








  1%|▏         | 32/2416 [01:24<1:45:05,  2.64s/it][A[A[A[A[A





32it [01:24,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 31 Batch Loss 1.4592 
Time 2.562103033065796 








  1%|▏         | 33/2416 [01:27<1:44:11,  2.62s/it][A[A[A[A[A





33it [01:27,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 32 Batch Loss 1.0288 
Time 2.5602059364318848 








  1%|▏         | 34/2416 [01:29<1:43:30,  2.61s/it][A[A[A[A[A





34it [01:29,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 33 Batch Loss 1.4984 
Time 2.560175657272339 








  1%|▏         | 35/2416 [01:32<1:43:50,  2.62s/it][A[A[A[A[A





35it [01:32,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 34 Batch Loss 1.3081 
Time 2.6237621307373047 








  1%|▏         | 36/2416 [01:34<1:43:12,  2.60s/it][A[A[A[A[A





36it [01:34,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 35 Batch Loss 1.2631 
Time 2.56033992767334 








  2%|▏         | 37/2416 [01:37<1:42:31,  2.59s/it][A[A[A[A[A





37it [01:37,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 36 Batch Loss 1.2584 
Time 2.5419058799743652 








  2%|▏         | 38/2416 [01:39<1:42:03,  2.58s/it][A[A[A[A[A





38it [01:39,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 37 Batch Loss 1.0910 
Time 2.539735794067383 








  2%|▏         | 39/2416 [01:42<1:41:57,  2.57s/it][A[A[A[A[A





39it [01:42,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 38 Batch Loss 1.3605 
Time 2.5526721477508545 








  2%|▏         | 40/2416 [01:45<1:47:24,  2.71s/it][A[A[A[A[A





40it [01:45,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 39 Batch Loss 1.3244 
Time 3.020540237426758 








  2%|▏         | 41/2416 [01:48<1:45:20,  2.66s/it][A[A[A[A[A





41it [01:48,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 40 Batch Loss 1.4426 
Time 2.5264151096343994 








  2%|▏         | 42/2416 [01:50<1:44:41,  2.65s/it][A[A[A[A[A





42it [01:50,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 41 Batch Loss 1.5764 
Time 2.5951948165893555 








  2%|▏         | 43/2416 [01:53<1:43:35,  2.62s/it][A[A[A[A[A





43it [01:53,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 42 Batch Loss 1.1975 
Time 2.5450572967529297 








  2%|▏         | 44/2416 [01:55<1:42:55,  2.60s/it][A[A[A[A[A





44it [01:55,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 43 Batch Loss 1.1989 
Time 2.5536599159240723 








  2%|▏         | 45/2416 [01:58<1:42:12,  2.59s/it][A[A[A[A[A





45it [01:58,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 44 Batch Loss 1.3247 
Time 2.5356390476226807 








  2%|▏         | 46/2416 [02:00<1:41:51,  2.58s/it][A[A[A[A[A





46it [02:00,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 45 Batch Loss 1.6579 
Time 2.5495710372924805 








  2%|▏         | 47/2416 [02:03<1:41:43,  2.58s/it][A[A[A[A[A





47it [02:03,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 46 Batch Loss 1.3668 
Time 2.555793285369873 








  2%|▏         | 48/2416 [02:06<1:41:46,  2.58s/it][A[A[A[A[A





48it [02:06,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 47 Batch Loss 1.3213 
Time 2.569244861602783 








  2%|▏         | 49/2416 [02:08<1:41:17,  2.57s/it][A[A[A[A[A





49it [02:08,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 48 Batch Loss 1.2887 
Time 2.531578540802002 








  2%|▏         | 50/2416 [02:11<1:47:04,  2.72s/it][A[A[A[A[A





50it [02:11,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 49 Batch Loss 1.3424 
Time 3.048997163772583 








  2%|▏         | 51/2416 [02:14<1:45:17,  2.67s/it][A[A[A[A[A





51it [02:14,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 50 Batch Loss 1.3720 
Time 2.558889627456665 








  2%|▏         | 52/2416 [02:16<1:44:29,  2.65s/it][A[A[A[A[A





52it [02:16,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 51 Batch Loss 1.2789 
Time 2.5959393978118896 








  2%|▏         | 53/2416 [02:19<1:43:13,  2.62s/it][A[A[A[A[A





53it [02:19,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 52 Batch Loss 1.2917 
Time 2.54167103767395 








  2%|▏         | 54/2416 [02:22<1:42:35,  2.61s/it][A[A[A[A[A





54it [02:21,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 53 Batch Loss 1.1514 
Time 2.561816692352295 








  2%|▏         | 55/2416 [02:24<1:42:17,  2.60s/it][A[A[A[A[A





55it [02:24,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 54 Batch Loss 1.3170 
Time 2.574525833129883 








  2%|▏         | 56/2416 [02:27<1:42:53,  2.62s/it][A[A[A[A[A





56it [02:27,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 55 Batch Loss 0.9615 
Time 2.639188766479492 








  2%|▏         | 57/2416 [02:29<1:43:13,  2.63s/it][A[A[A[A[A





57it [02:29,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 56 Batch Loss 1.5460 
Time 2.630826473236084 








  2%|▏         | 58/2416 [02:32<1:43:25,  2.63s/it][A[A[A[A[A





58it [02:32,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 57 Batch Loss 1.2261 
Time 2.6280651092529297 








  2%|▏         | 59/2416 [02:35<1:43:29,  2.63s/it][A[A[A[A[A





59it [02:35,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 58 Batch Loss 1.3391 
Time 2.6264431476593018 








  2%|▏         | 60/2416 [02:38<1:48:30,  2.76s/it][A[A[A[A[A





60it [02:38,  2.76s/it][A[A[A[A[A[A

Epoch 0 Batch 59 Batch Loss 1.0760 
Time 3.0514726638793945 








  3%|▎         | 61/2416 [02:40<1:46:14,  2.71s/it][A[A[A[A[A





61it [02:40,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 60 Batch Loss 1.1901 
Time 2.5682828426361084 








  3%|▎         | 62/2416 [02:43<1:45:18,  2.68s/it][A[A[A[A[A





62it [02:43,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 61 Batch Loss 1.2628 
Time 2.6196799278259277 








  3%|▎         | 63/2416 [02:46<1:44:19,  2.66s/it][A[A[A[A[A





63it [02:46,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 62 Batch Loss 1.2059 
Time 2.5981903076171875 








  3%|▎         | 64/2416 [02:48<1:43:28,  2.64s/it][A[A[A[A[A





64it [02:48,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 63 Batch Loss 1.4912 
Time 2.5788586139678955 








  3%|▎         | 65/2416 [02:51<1:43:15,  2.64s/it][A[A[A[A[A





65it [02:51,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 64 Batch Loss 1.5464 
Time 2.6166398525238037 








  3%|▎         | 66/2416 [02:53<1:43:03,  2.63s/it][A[A[A[A[A





66it [02:53,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 65 Batch Loss 1.1024 
Time 2.6120736598968506 








  3%|▎         | 67/2416 [02:56<1:42:25,  2.62s/it][A[A[A[A[A





67it [02:56,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 66 Batch Loss 1.4829 
Time 2.5628914833068848 








  3%|▎         | 68/2416 [02:59<1:42:03,  2.61s/it][A[A[A[A[A





68it [02:59,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 67 Batch Loss 1.1430 
Time 2.573516368865967 








  3%|▎         | 69/2416 [03:01<1:41:21,  2.59s/it][A[A[A[A[A





69it [03:01,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 68 Batch Loss 1.3367 
Time 2.5467350482940674 








  3%|▎         | 70/2416 [03:04<1:46:03,  2.71s/it][A[A[A[A[A





70it [03:04,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 69 Batch Loss 1.4512 
Time 2.9788708686828613 








  3%|▎         | 71/2416 [03:07<1:44:02,  2.66s/it][A[A[A[A[A





71it [03:07,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 70 Batch Loss 1.8049 
Time 2.532402753829956 








  3%|▎         | 72/2416 [03:09<1:42:14,  2.62s/it][A[A[A[A[A





72it [03:09,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 71 Batch Loss 1.1464 
Time 2.4994559288024902 








  3%|▎         | 73/2416 [03:12<1:41:46,  2.61s/it][A[A[A[A[A





73it [03:12,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 72 Batch Loss 1.4172 
Time 2.5756165981292725 








  3%|▎         | 74/2416 [03:14<1:40:19,  2.57s/it][A[A[A[A[A





74it [03:14,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 73 Batch Loss 1.1636 
Time 2.4786081314086914 








  3%|▎         | 75/2416 [03:17<1:41:00,  2.59s/it][A[A[A[A[A





75it [03:17,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 74 Batch Loss 1.3123 
Time 2.618685245513916 








  3%|▎         | 76/2416 [03:19<1:40:41,  2.58s/it][A[A[A[A[A





76it [03:19,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 75 Batch Loss 1.8019 
Time 2.5570015907287598 








  3%|▎         | 77/2416 [03:22<1:40:22,  2.57s/it][A[A[A[A[A





77it [03:22,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 76 Batch Loss 1.2673 
Time 2.551417350769043 








  3%|▎         | 78/2416 [03:25<1:39:51,  2.56s/it][A[A[A[A[A





78it [03:25,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 77 Batch Loss 1.2416 
Time 2.520395517349243 








  3%|▎         | 79/2416 [03:27<1:40:14,  2.57s/it][A[A[A[A[A





79it [03:27,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 78 Batch Loss 1.3974 
Time 2.583669662475586 








  3%|▎         | 80/2416 [03:30<1:46:17,  2.73s/it][A[A[A[A[A





80it [03:30,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 79 Batch Loss 1.3535 
Time 3.0889151096343994 








  3%|▎         | 81/2416 [03:33<1:44:54,  2.70s/it][A[A[A[A[A





81it [03:33,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 80 Batch Loss 1.2563 
Time 2.604353904724121 








  3%|▎         | 82/2416 [03:35<1:43:52,  2.67s/it][A[A[A[A[A





82it [03:35,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 81 Batch Loss 1.3752 
Time 2.603710651397705 








  3%|▎         | 83/2416 [03:38<1:42:59,  2.65s/it][A[A[A[A[A





83it [03:38,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 82 Batch Loss 1.3573 
Time 2.592780113220215 








  3%|▎         | 84/2416 [03:41<1:42:55,  2.65s/it][A[A[A[A[A





84it [03:41,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 83 Batch Loss 1.0818 
Time 2.632134199142456 








  4%|▎         | 85/2416 [03:43<1:42:48,  2.65s/it][A[A[A[A[A





85it [03:43,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 84 Batch Loss 1.1856 
Time 2.628464698791504 








  4%|▎         | 86/2416 [03:46<1:42:54,  2.65s/it][A[A[A[A[A





86it [03:46,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 85 Batch Loss 1.4991 
Time 2.640793561935425 








  4%|▎         | 87/2416 [03:49<1:42:49,  2.65s/it][A[A[A[A[A





87it [03:49,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 86 Batch Loss 1.0501 
Time 2.6311638355255127 








  4%|▎         | 88/2416 [03:51<1:41:54,  2.63s/it][A[A[A[A[A





88it [03:51,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 87 Batch Loss 1.3122 
Time 2.566924810409546 








  4%|▎         | 89/2416 [03:54<1:41:46,  2.62s/it][A[A[A[A[A





89it [03:54,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 88 Batch Loss 1.6246 
Time 2.603865146636963 








  4%|▎         | 90/2416 [03:57<1:47:34,  2.77s/it][A[A[A[A[A





90it [03:57,  2.78s/it][A[A[A[A[A[A

Epoch 0 Batch 89 Batch Loss 1.3916 
Time 3.120424270629883 








  4%|▍         | 91/2416 [04:00<1:45:04,  2.71s/it][A[A[A[A[A





91it [03:59,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 90 Batch Loss 1.4933 
Time 2.551546812057495 








  4%|▍         | 92/2416 [04:02<1:44:02,  2.69s/it][A[A[A[A[A





92it [04:02,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 91 Batch Loss 1.7295 
Time 2.6151578426361084 








  4%|▍         | 93/2416 [04:05<1:42:27,  2.65s/it][A[A[A[A[A





93it [04:05,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 92 Batch Loss 1.3000 
Time 2.548938035964966 








  4%|▍         | 94/2416 [04:07<1:41:55,  2.63s/it][A[A[A[A[A





94it [04:07,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 93 Batch Loss 1.0124 
Time 2.593100070953369 








  4%|▍         | 95/2416 [04:10<1:41:44,  2.63s/it][A[A[A[A[A





95it [04:10,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 94 Batch Loss 1.1970 
Time 2.6011481285095215 








  4%|▍         | 96/2416 [04:13<1:42:06,  2.64s/it][A[A[A[A[A





96it [04:13,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 95 Batch Loss 1.6248 
Time 2.652587890625 








  4%|▍         | 97/2416 [04:15<1:41:41,  2.63s/it][A[A[A[A[A





97it [04:15,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 96 Batch Loss 1.3893 
Time 2.596357583999634 








  4%|▍         | 98/2416 [04:18<1:41:48,  2.64s/it][A[A[A[A[A





98it [04:18,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 97 Batch Loss 1.4698 
Time 2.6398329734802246 








  4%|▍         | 99/2416 [04:20<1:41:32,  2.63s/it][A[A[A[A[A





99it [04:20,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 98 Batch Loss 1.4119 
Time 2.607039451599121 








  4%|▍         | 100/2416 [04:23<1:41:45,  2.64s/it][A[A[A[A[A





100it [04:23,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 99 Batch Loss 1.3862 
Time 2.6424286365509033 








  4%|▍         | 101/2416 [04:26<1:46:35,  2.76s/it][A[A[A[A[A





101it [04:26,  2.76s/it][A[A[A[A[A[A

Epoch 0 Batch 100 Batch Loss 1.5201 
Time 3.0490708351135254 








  4%|▍         | 102/2416 [04:29<1:44:38,  2.71s/it][A[A[A[A[A





102it [04:29,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 101 Batch Loss 1.2939 
Time 2.58382511138916 








  4%|▍         | 103/2416 [04:31<1:43:09,  2.68s/it][A[A[A[A[A





103it [04:31,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 102 Batch Loss 0.9765 
Time 2.5655243396759033 








  4%|▍         | 104/2416 [04:34<1:42:01,  2.65s/it][A[A[A[A[A





104it [04:34,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 103 Batch Loss 1.1946 
Time 2.565063714981079 








  4%|▍         | 105/2416 [04:37<1:41:09,  2.63s/it][A[A[A[A[A





105it [04:36,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 104 Batch Loss 1.2570 
Time 2.566004753112793 








  4%|▍         | 106/2416 [04:39<1:40:21,  2.61s/it][A[A[A[A[A





106it [04:39,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 105 Batch Loss 1.6067 
Time 2.5544207096099854 








  4%|▍         | 107/2416 [04:42<1:39:45,  2.59s/it][A[A[A[A[A





107it [04:42,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 106 Batch Loss 1.2454 
Time 2.5501112937927246 








  4%|▍         | 108/2416 [04:44<1:39:20,  2.58s/it][A[A[A[A[A





108it [04:44,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 107 Batch Loss 1.3846 
Time 2.5419461727142334 








  5%|▍         | 109/2416 [04:47<1:40:31,  2.61s/it][A[A[A[A[A





109it [04:47,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 108 Batch Loss 0.9951 
Time 2.6778533458709717 








  5%|▍         | 110/2416 [04:50<1:40:34,  2.62s/it][A[A[A[A[A





110it [04:49,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 109 Batch Loss 1.1319 
Time 2.609976291656494 








  5%|▍         | 111/2416 [04:52<1:40:42,  2.62s/it][A[A[A[A[A





111it [04:52,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 110 Batch Loss 1.3578 
Time 2.614912271499634 








  5%|▍         | 112/2416 [04:55<1:45:43,  2.75s/it][A[A[A[A[A





112it [04:55,  2.75s/it][A[A[A[A[A[A

Epoch 0 Batch 111 Batch Loss 1.1990 
Time 3.044299840927124 








  5%|▍         | 113/2416 [04:58<1:44:37,  2.73s/it][A[A[A[A[A





113it [04:58,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 112 Batch Loss 1.4685 
Time 2.6493849754333496 








  5%|▍         | 114/2416 [05:00<1:43:24,  2.70s/it][A[A[A[A[A





114it [05:00,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 113 Batch Loss 1.2755 
Time 2.609980821609497 








  5%|▍         | 115/2416 [05:03<1:42:43,  2.68s/it][A[A[A[A[A





115it [05:03,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 114 Batch Loss 1.3641 
Time 2.625131130218506 








  5%|▍         | 116/2416 [05:06<1:41:25,  2.65s/it][A[A[A[A[A





116it [05:06,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 115 Batch Loss 1.3929 
Time 2.554997205734253 








  5%|▍         | 117/2416 [05:08<1:40:48,  2.63s/it][A[A[A[A[A





117it [05:08,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 116 Batch Loss 1.3261 
Time 2.5879030227661133 








  5%|▍         | 118/2416 [05:11<1:40:44,  2.63s/it][A[A[A[A[A





118it [05:11,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 117 Batch Loss 1.3442 
Time 2.617269277572632 








  5%|▍         | 119/2416 [05:14<1:40:40,  2.63s/it][A[A[A[A[A





119it [05:14,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 118 Batch Loss 1.2082 
Time 2.613942861557007 








  5%|▍         | 120/2416 [05:16<1:40:34,  2.63s/it][A[A[A[A[A





120it [05:16,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 119 Batch Loss 1.5062 
Time 2.613314628601074 








  5%|▌         | 121/2416 [05:19<1:40:44,  2.63s/it][A[A[A[A[A





121it [05:19,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 120 Batch Loss 1.7009 
Time 2.631277084350586 








  5%|▌         | 122/2416 [05:22<1:45:33,  2.76s/it][A[A[A[A[A





122it [05:22,  2.76s/it][A[A[A[A[A[A

Epoch 0 Batch 121 Batch Loss 1.3435 
Time 3.0450947284698486 








  5%|▌         | 123/2416 [05:24<1:43:17,  2.70s/it][A[A[A[A[A





123it [05:24,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 122 Batch Loss 1.0331 
Time 2.5506350994110107 








  5%|▌         | 124/2416 [05:27<1:41:42,  2.66s/it][A[A[A[A[A





124it [05:27,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 123 Batch Loss 1.2685 
Time 2.550081491470337 








  5%|▌         | 125/2416 [05:30<1:40:42,  2.64s/it][A[A[A[A[A





125it [05:30,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 124 Batch Loss 1.3859 
Time 2.571868419647217 








  5%|▌         | 126/2416 [05:32<1:40:56,  2.64s/it][A[A[A[A[A





126it [05:32,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 125 Batch Loss 1.2061 
Time 2.6455183029174805 








  5%|▌         | 127/2416 [05:35<1:41:18,  2.66s/it][A[A[A[A[A





127it [05:35,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 126 Batch Loss 1.5222 
Time 2.6744890213012695 








  5%|▌         | 128/2416 [05:38<1:40:28,  2.63s/it][A[A[A[A[A





128it [05:37,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 127 Batch Loss 1.3262 
Time 2.570683240890503 








  5%|▌         | 129/2416 [05:40<1:39:51,  2.62s/it][A[A[A[A[A





129it [05:40,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 128 Batch Loss 1.2023 
Time 2.5764808654785156 








  5%|▌         | 130/2416 [05:43<1:39:19,  2.61s/it][A[A[A[A[A





130it [05:43,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 129 Batch Loss 1.3745 
Time 2.5659170150756836 








  5%|▌         | 131/2416 [05:45<1:39:07,  2.60s/it][A[A[A[A[A





131it [05:45,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 130 Batch Loss 1.0481 
Time 2.5804877281188965 








  5%|▌         | 132/2416 [05:48<1:38:49,  2.60s/it][A[A[A[A[A





132it [05:48,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 131 Batch Loss 1.3063 
Time 2.56339430809021 








  6%|▌         | 133/2416 [05:51<1:44:14,  2.74s/it][A[A[A[A[A





133it [05:51,  2.74s/it][A[A[A[A[A[A

Epoch 0 Batch 132 Batch Loss 1.2798 
Time 3.06259822845459 








  6%|▌         | 134/2416 [05:54<1:42:23,  2.69s/it][A[A[A[A[A





134it [05:53,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 133 Batch Loss 1.2574 
Time 2.5696828365325928 








  6%|▌         | 135/2416 [05:56<1:40:48,  2.65s/it][A[A[A[A[A





135it [05:56,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 134 Batch Loss 1.4746 
Time 2.544065237045288 








  6%|▌         | 136/2416 [05:59<1:40:06,  2.63s/it][A[A[A[A[A





136it [05:59,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 135 Batch Loss 1.8387 
Time 2.582542896270752 








  6%|▌         | 137/2416 [06:01<1:39:27,  2.62s/it][A[A[A[A[A





137it [06:01,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 136 Batch Loss 1.2077 
Time 2.5658814907073975 








  6%|▌         | 138/2416 [06:04<1:38:44,  2.60s/it][A[A[A[A[A





138it [06:04,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 137 Batch Loss 1.4329 
Time 2.54797101020813 








  6%|▌         | 139/2416 [06:06<1:38:28,  2.60s/it][A[A[A[A[A





139it [06:06,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 138 Batch Loss 1.7164 
Time 2.567244291305542 








  6%|▌         | 140/2416 [06:09<1:37:51,  2.58s/it][A[A[A[A[A





140it [06:09,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 139 Batch Loss 1.4387 
Time 2.536583185195923 








  6%|▌         | 141/2416 [06:11<1:37:24,  2.57s/it][A[A[A[A[A





141it [06:11,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 140 Batch Loss 1.2754 
Time 2.533393383026123 








  6%|▌         | 142/2416 [06:14<1:37:36,  2.58s/it][A[A[A[A[A





142it [06:14,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 141 Batch Loss 1.4190 
Time 2.5677850246429443 








  6%|▌         | 143/2416 [06:17<1:42:57,  2.72s/it][A[A[A[A[A





143it [06:17,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 142 Batch Loss 1.3330 
Time 3.0381081104278564 








  6%|▌         | 144/2416 [06:20<1:41:23,  2.68s/it][A[A[A[A[A





144it [06:20,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 143 Batch Loss 1.3442 
Time 2.565852403640747 








  6%|▌         | 145/2416 [06:22<1:40:03,  2.64s/it][A[A[A[A[A





145it [06:22,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 144 Batch Loss 1.2379 
Time 2.553011655807495 








  6%|▌         | 146/2416 [06:25<1:39:12,  2.62s/it][A[A[A[A[A





146it [06:25,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 145 Batch Loss 1.2609 
Time 2.5648066997528076 








  6%|▌         | 147/2416 [06:27<1:38:34,  2.61s/it][A[A[A[A[A





147it [06:27,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 146 Batch Loss 1.2200 
Time 2.5641841888427734 








  6%|▌         | 148/2416 [06:30<1:38:09,  2.60s/it][A[A[A[A[A





148it [06:30,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 147 Batch Loss 1.4436 
Time 2.5553219318389893 








  6%|▌         | 149/2416 [06:33<1:37:32,  2.58s/it][A[A[A[A[A





149it [06:32,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 148 Batch Loss 1.7348 
Time 2.5341691970825195 








  6%|▌         | 150/2416 [06:35<1:37:26,  2.58s/it][A[A[A[A[A





150it [06:35,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 149 Batch Loss 1.2528 
Time 2.5654804706573486 








  6%|▋         | 151/2416 [06:38<1:37:25,  2.58s/it][A[A[A[A[A





151it [06:38,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 150 Batch Loss 1.3055 
Time 2.5685555934906006 








  6%|▋         | 152/2416 [06:40<1:37:16,  2.58s/it][A[A[A[A[A





152it [06:40,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 151 Batch Loss 1.3231 
Time 2.560279607772827 








  6%|▋         | 153/2416 [06:43<1:37:01,  2.57s/it][A[A[A[A[A





153it [06:43,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 152 Batch Loss 1.4277 
Time 2.5507967472076416 








  6%|▋         | 154/2416 [06:46<1:42:20,  2.71s/it][A[A[A[A[A





154it [06:46,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 153 Batch Loss 1.1242 
Time 3.0289251804351807 








  6%|▋         | 155/2416 [06:48<1:40:41,  2.67s/it][A[A[A[A[A





155it [06:48,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 154 Batch Loss 1.6657 
Time 2.563009023666382 








  6%|▋         | 156/2416 [06:51<1:39:40,  2.65s/it][A[A[A[A[A





156it [06:51,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 155 Batch Loss 1.3150 
Time 2.5751092433929443 








  6%|▋         | 157/2416 [06:54<1:38:53,  2.63s/it][A[A[A[A[A





157it [06:54,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 156 Batch Loss 1.3958 
Time 2.5673210620880127 








  7%|▋         | 158/2416 [06:56<1:38:29,  2.62s/it][A[A[A[A[A





158it [06:56,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 157 Batch Loss 1.2487 
Time 2.5839316844940186 








  7%|▋         | 159/2416 [06:59<1:37:47,  2.60s/it][A[A[A[A[A





159it [06:59,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 158 Batch Loss 1.3405 
Time 2.545793056488037 








  7%|▋         | 160/2416 [07:01<1:37:37,  2.60s/it][A[A[A[A[A





160it [07:01,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 159 Batch Loss 1.5676 
Time 2.5741682052612305 








  7%|▋         | 161/2416 [07:04<1:37:10,  2.59s/it][A[A[A[A[A





161it [07:04,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 160 Batch Loss 1.2768 
Time 2.5474860668182373 








  7%|▋         | 162/2416 [07:07<1:37:27,  2.59s/it][A[A[A[A[A





162it [07:06,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 161 Batch Loss 1.3863 
Time 2.5984373092651367 








  7%|▋         | 163/2416 [07:09<1:37:34,  2.60s/it][A[A[A[A[A





163it [07:09,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 162 Batch Loss 1.3385 
Time 2.5960137844085693 








  7%|▋         | 164/2416 [07:12<1:37:13,  2.59s/it][A[A[A[A[A





164it [07:12,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 163 Batch Loss 1.5666 
Time 2.559685707092285 








  7%|▋         | 165/2416 [07:15<1:42:28,  2.73s/it][A[A[A[A[A





165it [07:15,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 164 Batch Loss 1.4727 
Time 3.0434114933013916 








  7%|▋         | 166/2416 [07:17<1:40:43,  2.69s/it][A[A[A[A[A





166it [07:17,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 165 Batch Loss 1.3212 
Time 2.561403751373291 








  7%|▋         | 167/2416 [07:20<1:39:45,  2.66s/it][A[A[A[A[A





167it [07:20,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 166 Batch Loss 1.2674 
Time 2.5894107818603516 








  7%|▋         | 168/2416 [07:23<1:39:02,  2.64s/it][A[A[A[A[A





168it [07:23,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 167 Batch Loss 1.2453 
Time 2.5872554779052734 








  7%|▋         | 169/2416 [07:25<1:38:54,  2.64s/it][A[A[A[A[A





169it [07:25,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 168 Batch Loss 1.3087 
Time 2.620821237564087 








  7%|▋         | 170/2416 [07:28<1:38:11,  2.62s/it][A[A[A[A[A





170it [07:28,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 169 Batch Loss 1.5524 
Time 2.56221866607666 








  7%|▋         | 171/2416 [07:30<1:38:34,  2.63s/it][A[A[A[A[A





171it [07:30,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 170 Batch Loss 1.3235 
Time 2.6524150371551514 








  7%|▋         | 172/2416 [07:33<1:37:53,  2.62s/it][A[A[A[A[A





172it [07:33,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 171 Batch Loss 1.3818 
Time 2.567070722579956 








  7%|▋         | 173/2416 [07:36<1:37:32,  2.61s/it][A[A[A[A[A





173it [07:36,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 172 Batch Loss 1.5833 
Time 2.576246738433838 








  7%|▋         | 174/2416 [07:39<1:44:01,  2.78s/it][A[A[A[A[A





174it [07:39,  2.78s/it][A[A[A[A[A[A

Epoch 0 Batch 173 Batch Loss 1.3904 
Time 3.184840202331543 








  7%|▋         | 175/2416 [07:41<1:42:32,  2.75s/it][A[A[A[A[A





175it [07:41,  2.75s/it][A[A[A[A[A[A

Epoch 0 Batch 174 Batch Loss 1.3113 
Time 2.6402714252471924 








  7%|▋         | 176/2416 [07:44<1:41:08,  2.71s/it][A[A[A[A[A





176it [07:44,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 175 Batch Loss 1.4221 
Time 2.6099493503570557 








  7%|▋         | 177/2416 [07:47<1:39:43,  2.67s/it][A[A[A[A[A





177it [07:47,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 176 Batch Loss 1.5260 
Time 2.580716609954834 








  7%|▋         | 178/2416 [07:49<1:38:28,  2.64s/it][A[A[A[A[A





178it [07:49,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 177 Batch Loss 1.1280 
Time 2.55454158782959 








  7%|▋         | 179/2416 [07:52<1:37:23,  2.61s/it][A[A[A[A[A





179it [07:52,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 178 Batch Loss 1.1070 
Time 2.537064552307129 








  7%|▋         | 180/2416 [07:54<1:36:43,  2.60s/it][A[A[A[A[A





180it [07:54,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 179 Batch Loss 1.3800 
Time 2.5449888706207275 








  7%|▋         | 181/2416 [07:57<1:36:16,  2.58s/it][A[A[A[A[A





181it [07:57,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 180 Batch Loss 1.4960 
Time 2.5441861152648926 








  8%|▊         | 182/2416 [07:59<1:36:06,  2.58s/it][A[A[A[A[A





182it [07:59,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 181 Batch Loss 1.2073 
Time 2.5626354217529297 








  8%|▊         | 183/2416 [08:02<1:35:51,  2.58s/it][A[A[A[A[A





183it [08:02,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 182 Batch Loss 1.3781 
Time 2.5579674243927 








  8%|▊         | 184/2416 [08:05<1:36:19,  2.59s/it][A[A[A[A[A





184it [08:05,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 183 Batch Loss 1.6495 
Time 2.6053738594055176 








  8%|▊         | 185/2416 [08:08<1:41:45,  2.74s/it][A[A[A[A[A





185it [08:08,  2.74s/it][A[A[A[A[A[A

Epoch 0 Batch 184 Batch Loss 1.4548 
Time 3.0698494911193848 








  8%|▊         | 186/2416 [08:10<1:39:27,  2.68s/it][A[A[A[A[A





186it [08:10,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 185 Batch Loss 1.2387 
Time 2.5231900215148926 








  8%|▊         | 187/2416 [08:13<1:38:05,  2.64s/it][A[A[A[A[A





187it [08:13,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 186 Batch Loss 1.4508 
Time 2.5442559719085693 








  8%|▊         | 188/2416 [08:15<1:37:16,  2.62s/it][A[A[A[A[A





188it [08:15,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 187 Batch Loss 1.4822 
Time 2.5601437091827393 








  8%|▊         | 189/2416 [08:18<1:36:40,  2.60s/it][A[A[A[A[A





189it [08:18,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 188 Batch Loss 1.2107 
Time 2.554027557373047 








  8%|▊         | 190/2416 [08:21<1:36:08,  2.59s/it][A[A[A[A[A





190it [08:20,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 189 Batch Loss 1.2131 
Time 2.549455404281616 








  8%|▊         | 191/2416 [08:23<1:35:36,  2.58s/it][A[A[A[A[A





191it [08:23,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 190 Batch Loss 1.4599 
Time 2.5374186038970947 








  8%|▊         | 192/2416 [08:26<1:35:09,  2.57s/it][A[A[A[A[A





192it [08:26,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 191 Batch Loss 1.3175 
Time 2.5264317989349365 








  8%|▊         | 193/2416 [08:28<1:35:02,  2.57s/it][A[A[A[A[A





193it [08:28,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 192 Batch Loss 1.8443 
Time 2.5517125129699707 








  8%|▊         | 194/2416 [08:31<1:34:42,  2.56s/it][A[A[A[A[A





194it [08:31,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 193 Batch Loss 1.3532 
Time 2.522817850112915 








  8%|▊         | 195/2416 [08:33<1:34:58,  2.57s/it][A[A[A[A[A





195it [08:33,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 194 Batch Loss 1.4004 
Time 2.5675692558288574 








  8%|▊         | 196/2416 [08:36<1:34:52,  2.56s/it][A[A[A[A[A





196it [08:36,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 195 Batch Loss 1.1869 
Time 2.546283721923828 








  8%|▊         | 197/2416 [08:39<1:39:48,  2.70s/it][A[A[A[A[A





197it [08:39,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 196 Batch Loss 1.4669 
Time 3.0026235580444336 








  8%|▊         | 198/2416 [08:41<1:37:57,  2.65s/it][A[A[A[A[A





198it [08:41,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 197 Batch Loss 1.1798 
Time 2.52925968170166 








  8%|▊         | 199/2416 [08:44<1:37:09,  2.63s/it][A[A[A[A[A





199it [08:44,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 198 Batch Loss 1.3482 
Time 2.568132162094116 








  8%|▊         | 200/2416 [08:47<1:36:48,  2.62s/it][A[A[A[A[A





200it [08:47,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 199 Batch Loss 1.3638 
Time 2.591085433959961 








  8%|▊         | 201/2416 [08:49<1:36:17,  2.61s/it][A[A[A[A[A





201it [08:49,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 200 Batch Loss 1.3898 
Time 2.5612382888793945 








  8%|▊         | 202/2416 [08:52<1:35:32,  2.59s/it][A[A[A[A[A





202it [08:52,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 201 Batch Loss 1.6699 
Time 2.5329959392547607 








  8%|▊         | 203/2416 [08:54<1:35:47,  2.60s/it][A[A[A[A[A





203it [08:54,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 202 Batch Loss 1.3972 
Time 2.5996861457824707 








  8%|▊         | 204/2416 [08:57<1:35:23,  2.59s/it][A[A[A[A[A





204it [08:57,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 203 Batch Loss 1.0764 
Time 2.550532579421997 








  8%|▊         | 205/2416 [08:59<1:35:31,  2.59s/it][A[A[A[A[A





205it [08:59,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 204 Batch Loss 1.1926 
Time 2.592791795730591 








  9%|▊         | 206/2416 [09:02<1:35:27,  2.59s/it][A[A[A[A[A





206it [09:02,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 205 Batch Loss 1.6053 
Time 2.5764176845550537 








  9%|▊         | 207/2416 [09:05<1:41:10,  2.75s/it][A[A[A[A[A





207it [09:05,  2.75s/it][A[A[A[A[A[A

Epoch 0 Batch 206 Batch Loss 1.6721 
Time 3.094536781311035 








  9%|▊         | 208/2416 [09:08<1:39:36,  2.71s/it][A[A[A[A[A





208it [09:08,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 207 Batch Loss 1.2982 
Time 2.5924243927001953 








  9%|▊         | 209/2416 [09:10<1:38:15,  2.67s/it][A[A[A[A[A





209it [09:10,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 208 Batch Loss 1.0754 
Time 2.5744214057922363 








  9%|▊         | 210/2416 [09:13<1:37:24,  2.65s/it][A[A[A[A[A





210it [09:13,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 209 Batch Loss 1.0324 
Time 2.5879337787628174 








  9%|▊         | 211/2416 [09:16<1:36:16,  2.62s/it][A[A[A[A[A





211it [09:16,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 210 Batch Loss 1.2597 
Time 2.534517765045166 








  9%|▉         | 212/2416 [09:18<1:35:08,  2.59s/it][A[A[A[A[A





212it [09:18,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 211 Batch Loss 1.1502 
Time 2.5123612880706787 








  9%|▉         | 213/2416 [09:21<1:35:22,  2.60s/it][A[A[A[A[A





213it [09:21,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 212 Batch Loss 1.3606 
Time 2.6083855628967285 








  9%|▉         | 214/2416 [09:23<1:35:15,  2.60s/it][A[A[A[A[A





214it [09:23,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 213 Batch Loss 1.5425 
Time 2.570498466491699 








  9%|▉         | 215/2416 [09:26<1:35:42,  2.61s/it][A[A[A[A[A





215it [09:26,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 214 Batch Loss 1.3948 
Time 2.6289689540863037 








  9%|▉         | 216/2416 [09:29<1:35:36,  2.61s/it][A[A[A[A[A





216it [09:28,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 215 Batch Loss 1.3316 
Time 2.5862834453582764 








  9%|▉         | 217/2416 [09:31<1:35:50,  2.61s/it][A[A[A[A[A





217it [09:31,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 216 Batch Loss 1.2524 
Time 2.61771821975708 








  9%|▉         | 218/2416 [09:34<1:41:07,  2.76s/it][A[A[A[A[A





218it [09:34,  2.76s/it][A[A[A[A[A[A

Epoch 0 Batch 217 Batch Loss 1.2337 
Time 3.085428237915039 








  9%|▉         | 219/2416 [09:37<1:39:49,  2.73s/it][A[A[A[A[A





219it [09:37,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 218 Batch Loss 1.2545 
Time 2.6339070796966553 








  9%|▉         | 220/2416 [09:39<1:37:54,  2.68s/it][A[A[A[A[A





220it [09:39,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 219 Batch Loss 1.6176 
Time 2.5442070960998535 








  9%|▉         | 221/2416 [09:42<1:36:30,  2.64s/it][A[A[A[A[A





221it [09:42,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 220 Batch Loss 1.0066 
Time 2.5400750637054443 








  9%|▉         | 222/2416 [09:45<1:35:20,  2.61s/it][A[A[A[A[A





222it [09:44,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 221 Batch Loss 1.4034 
Time 2.526691436767578 








  9%|▉         | 223/2416 [09:47<1:34:56,  2.60s/it][A[A[A[A[A





223it [09:47,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 222 Batch Loss 1.2462 
Time 2.562516450881958 








  9%|▉         | 224/2416 [09:50<1:34:38,  2.59s/it][A[A[A[A[A





224it [09:50,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 223 Batch Loss 1.2815 
Time 2.5611915588378906 








  9%|▉         | 225/2416 [09:52<1:34:14,  2.58s/it][A[A[A[A[A





225it [09:52,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 224 Batch Loss 1.3535 
Time 2.544879913330078 








  9%|▉         | 226/2416 [09:55<1:33:53,  2.57s/it][A[A[A[A[A





226it [09:55,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 225 Batch Loss 1.6224 
Time 2.5341551303863525 








  9%|▉         | 227/2416 [09:57<1:33:29,  2.56s/it][A[A[A[A[A





227it [09:57,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 226 Batch Loss 1.1956 
Time 2.5220959186553955 








  9%|▉         | 228/2416 [10:00<1:38:10,  2.69s/it][A[A[A[A[A





228it [10:00,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 227 Batch Loss 1.2304 
Time 2.979146718978882 








  9%|▉         | 229/2416 [10:03<1:36:48,  2.66s/it][A[A[A[A[A





229it [10:03,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 228 Batch Loss 1.3129 
Time 2.5519986152648926 








 10%|▉         | 230/2416 [10:05<1:35:28,  2.62s/it][A[A[A[A[A





230it [10:05,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 229 Batch Loss 1.0518 
Time 2.531749963760376 








 10%|▉         | 231/2416 [10:08<1:34:27,  2.59s/it][A[A[A[A[A





231it [10:08,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 230 Batch Loss 1.4098 
Time 2.520778179168701 








 10%|▉         | 232/2416 [10:11<1:34:28,  2.60s/it][A[A[A[A[A





232it [10:11,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 231 Batch Loss 1.2146 
Time 2.5883846282958984 








 10%|▉         | 233/2416 [10:13<1:33:48,  2.58s/it][A[A[A[A[A





233it [10:13,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 232 Batch Loss 1.3671 
Time 2.5272133350372314 








 10%|▉         | 234/2416 [10:16<1:34:14,  2.59s/it][A[A[A[A[A





234it [10:16,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 233 Batch Loss 1.4869 
Time 2.607316255569458 








 10%|▉         | 235/2416 [10:18<1:33:48,  2.58s/it][A[A[A[A[A





235it [10:18,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 234 Batch Loss 1.4718 
Time 2.544900417327881 








 10%|▉         | 236/2416 [10:21<1:33:34,  2.58s/it][A[A[A[A[A





236it [10:21,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 235 Batch Loss 1.5528 
Time 2.556055784225464 








 10%|▉         | 237/2416 [10:23<1:33:15,  2.57s/it][A[A[A[A[A





237it [10:23,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 236 Batch Loss 1.5170 
Time 2.5396981239318848 








 10%|▉         | 238/2416 [10:26<1:38:22,  2.71s/it][A[A[A[A[A





238it [10:26,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 237 Batch Loss 1.3004 
Time 3.0252225399017334 








 10%|▉         | 239/2416 [10:29<1:36:42,  2.67s/it][A[A[A[A[A





239it [10:29,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 238 Batch Loss 1.4931 
Time 2.5471997261047363 








 10%|▉         | 240/2416 [10:32<1:35:19,  2.63s/it][A[A[A[A[A





240it [10:32,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 239 Batch Loss 1.1182 
Time 2.535958766937256 








 10%|▉         | 241/2416 [10:34<1:34:28,  2.61s/it][A[A[A[A[A





241it [10:34,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 240 Batch Loss 1.1630 
Time 2.5394766330718994 








 10%|█         | 242/2416 [10:37<1:34:12,  2.60s/it][A[A[A[A[A





242it [10:37,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 241 Batch Loss 1.3667 
Time 2.573267698287964 








 10%|█         | 243/2416 [10:39<1:34:34,  2.61s/it][A[A[A[A[A





243it [10:39,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 242 Batch Loss 1.0822 
Time 2.6300528049468994 








 10%|█         | 244/2416 [10:42<1:34:50,  2.62s/it][A[A[A[A[A





244it [10:42,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 243 Batch Loss 1.5457 
Time 2.627319574356079 








 10%|█         | 245/2416 [10:45<1:34:10,  2.60s/it][A[A[A[A[A





245it [10:44,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 244 Batch Loss 1.5529 
Time 2.5530924797058105 








 10%|█         | 246/2416 [10:47<1:33:34,  2.59s/it][A[A[A[A[A





246it [10:47,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 245 Batch Loss 1.2144 
Time 2.532597541809082 








 10%|█         | 247/2416 [10:50<1:32:35,  2.56s/it][A[A[A[A[A





247it [10:50,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 246 Batch Loss 1.5146 
Time 2.4915900230407715 








 10%|█         | 248/2416 [10:52<1:31:46,  2.54s/it][A[A[A[A[A





248it [10:52,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 247 Batch Loss 1.4229 
Time 2.4852094650268555 








 10%|█         | 249/2416 [10:55<1:31:31,  2.53s/it][A[A[A[A[A





249it [10:55,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 248 Batch Loss 1.2222 
Time 2.5135788917541504 








 10%|█         | 250/2416 [10:58<1:36:29,  2.67s/it][A[A[A[A[A





250it [10:58,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 249 Batch Loss 1.2789 
Time 2.985795259475708 








 10%|█         | 251/2416 [11:00<1:34:36,  2.62s/it][A[A[A[A[A





251it [11:00,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 250 Batch Loss 1.1336 
Time 2.496687173843384 








 10%|█         | 252/2416 [11:03<1:33:15,  2.59s/it][A[A[A[A[A





252it [11:03,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 251 Batch Loss 1.2531 
Time 2.486807107925415 








 10%|█         | 253/2416 [11:05<1:32:39,  2.57s/it][A[A[A[A[A





253it [11:05,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 252 Batch Loss 1.1668 
Time 2.5213963985443115 








 11%|█         | 254/2416 [11:08<1:32:15,  2.56s/it][A[A[A[A[A





254it [11:08,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 253 Batch Loss 1.2169 
Time 2.5205600261688232 








 11%|█         | 255/2416 [11:10<1:32:09,  2.56s/it][A[A[A[A[A





255it [11:10,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 254 Batch Loss 1.1694 
Time 2.5442919731140137 








 11%|█         | 256/2416 [11:13<1:31:38,  2.55s/it][A[A[A[A[A





256it [11:13,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 255 Batch Loss 1.3018 
Time 2.5016050338745117 








 11%|█         | 257/2416 [11:15<1:31:37,  2.55s/it][A[A[A[A[A





257it [11:15,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 256 Batch Loss 1.3051 
Time 2.535856008529663 








 11%|█         | 258/2416 [11:18<1:31:18,  2.54s/it][A[A[A[A[A





258it [11:18,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 257 Batch Loss 1.1848 
Time 2.5104448795318604 








 11%|█         | 259/2416 [11:20<1:31:29,  2.55s/it][A[A[A[A[A





259it [11:20,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 258 Batch Loss 1.4809 
Time 2.549509048461914 








 11%|█         | 260/2416 [11:23<1:36:07,  2.68s/it][A[A[A[A[A





260it [11:23,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 259 Batch Loss 1.4503 
Time 2.969357967376709 








 11%|█         | 261/2416 [11:26<1:34:18,  2.63s/it][A[A[A[A[A





261it [11:26,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 260 Batch Loss 1.3298 
Time 2.495171308517456 








 11%|█         | 262/2416 [11:28<1:33:30,  2.60s/it][A[A[A[A[A





262it [11:28,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 261 Batch Loss 1.3479 
Time 2.5407967567443848 








 11%|█         | 263/2416 [11:31<1:32:48,  2.59s/it][A[A[A[A[A





263it [11:31,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 262 Batch Loss 1.2887 
Time 2.5289371013641357 








 11%|█         | 264/2416 [11:33<1:32:11,  2.57s/it][A[A[A[A[A





264it [11:33,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 263 Batch Loss 1.3874 
Time 2.5223467350006104 








 11%|█         | 265/2416 [11:36<1:31:34,  2.55s/it][A[A[A[A[A





265it [11:36,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 264 Batch Loss 1.2713 
Time 2.504432439804077 








 11%|█         | 266/2416 [11:39<1:31:08,  2.54s/it][A[A[A[A[A





266it [11:38,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 265 Batch Loss 1.6528 
Time 2.503809928894043 








 11%|█         | 267/2416 [11:41<1:31:16,  2.55s/it][A[A[A[A[A





267it [11:41,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 266 Batch Loss 1.1805 
Time 2.541991949081421 








 11%|█         | 268/2416 [11:44<1:31:06,  2.55s/it][A[A[A[A[A





268it [11:44,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 267 Batch Loss 1.2737 
Time 2.5224199295043945 








 11%|█         | 269/2416 [11:46<1:31:22,  2.55s/it][A[A[A[A[A





269it [11:46,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 268 Batch Loss 1.3373 
Time 2.5611469745635986 








 11%|█         | 270/2416 [11:49<1:35:54,  2.68s/it][A[A[A[A[A





270it [11:49,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 269 Batch Loss 1.3067 
Time 2.9656217098236084 








 11%|█         | 271/2416 [11:52<1:35:09,  2.66s/it][A[A[A[A[A





271it [11:52,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 270 Batch Loss 1.1257 
Time 2.600773811340332 








 11%|█▏        | 272/2416 [11:54<1:33:38,  2.62s/it][A[A[A[A[A





272it [11:54,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 271 Batch Loss 1.3112 
Time 2.511225461959839 








 11%|█▏        | 273/2416 [11:57<1:32:23,  2.59s/it][A[A[A[A[A





273it [11:57,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 272 Batch Loss 1.3439 
Time 2.4931838512420654 








 11%|█▏        | 274/2416 [11:59<1:31:35,  2.57s/it][A[A[A[A[A





274it [11:59,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 273 Batch Loss 1.3146 
Time 2.504201889038086 








 11%|█▏        | 275/2416 [12:02<1:31:21,  2.56s/it][A[A[A[A[A





275it [12:02,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 274 Batch Loss 1.5759 
Time 2.5310778617858887 








 11%|█▏        | 276/2416 [12:04<1:30:49,  2.55s/it][A[A[A[A[A





276it [12:04,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 275 Batch Loss 1.5648 
Time 2.5037992000579834 








 11%|█▏        | 277/2416 [12:07<1:30:54,  2.55s/it][A[A[A[A[A





277it [12:07,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 276 Batch Loss 1.4733 
Time 2.543604850769043 








 12%|█▏        | 278/2416 [12:09<1:30:40,  2.54s/it][A[A[A[A[A





278it [12:09,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 277 Batch Loss 1.3757 
Time 2.516745090484619 








 12%|█▏        | 279/2416 [12:12<1:30:48,  2.55s/it][A[A[A[A[A





279it [12:12,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 278 Batch Loss 1.4615 
Time 2.5472631454467773 








 12%|█▏        | 280/2416 [12:15<1:35:23,  2.68s/it][A[A[A[A[A





280it [12:15,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 279 Batch Loss 1.3076 
Time 2.9704911708831787 








 12%|█▏        | 281/2416 [12:18<1:34:02,  2.64s/it][A[A[A[A[A





281it [12:18,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 280 Batch Loss 1.3914 
Time 2.5459349155426025 








 12%|█▏        | 282/2416 [12:20<1:33:30,  2.63s/it][A[A[A[A[A





282it [12:20,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 281 Batch Loss 1.1172 
Time 2.5904524326324463 








 12%|█▏        | 283/2416 [12:23<1:33:15,  2.62s/it][A[A[A[A[A





283it [12:23,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 282 Batch Loss 1.5159 
Time 2.593841791152954 








 12%|█▏        | 284/2416 [12:25<1:32:18,  2.60s/it][A[A[A[A[A





284it [12:25,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 283 Batch Loss 1.4789 
Time 2.5191409587860107 








 12%|█▏        | 285/2416 [12:28<1:31:14,  2.57s/it][A[A[A[A[A





285it [12:28,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 284 Batch Loss 1.6680 
Time 2.4933905601501465 








 12%|█▏        | 286/2416 [12:30<1:30:54,  2.56s/it][A[A[A[A[A





286it [12:30,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 285 Batch Loss 1.3108 
Time 2.5331759452819824 








 12%|█▏        | 287/2416 [12:33<1:30:41,  2.56s/it][A[A[A[A[A





287it [12:33,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 286 Batch Loss 1.1800 
Time 2.528610944747925 








 12%|█▏        | 288/2416 [12:35<1:30:26,  2.55s/it][A[A[A[A[A





288it [12:35,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 287 Batch Loss 1.5450 
Time 2.5202057361602783 








 12%|█▏        | 289/2416 [12:38<1:29:55,  2.54s/it][A[A[A[A[A





289it [12:38,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 288 Batch Loss 1.6488 
Time 2.4954707622528076 








 12%|█▏        | 290/2416 [12:41<1:30:06,  2.54s/it][A[A[A[A[A





290it [12:40,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 289 Batch Loss 1.6192 
Time 2.5450279712677 








 12%|█▏        | 291/2416 [12:44<1:35:08,  2.69s/it][A[A[A[A[A





291it [12:44,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 290 Batch Loss 1.2481 
Time 3.0109500885009766 








 12%|█▏        | 292/2416 [12:46<1:34:09,  2.66s/it][A[A[A[A[A





292it [12:46,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 291 Batch Loss 1.3193 
Time 2.5826737880706787 








 12%|█▏        | 293/2416 [12:49<1:34:01,  2.66s/it][A[A[A[A[A





293it [12:49,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 292 Batch Loss 1.6542 
Time 2.636711597442627 








 12%|█▏        | 294/2416 [12:51<1:33:44,  2.65s/it][A[A[A[A[A





294it [12:51,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 293 Batch Loss 1.4402 
Time 2.6157803535461426 








 12%|█▏        | 295/2416 [12:54<1:33:25,  2.64s/it][A[A[A[A[A





295it [12:54,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 294 Batch Loss 1.1206 
Time 2.613524913787842 








 12%|█▏        | 296/2416 [12:57<1:32:39,  2.62s/it][A[A[A[A[A





296it [12:57,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 295 Batch Loss 1.3407 
Time 2.5614264011383057 








 12%|█▏        | 297/2416 [12:59<1:31:07,  2.58s/it][A[A[A[A[A





297it [12:59,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 296 Batch Loss 1.3042 
Time 2.4668726921081543 








 12%|█▏        | 298/2416 [13:02<1:30:14,  2.56s/it][A[A[A[A[A





298it [13:02,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 297 Batch Loss 1.4429 
Time 2.4891324043273926 








 12%|█▏        | 299/2416 [13:04<1:29:50,  2.55s/it][A[A[A[A[A





299it [13:04,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 298 Batch Loss 1.2033 
Time 2.5081703662872314 








 12%|█▏        | 300/2416 [13:07<1:30:00,  2.55s/it][A[A[A[A[A





300it [13:07,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 299 Batch Loss 1.2550 
Time 2.5494515895843506 








 12%|█▏        | 301/2416 [13:10<1:34:34,  2.68s/it][A[A[A[A[A





301it [13:10,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 300 Batch Loss 1.0500 
Time 2.970520257949829 








 12%|█▎        | 302/2416 [13:12<1:32:49,  2.63s/it][A[A[A[A[A





302it [13:12,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 301 Batch Loss 1.1071 
Time 2.509852886199951 








 13%|█▎        | 303/2416 [13:15<1:31:58,  2.61s/it][A[A[A[A[A





303it [13:15,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 302 Batch Loss 1.5693 
Time 2.5426106452941895 








 13%|█▎        | 304/2416 [13:17<1:30:31,  2.57s/it][A[A[A[A[A





304it [13:17,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 303 Batch Loss 1.2664 
Time 2.468594789505005 








 13%|█▎        | 305/2416 [13:20<1:29:28,  2.54s/it][A[A[A[A[A





305it [13:20,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 304 Batch Loss 1.3774 
Time 2.4626731872558594 








 13%|█▎        | 306/2416 [13:22<1:28:59,  2.53s/it][A[A[A[A[A





306it [13:22,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 305 Batch Loss 1.0854 
Time 2.489410161972046 








 13%|█▎        | 307/2416 [13:25<1:29:06,  2.54s/it][A[A[A[A[A





307it [13:25,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 306 Batch Loss 1.4150 
Time 2.5353360176086426 








 13%|█▎        | 308/2416 [13:27<1:28:52,  2.53s/it][A[A[A[A[A





308it [13:27,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 307 Batch Loss 1.0076 
Time 2.505951404571533 








 13%|█▎        | 309/2416 [13:30<1:28:33,  2.52s/it][A[A[A[A[A





309it [13:30,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 308 Batch Loss 1.5665 
Time 2.491068124771118 








 13%|█▎        | 310/2416 [13:32<1:27:54,  2.50s/it][A[A[A[A[A





310it [13:32,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 309 Batch Loss 1.1719 
Time 2.450223445892334 








 13%|█▎        | 311/2416 [13:35<1:31:55,  2.62s/it][A[A[A[A[A





311it [13:35,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 310 Batch Loss 1.4496 
Time 2.871889114379883 








 13%|█▎        | 312/2416 [13:38<1:29:45,  2.56s/it][A[A[A[A[A





312it [13:38,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 311 Batch Loss 1.3184 
Time 2.4066848754882812 








 13%|█▎        | 313/2416 [13:40<1:28:54,  2.54s/it][A[A[A[A[A





313it [13:40,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 312 Batch Loss 1.3053 
Time 2.472511053085327 








 13%|█▎        | 314/2416 [13:42<1:27:59,  2.51s/it][A[A[A[A[A





314it [13:42,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 313 Batch Loss 1.3110 
Time 2.440361261367798 








 13%|█▎        | 315/2416 [13:45<1:27:53,  2.51s/it][A[A[A[A[A





315it [13:45,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 314 Batch Loss 1.2137 
Time 2.491347551345825 








 13%|█▎        | 316/2416 [13:47<1:27:14,  2.49s/it][A[A[A[A[A





316it [13:47,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 315 Batch Loss 1.5014 
Time 2.440333127975464 








 13%|█▎        | 317/2416 [13:50<1:26:53,  2.48s/it][A[A[A[A[A





317it [13:50,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 316 Batch Loss 1.2088 
Time 2.455509662628174 








 13%|█▎        | 318/2416 [13:52<1:27:35,  2.50s/it][A[A[A[A[A





318it [13:52,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 317 Batch Loss 1.2388 
Time 2.5392158031463623 








 13%|█▎        | 319/2416 [13:55<1:26:58,  2.49s/it][A[A[A[A[A





319it [13:55,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 318 Batch Loss 1.3342 
Time 2.442687749862671 








 13%|█▎        | 320/2416 [13:57<1:27:16,  2.50s/it][A[A[A[A[A





320it [13:57,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 319 Batch Loss 1.1275 
Time 2.509934663772583 








 13%|█▎        | 321/2416 [14:00<1:31:53,  2.63s/it][A[A[A[A[A





321it [14:00,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 320 Batch Loss 0.9276 
Time 2.9289968013763428 








 13%|█▎        | 322/2416 [14:03<1:30:07,  2.58s/it][A[A[A[A[A





322it [14:03,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 321 Batch Loss 1.4210 
Time 2.451531410217285 








 13%|█▎        | 323/2416 [14:05<1:29:09,  2.56s/it][A[A[A[A[A





323it [14:05,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 322 Batch Loss 1.4091 
Time 2.484818935394287 








 13%|█▎        | 324/2416 [14:08<1:28:32,  2.54s/it][A[A[A[A[A





324it [14:08,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 323 Batch Loss 1.1702 
Time 2.4932868480682373 








 13%|█▎        | 325/2416 [14:10<1:27:55,  2.52s/it][A[A[A[A[A





325it [14:10,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 324 Batch Loss 1.1999 
Time 2.4770021438598633 








 13%|█▎        | 326/2416 [14:13<1:27:40,  2.52s/it][A[A[A[A[A





326it [14:13,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 325 Batch Loss 1.3069 
Time 2.490262269973755 








 14%|█▎        | 327/2416 [14:15<1:27:21,  2.51s/it][A[A[A[A[A





327it [14:15,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 326 Batch Loss 1.2912 
Time 2.4805831909179688 








 14%|█▎        | 328/2416 [14:18<1:27:55,  2.53s/it][A[A[A[A[A





328it [14:18,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 327 Batch Loss 1.3479 
Time 2.554020404815674 








 14%|█▎        | 329/2416 [14:20<1:27:15,  2.51s/it][A[A[A[A[A





329it [14:20,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 328 Batch Loss 1.5396 
Time 2.455334186553955 








 14%|█▎        | 330/2416 [14:23<1:27:17,  2.51s/it][A[A[A[A[A





330it [14:23,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 329 Batch Loss 1.3650 
Time 2.509406566619873 








 14%|█▎        | 331/2416 [14:25<1:27:02,  2.50s/it][A[A[A[A[A





331it [14:25,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 330 Batch Loss 1.3091 
Time 2.474006175994873 








 14%|█▎        | 332/2416 [14:28<1:31:58,  2.65s/it][A[A[A[A[A





332it [14:28,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 331 Batch Loss 1.4974 
Time 2.9693901538848877 








 14%|█▍        | 333/2416 [14:31<1:30:14,  2.60s/it][A[A[A[A[A





333it [14:31,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 332 Batch Loss 1.1868 
Time 2.474517583847046 








 14%|█▍        | 334/2416 [14:33<1:28:57,  2.56s/it][A[A[A[A[A





334it [14:33,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 333 Batch Loss 1.3068 
Time 2.4708311557769775 








 14%|█▍        | 335/2416 [14:36<1:28:06,  2.54s/it][A[A[A[A[A





335it [14:36,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 334 Batch Loss 1.4029 
Time 2.473775625228882 








 14%|█▍        | 336/2416 [14:38<1:27:57,  2.54s/it][A[A[A[A[A





336it [14:38,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 335 Batch Loss 1.2752 
Time 2.5099010467529297 








 14%|█▍        | 337/2416 [14:41<1:27:24,  2.52s/it][A[A[A[A[A





337it [14:41,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 336 Batch Loss 1.2464 
Time 2.4811530113220215 








 14%|█▍        | 338/2416 [14:43<1:27:06,  2.52s/it][A[A[A[A[A





338it [14:43,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 337 Batch Loss 1.3355 
Time 2.4888384342193604 








 14%|█▍        | 339/2416 [14:46<1:26:40,  2.50s/it][A[A[A[A[A





339it [14:46,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 338 Batch Loss 1.6454 
Time 2.4680676460266113 








 14%|█▍        | 340/2416 [14:48<1:26:46,  2.51s/it][A[A[A[A[A





340it [14:48,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 339 Batch Loss 1.3463 
Time 2.5041608810424805 








 14%|█▍        | 341/2416 [14:51<1:26:21,  2.50s/it][A[A[A[A[A





341it [14:51,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 340 Batch Loss 1.2973 
Time 2.4591305255889893 








 14%|█▍        | 342/2416 [14:53<1:26:24,  2.50s/it][A[A[A[A[A





342it [14:53,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 341 Batch Loss 1.4176 
Time 2.5006673336029053 








 14%|█▍        | 343/2416 [14:56<1:30:50,  2.63s/it][A[A[A[A[A





343it [14:56,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 342 Batch Loss 1.3023 
Time 2.919496774673462 








 14%|█▍        | 344/2416 [14:59<1:29:30,  2.59s/it][A[A[A[A[A





344it [14:59,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 343 Batch Loss 1.2734 
Time 2.492046594619751 








 14%|█▍        | 345/2416 [15:01<1:28:31,  2.56s/it][A[A[A[A[A





345it [15:01,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 344 Batch Loss 1.1708 
Time 2.491539716720581 








 14%|█▍        | 346/2416 [15:04<1:27:52,  2.55s/it][A[A[A[A[A





346it [15:04,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 345 Batch Loss 1.5855 
Time 2.4917328357696533 








 14%|█▍        | 347/2416 [15:06<1:27:19,  2.53s/it][A[A[A[A[A





347it [15:06,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 346 Batch Loss 1.3951 
Time 2.485996723175049 








 14%|█▍        | 348/2416 [15:09<1:27:32,  2.54s/it][A[A[A[A[A





348it [15:09,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 347 Batch Loss 1.7508 
Time 2.542412757873535 








 14%|█▍        | 349/2416 [15:11<1:27:52,  2.55s/it][A[A[A[A[A





349it [15:11,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 348 Batch Loss 1.2069 
Time 2.5617408752441406 








 14%|█▍        | 350/2416 [15:14<1:27:49,  2.55s/it][A[A[A[A[A





350it [15:14,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 349 Batch Loss 1.5509 
Time 2.539929151535034 








 15%|█▍        | 351/2416 [15:16<1:27:48,  2.55s/it][A[A[A[A[A





351it [15:16,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 350 Batch Loss 1.3928 
Time 2.537550210952759 








 15%|█▍        | 352/2416 [15:19<1:28:01,  2.56s/it][A[A[A[A[A





352it [15:19,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 351 Batch Loss 1.2843 
Time 2.5652852058410645 








 15%|█▍        | 353/2416 [15:22<1:28:04,  2.56s/it][A[A[A[A[A





353it [15:22,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 352 Batch Loss 1.1310 
Time 2.5582640171051025 








 15%|█▍        | 354/2416 [15:25<1:32:15,  2.68s/it][A[A[A[A[A





354it [15:25,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 353 Batch Loss 1.1898 
Time 2.9593288898468018 








 15%|█▍        | 355/2416 [15:27<1:30:51,  2.65s/it][A[A[A[A[A





355it [15:27,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 354 Batch Loss 1.2758 
Time 2.5414834022521973 








 15%|█▍        | 356/2416 [15:30<1:30:36,  2.64s/it][A[A[A[A[A





356it [15:30,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 355 Batch Loss 1.2768 
Time 2.6100215911865234 








 15%|█▍        | 357/2416 [15:32<1:29:38,  2.61s/it][A[A[A[A[A





357it [15:32,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 356 Batch Loss 1.1060 
Time 2.5388617515563965 








 15%|█▍        | 358/2416 [15:35<1:28:57,  2.59s/it][A[A[A[A[A





358it [15:35,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 357 Batch Loss 1.4785 
Time 2.53507399559021 








 15%|█▍        | 359/2416 [15:37<1:28:36,  2.58s/it][A[A[A[A[A





359it [15:37,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 358 Batch Loss 1.3161 
Time 2.558173656463623 








 15%|█▍        | 360/2416 [15:40<1:28:40,  2.59s/it][A[A[A[A[A





360it [15:40,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 359 Batch Loss 1.1793 
Time 2.579122304916382 








 15%|█▍        | 361/2416 [15:43<1:28:10,  2.57s/it][A[A[A[A[A





361it [15:43,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 360 Batch Loss 1.1001 
Time 2.526693344116211 








 15%|█▍        | 362/2416 [15:45<1:28:37,  2.59s/it][A[A[A[A[A





362it [15:45,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 361 Batch Loss 1.4834 
Time 2.6064565181732178 








 15%|█▌        | 363/2416 [15:48<1:29:20,  2.61s/it][A[A[A[A[A





363it [15:48,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 362 Batch Loss 1.3494 
Time 2.64254093170166 








 15%|█▌        | 364/2416 [15:50<1:29:13,  2.61s/it][A[A[A[A[A





364it [15:50,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 363 Batch Loss 1.3436 
Time 2.5922088623046875 








 15%|█▌        | 365/2416 [15:53<1:33:36,  2.74s/it][A[A[A[A[A





365it [15:53,  2.74s/it][A[A[A[A[A[A

Epoch 0 Batch 364 Batch Loss 1.4816 
Time 3.0245327949523926 








 15%|█▌        | 366/2416 [15:56<1:31:39,  2.68s/it][A[A[A[A[A





366it [15:56,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 365 Batch Loss 1.3214 
Time 2.544358015060425 








 15%|█▌        | 367/2416 [15:59<1:30:24,  2.65s/it][A[A[A[A[A





367it [15:59,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 366 Batch Loss 1.4273 
Time 2.5569908618927 








 15%|█▌        | 368/2416 [16:01<1:29:50,  2.63s/it][A[A[A[A[A





368it [16:01,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 367 Batch Loss 1.3241 
Time 2.586076259613037 








 15%|█▌        | 369/2416 [16:04<1:29:21,  2.62s/it][A[A[A[A[A





369it [16:04,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 368 Batch Loss 1.3581 
Time 2.5739662647247314 








 15%|█▌        | 370/2416 [16:06<1:28:38,  2.60s/it][A[A[A[A[A





370it [16:06,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 369 Batch Loss 1.2489 
Time 2.5466952323913574 








 15%|█▌        | 371/2416 [16:09<1:28:39,  2.60s/it][A[A[A[A[A





371it [16:09,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 370 Batch Loss 1.5150 
Time 2.591569423675537 








 15%|█▌        | 372/2416 [16:12<1:28:35,  2.60s/it][A[A[A[A[A





372it [16:12,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 371 Batch Loss 1.2430 
Time 2.586273431777954 








 15%|█▌        | 373/2416 [16:14<1:28:49,  2.61s/it][A[A[A[A[A





373it [16:14,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 372 Batch Loss 1.3999 
Time 2.6207845211029053 








 15%|█▌        | 374/2416 [16:17<1:28:45,  2.61s/it][A[A[A[A[A





374it [16:17,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 373 Batch Loss 1.3641 
Time 2.5970189571380615 








 16%|█▌        | 375/2416 [16:20<1:32:57,  2.73s/it][A[A[A[A[A





375it [16:20,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 374 Batch Loss 1.2268 
Time 3.0130465030670166 








 16%|█▌        | 376/2416 [16:22<1:31:03,  2.68s/it][A[A[A[A[A





376it [16:22,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 375 Batch Loss 1.3550 
Time 2.541971206665039 








 16%|█▌        | 377/2416 [16:25<1:29:56,  2.65s/it][A[A[A[A[A





377it [16:25,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 376 Batch Loss 1.1182 
Time 2.564565658569336 








 16%|█▌        | 378/2416 [16:28<1:29:26,  2.63s/it][A[A[A[A[A





378it [16:28,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 377 Batch Loss 1.5153 
Time 2.58740496635437 








 16%|█▌        | 379/2416 [16:30<1:28:55,  2.62s/it][A[A[A[A[A





379it [16:30,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 378 Batch Loss 1.4407 
Time 2.5763604640960693 








 16%|█▌        | 380/2416 [16:33<1:28:21,  2.60s/it][A[A[A[A[A





380it [16:33,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 379 Batch Loss 1.1149 
Time 2.546762466430664 








 16%|█▌        | 381/2416 [16:35<1:28:31,  2.61s/it][A[A[A[A[A





381it [16:35,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 380 Batch Loss 1.4596 
Time 2.6119441986083984 








 16%|█▌        | 382/2416 [16:38<1:28:14,  2.60s/it][A[A[A[A[A





382it [16:38,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 381 Batch Loss 1.2831 
Time 2.571589231491089 








 16%|█▌        | 383/2416 [16:41<1:28:19,  2.61s/it][A[A[A[A[A





383it [16:40,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 382 Batch Loss 1.3294 
Time 2.6030027866363525 








 16%|█▌        | 384/2416 [16:43<1:27:52,  2.59s/it][A[A[A[A[A





384it [16:43,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 383 Batch Loss 1.6347 
Time 2.548945188522339 








 16%|█▌        | 385/2416 [16:46<1:28:05,  2.60s/it][A[A[A[A[A





385it [16:46,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 384 Batch Loss 1.3763 
Time 2.6040570735931396 








 16%|█▌        | 386/2416 [16:49<1:32:41,  2.74s/it][A[A[A[A[A





386it [16:49,  2.74s/it][A[A[A[A[A[A

Epoch 0 Batch 385 Batch Loss 1.4219 
Time 3.0392606258392334 








 16%|█▌        | 387/2416 [16:51<1:31:37,  2.71s/it][A[A[A[A[A





387it [16:51,  2.71s/it][A[A[A[A[A[A

Epoch 0 Batch 386 Batch Loss 1.4498 
Time 2.62713623046875 








 16%|█▌        | 388/2416 [16:54<1:30:21,  2.67s/it][A[A[A[A[A





388it [16:54,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 387 Batch Loss 1.5317 
Time 2.577460527420044 








 16%|█▌        | 389/2416 [16:57<1:29:09,  2.64s/it][A[A[A[A[A





389it [16:57,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 388 Batch Loss 1.1574 
Time 2.5493762493133545 








 16%|█▌        | 390/2416 [16:59<1:28:11,  2.61s/it][A[A[A[A[A





390it [16:59,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 389 Batch Loss 1.5697 
Time 2.5366852283477783 








 16%|█▌        | 391/2416 [17:02<1:28:14,  2.61s/it][A[A[A[A[A





391it [17:02,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 390 Batch Loss 1.1670 
Time 2.6092092990875244 








 16%|█▌        | 392/2416 [17:04<1:27:13,  2.59s/it][A[A[A[A[A





392it [17:04,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 391 Batch Loss 1.0927 
Time 2.505807399749756 








 16%|█▋        | 393/2416 [17:07<1:26:37,  2.57s/it][A[A[A[A[A





393it [17:07,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 392 Batch Loss 1.1992 
Time 2.5188426971435547 








 16%|█▋        | 394/2416 [17:09<1:26:07,  2.56s/it][A[A[A[A[A





394it [17:09,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 393 Batch Loss 1.2478 
Time 2.514570951461792 








 16%|█▋        | 395/2416 [17:12<1:25:58,  2.55s/it][A[A[A[A[A





395it [17:12,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 394 Batch Loss 1.4192 
Time 2.5279123783111572 








 16%|█▋        | 396/2416 [17:15<1:30:50,  2.70s/it][A[A[A[A[A





396it [17:15,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 395 Batch Loss 1.3117 
Time 3.0281081199645996 








 16%|█▋        | 397/2416 [17:17<1:29:35,  2.66s/it][A[A[A[A[A





397it [17:17,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 396 Batch Loss 1.3392 
Time 2.5673532485961914 








 16%|█▋        | 398/2416 [17:20<1:28:20,  2.63s/it][A[A[A[A[A





398it [17:20,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 397 Batch Loss 1.2425 
Time 2.529789447784424 








 17%|█▋        | 399/2416 [17:23<1:27:41,  2.61s/it][A[A[A[A[A





399it [17:23,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 398 Batch Loss 1.6493 
Time 2.5534212589263916 








 17%|█▋        | 400/2416 [17:25<1:27:05,  2.59s/it][A[A[A[A[A





400it [17:25,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 399 Batch Loss 1.3556 
Time 2.545891523361206 








 17%|█▋        | 401/2416 [17:28<1:26:34,  2.58s/it][A[A[A[A[A





401it [17:28,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 400 Batch Loss 1.2981 
Time 2.533001661300659 








 17%|█▋        | 402/2416 [17:30<1:26:12,  2.57s/it][A[A[A[A[A





402it [17:30,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 401 Batch Loss 1.3099 
Time 2.5311925411224365 








 17%|█▋        | 403/2416 [17:33<1:25:57,  2.56s/it][A[A[A[A[A





403it [17:33,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 402 Batch Loss 1.7206 
Time 2.5390050411224365 








 17%|█▋        | 404/2416 [17:35<1:25:33,  2.55s/it][A[A[A[A[A





404it [17:35,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 403 Batch Loss 1.2389 
Time 2.5162625312805176 








 17%|█▋        | 405/2416 [17:38<1:26:00,  2.57s/it][A[A[A[A[A





405it [17:38,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 404 Batch Loss 1.3285 
Time 2.58788800239563 








 17%|█▋        | 406/2416 [17:40<1:26:23,  2.58s/it][A[A[A[A[A





406it [17:40,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 405 Batch Loss 1.4682 
Time 2.5892388820648193 








 17%|█▋        | 407/2416 [17:44<1:31:02,  2.72s/it][A[A[A[A[A





407it [17:44,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 406 Batch Loss 1.3890 
Time 3.0383126735687256 








 17%|█▋        | 408/2416 [17:46<1:29:41,  2.68s/it][A[A[A[A[A





408it [17:46,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 407 Batch Loss 1.5421 
Time 2.579847812652588 








 17%|█▋        | 409/2416 [17:49<1:28:04,  2.63s/it][A[A[A[A[A





409it [17:49,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 408 Batch Loss 1.1167 
Time 2.5095927715301514 








 17%|█▋        | 410/2416 [17:51<1:27:02,  2.60s/it][A[A[A[A[A





410it [17:51,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 409 Batch Loss 1.2722 
Time 2.5251681804656982 








 17%|█▋        | 411/2416 [17:54<1:27:12,  2.61s/it][A[A[A[A[A





411it [17:54,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 410 Batch Loss 1.2098 
Time 2.609173059463501 








 17%|█▋        | 412/2416 [17:56<1:27:10,  2.61s/it][A[A[A[A[A





412it [17:56,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 411 Batch Loss 1.5477 
Time 2.5996649265289307 








 17%|█▋        | 413/2416 [17:59<1:27:04,  2.61s/it][A[A[A[A[A





413it [17:59,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 412 Batch Loss 1.1697 
Time 2.5870912075042725 








 17%|█▋        | 414/2416 [18:02<1:27:15,  2.62s/it][A[A[A[A[A





414it [18:02,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 413 Batch Loss 1.2086 
Time 2.62443208694458 








 17%|█▋        | 415/2416 [18:04<1:27:15,  2.62s/it][A[A[A[A[A





415it [18:04,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 414 Batch Loss 1.4160 
Time 2.6034960746765137 








 17%|█▋        | 416/2416 [18:07<1:27:26,  2.62s/it][A[A[A[A[A





416it [18:07,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 415 Batch Loss 1.2174 
Time 2.6264560222625732 








 17%|█▋        | 417/2416 [18:09<1:26:36,  2.60s/it][A[A[A[A[A





417it [18:09,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 416 Batch Loss 1.6405 
Time 2.528650999069214 








 17%|█▋        | 418/2416 [18:12<1:30:52,  2.73s/it][A[A[A[A[A





418it [18:12,  2.73s/it][A[A[A[A[A[A

Epoch 0 Batch 417 Batch Loss 0.9898 
Time 3.0144760608673096 








 17%|█▋        | 419/2416 [18:15<1:29:15,  2.68s/it][A[A[A[A[A





419it [18:15,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 418 Batch Loss 1.4883 
Time 2.5610711574554443 








 17%|█▋        | 420/2416 [18:18<1:27:40,  2.64s/it][A[A[A[A[A





420it [18:18,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 419 Batch Loss 1.2472 
Time 2.515277147293091 








 17%|█▋        | 421/2416 [18:20<1:26:44,  2.61s/it][A[A[A[A[A





421it [18:20,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 420 Batch Loss 1.2379 
Time 2.5277388095855713 








 17%|█▋        | 422/2416 [18:23<1:25:52,  2.58s/it][A[A[A[A[A





422it [18:23,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 421 Batch Loss 1.4373 
Time 2.5160775184631348 








 18%|█▊        | 423/2416 [18:25<1:25:21,  2.57s/it][A[A[A[A[A





423it [18:25,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 422 Batch Loss 1.5382 
Time 2.5247843265533447 








 18%|█▊        | 424/2416 [18:28<1:24:49,  2.55s/it][A[A[A[A[A





424it [18:28,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 423 Batch Loss 1.2758 
Time 2.5075578689575195 








 18%|█▊        | 425/2416 [18:30<1:24:33,  2.55s/it][A[A[A[A[A





425it [18:30,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 424 Batch Loss 1.3860 
Time 2.5219316482543945 








 18%|█▊        | 426/2416 [18:33<1:24:14,  2.54s/it][A[A[A[A[A





426it [18:33,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 425 Batch Loss 1.5126 
Time 2.503056526184082 








 18%|█▊        | 427/2416 [18:35<1:24:05,  2.54s/it][A[A[A[A[A





427it [18:35,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 426 Batch Loss 1.5668 
Time 2.5206265449523926 








 18%|█▊        | 428/2416 [18:38<1:29:30,  2.70s/it][A[A[A[A[A





428it [18:38,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 427 Batch Loss 1.1392 
Time 3.0753204822540283 








 18%|█▊        | 429/2416 [18:41<1:28:39,  2.68s/it][A[A[A[A[A





429it [18:41,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 428 Batch Loss 1.2546 
Time 2.607491970062256 








 18%|█▊        | 430/2416 [18:44<1:27:07,  2.63s/it][A[A[A[A[A





430it [18:43,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 429 Batch Loss 1.4416 
Time 2.519775867462158 








 18%|█▊        | 431/2416 [18:46<1:27:01,  2.63s/it][A[A[A[A[A





431it [18:46,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 430 Batch Loss 1.0555 
Time 2.620110273361206 








 18%|█▊        | 432/2416 [18:49<1:25:58,  2.60s/it][A[A[A[A[A





432it [18:49,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 431 Batch Loss 1.3769 
Time 2.519956350326538 








 18%|█▊        | 433/2416 [18:51<1:25:16,  2.58s/it][A[A[A[A[A





433it [18:51,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 432 Batch Loss 1.3460 
Time 2.52180552482605 








 18%|█▊        | 434/2416 [18:54<1:25:00,  2.57s/it][A[A[A[A[A





434it [18:54,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 433 Batch Loss 1.3459 
Time 2.5489296913146973 








 18%|█▊        | 435/2416 [18:56<1:24:32,  2.56s/it][A[A[A[A[A





435it [18:56,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 434 Batch Loss 1.5067 
Time 2.518950939178467 








 18%|█▊        | 436/2416 [18:59<1:24:14,  2.55s/it][A[A[A[A[A





436it [18:59,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 435 Batch Loss 1.3457 
Time 2.5190842151641846 








 18%|█▊        | 437/2416 [19:01<1:24:55,  2.57s/it][A[A[A[A[A





437it [19:01,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 436 Batch Loss 1.1287 
Time 2.609894037246704 








 18%|█▊        | 438/2416 [19:04<1:24:55,  2.58s/it][A[A[A[A[A





438it [19:04,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 437 Batch Loss 1.2555 
Time 2.571315050125122 








 18%|█▊        | 439/2416 [19:07<1:24:55,  2.58s/it][A[A[A[A[A





439it [19:07,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 438 Batch Loss 1.5104 
Time 2.566659688949585 








 18%|█▊        | 440/2416 [19:10<1:29:40,  2.72s/it][A[A[A[A[A





440it [19:10,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 439 Batch Loss 1.2344 
Time 3.043130874633789 








 18%|█▊        | 441/2416 [19:12<1:27:35,  2.66s/it][A[A[A[A[A





441it [19:12,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 440 Batch Loss 1.2986 
Time 2.5054709911346436 








 18%|█▊        | 442/2416 [19:15<1:27:03,  2.65s/it][A[A[A[A[A





442it [19:15,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 441 Batch Loss 1.0254 
Time 2.5949299335479736 








 18%|█▊        | 443/2416 [19:17<1:26:07,  2.62s/it][A[A[A[A[A





443it [19:17,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 442 Batch Loss 1.4253 
Time 2.5454161167144775 








 18%|█▊        | 444/2416 [19:20<1:25:21,  2.60s/it][A[A[A[A[A





444it [19:20,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 443 Batch Loss 1.6987 
Time 2.530965805053711 








 18%|█▊        | 445/2416 [19:22<1:24:51,  2.58s/it][A[A[A[A[A





445it [19:22,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 444 Batch Loss 1.2104 
Time 2.5439884662628174 








 18%|█▊        | 446/2416 [19:25<1:24:38,  2.58s/it][A[A[A[A[A





446it [19:25,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 445 Batch Loss 1.4386 
Time 2.5445377826690674 








 19%|█▊        | 447/2416 [19:28<1:24:08,  2.56s/it][A[A[A[A[A





447it [19:28,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 446 Batch Loss 1.2449 
Time 2.52386736869812 








 19%|█▊        | 448/2416 [19:30<1:23:59,  2.56s/it][A[A[A[A[A





448it [19:30,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 447 Batch Loss 1.1922 
Time 2.5426976680755615 








 19%|█▊        | 449/2416 [19:33<1:23:53,  2.56s/it][A[A[A[A[A





449it [19:33,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 448 Batch Loss 1.1488 
Time 2.549088954925537 








 19%|█▊        | 450/2416 [19:36<1:28:05,  2.69s/it][A[A[A[A[A





450it [19:36,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 449 Batch Loss 1.2617 
Time 2.9765279293060303 








 19%|█▊        | 451/2416 [19:38<1:26:46,  2.65s/it][A[A[A[A[A





451it [19:38,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 450 Batch Loss 1.4659 
Time 2.540226936340332 








 19%|█▊        | 452/2416 [19:41<1:25:40,  2.62s/it][A[A[A[A[A





452it [19:41,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 451 Batch Loss 1.3105 
Time 2.5353686809539795 








 19%|█▉        | 453/2416 [19:43<1:24:50,  2.59s/it][A[A[A[A[A





453it [19:43,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 452 Batch Loss 1.4330 
Time 2.5213491916656494 








 19%|█▉        | 454/2416 [19:46<1:24:12,  2.58s/it][A[A[A[A[A





454it [19:46,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 453 Batch Loss 1.3502 
Time 2.5176074504852295 








 19%|█▉        | 455/2416 [19:48<1:23:44,  2.56s/it][A[A[A[A[A





455it [19:48,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 454 Batch Loss 1.5696 
Time 2.521284580230713 








 19%|█▉        | 456/2416 [19:51<1:23:24,  2.55s/it][A[A[A[A[A





456it [19:51,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 455 Batch Loss 1.3380 
Time 2.5216479301452637 








 19%|█▉        | 457/2416 [19:53<1:23:19,  2.55s/it][A[A[A[A[A





457it [19:53,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 456 Batch Loss 1.2827 
Time 2.541708469390869 








 19%|█▉        | 458/2416 [19:56<1:23:17,  2.55s/it][A[A[A[A[A





458it [19:56,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 457 Batch Loss 1.2796 
Time 2.5412142276763916 








 19%|█▉        | 459/2416 [19:59<1:23:16,  2.55s/it][A[A[A[A[A





459it [19:59,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 458 Batch Loss 1.2827 
Time 2.5400192737579346 








 19%|█▉        | 460/2416 [20:02<1:27:41,  2.69s/it][A[A[A[A[A





460it [20:02,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 459 Batch Loss 1.3659 
Time 2.993227481842041 








 19%|█▉        | 461/2416 [20:04<1:26:02,  2.64s/it][A[A[A[A[A





461it [20:04,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 460 Batch Loss 1.4174 
Time 2.5155551433563232 








 19%|█▉        | 462/2416 [20:07<1:25:00,  2.61s/it][A[A[A[A[A





462it [20:07,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 461 Batch Loss 1.1230 
Time 2.5229434967041016 








 19%|█▉        | 463/2416 [20:09<1:24:20,  2.59s/it][A[A[A[A[A





463it [20:09,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 462 Batch Loss 1.3221 
Time 2.5346992015838623 








 19%|█▉        | 464/2416 [20:12<1:23:41,  2.57s/it][A[A[A[A[A





464it [20:12,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 463 Batch Loss 1.3503 
Time 2.5137500762939453 








 19%|█▉        | 465/2416 [20:14<1:23:15,  2.56s/it][A[A[A[A[A





465it [20:14,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 464 Batch Loss 1.3162 
Time 2.5265450477600098 








 19%|█▉        | 466/2416 [20:17<1:23:06,  2.56s/it][A[A[A[A[A





466it [20:17,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 465 Batch Loss 1.2917 
Time 2.5385758876800537 








 19%|█▉        | 467/2416 [20:19<1:22:59,  2.55s/it][A[A[A[A[A





467it [20:19,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 466 Batch Loss 1.2100 
Time 2.5389764308929443 








 19%|█▉        | 468/2416 [20:22<1:22:39,  2.55s/it][A[A[A[A[A





468it [20:22,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 467 Batch Loss 1.2760 
Time 2.5116639137268066 








 19%|█▉        | 469/2416 [20:24<1:22:35,  2.55s/it][A[A[A[A[A





469it [20:24,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 468 Batch Loss 1.2962 
Time 2.5344595909118652 








 19%|█▉        | 470/2416 [20:27<1:27:13,  2.69s/it][A[A[A[A[A





470it [20:27,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 469 Batch Loss 1.3142 
Time 3.016296625137329 








 19%|█▉        | 471/2416 [20:30<1:25:29,  2.64s/it][A[A[A[A[A





471it [20:30,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 470 Batch Loss 1.3545 
Time 2.50449275970459 








 20%|█▉        | 472/2416 [20:32<1:24:23,  2.60s/it][A[A[A[A[A





472it [20:32,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 471 Batch Loss 1.3793 
Time 2.5220043659210205 








 20%|█▉        | 473/2416 [20:35<1:23:38,  2.58s/it][A[A[A[A[A





473it [20:35,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 472 Batch Loss 1.4536 
Time 2.522630214691162 








 20%|█▉        | 474/2416 [20:38<1:23:07,  2.57s/it][A[A[A[A[A





474it [20:38,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 473 Batch Loss 1.5183 
Time 2.522756338119507 








 20%|█▉        | 475/2416 [20:40<1:22:38,  2.55s/it][A[A[A[A[A





475it [20:40,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 474 Batch Loss 1.2657 
Time 2.5076823234558105 








 20%|█▉        | 476/2416 [20:43<1:22:48,  2.56s/it][A[A[A[A[A





476it [20:43,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 475 Batch Loss 1.2171 
Time 2.5666754245758057 








 20%|█▉        | 477/2416 [20:45<1:22:37,  2.56s/it][A[A[A[A[A





477it [20:45,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 476 Batch Loss 1.4450 
Time 2.5347084999084473 








 20%|█▉        | 478/2416 [20:48<1:22:33,  2.56s/it][A[A[A[A[A





478it [20:48,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 477 Batch Loss 1.6020 
Time 2.5426065921783447 








 20%|█▉        | 479/2416 [20:50<1:22:44,  2.56s/it][A[A[A[A[A





479it [20:50,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 478 Batch Loss 1.2256 
Time 2.5697813034057617 








 20%|█▉        | 480/2416 [20:53<1:27:50,  2.72s/it][A[A[A[A[A





480it [20:53,  2.72s/it][A[A[A[A[A[A

Epoch 0 Batch 479 Batch Loss 1.1651 
Time 3.075554609298706 








 20%|█▉        | 481/2416 [20:56<1:26:56,  2.70s/it][A[A[A[A[A





481it [20:56,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 480 Batch Loss 1.2922 
Time 2.6134836673736572 








 20%|█▉        | 482/2416 [20:59<1:25:13,  2.64s/it][A[A[A[A[A





482it [20:59,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 481 Batch Loss 1.3633 
Time 2.5085983276367188 








 20%|█▉        | 483/2416 [21:01<1:23:50,  2.60s/it][A[A[A[A[A





483it [21:01,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 482 Batch Loss 1.1234 
Time 2.493447780609131 








 20%|██        | 484/2416 [21:04<1:22:52,  2.57s/it][A[A[A[A[A





484it [21:04,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 483 Batch Loss 1.5649 
Time 2.4893877506256104 








 20%|██        | 485/2416 [21:06<1:22:31,  2.56s/it][A[A[A[A[A





485it [21:06,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 484 Batch Loss 1.3378 
Time 2.531177043914795 








 20%|██        | 486/2416 [21:09<1:22:17,  2.56s/it][A[A[A[A[A





486it [21:09,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 485 Batch Loss 1.4546 
Time 2.5335440635681152 








 20%|██        | 487/2416 [21:11<1:22:09,  2.56s/it][A[A[A[A[A





487it [21:11,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 486 Batch Loss 1.5040 
Time 2.5414249897003174 








 20%|██        | 488/2416 [21:14<1:21:58,  2.55s/it][A[A[A[A[A





488it [21:14,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 487 Batch Loss 1.1349 
Time 2.5304322242736816 








 20%|██        | 489/2416 [21:16<1:21:53,  2.55s/it][A[A[A[A[A





489it [21:16,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 488 Batch Loss 1.3387 
Time 2.541390895843506 








 20%|██        | 490/2416 [21:19<1:26:28,  2.69s/it][A[A[A[A[A





490it [21:19,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 489 Batch Loss 1.0861 
Time 3.0152101516723633 








 20%|██        | 491/2416 [21:22<1:24:47,  2.64s/it][A[A[A[A[A





491it [21:22,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 490 Batch Loss 1.3394 
Time 2.5173301696777344 








 20%|██        | 492/2416 [21:24<1:23:53,  2.62s/it][A[A[A[A[A





492it [21:24,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 491 Batch Loss 1.4452 
Time 2.5373287200927734 








 20%|██        | 493/2416 [21:27<1:23:06,  2.59s/it][A[A[A[A[A





493it [21:27,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 492 Batch Loss 1.1986 
Time 2.5300674438476562 








 20%|██        | 494/2416 [21:29<1:22:36,  2.58s/it][A[A[A[A[A





494it [21:29,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 493 Batch Loss 1.3700 
Time 2.535271167755127 








 20%|██        | 495/2416 [21:32<1:22:09,  2.57s/it][A[A[A[A[A





495it [21:32,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 494 Batch Loss 1.5275 
Time 2.526777744293213 








 21%|██        | 496/2416 [21:35<1:21:37,  2.55s/it][A[A[A[A[A





496it [21:35,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 495 Batch Loss 1.3951 
Time 2.508571147918701 








 21%|██        | 497/2416 [21:37<1:21:25,  2.55s/it][A[A[A[A[A





497it [21:37,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 496 Batch Loss 1.1332 
Time 2.519899845123291 








 21%|██        | 498/2416 [21:40<1:21:11,  2.54s/it][A[A[A[A[A





498it [21:40,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 497 Batch Loss 1.2616 
Time 2.5148041248321533 








 21%|██        | 499/2416 [21:42<1:21:03,  2.54s/it][A[A[A[A[A





499it [21:42,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 498 Batch Loss 1.4159 
Time 2.514465808868408 








 21%|██        | 500/2416 [21:45<1:21:08,  2.54s/it][A[A[A[A[A





500it [21:45,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 499 Batch Loss 1.1827 
Time 2.536862373352051 








 21%|██        | 501/2416 [21:48<1:25:24,  2.68s/it][A[A[A[A[A





501it [21:48,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 500 Batch Loss 1.6583 
Time 2.982301950454712 








 21%|██        | 502/2416 [21:50<1:23:52,  2.63s/it][A[A[A[A[A





502it [21:50,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 501 Batch Loss 1.2072 
Time 2.5029594898223877 








 21%|██        | 503/2416 [21:53<1:22:52,  2.60s/it][A[A[A[A[A





503it [21:53,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 502 Batch Loss 1.1264 
Time 2.5170741081237793 








 21%|██        | 504/2416 [21:55<1:22:06,  2.58s/it][A[A[A[A[A





504it [21:55,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 503 Batch Loss 1.7288 
Time 2.5083706378936768 








 21%|██        | 505/2416 [21:58<1:21:39,  2.56s/it][A[A[A[A[A





505it [21:58,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 504 Batch Loss 1.3281 
Time 2.5242152214050293 








 21%|██        | 506/2416 [22:00<1:21:22,  2.56s/it][A[A[A[A[A





506it [22:00,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 505 Batch Loss 1.2216 
Time 2.519829273223877 








 21%|██        | 507/2416 [22:03<1:20:59,  2.55s/it][A[A[A[A[A





507it [22:03,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 506 Batch Loss 1.3596 
Time 2.505166530609131 








 21%|██        | 508/2416 [22:05<1:20:37,  2.54s/it][A[A[A[A[A





508it [22:05,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 507 Batch Loss 1.1765 
Time 2.499566078186035 








 21%|██        | 509/2416 [22:08<1:20:24,  2.53s/it][A[A[A[A[A





509it [22:08,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 508 Batch Loss 1.3719 
Time 2.5029611587524414 








 21%|██        | 510/2416 [22:10<1:20:13,  2.53s/it][A[A[A[A[A





510it [22:10,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 509 Batch Loss 1.1358 
Time 2.500138521194458 








 21%|██        | 511/2416 [22:13<1:20:20,  2.53s/it][A[A[A[A[A





511it [22:13,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 510 Batch Loss 1.4817 
Time 2.5364444255828857 








 21%|██        | 512/2416 [22:16<1:24:42,  2.67s/it][A[A[A[A[A





512it [22:16,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 511 Batch Loss 1.6693 
Time 2.9785702228546143 








 21%|██        | 513/2416 [22:18<1:23:23,  2.63s/it][A[A[A[A[A





513it [22:18,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 512 Batch Loss 1.3310 
Time 2.519787311553955 








 21%|██▏       | 514/2416 [22:21<1:22:16,  2.60s/it][A[A[A[A[A





514it [22:21,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 513 Batch Loss 1.2978 
Time 2.5115346908569336 








 21%|██▏       | 515/2416 [22:24<1:21:32,  2.57s/it][A[A[A[A[A





515it [22:23,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 514 Batch Loss 1.1863 
Time 2.5104618072509766 








 21%|██▏       | 516/2416 [22:26<1:21:06,  2.56s/it][A[A[A[A[A





516it [22:26,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 515 Batch Loss 1.1678 
Time 2.5208301544189453 








 21%|██▏       | 517/2416 [22:29<1:20:45,  2.55s/it][A[A[A[A[A





517it [22:29,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 516 Batch Loss 1.6821 
Time 2.513235569000244 








 21%|██▏       | 518/2416 [22:31<1:20:16,  2.54s/it][A[A[A[A[A





518it [22:31,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 517 Batch Loss 1.0424 
Time 2.495234251022339 








 21%|██▏       | 519/2416 [22:34<1:20:14,  2.54s/it][A[A[A[A[A





519it [22:34,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 518 Batch Loss 1.1240 
Time 2.5282161235809326 








 22%|██▏       | 520/2416 [22:36<1:20:20,  2.54s/it][A[A[A[A[A





520it [22:36,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 519 Batch Loss 1.7223 
Time 2.537954807281494 








 22%|██▏       | 521/2416 [22:39<1:20:06,  2.54s/it][A[A[A[A[A





521it [22:39,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 520 Batch Loss 1.3748 
Time 2.514580249786377 








 22%|██▏       | 522/2416 [22:42<1:24:27,  2.68s/it][A[A[A[A[A





522it [22:42,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 521 Batch Loss 1.5555 
Time 2.986837148666382 








 22%|██▏       | 523/2416 [22:44<1:23:20,  2.64s/it][A[A[A[A[A





523it [22:44,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 522 Batch Loss 1.3534 
Time 2.5511152744293213 








 22%|██▏       | 524/2416 [22:47<1:21:56,  2.60s/it][A[A[A[A[A





524it [22:47,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 523 Batch Loss 1.2626 
Time 2.486751079559326 








 22%|██▏       | 525/2416 [22:49<1:21:08,  2.57s/it][A[A[A[A[A





525it [22:49,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 524 Batch Loss 1.4039 
Time 2.5101115703582764 








 22%|██▏       | 526/2416 [22:52<1:20:33,  2.56s/it][A[A[A[A[A





526it [22:52,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 525 Batch Loss 0.9915 
Time 2.508908271789551 








 22%|██▏       | 527/2416 [22:54<1:20:18,  2.55s/it][A[A[A[A[A





527it [22:54,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 526 Batch Loss 1.1747 
Time 2.5252623558044434 








 22%|██▏       | 528/2416 [22:57<1:20:01,  2.54s/it][A[A[A[A[A





528it [22:57,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 527 Batch Loss 1.4240 
Time 2.5071234703063965 








 22%|██▏       | 529/2416 [22:59<1:19:48,  2.54s/it][A[A[A[A[A





529it [22:59,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 528 Batch Loss 1.3098 
Time 2.5140509605407715 








 22%|██▏       | 530/2416 [23:02<1:19:27,  2.53s/it][A[A[A[A[A





530it [23:02,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 529 Batch Loss 1.0731 
Time 2.4888222217559814 








 22%|██▏       | 531/2416 [23:04<1:19:05,  2.52s/it][A[A[A[A[A





531it [23:04,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 530 Batch Loss 1.5439 
Time 2.478510856628418 








 22%|██▏       | 532/2416 [23:07<1:18:57,  2.51s/it][A[A[A[A[A





532it [23:07,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 531 Batch Loss 1.4156 
Time 2.4982571601867676 








 22%|██▏       | 533/2416 [23:10<1:23:59,  2.68s/it][A[A[A[A[A





533it [23:10,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 532 Batch Loss 1.1943 
Time 3.0380191802978516 








 22%|██▏       | 534/2416 [23:13<1:23:08,  2.65s/it][A[A[A[A[A





534it [23:12,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 533 Batch Loss 1.6003 
Time 2.581170082092285 








 22%|██▏       | 535/2416 [23:15<1:22:44,  2.64s/it][A[A[A[A[A





535it [23:15,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 534 Batch Loss 1.5534 
Time 2.601288080215454 








 22%|██▏       | 536/2416 [23:18<1:22:34,  2.64s/it][A[A[A[A[A





536it [23:18,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 535 Batch Loss 1.5375 
Time 2.6096854209899902 








 22%|██▏       | 537/2416 [23:20<1:21:59,  2.62s/it][A[A[A[A[A





537it [23:20,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 536 Batch Loss 1.2895 
Time 2.564765214920044 








 22%|██▏       | 538/2416 [23:23<1:20:57,  2.59s/it][A[A[A[A[A





538it [23:23,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 537 Batch Loss 1.0853 
Time 2.5012269020080566 








 22%|██▏       | 539/2416 [23:25<1:20:03,  2.56s/it][A[A[A[A[A





539it [23:25,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 538 Batch Loss 1.2864 
Time 2.4770724773406982 








 22%|██▏       | 540/2416 [23:28<1:19:50,  2.55s/it][A[A[A[A[A





540it [23:28,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 539 Batch Loss 1.2087 
Time 2.5252559185028076 








 22%|██▏       | 541/2416 [23:30<1:19:33,  2.55s/it][A[A[A[A[A





541it [23:30,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 540 Batch Loss 1.5791 
Time 2.520487070083618 








 22%|██▏       | 542/2416 [23:33<1:19:27,  2.54s/it][A[A[A[A[A





542it [23:33,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 541 Batch Loss 1.6057 
Time 2.528818130493164 








 22%|██▏       | 543/2416 [23:36<1:23:37,  2.68s/it][A[A[A[A[A





543it [23:36,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 542 Batch Loss 1.3401 
Time 2.9814538955688477 








 23%|██▎       | 544/2416 [23:38<1:22:25,  2.64s/it][A[A[A[A[A





544it [23:38,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 543 Batch Loss 1.5498 
Time 2.527717351913452 








 23%|██▎       | 545/2416 [23:41<1:21:07,  2.60s/it][A[A[A[A[A





545it [23:41,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 544 Batch Loss 1.3875 
Time 2.500392198562622 








 23%|██▎       | 546/2416 [23:44<1:20:20,  2.58s/it][A[A[A[A[A





546it [23:44,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 545 Batch Loss 1.2292 
Time 2.5076382160186768 








 23%|██▎       | 547/2416 [23:46<1:19:34,  2.55s/it][A[A[A[A[A





547it [23:46,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 546 Batch Loss 1.1754 
Time 2.48449444770813 








 23%|██▎       | 548/2416 [23:49<1:18:46,  2.53s/it][A[A[A[A[A





548it [23:48,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 547 Batch Loss 1.2552 
Time 2.46358060836792 








 23%|██▎       | 549/2416 [23:51<1:18:14,  2.51s/it][A[A[A[A[A





549it [23:51,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 548 Batch Loss 1.2914 
Time 2.459144115447998 








 23%|██▎       | 550/2416 [23:53<1:17:37,  2.50s/it][A[A[A[A[A





550it [23:53,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 549 Batch Loss 1.5312 
Time 2.445932149887085 








 23%|██▎       | 551/2416 [23:56<1:17:31,  2.49s/it][A[A[A[A[A





551it [23:56,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 550 Batch Loss 0.9921 
Time 2.474557638168335 








 23%|██▎       | 552/2416 [23:58<1:17:05,  2.48s/it][A[A[A[A[A





552it [23:58,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 551 Batch Loss 1.4439 
Time 2.4471566677093506 








 23%|██▎       | 553/2416 [24:01<1:16:42,  2.47s/it][A[A[A[A[A





553it [24:01,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 552 Batch Loss 1.5725 
Time 2.4301841259002686 








 23%|██▎       | 554/2416 [24:04<1:20:40,  2.60s/it][A[A[A[A[A





554it [24:04,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 553 Batch Loss 1.5933 
Time 2.889378070831299 








 23%|██▎       | 555/2416 [24:06<1:19:43,  2.57s/it][A[A[A[A[A





555it [24:06,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 554 Batch Loss 1.4888 
Time 2.495856761932373 








 23%|██▎       | 556/2416 [24:09<1:18:49,  2.54s/it][A[A[A[A[A





556it [24:09,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 555 Batch Loss 1.0852 
Time 2.4639551639556885 








 23%|██▎       | 557/2416 [24:11<1:18:03,  2.52s/it][A[A[A[A[A





557it [24:11,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 556 Batch Loss 1.4264 
Time 2.4583561420440674 








 23%|██▎       | 558/2416 [24:14<1:17:37,  2.51s/it][A[A[A[A[A





558it [24:14,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 557 Batch Loss 1.2788 
Time 2.461869955062866 








 23%|██▎       | 559/2416 [24:16<1:17:25,  2.50s/it][A[A[A[A[A





559it [24:16,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 558 Batch Loss 1.4579 
Time 2.4754257202148438 








 23%|██▎       | 560/2416 [24:19<1:17:00,  2.49s/it][A[A[A[A[A





560it [24:19,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 559 Batch Loss 1.4468 
Time 2.447805404663086 








 23%|██▎       | 561/2416 [24:21<1:16:59,  2.49s/it][A[A[A[A[A





561it [24:21,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 560 Batch Loss 1.1624 
Time 2.4767377376556396 








 23%|██▎       | 562/2416 [24:24<1:16:51,  2.49s/it][A[A[A[A[A





562it [24:24,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 561 Batch Loss 1.4508 
Time 2.468539237976074 








 23%|██▎       | 563/2416 [24:26<1:16:44,  2.48s/it][A[A[A[A[A





563it [24:26,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 562 Batch Loss 1.3834 
Time 2.468480348587036 








 23%|██▎       | 564/2416 [24:29<1:16:49,  2.49s/it][A[A[A[A[A





564it [24:29,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 563 Batch Loss 1.2073 
Time 2.4921324253082275 








 23%|██▎       | 565/2416 [24:31<1:21:01,  2.63s/it][A[A[A[A[A





565it [24:31,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 564 Batch Loss 1.5807 
Time 2.9309263229370117 








 23%|██▎       | 566/2416 [24:34<1:19:35,  2.58s/it][A[A[A[A[A





566it [24:34,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 565 Batch Loss 1.2769 
Time 2.4630541801452637 








 23%|██▎       | 567/2416 [24:36<1:18:43,  2.55s/it][A[A[A[A[A





567it [24:36,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 566 Batch Loss 1.1592 
Time 2.475388765335083 








 24%|██▎       | 568/2416 [24:39<1:18:15,  2.54s/it][A[A[A[A[A





568it [24:39,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 567 Batch Loss 1.0324 
Time 2.4933996200561523 








 24%|██▎       | 569/2416 [24:41<1:17:55,  2.53s/it][A[A[A[A[A





569it [24:41,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 568 Batch Loss 1.4675 
Time 2.4947311878204346 








 24%|██▎       | 570/2416 [24:44<1:17:30,  2.52s/it][A[A[A[A[A





570it [24:44,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 569 Batch Loss 1.3365 
Time 2.4777414798736572 








 24%|██▎       | 571/2416 [24:46<1:17:19,  2.51s/it][A[A[A[A[A





571it [24:46,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 570 Batch Loss 1.2440 
Time 2.489218235015869 








 24%|██▎       | 572/2416 [24:49<1:17:04,  2.51s/it][A[A[A[A[A





572it [24:49,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 571 Batch Loss 1.2020 
Time 2.484867572784424 








 24%|██▎       | 573/2416 [24:51<1:16:36,  2.49s/it][A[A[A[A[A





573it [24:51,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 572 Batch Loss 1.3162 
Time 2.451408863067627 








 24%|██▍       | 574/2416 [24:54<1:16:27,  2.49s/it][A[A[A[A[A





574it [24:54,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 573 Batch Loss 1.3629 
Time 2.4676456451416016 








 24%|██▍       | 575/2416 [24:57<1:20:33,  2.63s/it][A[A[A[A[A





575it [24:57,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 574 Batch Loss 1.4486 
Time 2.9343087673187256 








 24%|██▍       | 576/2416 [24:59<1:18:57,  2.57s/it][A[A[A[A[A





576it [24:59,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 575 Batch Loss 1.4509 
Time 2.4432411193847656 








 24%|██▍       | 577/2416 [25:02<1:18:03,  2.55s/it][A[A[A[A[A





577it [25:02,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 576 Batch Loss 1.5929 
Time 2.4721133708953857 








 24%|██▍       | 578/2416 [25:04<1:17:26,  2.53s/it][A[A[A[A[A





578it [25:04,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 577 Batch Loss 1.1751 
Time 2.4686505794525146 








 24%|██▍       | 579/2416 [25:07<1:16:56,  2.51s/it][A[A[A[A[A





579it [25:07,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 578 Batch Loss 1.2292 
Time 2.467942953109741 








 24%|██▍       | 580/2416 [25:09<1:16:39,  2.51s/it][A[A[A[A[A





580it [25:09,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 579 Batch Loss 1.5011 
Time 2.473762035369873 








 24%|██▍       | 581/2416 [25:12<1:16:18,  2.50s/it][A[A[A[A[A





581it [25:12,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 580 Batch Loss 1.3539 
Time 2.454800605773926 








 24%|██▍       | 582/2416 [25:14<1:16:11,  2.49s/it][A[A[A[A[A





582it [25:14,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 581 Batch Loss 1.4099 
Time 2.4732930660247803 








 24%|██▍       | 583/2416 [25:17<1:16:05,  2.49s/it][A[A[A[A[A





583it [25:17,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 582 Batch Loss 1.3942 
Time 2.472452163696289 








 24%|██▍       | 584/2416 [25:19<1:16:01,  2.49s/it][A[A[A[A[A





584it [25:19,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 583 Batch Loss 1.5078 
Time 2.473520278930664 








 24%|██▍       | 585/2416 [25:22<1:20:18,  2.63s/it][A[A[A[A[A





585it [25:22,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 584 Batch Loss 1.3119 
Time 2.9484364986419678 








 24%|██▍       | 586/2416 [25:25<1:18:59,  2.59s/it][A[A[A[A[A





586it [25:25,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 585 Batch Loss 1.2455 
Time 2.4840619564056396 








 24%|██▍       | 587/2416 [25:27<1:18:14,  2.57s/it][A[A[A[A[A





587it [25:27,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 586 Batch Loss 1.5428 
Time 2.5051655769348145 








 24%|██▍       | 588/2416 [25:30<1:17:38,  2.55s/it][A[A[A[A[A





588it [25:30,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 587 Batch Loss 1.3341 
Time 2.4901936054229736 








 24%|██▍       | 589/2416 [25:32<1:17:06,  2.53s/it][A[A[A[A[A





589it [25:32,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 588 Batch Loss 1.5545 
Time 2.485506534576416 








 24%|██▍       | 590/2416 [25:35<1:16:33,  2.52s/it][A[A[A[A[A





590it [25:35,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 589 Batch Loss 1.1930 
Time 2.466294050216675 








 24%|██▍       | 591/2416 [25:37<1:16:10,  2.50s/it][A[A[A[A[A





591it [25:37,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 590 Batch Loss 1.3011 
Time 2.4627342224121094 








 25%|██▍       | 592/2416 [25:40<1:15:49,  2.49s/it][A[A[A[A[A





592it [25:40,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 591 Batch Loss 1.1653 
Time 2.4557716846466064 








 25%|██▍       | 593/2416 [25:42<1:15:57,  2.50s/it][A[A[A[A[A





593it [25:42,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 592 Batch Loss 1.6656 
Time 2.502810478210449 








 25%|██▍       | 594/2416 [25:45<1:16:00,  2.50s/it][A[A[A[A[A





594it [25:45,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 593 Batch Loss 1.4289 
Time 2.496978521347046 








 25%|██▍       | 595/2416 [25:48<1:20:03,  2.64s/it][A[A[A[A[A





595it [25:48,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 594 Batch Loss 1.3828 
Time 2.939465045928955 








 25%|██▍       | 596/2416 [25:50<1:18:36,  2.59s/it][A[A[A[A[A





596it [25:50,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 595 Batch Loss 1.7256 
Time 2.476896047592163 








 25%|██▍       | 597/2416 [25:52<1:17:31,  2.56s/it][A[A[A[A[A





597it [25:52,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 596 Batch Loss 1.5357 
Time 2.463986873626709 








 25%|██▍       | 598/2416 [25:55<1:16:55,  2.54s/it][A[A[A[A[A





598it [25:55,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 597 Batch Loss 1.1252 
Time 2.4855563640594482 








 25%|██▍       | 599/2416 [25:58<1:17:17,  2.55s/it][A[A[A[A[A





599it [25:58,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 598 Batch Loss 1.2388 
Time 2.568147897720337 








 25%|██▍       | 600/2416 [26:00<1:17:38,  2.57s/it][A[A[A[A[A





600it [26:00,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 599 Batch Loss 1.4356 
Time 2.5813822746276855 








 25%|██▍       | 601/2416 [26:03<1:17:10,  2.55s/it][A[A[A[A[A





601it [26:03,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 600 Batch Loss 1.6165 
Time 2.5060181617736816 








 25%|██▍       | 602/2416 [26:05<1:16:15,  2.52s/it][A[A[A[A[A





602it [26:05,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 601 Batch Loss 1.6710 
Time 2.439281463623047 








 25%|██▍       | 603/2416 [26:08<1:15:51,  2.51s/it][A[A[A[A[A





603it [26:08,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 602 Batch Loss 1.3678 
Time 2.468550443649292 








 25%|██▌       | 604/2416 [26:10<1:15:29,  2.50s/it][A[A[A[A[A





604it [26:10,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 603 Batch Loss 1.3277 
Time 2.46107816696167 








 25%|██▌       | 605/2416 [26:13<1:19:29,  2.63s/it][A[A[A[A[A





605it [26:13,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 604 Batch Loss 1.7192 
Time 2.932224750518799 








 25%|██▌       | 606/2416 [26:16<1:18:16,  2.59s/it][A[A[A[A[A





606it [26:16,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 605 Batch Loss 1.0780 
Time 2.4851925373077393 








 25%|██▌       | 607/2416 [26:18<1:17:02,  2.56s/it][A[A[A[A[A





607it [26:18,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 606 Batch Loss 1.5538 
Time 2.450227737426758 








 25%|██▌       | 608/2416 [26:21<1:16:26,  2.54s/it][A[A[A[A[A





608it [26:20,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 607 Batch Loss 1.3881 
Time 2.483452081680298 








 25%|██▌       | 609/2416 [26:23<1:16:13,  2.53s/it][A[A[A[A[A





609it [26:23,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 608 Batch Loss 1.1546 
Time 2.503812074661255 








 25%|██▌       | 610/2416 [26:25<1:15:38,  2.51s/it][A[A[A[A[A





610it [26:25,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 609 Batch Loss 1.3007 
Time 2.4645285606384277 








 25%|██▌       | 611/2416 [26:28<1:15:20,  2.50s/it][A[A[A[A[A





611it [26:28,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 610 Batch Loss 1.0874 
Time 2.472644329071045 








 25%|██▌       | 612/2416 [26:30<1:15:10,  2.50s/it][A[A[A[A[A





612it [26:30,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 611 Batch Loss 1.2913 
Time 2.4793567657470703 








 25%|██▌       | 613/2416 [26:33<1:14:59,  2.50s/it][A[A[A[A[A





613it [26:33,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 612 Batch Loss 1.1819 
Time 2.4727556705474854 








 25%|██▌       | 614/2416 [26:35<1:14:48,  2.49s/it][A[A[A[A[A





614it [26:35,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 613 Batch Loss 1.4479 
Time 2.464714288711548 








 25%|██▌       | 615/2416 [26:38<1:14:51,  2.49s/it][A[A[A[A[A





615it [26:38,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 614 Batch Loss 1.6590 
Time 2.48549747467041 








 25%|██▌       | 616/2416 [26:41<1:18:48,  2.63s/it][A[A[A[A[A





616it [26:41,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 615 Batch Loss 1.1220 
Time 2.9306957721710205 








 26%|██▌       | 617/2416 [26:43<1:17:27,  2.58s/it][A[A[A[A[A





617it [26:43,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 616 Batch Loss 1.3441 
Time 2.473964214324951 








 26%|██▌       | 618/2416 [26:46<1:16:27,  2.55s/it][A[A[A[A[A





618it [26:46,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 617 Batch Loss 1.6127 
Time 2.461869478225708 








 26%|██▌       | 619/2416 [26:48<1:15:55,  2.54s/it][A[A[A[A[A





619it [26:48,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 618 Batch Loss 1.4427 
Time 2.485391855239868 








 26%|██▌       | 620/2416 [26:51<1:15:17,  2.52s/it][A[A[A[A[A





620it [26:51,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 619 Batch Loss 0.9148 
Time 2.4577484130859375 








 26%|██▌       | 621/2416 [26:53<1:14:59,  2.51s/it][A[A[A[A[A





621it [26:53,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 620 Batch Loss 0.9651 
Time 2.476980686187744 








 26%|██▌       | 622/2416 [26:56<1:14:49,  2.50s/it][A[A[A[A[A





622it [26:56,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 621 Batch Loss 1.1166 
Time 2.4822380542755127 








 26%|██▌       | 623/2416 [26:58<1:14:49,  2.50s/it][A[A[A[A[A





623it [26:58,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 622 Batch Loss 1.0772 
Time 2.5006375312805176 








 26%|██▌       | 624/2416 [27:01<1:14:40,  2.50s/it][A[A[A[A[A





624it [27:01,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 623 Batch Loss 1.5607 
Time 2.4827160835266113 








 26%|██▌       | 625/2416 [27:03<1:14:31,  2.50s/it][A[A[A[A[A





625it [27:03,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 624 Batch Loss 1.3402 
Time 2.4775593280792236 








 26%|██▌       | 626/2416 [27:06<1:18:38,  2.64s/it][A[A[A[A[A





626it [27:06,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 625 Batch Loss 1.5223 
Time 2.948362350463867 








 26%|██▌       | 627/2416 [27:09<1:17:04,  2.58s/it][A[A[A[A[A





627it [27:09,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 626 Batch Loss 1.2723 
Time 2.4561767578125 








 26%|██▌       | 628/2416 [27:11<1:16:08,  2.55s/it][A[A[A[A[A





628it [27:11,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 627 Batch Loss 1.3950 
Time 2.4749248027801514 








 26%|██▌       | 629/2416 [27:14<1:15:34,  2.54s/it][A[A[A[A[A





629it [27:14,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 628 Batch Loss 1.4178 
Time 2.4852802753448486 








 26%|██▌       | 630/2416 [27:16<1:15:00,  2.52s/it][A[A[A[A[A





630it [27:16,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 629 Batch Loss 1.1634 
Time 2.4683918952941895 








 26%|██▌       | 631/2416 [27:19<1:14:40,  2.51s/it][A[A[A[A[A





631it [27:19,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 630 Batch Loss 1.4803 
Time 2.4734559059143066 








 26%|██▌       | 632/2416 [27:21<1:14:20,  2.50s/it][A[A[A[A[A





632it [27:21,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 631 Batch Loss 1.4815 
Time 2.4673383235931396 








 26%|██▌       | 633/2416 [27:24<1:14:16,  2.50s/it][A[A[A[A[A





633it [27:24,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 632 Batch Loss 1.4547 
Time 2.4893383979797363 








 26%|██▌       | 634/2416 [27:26<1:14:21,  2.50s/it][A[A[A[A[A





634it [27:26,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 633 Batch Loss 1.2165 
Time 2.5011518001556396 








 26%|██▋       | 635/2416 [27:29<1:14:21,  2.50s/it][A[A[A[A[A





635it [27:29,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 634 Batch Loss 1.3235 
Time 2.4924869537353516 








 26%|██▋       | 636/2416 [27:32<1:18:25,  2.64s/it][A[A[A[A[A





636it [27:32,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 635 Batch Loss 1.5391 
Time 2.9608798027038574 








 26%|██▋       | 637/2416 [27:34<1:16:58,  2.60s/it][A[A[A[A[A





637it [27:34,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 636 Batch Loss 1.0246 
Time 2.47478985786438 








 26%|██▋       | 638/2416 [27:37<1:16:02,  2.57s/it][A[A[A[A[A





638it [27:37,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 637 Batch Loss 1.3182 
Time 2.481976270675659 








 26%|██▋       | 639/2416 [27:39<1:15:09,  2.54s/it][A[A[A[A[A





639it [27:39,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 638 Batch Loss 1.3683 
Time 2.45448899269104 








 26%|██▋       | 640/2416 [27:42<1:14:46,  2.53s/it][A[A[A[A[A





640it [27:42,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 639 Batch Loss 1.2594 
Time 2.48478627204895 








 27%|██▋       | 641/2416 [27:44<1:14:18,  2.51s/it][A[A[A[A[A





641it [27:44,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 640 Batch Loss 1.1718 
Time 2.4704554080963135 








 27%|██▋       | 642/2416 [27:47<1:14:05,  2.51s/it][A[A[A[A[A





642it [27:47,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 641 Batch Loss 1.3937 
Time 2.478785991668701 








 27%|██▋       | 643/2416 [27:49<1:13:56,  2.50s/it][A[A[A[A[A





643it [27:49,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 642 Batch Loss 1.3315 
Time 2.4769489765167236 








 27%|██▋       | 644/2416 [27:51<1:13:41,  2.50s/it][A[A[A[A[A





644it [27:51,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 643 Batch Loss 1.0836 
Time 2.466858386993408 








 27%|██▋       | 645/2416 [27:54<1:13:32,  2.49s/it][A[A[A[A[A





645it [27:54,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 644 Batch Loss 1.1089 
Time 2.4730236530303955 








 27%|██▋       | 646/2416 [27:56<1:13:12,  2.48s/it][A[A[A[A[A





646it [27:56,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 645 Batch Loss 1.5734 
Time 2.443758010864258 








 27%|██▋       | 647/2416 [27:59<1:17:16,  2.62s/it][A[A[A[A[A





647it [27:59,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 646 Batch Loss 1.1252 
Time 2.930690050125122 








 27%|██▋       | 648/2416 [28:02<1:15:59,  2.58s/it][A[A[A[A[A





648it [28:02,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 647 Batch Loss 1.4085 
Time 2.473662853240967 








 27%|██▋       | 649/2416 [28:04<1:15:04,  2.55s/it][A[A[A[A[A





649it [28:04,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 648 Batch Loss 1.2617 
Time 2.466656446456909 








 27%|██▋       | 650/2416 [28:07<1:14:29,  2.53s/it][A[A[A[A[A





650it [28:07,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 649 Batch Loss 1.1808 
Time 2.475001811981201 








 27%|██▋       | 651/2416 [28:09<1:13:51,  2.51s/it][A[A[A[A[A





651it [28:09,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 650 Batch Loss 1.2275 
Time 2.449697494506836 








 27%|██▋       | 652/2416 [28:12<1:13:45,  2.51s/it][A[A[A[A[A





652it [28:12,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 651 Batch Loss 1.2430 
Time 2.491457462310791 








 27%|██▋       | 653/2416 [28:14<1:13:31,  2.50s/it][A[A[A[A[A





653it [28:14,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 652 Batch Loss 1.2978 
Time 2.4772303104400635 








 27%|██▋       | 654/2416 [28:17<1:13:18,  2.50s/it][A[A[A[A[A





654it [28:17,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 653 Batch Loss 1.1797 
Time 2.474606513977051 








 27%|██▋       | 655/2416 [28:19<1:13:27,  2.50s/it][A[A[A[A[A





655it [28:19,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 654 Batch Loss 1.0629 
Time 2.5085175037384033 








 27%|██▋       | 656/2416 [28:22<1:13:52,  2.52s/it][A[A[A[A[A





656it [28:22,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 655 Batch Loss 1.3136 
Time 2.539318561553955 








 27%|██▋       | 657/2416 [28:25<1:18:33,  2.68s/it][A[A[A[A[A





657it [28:25,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 656 Batch Loss 1.1715 
Time 3.0380334854125977 








 27%|██▋       | 658/2416 [28:27<1:17:43,  2.65s/it][A[A[A[A[A





658it [28:27,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 657 Batch Loss 1.2876 
Time 2.5785465240478516 








 27%|██▋       | 659/2416 [28:30<1:16:48,  2.62s/it][A[A[A[A[A





659it [28:30,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 658 Batch Loss 1.3783 
Time 2.5387892723083496 








 27%|██▋       | 660/2416 [28:33<1:15:43,  2.59s/it][A[A[A[A[A





660it [28:33,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 659 Batch Loss 1.6567 
Time 2.489914894104004 








 27%|██▋       | 661/2416 [28:35<1:14:52,  2.56s/it][A[A[A[A[A





661it [28:35,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 660 Batch Loss 1.2895 
Time 2.488985061645508 








 27%|██▋       | 662/2416 [28:38<1:14:06,  2.54s/it][A[A[A[A[A





662it [28:37,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 661 Batch Loss 1.4119 
Time 2.4694719314575195 








 27%|██▋       | 663/2416 [28:40<1:13:38,  2.52s/it][A[A[A[A[A





663it [28:40,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 662 Batch Loss 1.4553 
Time 2.4761106967926025 








 27%|██▋       | 664/2416 [28:43<1:13:25,  2.51s/it][A[A[A[A[A





664it [28:42,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 663 Batch Loss 1.2299 
Time 2.490907669067383 








 28%|██▊       | 665/2416 [28:45<1:13:01,  2.50s/it][A[A[A[A[A





665it [28:45,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 664 Batch Loss 0.9704 
Time 2.458803176879883 








 28%|██▊       | 666/2416 [28:47<1:12:56,  2.50s/it][A[A[A[A[A





666it [28:47,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 665 Batch Loss 1.5033 
Time 2.486069679260254 








 28%|██▊       | 667/2416 [28:50<1:13:07,  2.51s/it][A[A[A[A[A





667it [28:50,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 666 Batch Loss 1.3963 
Time 2.5173022747039795 








 28%|██▊       | 668/2416 [28:53<1:16:49,  2.64s/it][A[A[A[A[A





668it [28:53,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 667 Batch Loss 1.1931 
Time 2.924489736557007 








 28%|██▊       | 669/2416 [28:55<1:15:27,  2.59s/it][A[A[A[A[A





669it [28:55,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 668 Batch Loss 1.4532 
Time 2.470581293106079 








 28%|██▊       | 670/2416 [28:58<1:14:49,  2.57s/it][A[A[A[A[A





670it [28:58,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 669 Batch Loss 1.6342 
Time 2.5129308700561523 








 28%|██▊       | 671/2416 [29:00<1:13:55,  2.54s/it][A[A[A[A[A





671it [29:00,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 670 Batch Loss 1.3572 
Time 2.4617106914520264 








 28%|██▊       | 672/2416 [29:03<1:13:27,  2.53s/it][A[A[A[A[A





672it [29:03,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 671 Batch Loss 1.2145 
Time 2.484330415725708 








 28%|██▊       | 673/2416 [29:05<1:12:52,  2.51s/it][A[A[A[A[A





673it [29:05,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 672 Batch Loss 1.4220 
Time 2.4526050090789795 








 28%|██▊       | 674/2416 [29:08<1:12:48,  2.51s/it][A[A[A[A[A





674it [29:08,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 673 Batch Loss 1.3943 
Time 2.4813106060028076 








 28%|██▊       | 675/2416 [29:10<1:12:26,  2.50s/it][A[A[A[A[A





675it [29:10,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 674 Batch Loss 1.1152 
Time 2.4556055068969727 








 28%|██▊       | 676/2416 [29:13<1:12:11,  2.49s/it][A[A[A[A[A





676it [29:13,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 675 Batch Loss 1.1607 
Time 2.4627768993377686 








 28%|██▊       | 677/2416 [29:15<1:12:07,  2.49s/it][A[A[A[A[A





677it [29:15,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 676 Batch Loss 1.1406 
Time 2.471327781677246 








 28%|██▊       | 678/2416 [29:18<1:12:07,  2.49s/it][A[A[A[A[A





678it [29:18,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 677 Batch Loss 1.2538 
Time 2.487581491470337 








 28%|██▊       | 679/2416 [29:21<1:16:02,  2.63s/it][A[A[A[A[A





679it [29:21,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 678 Batch Loss 1.4215 
Time 2.929723024368286 








 28%|██▊       | 680/2416 [29:23<1:14:47,  2.59s/it][A[A[A[A[A





680it [29:23,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 679 Batch Loss 1.4367 
Time 2.4791452884674072 








 28%|██▊       | 681/2416 [29:26<1:14:15,  2.57s/it][A[A[A[A[A





681it [29:26,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 680 Batch Loss 1.1766 
Time 2.5193066596984863 








 28%|██▊       | 682/2416 [29:28<1:13:25,  2.54s/it][A[A[A[A[A





682it [29:28,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 681 Batch Loss 1.1522 
Time 2.4683783054351807 








 28%|██▊       | 683/2416 [29:31<1:12:50,  2.52s/it][A[A[A[A[A





683it [29:31,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 682 Batch Loss 1.3107 
Time 2.466667413711548 








 28%|██▊       | 684/2416 [29:33<1:12:34,  2.51s/it][A[A[A[A[A





684it [29:33,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 683 Batch Loss 1.3336 
Time 2.4847300052642822 








 28%|██▊       | 685/2416 [29:36<1:12:10,  2.50s/it][A[A[A[A[A





685it [29:36,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 684 Batch Loss 1.3939 
Time 2.4615001678466797 








 28%|██▊       | 686/2416 [29:38<1:12:03,  2.50s/it][A[A[A[A[A





686it [29:38,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 685 Batch Loss 1.3643 
Time 2.478956460952759 








 28%|██▊       | 687/2416 [29:41<1:11:54,  2.50s/it][A[A[A[A[A





687it [29:41,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 686 Batch Loss 1.2222 
Time 2.4672763347625732 








 28%|██▊       | 688/2416 [29:43<1:11:40,  2.49s/it][A[A[A[A[A





688it [29:43,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 687 Batch Loss 1.3720 
Time 2.4615964889526367 








 29%|██▊       | 689/2416 [29:46<1:15:41,  2.63s/it][A[A[A[A[A





689it [29:46,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 688 Batch Loss 1.2518 
Time 2.9471254348754883 








 29%|██▊       | 690/2416 [29:49<1:14:22,  2.59s/it][A[A[A[A[A





690it [29:49,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 689 Batch Loss 1.4559 
Time 2.4748013019561768 








 29%|██▊       | 691/2416 [29:51<1:13:54,  2.57s/it][A[A[A[A[A





691it [29:51,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 690 Batch Loss 1.2750 
Time 2.5221643447875977 








 29%|██▊       | 692/2416 [29:54<1:13:00,  2.54s/it][A[A[A[A[A





692it [29:54,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 691 Batch Loss 1.3823 
Time 2.4616947174072266 








 29%|██▊       | 693/2416 [29:56<1:12:36,  2.53s/it][A[A[A[A[A





693it [29:56,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 692 Batch Loss 1.2919 
Time 2.488304853439331 








 29%|██▊       | 694/2416 [29:59<1:12:12,  2.52s/it][A[A[A[A[A





694it [29:59,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 693 Batch Loss 1.2663 
Time 2.471331834793091 








 29%|██▉       | 695/2416 [30:01<1:11:50,  2.50s/it][A[A[A[A[A





695it [30:01,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 694 Batch Loss 1.0548 
Time 2.4644060134887695 








 29%|██▉       | 696/2416 [30:04<1:11:44,  2.50s/it][A[A[A[A[A





696it [30:04,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 695 Batch Loss 1.2500 
Time 2.4885525703430176 








 29%|██▉       | 697/2416 [30:06<1:11:38,  2.50s/it][A[A[A[A[A





697it [30:06,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 696 Batch Loss 1.3236 
Time 2.4811489582061768 








 29%|██▉       | 698/2416 [30:09<1:11:36,  2.50s/it][A[A[A[A[A





698it [30:09,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 697 Batch Loss 1.1340 
Time 2.4953503608703613 








 29%|██▉       | 699/2416 [30:12<1:15:39,  2.64s/it][A[A[A[A[A





699it [30:12,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 698 Batch Loss 1.2391 
Time 2.966562032699585 








 29%|██▉       | 700/2416 [30:14<1:14:06,  2.59s/it][A[A[A[A[A





700it [30:14,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 699 Batch Loss 1.3929 
Time 2.4516067504882812 








 29%|██▉       | 701/2416 [30:17<1:13:16,  2.56s/it][A[A[A[A[A





701it [30:16,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 700 Batch Loss 0.9685 
Time 2.481238603591919 








 29%|██▉       | 702/2416 [30:19<1:12:23,  2.53s/it][A[A[A[A[A





702it [30:19,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 701 Batch Loss 1.3840 
Time 2.4598305225372314 








 29%|██▉       | 703/2416 [30:21<1:11:58,  2.52s/it][A[A[A[A[A





703it [30:21,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 702 Batch Loss 1.4062 
Time 2.4807589054107666 








 29%|██▉       | 704/2416 [30:24<1:11:25,  2.50s/it][A[A[A[A[A





704it [30:24,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 703 Batch Loss 1.5665 
Time 2.446765422821045 








 29%|██▉       | 705/2416 [30:26<1:11:35,  2.51s/it][A[A[A[A[A





705it [30:26,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 704 Batch Loss 1.4571 
Time 2.519365072250366 








 29%|██▉       | 706/2416 [30:29<1:11:26,  2.51s/it][A[A[A[A[A





706it [30:29,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 705 Batch Loss 1.4024 
Time 2.479495048522949 








 29%|██▉       | 707/2416 [30:31<1:10:59,  2.49s/it][A[A[A[A[A





707it [30:31,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 706 Batch Loss 0.9587 
Time 2.4471323490142822 








 29%|██▉       | 708/2416 [30:34<1:11:02,  2.50s/it][A[A[A[A[A





708it [30:34,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 707 Batch Loss 1.1758 
Time 2.484222173690796 








 29%|██▉       | 709/2416 [30:37<1:15:01,  2.64s/it][A[A[A[A[A





709it [30:37,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 708 Batch Loss 1.4069 
Time 2.956448554992676 








 29%|██▉       | 710/2416 [30:39<1:13:22,  2.58s/it][A[A[A[A[A





710it [30:39,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 709 Batch Loss 1.0979 
Time 2.442861318588257 








 29%|██▉       | 711/2416 [30:42<1:12:24,  2.55s/it][A[A[A[A[A





711it [30:42,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 710 Batch Loss 1.0818 
Time 2.4598195552825928 








 29%|██▉       | 712/2416 [30:44<1:11:50,  2.53s/it][A[A[A[A[A





712it [30:44,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 711 Batch Loss 1.2967 
Time 2.4744415283203125 








 30%|██▉       | 713/2416 [30:47<1:11:04,  2.50s/it][A[A[A[A[A





713it [30:47,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 712 Batch Loss 1.4632 
Time 2.4313676357269287 








 30%|██▉       | 714/2416 [30:49<1:10:36,  2.49s/it][A[A[A[A[A





714it [30:49,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 713 Batch Loss 1.5349 
Time 2.444668769836426 








 30%|██▉       | 715/2416 [30:52<1:10:21,  2.48s/it][A[A[A[A[A





715it [30:52,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 714 Batch Loss 1.2398 
Time 2.458773374557495 








 30%|██▉       | 716/2416 [30:54<1:09:54,  2.47s/it][A[A[A[A[A





716it [30:54,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 715 Batch Loss 1.4647 
Time 2.422630786895752 








 30%|██▉       | 717/2416 [30:57<1:10:05,  2.48s/it][A[A[A[A[A





717it [30:57,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 716 Batch Loss 1.5075 
Time 2.4788057804107666 








 30%|██▉       | 718/2416 [30:59<1:09:52,  2.47s/it][A[A[A[A[A





718it [30:59,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 717 Batch Loss 1.3380 
Time 2.4473392963409424 








 30%|██▉       | 719/2416 [31:02<1:10:12,  2.48s/it][A[A[A[A[A





719it [31:02,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 718 Batch Loss 1.2591 
Time 2.5043694972991943 








 30%|██▉       | 720/2416 [31:05<1:14:45,  2.64s/it][A[A[A[A[A





720it [31:05,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 719 Batch Loss 1.3047 
Time 3.0137453079223633 








 30%|██▉       | 721/2416 [31:07<1:14:08,  2.62s/it][A[A[A[A[A





721it [31:07,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 720 Batch Loss 1.4612 
Time 2.5674655437469482 








 30%|██▉       | 722/2416 [31:10<1:12:57,  2.58s/it][A[A[A[A[A





722it [31:10,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 721 Batch Loss 1.4244 
Time 2.4740254878997803 








 30%|██▉       | 723/2416 [31:12<1:11:57,  2.55s/it][A[A[A[A[A





723it [31:12,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 722 Batch Loss 1.3456 
Time 2.4564335346221924 








 30%|██▉       | 724/2416 [31:15<1:11:23,  2.53s/it][A[A[A[A[A





724it [31:15,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 723 Batch Loss 1.5064 
Time 2.472390651702881 








 30%|███       | 725/2416 [31:17<1:11:00,  2.52s/it][A[A[A[A[A





725it [31:17,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 724 Batch Loss 1.2191 
Time 2.4780526161193848 








 30%|███       | 726/2416 [31:20<1:10:41,  2.51s/it][A[A[A[A[A





726it [31:20,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 725 Batch Loss 1.4338 
Time 2.474963903427124 








 30%|███       | 727/2416 [31:22<1:10:31,  2.51s/it][A[A[A[A[A





727it [31:22,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 726 Batch Loss 1.2950 
Time 2.4820163249969482 








 30%|███       | 728/2416 [31:25<1:10:27,  2.50s/it][A[A[A[A[A





728it [31:25,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 727 Batch Loss 1.2043 
Time 2.495943307876587 








 30%|███       | 729/2416 [31:27<1:10:10,  2.50s/it][A[A[A[A[A





729it [31:27,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 728 Batch Loss 1.2124 
Time 2.4583566188812256 








 30%|███       | 730/2416 [31:30<1:10:12,  2.50s/it][A[A[A[A[A





730it [31:30,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 729 Batch Loss 1.2068 
Time 2.493643045425415 








 30%|███       | 731/2416 [31:32<1:13:58,  2.63s/it][A[A[A[A[A





731it [31:32,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 730 Batch Loss 1.4287 
Time 2.937633514404297 








 30%|███       | 732/2416 [31:35<1:12:37,  2.59s/it][A[A[A[A[A





732it [31:35,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 731 Batch Loss 1.1399 
Time 2.4701099395751953 








 30%|███       | 733/2416 [31:37<1:11:49,  2.56s/it][A[A[A[A[A





733it [31:37,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 732 Batch Loss 1.1832 
Time 2.48638916015625 








 30%|███       | 734/2416 [31:40<1:11:08,  2.54s/it][A[A[A[A[A





734it [31:40,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 733 Batch Loss 1.5503 
Time 2.4664700031280518 








 30%|███       | 735/2416 [31:42<1:10:30,  2.52s/it][A[A[A[A[A





735it [31:42,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 734 Batch Loss 1.0541 
Time 2.4572596549987793 








 30%|███       | 736/2416 [31:45<1:10:08,  2.51s/it][A[A[A[A[A





736it [31:45,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 735 Batch Loss 1.4507 
Time 2.4644856452941895 








 31%|███       | 737/2416 [31:47<1:09:37,  2.49s/it][A[A[A[A[A





737it [31:47,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 736 Batch Loss 1.3889 
Time 2.4412949085235596 








 31%|███       | 738/2416 [31:50<1:09:27,  2.48s/it][A[A[A[A[A





738it [31:50,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 737 Batch Loss 1.4303 
Time 2.4658334255218506 








 31%|███       | 739/2416 [31:52<1:09:25,  2.48s/it][A[A[A[A[A





739it [31:52,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 738 Batch Loss 1.1882 
Time 2.468423843383789 








 31%|███       | 740/2416 [31:55<1:09:31,  2.49s/it][A[A[A[A[A





740it [31:55,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 739 Batch Loss 1.2227 
Time 2.493107557296753 








 31%|███       | 741/2416 [31:58<1:13:26,  2.63s/it][A[A[A[A[A





741it [31:58,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 740 Batch Loss 1.1799 
Time 2.9480350017547607 








 31%|███       | 742/2416 [32:00<1:12:40,  2.61s/it][A[A[A[A[A





742it [32:00,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 741 Batch Loss 1.1977 
Time 2.5383481979370117 








 31%|███       | 743/2416 [32:03<1:11:53,  2.58s/it][A[A[A[A[A





743it [32:03,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 742 Batch Loss 1.4739 
Time 2.504671812057495 








 31%|███       | 744/2416 [32:05<1:11:07,  2.55s/it][A[A[A[A[A





744it [32:05,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 743 Batch Loss 1.5974 
Time 2.477431535720825 








 31%|███       | 745/2416 [32:08<1:10:29,  2.53s/it][A[A[A[A[A





745it [32:08,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 744 Batch Loss 1.0784 
Time 2.4727609157562256 








 31%|███       | 746/2416 [32:10<1:10:11,  2.52s/it][A[A[A[A[A





746it [32:10,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 745 Batch Loss 1.3321 
Time 2.4871273040771484 








 31%|███       | 747/2416 [32:13<1:09:57,  2.52s/it][A[A[A[A[A





747it [32:13,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 746 Batch Loss 1.2563 
Time 2.4848709106445312 








 31%|███       | 748/2416 [32:15<1:09:55,  2.52s/it][A[A[A[A[A





748it [32:15,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 747 Batch Loss 1.2154 
Time 2.502974033355713 








 31%|███       | 749/2416 [32:18<1:09:50,  2.51s/it][A[A[A[A[A





749it [32:18,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 748 Batch Loss 1.1367 
Time 2.492112636566162 








 31%|███       | 750/2416 [32:20<1:09:28,  2.50s/it][A[A[A[A[A





750it [32:20,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 749 Batch Loss 1.4348 
Time 2.4686970710754395 








 31%|███       | 751/2416 [32:23<1:09:24,  2.50s/it][A[A[A[A[A





751it [32:23,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 750 Batch Loss 1.4195 
Time 2.479909658432007 








 31%|███       | 752/2416 [32:26<1:13:15,  2.64s/it][A[A[A[A[A





752it [32:26,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 751 Batch Loss 1.3261 
Time 2.9539287090301514 








 31%|███       | 753/2416 [32:28<1:12:22,  2.61s/it][A[A[A[A[A





753it [32:28,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 752 Batch Loss 1.3141 
Time 2.533555269241333 








 31%|███       | 754/2416 [32:31<1:11:55,  2.60s/it][A[A[A[A[A





754it [32:31,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 753 Batch Loss 1.1331 
Time 2.549194097518921 








 31%|███▏      | 755/2416 [32:33<1:10:58,  2.56s/it][A[A[A[A[A





755it [32:33,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 754 Batch Loss 1.1903 
Time 2.4774599075317383 








 31%|███▏      | 756/2416 [32:36<1:10:14,  2.54s/it][A[A[A[A[A





756it [32:36,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 755 Batch Loss 1.5205 
Time 2.4659030437469482 








 31%|███▏      | 757/2416 [32:38<1:10:12,  2.54s/it][A[A[A[A[A





757it [32:38,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 756 Batch Loss 1.2673 
Time 2.522027015686035 








 31%|███▏      | 758/2416 [32:41<1:10:02,  2.53s/it][A[A[A[A[A





758it [32:41,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 757 Batch Loss 1.3784 
Time 2.511373519897461 








 31%|███▏      | 759/2416 [32:43<1:10:20,  2.55s/it][A[A[A[A[A





759it [32:43,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 758 Batch Loss 1.2246 
Time 2.5701332092285156 








 31%|███▏      | 760/2416 [32:46<1:10:08,  2.54s/it][A[A[A[A[A





760it [32:46,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 759 Batch Loss 1.0961 
Time 2.5133113861083984 








 31%|███▏      | 761/2416 [32:49<1:10:09,  2.54s/it][A[A[A[A[A





761it [32:49,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 760 Batch Loss 1.3178 
Time 2.534510850906372 








 32%|███▏      | 762/2416 [32:52<1:13:56,  2.68s/it][A[A[A[A[A





762it [32:52,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 761 Batch Loss 1.1597 
Time 2.990886926651001 








 32%|███▏      | 763/2416 [32:54<1:12:36,  2.64s/it][A[A[A[A[A





763it [32:54,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 762 Batch Loss 1.0809 
Time 2.5205628871917725 








 32%|███▏      | 764/2416 [32:57<1:11:37,  2.60s/it][A[A[A[A[A





764it [32:57,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 763 Batch Loss 1.2180 
Time 2.5111682415008545 








 32%|███▏      | 765/2416 [32:59<1:11:03,  2.58s/it][A[A[A[A[A





765it [32:59,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 764 Batch Loss 1.2509 
Time 2.523742914199829 








 32%|███▏      | 766/2416 [33:02<1:10:41,  2.57s/it][A[A[A[A[A





766it [33:02,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 765 Batch Loss 1.3163 
Time 2.5344343185424805 








 32%|███▏      | 767/2416 [33:04<1:10:07,  2.55s/it][A[A[A[A[A





767it [33:04,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 766 Batch Loss 1.6633 
Time 2.4962286949157715 








 32%|███▏      | 768/2416 [33:07<1:09:48,  2.54s/it][A[A[A[A[A





768it [33:07,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 767 Batch Loss 1.3152 
Time 2.5014305114746094 








 32%|███▏      | 769/2416 [33:09<1:09:21,  2.53s/it][A[A[A[A[A





769it [33:09,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 768 Batch Loss 1.2973 
Time 2.481034278869629 








 32%|███▏      | 770/2416 [33:12<1:08:45,  2.51s/it][A[A[A[A[A





770it [33:12,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 769 Batch Loss 1.3915 
Time 2.448007822036743 








 32%|███▏      | 771/2416 [33:14<1:08:36,  2.50s/it][A[A[A[A[A





771it [33:14,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 770 Batch Loss 1.1910 
Time 2.475269317626953 








 32%|███▏      | 772/2416 [33:17<1:08:24,  2.50s/it][A[A[A[A[A





772it [33:17,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 771 Batch Loss 1.4973 
Time 2.466165542602539 








 32%|███▏      | 773/2416 [33:20<1:11:57,  2.63s/it][A[A[A[A[A





773it [33:20,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 772 Batch Loss 1.2619 
Time 2.926724433898926 








 32%|███▏      | 774/2416 [33:22<1:10:46,  2.59s/it][A[A[A[A[A





774it [33:22,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 773 Batch Loss 1.4045 
Time 2.478278398513794 








 32%|███▏      | 775/2416 [33:25<1:09:49,  2.55s/it][A[A[A[A[A





775it [33:25,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 774 Batch Loss 1.4440 
Time 2.464568614959717 








 32%|███▏      | 776/2416 [33:27<1:09:21,  2.54s/it][A[A[A[A[A





776it [33:27,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 775 Batch Loss 1.4282 
Time 2.4888100624084473 








 32%|███▏      | 777/2416 [33:30<1:09:03,  2.53s/it][A[A[A[A[A





777it [33:30,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 776 Batch Loss 1.2176 
Time 2.4916250705718994 








 32%|███▏      | 778/2416 [33:32<1:09:39,  2.55s/it][A[A[A[A[A





778it [33:32,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 777 Batch Loss 1.4271 
Time 2.5935416221618652 








 32%|███▏      | 779/2416 [33:35<1:10:03,  2.57s/it][A[A[A[A[A





779it [33:35,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 778 Batch Loss 1.0971 
Time 2.591963529586792 








 32%|███▏      | 780/2416 [33:37<1:10:01,  2.57s/it][A[A[A[A[A





780it [33:37,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 779 Batch Loss 1.3715 
Time 2.5535030364990234 








 32%|███▏      | 781/2416 [33:40<1:10:12,  2.58s/it][A[A[A[A[A





781it [33:40,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 780 Batch Loss 1.2315 
Time 2.5855860710144043 








 32%|███▏      | 782/2416 [33:42<1:09:40,  2.56s/it][A[A[A[A[A





782it [33:42,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 781 Batch Loss 1.1060 
Time 2.500760078430176 








 32%|███▏      | 783/2416 [33:45<1:09:08,  2.54s/it][A[A[A[A[A





783it [33:45,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 782 Batch Loss 1.2127 
Time 2.4889371395111084 








 32%|███▏      | 784/2416 [33:48<1:12:18,  2.66s/it][A[A[A[A[A





784it [33:48,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 783 Batch Loss 1.3247 
Time 2.922229290008545 








 32%|███▏      | 785/2416 [33:50<1:11:08,  2.62s/it][A[A[A[A[A





785it [33:50,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 784 Batch Loss 1.2114 
Time 2.5051190853118896 








 33%|███▎      | 786/2416 [33:53<1:10:07,  2.58s/it][A[A[A[A[A





786it [33:53,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 785 Batch Loss 1.6448 
Time 2.4817771911621094 








 33%|███▎      | 787/2416 [33:55<1:09:38,  2.56s/it][A[A[A[A[A





787it [33:55,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 786 Batch Loss 1.3552 
Time 2.509481191635132 








 33%|███▎      | 788/2416 [33:58<1:09:17,  2.55s/it][A[A[A[A[A





788it [33:58,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 787 Batch Loss 1.4519 
Time 2.5096592903137207 








 33%|███▎      | 789/2416 [34:00<1:08:59,  2.54s/it][A[A[A[A[A





789it [34:00,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 788 Batch Loss 1.4356 
Time 2.5063509941101074 








 33%|███▎      | 790/2416 [34:03<1:09:01,  2.55s/it][A[A[A[A[A





790it [34:03,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 789 Batch Loss 1.4403 
Time 2.5375125408172607 








 33%|███▎      | 791/2416 [34:06<1:08:52,  2.54s/it][A[A[A[A[A





791it [34:06,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 790 Batch Loss 1.2076 
Time 2.5185306072235107 








 33%|███▎      | 792/2416 [34:08<1:08:45,  2.54s/it][A[A[A[A[A





792it [34:08,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 791 Batch Loss 1.0605 
Time 2.5272738933563232 








 33%|███▎      | 793/2416 [34:11<1:08:47,  2.54s/it][A[A[A[A[A





793it [34:11,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 792 Batch Loss 1.2140 
Time 2.5378634929656982 








 33%|███▎      | 794/2416 [34:13<1:08:31,  2.53s/it][A[A[A[A[A





794it [34:13,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 793 Batch Loss 1.1559 
Time 2.5053274631500244 








 33%|███▎      | 795/2416 [34:16<1:12:19,  2.68s/it][A[A[A[A[A





795it [34:16,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 794 Batch Loss 1.3781 
Time 2.9923880100250244 








 33%|███▎      | 796/2416 [34:19<1:11:23,  2.64s/it][A[A[A[A[A





796it [34:19,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 795 Batch Loss 1.3848 
Time 2.5569746494293213 








 33%|███▎      | 797/2416 [34:21<1:10:13,  2.60s/it][A[A[A[A[A





797it [34:21,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 796 Batch Loss 1.3831 
Time 2.4917426109313965 








 33%|███▎      | 798/2416 [34:24<1:09:46,  2.59s/it][A[A[A[A[A





798it [34:24,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 797 Batch Loss 0.9880 
Time 2.538626194000244 








 33%|███▎      | 799/2416 [34:26<1:09:39,  2.59s/it][A[A[A[A[A





799it [34:26,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 798 Batch Loss 1.4639 
Time 2.5632102489471436 








 33%|███▎      | 800/2416 [34:29<1:09:19,  2.57s/it][A[A[A[A[A





800it [34:29,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 799 Batch Loss 1.6451 
Time 2.534827709197998 








 33%|███▎      | 801/2416 [34:31<1:09:02,  2.57s/it][A[A[A[A[A





801it [34:31,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 800 Batch Loss 1.2935 
Time 2.5311954021453857 








 33%|███▎      | 802/2416 [34:34<1:08:44,  2.56s/it][A[A[A[A[A





802it [34:34,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 801 Batch Loss 1.2710 
Time 2.5221447944641113 








 33%|███▎      | 803/2416 [34:37<1:08:51,  2.56s/it][A[A[A[A[A





803it [34:37,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 802 Batch Loss 1.4453 
Time 2.5638790130615234 








 33%|███▎      | 804/2416 [34:39<1:08:49,  2.56s/it][A[A[A[A[A





804it [34:39,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 803 Batch Loss 1.4820 
Time 2.54748272895813 








 33%|███▎      | 805/2416 [34:42<1:12:27,  2.70s/it][A[A[A[A[A





805it [34:42,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 804 Batch Loss 1.1967 
Time 3.0113525390625 








 33%|███▎      | 806/2416 [34:45<1:11:20,  2.66s/it][A[A[A[A[A





806it [34:45,  2.66s/it][A[A[A[A[A[A

Epoch 0 Batch 805 Batch Loss 1.2893 
Time 2.548632860183716 








 33%|███▎      | 807/2416 [34:47<1:10:33,  2.63s/it][A[A[A[A[A





807it [34:47,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 806 Batch Loss 1.5824 
Time 2.5553395748138428 








 33%|███▎      | 808/2416 [34:50<1:09:50,  2.61s/it][A[A[A[A[A





808it [34:50,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 807 Batch Loss 1.5208 
Time 2.5375382900238037 








 33%|███▎      | 809/2416 [34:52<1:09:15,  2.59s/it][A[A[A[A[A





809it [34:52,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 808 Batch Loss 1.5756 
Time 2.525850534439087 








 34%|███▎      | 810/2416 [34:55<1:09:04,  2.58s/it][A[A[A[A[A





810it [34:55,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 809 Batch Loss 1.1225 
Time 2.559032678604126 








 34%|███▎      | 811/2416 [34:57<1:08:43,  2.57s/it][A[A[A[A[A





811it [34:57,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 810 Batch Loss 1.3982 
Time 2.535639762878418 








 34%|███▎      | 812/2416 [35:00<1:08:31,  2.56s/it][A[A[A[A[A





812it [35:00,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 811 Batch Loss 1.2234 
Time 2.539257287979126 








 34%|███▎      | 813/2416 [35:03<1:08:34,  2.57s/it][A[A[A[A[A





813it [35:03,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 812 Batch Loss 1.1152 
Time 2.557265281677246 








 34%|███▎      | 814/2416 [35:05<1:08:26,  2.56s/it][A[A[A[A[A





814it [35:05,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 813 Batch Loss 1.2457 
Time 2.5478670597076416 








 34%|███▎      | 815/2416 [35:08<1:11:49,  2.69s/it][A[A[A[A[A





815it [35:08,  2.69s/it][A[A[A[A[A[A

Epoch 0 Batch 814 Batch Loss 1.3647 
Time 2.9770278930664062 








 34%|███▍      | 816/2416 [35:11<1:10:26,  2.64s/it][A[A[A[A[A





816it [35:11,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 815 Batch Loss 1.1768 
Time 2.5094099044799805 








 34%|███▍      | 817/2416 [35:13<1:09:29,  2.61s/it][A[A[A[A[A





817it [35:13,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 816 Batch Loss 1.4109 
Time 2.518007755279541 








 34%|███▍      | 818/2416 [35:16<1:08:41,  2.58s/it][A[A[A[A[A





818it [35:16,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 817 Batch Loss 1.4212 
Time 2.4966158866882324 








 34%|███▍      | 819/2416 [35:18<1:08:05,  2.56s/it][A[A[A[A[A





819it [35:18,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 818 Batch Loss 1.3919 
Time 2.4955413341522217 








 34%|███▍      | 820/2416 [35:21<1:07:43,  2.55s/it][A[A[A[A[A





820it [35:21,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 819 Batch Loss 1.4889 
Time 2.5081748962402344 








 34%|███▍      | 821/2416 [35:23<1:07:45,  2.55s/it][A[A[A[A[A





821it [35:23,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 820 Batch Loss 1.3245 
Time 2.548631429672241 








 34%|███▍      | 822/2416 [35:26<1:07:35,  2.54s/it][A[A[A[A[A





822it [35:26,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 821 Batch Loss 1.5495 
Time 2.5220344066619873 








 34%|███▍      | 823/2416 [35:28<1:07:15,  2.53s/it][A[A[A[A[A





823it [35:28,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 822 Batch Loss 1.0811 
Time 2.4965970516204834 








 34%|███▍      | 824/2416 [35:31<1:07:10,  2.53s/it][A[A[A[A[A





824it [35:31,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 823 Batch Loss 1.2250 
Time 2.515784978866577 








 34%|███▍      | 825/2416 [35:33<1:07:32,  2.55s/it][A[A[A[A[A





825it [35:33,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 824 Batch Loss 1.3289 
Time 2.5674357414245605 








 34%|███▍      | 826/2416 [35:36<1:10:44,  2.67s/it][A[A[A[A[A





826it [35:36,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 825 Batch Loss 1.2981 
Time 2.9425406455993652 








 34%|███▍      | 827/2416 [35:39<1:09:55,  2.64s/it][A[A[A[A[A





827it [35:39,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 826 Batch Loss 1.2401 
Time 2.5563783645629883 








 34%|███▍      | 828/2416 [35:42<1:09:22,  2.62s/it][A[A[A[A[A





828it [35:42,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 827 Batch Loss 1.2404 
Time 2.564241886138916 








 34%|███▍      | 829/2416 [35:44<1:08:41,  2.60s/it][A[A[A[A[A





829it [35:44,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 828 Batch Loss 1.2889 
Time 2.523751974105835 








 34%|███▍      | 830/2416 [35:47<1:08:20,  2.59s/it][A[A[A[A[A





830it [35:47,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 829 Batch Loss 1.1734 
Time 2.543985366821289 








 34%|███▍      | 831/2416 [35:49<1:08:07,  2.58s/it][A[A[A[A[A





831it [35:49,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 830 Batch Loss 1.4276 
Time 2.5558485984802246 








 34%|███▍      | 832/2416 [35:52<1:07:44,  2.57s/it][A[A[A[A[A





832it [35:52,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 831 Batch Loss 1.2313 
Time 2.5176072120666504 








 34%|███▍      | 833/2416 [35:54<1:07:37,  2.56s/it][A[A[A[A[A





833it [35:54,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 832 Batch Loss 1.3766 
Time 2.544487237930298 








 35%|███▍      | 834/2416 [35:57<1:07:17,  2.55s/it][A[A[A[A[A





834it [35:57,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 833 Batch Loss 1.4022 
Time 2.5146446228027344 








 35%|███▍      | 835/2416 [35:59<1:07:01,  2.54s/it][A[A[A[A[A





835it [35:59,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 834 Batch Loss 1.6608 
Time 2.5149827003479004 








 35%|███▍      | 836/2416 [36:02<1:06:45,  2.54s/it][A[A[A[A[A





836it [36:02,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 835 Batch Loss 1.4261 
Time 2.5042426586151123 








 35%|███▍      | 837/2416 [36:05<1:10:36,  2.68s/it][A[A[A[A[A





837it [36:05,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 836 Batch Loss 1.6323 
Time 3.0164451599121094 








 35%|███▍      | 838/2416 [36:07<1:09:10,  2.63s/it][A[A[A[A[A





838it [36:07,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 837 Batch Loss 1.6650 
Time 2.492595672607422 








 35%|███▍      | 839/2416 [36:10<1:08:47,  2.62s/it][A[A[A[A[A





839it [36:10,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 838 Batch Loss 0.9542 
Time 2.576178789138794 








 35%|███▍      | 840/2416 [36:13<1:08:37,  2.61s/it][A[A[A[A[A





840it [36:13,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 839 Batch Loss 1.2231 
Time 2.584662437438965 








 35%|███▍      | 841/2416 [36:15<1:07:43,  2.58s/it][A[A[A[A[A





841it [36:15,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 840 Batch Loss 1.3260 
Time 2.488722562789917 








 35%|███▍      | 842/2416 [36:18<1:07:06,  2.56s/it][A[A[A[A[A





842it [36:18,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 841 Batch Loss 1.4086 
Time 2.4974966049194336 








 35%|███▍      | 843/2416 [36:20<1:06:40,  2.54s/it][A[A[A[A[A





843it [36:20,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 842 Batch Loss 1.5661 
Time 2.497169017791748 








 35%|███▍      | 844/2416 [36:23<1:06:23,  2.53s/it][A[A[A[A[A





844it [36:23,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 843 Batch Loss 1.3644 
Time 2.501060962677002 








 35%|███▍      | 845/2416 [36:25<1:06:17,  2.53s/it][A[A[A[A[A





845it [36:25,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 844 Batch Loss 1.0431 
Time 2.512288808822632 








 35%|███▌      | 846/2416 [36:28<1:09:48,  2.67s/it][A[A[A[A[A





846it [36:28,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 845 Batch Loss 1.4195 
Time 2.9734950065612793 








 35%|███▌      | 847/2416 [36:31<1:08:43,  2.63s/it][A[A[A[A[A





847it [36:31,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 846 Batch Loss 1.3488 
Time 2.52388072013855 








 35%|███▌      | 848/2416 [36:33<1:07:57,  2.60s/it][A[A[A[A[A





848it [36:33,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 847 Batch Loss 1.3612 
Time 2.519479751586914 








 35%|███▌      | 849/2416 [36:36<1:07:17,  2.58s/it][A[A[A[A[A





849it [36:36,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 848 Batch Loss 1.3589 
Time 2.5162477493286133 








 35%|███▌      | 850/2416 [36:38<1:06:47,  2.56s/it][A[A[A[A[A





850it [36:38,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 849 Batch Loss 1.3460 
Time 2.5025455951690674 








 35%|███▌      | 851/2416 [36:41<1:06:20,  2.54s/it][A[A[A[A[A





851it [36:41,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 850 Batch Loss 1.5167 
Time 2.497403383255005 








 35%|███▌      | 852/2416 [36:43<1:05:53,  2.53s/it][A[A[A[A[A





852it [36:43,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 851 Batch Loss 1.1104 
Time 2.4753856658935547 








 35%|███▌      | 853/2416 [36:46<1:05:48,  2.53s/it][A[A[A[A[A





853it [36:46,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 852 Batch Loss 1.3575 
Time 2.5079665184020996 








 35%|███▌      | 854/2416 [36:48<1:05:24,  2.51s/it][A[A[A[A[A





854it [36:48,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 853 Batch Loss 1.4684 
Time 2.470698356628418 








 35%|███▌      | 855/2416 [36:51<1:05:10,  2.51s/it][A[A[A[A[A





855it [36:51,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 854 Batch Loss 1.5121 
Time 2.4818925857543945 








 35%|███▌      | 856/2416 [36:53<1:05:02,  2.50s/it][A[A[A[A[A





856it [36:53,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 855 Batch Loss 1.3644 
Time 2.4724795818328857 








 35%|███▌      | 857/2416 [36:56<1:08:21,  2.63s/it][A[A[A[A[A





857it [36:56,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 856 Batch Loss 1.1189 
Time 2.9186630249023438 








 36%|███▌      | 858/2416 [36:59<1:07:12,  2.59s/it][A[A[A[A[A





858it [36:59,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 857 Batch Loss 1.3340 
Time 2.475966453552246 








 36%|███▌      | 859/2416 [37:01<1:06:23,  2.56s/it][A[A[A[A[A





859it [37:01,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 858 Batch Loss 1.3130 
Time 2.478419065475464 








 36%|███▌      | 860/2416 [37:04<1:05:45,  2.54s/it][A[A[A[A[A





860it [37:04,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 859 Batch Loss 1.5119 
Time 2.4678611755371094 








 36%|███▌      | 861/2416 [37:06<1:05:20,  2.52s/it][A[A[A[A[A





861it [37:06,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 860 Batch Loss 1.0813 
Time 2.475860595703125 








 36%|███▌      | 862/2416 [37:09<1:05:04,  2.51s/it][A[A[A[A[A





862it [37:09,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 861 Batch Loss 1.1662 
Time 2.4764585494995117 








 36%|███▌      | 863/2416 [37:11<1:04:43,  2.50s/it][A[A[A[A[A





863it [37:11,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 862 Batch Loss 1.1341 
Time 2.4568421840667725 








 36%|███▌      | 864/2416 [37:14<1:04:38,  2.50s/it][A[A[A[A[A





864it [37:14,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 863 Batch Loss 1.2883 
Time 2.4863264560699463 








 36%|███▌      | 865/2416 [37:16<1:04:17,  2.49s/it][A[A[A[A[A





865it [37:16,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 864 Batch Loss 1.4860 
Time 2.4466679096221924 








 36%|███▌      | 866/2416 [37:19<1:04:11,  2.49s/it][A[A[A[A[A





866it [37:18,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 865 Batch Loss 1.1410 
Time 2.4719648361206055 








 36%|███▌      | 867/2416 [37:21<1:04:08,  2.48s/it][A[A[A[A[A





867it [37:21,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 866 Batch Loss 1.4824 
Time 2.472827672958374 








 36%|███▌      | 868/2416 [37:24<1:07:45,  2.63s/it][A[A[A[A[A





868it [37:24,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 867 Batch Loss 1.1972 
Time 2.9462132453918457 








 36%|███▌      | 869/2416 [37:26<1:06:35,  2.58s/it][A[A[A[A[A





869it [37:26,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 868 Batch Loss 1.7447 
Time 2.4681103229522705 








 36%|███▌      | 870/2416 [37:29<1:05:38,  2.55s/it][A[A[A[A[A





870it [37:29,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 869 Batch Loss 1.2035 
Time 2.4502108097076416 








 36%|███▌      | 871/2416 [37:31<1:05:08,  2.53s/it][A[A[A[A[A





871it [37:31,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 870 Batch Loss 1.3815 
Time 2.4720089435577393 








 36%|███▌      | 872/2416 [37:34<1:05:01,  2.53s/it][A[A[A[A[A





872it [37:34,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 871 Batch Loss 1.6121 
Time 2.5111000537872314 








 36%|███▌      | 873/2416 [37:36<1:04:21,  2.50s/it][A[A[A[A[A





873it [37:36,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 872 Batch Loss 1.2938 
Time 2.4318037033081055 








 36%|███▌      | 874/2416 [37:39<1:04:00,  2.49s/it][A[A[A[A[A





874it [37:39,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 873 Batch Loss 1.0703 
Time 2.4473214149475098 








 36%|███▌      | 875/2416 [37:41<1:03:50,  2.49s/it][A[A[A[A[A





875it [37:41,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 874 Batch Loss 1.2782 
Time 2.4630393981933594 








 36%|███▋      | 876/2416 [37:44<1:03:30,  2.47s/it][A[A[A[A[A





876it [37:44,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 875 Batch Loss 1.2599 
Time 2.434718608856201 








 36%|███▋      | 877/2416 [37:46<1:03:16,  2.47s/it][A[A[A[A[A





877it [37:46,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 876 Batch Loss 1.2409 
Time 2.441898822784424 








 36%|███▋      | 878/2416 [37:49<1:03:24,  2.47s/it][A[A[A[A[A





878it [37:49,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 877 Batch Loss 1.3870 
Time 2.4733173847198486 








 36%|███▋      | 879/2416 [37:52<1:06:45,  2.61s/it][A[A[A[A[A





879it [37:52,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 878 Batch Loss 1.5531 
Time 2.9073591232299805 








 36%|███▋      | 880/2416 [37:54<1:05:28,  2.56s/it][A[A[A[A[A





880it [37:54,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 879 Batch Loss 1.3456 
Time 2.427335739135742 








 36%|███▋      | 881/2416 [37:57<1:05:06,  2.54s/it][A[A[A[A[A





881it [37:57,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 880 Batch Loss 1.2076 
Time 2.5000815391540527 








 37%|███▋      | 882/2416 [37:59<1:04:49,  2.54s/it][A[A[A[A[A





882it [37:59,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 881 Batch Loss 1.1860 
Time 2.507194757461548 








 37%|███▋      | 883/2416 [38:02<1:04:17,  2.52s/it][A[A[A[A[A





883it [38:02,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 882 Batch Loss 1.3418 
Time 2.4600276947021484 








 37%|███▋      | 884/2416 [38:04<1:04:17,  2.52s/it][A[A[A[A[A





884it [38:04,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 883 Batch Loss 1.4545 
Time 2.5039470195770264 








 37%|███▋      | 885/2416 [38:07<1:04:08,  2.51s/it][A[A[A[A[A





885it [38:07,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 884 Batch Loss 1.4247 
Time 2.492893695831299 








 37%|███▋      | 886/2416 [38:09<1:04:13,  2.52s/it][A[A[A[A[A





886it [38:09,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 885 Batch Loss 1.1807 
Time 2.518785238265991 








 37%|███▋      | 887/2416 [38:12<1:03:47,  2.50s/it][A[A[A[A[A





887it [38:12,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 886 Batch Loss 1.2281 
Time 2.455345869064331 








 37%|███▋      | 888/2416 [38:14<1:03:42,  2.50s/it][A[A[A[A[A





888it [38:14,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 887 Batch Loss 1.3731 
Time 2.4893786907196045 








 37%|███▋      | 889/2416 [38:17<1:07:04,  2.64s/it][A[A[A[A[A





889it [38:17,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 888 Batch Loss 1.5067 
Time 2.9307374954223633 








 37%|███▋      | 890/2416 [38:20<1:05:55,  2.59s/it][A[A[A[A[A





890it [38:19,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 889 Batch Loss 1.3070 
Time 2.482177495956421 








 37%|███▋      | 891/2416 [38:22<1:05:05,  2.56s/it][A[A[A[A[A





891it [38:22,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 890 Batch Loss 1.3416 
Time 2.4732186794281006 








 37%|███▋      | 892/2416 [38:25<1:04:37,  2.54s/it][A[A[A[A[A





892it [38:24,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 891 Batch Loss 1.1190 
Time 2.4864420890808105 








 37%|███▋      | 893/2416 [38:27<1:04:03,  2.52s/it][A[A[A[A[A





893it [38:27,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 892 Batch Loss 1.5147 
Time 2.465559720993042 








 37%|███▋      | 894/2416 [38:29<1:03:54,  2.52s/it][A[A[A[A[A





894it [38:29,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 893 Batch Loss 1.3849 
Time 2.4979546070098877 








 37%|███▋      | 895/2416 [38:32<1:03:41,  2.51s/it][A[A[A[A[A





895it [38:32,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 894 Batch Loss 1.1663 
Time 2.4893295764923096 








 37%|███▋      | 896/2416 [38:34<1:03:28,  2.51s/it][A[A[A[A[A





896it [38:34,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 895 Batch Loss 1.4982 
Time 2.4758412837982178 








 37%|███▋      | 897/2416 [38:37<1:03:10,  2.50s/it][A[A[A[A[A





897it [38:37,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 896 Batch Loss 1.3448 
Time 2.4578192234039307 








 37%|███▋      | 898/2416 [38:39<1:03:14,  2.50s/it][A[A[A[A[A





898it [38:39,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 897 Batch Loss 1.1366 
Time 2.4943976402282715 








 37%|███▋      | 899/2416 [38:42<1:03:24,  2.51s/it][A[A[A[A[A





899it [38:42,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 898 Batch Loss 1.1856 
Time 2.5146992206573486 








 37%|███▋      | 900/2416 [38:45<1:07:22,  2.67s/it][A[A[A[A[A





900it [38:45,  2.67s/it][A[A[A[A[A[A

Epoch 0 Batch 899 Batch Loss 1.2936 
Time 3.0275864601135254 








 37%|███▋      | 901/2416 [38:48<1:06:34,  2.64s/it][A[A[A[A[A





901it [38:48,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 900 Batch Loss 1.1779 
Time 2.5574800968170166 








 37%|███▋      | 902/2416 [38:50<1:05:51,  2.61s/it][A[A[A[A[A





902it [38:50,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 901 Batch Loss 1.1432 
Time 2.533703088760376 








 37%|███▋      | 903/2416 [38:53<1:05:06,  2.58s/it][A[A[A[A[A





903it [38:53,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 902 Batch Loss 1.4379 
Time 2.5030927658081055 








 37%|███▋      | 904/2416 [38:55<1:04:30,  2.56s/it][A[A[A[A[A





904it [38:55,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 903 Batch Loss 1.5083 
Time 2.4956459999084473 








 37%|███▋      | 905/2416 [38:58<1:03:55,  2.54s/it][A[A[A[A[A





905it [38:58,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 904 Batch Loss 1.1212 
Time 2.4799246788024902 








 38%|███▊      | 906/2416 [39:00<1:03:22,  2.52s/it][A[A[A[A[A





906it [39:00,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 905 Batch Loss 1.2807 
Time 2.457655668258667 








 38%|███▊      | 907/2416 [39:03<1:03:09,  2.51s/it][A[A[A[A[A





907it [39:03,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 906 Batch Loss 1.3398 
Time 2.4837679862976074 








 38%|███▊      | 908/2416 [39:05<1:02:54,  2.50s/it][A[A[A[A[A





908it [39:05,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 907 Batch Loss 1.4511 
Time 2.4783096313476562 








 38%|███▊      | 909/2416 [39:08<1:02:42,  2.50s/it][A[A[A[A[A





909it [39:08,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 908 Batch Loss 1.4092 
Time 2.470146894454956 








 38%|███▊      | 910/2416 [39:11<1:06:00,  2.63s/it][A[A[A[A[A





910it [39:10,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 909 Batch Loss 1.6749 
Time 2.9262495040893555 








 38%|███▊      | 911/2416 [39:13<1:04:45,  2.58s/it][A[A[A[A[A





911it [39:13,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 910 Batch Loss 1.1824 
Time 2.4551429748535156 








 38%|███▊      | 912/2416 [39:15<1:03:59,  2.55s/it][A[A[A[A[A





912it [39:15,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 911 Batch Loss 1.4706 
Time 2.466944694519043 








 38%|███▊      | 913/2416 [39:18<1:03:30,  2.54s/it][A[A[A[A[A





913it [39:18,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 912 Batch Loss 1.3172 
Time 2.480163335800171 








 38%|███▊      | 914/2416 [39:20<1:03:09,  2.52s/it][A[A[A[A[A





914it [39:20,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 913 Batch Loss 1.4235 
Time 2.4849538803100586 








 38%|███▊      | 915/2416 [39:23<1:02:50,  2.51s/it][A[A[A[A[A





915it [39:23,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 914 Batch Loss 1.2252 
Time 2.475937843322754 








 38%|███▊      | 916/2416 [39:25<1:02:42,  2.51s/it][A[A[A[A[A





916it [39:25,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 915 Batch Loss 1.3648 
Time 2.489701986312866 








 38%|███▊      | 917/2416 [39:28<1:02:26,  2.50s/it][A[A[A[A[A





917it [39:28,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 916 Batch Loss 1.3061 
Time 2.4661471843719482 








 38%|███▊      | 918/2416 [39:30<1:02:27,  2.50s/it][A[A[A[A[A





918it [39:30,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 917 Batch Loss 0.9340 
Time 2.488349437713623 








 38%|███▊      | 919/2416 [39:33<1:02:19,  2.50s/it][A[A[A[A[A





919it [39:33,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 918 Batch Loss 1.1658 
Time 2.479140281677246 








 38%|███▊      | 920/2416 [39:35<1:02:25,  2.50s/it][A[A[A[A[A





920it [39:35,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 919 Batch Loss 1.4450 
Time 2.5078718662261963 








 38%|███▊      | 921/2416 [39:38<1:05:38,  2.63s/it][A[A[A[A[A





921it [39:38,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 920 Batch Loss 1.6373 
Time 2.9268269538879395 








 38%|███▊      | 922/2416 [39:41<1:04:25,  2.59s/it][A[A[A[A[A





922it [39:41,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 921 Batch Loss 1.1738 
Time 2.470895767211914 








 38%|███▊      | 923/2416 [39:43<1:03:35,  2.56s/it][A[A[A[A[A





923it [39:43,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 922 Batch Loss 1.4116 
Time 2.4641494750976562 








 38%|███▊      | 924/2416 [39:46<1:03:02,  2.53s/it][A[A[A[A[A





924it [39:46,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 923 Batch Loss 1.1478 
Time 2.4742913246154785 








 38%|███▊      | 925/2416 [39:48<1:02:37,  2.52s/it][A[A[A[A[A





925it [39:48,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 924 Batch Loss 1.3163 
Time 2.4789950847625732 








 38%|███▊      | 926/2416 [39:51<1:02:23,  2.51s/it][A[A[A[A[A





926it [39:51,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 925 Batch Loss 1.1965 
Time 2.480548620223999 








 38%|███▊      | 927/2416 [39:53<1:02:14,  2.51s/it][A[A[A[A[A





927it [39:53,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 926 Batch Loss 1.3768 
Time 2.4846718311309814 








 38%|███▊      | 928/2416 [39:56<1:02:01,  2.50s/it][A[A[A[A[A





928it [39:56,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 927 Batch Loss 1.1433 
Time 2.4732487201690674 








 38%|███▊      | 929/2416 [39:58<1:01:53,  2.50s/it][A[A[A[A[A





929it [39:58,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 928 Batch Loss 1.3788 
Time 2.4719138145446777 








 38%|███▊      | 930/2416 [40:01<1:01:31,  2.48s/it][A[A[A[A[A





930it [40:01,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 929 Batch Loss 1.4767 
Time 2.4441447257995605 








 39%|███▊      | 931/2416 [40:04<1:05:10,  2.63s/it][A[A[A[A[A





931it [40:04,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 930 Batch Loss 1.4068 
Time 2.9711692333221436 








 39%|███▊      | 932/2416 [40:06<1:03:58,  2.59s/it][A[A[A[A[A





932it [40:06,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 931 Batch Loss 1.3336 
Time 2.4656660556793213 








 39%|███▊      | 933/2416 [40:09<1:03:08,  2.55s/it][A[A[A[A[A





933it [40:09,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 932 Batch Loss 1.6632 
Time 2.4686474800109863 








 39%|███▊      | 934/2416 [40:11<1:02:42,  2.54s/it][A[A[A[A[A





934it [40:11,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 933 Batch Loss 1.0419 
Time 2.494884729385376 








 39%|███▊      | 935/2416 [40:14<1:02:18,  2.52s/it][A[A[A[A[A





935it [40:14,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 934 Batch Loss 1.3994 
Time 2.4802584648132324 








 39%|███▊      | 936/2416 [40:16<1:01:53,  2.51s/it][A[A[A[A[A





936it [40:16,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 935 Batch Loss 1.3209 
Time 2.460711717605591 








 39%|███▉      | 937/2416 [40:19<1:01:35,  2.50s/it][A[A[A[A[A





937it [40:19,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 936 Batch Loss 1.2298 
Time 2.4591991901397705 








 39%|███▉      | 938/2416 [40:21<1:01:24,  2.49s/it][A[A[A[A[A





938it [40:21,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 937 Batch Loss 1.3800 
Time 2.4689714908599854 








 39%|███▉      | 939/2416 [40:24<1:01:30,  2.50s/it][A[A[A[A[A





939it [40:24,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 938 Batch Loss 1.3125 
Time 2.4981496334075928 








 39%|███▉      | 940/2416 [40:26<1:01:19,  2.49s/it][A[A[A[A[A





940it [40:26,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 939 Batch Loss 1.2142 
Time 2.4728646278381348 








 39%|███▉      | 941/2416 [40:29<1:01:14,  2.49s/it][A[A[A[A[A





941it [40:29,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 940 Batch Loss 1.1740 
Time 2.4705631732940674 








 39%|███▉      | 942/2416 [40:32<1:04:37,  2.63s/it][A[A[A[A[A





942it [40:31,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 941 Batch Loss 1.2326 
Time 2.9398932456970215 








 39%|███▉      | 943/2416 [40:34<1:03:36,  2.59s/it][A[A[A[A[A





943it [40:34,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 942 Batch Loss 1.3079 
Time 2.4868338108062744 








 39%|███▉      | 944/2416 [40:37<1:02:58,  2.57s/it][A[A[A[A[A





944it [40:37,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 943 Batch Loss 1.2489 
Time 2.495600461959839 








 39%|███▉      | 945/2416 [40:39<1:02:16,  2.54s/it][A[A[A[A[A





945it [40:39,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 944 Batch Loss 1.2694 
Time 2.4670941829681396 








 39%|███▉      | 946/2416 [40:42<1:01:56,  2.53s/it][A[A[A[A[A





946it [40:41,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 945 Batch Loss 1.0184 
Time 2.4871959686279297 








 39%|███▉      | 947/2416 [40:44<1:01:57,  2.53s/it][A[A[A[A[A





947it [40:44,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 946 Batch Loss 1.2426 
Time 2.5234122276306152 








 39%|███▉      | 948/2416 [40:47<1:01:27,  2.51s/it][A[A[A[A[A





948it [40:46,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 947 Batch Loss 1.2523 
Time 2.4561269283294678 








 39%|███▉      | 949/2416 [40:49<1:01:11,  2.50s/it][A[A[A[A[A





949it [40:49,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 948 Batch Loss 1.3466 
Time 2.463002920150757 








 39%|███▉      | 950/2416 [40:51<1:01:02,  2.50s/it][A[A[A[A[A





950it [40:51,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 949 Batch Loss 1.2350 
Time 2.462294340133667 








 39%|███▉      | 951/2416 [40:54<1:00:48,  2.49s/it][A[A[A[A[A





951it [40:54,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 950 Batch Loss 1.5849 
Time 2.4565107822418213 








 39%|███▉      | 952/2416 [40:57<1:04:18,  2.64s/it][A[A[A[A[A





952it [40:57,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 951 Batch Loss 1.3797 
Time 2.9600329399108887 








 39%|███▉      | 953/2416 [40:59<1:03:18,  2.60s/it][A[A[A[A[A





953it [40:59,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 952 Batch Loss 1.4092 
Time 2.48876690864563 








 39%|███▉      | 954/2416 [41:02<1:02:33,  2.57s/it][A[A[A[A[A





954it [41:02,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 953 Batch Loss 1.4320 
Time 2.493823528289795 








 40%|███▉      | 955/2416 [41:04<1:02:09,  2.55s/it][A[A[A[A[A





955it [41:04,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 954 Batch Loss 1.1019 
Time 2.510921001434326 








 40%|███▉      | 956/2416 [41:07<1:01:42,  2.54s/it][A[A[A[A[A





956it [41:07,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 955 Batch Loss 1.2392 
Time 2.4853196144104004 








 40%|███▉      | 957/2416 [41:09<1:01:18,  2.52s/it][A[A[A[A[A





957it [41:09,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 956 Batch Loss 1.4659 
Time 2.4722740650177 








 40%|███▉      | 958/2416 [41:12<1:00:59,  2.51s/it][A[A[A[A[A





958it [41:12,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 957 Batch Loss 1.2077 
Time 2.469701051712036 








 40%|███▉      | 959/2416 [41:14<1:00:54,  2.51s/it][A[A[A[A[A





959it [41:14,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 958 Batch Loss 1.1385 
Time 2.497053623199463 








 40%|███▉      | 960/2416 [41:17<1:01:24,  2.53s/it][A[A[A[A[A





960it [41:17,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 959 Batch Loss 1.2906 
Time 2.5659945011138916 








 40%|███▉      | 961/2416 [41:20<1:01:41,  2.54s/it][A[A[A[A[A





961it [41:20,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 960 Batch Loss 1.4556 
Time 2.5598461627960205 








 40%|███▉      | 962/2416 [41:23<1:04:58,  2.68s/it][A[A[A[A[A





962it [41:23,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 961 Batch Loss 1.4005 
Time 2.9949474334716797 








 40%|███▉      | 963/2416 [41:25<1:03:26,  2.62s/it][A[A[A[A[A





963it [41:25,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 962 Batch Loss 1.1223 
Time 2.4653594493865967 








 40%|███▉      | 964/2416 [41:28<1:02:46,  2.59s/it][A[A[A[A[A





964it [41:28,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 963 Batch Loss 1.5747 
Time 2.518468141555786 








 40%|███▉      | 965/2416 [41:30<1:02:00,  2.56s/it][A[A[A[A[A





965it [41:30,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 964 Batch Loss 1.3302 
Time 2.483541488647461 








 40%|███▉      | 966/2416 [41:33<1:01:17,  2.54s/it][A[A[A[A[A





966it [41:33,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 965 Batch Loss 1.4254 
Time 2.4539549350738525 








 40%|████      | 967/2416 [41:35<1:01:06,  2.53s/it][A[A[A[A[A





967it [41:35,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 966 Batch Loss 1.5975 
Time 2.499931573867798 








 40%|████      | 968/2416 [41:38<1:00:57,  2.53s/it][A[A[A[A[A





968it [41:38,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 967 Batch Loss 1.5297 
Time 2.5018723011016846 








 40%|████      | 969/2416 [41:40<1:00:38,  2.51s/it][A[A[A[A[A





969it [41:40,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 968 Batch Loss 1.1740 
Time 2.479107618331909 








 40%|████      | 970/2416 [41:43<1:00:21,  2.50s/it][A[A[A[A[A





970it [41:43,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 969 Batch Loss 1.2694 
Time 2.464651346206665 








 40%|████      | 971/2416 [41:45<1:00:15,  2.50s/it][A[A[A[A[A





971it [41:45,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 970 Batch Loss 1.5317 
Time 2.482513427734375 








 40%|████      | 972/2416 [41:48<1:00:11,  2.50s/it][A[A[A[A[A





972it [41:48,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 971 Batch Loss 1.3356 
Time 2.4874651432037354 








 40%|████      | 973/2416 [41:51<1:03:30,  2.64s/it][A[A[A[A[A





973it [41:50,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 972 Batch Loss 1.2098 
Time 2.9589784145355225 








 40%|████      | 974/2416 [41:53<1:02:17,  2.59s/it][A[A[A[A[A





974it [41:53,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 973 Batch Loss 1.4497 
Time 2.4631519317626953 








 40%|████      | 975/2416 [41:56<1:01:43,  2.57s/it][A[A[A[A[A





975it [41:55,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 974 Batch Loss 1.0666 
Time 2.5035438537597656 








 40%|████      | 976/2416 [41:58<1:01:22,  2.56s/it][A[A[A[A[A





976it [41:58,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 975 Batch Loss 1.0049 
Time 2.5161352157592773 








 40%|████      | 977/2416 [42:01<1:00:45,  2.53s/it][A[A[A[A[A





977it [42:01,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 976 Batch Loss 1.6320 
Time 2.4690194129943848 








 40%|████      | 978/2416 [42:03<1:00:20,  2.52s/it][A[A[A[A[A





978it [42:03,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 977 Batch Loss 1.5049 
Time 2.465517044067383 








 41%|████      | 979/2416 [42:06<1:00:08,  2.51s/it][A[A[A[A[A





979it [42:05,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 978 Batch Loss 1.2019 
Time 2.481189727783203 








 41%|████      | 980/2416 [42:08<59:52,  2.50s/it]  [A[A[A[A[A





980it [42:08,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 979 Batch Loss 1.3218 
Time 2.469273090362549 








 41%|████      | 981/2416 [42:10<59:41,  2.50s/it][A[A[A[A[A





981it [42:10,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 980 Batch Loss 1.1219 
Time 2.474254608154297 








 41%|████      | 982/2416 [42:13<59:33,  2.49s/it][A[A[A[A[A





982it [42:13,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 981 Batch Loss 1.2134 
Time 2.476489543914795 








 41%|████      | 983/2416 [42:16<1:02:41,  2.62s/it][A[A[A[A[A





983it [42:16,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 982 Batch Loss 1.2909 
Time 2.9184746742248535 








 41%|████      | 984/2416 [42:18<1:01:52,  2.59s/it][A[A[A[A[A





984it [42:18,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 983 Batch Loss 1.4320 
Time 2.5058155059814453 








 41%|████      | 985/2416 [42:21<1:01:04,  2.56s/it][A[A[A[A[A





985it [42:21,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 984 Batch Loss 1.3791 
Time 2.4762682914733887 








 41%|████      | 986/2416 [42:23<1:00:30,  2.54s/it][A[A[A[A[A





986it [42:23,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 985 Batch Loss 1.2852 
Time 2.4720475673675537 








 41%|████      | 987/2416 [42:26<1:00:08,  2.53s/it][A[A[A[A[A





987it [42:26,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 986 Batch Loss 1.2556 
Time 2.4802350997924805 








 41%|████      | 988/2416 [42:28<59:59,  2.52s/it]  [A[A[A[A[A





988it [42:28,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 987 Batch Loss 1.6291 
Time 2.492729663848877 








 41%|████      | 989/2416 [42:31<59:57,  2.52s/it][A[A[A[A[A





989it [42:31,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 988 Batch Loss 1.5334 
Time 2.5143702030181885 








 41%|████      | 990/2416 [42:33<59:47,  2.52s/it][A[A[A[A[A





990it [42:33,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 989 Batch Loss 1.4560 
Time 2.4876275062561035 








 41%|████      | 991/2416 [42:36<59:41,  2.51s/it][A[A[A[A[A





991it [42:36,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 990 Batch Loss 1.4952 
Time 2.499058246612549 








 41%|████      | 992/2416 [42:38<59:53,  2.52s/it][A[A[A[A[A





992it [42:38,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 991 Batch Loss 1.3088 
Time 2.5381393432617188 








 41%|████      | 993/2416 [42:41<59:32,  2.51s/it][A[A[A[A[A





993it [42:41,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 992 Batch Loss 1.3705 
Time 2.4721004962921143 








 41%|████      | 994/2416 [42:44<1:02:54,  2.65s/it][A[A[A[A[A





994it [42:44,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 993 Batch Loss 1.2905 
Time 2.9751269817352295 








 41%|████      | 995/2416 [42:46<1:01:47,  2.61s/it][A[A[A[A[A





995it [42:46,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 994 Batch Loss 1.1382 
Time 2.495443344116211 








 41%|████      | 996/2416 [42:49<1:00:53,  2.57s/it][A[A[A[A[A





996it [42:49,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 995 Batch Loss 1.2317 
Time 2.4740569591522217 








 41%|████▏     | 997/2416 [42:51<1:00:31,  2.56s/it][A[A[A[A[A





997it [42:51,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 996 Batch Loss 1.6122 
Time 2.520726442337036 








 41%|████▏     | 998/2416 [42:54<1:00:05,  2.54s/it][A[A[A[A[A





998it [42:54,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 997 Batch Loss 1.3773 
Time 2.4891011714935303 








 41%|████▏     | 999/2416 [42:56<59:38,  2.53s/it]  [A[A[A[A[A





999it [42:56,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 998 Batch Loss 1.5311 
Time 2.47314715385437 








 41%|████▏     | 1000/2416 [42:59<59:35,  2.53s/it][A[A[A[A[A





1000it [42:59,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 999 Batch Loss 1.4490 
Time 2.513808488845825 








 41%|████▏     | 1001/2416 [43:01<59:27,  2.52s/it][A[A[A[A[A





1001it [43:01,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1000 Batch Loss 1.1693 
Time 2.497868537902832 








 41%|████▏     | 1002/2416 [43:04<59:18,  2.52s/it][A[A[A[A[A





1002it [43:04,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1001 Batch Loss 1.4545 
Time 2.4995369911193848 








 42%|████▏     | 1003/2416 [43:06<58:47,  2.50s/it][A[A[A[A[A





1003it [43:06,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1002 Batch Loss 1.1832 
Time 2.442657470703125 








 42%|████▏     | 1004/2416 [43:09<58:40,  2.49s/it][A[A[A[A[A





1004it [43:09,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1003 Batch Loss 1.3233 
Time 2.470310926437378 








 42%|████▏     | 1005/2416 [43:12<1:01:49,  2.63s/it][A[A[A[A[A





1005it [43:12,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 1004 Batch Loss 1.1758 
Time 2.9315712451934814 








 42%|████▏     | 1006/2416 [43:14<1:00:42,  2.58s/it][A[A[A[A[A





1006it [43:14,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1005 Batch Loss 1.3128 
Time 2.4669573307037354 








 42%|████▏     | 1007/2416 [43:17<59:52,  2.55s/it]  [A[A[A[A[A





1007it [43:17,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1006 Batch Loss 1.4126 
Time 2.46142315864563 








 42%|████▏     | 1008/2416 [43:19<59:16,  2.53s/it][A[A[A[A[A





1008it [43:19,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1007 Batch Loss 1.2866 
Time 2.4559853076934814 








 42%|████▏     | 1009/2416 [43:22<59:05,  2.52s/it][A[A[A[A[A





1009it [43:22,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1008 Batch Loss 1.4260 
Time 2.4953203201293945 








 42%|████▏     | 1010/2416 [43:24<58:44,  2.51s/it][A[A[A[A[A





1010it [43:24,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1009 Batch Loss 1.6102 
Time 2.465366840362549 








 42%|████▏     | 1011/2416 [43:27<58:25,  2.49s/it][A[A[A[A[A





1011it [43:27,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1010 Batch Loss 1.0885 
Time 2.452087163925171 








 42%|████▏     | 1012/2416 [43:29<58:15,  2.49s/it][A[A[A[A[A





1012it [43:29,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1011 Batch Loss 1.1549 
Time 2.4667532444000244 








 42%|████▏     | 1013/2416 [43:32<58:17,  2.49s/it][A[A[A[A[A





1013it [43:32,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1012 Batch Loss 1.3671 
Time 2.491953134536743 








 42%|████▏     | 1014/2416 [43:34<58:22,  2.50s/it][A[A[A[A[A





1014it [43:34,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1013 Batch Loss 1.0467 
Time 2.4968366622924805 








 42%|████▏     | 1015/2416 [43:37<1:01:35,  2.64s/it][A[A[A[A[A





1015it [43:37,  2.64s/it][A[A[A[A[A[A

Epoch 0 Batch 1014 Batch Loss 0.9674 
Time 2.9530982971191406 








 42%|████▏     | 1016/2416 [43:40<1:00:28,  2.59s/it][A[A[A[A[A





1016it [43:40,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 1015 Batch Loss 1.3158 
Time 2.4714903831481934 








 42%|████▏     | 1017/2416 [43:42<59:45,  2.56s/it]  [A[A[A[A[A





1017it [43:42,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 1016 Batch Loss 1.3887 
Time 2.483520030975342 








 42%|████▏     | 1018/2416 [43:45<59:03,  2.53s/it][A[A[A[A[A





1018it [43:45,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1017 Batch Loss 1.1329 
Time 2.4580628871917725 








 42%|████▏     | 1019/2416 [43:47<58:42,  2.52s/it][A[A[A[A[A





1019it [43:47,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1018 Batch Loss 1.2372 
Time 2.478548288345337 








 42%|████▏     | 1020/2416 [43:50<58:19,  2.51s/it][A[A[A[A[A





1020it [43:50,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1019 Batch Loss 1.1016 
Time 2.4609365463256836 








 42%|████▏     | 1021/2416 [43:52<58:15,  2.51s/it][A[A[A[A[A





1021it [43:52,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1020 Batch Loss 1.3170 
Time 2.4933388233184814 








 42%|████▏     | 1022/2416 [43:55<58:47,  2.53s/it][A[A[A[A[A





1022it [43:55,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1021 Batch Loss 1.4097 
Time 2.573692798614502 








 42%|████▏     | 1023/2416 [43:57<58:48,  2.53s/it][A[A[A[A[A





1023it [43:57,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1022 Batch Loss 1.5366 
Time 2.533179998397827 








 42%|████▏     | 1024/2416 [44:00<59:02,  2.55s/it][A[A[A[A[A





1024it [44:00,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1023 Batch Loss 1.4440 
Time 2.5610365867614746 








 42%|████▏     | 1025/2416 [44:02<59:25,  2.56s/it][A[A[A[A[A





1025it [44:02,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 1024 Batch Loss 1.1014 
Time 2.594404935836792 








 42%|████▏     | 1026/2416 [44:05<1:02:07,  2.68s/it][A[A[A[A[A





1026it [44:05,  2.68s/it][A[A[A[A[A[A

Epoch 0 Batch 1025 Batch Loss 1.6650 
Time 2.9457743167877197 








 43%|████▎     | 1027/2416 [44:08<1:00:35,  2.62s/it][A[A[A[A[A





1027it [44:08,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1026 Batch Loss 1.2573 
Time 2.4554288387298584 








 43%|████▎     | 1028/2416 [44:10<59:26,  2.57s/it]  [A[A[A[A[A





1028it [44:10,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1027 Batch Loss 1.0622 
Time 2.447930335998535 








 43%|████▎     | 1029/2416 [44:13<58:58,  2.55s/it][A[A[A[A[A





1029it [44:13,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1028 Batch Loss 1.3317 
Time 2.4977922439575195 








 43%|████▎     | 1030/2416 [44:15<58:22,  2.53s/it][A[A[A[A[A





1030it [44:15,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1029 Batch Loss 1.5842 
Time 2.457388401031494 








 43%|████▎     | 1031/2416 [44:18<57:59,  2.51s/it][A[A[A[A[A





1031it [44:18,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1030 Batch Loss 1.3756 
Time 2.4627835750579834 








 43%|████▎     | 1032/2416 [44:20<57:42,  2.50s/it][A[A[A[A[A





1032it [44:20,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1031 Batch Loss 1.1755 
Time 2.4634087085723877 








 43%|████▎     | 1033/2416 [44:23<57:36,  2.50s/it][A[A[A[A[A





1033it [44:23,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1032 Batch Loss 1.1256 
Time 2.47503662109375 








 43%|████▎     | 1034/2416 [44:25<57:08,  2.48s/it][A[A[A[A[A





1034it [44:25,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1033 Batch Loss 1.2741 
Time 2.4286792278289795 








 43%|████▎     | 1035/2416 [44:28<57:08,  2.48s/it][A[A[A[A[A





1035it [44:28,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1034 Batch Loss 1.3053 
Time 2.475804090499878 








 43%|████▎     | 1036/2416 [44:31<59:56,  2.61s/it][A[A[A[A[A





1036it [44:31,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 1035 Batch Loss 1.5307 
Time 2.8849823474884033 








 43%|████▎     | 1037/2416 [44:33<58:58,  2.57s/it][A[A[A[A[A





1037it [44:33,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1036 Batch Loss 1.2899 
Time 2.4570984840393066 








 43%|████▎     | 1038/2416 [44:35<58:04,  2.53s/it][A[A[A[A[A





1038it [44:35,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1037 Batch Loss 1.3237 
Time 2.4245529174804688 








 43%|████▎     | 1039/2416 [44:38<57:42,  2.51s/it][A[A[A[A[A





1039it [44:38,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1038 Batch Loss 1.5557 
Time 2.473426103591919 








 43%|████▎     | 1040/2416 [44:40<57:10,  2.49s/it][A[A[A[A[A





1040it [44:40,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1039 Batch Loss 1.3671 
Time 2.436955690383911 








 43%|████▎     | 1041/2416 [44:43<56:56,  2.48s/it][A[A[A[A[A





1041it [44:43,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1040 Batch Loss 1.2389 
Time 2.4492783546447754 








 43%|████▎     | 1042/2416 [44:45<56:53,  2.48s/it][A[A[A[A[A





1042it [44:45,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1041 Batch Loss 1.3213 
Time 2.4723188877105713 








 43%|████▎     | 1043/2416 [44:48<56:38,  2.48s/it][A[A[A[A[A





1043it [44:48,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1042 Batch Loss 1.3431 
Time 2.4391019344329834 








 43%|████▎     | 1044/2416 [44:50<56:35,  2.47s/it][A[A[A[A[A





1044it [44:50,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1043 Batch Loss 1.7579 
Time 2.4586141109466553 








 43%|████▎     | 1045/2416 [44:53<56:53,  2.49s/it][A[A[A[A[A





1045it [44:53,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1044 Batch Loss 1.2852 
Time 2.5104820728302 








 43%|████▎     | 1046/2416 [44:56<59:50,  2.62s/it][A[A[A[A[A





1046it [44:56,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1045 Batch Loss 1.4109 
Time 2.91526198387146 








 43%|████▎     | 1047/2416 [44:58<58:51,  2.58s/it][A[A[A[A[A





1047it [44:58,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1046 Batch Loss 1.5539 
Time 2.4732728004455566 








 43%|████▎     | 1048/2416 [45:01<58:08,  2.55s/it][A[A[A[A[A





1048it [45:01,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1047 Batch Loss 1.3717 
Time 2.470320224761963 








 43%|████▎     | 1049/2416 [45:03<57:37,  2.53s/it][A[A[A[A[A





1049it [45:03,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1048 Batch Loss 1.1971 
Time 2.4640824794769287 








 43%|████▎     | 1050/2416 [45:06<57:37,  2.53s/it][A[A[A[A[A





1050it [45:06,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1049 Batch Loss 1.1281 
Time 2.528090476989746 








 44%|████▎     | 1051/2416 [45:08<57:12,  2.51s/it][A[A[A[A[A





1051it [45:08,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1050 Batch Loss 1.0803 
Time 2.4658405780792236 








 44%|████▎     | 1052/2416 [45:11<56:46,  2.50s/it][A[A[A[A[A





1052it [45:11,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1051 Batch Loss 1.2120 
Time 2.4448366165161133 








 44%|████▎     | 1053/2416 [45:13<56:33,  2.49s/it][A[A[A[A[A





1053it [45:13,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1052 Batch Loss 1.4199 
Time 2.461359739303589 








 44%|████▎     | 1054/2416 [45:16<56:35,  2.49s/it][A[A[A[A[A





1054it [45:16,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1053 Batch Loss 1.1916 
Time 2.4924144744873047 








 44%|████▎     | 1055/2416 [45:18<56:27,  2.49s/it][A[A[A[A[A





1055it [45:18,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1054 Batch Loss 1.1363 
Time 2.4738612174987793 








 44%|████▎     | 1056/2416 [45:21<56:20,  2.49s/it][A[A[A[A[A





1056it [45:21,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1055 Batch Loss 1.5290 
Time 2.464047908782959 








 44%|████▍     | 1057/2416 [45:23<59:19,  2.62s/it][A[A[A[A[A





1057it [45:23,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1056 Batch Loss 1.3228 
Time 2.913710117340088 








 44%|████▍     | 1058/2416 [45:26<58:21,  2.58s/it][A[A[A[A[A





1058it [45:26,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1057 Batch Loss 1.5690 
Time 2.477205514907837 








 44%|████▍     | 1059/2416 [45:28<57:47,  2.56s/it][A[A[A[A[A





1059it [45:28,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1058 Batch Loss 1.5108 
Time 2.4837281703948975 








 44%|████▍     | 1060/2416 [45:31<57:07,  2.53s/it][A[A[A[A[A





1060it [45:31,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1059 Batch Loss 1.3968 
Time 2.451378107070923 








 44%|████▍     | 1061/2416 [45:33<56:41,  2.51s/it][A[A[A[A[A





1061it [45:33,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1060 Batch Loss 1.5742 
Time 2.452594518661499 








 44%|████▍     | 1062/2416 [45:36<56:35,  2.51s/it][A[A[A[A[A





1062it [45:36,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1061 Batch Loss 1.2245 
Time 2.478339433670044 








 44%|████▍     | 1063/2416 [45:38<56:21,  2.50s/it][A[A[A[A[A





1063it [45:38,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1062 Batch Loss 1.1394 
Time 2.473252296447754 








 44%|████▍     | 1064/2416 [45:41<56:17,  2.50s/it][A[A[A[A[A





1064it [45:41,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1063 Batch Loss 1.3664 
Time 2.479473114013672 








 44%|████▍     | 1065/2416 [45:43<56:03,  2.49s/it][A[A[A[A[A





1065it [45:43,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1064 Batch Loss 1.2957 
Time 2.4643137454986572 








 44%|████▍     | 1066/2416 [45:46<55:53,  2.48s/it][A[A[A[A[A





1066it [45:46,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1065 Batch Loss 1.2381 
Time 2.4634857177734375 








 44%|████▍     | 1067/2416 [45:48<55:49,  2.48s/it][A[A[A[A[A





1067it [45:48,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1066 Batch Loss 1.2931 
Time 2.4733989238739014 








 44%|████▍     | 1068/2416 [45:51<58:45,  2.62s/it][A[A[A[A[A





1068it [45:51,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1067 Batch Loss 1.4252 
Time 2.9153566360473633 








 44%|████▍     | 1069/2416 [45:54<57:39,  2.57s/it][A[A[A[A[A





1069it [45:54,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1068 Batch Loss 1.5521 
Time 2.445143938064575 








 44%|████▍     | 1070/2416 [45:56<56:52,  2.54s/it][A[A[A[A[A





1070it [45:56,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1069 Batch Loss 1.1969 
Time 2.4502103328704834 








 44%|████▍     | 1071/2416 [45:59<56:20,  2.51s/it][A[A[A[A[A





1071it [45:59,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1070 Batch Loss 1.5807 
Time 2.455545663833618 








 44%|████▍     | 1072/2416 [46:01<55:50,  2.49s/it][A[A[A[A[A





1072it [46:01,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1071 Batch Loss 1.3194 
Time 2.4330742359161377 








 44%|████▍     | 1073/2416 [46:04<55:37,  2.49s/it][A[A[A[A[A





1073it [46:03,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1072 Batch Loss 1.7201 
Time 2.4522387981414795 








 44%|████▍     | 1074/2416 [46:06<55:27,  2.48s/it][A[A[A[A[A





1074it [46:06,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1073 Batch Loss 1.4397 
Time 2.448255777359009 








 44%|████▍     | 1075/2416 [46:08<55:27,  2.48s/it][A[A[A[A[A





1075it [46:08,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1074 Batch Loss 1.4391 
Time 2.4758822917938232 








 45%|████▍     | 1076/2416 [46:11<55:07,  2.47s/it][A[A[A[A[A





1076it [46:11,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1075 Batch Loss 1.0800 
Time 2.4268956184387207 








 45%|████▍     | 1077/2416 [46:13<55:00,  2.46s/it][A[A[A[A[A





1077it [46:13,  2.46s/it][A[A[A[A[A[A

Epoch 0 Batch 1076 Batch Loss 1.4186 
Time 2.450831890106201 








 45%|████▍     | 1078/2416 [46:16<55:04,  2.47s/it][A[A[A[A[A





1078it [46:16,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1077 Batch Loss 1.3538 
Time 2.4697389602661133 








 45%|████▍     | 1079/2416 [46:19<58:10,  2.61s/it][A[A[A[A[A





1079it [46:19,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 1078 Batch Loss 1.2372 
Time 2.91719388961792 








 45%|████▍     | 1080/2416 [46:21<57:22,  2.58s/it][A[A[A[A[A





1080it [46:21,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1079 Batch Loss 1.4929 
Time 2.4836008548736572 








 45%|████▍     | 1081/2416 [46:24<57:09,  2.57s/it][A[A[A[A[A





1081it [46:24,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1080 Batch Loss 1.2455 
Time 2.53718638420105 








 45%|████▍     | 1082/2416 [46:26<56:52,  2.56s/it][A[A[A[A[A





1082it [46:26,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 1081 Batch Loss 1.5669 
Time 2.5145792961120605 








 45%|████▍     | 1083/2416 [46:29<56:18,  2.53s/it][A[A[A[A[A





1083it [46:29,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1082 Batch Loss 1.2546 
Time 2.4661238193511963 








 45%|████▍     | 1084/2416 [46:31<55:49,  2.51s/it][A[A[A[A[A





1084it [46:31,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1083 Batch Loss 1.5079 
Time 2.451111316680908 








 45%|████▍     | 1085/2416 [46:34<55:29,  2.50s/it][A[A[A[A[A





1085it [46:34,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1084 Batch Loss 1.1796 
Time 2.4519529342651367 








 45%|████▍     | 1086/2416 [46:36<55:24,  2.50s/it][A[A[A[A[A





1086it [46:36,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1085 Batch Loss 1.1506 
Time 2.4802749156951904 








 45%|████▍     | 1087/2416 [46:39<55:05,  2.49s/it][A[A[A[A[A





1087it [46:39,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1086 Batch Loss 1.2819 
Time 2.445016384124756 








 45%|████▌     | 1088/2416 [46:41<54:55,  2.48s/it][A[A[A[A[A





1088it [46:41,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1087 Batch Loss 1.5544 
Time 2.4569551944732666 








 45%|████▌     | 1089/2416 [46:44<57:50,  2.62s/it][A[A[A[A[A





1089it [46:44,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1088 Batch Loss 1.0479 
Time 2.9138853549957275 








 45%|████▌     | 1090/2416 [46:47<56:41,  2.57s/it][A[A[A[A[A





1090it [46:47,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1089 Batch Loss 1.3026 
Time 2.436418056488037 








 45%|████▌     | 1091/2416 [46:49<55:45,  2.52s/it][A[A[A[A[A





1091it [46:49,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1090 Batch Loss 1.3971 
Time 2.419609785079956 








 45%|████▌     | 1092/2416 [46:51<55:22,  2.51s/it][A[A[A[A[A





1092it [46:51,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1091 Batch Loss 1.4020 
Time 2.460108995437622 








 45%|████▌     | 1093/2416 [46:54<55:02,  2.50s/it][A[A[A[A[A





1093it [46:54,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1092 Batch Loss 1.2521 
Time 2.454118251800537 








 45%|████▌     | 1094/2416 [46:56<54:49,  2.49s/it][A[A[A[A[A





1094it [46:56,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1093 Batch Loss 1.4831 
Time 2.4545204639434814 








 45%|████▌     | 1095/2416 [46:59<54:41,  2.48s/it][A[A[A[A[A





1095it [46:59,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1094 Batch Loss 1.4939 
Time 2.4591526985168457 








 45%|████▌     | 1096/2416 [47:01<54:35,  2.48s/it][A[A[A[A[A





1096it [47:01,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1095 Batch Loss 1.3872 
Time 2.45662260055542 








 45%|████▌     | 1097/2416 [47:04<54:24,  2.48s/it][A[A[A[A[A





1097it [47:04,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1096 Batch Loss 1.4220 
Time 2.453308343887329 








 45%|████▌     | 1098/2416 [47:06<54:20,  2.47s/it][A[A[A[A[A





1098it [47:06,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1097 Batch Loss 1.2739 
Time 2.463960886001587 








 45%|████▌     | 1099/2416 [47:09<57:17,  2.61s/it][A[A[A[A[A





1099it [47:09,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 1098 Batch Loss 1.4390 
Time 2.9176337718963623 








 46%|████▌     | 1100/2416 [47:12<56:19,  2.57s/it][A[A[A[A[A





1100it [47:12,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1099 Batch Loss 1.3839 
Time 2.4536986351013184 








 46%|████▌     | 1101/2416 [47:14<55:43,  2.54s/it][A[A[A[A[A





1101it [47:14,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 1100 Batch Loss 1.3481 
Time 2.4767589569091797 








 46%|████▌     | 1102/2416 [47:17<55:23,  2.53s/it][A[A[A[A[A





1102it [47:17,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1101 Batch Loss 1.3720 
Time 2.4872195720672607 








 46%|████▌     | 1103/2416 [47:19<54:52,  2.51s/it][A[A[A[A[A





1103it [47:19,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1102 Batch Loss 1.3256 
Time 2.44252610206604 








 46%|████▌     | 1104/2416 [47:22<54:34,  2.50s/it][A[A[A[A[A





1104it [47:22,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1103 Batch Loss 1.2487 
Time 2.461181640625 








 46%|████▌     | 1105/2416 [47:24<54:20,  2.49s/it][A[A[A[A[A





1105it [47:24,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1104 Batch Loss 1.2963 
Time 2.4492719173431396 








 46%|████▌     | 1106/2416 [47:27<54:33,  2.50s/it][A[A[A[A[A





1106it [47:27,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1105 Batch Loss 1.7022 
Time 2.514756917953491 








 46%|████▌     | 1107/2416 [47:29<54:17,  2.49s/it][A[A[A[A[A





1107it [47:29,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1106 Batch Loss 1.3956 
Time 2.459892511367798 








 46%|████▌     | 1108/2416 [47:32<54:09,  2.48s/it][A[A[A[A[A





1108it [47:31,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1107 Batch Loss 1.3666 
Time 2.458383560180664 








 46%|████▌     | 1109/2416 [47:34<57:13,  2.63s/it][A[A[A[A[A





1109it [47:34,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 1108 Batch Loss 1.3969 
Time 2.950974464416504 








 46%|████▌     | 1110/2416 [47:37<56:11,  2.58s/it][A[A[A[A[A





1110it [47:37,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1109 Batch Loss 1.4805 
Time 2.4598944187164307 








 46%|████▌     | 1111/2416 [47:39<55:24,  2.55s/it][A[A[A[A[A





1111it [47:39,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1110 Batch Loss 1.3710 
Time 2.4622583389282227 








 46%|████▌     | 1112/2416 [47:42<54:58,  2.53s/it][A[A[A[A[A





1112it [47:42,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1111 Batch Loss 1.7051 
Time 2.4696457386016846 








 46%|████▌     | 1113/2416 [47:44<54:33,  2.51s/it][A[A[A[A[A





1113it [47:44,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1112 Batch Loss 1.2650 
Time 2.4592742919921875 








 46%|████▌     | 1114/2416 [47:47<54:22,  2.51s/it][A[A[A[A[A





1114it [47:47,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1113 Batch Loss 0.9758 
Time 2.481480360031128 








 46%|████▌     | 1115/2416 [47:49<54:03,  2.49s/it][A[A[A[A[A





1115it [47:49,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1114 Batch Loss 1.4921 
Time 2.4473676681518555 








 46%|████▌     | 1116/2416 [47:52<53:45,  2.48s/it][A[A[A[A[A





1116it [47:52,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1115 Batch Loss 1.2583 
Time 2.438533067703247 








 46%|████▌     | 1117/2416 [47:54<53:53,  2.49s/it][A[A[A[A[A





1117it [47:54,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1116 Batch Loss 1.2514 
Time 2.4912307262420654 








 46%|████▋     | 1118/2416 [47:57<53:44,  2.48s/it][A[A[A[A[A





1118it [47:57,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1117 Batch Loss 1.7107 
Time 2.4622578620910645 








 46%|████▋     | 1119/2416 [48:00<56:35,  2.62s/it][A[A[A[A[A





1119it [48:00,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1118 Batch Loss 1.3977 
Time 2.920950174331665 








 46%|████▋     | 1120/2416 [48:02<55:37,  2.58s/it][A[A[A[A[A





1120it [48:02,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1119 Batch Loss 1.2226 
Time 2.463879346847534 








 46%|████▋     | 1121/2416 [48:05<54:52,  2.54s/it][A[A[A[A[A





1121it [48:05,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 1120 Batch Loss 1.3129 
Time 2.4584410190582275 








 46%|████▋     | 1122/2416 [48:07<54:36,  2.53s/it][A[A[A[A[A





1122it [48:07,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1121 Batch Loss 1.2765 
Time 2.491840362548828 








 46%|████▋     | 1123/2416 [48:10<54:08,  2.51s/it][A[A[A[A[A





1123it [48:10,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1122 Batch Loss 1.3263 
Time 2.4535295963287354 








 47%|████▋     | 1124/2416 [48:12<53:41,  2.49s/it][A[A[A[A[A





1124it [48:12,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1123 Batch Loss 1.5905 
Time 2.435072183609009 








 47%|████▋     | 1125/2416 [48:15<53:37,  2.49s/it][A[A[A[A[A





1125it [48:15,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1124 Batch Loss 1.2870 
Time 2.4731602668762207 








 47%|████▋     | 1126/2416 [48:17<53:17,  2.48s/it][A[A[A[A[A





1126it [48:17,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1125 Batch Loss 1.4905 
Time 2.434422016143799 








 47%|████▋     | 1127/2416 [48:19<53:16,  2.48s/it][A[A[A[A[A





1127it [48:19,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1126 Batch Loss 1.4942 
Time 2.469515562057495 








 47%|████▋     | 1128/2416 [48:22<52:58,  2.47s/it][A[A[A[A[A





1128it [48:22,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1127 Batch Loss 1.3917 
Time 2.4296042919158936 








 47%|████▋     | 1129/2416 [48:24<52:52,  2.46s/it][A[A[A[A[A





1129it [48:24,  2.46s/it][A[A[A[A[A[A

Epoch 0 Batch 1128 Batch Loss 1.3144 
Time 2.4469988346099854 








 47%|████▋     | 1130/2416 [48:27<55:59,  2.61s/it][A[A[A[A[A





1130it [48:27,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 1129 Batch Loss 1.1619 
Time 2.9417531490325928 








 47%|████▋     | 1131/2416 [48:30<54:59,  2.57s/it][A[A[A[A[A





1131it [48:30,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1130 Batch Loss 1.3522 
Time 2.45290207862854 








 47%|████▋     | 1132/2416 [48:32<54:15,  2.54s/it][A[A[A[A[A





1132it [48:32,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 1131 Batch Loss 1.6469 
Time 2.446597099304199 








 47%|████▋     | 1133/2416 [48:35<53:48,  2.52s/it][A[A[A[A[A





1133it [48:35,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1132 Batch Loss 1.1920 
Time 2.4576539993286133 








 47%|████▋     | 1134/2416 [48:37<53:42,  2.51s/it][A[A[A[A[A





1134it [48:37,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1133 Batch Loss 1.2499 
Time 2.4974379539489746 








 47%|████▋     | 1135/2416 [48:40<53:39,  2.51s/it][A[A[A[A[A





1135it [48:40,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1134 Batch Loss 1.4428 
Time 2.502164363861084 








 47%|████▋     | 1136/2416 [48:42<53:26,  2.51s/it][A[A[A[A[A





1136it [48:42,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1135 Batch Loss 1.3090 
Time 2.4738035202026367 








 47%|████▋     | 1137/2416 [48:45<53:24,  2.51s/it][A[A[A[A[A





1137it [48:45,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1136 Batch Loss 1.1724 
Time 2.4933106899261475 








 47%|████▋     | 1138/2416 [48:47<53:20,  2.50s/it][A[A[A[A[A





1138it [48:47,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1137 Batch Loss 1.4084 
Time 2.493213176727295 








 47%|████▋     | 1139/2416 [48:50<53:17,  2.50s/it][A[A[A[A[A





1139it [48:50,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1138 Batch Loss 1.6023 
Time 2.4872677326202393 








 47%|████▋     | 1140/2416 [48:52<53:08,  2.50s/it][A[A[A[A[A





1140it [48:52,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1139 Batch Loss 1.3280 
Time 2.4761486053466797 








 47%|████▋     | 1141/2416 [48:55<55:54,  2.63s/it][A[A[A[A[A





1141it [48:55,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 1140 Batch Loss 1.4393 
Time 2.9296491146087646 








 47%|████▋     | 1142/2416 [48:58<54:57,  2.59s/it][A[A[A[A[A





1142it [48:58,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 1141 Batch Loss 1.2595 
Time 2.4782018661499023 








 47%|████▋     | 1143/2416 [49:00<54:25,  2.57s/it][A[A[A[A[A





1143it [49:00,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1142 Batch Loss 1.2842 
Time 2.501215696334839 








 47%|████▋     | 1144/2416 [49:03<54:04,  2.55s/it][A[A[A[A[A





1144it [49:03,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1143 Batch Loss 1.3935 
Time 2.502255439758301 








 47%|████▋     | 1145/2416 [49:05<54:15,  2.56s/it][A[A[A[A[A





1145it [49:05,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 1144 Batch Loss 1.5702 
Time 2.579817056655884 








 47%|████▋     | 1146/2416 [49:08<54:40,  2.58s/it][A[A[A[A[A





1146it [49:08,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1145 Batch Loss 1.1415 
Time 2.626450300216675 








 47%|████▋     | 1147/2416 [49:11<54:52,  2.59s/it][A[A[A[A[A





1147it [49:11,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 1146 Batch Loss 1.3732 
Time 2.604642152786255 








 48%|████▊     | 1148/2416 [49:13<54:52,  2.60s/it][A[A[A[A[A





1148it [49:13,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 1147 Batch Loss 1.2458 
Time 2.5908894538879395 








 48%|████▊     | 1149/2416 [49:16<54:24,  2.58s/it][A[A[A[A[A





1149it [49:16,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1148 Batch Loss 1.5322 
Time 2.523888349533081 








 48%|████▊     | 1150/2416 [49:19<56:51,  2.69s/it][A[A[A[A[A





1150it [49:19,  2.70s/it][A[A[A[A[A[A

Epoch 0 Batch 1149 Batch Loss 1.3947 
Time 2.962029457092285 








 48%|████▊     | 1151/2416 [49:21<55:29,  2.63s/it][A[A[A[A[A





1151it [49:21,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 1150 Batch Loss 1.7579 
Time 2.471609592437744 








 48%|████▊     | 1152/2416 [49:24<54:22,  2.58s/it][A[A[A[A[A





1152it [49:24,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1151 Batch Loss 1.3770 
Time 2.444373846054077 








 48%|████▊     | 1153/2416 [49:26<53:38,  2.55s/it][A[A[A[A[A





1153it [49:26,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1152 Batch Loss 1.4100 
Time 2.46062970161438 








 48%|████▊     | 1154/2416 [49:29<53:05,  2.52s/it][A[A[A[A[A





1154it [49:29,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1153 Batch Loss 1.3871 
Time 2.456760883331299 








 48%|████▊     | 1155/2416 [49:31<52:42,  2.51s/it][A[A[A[A[A





1155it [49:31,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1154 Batch Loss 1.3263 
Time 2.4561245441436768 








 48%|████▊     | 1156/2416 [49:33<52:26,  2.50s/it][A[A[A[A[A





1156it [49:33,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1155 Batch Loss 1.0993 
Time 2.4633426666259766 








 48%|████▊     | 1157/2416 [49:36<52:08,  2.48s/it][A[A[A[A[A





1157it [49:36,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1156 Batch Loss 1.2560 
Time 2.4451797008514404 








 48%|████▊     | 1158/2416 [49:38<52:13,  2.49s/it][A[A[A[A[A





1158it [49:38,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1157 Batch Loss 1.1955 
Time 2.4978864192962646 








 48%|████▊     | 1159/2416 [49:41<51:57,  2.48s/it][A[A[A[A[A





1159it [49:41,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1158 Batch Loss 1.4168 
Time 2.4449431896209717 








 48%|████▊     | 1160/2416 [49:43<52:10,  2.49s/it][A[A[A[A[A





1160it [49:43,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1159 Batch Loss 1.2572 
Time 2.509035110473633 








 48%|████▊     | 1161/2416 [49:46<54:55,  2.63s/it][A[A[A[A[A





1161it [49:46,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 1160 Batch Loss 1.3100 
Time 2.9236176013946533 








 48%|████▊     | 1162/2416 [49:49<53:54,  2.58s/it][A[A[A[A[A





1162it [49:49,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1161 Batch Loss 1.0868 
Time 2.461963653564453 








 48%|████▊     | 1163/2416 [49:51<53:11,  2.55s/it][A[A[A[A[A





1163it [49:51,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1162 Batch Loss 1.4414 
Time 2.463747024536133 








 48%|████▊     | 1164/2416 [49:54<52:41,  2.53s/it][A[A[A[A[A





1164it [49:54,  2.53s/it][A[A[A[A[A[A

Epoch 0 Batch 1163 Batch Loss 1.3687 
Time 2.4640705585479736 








 48%|████▊     | 1165/2416 [49:56<52:18,  2.51s/it][A[A[A[A[A





1165it [49:56,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1164 Batch Loss 1.4466 
Time 2.4557113647460938 








 48%|████▊     | 1166/2416 [49:59<52:01,  2.50s/it][A[A[A[A[A





1166it [49:59,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1165 Batch Loss 1.4992 
Time 2.4527575969696045 








 48%|████▊     | 1167/2416 [50:01<51:49,  2.49s/it][A[A[A[A[A





1167it [50:01,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1166 Batch Loss 1.3597 
Time 2.4542243480682373 








 48%|████▊     | 1168/2416 [50:04<51:44,  2.49s/it][A[A[A[A[A





1168it [50:04,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1167 Batch Loss 1.4058 
Time 2.475295066833496 








 48%|████▊     | 1169/2416 [50:06<51:32,  2.48s/it][A[A[A[A[A





1169it [50:06,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1168 Batch Loss 1.3061 
Time 2.451345205307007 








 48%|████▊     | 1170/2416 [50:09<51:29,  2.48s/it][A[A[A[A[A





1170it [50:09,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1169 Batch Loss 1.2909 
Time 2.461421251296997 








 48%|████▊     | 1171/2416 [50:11<51:21,  2.48s/it][A[A[A[A[A





1171it [50:11,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1170 Batch Loss 1.5223 
Time 2.4551641941070557 








 49%|████▊     | 1172/2416 [50:14<54:12,  2.61s/it][A[A[A[A[A





1172it [50:14,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 1171 Batch Loss 1.3421 
Time 2.925358295440674 








 49%|████▊     | 1173/2416 [50:16<53:10,  2.57s/it][A[A[A[A[A





1173it [50:16,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1172 Batch Loss 1.3273 
Time 2.442248582839966 








 49%|████▊     | 1174/2416 [50:19<52:41,  2.55s/it][A[A[A[A[A





1174it [50:19,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1173 Batch Loss 1.4292 
Time 2.471669912338257 








 49%|████▊     | 1175/2416 [50:21<52:08,  2.52s/it][A[A[A[A[A





1175it [50:21,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1174 Batch Loss 1.2085 
Time 2.4491851329803467 








 49%|████▊     | 1176/2416 [50:24<51:48,  2.51s/it][A[A[A[A[A





1176it [50:24,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1175 Batch Loss 1.3060 
Time 2.457639217376709 








 49%|████▊     | 1177/2416 [50:26<51:31,  2.49s/it][A[A[A[A[A





1177it [50:26,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1176 Batch Loss 0.9251 
Time 2.453514814376831 








 49%|████▉     | 1178/2416 [50:29<51:30,  2.50s/it][A[A[A[A[A





1178it [50:29,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1177 Batch Loss 1.0991 
Time 2.489685297012329 








 49%|████▉     | 1179/2416 [50:31<51:22,  2.49s/it][A[A[A[A[A





1179it [50:31,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1178 Batch Loss 1.5186 
Time 2.468738555908203 








 49%|████▉     | 1180/2416 [50:34<51:13,  2.49s/it][A[A[A[A[A





1180it [50:34,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1179 Batch Loss 1.3322 
Time 2.456749677658081 








 49%|████▉     | 1181/2416 [50:36<51:17,  2.49s/it][A[A[A[A[A





1181it [50:36,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1180 Batch Loss 1.3118 
Time 2.488300085067749 








 49%|████▉     | 1182/2416 [50:39<51:20,  2.50s/it][A[A[A[A[A





1182it [50:39,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1181 Batch Loss 1.3760 
Time 2.49094557762146 








 49%|████▉     | 1183/2416 [50:42<53:59,  2.63s/it][A[A[A[A[A





1183it [50:42,  2.63s/it][A[A[A[A[A[A

Epoch 0 Batch 1182 Batch Loss 1.3605 
Time 2.923133611679077 








 49%|████▉     | 1184/2416 [50:44<53:18,  2.60s/it][A[A[A[A[A





1184it [50:44,  2.60s/it][A[A[A[A[A[A

Epoch 0 Batch 1183 Batch Loss 1.2883 
Time 2.5091233253479004 








 49%|████▉     | 1185/2416 [50:47<52:31,  2.56s/it][A[A[A[A[A





1185it [50:47,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 1184 Batch Loss 1.3834 
Time 2.4687016010284424 








 49%|████▉     | 1186/2416 [50:49<51:58,  2.54s/it][A[A[A[A[A





1186it [50:49,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 1185 Batch Loss 1.3301 
Time 2.4647324085235596 








 49%|████▉     | 1187/2416 [50:52<51:27,  2.51s/it][A[A[A[A[A





1187it [50:52,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1186 Batch Loss 1.3611 
Time 2.444309949874878 








 49%|████▉     | 1188/2416 [50:54<51:07,  2.50s/it][A[A[A[A[A





1188it [50:54,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1187 Batch Loss 1.2096 
Time 2.4542922973632812 








 49%|████▉     | 1189/2416 [50:57<51:04,  2.50s/it][A[A[A[A[A





1189it [50:57,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1188 Batch Loss 1.1626 
Time 2.482045888900757 








 49%|████▉     | 1190/2416 [50:59<50:56,  2.49s/it][A[A[A[A[A





1190it [50:59,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1189 Batch Loss 1.3901 
Time 2.46537709236145 








 49%|████▉     | 1191/2416 [51:02<50:52,  2.49s/it][A[A[A[A[A





1191it [51:02,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1190 Batch Loss 1.4180 
Time 2.480043411254883 








 49%|████▉     | 1192/2416 [51:04<50:38,  2.48s/it][A[A[A[A[A





1192it [51:04,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1191 Batch Loss 1.1857 
Time 2.4482791423797607 








 49%|████▉     | 1193/2416 [51:07<53:24,  2.62s/it][A[A[A[A[A





1193it [51:07,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1192 Batch Loss 1.3766 
Time 2.9284863471984863 








 49%|████▉     | 1194/2416 [51:10<52:35,  2.58s/it][A[A[A[A[A





1194it [51:10,  2.58s/it][A[A[A[A[A[A

Epoch 0 Batch 1193 Batch Loss 1.1955 
Time 2.4768619537353516 








 49%|████▉     | 1195/2416 [51:12<51:51,  2.55s/it][A[A[A[A[A





1195it [51:12,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1194 Batch Loss 1.2419 
Time 2.4542441368103027 








 50%|████▉     | 1196/2416 [51:14<51:17,  2.52s/it][A[A[A[A[A





1196it [51:14,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1195 Batch Loss 1.2846 
Time 2.4531409740448 








 50%|████▉     | 1197/2416 [51:17<50:58,  2.51s/it][A[A[A[A[A





1197it [51:17,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1196 Batch Loss 1.3228 
Time 2.4598171710968018 








 50%|████▉     | 1198/2416 [51:19<50:36,  2.49s/it][A[A[A[A[A





1198it [51:19,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1197 Batch Loss 1.0257 
Time 2.4496378898620605 








 50%|████▉     | 1199/2416 [51:22<50:34,  2.49s/it][A[A[A[A[A





1199it [51:22,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1198 Batch Loss 1.3650 
Time 2.4787325859069824 








 50%|████▉     | 1200/2416 [51:24<50:13,  2.48s/it][A[A[A[A[A





1200it [51:24,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1199 Batch Loss 1.2906 
Time 2.431450843811035 








 50%|████▉     | 1201/2416 [51:27<50:04,  2.47s/it][A[A[A[A[A





1201it [51:27,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1200 Batch Loss 1.1706 
Time 2.4485487937927246 








 50%|████▉     | 1202/2416 [51:29<50:30,  2.50s/it][A[A[A[A[A





1202it [51:29,  2.50s/it][A[A[A[A[A[A

Epoch 0 Batch 1201 Batch Loss 1.4356 
Time 2.544489860534668 








 50%|████▉     | 1203/2416 [51:32<53:36,  2.65s/it][A[A[A[A[A





1203it [51:32,  2.65s/it][A[A[A[A[A[A

Epoch 0 Batch 1202 Batch Loss 1.3190 
Time 3.0028204917907715 








 50%|████▉     | 1204/2416 [51:35<52:19,  2.59s/it][A[A[A[A[A





1204it [51:35,  2.59s/it][A[A[A[A[A[A

Epoch 0 Batch 1203 Batch Loss 1.3449 
Time 2.4404337406158447 








 50%|████▉     | 1205/2416 [51:37<51:17,  2.54s/it][A[A[A[A[A





1205it [51:37,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 1204 Batch Loss 1.5578 
Time 2.4105613231658936 








 50%|████▉     | 1206/2416 [51:40<50:41,  2.51s/it][A[A[A[A[A





1206it [51:40,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1205 Batch Loss 1.1078 
Time 2.4378838539123535 








 50%|████▉     | 1207/2416 [51:42<50:13,  2.49s/it][A[A[A[A[A





1207it [51:42,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1206 Batch Loss 1.4245 
Time 2.432476043701172 








 50%|█████     | 1208/2416 [51:45<50:12,  2.49s/it][A[A[A[A[A





1208it [51:45,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1207 Batch Loss 1.1779 
Time 2.4872817993164062 








 50%|█████     | 1209/2416 [51:47<50:08,  2.49s/it][A[A[A[A[A





1209it [51:47,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1208 Batch Loss 1.1255 
Time 2.483750820159912 








 50%|█████     | 1210/2416 [51:50<50:04,  2.49s/it][A[A[A[A[A





1210it [51:50,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1209 Batch Loss 1.4323 
Time 2.477036237716675 








 50%|█████     | 1211/2416 [51:52<49:44,  2.48s/it][A[A[A[A[A





1211it [51:52,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1210 Batch Loss 1.5028 
Time 2.4293293952941895 








 50%|█████     | 1212/2416 [51:54<49:33,  2.47s/it][A[A[A[A[A





1212it [51:54,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1211 Batch Loss 1.5009 
Time 2.4408793449401855 








 50%|█████     | 1213/2416 [51:57<49:31,  2.47s/it][A[A[A[A[A





1213it [51:57,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1212 Batch Loss 1.3699 
Time 2.4573938846588135 








 50%|█████     | 1214/2416 [52:00<52:22,  2.61s/it][A[A[A[A[A





1214it [52:00,  2.61s/it][A[A[A[A[A[A

Epoch 0 Batch 1213 Batch Loss 1.3030 
Time 2.934851884841919 








 50%|█████     | 1215/2416 [52:02<51:18,  2.56s/it][A[A[A[A[A





1215it [52:02,  2.56s/it][A[A[A[A[A[A

Epoch 0 Batch 1214 Batch Loss 1.3133 
Time 2.4278900623321533 








 50%|█████     | 1216/2416 [52:05<50:55,  2.55s/it][A[A[A[A[A





1216it [52:05,  2.55s/it][A[A[A[A[A[A

Epoch 0 Batch 1215 Batch Loss 1.3659 
Time 2.492929458618164 








 50%|█████     | 1217/2416 [52:07<50:19,  2.52s/it][A[A[A[A[A





1217it [52:07,  2.52s/it][A[A[A[A[A[A

Epoch 0 Batch 1216 Batch Loss 1.5024 
Time 2.4431378841400146 








 50%|█████     | 1218/2416 [52:10<50:07,  2.51s/it][A[A[A[A[A





1218it [52:10,  2.51s/it][A[A[A[A[A[A

Epoch 0 Batch 1217 Batch Loss 1.3704 
Time 2.478762149810791 








 50%|█████     | 1219/2416 [52:12<49:42,  2.49s/it][A[A[A[A[A





1219it [52:12,  2.49s/it][A[A[A[A[A[A

Epoch 0 Batch 1218 Batch Loss 1.3295 
Time 2.4372599124908447 








 50%|█████     | 1220/2416 [52:15<49:31,  2.48s/it][A[A[A[A[A





1220it [52:15,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1219 Batch Loss 1.7467 
Time 2.455293655395508 








 51%|█████     | 1221/2416 [52:17<49:23,  2.48s/it][A[A[A[A[A





1221it [52:17,  2.48s/it][A[A[A[A[A[A

Epoch 0 Batch 1220 Batch Loss 1.3690 
Time 2.455551862716675 








 51%|█████     | 1222/2416 [52:20<49:11,  2.47s/it][A[A[A[A[A





1222it [52:20,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1221 Batch Loss 1.2773 
Time 2.4404282569885254 








 51%|█████     | 1223/2416 [52:22<49:06,  2.47s/it][A[A[A[A[A





1223it [52:22,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1222 Batch Loss 1.2959 
Time 2.4494051933288574 








 51%|█████     | 1224/2416 [52:25<49:07,  2.47s/it][A[A[A[A[A





1224it [52:25,  2.47s/it][A[A[A[A[A[A

Epoch 0 Batch 1223 Batch Loss 1.3842 
Time 2.4658710956573486 








 51%|█████     | 1225/2416 [52:28<52:06,  2.63s/it][A[A[A[A[A





1225it [52:28,  2.62s/it][A[A[A[A[A[A

Epoch 0 Batch 1224 Batch Loss 1.4472 
Time 2.9708144664764404 








 51%|█████     | 1226/2416 [52:30<51:03,  2.57s/it][A[A[A[A[A





1226it [52:30,  2.57s/it][A[A[A[A[A[A

Epoch 0 Batch 1225 Batch Loss 1.2833 
Time 2.4480044841766357 








 51%|█████     | 1227/2416 [52:32<50:19,  2.54s/it][A[A[A[A[A





1227it [52:32,  2.54s/it][A[A[A[A[A[A

Epoch 0 Batch 1226 Batch Loss 1.1259 
Time 2.4462502002716064 

