# Textual entailment

### Imports

In [2]:
from tqdm import tqdm
import tensorflow as tf
import numpy as np
import sys
display_step = 10

### Embeddings

Download Glove word embeddings

In [31]:
glove_zip_file = "data/glove.6B.zip"
glove_vectors_file = "data/glove.6B.100d.txt"
import zipfile, urllib.request, shutil, os
    
#large file - 862 MB
if (not os.path.isfile(glove_zip_file) and
    not os.path.isfile(glove_vectors_file)):
    with urllib.request.urlopen("http://nlp.stanford.edu/data/glove.6B.zip") as response, open(glove_zip_file, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)

Unzip word embeddings

In [4]:
import zipfile
def unzip_single_file(zip_file_name, output_file_name):
    """
        If the outFile is already created, don't recreate
        If the outFile does not exist, create it from the zipFile
    """
    if not os.path.isfile(output_file_name):
        with open(output_file_name, 'wb') as out_file:
            with zipfile.ZipFile(zip_file_name) as zipped:
                for info in zipped.infolist():
                    if output_file_name in info.filename:
                        with zipped.open(info) as requested_file:
                            out_file.write(requested_file.read())
                            return

unzip_single_file(glove_zip_file, glove_vectors_file)

In [32]:
glove_wordmap = {}
with open(glove_vectors_file, "r", encoding="utf8") as glove:
    for line in glove:
        name, vector = tuple(line.split(" ", 1))
        glove_wordmap[name] = np.fromstring(vector, sep=" ")

## Model

### Embed sentences - sentence2sequence

In [4]:
def sentence2sequence(sentence):
    """
    - Turns an input sentence into an (n,d) matrix, 
        where n is the number of tokens in the sentence
        and d is the number of dimensions each word vector has.
    
      Tensorflow doesn't need to be used here, as simply
      turning the sentence into a sequence based off our 
      mapping does not need the computational power that
      Tensorflow provides. Normal Python suffices for this task.
    """
    tokens = sentence.lower().split(" ")
    rows = []
    words = []
    #Greedy search for tokens
    for token in tokens:
        i = len(token)
        while len(token) > 0 and i > 0:
            word = token[:i]
            if word in glove_wordmap:
                rows.append(glove_wordmap[word])
                words.append(word)
                token = token[i:]
                i = len(token)
            else:
                i = i-1
    return rows, words

In [5]:
display_step = 1
# one hot encoding
def score_setup(row):
    convert_dict = {
      'ENTAILMENT': 0,
      'NEUTRAL': 1,
      'CONTRADICTION': 2
    }
    score = np.zeros((3,))
    tag = row["entailment_judgment"]
    score[convert_dict[tag]] += 1
    return score

def fit_to_size(matrix, shape):
    res = np.zeros(shape)
    slices = [slice(0,min(dim,shape[e])) for e, dim in enumerate(matrix.shape)]
    res[slices] = matrix[slices]
    return res

### Cell used for development

In [56]:
def create_model():
    tf.reset_default_graph() 
    data_feature_list, correct_values, correct_scores = split_data_into_scores()

    l_h, l_e = max_hypothesis_length, max_evidence_length
    N, D, H = batch_size, vector_size, hidden_size
    l_seq = l_h + l_e
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)
    hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
    evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
    y = tf.placeholder(tf.float32, [N, 3], 'label')
    lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)

    lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)

    fc_initializer = tf.random_normal_initializer(stddev=0.1) 
    fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3], 
                            initializer = fc_initializer)
    fc_bias = tf.get_variable('bias', [3])
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

    x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
    x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
    x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
    x = tf.split(x, l_seq,)
    rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)

    classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias

    with tf.variable_scope('Accuracy'):
        predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
        y_label = tf.cast(tf.argmax(y, 1), 'int32')
        corrects = tf.equal(predicts, y_label)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    with tf.variable_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits = classification_scores, labels = y)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    opt_op = optimizer.minimize(total_loss)
    # Initialize variables
    init = tf.global_variables_initializer()

    # Use TQDM if installed
    tqdm_installed = False
    try:
        from tqdm import tqdm
        tqdm_installed = True
    except:
        pass

    # Launch the Tensorflow session
    sess = tf.Session()
    sess.run(init)

    # training_iterations_count: The number of data pieces to train on in total
    # batch_size: The number of data pieces per batch
    training_iterations = range(0,training_iterations_count,batch_size)
    if tqdm_installed:
        # Add a progress bar if TQDM is installed
        training_iterations = tqdm(training_iterations)

    for i in training_iterations:
        if i % 1000 == 0:
        # Select indices for a random data subset
            batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)

        # Use the selected subset indices to initialize the graph's 
        #   placeholder values
        hyps, evis, ys = (data_feature_list[0][batch,:],
                          data_feature_list[1][batch,:],
                          correct_scores[batch])

        # Run the optimization with these initialized values
        sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
        # display_step: how often the accuracy and loss should 
        #   be tested and displayed.
        if (i/batch_size) % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Calculate batch loss
            tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Display results
            print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))

    data_feature_list, correct_values, correct_scores = split_test_data_into_scores("data/dev.txt")


    hyps, evis, ys = (data_feature_list[0][:],
                      data_feature_list[1][:],
                      correct_scores)
    predictions = sess.run(classification_scores, feed_dict={hyp: hyps, evi: evis, y: ys})
    total = len(predictions)
    correct_predictions = 0
    for i,prediction in enumerate(predictions):
        if np.argmax(prediction[0])==np.argmax(ys[i]):
            correct_predictions += 1
    #     print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
    #       " / " + ["Positive", "Neutral", "Negative"][np.argmax(ys[i])])
    acc = correct_predictions*100/total
    print("Acc: %s" % str(acc))
    return acc

In [57]:
import numpy as np
accs = [] # 128
batches = [128, 64, 32, 1, 256]
for batch in batches:
    batch_size = batch
    accs.append(create_model())

print(accs)
print("Best batch size %s" % str(batches[np.argmax(accs)]))






  0%|                                                                                                                                                                                   | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.956232, Training Accuracy= 0.58594






  0%|▏                                                                                                                                                                          | 1/782 [00:00<10:43,  1.21it/s]



  1%|▊                                                                                                                                                                          | 4/782 [00:00<07:39,  1.69it/s]



  1%|█▌                                                                                                                                                                         | 7/782 [00:01<05:30,  2.35it/s]



  1%|██▏                                                                                                                                                                       | 10/782 [00:01<04:00,  3.21it/s]

Iter 10.0, Minibatch Loss= 0.938195, Training Accuracy= 0.58594






  2%|██▌                                                                                                                                                                       | 12/782 [00:01<03:01,  4.23it/s]



  2%|███▎                                                                                                                                                                      | 15/782 [00:01<02:16,  5.64it/s]



  2%|███▉                                                                                                                                                                      | 18/782 [00:01<01:44,  7.31it/s]

Iter 20.0, Minibatch Loss= 0.937931, Training Accuracy= 0.58594






  3%|████▌                                                                                                                                                                     | 21/782 [00:01<01:26,  8.83it/s]



  3%|█████▏                                                                                                                                                                    | 24/782 [00:01<01:10, 10.81it/s]



  3%|█████▊                                                                                                                                                                    | 27/782 [00:02<00:57, 13.05it/s]



  4%|██████▌                                                                                                                                                                   | 30/782 [00:02<00:50, 14.98it/s]

Iter 30.0, Minibatch Loss= 0.937693, Training Accuracy= 0.58594






  4%|███████▏                                                                                                                                                                  | 33/782 [00:02<00:48, 15.50it/s]



  5%|███████▊                                                                                                                                                                  | 36/782 [00:02<00:43, 16.99it/s]



  5%|████████▍                                                                                                                                                                 | 39/782 [00:02<00:39, 18.86it/s]

Iter 40.0, Minibatch Loss= 0.937342, Training Accuracy= 0.58594






  5%|█████████▏                                                                                                                                                                | 42/782 [00:02<00:39, 18.77it/s]



  6%|█████████▊                                                                                                                                                                | 45/782 [00:02<00:35, 20.62it/s]



  6%|██████████▍                                                                                                                                                               | 48/782 [00:02<00:33, 22.13it/s]

Iter 50.0, Minibatch Loss= 0.936669, Training Accuracy= 0.58594






  7%|███████████                                                                                                                                                               | 51/782 [00:03<00:33, 21.57it/s]



  7%|███████████▋                                                                                                                                                              | 54/782 [00:03<00:32, 22.35it/s]



  7%|████████████▍                                                                                                                                                             | 57/782 [00:03<00:31, 22.82it/s]



  8%|█████████████                                                                                                                                                             | 60/782 [00:03<00:31, 23.11it/s]

Iter 60.0, Minibatch Loss= 0.935027, Training Accuracy= 0.58594






  8%|█████████████▋                                                                                                                                                            | 63/782 [00:03<00:33, 21.77it/s]



  8%|██████████████▎                                                                                                                                                           | 66/782 [00:03<00:31, 22.71it/s]



  9%|███████████████                                                                                                                                                           | 69/782 [00:03<00:29, 23.80it/s]

Iter 70.0, Minibatch Loss= 0.930252, Training Accuracy= 0.58594






  9%|███████████████▋                                                                                                                                                          | 72/782 [00:04<00:31, 22.68it/s]



 10%|████████████████▎                                                                                                                                                         | 75/782 [00:04<00:29, 23.72it/s]



 10%|████████████████▉                                                                                                                                                         | 78/782 [00:04<00:29, 23.97it/s]

Iter 80.0, Minibatch Loss= 0.917919, Training Accuracy= 0.58594






 10%|█████████████████▌                                                                                                                                                        | 81/782 [00:04<00:30, 22.76it/s]



 11%|██████████████████▎                                                                                                                                                       | 84/782 [00:04<00:29, 23.87it/s]



 11%|██████████████████▉                                                                                                                                                       | 87/782 [00:04<00:28, 24.68it/s]



 12%|███████████████████▌                                                                                                                                                      | 90/782 [00:04<00:27, 24.96it/s]

Iter 90.0, Minibatch Loss= 0.935087, Training Accuracy= 0.58594


KeyboardInterrupt: 

### Constants

In [38]:
#Constants setup
max_hypothesis_length, max_evidence_length = 28, 32
batch_size, vector_size, hidden_size = 128, 100, 256

training_iterations_count = 100000

lstm_size = hidden_size

weight_decay = 0.0005

learning_rate = 1

input_p, output_p = 0.6, 0.3

### [Training data](http://www.site.uottawa.ca/~diana/csi5386/A2_2019/SICK_train.txt)

In [43]:
import numpy as np

def split_data_into_scores(file_name="data/training.txt"):
    import csv
    with open(file_name,"r") as data:
        train = csv.DictReader(data , delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        count = 1
        for row in train:
            hyp_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_A"].lower())[0]))
            evi_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_B"].lower())[0]))
            labels.append(row["entailment_judgment"])
            scores.append(score_setup(row))
        
        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                          for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                          for x in evi_sentences])
                                 
        return (hyp_sentences, evi_sentences), labels, np.array(scores)
data_feature_list, correct_values, correct_scores = split_data_into_scores()

l_h, l_e = max_hypothesis_length, max_evidence_length
N, D, H = batch_size, vector_size, hidden_size
l_seq = l_h + l_e



### TRAINING

In [44]:
tf.reset_default_graph()

In [45]:
lstm = tf.contrib.rnn.GRUCell(lstm_size)
lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)

In [46]:
# N: The number of elements in each of our batches, 
#   which we use to train subsets of data for efficiency's sake.
# l_h: The maximum length of a hypothesis, or the second sentence.  This is
#   used because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# l_e: The maximum length of evidence, the first sentence.  This is used
#   because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# D: The size of our used GloVe or other vectors.
hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
y = tf.placeholder(tf.float32, [N, 3], 'label')
# hyp: Where the hypotheses will be stored during training.
# evi: Where the evidences will be stored during training.
# y: Where correct scores will be stored during training.

# lstm_size: the size of the gates in the LSTM, 
#    as in the first LSTM layer's initialization.
# lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)
lstm_back = tf.contrib.rnn.GRUCell(lstm_size)

# lstm_back:  The LSTM used for looking backwards 
#   through the sentences, similar to lstm.

# input_p: the probability that inputs to the LSTM will be retained at each
#   iteration of dropout.
# output_p: the probability that outputs from the LSTM will be retained at 
#   each iteration of dropout.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)
# lstm_drop_back:  A dropout wrapper for lstm_back, like lstm_drop.


fc_initializer = tf.random_normal_initializer(stddev=0.1) 
# fc_initializer: initial values for the fully connected layer's weights.
# hidden_size: the size of the outputs from each lstm layer.  
#   Multiplied by 2 to account for the two LSTMs.
fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3], 
                            initializer = fc_initializer)
# fc_weight: Storage for the fully connected layer's weights.
fc_bias = tf.get_variable('bias', [3])
# fc_bias: Storage for the fully connected layer's bias.

# tf.GraphKeys.REGULARIZATION_LOSSES:  A key to a collection in the graph
#   designated for losses due to regularization.
#   In this case, this portion of loss is regularization on the weights
#   for the fully connected layer.
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq,)

# x: the inputs to the bidirectional_rnn


# tf.contrib.rnn.static_bidirectional_rnn: Runs the input through
#   two recurrent networks, one that runs the inputs forward and one
#   that runs the inputs in reversed order, combining the outputs.
rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)
# rnn_outputs: the list of LSTM outputs, as a list. 
#   What we want is the latest output, rnn_outputs[-1]

classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias
# The scores are relative certainties for how likely the output matches
#   a certain entailment: 
#     0: Positive entailment
#     1: Neutral entailment
#     2: Negative entailment

In [47]:
with tf.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
    y_label = tf.cast(tf.argmax(y, 1), 'int32')
    corrects = tf.equal(predicts, y_label)
    num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

with tf.variable_scope("loss"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits = classification_scores, labels = y)
    loss = tf.reduce_mean(cross_entropy)
    total_loss = loss + weight_decay * tf.add_n(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

optimizer = tf.train.GradientDescentOptimizer(learning_rate)

opt_op = optimizer.minimize(total_loss)

In [48]:
# Initialize variables
init = tf.global_variables_initializer()

from tqdm import tqdm
import time
start = time.time()
# Launch the Tensorflow session
sess = tf.Session()
sess.run(init)

# training_iterations_count: The number of data pieces to train on in total
# batch_size: The number of data pieces per batch
training_iterations = range(0,training_iterations_count,batch_size)
print(training_iterations)
training_iterations = tqdm(training_iterations)

for i in training_iterations:
    if i % 1000 == 0:
    # Select indices for a random data subset
        batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)
    
    # Use the selected subset indices to initialize the graph's 
    #   placeholder values
    hyps, evis, ys = (data_feature_list[0][batch,:],
                      data_feature_list[1][batch,:],
                      correct_scores[batch])
    
    # Run the optimization with these initialized values
    sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
    # display_step: how often the accuracy and loss should 
    #   be tested and displayed.
    if (i/batch_size) % display_step == 0:
        # Calculate batch accuracy
        acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Calculate batch loss
        tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Display results
        print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
              "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))
stop = time.time()
mins = int((stop-start)/60.0)
print("Training took %s:%s" % (str(mins), str(stop - start - mins*60)))

range(0, 100000, 128)


  0%|                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.977815, Training Accuracy= 0.56250


  0%|                                                                  | 1/782 [00:02<29:12,  2.24s/it]

Iter 1.0, Minibatch Loss= 0.995556, Training Accuracy= 0.56250


  0%|▏                                                                 | 2/782 [00:03<24:30,  1.89s/it]

Iter 2.0, Minibatch Loss= 0.992535, Training Accuracy= 0.56250


  0%|▎                                                                 | 3/782 [00:04<22:20,  1.72s/it]

Iter 3.0, Minibatch Loss= 0.982986, Training Accuracy= 0.56250


  1%|▎                                                                 | 4/782 [00:05<19:54,  1.54s/it]

Iter 4.0, Minibatch Loss= 0.966112, Training Accuracy= 0.56250


  1%|▍                                                                 | 5/782 [00:06<17:54,  1.38s/it]

Iter 5.0, Minibatch Loss= 0.961751, Training Accuracy= 0.56250


  1%|▌                                                                 | 6/782 [00:07<16:36,  1.28s/it]

Iter 6.0, Minibatch Loss= 0.958568, Training Accuracy= 0.56250


  1%|▌                                                                 | 7/782 [00:08<15:29,  1.20s/it]

Iter 7.0, Minibatch Loss= 0.955487, Training Accuracy= 0.56250


  1%|▋                                                                 | 8/782 [00:09<14:41,  1.14s/it]

Iter 8.0, Minibatch Loss= 0.951669, Training Accuracy= 0.56250


  1%|▊                                                                 | 9/782 [00:10<14:03,  1.09s/it]

Iter 9.0, Minibatch Loss= 0.946536, Training Accuracy= 0.56250


  1%|▊                                                                | 10/782 [00:11<13:36,  1.06s/it]

Iter 10.0, Minibatch Loss= 0.938583, Training Accuracy= 0.56250


  1%|▉                                                                | 11/782 [00:12<13:18,  1.04s/it]

Iter 11.0, Minibatch Loss= 0.924116, Training Accuracy= 0.56250


  2%|▉                                                                | 12/782 [00:13<13:32,  1.06s/it]

Iter 12.0, Minibatch Loss= 1.227269, Training Accuracy= 0.23438


  2%|█                                                                | 13/782 [00:14<13:33,  1.06s/it]

Iter 13.0, Minibatch Loss= 1.156875, Training Accuracy= 0.21875


  2%|█▏                                                               | 14/782 [00:16<13:49,  1.08s/it]

Iter 14.0, Minibatch Loss= 1.490158, Training Accuracy= 0.56250


  2%|█▏                                                               | 15/782 [00:17<13:44,  1.07s/it]

Iter 15.0, Minibatch Loss= 1.446911, Training Accuracy= 0.28125


  2%|█▎                                                               | 16/782 [00:18<13:40,  1.07s/it]

Iter 16.0, Minibatch Loss= 1.825914, Training Accuracy= 0.56250


  2%|█▍                                                               | 17/782 [00:19<13:51,  1.09s/it]

Iter 17.0, Minibatch Loss= 1.557053, Training Accuracy= 0.28125


  2%|█▍                                                               | 18/782 [00:20<13:45,  1.08s/it]

Iter 18.0, Minibatch Loss= 1.534554, Training Accuracy= 0.56250


  2%|█▌                                                               | 19/782 [00:21<13:29,  1.06s/it]

Iter 19.0, Minibatch Loss= 1.328527, Training Accuracy= 0.56250


  3%|█▋                                                               | 20/782 [00:22<13:13,  1.04s/it]

Iter 20.0, Minibatch Loss= 1.350386, Training Accuracy= 0.56250


  3%|█▋                                                               | 21/782 [00:23<13:13,  1.04s/it]

Iter 21.0, Minibatch Loss= 10.641380, Training Accuracy= 0.28125


  3%|█▊                                                               | 22/782 [00:24<13:10,  1.04s/it]

Iter 22.0, Minibatch Loss= 1.559796, Training Accuracy= 0.56250


  3%|█▉                                                               | 23/782 [00:25<13:05,  1.04s/it]

Iter 23.0, Minibatch Loss= 1.401265, Training Accuracy= 0.29688


  3%|█▉                                                               | 24/782 [00:26<13:11,  1.04s/it]

Iter 24.0, Minibatch Loss= 6.859655, Training Accuracy= 0.16406


  3%|██                                                               | 25/782 [00:27<13:11,  1.05s/it]

Iter 25.0, Minibatch Loss= 15.122231, Training Accuracy= 0.19531


  3%|██▏                                                              | 26/782 [00:28<13:06,  1.04s/it]

Iter 26.0, Minibatch Loss= 25.629194, Training Accuracy= 0.56250


  3%|██▏                                                              | 27/782 [00:29<12:56,  1.03s/it]

Iter 27.0, Minibatch Loss= 44.196472, Training Accuracy= 0.28125


  4%|██▎                                                              | 28/782 [00:30<13:16,  1.06s/it]

Iter 28.0, Minibatch Loss= 40.049500, Training Accuracy= 0.56250


  4%|██▍                                                              | 29/782 [00:31<13:35,  1.08s/it]

Iter 29.0, Minibatch Loss= 18.995995, Training Accuracy= 0.16406


  4%|██▍                                                              | 30/782 [00:32<13:29,  1.08s/it]

Iter 30.0, Minibatch Loss= 39.531384, Training Accuracy= 0.56250


  4%|██▌                                                              | 31/782 [00:34<14:13,  1.14s/it]

Iter 31.0, Minibatch Loss= 45.083427, Training Accuracy= 0.28125


  4%|██▋                                                              | 32/782 [00:35<14:13,  1.14s/it]

Iter 32.0, Minibatch Loss= 55.034153, Training Accuracy= 0.56250


  4%|██▋                                                              | 33/782 [00:36<14:05,  1.13s/it]

Iter 33.0, Minibatch Loss= 18.373058, Training Accuracy= 0.56250


  4%|██▊                                                              | 34/782 [00:37<13:52,  1.11s/it]

Iter 34.0, Minibatch Loss= 42.575386, Training Accuracy= 0.28125


  4%|██▉                                                              | 35/782 [00:38<13:59,  1.12s/it]

Iter 35.0, Minibatch Loss= 33.883312, Training Accuracy= 0.56250


  5%|██▉                                                              | 36/782 [00:39<14:10,  1.14s/it]

Iter 36.0, Minibatch Loss= 25.529423, Training Accuracy= 0.16406


  5%|███                                                              | 37/782 [00:41<14:09,  1.14s/it]

Iter 37.0, Minibatch Loss= 33.695000, Training Accuracy= 0.56250


  5%|███▏                                                             | 38/782 [00:42<13:52,  1.12s/it]

Iter 38.0, Minibatch Loss= 42.537537, Training Accuracy= 0.28125


  5%|███▏                                                             | 39/782 [00:43<13:33,  1.09s/it]

Iter 39.0, Minibatch Loss= 48.951702, Training Accuracy= 0.56250


  5%|███▎                                                             | 40/782 [00:44<13:24,  1.08s/it]

Iter 40.0, Minibatch Loss= 13.931902, Training Accuracy= 0.56250


  5%|███▍                                                             | 41/782 [00:45<13:29,  1.09s/it]

Iter 41.0, Minibatch Loss= 40.905922, Training Accuracy= 0.28125


  5%|███▍                                                             | 42/782 [00:46<13:34,  1.10s/it]

Iter 42.0, Minibatch Loss= 29.448595, Training Accuracy= 0.56250


  5%|███▌                                                             | 43/782 [00:47<13:20,  1.08s/it]

Iter 43.0, Minibatch Loss= 39.506378, Training Accuracy= 0.16406


  6%|███▋                                                             | 44/782 [00:48<13:19,  1.08s/it]

Iter 44.0, Minibatch Loss= 29.611902, Training Accuracy= 0.56250


  6%|███▋                                                             | 45/782 [00:49<13:32,  1.10s/it]

Iter 45.0, Minibatch Loss= 41.615952, Training Accuracy= 0.28125


  6%|███▊                                                             | 46/782 [00:50<13:20,  1.09s/it]

Iter 46.0, Minibatch Loss= 44.603142, Training Accuracy= 0.56250


  6%|███▉                                                             | 47/782 [00:51<13:09,  1.07s/it]

Iter 47.0, Minibatch Loss= 9.659362, Training Accuracy= 0.56250


  6%|███▉                                                             | 48/782 [00:52<12:59,  1.06s/it]

Iter 48.0, Minibatch Loss= 39.805817, Training Accuracy= 0.28125


  6%|████                                                             | 49/782 [00:53<12:55,  1.06s/it]

Iter 49.0, Minibatch Loss= 24.615120, Training Accuracy= 0.56250


  6%|████▏                                                            | 50/782 [00:54<12:50,  1.05s/it]

Iter 50.0, Minibatch Loss= 52.564034, Training Accuracy= 0.15625


  7%|████▏                                                            | 51/782 [00:55<12:47,  1.05s/it]

Iter 51.0, Minibatch Loss= 25.915712, Training Accuracy= 0.56250


  7%|████▎                                                            | 52/782 [00:56<12:43,  1.05s/it]

Iter 52.0, Minibatch Loss= 39.970730, Training Accuracy= 0.28125


  7%|████▍                                                            | 53/782 [00:58<12:46,  1.05s/it]

Iter 53.0, Minibatch Loss= 40.746796, Training Accuracy= 0.56250


  7%|████▍                                                            | 54/782 [00:59<12:43,  1.05s/it]

Iter 54.0, Minibatch Loss= 6.506407, Training Accuracy= 0.56250


  7%|████▌                                                            | 55/782 [01:00<12:43,  1.05s/it]

Iter 55.0, Minibatch Loss= 38.155342, Training Accuracy= 0.28125


  7%|████▋                                                            | 56/782 [01:01<12:41,  1.05s/it]

Iter 56.0, Minibatch Loss= 32.382835, Training Accuracy= 0.17188


  7%|████▋                                                            | 57/782 [01:02<12:38,  1.05s/it]

Iter 57.0, Minibatch Loss= 54.790558, Training Accuracy= 0.56250


  7%|████▊                                                            | 58/782 [01:03<12:37,  1.05s/it]

Iter 58.0, Minibatch Loss= 20.961079, Training Accuracy= 0.56250


  8%|████▉                                                            | 59/782 [01:04<12:38,  1.05s/it]

Iter 59.0, Minibatch Loss= 38.933823, Training Accuracy= 0.28125


  8%|████▉                                                            | 60/782 [01:05<12:37,  1.05s/it]

Iter 60.0, Minibatch Loss= 35.873695, Training Accuracy= 0.56250


  8%|█████                                                            | 61/782 [01:06<12:35,  1.05s/it]

Iter 61.0, Minibatch Loss= 11.677141, Training Accuracy= 0.17188


  8%|█████▏                                                           | 62/782 [01:07<12:33,  1.05s/it]

Iter 62.0, Minibatch Loss= 35.145638, Training Accuracy= 0.56250


  8%|█████▏                                                           | 63/782 [01:08<12:34,  1.05s/it]

Iter 63.0, Minibatch Loss= 40.954315, Training Accuracy= 0.28125


  8%|█████▎                                                           | 64/782 [01:09<12:38,  1.06s/it]

Iter 64.0, Minibatch Loss= 48.764946, Training Accuracy= 0.56250


  8%|█████▍                                                           | 65/782 [01:10<12:30,  1.05s/it]

Iter 65.0, Minibatch Loss= 14.905069, Training Accuracy= 0.56250


  8%|█████▍                                                           | 66/782 [01:11<12:47,  1.07s/it]

Iter 66.0, Minibatch Loss= 40.683632, Training Accuracy= 0.28125


  9%|█████▌                                                           | 67/782 [01:12<12:51,  1.08s/it]

Iter 67.0, Minibatch Loss= 29.691345, Training Accuracy= 0.56250


  9%|█████▋                                                           | 68/782 [01:13<12:37,  1.06s/it]

Iter 68.0, Minibatch Loss= 28.973330, Training Accuracy= 0.16406


  9%|█████▋                                                           | 69/782 [01:14<12:38,  1.06s/it]

Iter 69.0, Minibatch Loss= 29.323444, Training Accuracy= 0.56250


  9%|█████▊                                                           | 70/782 [01:15<12:34,  1.06s/it]

Iter 70.0, Minibatch Loss= 41.824486, Training Accuracy= 0.28125


  9%|█████▉                                                           | 71/782 [01:17<12:28,  1.05s/it]

Iter 71.0, Minibatch Loss= 43.511642, Training Accuracy= 0.56250


  9%|█████▉                                                           | 72/782 [01:18<12:28,  1.05s/it]

Iter 72.0, Minibatch Loss= 10.695263, Training Accuracy= 0.56250


  9%|██████                                                           | 73/782 [01:19<12:38,  1.07s/it]

Iter 73.0, Minibatch Loss= 40.220562, Training Accuracy= 0.28125


  9%|██████▏                                                          | 74/782 [01:20<12:35,  1.07s/it]

Iter 74.0, Minibatch Loss= 25.112661, Training Accuracy= 0.56250


 10%|██████▏                                                          | 75/782 [01:21<12:21,  1.05s/it]

Iter 75.0, Minibatch Loss= 42.895966, Training Accuracy= 0.16406


 10%|██████▎                                                          | 76/782 [01:22<12:13,  1.04s/it]

Iter 76.0, Minibatch Loss= 25.031281, Training Accuracy= 0.56250


 10%|██████▍                                                          | 77/782 [01:23<12:03,  1.03s/it]

Iter 77.0, Minibatch Loss= 40.496872, Training Accuracy= 0.28125


 10%|██████▍                                                          | 78/782 [01:24<11:58,  1.02s/it]

Iter 78.0, Minibatch Loss= 39.556400, Training Accuracy= 0.56250


 10%|██████▌                                                          | 79/782 [01:25<11:57,  1.02s/it]

Iter 79.0, Minibatch Loss= 6.558368, Training Accuracy= 0.56250


 10%|██████▋                                                          | 80/782 [01:26<11:59,  1.02s/it]

Iter 80.0, Minibatch Loss= 40.679119, Training Accuracy= 0.28125


 10%|██████▋                                                          | 81/782 [01:27<11:59,  1.03s/it]

Iter 81.0, Minibatch Loss= 23.957411, Training Accuracy= 0.20312


 10%|██████▊                                                          | 82/782 [01:28<11:57,  1.03s/it]

Iter 82.0, Minibatch Loss= 48.499325, Training Accuracy= 0.56250


 11%|██████▉                                                          | 83/782 [01:29<11:54,  1.02s/it]

Iter 83.0, Minibatch Loss= 15.199680, Training Accuracy= 0.56250


 11%|██████▉                                                          | 84/782 [01:30<11:50,  1.02s/it]

Iter 84.0, Minibatch Loss= 46.456627, Training Accuracy= 0.28125


 11%|███████                                                          | 85/782 [01:31<11:50,  1.02s/it]

Iter 85.0, Minibatch Loss= 30.000072, Training Accuracy= 0.56250


 11%|███████▏                                                         | 86/782 [01:32<11:50,  1.02s/it]

Iter 86.0, Minibatch Loss= 16.446732, Training Accuracy= 0.17188


 11%|███████▏                                                         | 87/782 [01:33<11:51,  1.02s/it]

Iter 87.0, Minibatch Loss= 29.636690, Training Accuracy= 0.56250


 11%|███████▎                                                         | 88/782 [01:34<11:45,  1.02s/it]

Iter 88.0, Minibatch Loss= 46.730194, Training Accuracy= 0.28125


 11%|███████▍                                                         | 89/782 [01:35<11:42,  1.01s/it]

Iter 89.0, Minibatch Loss= 44.094242, Training Accuracy= 0.56250


 12%|███████▍                                                         | 90/782 [01:36<11:39,  1.01s/it]

Iter 90.0, Minibatch Loss= 10.829421, Training Accuracy= 0.56250


 12%|███████▌                                                         | 91/782 [01:37<11:37,  1.01s/it]

Iter 91.0, Minibatch Loss= 45.525314, Training Accuracy= 0.28125


 12%|███████▋                                                         | 92/782 [01:38<11:40,  1.02s/it]

Iter 92.0, Minibatch Loss= 25.263250, Training Accuracy= 0.56250


 12%|███████▋                                                         | 93/782 [01:39<11:39,  1.01s/it]

Iter 93.0, Minibatch Loss= 32.558121, Training Accuracy= 0.16406


 12%|███████▊                                                         | 94/782 [01:40<11:38,  1.02s/it]

Iter 94.0, Minibatch Loss= 25.268192, Training Accuracy= 0.56250


 12%|███████▉                                                         | 95/782 [01:41<11:33,  1.01s/it]

Iter 95.0, Minibatch Loss= 45.904869, Training Accuracy= 0.28125


 12%|███████▉                                                         | 96/782 [01:42<11:33,  1.01s/it]

Iter 96.0, Minibatch Loss= 39.228584, Training Accuracy= 0.56250


 12%|████████                                                         | 97/782 [01:43<11:33,  1.01s/it]

Iter 97.0, Minibatch Loss= 6.264279, Training Accuracy= 0.56250


 13%|████████▏                                                        | 98/782 [01:44<11:38,  1.02s/it]

Iter 98.0, Minibatch Loss= 45.506824, Training Accuracy= 0.28125


 13%|████████▏                                                        | 99/782 [01:45<11:36,  1.02s/it]

Iter 99.0, Minibatch Loss= 20.750452, Training Accuracy= 0.56250


 13%|████████▏                                                       | 100/782 [01:46<11:33,  1.02s/it]

Iter 100.0, Minibatch Loss= 45.173641, Training Accuracy= 0.16406


 13%|████████▎                                                       | 101/782 [01:47<11:31,  1.02s/it]

Iter 101.0, Minibatch Loss= 20.794426, Training Accuracy= 0.56250


 13%|████████▎                                                       | 102/782 [01:48<11:41,  1.03s/it]

Iter 102.0, Minibatch Loss= 47.209053, Training Accuracy= 0.28125


 13%|████████▍                                                       | 103/782 [01:49<11:50,  1.05s/it]

Iter 103.0, Minibatch Loss= 34.961235, Training Accuracy= 0.56250


 13%|████████▌                                                       | 104/782 [01:50<11:42,  1.04s/it]

Iter 104.0, Minibatch Loss= 2.324024, Training Accuracy= 0.57031


 13%|████████▌                                                       | 105/782 [01:51<11:36,  1.03s/it]

Iter 105.0, Minibatch Loss= 34.568924, Training Accuracy= 0.28125


 14%|████████▋                                                       | 106/782 [01:52<11:31,  1.02s/it]

Iter 106.0, Minibatch Loss= 24.638124, Training Accuracy= 0.21875


 14%|████████▊                                                       | 107/782 [01:53<11:31,  1.02s/it]

Iter 107.0, Minibatch Loss= 41.345165, Training Accuracy= 0.56250


 14%|████████▊                                                       | 108/782 [01:54<11:28,  1.02s/it]

Iter 108.0, Minibatch Loss= 8.684769, Training Accuracy= 0.56250


 14%|████████▉                                                       | 109/782 [01:55<11:27,  1.02s/it]

Iter 109.0, Minibatch Loss= 45.233749, Training Accuracy= 0.28125


 14%|█████████                                                       | 110/782 [01:56<11:26,  1.02s/it]

Iter 110.0, Minibatch Loss= 22.893282, Training Accuracy= 0.56250


 14%|█████████                                                       | 111/782 [01:57<11:23,  1.02s/it]

Iter 111.0, Minibatch Loss= 36.431313, Training Accuracy= 0.16406


 14%|█████████▏                                                      | 112/782 [01:58<11:21,  1.02s/it]

Iter 112.0, Minibatch Loss= 22.976212, Training Accuracy= 0.56250


 14%|█████████▏                                                      | 113/782 [01:59<11:18,  1.01s/it]

Iter 113.0, Minibatch Loss= 46.197945, Training Accuracy= 0.28125


 15%|█████████▎                                                      | 114/782 [02:00<11:15,  1.01s/it]

Iter 114.0, Minibatch Loss= 37.093048, Training Accuracy= 0.56250


 15%|█████████▍                                                      | 115/782 [02:01<11:11,  1.01s/it]

Iter 115.0, Minibatch Loss= 4.439064, Training Accuracy= 0.56250


 15%|█████████▍                                                      | 116/782 [02:02<11:09,  1.01s/it]

Iter 116.0, Minibatch Loss= 43.785767, Training Accuracy= 0.28125


 15%|█████████▌                                                      | 117/782 [02:04<11:13,  1.01s/it]

Iter 117.0, Minibatch Loss= 19.934708, Training Accuracy= 0.58594


 15%|█████████▋                                                      | 118/782 [02:05<11:12,  1.01s/it]

Iter 118.0, Minibatch Loss= 14.670147, Training Accuracy= 0.18750


 15%|█████████▋                                                      | 119/782 [02:06<11:09,  1.01s/it]

Iter 119.0, Minibatch Loss= 33.348137, Training Accuracy= 0.56250


 15%|█████████▊                                                      | 120/782 [02:07<11:07,  1.01s/it]

Iter 120.0, Minibatch Loss= 34.988918, Training Accuracy= 0.29688


 15%|█████████▉                                                      | 121/782 [02:08<11:06,  1.01s/it]

Iter 121.0, Minibatch Loss= 46.260918, Training Accuracy= 0.56250


 16%|█████████▉                                                      | 122/782 [02:09<11:04,  1.01s/it]

Iter 122.0, Minibatch Loss= 13.891710, Training Accuracy= 0.56250


 16%|██████████                                                      | 123/782 [02:10<11:05,  1.01s/it]

Iter 123.0, Minibatch Loss= 35.567478, Training Accuracy= 0.28906


 16%|██████████▏                                                     | 124/782 [02:11<11:04,  1.01s/it]

Iter 124.0, Minibatch Loss= 27.221203, Training Accuracy= 0.56250


 16%|██████████▏                                                     | 125/782 [02:12<11:00,  1.01s/it]

Iter 125.0, Minibatch Loss= 33.213718, Training Accuracy= 0.17969


 16%|██████████▎                                                     | 126/782 [02:13<11:01,  1.01s/it]

Iter 126.0, Minibatch Loss= 26.497032, Training Accuracy= 0.55469


 16%|██████████▍                                                     | 127/782 [02:14<10:59,  1.01s/it]

Iter 127.0, Minibatch Loss= 39.544395, Training Accuracy= 0.29688


 16%|██████████▍                                                     | 128/782 [02:15<11:02,  1.01s/it]

Iter 128.0, Minibatch Loss= 38.028862, Training Accuracy= 0.55469


 16%|██████████▌                                                     | 129/782 [02:16<11:00,  1.01s/it]

Iter 129.0, Minibatch Loss= 4.651619, Training Accuracy= 0.55469


 17%|██████████▋                                                     | 130/782 [02:17<11:02,  1.02s/it]

Iter 130.0, Minibatch Loss= 39.727974, Training Accuracy= 0.28125


 17%|██████████▋                                                     | 131/782 [02:18<11:04,  1.02s/it]

Iter 131.0, Minibatch Loss= 24.931149, Training Accuracy= 0.27344


 17%|██████████▊                                                     | 132/782 [02:19<11:15,  1.04s/it]

Iter 132.0, Minibatch Loss= 42.592197, Training Accuracy= 0.55469


 17%|██████████▉                                                     | 133/782 [02:20<11:15,  1.04s/it]

Iter 133.0, Minibatch Loss= 7.819606, Training Accuracy= 0.56250


 17%|██████████▉                                                     | 134/782 [02:21<11:07,  1.03s/it]

Iter 134.0, Minibatch Loss= 27.912319, Training Accuracy= 0.29688


 17%|███████████                                                     | 135/782 [02:22<11:01,  1.02s/it]

Iter 135.0, Minibatch Loss= 32.339272, Training Accuracy= 0.55469


 17%|███████████▏                                                    | 136/782 [02:23<10:56,  1.02s/it]

Iter 136.0, Minibatch Loss= 29.512312, Training Accuracy= 0.17969


 18%|███████████▏                                                    | 137/782 [02:24<10:51,  1.01s/it]

Iter 137.0, Minibatch Loss= 30.265139, Training Accuracy= 0.55469


 18%|███████████▎                                                    | 138/782 [02:25<10:50,  1.01s/it]

Iter 138.0, Minibatch Loss= 33.353790, Training Accuracy= 0.29688


 18%|███████████▍                                                    | 139/782 [02:26<10:47,  1.01s/it]

Iter 139.0, Minibatch Loss= 41.918205, Training Accuracy= 0.55469


 18%|███████████▍                                                    | 140/782 [02:27<10:46,  1.01s/it]

Iter 140.0, Minibatch Loss= 8.459201, Training Accuracy= 0.55469


 18%|███████████▌                                                    | 141/782 [02:28<10:45,  1.01s/it]

Iter 141.0, Minibatch Loss= 34.280113, Training Accuracy= 0.28125


 18%|███████████▌                                                    | 142/782 [02:29<10:45,  1.01s/it]

Iter 142.0, Minibatch Loss= 24.030027, Training Accuracy= 0.28125


 18%|███████████▋                                                    | 143/782 [02:30<10:46,  1.01s/it]

Iter 143.0, Minibatch Loss= 42.601555, Training Accuracy= 0.55469


 18%|███████████▊                                                    | 144/782 [02:31<10:45,  1.01s/it]

Iter 144.0, Minibatch Loss= 9.248147, Training Accuracy= 0.55469


 19%|███████████▊                                                    | 145/782 [02:32<10:42,  1.01s/it]

Iter 145.0, Minibatch Loss= 44.758308, Training Accuracy= 0.28125


 19%|███████████▉                                                    | 146/782 [02:33<10:42,  1.01s/it]

Iter 146.0, Minibatch Loss= 22.504618, Training Accuracy= 0.55469


 19%|████████████                                                    | 147/782 [02:34<10:40,  1.01s/it]

Iter 147.0, Minibatch Loss= 33.374657, Training Accuracy= 0.18750


 19%|████████████                                                    | 148/782 [02:35<10:41,  1.01s/it]

Iter 148.0, Minibatch Loss= 21.415531, Training Accuracy= 0.55469


 19%|████████████▏                                                   | 149/782 [02:36<10:41,  1.01s/it]

Iter 149.0, Minibatch Loss= 47.434788, Training Accuracy= 0.29688


 19%|████████████▎                                                   | 150/782 [02:37<10:39,  1.01s/it]

Iter 150.0, Minibatch Loss= 33.318275, Training Accuracy= 0.55469


 19%|████████████▎                                                   | 151/782 [02:38<10:37,  1.01s/it]

Iter 151.0, Minibatch Loss= 5.285076, Training Accuracy= 0.38281


 19%|████████████▍                                                   | 152/782 [02:39<10:35,  1.01s/it]

Iter 152.0, Minibatch Loss= 35.980659, Training Accuracy= 0.55469


 20%|████████████▌                                                   | 153/782 [02:40<10:36,  1.01s/it]

Iter 153.0, Minibatch Loss= 29.326866, Training Accuracy= 0.22656


 20%|████████████▌                                                   | 154/782 [02:41<10:33,  1.01s/it]

Iter 154.0, Minibatch Loss= 31.450262, Training Accuracy= 0.55469


 20%|████████████▋                                                   | 155/782 [02:42<10:33,  1.01s/it]

Iter 155.0, Minibatch Loss= 25.503187, Training Accuracy= 0.35156


 20%|████████████▊                                                   | 156/782 [02:43<10:32,  1.01s/it]

Iter 156.0, Minibatch Loss= 38.542278, Training Accuracy= 0.55469


 20%|████████████▊                                                   | 157/782 [02:44<10:33,  1.01s/it]

Iter 157.0, Minibatch Loss= 4.893150, Training Accuracy= 0.55469


 20%|████████████▉                                                   | 158/782 [02:45<10:33,  1.01s/it]

Iter 158.0, Minibatch Loss= 34.828712, Training Accuracy= 0.28125


 20%|█████████████                                                   | 159/782 [02:46<10:33,  1.02s/it]

Iter 159.0, Minibatch Loss= 31.140835, Training Accuracy= 0.24219


 20%|█████████████                                                   | 160/782 [02:47<10:27,  1.01s/it]

Iter 160.0, Minibatch Loss= 44.365089, Training Accuracy= 0.55469


 21%|█████████████▏                                                  | 161/782 [02:48<10:31,  1.02s/it]

Iter 161.0, Minibatch Loss= 10.679871, Training Accuracy= 0.55469


 21%|█████████████▎                                                  | 162/782 [02:49<10:44,  1.04s/it]

Iter 162.0, Minibatch Loss= 42.166412, Training Accuracy= 0.28125


 21%|█████████████▎                                                  | 163/782 [02:50<10:38,  1.03s/it]

Iter 163.0, Minibatch Loss= 23.147249, Training Accuracy= 0.55469


 21%|█████████████▍                                                  | 164/782 [02:51<10:30,  1.02s/it]

Iter 164.0, Minibatch Loss= 30.030563, Training Accuracy= 0.20312


 21%|█████████████▌                                                  | 165/782 [02:52<10:25,  1.01s/it]

Iter 165.0, Minibatch Loss= 20.356455, Training Accuracy= 0.55469


 21%|█████████████▌                                                  | 166/782 [02:53<10:23,  1.01s/it]

Iter 166.0, Minibatch Loss= 46.462929, Training Accuracy= 0.29688


 21%|█████████████▋                                                  | 167/782 [02:54<10:21,  1.01s/it]

Iter 167.0, Minibatch Loss= 32.333672, Training Accuracy= 0.55469


 21%|█████████████▋                                                  | 168/782 [02:55<10:20,  1.01s/it]

Iter 168.0, Minibatch Loss= 5.803654, Training Accuracy= 0.38281


 22%|█████████████▊                                                  | 169/782 [02:56<10:20,  1.01s/it]

Iter 169.0, Minibatch Loss= 35.027924, Training Accuracy= 0.55469


 22%|█████████████▉                                                  | 170/782 [02:57<10:21,  1.02s/it]

Iter 170.0, Minibatch Loss= 29.374170, Training Accuracy= 0.23438


 22%|█████████████▉                                                  | 171/782 [02:58<10:21,  1.02s/it]

Iter 171.0, Minibatch Loss= 30.091320, Training Accuracy= 0.55469


 22%|██████████████                                                  | 172/782 [02:59<10:17,  1.01s/it]

Iter 172.0, Minibatch Loss= 26.168236, Training Accuracy= 0.37500


 22%|██████████████▏                                                 | 173/782 [03:00<10:15,  1.01s/it]

Iter 173.0, Minibatch Loss= 35.781193, Training Accuracy= 0.55469


 22%|██████████████▏                                                 | 174/782 [03:01<10:13,  1.01s/it]

Iter 174.0, Minibatch Loss= 2.095287, Training Accuracy= 0.59375


 22%|██████████████▎                                                 | 175/782 [03:02<10:15,  1.01s/it]

Iter 175.0, Minibatch Loss= 28.420288, Training Accuracy= 0.30469


 23%|██████████████▍                                                 | 176/782 [03:03<10:13,  1.01s/it]

Iter 176.0, Minibatch Loss= 23.139746, Training Accuracy= 0.50000


 23%|██████████████▍                                                 | 177/782 [03:04<10:12,  1.01s/it]

Iter 177.0, Minibatch Loss= 19.083122, Training Accuracy= 0.55469


 23%|██████████████▌                                                 | 178/782 [03:05<10:11,  1.01s/it]

Iter 178.0, Minibatch Loss= 15.711101, Training Accuracy= 0.28906


 23%|██████████████▋                                                 | 179/782 [03:06<10:07,  1.01s/it]

Iter 179.0, Minibatch Loss= 15.358992, Training Accuracy= 0.62500


 23%|██████████████▋                                                 | 180/782 [03:07<10:07,  1.01s/it]

Iter 180.0, Minibatch Loss= 11.950646, Training Accuracy= 0.45312


 23%|██████████████▊                                                 | 181/782 [03:08<10:04,  1.01s/it]

Iter 181.0, Minibatch Loss= 38.386036, Training Accuracy= 0.55469


 23%|██████████████▉                                                 | 182/782 [03:09<10:07,  1.01s/it]

Iter 182.0, Minibatch Loss= 4.376508, Training Accuracy= 0.55469


 23%|██████████████▉                                                 | 183/782 [03:10<10:06,  1.01s/it]

Iter 183.0, Minibatch Loss= 35.006927, Training Accuracy= 0.29688


 24%|███████████████                                                 | 184/782 [03:11<10:02,  1.01s/it]

Iter 184.0, Minibatch Loss= 27.583618, Training Accuracy= 0.28125


 24%|███████████████▏                                                | 185/782 [03:12<10:03,  1.01s/it]

Iter 185.0, Minibatch Loss= 45.271446, Training Accuracy= 0.55469


 24%|███████████████▏                                                | 186/782 [03:13<10:02,  1.01s/it]

Iter 186.0, Minibatch Loss= 6.924447, Training Accuracy= 0.58594


 24%|███████████████▎                                                | 187/782 [03:14<10:05,  1.02s/it]

Iter 187.0, Minibatch Loss= 7.220122, Training Accuracy= 0.33594


 24%|███████████████▍                                                | 188/782 [03:15<10:06,  1.02s/it]

Iter 188.0, Minibatch Loss= 38.576038, Training Accuracy= 0.55469


 24%|███████████████▍                                                | 189/782 [03:16<10:05,  1.02s/it]

Iter 189.0, Minibatch Loss= 30.275757, Training Accuracy= 0.19531


 24%|███████████████▌                                                | 190/782 [03:18<10:03,  1.02s/it]

Iter 190.0, Minibatch Loss= 35.375999, Training Accuracy= 0.55469


 24%|███████████████▋                                                | 191/782 [03:19<10:15,  1.04s/it]

Iter 191.0, Minibatch Loss= 18.542265, Training Accuracy= 0.34375


 25%|███████████████▋                                                | 192/782 [03:20<10:19,  1.05s/it]

Iter 192.0, Minibatch Loss= 42.983910, Training Accuracy= 0.55469


 25%|███████████████▊                                                | 193/782 [03:21<10:11,  1.04s/it]

Iter 193.0, Minibatch Loss= 9.843455, Training Accuracy= 0.55469


 25%|███████████████▉                                                | 194/782 [03:22<10:05,  1.03s/it]

Iter 194.0, Minibatch Loss= 25.678555, Training Accuracy= 0.29688


 25%|███████████████▉                                                | 195/782 [03:23<10:04,  1.03s/it]

Iter 195.0, Minibatch Loss= 20.116261, Training Accuracy= 0.56250


 25%|████████████████                                                | 196/782 [03:24<09:59,  1.02s/it]

Iter 196.0, Minibatch Loss= 38.521584, Training Accuracy= 0.17969


 25%|████████████████                                                | 197/782 [03:25<09:55,  1.02s/it]

Iter 197.0, Minibatch Loss= 20.817575, Training Accuracy= 0.55469


 25%|████████████████▏                                               | 198/782 [03:26<09:52,  1.01s/it]

Iter 198.0, Minibatch Loss= 39.413765, Training Accuracy= 0.29688


 25%|████████████████▎                                               | 199/782 [03:27<09:49,  1.01s/it]

Iter 199.0, Minibatch Loss= 32.246017, Training Accuracy= 0.55469


 26%|████████████████▎                                               | 200/782 [03:28<09:47,  1.01s/it]

Iter 200.0, Minibatch Loss= 1.370658, Training Accuracy= 0.60156


 26%|████████████████▍                                               | 201/782 [03:29<09:45,  1.01s/it]

Iter 201.0, Minibatch Loss= 8.452863, Training Accuracy= 0.31250


 26%|████████████████▌                                               | 202/782 [03:30<09:42,  1.00s/it]

Iter 202.0, Minibatch Loss= 23.728546, Training Accuracy= 0.55469


 26%|████████████████▌                                               | 203/782 [03:31<09:41,  1.00s/it]

Iter 203.0, Minibatch Loss= 36.218315, Training Accuracy= 0.33594


 26%|████████████████▋                                               | 204/782 [03:32<09:39,  1.00s/it]

Iter 204.0, Minibatch Loss= 33.409683, Training Accuracy= 0.55469


 26%|████████████████▊                                               | 205/782 [03:33<09:38,  1.00s/it]

Iter 205.0, Minibatch Loss= 2.424973, Training Accuracy= 0.60938


 26%|████████████████▊                                               | 206/782 [03:34<09:36,  1.00s/it]

Iter 206.0, Minibatch Loss= 11.314362, Training Accuracy= 0.57031


 26%|████████████████▉                                               | 207/782 [03:35<09:37,  1.00s/it]

Iter 207.0, Minibatch Loss= 2.220235, Training Accuracy= 0.49219


 27%|█████████████████                                               | 208/782 [03:36<09:36,  1.00s/it]

Iter 208.0, Minibatch Loss= 18.869564, Training Accuracy= 0.55469


 27%|█████████████████                                               | 209/782 [03:37<09:35,  1.00s/it]

Iter 209.0, Minibatch Loss= 35.900166, Training Accuracy= 0.18750


 27%|█████████████████▏                                              | 210/782 [03:38<09:32,  1.00s/it]

Iter 210.0, Minibatch Loss= 17.770203, Training Accuracy= 0.57031


 27%|█████████████████▎                                              | 211/782 [03:39<09:38,  1.01s/it]

Iter 211.0, Minibatch Loss= 43.095169, Training Accuracy= 0.29688


 27%|█████████████████▎                                              | 212/782 [03:40<09:50,  1.04s/it]

Iter 212.0, Minibatch Loss= 30.697008, Training Accuracy= 0.55469


 27%|█████████████████▍                                              | 213/782 [03:41<09:45,  1.03s/it]

Iter 213.0, Minibatch Loss= 3.235720, Training Accuracy= 0.42969


 27%|█████████████████▌                                              | 214/782 [03:42<09:43,  1.03s/it]

Iter 214.0, Minibatch Loss= 28.883997, Training Accuracy= 0.55469


 27%|█████████████████▌                                              | 215/782 [03:43<09:41,  1.02s/it]

Iter 215.0, Minibatch Loss= 33.063595, Training Accuracy= 0.24219


 28%|█████████████████▋                                              | 216/782 [03:44<09:41,  1.03s/it]

Iter 216.0, Minibatch Loss= 23.817383, Training Accuracy= 0.55469


 28%|█████████████████▊                                              | 217/782 [03:45<09:37,  1.02s/it]

Iter 217.0, Minibatch Loss= 30.093199, Training Accuracy= 0.36719


 28%|█████████████████▊                                              | 218/782 [03:46<09:33,  1.02s/it]

Iter 218.0, Minibatch Loss= 31.211267, Training Accuracy= 0.55469


 28%|█████████████████▉                                              | 219/782 [03:47<09:31,  1.01s/it]

Iter 219.0, Minibatch Loss= 3.779111, Training Accuracy= 0.39844


 28%|██████████████████                                              | 220/782 [03:48<09:36,  1.03s/it]

Iter 220.0, Minibatch Loss= 21.062742, Training Accuracy= 0.55469


 28%|██████████████████                                              | 221/782 [03:49<09:49,  1.05s/it]

Iter 221.0, Minibatch Loss= 37.885216, Training Accuracy= 0.33594


 28%|██████████████████▏                                             | 222/782 [03:50<09:40,  1.04s/it]

Iter 222.0, Minibatch Loss= 29.342085, Training Accuracy= 0.55469


 29%|██████████████████▎                                             | 223/782 [03:51<09:34,  1.03s/it]

Iter 223.0, Minibatch Loss= 3.887568, Training Accuracy= 0.42969


 29%|██████████████████▎                                             | 224/782 [03:52<09:29,  1.02s/it]

Iter 224.0, Minibatch Loss= 29.272520, Training Accuracy= 0.55469


 29%|██████████████████▍                                             | 225/782 [03:53<09:24,  1.01s/it]

Iter 225.0, Minibatch Loss= 32.611774, Training Accuracy= 0.24219


 29%|██████████████████▍                                             | 226/782 [03:54<09:24,  1.01s/it]

Iter 226.0, Minibatch Loss= 24.428768, Training Accuracy= 0.55469


 29%|██████████████████▌                                             | 227/782 [03:55<09:22,  1.01s/it]

Iter 227.0, Minibatch Loss= 28.763691, Training Accuracy= 0.36719


 29%|██████████████████▋                                             | 228/782 [03:56<09:22,  1.01s/it]

Iter 228.0, Minibatch Loss= 30.594624, Training Accuracy= 0.55469


 29%|██████████████████▋                                             | 229/782 [03:57<09:20,  1.01s/it]

Iter 229.0, Minibatch Loss= 2.815973, Training Accuracy= 0.44531


 29%|██████████████████▊                                             | 230/782 [03:58<09:18,  1.01s/it]

Iter 230.0, Minibatch Loss= 20.506607, Training Accuracy= 0.55469


 30%|██████████████████▉                                             | 231/782 [03:59<09:18,  1.01s/it]

Iter 231.0, Minibatch Loss= 30.567352, Training Accuracy= 0.35156


 30%|██████████████████▉                                             | 232/782 [04:00<09:18,  1.01s/it]

Iter 232.0, Minibatch Loss= 29.987249, Training Accuracy= 0.55469


 30%|███████████████████                                             | 233/782 [04:01<09:15,  1.01s/it]

Iter 233.0, Minibatch Loss= 7.813890, Training Accuracy= 0.32812


 30%|███████████████████▏                                            | 234/782 [04:02<09:13,  1.01s/it]

Iter 234.0, Minibatch Loss= 20.126799, Training Accuracy= 0.59375


 30%|███████████████████▏                                            | 235/782 [04:03<09:13,  1.01s/it]

Iter 235.0, Minibatch Loss= 37.849083, Training Accuracy= 0.33594


 30%|███████████████████▎                                            | 236/782 [04:04<09:11,  1.01s/it]

Iter 236.0, Minibatch Loss= 31.136799, Training Accuracy= 0.55469


 30%|███████████████████▍                                            | 237/782 [04:05<09:09,  1.01s/it]

Iter 237.0, Minibatch Loss= 4.995969, Training Accuracy= 0.45312


 30%|███████████████████▍                                            | 238/782 [04:06<09:07,  1.01s/it]

Iter 238.0, Minibatch Loss= 22.117321, Training Accuracy= 0.55469


 31%|███████████████████▌                                            | 239/782 [04:07<09:06,  1.01s/it]

Iter 239.0, Minibatch Loss= 26.305811, Training Accuracy= 0.25781


 31%|███████████████████▋                                            | 240/782 [04:08<09:06,  1.01s/it]

Iter 240.0, Minibatch Loss= 17.196110, Training Accuracy= 0.60156


 31%|███████████████████▋                                            | 241/782 [04:09<09:06,  1.01s/it]

Iter 241.0, Minibatch Loss= 34.375755, Training Accuracy= 0.35938


 31%|███████████████████▊                                            | 242/782 [04:10<09:15,  1.03s/it]

Iter 242.0, Minibatch Loss= 28.348692, Training Accuracy= 0.55469


 31%|███████████████████▉                                            | 243/782 [04:12<09:29,  1.06s/it]

Iter 243.0, Minibatch Loss= 4.196774, Training Accuracy= 0.42188


 31%|███████████████████▉                                            | 244/782 [04:13<09:22,  1.05s/it]

Iter 244.0, Minibatch Loss= 26.367378, Training Accuracy= 0.55469


 31%|████████████████████                                            | 245/782 [04:14<09:22,  1.05s/it]

Iter 245.0, Minibatch Loss= 31.430798, Training Accuracy= 0.23438


 31%|████████████████████▏                                           | 246/782 [04:15<09:17,  1.04s/it]

Iter 246.0, Minibatch Loss= 22.099667, Training Accuracy= 0.55469


 32%|████████████████████▏                                           | 247/782 [04:16<09:09,  1.03s/it]

Iter 247.0, Minibatch Loss= 29.882488, Training Accuracy= 0.39062


 32%|████████████████████▎                                           | 248/782 [04:17<09:07,  1.02s/it]

Iter 248.0, Minibatch Loss= 25.769512, Training Accuracy= 0.55469


 32%|████████████████████▍                                           | 249/782 [04:18<09:05,  1.02s/it]

Iter 249.0, Minibatch Loss= 5.201422, Training Accuracy= 0.43750


 32%|████████████████████▍                                           | 250/782 [04:19<09:16,  1.05s/it]

Iter 250.0, Minibatch Loss= 30.692671, Training Accuracy= 0.48438


 32%|████████████████████▌                                           | 251/782 [04:20<09:13,  1.04s/it]

Iter 251.0, Minibatch Loss= 19.415024, Training Accuracy= 0.34375


 32%|████████████████████▌                                           | 252/782 [04:21<09:06,  1.03s/it]

Iter 252.0, Minibatch Loss= 33.964081, Training Accuracy= 0.48438


 32%|████████████████████▋                                           | 253/782 [04:22<09:02,  1.03s/it]

Iter 253.0, Minibatch Loss= 47.447762, Training Accuracy= 0.18750


 32%|████████████████████▊                                           | 254/782 [04:23<08:59,  1.02s/it]

Iter 254.0, Minibatch Loss= 21.632820, Training Accuracy= 0.50000


 33%|████████████████████▊                                           | 255/782 [04:24<08:58,  1.02s/it]

Iter 255.0, Minibatch Loss= 44.642326, Training Accuracy= 0.32812


 33%|████████████████████▉                                           | 256/782 [04:25<08:58,  1.02s/it]

Iter 256.0, Minibatch Loss= 26.084114, Training Accuracy= 0.48438


 33%|█████████████████████                                           | 257/782 [04:26<08:58,  1.02s/it]

Iter 257.0, Minibatch Loss= 17.404572, Training Accuracy= 0.21875


 33%|█████████████████████                                           | 258/782 [04:27<08:53,  1.02s/it]

Iter 258.0, Minibatch Loss= 19.783588, Training Accuracy= 0.39062


 33%|█████████████████████▏                                          | 259/782 [04:28<08:51,  1.02s/it]

Iter 259.0, Minibatch Loss= 52.043465, Training Accuracy= 0.48438


 33%|█████████████████████▎                                          | 260/782 [04:29<08:49,  1.01s/it]

Iter 260.0, Minibatch Loss= 8.471274, Training Accuracy= 0.49219


 33%|█████████████████████▎                                          | 261/782 [04:30<08:48,  1.01s/it]

Iter 261.0, Minibatch Loss= 39.757317, Training Accuracy= 0.32812


 34%|█████████████████████▍                                          | 262/782 [04:31<08:44,  1.01s/it]

Iter 262.0, Minibatch Loss= 18.422825, Training Accuracy= 0.35938


 34%|█████████████████████▌                                          | 263/782 [04:32<08:43,  1.01s/it]

Iter 263.0, Minibatch Loss= 12.890643, Training Accuracy= 0.50000


 34%|█████████████████████▌                                          | 264/782 [04:33<08:41,  1.01s/it]

Iter 264.0, Minibatch Loss= 35.588676, Training Accuracy= 0.33594


 34%|█████████████████████▋                                          | 265/782 [04:34<08:40,  1.01s/it]

Iter 265.0, Minibatch Loss= 20.122217, Training Accuracy= 0.27344


 34%|█████████████████████▊                                          | 266/782 [04:35<08:40,  1.01s/it]

Iter 266.0, Minibatch Loss= 30.706629, Training Accuracy= 0.48438


 34%|█████████████████████▊                                          | 267/782 [04:36<08:39,  1.01s/it]

Iter 267.0, Minibatch Loss= 27.808483, Training Accuracy= 0.35156


 34%|█████████████████████▉                                          | 268/782 [04:37<08:49,  1.03s/it]

Iter 268.0, Minibatch Loss= 30.876804, Training Accuracy= 0.48438


 34%|██████████████████████                                          | 269/782 [04:38<08:44,  1.02s/it]

Iter 269.0, Minibatch Loss= 21.381786, Training Accuracy= 0.22656


 35%|██████████████████████                                          | 270/782 [04:39<08:40,  1.02s/it]

Iter 270.0, Minibatch Loss= 18.281521, Training Accuracy= 0.45312


 35%|██████████████████████▏                                         | 271/782 [04:40<08:38,  1.01s/it]

Iter 271.0, Minibatch Loss= 25.042084, Training Accuracy= 0.48438


 35%|██████████████████████▎                                         | 272/782 [04:41<08:39,  1.02s/it]

Iter 272.0, Minibatch Loss= 36.111305, Training Accuracy= 0.33594


 35%|██████████████████████▎                                         | 273/782 [04:42<08:36,  1.01s/it]

Iter 273.0, Minibatch Loss= 27.593735, Training Accuracy= 0.48438


 35%|██████████████████████▍                                         | 274/782 [04:43<08:35,  1.02s/it]

Iter 274.0, Minibatch Loss= 19.330099, Training Accuracy= 0.23438


 35%|██████████████████████▌                                         | 275/782 [04:44<08:37,  1.02s/it]

Iter 275.0, Minibatch Loss= 21.126978, Training Accuracy= 0.39844


 35%|██████████████████████▌                                         | 276/782 [04:45<08:37,  1.02s/it]

Iter 276.0, Minibatch Loss= 52.068275, Training Accuracy= 0.48438


 35%|██████████████████████▋                                         | 277/782 [04:46<08:35,  1.02s/it]

Iter 277.0, Minibatch Loss= 8.637804, Training Accuracy= 0.52344


 36%|██████████████████████▊                                         | 278/782 [04:47<08:31,  1.01s/it]

Iter 278.0, Minibatch Loss= 37.590919, Training Accuracy= 0.33594


 36%|██████████████████████▊                                         | 279/782 [04:48<08:37,  1.03s/it]

Iter 279.0, Minibatch Loss= 17.673180, Training Accuracy= 0.53125


 36%|██████████████████████▉                                         | 280/782 [04:49<08:43,  1.04s/it]

Iter 280.0, Minibatch Loss= 22.960758, Training Accuracy= 0.24219


 36%|██████████████████████▉                                         | 281/782 [04:50<08:57,  1.07s/it]

Iter 281.0, Minibatch Loss= 20.538370, Training Accuracy= 0.39844


 36%|███████████████████████                                         | 282/782 [04:52<08:59,  1.08s/it]

Iter 282.0, Minibatch Loss= 49.316628, Training Accuracy= 0.48438


 36%|███████████████████████▏                                        | 283/782 [04:53<08:50,  1.06s/it]

Iter 283.0, Minibatch Loss= 6.578222, Training Accuracy= 0.56250


 36%|███████████████████████▏                                        | 284/782 [04:54<08:41,  1.05s/it]

Iter 284.0, Minibatch Loss= 33.173107, Training Accuracy= 0.34375


 36%|███████████████████████▎                                        | 285/782 [04:55<08:39,  1.05s/it]

Iter 285.0, Minibatch Loss= 18.553600, Training Accuracy= 0.51562


 37%|███████████████████████▍                                        | 286/782 [04:56<08:34,  1.04s/it]

Iter 286.0, Minibatch Loss= 3.028787, Training Accuracy= 0.57812


 37%|███████████████████████▍                                        | 287/782 [04:57<08:57,  1.09s/it]

Iter 287.0, Minibatch Loss= 28.514164, Training Accuracy= 0.40625


 37%|███████████████████████▌                                        | 288/782 [04:58<09:05,  1.11s/it]

Iter 288.0, Minibatch Loss= 21.069050, Training Accuracy= 0.27344


 37%|███████████████████████▋                                        | 289/782 [04:59<09:08,  1.11s/it]

Iter 289.0, Minibatch Loss= 38.194767, Training Accuracy= 0.48438


 37%|███████████████████████▋                                        | 290/782 [05:00<08:58,  1.09s/it]

Iter 290.0, Minibatch Loss= 24.382381, Training Accuracy= 0.38281


 37%|███████████████████████▊                                        | 291/782 [05:01<08:49,  1.08s/it]

Iter 291.0, Minibatch Loss= 34.812237, Training Accuracy= 0.48438


 37%|███████████████████████▉                                        | 292/782 [05:02<08:46,  1.07s/it]

Iter 292.0, Minibatch Loss= 6.810199, Training Accuracy= 0.28906


 37%|███████████████████████▉                                        | 293/782 [05:03<08:44,  1.07s/it]

Iter 293.0, Minibatch Loss= 19.648911, Training Accuracy= 0.52344


 38%|████████████████████████                                        | 294/782 [05:04<08:41,  1.07s/it]

Iter 294.0, Minibatch Loss= 46.817265, Training Accuracy= 0.34375


 38%|████████████████████████▏                                       | 295/782 [05:05<08:38,  1.06s/it]

Iter 295.0, Minibatch Loss= 26.056633, Training Accuracy= 0.48438


 38%|████████████████████████▏                                       | 296/782 [05:07<08:37,  1.06s/it]

Iter 296.0, Minibatch Loss= 19.244934, Training Accuracy= 0.39062


 38%|████████████████████████▎                                       | 297/782 [05:08<08:37,  1.07s/it]

Iter 297.0, Minibatch Loss= 24.123808, Training Accuracy= 0.54688


 38%|████████████████████████▍                                       | 298/782 [05:09<08:35,  1.07s/it]

Iter 298.0, Minibatch Loss= 28.124073, Training Accuracy= 0.22656


 38%|████████████████████████▍                                       | 299/782 [05:10<08:34,  1.06s/it]

Iter 299.0, Minibatch Loss= 15.687267, Training Accuracy= 0.57812


 38%|████████████████████████▌                                       | 300/782 [05:11<08:32,  1.06s/it]

Iter 300.0, Minibatch Loss= 31.895531, Training Accuracy= 0.38281


 38%|████████████████████████▋                                       | 301/782 [05:12<08:29,  1.06s/it]

Iter 301.0, Minibatch Loss= 29.709599, Training Accuracy= 0.48438


 39%|████████████████████████▋                                       | 302/782 [05:13<08:29,  1.06s/it]

Iter 302.0, Minibatch Loss= 10.717038, Training Accuracy= 0.44531


 39%|████████████████████████▊                                       | 303/782 [05:14<08:30,  1.06s/it]

Iter 303.0, Minibatch Loss= 25.319992, Training Accuracy= 0.53906


 39%|████████████████████████▉                                       | 304/782 [05:15<08:30,  1.07s/it]

Iter 304.0, Minibatch Loss= 25.938877, Training Accuracy= 0.23438


 39%|████████████████████████▉                                       | 305/782 [05:16<08:31,  1.07s/it]

Iter 305.0, Minibatch Loss= 16.065151, Training Accuracy= 0.57031


 39%|█████████████████████████                                       | 306/782 [05:17<08:30,  1.07s/it]

Iter 306.0, Minibatch Loss= 39.174713, Training Accuracy= 0.36719


 39%|█████████████████████████▏                                      | 307/782 [05:18<08:37,  1.09s/it]

Iter 307.0, Minibatch Loss= 24.790751, Training Accuracy= 0.48438


 39%|█████████████████████████▏                                      | 308/782 [05:20<08:43,  1.11s/it]

Iter 308.0, Minibatch Loss= 15.450287, Training Accuracy= 0.46094


 40%|█████████████████████████▎                                      | 309/782 [05:21<08:36,  1.09s/it]

Iter 309.0, Minibatch Loss= 21.316399, Training Accuracy= 0.54688


 40%|█████████████████████████▎                                      | 310/782 [05:22<08:32,  1.09s/it]

Iter 310.0, Minibatch Loss= 25.699364, Training Accuracy= 0.24219


 40%|█████████████████████████▍                                      | 311/782 [05:23<08:26,  1.08s/it]

Iter 311.0, Minibatch Loss= 15.223147, Training Accuracy= 0.54688


 40%|█████████████████████████▌                                      | 312/782 [05:24<08:25,  1.07s/it]

Iter 312.0, Minibatch Loss= 32.692032, Training Accuracy= 0.48438


 40%|█████████████████████████▌                                      | 313/782 [05:25<08:24,  1.08s/it]

Iter 313.0, Minibatch Loss= 16.617023, Training Accuracy= 0.41406


 40%|█████████████████████████▋                                      | 314/782 [05:26<08:22,  1.07s/it]

Iter 314.0, Minibatch Loss= 27.022833, Training Accuracy= 0.52344


 40%|█████████████████████████▊                                      | 315/782 [05:27<08:20,  1.07s/it]

Iter 315.0, Minibatch Loss= 20.889091, Training Accuracy= 0.25781


 40%|█████████████████████████▊                                      | 316/782 [05:28<08:19,  1.07s/it]

Iter 316.0, Minibatch Loss= 15.345017, Training Accuracy= 0.60938


 41%|█████████████████████████▉                                      | 317/782 [05:29<08:19,  1.07s/it]

Iter 317.0, Minibatch Loss= 22.833969, Training Accuracy= 0.41406


 41%|██████████████████████████                                      | 318/782 [05:30<08:17,  1.07s/it]

Iter 318.0, Minibatch Loss= 33.013847, Training Accuracy= 0.48438


 41%|██████████████████████████                                      | 319/782 [05:31<08:15,  1.07s/it]

Iter 319.0, Minibatch Loss= 6.173093, Training Accuracy= 0.47656


 41%|██████████████████████████▏                                     | 320/782 [05:32<08:17,  1.08s/it]

Iter 320.0, Minibatch Loss= 26.661762, Training Accuracy= 0.53906


 41%|██████████████████████████▎                                     | 321/782 [05:34<08:26,  1.10s/it]

Iter 321.0, Minibatch Loss= 21.586697, Training Accuracy= 0.25781


 41%|██████████████████████████▎                                     | 322/782 [05:35<08:31,  1.11s/it]

Iter 322.0, Minibatch Loss= 17.779869, Training Accuracy= 0.57812


 41%|██████████████████████████▍                                     | 323/782 [05:36<08:30,  1.11s/it]

Iter 323.0, Minibatch Loss= 36.452095, Training Accuracy= 0.37500


 41%|██████████████████████████▌                                     | 324/782 [05:37<08:38,  1.13s/it]

Iter 324.0, Minibatch Loss= 26.454517, Training Accuracy= 0.48438


 42%|██████████████████████████▌                                     | 325/782 [05:38<08:42,  1.14s/it]

Iter 325.0, Minibatch Loss= 14.428053, Training Accuracy= 0.46094


 42%|██████████████████████████▋                                     | 326/782 [05:39<08:44,  1.15s/it]

Iter 326.0, Minibatch Loss= 21.542381, Training Accuracy= 0.54688


 42%|██████████████████████████▊                                     | 327/782 [05:40<08:42,  1.15s/it]

Iter 327.0, Minibatch Loss= 22.357784, Training Accuracy= 0.25000


 42%|██████████████████████████▊                                     | 328/782 [05:42<08:37,  1.14s/it]

Iter 328.0, Minibatch Loss= 15.444912, Training Accuracy= 0.61719


 42%|██████████████████████████▉                                     | 329/782 [05:43<08:25,  1.12s/it]

Iter 329.0, Minibatch Loss= 9.424076, Training Accuracy= 0.62500


 42%|███████████████████████████                                     | 330/782 [05:44<08:19,  1.10s/it]

Iter 330.0, Minibatch Loss= 17.499897, Training Accuracy= 0.41406


 42%|███████████████████████████                                     | 331/782 [05:45<08:14,  1.10s/it]

Iter 331.0, Minibatch Loss= 25.908878, Training Accuracy= 0.48438


 42%|███████████████████████████▏                                    | 332/782 [05:46<08:08,  1.09s/it]

Iter 332.0, Minibatch Loss= 21.845512, Training Accuracy= 0.27344


 43%|███████████████████████████▎                                    | 333/782 [05:47<08:03,  1.08s/it]

Iter 333.0, Minibatch Loss= 15.409101, Training Accuracy= 0.53125


 43%|███████████████████████████▎                                    | 334/782 [05:48<08:04,  1.08s/it]

Iter 334.0, Minibatch Loss= 25.522232, Training Accuracy= 0.48438


 43%|███████████████████████████▍                                    | 335/782 [05:49<08:14,  1.11s/it]

Iter 335.0, Minibatch Loss= 24.414968, Training Accuracy= 0.38281


 43%|███████████████████████████▍                                    | 336/782 [05:50<08:09,  1.10s/it]

Iter 336.0, Minibatch Loss= 23.419735, Training Accuracy= 0.53906


 43%|███████████████████████████▌                                    | 337/782 [05:51<08:07,  1.10s/it]

Iter 337.0, Minibatch Loss= 17.477318, Training Accuracy= 0.27344


 43%|███████████████████████████▋                                    | 338/782 [05:52<08:04,  1.09s/it]

Iter 338.0, Minibatch Loss= 15.491448, Training Accuracy= 0.57812


 43%|███████████████████████████▋                                    | 339/782 [05:53<08:02,  1.09s/it]

Iter 339.0, Minibatch Loss= 17.398607, Training Accuracy= 0.51562


 43%|███████████████████████████▊                                    | 340/782 [05:55<07:59,  1.08s/it]

Iter 340.0, Minibatch Loss= 32.562229, Training Accuracy= 0.35156


 44%|███████████████████████████▉                                    | 341/782 [05:56<07:58,  1.08s/it]

Iter 341.0, Minibatch Loss= 20.742926, Training Accuracy= 0.53906


 44%|███████████████████████████▉                                    | 342/782 [05:57<07:56,  1.08s/it]

Iter 342.0, Minibatch Loss= 13.747159, Training Accuracy= 0.29688


 44%|████████████████████████████                                    | 343/782 [05:58<07:56,  1.09s/it]

Iter 343.0, Minibatch Loss= 13.592330, Training Accuracy= 0.61719


 44%|████████████████████████████▏                                   | 344/782 [05:59<07:55,  1.09s/it]

Iter 344.0, Minibatch Loss= 12.804244, Training Accuracy= 0.43750


 44%|████████████████████████████▏                                   | 345/782 [06:00<07:51,  1.08s/it]

Iter 345.0, Minibatch Loss= 34.184685, Training Accuracy= 0.48438


 44%|████████████████████████████▎                                   | 346/782 [06:01<07:47,  1.07s/it]

Iter 346.0, Minibatch Loss= 6.375695, Training Accuracy= 0.34375


 44%|████████████████████████████▍                                   | 347/782 [06:02<07:45,  1.07s/it]

Iter 347.0, Minibatch Loss= 17.294043, Training Accuracy= 0.53125


 45%|████████████████████████████▍                                   | 348/782 [06:03<07:43,  1.07s/it]

Iter 348.0, Minibatch Loss= 37.308319, Training Accuracy= 0.37500


 45%|████████████████████████████▌                                   | 349/782 [06:04<07:53,  1.09s/it]

Iter 349.0, Minibatch Loss= 20.718584, Training Accuracy= 0.48438


 45%|████████████████████████████▋                                   | 350/782 [06:05<07:50,  1.09s/it]

Iter 350.0, Minibatch Loss= 17.440575, Training Accuracy= 0.46094


 45%|████████████████████████████▋                                   | 351/782 [06:06<07:48,  1.09s/it]

Iter 351.0, Minibatch Loss= 18.633099, Training Accuracy= 0.55469


 45%|████████████████████████████▊                                   | 352/782 [06:07<07:42,  1.08s/it]

Iter 352.0, Minibatch Loss= 16.112949, Training Accuracy= 0.29688


 45%|████████████████████████████▉                                   | 353/782 [06:09<07:47,  1.09s/it]

Iter 353.0, Minibatch Loss= 14.033760, Training Accuracy= 0.60938


 45%|████████████████████████████▉                                   | 354/782 [06:10<07:44,  1.08s/it]

Iter 354.0, Minibatch Loss= 14.378981, Training Accuracy= 0.51562


 45%|█████████████████████████████                                   | 355/782 [06:11<07:41,  1.08s/it]

Iter 355.0, Minibatch Loss= 30.620165, Training Accuracy= 0.37500


 46%|█████████████████████████████▏                                  | 356/782 [06:12<07:39,  1.08s/it]

Iter 356.0, Minibatch Loss= 18.200823, Training Accuracy= 0.54688


 46%|█████████████████████████████▏                                  | 357/782 [06:13<07:48,  1.10s/it]

Iter 357.0, Minibatch Loss= 14.702394, Training Accuracy= 0.32031


 46%|█████████████████████████████▎                                  | 358/782 [06:14<07:55,  1.12s/it]

Iter 358.0, Minibatch Loss= 12.512665, Training Accuracy= 0.57031


 46%|█████████████████████████████▍                                  | 359/782 [06:15<08:19,  1.18s/it]

Iter 359.0, Minibatch Loss= 23.731415, Training Accuracy= 0.49219


 46%|█████████████████████████████▍                                  | 360/782 [06:17<09:26,  1.34s/it]

Iter 360.0, Minibatch Loss= 19.132381, Training Accuracy= 0.47656


 46%|█████████████████████████████▌                                  | 361/782 [06:19<09:47,  1.39s/it]

Iter 361.0, Minibatch Loss= 18.872129, Training Accuracy= 0.54688


 46%|█████████████████████████████▋                                  | 362/782 [06:20<10:16,  1.47s/it]

Iter 362.0, Minibatch Loss= 13.333813, Training Accuracy= 0.34375


 46%|█████████████████████████████▋                                  | 363/782 [06:22<10:25,  1.49s/it]

Iter 363.0, Minibatch Loss= 12.407064, Training Accuracy= 0.57812


 47%|█████████████████████████████▊                                  | 364/782 [06:23<10:34,  1.52s/it]

Iter 364.0, Minibatch Loss= 18.659208, Training Accuracy= 0.50781


 47%|█████████████████████████████▊                                  | 365/782 [06:25<10:51,  1.56s/it]

Iter 365.0, Minibatch Loss= 21.528627, Training Accuracy= 0.48438


 47%|█████████████████████████████▉                                  | 366/782 [06:27<10:53,  1.57s/it]

Iter 366.0, Minibatch Loss= 15.381009, Training Accuracy= 0.55469


 47%|██████████████████████████████                                  | 367/782 [06:28<10:38,  1.54s/it]

Iter 367.0, Minibatch Loss= 12.936012, Training Accuracy= 0.41406


 47%|██████████████████████████████                                  | 368/782 [06:29<09:56,  1.44s/it]

Iter 368.0, Minibatch Loss= 11.682253, Training Accuracy= 0.54688


 47%|██████████████████████████████▏                                 | 369/782 [06:31<09:17,  1.35s/it]

Iter 369.0, Minibatch Loss= 30.671368, Training Accuracy= 0.40625


 47%|██████████████████████████████▎                                 | 370/782 [06:32<08:55,  1.30s/it]

Iter 370.0, Minibatch Loss= 16.963068, Training Accuracy= 0.54688


 47%|██████████████████████████████▎                                 | 371/782 [06:33<08:27,  1.23s/it]

Iter 371.0, Minibatch Loss= 15.585683, Training Accuracy= 0.32031


 48%|██████████████████████████████▍                                 | 372/782 [06:34<08:08,  1.19s/it]

Iter 372.0, Minibatch Loss= 13.517513, Training Accuracy= 0.52344


 48%|██████████████████████████████▌                                 | 373/782 [06:35<08:00,  1.18s/it]

Iter 373.0, Minibatch Loss= 31.278408, Training Accuracy= 0.48438


 48%|██████████████████████████████▌                                 | 374/782 [06:36<07:47,  1.15s/it]

Iter 374.0, Minibatch Loss= 12.574280, Training Accuracy= 0.50000


 48%|██████████████████████████████▋                                 | 375/782 [06:37<07:53,  1.16s/it]

Iter 375.0, Minibatch Loss= 26.787863, Training Accuracy= 0.66406


 48%|██████████████████████████████▊                                 | 376/782 [06:38<07:47,  1.15s/it]

Iter 376.0, Minibatch Loss= 11.643661, Training Accuracy= 0.61719


 48%|██████████████████████████████▊                                 | 377/782 [06:40<07:38,  1.13s/it]

Iter 377.0, Minibatch Loss= 4.684634, Training Accuracy= 0.60938


 48%|██████████████████████████████▉                                 | 378/782 [06:41<07:37,  1.13s/it]

Iter 378.0, Minibatch Loss= 13.962704, Training Accuracy= 0.33594


 48%|███████████████████████████████                                 | 379/782 [06:42<07:27,  1.11s/it]

Iter 379.0, Minibatch Loss= 26.633194, Training Accuracy= 0.66406


 49%|███████████████████████████████                                 | 380/782 [06:43<07:21,  1.10s/it]

Iter 380.0, Minibatch Loss= 10.680106, Training Accuracy= 0.64844


 49%|███████████████████████████████▏                                | 381/782 [06:44<07:30,  1.12s/it]

Iter 381.0, Minibatch Loss= 4.055997, Training Accuracy= 0.50781


 49%|███████████████████████████████▎                                | 382/782 [06:45<07:28,  1.12s/it]

Iter 382.0, Minibatch Loss= 6.605898, Training Accuracy= 0.45312


 49%|███████████████████████████████▎                                | 383/782 [06:46<07:21,  1.11s/it]

Iter 383.0, Minibatch Loss= 11.905730, Training Accuracy= 0.66406


 49%|███████████████████████████████▍                                | 384/782 [06:47<07:20,  1.11s/it]

Iter 384.0, Minibatch Loss= 11.511086, Training Accuracy= 0.28906


 49%|███████████████████████████████▌                                | 385/782 [06:49<07:36,  1.15s/it]

Iter 385.0, Minibatch Loss= 28.134048, Training Accuracy= 0.66406


 49%|███████████████████████████████▌                                | 386/782 [06:50<07:35,  1.15s/it]

Iter 386.0, Minibatch Loss= 12.348452, Training Accuracy= 0.64062


 49%|███████████████████████████████▋                                | 387/782 [06:51<07:25,  1.13s/it]

Iter 387.0, Minibatch Loss= 5.988637, Training Accuracy= 0.63281


 50%|███████████████████████████████▊                                | 388/782 [06:52<07:26,  1.13s/it]

Iter 388.0, Minibatch Loss= 5.915987, Training Accuracy= 0.36719


 50%|███████████████████████████████▊                                | 389/782 [06:53<07:19,  1.12s/it]

Iter 389.0, Minibatch Loss= 25.414177, Training Accuracy= 0.66406


 50%|███████████████████████████████▉                                | 390/782 [06:54<07:15,  1.11s/it]

Iter 390.0, Minibatch Loss= 13.912326, Training Accuracy= 0.32812


 50%|████████████████████████████████                                | 391/782 [06:55<07:19,  1.12s/it]

Iter 391.0, Minibatch Loss= 27.335642, Training Accuracy= 0.66406


 50%|████████████████████████████████                                | 392/782 [06:56<07:23,  1.14s/it]

Iter 392.0, Minibatch Loss= 8.862020, Training Accuracy= 0.67188


 50%|████████████████████████████████▏                               | 393/782 [06:58<07:18,  1.13s/it]

Iter 393.0, Minibatch Loss= 26.507906, Training Accuracy= 0.26562


 50%|████████████████████████████████▏                               | 394/782 [06:59<07:23,  1.14s/it]

Iter 394.0, Minibatch Loss= 28.806681, Training Accuracy= 0.66406


 51%|████████████████████████████████▎                               | 395/782 [07:00<07:22,  1.14s/it]

Iter 395.0, Minibatch Loss= 11.377879, Training Accuracy= 0.69531


 51%|████████████████████████████████▍                               | 396/782 [07:01<07:14,  1.13s/it]

Iter 396.0, Minibatch Loss= 17.781387, Training Accuracy= 0.16406


 51%|████████████████████████████████▍                               | 397/782 [07:02<07:07,  1.11s/it]

Iter 397.0, Minibatch Loss= 25.885298, Training Accuracy= 0.66406


 51%|████████████████████████████████▌                               | 398/782 [07:03<07:06,  1.11s/it]

Iter 398.0, Minibatch Loss= 15.535922, Training Accuracy= 0.35938


 51%|████████████████████████████████▋                               | 399/782 [07:04<07:03,  1.11s/it]

Iter 399.0, Minibatch Loss= 34.057011, Training Accuracy= 0.66406


 51%|████████████████████████████████▋                               | 400/782 [07:05<07:07,  1.12s/it]

Iter 400.0, Minibatch Loss= 15.278159, Training Accuracy= 0.66406


 51%|████████████████████████████████▊                               | 401/782 [07:06<06:59,  1.10s/it]

Iter 401.0, Minibatch Loss= 8.479597, Training Accuracy= 0.35938


 51%|████████████████████████████████▉                               | 402/782 [07:07<06:54,  1.09s/it]

Iter 402.0, Minibatch Loss= 26.209736, Training Accuracy= 0.66406


 52%|████████████████████████████████▉                               | 403/782 [07:09<07:00,  1.11s/it]

Iter 403.0, Minibatch Loss= 10.280090, Training Accuracy= 0.69531


 52%|█████████████████████████████████                               | 404/782 [07:10<07:00,  1.11s/it]

Iter 404.0, Minibatch Loss= 18.228722, Training Accuracy= 0.16406


 52%|█████████████████████████████████▏                              | 405/782 [07:11<06:55,  1.10s/it]

Iter 405.0, Minibatch Loss= 26.129078, Training Accuracy= 0.66406


 52%|█████████████████████████████████▏                              | 406/782 [07:12<06:50,  1.09s/it]

Iter 406.0, Minibatch Loss= 14.896011, Training Accuracy= 0.33594


 52%|█████████████████████████████████▎                              | 407/782 [07:13<06:54,  1.10s/it]

Iter 407.0, Minibatch Loss= 34.381844, Training Accuracy= 0.66406


 52%|█████████████████████████████████▍                              | 408/782 [07:14<06:51,  1.10s/it]

Iter 408.0, Minibatch Loss= 15.719674, Training Accuracy= 0.66406


 52%|█████████████████████████████████▍                              | 409/782 [07:15<06:49,  1.10s/it]

Iter 409.0, Minibatch Loss= 6.439136, Training Accuracy= 0.46094


 52%|█████████████████████████████████▌                              | 410/782 [07:16<06:51,  1.11s/it]

Iter 410.0, Minibatch Loss= 21.435349, Training Accuracy= 0.70312


 53%|█████████████████████████████████▋                              | 411/782 [07:17<06:47,  1.10s/it]

Iter 411.0, Minibatch Loss= 8.229525, Training Accuracy= 0.69531


 53%|█████████████████████████████████▋                              | 412/782 [07:19<07:06,  1.15s/it]

Iter 412.0, Minibatch Loss= 15.136648, Training Accuracy= 0.17188


 53%|█████████████████████████████████▊                              | 413/782 [07:20<07:11,  1.17s/it]

Iter 413.0, Minibatch Loss= 23.452133, Training Accuracy= 0.66406


 53%|█████████████████████████████████▉                              | 414/782 [07:21<07:00,  1.14s/it]

Iter 414.0, Minibatch Loss= 21.649639, Training Accuracy= 0.25781


 53%|█████████████████████████████████▉                              | 415/782 [07:22<06:50,  1.12s/it]

Iter 415.0, Minibatch Loss= 39.172363, Training Accuracy= 0.66406


 53%|██████████████████████████████████                              | 416/782 [07:23<06:52,  1.13s/it]

Iter 416.0, Minibatch Loss= 20.268402, Training Accuracy= 0.66406


 53%|██████████████████████████████████▏                             | 417/782 [07:24<06:45,  1.11s/it]

Iter 417.0, Minibatch Loss= 4.260941, Training Accuracy= 0.71875


 53%|██████████████████████████████████▏                             | 418/782 [07:25<06:44,  1.11s/it]

Iter 418.0, Minibatch Loss= 8.143646, Training Accuracy= 0.39844


 54%|██████████████████████████████████▎                             | 419/782 [07:26<06:43,  1.11s/it]

Iter 419.0, Minibatch Loss= 24.173216, Training Accuracy= 0.70312


 54%|██████████████████████████████████▎                             | 420/782 [07:28<06:37,  1.10s/it]

Iter 420.0, Minibatch Loss= 10.717566, Training Accuracy= 0.69531


 54%|██████████████████████████████████▍                             | 421/782 [07:29<06:32,  1.09s/it]

Iter 421.0, Minibatch Loss= 17.005686, Training Accuracy= 0.19531


 54%|██████████████████████████████████▌                             | 422/782 [07:30<06:32,  1.09s/it]

Iter 422.0, Minibatch Loss= 24.313131, Training Accuracy= 0.66406


 54%|██████████████████████████████████▌                             | 423/782 [07:31<06:33,  1.10s/it]

Iter 423.0, Minibatch Loss= 14.050371, Training Accuracy= 0.38281


 54%|██████████████████████████████████▋                             | 424/782 [07:32<06:30,  1.09s/it]

Iter 424.0, Minibatch Loss= 32.140221, Training Accuracy= 0.66406


 54%|██████████████████████████████████▊                             | 425/782 [07:33<06:26,  1.08s/it]

Iter 425.0, Minibatch Loss= 13.687815, Training Accuracy= 0.66406


 54%|██████████████████████████████████▊                             | 426/782 [07:34<06:32,  1.10s/it]

Iter 426.0, Minibatch Loss= 5.930379, Training Accuracy= 0.48438


 55%|██████████████████████████████████▉                             | 427/782 [07:35<06:28,  1.10s/it]

Iter 427.0, Minibatch Loss= 19.803761, Training Accuracy= 0.70312


 55%|███████████████████████████████████                             | 428/782 [07:36<06:31,  1.11s/it]

Iter 428.0, Minibatch Loss= 6.807950, Training Accuracy= 0.68750


 55%|███████████████████████████████████                             | 429/782 [07:37<06:27,  1.10s/it]

Iter 429.0, Minibatch Loss= 4.416049, Training Accuracy= 0.46875


 55%|███████████████████████████████████▏                            | 430/782 [07:38<06:24,  1.09s/it]

Iter 430.0, Minibatch Loss= 16.623051, Training Accuracy= 0.66406


 55%|███████████████████████████████████▎                            | 431/782 [07:40<06:30,  1.11s/it]

Iter 431.0, Minibatch Loss= 13.817663, Training Accuracy= 0.32031


 55%|███████████████████████████████████▎                            | 432/782 [07:41<06:32,  1.12s/it]

Iter 432.0, Minibatch Loss= 30.505947, Training Accuracy= 0.66406


 55%|███████████████████████████████████▍                            | 433/782 [07:42<06:32,  1.12s/it]

Iter 433.0, Minibatch Loss= 13.670238, Training Accuracy= 0.69531


 55%|███████████████████████████████████▌                            | 434/782 [07:43<06:26,  1.11s/it]

Iter 434.0, Minibatch Loss= 8.450909, Training Accuracy= 0.33594


 56%|███████████████████████████████████▌                            | 435/782 [07:44<06:24,  1.11s/it]

Iter 435.0, Minibatch Loss= 18.349808, Training Accuracy= 0.66406


 56%|███████████████████████████████████▋                            | 436/782 [07:45<06:21,  1.10s/it]

Iter 436.0, Minibatch Loss= 17.310646, Training Accuracy= 0.33594


 56%|███████████████████████████████████▊                            | 437/782 [07:46<06:24,  1.11s/it]

Iter 437.0, Minibatch Loss= 31.609161, Training Accuracy= 0.66406


 56%|███████████████████████████████████▊                            | 438/782 [07:47<06:21,  1.11s/it]

Iter 438.0, Minibatch Loss= 13.441017, Training Accuracy= 0.70312


 56%|███████████████████████████████████▉                            | 439/782 [07:49<06:28,  1.13s/it]

Iter 439.0, Minibatch Loss= 2.290027, Training Accuracy= 0.71875


 56%|████████████████████████████████████                            | 440/782 [07:50<06:30,  1.14s/it]

Iter 440.0, Minibatch Loss= 3.266741, Training Accuracy= 0.56250


 56%|████████████████████████████████████                            | 441/782 [07:51<06:22,  1.12s/it]

Iter 441.0, Minibatch Loss= 5.191687, Training Accuracy= 0.74219


 57%|████████████████████████████████████▏                           | 442/782 [07:52<06:26,  1.14s/it]

Iter 442.0, Minibatch Loss= 10.997621, Training Accuracy= 0.41406


 57%|████████████████████████████████████▎                           | 443/782 [07:53<06:18,  1.12s/it]

Iter 443.0, Minibatch Loss= 27.599159, Training Accuracy= 0.66406


 57%|████████████████████████████████████▎                           | 444/782 [07:54<06:14,  1.11s/it]

Iter 444.0, Minibatch Loss= 8.793921, Training Accuracy= 0.69531


 57%|████████████████████████████████████▍                           | 445/782 [07:55<06:17,  1.12s/it]

Iter 445.0, Minibatch Loss= 14.596254, Training Accuracy= 0.18750


 57%|████████████████████████████████████▌                           | 446/782 [07:56<06:11,  1.10s/it]

Iter 446.0, Minibatch Loss= 22.644936, Training Accuracy= 0.66406


 57%|████████████████████████████████████▌                           | 447/782 [07:57<06:07,  1.10s/it]

Iter 447.0, Minibatch Loss= 18.027847, Training Accuracy= 0.32031


 57%|████████████████████████████████████▋                           | 448/782 [07:59<06:04,  1.09s/it]

Iter 448.0, Minibatch Loss= 35.542599, Training Accuracy= 0.66406


 57%|████████████████████████████████████▋                           | 449/782 [08:00<06:07,  1.10s/it]

Iter 449.0, Minibatch Loss= 16.893917, Training Accuracy= 0.66406


 58%|████████████████████████████████████▊                           | 450/782 [08:01<06:02,  1.09s/it]

Iter 450.0, Minibatch Loss= 3.519369, Training Accuracy= 0.70312


 58%|████████████████████████████████████▉                           | 451/782 [08:02<06:08,  1.11s/it]

Iter 451.0, Minibatch Loss= 4.288655, Training Accuracy= 0.71094


 58%|████████████████████████████████████▉                           | 452/782 [08:03<06:05,  1.11s/it]

Iter 452.0, Minibatch Loss= 15.695216, Training Accuracy= 0.30469


 58%|█████████████████████████████████████                           | 453/782 [08:04<06:05,  1.11s/it]

Iter 453.0, Minibatch Loss= 24.652706, Training Accuracy= 0.70312


 58%|█████████████████████████████████████▏                          | 454/782 [08:05<06:03,  1.11s/it]

Iter 454.0, Minibatch Loss= 11.955254, Training Accuracy= 0.56250


 58%|█████████████████████████████████████▏                          | 455/782 [08:06<06:03,  1.11s/it]

Iter 455.0, Minibatch Loss= 10.486738, Training Accuracy= 0.70312


 58%|█████████████████████████████████████▎                          | 456/782 [08:07<06:00,  1.11s/it]

Iter 456.0, Minibatch Loss= 4.191582, Training Accuracy= 0.53125


 58%|█████████████████████████████████████▍                          | 457/782 [08:09<06:00,  1.11s/it]

Iter 457.0, Minibatch Loss= 17.780069, Training Accuracy= 0.70312


 59%|█████████████████████████████████████▍                          | 458/782 [08:10<06:04,  1.13s/it]

Iter 458.0, Minibatch Loss= 9.385832, Training Accuracy= 0.46875


 59%|█████████████████████████████████████▌                          | 459/782 [08:11<06:05,  1.13s/it]

Iter 459.0, Minibatch Loss= 15.312962, Training Accuracy= 0.66406


 59%|█████████████████████████████████████▋                          | 460/782 [08:12<06:00,  1.12s/it]

Iter 460.0, Minibatch Loss= 10.412487, Training Accuracy= 0.42188


 59%|█████████████████████████████████████▋                          | 461/782 [08:13<05:56,  1.11s/it]

Iter 461.0, Minibatch Loss= 25.190533, Training Accuracy= 0.66406


 59%|█████████████████████████████████████▊                          | 462/782 [08:14<05:55,  1.11s/it]

Iter 462.0, Minibatch Loss= 8.511370, Training Accuracy= 0.69531


 59%|█████████████████████████████████████▉                          | 463/782 [08:15<05:55,  1.11s/it]

Iter 463.0, Minibatch Loss= 9.795970, Training Accuracy= 0.31250


 59%|█████████████████████████████████████▉                          | 464/782 [08:16<05:56,  1.12s/it]

Iter 464.0, Minibatch Loss= 17.750202, Training Accuracy= 0.67188


 59%|██████████████████████████████████████                          | 465/782 [08:18<05:53,  1.12s/it]

Iter 465.0, Minibatch Loss= 18.110470, Training Accuracy= 0.32812


 60%|██████████████████████████████████████▏                         | 466/782 [08:19<05:57,  1.13s/it]

Iter 466.0, Minibatch Loss= 30.874912, Training Accuracy= 0.66406


 60%|██████████████████████████████████████▏                         | 467/782 [08:20<05:55,  1.13s/it]

Iter 467.0, Minibatch Loss= 12.526141, Training Accuracy= 0.70312


 60%|██████████████████████████████████████▎                         | 468/782 [08:21<05:50,  1.11s/it]

Iter 468.0, Minibatch Loss= 2.696756, Training Accuracy= 0.62500


 60%|██████████████████████████████████████▍                         | 469/782 [08:22<05:45,  1.11s/it]

Iter 469.0, Minibatch Loss= 10.870329, Training Accuracy= 0.70312


 60%|██████████████████████████████████████▍                         | 470/782 [08:23<05:51,  1.13s/it]

Iter 470.0, Minibatch Loss= 7.944742, Training Accuracy= 0.40625


 60%|██████████████████████████████████████▌                         | 471/782 [08:24<05:47,  1.12s/it]

Iter 471.0, Minibatch Loss= 14.896014, Training Accuracy= 0.67969


 60%|██████████████████████████████████████▋                         | 472/782 [08:25<05:47,  1.12s/it]

Iter 472.0, Minibatch Loss= 19.273632, Training Accuracy= 0.29688


 60%|██████████████████████████████████████▋                         | 473/782 [08:26<05:43,  1.11s/it]

Iter 473.0, Minibatch Loss= 29.730320, Training Accuracy= 0.66406


 61%|██████████████████████████████████████▊                         | 474/782 [08:28<05:40,  1.11s/it]

Iter 474.0, Minibatch Loss= 12.172076, Training Accuracy= 0.70312


 61%|██████████████████████████████████████▊                         | 475/782 [08:29<05:38,  1.10s/it]

Iter 475.0, Minibatch Loss= 1.611547, Training Accuracy= 0.78125


 61%|██████████████████████████████████████▉                         | 476/782 [08:30<05:35,  1.10s/it]

Iter 476.0, Minibatch Loss= 2.953281, Training Accuracy= 0.72656


 61%|███████████████████████████████████████                         | 477/782 [08:31<05:39,  1.11s/it]

Iter 477.0, Minibatch Loss= 9.870026, Training Accuracy= 0.42188


 61%|███████████████████████████████████████                         | 478/782 [08:32<05:38,  1.11s/it]

Iter 478.0, Minibatch Loss= 19.699421, Training Accuracy= 0.70312


 61%|███████████████████████████████████████▏                        | 479/782 [08:33<05:36,  1.11s/it]

Iter 479.0, Minibatch Loss= 14.371450, Training Accuracy= 0.39844


 61%|███████████████████████████████████████▎                        | 480/782 [08:34<05:32,  1.10s/it]

Iter 480.0, Minibatch Loss= 20.859560, Training Accuracy= 0.66406


 62%|███████████████████████████████████████▎                        | 481/782 [08:35<05:31,  1.10s/it]

Iter 481.0, Minibatch Loss= 4.760815, Training Accuracy= 0.73438


 62%|███████████████████████████████████████▍                        | 482/782 [08:36<05:33,  1.11s/it]

Iter 482.0, Minibatch Loss= 3.405820, Training Accuracy= 0.74219


 62%|███████████████████████████████████████▌                        | 483/782 [08:38<05:31,  1.11s/it]

Iter 483.0, Minibatch Loss= 3.203845, Training Accuracy= 0.60938


 62%|███████████████████████████████████████▌                        | 484/782 [08:39<05:39,  1.14s/it]

Iter 484.0, Minibatch Loss= 13.911950, Training Accuracy= 0.70312


 62%|███████████████████████████████████████▋                        | 485/782 [08:40<05:35,  1.13s/it]

Iter 485.0, Minibatch Loss= 6.031824, Training Accuracy= 0.51562


 62%|███████████████████████████████████████▊                        | 486/782 [08:41<05:32,  1.12s/it]

Iter 486.0, Minibatch Loss= 11.999781, Training Accuracy= 0.67969


 62%|███████████████████████████████████████▊                        | 487/782 [08:42<05:36,  1.14s/it]

Iter 487.0, Minibatch Loss= 18.756182, Training Accuracy= 0.31250


 62%|███████████████████████████████████████▉                        | 488/782 [08:43<05:31,  1.13s/it]

Iter 488.0, Minibatch Loss= 29.717138, Training Accuracy= 0.66406


 63%|████████████████████████████████████████                        | 489/782 [08:44<05:30,  1.13s/it]

Iter 489.0, Minibatch Loss= 9.853918, Training Accuracy= 0.69531


 63%|████████████████████████████████████████                        | 490/782 [08:45<05:31,  1.13s/it]

Iter 490.0, Minibatch Loss= 7.579071, Training Accuracy= 0.33594


 63%|████████████████████████████████████████▏                       | 491/782 [08:47<05:32,  1.14s/it]

Iter 491.0, Minibatch Loss= 19.586168, Training Accuracy= 0.66406


 63%|████████████████████████████████████████▎                       | 492/782 [08:48<05:29,  1.13s/it]

Iter 492.0, Minibatch Loss= 15.587184, Training Accuracy= 0.36719


 63%|████████████████████████████████████████▎                       | 493/782 [08:49<05:40,  1.18s/it]

Iter 493.0, Minibatch Loss= 29.886814, Training Accuracy= 0.66406


 63%|████████████████████████████████████████▍                       | 494/782 [08:50<05:35,  1.16s/it]

Iter 494.0, Minibatch Loss= 11.048400, Training Accuracy= 0.70312


 63%|████████████████████████████████████████▌                       | 495/782 [08:51<05:33,  1.16s/it]

Iter 495.0, Minibatch Loss= 3.265014, Training Accuracy= 0.62500


 63%|████████████████████████████████████████▌                       | 496/782 [08:53<05:33,  1.17s/it]

Iter 496.0, Minibatch Loss= 13.183069, Training Accuracy= 0.70312


 64%|████████████████████████████████████████▋                       | 497/782 [08:54<05:30,  1.16s/it]

Iter 497.0, Minibatch Loss= 7.152469, Training Accuracy= 0.46875


 64%|████████████████████████████████████████▊                       | 498/782 [08:55<05:28,  1.16s/it]

Iter 498.0, Minibatch Loss= 12.685631, Training Accuracy= 0.67969


 64%|████████████████████████████████████████▊                       | 499/782 [08:56<05:23,  1.14s/it]

Iter 499.0, Minibatch Loss= 14.928700, Training Accuracy= 0.39062


 64%|████████████████████████████████████████▉                       | 500/782 [08:57<05:18,  1.13s/it]

Iter 500.0, Minibatch Loss= 40.815033, Training Accuracy= 0.53125


 64%|█████████████████████████████████████████                       | 501/782 [08:58<05:18,  1.13s/it]

Iter 501.0, Minibatch Loss= 3.612513, Training Accuracy= 0.49219


 64%|█████████████████████████████████████████                       | 502/782 [08:59<05:16,  1.13s/it]

Iter 502.0, Minibatch Loss= 10.860339, Training Accuracy= 0.60938


 64%|█████████████████████████████████████████▏                      | 503/782 [09:00<05:15,  1.13s/it]

Iter 503.0, Minibatch Loss= 26.334354, Training Accuracy= 0.38281


 64%|█████████████████████████████████████████▏                      | 504/782 [09:02<05:15,  1.13s/it]

Iter 504.0, Minibatch Loss= 25.842581, Training Accuracy= 0.60938


 65%|█████████████████████████████████████████▎                      | 505/782 [09:03<05:11,  1.13s/it]

Iter 505.0, Minibatch Loss= 18.209566, Training Accuracy= 0.18750


 65%|█████████████████████████████████████████▍                      | 506/782 [09:04<05:05,  1.11s/it]

Iter 506.0, Minibatch Loss= 22.825916, Training Accuracy= 0.53906


 65%|█████████████████████████████████████████▍                      | 507/782 [09:05<05:01,  1.10s/it]

Iter 507.0, Minibatch Loss= 40.915691, Training Accuracy= 0.30469


 65%|█████████████████████████████████████████▌                      | 508/782 [09:06<05:02,  1.10s/it]

Iter 508.0, Minibatch Loss= 33.290825, Training Accuracy= 0.53125


 65%|█████████████████████████████████████████▋                      | 509/782 [09:07<04:58,  1.09s/it]

Iter 509.0, Minibatch Loss= 6.071295, Training Accuracy= 0.46875


 65%|█████████████████████████████████████████▋                      | 510/782 [09:08<05:00,  1.10s/it]

Iter 510.0, Minibatch Loss= 27.401176, Training Accuracy= 0.60938


 65%|█████████████████████████████████████████▊                      | 511/782 [09:09<04:58,  1.10s/it]

Iter 511.0, Minibatch Loss= 3.340209, Training Accuracy= 0.61719


 65%|█████████████████████████████████████████▉                      | 512/782 [09:10<04:56,  1.10s/it]

Iter 512.0, Minibatch Loss= 15.041376, Training Accuracy= 0.40625


 66%|█████████████████████████████████████████▉                      | 513/782 [09:11<04:53,  1.09s/it]

Iter 513.0, Minibatch Loss= 30.353205, Training Accuracy= 0.60938


 66%|██████████████████████████████████████████                      | 514/782 [09:13<04:57,  1.11s/it]

Iter 514.0, Minibatch Loss= 14.294703, Training Accuracy= 0.29688


 66%|██████████████████████████████████████████▏                     | 515/782 [09:14<04:52,  1.10s/it]

Iter 515.0, Minibatch Loss= 17.287668, Training Accuracy= 0.54688


 66%|██████████████████████████████████████████▏                     | 516/782 [09:15<04:56,  1.11s/it]

Iter 516.0, Minibatch Loss= 36.915710, Training Accuracy= 0.30469


 66%|██████████████████████████████████████████▎                     | 517/782 [09:16<05:05,  1.15s/it]

Iter 517.0, Minibatch Loss= 29.268372, Training Accuracy= 0.59375


 66%|██████████████████████████████████████████▍                     | 518/782 [09:17<05:12,  1.18s/it]

Iter 518.0, Minibatch Loss= 3.993852, Training Accuracy= 0.64844


 66%|██████████████████████████████████████████▍                     | 519/782 [09:18<05:11,  1.18s/it]

Iter 519.0, Minibatch Loss= 9.263265, Training Accuracy= 0.42188


 66%|██████████████████████████████████████████▌                     | 520/782 [09:20<05:09,  1.18s/it]

Iter 520.0, Minibatch Loss= 30.839018, Training Accuracy= 0.60938


 67%|██████████████████████████████████████████▋                     | 521/782 [09:21<04:59,  1.15s/it]

Iter 521.0, Minibatch Loss= 15.936236, Training Accuracy= 0.30469


 67%|██████████████████████████████████████████▋                     | 522/782 [09:22<04:59,  1.15s/it]

Iter 522.0, Minibatch Loss= 18.770370, Training Accuracy= 0.53906


 67%|██████████████████████████████████████████▊                     | 523/782 [09:23<04:52,  1.13s/it]

Iter 523.0, Minibatch Loss= 33.349205, Training Accuracy= 0.32031


 67%|██████████████████████████████████████████▉                     | 524/782 [09:24<04:49,  1.12s/it]

Iter 524.0, Minibatch Loss= 28.893517, Training Accuracy= 0.60156


 67%|██████████████████████████████████████████▉                     | 525/782 [09:25<04:49,  1.13s/it]

Iter 525.0, Minibatch Loss= 4.350336, Training Accuracy= 0.65625


 67%|███████████████████████████████████████████                     | 526/782 [09:26<04:45,  1.11s/it]

Iter 526.0, Minibatch Loss= 10.608441, Training Accuracy= 0.42969


 67%|███████████████████████████████████████████▏                    | 527/782 [09:27<04:42,  1.11s/it]

Iter 527.0, Minibatch Loss= 30.105389, Training Accuracy= 0.60938


 68%|███████████████████████████████████████████▏                    | 528/782 [09:29<04:45,  1.12s/it]

Iter 528.0, Minibatch Loss= 14.003825, Training Accuracy= 0.30469


 68%|███████████████████████████████████████████▎                    | 529/782 [09:30<04:43,  1.12s/it]

Iter 529.0, Minibatch Loss= 15.792908, Training Accuracy= 0.54688


 68%|███████████████████████████████████████████▍                    | 530/782 [09:31<04:39,  1.11s/it]

Iter 530.0, Minibatch Loss= 33.525581, Training Accuracy= 0.32812


 68%|███████████████████████████████████████████▍                    | 531/782 [09:32<04:37,  1.10s/it]

Iter 531.0, Minibatch Loss= 27.700613, Training Accuracy= 0.60938


 68%|███████████████████████████████████████████▌                    | 532/782 [09:33<04:38,  1.11s/it]

Iter 532.0, Minibatch Loss= 4.238469, Training Accuracy= 0.66406


 68%|███████████████████████████████████████████▌                    | 533/782 [09:34<04:37,  1.11s/it]

Iter 533.0, Minibatch Loss= 5.870746, Training Accuracy= 0.48438


 68%|███████████████████████████████████████████▋                    | 534/782 [09:35<04:34,  1.11s/it]

Iter 534.0, Minibatch Loss= 26.684502, Training Accuracy= 0.60938


 68%|███████████████████████████████████████████▊                    | 535/782 [09:36<04:35,  1.12s/it]

Iter 535.0, Minibatch Loss= 13.098716, Training Accuracy= 0.28906


 69%|███████████████████████████████████████████▊                    | 536/782 [09:37<04:33,  1.11s/it]

Iter 536.0, Minibatch Loss= 16.151421, Training Accuracy= 0.55469


 69%|███████████████████████████████████████████▉                    | 537/782 [09:38<04:31,  1.11s/it]

Iter 537.0, Minibatch Loss= 33.823013, Training Accuracy= 0.32031


 69%|████████████████████████████████████████████                    | 538/782 [09:40<04:35,  1.13s/it]

Iter 538.0, Minibatch Loss= 29.358818, Training Accuracy= 0.59375


 69%|████████████████████████████████████████████                    | 539/782 [09:41<04:30,  1.12s/it]

Iter 539.0, Minibatch Loss= 4.647189, Training Accuracy= 0.66406


 69%|████████████████████████████████████████████▏                   | 540/782 [09:42<04:36,  1.14s/it]

Iter 540.0, Minibatch Loss= 7.095621, Training Accuracy= 0.46875


 69%|████████████████████████████████████████████▎                   | 541/782 [09:43<04:44,  1.18s/it]

Iter 541.0, Minibatch Loss= 28.883820, Training Accuracy= 0.60938


 69%|████████████████████████████████████████████▎                   | 542/782 [09:44<04:38,  1.16s/it]

Iter 542.0, Minibatch Loss= 9.253104, Training Accuracy= 0.38281


 69%|████████████████████████████████████████████▍                   | 543/782 [09:45<04:31,  1.14s/it]

Iter 543.0, Minibatch Loss= 10.191427, Training Accuracy= 0.61719


 70%|████████████████████████████████████████████▌                   | 544/782 [09:46<04:27,  1.12s/it]

Iter 544.0, Minibatch Loss= 16.492140, Training Accuracy= 0.41406


 70%|████████████████████████████████████████████▌                   | 545/782 [09:48<04:26,  1.12s/it]

Iter 545.0, Minibatch Loss= 34.925163, Training Accuracy= 0.53125


 70%|████████████████████████████████████████████▋                   | 546/782 [09:49<04:35,  1.17s/it]

Iter 546.0, Minibatch Loss= 4.961824, Training Accuracy= 0.63281


 70%|████████████████████████████████████████████▊                   | 547/782 [09:50<04:33,  1.16s/it]

Iter 547.0, Minibatch Loss= 11.114033, Training Accuracy= 0.43750


 70%|████████████████████████████████████████████▊                   | 548/782 [09:51<04:27,  1.14s/it]

Iter 548.0, Minibatch Loss= 29.501602, Training Accuracy= 0.60938


 70%|████████████████████████████████████████████▉                   | 549/782 [09:52<04:22,  1.12s/it]

Iter 549.0, Minibatch Loss= 14.332094, Training Accuracy= 0.30469


 70%|█████████████████████████████████████████████                   | 550/782 [09:53<04:18,  1.12s/it]

Iter 550.0, Minibatch Loss= 14.424897, Training Accuracy= 0.56250


 70%|█████████████████████████████████████████████                   | 551/782 [09:54<04:20,  1.13s/it]

Iter 551.0, Minibatch Loss= 32.083801, Training Accuracy= 0.32812


 71%|█████████████████████████████████████████████▏                  | 552/782 [09:56<04:17,  1.12s/it]

Iter 552.0, Minibatch Loss= 27.534874, Training Accuracy= 0.60938


 71%|█████████████████████████████████████████████▎                  | 553/782 [09:57<04:14,  1.11s/it]

Iter 553.0, Minibatch Loss= 4.775865, Training Accuracy= 0.68750


 71%|█████████████████████████████████████████████▎                  | 554/782 [09:58<04:11,  1.10s/it]

Iter 554.0, Minibatch Loss= 5.266564, Training Accuracy= 0.55469


 71%|█████████████████████████████████████████████▍                  | 555/782 [09:59<04:12,  1.11s/it]

Iter 555.0, Minibatch Loss= 21.470131, Training Accuracy= 0.60938


 71%|█████████████████████████████████████████████▌                  | 556/782 [10:00<04:09,  1.11s/it]

Iter 556.0, Minibatch Loss= 9.598763, Training Accuracy= 0.32031


 71%|█████████████████████████████████████████████▌                  | 557/782 [10:01<04:07,  1.10s/it]

Iter 557.0, Minibatch Loss= 11.030505, Training Accuracy= 0.57812


 71%|█████████████████████████████████████████████▋                  | 558/782 [10:02<04:09,  1.11s/it]

Iter 558.0, Minibatch Loss= 16.091209, Training Accuracy= 0.43750


 71%|█████████████████████████████████████████████▋                  | 559/782 [10:03<04:07,  1.11s/it]

Iter 559.0, Minibatch Loss= 32.962875, Training Accuracy= 0.55469


 72%|█████████████████████████████████████████████▊                  | 560/782 [10:04<04:09,  1.12s/it]

Iter 560.0, Minibatch Loss= 5.751815, Training Accuracy= 0.63281


 72%|█████████████████████████████████████████████▉                  | 561/782 [10:06<04:07,  1.12s/it]

Iter 561.0, Minibatch Loss= 11.758579, Training Accuracy= 0.44531


 72%|█████████████████████████████████████████████▉                  | 562/782 [10:07<04:05,  1.12s/it]

Iter 562.0, Minibatch Loss= 28.115295, Training Accuracy= 0.60938


 72%|██████████████████████████████████████████████                  | 563/782 [10:08<04:06,  1.13s/it]

Iter 563.0, Minibatch Loss= 5.037019, Training Accuracy= 0.46875


 72%|██████████████████████████████████████████████▏                 | 564/782 [10:09<04:03,  1.12s/it]

Iter 564.0, Minibatch Loss= 8.800114, Training Accuracy= 0.57812


 72%|██████████████████████████████████████████████▏                 | 565/782 [10:10<04:00,  1.11s/it]

Iter 565.0, Minibatch Loss= 24.609457, Training Accuracy= 0.60938


 72%|██████████████████████████████████████████████▎                 | 566/782 [10:11<03:58,  1.11s/it]

Iter 566.0, Minibatch Loss= 5.363565, Training Accuracy= 0.60156


 73%|██████████████████████████████████████████████▍                 | 567/782 [10:12<04:01,  1.12s/it]

Iter 567.0, Minibatch Loss= 18.711229, Training Accuracy= 0.60938


 73%|██████████████████████████████████████████████▍                 | 568/782 [10:13<03:59,  1.12s/it]

Iter 568.0, Minibatch Loss= 6.900558, Training Accuracy= 0.52344


 73%|██████████████████████████████████████████████▌                 | 569/782 [10:15<03:59,  1.12s/it]

Iter 569.0, Minibatch Loss= 23.481131, Training Accuracy= 0.60938


 73%|██████████████████████████████████████████████▋                 | 570/782 [10:16<03:58,  1.13s/it]

Iter 570.0, Minibatch Loss= 15.013763, Training Accuracy= 0.28906


 73%|██████████████████████████████████████████████▋                 | 571/782 [10:17<03:54,  1.11s/it]

Iter 571.0, Minibatch Loss= 11.728324, Training Accuracy= 0.62500


 73%|██████████████████████████████████████████████▊                 | 572/782 [10:18<03:52,  1.11s/it]

Iter 572.0, Minibatch Loss= 17.413952, Training Accuracy= 0.42969


 73%|██████████████████████████████████████████████▉                 | 573/782 [10:19<03:59,  1.15s/it]

Iter 573.0, Minibatch Loss= 33.117451, Training Accuracy= 0.60156


 73%|██████████████████████████████████████████████▉                 | 574/782 [10:20<03:59,  1.15s/it]

Iter 574.0, Minibatch Loss= 6.663813, Training Accuracy= 0.63281


 74%|███████████████████████████████████████████████                 | 575/782 [10:21<03:55,  1.14s/it]

Iter 575.0, Minibatch Loss= 12.427537, Training Accuracy= 0.46094


 74%|███████████████████████████████████████████████▏                | 576/782 [10:22<03:55,  1.14s/it]

Iter 576.0, Minibatch Loss= 27.162819, Training Accuracy= 0.60938


 74%|███████████████████████████████████████████████▏                | 577/782 [10:24<03:50,  1.12s/it]

Iter 577.0, Minibatch Loss= 3.061224, Training Accuracy= 0.60156


 74%|███████████████████████████████████████████████▎                | 578/782 [10:25<03:47,  1.12s/it]

Iter 578.0, Minibatch Loss= 12.963999, Training Accuracy= 0.44531


 74%|███████████████████████████████████████████████▍                | 579/782 [10:26<03:49,  1.13s/it]

Iter 579.0, Minibatch Loss= 26.963333, Training Accuracy= 0.60938


 74%|███████████████████████████████████████████████▍                | 580/782 [10:27<03:47,  1.12s/it]

Iter 580.0, Minibatch Loss= 6.223307, Training Accuracy= 0.43750


 74%|███████████████████████████████████████████████▌                | 581/782 [10:28<03:43,  1.11s/it]

Iter 581.0, Minibatch Loss= 7.667606, Training Accuracy= 0.68750


 74%|███████████████████████████████████████████████▋                | 582/782 [10:29<03:39,  1.10s/it]

Iter 582.0, Minibatch Loss= 6.836073, Training Accuracy= 0.64844


 75%|███████████████████████████████████████████████▋                | 583/782 [10:30<03:39,  1.10s/it]

Iter 583.0, Minibatch Loss= 13.034386, Training Accuracy= 0.46875


 75%|███████████████████████████████████████████████▊                | 584/782 [10:31<03:42,  1.12s/it]

Iter 584.0, Minibatch Loss= 27.651730, Training Accuracy= 0.60938


 75%|███████████████████████████████████████████████▉                | 585/782 [10:32<03:39,  1.11s/it]

Iter 585.0, Minibatch Loss= 3.106783, Training Accuracy= 0.67188


 75%|███████████████████████████████████████████████▉                | 586/782 [10:34<03:39,  1.12s/it]

Iter 586.0, Minibatch Loss= 5.801566, Training Accuracy= 0.53906


 75%|████████████████████████████████████████████████                | 587/782 [10:35<03:37,  1.12s/it]

Iter 587.0, Minibatch Loss= 19.181042, Training Accuracy= 0.60938


 75%|████████████████████████████████████████████████                | 588/782 [10:36<03:34,  1.10s/it]

Iter 588.0, Minibatch Loss= 8.326076, Training Accuracy= 0.34375


 75%|████████████████████████████████████████████████▏               | 589/782 [10:37<03:32,  1.10s/it]

Iter 589.0, Minibatch Loss= 9.638059, Training Accuracy= 0.65625


 75%|████████████████████████████████████████████████▎               | 590/782 [10:38<03:33,  1.11s/it]

Iter 590.0, Minibatch Loss= 12.813533, Training Accuracy= 0.52344


 76%|████████████████████████████████████████████████▎               | 591/782 [10:39<03:32,  1.11s/it]

Iter 591.0, Minibatch Loss= 27.610455, Training Accuracy= 0.60938


 76%|████████████████████████████████████████████████▍               | 592/782 [10:40<03:33,  1.12s/it]

Iter 592.0, Minibatch Loss= 5.172806, Training Accuracy= 0.71875


 76%|████████████████████████████████████████████████▌               | 593/782 [10:41<03:29,  1.11s/it]

Iter 593.0, Minibatch Loss= 3.696774, Training Accuracy= 0.71094


 76%|████████████████████████████████████████████████▌               | 594/782 [10:42<03:26,  1.10s/it]

Iter 594.0, Minibatch Loss= 3.050909, Training Accuracy= 0.65625


 76%|████████████████████████████████████████████████▋               | 595/782 [10:44<03:24,  1.09s/it]

Iter 595.0, Minibatch Loss= 13.720398, Training Accuracy= 0.61719


 76%|████████████████████████████████████████████████▊               | 596/782 [10:45<03:26,  1.11s/it]

Iter 596.0, Minibatch Loss= 12.431932, Training Accuracy= 0.33594


 76%|████████████████████████████████████████████████▊               | 597/782 [10:46<03:27,  1.12s/it]

Iter 597.0, Minibatch Loss= 12.264807, Training Accuracy= 0.64844


 76%|████████████████████████████████████████████████▉               | 598/782 [10:47<03:28,  1.13s/it]

Iter 598.0, Minibatch Loss= 24.477249, Training Accuracy= 0.42969


 77%|█████████████████████████████████████████████████               | 599/782 [10:48<03:34,  1.17s/it]

Iter 599.0, Minibatch Loss= 25.875647, Training Accuracy= 0.60938


 77%|█████████████████████████████████████████████████               | 600/782 [10:49<03:32,  1.17s/it]

Iter 600.0, Minibatch Loss= 4.556990, Training Accuracy= 0.65625


 77%|█████████████████████████████████████████████████▏              | 601/782 [10:51<03:30,  1.16s/it]

Iter 601.0, Minibatch Loss= 12.741974, Training Accuracy= 0.61719


 77%|█████████████████████████████████████████████████▎              | 602/782 [10:52<03:23,  1.13s/it]

Iter 602.0, Minibatch Loss= 13.827370, Training Accuracy= 0.42969


 77%|█████████████████████████████████████████████████▎              | 603/782 [10:53<03:18,  1.11s/it]

Iter 603.0, Minibatch Loss= 24.170956, Training Accuracy= 0.60938


 77%|█████████████████████████████████████████████████▍              | 604/782 [10:54<03:14,  1.09s/it]

Iter 604.0, Minibatch Loss= 16.068821, Training Accuracy= 0.26562


 77%|█████████████████████████████████████████████████▌              | 605/782 [10:55<03:16,  1.11s/it]

Iter 605.0, Minibatch Loss= 13.942989, Training Accuracy= 0.57812


 77%|█████████████████████████████████████████████████▌              | 606/782 [10:56<03:13,  1.10s/it]

Iter 606.0, Minibatch Loss= 29.635780, Training Accuracy= 0.34375


 78%|█████████████████████████████████████████████████▋              | 607/782 [10:57<03:09,  1.08s/it]

Iter 607.0, Minibatch Loss= 27.516506, Training Accuracy= 0.60938


 78%|█████████████████████████████████████████████████▊              | 608/782 [10:58<03:10,  1.10s/it]

Iter 608.0, Minibatch Loss= 4.010057, Training Accuracy= 0.68750


 78%|█████████████████████████████████████████████████▊              | 609/782 [10:59<03:08,  1.09s/it]

Iter 609.0, Minibatch Loss= 3.453806, Training Accuracy= 0.61719


 78%|█████████████████████████████████████████████████▉              | 610/782 [11:00<03:07,  1.09s/it]

Iter 610.0, Minibatch Loss= 15.933922, Training Accuracy= 0.60938


 78%|██████████████████████████████████████████████████              | 611/782 [11:01<03:07,  1.10s/it]

Iter 611.0, Minibatch Loss= 10.033895, Training Accuracy= 0.38281


 78%|██████████████████████████████████████████████████              | 612/782 [11:02<03:04,  1.08s/it]

Iter 612.0, Minibatch Loss= 9.602568, Training Accuracy= 0.65625


 78%|██████████████████████████████████████████████████▏             | 613/782 [11:04<03:04,  1.09s/it]

Iter 613.0, Minibatch Loss= 15.401822, Training Accuracy= 0.49219


 79%|██████████████████████████████████████████████████▎             | 614/782 [11:05<03:05,  1.10s/it]

Iter 614.0, Minibatch Loss= 26.924561, Training Accuracy= 0.60938


 79%|██████████████████████████████████████████████████▎             | 615/782 [11:06<03:01,  1.09s/it]

Iter 615.0, Minibatch Loss= 4.348109, Training Accuracy= 0.70312


 79%|██████████████████████████████████████████████████▍             | 616/782 [11:07<02:58,  1.08s/it]

Iter 616.0, Minibatch Loss= 2.891268, Training Accuracy= 0.72656


 79%|██████████████████████████████████████████████████▍             | 617/782 [11:08<02:56,  1.07s/it]

Iter 617.0, Minibatch Loss= 3.040822, Training Accuracy= 0.66406


 79%|██████████████████████████████████████████████████▌             | 618/782 [11:09<02:59,  1.09s/it]

Iter 618.0, Minibatch Loss= 14.523973, Training Accuracy= 0.44531


 79%|██████████████████████████████████████████████████▋             | 619/782 [11:10<02:56,  1.08s/it]

Iter 619.0, Minibatch Loss= 23.235107, Training Accuracy= 0.60938


 79%|██████████████████████████████████████████████████▋             | 620/782 [11:11<02:56,  1.09s/it]

Iter 620.0, Minibatch Loss= 7.276022, Training Accuracy= 0.34375


 79%|██████████████████████████████████████████████████▊             | 621/782 [11:12<02:55,  1.09s/it]

Iter 621.0, Minibatch Loss= 10.559985, Training Accuracy= 0.60938


 80%|██████████████████████████████████████████████████▉             | 622/782 [11:13<02:53,  1.08s/it]

Iter 622.0, Minibatch Loss= 13.574461, Training Accuracy= 0.53125


 80%|██████████████████████████████████████████████████▉             | 623/782 [11:14<02:52,  1.09s/it]

Iter 623.0, Minibatch Loss= 26.103790, Training Accuracy= 0.60938


 80%|███████████████████████████████████████████████████             | 624/782 [11:15<02:52,  1.09s/it]

Iter 624.0, Minibatch Loss= 4.852970, Training Accuracy= 0.67188


 80%|███████████████████████████████████████████████████▏            | 625/782 [11:17<02:50,  1.08s/it]

Iter 625.0, Minibatch Loss= 24.250912, Training Accuracy= 0.60156


 80%|███████████████████████████████████████████████████▏            | 626/782 [11:18<02:48,  1.08s/it]

Iter 626.0, Minibatch Loss= 4.983138, Training Accuracy= 0.60156


 80%|███████████████████████████████████████████████████▎            | 627/782 [11:19<02:52,  1.11s/it]

Iter 627.0, Minibatch Loss= 6.145079, Training Accuracy= 0.42188


 80%|███████████████████████████████████████████████████▍            | 628/782 [11:20<02:50,  1.11s/it]

Iter 628.0, Minibatch Loss= 9.167036, Training Accuracy= 0.63281


 80%|███████████████████████████████████████████████████▍            | 629/782 [11:21<02:47,  1.09s/it]

Iter 629.0, Minibatch Loss= 9.705755, Training Accuracy= 0.42969


 81%|███████████████████████████████████████████████████▌            | 630/782 [11:22<02:44,  1.09s/it]

Iter 630.0, Minibatch Loss= 32.615807, Training Accuracy= 0.59375


 81%|███████████████████████████████████████████████████▋            | 631/782 [11:23<02:42,  1.07s/it]

Iter 631.0, Minibatch Loss= 8.831874, Training Accuracy= 0.61719


 81%|███████████████████████████████████████████████████▋            | 632/782 [11:24<02:41,  1.08s/it]

Iter 632.0, Minibatch Loss= 25.753971, Training Accuracy= 0.31250


 81%|███████████████████████████████████████████████████▊            | 633/782 [11:25<02:40,  1.07s/it]

Iter 633.0, Minibatch Loss= 28.883884, Training Accuracy= 0.60156


 81%|███████████████████████████████████████████████████▉            | 634/782 [11:26<02:38,  1.07s/it]

Iter 634.0, Minibatch Loss= 14.562876, Training Accuracy= 0.21875


 81%|███████████████████████████████████████████████████▉            | 635/782 [11:27<02:36,  1.06s/it]

Iter 635.0, Minibatch Loss= 28.332539, Training Accuracy= 0.57812


 81%|████████████████████████████████████████████████████            | 636/782 [11:28<02:34,  1.06s/it]

Iter 636.0, Minibatch Loss= 20.491653, Training Accuracy= 0.29688


 81%|████████████████████████████████████████████████████▏           | 637/782 [11:29<02:34,  1.07s/it]

Iter 637.0, Minibatch Loss= 41.640839, Training Accuracy= 0.57812


 82%|████████████████████████████████████████████████████▏           | 638/782 [11:31<02:32,  1.06s/it]

Iter 638.0, Minibatch Loss= 13.293161, Training Accuracy= 0.60156


 82%|████████████████████████████████████████████████████▎           | 639/782 [11:32<02:31,  1.06s/it]

Iter 639.0, Minibatch Loss= 16.818825, Training Accuracy= 0.32812


 82%|████████████████████████████████████████████████████▍           | 640/782 [11:33<02:30,  1.06s/it]

Iter 640.0, Minibatch Loss= 31.162209, Training Accuracy= 0.60156


 82%|████████████████████████████████████████████████████▍           | 641/782 [11:34<02:29,  1.06s/it]

Iter 641.0, Minibatch Loss= 11.428324, Training Accuracy= 0.58594


 82%|████████████████████████████████████████████████████▌           | 642/782 [11:35<02:28,  1.06s/it]

Iter 642.0, Minibatch Loss= 5.274898, Training Accuracy= 0.69531


 82%|████████████████████████████████████████████████████▌           | 643/782 [11:36<02:27,  1.06s/it]

Iter 643.0, Minibatch Loss= 10.312263, Training Accuracy= 0.39844


 82%|████████████████████████████████████████████████████▋           | 644/782 [11:37<02:26,  1.06s/it]

Iter 644.0, Minibatch Loss= 29.020298, Training Accuracy= 0.60156


 82%|████████████████████████████████████████████████████▊           | 645/782 [11:38<02:25,  1.06s/it]

Iter 645.0, Minibatch Loss= 10.777406, Training Accuracy= 0.35938


 83%|████████████████████████████████████████████████████▊           | 646/782 [11:39<02:25,  1.07s/it]

Iter 646.0, Minibatch Loss= 14.288323, Training Accuracy= 0.60156


 83%|████████████████████████████████████████████████████▉           | 647/782 [11:40<02:24,  1.07s/it]

Iter 647.0, Minibatch Loss= 25.219913, Training Accuracy= 0.32031


 83%|█████████████████████████████████████████████████████           | 648/782 [11:41<02:23,  1.07s/it]

Iter 648.0, Minibatch Loss= 30.797394, Training Accuracy= 0.60156


 83%|█████████████████████████████████████████████████████           | 649/782 [11:42<02:21,  1.06s/it]

Iter 649.0, Minibatch Loss= 6.759049, Training Accuracy= 0.64844


 83%|█████████████████████████████████████████████████████▏          | 650/782 [11:43<02:19,  1.06s/it]

Iter 650.0, Minibatch Loss= 17.268887, Training Accuracy= 0.36719


 83%|█████████████████████████████████████████████████████▎          | 651/782 [11:44<02:19,  1.07s/it]

Iter 651.0, Minibatch Loss= 24.668638, Training Accuracy= 0.60156


 83%|█████████████████████████████████████████████████████▎          | 652/782 [11:45<02:18,  1.07s/it]

Iter 652.0, Minibatch Loss= 15.007588, Training Accuracy= 0.24219


 84%|█████████████████████████████████████████████████████▍          | 653/782 [11:46<02:16,  1.06s/it]

Iter 653.0, Minibatch Loss= 24.402718, Training Accuracy= 0.58594


 84%|█████████████████████████████████████████████████████▌          | 654/782 [11:48<02:17,  1.07s/it]

Iter 654.0, Minibatch Loss= 20.522301, Training Accuracy= 0.33594


 84%|█████████████████████████████████████████████████████▌          | 655/782 [11:49<02:19,  1.10s/it]

Iter 655.0, Minibatch Loss= 34.681252, Training Accuracy= 0.57812


 84%|█████████████████████████████████████████████████████▋          | 656/782 [11:50<02:17,  1.09s/it]

Iter 656.0, Minibatch Loss= 8.235453, Training Accuracy= 0.64844


 84%|█████████████████████████████████████████████████████▊          | 657/782 [11:51<02:16,  1.09s/it]

Iter 657.0, Minibatch Loss= 15.767628, Training Accuracy= 0.37500


 84%|█████████████████████████████████████████████████████▊          | 658/782 [11:52<02:14,  1.08s/it]

Iter 658.0, Minibatch Loss= 26.047842, Training Accuracy= 0.60156


 84%|█████████████████████████████████████████████████████▉          | 659/782 [11:53<02:13,  1.09s/it]

Iter 659.0, Minibatch Loss= 11.358257, Training Accuracy= 0.33594


 84%|██████████████████████████████████████████████████████          | 660/782 [11:54<02:18,  1.13s/it]

Iter 660.0, Minibatch Loss= 20.174213, Training Accuracy= 0.60938


 85%|██████████████████████████████████████████████████████          | 661/782 [11:55<02:18,  1.14s/it]

Iter 661.0, Minibatch Loss= 18.319237, Training Accuracy= 0.39844


 85%|██████████████████████████████████████████████████████▏         | 662/782 [11:57<02:22,  1.19s/it]

Iter 662.0, Minibatch Loss= 31.048359, Training Accuracy= 0.57812


 85%|██████████████████████████████████████████████████████▎         | 663/782 [11:58<02:18,  1.17s/it]

Iter 663.0, Minibatch Loss= 5.787075, Training Accuracy= 0.70312


 85%|██████████████████████████████████████████████████████▎         | 664/782 [11:59<02:17,  1.17s/it]

Iter 664.0, Minibatch Loss= 7.408255, Training Accuracy= 0.49219


 85%|██████████████████████████████████████████████████████▍         | 665/782 [12:00<02:18,  1.18s/it]

Iter 665.0, Minibatch Loss= 21.700752, Training Accuracy= 0.60156


 85%|██████████████████████████████████████████████████████▌         | 666/782 [12:02<02:21,  1.22s/it]

Iter 666.0, Minibatch Loss= 11.524969, Training Accuracy= 0.25781


 85%|██████████████████████████████████████████████████████▌         | 667/782 [12:03<02:18,  1.20s/it]

Iter 667.0, Minibatch Loss= 21.620378, Training Accuracy= 0.60938


 85%|██████████████████████████████████████████████████████▋         | 668/782 [12:04<02:12,  1.16s/it]

Iter 668.0, Minibatch Loss= 19.326450, Training Accuracy= 0.36719


 86%|██████████████████████████████████████████████████████▊         | 669/782 [12:05<02:09,  1.14s/it]

Iter 669.0, Minibatch Loss= 31.603682, Training Accuracy= 0.57812


 86%|██████████████████████████████████████████████████████▊         | 670/782 [12:06<02:06,  1.13s/it]

Iter 670.0, Minibatch Loss= 6.241695, Training Accuracy= 0.70312


 86%|██████████████████████████████████████████████████████▉         | 671/782 [12:07<02:03,  1.11s/it]

Iter 671.0, Minibatch Loss= 8.691238, Training Accuracy= 0.48438


 86%|██████████████████████████████████████████████████████▉         | 672/782 [12:08<01:59,  1.09s/it]

Iter 672.0, Minibatch Loss= 21.397247, Training Accuracy= 0.60156


 86%|███████████████████████████████████████████████████████         | 673/782 [12:09<01:58,  1.09s/it]

Iter 673.0, Minibatch Loss= 11.194136, Training Accuracy= 0.28906


 86%|███████████████████████████████████████████████████████▏        | 674/782 [12:10<01:56,  1.07s/it]

Iter 674.0, Minibatch Loss= 20.081285, Training Accuracy= 0.61719


 86%|███████████████████████████████████████████████████████▏        | 675/782 [12:11<01:53,  1.06s/it]

Iter 675.0, Minibatch Loss= 19.087566, Training Accuracy= 0.36719


 86%|███████████████████████████████████████████████████████▎        | 676/782 [12:12<01:51,  1.05s/it]

Iter 676.0, Minibatch Loss= 30.672832, Training Accuracy= 0.57812


 87%|███████████████████████████████████████████████████████▍        | 677/782 [12:14<01:57,  1.12s/it]

Iter 677.0, Minibatch Loss= 5.816732, Training Accuracy= 0.70312


 87%|███████████████████████████████████████████████████████▍        | 678/782 [12:15<02:01,  1.17s/it]

Iter 678.0, Minibatch Loss= 6.072803, Training Accuracy= 0.56250


 87%|███████████████████████████████████████████████████████▌        | 679/782 [12:16<02:06,  1.23s/it]

Iter 679.0, Minibatch Loss= 16.783134, Training Accuracy= 0.60938


 87%|███████████████████████████████████████████████████████▋        | 680/782 [12:17<02:02,  1.20s/it]

Iter 680.0, Minibatch Loss= 10.301662, Training Accuracy= 0.28125


 87%|███████████████████████████████████████████████████████▋        | 681/782 [12:18<01:58,  1.17s/it]

Iter 681.0, Minibatch Loss= 18.622526, Training Accuracy= 0.62500


 87%|███████████████████████████████████████████████████████▊        | 682/782 [12:20<01:57,  1.17s/it]

Iter 682.0, Minibatch Loss= 21.045959, Training Accuracy= 0.37500


 87%|███████████████████████████████████████████████████████▉        | 683/782 [12:21<01:52,  1.14s/it]

Iter 683.0, Minibatch Loss= 30.167473, Training Accuracy= 0.57812


 87%|███████████████████████████████████████████████████████▉        | 684/782 [12:22<01:48,  1.11s/it]

Iter 684.0, Minibatch Loss= 5.940432, Training Accuracy= 0.71094


 88%|████████████████████████████████████████████████████████        | 685/782 [12:23<01:50,  1.14s/it]

Iter 685.0, Minibatch Loss= 5.946871, Training Accuracy= 0.59375


 88%|████████████████████████████████████████████████████████▏       | 686/782 [12:24<01:52,  1.18s/it]

Iter 686.0, Minibatch Loss= 14.957739, Training Accuracy= 0.60938


 88%|████████████████████████████████████████████████████████▏       | 687/782 [12:25<01:53,  1.19s/it]

Iter 687.0, Minibatch Loss= 9.987520, Training Accuracy= 0.28906


 88%|████████████████████████████████████████████████████████▎       | 688/782 [12:27<01:57,  1.25s/it]

Iter 688.0, Minibatch Loss= 16.992264, Training Accuracy= 0.64062


 88%|████████████████████████████████████████████████████████▍       | 689/782 [12:28<02:01,  1.30s/it]

Iter 689.0, Minibatch Loss= 20.622070, Training Accuracy= 0.36719


 88%|████████████████████████████████████████████████████████▍       | 690/782 [12:30<02:05,  1.37s/it]

Iter 690.0, Minibatch Loss= 28.905975, Training Accuracy= 0.57812


 88%|████████████████████████████████████████████████████████▌       | 691/782 [12:31<02:04,  1.37s/it]

Iter 691.0, Minibatch Loss= 5.557200, Training Accuracy= 0.71875


 88%|████████████████████████████████████████████████████████▋       | 692/782 [12:33<02:04,  1.39s/it]

Iter 692.0, Minibatch Loss= 6.019159, Training Accuracy= 0.60156


 89%|████████████████████████████████████████████████████████▋       | 693/782 [12:34<02:03,  1.39s/it]

Iter 693.0, Minibatch Loss= 14.363349, Training Accuracy= 0.62500


 89%|████████████████████████████████████████████████████████▊       | 694/782 [12:35<02:04,  1.41s/it]

Iter 694.0, Minibatch Loss= 8.993840, Training Accuracy= 0.33594


 89%|████████████████████████████████████████████████████████▉       | 695/782 [12:37<01:58,  1.36s/it]

Iter 695.0, Minibatch Loss= 15.408741, Training Accuracy= 0.64062


 89%|████████████████████████████████████████████████████████▉       | 696/782 [12:38<01:50,  1.28s/it]

Iter 696.0, Minibatch Loss= 19.356091, Training Accuracy= 0.42188


 89%|█████████████████████████████████████████████████████████       | 697/782 [12:39<01:44,  1.23s/it]

Iter 697.0, Minibatch Loss= 26.788218, Training Accuracy= 0.57812


 89%|█████████████████████████████████████████████████████████▏      | 698/782 [12:40<01:39,  1.19s/it]

Iter 698.0, Minibatch Loss= 4.899084, Training Accuracy= 0.68750


 89%|█████████████████████████████████████████████████████████▏      | 699/782 [12:41<01:37,  1.17s/it]

Iter 699.0, Minibatch Loss= 7.137381, Training Accuracy= 0.56250


 90%|█████████████████████████████████████████████████████████▎      | 700/782 [12:42<01:34,  1.16s/it]

Iter 700.0, Minibatch Loss= 15.440804, Training Accuracy= 0.62500


 90%|█████████████████████████████████████████████████████████▎      | 701/782 [12:43<01:32,  1.14s/it]

Iter 701.0, Minibatch Loss= 8.057154, Training Accuracy= 0.37500


 90%|█████████████████████████████████████████████████████████▍      | 702/782 [12:44<01:31,  1.14s/it]

Iter 702.0, Minibatch Loss= 13.842436, Training Accuracy= 0.65625


 90%|█████████████████████████████████████████████████████████▌      | 703/782 [12:46<01:29,  1.13s/it]

Iter 703.0, Minibatch Loss= 15.668312, Training Accuracy= 0.43750


 90%|█████████████████████████████████████████████████████████▌      | 704/782 [12:47<01:27,  1.12s/it]

Iter 704.0, Minibatch Loss= 24.753044, Training Accuracy= 0.57812


 90%|█████████████████████████████████████████████████████████▋      | 705/782 [12:48<01:26,  1.12s/it]

Iter 705.0, Minibatch Loss= 4.144975, Training Accuracy= 0.70312


 90%|█████████████████████████████████████████████████████████▊      | 706/782 [12:49<01:26,  1.14s/it]

Iter 706.0, Minibatch Loss= 6.287861, Training Accuracy= 0.59375


 90%|█████████████████████████████████████████████████████████▊      | 707/782 [12:50<01:24,  1.13s/it]

Iter 707.0, Minibatch Loss= 13.292862, Training Accuracy= 0.62500


 91%|█████████████████████████████████████████████████████████▉      | 708/782 [12:51<01:22,  1.12s/it]

Iter 708.0, Minibatch Loss= 6.704223, Training Accuracy= 0.38281


 91%|██████████████████████████████████████████████████████████      | 709/782 [12:52<01:20,  1.10s/it]

Iter 709.0, Minibatch Loss= 13.426979, Training Accuracy= 0.67969


 91%|██████████████████████████████████████████████████████████      | 710/782 [12:53<01:19,  1.10s/it]

Iter 710.0, Minibatch Loss= 12.240921, Training Accuracy= 0.53906


 91%|██████████████████████████████████████████████████████████▏     | 711/782 [12:54<01:19,  1.12s/it]

Iter 711.0, Minibatch Loss= 18.821953, Training Accuracy= 0.60156


 91%|██████████████████████████████████████████████████████████▎     | 712/782 [12:56<01:19,  1.14s/it]

Iter 712.0, Minibatch Loss= 5.657311, Training Accuracy= 0.65625


 91%|██████████████████████████████████████████████████████████▎     | 713/782 [12:57<01:17,  1.13s/it]

Iter 713.0, Minibatch Loss= 11.173815, Training Accuracy= 0.64062


 91%|██████████████████████████████████████████████████████████▍     | 714/782 [12:58<01:16,  1.12s/it]

Iter 714.0, Minibatch Loss= 5.224025, Training Accuracy= 0.60156


 91%|██████████████████████████████████████████████████████████▌     | 715/782 [12:59<01:14,  1.11s/it]

Iter 715.0, Minibatch Loss= 11.814884, Training Accuracy= 0.65625


 92%|██████████████████████████████████████████████████████████▌     | 716/782 [13:00<01:14,  1.12s/it]

Iter 716.0, Minibatch Loss= 6.724487, Training Accuracy= 0.60938


 92%|██████████████████████████████████████████████████████████▋     | 717/782 [13:01<01:12,  1.12s/it]

Iter 717.0, Minibatch Loss= 12.278806, Training Accuracy= 0.63281


 92%|██████████████████████████████████████████████████████████▊     | 718/782 [13:02<01:10,  1.10s/it]

Iter 718.0, Minibatch Loss= 4.894676, Training Accuracy= 0.53906


 92%|██████████████████████████████████████████████████████████▊     | 719/782 [13:03<01:09,  1.11s/it]

Iter 719.0, Minibatch Loss= 9.683080, Training Accuracy= 0.68750


 92%|██████████████████████████████████████████████████████████▉     | 720/782 [13:05<01:08,  1.11s/it]

Iter 720.0, Minibatch Loss= 9.521777, Training Accuracy= 0.57812


 92%|███████████████████████████████████████████████████████████     | 721/782 [13:06<01:07,  1.10s/it]

Iter 721.0, Minibatch Loss= 14.660158, Training Accuracy= 0.64062


 92%|███████████████████████████████████████████████████████████     | 722/782 [13:07<01:05,  1.10s/it]

Iter 722.0, Minibatch Loss= 3.913608, Training Accuracy= 0.67188


 92%|███████████████████████████████████████████████████████████▏    | 723/782 [13:08<01:04,  1.10s/it]

Iter 723.0, Minibatch Loss= 10.320342, Training Accuracy= 0.67188


 93%|███████████████████████████████████████████████████████████▎    | 724/782 [13:09<01:03,  1.09s/it]

Iter 724.0, Minibatch Loss= 2.650321, Training Accuracy= 0.68750


 93%|███████████████████████████████████████████████████████████▎    | 725/782 [13:10<01:01,  1.09s/it]

Iter 725.0, Minibatch Loss= 7.052928, Training Accuracy= 0.71094


 93%|███████████████████████████████████████████████████████████▍    | 726/782 [13:11<01:00,  1.08s/it]

Iter 726.0, Minibatch Loss= 3.769254, Training Accuracy= 0.67188


 93%|███████████████████████████████████████████████████████████▍    | 727/782 [13:12<00:59,  1.09s/it]

Iter 727.0, Minibatch Loss= 9.826447, Training Accuracy= 0.67188


 93%|███████████████████████████████████████████████████████████▌    | 728/782 [13:13<00:58,  1.08s/it]

Iter 728.0, Minibatch Loss= 2.544359, Training Accuracy= 0.68750


 93%|███████████████████████████████████████████████████████████▋    | 729/782 [13:14<00:57,  1.09s/it]

Iter 729.0, Minibatch Loss= 5.514244, Training Accuracy= 0.67969


 93%|███████████████████████████████████████████████████████████▋    | 730/782 [13:15<00:56,  1.09s/it]

Iter 730.0, Minibatch Loss= 7.776666, Training Accuracy= 0.52344


 93%|███████████████████████████████████████████████████████████▊    | 731/782 [13:16<00:55,  1.10s/it]

Iter 731.0, Minibatch Loss= 15.060823, Training Accuracy= 0.61719


 94%|███████████████████████████████████████████████████████████▉    | 732/782 [13:18<00:54,  1.09s/it]

Iter 732.0, Minibatch Loss= 6.014329, Training Accuracy= 0.47656


 94%|███████████████████████████████████████████████████████████▉    | 733/782 [13:19<00:54,  1.11s/it]

Iter 733.0, Minibatch Loss= 11.264915, Training Accuracy= 0.68750


 94%|████████████████████████████████████████████████████████████    | 734/782 [13:20<00:53,  1.12s/it]

Iter 734.0, Minibatch Loss= 9.893404, Training Accuracy= 0.58594


 94%|████████████████████████████████████████████████████████████▏   | 735/782 [13:21<00:53,  1.13s/it]

Iter 735.0, Minibatch Loss= 14.967882, Training Accuracy= 0.60938


 94%|████████████████████████████████████████████████████████████▏   | 736/782 [13:22<00:53,  1.16s/it]

Iter 736.0, Minibatch Loss= 5.082195, Training Accuracy= 0.65625


 94%|████████████████████████████████████████████████████████████▎   | 737/782 [13:23<00:51,  1.14s/it]

Iter 737.0, Minibatch Loss= 9.379280, Training Accuracy= 0.67969


 94%|████████████████████████████████████████████████████████████▍   | 738/782 [13:24<00:49,  1.12s/it]

Iter 738.0, Minibatch Loss= 2.193023, Training Accuracy= 0.78125


 95%|████████████████████████████████████████████████████████████▍   | 739/782 [13:26<00:48,  1.12s/it]

Iter 739.0, Minibatch Loss= 2.029858, Training Accuracy= 0.81250


 95%|████████████████████████████████████████████████████████████▌   | 740/782 [13:27<00:46,  1.11s/it]

Iter 740.0, Minibatch Loss= 2.452673, Training Accuracy= 0.75781


 95%|████████████████████████████████████████████████████████████▋   | 741/782 [13:28<00:44,  1.10s/it]

Iter 741.0, Minibatch Loss= 4.122250, Training Accuracy= 0.67188


 95%|████████████████████████████████████████████████████████████▋   | 742/782 [13:29<00:44,  1.12s/it]

Iter 742.0, Minibatch Loss= 8.639252, Training Accuracy= 0.67188


 95%|████████████████████████████████████████████████████████████▊   | 743/782 [13:30<00:44,  1.14s/it]

Iter 743.0, Minibatch Loss= 2.661915, Training Accuracy= 0.68750


 95%|████████████████████████████████████████████████████████████▉   | 744/782 [13:31<00:43,  1.14s/it]

Iter 744.0, Minibatch Loss= 8.234433, Training Accuracy= 0.67188


 95%|████████████████████████████████████████████████████████████▉   | 745/782 [13:32<00:42,  1.16s/it]

Iter 745.0, Minibatch Loss= 5.022906, Training Accuracy= 0.64844


 95%|█████████████████████████████████████████████████████████████   | 746/782 [13:34<00:44,  1.24s/it]

Iter 746.0, Minibatch Loss= 9.426260, Training Accuracy= 0.67969


 96%|█████████████████████████████████████████████████████████████▏  | 747/782 [13:35<00:43,  1.24s/it]

Iter 747.0, Minibatch Loss= 4.293500, Training Accuracy= 0.57031


 96%|█████████████████████████████████████████████████████████████▏  | 748/782 [13:36<00:43,  1.27s/it]

Iter 748.0, Minibatch Loss= 9.221392, Training Accuracy= 0.70312


 96%|█████████████████████████████████████████████████████████████▎  | 749/782 [13:38<00:40,  1.22s/it]

Iter 749.0, Minibatch Loss= 7.983218, Training Accuracy= 0.59375


 96%|█████████████████████████████████████████████████████████████▍  | 750/782 [13:39<00:37,  1.18s/it]

Iter 750.0, Minibatch Loss= 30.550079, Training Accuracy= 0.60938


 96%|█████████████████████████████████████████████████████████████▍  | 751/782 [13:40<00:36,  1.19s/it]

Iter 751.0, Minibatch Loss= 6.771765, Training Accuracy= 0.47656


 96%|█████████████████████████████████████████████████████████████▌  | 752/782 [13:41<00:36,  1.22s/it]

Iter 752.0, Minibatch Loss= 8.932360, Training Accuracy= 0.46094


 96%|█████████████████████████████████████████████████████████████▋  | 753/782 [13:42<00:34,  1.19s/it]

Iter 753.0, Minibatch Loss= 25.285191, Training Accuracy= 0.63281


 96%|█████████████████████████████████████████████████████████████▋  | 754/782 [13:43<00:32,  1.15s/it]

Iter 754.0, Minibatch Loss= 5.176226, Training Accuracy= 0.55469


 97%|█████████████████████████████████████████████████████████████▊  | 755/782 [13:44<00:30,  1.12s/it]

Iter 755.0, Minibatch Loss= 5.519318, Training Accuracy= 0.50781


 97%|█████████████████████████████████████████████████████████████▊  | 756/782 [13:45<00:28,  1.10s/it]

Iter 756.0, Minibatch Loss= 19.237514, Training Accuracy= 0.63281


 97%|█████████████████████████████████████████████████████████████▉  | 757/782 [13:46<00:27,  1.08s/it]

Iter 757.0, Minibatch Loss= 11.954117, Training Accuracy= 0.19531


 97%|██████████████████████████████████████████████████████████████  | 758/782 [13:47<00:25,  1.07s/it]

Iter 758.0, Minibatch Loss= 25.738979, Training Accuracy= 0.61719


 97%|██████████████████████████████████████████████████████████████  | 759/782 [13:49<00:24,  1.09s/it]

Iter 759.0, Minibatch Loss= 29.884897, Training Accuracy= 0.27344


 97%|██████████████████████████████████████████████████████████████▏ | 760/782 [13:50<00:24,  1.11s/it]

Iter 760.0, Minibatch Loss= 43.774338, Training Accuracy= 0.60938


 97%|██████████████████████████████████████████████████████████████▎ | 761/782 [13:51<00:24,  1.15s/it]

Iter 761.0, Minibatch Loss= 16.757000, Training Accuracy= 0.61719


 97%|██████████████████████████████████████████████████████████████▎ | 762/782 [13:52<00:23,  1.17s/it]

Iter 762.0, Minibatch Loss= 17.701752, Training Accuracy= 0.34375


 98%|██████████████████████████████████████████████████████████████▍ | 763/782 [13:53<00:22,  1.20s/it]

Iter 763.0, Minibatch Loss= 31.312952, Training Accuracy= 0.64062


 98%|██████████████████████████████████████████████████████████████▌ | 764/782 [13:55<00:23,  1.30s/it]

Iter 764.0, Minibatch Loss= 10.671877, Training Accuracy= 0.59375


 98%|██████████████████████████████████████████████████████████████▌ | 765/782 [13:56<00:22,  1.30s/it]

Iter 765.0, Minibatch Loss= 6.216900, Training Accuracy= 0.43750


 98%|██████████████████████████████████████████████████████████████▋ | 766/782 [13:58<00:21,  1.33s/it]

Iter 766.0, Minibatch Loss= 23.165169, Training Accuracy= 0.64062


 98%|██████████████████████████████████████████████████████████████▊ | 767/782 [13:59<00:19,  1.33s/it]

Iter 767.0, Minibatch Loss= 7.182098, Training Accuracy= 0.38281


 98%|██████████████████████████████████████████████████████████████▊ | 768/782 [14:00<00:18,  1.31s/it]

Iter 768.0, Minibatch Loss= 9.363126, Training Accuracy= 0.64062


 98%|██████████████████████████████████████████████████████████████▉ | 769/782 [14:01<00:16,  1.27s/it]

Iter 769.0, Minibatch Loss= 18.138073, Training Accuracy= 0.36719


 98%|███████████████████████████████████████████████████████████████ | 770/782 [14:03<00:14,  1.25s/it]

Iter 770.0, Minibatch Loss= 31.079975, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████ | 771/782 [14:04<00:13,  1.23s/it]

Iter 771.0, Minibatch Loss= 7.004267, Training Accuracy= 0.62500


 99%|███████████████████████████████████████████████████████████████▏| 772/782 [14:05<00:12,  1.22s/it]

Iter 772.0, Minibatch Loss= 10.034107, Training Accuracy= 0.42969


 99%|███████████████████████████████████████████████████████████████▎| 773/782 [14:06<00:10,  1.20s/it]

Iter 773.0, Minibatch Loss= 25.488216, Training Accuracy= 0.64062


 99%|███████████████████████████████████████████████████████████████▎| 774/782 [14:07<00:09,  1.19s/it]

Iter 774.0, Minibatch Loss= 10.234097, Training Accuracy= 0.36719


 99%|███████████████████████████████████████████████████████████████▍| 775/782 [14:09<00:08,  1.19s/it]

Iter 775.0, Minibatch Loss= 19.163145, Training Accuracy= 0.61719


 99%|███████████████████████████████████████████████████████████████▌| 776/782 [14:10<00:07,  1.20s/it]

Iter 776.0, Minibatch Loss= 19.811813, Training Accuracy= 0.36719


 99%|███████████████████████████████████████████████████████████████▌| 777/782 [14:11<00:05,  1.18s/it]

Iter 777.0, Minibatch Loss= 30.896788, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▋| 778/782 [14:12<00:04,  1.21s/it]

Iter 778.0, Minibatch Loss= 6.645589, Training Accuracy= 0.64062


100%|███████████████████████████████████████████████████████████████▊| 779/782 [14:13<00:03,  1.19s/it]

Iter 779.0, Minibatch Loss= 16.635708, Training Accuracy= 0.33594


100%|███████████████████████████████████████████████████████████████▊| 780/782 [14:15<00:02,  1.18s/it]

Iter 780.0, Minibatch Loss= 25.642807, Training Accuracy= 0.64062


100%|███████████████████████████████████████████████████████████████▉| 781/782 [14:16<00:01,  1.18s/it]

Iter 781.0, Minibatch Loss= 10.085630, Training Accuracy= 0.39062


100%|████████████████████████████████████████████████████████████████| 782/782 [14:17<00:00,  1.24s/it]


Training took 14:20.6855149269104


### Developing

In [188]:
# import numpy as np
# accs = [] # 128
# batches = [128, 64, 32, 1, 256]
# for batch in batches:
#     batch_size = batch
#     print("dev batch %s" % str(batch))
#     accs.append(test("data/dev.txt"))

# print(accs)
# print("Best batch size %s" % str(batches[np.argmax(accs)]))


### Testing

In [49]:
def test(file_test="data/test_labeled.txt"):
    data_feature_list, correct_values, correct_scores = split_data_into_scores(file_test)

    #hyps, evis, ys 
    data = (data_feature_list[0],
                      data_feature_list[1],
                      correct_scores)
    # predictions = []
    correct_predictions = 0
    total_predictions = 0
    for i, f1 in enumerate(data_feature_list[0]):
        hyps, evis, ys = data_feature_list[0][i], data_feature_list[1][i], correct_scores[i]
        prediction = sess.run(classification_scores, feed_dict={hyp: ([hyps] * N), evi: ([evis] * N), y: ([ys] * N)})
    #     predictions.append(prediction)
        total_predictions += 1
        if np.argmax(prediction[0])==np.argmax(ys):
            correct_predictions += 1
    #     if total_predictions % 500 == 0:
    #         print(total_predictions)
    #     print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
    #       " / " + ["Positive", "Neutral", "Negative"][np.argmax(ys[i])])
    print(total_predictions)

    # acc = sess.run(accuracy)#, feed_dict={'label': correct_scores})
    acc = correct_predictions*100/total_predictions
    print("Acc: %s" % str(acc))    
# print("Acc: %s" % str(acc))

import time
start = time.time()
test()
stop = time.time()
mins = int((stop-start)/60.0)
print("Training took %s:%s" % (str(mins), str(stop - start - mins*60)))



4927
Acc: 25.81692713618835
Training took 21:43.974363565444946


In [190]:
sess.close()
#56.52