# Textual entailment

### Imports

In [2]:
from tqdm import tqdm
import tensorflow as tf
import numpy as np
import sys
display_step = 10

### Embeddings

Download Glove word embeddings

In [51]:
glove_zip_file = "data/glove.6B.zip"
glove_vectors_file = "data/glove.6B.50d.txt"
import zipfile, urllib.request, shutil, os
    
#large file - 862 MB
if (not os.path.isfile(glove_zip_file) and
    not os.path.isfile(glove_vectors_file)):
    with urllib.request.urlopen("http://nlp.stanford.edu/data/glove.6B.zip") as response, open(glove_zip_file, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)

Unzip word embeddings

In [4]:
import zipfile
def unzip_single_file(zip_file_name, output_file_name):
    """
        If the outFile is already created, don't recreate
        If the outFile does not exist, create it from the zipFile
    """
    if not os.path.isfile(output_file_name):
        with open(output_file_name, 'wb') as out_file:
            with zipfile.ZipFile(zip_file_name) as zipped:
                for info in zipped.infolist():
                    if output_file_name in info.filename:
                        with zipped.open(info) as requested_file:
                            out_file.write(requested_file.read())
                            return

unzip_single_file(glove_zip_file, glove_vectors_file)

In [52]:
glove_wordmap = {}
with open(glove_vectors_file, "r", encoding="utf8") as glove:
    for line in glove:
        name, vector = tuple(line.split(" ", 1))
        glove_wordmap[name] = np.fromstring(vector, sep=" ")

KeyboardInterrupt: 

## Model

### Embed sentences - sentence2sequence

In [4]:
def sentence2sequence(sentence):
    """
    - Turns an input sentence into an (n,d) matrix, 
        where n is the number of tokens in the sentence
        and d is the number of dimensions each word vector has.
    
      Tensorflow doesn't need to be used here, as simply
      turning the sentence into a sequence based off our 
      mapping does not need the computational power that
      Tensorflow provides. Normal Python suffices for this task.
    """
    tokens = sentence.lower().split(" ")
    rows = []
    words = []
    #Greedy search for tokens
    for token in tokens:
        i = len(token)
        while len(token) > 0 and i > 0:
            word = token[:i]
            if word in glove_wordmap:
                rows.append(glove_wordmap[word])
                words.append(word)
                token = token[i:]
                i = len(token)
            else:
                i = i-1
    return rows, words

In [5]:
display_step = 1
# one hot encoding
def score_setup(row):
    convert_dict = {
      'ENTAILMENT': 0,
      'NEUTRAL': 1,
      'CONTRADICTION': 2
    }
    score = np.zeros((3,))
    tag = row["entailment_judgment"]
    score[convert_dict[tag]] += 1
    return score

def fit_to_size(matrix, shape):
    res = np.zeros(shape)
    slices = [slice(0,min(dim,shape[e])) for e, dim in enumerate(matrix.shape)]
    res[slices] = matrix[slices]
    return res

### Cell used for development

In [56]:
def create_model():
    tf.reset_default_graph() 
    data_feature_list, correct_values, correct_scores = split_data_into_scores()

    l_h, l_e = max_hypothesis_length, max_evidence_length
    N, D, H = batch_size, vector_size, hidden_size
    l_seq = l_h + l_e
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)
    hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
    evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
    y = tf.placeholder(tf.float32, [N, 3], 'label')
    lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)

    lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)

    fc_initializer = tf.random_normal_initializer(stddev=0.1) 
    fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3], 
                            initializer = fc_initializer)
    fc_bias = tf.get_variable('bias', [3])
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

    x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
    x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
    x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
    x = tf.split(x, l_seq,)
    rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)

    classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias

    with tf.variable_scope('Accuracy'):
        predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
        y_label = tf.cast(tf.argmax(y, 1), 'int32')
        corrects = tf.equal(predicts, y_label)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    with tf.variable_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits = classification_scores, labels = y)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    opt_op = optimizer.minimize(total_loss)
    # Initialize variables
    init = tf.global_variables_initializer()

    # Use TQDM if installed
    tqdm_installed = False
    try:
        from tqdm import tqdm
        tqdm_installed = True
    except:
        pass

    # Launch the Tensorflow session
    sess = tf.Session()
    sess.run(init)

    # training_iterations_count: The number of data pieces to train on in total
    # batch_size: The number of data pieces per batch
    training_iterations = range(0,training_iterations_count,batch_size)
    if tqdm_installed:
        # Add a progress bar if TQDM is installed
        training_iterations = tqdm(training_iterations)

    for i in training_iterations:
        if i % 1000 == 0:
        # Select indices for a random data subset
            batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)

        # Use the selected subset indices to initialize the graph's 
        #   placeholder values
        hyps, evis, ys = (data_feature_list[0][batch,:],
                          data_feature_list[1][batch,:],
                          correct_scores[batch])

        # Run the optimization with these initialized values
        sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
        # display_step: how often the accuracy and loss should 
        #   be tested and displayed.
        if (i/batch_size) % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Calculate batch loss
            tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Display results
            print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))

    data_feature_list, correct_values, correct_scores = split_test_data_into_scores("data/dev.txt")


    hyps, evis, ys = (data_feature_list[0][:],
                      data_feature_list[1][:],
                      correct_scores)
    predictions = sess.run(classification_scores, feed_dict={hyp: hyps, evi: evis, y: ys})
    total = len(predictions)
    correct_predictions = 0
    for i,prediction in enumerate(predictions):
        if np.argmax(prediction[0])==np.argmax(ys[i]):
            correct_predictions += 1
    #     print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
    #       " / " + ["Positive", "Neutral", "Negative"][np.argmax(ys[i])])
    acc = correct_predictions*100/total
    print("Acc: %s" % str(acc))
    return acc

In [57]:
import numpy as np
accs = [] # 128
batches = [128, 64, 32, 1, 256]
for batch in batches:
    batch_size = batch
    accs.append(create_model())

print(accs)
print("Best batch size %s" % str(batches[np.argmax(accs)]))






  0%|                                                                                                                                                                                   | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.956232, Training Accuracy= 0.58594






  0%|▏                                                                                                                                                                          | 1/782 [00:00<10:43,  1.21it/s]



  1%|▊                                                                                                                                                                          | 4/782 [00:00<07:39,  1.69it/s]



  1%|█▌                                                                                                                                                                         | 7/782 [00:01<05:30,  2.35it/s]



  1%|██▏                                                                                                                                                                       | 10/782 [00:01<04:00,  3.21it/s]

Iter 10.0, Minibatch Loss= 0.938195, Training Accuracy= 0.58594






  2%|██▌                                                                                                                                                                       | 12/782 [00:01<03:01,  4.23it/s]



  2%|███▎                                                                                                                                                                      | 15/782 [00:01<02:16,  5.64it/s]



  2%|███▉                                                                                                                                                                      | 18/782 [00:01<01:44,  7.31it/s]

Iter 20.0, Minibatch Loss= 0.937931, Training Accuracy= 0.58594






  3%|████▌                                                                                                                                                                     | 21/782 [00:01<01:26,  8.83it/s]



  3%|█████▏                                                                                                                                                                    | 24/782 [00:01<01:10, 10.81it/s]



  3%|█████▊                                                                                                                                                                    | 27/782 [00:02<00:57, 13.05it/s]



  4%|██████▌                                                                                                                                                                   | 30/782 [00:02<00:50, 14.98it/s]

Iter 30.0, Minibatch Loss= 0.937693, Training Accuracy= 0.58594






  4%|███████▏                                                                                                                                                                  | 33/782 [00:02<00:48, 15.50it/s]



  5%|███████▊                                                                                                                                                                  | 36/782 [00:02<00:43, 16.99it/s]



  5%|████████▍                                                                                                                                                                 | 39/782 [00:02<00:39, 18.86it/s]

Iter 40.0, Minibatch Loss= 0.937342, Training Accuracy= 0.58594






  5%|█████████▏                                                                                                                                                                | 42/782 [00:02<00:39, 18.77it/s]



  6%|█████████▊                                                                                                                                                                | 45/782 [00:02<00:35, 20.62it/s]



  6%|██████████▍                                                                                                                                                               | 48/782 [00:02<00:33, 22.13it/s]

Iter 50.0, Minibatch Loss= 0.936669, Training Accuracy= 0.58594






  7%|███████████                                                                                                                                                               | 51/782 [00:03<00:33, 21.57it/s]



  7%|███████████▋                                                                                                                                                              | 54/782 [00:03<00:32, 22.35it/s]



  7%|████████████▍                                                                                                                                                             | 57/782 [00:03<00:31, 22.82it/s]



  8%|█████████████                                                                                                                                                             | 60/782 [00:03<00:31, 23.11it/s]

Iter 60.0, Minibatch Loss= 0.935027, Training Accuracy= 0.58594






  8%|█████████████▋                                                                                                                                                            | 63/782 [00:03<00:33, 21.77it/s]



  8%|██████████████▎                                                                                                                                                           | 66/782 [00:03<00:31, 22.71it/s]



  9%|███████████████                                                                                                                                                           | 69/782 [00:03<00:29, 23.80it/s]

Iter 70.0, Minibatch Loss= 0.930252, Training Accuracy= 0.58594






  9%|███████████████▋                                                                                                                                                          | 72/782 [00:04<00:31, 22.68it/s]



 10%|████████████████▎                                                                                                                                                         | 75/782 [00:04<00:29, 23.72it/s]



 10%|████████████████▉                                                                                                                                                         | 78/782 [00:04<00:29, 23.97it/s]

Iter 80.0, Minibatch Loss= 0.917919, Training Accuracy= 0.58594






 10%|█████████████████▌                                                                                                                                                        | 81/782 [00:04<00:30, 22.76it/s]



 11%|██████████████████▎                                                                                                                                                       | 84/782 [00:04<00:29, 23.87it/s]



 11%|██████████████████▉                                                                                                                                                       | 87/782 [00:04<00:28, 24.68it/s]



 12%|███████████████████▌                                                                                                                                                      | 90/782 [00:04<00:27, 24.96it/s]

Iter 90.0, Minibatch Loss= 0.935087, Training Accuracy= 0.58594


KeyboardInterrupt: 

### Constants

In [None]:
#Constants setup
max_hypothesis_length, max_evidence_length = 28, 32
batch_size, vector_size, hidden_size = 128, 50, 256

training_iterations_count = 100000

lstm_size = hidden_size

weight_decay = 0.001

learning_rate = 1

input_p, output_p = 0.6, 0.3

### [Training data](http://www.site.uottawa.ca/~diana/csi5386/A2_2019/SICK_train.txt)

In [53]:
import numpy as np

def split_data_into_scores(file_name="data/training.txt"):
    import csv
    with open(file_name,"r") as data:
        train = csv.DictReader(data , delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        count = 1
        for row in train:
            hyp_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_A"].lower())[0]))
            evi_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_B"].lower())[0]))
            labels.append(row["entailment_judgment"])
            scores.append(score_setup(row))
        
        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                          for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                          for x in evi_sentences])
                                 
        return (hyp_sentences, evi_sentences), labels, np.array(scores)
data_feature_list, correct_values, correct_scores = split_data_into_scores()

l_h, l_e = max_hypothesis_length, max_evidence_length
N, D, H = batch_size, vector_size, hidden_size
l_seq = l_h + l_e



### TRAINING

In [54]:
tf.reset_default_graph()

In [55]:
lstm = tf.contrib.rnn.LSTMCell(lstm_size)
lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)

In [56]:
# N: The number of elements in each of our batches, 
#   which we use to train subsets of data for efficiency's sake.
# l_h: The maximum length of a hypothesis, or the second sentence.  This is
#   used because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# l_e: The maximum length of evidence, the first sentence.  This is used
#   because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# D: The size of our used GloVe or other vectors.
hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
y = tf.placeholder(tf.float32, [N, 3], 'label')
# hyp: Where the hypotheses will be stored during training.
# evi: Where the evidences will be stored during training.
# y: Where correct scores will be stored during training.

# lstm_size: the size of the gates in the LSTM, 
#    as in the first LSTM layer's initialization.
# lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)
lstm_back = tf.contrib.rnn.LSTMCell(lstm_size)

# lstm_back:  The LSTM used for looking backwards 
#   through the sentences, similar to lstm.

# input_p: the probability that inputs to the LSTM will be retained at each
#   iteration of dropout.
# output_p: the probability that outputs from the LSTM will be retained at 
#   each iteration of dropout.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)
# lstm_drop_back:  A dropout wrapper for lstm_back, like lstm_drop.


fc_initializer = tf.random_normal_initializer(stddev=0.1) 
# fc_initializer: initial values for the fully connected layer's weights.
# hidden_size: the size of the outputs from each lstm layer.  
#   Multiplied by 2 to account for the two LSTMs.
fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3], 
                            initializer = fc_initializer)
# fc_weight: Storage for the fully connected layer's weights.
fc_bias = tf.get_variable('bias', [3])
# fc_bias: Storage for the fully connected layer's bias.

# tf.GraphKeys.REGULARIZATION_LOSSES:  A key to a collection in the graph
#   designated for losses due to regularization.
#   In this case, this portion of loss is regularization on the weights
#   for the fully connected layer.
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq,)

# x: the inputs to the bidirectional_rnn


# tf.contrib.rnn.static_bidirectional_rnn: Runs the input through
#   two recurrent networks, one that runs the inputs forward and one
#   that runs the inputs in reversed order, combining the outputs.
rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)
# rnn_outputs: the list of LSTM outputs, as a list. 
#   What we want is the latest output, rnn_outputs[-1]

classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias
# The scores are relative certainties for how likely the output matches
#   a certain entailment: 
#     0: Positive entailment
#     1: Neutral entailment
#     2: Negative entailment

In [57]:
with tf.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
    y_label = tf.cast(tf.argmax(y, 1), 'int32')
    corrects = tf.equal(predicts, y_label)
    num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

with tf.variable_scope("loss"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits = classification_scores, labels = y)
    loss = tf.reduce_mean(cross_entropy)
    total_loss = loss + weight_decay * tf.add_n(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

optimizer = tf.train.GradientDescentOptimizer(learning_rate)

opt_op = optimizer.minimize(total_loss)

In [None]:
# Initialize variables
init = tf.global_variables_initializer()

from tqdm import tqdm
import time
start = time.time()
# Launch the Tensorflow session
sess = tf.Session()
sess.run(init)

# training_iterations_count: The number of data pieces to train on in total
# batch_size: The number of data pieces per batch
training_iterations = range(0,training_iterations_count,batch_size)
print(training_iterations)
training_iterations = tqdm(training_iterations)
for j in range(10):
    for i in training_iterations:
        if i % 1000 == 0:
        # Select indices for a random data subset
            batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)

        # Use the selected subset indices to initialize the graph's 
        #   placeholder values
        hyps, evis, ys = (data_feature_list[0][batch,:],
                          data_feature_list[1][batch,:],
                          correct_scores[batch])

        # Run the optimization with these initialized values
        sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
        # display_step: how often the accuracy and loss should 
        #   be tested and displayed.
        if (i/batch_size) % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Calculate batch loss
            tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Display results
            print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))
stop = time.time()
mins = int((stop-start)/60.0)
print("Training took %s:%s" % (str(mins), str(stop - start - mins*60)))

range(0, 100000, 128)


  0%|                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.982853, Training Accuracy= 0.53125


  0%|                                                                  | 1/782 [00:03<41:27,  3.18s/it]

Iter 1.0, Minibatch Loss= 1.072385, Training Accuracy= 0.35156


  0%|▏                                                                 | 2/782 [00:04<34:43,  2.67s/it]

Iter 2.0, Minibatch Loss= 1.020020, Training Accuracy= 0.53125


  0%|▎                                                                 | 3/782 [00:06<30:52,  2.38s/it]

Iter 3.0, Minibatch Loss= 0.962351, Training Accuracy= 0.53125


  1%|▎                                                                 | 4/782 [00:07<27:35,  2.13s/it]

Iter 4.0, Minibatch Loss= 0.953920, Training Accuracy= 0.53125


  1%|▍                                                                 | 5/782 [00:09<26:19,  2.03s/it]

Iter 5.0, Minibatch Loss= 0.953141, Training Accuracy= 0.53125


  1%|▌                                                                 | 6/782 [00:11<24:48,  1.92s/it]

Iter 6.0, Minibatch Loss= 0.952863, Training Accuracy= 0.53125


  1%|▌                                                                 | 7/782 [00:12<23:33,  1.82s/it]

Iter 7.0, Minibatch Loss= 0.952617, Training Accuracy= 0.53125


  1%|▋                                                                 | 8/782 [00:14<22:43,  1.76s/it]

Iter 8.0, Minibatch Loss= 0.952353, Training Accuracy= 0.53125


  1%|▊                                                                 | 9/782 [00:16<22:02,  1.71s/it]

Iter 9.0, Minibatch Loss= 0.952063, Training Accuracy= 0.53125


  1%|▊                                                                | 10/782 [00:17<21:50,  1.70s/it]

Iter 10.0, Minibatch Loss= 0.951739, Training Accuracy= 0.53125


  1%|▉                                                                | 11/782 [00:19<21:01,  1.64s/it]

Iter 11.0, Minibatch Loss= 0.951373, Training Accuracy= 0.53125


  2%|▉                                                                | 12/782 [00:20<21:03,  1.64s/it]

Iter 12.0, Minibatch Loss= 0.950954, Training Accuracy= 0.53125


  2%|█                                                                | 13/782 [00:22<21:38,  1.69s/it]

Iter 13.0, Minibatch Loss= 0.950467, Training Accuracy= 0.53125


  2%|█▏                                                               | 14/782 [00:24<21:42,  1.70s/it]

Iter 14.0, Minibatch Loss= 0.949891, Training Accuracy= 0.53125


  2%|█▏                                                               | 15/782 [00:25<20:53,  1.63s/it]

Iter 15.0, Minibatch Loss= 0.949194, Training Accuracy= 0.53125


  2%|█▎                                                               | 16/782 [00:27<20:06,  1.57s/it]

Iter 16.0, Minibatch Loss= 0.948329, Training Accuracy= 0.53125


  2%|█▍                                                               | 17/782 [00:28<19:42,  1.55s/it]

Iter 17.0, Minibatch Loss= 0.947216, Training Accuracy= 0.53125


  2%|█▍                                                               | 18/782 [00:30<19:45,  1.55s/it]

Iter 18.0, Minibatch Loss= 0.945708, Training Accuracy= 0.53125


  2%|█▌                                                               | 19/782 [00:31<19:18,  1.52s/it]

Iter 19.0, Minibatch Loss= 0.943507, Training Accuracy= 0.53125


  3%|█▋                                                               | 20/782 [00:33<19:09,  1.51s/it]

Iter 20.0, Minibatch Loss= 0.939889, Training Accuracy= 0.53125


  3%|█▋                                                               | 21/782 [00:34<19:23,  1.53s/it]

Iter 21.0, Minibatch Loss= 0.932562, Training Accuracy= 0.53125


  3%|█▊                                                               | 22/782 [00:36<18:46,  1.48s/it]

Iter 22.0, Minibatch Loss= 0.910817, Training Accuracy= 0.53125


  3%|█▉                                                               | 23/782 [00:38<20:09,  1.59s/it]

Iter 23.0, Minibatch Loss= 0.870964, Training Accuracy= 0.53906


  3%|█▉                                                               | 24/782 [00:39<20:07,  1.59s/it]

Iter 24.0, Minibatch Loss= 1.058347, Training Accuracy= 0.50000


  3%|██                                                               | 25/782 [00:41<20:23,  1.62s/it]

Iter 25.0, Minibatch Loss= 1.000816, Training Accuracy= 0.53125


  3%|██▏                                                              | 26/782 [00:43<21:07,  1.68s/it]

Iter 26.0, Minibatch Loss= 0.977986, Training Accuracy= 0.35938


  3%|██▏                                                              | 27/782 [00:44<20:41,  1.64s/it]

Iter 27.0, Minibatch Loss= 0.957604, Training Accuracy= 0.53125


  4%|██▎                                                              | 28/782 [00:46<20:12,  1.61s/it]

Iter 28.0, Minibatch Loss= 0.953739, Training Accuracy= 0.53125


  4%|██▍                                                              | 29/782 [00:47<19:31,  1.56s/it]

Iter 29.0, Minibatch Loss= 0.953116, Training Accuracy= 0.53125


  4%|██▍                                                              | 30/782 [00:49<19:01,  1.52s/it]

Iter 30.0, Minibatch Loss= 0.952891, Training Accuracy= 0.53125


  4%|██▌                                                              | 31/782 [00:50<19:03,  1.52s/it]

Iter 31.0, Minibatch Loss= 0.952692, Training Accuracy= 0.53125


  4%|██▋                                                              | 32/782 [00:52<18:43,  1.50s/it]

Iter 32.0, Minibatch Loss= 0.952483, Training Accuracy= 0.53125


  4%|██▋                                                              | 33/782 [00:53<18:35,  1.49s/it]

Iter 33.0, Minibatch Loss= 0.952254, Training Accuracy= 0.53125


  4%|██▊                                                              | 34/782 [00:55<18:37,  1.49s/it]

Iter 34.0, Minibatch Loss= 0.952002, Training Accuracy= 0.53125


  4%|██▉                                                              | 35/782 [00:56<18:02,  1.45s/it]

Iter 35.0, Minibatch Loss= 0.951722, Training Accuracy= 0.53125


  5%|██▉                                                              | 36/782 [00:58<18:38,  1.50s/it]

Iter 36.0, Minibatch Loss= 0.951407, Training Accuracy= 0.53125


  5%|███                                                              | 37/782 [00:59<18:31,  1.49s/it]

Iter 37.0, Minibatch Loss= 0.951050, Training Accuracy= 0.53125


  5%|███▏                                                             | 38/782 [01:01<18:18,  1.48s/it]

Iter 38.0, Minibatch Loss= 0.950639, Training Accuracy= 0.53125


  5%|███▏                                                             | 39/782 [01:02<18:29,  1.49s/it]

Iter 39.0, Minibatch Loss= 0.950162, Training Accuracy= 0.53125


  5%|███▎                                                             | 40/782 [01:04<18:35,  1.50s/it]

Iter 40.0, Minibatch Loss= 0.949599, Training Accuracy= 0.53125


  5%|███▍                                                             | 41/782 [01:05<19:15,  1.56s/it]

Iter 41.0, Minibatch Loss= 0.948924, Training Accuracy= 0.53125


  5%|███▍                                                             | 42/782 [01:07<18:37,  1.51s/it]

Iter 42.0, Minibatch Loss= 0.948101, Training Accuracy= 0.53125


  5%|███▌                                                             | 43/782 [01:08<18:39,  1.51s/it]

Iter 43.0, Minibatch Loss= 0.947075, Training Accuracy= 0.53125


  6%|███▋                                                             | 44/782 [01:10<19:00,  1.55s/it]

Iter 44.0, Minibatch Loss= 0.945764, Training Accuracy= 0.53125


  6%|███▋                                                             | 45/782 [01:11<18:18,  1.49s/it]

Iter 45.0, Minibatch Loss= 0.944033, Training Accuracy= 0.53125


  6%|███▊                                                             | 46/782 [01:13<18:07,  1.48s/it]

Iter 46.0, Minibatch Loss= 0.941653, Training Accuracy= 0.53125


  6%|███▉                                                             | 47/782 [01:14<17:48,  1.45s/it]

Iter 47.0, Minibatch Loss= 0.938188, Training Accuracy= 0.53125


  6%|███▉                                                             | 48/782 [01:15<17:41,  1.45s/it]

Iter 48.0, Minibatch Loss= 0.932697, Training Accuracy= 0.53125


  6%|████                                                             | 49/782 [01:17<17:55,  1.47s/it]

Iter 49.0, Minibatch Loss= 0.922650, Training Accuracy= 0.53125


  6%|████▏                                                            | 50/782 [01:18<17:24,  1.43s/it]

Iter 50.0, Minibatch Loss= 0.898256, Training Accuracy= 0.53125


  7%|████▏                                                            | 51/782 [01:20<17:39,  1.45s/it]

Iter 51.0, Minibatch Loss= 0.846929, Training Accuracy= 0.57031


  7%|████▎                                                            | 52/782 [01:21<17:26,  1.43s/it]

Iter 52.0, Minibatch Loss= 1.053324, Training Accuracy= 0.53125


  7%|████▍                                                            | 53/782 [01:23<17:22,  1.43s/it]

Iter 53.0, Minibatch Loss= 2.231272, Training Accuracy= 0.11719


  7%|████▍                                                            | 54/782 [01:24<17:43,  1.46s/it]

Iter 54.0, Minibatch Loss= 1.146716, Training Accuracy= 0.53125


  7%|████▌                                                            | 55/782 [01:26<17:28,  1.44s/it]

Iter 55.0, Minibatch Loss= 1.025231, Training Accuracy= 0.52344


  7%|████▋                                                            | 56/782 [01:27<17:23,  1.44s/it]

Iter 56.0, Minibatch Loss= 0.973206, Training Accuracy= 0.53125


  7%|████▋                                                            | 57/782 [01:28<17:17,  1.43s/it]

Iter 57.0, Minibatch Loss= 0.957723, Training Accuracy= 0.53125


  7%|████▊                                                            | 58/782 [01:30<17:18,  1.44s/it]

Iter 58.0, Minibatch Loss= 0.955538, Training Accuracy= 0.53125


  8%|████▉                                                            | 59/782 [01:31<17:40,  1.47s/it]

Iter 59.0, Minibatch Loss= 0.954932, Training Accuracy= 0.53125


  8%|████▉                                                            | 60/782 [01:33<17:30,  1.46s/it]

Iter 60.0, Minibatch Loss= 0.954543, Training Accuracy= 0.53125


  8%|█████                                                            | 61/782 [01:34<17:26,  1.45s/it]

Iter 61.0, Minibatch Loss= 0.954206, Training Accuracy= 0.53125


  8%|█████▏                                                           | 62/782 [01:36<17:36,  1.47s/it]

Iter 62.0, Minibatch Loss= 0.953888, Training Accuracy= 0.53125


  8%|█████▏                                                           | 63/782 [01:37<17:06,  1.43s/it]

Iter 63.0, Minibatch Loss= 0.953581, Training Accuracy= 0.53125


  8%|█████▎                                                           | 64/782 [01:39<17:44,  1.48s/it]

Iter 64.0, Minibatch Loss= 0.953277, Training Accuracy= 0.53125


  8%|█████▍                                                           | 65/782 [01:40<17:31,  1.47s/it]

Iter 65.0, Minibatch Loss= 0.952972, Training Accuracy= 0.53125


  8%|█████▍                                                           | 66/782 [01:42<17:18,  1.45s/it]

Iter 66.0, Minibatch Loss= 0.952663, Training Accuracy= 0.53125


  9%|█████▌                                                           | 67/782 [01:43<17:33,  1.47s/it]

Iter 67.0, Minibatch Loss= 0.952346, Training Accuracy= 0.53125


  9%|█████▋                                                           | 68/782 [01:44<17:08,  1.44s/it]

Iter 68.0, Minibatch Loss= 0.952015, Training Accuracy= 0.53125


  9%|█████▋                                                           | 69/782 [01:46<17:09,  1.44s/it]

Iter 69.0, Minibatch Loss= 0.951667, Training Accuracy= 0.53125


  9%|█████▊                                                           | 70/782 [01:47<16:58,  1.43s/it]

Iter 70.0, Minibatch Loss= 0.951297, Training Accuracy= 0.53125


  9%|█████▉                                                           | 71/782 [01:49<17:03,  1.44s/it]

Iter 71.0, Minibatch Loss= 0.950898, Training Accuracy= 0.53125


  9%|█████▉                                                           | 72/782 [01:50<17:35,  1.49s/it]

Iter 72.0, Minibatch Loss= 0.950466, Training Accuracy= 0.53125


  9%|██████                                                           | 73/782 [01:52<17:25,  1.47s/it]

Iter 73.0, Minibatch Loss= 0.949991, Training Accuracy= 0.53125


  9%|██████▏                                                          | 74/782 [01:53<17:14,  1.46s/it]

Iter 74.0, Minibatch Loss= 0.949463, Training Accuracy= 0.53125


 10%|██████▏                                                          | 75/782 [01:55<16:59,  1.44s/it]

Iter 75.0, Minibatch Loss= 0.948870, Training Accuracy= 0.53125


 10%|██████▎                                                          | 76/782 [01:56<16:55,  1.44s/it]

Iter 76.0, Minibatch Loss= 0.948197, Training Accuracy= 0.53125


 10%|██████▍                                                          | 77/782 [01:58<17:14,  1.47s/it]

Iter 77.0, Minibatch Loss= 0.947421, Training Accuracy= 0.53125


 10%|██████▍                                                          | 78/782 [01:59<16:52,  1.44s/it]

Iter 78.0, Minibatch Loss= 0.946515, Training Accuracy= 0.53125


 10%|██████▌                                                          | 79/782 [02:00<16:49,  1.44s/it]

Iter 79.0, Minibatch Loss= 0.945435, Training Accuracy= 0.53125


 10%|██████▋                                                          | 80/782 [02:02<16:51,  1.44s/it]

Iter 80.0, Minibatch Loss= 0.944121, Training Accuracy= 0.53125


 10%|██████▋                                                          | 81/782 [02:03<16:30,  1.41s/it]

Iter 81.0, Minibatch Loss= 0.942475, Training Accuracy= 0.53125


 10%|██████▊                                                          | 82/782 [02:05<16:54,  1.45s/it]

Iter 82.0, Minibatch Loss= 0.940326, Training Accuracy= 0.53125


 11%|██████▉                                                          | 83/782 [02:06<16:43,  1.44s/it]

Iter 83.0, Minibatch Loss= 0.937340, Training Accuracy= 0.53125


 11%|██████▉                                                          | 84/782 [02:08<16:50,  1.45s/it]

Iter 84.0, Minibatch Loss= 0.932734, Training Accuracy= 0.53125


 11%|███████                                                          | 85/782 [02:09<17:19,  1.49s/it]

Iter 85.0, Minibatch Loss= 0.924107, Training Accuracy= 0.53125


 11%|███████▏                                                         | 86/782 [02:11<16:44,  1.44s/it]

Iter 86.0, Minibatch Loss= 0.900387, Training Accuracy= 0.53125


 11%|███████▏                                                         | 87/782 [02:12<17:06,  1.48s/it]

Iter 87.0, Minibatch Loss= 0.862390, Training Accuracy= 0.54688


 11%|███████▎                                                         | 88/782 [02:14<16:55,  1.46s/it]

Iter 88.0, Minibatch Loss= 1.052962, Training Accuracy= 0.53125


 11%|███████▍                                                         | 89/782 [02:15<16:50,  1.46s/it]

Iter 89.0, Minibatch Loss= 1.002772, Training Accuracy= 0.35938


 12%|███████▍                                                         | 90/782 [02:17<17:03,  1.48s/it]

Iter 90.0, Minibatch Loss= 0.945459, Training Accuracy= 0.53125


 12%|███████▌                                                         | 91/782 [02:18<16:55,  1.47s/it]

Iter 91.0, Minibatch Loss= 0.941711, Training Accuracy= 0.53125


 12%|███████▋                                                         | 92/782 [02:19<16:42,  1.45s/it]

Iter 92.0, Minibatch Loss= 0.937480, Training Accuracy= 0.53125


 12%|███████▋                                                         | 93/782 [02:21<16:29,  1.44s/it]

Iter 93.0, Minibatch Loss= 0.931097, Training Accuracy= 0.53125


 12%|███████▊                                                         | 94/782 [02:22<16:19,  1.42s/it]

Iter 94.0, Minibatch Loss= 0.920570, Training Accuracy= 0.53125


 12%|███████▉                                                         | 95/782 [02:24<16:38,  1.45s/it]

Iter 95.0, Minibatch Loss= 0.901146, Training Accuracy= 0.53125


 12%|███████▉                                                         | 96/782 [02:25<16:24,  1.43s/it]

Iter 96.0, Minibatch Loss= 0.886269, Training Accuracy= 0.53125


 12%|████████                                                         | 97/782 [02:27<16:24,  1.44s/it]

Iter 97.0, Minibatch Loss= 1.227095, Training Accuracy= 0.43750


 13%|████████▏                                                        | 98/782 [02:28<16:11,  1.42s/it]

Iter 98.0, Minibatch Loss= 1.119162, Training Accuracy= 0.29688


 13%|████████▏                                                        | 99/782 [02:29<16:10,  1.42s/it]

Iter 99.0, Minibatch Loss= 1.059156, Training Accuracy= 0.53125


 13%|████████▏                                                       | 100/782 [02:31<16:33,  1.46s/it]

Iter 100.0, Minibatch Loss= 0.987850, Training Accuracy= 0.53125


 13%|████████▎                                                       | 101/782 [02:32<16:19,  1.44s/it]

Iter 101.0, Minibatch Loss= 0.960641, Training Accuracy= 0.53125


 13%|████████▎                                                       | 102/782 [02:34<16:23,  1.45s/it]

Iter 102.0, Minibatch Loss= 0.954171, Training Accuracy= 0.53125


 13%|████████▍                                                       | 103/782 [02:35<16:32,  1.46s/it]

Iter 103.0, Minibatch Loss= 0.953456, Training Accuracy= 0.53125


 13%|████████▌                                                       | 104/782 [02:37<16:01,  1.42s/it]

Iter 104.0, Minibatch Loss= 0.953068, Training Accuracy= 0.53125


 13%|████████▌                                                       | 105/782 [02:38<16:30,  1.46s/it]

Iter 105.0, Minibatch Loss= 0.952685, Training Accuracy= 0.53125


 14%|████████▋                                                       | 106/782 [02:40<16:32,  1.47s/it]

Iter 106.0, Minibatch Loss= 0.952299, Training Accuracy= 0.53125


 14%|████████▊                                                       | 107/782 [02:41<16:23,  1.46s/it]

Iter 107.0, Minibatch Loss= 0.951909, Training Accuracy= 0.53125


 14%|████████▊                                                       | 108/782 [02:43<16:37,  1.48s/it]

Iter 108.0, Minibatch Loss= 0.951511, Training Accuracy= 0.53125


 14%|████████▉                                                       | 109/782 [02:44<16:19,  1.45s/it]

Iter 109.0, Minibatch Loss= 0.951103, Training Accuracy= 0.53125


 14%|█████████                                                       | 110/782 [02:46<16:45,  1.50s/it]

Iter 110.0, Minibatch Loss= 0.950681, Training Accuracy= 0.53125


 14%|█████████                                                       | 111/782 [02:47<17:03,  1.52s/it]

Iter 111.0, Minibatch Loss= 0.950244, Training Accuracy= 0.53125


 14%|█████████▏                                                      | 112/782 [02:49<16:53,  1.51s/it]

Iter 112.0, Minibatch Loss= 0.949790, Training Accuracy= 0.53125


 14%|█████████▏                                                      | 113/782 [02:50<17:51,  1.60s/it]

Iter 113.0, Minibatch Loss= 0.949314, Training Accuracy= 0.53125


 15%|█████████▎                                                      | 114/782 [02:52<17:55,  1.61s/it]

Iter 114.0, Minibatch Loss= 0.948813, Training Accuracy= 0.53125


 15%|█████████▍                                                      | 115/782 [02:54<17:50,  1.60s/it]

Iter 115.0, Minibatch Loss= 0.948282, Training Accuracy= 0.53125


 15%|█████████▍                                                      | 116/782 [02:55<17:31,  1.58s/it]

Iter 116.0, Minibatch Loss= 0.947715, Training Accuracy= 0.53125


 15%|█████████▌                                                      | 117/782 [02:57<17:05,  1.54s/it]

Iter 117.0, Minibatch Loss= 0.947101, Training Accuracy= 0.53125


 15%|█████████▋                                                      | 118/782 [02:58<17:05,  1.54s/it]

Iter 118.0, Minibatch Loss= 0.946425, Training Accuracy= 0.53125


 15%|█████████▋                                                      | 119/782 [03:00<16:50,  1.52s/it]

Iter 119.0, Minibatch Loss= 0.945666, Training Accuracy= 0.53125


 15%|█████████▊                                                      | 120/782 [03:01<16:51,  1.53s/it]

Iter 120.0, Minibatch Loss= 0.944792, Training Accuracy= 0.53125


 15%|█████████▉                                                      | 121/782 [03:03<16:57,  1.54s/it]

Iter 121.0, Minibatch Loss= 0.943754, Training Accuracy= 0.53125


 16%|█████████▉                                                      | 122/782 [03:04<16:57,  1.54s/it]

Iter 122.0, Minibatch Loss= 0.942476, Training Accuracy= 0.53125


 16%|██████████                                                      | 123/782 [03:06<17:16,  1.57s/it]

Iter 123.0, Minibatch Loss= 0.940834, Training Accuracy= 0.53125


 16%|██████████▏                                                     | 124/782 [03:08<17:12,  1.57s/it]

Iter 124.0, Minibatch Loss= 0.938617, Training Accuracy= 0.53125


 16%|██████████▏                                                     | 125/782 [03:09<17:22,  1.59s/it]

Iter 125.0, Minibatch Loss= 1.018488, Training Accuracy= 0.50781


 16%|██████████▎                                                     | 126/782 [03:11<17:23,  1.59s/it]

Iter 126.0, Minibatch Loss= 1.041809, Training Accuracy= 0.50781


 16%|██████████▍                                                     | 127/782 [03:12<16:47,  1.54s/it]

Iter 127.0, Minibatch Loss= 0.996499, Training Accuracy= 0.50781


 16%|██████████▍                                                     | 128/782 [03:14<16:31,  1.52s/it]

Iter 128.0, Minibatch Loss= 0.992916, Training Accuracy= 0.51562


 16%|██████████▌                                                     | 129/782 [03:15<16:16,  1.50s/it]

Iter 129.0, Minibatch Loss= 0.989512, Training Accuracy= 0.51562


 17%|██████████▋                                                     | 130/782 [03:16<16:00,  1.47s/it]

Iter 130.0, Minibatch Loss= 0.986134, Training Accuracy= 0.51562


 17%|██████████▋                                                     | 131/782 [03:18<16:17,  1.50s/it]

Iter 131.0, Minibatch Loss= 0.982613, Training Accuracy= 0.51562


 17%|██████████▊                                                     | 132/782 [03:19<16:03,  1.48s/it]

Iter 132.0, Minibatch Loss= 0.979106, Training Accuracy= 0.51562


 17%|██████████▉                                                     | 133/782 [03:21<15:56,  1.47s/it]

Iter 133.0, Minibatch Loss= 0.979235, Training Accuracy= 0.51562


 17%|██████████▉                                                     | 134/782 [03:22<16:06,  1.49s/it]

Iter 134.0, Minibatch Loss= 1.017482, Training Accuracy= 0.50781


 17%|███████████                                                     | 135/782 [03:24<15:54,  1.47s/it]

Iter 135.0, Minibatch Loss= 1.089793, Training Accuracy= 0.51562


 17%|███████████▏                                                    | 136/782 [03:25<15:43,  1.46s/it]

Iter 136.0, Minibatch Loss= 1.016865, Training Accuracy= 0.51562


 18%|███████████▏                                                    | 137/782 [03:27<15:30,  1.44s/it]

Iter 137.0, Minibatch Loss= 1.007012, Training Accuracy= 0.51562


 18%|███████████▎                                                    | 138/782 [03:28<15:23,  1.43s/it]

Iter 138.0, Minibatch Loss= 1.005851, Training Accuracy= 0.51562


 18%|███████████▍                                                    | 139/782 [03:30<15:49,  1.48s/it]

Iter 139.0, Minibatch Loss= 1.002010, Training Accuracy= 0.51562


 18%|███████████▍                                                    | 140/782 [03:31<15:37,  1.46s/it]

Iter 140.0, Minibatch Loss= 1.000495, Training Accuracy= 0.51562


 18%|███████████▌                                                    | 141/782 [03:33<15:38,  1.46s/it]

Iter 141.0, Minibatch Loss= 0.999097, Training Accuracy= 0.51562


 18%|███████████▌                                                    | 142/782 [03:34<15:45,  1.48s/it]

Iter 142.0, Minibatch Loss= 0.998119, Training Accuracy= 0.51562


 18%|███████████▋                                                    | 143/782 [03:35<15:19,  1.44s/it]

Iter 143.0, Minibatch Loss= 0.997217, Training Accuracy= 0.51562


 18%|███████████▊                                                    | 144/782 [03:37<16:08,  1.52s/it]

Iter 144.0, Minibatch Loss= 0.996334, Training Accuracy= 0.51562


 19%|███████████▊                                                    | 145/782 [03:39<16:35,  1.56s/it]

Iter 145.0, Minibatch Loss= 0.995402, Training Accuracy= 0.51562


 19%|███████████▉                                                    | 146/782 [03:40<16:40,  1.57s/it]

Iter 146.0, Minibatch Loss= 0.994374, Training Accuracy= 0.51562


 19%|████████████                                                    | 147/782 [03:42<16:51,  1.59s/it]

Iter 147.0, Minibatch Loss= 0.993196, Training Accuracy= 0.51562


 19%|████████████                                                    | 148/782 [03:44<16:25,  1.55s/it]

Iter 148.0, Minibatch Loss= 0.991804, Training Accuracy= 0.51562


 19%|████████████▏                                                   | 149/782 [03:45<17:02,  1.61s/it]

Iter 149.0, Minibatch Loss= 0.990103, Training Accuracy= 0.51562


 19%|████████████▎                                                   | 150/782 [03:47<16:44,  1.59s/it]

Iter 150.0, Minibatch Loss= 0.987960, Training Accuracy= 0.51562


 19%|████████████▎                                                   | 151/782 [03:48<16:17,  1.55s/it]

Iter 151.0, Minibatch Loss= 0.985180, Training Accuracy= 0.51562


 19%|████████████▍                                                   | 152/782 [03:50<16:17,  1.55s/it]

Iter 152.0, Minibatch Loss= 0.981483, Training Accuracy= 0.51562


 20%|████████████▌                                                   | 153/782 [03:51<16:07,  1.54s/it]

Iter 153.0, Minibatch Loss= 0.976481, Training Accuracy= 0.51562


 20%|████████████▌                                                   | 154/782 [03:53<16:01,  1.53s/it]

Iter 154.0, Minibatch Loss= 0.969581, Training Accuracy= 0.51562


 20%|████████████▋                                                   | 155/782 [03:54<15:59,  1.53s/it]

Iter 155.0, Minibatch Loss= 0.959383, Training Accuracy= 0.51562


 20%|████████████▊                                                   | 156/782 [03:56<16:12,  1.55s/it]

Iter 156.0, Minibatch Loss= 0.941549, Training Accuracy= 0.51562


 20%|████████████▊                                                   | 157/782 [03:58<16:13,  1.56s/it]

Iter 157.0, Minibatch Loss= 0.908810, Training Accuracy= 0.54688


 20%|████████████▉                                                   | 158/782 [03:59<15:43,  1.51s/it]

Iter 158.0, Minibatch Loss= 0.923158, Training Accuracy= 0.51562


 20%|█████████████                                                   | 159/782 [04:00<15:26,  1.49s/it]

Iter 159.0, Minibatch Loss= 1.114439, Training Accuracy= 0.50781


 20%|█████████████                                                   | 160/782 [04:02<15:13,  1.47s/it]

Iter 160.0, Minibatch Loss= 1.024383, Training Accuracy= 0.42188


 21%|█████████████▏                                                  | 161/782 [04:03<15:04,  1.46s/it]

Iter 161.0, Minibatch Loss= 1.040404, Training Accuracy= 0.50781


 21%|█████████████▎                                                  | 162/782 [04:05<15:18,  1.48s/it]

Iter 162.0, Minibatch Loss= 1.002096, Training Accuracy= 0.50781


 21%|█████████████▎                                                  | 163/782 [04:06<15:05,  1.46s/it]

Iter 163.0, Minibatch Loss= 0.995977, Training Accuracy= 0.51562


 21%|█████████████▍                                                  | 164/782 [04:08<15:12,  1.48s/it]

Iter 164.0, Minibatch Loss= 0.994704, Training Accuracy= 0.51562


 21%|█████████████▌                                                  | 165/782 [04:09<15:41,  1.53s/it]

Iter 165.0, Minibatch Loss= 0.993718, Training Accuracy= 0.50781


 21%|█████████████▌                                                  | 166/782 [04:11<15:23,  1.50s/it]

Iter 166.0, Minibatch Loss= 0.992815, Training Accuracy= 0.50781


 21%|█████████████▋                                                  | 167/782 [04:12<15:09,  1.48s/it]

Iter 167.0, Minibatch Loss= 0.991962, Training Accuracy= 0.50781


 21%|█████████████▋                                                  | 168/782 [04:14<14:53,  1.46s/it]

Iter 168.0, Minibatch Loss= 0.991136, Training Accuracy= 0.51562


 22%|█████████████▊                                                  | 169/782 [04:15<14:46,  1.45s/it]

Iter 169.0, Minibatch Loss= 0.990323, Training Accuracy= 0.51562


 22%|█████████████▉                                                  | 170/782 [04:17<15:00,  1.47s/it]

Iter 170.0, Minibatch Loss= 0.989509, Training Accuracy= 0.52344


 22%|█████████████▉                                                  | 171/782 [04:18<14:45,  1.45s/it]

Iter 171.0, Minibatch Loss= 0.988680, Training Accuracy= 0.52344


 22%|██████████████                                                  | 172/782 [04:19<14:36,  1.44s/it]

Iter 172.0, Minibatch Loss= 0.987825, Training Accuracy= 0.52344


 22%|██████████████▏                                                 | 173/782 [04:21<14:26,  1.42s/it]

Iter 173.0, Minibatch Loss= 0.986930, Training Accuracy= 0.52344


 22%|██████████████▏                                                 | 174/782 [04:22<14:13,  1.40s/it]

Iter 174.0, Minibatch Loss= 0.985979, Training Accuracy= 0.52344


 22%|██████████████▎                                                 | 175/782 [04:24<14:35,  1.44s/it]

Iter 175.0, Minibatch Loss= 0.984954, Training Accuracy= 0.52344


 23%|██████████████▍                                                 | 176/782 [04:25<14:22,  1.42s/it]

Iter 176.0, Minibatch Loss= 0.983830, Training Accuracy= 0.52344


 23%|██████████████▍                                                 | 177/782 [04:26<14:16,  1.42s/it]

Iter 177.0, Minibatch Loss= 0.982576, Training Accuracy= 0.52344


 23%|██████████████▌                                                 | 178/782 [04:28<14:31,  1.44s/it]

Iter 178.0, Minibatch Loss= 0.981148, Training Accuracy= 0.52344


 23%|██████████████▋                                                 | 179/782 [04:29<14:03,  1.40s/it]

Iter 179.0, Minibatch Loss= 0.979483, Training Accuracy= 0.52344


 23%|██████████████▋                                                 | 180/782 [04:31<14:19,  1.43s/it]

Iter 180.0, Minibatch Loss= 0.977485, Training Accuracy= 0.52344


 23%|██████████████▊                                                 | 181/782 [04:32<14:15,  1.42s/it]

Iter 181.0, Minibatch Loss= 0.975003, Training Accuracy= 0.52344


 23%|██████████████▉                                                 | 182/782 [04:34<14:20,  1.43s/it]

Iter 182.0, Minibatch Loss= 0.971782, Training Accuracy= 0.52344


 23%|██████████████▉                                                 | 183/782 [04:35<14:41,  1.47s/it]

Iter 183.0, Minibatch Loss= 0.967361, Training Accuracy= 0.51562


 24%|███████████████                                                 | 184/782 [04:37<14:20,  1.44s/it]

Iter 184.0, Minibatch Loss= 0.960830, Training Accuracy= 0.51562


 24%|███████████████▏                                                | 185/782 [04:38<14:37,  1.47s/it]

Iter 185.0, Minibatch Loss= 0.950145, Training Accuracy= 0.51562


 24%|███████████████▏                                                | 186/782 [04:40<14:38,  1.47s/it]

Iter 186.0, Minibatch Loss= 0.929902, Training Accuracy= 0.52344


 24%|███████████████▎                                                | 187/782 [04:41<14:28,  1.46s/it]

Iter 187.0, Minibatch Loss= 0.882885, Training Accuracy= 0.57031


 24%|███████████████▍                                                | 188/782 [04:43<14:37,  1.48s/it]

Iter 188.0, Minibatch Loss= 0.804077, Training Accuracy= 0.67188


 24%|███████████████▍                                                | 189/782 [04:44<14:19,  1.45s/it]

Iter 189.0, Minibatch Loss= 1.314465, Training Accuracy= 0.21875


 24%|███████████████▌                                                | 190/782 [04:45<14:14,  1.44s/it]

Iter 190.0, Minibatch Loss= 1.779524, Training Accuracy= 0.32812


 24%|███████████████▋                                                | 191/782 [04:47<14:07,  1.43s/it]

Iter 191.0, Minibatch Loss= 2.710215, Training Accuracy= 0.21094


 25%|███████████████▋                                                | 192/782 [04:48<14:04,  1.43s/it]

Iter 192.0, Minibatch Loss= 1.520599, Training Accuracy= 0.37500


 25%|███████████████▊                                                | 193/782 [04:50<14:19,  1.46s/it]

Iter 193.0, Minibatch Loss= 1.196303, Training Accuracy= 0.50781


 25%|███████████████▉                                                | 194/782 [04:51<14:04,  1.44s/it]

Iter 194.0, Minibatch Loss= 1.117741, Training Accuracy= 0.32812


 25%|███████████████▉                                                | 195/782 [04:53<14:02,  1.44s/it]

Iter 195.0, Minibatch Loss= 1.113796, Training Accuracy= 0.50781


 25%|████████████████                                                | 196/782 [04:54<14:17,  1.46s/it]

Iter 196.0, Minibatch Loss= 1.000856, Training Accuracy= 0.51562


 25%|████████████████                                                | 197/782 [04:55<13:47,  1.42s/it]

Iter 197.0, Minibatch Loss= 0.986958, Training Accuracy= 0.51562


 25%|████████████████▏                                               | 198/782 [04:57<13:59,  1.44s/it]

Iter 198.0, Minibatch Loss= 0.981764, Training Accuracy= 0.51562


 25%|████████████████▎                                               | 199/782 [04:58<13:51,  1.43s/it]

Iter 199.0, Minibatch Loss= 0.976561, Training Accuracy= 0.51562


 26%|████████████████▎                                               | 200/782 [05:00<13:46,  1.42s/it]

Iter 200.0, Minibatch Loss= 0.970238, Training Accuracy= 0.51562


 26%|████████████████▍                                               | 201/782 [05:01<14:02,  1.45s/it]

Iter 201.0, Minibatch Loss= 0.961924, Training Accuracy= 0.51562


 26%|████████████████▌                                               | 202/782 [05:03<13:59,  1.45s/it]

Iter 202.0, Minibatch Loss= 0.950245, Training Accuracy= 0.51562


 26%|████████████████▌                                               | 203/782 [05:04<13:58,  1.45s/it]

Iter 203.0, Minibatch Loss= 0.936487, Training Accuracy= 0.51562


 26%|████████████████▋                                               | 204/782 [05:05<13:47,  1.43s/it]

Iter 204.0, Minibatch Loss= 1.120291, Training Accuracy= 0.50000


 26%|████████████████▊                                               | 205/782 [05:07<13:50,  1.44s/it]

Iter 205.0, Minibatch Loss= 0.996752, Training Accuracy= 0.51562


 26%|████████████████▊                                               | 206/782 [05:08<14:02,  1.46s/it]

Iter 206.0, Minibatch Loss= 0.981088, Training Accuracy= 0.51562


 26%|████████████████▉                                               | 207/782 [05:10<14:06,  1.47s/it]

Iter 207.0, Minibatch Loss= 0.974443, Training Accuracy= 0.51562


 27%|█████████████████                                               | 208/782 [05:11<13:44,  1.44s/it]

Iter 208.0, Minibatch Loss= 0.966894, Training Accuracy= 0.51562


 27%|█████████████████                                               | 209/782 [05:13<13:58,  1.46s/it]

Iter 209.0, Minibatch Loss= 0.956692, Training Accuracy= 0.51562


 27%|█████████████████▏                                              | 210/782 [05:14<14:04,  1.48s/it]

Iter 210.0, Minibatch Loss= 0.942742, Training Accuracy= 0.51562


 27%|█████████████████▎                                              | 211/782 [05:16<14:00,  1.47s/it]

Iter 211.0, Minibatch Loss= 0.924403, Training Accuracy= 0.53125


 27%|█████████████████▎                                              | 212/782 [05:17<13:55,  1.47s/it]

Iter 212.0, Minibatch Loss= 0.898814, Training Accuracy= 0.54688


 27%|█████████████████▍                                              | 213/782 [05:19<13:43,  1.45s/it]

Iter 213.0, Minibatch Loss= 0.977569, Training Accuracy= 0.43750


 27%|█████████████████▌                                              | 214/782 [05:20<13:37,  1.44s/it]

Iter 214.0, Minibatch Loss= 1.084405, Training Accuracy= 0.51562


 27%|█████████████████▌                                              | 215/782 [05:21<13:38,  1.44s/it]

Iter 215.0, Minibatch Loss= 0.989670, Training Accuracy= 0.51562


 28%|█████████████████▋                                              | 216/782 [05:23<13:33,  1.44s/it]

Iter 216.0, Minibatch Loss= 0.973055, Training Accuracy= 0.51562


 28%|█████████████████▊                                              | 217/782 [05:24<13:28,  1.43s/it]

Iter 217.0, Minibatch Loss= 0.966797, Training Accuracy= 0.51562


 28%|█████████████████▊                                              | 218/782 [05:26<13:23,  1.42s/it]

Iter 218.0, Minibatch Loss= 0.958345, Training Accuracy= 0.51562


 28%|█████████████████▉                                              | 219/782 [05:27<13:17,  1.42s/it]

Iter 219.0, Minibatch Loss= 0.949952, Training Accuracy= 0.51562


 28%|██████████████████                                              | 220/782 [05:28<13:05,  1.40s/it]

Iter 220.0, Minibatch Loss= 0.940545, Training Accuracy= 0.55469


 28%|██████████████████                                              | 221/782 [05:30<13:06,  1.40s/it]

Iter 221.0, Minibatch Loss= 0.995894, Training Accuracy= 0.51562


 28%|██████████████████▏                                             | 222/782 [05:31<13:08,  1.41s/it]

Iter 222.0, Minibatch Loss= 0.910412, Training Accuracy= 0.51562


 29%|██████████████████▎                                             | 223/782 [05:33<13:15,  1.42s/it]

Iter 223.0, Minibatch Loss= 0.876291, Training Accuracy= 0.58594


 29%|██████████████████▎                                             | 224/782 [05:34<13:16,  1.43s/it]

Iter 224.0, Minibatch Loss= 1.085388, Training Accuracy= 0.50781


 29%|██████████████████▍                                             | 225/782 [05:36<13:12,  1.42s/it]

Iter 225.0, Minibatch Loss= 0.967107, Training Accuracy= 0.51562


 29%|██████████████████▍                                             | 226/782 [05:37<13:36,  1.47s/it]

Iter 226.0, Minibatch Loss= 0.922495, Training Accuracy= 0.53125


 29%|██████████████████▌                                             | 227/782 [05:39<13:43,  1.48s/it]

Iter 227.0, Minibatch Loss= 0.884834, Training Accuracy= 0.45312


 29%|██████████████████▋                                             | 228/782 [05:40<13:48,  1.50s/it]

Iter 228.0, Minibatch Loss= 0.997860, Training Accuracy= 0.51562


 29%|██████████████████▋                                             | 229/782 [05:42<13:37,  1.48s/it]

Iter 229.0, Minibatch Loss= 0.965508, Training Accuracy= 0.51562


 29%|██████████████████▊                                             | 230/782 [05:43<13:28,  1.46s/it]

Iter 230.0, Minibatch Loss= 0.950497, Training Accuracy= 0.52344


 30%|██████████████████▉                                             | 231/782 [05:45<13:24,  1.46s/it]

Iter 231.0, Minibatch Loss= 0.930500, Training Accuracy= 0.53125


 30%|██████████████████▉                                             | 232/782 [05:46<13:42,  1.50s/it]

Iter 232.0, Minibatch Loss= 0.901187, Training Accuracy= 0.57812


 30%|███████████████████                                             | 233/782 [05:48<13:35,  1.49s/it]

Iter 233.0, Minibatch Loss= 0.858354, Training Accuracy= 0.60938


 30%|███████████████████▏                                            | 234/782 [05:49<13:27,  1.47s/it]

Iter 234.0, Minibatch Loss= 0.800189, Training Accuracy= 0.68750


 30%|███████████████████▏                                            | 235/782 [05:51<13:38,  1.50s/it]

Iter 235.0, Minibatch Loss= 0.925743, Training Accuracy= 0.51562


 30%|███████████████████▎                                            | 236/782 [05:52<13:55,  1.53s/it]

Iter 236.0, Minibatch Loss= 0.989868, Training Accuracy= 0.54688


 30%|███████████████████▍                                            | 237/782 [05:54<14:03,  1.55s/it]

Iter 237.0, Minibatch Loss= 1.324737, Training Accuracy= 0.34375


 30%|███████████████████▍                                            | 238/782 [05:55<14:02,  1.55s/it]

Iter 238.0, Minibatch Loss= 1.280227, Training Accuracy= 0.32812


 31%|███████████████████▌                                            | 239/782 [05:57<14:18,  1.58s/it]

Iter 239.0, Minibatch Loss= 1.228017, Training Accuracy= 0.51562


 31%|███████████████████▋                                            | 240/782 [05:58<13:56,  1.54s/it]

Iter 240.0, Minibatch Loss= 1.071241, Training Accuracy= 0.32812


 31%|███████████████████▋                                            | 241/782 [06:00<13:46,  1.53s/it]

Iter 241.0, Minibatch Loss= 1.011471, Training Accuracy= 0.51562


 31%|███████████████████▊                                            | 242/782 [06:01<13:35,  1.51s/it]

Iter 242.0, Minibatch Loss= 1.001164, Training Accuracy= 0.53125


 31%|███████████████████▉                                            | 243/782 [06:03<13:49,  1.54s/it]

Iter 243.0, Minibatch Loss= 0.998105, Training Accuracy= 0.51562


 31%|███████████████████▉                                            | 244/782 [06:05<13:39,  1.52s/it]

Iter 244.0, Minibatch Loss= 0.996021, Training Accuracy= 0.51562


 31%|████████████████████                                            | 245/782 [06:06<13:39,  1.53s/it]

Iter 245.0, Minibatch Loss= 0.994113, Training Accuracy= 0.51562


 31%|████████████████████▏                                           | 246/782 [06:08<13:50,  1.55s/it]

Iter 246.0, Minibatch Loss= 0.991928, Training Accuracy= 0.51562


 32%|████████████████████▏                                           | 247/782 [06:09<13:48,  1.55s/it]

Iter 247.0, Minibatch Loss= 0.989173, Training Accuracy= 0.51562


 32%|████████████████████▎                                           | 248/782 [06:11<13:23,  1.50s/it]

Iter 248.0, Minibatch Loss= 0.985420, Training Accuracy= 0.51562


 32%|████████████████████▍                                           | 249/782 [06:12<13:06,  1.48s/it]

Iter 249.0, Minibatch Loss= 0.979945, Training Accuracy= 0.51562


 32%|████████████████████▍                                           | 250/782 [06:13<13:01,  1.47s/it]

Iter 250.0, Minibatch Loss= 0.917059, Training Accuracy= 0.60938


 32%|████████████████████▌                                           | 251/782 [06:15<12:59,  1.47s/it]

Iter 251.0, Minibatch Loss= 0.911467, Training Accuracy= 0.60938


 32%|████████████████████▌                                           | 252/782 [06:16<12:47,  1.45s/it]

Iter 252.0, Minibatch Loss= 0.908581, Training Accuracy= 0.60938


 32%|████████████████████▋                                           | 253/782 [06:18<12:43,  1.44s/it]

Iter 253.0, Minibatch Loss= 0.905960, Training Accuracy= 0.60938


 32%|████████████████████▊                                           | 254/782 [06:19<12:42,  1.44s/it]

Iter 254.0, Minibatch Loss= 0.903303, Training Accuracy= 0.60938


 33%|████████████████████▊                                           | 255/782 [06:21<12:31,  1.43s/it]

Iter 255.0, Minibatch Loss= 0.900606, Training Accuracy= 0.60938


 33%|████████████████████▉                                           | 256/782 [06:22<12:19,  1.41s/it]

Iter 256.0, Minibatch Loss= 0.897963, Training Accuracy= 0.60156


 33%|█████████████████████                                           | 257/782 [06:23<12:22,  1.41s/it]

Iter 257.0, Minibatch Loss= 0.895426, Training Accuracy= 0.59375


 33%|█████████████████████                                           | 258/782 [06:25<12:21,  1.42s/it]

Iter 258.0, Minibatch Loss= 0.892921, Training Accuracy= 0.59375


 33%|█████████████████████▏                                          | 259/782 [06:26<12:19,  1.41s/it]

Iter 259.0, Minibatch Loss= 0.890323, Training Accuracy= 0.60156


 33%|█████████████████████▎                                          | 260/782 [06:28<12:20,  1.42s/it]

Iter 260.0, Minibatch Loss= 0.887517, Training Accuracy= 0.60156


 33%|█████████████████████▎                                          | 261/782 [06:29<12:25,  1.43s/it]

Iter 261.0, Minibatch Loss= 0.884379, Training Accuracy= 0.60156


 34%|█████████████████████▍                                          | 262/782 [06:30<12:17,  1.42s/it]

Iter 262.0, Minibatch Loss= 0.880718, Training Accuracy= 0.60156


 34%|█████████████████████▌                                          | 263/782 [06:32<12:07,  1.40s/it]

Iter 263.0, Minibatch Loss= 0.876215, Training Accuracy= 0.60156


 34%|█████████████████████▌                                          | 264/782 [06:33<12:16,  1.42s/it]

Iter 264.0, Minibatch Loss= 0.870315, Training Accuracy= 0.60156


 34%|█████████████████████▋                                          | 265/782 [06:35<12:15,  1.42s/it]

Iter 265.0, Minibatch Loss= 0.862115, Training Accuracy= 0.59375


 34%|█████████████████████▊                                          | 266/782 [06:36<12:15,  1.43s/it]

Iter 266.0, Minibatch Loss= 0.850640, Training Accuracy= 0.60938


 34%|█████████████████████▊                                          | 267/782 [06:38<12:18,  1.43s/it]

Iter 267.0, Minibatch Loss= 0.836661, Training Accuracy= 0.63281


 34%|█████████████████████▉                                          | 268/782 [06:39<12:23,  1.45s/it]

Iter 268.0, Minibatch Loss= 0.821013, Training Accuracy= 0.63281


 34%|██████████████████████                                          | 269/782 [06:41<12:13,  1.43s/it]

Iter 269.0, Minibatch Loss= 1.171104, Training Accuracy= 0.45312


 35%|██████████████████████                                          | 270/782 [06:42<12:13,  1.43s/it]

Iter 270.0, Minibatch Loss= 1.001715, Training Accuracy= 0.60938


 35%|██████████████████████▏                                         | 271/782 [06:43<12:14,  1.44s/it]

Iter 271.0, Minibatch Loss= 1.190712, Training Accuracy= 0.12500


 35%|██████████████████████▎                                         | 272/782 [06:45<12:10,  1.43s/it]

Iter 272.0, Minibatch Loss= 1.362007, Training Accuracy= 0.60938


 35%|██████████████████████▎                                         | 273/782 [06:46<12:04,  1.42s/it]

Iter 273.0, Minibatch Loss= 0.985695, Training Accuracy= 0.60938


 35%|██████████████████████▍                                         | 274/782 [06:48<11:59,  1.42s/it]

Iter 274.0, Minibatch Loss= 0.935860, Training Accuracy= 0.60938


 35%|██████████████████████▌                                         | 275/782 [06:49<12:00,  1.42s/it]

Iter 275.0, Minibatch Loss= 0.911600, Training Accuracy= 0.60938


 35%|██████████████████████▌                                         | 276/782 [06:50<12:02,  1.43s/it]

Iter 276.0, Minibatch Loss= 0.909727, Training Accuracy= 0.60938


 35%|██████████████████████▋                                         | 277/782 [06:52<11:53,  1.41s/it]

Iter 277.0, Minibatch Loss= 0.908579, Training Accuracy= 0.60938


 36%|██████████████████████▊                                         | 278/782 [06:53<11:49,  1.41s/it]

Iter 278.0, Minibatch Loss= 0.907588, Training Accuracy= 0.60938


 36%|██████████████████████▊                                         | 279/782 [06:55<11:53,  1.42s/it]

Iter 279.0, Minibatch Loss= 0.906666, Training Accuracy= 0.60938


 36%|██████████████████████▉                                         | 280/782 [06:56<11:53,  1.42s/it]

Iter 280.0, Minibatch Loss= 0.905761, Training Accuracy= 0.60938


 36%|██████████████████████▉                                         | 281/782 [06:58<11:46,  1.41s/it]

Iter 281.0, Minibatch Loss= 0.904833, Training Accuracy= 0.60938


 36%|███████████████████████                                         | 282/782 [06:59<11:41,  1.40s/it]

Iter 282.0, Minibatch Loss= 0.903855, Training Accuracy= 0.60938


 36%|███████████████████████▏                                        | 283/782 [07:00<11:39,  1.40s/it]

Iter 283.0, Minibatch Loss= 0.902797, Training Accuracy= 0.60938


 36%|███████████████████████▏                                        | 284/782 [07:02<11:48,  1.42s/it]

Iter 284.0, Minibatch Loss= 0.901634, Training Accuracy= 0.60938


 36%|███████████████████████▎                                        | 285/782 [07:03<11:51,  1.43s/it]

Iter 285.0, Minibatch Loss= 0.900335, Training Accuracy= 0.60938


 37%|███████████████████████▍                                        | 286/782 [07:05<11:32,  1.40s/it]

Iter 286.0, Minibatch Loss= 0.898874, Training Accuracy= 0.60938


 37%|███████████████████████▍                                        | 287/782 [07:06<11:35,  1.41s/it]

Iter 287.0, Minibatch Loss= 0.897219, Training Accuracy= 0.60938


 37%|███████████████████████▌                                        | 288/782 [07:07<11:36,  1.41s/it]

Iter 288.0, Minibatch Loss= 0.895344, Training Accuracy= 0.60938


 37%|███████████████████████▋                                        | 289/782 [07:09<11:43,  1.43s/it]

Iter 289.0, Minibatch Loss= 0.893226, Training Accuracy= 0.60938


 37%|███████████████████████▋                                        | 290/782 [07:10<11:40,  1.42s/it]

Iter 290.0, Minibatch Loss= 0.890851, Training Accuracy= 0.60938


 37%|███████████████████████▊                                        | 291/782 [07:12<11:34,  1.41s/it]

Iter 291.0, Minibatch Loss= 0.888214, Training Accuracy= 0.60938


 37%|███████████████████████▉                                        | 292/782 [07:13<11:30,  1.41s/it]

Iter 292.0, Minibatch Loss= 0.885308, Training Accuracy= 0.60938


 37%|███████████████████████▉                                        | 293/782 [07:14<11:31,  1.41s/it]

Iter 293.0, Minibatch Loss= 0.882111, Training Accuracy= 0.60938


 38%|████████████████████████                                        | 294/782 [07:16<11:30,  1.42s/it]

Iter 294.0, Minibatch Loss= 0.878563, Training Accuracy= 0.60938


 38%|████████████████████████▏                                       | 295/782 [07:17<11:19,  1.39s/it]

Iter 295.0, Minibatch Loss= 0.874548, Training Accuracy= 0.60938


 38%|████████████████████████▏                                       | 296/782 [07:19<11:22,  1.40s/it]

Iter 296.0, Minibatch Loss= 0.869883, Training Accuracy= 0.60938


 38%|████████████████████████▎                                       | 297/782 [07:20<11:23,  1.41s/it]

Iter 297.0, Minibatch Loss= 0.864317, Training Accuracy= 0.60938


 38%|████████████████████████▍                                       | 298/782 [07:22<11:23,  1.41s/it]

Iter 298.0, Minibatch Loss= 0.857534, Training Accuracy= 0.60938


 38%|████████████████████████▍                                       | 299/782 [07:23<11:23,  1.42s/it]

Iter 299.0, Minibatch Loss= 0.849201, Training Accuracy= 0.60938


 38%|████████████████████████▌                                       | 300/782 [07:24<11:23,  1.42s/it]

Iter 300.0, Minibatch Loss= 0.839093, Training Accuracy= 0.61719


 38%|████████████████████████▋                                       | 301/782 [07:26<11:38,  1.45s/it]

Iter 301.0, Minibatch Loss= 0.827195, Training Accuracy= 0.64062


 39%|████████████████████████▋                                       | 302/782 [07:27<11:56,  1.49s/it]

Iter 302.0, Minibatch Loss= 0.813494, Training Accuracy= 0.64062


 39%|████████████████████████▊                                       | 303/782 [07:29<12:20,  1.55s/it]

Iter 303.0, Minibatch Loss= 0.797431, Training Accuracy= 0.66406


 39%|████████████████████████▉                                       | 304/782 [07:31<12:06,  1.52s/it]

Iter 304.0, Minibatch Loss= 0.778231, Training Accuracy= 0.66406


 39%|████████████████████████▉                                       | 305/782 [07:32<12:01,  1.51s/it]

Iter 305.0, Minibatch Loss= 0.759865, Training Accuracy= 0.68750


 39%|█████████████████████████                                       | 306/782 [07:34<12:12,  1.54s/it]

Iter 306.0, Minibatch Loss= 0.963490, Training Accuracy= 0.61719


 39%|█████████████████████████▏                                      | 307/782 [07:35<12:02,  1.52s/it]

Iter 307.0, Minibatch Loss= 1.005510, Training Accuracy= 0.60938


 39%|█████████████████████████▏                                      | 308/782 [07:37<11:51,  1.50s/it]

Iter 308.0, Minibatch Loss= 0.933658, Training Accuracy= 0.60938


 40%|█████████████████████████▎                                      | 309/782 [07:38<11:52,  1.51s/it]

Iter 309.0, Minibatch Loss= 0.905037, Training Accuracy= 0.60938


 40%|█████████████████████████▎                                      | 310/782 [07:40<11:49,  1.50s/it]

Iter 310.0, Minibatch Loss= 0.898679, Training Accuracy= 0.60938


 40%|█████████████████████████▍                                      | 311/782 [07:41<11:36,  1.48s/it]

Iter 311.0, Minibatch Loss= 0.892353, Training Accuracy= 0.60938


 40%|█████████████████████████▌                                      | 312/782 [07:43<11:25,  1.46s/it]

Iter 312.0, Minibatch Loss= 0.885343, Training Accuracy= 0.60938


 40%|█████████████████████████▌                                      | 313/782 [07:44<11:16,  1.44s/it]

Iter 313.0, Minibatch Loss= 0.877562, Training Accuracy= 0.60938


 40%|█████████████████████████▋                                      | 314/782 [07:45<10:57,  1.41s/it]

Iter 314.0, Minibatch Loss= 0.869062, Training Accuracy= 0.60938


 40%|█████████████████████████▊                                      | 315/782 [07:47<10:59,  1.41s/it]

Iter 315.0, Minibatch Loss= 0.881186, Training Accuracy= 0.60938


 40%|█████████████████████████▊                                      | 316/782 [07:48<10:56,  1.41s/it]

Iter 316.0, Minibatch Loss= 1.297114, Training Accuracy= 0.32031


 41%|█████████████████████████▉                                      | 317/782 [07:49<10:54,  1.41s/it]

Iter 317.0, Minibatch Loss= 1.117259, Training Accuracy= 0.59375


 41%|██████████████████████████                                      | 318/782 [07:51<10:49,  1.40s/it]

Iter 318.0, Minibatch Loss= 0.963514, Training Accuracy= 0.60938


 41%|██████████████████████████                                      | 319/782 [07:52<10:50,  1.40s/it]

Iter 319.0, Minibatch Loss= 1.133163, Training Accuracy= 0.12500


 41%|██████████████████████████▏                                     | 320/782 [07:54<10:39,  1.38s/it]

Iter 320.0, Minibatch Loss= 1.337813, Training Accuracy= 0.60938


 41%|██████████████████████████▎                                     | 321/782 [07:55<10:45,  1.40s/it]

Iter 321.0, Minibatch Loss= 1.013642, Training Accuracy= 0.27344


 41%|██████████████████████████▎                                     | 322/782 [07:56<10:49,  1.41s/it]

Iter 322.0, Minibatch Loss= 1.008807, Training Accuracy= 0.60938


 41%|██████████████████████████▍                                     | 323/782 [07:58<10:51,  1.42s/it]

Iter 323.0, Minibatch Loss= 0.918170, Training Accuracy= 0.60938


 41%|██████████████████████████▌                                     | 324/782 [07:59<10:47,  1.41s/it]

Iter 324.0, Minibatch Loss= 0.912403, Training Accuracy= 0.60938


 42%|██████████████████████████▌                                     | 325/782 [08:01<10:46,  1.41s/it]

Iter 325.0, Minibatch Loss= 0.910698, Training Accuracy= 0.60938


 42%|██████████████████████████▋                                     | 326/782 [08:02<10:45,  1.42s/it]

Iter 326.0, Minibatch Loss= 0.909036, Training Accuracy= 0.60938


 42%|██████████████████████████▊                                     | 327/782 [08:04<10:47,  1.42s/it]

Iter 327.0, Minibatch Loss= 0.906932, Training Accuracy= 0.60938


 42%|██████████████████████████▊                                     | 328/782 [08:05<10:33,  1.40s/it]

Iter 328.0, Minibatch Loss= 0.904046, Training Accuracy= 0.60938


 42%|██████████████████████████▉                                     | 329/782 [08:06<10:28,  1.39s/it]

Iter 329.0, Minibatch Loss= 0.899805, Training Accuracy= 0.60938


 42%|███████████████████████████                                     | 330/782 [08:08<10:37,  1.41s/it]

Iter 330.0, Minibatch Loss= 0.893301, Training Accuracy= 0.60938


 42%|███████████████████████████                                     | 331/782 [08:09<10:45,  1.43s/it]

Iter 331.0, Minibatch Loss= 0.883647, Training Accuracy= 0.60938


 42%|███████████████████████████▏                                    | 332/782 [08:11<10:42,  1.43s/it]

Iter 332.0, Minibatch Loss= 0.870820, Training Accuracy= 0.60938


 43%|███████████████████████████▎                                    | 333/782 [08:12<10:42,  1.43s/it]

Iter 333.0, Minibatch Loss= 0.854830, Training Accuracy= 0.60938


 43%|███████████████████████████▎                                    | 334/782 [08:14<10:39,  1.43s/it]

Iter 334.0, Minibatch Loss= 0.839194, Training Accuracy= 0.60938


 43%|███████████████████████████▍                                    | 335/782 [08:15<10:37,  1.43s/it]

Iter 335.0, Minibatch Loss= 0.908216, Training Accuracy= 0.60938


 43%|███████████████████████████▍                                    | 336/782 [08:16<10:36,  1.43s/it]

Iter 336.0, Minibatch Loss= 1.103173, Training Accuracy= 0.50781


 43%|███████████████████████████▌                                    | 337/782 [08:18<10:33,  1.42s/it]

Iter 337.0, Minibatch Loss= 0.905350, Training Accuracy= 0.60938


 43%|███████████████████████████▋                                    | 338/782 [08:19<10:28,  1.42s/it]

Iter 338.0, Minibatch Loss= 0.887930, Training Accuracy= 0.61719


 43%|███████████████████████████▋                                    | 339/782 [08:21<10:16,  1.39s/it]

Iter 339.0, Minibatch Loss= 0.877355, Training Accuracy= 0.61719


 43%|███████████████████████████▊                                    | 340/782 [08:22<10:15,  1.39s/it]

Iter 340.0, Minibatch Loss= 0.866121, Training Accuracy= 0.61719


 44%|███████████████████████████▉                                    | 341/782 [08:23<10:16,  1.40s/it]

Iter 341.0, Minibatch Loss= 0.861018, Training Accuracy= 0.61719


 44%|███████████████████████████▉                                    | 342/782 [08:25<10:17,  1.40s/it]

Iter 342.0, Minibatch Loss= 0.882945, Training Accuracy= 0.64062


 44%|████████████████████████████                                    | 343/782 [08:26<10:16,  1.40s/it]

Iter 343.0, Minibatch Loss= 0.965609, Training Accuracy= 0.60938


 44%|████████████████████████████▏                                   | 344/782 [08:28<10:16,  1.41s/it]

Iter 344.0, Minibatch Loss= 0.863527, Training Accuracy= 0.61719


 44%|████████████████████████████▏                                   | 345/782 [08:29<10:16,  1.41s/it]

Iter 345.0, Minibatch Loss= 0.840270, Training Accuracy= 0.61719


 44%|████████████████████████████▎                                   | 346/782 [08:30<10:16,  1.41s/it]

Iter 346.0, Minibatch Loss= 0.825391, Training Accuracy= 0.62500


 44%|████████████████████████████▍                                   | 347/782 [08:32<10:12,  1.41s/it]

Iter 347.0, Minibatch Loss= 0.830605, Training Accuracy= 0.61719


 45%|████████████████████████████▍                                   | 348/782 [08:33<10:16,  1.42s/it]

Iter 348.0, Minibatch Loss= 0.986939, Training Accuracy= 0.61719


 45%|████████████████████████████▌                                   | 349/782 [08:35<10:04,  1.40s/it]

Iter 349.0, Minibatch Loss= 0.836583, Training Accuracy= 0.62500


 45%|████████████████████████████▋                                   | 350/782 [08:36<10:07,  1.41s/it]

Iter 350.0, Minibatch Loss= 0.857312, Training Accuracy= 0.61719


 45%|████████████████████████████▋                                   | 351/782 [08:37<10:16,  1.43s/it]

Iter 351.0, Minibatch Loss= 0.972711, Training Accuracy= 0.59375


 45%|████████████████████████████▊                                   | 352/782 [08:39<10:25,  1.45s/it]

Iter 352.0, Minibatch Loss= 0.968705, Training Accuracy= 0.60938


 45%|████████████████████████████▉                                   | 353/782 [08:40<10:24,  1.46s/it]

Iter 353.0, Minibatch Loss= 0.918392, Training Accuracy= 0.61719


 45%|████████████████████████████▉                                   | 354/782 [08:42<10:19,  1.45s/it]

Iter 354.0, Minibatch Loss= 0.903100, Training Accuracy= 0.61719


 45%|█████████████████████████████                                   | 355/782 [08:43<10:13,  1.44s/it]

Iter 355.0, Minibatch Loss= 0.895626, Training Accuracy= 0.61719


 46%|█████████████████████████████▏                                  | 356/782 [08:45<10:07,  1.43s/it]

Iter 356.0, Minibatch Loss= 0.890583, Training Accuracy= 0.61719


 46%|█████████████████████████████▏                                  | 357/782 [08:46<10:04,  1.42s/it]

Iter 357.0, Minibatch Loss= 0.885819, Training Accuracy= 0.61719


 46%|█████████████████████████████▎                                  | 358/782 [08:48<09:59,  1.41s/it]

Iter 358.0, Minibatch Loss= 0.879823, Training Accuracy= 0.61719


 46%|█████████████████████████████▍                                  | 359/782 [08:49<09:47,  1.39s/it]

Iter 359.0, Minibatch Loss= 0.870602, Training Accuracy= 0.61719


 46%|█████████████████████████████▍                                  | 360/782 [08:50<09:46,  1.39s/it]

Iter 360.0, Minibatch Loss= 0.854726, Training Accuracy= 0.61719


 46%|█████████████████████████████▌                                  | 361/782 [08:52<09:46,  1.39s/it]

Iter 361.0, Minibatch Loss= 0.829050, Training Accuracy= 0.62500


 46%|█████████████████████████████▋                                  | 362/782 [08:53<10:03,  1.44s/it]

Iter 362.0, Minibatch Loss= 0.801605, Training Accuracy= 0.63281


 46%|█████████████████████████████▋                                  | 363/782 [08:55<10:24,  1.49s/it]

Iter 363.0, Minibatch Loss= 0.778318, Training Accuracy= 0.64062


 47%|█████████████████████████████▊                                  | 364/782 [08:56<10:17,  1.48s/it]

Iter 364.0, Minibatch Loss= 0.761654, Training Accuracy= 0.64062


 47%|█████████████████████████████▊                                  | 365/782 [08:58<09:57,  1.43s/it]

Iter 365.0, Minibatch Loss= 0.923744, Training Accuracy= 0.60938


 47%|█████████████████████████████▉                                  | 366/782 [08:59<09:47,  1.41s/it]

Iter 366.0, Minibatch Loss= 1.217735, Training Accuracy= 0.53906


 47%|██████████████████████████████                                  | 367/782 [09:00<09:46,  1.41s/it]

Iter 367.0, Minibatch Loss= 0.924812, Training Accuracy= 0.60938


 47%|██████████████████████████████                                  | 368/782 [09:02<09:49,  1.42s/it]

Iter 368.0, Minibatch Loss= 0.938094, Training Accuracy= 0.61719


 47%|██████████████████████████████▏                                 | 369/782 [09:03<09:55,  1.44s/it]

Iter 369.0, Minibatch Loss= 0.890048, Training Accuracy= 0.60938


 47%|██████████████████████████████▎                                 | 370/782 [09:05<09:48,  1.43s/it]

Iter 370.0, Minibatch Loss= 0.866817, Training Accuracy= 0.63281


 47%|██████████████████████████████▎                                 | 371/782 [09:06<09:47,  1.43s/it]

Iter 371.0, Minibatch Loss= 0.848640, Training Accuracy= 0.61719


 48%|██████████████████████████████▍                                 | 372/782 [09:08<09:48,  1.44s/it]

Iter 372.0, Minibatch Loss= 0.850102, Training Accuracy= 0.64062


 48%|██████████████████████████████▌                                 | 373/782 [09:09<09:57,  1.46s/it]

Iter 373.0, Minibatch Loss= 0.939781, Training Accuracy= 0.60938


 48%|██████████████████████████████▌                                 | 374/782 [09:10<09:50,  1.45s/it]

Iter 374.0, Minibatch Loss= 0.832616, Training Accuracy= 0.61719


 48%|██████████████████████████████▋                                 | 375/782 [09:12<09:49,  1.45s/it]

Iter 375.0, Minibatch Loss= 0.818496, Training Accuracy= 0.64844


 48%|██████████████████████████████▊                                 | 376/782 [09:13<09:44,  1.44s/it]

Iter 376.0, Minibatch Loss= 0.801197, Training Accuracy= 0.65625


 48%|██████████████████████████████▊                                 | 377/782 [09:15<09:32,  1.41s/it]

Iter 377.0, Minibatch Loss= 0.791222, Training Accuracy= 0.65625


 48%|██████████████████████████████▉                                 | 378/782 [09:16<09:29,  1.41s/it]

Iter 378.0, Minibatch Loss= 0.781987, Training Accuracy= 0.65625


 48%|███████████████████████████████                                 | 379/782 [09:18<09:31,  1.42s/it]

Iter 379.0, Minibatch Loss= 0.772033, Training Accuracy= 0.65625


 49%|███████████████████████████████                                 | 380/782 [09:19<09:31,  1.42s/it]

Iter 380.0, Minibatch Loss= 0.761523, Training Accuracy= 0.65625


 49%|███████████████████████████████▏                                | 381/782 [09:20<09:39,  1.45s/it]

Iter 381.0, Minibatch Loss= 0.751018, Training Accuracy= 0.65625


 49%|███████████████████████████████▎                                | 382/782 [09:22<09:28,  1.42s/it]

Iter 382.0, Minibatch Loss= 0.740237, Training Accuracy= 0.65625


 49%|███████████████████████████████▎                                | 383/782 [09:23<09:23,  1.41s/it]

Iter 383.0, Minibatch Loss= 0.728747, Training Accuracy= 0.65625


 49%|███████████████████████████████▍                                | 384/782 [09:25<09:19,  1.41s/it]

Iter 384.0, Minibatch Loss= 0.716470, Training Accuracy= 0.65625


 49%|███████████████████████████████▌                                | 385/782 [09:26<09:17,  1.40s/it]

Iter 385.0, Minibatch Loss= 0.710948, Training Accuracy= 0.65625


 49%|███████████████████████████████▌                                | 386/782 [09:28<09:28,  1.44s/it]

Iter 386.0, Minibatch Loss= 0.815410, Training Accuracy= 0.57031


 49%|███████████████████████████████▋                                | 387/782 [09:29<09:24,  1.43s/it]

Iter 387.0, Minibatch Loss= 0.855176, Training Accuracy= 0.65625


 50%|███████████████████████████████▊                                | 388/782 [09:30<09:19,  1.42s/it]

Iter 388.0, Minibatch Loss= 0.748342, Training Accuracy= 0.65625


 50%|███████████████████████████████▊                                | 389/782 [09:32<09:14,  1.41s/it]

Iter 389.0, Minibatch Loss= 0.770152, Training Accuracy= 0.65625


 50%|███████████████████████████████▉                                | 390/782 [09:33<09:14,  1.41s/it]

Iter 390.0, Minibatch Loss= 0.825522, Training Accuracy= 0.65625


 50%|████████████████████████████████                                | 391/782 [09:35<09:26,  1.45s/it]

Iter 391.0, Minibatch Loss= 0.807445, Training Accuracy= 0.65625


 50%|████████████████████████████████                                | 392/782 [09:36<09:26,  1.45s/it]

Iter 392.0, Minibatch Loss= 0.794102, Training Accuracy= 0.65625


 50%|████████████████████████████████▏                               | 393/782 [09:38<09:41,  1.49s/it]

Iter 393.0, Minibatch Loss= 0.784539, Training Accuracy= 0.65625


 50%|████████████████████████████████▏                               | 394/782 [09:39<09:53,  1.53s/it]

Iter 394.0, Minibatch Loss= 0.778230, Training Accuracy= 0.65625


 51%|████████████████████████████████▎                               | 395/782 [09:41<09:35,  1.49s/it]

Iter 395.0, Minibatch Loss= 0.774396, Training Accuracy= 0.65625


 51%|████████████████████████████████▍                               | 396/782 [09:42<09:45,  1.52s/it]

Iter 396.0, Minibatch Loss= 0.771821, Training Accuracy= 0.65625


 51%|████████████████████████████████▍                               | 397/782 [09:44<09:53,  1.54s/it]

Iter 397.0, Minibatch Loss= 0.769559, Training Accuracy= 0.65625


 51%|████████████████████████████████▌                               | 398/782 [09:45<09:49,  1.53s/it]

Iter 398.0, Minibatch Loss= 0.767176, Training Accuracy= 0.65625


 51%|████████████████████████████████▋                               | 399/782 [09:47<09:52,  1.55s/it]

Iter 399.0, Minibatch Loss= 0.764573, Training Accuracy= 0.65625


 51%|████████████████████████████████▋                               | 400/782 [09:48<09:23,  1.48s/it]

Iter 400.0, Minibatch Loss= 0.763070, Training Accuracy= 0.65625


 51%|████████████████████████████████▊                               | 401/782 [09:50<09:30,  1.50s/it]

Iter 401.0, Minibatch Loss= 0.778125, Training Accuracy= 0.57812


 51%|████████████████████████████████▉                               | 402/782 [09:51<09:27,  1.49s/it]

Iter 402.0, Minibatch Loss= 0.864933, Training Accuracy= 0.65625


 52%|████████████████████████████████▉                               | 403/782 [09:53<09:28,  1.50s/it]

Iter 403.0, Minibatch Loss= 0.813264, Training Accuracy= 0.65625


 52%|█████████████████████████████████                               | 404/782 [09:55<09:46,  1.55s/it]

Iter 404.0, Minibatch Loss= 0.748226, Training Accuracy= 0.65625


 52%|█████████████████████████████████▏                              | 405/782 [09:56<09:41,  1.54s/it]

Iter 405.0, Minibatch Loss= 0.708791, Training Accuracy= 0.65625


 52%|█████████████████████████████████▏                              | 406/782 [09:58<09:41,  1.55s/it]

Iter 406.0, Minibatch Loss= 0.692743, Training Accuracy= 0.65625


 52%|█████████████████████████████████▎                              | 407/782 [09:59<09:31,  1.52s/it]

Iter 407.0, Minibatch Loss= 0.848604, Training Accuracy= 0.65625


 52%|█████████████████████████████████▍                              | 408/782 [10:01<09:24,  1.51s/it]

Iter 408.0, Minibatch Loss= 0.692221, Training Accuracy= 0.65625


 52%|█████████████████████████████████▍                              | 409/782 [10:02<09:31,  1.53s/it]

Iter 409.0, Minibatch Loss= 0.734157, Training Accuracy= 0.65625


 52%|█████████████████████████████████▌                              | 410/782 [10:04<09:16,  1.50s/it]

Iter 410.0, Minibatch Loss= 0.822394, Training Accuracy= 0.65625


 53%|█████████████████████████████████▋                              | 411/782 [10:05<09:27,  1.53s/it]

Iter 411.0, Minibatch Loss= 0.775513, Training Accuracy= 0.65625


 53%|█████████████████████████████████▋                              | 412/782 [10:07<09:12,  1.49s/it]

Iter 412.0, Minibatch Loss= 0.763841, Training Accuracy= 0.65625


 53%|█████████████████████████████████▊                              | 413/782 [10:08<09:27,  1.54s/it]

Iter 413.0, Minibatch Loss= 0.754160, Training Accuracy= 0.65625


 53%|█████████████████████████████████▉                              | 414/782 [10:10<09:50,  1.61s/it]

Iter 414.0, Minibatch Loss= 0.745101, Training Accuracy= 0.65625


 53%|█████████████████████████████████▉                              | 415/782 [10:11<09:35,  1.57s/it]

Iter 415.0, Minibatch Loss= 0.735600, Training Accuracy= 0.65625


 53%|██████████████████████████████████                              | 416/782 [10:13<09:39,  1.58s/it]

Iter 416.0, Minibatch Loss= 0.724900, Training Accuracy= 0.65625


 53%|██████████████████████████████████▏                             | 417/782 [10:15<09:26,  1.55s/it]

Iter 417.0, Minibatch Loss= 0.712043, Training Accuracy= 0.65625


 53%|██████████████████████████████████▏                             | 418/782 [10:16<09:14,  1.52s/it]

Iter 418.0, Minibatch Loss= 0.696950, Training Accuracy= 0.65625


 54%|██████████████████████████████████▎                             | 419/782 [10:18<09:39,  1.60s/it]

Iter 419.0, Minibatch Loss= 0.737904, Training Accuracy= 0.61719


 54%|██████████████████████████████████▎                             | 420/782 [10:19<09:28,  1.57s/it]

Iter 420.0, Minibatch Loss= 0.898292, Training Accuracy= 0.65625


 54%|██████████████████████████████████▍                             | 421/782 [10:21<09:35,  1.59s/it]

Iter 421.0, Minibatch Loss= 0.866153, Training Accuracy= 0.55469


 54%|██████████████████████████████████▌                             | 422/782 [10:22<09:18,  1.55s/it]

Iter 422.0, Minibatch Loss= 0.834452, Training Accuracy= 0.65625


 54%|██████████████████████████████████▌                             | 423/782 [10:24<09:05,  1.52s/it]

Iter 423.0, Minibatch Loss= 0.782405, Training Accuracy= 0.65625


 54%|██████████████████████████████████▋                             | 424/782 [10:26<09:17,  1.56s/it]

Iter 424.0, Minibatch Loss= 0.769958, Training Accuracy= 0.65625


 54%|██████████████████████████████████▊                             | 425/782 [10:27<09:00,  1.51s/it]

Iter 425.0, Minibatch Loss= 0.760793, Training Accuracy= 0.65625


 54%|██████████████████████████████████▊                             | 426/782 [10:29<09:20,  1.58s/it]

Iter 426.0, Minibatch Loss= 0.751998, Training Accuracy= 0.65625


 55%|██████████████████████████████████▉                             | 427/782 [10:30<09:08,  1.55s/it]

Iter 427.0, Minibatch Loss= 0.739773, Training Accuracy= 0.65625


 55%|███████████████████████████████████                             | 428/782 [10:32<09:07,  1.55s/it]

Iter 428.0, Minibatch Loss= 0.719860, Training Accuracy= 0.65625


 55%|███████████████████████████████████                             | 429/782 [10:33<09:22,  1.59s/it]

Iter 429.0, Minibatch Loss= 0.698628, Training Accuracy= 0.65625


 55%|███████████████████████████████████▏                            | 430/782 [10:35<09:04,  1.55s/it]

Iter 430.0, Minibatch Loss= 0.686305, Training Accuracy= 0.65625


 55%|███████████████████████████████████▎                            | 431/782 [10:36<09:05,  1.55s/it]

Iter 431.0, Minibatch Loss= 0.684207, Training Accuracy= 0.66406


 55%|███████████████████████████████████▎                            | 432/782 [10:38<09:04,  1.55s/it]

Iter 432.0, Minibatch Loss= 0.831497, Training Accuracy= 0.65625


 55%|███████████████████████████████████▍                            | 433/782 [10:39<09:02,  1.55s/it]

Iter 433.0, Minibatch Loss= 0.845484, Training Accuracy= 0.65625


 55%|███████████████████████████████████▌                            | 434/782 [10:41<08:53,  1.53s/it]

Iter 434.0, Minibatch Loss= 0.787150, Training Accuracy= 0.65625


 56%|███████████████████████████████████▌                            | 435/782 [10:43<08:54,  1.54s/it]

Iter 435.0, Minibatch Loss= 0.780899, Training Accuracy= 0.65625


 56%|███████████████████████████████████▋                            | 436/782 [10:44<09:12,  1.60s/it]

Iter 436.0, Minibatch Loss= 0.779595, Training Accuracy= 0.65625


 56%|███████████████████████████████████▊                            | 437/782 [10:46<08:58,  1.56s/it]

Iter 437.0, Minibatch Loss= 0.801042, Training Accuracy= 0.65625


 56%|███████████████████████████████████▊                            | 438/782 [10:47<08:46,  1.53s/it]

Iter 438.0, Minibatch Loss= 0.891178, Training Accuracy= 0.42969


 56%|███████████████████████████████████▉                            | 439/782 [10:49<08:48,  1.54s/it]

Iter 439.0, Minibatch Loss= 0.840140, Training Accuracy= 0.65625


 56%|████████████████████████████████████                            | 440/782 [10:50<08:22,  1.47s/it]

Iter 440.0, Minibatch Loss= 0.805420, Training Accuracy= 0.65625


 56%|████████████████████████████████████                            | 441/782 [10:52<08:31,  1.50s/it]

Iter 441.0, Minibatch Loss= 0.797562, Training Accuracy= 0.65625


 57%|████████████████████████████████████▏                           | 442/782 [10:53<08:23,  1.48s/it]

Iter 442.0, Minibatch Loss= 0.790720, Training Accuracy= 0.65625


 57%|████████████████████████████████████▎                           | 443/782 [10:55<08:19,  1.47s/it]

Iter 443.0, Minibatch Loss= 0.784477, Training Accuracy= 0.65625


 57%|████████████████████████████████████▎                           | 444/782 [10:56<08:26,  1.50s/it]

Iter 444.0, Minibatch Loss= 0.779529, Training Accuracy= 0.65625


 57%|████████████████████████████████████▍                           | 445/782 [10:57<08:12,  1.46s/it]

Iter 445.0, Minibatch Loss= 0.776247, Training Accuracy= 0.65625


 57%|████████████████████████████████████▌                           | 446/782 [10:59<08:17,  1.48s/it]

Iter 446.0, Minibatch Loss= 0.774230, Training Accuracy= 0.65625


 57%|████████████████████████████████████▌                           | 447/782 [11:00<08:10,  1.46s/it]

Iter 447.0, Minibatch Loss= 0.772822, Training Accuracy= 0.65625


 57%|████████████████████████████████████▋                           | 448/782 [11:02<08:11,  1.47s/it]

Iter 448.0, Minibatch Loss= 0.771587, Training Accuracy= 0.65625


 57%|████████████████████████████████████▋                           | 449/782 [11:04<08:25,  1.52s/it]

Iter 449.0, Minibatch Loss= 0.770349, Training Accuracy= 0.65625


 58%|████████████████████████████████████▊                           | 450/782 [11:05<08:17,  1.50s/it]

Iter 450.0, Minibatch Loss= 0.769469, Training Accuracy= 0.65625


 58%|████████████████████████████████████▉                           | 451/782 [11:06<08:10,  1.48s/it]

Iter 451.0, Minibatch Loss= 0.778111, Training Accuracy= 0.65625


 58%|████████████████████████████████████▉                           | 452/782 [11:08<08:08,  1.48s/it]

Iter 452.0, Minibatch Loss= 0.870557, Training Accuracy= 0.42188


 58%|█████████████████████████████████████                           | 453/782 [11:09<08:09,  1.49s/it]

Iter 453.0, Minibatch Loss= 0.840229, Training Accuracy= 0.65625


 58%|█████████████████████████████████████▏                          | 454/782 [11:11<08:13,  1.50s/it]

Iter 454.0, Minibatch Loss= 0.794588, Training Accuracy= 0.65625


 58%|█████████████████████████████████████▏                          | 455/782 [11:12<08:04,  1.48s/it]

Iter 455.0, Minibatch Loss= 0.786227, Training Accuracy= 0.65625


 58%|█████████████████████████████████████▎                          | 456/782 [11:14<07:56,  1.46s/it]

Iter 456.0, Minibatch Loss= 0.778378, Training Accuracy= 0.65625


 58%|█████████████████████████████████████▍                          | 457/782 [11:15<07:50,  1.45s/it]

Iter 457.0, Minibatch Loss= 0.772271, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▍                          | 458/782 [11:17<07:48,  1.45s/it]

Iter 458.0, Minibatch Loss= 0.768266, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▌                          | 459/782 [11:18<07:54,  1.47s/it]

Iter 459.0, Minibatch Loss= 0.765354, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▋                          | 460/782 [11:20<07:43,  1.44s/it]

Iter 460.0, Minibatch Loss= 0.762926, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▋                          | 461/782 [11:21<07:42,  1.44s/it]

Iter 461.0, Minibatch Loss= 0.769215, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▊                          | 462/782 [11:22<07:38,  1.43s/it]

Iter 462.0, Minibatch Loss= 0.942890, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▉                          | 463/782 [11:24<07:36,  1.43s/it]

Iter 463.0, Minibatch Loss= 0.850308, Training Accuracy= 0.65625


 59%|█████████████████████████████████████▉                          | 464/782 [11:25<07:44,  1.46s/it]

Iter 464.0, Minibatch Loss= 0.816838, Training Accuracy= 0.65625


 59%|██████████████████████████████████████                          | 465/782 [11:27<07:40,  1.45s/it]

Iter 465.0, Minibatch Loss= 0.814559, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▏                         | 466/782 [11:28<07:39,  1.45s/it]

Iter 466.0, Minibatch Loss= 0.812408, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▏                         | 467/782 [11:30<07:37,  1.45s/it]

Iter 467.0, Minibatch Loss= 0.810062, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▎                         | 468/782 [11:31<07:33,  1.45s/it]

Iter 468.0, Minibatch Loss= 0.807375, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▍                         | 469/782 [11:33<07:46,  1.49s/it]

Iter 469.0, Minibatch Loss= 0.804189, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▍                         | 470/782 [11:34<07:40,  1.47s/it]

Iter 470.0, Minibatch Loss= 0.800333, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▌                         | 471/782 [11:36<07:35,  1.47s/it]

Iter 471.0, Minibatch Loss= 0.795635, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▋                         | 472/782 [11:37<07:43,  1.50s/it]

Iter 472.0, Minibatch Loss= 0.789968, Training Accuracy= 0.65625


 60%|██████████████████████████████████████▋                         | 473/782 [11:39<07:36,  1.48s/it]

Iter 473.0, Minibatch Loss= 0.783359, Training Accuracy= 0.65625


 61%|██████████████████████████████████████▊                         | 474/782 [11:40<07:42,  1.50s/it]

Iter 474.0, Minibatch Loss= 0.776169, Training Accuracy= 0.65625


 61%|██████████████████████████████████████▊                         | 475/782 [11:42<07:31,  1.47s/it]

Iter 475.0, Minibatch Loss= 0.769169, Training Accuracy= 0.64844


 61%|██████████████████████████████████████▉                         | 476/782 [11:43<07:25,  1.45s/it]

Iter 476.0, Minibatch Loss= 0.764108, Training Accuracy= 0.64844


 61%|███████████████████████████████████████                         | 477/782 [11:44<07:25,  1.46s/it]

Iter 477.0, Minibatch Loss= 0.960817, Training Accuracy= 0.41406


 61%|███████████████████████████████████████                         | 478/782 [11:46<07:14,  1.43s/it]

Iter 478.0, Minibatch Loss= 0.842973, Training Accuracy= 0.65625


 61%|███████████████████████████████████████▏                        | 479/782 [11:47<07:25,  1.47s/it]

Iter 479.0, Minibatch Loss= 0.855796, Training Accuracy= 0.65625


 61%|███████████████████████████████████████▎                        | 480/782 [11:49<07:19,  1.46s/it]

Iter 480.0, Minibatch Loss= 0.896058, Training Accuracy= 0.53125


 62%|███████████████████████████████████████▎                        | 481/782 [11:50<07:18,  1.46s/it]

Iter 481.0, Minibatch Loss= 1.001723, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▍                        | 482/782 [11:52<07:25,  1.49s/it]

Iter 482.0, Minibatch Loss= 0.828397, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▌                        | 483/782 [11:53<07:16,  1.46s/it]

Iter 483.0, Minibatch Loss= 0.816943, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▌                        | 484/782 [11:55<07:28,  1.50s/it]

Iter 484.0, Minibatch Loss= 0.813407, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▋                        | 485/782 [11:56<07:23,  1.49s/it]

Iter 485.0, Minibatch Loss= 0.810652, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▊                        | 486/782 [11:58<07:14,  1.47s/it]

Iter 486.0, Minibatch Loss= 0.808007, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▊                        | 487/782 [11:59<07:22,  1.50s/it]

Iter 487.0, Minibatch Loss= 0.805346, Training Accuracy= 0.65625


 62%|███████████████████████████████████████▉                        | 488/782 [12:01<07:11,  1.47s/it]

Iter 488.0, Minibatch Loss= 0.802583, Training Accuracy= 0.65625


 63%|████████████████████████████████████████                        | 489/782 [12:02<07:09,  1.47s/it]

Iter 489.0, Minibatch Loss= 0.799645, Training Accuracy= 0.65625


 63%|████████████████████████████████████████                        | 490/782 [12:04<07:07,  1.46s/it]

Iter 490.0, Minibatch Loss= 0.796468, Training Accuracy= 0.65625


 63%|████████████████████████████████████████▏                       | 491/782 [12:05<07:10,  1.48s/it]

Iter 491.0, Minibatch Loss= 0.792995, Training Accuracy= 0.65625


 63%|████████████████████████████████████████▎                       | 492/782 [12:07<07:22,  1.52s/it]

Iter 492.0, Minibatch Loss= 0.789167, Training Accuracy= 0.65625


 63%|████████████████████████████████████████▎                       | 493/782 [12:08<07:18,  1.52s/it]

Iter 493.0, Minibatch Loss= 0.784918, Training Accuracy= 0.65625


 63%|████████████████████████████████████████▍                       | 494/782 [12:10<07:18,  1.52s/it]

Iter 494.0, Minibatch Loss= 0.780134, Training Accuracy= 0.65625


 63%|████████████████████████████████████████▌                       | 495/782 [12:11<07:10,  1.50s/it]

Iter 495.0, Minibatch Loss= 0.774568, Training Accuracy= 0.65625


 63%|████████████████████████████████████████▌                       | 496/782 [12:13<07:03,  1.48s/it]

Iter 496.0, Minibatch Loss= 0.767690, Training Accuracy= 0.65625


 64%|████████████████████████████████████████▋                       | 497/782 [12:14<07:10,  1.51s/it]

Iter 497.0, Minibatch Loss= 0.758562, Training Accuracy= 0.65625


 64%|████████████████████████████████████████▊                       | 498/782 [12:16<07:01,  1.48s/it]

Iter 498.0, Minibatch Loss= 0.746259, Training Accuracy= 0.65625


 64%|████████████████████████████████████████▊                       | 499/782 [12:17<06:57,  1.48s/it]

Iter 499.0, Minibatch Loss= 0.731316, Training Accuracy= 0.65625


 64%|████████████████████████████████████████▉                       | 500/782 [12:19<07:01,  1.50s/it]

Iter 500.0, Minibatch Loss= 1.019597, Training Accuracy= 0.40625


 64%|█████████████████████████████████████████                       | 501/782 [12:20<06:49,  1.46s/it]

Iter 501.0, Minibatch Loss= 0.983752, Training Accuracy= 0.50781


 64%|█████████████████████████████████████████                       | 502/782 [12:22<06:52,  1.47s/it]

Iter 502.0, Minibatch Loss= 0.963273, Training Accuracy= 0.51562


 64%|█████████████████████████████████████████▏                      | 503/782 [12:23<06:46,  1.46s/it]

Iter 503.0, Minibatch Loss= 0.942279, Training Accuracy= 0.50781


 64%|█████████████████████████████████████████▏                      | 504/782 [12:24<06:41,  1.44s/it]

Iter 504.0, Minibatch Loss= 0.923505, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▎                      | 505/782 [12:26<06:46,  1.47s/it]

Iter 505.0, Minibatch Loss= 1.048831, Training Accuracy= 0.41406


 65%|█████████████████████████████████████████▍                      | 506/782 [12:27<06:38,  1.44s/it]

Iter 506.0, Minibatch Loss= 1.082009, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▍                      | 507/782 [12:29<06:55,  1.51s/it]

Iter 507.0, Minibatch Loss= 1.015232, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▌                      | 508/782 [12:30<06:46,  1.48s/it]

Iter 508.0, Minibatch Loss= 1.005106, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▋                      | 509/782 [12:32<06:39,  1.46s/it]

Iter 509.0, Minibatch Loss= 0.999256, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▋                      | 510/782 [12:33<06:47,  1.50s/it]

Iter 510.0, Minibatch Loss= 0.993058, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▊                      | 511/782 [12:35<06:32,  1.45s/it]

Iter 511.0, Minibatch Loss= 0.984651, Training Accuracy= 0.50781


 65%|█████████████████████████████████████████▉                      | 512/782 [12:36<06:40,  1.48s/it]

Iter 512.0, Minibatch Loss= 0.972163, Training Accuracy= 0.50781


 66%|█████████████████████████████████████████▉                      | 513/782 [12:38<06:36,  1.47s/it]

Iter 513.0, Minibatch Loss= 0.955211, Training Accuracy= 0.50781


 66%|██████████████████████████████████████████                      | 514/782 [12:39<06:39,  1.49s/it]

Iter 514.0, Minibatch Loss= 0.935545, Training Accuracy= 0.50781


 66%|██████████████████████████████████████████▏                     | 515/782 [12:41<06:46,  1.52s/it]

Iter 515.0, Minibatch Loss= 0.913452, Training Accuracy= 0.50781


 66%|██████████████████████████████████████████▏                     | 516/782 [12:42<06:28,  1.46s/it]

Iter 516.0, Minibatch Loss= 0.897128, Training Accuracy= 0.53906


 66%|██████████████████████████████████████████▎                     | 517/782 [12:44<06:29,  1.47s/it]

Iter 517.0, Minibatch Loss= 1.040483, Training Accuracy= 0.42969


 66%|██████████████████████████████████████████▍                     | 518/782 [12:45<06:23,  1.45s/it]

Iter 518.0, Minibatch Loss= 1.069121, Training Accuracy= 0.50781


 66%|██████████████████████████████████████████▍                     | 519/782 [12:46<06:20,  1.45s/it]

Iter 519.0, Minibatch Loss= 1.013298, Training Accuracy= 0.50781


 66%|██████████████████████████████████████████▌                     | 520/782 [12:48<06:24,  1.47s/it]

Iter 520.0, Minibatch Loss= 1.010911, Training Accuracy= 0.50781


 67%|██████████████████████████████████████████▋                     | 521/782 [12:49<06:18,  1.45s/it]

Iter 521.0, Minibatch Loss= 1.009518, Training Accuracy= 0.50781


 67%|██████████████████████████████████████████▋                     | 522/782 [12:51<06:14,  1.44s/it]

Iter 522.0, Minibatch Loss= 1.008485, Training Accuracy= 0.50781


 67%|██████████████████████████████████████████▊                     | 523/782 [12:52<06:08,  1.42s/it]

Iter 523.0, Minibatch Loss= 1.007613, Training Accuracy= 0.50781


 67%|██████████████████████████████████████████▉                     | 524/782 [12:54<06:09,  1.43s/it]

Iter 524.0, Minibatch Loss= 1.006803, Training Accuracy= 0.50781


 67%|██████████████████████████████████████████▉                     | 525/782 [12:55<06:17,  1.47s/it]

Iter 525.0, Minibatch Loss= 1.005996, Training Accuracy= 0.50781


 67%|███████████████████████████████████████████                     | 526/782 [12:57<06:05,  1.43s/it]

Iter 526.0, Minibatch Loss= 1.005144, Training Accuracy= 0.50781


 67%|███████████████████████████████████████████▏                    | 527/782 [12:58<06:11,  1.46s/it]

Iter 527.0, Minibatch Loss= 1.004200, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▏                    | 528/782 [12:59<06:07,  1.45s/it]

Iter 528.0, Minibatch Loss= 1.003109, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▎                    | 529/782 [13:01<06:13,  1.48s/it]

Iter 529.0, Minibatch Loss= 1.001800, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▍                    | 530/782 [13:03<06:33,  1.56s/it]

Iter 530.0, Minibatch Loss= 1.000172, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▍                    | 531/782 [13:04<06:26,  1.54s/it]

Iter 531.0, Minibatch Loss= 0.998084, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▌                    | 532/782 [13:06<06:19,  1.52s/it]

Iter 532.0, Minibatch Loss= 0.995342, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▌                    | 533/782 [13:07<06:10,  1.49s/it]

Iter 533.0, Minibatch Loss= 0.991714, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▋                    | 534/782 [13:09<06:07,  1.48s/it]

Iter 534.0, Minibatch Loss= 0.987009, Training Accuracy= 0.50781


 68%|███████████████████████████████████████████▊                    | 535/782 [13:10<06:12,  1.51s/it]

Iter 535.0, Minibatch Loss= 0.981203, Training Accuracy= 0.50781


 69%|███████████████████████████████████████████▊                    | 536/782 [13:12<06:00,  1.46s/it]

Iter 536.0, Minibatch Loss= 0.974336, Training Accuracy= 0.50781


 69%|███████████████████████████████████████████▉                    | 537/782 [13:13<05:56,  1.46s/it]

Iter 537.0, Minibatch Loss= 0.966061, Training Accuracy= 0.50781


 69%|████████████████████████████████████████████                    | 538/782 [13:14<05:48,  1.43s/it]

Iter 538.0, Minibatch Loss= 0.955503, Training Accuracy= 0.50781


 69%|████████████████████████████████████████████                    | 539/782 [13:16<05:45,  1.42s/it]

Iter 539.0, Minibatch Loss= 0.941466, Training Accuracy= 0.50781


 69%|████████████████████████████████████████████▏                   | 540/782 [13:17<05:50,  1.45s/it]

Iter 540.0, Minibatch Loss= 0.922444, Training Accuracy= 0.53906


 69%|████████████████████████████████████████████▎                   | 541/782 [13:19<05:59,  1.49s/it]

Iter 541.0, Minibatch Loss= 0.903491, Training Accuracy= 0.53125


 69%|████████████████████████████████████████████▎                   | 542/782 [13:20<05:59,  1.50s/it]

Iter 542.0, Minibatch Loss= 0.889946, Training Accuracy= 0.53906


 69%|████████████████████████████████████████████▍                   | 543/782 [13:22<05:59,  1.50s/it]

Iter 543.0, Minibatch Loss= 0.942016, Training Accuracy= 0.50000


 70%|████████████████████████████████████████████▌                   | 544/782 [13:23<06:00,  1.51s/it]

Iter 544.0, Minibatch Loss= 1.261575, Training Accuracy= 0.44531


 70%|████████████████████████████████████████████▌                   | 545/782 [13:25<06:11,  1.57s/it]

Iter 545.0, Minibatch Loss= 1.063696, Training Accuracy= 0.49219


 70%|████████████████████████████████████████████▋                   | 546/782 [13:27<06:01,  1.53s/it]

Iter 546.0, Minibatch Loss= 1.071643, Training Accuracy= 0.50781


 70%|████████████████████████████████████████████▊                   | 547/782 [13:28<05:56,  1.52s/it]

Iter 547.0, Minibatch Loss= 1.048604, Training Accuracy= 0.43750


 70%|████████████████████████████████████████████▊                   | 548/782 [13:30<06:07,  1.57s/it]

Iter 548.0, Minibatch Loss= 1.029732, Training Accuracy= 0.51562


 70%|████████████████████████████████████████████▉                   | 549/782 [13:31<05:50,  1.50s/it]

Iter 549.0, Minibatch Loss= 1.009140, Training Accuracy= 0.51562


 70%|█████████████████████████████████████████████                   | 550/782 [13:33<05:53,  1.52s/it]

Iter 550.0, Minibatch Loss= 0.939757, Training Accuracy= 0.51562


 70%|█████████████████████████████████████████████                   | 551/782 [13:34<05:44,  1.49s/it]

Iter 551.0, Minibatch Loss= 0.895792, Training Accuracy= 0.51562


 71%|█████████████████████████████████████████████▏                  | 552/782 [13:36<05:43,  1.49s/it]

Iter 552.0, Minibatch Loss= 0.874106, Training Accuracy= 0.57031


 71%|█████████████████████████████████████████████▎                  | 553/782 [13:37<05:51,  1.54s/it]

Iter 553.0, Minibatch Loss= 0.945064, Training Accuracy= 0.50781


 71%|█████████████████████████████████████████████▎                  | 554/782 [13:39<05:54,  1.55s/it]

Iter 554.0, Minibatch Loss= 1.189089, Training Accuracy= 0.51562


 71%|█████████████████████████████████████████████▍                  | 555/782 [13:40<05:54,  1.56s/it]

Iter 555.0, Minibatch Loss= 0.954647, Training Accuracy= 0.51562


 71%|█████████████████████████████████████████████▌                  | 556/782 [13:42<05:48,  1.54s/it]

Iter 556.0, Minibatch Loss= 0.915688, Training Accuracy= 0.51562


 71%|█████████████████████████████████████████████▌                  | 557/782 [13:44<05:54,  1.58s/it]

Iter 557.0, Minibatch Loss= 1.077297, Training Accuracy= 0.42188


 71%|█████████████████████████████████████████████▋                  | 558/782 [13:45<05:50,  1.56s/it]

Iter 558.0, Minibatch Loss= 1.077135, Training Accuracy= 0.50781


 71%|█████████████████████████████████████████████▋                  | 559/782 [13:47<05:59,  1.61s/it]

Iter 559.0, Minibatch Loss= 1.016830, Training Accuracy= 0.50781


 72%|█████████████████████████████████████████████▊                  | 560/782 [13:49<06:06,  1.65s/it]

Iter 560.0, Minibatch Loss= 1.010720, Training Accuracy= 0.50781


 72%|█████████████████████████████████████████████▉                  | 561/782 [13:50<06:09,  1.67s/it]

Iter 561.0, Minibatch Loss= 1.007745, Training Accuracy= 0.50781


 72%|█████████████████████████████████████████████▉                  | 562/782 [13:52<06:03,  1.65s/it]

Iter 562.0, Minibatch Loss= 1.005539, Training Accuracy= 0.50781


 72%|██████████████████████████████████████████████                  | 563/782 [13:54<06:12,  1.70s/it]

Iter 563.0, Minibatch Loss= 1.003551, Training Accuracy= 0.50781


 72%|██████████████████████████████████████████████▏                 | 564/782 [13:55<06:08,  1.69s/it]

Iter 564.0, Minibatch Loss= 1.001517, Training Accuracy= 0.51562


 72%|██████████████████████████████████████████████▏                 | 565/782 [13:57<06:09,  1.70s/it]

Iter 565.0, Minibatch Loss= 0.999229, Training Accuracy= 0.51562


 72%|██████████████████████████████████████████████▎                 | 566/782 [13:59<06:13,  1.73s/it]

Iter 566.0, Minibatch Loss= 0.996479, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▍                 | 567/782 [14:01<06:04,  1.70s/it]

Iter 567.0, Minibatch Loss= 0.993012, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▍                 | 568/782 [14:02<06:13,  1.75s/it]

Iter 568.0, Minibatch Loss= 0.988465, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▌                 | 569/782 [14:04<06:15,  1.76s/it]

Iter 569.0, Minibatch Loss= 0.982234, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▋                 | 570/782 [14:06<06:10,  1.75s/it]

Iter 570.0, Minibatch Loss= 0.973279, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▋                 | 571/782 [14:08<06:27,  1.84s/it]

Iter 571.0, Minibatch Loss= 0.960001, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▊                 | 572/782 [14:10<06:24,  1.83s/it]

Iter 572.0, Minibatch Loss= 0.940826, Training Accuracy= 0.51562


 73%|██████████████████████████████████████████████▉                 | 573/782 [14:12<06:23,  1.83s/it]

Iter 573.0, Minibatch Loss= 0.916805, Training Accuracy= 0.55469


 73%|██████████████████████████████████████████████▉                 | 574/782 [14:13<06:14,  1.80s/it]

Iter 574.0, Minibatch Loss= 0.889201, Training Accuracy= 0.59375


 74%|███████████████████████████████████████████████                 | 575/782 [14:15<05:51,  1.70s/it]

Iter 575.0, Minibatch Loss= 0.957619, Training Accuracy= 0.52344


 74%|███████████████████████████████████████████████▏                | 576/782 [14:16<05:39,  1.65s/it]

Iter 576.0, Minibatch Loss= 1.450119, Training Accuracy= 0.21875


 74%|███████████████████████████████████████████████▏                | 577/782 [14:18<05:24,  1.58s/it]

Iter 577.0, Minibatch Loss= 1.028438, Training Accuracy= 0.52344


 74%|███████████████████████████████████████████████▎                | 578/782 [14:19<05:14,  1.54s/it]

Iter 578.0, Minibatch Loss= 0.992431, Training Accuracy= 0.50781


 74%|███████████████████████████████████████████████▍                | 579/782 [14:21<05:11,  1.54s/it]

Iter 579.0, Minibatch Loss= 0.921004, Training Accuracy= 0.50781


 74%|███████████████████████████████████████████████▍                | 580/782 [14:22<05:01,  1.49s/it]

Iter 580.0, Minibatch Loss= 0.888102, Training Accuracy= 0.50781


 74%|███████████████████████████████████████████████▌                | 581/782 [14:24<05:05,  1.52s/it]

Iter 581.0, Minibatch Loss= 0.919583, Training Accuracy= 0.52344


 74%|███████████████████████████████████████████████▋                | 582/782 [14:25<05:07,  1.54s/it]

Iter 582.0, Minibatch Loss= 1.206533, Training Accuracy= 0.46094


 75%|███████████████████████████████████████████████▋                | 583/782 [14:27<05:01,  1.51s/it]

Iter 583.0, Minibatch Loss= 1.031539, Training Accuracy= 0.50781


 75%|███████████████████████████████████████████████▊                | 584/782 [14:28<05:12,  1.58s/it]

Iter 584.0, Minibatch Loss= 1.017118, Training Accuracy= 0.51562


 75%|███████████████████████████████████████████████▉                | 585/782 [14:30<05:13,  1.59s/it]

Iter 585.0, Minibatch Loss= 1.021837, Training Accuracy= 0.50781


 75%|███████████████████████████████████████████████▉                | 586/782 [14:32<05:15,  1.61s/it]

Iter 586.0, Minibatch Loss= 1.015373, Training Accuracy= 0.51562


 75%|████████████████████████████████████████████████                | 587/782 [14:33<05:15,  1.62s/it]

Iter 587.0, Minibatch Loss= 1.021035, Training Accuracy= 0.50781


 75%|████████████████████████████████████████████████                | 588/782 [14:35<05:12,  1.61s/it]

Iter 588.0, Minibatch Loss= 1.010648, Training Accuracy= 0.51562


 75%|████████████████████████████████████████████████▏               | 589/782 [14:37<05:12,  1.62s/it]

Iter 589.0, Minibatch Loss= 1.010836, Training Accuracy= 0.50781


 75%|████████████████████████████████████████████████▎               | 590/782 [14:38<05:08,  1.60s/it]

Iter 590.0, Minibatch Loss= 1.008109, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▎               | 591/782 [14:40<05:11,  1.63s/it]

Iter 591.0, Minibatch Loss= 1.008264, Training Accuracy= 0.50781


 76%|████████████████████████████████████████████████▍               | 592/782 [14:41<05:07,  1.62s/it]

Iter 592.0, Minibatch Loss= 1.006758, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▌               | 593/782 [14:43<05:03,  1.61s/it]

Iter 593.0, Minibatch Loss= 1.007032, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▌               | 594/782 [14:45<05:04,  1.62s/it]

Iter 594.0, Minibatch Loss= 1.005967, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▋               | 595/782 [14:46<05:07,  1.64s/it]

Iter 595.0, Minibatch Loss= 1.006590, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▊               | 596/782 [14:48<05:04,  1.64s/it]

Iter 596.0, Minibatch Loss= 1.005617, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▊               | 597/782 [14:50<04:58,  1.61s/it]

Iter 597.0, Minibatch Loss= 1.006918, Training Accuracy= 0.51562


 76%|████████████████████████████████████████████████▉               | 598/782 [14:51<04:50,  1.58s/it]

Iter 598.0, Minibatch Loss= 1.005627, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████               | 599/782 [14:53<04:46,  1.56s/it]

Iter 599.0, Minibatch Loss= 1.007824, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████               | 600/782 [14:54<04:37,  1.52s/it]

Iter 600.0, Minibatch Loss= 1.005488, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████▏              | 601/782 [14:55<04:31,  1.50s/it]

Iter 601.0, Minibatch Loss= 1.007958, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████▎              | 602/782 [14:57<04:23,  1.47s/it]

Iter 602.0, Minibatch Loss= 1.004613, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████▎              | 603/782 [14:58<04:20,  1.46s/it]

Iter 603.0, Minibatch Loss= 1.006511, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████▍              | 604/782 [15:00<04:22,  1.48s/it]

Iter 604.0, Minibatch Loss= 1.003332, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████▌              | 605/782 [15:01<04:17,  1.45s/it]

Iter 605.0, Minibatch Loss= 1.004925, Training Accuracy= 0.51562


 77%|█████████████████████████████████████████████████▌              | 606/782 [15:03<04:16,  1.46s/it]

Iter 606.0, Minibatch Loss= 1.002182, Training Accuracy= 0.51562


 78%|█████████████████████████████████████████████████▋              | 607/782 [15:04<04:15,  1.46s/it]

Iter 607.0, Minibatch Loss= 1.004172, Training Accuracy= 0.51562


 78%|█████████████████████████████████████████████████▊              | 608/782 [15:06<04:19,  1.49s/it]

Iter 608.0, Minibatch Loss= 1.001448, Training Accuracy= 0.51562


 78%|█████████████████████████████████████████████████▊              | 609/782 [15:07<04:34,  1.58s/it]

Iter 609.0, Minibatch Loss= 1.004559, Training Accuracy= 0.51562


 78%|█████████████████████████████████████████████████▉              | 610/782 [15:09<04:34,  1.60s/it]

Iter 610.0, Minibatch Loss= 1.000989, Training Accuracy= 0.51562


 78%|██████████████████████████████████████████████████              | 611/782 [15:11<04:32,  1.59s/it]

Iter 611.0, Minibatch Loss= 1.005091, Training Accuracy= 0.51562


 78%|██████████████████████████████████████████████████              | 612/782 [15:12<04:28,  1.58s/it]

Iter 612.0, Minibatch Loss= 1.000039, Training Accuracy= 0.52344


 78%|██████████████████████████████████████████████████▏             | 613/782 [15:14<04:27,  1.58s/it]

Iter 613.0, Minibatch Loss= 1.003985, Training Accuracy= 0.51562


 79%|██████████████████████████████████████████████████▎             | 614/782 [15:15<04:28,  1.60s/it]

Iter 614.0, Minibatch Loss= 0.998564, Training Accuracy= 0.52344


 79%|██████████████████████████████████████████████████▎             | 615/782 [15:17<04:28,  1.61s/it]

Iter 615.0, Minibatch Loss= 1.002974, Training Accuracy= 0.51562


 79%|██████████████████████████████████████████████████▍             | 616/782 [15:19<04:26,  1.60s/it]

Iter 616.0, Minibatch Loss= 0.997118, Training Accuracy= 0.53906


 79%|██████████████████████████████████████████████████▍             | 617/782 [15:20<04:25,  1.61s/it]

Iter 617.0, Minibatch Loss= 1.002827, Training Accuracy= 0.50781


 79%|██████████████████████████████████████████████████▌             | 618/782 [15:22<04:27,  1.63s/it]

Iter 618.0, Minibatch Loss= 0.995326, Training Accuracy= 0.53125


 79%|██████████████████████████████████████████████████▋             | 619/782 [15:24<04:27,  1.64s/it]

Iter 619.0, Minibatch Loss= 1.002017, Training Accuracy= 0.50781


 79%|██████████████████████████████████████████████████▋             | 620/782 [15:25<04:25,  1.64s/it]

Iter 620.0, Minibatch Loss= 0.992614, Training Accuracy= 0.53125


 79%|██████████████████████████████████████████████████▊             | 621/782 [15:27<04:27,  1.66s/it]

Iter 621.0, Minibatch Loss= 1.000421, Training Accuracy= 0.51562


 80%|██████████████████████████████████████████████████▉             | 622/782 [15:29<04:23,  1.65s/it]

Iter 622.0, Minibatch Loss= 0.989589, Training Accuracy= 0.52344


 80%|██████████████████████████████████████████████████▉             | 623/782 [15:30<04:10,  1.58s/it]

Iter 623.0, Minibatch Loss= 1.000279, Training Accuracy= 0.51562


 80%|███████████████████████████████████████████████████             | 624/782 [15:32<04:04,  1.54s/it]

Iter 624.0, Minibatch Loss= 0.984994, Training Accuracy= 0.53125


 80%|███████████████████████████████████████████████████▏            | 625/782 [15:33<03:59,  1.52s/it]

Iter 625.0, Minibatch Loss= 0.948967, Training Accuracy= 0.54688


 80%|███████████████████████████████████████████████████▏            | 626/782 [15:34<03:55,  1.51s/it]

Iter 626.0, Minibatch Loss= 0.933247, Training Accuracy= 0.54688


 80%|███████████████████████████████████████████████████▎            | 627/782 [15:36<03:52,  1.50s/it]

Iter 627.0, Minibatch Loss= 0.930034, Training Accuracy= 0.54688


 80%|███████████████████████████████████████████████████▍            | 628/782 [15:37<03:52,  1.51s/it]

Iter 628.0, Minibatch Loss= 0.931150, Training Accuracy= 0.53125


 80%|███████████████████████████████████████████████████▍            | 629/782 [15:39<03:53,  1.53s/it]

Iter 629.0, Minibatch Loss= 0.938303, Training Accuracy= 0.55469


 81%|███████████████████████████████████████████████████▌            | 630/782 [15:41<03:50,  1.52s/it]

Iter 630.0, Minibatch Loss= 0.963055, Training Accuracy= 0.52344


 81%|███████████████████████████████████████████████████▋            | 631/782 [15:42<03:41,  1.46s/it]

Iter 631.0, Minibatch Loss= 0.935824, Training Accuracy= 0.55469


 81%|███████████████████████████████████████████████████▋            | 632/782 [15:43<03:36,  1.45s/it]

Iter 632.0, Minibatch Loss= 0.927138, Training Accuracy= 0.54688


 81%|███████████████████████████████████████████████████▊            | 633/782 [15:45<03:37,  1.46s/it]

Iter 633.0, Minibatch Loss= 0.922580, Training Accuracy= 0.54688


 81%|███████████████████████████████████████████████████▉            | 634/782 [15:46<03:43,  1.51s/it]

Iter 634.0, Minibatch Loss= 0.921586, Training Accuracy= 0.52344


 81%|███████████████████████████████████████████████████▉            | 635/782 [15:48<03:41,  1.51s/it]

Iter 635.0, Minibatch Loss= 0.921863, Training Accuracy= 0.55469


 81%|████████████████████████████████████████████████████            | 636/782 [15:49<03:41,  1.51s/it]

Iter 636.0, Minibatch Loss= 0.929794, Training Accuracy= 0.53125


 81%|████████████████████████████████████████████████████▏           | 637/782 [15:51<03:36,  1.49s/it]

Iter 637.0, Minibatch Loss= 0.920100, Training Accuracy= 0.55469


 82%|████████████████████████████████████████████████████▏           | 638/782 [15:52<03:28,  1.45s/it]

Iter 638.0, Minibatch Loss= 0.974184, Training Accuracy= 0.55469


 82%|████████████████████████████████████████████████████▎           | 639/782 [15:54<03:26,  1.44s/it]

Iter 639.0, Minibatch Loss= 0.919917, Training Accuracy= 0.55469


 82%|████████████████████████████████████████████████████▍           | 640/782 [15:55<03:25,  1.45s/it]

Iter 640.0, Minibatch Loss= 0.915164, Training Accuracy= 0.56250


 82%|████████████████████████████████████████████████████▍           | 641/782 [15:57<03:26,  1.47s/it]

Iter 641.0, Minibatch Loss= 0.911029, Training Accuracy= 0.55469


 82%|████████████████████████████████████████████████████▌           | 642/782 [15:58<03:24,  1.46s/it]

Iter 642.0, Minibatch Loss= 0.914439, Training Accuracy= 0.52344


 82%|████████████████████████████████████████████████████▌           | 643/782 [16:00<03:25,  1.48s/it]

Iter 643.0, Minibatch Loss= 0.945856, Training Accuracy= 0.55469


 82%|████████████████████████████████████████████████████▋           | 644/782 [16:01<03:19,  1.44s/it]

Iter 644.0, Minibatch Loss= 0.944210, Training Accuracy= 0.54688


 82%|████████████████████████████████████████████████████▊           | 645/782 [16:02<03:19,  1.45s/it]

Iter 645.0, Minibatch Loss= 1.025418, Training Accuracy= 0.54688


 83%|████████████████████████████████████████████████████▊           | 646/782 [16:04<03:19,  1.47s/it]

Iter 646.0, Minibatch Loss= 0.992490, Training Accuracy= 0.55469


 83%|████████████████████████████████████████████████████▉           | 647/782 [16:05<03:19,  1.47s/it]

Iter 647.0, Minibatch Loss= 0.960484, Training Accuracy= 0.55469


 83%|█████████████████████████████████████████████████████           | 648/782 [16:07<03:16,  1.47s/it]

Iter 648.0, Minibatch Loss= 0.945987, Training Accuracy= 0.55469


 83%|█████████████████████████████████████████████████████           | 649/782 [16:08<03:17,  1.48s/it]

Iter 649.0, Minibatch Loss= 0.937249, Training Accuracy= 0.55469


 83%|█████████████████████████████████████████████████████▏          | 650/782 [16:10<03:17,  1.50s/it]

Iter 650.0, Minibatch Loss= 0.931845, Training Accuracy= 0.55469


 83%|█████████████████████████████████████████████████████▎          | 651/782 [16:11<03:12,  1.47s/it]

Iter 651.0, Minibatch Loss= 0.928217, Training Accuracy= 0.55469


 83%|█████████████████████████████████████████████████████▎          | 652/782 [16:13<03:11,  1.48s/it]

Iter 652.0, Minibatch Loss= 0.925423, Training Accuracy= 0.55469


 84%|█████████████████████████████████████████████████████▍          | 653/782 [16:14<03:15,  1.51s/it]

Iter 653.0, Minibatch Loss= 0.922691, Training Accuracy= 0.55469


 84%|█████████████████████████████████████████████████████▌          | 654/782 [16:16<03:12,  1.51s/it]

Iter 654.0, Minibatch Loss= 0.919219, Training Accuracy= 0.55469


 84%|█████████████████████████████████████████████████████▌          | 655/782 [16:17<03:09,  1.49s/it]

Iter 655.0, Minibatch Loss= 0.913649, Training Accuracy= 0.55469


 84%|█████████████████████████████████████████████████████▋          | 656/782 [16:19<03:05,  1.48s/it]

Iter 656.0, Minibatch Loss= 0.902602, Training Accuracy= 0.57812


 84%|█████████████████████████████████████████████████████▊          | 657/782 [16:20<03:03,  1.47s/it]

Iter 657.0, Minibatch Loss= 0.877343, Training Accuracy= 0.61719


 84%|█████████████████████████████████████████████████████▊          | 658/782 [16:22<02:56,  1.43s/it]

Iter 658.0, Minibatch Loss= 1.183856, Training Accuracy= 0.53906


 84%|█████████████████████████████████████████████████████▉          | 659/782 [16:23<02:56,  1.43s/it]

Iter 659.0, Minibatch Loss= 1.005193, Training Accuracy= 0.32812


 84%|██████████████████████████████████████████████████████          | 660/782 [16:24<02:55,  1.44s/it]

Iter 660.0, Minibatch Loss= 1.028597, Training Accuracy= 0.55469


 85%|██████████████████████████████████████████████████████          | 661/782 [16:26<02:53,  1.44s/it]

Iter 661.0, Minibatch Loss= 0.958777, Training Accuracy= 0.51562


 85%|██████████████████████████████████████████████████████▏         | 662/782 [16:27<02:52,  1.43s/it]

Iter 662.0, Minibatch Loss= 0.958730, Training Accuracy= 0.55469


 85%|██████████████████████████████████████████████████████▎         | 663/782 [16:29<02:50,  1.43s/it]

Iter 663.0, Minibatch Loss= 0.945201, Training Accuracy= 0.55469


 85%|██████████████████████████████████████████████████████▎         | 664/782 [16:30<02:48,  1.42s/it]

Iter 664.0, Minibatch Loss= 0.938756, Training Accuracy= 0.55469


 85%|██████████████████████████████████████████████████████▍         | 665/782 [16:31<02:42,  1.39s/it]

Iter 665.0, Minibatch Loss= 0.936434, Training Accuracy= 0.55469


 85%|██████████████████████████████████████████████████████▌         | 666/782 [16:33<02:43,  1.41s/it]

Iter 666.0, Minibatch Loss= 0.937176, Training Accuracy= 0.55469


 85%|██████████████████████████████████████████████████████▌         | 667/782 [16:34<02:43,  1.42s/it]

Iter 667.0, Minibatch Loss= 0.948144, Training Accuracy= 0.53125


 85%|██████████████████████████████████████████████████████▋         | 668/782 [16:36<02:44,  1.44s/it]

Iter 668.0, Minibatch Loss= 0.959318, Training Accuracy= 0.55469


 86%|██████████████████████████████████████████████████████▊         | 669/782 [16:38<02:54,  1.54s/it]

Iter 669.0, Minibatch Loss= 0.954087, Training Accuracy= 0.55469


 86%|██████████████████████████████████████████████████████▊         | 670/782 [16:39<03:02,  1.63s/it]

Iter 670.0, Minibatch Loss= 0.938196, Training Accuracy= 0.55469


 86%|██████████████████████████████████████████████████████▉         | 671/782 [16:41<03:06,  1.68s/it]

Iter 671.0, Minibatch Loss= 0.934628, Training Accuracy= 0.55469


 86%|██████████████████████████████████████████████████████▉         | 672/782 [16:43<02:57,  1.61s/it]

Iter 672.0, Minibatch Loss= 0.932425, Training Accuracy= 0.55469


 86%|███████████████████████████████████████████████████████         | 673/782 [16:44<02:49,  1.55s/it]

Iter 673.0, Minibatch Loss= 0.931475, Training Accuracy= 0.55469


 86%|███████████████████████████████████████████████████████▏        | 674/782 [16:46<02:45,  1.53s/it]

Iter 674.0, Minibatch Loss= 0.931365, Training Accuracy= 0.55469


 86%|███████████████████████████████████████████████████████▏        | 675/782 [16:47<02:41,  1.51s/it]

Iter 675.0, Minibatch Loss= 0.932622, Training Accuracy= 0.55469


 86%|███████████████████████████████████████████████████████▎        | 676/782 [16:49<02:37,  1.49s/it]

Iter 676.0, Minibatch Loss= 0.936978, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▍        | 677/782 [16:50<02:41,  1.54s/it]

Iter 677.0, Minibatch Loss= 0.949203, Training Accuracy= 0.53125


 87%|███████████████████████████████████████████████████████▍        | 678/782 [16:52<02:35,  1.49s/it]

Iter 678.0, Minibatch Loss= 0.943000, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▌        | 679/782 [16:53<02:31,  1.47s/it]

Iter 679.0, Minibatch Loss= 0.943023, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▋        | 680/782 [16:54<02:29,  1.46s/it]

Iter 680.0, Minibatch Loss= 0.934994, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▋        | 681/782 [16:56<02:26,  1.45s/it]

Iter 681.0, Minibatch Loss= 0.934977, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▊        | 682/782 [16:57<02:23,  1.44s/it]

Iter 682.0, Minibatch Loss= 0.933919, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▉        | 683/782 [16:59<02:21,  1.42s/it]

Iter 683.0, Minibatch Loss= 0.936210, Training Accuracy= 0.55469


 87%|███████████████████████████████████████████████████████▉        | 684/782 [17:00<02:19,  1.42s/it]

Iter 684.0, Minibatch Loss= 0.935048, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████        | 685/782 [17:02<02:20,  1.44s/it]

Iter 685.0, Minibatch Loss= 0.938013, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▏       | 686/782 [17:03<02:23,  1.50s/it]

Iter 686.0, Minibatch Loss= 0.934769, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▏       | 687/782 [17:05<02:23,  1.51s/it]

Iter 687.0, Minibatch Loss= 0.936877, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▎       | 688/782 [17:06<02:23,  1.52s/it]

Iter 688.0, Minibatch Loss= 0.933717, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▍       | 689/782 [17:08<02:23,  1.55s/it]

Iter 689.0, Minibatch Loss= 0.935427, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▍       | 690/782 [17:09<02:22,  1.55s/it]

Iter 690.0, Minibatch Loss= 0.932956, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▌       | 691/782 [17:11<02:18,  1.52s/it]

Iter 691.0, Minibatch Loss= 0.934585, Training Accuracy= 0.55469


 88%|████████████████████████████████████████████████████████▋       | 692/782 [17:12<02:15,  1.50s/it]

Iter 692.0, Minibatch Loss= 0.932449, Training Accuracy= 0.55469


 89%|████████████████████████████████████████████████████████▋       | 693/782 [17:14<02:12,  1.49s/it]

Iter 693.0, Minibatch Loss= 0.934066, Training Accuracy= 0.55469


 89%|████████████████████████████████████████████████████████▊       | 694/782 [17:15<02:09,  1.47s/it]

Iter 694.0, Minibatch Loss= 0.932050, Training Accuracy= 0.55469


 89%|████████████████████████████████████████████████████████▉       | 695/782 [17:17<02:07,  1.47s/it]

Iter 695.0, Minibatch Loss= 0.933660, Training Accuracy= 0.55469


 89%|████████████████████████████████████████████████████████▉       | 696/782 [17:18<02:05,  1.46s/it]

Iter 696.0, Minibatch Loss= 0.931687, Training Accuracy= 0.55469


 89%|█████████████████████████████████████████████████████████       | 697/782 [17:20<02:04,  1.47s/it]

Iter 697.0, Minibatch Loss= 0.933282, Training Accuracy= 0.55469


 89%|█████████████████████████████████████████████████████████▏      | 698/782 [17:21<02:03,  1.48s/it]

Iter 698.0, Minibatch Loss= 0.931336, Training Accuracy= 0.55469


 89%|█████████████████████████████████████████████████████████▏      | 699/782 [17:23<02:02,  1.47s/it]

Iter 699.0, Minibatch Loss= 0.932914, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▎      | 700/782 [17:24<01:59,  1.46s/it]

Iter 700.0, Minibatch Loss= 0.930996, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▎      | 701/782 [17:25<01:55,  1.42s/it]

Iter 701.0, Minibatch Loss= 0.932565, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▍      | 702/782 [17:27<01:52,  1.41s/it]

Iter 702.0, Minibatch Loss= 0.930673, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▌      | 703/782 [17:28<01:51,  1.41s/it]

Iter 703.0, Minibatch Loss= 0.932250, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▌      | 704/782 [17:29<01:48,  1.40s/it]

Iter 704.0, Minibatch Loss= 0.930373, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▋      | 705/782 [17:31<01:47,  1.40s/it]

Iter 705.0, Minibatch Loss= 0.931980, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▊      | 706/782 [17:32<01:47,  1.42s/it]

Iter 706.0, Minibatch Loss= 0.930101, Training Accuracy= 0.55469


 90%|█████████████████████████████████████████████████████████▊      | 707/782 [17:34<01:48,  1.44s/it]

Iter 707.0, Minibatch Loss= 0.931759, Training Accuracy= 0.55469


 91%|█████████████████████████████████████████████████████████▉      | 708/782 [17:35<01:46,  1.44s/it]

Iter 708.0, Minibatch Loss= 0.929857, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████      | 709/782 [17:37<01:44,  1.43s/it]

Iter 709.0, Minibatch Loss= 0.931586, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████      | 710/782 [17:38<01:44,  1.45s/it]

Iter 710.0, Minibatch Loss= 0.929637, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████▏     | 711/782 [17:40<01:44,  1.47s/it]

Iter 711.0, Minibatch Loss= 0.931455, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████▎     | 712/782 [17:41<01:41,  1.45s/it]

Iter 712.0, Minibatch Loss= 0.929437, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████▎     | 713/782 [17:43<01:39,  1.44s/it]

Iter 713.0, Minibatch Loss= 0.931354, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████▍     | 714/782 [17:44<01:37,  1.43s/it]

Iter 714.0, Minibatch Loss= 0.929247, Training Accuracy= 0.55469


 91%|██████████████████████████████████████████████████████████▌     | 715/782 [17:45<01:36,  1.44s/it]

Iter 715.0, Minibatch Loss= 0.931270, Training Accuracy= 0.55469


 92%|██████████████████████████████████████████████████████████▌     | 716/782 [17:47<01:36,  1.46s/it]

Iter 716.0, Minibatch Loss= 0.929059, Training Accuracy= 0.55469


 92%|██████████████████████████████████████████████████████████▋     | 717/782 [17:48<01:36,  1.49s/it]

Iter 717.0, Minibatch Loss= 0.931188, Training Accuracy= 0.55469


 92%|██████████████████████████████████████████████████████████▊     | 718/782 [17:50<01:36,  1.51s/it]

Iter 718.0, Minibatch Loss= 0.928867, Training Accuracy= 0.55469


 92%|██████████████████████████████████████████████████████████▊     | 719/782 [17:52<01:35,  1.52s/it]

Iter 719.0, Minibatch Loss= 0.931103, Training Accuracy= 0.55469


 92%|██████████████████████████████████████████████████████████▉     | 720/782 [17:53<01:35,  1.54s/it]

Iter 720.0, Minibatch Loss= 0.928669, Training Accuracy= 0.55469


 92%|███████████████████████████████████████████████████████████     | 721/782 [17:55<01:36,  1.58s/it]

Iter 721.0, Minibatch Loss= 0.931013, Training Accuracy= 0.55469


 92%|███████████████████████████████████████████████████████████     | 722/782 [17:56<01:35,  1.60s/it]

Iter 722.0, Minibatch Loss= 0.928464, Training Accuracy= 0.55469


 92%|███████████████████████████████████████████████████████████▏    | 723/782 [17:58<01:33,  1.58s/it]

Iter 723.0, Minibatch Loss= 0.930921, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▎    | 724/782 [18:00<01:32,  1.59s/it]

Iter 724.0, Minibatch Loss= 0.928255, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▎    | 725/782 [18:01<01:30,  1.60s/it]

Iter 725.0, Minibatch Loss= 0.930831, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▍    | 726/782 [18:03<01:28,  1.59s/it]

Iter 726.0, Minibatch Loss= 0.928041, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▍    | 727/782 [18:04<01:25,  1.56s/it]

Iter 727.0, Minibatch Loss= 0.930744, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▌    | 728/782 [18:06<01:24,  1.56s/it]

Iter 728.0, Minibatch Loss= 0.927821, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▋    | 729/782 [18:07<01:20,  1.53s/it]

Iter 729.0, Minibatch Loss= 0.930655, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▋    | 730/782 [18:09<01:18,  1.52s/it]

Iter 730.0, Minibatch Loss= 0.927590, Training Accuracy= 0.55469


 93%|███████████████████████████████████████████████████████████▊    | 731/782 [18:10<01:17,  1.52s/it]

Iter 731.0, Minibatch Loss= 0.930559, Training Accuracy= 0.55469


 94%|███████████████████████████████████████████████████████████▉    | 732/782 [18:12<01:17,  1.56s/it]

Iter 732.0, Minibatch Loss= 0.927348, Training Accuracy= 0.55469


 94%|███████████████████████████████████████████████████████████▉    | 733/782 [18:13<01:14,  1.53s/it]

Iter 733.0, Minibatch Loss= 0.930453, Training Accuracy= 0.55469


 94%|████████████████████████████████████████████████████████████    | 734/782 [18:15<01:11,  1.49s/it]

Iter 734.0, Minibatch Loss= 0.927093, Training Accuracy= 0.55469


 94%|████████████████████████████████████████████████████████████▏   | 735/782 [18:16<01:09,  1.47s/it]

Iter 735.0, Minibatch Loss= 0.930340, Training Accuracy= 0.55469


 94%|████████████████████████████████████████████████████████████▏   | 736/782 [18:18<01:07,  1.48s/it]

Iter 736.0, Minibatch Loss= 0.926827, Training Accuracy= 0.55469


 94%|████████████████████████████████████████████████████████████▎   | 737/782 [18:19<01:08,  1.52s/it]

Iter 737.0, Minibatch Loss= 0.930221, Training Accuracy= 0.55469


 94%|████████████████████████████████████████████████████████████▍   | 738/782 [18:21<01:08,  1.55s/it]

Iter 738.0, Minibatch Loss= 0.926551, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▍   | 739/782 [18:23<01:06,  1.56s/it]

Iter 739.0, Minibatch Loss= 0.930096, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▌   | 740/782 [18:24<01:05,  1.56s/it]

Iter 740.0, Minibatch Loss= 0.926263, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▋   | 741/782 [18:26<01:05,  1.59s/it]

Iter 741.0, Minibatch Loss= 0.929963, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▋   | 742/782 [18:27<01:02,  1.56s/it]

Iter 742.0, Minibatch Loss= 0.925963, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▊   | 743/782 [18:29<01:00,  1.54s/it]

Iter 743.0, Minibatch Loss= 0.929819, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▉   | 744/782 [18:30<00:58,  1.55s/it]

Iter 744.0, Minibatch Loss= 0.925650, Training Accuracy= 0.55469


 95%|████████████████████████████████████████████████████████████▉   | 745/782 [18:32<00:56,  1.54s/it]

Iter 745.0, Minibatch Loss= 0.929664, Training Accuracy= 0.55469


 95%|█████████████████████████████████████████████████████████████   | 746/782 [18:33<00:55,  1.54s/it]

Iter 746.0, Minibatch Loss= 0.925325, Training Accuracy= 0.55469


 96%|█████████████████████████████████████████████████████████████▏  | 747/782 [18:35<00:53,  1.54s/it]

Iter 747.0, Minibatch Loss= 0.929497, Training Accuracy= 0.55469


 96%|█████████████████████████████████████████████████████████████▏  | 748/782 [18:36<00:51,  1.52s/it]

Iter 748.0, Minibatch Loss= 0.924988, Training Accuracy= 0.55469


 96%|█████████████████████████████████████████████████████████████▎  | 749/782 [18:38<00:49,  1.50s/it]

Iter 749.0, Minibatch Loss= 0.929319, Training Accuracy= 0.55469


 96%|█████████████████████████████████████████████████████████████▍  | 750/782 [18:40<00:49,  1.55s/it]

Iter 750.0, Minibatch Loss= 0.926846, Training Accuracy= 0.60938


 96%|█████████████████████████████████████████████████████████████▍  | 751/782 [18:41<00:48,  1.57s/it]

Iter 751.0, Minibatch Loss= 0.920161, Training Accuracy= 0.60938


 96%|█████████████████████████████████████████████████████████████▌  | 752/782 [18:43<00:46,  1.56s/it]

Iter 752.0, Minibatch Loss= 0.916095, Training Accuracy= 0.60938


 96%|█████████████████████████████████████████████████████████████▋  | 753/782 [18:44<00:45,  1.57s/it]

Iter 753.0, Minibatch Loss= 0.912430, Training Accuracy= 0.60938


 96%|█████████████████████████████████████████████████████████████▋  | 754/782 [18:46<00:43,  1.56s/it]

Iter 754.0, Minibatch Loss= 0.908657, Training Accuracy= 0.60938


 97%|█████████████████████████████████████████████████████████████▊  | 755/782 [18:47<00:42,  1.56s/it]

Iter 755.0, Minibatch Loss= 0.904604, Training Accuracy= 0.60938


 97%|█████████████████████████████████████████████████████████████▊  | 756/782 [18:49<00:40,  1.56s/it]

Iter 756.0, Minibatch Loss= 0.900179, Training Accuracy= 0.60938


 97%|█████████████████████████████████████████████████████████████▉  | 757/782 [18:50<00:38,  1.56s/it]

Iter 757.0, Minibatch Loss= 0.895374, Training Accuracy= 0.60938


 97%|██████████████████████████████████████████████████████████████  | 758/782 [18:52<00:36,  1.54s/it]

Iter 758.0, Minibatch Loss= 0.890286, Training Accuracy= 0.60938


 97%|██████████████████████████████████████████████████████████████  | 759/782 [18:54<00:35,  1.54s/it]

Iter 759.0, Minibatch Loss= 0.885102, Training Accuracy= 0.60938


 97%|██████████████████████████████████████████████████████████████▏ | 760/782 [18:55<00:33,  1.52s/it]

Iter 760.0, Minibatch Loss= 0.880014, Training Accuracy= 0.60938


 97%|██████████████████████████████████████████████████████████████▎ | 761/782 [18:56<00:31,  1.50s/it]

Iter 761.0, Minibatch Loss= 0.875137, Training Accuracy= 0.60938


 97%|██████████████████████████████████████████████████████████████▎ | 762/782 [18:58<00:29,  1.45s/it]

Iter 762.0, Minibatch Loss= 0.870505, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▍ | 763/782 [18:59<00:27,  1.44s/it]

Iter 763.0, Minibatch Loss= 0.866130, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▌ | 764/782 [19:01<00:26,  1.47s/it]

Iter 764.0, Minibatch Loss= 0.862116, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▌ | 765/782 [19:02<00:26,  1.53s/it]

Iter 765.0, Minibatch Loss= 0.860864, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▋ | 766/782 [19:04<00:24,  1.55s/it]

Iter 766.0, Minibatch Loss= 0.913762, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▊ | 767/782 [19:06<00:23,  1.56s/it]

Iter 767.0, Minibatch Loss= 1.126198, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▊ | 768/782 [19:07<00:21,  1.56s/it]

Iter 768.0, Minibatch Loss= 0.932299, Training Accuracy= 0.60938


 98%|██████████████████████████████████████████████████████████████▉ | 769/782 [19:09<00:20,  1.57s/it]

Iter 769.0, Minibatch Loss= 0.908544, Training Accuracy= 0.60938


 98%|███████████████████████████████████████████████████████████████ | 770/782 [19:10<00:18,  1.58s/it]

Iter 770.0, Minibatch Loss= 0.965357, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████ | 771/782 [19:12<00:16,  1.53s/it]

Iter 771.0, Minibatch Loss= 0.951943, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▏| 772/782 [19:13<00:14,  1.50s/it]

Iter 772.0, Minibatch Loss= 0.970611, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▎| 773/782 [19:15<00:13,  1.48s/it]

Iter 773.0, Minibatch Loss= 0.927713, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▎| 774/782 [19:16<00:11,  1.49s/it]

Iter 774.0, Minibatch Loss= 0.910762, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▍| 775/782 [19:18<00:10,  1.48s/it]

Iter 775.0, Minibatch Loss= 0.898486, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▌| 776/782 [19:19<00:08,  1.46s/it]

Iter 776.0, Minibatch Loss= 0.889450, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▌| 777/782 [19:20<00:07,  1.43s/it]

Iter 777.0, Minibatch Loss= 0.881466, Training Accuracy= 0.60938


 99%|███████████████████████████████████████████████████████████████▋| 778/782 [19:22<00:05,  1.47s/it]

Iter 778.0, Minibatch Loss= 0.874486, Training Accuracy= 0.60938


100%|███████████████████████████████████████████████████████████████▊| 779/782 [19:23<00:04,  1.46s/it]

Iter 779.0, Minibatch Loss= 0.868597, Training Accuracy= 0.60938


100%|███████████████████████████████████████████████████████████████▊| 780/782 [19:25<00:02,  1.45s/it]

Iter 780.0, Minibatch Loss= 0.863849, Training Accuracy= 0.60938


100%|███████████████████████████████████████████████████████████████▉| 781/782 [19:26<00:01,  1.44s/it]

Iter 781.0, Minibatch Loss= 0.861124, Training Accuracy= 0.60938


100%|████████████████████████████████████████████████████████████████| 782/782 [19:28<00:00,  1.44s/it]


Iter 0.0, Minibatch Loss= 0.915699, Training Accuracy= 0.57031
Iter 1.0, Minibatch Loss= 0.908374, Training Accuracy= 0.57031
Iter 2.0, Minibatch Loss= 0.905173, Training Accuracy= 0.57031
Iter 3.0, Minibatch Loss= 0.903111, Training Accuracy= 0.57031
Iter 4.0, Minibatch Loss= 0.901555, Training Accuracy= 0.57031
Iter 5.0, Minibatch Loss= 0.900305, Training Accuracy= 0.57031
Iter 6.0, Minibatch Loss= 0.899256, Training Accuracy= 0.57031
Iter 7.0, Minibatch Loss= 0.898350, Training Accuracy= 0.57031
Iter 8.0, Minibatch Loss= 0.897546, Training Accuracy= 0.57031
Iter 9.0, Minibatch Loss= 0.896818, Training Accuracy= 0.57031
Iter 10.0, Minibatch Loss= 0.896148, Training Accuracy= 0.57031
Iter 11.0, Minibatch Loss= 0.895524, Training Accuracy= 0.57031
Iter 12.0, Minibatch Loss= 0.894934, Training Accuracy= 0.57031
Iter 13.0, Minibatch Loss= 0.894372, Training Accuracy= 0.57031
Iter 14.0, Minibatch Loss= 0.893832, Training Accuracy= 0.57031
Iter 15.0, Minibatch Loss= 0.893308, Training Accu

Iter 127.0, Minibatch Loss= 1.029056, Training Accuracy= 0.48438
Iter 128.0, Minibatch Loss= 1.023348, Training Accuracy= 0.49219
Iter 129.0, Minibatch Loss= 1.018996, Training Accuracy= 0.49219
Iter 130.0, Minibatch Loss= 1.015308, Training Accuracy= 0.48438
Iter 131.0, Minibatch Loss= 1.011995, Training Accuracy= 0.47656
Iter 132.0, Minibatch Loss= 1.008939, Training Accuracy= 0.48438
Iter 133.0, Minibatch Loss= 1.006101, Training Accuracy= 0.48438
Iter 134.0, Minibatch Loss= 1.003483, Training Accuracy= 0.48438
Iter 135.0, Minibatch Loss= 1.001102, Training Accuracy= 0.48438
Iter 136.0, Minibatch Loss= 0.998973, Training Accuracy= 0.48438
Iter 137.0, Minibatch Loss= 0.997097, Training Accuracy= 0.48438
Iter 138.0, Minibatch Loss= 0.995456, Training Accuracy= 0.48438
Iter 139.0, Minibatch Loss= 0.994018, Training Accuracy= 0.48438
Iter 140.0, Minibatch Loss= 0.992745, Training Accuracy= 0.48438
Iter 141.0, Minibatch Loss= 0.991600, Training Accuracy= 0.47656
Iter 142.0, Minibatch Los

Iter 253.0, Minibatch Loss= 0.954642, Training Accuracy= 0.56250
Iter 254.0, Minibatch Loss= 0.947538, Training Accuracy= 0.57031
Iter 255.0, Minibatch Loss= 0.945953, Training Accuracy= 0.57812
Iter 256.0, Minibatch Loss= 0.937868, Training Accuracy= 0.57812
Iter 257.0, Minibatch Loss= 0.934297, Training Accuracy= 0.57031
Iter 258.0, Minibatch Loss= 0.932745, Training Accuracy= 0.57031
Iter 259.0, Minibatch Loss= 0.938391, Training Accuracy= 0.56250
Iter 260.0, Minibatch Loss= 0.928496, Training Accuracy= 0.57031
Iter 261.0, Minibatch Loss= 0.922446, Training Accuracy= 0.57812
Iter 262.0, Minibatch Loss= 0.918089, Training Accuracy= 0.58594
Iter 263.0, Minibatch Loss= 0.916553, Training Accuracy= 0.58594
Iter 264.0, Minibatch Loss= 0.921140, Training Accuracy= 0.57031
Iter 265.0, Minibatch Loss= 0.944220, Training Accuracy= 0.56250
Iter 266.0, Minibatch Loss= 0.965737, Training Accuracy= 0.56250
Iter 267.0, Minibatch Loss= 1.007584, Training Accuracy= 0.57031
Iter 268.0, Minibatch Los

Iter 379.0, Minibatch Loss= 0.889180, Training Accuracy= 0.58594
Iter 380.0, Minibatch Loss= 0.887924, Training Accuracy= 0.56250
Iter 381.0, Minibatch Loss= 0.898520, Training Accuracy= 0.57031
Iter 382.0, Minibatch Loss= 0.924037, Training Accuracy= 0.52344
Iter 383.0, Minibatch Loss= 0.904000, Training Accuracy= 0.57031
Iter 384.0, Minibatch Loss= 0.905979, Training Accuracy= 0.56250
Iter 385.0, Minibatch Loss= 0.871655, Training Accuracy= 0.60156
Iter 386.0, Minibatch Loss= 0.851122, Training Accuracy= 0.61719
Iter 387.0, Minibatch Loss= 0.850756, Training Accuracy= 0.60156
Iter 388.0, Minibatch Loss= 0.885386, Training Accuracy= 0.58594
Iter 389.0, Minibatch Loss= 0.878409, Training Accuracy= 0.60156
Iter 390.0, Minibatch Loss= 0.886629, Training Accuracy= 0.57031
Iter 391.0, Minibatch Loss= 0.833139, Training Accuracy= 0.60156
Iter 392.0, Minibatch Loss= 0.809159, Training Accuracy= 0.59375
Iter 393.0, Minibatch Loss= 0.803524, Training Accuracy= 0.61719
Iter 394.0, Minibatch Los

Iter 505.0, Minibatch Loss= 0.851614, Training Accuracy= 0.57812
Iter 506.0, Minibatch Loss= 0.820522, Training Accuracy= 0.60938
Iter 507.0, Minibatch Loss= 0.855735, Training Accuracy= 0.51562
Iter 508.0, Minibatch Loss= 0.856953, Training Accuracy= 0.54688
Iter 509.0, Minibatch Loss= 0.809030, Training Accuracy= 0.63281
Iter 510.0, Minibatch Loss= 0.801813, Training Accuracy= 0.60938
Iter 511.0, Minibatch Loss= 0.826148, Training Accuracy= 0.57812
Iter 512.0, Minibatch Loss= 0.839442, Training Accuracy= 0.58594
Iter 513.0, Minibatch Loss= 0.855465, Training Accuracy= 0.57812
Iter 514.0, Minibatch Loss= 0.825581, Training Accuracy= 0.58594
Iter 515.0, Minibatch Loss= 0.806172, Training Accuracy= 0.59375
Iter 516.0, Minibatch Loss= 0.811119, Training Accuracy= 0.56250
Iter 517.0, Minibatch Loss= 0.971247, Training Accuracy= 0.54688
Iter 518.0, Minibatch Loss= 0.820447, Training Accuracy= 0.61719
Iter 519.0, Minibatch Loss= 0.771817, Training Accuracy= 0.64062
Iter 520.0, Minibatch Los

Iter 631.0, Minibatch Loss= 0.750758, Training Accuracy= 0.65625
Iter 632.0, Minibatch Loss= 0.790077, Training Accuracy= 0.57812
Iter 633.0, Minibatch Loss= 0.713049, Training Accuracy= 0.67969
Iter 634.0, Minibatch Loss= 0.706872, Training Accuracy= 0.65625
Iter 635.0, Minibatch Loss= 0.708455, Training Accuracy= 0.68750
Iter 636.0, Minibatch Loss= 0.673829, Training Accuracy= 0.65625
Iter 637.0, Minibatch Loss= 0.681362, Training Accuracy= 0.68750
Iter 638.0, Minibatch Loss= 0.645356, Training Accuracy= 0.71094
Iter 639.0, Minibatch Loss= 0.735993, Training Accuracy= 0.67969
Iter 640.0, Minibatch Loss= 0.730518, Training Accuracy= 0.61719
Iter 641.0, Minibatch Loss= 0.767863, Training Accuracy= 0.67969
Iter 642.0, Minibatch Loss= 0.864935, Training Accuracy= 0.55469
Iter 643.0, Minibatch Loss= 0.713938, Training Accuracy= 0.67969
Iter 644.0, Minibatch Loss= 0.627138, Training Accuracy= 0.75000
Iter 645.0, Minibatch Loss= 0.593488, Training Accuracy= 0.75000
Iter 646.0, Minibatch Los

Iter 757.0, Minibatch Loss= 0.906079, Training Accuracy= 0.53906
Iter 758.0, Minibatch Loss= 0.808171, Training Accuracy= 0.54688
Iter 759.0, Minibatch Loss= 0.690227, Training Accuracy= 0.72656
Iter 760.0, Minibatch Loss= 0.762894, Training Accuracy= 0.62500
Iter 761.0, Minibatch Loss= 1.052269, Training Accuracy= 0.46875
Iter 762.0, Minibatch Loss= 0.939218, Training Accuracy= 0.53906
Iter 763.0, Minibatch Loss= 0.872500, Training Accuracy= 0.53125
Iter 764.0, Minibatch Loss= 0.855548, Training Accuracy= 0.57031
Iter 765.0, Minibatch Loss= 0.785335, Training Accuracy= 0.62500
Iter 766.0, Minibatch Loss= 0.847261, Training Accuracy= 0.64062
Iter 767.0, Minibatch Loss= 0.752982, Training Accuracy= 0.67969
Iter 768.0, Minibatch Loss= 0.716298, Training Accuracy= 0.71875
Iter 769.0, Minibatch Loss= 0.786572, Training Accuracy= 0.64844
Iter 770.0, Minibatch Loss= 0.826678, Training Accuracy= 0.60938
Iter 771.0, Minibatch Loss= 0.956352, Training Accuracy= 0.44531
Iter 772.0, Minibatch Los

Iter 102.0, Minibatch Loss= 0.863691, Training Accuracy= 0.54688
Iter 103.0, Minibatch Loss= 0.689754, Training Accuracy= 0.66406
Iter 104.0, Minibatch Loss= 0.551138, Training Accuracy= 0.73438
Iter 105.0, Minibatch Loss= 0.525222, Training Accuracy= 0.80469
Iter 106.0, Minibatch Loss= 0.533901, Training Accuracy= 0.76562
Iter 107.0, Minibatch Loss= 0.641196, Training Accuracy= 0.77344
Iter 108.0, Minibatch Loss= 0.622623, Training Accuracy= 0.75000
Iter 109.0, Minibatch Loss= 0.778973, Training Accuracy= 0.64844
Iter 110.0, Minibatch Loss= 0.848506, Training Accuracy= 0.55469
Iter 111.0, Minibatch Loss= 0.754497, Training Accuracy= 0.65625
Iter 112.0, Minibatch Loss= 0.789867, Training Accuracy= 0.70312
Iter 113.0, Minibatch Loss= 0.785745, Training Accuracy= 0.67969
Iter 114.0, Minibatch Loss= 0.828806, Training Accuracy= 0.68750
Iter 115.0, Minibatch Loss= 0.836599, Training Accuracy= 0.62500
Iter 116.0, Minibatch Loss= 0.808418, Training Accuracy= 0.73438
Iter 117.0, Minibatch Los

Iter 228.0, Minibatch Loss= 0.067442, Training Accuracy= 0.99219
Iter 229.0, Minibatch Loss= 0.060960, Training Accuracy= 0.99219
Iter 230.0, Minibatch Loss= 0.056558, Training Accuracy= 0.99219
Iter 231.0, Minibatch Loss= 0.052922, Training Accuracy= 0.99219
Iter 232.0, Minibatch Loss= 0.049785, Training Accuracy= 0.99219
Iter 233.0, Minibatch Loss= 0.046909, Training Accuracy= 0.99219
Iter 234.0, Minibatch Loss= 0.044161, Training Accuracy= 0.99219
Iter 235.0, Minibatch Loss= 0.041413, Training Accuracy= 0.99219
Iter 236.0, Minibatch Loss= 0.038497, Training Accuracy= 0.99219
Iter 237.0, Minibatch Loss= 0.035181, Training Accuracy= 0.99219
Iter 238.0, Minibatch Loss= 0.031335, Training Accuracy= 0.99219
Iter 239.0, Minibatch Loss= 0.027438, Training Accuracy= 1.00000
Iter 240.0, Minibatch Loss= 0.024088, Training Accuracy= 1.00000
Iter 241.0, Minibatch Loss= 0.021374, Training Accuracy= 1.00000
Iter 242.0, Minibatch Loss= 0.019246, Training Accuracy= 1.00000
Iter 243.0, Minibatch Los

Iter 354.0, Minibatch Loss= 0.930155, Training Accuracy= 0.57812
Iter 355.0, Minibatch Loss= 0.935706, Training Accuracy= 0.58594
Iter 356.0, Minibatch Loss= 0.923116, Training Accuracy= 0.57031
Iter 357.0, Minibatch Loss= 0.942102, Training Accuracy= 0.58594
Iter 358.0, Minibatch Loss= 0.941677, Training Accuracy= 0.52344
Iter 359.0, Minibatch Loss= 0.969321, Training Accuracy= 0.58594
Iter 360.0, Minibatch Loss= 0.943069, Training Accuracy= 0.54688
Iter 361.0, Minibatch Loss= 0.931666, Training Accuracy= 0.58594
Iter 362.0, Minibatch Loss= 0.907328, Training Accuracy= 0.57031
Iter 363.0, Minibatch Loss= 0.911445, Training Accuracy= 0.58594
Iter 364.0, Minibatch Loss= 0.900997, Training Accuracy= 0.57812
Iter 365.0, Minibatch Loss= 0.918441, Training Accuracy= 0.58594
Iter 366.0, Minibatch Loss= 0.917092, Training Accuracy= 0.53906
Iter 367.0, Minibatch Loss= 0.961588, Training Accuracy= 0.58594
Iter 368.0, Minibatch Loss= 0.949750, Training Accuracy= 0.56250
Iter 369.0, Minibatch Los

Iter 480.0, Minibatch Loss= 0.803023, Training Accuracy= 0.58594
Iter 481.0, Minibatch Loss= 0.826462, Training Accuracy= 0.54688
Iter 482.0, Minibatch Loss= 0.919301, Training Accuracy= 0.57031
Iter 483.0, Minibatch Loss= 1.304534, Training Accuracy= 0.45312
Iter 484.0, Minibatch Loss= 0.903109, Training Accuracy= 0.52344
Iter 485.0, Minibatch Loss= 0.859716, Training Accuracy= 0.54688
Iter 486.0, Minibatch Loss= 0.818197, Training Accuracy= 0.54688
Iter 487.0, Minibatch Loss= 0.784455, Training Accuracy= 0.55469
Iter 488.0, Minibatch Loss= 0.776278, Training Accuracy= 0.55469
Iter 489.0, Minibatch Loss= 0.837015, Training Accuracy= 0.54688
Iter 490.0, Minibatch Loss= 1.052018, Training Accuracy= 0.53125
Iter 491.0, Minibatch Loss= 1.022600, Training Accuracy= 0.50781
Iter 492.0, Minibatch Loss= 0.964819, Training Accuracy= 0.57031
Iter 493.0, Minibatch Loss= 0.840342, Training Accuracy= 0.48438
Iter 494.0, Minibatch Loss= 0.796339, Training Accuracy= 0.57031
Iter 495.0, Minibatch Los

Iter 606.0, Minibatch Loss= 0.997203, Training Accuracy= 0.45312
Iter 607.0, Minibatch Loss= 1.030998, Training Accuracy= 0.54688
Iter 608.0, Minibatch Loss= 0.980409, Training Accuracy= 0.43750
Iter 609.0, Minibatch Loss= 0.993240, Training Accuracy= 0.54688
Iter 610.0, Minibatch Loss= 0.930692, Training Accuracy= 0.43750
Iter 611.0, Minibatch Loss= 0.945407, Training Accuracy= 0.55469
Iter 612.0, Minibatch Loss= 0.888636, Training Accuracy= 0.53906
Iter 613.0, Minibatch Loss= 0.941746, Training Accuracy= 0.55469
Iter 614.0, Minibatch Loss= 0.818176, Training Accuracy= 0.55469
Iter 615.0, Minibatch Loss= 0.861705, Training Accuracy= 0.56250
Iter 616.0, Minibatch Loss= 0.725093, Training Accuracy= 0.62500
Iter 617.0, Minibatch Loss= 0.719015, Training Accuracy= 0.56250
Iter 618.0, Minibatch Loss= 0.725397, Training Accuracy= 0.58594
Iter 619.0, Minibatch Loss= 0.719792, Training Accuracy= 0.57031
Iter 620.0, Minibatch Loss= 0.774898, Training Accuracy= 0.59375
Iter 621.0, Minibatch Los

### Developing

In [188]:
# import numpy as np
# accs = [] # 128
# batches = [128, 64, 32, 1, 256]
# for batch in batches:
#     batch_size = batch
#     print("dev batch %s" % str(batch))
#     accs.append(test("data/dev.txt"))

# print(accs)
# print("Best batch size %s" % str(batches[np.argmax(accs)]))


### Testing

In [None]:
def test(file_test="data/test_labeled.txt"):
    data_feature_list, correct_values, correct_scores = split_data_into_scores(file_test)

    #hyps, evis, ys 
    data = (data_feature_list[0],
                      data_feature_list[1],
                      correct_scores)
    # predictions = []
    correct_predictions = 0
    total_predictions = 0
    for i, f1 in enumerate(data_feature_list[0]):
        hyps, evis, ys = data_feature_list[0][i], data_feature_list[1][i], correct_scores[i]
        prediction = sess.run(classification_scores, feed_dict={hyp: ([hyps] * N), evi: ([evis] * N), y: ([ys] * N)})
    #     predictions.append(prediction)
        total_predictions += 1
        if np.argmax(prediction[0])==np.argmax(ys):
            correct_predictions += 1
    #     if total_predictions % 500 == 0:
    #         print(total_predictions)
    #     print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
    #       " / " + ["Positive", "Neutral", "Negative"][np.argmax(ys[i])])
    print(total_predictions)

    # acc = sess.run(accuracy)#, feed_dict={'label': correct_scores})
    acc = correct_predictions*100/total_predictions
    print("Acc: %s" % str(acc))    
# print("Acc: %s" % str(acc))

import time
start = time.time()
test()
stop = time.time()
mins = int((stop-start)/60.0)
print("Training took %s:%s" % (str(mins), str(stop - start - mins*60)))

In [190]:
sess.close()
#56.52