In [10]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
import shutil
import pandas as pd
from torch.utils.data.dataset import Dataset
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np
import pandas as pd
import sys
import csv
csv.field_size_limit(1000000)
from sklearn import metrics
import nltk
# nltk.download('punkt')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [11]:
class MyDataset(Dataset):

    def __init__(self, data_path, dict_path, max_length_sentences=30, max_length_word=35):
        super(MyDataset, self).__init__()

        texts, labels = [], []
        with open(data_path) as csv_file:
            reader = csv.reader(csv_file, quotechar='"')
            for idx, line in enumerate(reader):
                text = ""
                for tx in line[1:]:
                    text += tx.lower()
                    text += " "
                label = int(line[0]) - 1
                texts.append(text)
                labels.append(label)

        self.texts = texts
        self.labels = labels
        self.dict = pd.read_csv(filepath_or_buffer=dict_path, header=None, sep=" ", quoting=csv.QUOTE_NONE,
                                usecols=[0]).values 
        
        self.dict = [word[0] for word in self.dict]
        self.max_length_sentences = max_length_sentences
        self.max_length_word = max_length_word
        self.num_classes = len(set(self.labels))

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        label = self.labels[index]
        text = self.texts[index]
        document_encode = [
            [self.dict.index(word) if word in self.dict else -1 for word in word_tokenize(text=sentences)] for sentences
            in
            sent_tokenize(text=text)]

        for sentences in document_encode:
            if len(sentences) < self.max_length_word:
                extended_words = [-1 for _ in range(self.max_length_word - len(sentences))]
                sentences.extend(extended_words)

        if len(document_encode) < self.max_length_sentences:
            extended_sentences = [[-1 for _ in range(self.max_length_word)] for _ in
                                  range(self.max_length_sentences - len(document_encode))]
            document_encode.extend(extended_sentences)

        document_encode = [sentences[:self.max_length_word] for sentences in document_encode][
                          :self.max_length_sentences]

        document_encode = np.stack(arrays=document_encode, axis=0)
        document_encode += 1

        return document_encode.astype(np.int64), label

test = MyDataset(data_path="data/train.csv", dict_path="glove.6B.50d.txt")
print (test.__getitem__(index=1)[0].shape)
print(test.__len__())

(30, 35)
120000


In [21]:
def get_evaluation(y_true, y_prob, list_metrics):
    y_pred = np.argmax(y_prob, -1)
    output = {}
    if 'accuracy' in list_metrics:
        output['accuracy'] = metrics.accuracy_score(y_true, y_pred)
    if 'loss' in list_metrics:
        try:
            output['loss'] = metrics.log_loss(y_true, y_prob)
        except ValueError:
            output['loss'] = -1
    if 'confusion_matrix' in list_metrics:
        output['confusion_matrix'] = str(metrics.confusion_matrix(y_true, y_pred))
    return output

def matrix_mul(input, weight, bias=False):
    feature_list = []
    for feature in input:
        feature = torch.mm(feature, weight)
        if isinstance(bias, torch.nn.parameter.Parameter):
            feature = feature + bias.expand(feature.size()[0], bias.size()[1])
        feature = torch.tanh(feature).unsqueeze(0)
        feature_list.append(feature)

    return torch.cat(feature_list, 0).squeeze()

def element_wise_mul(input1, input2):

    feature_list = []
    for feature_1, feature_2 in zip(input1, input2):
        feature_2 = feature_2.unsqueeze(1).expand_as(feature_1)
        feature = feature_1 * feature_2
        feature_list.append(feature.unsqueeze(0))
    output = torch.cat(feature_list, 0)

    return torch.sum(output, 0).unsqueeze(0)

def get_max_lengths(data_path):
    word_length_list = []
    sent_length_list = []
    with open(data_path) as csv_file:
        reader = csv.reader(csv_file, quotechar='"')
        for idx, line in enumerate(reader):
            text = ""
            for tx in line[1:]:
                text += tx.lower()
                text += " "
            sent_list = sent_tokenize(text)
            sent_length_list.append(len(sent_list))

            for sent in sent_list:
                word_list = word_tokenize(sent)
                word_length_list.append(len(word_list))

        sorted_word_length = sorted(word_length_list)
        sorted_sent_length = sorted(sent_length_list)

    return sorted_word_length[int(0.8*len(sorted_word_length))], sorted_sent_length[int(0.8*len(sorted_sent_length))]

word, sent = get_max_lengths("data/test.csv")
print (word)
print (sent)

46
2


In [20]:
class WordAttNet(nn.Module):
    def __init__(self, word2vec_path, hidden_size=50):
        super(WordAttNet, self).__init__()
        dict = pd.read_csv(filepath_or_buffer=word2vec_path, header=None, sep=" ", quoting=csv.QUOTE_NONE).values[:, 1:]
        dict_len, embed_size = dict.shape
        dict_len += 1
        unknown_word = np.zeros((1, embed_size))
        dict = torch.from_numpy(np.concatenate([unknown_word, dict], axis=0).astype(np.float64))


        self.word_weight = nn.Parameter(torch.Tensor(2 * hidden_size, 2 * hidden_size))
        self.word_bias = nn.Parameter(torch.Tensor(1, 2 * hidden_size))
        self.context_weight = nn.Parameter(torch.Tensor(2 * hidden_size, 1))

        self.lookup = nn.Embedding(num_embeddings=dict_len, embedding_dim=embed_size).from_pretrained(dict)
        self.gru = nn.GRU(embed_size, hidden_size, bidirectional=True)
        self._create_weights(mean=0.0, std=0.05)

    def _create_weights(self, mean=0.0, std=0.05):

        self.word_weight.data.normal_(mean, std)
        self.context_weight.data.normal_(mean, std)

    def forward(self, input, hidden_state):

        output = self.lookup(input)
        f_output, h_output = self.gru(output.float(), hidden_state)  # feature output and hidden state output
        output = matrix_mul(f_output, self.word_weight, self.word_bias)
        output = matrix_mul(output, self.context_weight).permute(1,0)
        output = F.softmax(output)
        output = element_wise_mul(f_output,output.permute(1,0))

        return output, h_output

abc = WordAttNet(word2vec_path="glove.6B.50d.txt")

In [19]:
class SentAttNet(nn.Module):
    def __init__(self, sent_hidden_size=50, word_hidden_size=50, num_classes=14):
        super(SentAttNet, self).__init__()

        self.sent_weight = nn.Parameter(torch.Tensor(2 * sent_hidden_size, 2 * sent_hidden_size))
        self.sent_bias = nn.Parameter(torch.Tensor(1, 2 * sent_hidden_size))
        self.context_weight = nn.Parameter(torch.Tensor(2 * sent_hidden_size, 1))

        self.gru = nn.GRU(2 * word_hidden_size, sent_hidden_size, bidirectional=True)
        self.fc = nn.Linear(2 * sent_hidden_size, num_classes)
        # self.sent_softmax = nn.Softmax()
        # self.fc_softmax = nn.Softmax()
        self._create_weights(mean=0.0, std=0.05)

    def _create_weights(self, mean=0.0, std=0.05):
        self.sent_weight.data.normal_(mean, std)
        self.context_weight.data.normal_(mean, std)

    def forward(self, input, hidden_state):

        f_output, h_output = self.gru(input, hidden_state)
        output = matrix_mul(f_output, self.sent_weight, self.sent_bias)
        output = matrix_mul(output, self.context_weight).permute(1, 0)
        output = F.softmax(output)
        output = element_wise_mul(f_output, output.permute(1, 0)).squeeze(0)
        output = self.fc(output)

        return output, h_output

abc = SentAttNet()
abc


SentAttNet(
  (gru): GRU(100, 50, bidirectional=True)
  (fc): Linear(in_features=100, out_features=14, bias=True)
)

In [18]:
class HierAttNet(nn.Module):
    def __init__(self, word_hidden_size, sent_hidden_size, batch_size, num_classes, pretrained_word2vec_path,
                 max_sent_length, max_word_length):
        super(HierAttNet, self).__init__()
        self.batch_size = batch_size
        self.word_hidden_size = word_hidden_size
        self.sent_hidden_size = sent_hidden_size
        self.max_sent_length = max_sent_length
        self.max_word_length = max_word_length
        self.word_att_net = WordAttNet(pretrained_word2vec_path, word_hidden_size)
        self.sent_att_net = SentAttNet(sent_hidden_size, word_hidden_size, num_classes)
        self._init_hidden_state()

    def _init_hidden_state(self, last_batch_size=None):
        if last_batch_size:
            batch_size = last_batch_size
        else:
            batch_size = self.batch_size
        self.word_hidden_state = torch.zeros(2, batch_size, self.word_hidden_size)
        self.sent_hidden_state = torch.zeros(2, batch_size, self.sent_hidden_size)
        if torch.cuda.is_available():
            self.word_hidden_state = self.word_hidden_state.cuda()
            self.sent_hidden_state = self.sent_hidden_state.cuda()

    def forward(self, input):

        output_list = []
        input = input.permute(1, 0, 2)
        for i in input:
            output, self.word_hidden_state = self.word_att_net(i.permute(1, 0), self.word_hidden_state)
            output_list.append(output)
        output = torch.cat(output_list, 0)
        output, self.sent_hidden_state = self.sent_att_net(output, self.sent_hidden_state)

        return output


In [16]:
class Args:
    def __init__(self):
        self.batch_size = 128
        self.num_epoches = 2
        self.lr = 0.1
        self.momentum = 0.9
        self.word_hidden_size = 50
        self.sent_hidden_size = 50
        self.es_min_delta = 0.0
        self.es_patience = 5
        self.train_set = "data/train.csv"
        self.test_set = "data/test.csv"
        self.test_interval = 1
        self.word2vec_path = "glove.6B.50d.txt"
        self.log_path = "tensorboard/han_voc"
        self.saved_path = "trained_models"
        
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    if not os.path.exists(opt.saved_path):
        os.makedirs(opt.saved_path)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "drop_last": True}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False}

    max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, training_set.num_classes,
                       opt.word2vec_path, max_sent_length, max_word_length)


    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr, momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for iter, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(), list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epoches,
                iter + 1,
                num_iter_per_epoch,
                optimizer.param_groups[0]['lr'],
                loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches,
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epoches,
                optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break

In [22]:
opt = Args()
train(opt)

  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 1/937, Lr: 0.1, Loss: 1.3807423114776611, Accuracy: 0.265625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 2/937, Lr: 0.1, Loss: 1.383796215057373, Accuracy: 0.171875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 3/937, Lr: 0.1, Loss: 1.3800508975982666, Accuracy: 0.203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 4/937, Lr: 0.1, Loss: 1.3835102319717407, Accuracy: 0.203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 5/937, Lr: 0.1, Loss: 1.3816704750061035, Accuracy: 0.2734375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 6/937, Lr: 0.1, Loss: 1.3835817575454712, Accuracy: 0.2734375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 7/937, Lr: 0.1, Loss: 1.3912078142166138, Accuracy: 0.3828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 8/937, Lr: 0.1, Loss: 1.3846322298049927, Accuracy: 0.46875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 9/937, Lr: 0.1, Loss: 1.3873422145843506, Accuracy: 0.390625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 10/937, Lr: 0.1, Loss: 1.3650561571121216, Accuracy: 0.5546875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 11/937, Lr: 0.1, Loss: 1.3698673248291016, Accuracy: 0.578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 12/937, Lr: 0.1, Loss: 1.3617451190948486, Accuracy: 0.6484375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 13/937, Lr: 0.1, Loss: 1.3586045503616333, Accuracy: 0.625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 14/937, Lr: 0.1, Loss: 1.3596137762069702, Accuracy: 0.4140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 15/937, Lr: 0.1, Loss: 1.3571399450302124, Accuracy: 0.390625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 16/937, Lr: 0.1, Loss: 1.3506348133087158, Accuracy: 0.421875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 17/937, Lr: 0.1, Loss: 1.3452259302139282, Accuracy: 0.3828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 18/937, Lr: 0.1, Loss: 1.324944019317627, Accuracy: 0.6796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 19/937, Lr: 0.1, Loss: 1.3205273151397705, Accuracy: 0.5703125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 20/937, Lr: 0.1, Loss: 1.3182909488677979, Accuracy: 0.4765625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 21/937, Lr: 0.1, Loss: 1.340577244758606, Accuracy: 0.375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 22/937, Lr: 0.1, Loss: 1.3327375650405884, Accuracy: 0.421875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 23/937, Lr: 0.1, Loss: 1.335890293121338, Accuracy: 0.375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 24/937, Lr: 0.1, Loss: 1.2811644077301025, Accuracy: 0.6328125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 25/937, Lr: 0.1, Loss: 1.279802918434143, Accuracy: 0.625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 26/937, Lr: 0.1, Loss: 1.2618579864501953, Accuracy: 0.5390625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 27/937, Lr: 0.1, Loss: 1.275402545928955, Accuracy: 0.3671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 28/937, Lr: 0.1, Loss: 1.2766588926315308, Accuracy: 0.3984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 29/937, Lr: 0.1, Loss: 1.2429863214492798, Accuracy: 0.4140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 30/937, Lr: 0.1, Loss: 1.1934897899627686, Accuracy: 0.484375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 31/937, Lr: 0.1, Loss: 1.1571673154830933, Accuracy: 0.5078125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 32/937, Lr: 0.1, Loss: 1.1593118906021118, Accuracy: 0.5


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 33/937, Lr: 0.1, Loss: 1.1917574405670166, Accuracy: 0.4296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 34/937, Lr: 0.1, Loss: 1.0665762424468994, Accuracy: 0.6640625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 35/937, Lr: 0.1, Loss: 1.0747382640838623, Accuracy: 0.7265625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 36/937, Lr: 0.1, Loss: 1.0344793796539307, Accuracy: 0.6796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 37/937, Lr: 0.1, Loss: 1.0021333694458008, Accuracy: 0.7578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 38/937, Lr: 0.1, Loss: 1.0064575672149658, Accuracy: 0.71875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 39/937, Lr: 0.1, Loss: 0.9666009545326233, Accuracy: 0.6640625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 40/937, Lr: 0.1, Loss: 0.8448064923286438, Accuracy: 0.7421875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 41/937, Lr: 0.1, Loss: 0.8492686152458191, Accuracy: 0.7578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 42/937, Lr: 0.1, Loss: 0.8176388144493103, Accuracy: 0.8046875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 43/937, Lr: 0.1, Loss: 0.8024977445602417, Accuracy: 0.7890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 44/937, Lr: 0.1, Loss: 0.7898409366607666, Accuracy: 0.7890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 45/937, Lr: 0.1, Loss: 0.8008730411529541, Accuracy: 0.7578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 46/937, Lr: 0.1, Loss: 0.7019920945167542, Accuracy: 0.765625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 47/937, Lr: 0.1, Loss: 0.6123340725898743, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 48/937, Lr: 0.1, Loss: 0.6553052067756653, Accuracy: 0.796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 49/937, Lr: 0.1, Loss: 0.6607319116592407, Accuracy: 0.6953125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 50/937, Lr: 0.1, Loss: 0.6516669392585754, Accuracy: 0.7578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 51/937, Lr: 0.1, Loss: 0.632931113243103, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 52/937, Lr: 0.1, Loss: 0.6840794682502747, Accuracy: 0.78125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 53/937, Lr: 0.1, Loss: 0.651389479637146, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 54/937, Lr: 0.1, Loss: 0.5763641595840454, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 55/937, Lr: 0.1, Loss: 0.6334599256515503, Accuracy: 0.796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 56/937, Lr: 0.1, Loss: 0.6881358623504639, Accuracy: 0.7734375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 57/937, Lr: 0.1, Loss: 0.785292387008667, Accuracy: 0.7265625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 58/937, Lr: 0.1, Loss: 0.5249456763267517, Accuracy: 0.796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 59/937, Lr: 0.1, Loss: 0.501720666885376, Accuracy: 0.796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 60/937, Lr: 0.1, Loss: 0.8114646673202515, Accuracy: 0.7265625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 61/937, Lr: 0.1, Loss: 0.47918859124183655, Accuracy: 0.8046875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 62/937, Lr: 0.1, Loss: 0.5888106226921082, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 63/937, Lr: 0.1, Loss: 0.822724461555481, Accuracy: 0.703125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 64/937, Lr: 0.1, Loss: 0.4783383011817932, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 65/937, Lr: 0.1, Loss: 0.7648991346359253, Accuracy: 0.7265625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 66/937, Lr: 0.1, Loss: 0.5941094160079956, Accuracy: 0.7578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 67/937, Lr: 0.1, Loss: 0.48959898948669434, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 68/937, Lr: 0.1, Loss: 0.4288776218891144, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 69/937, Lr: 0.1, Loss: 0.606497049331665, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 70/937, Lr: 0.1, Loss: 0.554332971572876, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 71/937, Lr: 0.1, Loss: 0.5276173949241638, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 72/937, Lr: 0.1, Loss: 0.49781590700149536, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 73/937, Lr: 0.1, Loss: 0.675266683101654, Accuracy: 0.7578125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 74/937, Lr: 0.1, Loss: 0.4580564498901367, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 75/937, Lr: 0.1, Loss: 0.581664502620697, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 76/937, Lr: 0.1, Loss: 0.47868701815605164, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 77/937, Lr: 0.1, Loss: 0.41051220893859863, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 78/937, Lr: 0.1, Loss: 0.6471154093742371, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 79/937, Lr: 0.1, Loss: 0.43339017033576965, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 80/937, Lr: 0.1, Loss: 0.41115081310272217, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 81/937, Lr: 0.1, Loss: 0.4321819543838501, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 82/937, Lr: 0.1, Loss: 0.5219860076904297, Accuracy: 0.796875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 83/937, Lr: 0.1, Loss: 0.34855562448501587, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 84/937, Lr: 0.1, Loss: 0.4456726312637329, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 85/937, Lr: 0.1, Loss: 0.41489511728286743, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 86/937, Lr: 0.1, Loss: 0.4104601740837097, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 87/937, Lr: 0.1, Loss: 0.5229226350784302, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 88/937, Lr: 0.1, Loss: 0.4402884244918823, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 89/937, Lr: 0.1, Loss: 0.424992173910141, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 90/937, Lr: 0.1, Loss: 0.40347954630851746, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 91/937, Lr: 0.1, Loss: 0.5752367973327637, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 92/937, Lr: 0.1, Loss: 0.3703652322292328, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 93/937, Lr: 0.1, Loss: 0.35168004035949707, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 94/937, Lr: 0.1, Loss: 0.3660736382007599, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 95/937, Lr: 0.1, Loss: 0.47848257422447205, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 96/937, Lr: 0.1, Loss: 0.4124130606651306, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 97/937, Lr: 0.1, Loss: 0.2845352292060852, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 98/937, Lr: 0.1, Loss: 0.4454435110092163, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 99/937, Lr: 0.1, Loss: 0.47667983174324036, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 100/937, Lr: 0.1, Loss: 0.43237870931625366, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 101/937, Lr: 0.1, Loss: 0.4559752345085144, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 102/937, Lr: 0.1, Loss: 0.33308708667755127, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 103/937, Lr: 0.1, Loss: 0.5270054340362549, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 104/937, Lr: 0.1, Loss: 0.376537024974823, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 105/937, Lr: 0.1, Loss: 0.4162393808364868, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 106/937, Lr: 0.1, Loss: 0.3439039885997772, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 107/937, Lr: 0.1, Loss: 0.33009016513824463, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 108/937, Lr: 0.1, Loss: 0.38545894622802734, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 109/937, Lr: 0.1, Loss: 0.45623651146888733, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 110/937, Lr: 0.1, Loss: 0.39795222878456116, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 111/937, Lr: 0.1, Loss: 0.41036269068717957, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 112/937, Lr: 0.1, Loss: 0.39458271861076355, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 113/937, Lr: 0.1, Loss: 0.5209605693817139, Accuracy: 0.8046875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 114/937, Lr: 0.1, Loss: 0.5262248516082764, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 115/937, Lr: 0.1, Loss: 0.49977830052375793, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 116/937, Lr: 0.1, Loss: 0.5848079323768616, Accuracy: 0.7890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 117/937, Lr: 0.1, Loss: 0.4846966564655304, Accuracy: 0.78125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 118/937, Lr: 0.1, Loss: 0.453421950340271, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 119/937, Lr: 0.1, Loss: 0.5186865329742432, Accuracy: 0.8046875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 120/937, Lr: 0.1, Loss: 0.41243669390678406, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 121/937, Lr: 0.1, Loss: 0.44287773966789246, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 122/937, Lr: 0.1, Loss: 0.4357535243034363, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 123/937, Lr: 0.1, Loss: 0.4379569888114929, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 124/937, Lr: 0.1, Loss: 0.41138190031051636, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 125/937, Lr: 0.1, Loss: 0.3982745110988617, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 126/937, Lr: 0.1, Loss: 0.4389682710170746, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 127/937, Lr: 0.1, Loss: 0.30126824975013733, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 128/937, Lr: 0.1, Loss: 0.34007754921913147, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 129/937, Lr: 0.1, Loss: 0.43859991431236267, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 130/937, Lr: 0.1, Loss: 0.43633151054382324, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 131/937, Lr: 0.1, Loss: 0.3851814270019531, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 132/937, Lr: 0.1, Loss: 0.33431529998779297, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 133/937, Lr: 0.1, Loss: 0.5151264071464539, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 134/937, Lr: 0.1, Loss: 0.3619084060192108, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 135/937, Lr: 0.1, Loss: 0.35914498567581177, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 136/937, Lr: 0.1, Loss: 0.39190909266471863, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 137/937, Lr: 0.1, Loss: 0.3101038932800293, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 138/937, Lr: 0.1, Loss: 0.28834930062294006, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 139/937, Lr: 0.1, Loss: 0.2960008680820465, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 140/937, Lr: 0.1, Loss: 0.24056094884872437, Accuracy: 0.9453125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 141/937, Lr: 0.1, Loss: 0.31220582127571106, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 142/937, Lr: 0.1, Loss: 0.41006162762641907, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 143/937, Lr: 0.1, Loss: 0.34802407026290894, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 144/937, Lr: 0.1, Loss: 0.3978712856769562, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 145/937, Lr: 0.1, Loss: 0.4245297610759735, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 146/937, Lr: 0.1, Loss: 0.37236475944519043, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 147/937, Lr: 0.1, Loss: 0.4351884722709656, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 148/937, Lr: 0.1, Loss: 0.41372615098953247, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 149/937, Lr: 0.1, Loss: 0.3340732753276825, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 150/937, Lr: 0.1, Loss: 0.2601150572299957, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 151/937, Lr: 0.1, Loss: 0.29478979110717773, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 152/937, Lr: 0.1, Loss: 0.5373103022575378, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 153/937, Lr: 0.1, Loss: 0.3039618730545044, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 154/937, Lr: 0.1, Loss: 0.35590699315071106, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 155/937, Lr: 0.1, Loss: 0.19738076627254486, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 156/937, Lr: 0.1, Loss: 0.3252291679382324, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 157/937, Lr: 0.1, Loss: 0.3983322083950043, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 158/937, Lr: 0.1, Loss: 0.3077617287635803, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 159/937, Lr: 0.1, Loss: 0.3274947702884674, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 160/937, Lr: 0.1, Loss: 0.5280047655105591, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 161/937, Lr: 0.1, Loss: 0.27662861347198486, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 162/937, Lr: 0.1, Loss: 0.45271676778793335, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 163/937, Lr: 0.1, Loss: 0.21158207952976227, Accuracy: 0.9453125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 164/937, Lr: 0.1, Loss: 0.3978729248046875, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 165/937, Lr: 0.1, Loss: 0.3968457579612732, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 166/937, Lr: 0.1, Loss: 0.24927116930484772, Accuracy: 0.9609375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 167/937, Lr: 0.1, Loss: 0.34678715467453003, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 168/937, Lr: 0.1, Loss: 0.34895744919776917, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 169/937, Lr: 0.1, Loss: 0.4048977792263031, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 170/937, Lr: 0.1, Loss: 0.35427355766296387, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 171/937, Lr: 0.1, Loss: 0.29974740743637085, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 172/937, Lr: 0.1, Loss: 0.4222210943698883, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 173/937, Lr: 0.1, Loss: 0.33646711707115173, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 174/937, Lr: 0.1, Loss: 0.4413657486438751, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 175/937, Lr: 0.1, Loss: 0.3741428256034851, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 176/937, Lr: 0.1, Loss: 0.43658646941185, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 177/937, Lr: 0.1, Loss: 0.3266402781009674, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 178/937, Lr: 0.1, Loss: 0.3017246425151825, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 179/937, Lr: 0.1, Loss: 0.32308343052864075, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 180/937, Lr: 0.1, Loss: 0.5129515528678894, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 181/937, Lr: 0.1, Loss: 0.34585756063461304, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 182/937, Lr: 0.1, Loss: 0.28428012132644653, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 183/937, Lr: 0.1, Loss: 0.323307603597641, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 184/937, Lr: 0.1, Loss: 0.50025475025177, Accuracy: 0.8046875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 185/937, Lr: 0.1, Loss: 0.2857784330844879, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 186/937, Lr: 0.1, Loss: 0.4097803831100464, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 187/937, Lr: 0.1, Loss: 0.4298817217350006, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 188/937, Lr: 0.1, Loss: 0.31187373399734497, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 189/937, Lr: 0.1, Loss: 0.42119190096855164, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 190/937, Lr: 0.1, Loss: 0.27760010957717896, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 191/937, Lr: 0.1, Loss: 0.337117999792099, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 192/937, Lr: 0.1, Loss: 0.40433546900749207, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 193/937, Lr: 0.1, Loss: 0.3235809803009033, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 194/937, Lr: 0.1, Loss: 0.35724925994873047, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 195/937, Lr: 0.1, Loss: 0.32687538862228394, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 196/937, Lr: 0.1, Loss: 0.3525124788284302, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 197/937, Lr: 0.1, Loss: 0.3240220546722412, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 198/937, Lr: 0.1, Loss: 0.36876174807548523, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 199/937, Lr: 0.1, Loss: 0.3137291371822357, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 200/937, Lr: 0.1, Loss: 0.31820011138916016, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 201/937, Lr: 0.1, Loss: 0.4082251191139221, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 202/937, Lr: 0.1, Loss: 0.4278827905654907, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 203/937, Lr: 0.1, Loss: 0.3983549475669861, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 204/937, Lr: 0.1, Loss: 0.473237544298172, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 205/937, Lr: 0.1, Loss: 0.35349786281585693, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 206/937, Lr: 0.1, Loss: 0.3212538957595825, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 207/937, Lr: 0.1, Loss: 0.25930920243263245, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 208/937, Lr: 0.1, Loss: 0.3219979405403137, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 209/937, Lr: 0.1, Loss: 0.3294624984264374, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 210/937, Lr: 0.1, Loss: 0.2693784236907959, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 211/937, Lr: 0.1, Loss: 0.3789544701576233, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 212/937, Lr: 0.1, Loss: 0.3528106212615967, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 213/937, Lr: 0.1, Loss: 0.2514331042766571, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 214/937, Lr: 0.1, Loss: 0.4120091497898102, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 215/937, Lr: 0.1, Loss: 0.23561853170394897, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 216/937, Lr: 0.1, Loss: 0.2969685196876526, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 217/937, Lr: 0.1, Loss: 0.3678995668888092, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 218/937, Lr: 0.1, Loss: 0.33868637681007385, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 219/937, Lr: 0.1, Loss: 0.26580360531806946, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 220/937, Lr: 0.1, Loss: 0.31526222825050354, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 221/937, Lr: 0.1, Loss: 0.2175980806350708, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 222/937, Lr: 0.1, Loss: 0.4244703948497772, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 223/937, Lr: 0.1, Loss: 0.30230948328971863, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 224/937, Lr: 0.1, Loss: 0.40186476707458496, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 225/937, Lr: 0.1, Loss: 0.379947304725647, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 226/937, Lr: 0.1, Loss: 0.41933292150497437, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 227/937, Lr: 0.1, Loss: 0.3290492296218872, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 228/937, Lr: 0.1, Loss: 0.3382606506347656, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 229/937, Lr: 0.1, Loss: 0.3253929018974304, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 230/937, Lr: 0.1, Loss: 0.23291054368019104, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 231/937, Lr: 0.1, Loss: 0.36946240067481995, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 232/937, Lr: 0.1, Loss: 0.24911075830459595, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 233/937, Lr: 0.1, Loss: 0.29812419414520264, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 234/937, Lr: 0.1, Loss: 0.32132136821746826, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 235/937, Lr: 0.1, Loss: 0.4062519371509552, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 236/937, Lr: 0.1, Loss: 0.37902387976646423, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 237/937, Lr: 0.1, Loss: 0.38786518573760986, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 238/937, Lr: 0.1, Loss: 0.3291704058647156, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 239/937, Lr: 0.1, Loss: 0.3660935163497925, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 240/937, Lr: 0.1, Loss: 0.29613515734672546, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 241/937, Lr: 0.1, Loss: 0.4065336287021637, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 242/937, Lr: 0.1, Loss: 0.34719982743263245, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 243/937, Lr: 0.1, Loss: 0.35540130734443665, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 244/937, Lr: 0.1, Loss: 0.3846175968647003, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 245/937, Lr: 0.1, Loss: 0.2862641215324402, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 246/937, Lr: 0.1, Loss: 0.29790255427360535, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 247/937, Lr: 0.1, Loss: 0.2242322713136673, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 248/937, Lr: 0.1, Loss: 0.47691425681114197, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 249/937, Lr: 0.1, Loss: 0.3129369914531708, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 250/937, Lr: 0.1, Loss: 0.3163074553012848, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 251/937, Lr: 0.1, Loss: 0.3465269207954407, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 252/937, Lr: 0.1, Loss: 0.366837739944458, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 253/937, Lr: 0.1, Loss: 0.27734678983688354, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 254/937, Lr: 0.1, Loss: 0.2834639251232147, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 255/937, Lr: 0.1, Loss: 0.3011651337146759, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 256/937, Lr: 0.1, Loss: 0.2732478976249695, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 257/937, Lr: 0.1, Loss: 0.2873486876487732, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 258/937, Lr: 0.1, Loss: 0.3641515076160431, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 259/937, Lr: 0.1, Loss: 0.4127722680568695, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 260/937, Lr: 0.1, Loss: 0.41057032346725464, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 261/937, Lr: 0.1, Loss: 0.28271010518074036, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 262/937, Lr: 0.1, Loss: 0.5102397203445435, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 263/937, Lr: 0.1, Loss: 0.3676617443561554, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 264/937, Lr: 0.1, Loss: 0.3934861719608307, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 265/937, Lr: 0.1, Loss: 0.3518660068511963, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 266/937, Lr: 0.1, Loss: 0.35194605588912964, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 267/937, Lr: 0.1, Loss: 0.2470579445362091, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 268/937, Lr: 0.1, Loss: 0.34590253233909607, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 269/937, Lr: 0.1, Loss: 0.2567710280418396, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 270/937, Lr: 0.1, Loss: 0.44925588369369507, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 271/937, Lr: 0.1, Loss: 0.2611275017261505, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 272/937, Lr: 0.1, Loss: 0.4623737633228302, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 273/937, Lr: 0.1, Loss: 0.34352514147758484, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 274/937, Lr: 0.1, Loss: 0.41108524799346924, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 275/937, Lr: 0.1, Loss: 0.32911330461502075, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 276/937, Lr: 0.1, Loss: 0.31646791100502014, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 277/937, Lr: 0.1, Loss: 0.4554440379142761, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 278/937, Lr: 0.1, Loss: 0.3216155767440796, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 279/937, Lr: 0.1, Loss: 0.28534626960754395, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 280/937, Lr: 0.1, Loss: 0.30336010456085205, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 281/937, Lr: 0.1, Loss: 0.42057403922080994, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 282/937, Lr: 0.1, Loss: 0.4835132956504822, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 283/937, Lr: 0.1, Loss: 0.4636319875717163, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 284/937, Lr: 0.1, Loss: 0.4530135691165924, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 285/937, Lr: 0.1, Loss: 0.3142123222351074, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 286/937, Lr: 0.1, Loss: 0.36001309752464294, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 287/937, Lr: 0.1, Loss: 0.3639102876186371, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 288/937, Lr: 0.1, Loss: 0.4822883605957031, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 289/937, Lr: 0.1, Loss: 0.37325629591941833, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 290/937, Lr: 0.1, Loss: 0.3176620602607727, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 291/937, Lr: 0.1, Loss: 0.3270314633846283, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 292/937, Lr: 0.1, Loss: 0.41104692220687866, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 293/937, Lr: 0.1, Loss: 0.48625296354293823, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 294/937, Lr: 0.1, Loss: 0.35924553871154785, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 295/937, Lr: 0.1, Loss: 0.372123122215271, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 296/937, Lr: 0.1, Loss: 0.3542520999908447, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 297/937, Lr: 0.1, Loss: 0.296825647354126, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 298/937, Lr: 0.1, Loss: 0.4944556653499603, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 299/937, Lr: 0.1, Loss: 0.3846382200717926, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 300/937, Lr: 0.1, Loss: 0.35130178928375244, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 301/937, Lr: 0.1, Loss: 0.30376750230789185, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 302/937, Lr: 0.1, Loss: 0.3350048363208771, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 303/937, Lr: 0.1, Loss: 0.34622690081596375, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 304/937, Lr: 0.1, Loss: 0.4722689688205719, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 305/937, Lr: 0.1, Loss: 0.4192253649234772, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 306/937, Lr: 0.1, Loss: 0.3343961536884308, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 307/937, Lr: 0.1, Loss: 0.1950550526380539, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 308/937, Lr: 0.1, Loss: 0.25310760736465454, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 309/937, Lr: 0.1, Loss: 0.28410306572914124, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 310/937, Lr: 0.1, Loss: 0.4130968153476715, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 311/937, Lr: 0.1, Loss: 0.3492736220359802, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 312/937, Lr: 0.1, Loss: 0.3094116151332855, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 313/937, Lr: 0.1, Loss: 0.334950715303421, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 314/937, Lr: 0.1, Loss: 0.4174741208553314, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 315/937, Lr: 0.1, Loss: 0.3634292781352997, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 316/937, Lr: 0.1, Loss: 0.26571959257125854, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 317/937, Lr: 0.1, Loss: 0.2054487019777298, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 318/937, Lr: 0.1, Loss: 0.33016446232795715, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 319/937, Lr: 0.1, Loss: 0.3944869339466095, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 320/937, Lr: 0.1, Loss: 0.36970436573028564, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 321/937, Lr: 0.1, Loss: 0.3979218900203705, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 322/937, Lr: 0.1, Loss: 0.32928913831710815, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 323/937, Lr: 0.1, Loss: 0.3726857900619507, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 324/937, Lr: 0.1, Loss: 0.4654630422592163, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 325/937, Lr: 0.1, Loss: 0.34359028935432434, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 326/937, Lr: 0.1, Loss: 0.4891856908798218, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 327/937, Lr: 0.1, Loss: 0.2032489776611328, Accuracy: 0.9609375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 328/937, Lr: 0.1, Loss: 0.2985422909259796, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 329/937, Lr: 0.1, Loss: 0.4253773093223572, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 330/937, Lr: 0.1, Loss: 0.2789619266986847, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 331/937, Lr: 0.1, Loss: 0.40727630257606506, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 332/937, Lr: 0.1, Loss: 0.3205239474773407, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 333/937, Lr: 0.1, Loss: 0.31921523809432983, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 334/937, Lr: 0.1, Loss: 0.3524452745914459, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 335/937, Lr: 0.1, Loss: 0.2415102869272232, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 336/937, Lr: 0.1, Loss: 0.5054779052734375, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 337/937, Lr: 0.1, Loss: 0.3479890525341034, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 338/937, Lr: 0.1, Loss: 0.2616139054298401, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 339/937, Lr: 0.1, Loss: 0.34478214383125305, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 340/937, Lr: 0.1, Loss: 0.2447827309370041, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 341/937, Lr: 0.1, Loss: 0.3119060695171356, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 342/937, Lr: 0.1, Loss: 0.3095912039279938, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 343/937, Lr: 0.1, Loss: 0.2840661108493805, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 344/937, Lr: 0.1, Loss: 0.19972482323646545, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 345/937, Lr: 0.1, Loss: 0.380573570728302, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 346/937, Lr: 0.1, Loss: 0.39211905002593994, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 347/937, Lr: 0.1, Loss: 0.3151560127735138, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 348/937, Lr: 0.1, Loss: 0.4663931131362915, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 349/937, Lr: 0.1, Loss: 0.4365537762641907, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 350/937, Lr: 0.1, Loss: 0.22724215686321259, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 351/937, Lr: 0.1, Loss: 0.28965094685554504, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 352/937, Lr: 0.1, Loss: 0.32331711053848267, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 353/937, Lr: 0.1, Loss: 0.35812240839004517, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 354/937, Lr: 0.1, Loss: 0.244451105594635, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 355/937, Lr: 0.1, Loss: 0.39439135789871216, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 356/937, Lr: 0.1, Loss: 0.21105922758579254, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 357/937, Lr: 0.1, Loss: 0.31489208340644836, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 358/937, Lr: 0.1, Loss: 0.3659699857234955, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 359/937, Lr: 0.1, Loss: 0.45598793029785156, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 360/937, Lr: 0.1, Loss: 0.3924626111984253, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 361/937, Lr: 0.1, Loss: 0.39280006289482117, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 362/937, Lr: 0.1, Loss: 0.27942532300949097, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 363/937, Lr: 0.1, Loss: 0.4444946050643921, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 364/937, Lr: 0.1, Loss: 0.42602184414863586, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 365/937, Lr: 0.1, Loss: 0.39996808767318726, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 366/937, Lr: 0.1, Loss: 0.35177165269851685, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 367/937, Lr: 0.1, Loss: 0.29938438534736633, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 368/937, Lr: 0.1, Loss: 0.2633252441883087, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 369/937, Lr: 0.1, Loss: 0.19429831206798553, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 370/937, Lr: 0.1, Loss: 0.3173658549785614, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 371/937, Lr: 0.1, Loss: 0.414895623922348, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 372/937, Lr: 0.1, Loss: 0.3171297013759613, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 373/937, Lr: 0.1, Loss: 0.3769489526748657, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 374/937, Lr: 0.1, Loss: 0.3385883867740631, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 375/937, Lr: 0.1, Loss: 0.26351913809776306, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 376/937, Lr: 0.1, Loss: 0.33530929684638977, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 377/937, Lr: 0.1, Loss: 0.26902320981025696, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 378/937, Lr: 0.1, Loss: 0.330167680978775, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 379/937, Lr: 0.1, Loss: 0.2838050127029419, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 380/937, Lr: 0.1, Loss: 0.2857975363731384, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 381/937, Lr: 0.1, Loss: 0.32052016258239746, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 382/937, Lr: 0.1, Loss: 0.4140419363975525, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 383/937, Lr: 0.1, Loss: 0.3776552975177765, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 384/937, Lr: 0.1, Loss: 0.3574112355709076, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 385/937, Lr: 0.1, Loss: 0.3978431522846222, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 386/937, Lr: 0.1, Loss: 0.5025996565818787, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 387/937, Lr: 0.1, Loss: 0.2676191031932831, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 388/937, Lr: 0.1, Loss: 0.19770057499408722, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 389/937, Lr: 0.1, Loss: 0.3697053790092468, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 390/937, Lr: 0.1, Loss: 0.2772881090641022, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 391/937, Lr: 0.1, Loss: 0.3797215521335602, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 392/937, Lr: 0.1, Loss: 0.263489305973053, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 393/937, Lr: 0.1, Loss: 0.34590575098991394, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 394/937, Lr: 0.1, Loss: 0.3701554238796234, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 395/937, Lr: 0.1, Loss: 0.4727577567100525, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 396/937, Lr: 0.1, Loss: 0.21651573479175568, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 397/937, Lr: 0.1, Loss: 0.25876644253730774, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 398/937, Lr: 0.1, Loss: 0.2022588849067688, Accuracy: 0.9453125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 399/937, Lr: 0.1, Loss: 0.2589159607887268, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 400/937, Lr: 0.1, Loss: 0.386467307806015, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 401/937, Lr: 0.1, Loss: 0.32582974433898926, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 402/937, Lr: 0.1, Loss: 0.1499602198600769, Accuracy: 0.9609375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 403/937, Lr: 0.1, Loss: 0.3332867920398712, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 404/937, Lr: 0.1, Loss: 0.3178946077823639, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 405/937, Lr: 0.1, Loss: 0.23375070095062256, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 406/937, Lr: 0.1, Loss: 0.37011396884918213, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 407/937, Lr: 0.1, Loss: 0.436219722032547, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 408/937, Lr: 0.1, Loss: 0.35536980628967285, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 409/937, Lr: 0.1, Loss: 0.1895115226507187, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 410/937, Lr: 0.1, Loss: 0.23475390672683716, Accuracy: 0.9453125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 411/937, Lr: 0.1, Loss: 0.37239256501197815, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 412/937, Lr: 0.1, Loss: 0.4091714024543762, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 413/937, Lr: 0.1, Loss: 0.42374691367149353, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 414/937, Lr: 0.1, Loss: 0.3287026882171631, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 415/937, Lr: 0.1, Loss: 0.26157695055007935, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 416/937, Lr: 0.1, Loss: 0.3512926697731018, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 417/937, Lr: 0.1, Loss: 0.29927343130111694, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 418/937, Lr: 0.1, Loss: 0.351238489151001, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 419/937, Lr: 0.1, Loss: 0.297567218542099, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 420/937, Lr: 0.1, Loss: 0.45070090889930725, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 421/937, Lr: 0.1, Loss: 0.3459646999835968, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 422/937, Lr: 0.1, Loss: 0.3115878701210022, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 423/937, Lr: 0.1, Loss: 0.22739508748054504, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 424/937, Lr: 0.1, Loss: 0.27722594141960144, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 425/937, Lr: 0.1, Loss: 0.34680548310279846, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 426/937, Lr: 0.1, Loss: 0.27124688029289246, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 427/937, Lr: 0.1, Loss: 0.37347412109375, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 428/937, Lr: 0.1, Loss: 0.349335640668869, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 429/937, Lr: 0.1, Loss: 0.3696931302547455, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 430/937, Lr: 0.1, Loss: 0.2612995505332947, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 431/937, Lr: 0.1, Loss: 0.5140023231506348, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 432/937, Lr: 0.1, Loss: 0.2265353500843048, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 433/937, Lr: 0.1, Loss: 0.35495877265930176, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 434/937, Lr: 0.1, Loss: 0.2731354236602783, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 435/937, Lr: 0.1, Loss: 0.3217153251171112, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 436/937, Lr: 0.1, Loss: 0.2273293137550354, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 437/937, Lr: 0.1, Loss: 0.42943209409713745, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 438/937, Lr: 0.1, Loss: 0.25089651346206665, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 439/937, Lr: 0.1, Loss: 0.371437132358551, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 440/937, Lr: 0.1, Loss: 0.4014755189418793, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 441/937, Lr: 0.1, Loss: 0.33677586913108826, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 442/937, Lr: 0.1, Loss: 0.3260255753993988, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 443/937, Lr: 0.1, Loss: 0.2731267511844635, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 444/937, Lr: 0.1, Loss: 0.2986467480659485, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 445/937, Lr: 0.1, Loss: 0.319033682346344, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 446/937, Lr: 0.1, Loss: 0.27411025762557983, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 447/937, Lr: 0.1, Loss: 0.356528639793396, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 448/937, Lr: 0.1, Loss: 0.21317878365516663, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 449/937, Lr: 0.1, Loss: 0.3424661159515381, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 450/937, Lr: 0.1, Loss: 0.2853332757949829, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 451/937, Lr: 0.1, Loss: 0.25710529088974, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 452/937, Lr: 0.1, Loss: 0.41874513030052185, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 453/937, Lr: 0.1, Loss: 0.3091493248939514, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 454/937, Lr: 0.1, Loss: 0.3767790198326111, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 455/937, Lr: 0.1, Loss: 0.3468260169029236, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 456/937, Lr: 0.1, Loss: 0.3231419622898102, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 457/937, Lr: 0.1, Loss: 0.35175904631614685, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 458/937, Lr: 0.1, Loss: 0.29946961998939514, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 459/937, Lr: 0.1, Loss: 0.26876118779182434, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 460/937, Lr: 0.1, Loss: 0.39073213934898376, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 461/937, Lr: 0.1, Loss: 0.3502380847930908, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 462/937, Lr: 0.1, Loss: 0.2850416302680969, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 463/937, Lr: 0.1, Loss: 0.2790829837322235, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 464/937, Lr: 0.1, Loss: 0.3146520256996155, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 465/937, Lr: 0.1, Loss: 0.33476486802101135, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 466/937, Lr: 0.1, Loss: 0.30376845598220825, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 467/937, Lr: 0.1, Loss: 0.24633292853832245, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 468/937, Lr: 0.1, Loss: 0.3696768879890442, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 469/937, Lr: 0.1, Loss: 0.33664920926094055, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 470/937, Lr: 0.1, Loss: 0.3746319115161896, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 471/937, Lr: 0.1, Loss: 0.429847776889801, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 472/937, Lr: 0.1, Loss: 0.3764604330062866, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 473/937, Lr: 0.1, Loss: 0.2984776794910431, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 474/937, Lr: 0.1, Loss: 0.171271413564682, Accuracy: 0.953125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 475/937, Lr: 0.1, Loss: 0.28771257400512695, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 476/937, Lr: 0.1, Loss: 0.30626535415649414, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 477/937, Lr: 0.1, Loss: 0.31661519408226013, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 478/937, Lr: 0.1, Loss: 0.3160412311553955, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 479/937, Lr: 0.1, Loss: 0.3155977725982666, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 480/937, Lr: 0.1, Loss: 0.4023721516132355, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 481/937, Lr: 0.1, Loss: 0.2618890106678009, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 482/937, Lr: 0.1, Loss: 0.3278396725654602, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 483/937, Lr: 0.1, Loss: 0.3647497594356537, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 484/937, Lr: 0.1, Loss: 0.2733103930950165, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 485/937, Lr: 0.1, Loss: 0.39669933915138245, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 486/937, Lr: 0.1, Loss: 0.3857102692127228, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 487/937, Lr: 0.1, Loss: 0.34599030017852783, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 488/937, Lr: 0.1, Loss: 0.2849288880825043, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 489/937, Lr: 0.1, Loss: 0.2749437391757965, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 490/937, Lr: 0.1, Loss: 0.29663965106010437, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 491/937, Lr: 0.1, Loss: 0.4042679965496063, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 492/937, Lr: 0.1, Loss: 0.2711203396320343, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 493/937, Lr: 0.1, Loss: 0.33047205209732056, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 494/937, Lr: 0.1, Loss: 0.3438997268676758, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 495/937, Lr: 0.1, Loss: 0.36678993701934814, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 496/937, Lr: 0.1, Loss: 0.47257471084594727, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 497/937, Lr: 0.1, Loss: 0.27143457531929016, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 498/937, Lr: 0.1, Loss: 0.31420913338661194, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 499/937, Lr: 0.1, Loss: 0.3678281009197235, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 500/937, Lr: 0.1, Loss: 0.34970882534980774, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 501/937, Lr: 0.1, Loss: 0.3418712317943573, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 502/937, Lr: 0.1, Loss: 0.3626983165740967, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 503/937, Lr: 0.1, Loss: 0.4031606614589691, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 504/937, Lr: 0.1, Loss: 0.356132447719574, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 505/937, Lr: 0.1, Loss: 0.21135564148426056, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 506/937, Lr: 0.1, Loss: 0.3699378967285156, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 507/937, Lr: 0.1, Loss: 0.2608332931995392, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 508/937, Lr: 0.1, Loss: 0.39704447984695435, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 509/937, Lr: 0.1, Loss: 0.3297923505306244, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 510/937, Lr: 0.1, Loss: 0.3395601212978363, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 511/937, Lr: 0.1, Loss: 0.3423524498939514, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 512/937, Lr: 0.1, Loss: 0.3004050552845001, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 513/937, Lr: 0.1, Loss: 0.3646535277366638, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 514/937, Lr: 0.1, Loss: 0.38596221804618835, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 515/937, Lr: 0.1, Loss: 0.3312036693096161, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 516/937, Lr: 0.1, Loss: 0.3880164921283722, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 517/937, Lr: 0.1, Loss: 0.3167867958545685, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 518/937, Lr: 0.1, Loss: 0.34617140889167786, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 519/937, Lr: 0.1, Loss: 0.2520093619823456, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 520/937, Lr: 0.1, Loss: 0.26630958914756775, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 521/937, Lr: 0.1, Loss: 0.23142147064208984, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 522/937, Lr: 0.1, Loss: 0.4184756577014923, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 523/937, Lr: 0.1, Loss: 0.317493736743927, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 524/937, Lr: 0.1, Loss: 0.34938177466392517, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 525/937, Lr: 0.1, Loss: 0.34118151664733887, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 526/937, Lr: 0.1, Loss: 0.28124698996543884, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 527/937, Lr: 0.1, Loss: 0.27102625370025635, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 528/937, Lr: 0.1, Loss: 0.5133770704269409, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 529/937, Lr: 0.1, Loss: 0.4286889135837555, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 530/937, Lr: 0.1, Loss: 0.5519644021987915, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 531/937, Lr: 0.1, Loss: 0.2707008123397827, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 532/937, Lr: 0.1, Loss: 0.43199416995048523, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 533/937, Lr: 0.1, Loss: 0.3025118410587311, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 534/937, Lr: 0.1, Loss: 0.291808158159256, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 535/937, Lr: 0.1, Loss: 0.25577637553215027, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 536/937, Lr: 0.1, Loss: 0.2078305184841156, Accuracy: 0.9453125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 537/937, Lr: 0.1, Loss: 0.3236948847770691, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 538/937, Lr: 0.1, Loss: 0.2269054502248764, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 539/937, Lr: 0.1, Loss: 0.3366546928882599, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 540/937, Lr: 0.1, Loss: 0.2507078945636749, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 541/937, Lr: 0.1, Loss: 0.3255128562450409, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 542/937, Lr: 0.1, Loss: 0.23051200807094574, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 543/937, Lr: 0.1, Loss: 0.37946340441703796, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 544/937, Lr: 0.1, Loss: 0.2588415741920471, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 545/937, Lr: 0.1, Loss: 0.2920636534690857, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 546/937, Lr: 0.1, Loss: 0.2889668643474579, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 547/937, Lr: 0.1, Loss: 0.25727882981300354, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 548/937, Lr: 0.1, Loss: 0.32551586627960205, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 549/937, Lr: 0.1, Loss: 0.42907842993736267, Accuracy: 0.8125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 550/937, Lr: 0.1, Loss: 0.3148462176322937, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 551/937, Lr: 0.1, Loss: 0.27143681049346924, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 552/937, Lr: 0.1, Loss: 0.29940447211265564, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 553/937, Lr: 0.1, Loss: 0.2534758448600769, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 554/937, Lr: 0.1, Loss: 0.22344844043254852, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 555/937, Lr: 0.1, Loss: 0.38144010305404663, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 556/937, Lr: 0.1, Loss: 0.3617582321166992, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 557/937, Lr: 0.1, Loss: 0.24984782934188843, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 558/937, Lr: 0.1, Loss: 0.3516745865345001, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 559/937, Lr: 0.1, Loss: 0.4315508008003235, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 560/937, Lr: 0.1, Loss: 0.3118020296096802, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 561/937, Lr: 0.1, Loss: 0.33146610856056213, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 562/937, Lr: 0.1, Loss: 0.35686251521110535, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 563/937, Lr: 0.1, Loss: 0.38376596570014954, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 564/937, Lr: 0.1, Loss: 0.4148886501789093, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 565/937, Lr: 0.1, Loss: 0.403186172246933, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 566/937, Lr: 0.1, Loss: 0.3732084631919861, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 567/937, Lr: 0.1, Loss: 0.39256808161735535, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 568/937, Lr: 0.1, Loss: 0.31216341257095337, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 569/937, Lr: 0.1, Loss: 0.20284852385520935, Accuracy: 0.9375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 570/937, Lr: 0.1, Loss: 0.3286229074001312, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 571/937, Lr: 0.1, Loss: 0.4263221323490143, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 572/937, Lr: 0.1, Loss: 0.3536320924758911, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 573/937, Lr: 0.1, Loss: 0.3146671950817108, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 574/937, Lr: 0.1, Loss: 0.37123116850852966, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 575/937, Lr: 0.1, Loss: 0.34347525238990784, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 576/937, Lr: 0.1, Loss: 0.45868155360221863, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 577/937, Lr: 0.1, Loss: 0.40472522377967834, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 578/937, Lr: 0.1, Loss: 0.38389143347740173, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 579/937, Lr: 0.1, Loss: 0.3520767092704773, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 580/937, Lr: 0.1, Loss: 0.2978021800518036, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 581/937, Lr: 0.1, Loss: 0.28506922721862793, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 582/937, Lr: 0.1, Loss: 0.33945873379707336, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 583/937, Lr: 0.1, Loss: 0.26008152961730957, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 584/937, Lr: 0.1, Loss: 0.41551634669303894, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 585/937, Lr: 0.1, Loss: 0.23872371017932892, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 586/937, Lr: 0.1, Loss: 0.31817689538002014, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 587/937, Lr: 0.1, Loss: 0.31990963220596313, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 588/937, Lr: 0.1, Loss: 0.24247866868972778, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 589/937, Lr: 0.1, Loss: 0.2819306552410126, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 590/937, Lr: 0.1, Loss: 0.2860422134399414, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 591/937, Lr: 0.1, Loss: 0.2662442922592163, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 592/937, Lr: 0.1, Loss: 0.33982378244400024, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 593/937, Lr: 0.1, Loss: 0.25592827796936035, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 594/937, Lr: 0.1, Loss: 0.3557255268096924, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 595/937, Lr: 0.1, Loss: 0.3170778453350067, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 596/937, Lr: 0.1, Loss: 0.25455009937286377, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 597/937, Lr: 0.1, Loss: 0.3592413067817688, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 598/937, Lr: 0.1, Loss: 0.2789595425128937, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 599/937, Lr: 0.1, Loss: 0.41031479835510254, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 600/937, Lr: 0.1, Loss: 0.3284929394721985, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 601/937, Lr: 0.1, Loss: 0.2761496603488922, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 602/937, Lr: 0.1, Loss: 0.4543483555316925, Accuracy: 0.8046875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 603/937, Lr: 0.1, Loss: 0.2808677852153778, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 604/937, Lr: 0.1, Loss: 0.4171525835990906, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 605/937, Lr: 0.1, Loss: 0.40965786576271057, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 606/937, Lr: 0.1, Loss: 0.42125725746154785, Accuracy: 0.8203125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 607/937, Lr: 0.1, Loss: 0.3209800720214844, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 608/937, Lr: 0.1, Loss: 0.34276384115219116, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 609/937, Lr: 0.1, Loss: 0.3566564917564392, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 610/937, Lr: 0.1, Loss: 0.2578493058681488, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 611/937, Lr: 0.1, Loss: 0.30098479986190796, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 612/937, Lr: 0.1, Loss: 0.38153237104415894, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 613/937, Lr: 0.1, Loss: 0.4841325581073761, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 614/937, Lr: 0.1, Loss: 0.2692558765411377, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 615/937, Lr: 0.1, Loss: 0.3994677662849426, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 616/937, Lr: 0.1, Loss: 0.2729383707046509, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 617/937, Lr: 0.1, Loss: 0.2672020494937897, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 618/937, Lr: 0.1, Loss: 0.3203963041305542, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 619/937, Lr: 0.1, Loss: 0.2507956027984619, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 620/937, Lr: 0.1, Loss: 0.3190774619579315, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 621/937, Lr: 0.1, Loss: 0.33586207032203674, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 622/937, Lr: 0.1, Loss: 0.282997190952301, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 623/937, Lr: 0.1, Loss: 0.29550349712371826, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 624/937, Lr: 0.1, Loss: 0.30151617527008057, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 625/937, Lr: 0.1, Loss: 0.3547644317150116, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 626/937, Lr: 0.1, Loss: 0.33973416686058044, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 627/937, Lr: 0.1, Loss: 0.3102208077907562, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 628/937, Lr: 0.1, Loss: 0.3513738512992859, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 629/937, Lr: 0.1, Loss: 0.428561270236969, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 630/937, Lr: 0.1, Loss: 0.24135524034500122, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 631/937, Lr: 0.1, Loss: 0.3687000274658203, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 632/937, Lr: 0.1, Loss: 0.4155120253562927, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 633/937, Lr: 0.1, Loss: 0.2596887946128845, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 634/937, Lr: 0.1, Loss: 0.3511640131473541, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 635/937, Lr: 0.1, Loss: 0.3276641368865967, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 636/937, Lr: 0.1, Loss: 0.28680655360221863, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 637/937, Lr: 0.1, Loss: 0.2879367768764496, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 638/937, Lr: 0.1, Loss: 0.28724798560142517, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 639/937, Lr: 0.1, Loss: 0.2804587185382843, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 640/937, Lr: 0.1, Loss: 0.38083165884017944, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 641/937, Lr: 0.1, Loss: 0.42369627952575684, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 642/937, Lr: 0.1, Loss: 0.21987134218215942, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 643/937, Lr: 0.1, Loss: 0.28352677822113037, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 644/937, Lr: 0.1, Loss: 0.33973079919815063, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 645/937, Lr: 0.1, Loss: 0.3177546262741089, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 646/937, Lr: 0.1, Loss: 0.28969746828079224, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 647/937, Lr: 0.1, Loss: 0.27096447348594666, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 648/937, Lr: 0.1, Loss: 0.4095053970813751, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 649/937, Lr: 0.1, Loss: 0.32104218006134033, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 650/937, Lr: 0.1, Loss: 0.27063482999801636, Accuracy: 0.9296875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 651/937, Lr: 0.1, Loss: 0.31422069668769836, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 652/937, Lr: 0.1, Loss: 0.27121835947036743, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 653/937, Lr: 0.1, Loss: 0.2902095317840576, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 654/937, Lr: 0.1, Loss: 0.31776976585388184, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 655/937, Lr: 0.1, Loss: 0.43468159437179565, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 656/937, Lr: 0.1, Loss: 0.2748622000217438, Accuracy: 0.9140625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 657/937, Lr: 0.1, Loss: 0.2922321856021881, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 658/937, Lr: 0.1, Loss: 0.34971994161605835, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 659/937, Lr: 0.1, Loss: 0.3947283923625946, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 660/937, Lr: 0.1, Loss: 0.2637549936771393, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 661/937, Lr: 0.1, Loss: 0.36975228786468506, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 662/937, Lr: 0.1, Loss: 0.3657611012458801, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 663/937, Lr: 0.1, Loss: 0.3686958849430084, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 664/937, Lr: 0.1, Loss: 0.4243896007537842, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 665/937, Lr: 0.1, Loss: 0.34090784192085266, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 666/937, Lr: 0.1, Loss: 0.26689550280570984, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 667/937, Lr: 0.1, Loss: 0.38561612367630005, Accuracy: 0.8359375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 668/937, Lr: 0.1, Loss: 0.4278072416782379, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 669/937, Lr: 0.1, Loss: 0.3844197392463684, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 670/937, Lr: 0.1, Loss: 0.34849441051483154, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 671/937, Lr: 0.1, Loss: 0.2280026227235794, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 672/937, Lr: 0.1, Loss: 0.3262746334075928, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 673/937, Lr: 0.1, Loss: 0.24290691316127777, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 674/937, Lr: 0.1, Loss: 0.40792375802993774, Accuracy: 0.8515625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 675/937, Lr: 0.1, Loss: 0.2352982461452484, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 676/937, Lr: 0.1, Loss: 0.36324143409729004, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 677/937, Lr: 0.1, Loss: 0.35129526257514954, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 678/937, Lr: 0.1, Loss: 0.34141114354133606, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 679/937, Lr: 0.1, Loss: 0.2979392409324646, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 680/937, Lr: 0.1, Loss: 0.39531639218330383, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 681/937, Lr: 0.1, Loss: 0.31162580847740173, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 682/937, Lr: 0.1, Loss: 0.2630612552165985, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 683/937, Lr: 0.1, Loss: 0.3512340784072876, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 684/937, Lr: 0.1, Loss: 0.2816585898399353, Accuracy: 0.890625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 685/937, Lr: 0.1, Loss: 0.2491965889930725, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 686/937, Lr: 0.1, Loss: 0.4458504617214203, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 687/937, Lr: 0.1, Loss: 0.27326375246047974, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 688/937, Lr: 0.1, Loss: 0.2337462604045868, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 689/937, Lr: 0.1, Loss: 0.3214802145957947, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 690/937, Lr: 0.1, Loss: 0.4870108366012573, Accuracy: 0.84375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 691/937, Lr: 0.1, Loss: 0.34106749296188354, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 692/937, Lr: 0.1, Loss: 0.3152746856212616, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 693/937, Lr: 0.1, Loss: 0.3093917965888977, Accuracy: 0.8984375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 694/937, Lr: 0.1, Loss: 0.17186632752418518, Accuracy: 0.953125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 695/937, Lr: 0.1, Loss: 0.33796441555023193, Accuracy: 0.875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 696/937, Lr: 0.1, Loss: 0.25133761763572693, Accuracy: 0.90625


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 697/937, Lr: 0.1, Loss: 0.24263916909694672, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 698/937, Lr: 0.1, Loss: 0.3671586811542511, Accuracy: 0.859375


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 699/937, Lr: 0.1, Loss: 0.40258339047431946, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 700/937, Lr: 0.1, Loss: 0.37387678027153015, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 701/937, Lr: 0.1, Loss: 0.4256798028945923, Accuracy: 0.828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 702/937, Lr: 0.1, Loss: 0.3323816657066345, Accuracy: 0.8671875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 703/937, Lr: 0.1, Loss: 0.21494817733764648, Accuracy: 0.921875


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 704/937, Lr: 0.1, Loss: 0.33648526668548584, Accuracy: 0.8828125


  output = F.softmax(output)
  output = F.softmax(output)


Epoch: 1/2, Iteration: 705/937, Lr: 0.1, Loss: 0.37034186720848083, Accuracy: 0.859375


KeyboardInterrupt: 

In [14]:
def get_args():
    args = {
        "batch_size": 128,
        "data_path": "data/test.csv",
        "pre_trained_model": "trained_models/whole_model_han",
        "word2vec_path": "glove.6B.50d.txt",
        "output": "predictions"
    }
    return args

def test(opt):
    test_params = {"batch_size": opt["batch_size"],
                   "shuffle": False,
                   "drop_last": False}
    if os.path.isdir(opt["output"]):
        shutil.rmtree(opt["output"])
    os.makedirs(opt["output"], exist_ok=True)
    
    if torch.cuda.is_available():
        model = torch.load(opt["pre_trained_model"])
    else:
        model = torch.load(opt["pre_trained_model"], map_location=lambda storage, loc: storage)
    
    test_set = MyDataset(opt["data_path"], opt["word2vec_path"], model.max_sent_length, model.max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    te_label_ls = []
    te_pred_ls = []
    
    for te_feature, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_feature = te_feature.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature)
            te_predictions = F.softmax(te_predictions, dim=1)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())
        
    te_pred = torch.cat(te_pred_ls, 0).numpy()
    te_label = np.array(te_label_ls)

    fieldnames = ['True label', 'Predicted label', 'Content']
    with open(os.path.join(opt["output"], "predictions.csv"), 'w') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for i, j, k in zip(te_label, te_pred, test_set.texts):
            writer.writerow({'True label': i + 1, 'Predicted label': np.argmax(j) + 1, 'Content': k})

    test_metrics = get_evaluation(te_label, te_pred, list_metrics=["accuracy", "loss", "confusion_matrix"])
    print("Prediction:\nLoss: {} Accuracy: {} \nConfusion matrix: \n{}".format(
        test_metrics["loss"], test_metrics["accuracy"], test_metrics["confusion_matrix"]))

In [15]:
opt = get_args()
test(opt)

FileNotFoundError: [Errno 2] No such file or directory: 'trained_models/whole_model_han'