In [1]:
import torch
import torch.utils.data.dataloader as dataloader
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torch.optim as optim
import os
import re
import sys
import gc
import gensim
import numpy as np
from tqdm import tqdm
from collections import Counter
from itertools import chain

In [2]:
# Directory holding the raw training files and the number of non-empty lines to keep.
data_dir = 'Holmes_Training_Data'
max_lines = 10000

text = []
# os.listdir returns entries in arbitrary order; sorting makes the subset of
# lines that survives the max_lines cut-off reproducible across machines.
for file in sorted(os.listdir(data_dir)):
    with open(os.path.join(data_dir, file), 'r', errors='ignore') as f:
        text.extend(f.read().splitlines())

# Remove asterisks, drop lines that are empty afterwards, and truncate before
# tokenising so only the retained lines are processed.
text = [x.replace('*', '') for x in text]
text = [x for x in text if x != ''][:max_lines]
# Tokenise into words (letters/digits/underscore/apostrophe runs) and the
# punctuation marks . , ! ? ; then wrap every line in sentence markers.
text = [['<bos>'] + re.findall(r"[\w']+|[.,!?;]", x) + ['<eos>'] for x in text]

In [3]:
# Count every token once, then derive the vocabulary (distinct tokens) and its
# size from the counter instead of flattening the corpus a second time.
freqs = Counter(token for sentence in text for token in sentence)
vocab = set(freqs)
vocab_size = len(vocab)

In [4]:
# Vocabulary words receive indices 1..len(vocab); index 0 is reserved for '<unk>'.
# Both maps enumerate the same `vocab` object, so they are mutual inverses.
word_to_idx = dict((word, position) for position, word in enumerate(vocab, start=1))
word_to_idx['<unk>'] = 0
idx_to_word = dict((position, word) for position, word in enumerate(vocab, start=1))
idx_to_word[0] = '<unk>'

In [5]:
# Token counts raised to the 0.75 power and normalised into a sampling
# distribution with one entry per index in idx_to_word.
# idx_to_word has keys 0..vocab_size (index 0 is '<unk>'), so iterate over
# len(idx_to_word); range(vocab_size) would silently drop the last word.
# `freqs` is a Counter, so a token it never saw contributes a count of 0.
freqs_pow = torch.Tensor([freqs[idx_to_word[i]] for i in range(len(idx_to_word))]).pow(0.75)
dist = freqs_pow / freqs_pow.sum()
gc.collect()

0

In [6]:
def neg_sample(num_samples, batch_size, use_gpu):
    """Draw random indices according to the module-level distribution `dist`.

    Args:
        num_samples: number of indices drawn per row.
        batch_size: number of rows.
        use_gpu: when true, the result is moved to the module-level `device`.

    Returns:
        A tensor of shape (batch_size, num_samples) holding indices in
        range(len(dist)).
    """
    shape = (batch_size, num_samples)
    drawn = torch.tensor(np.random.choice(len(dist), shape, p=dist.numpy()))
    return drawn.to(device) if use_gpu else drawn

In [7]:
# Load word vectors stored in word2vec text (non-binary) format.
wvmodel = gensim.models.KeyedVectors.load_word2vec_format(
    'test_word.txt',
    binary=False,
    encoding='utf-8',
)

In [8]:
class seqNet(nn.Module):
    """Score labels from two token sequences encoded by two separate LSTMs.

    `inputs[0]` and `inputs[1]` are embedded with a shared, frozen embedding
    table, each is run through its own LSTM (`front` / `back`), the outputs
    at the last time step are concatenated and passed to a linear decoder.

    Args:
        vocab_size: accepted for interface compatibility; not read here.
        embed_size: width of the embedding vectors (LSTM input size).
        num_hiddens: hidden size of each LSTM.
        num_layers: number of stacked layers in each LSTM.
        bidirectional: whether both LSTMs are bidirectional.
        weight: pre-trained embedding matrix handed to the embedding layer.
        num_labels: size of the decoder output.
        use_gpu: stored on the instance; not otherwise used by this class.
        **kwargs: forwarded to nn.Module.__init__.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, weight, num_labels, use_gpu, **kwargs):
        super().__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.use_gpu = use_gpu
        self.bidirectional = bidirectional

        # freeze=True leaves the pre-trained weights with requires_grad=False.
        self.embedding = nn.Embedding.from_pretrained(weight, freeze=True)

        # Both encoders share one configuration but own separate parameters.
        lstm_config = dict(input_size=embed_size, hidden_size=num_hiddens,
                           num_layers=num_layers, bidirectional=bidirectional,
                           dropout=0)
        self.front = nn.LSTM(**lstm_config)
        self.back = nn.LSTM(**lstm_config)

        # Each encoder contributes num_hiddens features per direction.
        directions = 2 if bidirectional else 1
        self.decoder = nn.Linear(num_hiddens * 2 * directions, num_labels)

    def forward(self, inputs):
        """Return decoder scores for the pair of sequences in `inputs`.

        `inputs[0]` feeds the `front` LSTM and `inputs[1]` the `back` LSTM.
        Embedded inputs have their first two axes swapped before the LSTMs,
        so index -1 of each LSTM output selects the last time step.
        """
        front_embedded = self.embedding(inputs[0]).permute(1, 0, 2)
        back_embedded = self.embedding(inputs[1]).permute(1, 0, 2)
        front_out, _ = self.front(front_embedded)
        back_out, _ = self.back(back_embedded)
        encoding = torch.cat((front_out[-1], back_out[-1]), dim=1)
        return self.decoder(encoding)

In [9]:
# Model and training hyper-parameters.
embedding_dim = 100
hidden_dim = 100
lr = 0.1
num_epoch = 500
num_layers = 2
bidirectional = True
batch_size = 256
num_negs = 25  # presumably the negative-sample count for neg_sample — TODO confirm

# Use the GPU only when one is actually present. The second GPU (cuda:1) is
# preferred, falling back to cuda:0 on single-GPU hosts and to the CPU otherwise,
# so the notebook does not fail on machines without a second GPU.
use_gpu = torch.cuda.is_available()
if use_gpu:
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')
loss_function = nn.CrossEntropyLoss()

In [10]:
def encode_sample(seq, word_to_idx):
    """Map each token in `seq` to its integer index.

    Tokens missing from `word_to_idx` are encoded with the index of '<unk>'
    when the mapping defines one, instead of aborting on the first
    out-of-vocabulary word.

    Args:
        seq: iterable of tokens.
        word_to_idx: mapping from token to integer index.

    Returns:
        A list with one index per token, in order.

    Raises:
        KeyError: a token is unknown and the mapping has no '<unk>' entry.
    """
    unk_index = word_to_idx.get('<unk>')
    feature = []
    for token in seq:
        index = word_to_idx.get(token, unk_index)
        if index is None:
            raise KeyError(token)
        feature.append(index)
    return feature

In [11]:
def padding(seq, maxlen, pad='<unk>'):
    """Return a new list of exactly `maxlen` items built from `seq`.

    Longer sequences are cut to their first `maxlen` items; shorter ones are
    extended on the right with `pad`. The input is never modified (a copy is
    always made, so callers' lists are not grown in place).

    Args:
        seq: sequence of tokens.
        maxlen: target length.
        pad: filler appended to short sequences.

    Returns:
        A list of length `maxlen`.
    """
    padded = list(seq[:maxlen])
    padded.extend([pad] * (maxlen - len(padded)))
    return padded

In [12]:
def eval_acc(y_pred, y_true):
    """Fraction of rows whose highest-scoring column equals the true label.

    Args:
        y_pred: score tensor; the arg-max is taken along dimension 1.
        y_true: tensor of true labels, one per row of `y_pred`.

    Returns:
        The number of matches divided by len(y_true), as a NumPy scalar.
    """
    predicted = y_pred.argmax(dim=1)
    num_correct = (predicted == y_true).sum()
    return num_correct.cpu().numpy() / len(y_true)

In [13]:
# Build one training example per interior token of every sentence: the tokens
# before it and the tokens after it (each padded/truncated to 10 and encoded)
# form the context, and the token's own index is the target.
data = []
for sentence in text:
    # Positions 0 and len-1 are skipped, so the first and last token of a
    # sentence are never used as targets.
    for position in range(1, len(sentence) - 1):
        target = word_to_idx[sentence[position]]
        front = encode_sample(padding(sentence[:position], 10), word_to_idx)
        back = encode_sample(padding(sentence[position + 1:], 10), word_to_idx)
        data.append(([front, back], target))

In [14]:
# Start from a randomly initialised embedding matrix and overwrite the rows of
# every vocabulary word that also has a pre-trained vector in `wvmodel`.
weight = nn.Embedding(len(word_to_idx), embedding_dim).weight.data
for word in wvmodel.index2word:
    # Words outside our vocabulary are skipped explicitly; a bare `except`
    # here would also hide unrelated failures (e.g. a vector size mismatch).
    index = word_to_idx.get(word)
    if index is None:
        continue
    weight[index, :] = torch.from_numpy(wvmodel.get_vector(word))

In [15]:
# Build the network; the output layer has one score per entry of word_to_idx.
model = seqNet(vocab_size=len(word_to_idx), embed_size=embedding_dim,
               num_hiddens=hidden_dim, num_layers=num_layers,
               bidirectional=bidirectional, weight=weight, num_labels=len(word_to_idx),
               use_gpu=use_gpu)
# Move the model first so the optimizer is constructed over the parameters as
# they live on the target device.
if use_gpu:
    model.to(device)
# Use the configured learning rate rather than a second hard-coded copy of it.
optimizer = optim.SGD(model.parameters(), lr=lr)

In [16]:
# Batches are produced in dataset order (shuffle=False) in the main process
# (num_workers=0).
data_iter = torch.utils.data.DataLoader(
    dataset=data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [20]:
# Single hand-written example: tokens before and after the word to predict.
test_target = 'of'
front_tokens = ['<bos>', 'The', 'Project', 'Gutenberg', 'Etext']
back_tokens = ['Reminiscences', 'of', 'Tolstoy', '<eos>']
test = [
    encode_sample(padding(front_tokens, 10), word_to_idx),
    encode_sample(padding(back_tokens, 10), word_to_idx),
]
# Reshape to (2, 1, 10): two context sides, one example, ten tokens each.
test = torch.tensor(test).view(2, 1, 10)
predicted_index = torch.argmax(model(test.to(device)), dim=1)
idx_to_word[int(predicted_index.cpu().data)]

'avail'

In [21]:
# Train for num_epoch passes over data_iter, printing the mean batch loss and
# mean batch accuracy after every epoch.
model.train()
for epoch in range(num_epoch):
    # Plain Python accumulators: loss.item() is already a float, so there is
    # no need to carry the running total in a Tensor.
    total_loss = 0.0
    train_acc = 0.0
    num = 0
    for context, target in tqdm(data_iter):
        num += 1
        # Each context side arrives as a sequence of tensors; stack them and
        # swap the two axes (presumably giving (batch, seq_len) — depends on
        # the DataLoader's collation, TODO confirm).
        front = torch.stack(context[0]).permute((1, 0))
        back = torch.stack(context[1]).permute((1, 0))
        context = torch.stack([front, back])
        # as_tensor reuses an existing tensor instead of copy-constructing it,
        # which torch.tensor(tensor) does (with a warning).
        target = torch.as_tensor(target)
        if use_gpu:
            context = context.to(device)
            target = target.to(device)
        model.zero_grad()
        # The model returns raw scores; CrossEntropyLoss consumes them directly.
        log_probs = model(context)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        train_acc += eval_acc(log_probs, target)
    # Guard against an empty loader so the summary line never divides by zero.
    batches = max(num, 1)
    print('epoch %d, loss %.4f, acc %.4f' %(epoch, total_loss / batches, train_acc / batches))

100%|██████████| 466/466 [00:04<00:00, 95.39it/s]
  3%|▎         | 12/466 [00:00<00:03, 116.97it/s]

epoch 0, loss 7.4706, acc 0.0745


100%|██████████| 466/466 [00:03<00:00, 120.90it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.75it/s]

epoch 1, loss 6.6842, acc 0.0763


100%|██████████| 466/466 [00:04<00:00, 115.04it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.04it/s]

epoch 2, loss 6.5853, acc 0.0771


100%|██████████| 466/466 [00:03<00:00, 117.79it/s]
  2%|▏         | 11/466 [00:00<00:04, 106.88it/s]

epoch 3, loss 6.5476, acc 0.0776


100%|██████████| 466/466 [00:03<00:00, 124.99it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.48it/s]

epoch 4, loss 6.5260, acc 0.0791


100%|██████████| 466/466 [00:03<00:00, 116.95it/s]
  2%|▏         | 11/466 [00:00<00:04, 104.58it/s]

epoch 5, loss 6.5086, acc 0.0813


100%|██████████| 466/466 [00:03<00:00, 117.73it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.29it/s]

epoch 6, loss 6.4911, acc 0.0855


100%|██████████| 466/466 [00:03<00:00, 117.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.50it/s]

epoch 7, loss 6.4728, acc 0.0894


100%|██████████| 466/466 [00:03<00:00, 120.13it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.08it/s]

epoch 8, loss 6.4499, acc 0.0939


100%|██████████| 466/466 [00:04<00:00, 108.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.20it/s]

epoch 9, loss 6.4161, acc 0.1017


100%|██████████| 466/466 [00:04<00:00, 107.79it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.19it/s]

epoch 10, loss 6.3582, acc 0.1149


100%|██████████| 466/466 [00:04<00:00, 113.57it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.63it/s]

epoch 11, loss 6.3215, acc 0.1200


100%|██████████| 466/466 [00:03<00:00, 120.48it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.12it/s]

epoch 12, loss 6.2918, acc 0.1225


100%|██████████| 466/466 [00:04<00:00, 109.28it/s]
  3%|▎         | 12/466 [00:00<00:04, 111.20it/s]

epoch 13, loss 6.2589, acc 0.1262


100%|██████████| 466/466 [00:04<00:00, 113.05it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.17it/s]

epoch 14, loss 6.2242, acc 0.1313


100%|██████████| 466/466 [00:03<00:00, 122.30it/s]
  3%|▎         | 13/466 [00:00<00:03, 127.15it/s]

epoch 15, loss 6.1798, acc 0.1369


100%|██████████| 466/466 [00:03<00:00, 123.69it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.60it/s]

epoch 16, loss 6.1099, acc 0.1477


100%|██████████| 466/466 [00:03<00:00, 118.46it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.80it/s]

epoch 17, loss 6.0395, acc 0.1546


100%|██████████| 466/466 [00:03<00:00, 119.37it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.17it/s]

epoch 18, loss 5.9672, acc 0.1583


100%|██████████| 466/466 [00:03<00:00, 127.00it/s]
  2%|▏         | 10/466 [00:00<00:04, 92.60it/s]

epoch 19, loss 5.8729, acc 0.1678


100%|██████████| 466/466 [00:04<00:00, 106.68it/s]
  2%|▏         | 9/466 [00:00<00:05, 85.88it/s]

epoch 20, loss 5.8060, acc 0.1746


100%|██████████| 466/466 [00:04<00:00, 102.74it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.55it/s]

epoch 21, loss 5.7446, acc 0.1812


100%|██████████| 466/466 [00:04<00:00, 113.02it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.74it/s]

epoch 22, loss 5.6921, acc 0.1863


100%|██████████| 466/466 [00:04<00:00, 115.83it/s]
  2%|▏         | 11/466 [00:00<00:04, 105.32it/s]

epoch 23, loss 5.6420, acc 0.1924


100%|██████████| 466/466 [00:03<00:00, 118.07it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.94it/s]

epoch 24, loss 5.6020, acc 0.1960


100%|██████████| 466/466 [00:03<00:00, 125.25it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.10it/s]

epoch 25, loss 5.5517, acc 0.1998


100%|██████████| 466/466 [00:03<00:00, 117.76it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.82it/s]

epoch 26, loss 5.5107, acc 0.2025


100%|██████████| 466/466 [00:03<00:00, 117.45it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.74it/s]

epoch 27, loss 5.4694, acc 0.2061


100%|██████████| 466/466 [00:04<00:00, 113.51it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.16it/s]

epoch 28, loss 5.4243, acc 0.2087


100%|██████████| 466/466 [00:03<00:00, 122.82it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.43it/s]

epoch 29, loss 5.3893, acc 0.2110


100%|██████████| 466/466 [00:04<00:00, 116.22it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.15it/s]

epoch 30, loss 5.3448, acc 0.2150


100%|██████████| 466/466 [00:03<00:00, 125.44it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.72it/s]

epoch 31, loss 5.3099, acc 0.2167


100%|██████████| 466/466 [00:03<00:00, 123.76it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.94it/s]

epoch 32, loss 5.2694, acc 0.2205


100%|██████████| 466/466 [00:04<00:00, 110.64it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.92it/s]

epoch 33, loss 5.2374, acc 0.2225


100%|██████████| 466/466 [00:03<00:00, 125.55it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.53it/s]

epoch 34, loss 5.2079, acc 0.2241


100%|██████████| 466/466 [00:03<00:00, 119.12it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.64it/s]

epoch 35, loss 5.1709, acc 0.2272


100%|██████████| 466/466 [00:03<00:00, 117.07it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.66it/s]

epoch 36, loss 5.1432, acc 0.2289


100%|██████████| 466/466 [00:03<00:00, 117.91it/s]
  3%|▎         | 12/466 [00:00<00:04, 111.51it/s]

epoch 37, loss 5.1108, acc 0.2316


100%|██████████| 466/466 [00:03<00:00, 122.61it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.03it/s]

epoch 38, loss 5.0839, acc 0.2336


100%|██████████| 466/466 [00:03<00:00, 118.73it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.83it/s]

epoch 39, loss 5.0548, acc 0.2354


100%|██████████| 466/466 [00:03<00:00, 120.36it/s]
  2%|▏         | 10/466 [00:00<00:04, 96.94it/s]

epoch 40, loss 5.0253, acc 0.2366


100%|██████████| 466/466 [00:03<00:00, 118.76it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.93it/s]

epoch 41, loss 5.0009, acc 0.2389


100%|██████████| 466/466 [00:04<00:00, 113.42it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.44it/s]

epoch 42, loss 4.9740, acc 0.2410


100%|██████████| 466/466 [00:04<00:00, 111.54it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.71it/s]

epoch 43, loss 4.9476, acc 0.2431


100%|██████████| 466/466 [00:03<00:00, 122.24it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.49it/s]

epoch 44, loss 4.9194, acc 0.2448


100%|██████████| 466/466 [00:04<00:00, 114.15it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.20it/s]

epoch 45, loss 4.8978, acc 0.2463


100%|██████████| 466/466 [00:03<00:00, 120.50it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.58it/s]

epoch 46, loss 4.8737, acc 0.2480


100%|██████████| 466/466 [00:03<00:00, 119.58it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.88it/s]

epoch 47, loss 4.8465, acc 0.2504


100%|██████████| 466/466 [00:03<00:00, 117.86it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.67it/s]

epoch 48, loss 4.8226, acc 0.2524


100%|██████████| 466/466 [00:03<00:00, 121.59it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.93it/s]

epoch 49, loss 4.7988, acc 0.2537


100%|██████████| 466/466 [00:04<00:00, 112.75it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.09it/s]

epoch 50, loss 4.7800, acc 0.2544


100%|██████████| 466/466 [00:03<00:00, 120.84it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.02it/s]

epoch 51, loss 4.7528, acc 0.2566


100%|██████████| 466/466 [00:04<00:00, 115.12it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.03it/s]

epoch 52, loss 4.7363, acc 0.2572


100%|██████████| 466/466 [00:03<00:00, 118.15it/s]
  3%|▎         | 12/466 [00:00<00:04, 110.10it/s]

epoch 53, loss 4.7042, acc 0.2601


100%|██████████| 466/466 [00:04<00:00, 116.44it/s]
  0%|          | 0/466 [00:00<?, ?it/s]

epoch 54, loss 4.6854, acc 0.2618


100%|██████████| 466/466 [00:04<00:00, 110.56it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.37it/s]

epoch 55, loss 4.6595, acc 0.2643


100%|██████████| 466/466 [00:03<00:00, 120.66it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.38it/s]

epoch 56, loss 4.6373, acc 0.2655


100%|██████████| 466/466 [00:03<00:00, 124.48it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.50it/s]

epoch 57, loss 4.6159, acc 0.2671


100%|██████████| 466/466 [00:03<00:00, 117.29it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.59it/s]

epoch 58, loss 4.6161, acc 0.2659


100%|██████████| 466/466 [00:03<00:00, 125.50it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.14it/s]

epoch 59, loss 4.5983, acc 0.2660


100%|██████████| 466/466 [00:03<00:00, 123.86it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.87it/s]

epoch 60, loss 4.5630, acc 0.2708


100%|██████████| 466/466 [00:03<00:00, 117.39it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.09it/s]

epoch 61, loss 4.5341, acc 0.2732


100%|██████████| 466/466 [00:03<00:00, 123.36it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.54it/s]

epoch 62, loss 4.5085, acc 0.2757


100%|██████████| 466/466 [00:03<00:00, 120.14it/s]
  2%|▏         | 10/466 [00:00<00:04, 91.84it/s]

epoch 63, loss 4.4930, acc 0.2768


100%|██████████| 466/466 [00:04<00:00, 105.94it/s]
  2%|▏         | 11/466 [00:00<00:04, 106.65it/s]

epoch 64, loss 4.4677, acc 0.2785


100%|██████████| 466/466 [00:03<00:00, 120.98it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.60it/s]

epoch 65, loss 4.4479, acc 0.2799


100%|██████████| 466/466 [00:03<00:00, 122.17it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.83it/s]

epoch 66, loss 4.4300, acc 0.2810


100%|██████████| 466/466 [00:03<00:00, 120.20it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.74it/s]

epoch 67, loss 4.4108, acc 0.2827


100%|██████████| 466/466 [00:04<00:00, 112.20it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.44it/s]

epoch 68, loss 4.3889, acc 0.2844


100%|██████████| 466/466 [00:04<00:00, 115.94it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.61it/s]

epoch 69, loss 4.3741, acc 0.2843


100%|██████████| 466/466 [00:03<00:00, 116.78it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.04it/s]

epoch 70, loss 4.3569, acc 0.2868


100%|██████████| 466/466 [00:04<00:00, 111.43it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.25it/s]

epoch 71, loss 4.3336, acc 0.2881


100%|██████████| 466/466 [00:03<00:00, 119.35it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.34it/s]

epoch 72, loss 4.3148, acc 0.2897


100%|██████████| 466/466 [00:04<00:00, 113.99it/s]
  2%|▏         | 11/466 [00:00<00:04, 100.92it/s]

epoch 73, loss 4.2945, acc 0.2916


100%|██████████| 466/466 [00:04<00:00, 115.69it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.98it/s]

epoch 74, loss 4.2799, acc 0.2923


100%|██████████| 466/466 [00:03<00:00, 120.15it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.68it/s]

epoch 75, loss 4.2572, acc 0.2952


100%|██████████| 466/466 [00:04<00:00, 115.27it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.97it/s]

epoch 76, loss 4.2863, acc 0.2905


100%|██████████| 466/466 [00:03<00:00, 122.07it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.15it/s]

epoch 77, loss 4.2389, acc 0.2960


100%|██████████| 466/466 [00:03<00:00, 116.77it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.89it/s]

epoch 78, loss 4.2072, acc 0.2989


100%|██████████| 466/466 [00:03<00:00, 120.68it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.71it/s]

epoch 79, loss 4.1916, acc 0.3005


100%|██████████| 466/466 [00:03<00:00, 120.16it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.76it/s]

epoch 80, loss 4.1729, acc 0.3012


100%|██████████| 466/466 [00:03<00:00, 120.19it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.70it/s]

epoch 81, loss 4.1532, acc 0.3033


100%|██████████| 466/466 [00:04<00:00, 106.68it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.18it/s]

epoch 82, loss 4.1391, acc 0.3046


100%|██████████| 466/466 [00:04<00:00, 113.20it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.62it/s]

epoch 83, loss 4.1154, acc 0.3065


100%|██████████| 466/466 [00:03<00:00, 122.06it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.57it/s]

epoch 84, loss 4.0973, acc 0.3078


100%|██████████| 466/466 [00:03<00:00, 117.31it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.40it/s]

epoch 85, loss 4.0802, acc 0.3101


100%|██████████| 466/466 [00:03<00:00, 117.75it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.79it/s]

epoch 86, loss 4.0651, acc 0.3110


100%|██████████| 466/466 [00:03<00:00, 117.74it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.65it/s]

epoch 87, loss 4.0473, acc 0.3124


100%|██████████| 466/466 [00:03<00:00, 119.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.53it/s]

epoch 88, loss 4.0271, acc 0.3140


100%|██████████| 466/466 [00:03<00:00, 122.13it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.52it/s]

epoch 89, loss 4.0086, acc 0.3165


100%|██████████| 466/466 [00:03<00:00, 122.02it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.73it/s]

epoch 90, loss 3.9908, acc 0.3183


100%|██████████| 466/466 [00:03<00:00, 117.08it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.48it/s]

epoch 91, loss 3.9765, acc 0.3185


100%|██████████| 466/466 [00:03<00:00, 117.58it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.73it/s]

epoch 92, loss 3.9799, acc 0.3182


100%|██████████| 466/466 [00:03<00:00, 122.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.80it/s]

epoch 93, loss 3.9462, acc 0.3219


100%|██████████| 466/466 [00:03<00:00, 119.22it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.23it/s]

epoch 94, loss 3.9259, acc 0.3242


100%|██████████| 466/466 [00:04<00:00, 115.26it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.58it/s]

epoch 95, loss 3.9062, acc 0.3265


100%|██████████| 466/466 [00:04<00:00, 113.79it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.98it/s]

epoch 96, loss 3.8936, acc 0.3265


100%|██████████| 466/466 [00:03<00:00, 117.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.38it/s]

epoch 97, loss 3.8759, acc 0.3292


100%|██████████| 466/466 [00:03<00:00, 117.33it/s]
  2%|▏         | 11/466 [00:00<00:04, 105.43it/s]

epoch 98, loss 3.8576, acc 0.3305


100%|██████████| 466/466 [00:03<00:00, 117.63it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.21it/s]

epoch 99, loss 3.8423, acc 0.3317


100%|██████████| 466/466 [00:04<00:00, 108.98it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.16it/s]

epoch 100, loss 3.8237, acc 0.3342


100%|██████████| 466/466 [00:03<00:00, 121.58it/s]
  2%|▏         | 10/466 [00:00<00:04, 92.99it/s]

epoch 101, loss 3.8073, acc 0.3350


100%|██████████| 466/466 [00:04<00:00, 112.65it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.52it/s]

epoch 102, loss 3.7916, acc 0.3370


100%|██████████| 466/466 [00:03<00:00, 118.51it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.07it/s]

epoch 103, loss 3.7783, acc 0.3383


100%|██████████| 466/466 [00:03<00:00, 119.55it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.85it/s]

epoch 104, loss 3.7598, acc 0.3402


100%|██████████| 466/466 [00:04<00:00, 114.17it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.34it/s]

epoch 105, loss 3.7410, acc 0.3421


100%|██████████| 466/466 [00:03<00:00, 120.44it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.03it/s]

epoch 106, loss 3.7254, acc 0.3430


100%|██████████| 466/466 [00:04<00:00, 115.80it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.15it/s]

epoch 107, loss 3.7087, acc 0.3457


100%|██████████| 466/466 [00:03<00:00, 122.86it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.44it/s]

epoch 108, loss 3.6874, acc 0.3482


100%|██████████| 466/466 [00:04<00:00, 112.33it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.69it/s]

epoch 109, loss 3.6852, acc 0.3467


100%|██████████| 466/466 [00:03<00:00, 122.48it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.36it/s]

epoch 110, loss 3.6598, acc 0.3512


100%|██████████| 466/466 [00:03<00:00, 117.21it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.12it/s]

epoch 111, loss 3.6409, acc 0.3526


100%|██████████| 466/466 [00:03<00:00, 120.07it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.15it/s]

epoch 112, loss 3.6267, acc 0.3548


100%|██████████| 466/466 [00:03<00:00, 116.86it/s]
  2%|▏         | 11/466 [00:00<00:04, 101.14it/s]

epoch 113, loss 3.6117, acc 0.3556


100%|██████████| 466/466 [00:03<00:00, 117.03it/s]
  2%|▏         | 11/466 [00:00<00:04, 106.13it/s]

epoch 114, loss 3.6017, acc 0.3572


100%|██████████| 466/466 [00:04<00:00, 113.36it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.62it/s]

epoch 115, loss 3.5819, acc 0.3588


100%|██████████| 466/466 [00:03<00:00, 118.60it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.84it/s]

epoch 116, loss 3.5666, acc 0.3606


100%|██████████| 466/466 [00:04<00:00, 113.16it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.31it/s]

epoch 117, loss 3.5467, acc 0.3630


100%|██████████| 466/466 [00:04<00:00, 115.24it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.82it/s]

epoch 118, loss 3.5310, acc 0.3648


100%|██████████| 466/466 [00:04<00:00, 113.22it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.10it/s]

epoch 119, loss 3.5178, acc 0.3666


100%|██████████| 466/466 [00:03<00:00, 117.63it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.10it/s]

epoch 120, loss 3.4996, acc 0.3678


100%|██████████| 466/466 [00:03<00:00, 116.71it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.28it/s]

epoch 121, loss 3.4815, acc 0.3699


100%|██████████| 466/466 [00:03<00:00, 117.89it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.78it/s]

epoch 122, loss 3.4719, acc 0.3713


100%|██████████| 466/466 [00:03<00:00, 120.34it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.54it/s]

epoch 123, loss 3.4496, acc 0.3748


100%|██████████| 466/466 [00:03<00:00, 123.18it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.34it/s]

epoch 124, loss 3.4369, acc 0.3754


100%|██████████| 466/466 [00:03<00:00, 119.81it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.48it/s]

epoch 125, loss 3.4182, acc 0.3778


100%|██████████| 466/466 [00:03<00:00, 122.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.56it/s]

epoch 126, loss 3.4111, acc 0.3771


100%|██████████| 466/466 [00:03<00:00, 120.66it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.20it/s]

epoch 127, loss 3.4434, acc 0.3701


100%|██████████| 466/466 [00:04<00:00, 108.79it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.98it/s]

epoch 128, loss 3.3888, acc 0.3798


100%|██████████| 466/466 [00:04<00:00, 115.95it/s]
  2%|▏         | 9/466 [00:00<00:05, 80.98it/s]

epoch 129, loss 3.3687, acc 0.3823


100%|██████████| 466/466 [00:04<00:00, 110.00it/s]
  2%|▏         | 10/466 [00:00<00:04, 91.74it/s]

epoch 130, loss 3.3557, acc 0.3828


100%|██████████| 466/466 [00:04<00:00, 115.48it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.85it/s]

epoch 131, loss 3.3357, acc 0.3855


100%|██████████| 466/466 [00:04<00:00, 115.35it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.22it/s]

epoch 132, loss 3.3216, acc 0.3879


100%|██████████| 466/466 [00:04<00:00, 114.67it/s]
  2%|▏         | 11/466 [00:00<00:04, 106.91it/s]

epoch 133, loss 3.3026, acc 0.3905


100%|██████████| 466/466 [00:03<00:00, 117.88it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.82it/s]

epoch 134, loss 3.2889, acc 0.3925


100%|██████████| 466/466 [00:03<00:00, 121.54it/s]
  2%|▏         | 11/466 [00:00<00:04, 106.79it/s]

epoch 135, loss 3.2859, acc 0.3917


100%|██████████| 466/466 [00:03<00:00, 118.52it/s]
  3%|▎         | 12/466 [00:00<00:04, 112.32it/s]

epoch 136, loss 3.2584, acc 0.3954


100%|██████████| 466/466 [00:04<00:00, 109.76it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.77it/s]

epoch 137, loss 3.2392, acc 0.3969


100%|██████████| 466/466 [00:03<00:00, 120.81it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.93it/s]

epoch 138, loss 3.2258, acc 0.3991


100%|██████████| 466/466 [00:03<00:00, 121.19it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.61it/s]

epoch 139, loss 3.2105, acc 0.4010


100%|██████████| 466/466 [00:03<00:00, 123.87it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.70it/s]

epoch 140, loss 3.1971, acc 0.4026


100%|██████████| 466/466 [00:03<00:00, 122.57it/s]
  3%|▎         | 12/466 [00:00<00:04, 112.76it/s]

epoch 141, loss 3.1821, acc 0.4039


100%|██████████| 466/466 [00:03<00:00, 116.78it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.20it/s]

epoch 142, loss 3.1625, acc 0.4075


100%|██████████| 466/466 [00:03<00:00, 120.92it/s]
  2%|▏         | 10/466 [00:00<00:04, 92.80it/s]

epoch 143, loss 3.1713, acc 0.4041


100%|██████████| 466/466 [00:04<00:00, 111.01it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.54it/s]

epoch 144, loss 3.1537, acc 0.4076


100%|██████████| 466/466 [00:03<00:00, 119.94it/s]
  2%|▏         | 10/466 [00:00<00:04, 99.61it/s]

epoch 145, loss 3.1253, acc 0.4107


100%|██████████| 466/466 [00:04<00:00, 107.50it/s]
  3%|▎         | 12/466 [00:00<00:04, 111.25it/s]

epoch 146, loss 3.1082, acc 0.4143


100%|██████████| 466/466 [00:03<00:00, 120.65it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.74it/s]

epoch 147, loss 3.0911, acc 0.4160


100%|██████████| 466/466 [00:03<00:00, 118.97it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.19it/s]

epoch 148, loss 3.0801, acc 0.4168


100%|██████████| 466/466 [00:03<00:00, 120.29it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.66it/s]

epoch 149, loss 3.0674, acc 0.4174


100%|██████████| 466/466 [00:03<00:00, 124.29it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.56it/s]

epoch 150, loss 3.0458, acc 0.4220


100%|██████████| 466/466 [00:03<00:00, 125.15it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.83it/s]

epoch 151, loss 3.0304, acc 0.4238


100%|██████████| 466/466 [00:03<00:00, 124.80it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.47it/s]

epoch 152, loss 3.0139, acc 0.4255


100%|██████████| 466/466 [00:03<00:00, 119.61it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.77it/s]

epoch 153, loss 3.0043, acc 0.4275


100%|██████████| 466/466 [00:04<00:00, 112.28it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.33it/s]

epoch 154, loss 2.9915, acc 0.4277


100%|██████████| 466/466 [00:04<00:00, 110.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 118.69it/s]

epoch 155, loss 2.9828, acc 0.4293


100%|██████████| 466/466 [00:03<00:00, 119.58it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.34it/s]

epoch 156, loss 2.9692, acc 0.4301


100%|██████████| 466/466 [00:03<00:00, 123.30it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.05it/s]

epoch 157, loss 2.9433, acc 0.4349


100%|██████████| 466/466 [00:03<00:00, 124.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.79it/s]

epoch 158, loss 2.9336, acc 0.4362


100%|██████████| 466/466 [00:03<00:00, 124.85it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.97it/s]

epoch 159, loss 2.9142, acc 0.4390


100%|██████████| 466/466 [00:03<00:00, 122.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.70it/s]

epoch 160, loss 2.8980, acc 0.4408


100%|██████████| 466/466 [00:03<00:00, 122.07it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.28it/s]

epoch 161, loss 2.8841, acc 0.4430


100%|██████████| 466/466 [00:03<00:00, 121.06it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.70it/s]

epoch 162, loss 2.8662, acc 0.4455


100%|██████████| 466/466 [00:03<00:00, 123.24it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.96it/s]

epoch 163, loss 2.8527, acc 0.4483


100%|██████████| 466/466 [00:04<00:00, 108.66it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.40it/s]

epoch 164, loss 2.8414, acc 0.4483


100%|██████████| 466/466 [00:03<00:00, 121.98it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.49it/s]

epoch 165, loss 2.8266, acc 0.4502


100%|██████████| 466/466 [00:04<00:00, 114.12it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.22it/s]

epoch 166, loss 2.8131, acc 0.4533


100%|██████████| 466/466 [00:04<00:00, 114.81it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.92it/s]

epoch 167, loss 2.8015, acc 0.4536


100%|██████████| 466/466 [00:03<00:00, 122.17it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.81it/s]

epoch 168, loss 2.7847, acc 0.4577


100%|██████████| 466/466 [00:03<00:00, 122.08it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.76it/s]

epoch 169, loss 2.7708, acc 0.4592


100%|██████████| 466/466 [00:03<00:00, 117.89it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.51it/s]

epoch 170, loss 2.7593, acc 0.4598


100%|██████████| 466/466 [00:04<00:00, 109.52it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.43it/s]

epoch 171, loss 2.7478, acc 0.4617


100%|██████████| 466/466 [00:03<00:00, 116.54it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.77it/s]

epoch 172, loss 2.7327, acc 0.4635


100%|██████████| 466/466 [00:04<00:00, 113.55it/s]
  2%|▏         | 11/466 [00:00<00:04, 106.34it/s]

epoch 173, loss 2.7134, acc 0.4674


100%|██████████| 466/466 [00:03<00:00, 117.72it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.82it/s]

epoch 174, loss 2.7042, acc 0.4689


100%|██████████| 466/466 [00:03<00:00, 119.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.42it/s]

epoch 175, loss 2.6955, acc 0.4696


100%|██████████| 466/466 [00:03<00:00, 117.35it/s]
  2%|▏         | 11/466 [00:00<00:04, 103.69it/s]

epoch 176, loss 2.6766, acc 0.4729


100%|██████████| 466/466 [00:03<00:00, 120.58it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.82it/s]

epoch 177, loss 2.6700, acc 0.4725


100%|██████████| 466/466 [00:04<00:00, 114.91it/s]
  2%|▏         | 8/466 [00:00<00:06, 71.73it/s]

epoch 178, loss 2.6530, acc 0.4757


100%|██████████| 466/466 [00:04<00:00, 108.20it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.43it/s]

epoch 179, loss 2.6421, acc 0.4775


100%|██████████| 466/466 [00:03<00:00, 119.61it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.84it/s]

epoch 180, loss 2.6273, acc 0.4801


100%|██████████| 466/466 [00:04<00:00, 111.96it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.51it/s]

epoch 181, loss 2.6076, acc 0.4834


100%|██████████| 466/466 [00:04<00:00, 113.69it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.61it/s]

epoch 182, loss 2.5868, acc 0.4880


100%|██████████| 466/466 [00:04<00:00, 107.30it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.65it/s]

epoch 183, loss 2.5797, acc 0.4879


100%|██████████| 466/466 [00:03<00:00, 121.19it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.52it/s]

epoch 184, loss 2.5686, acc 0.4905


100%|██████████| 466/466 [00:03<00:00, 117.20it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.45it/s]

epoch 185, loss 2.5656, acc 0.4901


100%|██████████| 466/466 [00:03<00:00, 117.25it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.68it/s]

epoch 186, loss 2.5508, acc 0.4920


100%|██████████| 466/466 [00:03<00:00, 118.29it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.02it/s]

epoch 187, loss 2.5255, acc 0.4973


100%|██████████| 466/466 [00:03<00:00, 123.98it/s]
  2%|▏         | 10/466 [00:00<00:04, 96.77it/s]

epoch 188, loss 2.5141, acc 0.4999


100%|██████████| 466/466 [00:03<00:00, 121.86it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.71it/s]

epoch 189, loss 2.5036, acc 0.5001


100%|██████████| 466/466 [00:03<00:00, 122.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.59it/s]

epoch 190, loss 2.4847, acc 0.5047


100%|██████████| 466/466 [00:04<00:00, 114.19it/s]
  2%|▏         | 10/466 [00:00<00:04, 99.18it/s]

epoch 191, loss 2.4827, acc 0.5037


100%|██████████| 466/466 [00:04<00:00, 104.58it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.76it/s]

epoch 192, loss 2.4674, acc 0.5073


100%|██████████| 466/466 [00:03<00:00, 122.08it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.90it/s]

epoch 193, loss 2.4473, acc 0.5105


100%|██████████| 466/466 [00:04<00:00, 112.13it/s]
  3%|▎         | 12/466 [00:00<00:04, 111.07it/s]

epoch 194, loss 2.4359, acc 0.5144


100%|██████████| 466/466 [00:04<00:00, 114.97it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.33it/s]

epoch 195, loss 2.4262, acc 0.5142


100%|██████████| 466/466 [00:03<00:00, 116.74it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.31it/s]

epoch 196, loss 2.4145, acc 0.5159


100%|██████████| 466/466 [00:03<00:00, 121.13it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.93it/s]

epoch 197, loss 2.3928, acc 0.5207


100%|██████████| 466/466 [00:03<00:00, 116.60it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.12it/s]

epoch 198, loss 2.3813, acc 0.5220


100%|██████████| 466/466 [00:03<00:00, 125.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.69it/s]

epoch 199, loss 2.3718, acc 0.5243


100%|██████████| 466/466 [00:03<00:00, 124.55it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.99it/s]

epoch 200, loss 2.3521, acc 0.5277


100%|██████████| 466/466 [00:03<00:00, 117.37it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.02it/s]

epoch 201, loss 2.3456, acc 0.5303


100%|██████████| 466/466 [00:03<00:00, 122.59it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.12it/s]

epoch 202, loss 2.3391, acc 0.5308


100%|██████████| 466/466 [00:03<00:00, 120.09it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.20it/s]

epoch 203, loss 2.3201, acc 0.5350


100%|██████████| 466/466 [00:03<00:00, 124.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.80it/s]

epoch 204, loss 2.3101, acc 0.5365


100%|██████████| 466/466 [00:03<00:00, 125.17it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.76it/s]

epoch 205, loss 2.3090, acc 0.5361


100%|██████████| 466/466 [00:03<00:00, 121.25it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.01it/s]

epoch 206, loss 2.2868, acc 0.5410


100%|██████████| 466/466 [00:03<00:00, 123.99it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.18it/s]

epoch 207, loss 2.2666, acc 0.5449


100%|██████████| 466/466 [00:03<00:00, 124.92it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.17it/s]

epoch 208, loss 2.2545, acc 0.5475


100%|██████████| 466/466 [00:03<00:00, 120.47it/s]
  2%|▏         | 11/466 [00:00<00:04, 101.05it/s]

epoch 209, loss 2.2458, acc 0.5491


100%|██████████| 466/466 [00:04<00:00, 107.34it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.38it/s]

epoch 210, loss 2.2315, acc 0.5516


100%|██████████| 466/466 [00:03<00:00, 122.21it/s]
  3%|▎         | 12/466 [00:00<00:03, 116.52it/s]

epoch 211, loss 2.2215, acc 0.5528


100%|██████████| 466/466 [00:04<00:00, 116.10it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.50it/s]

epoch 212, loss 2.2035, acc 0.5584


100%|██████████| 466/466 [00:03<00:00, 125.36it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.91it/s]

epoch 213, loss 2.1952, acc 0.5591


100%|██████████| 466/466 [00:03<00:00, 120.58it/s]
  2%|▏         | 11/466 [00:00<00:04, 102.40it/s]

epoch 214, loss 2.1771, acc 0.5637


100%|██████████| 466/466 [00:03<00:00, 117.27it/s]
  2%|▏         | 10/466 [00:00<00:04, 95.34it/s]

epoch 215, loss 2.2188, acc 0.5513


100%|██████████| 466/466 [00:04<00:00, 112.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.34it/s]

epoch 216, loss 2.1730, acc 0.5627


100%|██████████| 466/466 [00:03<00:00, 122.52it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.95it/s]

epoch 217, loss 2.1463, acc 0.5688


100%|██████████| 466/466 [00:03<00:00, 124.82it/s]
  3%|▎         | 13/466 [00:00<00:03, 127.36it/s]

epoch 218, loss 2.1335, acc 0.5717


100%|██████████| 466/466 [00:03<00:00, 118.80it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.28it/s]

epoch 219, loss 2.1235, acc 0.5737


100%|██████████| 466/466 [00:03<00:00, 117.17it/s]
  2%|▏         | 11/466 [00:00<00:04, 100.91it/s]

epoch 220, loss 2.1175, acc 0.5748


100%|██████████| 466/466 [00:03<00:00, 116.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.30it/s]

epoch 221, loss 2.0981, acc 0.5792


100%|██████████| 466/466 [00:04<00:00, 115.09it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.19it/s]

epoch 222, loss 2.0884, acc 0.5812


100%|██████████| 466/466 [00:03<00:00, 120.38it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.76it/s]

epoch 223, loss 2.0921, acc 0.5795


100%|██████████| 466/466 [00:04<00:00, 115.01it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.13it/s]

epoch 224, loss 2.0701, acc 0.5847


100%|██████████| 466/466 [00:03<00:00, 119.74it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.56it/s]

epoch 225, loss 2.0589, acc 0.5863


100%|██████████| 466/466 [00:03<00:00, 122.92it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.75it/s]

epoch 226, loss 2.0444, acc 0.5902


100%|██████████| 466/466 [00:03<00:00, 117.77it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.68it/s]

epoch 227, loss 2.0426, acc 0.5895


100%|██████████| 466/466 [00:04<00:00, 113.57it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.86it/s]

epoch 228, loss 2.0326, acc 0.5915


100%|██████████| 466/466 [00:03<00:00, 124.52it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.35it/s]

epoch 229, loss 2.0082, acc 0.5966


100%|██████████| 466/466 [00:03<00:00, 118.68it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.50it/s]

epoch 230, loss 1.9908, acc 0.6028


100%|██████████| 466/466 [00:04<00:00, 115.74it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.06it/s]

epoch 231, loss 1.9911, acc 0.6016


100%|██████████| 466/466 [00:03<00:00, 122.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.30it/s]

epoch 232, loss 1.9657, acc 0.6076


100%|██████████| 466/466 [00:03<00:00, 122.16it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.38it/s]

epoch 233, loss 1.9638, acc 0.6073


100%|██████████| 466/466 [00:04<00:00, 114.33it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.92it/s]

epoch 234, loss 1.9719, acc 0.6035


100%|██████████| 466/466 [00:03<00:00, 118.81it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.13it/s]

epoch 235, loss 1.9399, acc 0.6132


100%|██████████| 466/466 [00:03<00:00, 124.57it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.02it/s]

epoch 236, loss 1.9426, acc 0.6106


100%|██████████| 466/466 [00:03<00:00, 119.57it/s]
  2%|▏         | 10/466 [00:00<00:04, 96.15it/s]

epoch 237, loss 1.9460, acc 0.6089


100%|██████████| 466/466 [00:04<00:00, 113.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.19it/s]

epoch 238, loss 1.9274, acc 0.6150


100%|██████████| 466/466 [00:03<00:00, 120.92it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.97it/s]

epoch 239, loss 1.9057, acc 0.6191


100%|██████████| 466/466 [00:03<00:00, 121.96it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.33it/s]

epoch 240, loss 1.8927, acc 0.6217


100%|██████████| 466/466 [00:03<00:00, 118.74it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.68it/s]

epoch 241, loss 1.8999, acc 0.6201


100%|██████████| 466/466 [00:03<00:00, 119.78it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.44it/s]

epoch 242, loss 1.8747, acc 0.6257


100%|██████████| 466/466 [00:03<00:00, 118.71it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.20it/s]

epoch 243, loss 1.8619, acc 0.6275


100%|██████████| 466/466 [00:03<00:00, 117.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.03it/s]

epoch 244, loss 1.8559, acc 0.6296


100%|██████████| 466/466 [00:04<00:00, 115.37it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.98it/s]

epoch 245, loss 1.8352, acc 0.6351


100%|██████████| 466/466 [00:04<00:00, 115.15it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.17it/s]

epoch 246, loss 1.8255, acc 0.6371


100%|██████████| 466/466 [00:04<00:00, 109.58it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.15it/s]

epoch 247, loss 1.8187, acc 0.6385


100%|██████████| 466/466 [00:03<00:00, 122.84it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.42it/s]

epoch 248, loss 1.8095, acc 0.6412


100%|██████████| 466/466 [00:03<00:00, 124.40it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.55it/s]

epoch 249, loss 1.8102, acc 0.6391


100%|██████████| 466/466 [00:04<00:00, 112.61it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.76it/s]

epoch 250, loss 1.7805, acc 0.6481


100%|██████████| 466/466 [00:04<00:00, 116.48it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.24it/s]

epoch 251, loss 1.7875, acc 0.6436


100%|██████████| 466/466 [00:03<00:00, 119.36it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.33it/s]

epoch 252, loss 1.7623, acc 0.6516


100%|██████████| 466/466 [00:03<00:00, 124.96it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.12it/s]

epoch 253, loss 1.7559, acc 0.6522


100%|██████████| 466/466 [00:03<00:00, 120.68it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.08it/s]

epoch 254, loss 1.7434, acc 0.6558


100%|██████████| 466/466 [00:03<00:00, 124.75it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.76it/s]

epoch 255, loss 1.7360, acc 0.6569


100%|██████████| 466/466 [00:03<00:00, 117.02it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.63it/s]

epoch 256, loss 1.7905, acc 0.6405


100%|██████████| 466/466 [00:03<00:00, 121.10it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.85it/s]

epoch 257, loss 1.7197, acc 0.6601


100%|██████████| 466/466 [00:03<00:00, 120.23it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.01it/s]

epoch 258, loss 1.7194, acc 0.6596


100%|██████████| 466/466 [00:03<00:00, 119.18it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.52it/s]

epoch 259, loss 1.7056, acc 0.6625


100%|██████████| 466/466 [00:03<00:00, 118.49it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.73it/s]

epoch 260, loss 1.6886, acc 0.6678


100%|██████████| 466/466 [00:03<00:00, 119.40it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.29it/s]

epoch 261, loss 1.6812, acc 0.6690


100%|██████████| 466/466 [00:03<00:00, 125.15it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.10it/s]

epoch 262, loss 1.6622, acc 0.6743


100%|██████████| 466/466 [00:03<00:00, 119.32it/s]
  3%|▎         | 12/466 [00:00<00:03, 116.77it/s]

epoch 263, loss 1.6553, acc 0.6746


100%|██████████| 466/466 [00:04<00:00, 116.13it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.28it/s]

epoch 264, loss 1.6495, acc 0.6763


100%|██████████| 466/466 [00:04<00:00, 114.41it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.08it/s]

epoch 265, loss 1.6339, acc 0.6800


100%|██████████| 466/466 [00:03<00:00, 121.21it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.93it/s]

epoch 266, loss 1.6313, acc 0.6802


100%|██████████| 466/466 [00:03<00:00, 123.09it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.31it/s]

epoch 267, loss 1.6203, acc 0.6826


100%|██████████| 466/466 [00:03<00:00, 119.07it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.94it/s]

epoch 268, loss 1.6060, acc 0.6857


100%|██████████| 466/466 [00:03<00:00, 119.80it/s]
  2%|▏         | 11/466 [00:00<00:04, 101.95it/s]

epoch 269, loss 1.5939, acc 0.6901


100%|██████████| 466/466 [00:03<00:00, 118.22it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.85it/s]

epoch 270, loss 1.5906, acc 0.6895


100%|██████████| 466/466 [00:04<00:00, 115.80it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.26it/s]

epoch 271, loss 1.5818, acc 0.6913


100%|██████████| 466/466 [00:03<00:00, 119.79it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.34it/s]

epoch 272, loss 1.5702, acc 0.6943


100%|██████████| 466/466 [00:03<00:00, 118.64it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.85it/s]

epoch 273, loss 1.5671, acc 0.6946


100%|██████████| 466/466 [00:03<00:00, 117.20it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.64it/s]

epoch 274, loss 1.5536, acc 0.6987


100%|██████████| 466/466 [00:03<00:00, 121.43it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.89it/s]

epoch 275, loss 1.5487, acc 0.6998


100%|██████████| 466/466 [00:03<00:00, 118.13it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.85it/s]

epoch 276, loss 1.5250, acc 0.7057


100%|██████████| 466/466 [00:04<00:00, 116.13it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.14it/s]

epoch 277, loss 1.5289, acc 0.7047


100%|██████████| 466/466 [00:04<00:00, 116.25it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.42it/s]

epoch 278, loss 1.5276, acc 0.7036


100%|██████████| 466/466 [00:03<00:00, 120.51it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.85it/s]

epoch 279, loss 1.5165, acc 0.7060


100%|██████████| 466/466 [00:03<00:00, 119.71it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.60it/s]

epoch 280, loss 1.5139, acc 0.7060


100%|██████████| 466/466 [00:03<00:00, 120.05it/s]
  2%|▏         | 9/466 [00:00<00:05, 82.51it/s]

epoch 281, loss 1.4949, acc 0.7111


100%|██████████| 466/466 [00:04<00:00, 112.04it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.27it/s]

epoch 282, loss 1.4697, acc 0.7193


100%|██████████| 466/466 [00:04<00:00, 112.62it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.66it/s]

epoch 283, loss 1.4684, acc 0.7178


100%|██████████| 466/466 [00:03<00:00, 119.01it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.17it/s]

epoch 284, loss 1.4580, acc 0.7198


100%|██████████| 466/466 [00:03<00:00, 124.42it/s]
  2%|▏         | 10/466 [00:00<00:04, 98.82it/s]

epoch 285, loss 1.4454, acc 0.7237


100%|██████████| 466/466 [00:04<00:00, 113.80it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.53it/s]

epoch 286, loss 1.4460, acc 0.7225


100%|██████████| 466/466 [00:03<00:00, 119.30it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.59it/s]

epoch 287, loss 1.4272, acc 0.7276


100%|██████████| 466/466 [00:04<00:00, 114.82it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.38it/s]

epoch 288, loss 1.4302, acc 0.7282


100%|██████████| 466/466 [00:04<00:00, 113.38it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.16it/s]

epoch 289, loss 1.4276, acc 0.7260


100%|██████████| 466/466 [00:03<00:00, 116.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.69it/s]

epoch 290, loss 1.4163, acc 0.7297


100%|██████████| 466/466 [00:03<00:00, 119.29it/s]
  3%|▎         | 12/466 [00:00<00:04, 111.33it/s]

epoch 291, loss 1.4281, acc 0.7244


100%|██████████| 466/466 [00:04<00:00, 113.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.64it/s]

epoch 292, loss 1.4140, acc 0.7293


100%|██████████| 466/466 [00:03<00:00, 117.33it/s]
  3%|▎         | 12/466 [00:00<00:04, 112.54it/s]

epoch 293, loss 1.3990, acc 0.7333


100%|██████████| 466/466 [00:03<00:00, 118.67it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.09it/s]

epoch 294, loss 1.3857, acc 0.7357


100%|██████████| 466/466 [00:04<00:00, 114.18it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.09it/s]

epoch 295, loss 1.4031, acc 0.7310


100%|██████████| 466/466 [00:04<00:00, 111.35it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.70it/s]

epoch 296, loss 1.3805, acc 0.7360


100%|██████████| 466/466 [00:03<00:00, 118.11it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.04it/s]

epoch 297, loss 1.3515, acc 0.7456


100%|██████████| 466/466 [00:03<00:00, 123.88it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.40it/s]

epoch 298, loss 1.3401, acc 0.7490


100%|██████████| 466/466 [00:03<00:00, 124.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.16it/s]

epoch 299, loss 1.3468, acc 0.7445


100%|██████████| 466/466 [00:03<00:00, 119.38it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.73it/s]

epoch 300, loss 1.3483, acc 0.7438


100%|██████████| 466/466 [00:03<00:00, 121.91it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.83it/s]

epoch 301, loss 1.3269, acc 0.7501


100%|██████████| 466/466 [00:03<00:00, 118.12it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.05it/s]

epoch 302, loss 1.3178, acc 0.7513


100%|██████████| 466/466 [00:03<00:00, 124.40it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.32it/s]

epoch 303, loss 1.3091, acc 0.7544


100%|██████████| 466/466 [00:03<00:00, 124.88it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.11it/s]

epoch 304, loss 1.2994, acc 0.7569


100%|██████████| 466/466 [00:04<00:00, 113.10it/s]
  2%|▏         | 11/466 [00:00<00:04, 102.69it/s]

epoch 305, loss 1.3065, acc 0.7529


100%|██████████| 466/466 [00:04<00:00, 113.47it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.62it/s]

epoch 306, loss 1.2984, acc 0.7543


100%|██████████| 466/466 [00:04<00:00, 111.67it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.63it/s]

epoch 307, loss 1.2789, acc 0.7620


100%|██████████| 466/466 [00:03<00:00, 124.54it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.96it/s]

epoch 308, loss 1.2641, acc 0.7658


100%|██████████| 466/466 [00:03<00:00, 119.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.43it/s]

epoch 309, loss 1.2610, acc 0.7645


100%|██████████| 466/466 [00:04<00:00, 116.30it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.61it/s]

epoch 310, loss 1.2514, acc 0.7681


100%|██████████| 466/466 [00:04<00:00, 107.50it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.55it/s]

epoch 311, loss 1.2481, acc 0.7689


100%|██████████| 466/466 [00:03<00:00, 117.84it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.40it/s]

epoch 312, loss 1.2407, acc 0.7699


100%|██████████| 466/466 [00:03<00:00, 119.15it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.36it/s]

epoch 313, loss 1.2346, acc 0.7705


100%|██████████| 466/466 [00:03<00:00, 116.67it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.96it/s]

epoch 314, loss 1.2282, acc 0.7727


100%|██████████| 466/466 [00:03<00:00, 121.56it/s]
  3%|▎         | 12/466 [00:00<00:03, 116.86it/s]

epoch 315, loss 1.2171, acc 0.7759


100%|██████████| 466/466 [00:04<00:00, 110.98it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.24it/s]

epoch 316, loss 1.2212, acc 0.7728


100%|██████████| 466/466 [00:03<00:00, 117.72it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.40it/s]

epoch 317, loss 1.2054, acc 0.7785


100%|██████████| 466/466 [00:03<00:00, 116.79it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.69it/s]

epoch 318, loss 1.1911, acc 0.7821


100%|██████████| 466/466 [00:03<00:00, 117.63it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.33it/s]

epoch 319, loss 1.2346, acc 0.7673


100%|██████████| 466/466 [00:04<00:00, 115.14it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.99it/s]

epoch 320, loss 1.2276, acc 0.7684


100%|██████████| 466/466 [00:03<00:00, 116.85it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.09it/s]

epoch 321, loss 1.2032, acc 0.7760


100%|██████████| 466/466 [00:04<00:00, 110.47it/s]
  2%|▏         | 11/466 [00:00<00:04, 100.65it/s]

epoch 322, loss 1.3311, acc 0.7338


100%|██████████| 466/466 [00:03<00:00, 118.04it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.39it/s]

epoch 323, loss 1.2513, acc 0.7573


100%|██████████| 466/466 [00:04<00:00, 114.53it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.79it/s]

epoch 324, loss 1.2384, acc 0.7623


100%|██████████| 466/466 [00:03<00:00, 123.99it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.79it/s]

epoch 325, loss 1.1960, acc 0.7751


100%|██████████| 466/466 [00:03<00:00, 122.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.65it/s]

epoch 326, loss 1.1703, acc 0.7835


100%|██████████| 466/466 [00:03<00:00, 117.70it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.06it/s]

epoch 327, loss 1.1653, acc 0.7846


100%|██████████| 466/466 [00:03<00:00, 117.68it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.85it/s]

epoch 328, loss 1.1614, acc 0.7850


100%|██████████| 466/466 [00:04<00:00, 102.68it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.32it/s]

epoch 329, loss 1.1847, acc 0.7764


100%|██████████| 466/466 [00:04<00:00, 109.64it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.16it/s]

epoch 330, loss 1.1771, acc 0.7795


100%|██████████| 466/466 [00:04<00:00, 115.21it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.36it/s]

epoch 331, loss 1.1315, acc 0.7932


100%|██████████| 466/466 [00:03<00:00, 117.73it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.82it/s]

epoch 332, loss 1.1285, acc 0.7949


100%|██████████| 466/466 [00:04<00:00, 115.06it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.77it/s]

epoch 333, loss 1.0917, acc 0.8047


100%|██████████| 466/466 [00:04<00:00, 112.27it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.68it/s]

epoch 334, loss 1.0858, acc 0.8050


100%|██████████| 466/466 [00:03<00:00, 122.26it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.93it/s]

epoch 335, loss 1.1019, acc 0.8002


100%|██████████| 466/466 [00:04<00:00, 113.60it/s]
  3%|▎         | 12/466 [00:00<00:03, 117.61it/s]

epoch 336, loss 1.1071, acc 0.7979


100%|██████████| 466/466 [00:03<00:00, 119.12it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.09it/s]

epoch 337, loss 1.0858, acc 0.8035


100%|██████████| 466/466 [00:04<00:00, 112.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.46it/s]

epoch 338, loss 1.0776, acc 0.8060


100%|██████████| 466/466 [00:03<00:00, 116.61it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.29it/s]

epoch 339, loss 1.0727, acc 0.8061


100%|██████████| 466/466 [00:03<00:00, 120.81it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.74it/s]

epoch 340, loss 1.0789, acc 0.8051


100%|██████████| 466/466 [00:04<00:00, 115.79it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.18it/s]

epoch 341, loss 1.0839, acc 0.8013


100%|██████████| 466/466 [00:03<00:00, 117.36it/s]
  3%|▎         | 12/466 [00:00<00:04, 112.35it/s]

epoch 342, loss 1.0996, acc 0.7976


100%|██████████| 466/466 [00:04<00:00, 112.55it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.81it/s]

epoch 343, loss 1.1197, acc 0.7904


100%|██████████| 466/466 [00:04<00:00, 113.51it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.03it/s]

epoch 344, loss 1.0672, acc 0.8056


100%|██████████| 466/466 [00:04<00:00, 114.16it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.97it/s]

epoch 345, loss 1.0475, acc 0.8109


100%|██████████| 466/466 [00:04<00:00, 114.11it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.90it/s]

epoch 346, loss 1.0342, acc 0.8147


100%|██████████| 466/466 [00:03<00:00, 118.29it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.29it/s]

epoch 347, loss 1.0240, acc 0.8184


100%|██████████| 466/466 [00:04<00:00, 115.94it/s]
  2%|▏         | 11/466 [00:00<00:04, 103.54it/s]

epoch 348, loss 1.0396, acc 0.8122


100%|██████████| 466/466 [00:03<00:00, 119.86it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.69it/s]

epoch 349, loss 1.0132, acc 0.8209


100%|██████████| 466/466 [00:03<00:00, 120.01it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.17it/s]

epoch 350, loss 1.0231, acc 0.8165


100%|██████████| 466/466 [00:03<00:00, 124.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.14it/s]

epoch 351, loss 0.9931, acc 0.8260


100%|██████████| 466/466 [00:03<00:00, 121.23it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.89it/s]

epoch 352, loss 1.0067, acc 0.8209


100%|██████████| 466/466 [00:03<00:00, 123.65it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.55it/s]

epoch 353, loss 0.9855, acc 0.8277


100%|██████████| 466/466 [00:03<00:00, 122.32it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.91it/s]

epoch 354, loss 0.9813, acc 0.8281


100%|██████████| 466/466 [00:04<00:00, 116.18it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.43it/s]

epoch 355, loss 0.9931, acc 0.8228


100%|██████████| 466/466 [00:04<00:00, 113.73it/s]
  2%|▏         | 10/466 [00:00<00:04, 93.76it/s]

epoch 356, loss 0.9770, acc 0.8281


100%|██████████| 466/466 [00:04<00:00, 114.40it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.76it/s]

epoch 357, loss 0.9752, acc 0.8279


100%|██████████| 466/466 [00:03<00:00, 124.40it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.92it/s]

epoch 358, loss 0.9763, acc 0.8278


100%|██████████| 466/466 [00:03<00:00, 124.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.26it/s]

epoch 359, loss 0.9652, acc 0.8301


100%|██████████| 466/466 [00:03<00:00, 124.18it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.01it/s]

epoch 360, loss 0.9491, acc 0.8355


100%|██████████| 466/466 [00:03<00:00, 125.11it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.52it/s]

epoch 361, loss 0.9543, acc 0.8317


100%|██████████| 466/466 [00:03<00:00, 125.12it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.80it/s]

epoch 362, loss 0.9375, acc 0.8374


100%|██████████| 466/466 [00:04<00:00, 116.27it/s]
  3%|▎         | 12/466 [00:00<00:03, 116.83it/s]

epoch 363, loss 0.9522, acc 0.8320


100%|██████████| 466/466 [00:04<00:00, 112.94it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.68it/s]

epoch 364, loss 0.9887, acc 0.8208


100%|██████████| 466/466 [00:03<00:00, 124.82it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.66it/s]

epoch 365, loss 0.9580, acc 0.8291


100%|██████████| 466/466 [00:04<00:00, 109.07it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.15it/s]

epoch 366, loss 0.9514, acc 0.8298


100%|██████████| 466/466 [00:03<00:00, 123.01it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.77it/s]

epoch 367, loss 0.9881, acc 0.8193


100%|██████████| 466/466 [00:03<00:00, 122.14it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.86it/s]

epoch 368, loss 0.9660, acc 0.8253


100%|██████████| 466/466 [00:03<00:00, 117.92it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.12it/s]

epoch 369, loss 0.9332, acc 0.8354


100%|██████████| 466/466 [00:03<00:00, 117.11it/s]
  2%|▏         | 10/466 [00:00<00:05, 91.09it/s]

epoch 370, loss 0.9195, acc 0.8390


100%|██████████| 466/466 [00:04<00:00, 109.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.68it/s]

epoch 371, loss 0.9608, acc 0.8256


100%|██████████| 466/466 [00:03<00:00, 121.77it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.10it/s]

epoch 372, loss 0.9230, acc 0.8375


100%|██████████| 466/466 [00:03<00:00, 124.87it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.05it/s]

epoch 373, loss 0.9203, acc 0.8383


100%|██████████| 466/466 [00:03<00:00, 123.41it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.63it/s]

epoch 374, loss 0.9119, acc 0.8397


100%|██████████| 466/466 [00:04<00:00, 107.99it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.76it/s]

epoch 375, loss 0.8914, acc 0.8457


100%|██████████| 466/466 [00:04<00:00, 114.39it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.41it/s]

epoch 376, loss 0.8749, acc 0.8504


100%|██████████| 466/466 [00:04<00:00, 109.90it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.64it/s]

epoch 377, loss 0.8605, acc 0.8561


100%|██████████| 466/466 [00:03<00:00, 121.40it/s]
  2%|▏         | 10/466 [00:00<00:04, 92.34it/s]

epoch 378, loss 0.8601, acc 0.8550


100%|██████████| 466/466 [00:04<00:00, 107.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.28it/s]

epoch 379, loss 0.8351, acc 0.8634


100%|██████████| 466/466 [00:04<00:00, 115.63it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.56it/s]

epoch 380, loss 0.8464, acc 0.8589


100%|██████████| 466/466 [00:03<00:00, 118.28it/s]
  2%|▏         | 10/466 [00:00<00:04, 96.01it/s]

epoch 381, loss 0.8420, acc 0.8587


100%|██████████| 466/466 [00:04<00:00, 113.35it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.40it/s]

epoch 382, loss 0.8313, acc 0.8632


100%|██████████| 466/466 [00:03<00:00, 124.03it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.89it/s]

epoch 383, loss 0.8310, acc 0.8622


100%|██████████| 466/466 [00:04<00:00, 115.62it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.74it/s]

epoch 384, loss 0.8051, acc 0.8702


100%|██████████| 466/466 [00:03<00:00, 120.43it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.33it/s]

epoch 385, loss 0.8051, acc 0.8700


100%|██████████| 466/466 [00:03<00:00, 119.35it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.10it/s]

epoch 386, loss 0.8013, acc 0.8708


100%|██████████| 466/466 [00:03<00:00, 119.27it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.79it/s]

epoch 387, loss 0.8003, acc 0.8702


100%|██████████| 466/466 [00:03<00:00, 120.15it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.33it/s]

epoch 388, loss 0.7922, acc 0.8719


100%|██████████| 466/466 [00:04<00:00, 115.72it/s]
  2%|▏         | 10/466 [00:00<00:04, 96.60it/s]

epoch 389, loss 0.7840, acc 0.8747


100%|██████████| 466/466 [00:04<00:00, 112.28it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.18it/s]

epoch 390, loss 0.7800, acc 0.8756


100%|██████████| 466/466 [00:03<00:00, 116.90it/s]
  3%|▎         | 12/466 [00:00<00:03, 117.55it/s]

epoch 391, loss 1.2952, acc 0.7236


100%|██████████| 466/466 [00:03<00:00, 118.90it/s]
  2%|▏         | 11/466 [00:00<00:04, 101.88it/s]

epoch 392, loss 1.0630, acc 0.7842


100%|██████████| 466/466 [00:04<00:00, 109.08it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.52it/s]

epoch 393, loss 1.0108, acc 0.8006


100%|██████████| 466/466 [00:04<00:00, 115.66it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.20it/s]

epoch 394, loss 0.9481, acc 0.8197


100%|██████████| 466/466 [00:03<00:00, 117.08it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.81it/s]

epoch 395, loss 0.9172, acc 0.8275


100%|██████████| 466/466 [00:03<00:00, 120.49it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.39it/s]

epoch 396, loss 0.8960, acc 0.8360


100%|██████████| 466/466 [00:03<00:00, 117.49it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.14it/s]

epoch 397, loss 0.8617, acc 0.8459


100%|██████████| 466/466 [00:04<00:00, 114.59it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.75it/s]

epoch 398, loss 0.8271, acc 0.8566


100%|██████████| 466/466 [00:03<00:00, 124.86it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.17it/s]

epoch 399, loss 0.8766, acc 0.8407


100%|██████████| 466/466 [00:04<00:00, 113.93it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.91it/s]

epoch 400, loss 0.8932, acc 0.8350


100%|██████████| 466/466 [00:04<00:00, 108.55it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.60it/s]

epoch 401, loss 0.8396, acc 0.8509


100%|██████████| 466/466 [00:04<00:00, 106.79it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.16it/s]

epoch 402, loss 0.8227, acc 0.8568


100%|██████████| 466/466 [00:04<00:00, 110.41it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.18it/s]

epoch 403, loss 0.7986, acc 0.8643


100%|██████████| 466/466 [00:04<00:00, 109.22it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.16it/s]

epoch 404, loss 0.8019, acc 0.8630


100%|██████████| 466/466 [00:04<00:00, 113.73it/s]
  3%|▎         | 12/466 [00:00<00:03, 117.68it/s]

epoch 405, loss 0.7773, acc 0.8703


100%|██████████| 466/466 [00:03<00:00, 118.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.47it/s]

epoch 406, loss 0.7793, acc 0.8698


100%|██████████| 466/466 [00:03<00:00, 123.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.69it/s]

epoch 407, loss 0.7636, acc 0.8750


100%|██████████| 466/466 [00:03<00:00, 123.66it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.18it/s]

epoch 408, loss 0.7487, acc 0.8788


100%|██████████| 466/466 [00:04<00:00, 116.43it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.14it/s]

epoch 409, loss 0.7385, acc 0.8816


100%|██████████| 466/466 [00:03<00:00, 121.09it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.55it/s]

epoch 410, loss 0.7284, acc 0.8844


100%|██████████| 466/466 [00:03<00:00, 117.40it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.93it/s]

epoch 411, loss 0.7601, acc 0.8735


100%|██████████| 466/466 [00:03<00:00, 122.11it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.27it/s]

epoch 412, loss 0.7527, acc 0.8761


100%|██████████| 466/466 [00:03<00:00, 123.36it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.46it/s]

epoch 413, loss 0.7814, acc 0.8653


100%|██████████| 466/466 [00:03<00:00, 117.31it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.58it/s]

epoch 414, loss 0.7390, acc 0.8784


100%|██████████| 466/466 [00:03<00:00, 120.65it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.83it/s]

epoch 415, loss 0.9439, acc 0.8226


100%|██████████| 466/466 [00:04<00:00, 113.77it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.71it/s]

epoch 416, loss 0.8906, acc 0.8291


100%|██████████| 466/466 [00:04<00:00, 109.69it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.81it/s]

epoch 417, loss 0.7846, acc 0.8627


100%|██████████| 466/466 [00:03<00:00, 118.73it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.21it/s]

epoch 418, loss 0.7463, acc 0.8761


100%|██████████| 466/466 [00:03<00:00, 121.24it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.76it/s]

epoch 419, loss 0.7180, acc 0.8847


100%|██████████| 466/466 [00:04<00:00, 115.93it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.63it/s]

epoch 420, loss 0.6954, acc 0.8907


100%|██████████| 466/466 [00:04<00:00, 114.49it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.31it/s]

epoch 421, loss 0.7061, acc 0.8875


100%|██████████| 466/466 [00:03<00:00, 121.37it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.26it/s]

epoch 422, loss 0.6803, acc 0.8963


100%|██████████| 466/466 [00:03<00:00, 121.11it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.32it/s]

epoch 423, loss 0.6591, acc 0.9026


100%|██████████| 466/466 [00:04<00:00, 115.60it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.55it/s]

epoch 424, loss 0.6794, acc 0.8958


100%|██████████| 466/466 [00:04<00:00, 112.40it/s]
  2%|▏         | 10/466 [00:00<00:04, 98.94it/s]

epoch 425, loss 0.6527, acc 0.9041


100%|██████████| 466/466 [00:04<00:00, 109.38it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.18it/s]

epoch 426, loss 0.6652, acc 0.8987


100%|██████████| 466/466 [00:03<00:00, 123.24it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.28it/s]

epoch 427, loss 0.6820, acc 0.8926


100%|██████████| 466/466 [00:03<00:00, 123.02it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.42it/s]

epoch 428, loss 0.6539, acc 0.9021


100%|██████████| 466/466 [00:03<00:00, 118.72it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.94it/s]

epoch 429, loss 0.6464, acc 0.9051


100%|██████████| 466/466 [00:04<00:00, 115.76it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.02it/s]

epoch 430, loss 0.6286, acc 0.9099


100%|██████████| 466/466 [00:03<00:00, 121.68it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.12it/s]

epoch 431, loss 0.6246, acc 0.9109


100%|██████████| 466/466 [00:03<00:00, 124.57it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.30it/s]

epoch 432, loss 0.6092, acc 0.9160


100%|██████████| 466/466 [00:03<00:00, 121.16it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.18it/s]

epoch 433, loss 0.6083, acc 0.9148


100%|██████████| 466/466 [00:04<00:00, 113.70it/s]
  3%|▎         | 12/466 [00:00<00:04, 113.22it/s]

epoch 434, loss 0.6321, acc 0.9065


100%|██████████| 466/466 [00:03<00:00, 117.57it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.91it/s]

epoch 435, loss 0.6169, acc 0.9107


100%|██████████| 466/466 [00:03<00:00, 120.91it/s]
  2%|▏         | 11/466 [00:00<00:04, 109.01it/s]

epoch 436, loss 0.7102, acc 0.8802


100%|██████████| 466/466 [00:03<00:00, 123.22it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.66it/s]

epoch 437, loss 0.6933, acc 0.8849


100%|██████████| 466/466 [00:03<00:00, 125.31it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.40it/s]

epoch 438, loss 0.6683, acc 0.8927


100%|██████████| 466/466 [00:04<00:00, 111.09it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.13it/s]

epoch 439, loss 0.6351, acc 0.9031


100%|██████████| 466/466 [00:03<00:00, 119.03it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.24it/s]

epoch 440, loss 0.6349, acc 0.9025


100%|██████████| 466/466 [00:03<00:00, 120.00it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.24it/s]

epoch 441, loss 0.6261, acc 0.9059


100%|██████████| 466/466 [00:03<00:00, 117.92it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.00it/s]

epoch 442, loss 0.6247, acc 0.9064


100%|██████████| 466/466 [00:03<00:00, 121.00it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.33it/s]

epoch 443, loss 0.6124, acc 0.9096


100%|██████████| 466/466 [00:03<00:00, 119.02it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.34it/s]

epoch 444, loss 0.5996, acc 0.9136


100%|██████████| 466/466 [00:03<00:00, 118.12it/s]
  2%|▏         | 11/466 [00:00<00:04, 105.26it/s]

epoch 445, loss 0.6098, acc 0.9098


100%|██████████| 466/466 [00:03<00:00, 118.02it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.79it/s]

epoch 446, loss 0.6452, acc 0.8988


100%|██████████| 466/466 [00:03<00:00, 121.78it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.64it/s]

epoch 447, loss 0.6740, acc 0.8876


100%|██████████| 466/466 [00:04<00:00, 109.18it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.38it/s]

epoch 448, loss 0.6652, acc 0.8908


100%|██████████| 466/466 [00:04<00:00, 116.02it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.78it/s]

epoch 449, loss 0.6423, acc 0.8976


100%|██████████| 466/466 [00:03<00:00, 123.35it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.73it/s]

epoch 450, loss 0.5977, acc 0.9115


100%|██████████| 466/466 [00:03<00:00, 120.34it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.84it/s]

epoch 451, loss 0.5886, acc 0.9149


100%|██████████| 466/466 [00:03<00:00, 120.03it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.93it/s]

epoch 452, loss 0.6006, acc 0.9105


100%|██████████| 466/466 [00:04<00:00, 116.09it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.19it/s]

epoch 453, loss 0.5909, acc 0.9130


100%|██████████| 466/466 [00:03<00:00, 122.46it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.90it/s]

epoch 454, loss 0.5795, acc 0.9167


100%|██████████| 466/466 [00:03<00:00, 116.75it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.65it/s]

epoch 455, loss 0.5627, acc 0.9222


100%|██████████| 466/466 [00:04<00:00, 116.10it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.31it/s]

epoch 456, loss 0.5792, acc 0.9157


100%|██████████| 466/466 [00:04<00:00, 113.13it/s]
  2%|▏         | 11/466 [00:00<00:04, 102.57it/s]

epoch 457, loss 0.5959, acc 0.9100


100%|██████████| 466/466 [00:03<00:00, 118.14it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.01it/s]

epoch 458, loss 0.6020, acc 0.9076


100%|██████████| 466/466 [00:04<00:00, 112.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.40it/s]

epoch 459, loss 0.5720, acc 0.9174


100%|██████████| 466/466 [00:04<00:00, 114.07it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.19it/s]

epoch 460, loss 0.5963, acc 0.9083


100%|██████████| 466/466 [00:04<00:00, 112.33it/s]
  3%|▎         | 13/466 [00:00<00:03, 121.57it/s]

epoch 461, loss 0.5676, acc 0.9178


100%|██████████| 466/466 [00:04<00:00, 104.54it/s]
  3%|▎         | 12/466 [00:00<00:03, 115.36it/s]

epoch 462, loss 0.5577, acc 0.9218


100%|██████████| 466/466 [00:03<00:00, 117.36it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.14it/s]

epoch 463, loss 0.5615, acc 0.9200


100%|██████████| 466/466 [00:04<00:00, 116.06it/s]
  2%|▏         | 11/466 [00:00<00:04, 107.66it/s]

epoch 464, loss 0.5397, acc 0.9267


100%|██████████| 466/466 [00:04<00:00, 114.39it/s]
  2%|▏         | 11/466 [00:00<00:04, 104.22it/s]

epoch 465, loss 0.5386, acc 0.9265


100%|██████████| 466/466 [00:04<00:00, 114.36it/s]
  3%|▎         | 12/466 [00:00<00:03, 117.24it/s]

epoch 466, loss 0.5587, acc 0.9185


100%|██████████| 466/466 [00:04<00:00, 115.76it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.33it/s]

epoch 467, loss 0.5705, acc 0.9144


100%|██████████| 466/466 [00:03<00:00, 122.98it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.64it/s]

epoch 468, loss 0.5727, acc 0.9144


100%|██████████| 466/466 [00:03<00:00, 120.83it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.64it/s]

epoch 469, loss 0.5731, acc 0.9126


100%|██████████| 466/466 [00:03<00:00, 125.03it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.70it/s]

epoch 470, loss 0.5684, acc 0.9151


100%|██████████| 466/466 [00:03<00:00, 124.64it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.73it/s]

epoch 471, loss 0.5595, acc 0.9172


100%|██████████| 466/466 [00:03<00:00, 124.90it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.75it/s]

epoch 472, loss 0.5213, acc 0.9303


100%|██████████| 466/466 [00:03<00:00, 117.65it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.88it/s]

epoch 473, loss 0.5251, acc 0.9284


100%|██████████| 466/466 [00:03<00:00, 123.34it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.40it/s]

epoch 474, loss 0.5800, acc 0.9089


100%|██████████| 466/466 [00:04<00:00, 115.53it/s]
  2%|▏         | 11/466 [00:00<00:04, 103.37it/s]

epoch 475, loss 0.5638, acc 0.9134


100%|██████████| 466/466 [00:04<00:00, 109.60it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.80it/s]

epoch 476, loss 0.5607, acc 0.9144


100%|██████████| 466/466 [00:04<00:00, 115.69it/s]
  3%|▎         | 12/466 [00:00<00:03, 114.42it/s]

epoch 477, loss 0.5065, acc 0.9324


100%|██████████| 466/466 [00:03<00:00, 124.80it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.21it/s]

epoch 478, loss 0.5125, acc 0.9302


100%|██████████| 466/466 [00:04<00:00, 116.50it/s]
  3%|▎         | 12/466 [00:00<00:03, 113.63it/s]

epoch 479, loss 0.5006, acc 0.9337


100%|██████████| 466/466 [00:04<00:00, 113.96it/s]
  3%|▎         | 13/466 [00:00<00:03, 123.22it/s]

epoch 480, loss 0.4781, acc 0.9408


100%|██████████| 466/466 [00:03<00:00, 118.48it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.95it/s]

epoch 481, loss 0.4859, acc 0.9383


100%|██████████| 466/466 [00:03<00:00, 124.87it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.82it/s]

epoch 482, loss 0.4888, acc 0.9364


100%|██████████| 466/466 [00:04<00:00, 112.06it/s]
  2%|▏         | 11/466 [00:00<00:04, 108.27it/s]

epoch 483, loss 0.4809, acc 0.9397


100%|██████████| 466/466 [00:03<00:00, 122.89it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.21it/s]

epoch 484, loss 0.5609, acc 0.9129


100%|██████████| 466/466 [00:04<00:00, 109.45it/s]
  3%|▎         | 13/466 [00:00<00:03, 122.96it/s]

epoch 485, loss 0.5293, acc 0.9224


100%|██████████| 466/466 [00:03<00:00, 116.50it/s]
  3%|▎         | 13/466 [00:00<00:03, 125.75it/s]

epoch 486, loss 0.4975, acc 0.9321


100%|██████████| 466/466 [00:03<00:00, 119.25it/s]
  3%|▎         | 12/466 [00:00<00:03, 119.27it/s]

epoch 487, loss 0.4793, acc 0.9386


100%|██████████| 466/466 [00:03<00:00, 117.03it/s]
  3%|▎         | 13/466 [00:00<00:03, 126.93it/s]

epoch 488, loss 0.4617, acc 0.9435


100%|██████████| 466/466 [00:04<00:00, 111.97it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.81it/s]

epoch 489, loss 0.4509, acc 0.9470


100%|██████████| 466/466 [00:03<00:00, 116.71it/s]
  2%|▏         | 11/466 [00:00<00:04, 105.67it/s]

epoch 490, loss 0.4389, acc 0.9504


100%|██████████| 466/466 [00:03<00:00, 119.12it/s]
  5%|▌         | 24/466 [00:00<00:03, 119.87it/s]

epoch 491, loss 0.4301, acc 0.9526


100%|██████████| 466/466 [00:04<00:00, 112.36it/s]
  3%|▎         | 12/466 [00:00<00:04, 112.30it/s]

epoch 492, loss 0.4507, acc 0.9463


100%|██████████| 466/466 [00:03<00:00, 123.44it/s]
  3%|▎         | 13/466 [00:00<00:03, 124.49it/s]

epoch 493, loss 0.4597, acc 0.9426


100%|██████████| 466/466 [00:04<00:00, 108.53it/s]
  3%|▎         | 13/466 [00:00<00:03, 120.96it/s]

epoch 494, loss 0.4392, acc 0.9485


100%|██████████| 466/466 [00:03<00:00, 120.72it/s]
  2%|▏         | 11/466 [00:00<00:04, 100.62it/s]

epoch 495, loss 0.4397, acc 0.9482


100%|██████████| 466/466 [00:03<00:00, 117.48it/s]
  3%|▎         | 12/466 [00:00<00:03, 118.96it/s]

epoch 496, loss 0.4382, acc 0.9484


100%|██████████| 466/466 [00:03<00:00, 118.49it/s]
  2%|▏         | 10/466 [00:00<00:05, 91.16it/s]

epoch 497, loss 0.4353, acc 0.9493


100%|██████████| 466/466 [00:04<00:00, 113.31it/s]
  3%|▎         | 12/466 [00:00<00:03, 116.99it/s]

epoch 498, loss 0.4886, acc 0.9313


100%|██████████| 466/466 [00:03<00:00, 116.91it/s]

epoch 499, loss 0.4671, acc 0.9380





In [22]:
# Run the trained model on `test`, take the highest-scoring class along dim=1,
# and map that index back to its token via `idx_to_word`.
# NOTE(review): if the model contains dropout / batch-norm layers, call
# `model.eval()` before this cell -- the model definition is not visible here.
with torch.no_grad():  # inference only: no autograd graph needed
    scores = model(test.to(device))
    # .item() requires a one-element result (same constraint as the previous int(...)),
    # and replaces the legacy `.cpu().data` access.
    predicted_idx = torch.argmax(scores, dim=1).item()
idx_to_word[predicted_idx]

'of'