In [1]:
import os
import pandas as pd
import numpy as np
import torch
import dgl

Using backend: pytorch


In [2]:
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv

In [3]:
from sklearn.utils import shuffle
my_batch_size = 30

In [4]:
from dgl.data import DGLDataset

class MyDataset(DGLDataset):
    """
    Parameters
    -------------------------
    raw_dir: str
        Specifying the directory that already stores the input data.
    
    """
    _pos_directory= '../positive_graph_save/'
    _neg_directory= '../negative_graph_save/'
    def __init__(self, 
                 url=None,
                 raw_dir=None,
                 save_dir=None,
                 force_reload=False,
                 verbose=False):
        super(MyDataset, self).__init__(name='docking_classify',
                                        url=url,
                                        raw_dir=raw_dir,
                                        save_dir=save_dir,
                                        force_reload=force_reload,
                                        verbose=verbose)
    def download(self):
        pass

    #must be implemented
    def process(self):
        df_pos = pd.read_csv('./positive_dataset.csv')
        df_neg = pd.read_csv('./negative_dataset.csv')
        pos_graphs = df_pos['file_name']
        pos_labels = df_pos['label']
        neg_graphs = df_neg['file_name']
        neg_labels = df_neg['label']

        #half_batch = int(my_batch_size/2)
        self.graph_dataset = []
        self.graph_labels = []
        #negative graphs are more
        for i in range(len(neg_graphs)):
            self.graph_dataset.append(pos_graphs[i%len(pos_graphs)])
            self.graph_dataset.append(neg_graphs[i])
            self.graph_labels.append(torch.Tensor([1,0])) #positive
            self.graph_labels.append(torch.Tensor([0,1])) #negative
            
        self.df_dataset = pd.DataFrame({'file_name':self.graph_dataset, 'label':self.graph_labels})
        self.df_dataset = shuffle(self.df_dataset)
        #for i in range(len())

    
    #must be implemented
    def __getitem__(self, idx):
        """get one item by index
        
        Parameters
        ---------------
        idx: int
            Item index

        Returns
        ---------------
        (dgl.DGLGraph, Tensor)
        """
        graph = dgl.load_graphs(self.df_dataset['file_name'][idx.item()])[0] #idx.item():convert torch.Tensor to int
        #print(self.df_dataset['file_name'][idx.item()])
        label = self.df_dataset['label'][idx.item()]
        return graph[0], label[0].long()

    #must be implemented
    def __len__(self):
        #number of data examples
        return self.df_dataset.shape[0]
        

    def save(self):
        pass

    def load(self):
        pass

    def has_cache(self):
        pass



In [5]:
my_dataset = MyDataset()

In [6]:
from dgl.dataloading.pytorch import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(my_dataset)
print("dataset length:", num_examples)
num_train = int(num_examples*0.8)

train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))

train_dataloader = GraphDataLoader(my_dataset, sampler=train_sampler, batch_size=my_batch_size, drop_last=False)
test_dataloader = GraphDataLoader(my_dataset, sampler=test_sampler, batch_size=my_batch_size, drop_last=False)

dataset length: 14996


## Use $train\_dataloader$ and $test\_dataloader$ to get a batched graph with a batch size of $my\_batch\_size$

In [7]:
it = iter(train_dataloader)
batch = next(it)
print(batch)

[Graph(num_nodes=141309, num_edges=360564562,
      ndata_schemes={'h': Scheme(shape=(10,), dtype=torch.int64)}
      edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 1, 0])]


In [8]:
class GCN(nn.Module):
    def __init__(self, in_feats, hidden_size, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, hidden_size, allow_zero_in_degree=True)
        self.conv2 = GraphConv(hidden_size, num_classes, allow_zero_in_degree=True)

    def forward(self, g, inputs):
        h = self.conv1(g, inputs)
        h = F.relu(h)
        h = self.conv2(g, h)
        g.ndata['h'] = h
        return dgl.mean_nodes(g, 'h')
gnn = GCN(10, 16, 2)

In [None]:
import itertools
from tqdm import tqdm

optimizer = torch.optim.Adam(gnn.parameters(), lr=0.01)
all_logits = []
losses = []
temp = 1.0
for epoch in range(30):
    for batched_graph, labels in tqdm(train_dataloader):
        pred = gnn(batched_graph, batched_graph.ndata['h'].float())
        #print(pred.shape)
        #print(labels.shape)
        loss = F.cross_entropy(pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        temp = loss
        print("epochs:"+str(epoch)+"------------------------loss:"+str(loss))
    losses.append(temp)
    

  0%|          | 1/400 [00:09<1:01:48,  9.29s/it]

epochs:0------------------------loss:tensor(0.6892, grad_fn=<NllLossBackward>)


  0%|          | 2/400 [00:12<50:11,  7.57s/it]  

epochs:0------------------------loss:tensor(0.6912, grad_fn=<NllLossBackward>)


  1%|          | 3/400 [00:15<40:15,  6.08s/it]

epochs:0------------------------loss:tensor(0.7004, grad_fn=<NllLossBackward>)


  1%|          | 4/400 [00:22<42:11,  6.39s/it]

epochs:0------------------------loss:tensor(0.6940, grad_fn=<NllLossBackward>)


  1%|▏         | 5/400 [00:28<40:56,  6.22s/it]

epochs:0------------------------loss:tensor(0.6964, grad_fn=<NllLossBackward>)


  2%|▏         | 6/400 [00:32<36:05,  5.50s/it]

epochs:0------------------------loss:tensor(0.6946, grad_fn=<NllLossBackward>)


  2%|▏         | 7/400 [00:35<31:17,  4.78s/it]

epochs:0------------------------loss:tensor(0.6988, grad_fn=<NllLossBackward>)


  2%|▏         | 8/400 [00:38<28:59,  4.44s/it]

epochs:0------------------------loss:tensor(0.7030, grad_fn=<NllLossBackward>)


  2%|▏         | 9/400 [00:41<25:40,  3.94s/it]

epochs:0------------------------loss:tensor(0.6990, grad_fn=<NllLossBackward>)


  2%|▎         | 10/400 [00:43<22:12,  3.42s/it]

epochs:0------------------------loss:tensor(0.6945, grad_fn=<NllLossBackward>)


  3%|▎         | 11/400 [00:46<20:23,  3.15s/it]

epochs:0------------------------loss:tensor(0.6890, grad_fn=<NllLossBackward>)


  3%|▎         | 12/400 [00:51<23:32,  3.64s/it]

epochs:0------------------------loss:tensor(0.6869, grad_fn=<NllLossBackward>)


  3%|▎         | 13/400 [00:54<23:00,  3.57s/it]

epochs:0------------------------loss:tensor(0.6944, grad_fn=<NllLossBackward>)


  4%|▎         | 14/400 [00:58<24:06,  3.75s/it]

epochs:0------------------------loss:tensor(0.6895, grad_fn=<NllLossBackward>)


  4%|▍         | 15/400 [01:02<23:05,  3.60s/it]

epochs:0------------------------loss:tensor(0.6924, grad_fn=<NllLossBackward>)


  4%|▍         | 16/400 [01:04<21:28,  3.35s/it]

epochs:0------------------------loss:tensor(0.6903, grad_fn=<NllLossBackward>)


  4%|▍         | 17/400 [01:10<26:42,  4.18s/it]

epochs:0------------------------loss:tensor(0.6935, grad_fn=<NllLossBackward>)


  4%|▍         | 18/400 [01:13<23:16,  3.65s/it]

epochs:0------------------------loss:tensor(0.6894, grad_fn=<NllLossBackward>)


  5%|▍         | 19/400 [01:19<27:23,  4.31s/it]

epochs:0------------------------loss:tensor(0.6878, grad_fn=<NllLossBackward>)


  5%|▌         | 20/400 [01:21<22:55,  3.62s/it]

epochs:0------------------------loss:tensor(0.6915, grad_fn=<NllLossBackward>)


  5%|▌         | 21/400 [01:27<27:20,  4.33s/it]

epochs:0------------------------loss:tensor(0.6846, grad_fn=<NllLossBackward>)


  6%|▌         | 22/400 [01:29<23:04,  3.66s/it]

epochs:0------------------------loss:tensor(0.6981, grad_fn=<NllLossBackward>)


  6%|▌         | 23/400 [01:31<19:30,  3.10s/it]

epochs:0------------------------loss:tensor(0.6924, grad_fn=<NllLossBackward>)


  6%|▌         | 24/400 [01:37<25:13,  4.03s/it]

epochs:0------------------------loss:tensor(0.6928, grad_fn=<NllLossBackward>)


  6%|▋         | 25/400 [01:39<22:20,  3.57s/it]

epochs:0------------------------loss:tensor(0.6991, grad_fn=<NllLossBackward>)


  6%|▋         | 26/400 [01:41<18:51,  3.02s/it]

epochs:0------------------------loss:tensor(0.7027, grad_fn=<NllLossBackward>)


  7%|▋         | 27/400 [01:43<17:21,  2.79s/it]

epochs:0------------------------loss:tensor(0.6861, grad_fn=<NllLossBackward>)


  7%|▋         | 28/400 [01:51<26:17,  4.24s/it]

epochs:0------------------------loss:tensor(0.6848, grad_fn=<NllLossBackward>)


  7%|▋         | 29/400 [01:57<28:53,  4.67s/it]

epochs:0------------------------loss:tensor(0.6860, grad_fn=<NllLossBackward>)


  8%|▊         | 30/400 [01:59<23:58,  3.89s/it]

epochs:0------------------------loss:tensor(0.6872, grad_fn=<NllLossBackward>)


  8%|▊         | 31/400 [02:02<22:43,  3.70s/it]

epochs:0------------------------loss:tensor(0.6883, grad_fn=<NllLossBackward>)


  8%|▊         | 32/400 [02:04<20:21,  3.32s/it]

epochs:0------------------------loss:tensor(0.6829, grad_fn=<NllLossBackward>)


  8%|▊         | 33/400 [02:12<29:06,  4.76s/it]

epochs:0------------------------loss:tensor(0.6768, grad_fn=<NllLossBackward>)


  8%|▊         | 34/400 [02:14<24:03,  3.94s/it]

epochs:0------------------------loss:tensor(0.7178, grad_fn=<NllLossBackward>)


  9%|▉         | 35/400 [02:18<23:22,  3.84s/it]

epochs:0------------------------loss:tensor(0.6998, grad_fn=<NllLossBackward>)


  9%|▉         | 36/400 [02:21<21:54,  3.61s/it]

epochs:0------------------------loss:tensor(0.6992, grad_fn=<NllLossBackward>)


  9%|▉         | 37/400 [02:25<22:31,  3.72s/it]

epochs:0------------------------loss:tensor(0.6793, grad_fn=<NllLossBackward>)


 10%|▉         | 38/400 [02:27<19:28,  3.23s/it]

epochs:0------------------------loss:tensor(0.7030, grad_fn=<NllLossBackward>)


 10%|▉         | 39/400 [02:30<18:37,  3.09s/it]

epochs:0------------------------loss:tensor(0.6987, grad_fn=<NllLossBackward>)


 10%|█         | 40/400 [02:33<18:11,  3.03s/it]

epochs:0------------------------loss:tensor(0.6921, grad_fn=<NllLossBackward>)


 10%|█         | 41/400 [02:35<16:48,  2.81s/it]

epochs:0------------------------loss:tensor(0.6875, grad_fn=<NllLossBackward>)


 10%|█         | 42/400 [02:37<15:04,  2.53s/it]

epochs:0------------------------loss:tensor(0.6933, grad_fn=<NllLossBackward>)


 11%|█         | 43/400 [02:40<16:25,  2.76s/it]

epochs:0------------------------loss:tensor(0.6960, grad_fn=<NllLossBackward>)


 11%|█         | 44/400 [02:42<15:05,  2.54s/it]

epochs:0------------------------loss:tensor(0.6862, grad_fn=<NllLossBackward>)


 11%|█▏        | 45/400 [02:46<17:18,  2.93s/it]

epochs:0------------------------loss:tensor(0.6927, grad_fn=<NllLossBackward>)


 12%|█▏        | 46/400 [02:49<16:57,  2.87s/it]

epochs:0------------------------loss:tensor(0.6846, grad_fn=<NllLossBackward>)


 12%|█▏        | 47/400 [02:51<15:05,  2.56s/it]

epochs:0------------------------loss:tensor(0.6730, grad_fn=<NllLossBackward>)


 12%|█▏        | 48/400 [02:53<14:57,  2.55s/it]

epochs:0------------------------loss:tensor(0.6962, grad_fn=<NllLossBackward>)


 12%|█▏        | 49/400 [03:01<23:57,  4.09s/it]

epochs:0------------------------loss:tensor(0.6747, grad_fn=<NllLossBackward>)


 12%|█▎        | 50/400 [03:03<20:00,  3.43s/it]

epochs:0------------------------loss:tensor(0.7241, grad_fn=<NllLossBackward>)


 13%|█▎        | 51/400 [03:05<17:55,  3.08s/it]

epochs:0------------------------loss:tensor(0.6962, grad_fn=<NllLossBackward>)


 13%|█▎        | 52/400 [03:08<17:11,  2.96s/it]

epochs:0------------------------loss:tensor(0.6805, grad_fn=<NllLossBackward>)


 13%|█▎        | 53/400 [03:11<17:42,  3.06s/it]

epochs:0------------------------loss:tensor(0.7050, grad_fn=<NllLossBackward>)


 14%|█▎        | 54/400 [03:14<17:25,  3.02s/it]

epochs:0------------------------loss:tensor(0.6923, grad_fn=<NllLossBackward>)


 14%|█▍        | 55/400 [03:18<18:48,  3.27s/it]

epochs:0------------------------loss:tensor(0.6868, grad_fn=<NllLossBackward>)


 14%|█▍        | 56/400 [03:21<18:13,  3.18s/it]

epochs:0------------------------loss:tensor(0.6860, grad_fn=<NllLossBackward>)


 14%|█▍        | 57/400 [03:23<17:04,  2.99s/it]

epochs:0------------------------loss:tensor(0.7030, grad_fn=<NllLossBackward>)


 14%|█▍        | 58/400 [03:27<17:24,  3.05s/it]

epochs:0------------------------loss:tensor(0.6921, grad_fn=<NllLossBackward>)


 15%|█▍        | 59/400 [03:29<16:15,  2.86s/it]

epochs:0------------------------loss:tensor(0.6988, grad_fn=<NllLossBackward>)


 15%|█▌        | 60/400 [03:31<14:39,  2.59s/it]

epochs:0------------------------loss:tensor(0.6988, grad_fn=<NllLossBackward>)


 15%|█▌        | 61/400 [03:35<17:07,  3.03s/it]

epochs:0------------------------loss:tensor(0.6682, grad_fn=<NllLossBackward>)


 16%|█▌        | 62/400 [03:44<26:55,  4.78s/it]

epochs:0------------------------loss:tensor(0.6747, grad_fn=<NllLossBackward>)


 16%|█▌        | 63/400 [03:46<22:25,  3.99s/it]

epochs:0------------------------loss:tensor(0.6980, grad_fn=<NllLossBackward>)


 16%|█▌        | 64/400 [03:54<28:23,  5.07s/it]

epochs:0------------------------loss:tensor(0.6805, grad_fn=<NllLossBackward>)


 16%|█▋        | 65/400 [04:02<34:06,  6.11s/it]

epochs:0------------------------loss:tensor(0.6695, grad_fn=<NllLossBackward>)


 16%|█▋        | 66/400 [04:12<39:32,  7.10s/it]

epochs:0------------------------loss:tensor(0.6676, grad_fn=<NllLossBackward>)


 17%|█▋        | 67/400 [04:14<32:15,  5.81s/it]

epochs:0------------------------loss:tensor(0.7047, grad_fn=<NllLossBackward>)


 17%|█▋        | 68/400 [04:19<30:38,  5.54s/it]

epochs:0------------------------loss:tensor(0.6993, grad_fn=<NllLossBackward>)


 17%|█▋        | 69/400 [04:25<30:26,  5.52s/it]

epochs:0------------------------loss:tensor(0.7017, grad_fn=<NllLossBackward>)


 18%|█▊        | 70/400 [04:27<25:01,  4.55s/it]

epochs:0------------------------loss:tensor(0.6861, grad_fn=<NllLossBackward>)


 18%|█▊        | 71/400 [04:29<21:00,  3.83s/it]

epochs:0------------------------loss:tensor(0.6940, grad_fn=<NllLossBackward>)


 18%|█▊        | 72/400 [04:32<18:57,  3.47s/it]

epochs:0------------------------loss:tensor(0.6757, grad_fn=<NllLossBackward>)


 18%|█▊        | 73/400 [04:36<20:12,  3.71s/it]

epochs:0------------------------loss:tensor(0.6906, grad_fn=<NllLossBackward>)


 18%|█▊        | 74/400 [04:38<16:31,  3.04s/it]

epochs:0------------------------loss:tensor(0.6885, grad_fn=<NllLossBackward>)


 19%|█▉        | 75/400 [04:51<33:43,  6.22s/it]

epochs:0------------------------loss:tensor(0.6948, grad_fn=<NllLossBackward>)


 19%|█▉        | 76/400 [04:55<29:00,  5.37s/it]

epochs:0------------------------loss:tensor(0.6860, grad_fn=<NllLossBackward>)


 19%|█▉        | 77/400 [04:57<23:59,  4.46s/it]

epochs:0------------------------loss:tensor(0.6862, grad_fn=<NllLossBackward>)


 20%|█▉        | 78/400 [05:01<22:44,  4.24s/it]

epochs:0------------------------loss:tensor(0.6655, grad_fn=<NllLossBackward>)


 20%|█▉        | 79/400 [05:07<25:33,  4.78s/it]

epochs:0------------------------loss:tensor(0.6828, grad_fn=<NllLossBackward>)


 20%|██        | 80/400 [05:12<26:44,  5.01s/it]

epochs:0------------------------loss:tensor(0.6800, grad_fn=<NllLossBackward>)


 20%|██        | 81/400 [05:18<27:05,  5.10s/it]

epochs:0------------------------loss:tensor(0.6678, grad_fn=<NllLossBackward>)


 20%|██        | 82/400 [05:20<22:40,  4.28s/it]

epochs:0------------------------loss:tensor(0.6850, grad_fn=<NllLossBackward>)


 21%|██        | 83/400 [05:22<19:30,  3.69s/it]

epochs:0------------------------loss:tensor(0.6822, grad_fn=<NllLossBackward>)


 21%|██        | 84/400 [05:25<17:51,  3.39s/it]

epochs:0------------------------loss:tensor(0.6828, grad_fn=<NllLossBackward>)


 21%|██▏       | 85/400 [05:29<18:14,  3.47s/it]

epochs:0------------------------loss:tensor(0.6700, grad_fn=<NllLossBackward>)


 22%|██▏       | 86/400 [05:33<19:46,  3.78s/it]

epochs:0------------------------loss:tensor(0.6800, grad_fn=<NllLossBackward>)


 22%|██▏       | 87/400 [05:40<23:57,  4.59s/it]

epochs:0------------------------loss:tensor(0.6477, grad_fn=<NllLossBackward>)


 22%|██▏       | 88/400 [05:41<18:54,  3.64s/it]

epochs:0------------------------loss:tensor(0.6729, grad_fn=<NllLossBackward>)


 22%|██▏       | 89/400 [05:44<17:13,  3.32s/it]

epochs:0------------------------loss:tensor(0.6652, grad_fn=<NllLossBackward>)


 22%|██▎       | 90/400 [05:47<16:56,  3.28s/it]

epochs:0------------------------loss:tensor(0.7190, grad_fn=<NllLossBackward>)


 23%|██▎       | 91/400 [05:50<16:07,  3.13s/it]

epochs:0------------------------loss:tensor(0.6448, grad_fn=<NllLossBackward>)


 23%|██▎       | 92/400 [05:59<25:39,  5.00s/it]

epochs:0------------------------loss:tensor(0.7032, grad_fn=<NllLossBackward>)


 23%|██▎       | 93/400 [06:09<33:24,  6.53s/it]

epochs:0------------------------loss:tensor(0.6965, grad_fn=<NllLossBackward>)


 24%|██▎       | 94/400 [06:12<28:00,  5.49s/it]

epochs:0------------------------loss:tensor(0.6680, grad_fn=<NllLossBackward>)


 24%|██▍       | 95/400 [06:15<23:24,  4.60s/it]

epochs:0------------------------loss:tensor(0.6517, grad_fn=<NllLossBackward>)


 24%|██▍       | 96/400 [06:17<19:54,  3.93s/it]

epochs:0------------------------loss:tensor(0.6755, grad_fn=<NllLossBackward>)


 24%|██▍       | 97/400 [06:24<23:52,  4.73s/it]

epochs:0------------------------loss:tensor(0.6747, grad_fn=<NllLossBackward>)


 24%|██▍       | 98/400 [06:27<22:33,  4.48s/it]

epochs:0------------------------loss:tensor(0.7292, grad_fn=<NllLossBackward>)


 25%|██▍       | 99/400 [06:30<20:03,  4.00s/it]

epochs:0------------------------loss:tensor(0.6694, grad_fn=<NllLossBackward>)


 25%|██▌       | 100/400 [06:34<19:41,  3.94s/it]

epochs:0------------------------loss:tensor(0.6771, grad_fn=<NllLossBackward>)


 25%|██▌       | 101/400 [06:37<17:15,  3.46s/it]

epochs:0------------------------loss:tensor(0.6461, grad_fn=<NllLossBackward>)


 26%|██▌       | 102/400 [06:39<15:20,  3.09s/it]

epochs:0------------------------loss:tensor(0.6756, grad_fn=<NllLossBackward>)


 26%|██▌       | 103/400 [06:41<14:30,  2.93s/it]

epochs:0------------------------loss:tensor(0.6929, grad_fn=<NllLossBackward>)


 26%|██▌       | 104/400 [06:44<13:50,  2.81s/it]

epochs:0------------------------loss:tensor(0.6940, grad_fn=<NllLossBackward>)


 26%|██▋       | 105/400 [06:46<12:40,  2.58s/it]

epochs:0------------------------loss:tensor(0.6934, grad_fn=<NllLossBackward>)


 26%|██▋       | 106/400 [06:50<14:42,  3.00s/it]

epochs:0------------------------loss:tensor(0.7070, grad_fn=<NllLossBackward>)


 27%|██▋       | 107/400 [06:53<14:13,  2.91s/it]

epochs:0------------------------loss:tensor(0.7020, grad_fn=<NllLossBackward>)


 27%|██▋       | 108/400 [06:55<13:23,  2.75s/it]

epochs:0------------------------loss:tensor(0.6800, grad_fn=<NllLossBackward>)


 27%|██▋       | 109/400 [07:00<16:31,  3.41s/it]

epochs:0------------------------loss:tensor(0.6720, grad_fn=<NllLossBackward>)


 28%|██▊       | 110/400 [07:02<14:14,  2.94s/it]

epochs:0------------------------loss:tensor(0.6771, grad_fn=<NllLossBackward>)


 28%|██▊       | 111/400 [07:06<15:43,  3.26s/it]

epochs:0------------------------loss:tensor(0.6830, grad_fn=<NllLossBackward>)


 28%|██▊       | 112/400 [07:09<16:04,  3.35s/it]

epochs:0------------------------loss:tensor(0.6764, grad_fn=<NllLossBackward>)


 28%|██▊       | 113/400 [07:12<15:22,  3.21s/it]

epochs:0------------------------loss:tensor(0.6862, grad_fn=<NllLossBackward>)


 28%|██▊       | 114/400 [07:16<15:35,  3.27s/it]

epochs:0------------------------loss:tensor(0.6913, grad_fn=<NllLossBackward>)


 29%|██▉       | 115/400 [07:18<13:41,  2.88s/it]

epochs:0------------------------loss:tensor(0.6873, grad_fn=<NllLossBackward>)


 29%|██▉       | 116/400 [07:23<17:09,  3.63s/it]

epochs:0------------------------loss:tensor(0.6963, grad_fn=<NllLossBackward>)


 29%|██▉       | 117/400 [07:25<15:13,  3.23s/it]

epochs:0------------------------loss:tensor(0.6694, grad_fn=<NllLossBackward>)


 30%|██▉       | 118/400 [07:28<13:54,  2.96s/it]

epochs:0------------------------loss:tensor(0.6719, grad_fn=<NllLossBackward>)


 30%|██▉       | 119/400 [07:32<15:45,  3.37s/it]

epochs:0------------------------loss:tensor(0.6776, grad_fn=<NllLossBackward>)


 30%|███       | 120/400 [07:35<15:49,  3.39s/it]

epochs:0------------------------loss:tensor(0.6555, grad_fn=<NllLossBackward>)


 30%|███       | 121/400 [07:37<13:22,  2.88s/it]

epochs:0------------------------loss:tensor(0.6675, grad_fn=<NllLossBackward>)


 30%|███       | 122/400 [07:39<11:54,  2.57s/it]

epochs:0------------------------loss:tensor(0.6907, grad_fn=<NllLossBackward>)


 31%|███       | 123/400 [07:42<12:43,  2.76s/it]

epochs:0------------------------loss:tensor(0.6589, grad_fn=<NllLossBackward>)


 31%|███       | 124/400 [07:46<13:52,  3.02s/it]

epochs:0------------------------loss:tensor(0.6754, grad_fn=<NllLossBackward>)


 31%|███▏      | 125/400 [07:48<12:34,  2.74s/it]

epochs:0------------------------loss:tensor(0.6806, grad_fn=<NllLossBackward>)


 32%|███▏      | 126/400 [07:50<12:16,  2.69s/it]

epochs:0------------------------loss:tensor(0.6679, grad_fn=<NllLossBackward>)


 32%|███▏      | 127/400 [07:52<11:08,  2.45s/it]

epochs:0------------------------loss:tensor(0.6652, grad_fn=<NllLossBackward>)


 32%|███▏      | 128/400 [07:55<12:01,  2.65s/it]

epochs:0------------------------loss:tensor(0.6647, grad_fn=<NllLossBackward>)


 32%|███▏      | 129/400 [07:59<13:22,  2.96s/it]

epochs:0------------------------loss:tensor(0.7389, grad_fn=<NllLossBackward>)


 32%|███▎      | 130/400 [08:04<15:48,  3.51s/it]

epochs:0------------------------loss:tensor(0.7240, grad_fn=<NllLossBackward>)


 33%|███▎      | 131/400 [08:07<14:45,  3.29s/it]

epochs:0------------------------loss:tensor(0.6900, grad_fn=<NllLossBackward>)


 33%|███▎      | 132/400 [08:11<15:44,  3.53s/it]

epochs:0------------------------loss:tensor(0.6773, grad_fn=<NllLossBackward>)


 33%|███▎      | 133/400 [08:16<18:31,  4.16s/it]

epochs:0------------------------loss:tensor(0.6949, grad_fn=<NllLossBackward>)


 34%|███▎      | 134/400 [08:20<17:14,  3.89s/it]

epochs:0------------------------loss:tensor(0.6894, grad_fn=<NllLossBackward>)


 34%|███▍      | 135/400 [08:21<14:33,  3.30s/it]

epochs:0------------------------loss:tensor(0.6939, grad_fn=<NllLossBackward>)


 34%|███▍      | 136/400 [08:24<13:12,  3.00s/it]

epochs:0------------------------loss:tensor(0.6653, grad_fn=<NllLossBackward>)


 34%|███▍      | 137/400 [08:27<13:25,  3.06s/it]

epochs:0------------------------loss:tensor(0.6730, grad_fn=<NllLossBackward>)


 34%|███▍      | 138/400 [08:34<19:00,  4.35s/it]

epochs:0------------------------loss:tensor(0.6259, grad_fn=<NllLossBackward>)


 35%|███▍      | 139/400 [08:37<16:02,  3.69s/it]

epochs:0------------------------loss:tensor(0.6693, grad_fn=<NllLossBackward>)


 35%|███▌      | 140/400 [08:38<13:17,  3.07s/it]

epochs:0------------------------loss:tensor(0.7396, grad_fn=<NllLossBackward>)


 35%|███▌      | 141/400 [08:40<11:19,  2.63s/it]

epochs:0------------------------loss:tensor(0.6992, grad_fn=<NllLossBackward>)


 36%|███▌      | 142/400 [08:45<15:04,  3.51s/it]

epochs:0------------------------loss:tensor(0.6594, grad_fn=<NllLossBackward>)


 36%|███▌      | 143/400 [08:47<12:32,  2.93s/it]

epochs:0------------------------loss:tensor(0.7722, grad_fn=<NllLossBackward>)


 36%|███▌      | 144/400 [08:49<11:27,  2.69s/it]

epochs:0------------------------loss:tensor(0.6450, grad_fn=<NllLossBackward>)


 36%|███▋      | 145/400 [08:52<11:12,  2.64s/it]

epochs:0------------------------loss:tensor(0.7305, grad_fn=<NllLossBackward>)


 36%|███▋      | 146/400 [09:01<20:23,  4.82s/it]

epochs:0------------------------loss:tensor(0.6981, grad_fn=<NllLossBackward>)


 37%|███▋      | 147/400 [09:05<18:51,  4.47s/it]

epochs:0------------------------loss:tensor(0.6199, grad_fn=<NllLossBackward>)


 37%|███▋      | 148/400 [09:08<16:33,  3.94s/it]

epochs:0------------------------loss:tensor(0.6619, grad_fn=<NllLossBackward>)


 37%|███▋      | 149/400 [09:10<14:52,  3.56s/it]

epochs:0------------------------loss:tensor(0.7003, grad_fn=<NllLossBackward>)


 38%|███▊      | 150/400 [09:17<19:09,  4.60s/it]

epochs:0------------------------loss:tensor(0.6701, grad_fn=<NllLossBackward>)


 38%|███▊      | 151/400 [09:19<15:32,  3.74s/it]

epochs:0------------------------loss:tensor(0.6634, grad_fn=<NllLossBackward>)


 38%|███▊      | 152/400 [09:30<23:42,  5.74s/it]

epochs:0------------------------loss:tensor(0.7152, grad_fn=<NllLossBackward>)


 38%|███▊      | 153/400 [09:33<20:27,  4.97s/it]

epochs:0------------------------loss:tensor(0.6871, grad_fn=<NllLossBackward>)


 38%|███▊      | 154/400 [09:36<17:58,  4.38s/it]

epochs:0------------------------loss:tensor(0.7527, grad_fn=<NllLossBackward>)


 39%|███▉      | 155/400 [09:38<15:25,  3.78s/it]

epochs:0------------------------loss:tensor(0.6913, grad_fn=<NllLossBackward>)


 39%|███▉      | 156/400 [09:40<13:20,  3.28s/it]

epochs:0------------------------loss:tensor(0.7551, grad_fn=<NllLossBackward>)


 39%|███▉      | 157/400 [09:46<16:17,  4.02s/it]

epochs:0------------------------loss:tensor(0.7247, grad_fn=<NllLossBackward>)


 40%|███▉      | 158/400 [09:50<15:44,  3.90s/it]

epochs:0------------------------loss:tensor(0.6860, grad_fn=<NllLossBackward>)


 40%|███▉      | 159/400 [09:53<15:28,  3.85s/it]

epochs:0------------------------loss:tensor(0.7113, grad_fn=<NllLossBackward>)


 40%|████      | 160/400 [09:57<14:38,  3.66s/it]

epochs:0------------------------loss:tensor(0.6673, grad_fn=<NllLossBackward>)


 40%|████      | 161/400 [10:03<17:51,  4.48s/it]

epochs:0------------------------loss:tensor(0.7109, grad_fn=<NllLossBackward>)


 40%|████      | 162/400 [10:08<18:40,  4.71s/it]

epochs:0------------------------loss:tensor(0.6696, grad_fn=<NllLossBackward>)


 41%|████      | 163/400 [10:12<17:38,  4.47s/it]

epochs:0------------------------loss:tensor(0.6866, grad_fn=<NllLossBackward>)


 41%|████      | 164/400 [10:14<14:25,  3.67s/it]

epochs:0------------------------loss:tensor(0.6922, grad_fn=<NllLossBackward>)


 41%|████▏     | 165/400 [10:24<21:16,  5.43s/it]

epochs:0------------------------loss:tensor(0.6537, grad_fn=<NllLossBackward>)


 42%|████▏     | 166/400 [10:29<20:48,  5.33s/it]

epochs:0------------------------loss:tensor(0.6689, grad_fn=<NllLossBackward>)


 42%|████▏     | 167/400 [10:42<30:07,  7.76s/it]

epochs:0------------------------loss:tensor(0.6320, grad_fn=<NllLossBackward>)


 42%|████▏     | 168/400 [10:45<24:54,  6.44s/it]

epochs:0------------------------loss:tensor(0.6933, grad_fn=<NllLossBackward>)


 42%|████▏     | 169/400 [10:47<19:39,  5.11s/it]

epochs:0------------------------loss:tensor(0.6343, grad_fn=<NllLossBackward>)


 42%|████▎     | 170/400 [10:50<17:09,  4.48s/it]

epochs:0------------------------loss:tensor(0.7026, grad_fn=<NllLossBackward>)


 43%|████▎     | 171/400 [11:05<29:08,  7.64s/it]

epochs:0------------------------loss:tensor(0.6623, grad_fn=<NllLossBackward>)


 43%|████▎     | 172/400 [11:09<23:51,  6.28s/it]

epochs:0------------------------loss:tensor(0.6917, grad_fn=<NllLossBackward>)


 43%|████▎     | 173/400 [11:11<19:35,  5.18s/it]

epochs:0------------------------loss:tensor(0.6515, grad_fn=<NllLossBackward>)


 44%|████▎     | 174/400 [11:15<17:39,  4.69s/it]

epochs:0------------------------loss:tensor(0.6985, grad_fn=<NllLossBackward>)


 44%|████▍     | 175/400 [11:17<15:13,  4.06s/it]

epochs:0------------------------loss:tensor(0.6860, grad_fn=<NllLossBackward>)


 44%|████▍     | 176/400 [11:22<16:08,  4.33s/it]

epochs:0------------------------loss:tensor(0.6921, grad_fn=<NllLossBackward>)


 44%|████▍     | 177/400 [11:26<15:12,  4.09s/it]

epochs:0------------------------loss:tensor(0.6947, grad_fn=<NllLossBackward>)


 44%|████▍     | 178/400 [11:34<19:32,  5.28s/it]

epochs:0------------------------loss:tensor(0.6710, grad_fn=<NllLossBackward>)


 45%|████▍     | 179/400 [11:44<25:23,  6.90s/it]

epochs:0------------------------loss:tensor(0.6963, grad_fn=<NllLossBackward>)


 45%|████▌     | 180/400 [11:46<19:19,  5.27s/it]

epochs:0------------------------loss:tensor(0.6899, grad_fn=<NllLossBackward>)


 45%|████▌     | 181/400 [11:50<17:33,  4.81s/it]

epochs:0------------------------loss:tensor(0.6601, grad_fn=<NllLossBackward>)


 46%|████▌     | 182/400 [11:53<16:01,  4.41s/it]

epochs:0------------------------loss:tensor(0.6624, grad_fn=<NllLossBackward>)


 46%|████▌     | 183/400 [11:57<15:14,  4.22s/it]

epochs:0------------------------loss:tensor(0.6729, grad_fn=<NllLossBackward>)


 46%|████▌     | 184/400 [12:00<14:17,  3.97s/it]

epochs:0------------------------loss:tensor(0.6950, grad_fn=<NllLossBackward>)


 46%|████▋     | 185/400 [12:05<15:21,  4.29s/it]

epochs:0------------------------loss:tensor(0.6684, grad_fn=<NllLossBackward>)


 46%|████▋     | 186/400 [12:10<15:35,  4.37s/it]

epochs:0------------------------loss:tensor(0.6597, grad_fn=<NllLossBackward>)


 47%|████▋     | 187/400 [12:13<14:33,  4.10s/it]

epochs:0------------------------loss:tensor(0.6505, grad_fn=<NllLossBackward>)


 47%|████▋     | 188/400 [12:16<12:39,  3.58s/it]

epochs:0------------------------loss:tensor(0.6452, grad_fn=<NllLossBackward>)


 47%|████▋     | 189/400 [12:18<11:30,  3.27s/it]

epochs:0------------------------loss:tensor(0.6424, grad_fn=<NllLossBackward>)


 48%|████▊     | 190/400 [12:21<10:19,  2.95s/it]

epochs:0------------------------loss:tensor(0.6867, grad_fn=<NllLossBackward>)


 48%|████▊     | 191/400 [12:23<09:39,  2.77s/it]

epochs:0------------------------loss:tensor(0.6733, grad_fn=<NllLossBackward>)


 48%|████▊     | 192/400 [12:26<09:30,  2.74s/it]

epochs:0------------------------loss:tensor(0.6954, grad_fn=<NllLossBackward>)


 48%|████▊     | 193/400 [12:28<08:55,  2.59s/it]

epochs:0------------------------loss:tensor(0.6894, grad_fn=<NllLossBackward>)


 48%|████▊     | 194/400 [12:32<10:15,  2.99s/it]

epochs:0------------------------loss:tensor(0.6498, grad_fn=<NllLossBackward>)


 49%|████▉     | 195/400 [12:36<11:13,  3.28s/it]

epochs:0------------------------loss:tensor(0.6930, grad_fn=<NllLossBackward>)


 49%|████▉     | 196/400 [12:38<09:57,  2.93s/it]

epochs:0------------------------loss:tensor(0.6962, grad_fn=<NllLossBackward>)


 49%|████▉     | 197/400 [12:41<09:46,  2.89s/it]

epochs:0------------------------loss:tensor(0.6515, grad_fn=<NllLossBackward>)


 50%|████▉     | 198/400 [12:42<08:21,  2.48s/it]

epochs:0------------------------loss:tensor(0.6920, grad_fn=<NllLossBackward>)


 50%|████▉     | 199/400 [12:47<10:25,  3.11s/it]

epochs:0------------------------loss:tensor(0.6386, grad_fn=<NllLossBackward>)


 50%|█████     | 200/400 [12:50<10:44,  3.22s/it]

epochs:0------------------------loss:tensor(0.6311, grad_fn=<NllLossBackward>)


 50%|█████     | 201/400 [12:52<09:22,  2.82s/it]

epochs:0------------------------loss:tensor(0.6776, grad_fn=<NllLossBackward>)


 50%|█████     | 202/400 [12:54<08:36,  2.61s/it]

epochs:0------------------------loss:tensor(0.6607, grad_fn=<NllLossBackward>)


 51%|█████     | 203/400 [12:56<07:57,  2.42s/it]

epochs:0------------------------loss:tensor(0.6207, grad_fn=<NllLossBackward>)


 51%|█████     | 204/400 [13:01<10:39,  3.26s/it]

epochs:0------------------------loss:tensor(0.6037, grad_fn=<NllLossBackward>)


 51%|█████▏    | 205/400 [13:09<14:55,  4.59s/it]

epochs:0------------------------loss:tensor(0.6614, grad_fn=<NllLossBackward>)


 52%|█████▏    | 206/400 [13:12<13:01,  4.03s/it]

epochs:0------------------------loss:tensor(0.6296, grad_fn=<NllLossBackward>)


 52%|█████▏    | 207/400 [13:15<11:51,  3.69s/it]

epochs:0------------------------loss:tensor(0.6221, grad_fn=<NllLossBackward>)


 52%|█████▏    | 208/400 [13:19<12:03,  3.77s/it]

epochs:0------------------------loss:tensor(0.7121, grad_fn=<NllLossBackward>)


 52%|█████▏    | 209/400 [13:21<10:44,  3.38s/it]

epochs:0------------------------loss:tensor(0.6926, grad_fn=<NllLossBackward>)


 52%|█████▎    | 210/400 [13:25<11:32,  3.64s/it]

epochs:0------------------------loss:tensor(0.6327, grad_fn=<NllLossBackward>)


 53%|█████▎    | 211/400 [13:28<10:40,  3.39s/it]

epochs:0------------------------loss:tensor(0.6988, grad_fn=<NllLossBackward>)


 53%|█████▎    | 212/400 [13:30<09:07,  2.91s/it]

epochs:0------------------------loss:tensor(0.6699, grad_fn=<NllLossBackward>)


 53%|█████▎    | 213/400 [13:32<08:30,  2.73s/it]

epochs:0------------------------loss:tensor(0.7012, grad_fn=<NllLossBackward>)


 54%|█████▎    | 214/400 [13:37<10:12,  3.29s/it]

epochs:0------------------------loss:tensor(0.7319, grad_fn=<NllLossBackward>)


 54%|█████▍    | 215/400 [13:42<12:10,  3.95s/it]

epochs:0------------------------loss:tensor(0.7943, grad_fn=<NllLossBackward>)


 54%|█████▍    | 216/400 [13:45<10:30,  3.42s/it]

epochs:0------------------------loss:tensor(0.7050, grad_fn=<NllLossBackward>)


 54%|█████▍    | 217/400 [13:47<09:30,  3.12s/it]

epochs:0------------------------loss:tensor(0.6892, grad_fn=<NllLossBackward>)


 55%|█████▍    | 218/400 [13:50<09:18,  3.07s/it]

epochs:0------------------------loss:tensor(0.7024, grad_fn=<NllLossBackward>)


 55%|█████▍    | 219/400 [13:52<08:44,  2.90s/it]

epochs:0------------------------loss:tensor(0.6926, grad_fn=<NllLossBackward>)


 55%|█████▌    | 220/400 [13:56<09:42,  3.24s/it]

epochs:0------------------------loss:tensor(0.6393, grad_fn=<NllLossBackward>)


 55%|█████▌    | 221/400 [13:58<08:01,  2.69s/it]

epochs:0------------------------loss:tensor(0.6954, grad_fn=<NllLossBackward>)


 56%|█████▌    | 222/400 [14:00<07:57,  2.68s/it]

epochs:0------------------------loss:tensor(0.7147, grad_fn=<NllLossBackward>)


 56%|█████▌    | 223/400 [14:12<15:39,  5.31s/it]

epochs:0------------------------loss:tensor(0.6456, grad_fn=<NllLossBackward>)


 56%|█████▌    | 224/400 [14:17<15:13,  5.19s/it]

epochs:0------------------------loss:tensor(0.7098, grad_fn=<NllLossBackward>)


 56%|█████▋    | 225/400 [14:20<13:08,  4.50s/it]

epochs:0------------------------loss:tensor(0.7153, grad_fn=<NllLossBackward>)


 56%|█████▋    | 226/400 [14:25<13:49,  4.77s/it]

epochs:0------------------------loss:tensor(0.6303, grad_fn=<NllLossBackward>)


 57%|█████▋    | 227/400 [14:27<11:02,  3.83s/it]

epochs:0------------------------loss:tensor(0.6831, grad_fn=<NllLossBackward>)


 57%|█████▋    | 228/400 [14:29<09:33,  3.33s/it]

epochs:0------------------------loss:tensor(0.7110, grad_fn=<NllLossBackward>)


 57%|█████▋    | 229/400 [14:33<09:44,  3.42s/it]

epochs:0------------------------loss:tensor(0.6403, grad_fn=<NllLossBackward>)


 57%|█████▊    | 230/400 [14:34<08:24,  2.97s/it]

epochs:0------------------------loss:tensor(0.6646, grad_fn=<NllLossBackward>)


 58%|█████▊    | 231/400 [14:38<08:52,  3.15s/it]

epochs:0------------------------loss:tensor(0.6709, grad_fn=<NllLossBackward>)


 58%|█████▊    | 232/400 [14:42<09:53,  3.53s/it]

epochs:0------------------------loss:tensor(0.6733, grad_fn=<NllLossBackward>)


 58%|█████▊    | 233/400 [14:46<10:07,  3.64s/it]

epochs:0------------------------loss:tensor(0.6681, grad_fn=<NllLossBackward>)


 58%|█████▊    | 234/400 [14:51<11:05,  4.01s/it]

epochs:0------------------------loss:tensor(0.6593, grad_fn=<NllLossBackward>)


 59%|█████▉    | 235/400 [14:53<09:19,  3.39s/it]

epochs:0------------------------loss:tensor(0.6660, grad_fn=<NllLossBackward>)


 59%|█████▉    | 236/400 [14:57<09:52,  3.61s/it]

epochs:0------------------------loss:tensor(0.6615, grad_fn=<NllLossBackward>)


 59%|█████▉    | 237/400 [14:59<08:26,  3.11s/it]

epochs:0------------------------loss:tensor(0.6725, grad_fn=<NllLossBackward>)


 60%|█████▉    | 238/400 [15:01<07:18,  2.70s/it]

epochs:0------------------------loss:tensor(0.6610, grad_fn=<NllLossBackward>)


 60%|█████▉    | 239/400 [15:03<06:27,  2.41s/it]

epochs:0------------------------loss:tensor(0.6879, grad_fn=<NllLossBackward>)


 60%|██████    | 240/400 [15:05<06:28,  2.43s/it]

epochs:0------------------------loss:tensor(0.6887, grad_fn=<NllLossBackward>)


 60%|██████    | 241/400 [15:07<05:40,  2.14s/it]

epochs:0------------------------loss:tensor(0.6166, grad_fn=<NllLossBackward>)


 60%|██████    | 242/400 [15:11<07:04,  2.69s/it]

epochs:0------------------------loss:tensor(0.5893, grad_fn=<NllLossBackward>)


 61%|██████    | 243/400 [15:23<14:57,  5.72s/it]

epochs:0------------------------loss:tensor(0.6629, grad_fn=<NllLossBackward>)


 61%|██████    | 244/400 [15:26<12:38,  4.86s/it]

epochs:0------------------------loss:tensor(0.6355, grad_fn=<NllLossBackward>)


 61%|██████▏   | 245/400 [15:30<12:00,  4.65s/it]

epochs:0------------------------loss:tensor(0.7093, grad_fn=<NllLossBackward>)


 62%|██████▏   | 246/400 [15:38<14:29,  5.65s/it]

epochs:0------------------------loss:tensor(0.7356, grad_fn=<NllLossBackward>)


 62%|██████▏   | 247/400 [15:41<11:49,  4.64s/it]

epochs:0------------------------loss:tensor(0.6800, grad_fn=<NllLossBackward>)


 62%|██████▏   | 248/400 [15:42<09:25,  3.72s/it]

epochs:0------------------------loss:tensor(0.6738, grad_fn=<NllLossBackward>)


 62%|██████▏   | 249/400 [15:47<10:18,  4.10s/it]

epochs:0------------------------loss:tensor(0.6545, grad_fn=<NllLossBackward>)


 62%|██████▎   | 250/400 [15:53<11:44,  4.70s/it]

epochs:0------------------------loss:tensor(0.6789, grad_fn=<NllLossBackward>)


 63%|██████▎   | 251/400 [15:56<10:04,  4.06s/it]

epochs:0------------------------loss:tensor(0.6540, grad_fn=<NllLossBackward>)


 63%|██████▎   | 252/400 [16:04<13:13,  5.36s/it]

epochs:0------------------------loss:tensor(0.6399, grad_fn=<NllLossBackward>)


 63%|██████▎   | 253/400 [16:06<10:41,  4.37s/it]

epochs:0------------------------loss:tensor(0.6797, grad_fn=<NllLossBackward>)


 64%|██████▎   | 254/400 [16:11<10:39,  4.38s/it]

epochs:0------------------------loss:tensor(0.6656, grad_fn=<NllLossBackward>)


 64%|██████▍   | 255/400 [16:19<13:19,  5.52s/it]

epochs:0------------------------loss:tensor(0.6221, grad_fn=<NllLossBackward>)


 64%|██████▍   | 256/400 [16:21<10:30,  4.38s/it]

epochs:0------------------------loss:tensor(0.6725, grad_fn=<NllLossBackward>)


 64%|██████▍   | 257/400 [16:23<09:01,  3.78s/it]

epochs:0------------------------loss:tensor(0.6188, grad_fn=<NllLossBackward>)


 64%|██████▍   | 258/400 [16:28<09:34,  4.04s/it]

epochs:0------------------------loss:tensor(0.6823, grad_fn=<NllLossBackward>)


 65%|██████▍   | 259/400 [16:30<08:04,  3.43s/it]

epochs:0------------------------loss:tensor(0.6416, grad_fn=<NllLossBackward>)


 65%|██████▌   | 260/400 [16:32<06:55,  2.97s/it]

epochs:0------------------------loss:tensor(0.6873, grad_fn=<NllLossBackward>)


 65%|██████▌   | 261/400 [16:34<06:39,  2.88s/it]

epochs:0------------------------loss:tensor(0.6272, grad_fn=<NllLossBackward>)


 66%|██████▌   | 262/400 [16:39<07:55,  3.44s/it]

epochs:0------------------------loss:tensor(0.6424, grad_fn=<NllLossBackward>)


 66%|██████▌   | 263/400 [16:41<06:58,  3.05s/it]

epochs:0------------------------loss:tensor(0.6861, grad_fn=<NllLossBackward>)


 66%|██████▌   | 264/400 [16:43<06:18,  2.78s/it]

epochs:0------------------------loss:tensor(0.6618, grad_fn=<NllLossBackward>)


 66%|██████▋   | 265/400 [16:45<05:48,  2.58s/it]

epochs:0------------------------loss:tensor(0.6737, grad_fn=<NllLossBackward>)


 66%|██████▋   | 266/400 [16:50<07:15,  3.25s/it]

epochs:0------------------------loss:tensor(0.6348, grad_fn=<NllLossBackward>)


 67%|██████▋   | 267/400 [16:52<06:14,  2.82s/it]

epochs:0------------------------loss:tensor(0.6794, grad_fn=<NllLossBackward>)


 67%|██████▋   | 268/400 [16:56<06:50,  3.11s/it]

epochs:0------------------------loss:tensor(0.6630, grad_fn=<NllLossBackward>)


 67%|██████▋   | 269/400 [16:58<06:15,  2.87s/it]

epochs:0------------------------loss:tensor(0.6805, grad_fn=<NllLossBackward>)


 68%|██████▊   | 270/400 [17:01<05:56,  2.74s/it]

epochs:0------------------------loss:tensor(0.6851, grad_fn=<NllLossBackward>)


 68%|██████▊   | 271/400 [17:03<05:30,  2.57s/it]

epochs:0------------------------loss:tensor(0.6990, grad_fn=<NllLossBackward>)


 68%|██████▊   | 272/400 [17:07<06:24,  3.01s/it]

epochs:0------------------------loss:tensor(0.6437, grad_fn=<NllLossBackward>)


 68%|██████▊   | 273/400 [17:08<05:18,  2.51s/it]

epochs:0------------------------loss:tensor(0.6960, grad_fn=<NllLossBackward>)


 68%|██████▊   | 274/400 [17:12<06:03,  2.89s/it]

epochs:0------------------------loss:tensor(0.6911, grad_fn=<NllLossBackward>)


 69%|██████▉   | 275/400 [17:18<07:55,  3.81s/it]

epochs:0------------------------loss:tensor(0.6250, grad_fn=<NllLossBackward>)


 69%|██████▉   | 276/400 [17:21<07:25,  3.59s/it]

epochs:0------------------------loss:tensor(0.6718, grad_fn=<NllLossBackward>)


 69%|██████▉   | 277/400 [17:23<06:26,  3.14s/it]

epochs:0------------------------loss:tensor(0.6778, grad_fn=<NllLossBackward>)


 70%|██████▉   | 278/400 [17:29<08:13,  4.05s/it]

epochs:0------------------------loss:tensor(0.6664, grad_fn=<NllLossBackward>)


 70%|██████▉   | 279/400 [17:31<07:00,  3.47s/it]

epochs:0------------------------loss:tensor(0.6819, grad_fn=<NllLossBackward>)


 70%|███████   | 280/400 [17:34<06:10,  3.09s/it]

epochs:0------------------------loss:tensor(0.6613, grad_fn=<NllLossBackward>)


 70%|███████   | 281/400 [17:36<05:30,  2.78s/it]

epochs:0------------------------loss:tensor(0.6916, grad_fn=<NllLossBackward>)


 70%|███████   | 282/400 [17:38<05:12,  2.65s/it]

epochs:0------------------------loss:tensor(0.6719, grad_fn=<NllLossBackward>)


 71%|███████   | 283/400 [17:41<05:17,  2.72s/it]

epochs:0------------------------loss:tensor(0.6943, grad_fn=<NllLossBackward>)


 71%|███████   | 284/400 [17:49<08:18,  4.30s/it]

epochs:0------------------------loss:tensor(0.6662, grad_fn=<NllLossBackward>)


 71%|███████▏  | 285/400 [17:53<08:12,  4.28s/it]

epochs:0------------------------loss:tensor(0.6889, grad_fn=<NllLossBackward>)


 72%|███████▏  | 286/400 [17:55<06:39,  3.51s/it]

epochs:0------------------------loss:tensor(0.6737, grad_fn=<NllLossBackward>)


 72%|███████▏  | 287/400 [17:59<06:46,  3.59s/it]

epochs:0------------------------loss:tensor(0.6873, grad_fn=<NllLossBackward>)


 72%|███████▏  | 288/400 [18:00<05:47,  3.10s/it]

epochs:0------------------------loss:tensor(0.6516, grad_fn=<NllLossBackward>)


 72%|███████▏  | 289/400 [18:02<04:46,  2.58s/it]

epochs:0------------------------loss:tensor(0.6753, grad_fn=<NllLossBackward>)


 72%|███████▎  | 290/400 [18:04<04:36,  2.52s/it]

epochs:0------------------------loss:tensor(0.6503, grad_fn=<NllLossBackward>)


 73%|███████▎  | 291/400 [18:23<13:17,  7.32s/it]

epochs:0------------------------loss:tensor(0.6665, grad_fn=<NllLossBackward>)


 73%|███████▎  | 292/400 [18:27<11:33,  6.42s/it]

epochs:0------------------------loss:tensor(0.6803, grad_fn=<NllLossBackward>)


 73%|███████▎  | 293/400 [18:30<09:22,  5.25s/it]

epochs:0------------------------loss:tensor(0.6950, grad_fn=<NllLossBackward>)


 74%|███████▎  | 294/400 [18:40<11:59,  6.79s/it]

epochs:0------------------------loss:tensor(0.6403, grad_fn=<NllLossBackward>)


 74%|███████▍  | 295/400 [18:42<09:21,  5.35s/it]

epochs:0------------------------loss:tensor(0.7091, grad_fn=<NllLossBackward>)


 74%|███████▍  | 296/400 [18:47<08:54,  5.14s/it]

epochs:0------------------------loss:tensor(0.6492, grad_fn=<NllLossBackward>)


 74%|███████▍  | 297/400 [18:49<07:12,  4.19s/it]

epochs:0------------------------loss:tensor(0.6596, grad_fn=<NllLossBackward>)


 74%|███████▍  | 298/400 [18:52<06:58,  4.10s/it]

epochs:0------------------------loss:tensor(0.6628, grad_fn=<NllLossBackward>)


 75%|███████▍  | 299/400 [18:54<05:40,  3.37s/it]

epochs:0------------------------loss:tensor(0.6595, grad_fn=<NllLossBackward>)


 75%|███████▌  | 300/400 [18:59<06:13,  3.73s/it]

epochs:0------------------------loss:tensor(0.6903, grad_fn=<NllLossBackward>)


 75%|███████▌  | 301/400 [19:01<05:36,  3.40s/it]

epochs:0------------------------loss:tensor(0.6677, grad_fn=<NllLossBackward>)


 76%|███████▌  | 302/400 [19:03<04:55,  3.01s/it]

epochs:0------------------------loss:tensor(0.6796, grad_fn=<NllLossBackward>)


 76%|███████▌  | 303/400 [19:05<04:20,  2.69s/it]

epochs:0------------------------loss:tensor(0.6811, grad_fn=<NllLossBackward>)


 76%|███████▌  | 304/400 [19:07<03:56,  2.47s/it]

epochs:0------------------------loss:tensor(0.6666, grad_fn=<NllLossBackward>)


 76%|███████▋  | 305/400 [19:12<04:53,  3.08s/it]

epochs:0------------------------loss:tensor(0.6421, grad_fn=<NllLossBackward>)


 76%|███████▋  | 306/400 [19:15<04:57,  3.16s/it]

epochs:0------------------------loss:tensor(0.6955, grad_fn=<NllLossBackward>)


 77%|███████▋  | 307/400 [19:17<04:26,  2.86s/it]

epochs:0------------------------loss:tensor(0.6799, grad_fn=<NllLossBackward>)


 77%|███████▋  | 308/400 [19:23<05:38,  3.68s/it]

epochs:0------------------------loss:tensor(0.6822, grad_fn=<NllLossBackward>)


 77%|███████▋  | 309/400 [19:28<05:58,  3.94s/it]

epochs:0------------------------loss:tensor(0.6866, grad_fn=<NllLossBackward>)


 78%|███████▊  | 310/400 [19:30<05:16,  3.51s/it]

epochs:0------------------------loss:tensor(0.6818, grad_fn=<NllLossBackward>)


 78%|███████▊  | 311/400 [19:36<06:19,  4.27s/it]

epochs:0------------------------loss:tensor(0.6666, grad_fn=<NllLossBackward>)


 78%|███████▊  | 312/400 [19:40<06:11,  4.22s/it]

epochs:0------------------------loss:tensor(0.6882, grad_fn=<NllLossBackward>)


 78%|███████▊  | 313/400 [19:43<05:23,  3.72s/it]

epochs:0------------------------loss:tensor(0.6709, grad_fn=<NllLossBackward>)


 78%|███████▊  | 314/400 [19:45<04:49,  3.37s/it]

epochs:0------------------------loss:tensor(0.6725, grad_fn=<NllLossBackward>)


 79%|███████▉  | 315/400 [19:50<05:10,  3.65s/it]

epochs:0------------------------loss:tensor(0.6905, grad_fn=<NllLossBackward>)


 79%|███████▉  | 316/400 [20:01<08:31,  6.08s/it]

epochs:0------------------------loss:tensor(0.6285, grad_fn=<NllLossBackward>)


 79%|███████▉  | 317/400 [20:06<07:55,  5.72s/it]

epochs:0------------------------loss:tensor(0.6401, grad_fn=<NllLossBackward>)


 80%|███████▉  | 318/400 [20:09<06:31,  4.78s/it]

epochs:0------------------------loss:tensor(0.6504, grad_fn=<NllLossBackward>)


 80%|███████▉  | 319/400 [20:13<06:13,  4.61s/it]

epochs:0------------------------loss:tensor(0.6710, grad_fn=<NllLossBackward>)


 80%|████████  | 320/400 [20:15<05:12,  3.91s/it]

epochs:0------------------------loss:tensor(0.6262, grad_fn=<NllLossBackward>)


 80%|████████  | 321/400 [20:18<04:36,  3.50s/it]

epochs:0------------------------loss:tensor(0.6754, grad_fn=<NllLossBackward>)


 80%|████████  | 322/400 [20:20<04:01,  3.10s/it]

epochs:0------------------------loss:tensor(0.6787, grad_fn=<NllLossBackward>)


 81%|████████  | 323/400 [20:23<03:58,  3.09s/it]

epochs:0------------------------loss:tensor(0.6587, grad_fn=<NllLossBackward>)


 81%|████████  | 324/400 [20:25<03:21,  2.65s/it]

epochs:0------------------------loss:tensor(0.6505, grad_fn=<NllLossBackward>)


 81%|████████▏ | 325/400 [20:28<03:34,  2.86s/it]

epochs:0------------------------loss:tensor(0.6222, grad_fn=<NllLossBackward>)


 82%|████████▏ | 326/400 [20:35<05:01,  4.07s/it]

epochs:0------------------------loss:tensor(0.7270, grad_fn=<NllLossBackward>)


 82%|████████▏ | 327/400 [20:38<04:39,  3.83s/it]

epochs:0------------------------loss:tensor(0.6733, grad_fn=<NllLossBackward>)


 82%|████████▏ | 328/400 [20:42<04:43,  3.93s/it]

epochs:0------------------------loss:tensor(0.6844, grad_fn=<NllLossBackward>)


 82%|████████▏ | 329/400 [20:56<08:08,  6.88s/it]

epochs:0------------------------loss:tensor(0.6950, grad_fn=<NllLossBackward>)


 82%|████████▎ | 330/400 [21:02<07:44,  6.64s/it]

epochs:0------------------------loss:tensor(0.6956, grad_fn=<NllLossBackward>)


 83%|████████▎ | 331/400 [21:04<05:49,  5.06s/it]

epochs:0------------------------loss:tensor(0.6769, grad_fn=<NllLossBackward>)


 83%|████████▎ | 332/400 [21:11<06:27,  5.70s/it]

epochs:0------------------------loss:tensor(0.6948, grad_fn=<NllLossBackward>)


 83%|████████▎ | 333/400 [21:14<05:32,  4.96s/it]

epochs:0------------------------loss:tensor(0.6770, grad_fn=<NllLossBackward>)


 84%|████████▎ | 334/400 [21:16<04:37,  4.20s/it]

epochs:0------------------------loss:tensor(0.6698, grad_fn=<NllLossBackward>)


 84%|████████▍ | 335/400 [21:19<04:02,  3.73s/it]

epochs:0------------------------loss:tensor(0.6708, grad_fn=<NllLossBackward>)


 84%|████████▍ | 336/400 [21:22<03:35,  3.36s/it]

epochs:0------------------------loss:tensor(0.6662, grad_fn=<NllLossBackward>)


 84%|████████▍ | 337/400 [21:25<03:33,  3.39s/it]

epochs:0------------------------loss:tensor(0.6917, grad_fn=<NllLossBackward>)


 84%|████████▍ | 338/400 [21:32<04:28,  4.33s/it]

epochs:0------------------------loss:tensor(0.6199, grad_fn=<NllLossBackward>)


 85%|████████▍ | 339/400 [21:35<04:10,  4.10s/it]

epochs:0------------------------loss:tensor(0.6770, grad_fn=<NllLossBackward>)


 85%|████████▌ | 340/400 [21:38<03:52,  3.87s/it]

epochs:0------------------------loss:tensor(0.5729, grad_fn=<NllLossBackward>)


 85%|████████▌ | 341/400 [21:43<04:03,  4.12s/it]

epochs:0------------------------loss:tensor(0.6975, grad_fn=<NllLossBackward>)


 86%|████████▌ | 342/400 [21:45<03:27,  3.58s/it]

epochs:0------------------------loss:tensor(0.7128, grad_fn=<NllLossBackward>)


 86%|████████▌ | 343/400 [21:48<03:02,  3.20s/it]

epochs:0------------------------loss:tensor(0.6772, grad_fn=<NllLossBackward>)


 86%|████████▌ | 344/400 [21:50<02:43,  2.92s/it]

epochs:0------------------------loss:tensor(0.6990, grad_fn=<NllLossBackward>)


 86%|████████▋ | 345/400 [21:56<03:23,  3.70s/it]

epochs:0------------------------loss:tensor(0.6350, grad_fn=<NllLossBackward>)


 86%|████████▋ | 346/400 [21:59<03:13,  3.59s/it]

epochs:0------------------------loss:tensor(0.6911, grad_fn=<NllLossBackward>)


 87%|████████▋ | 347/400 [22:05<03:48,  4.30s/it]

epochs:0------------------------loss:tensor(0.6431, grad_fn=<NllLossBackward>)


 87%|████████▋ | 348/400 [22:07<03:08,  3.63s/it]

epochs:0------------------------loss:tensor(0.6749, grad_fn=<NllLossBackward>)


 87%|████████▋ | 349/400 [22:09<02:41,  3.16s/it]

epochs:0------------------------loss:tensor(0.6730, grad_fn=<NllLossBackward>)


 88%|████████▊ | 350/400 [22:13<02:43,  3.28s/it]

epochs:0------------------------loss:tensor(0.6963, grad_fn=<NllLossBackward>)


 88%|████████▊ | 351/400 [22:14<02:15,  2.77s/it]

epochs:0------------------------loss:tensor(0.6588, grad_fn=<NllLossBackward>)


 88%|████████▊ | 352/400 [22:16<02:03,  2.57s/it]

epochs:0------------------------loss:tensor(0.6613, grad_fn=<NllLossBackward>)


 88%|████████▊ | 353/400 [22:20<02:11,  2.79s/it]

epochs:0------------------------loss:tensor(0.7149, grad_fn=<NllLossBackward>)


 88%|████████▊ | 354/400 [22:27<03:13,  4.20s/it]

epochs:0------------------------loss:tensor(0.6665, grad_fn=<NllLossBackward>)


 89%|████████▉ | 355/400 [22:31<03:09,  4.21s/it]

epochs:0------------------------loss:tensor(0.6621, grad_fn=<NllLossBackward>)


 89%|████████▉ | 356/400 [22:34<02:42,  3.68s/it]

epochs:0------------------------loss:tensor(0.6654, grad_fn=<NllLossBackward>)


 89%|████████▉ | 357/400 [22:36<02:20,  3.26s/it]

epochs:0------------------------loss:tensor(0.6893, grad_fn=<NllLossBackward>)


 90%|████████▉ | 358/400 [22:40<02:24,  3.45s/it]

epochs:0------------------------loss:tensor(0.6719, grad_fn=<NllLossBackward>)


 90%|████████▉ | 359/400 [22:43<02:16,  3.33s/it]

epochs:0------------------------loss:tensor(0.6760, grad_fn=<NllLossBackward>)


 90%|█████████ | 360/400 [22:45<01:59,  2.99s/it]

epochs:0------------------------loss:tensor(0.6842, grad_fn=<NllLossBackward>)


 90%|█████████ | 361/400 [22:52<02:44,  4.22s/it]

epochs:0------------------------loss:tensor(0.6860, grad_fn=<NllLossBackward>)


 90%|█████████ | 362/400 [22:56<02:32,  4.00s/it]

epochs:0------------------------loss:tensor(0.6930, grad_fn=<NllLossBackward>)


 91%|█████████ | 363/400 [22:58<02:11,  3.55s/it]

epochs:0------------------------loss:tensor(0.6454, grad_fn=<NllLossBackward>)


 91%|█████████ | 364/400 [23:04<02:32,  4.23s/it]

epochs:0------------------------loss:tensor(0.6756, grad_fn=<NllLossBackward>)


 91%|█████████▏| 365/400 [23:06<02:03,  3.53s/it]

epochs:0------------------------loss:tensor(0.6461, grad_fn=<NllLossBackward>)


 92%|█████████▏| 366/400 [23:10<02:05,  3.70s/it]

epochs:0------------------------loss:tensor(0.6659, grad_fn=<NllLossBackward>)


 92%|█████████▏| 367/400 [23:23<03:34,  6.49s/it]

epochs:0------------------------loss:tensor(0.6837, grad_fn=<NllLossBackward>)


 92%|█████████▏| 368/400 [23:28<03:12,  6.02s/it]

epochs:0------------------------loss:tensor(0.6757, grad_fn=<NllLossBackward>)


 92%|█████████▏| 369/400 [23:31<02:34,  4.99s/it]

epochs:0------------------------loss:tensor(0.6785, grad_fn=<NllLossBackward>)


 92%|█████████▎| 370/400 [23:35<02:23,  4.79s/it]

epochs:0------------------------loss:tensor(0.6335, grad_fn=<NllLossBackward>)


 93%|█████████▎| 371/400 [23:39<02:14,  4.63s/it]

epochs:0------------------------loss:tensor(0.6159, grad_fn=<NllLossBackward>)


 93%|█████████▎| 372/400 [23:42<01:58,  4.23s/it]

epochs:0------------------------loss:tensor(0.6759, grad_fn=<NllLossBackward>)


 93%|█████████▎| 373/400 [23:46<01:49,  4.06s/it]

epochs:0------------------------loss:tensor(0.6935, grad_fn=<NllLossBackward>)


 94%|█████████▎| 374/400 [23:50<01:44,  4.02s/it]

epochs:0------------------------loss:tensor(0.6797, grad_fn=<NllLossBackward>)


 94%|█████████▍| 375/400 [23:52<01:23,  3.33s/it]

epochs:0------------------------loss:tensor(0.6243, grad_fn=<NllLossBackward>)


 94%|█████████▍| 376/400 [23:57<01:37,  4.05s/it]

epochs:0------------------------loss:tensor(0.6349, grad_fn=<NllLossBackward>)


 94%|█████████▍| 377/400 [24:00<01:23,  3.65s/it]

epochs:0------------------------loss:tensor(0.6499, grad_fn=<NllLossBackward>)


 94%|█████████▍| 378/400 [24:03<01:13,  3.36s/it]

epochs:0------------------------loss:tensor(0.6767, grad_fn=<NllLossBackward>)


 95%|█████████▍| 379/400 [24:07<01:16,  3.66s/it]

epochs:0------------------------loss:tensor(0.6900, grad_fn=<NllLossBackward>)


 95%|█████████▌| 380/400 [24:11<01:11,  3.60s/it]

epochs:0------------------------loss:tensor(0.6774, grad_fn=<NllLossBackward>)


 95%|█████████▌| 381/400 [24:15<01:15,  3.95s/it]

epochs:0------------------------loss:tensor(0.6910, grad_fn=<NllLossBackward>)


 96%|█████████▌| 382/400 [24:18<01:05,  3.64s/it]

epochs:0------------------------loss:tensor(0.6854, grad_fn=<NllLossBackward>)


 96%|█████████▌| 383/400 [24:20<00:51,  3.06s/it]

epochs:0------------------------loss:tensor(0.6660, grad_fn=<NllLossBackward>)


 96%|█████████▌| 384/400 [24:23<00:50,  3.16s/it]

epochs:0------------------------loss:tensor(0.6706, grad_fn=<NllLossBackward>)


 96%|█████████▋| 385/400 [24:26<00:43,  2.90s/it]

epochs:0------------------------loss:tensor(0.6912, grad_fn=<NllLossBackward>)


 96%|█████████▋| 386/400 [24:28<00:38,  2.76s/it]

epochs:0------------------------loss:tensor(0.6687, grad_fn=<NllLossBackward>)


 97%|█████████▋| 387/400 [24:31<00:36,  2.80s/it]

epochs:0------------------------loss:tensor(0.6429, grad_fn=<NllLossBackward>)


 97%|█████████▋| 388/400 [24:36<00:42,  3.52s/it]

epochs:0------------------------loss:tensor(0.6784, grad_fn=<NllLossBackward>)


 97%|█████████▋| 389/400 [24:39<00:35,  3.21s/it]

epochs:0------------------------loss:tensor(0.6705, grad_fn=<NllLossBackward>)


 98%|█████████▊| 390/400 [24:43<00:35,  3.54s/it]

epochs:0------------------------loss:tensor(0.6812, grad_fn=<NllLossBackward>)


 98%|█████████▊| 391/400 [24:56<00:56,  6.23s/it]

epochs:0------------------------loss:tensor(0.6267, grad_fn=<NllLossBackward>)


 98%|█████████▊| 392/400 [25:02<00:51,  6.38s/it]

epochs:0------------------------loss:tensor(0.6661, grad_fn=<NllLossBackward>)


 98%|█████████▊| 393/400 [25:06<00:38,  5.46s/it]

epochs:0------------------------loss:tensor(0.6657, grad_fn=<NllLossBackward>)


 98%|█████████▊| 394/400 [25:08<00:26,  4.47s/it]

epochs:0------------------------loss:tensor(0.6506, grad_fn=<NllLossBackward>)


 99%|█████████▉| 395/400 [25:10<00:19,  3.85s/it]

epochs:0------------------------loss:tensor(0.6754, grad_fn=<NllLossBackward>)


 99%|█████████▉| 396/400 [25:17<00:18,  4.70s/it]

epochs:0------------------------loss:tensor(0.6704, grad_fn=<NllLossBackward>)


 99%|█████████▉| 397/400 [25:20<00:12,  4.19s/it]

epochs:0------------------------loss:tensor(0.6653, grad_fn=<NllLossBackward>)


100%|█████████▉| 398/400 [25:22<00:07,  3.65s/it]

epochs:0------------------------loss:tensor(0.6585, grad_fn=<NllLossBackward>)


100%|█████████▉| 399/400 [25:25<00:03,  3.28s/it]

epochs:0------------------------loss:tensor(0.6631, grad_fn=<NllLossBackward>)


100%|██████████| 400/400 [25:33<00:00,  3.83s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epochs:0------------------------loss:tensor(0.6584, grad_fn=<NllLossBackward>)


  0%|          | 1/400 [00:03<23:13,  3.49s/it]

epochs:1------------------------loss:tensor(0.6533, grad_fn=<NllLossBackward>)


  0%|          | 2/400 [00:08<26:02,  3.93s/it]

epochs:1------------------------loss:tensor(0.5606, grad_fn=<NllLossBackward>)


  1%|          | 3/400 [00:10<21:55,  3.31s/it]

epochs:1------------------------loss:tensor(0.6605, grad_fn=<NllLossBackward>)


  1%|          | 4/400 [00:12<20:00,  3.03s/it]

epochs:1------------------------loss:tensor(0.6669, grad_fn=<NllLossBackward>)


  1%|▏         | 5/400 [00:14<17:16,  2.62s/it]

epochs:1------------------------loss:tensor(0.6917, grad_fn=<NllLossBackward>)


  2%|▏         | 6/400 [00:17<17:31,  2.67s/it]

epochs:1------------------------loss:tensor(0.7001, grad_fn=<NllLossBackward>)


  2%|▏         | 7/400 [00:19<17:00,  2.60s/it]

epochs:1------------------------loss:tensor(0.6734, grad_fn=<NllLossBackward>)


  2%|▏         | 8/400 [00:23<20:00,  3.06s/it]

epochs:1------------------------loss:tensor(0.6923, grad_fn=<NllLossBackward>)


  2%|▏         | 9/400 [00:25<17:58,  2.76s/it]

epochs:1------------------------loss:tensor(0.6213, grad_fn=<NllLossBackward>)


  2%|▎         | 10/400 [00:31<23:22,  3.60s/it]

epochs:1------------------------loss:tensor(0.6918, grad_fn=<NllLossBackward>)


  3%|▎         | 11/400 [00:33<21:08,  3.26s/it]

epochs:1------------------------loss:tensor(0.6840, grad_fn=<NllLossBackward>)


  3%|▎         | 12/400 [00:36<19:55,  3.08s/it]

epochs:1------------------------loss:tensor(0.6926, grad_fn=<NllLossBackward>)


  3%|▎         | 13/400 [00:39<20:19,  3.15s/it]

epochs:1------------------------loss:tensor(0.6307, grad_fn=<NllLossBackward>)


  4%|▎         | 14/400 [00:42<19:17,  3.00s/it]

epochs:1------------------------loss:tensor(0.7105, grad_fn=<NllLossBackward>)


  4%|▍         | 15/400 [00:44<18:26,  2.87s/it]

epochs:1------------------------loss:tensor(0.6788, grad_fn=<NllLossBackward>)


  4%|▍         | 16/400 [00:47<18:18,  2.86s/it]

epochs:1------------------------loss:tensor(0.6699, grad_fn=<NllLossBackward>)


  4%|▍         | 17/400 [00:50<17:37,  2.76s/it]

epochs:1------------------------loss:tensor(0.6805, grad_fn=<NllLossBackward>)


  4%|▍         | 18/400 [00:53<17:26,  2.74s/it]

epochs:1------------------------loss:tensor(0.6979, grad_fn=<NllLossBackward>)


  5%|▍         | 19/400 [00:56<19:33,  3.08s/it]

epochs:1------------------------loss:tensor(0.6624, grad_fn=<NllLossBackward>)


  5%|▌         | 20/400 [00:59<18:12,  2.87s/it]

epochs:1------------------------loss:tensor(0.6696, grad_fn=<NllLossBackward>)


  5%|▌         | 21/400 [01:02<18:41,  2.96s/it]

epochs:1------------------------loss:tensor(0.6605, grad_fn=<NllLossBackward>)


  6%|▌         | 22/400 [01:03<15:37,  2.48s/it]

epochs:1------------------------loss:tensor(0.6397, grad_fn=<NllLossBackward>)


  6%|▌         | 23/400 [01:07<17:15,  2.75s/it]

epochs:1------------------------loss:tensor(0.6641, grad_fn=<NllLossBackward>)


  6%|▌         | 24/400 [01:09<16:03,  2.56s/it]

epochs:1------------------------loss:tensor(0.7049, grad_fn=<NllLossBackward>)


  6%|▋         | 25/400 [01:11<15:22,  2.46s/it]

epochs:1------------------------loss:tensor(0.6414, grad_fn=<NllLossBackward>)


  6%|▋         | 26/400 [01:13<14:16,  2.29s/it]

epochs:1------------------------loss:tensor(0.6480, grad_fn=<NllLossBackward>)


  7%|▋         | 27/400 [01:17<16:40,  2.68s/it]

epochs:1------------------------loss:tensor(0.6716, grad_fn=<NllLossBackward>)
