In [1]:
import torch
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
import torch.nn as nn

_tasks = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

cifar = CIFAR10('data', train = True, download = True, transform=_tasks)

Files already downloaded and verified


In [2]:
split = int(0.8 * len(cifar))
index_list = list(range(len(cifar)))
train_idx, valid_idx = index_list[:split], index_list[split:]

In [3]:
print(len(train_idx), len(valid_idx))

40000 10000


In [4]:
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)

In [5]:
trainloader = DataLoader(cifar,batch_size = 256, sampler = tr_sampler)
validloader = DataLoader(cifar, batch_size = 256, sampler = val_sampler)

In [6]:
class Model(nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        
        self.pool = nn.MaxPool2d(2,2)
        
        self.linear1 = nn.Linear(1024, 512)
        self.linear2 = nn.Linear(512, 10)
    
    def forward(self,x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 1024)
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x
model = Model()
model

Model(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=1024, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=10, bias=True)
)

In [7]:
import torch.optim as optim
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01, weight_decay = 1e-6, momentum = 0.9, nesterov = True)

In [8]:
for epoch in range(20):
    train_loss, valid_loss = [], []
    
    model.train()
    for data, target in trainloader:
        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        print("epoch:",epoch,"- train_loss:",loss)
    model.eval()
    for data, target in validloader:
        output = model(data)
        loss = loss_function(output,target)
        valid_loss.append(loss.item())
        print("epoch:",epoch,"- valid_loss:",loss)

epoch: 0 - train_loss: tensor(2.3027, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3044, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3038, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3030, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3025, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3016, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3032, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3034, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3040, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.2996, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3020, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3008, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.2982, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3064, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.3012, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: te

epoch: 0 - train_loss: tensor(2.1157, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0828, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0916, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0741, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.1061, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0617, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.1060, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.1241, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(1.9977, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0269, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0142, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0534, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0413, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0477, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: tensor(2.0366, grad_fn=<NllLossBackward>)
epoch: 0 - train_loss: te

epoch: 1 - train_loss: tensor(1.8239, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.8447, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7728, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7169, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.6262, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7607, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7578, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7014, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7031, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.8239, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7346, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7407, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7357, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7236, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: tensor(1.7208, grad_fn=<NllLossBackward>)
epoch: 1 - train_loss: te

epoch: 1 - valid_loss: tensor(1.5798, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.7040, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6292, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6168, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6492, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6154, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6193, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6170, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6525, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.5752, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.6478, grad_fn=<NllLossBackward>)
epoch: 1 - valid_loss: tensor(1.8621, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.6091, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5466, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.6845, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: te

epoch: 2 - train_loss: tensor(1.4583, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4451, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4502, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4828, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5063, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5314, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5086, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5131, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4300, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4662, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5654, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.5342, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4335, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4768, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: tensor(1.4607, grad_fn=<NllLossBackward>)
epoch: 2 - train_loss: te

epoch: 3 - train_loss: tensor(1.3793, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.4323, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3771, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3974, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3576, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3978, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.2818, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3380, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3925, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.2518, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3656, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3120, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3939, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3178, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: tensor(1.3211, grad_fn=<NllLossBackward>)
epoch: 3 - train_loss: te

epoch: 3 - valid_loss: tensor(1.3875, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4515, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4032, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4776, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.5171, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.5152, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.5198, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4120, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4961, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4816, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.3202, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.4045, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.3938, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.3999, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: tensor(1.3872, grad_fn=<NllLossBackward>)
epoch: 3 - valid_loss: te

epoch: 4 - train_loss: tensor(1.1538, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2854, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2651, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.3101, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.3076, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.1598, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2177, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2059, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.0927, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.3313, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2115, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.1341, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2000, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.2707, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: tensor(1.1905, grad_fn=<NllLossBackward>)
epoch: 4 - train_loss: te

epoch: 5 - train_loss: tensor(1.2901, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.2317, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.2703, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.2642, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.1931, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.0707, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.1343, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.1238, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.2502, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.0069, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.2540, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.1136, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.1236, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.1813, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: tensor(1.2315, grad_fn=<NllLossBackward>)
epoch: 5 - train_loss: te

epoch: 5 - valid_loss: tensor(1.3767, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.1553, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.1844, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.1917, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2308, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.1588, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2366, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2490, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.4092, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.1976, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2339, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.3653, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2946, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2227, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: tensor(1.2604, grad_fn=<NllLossBackward>)
epoch: 5 - valid_loss: te

epoch: 6 - train_loss: tensor(0.9866, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(0.9948, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.1197, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0668, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(0.9124, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.1097, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0243, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(0.9764, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.1065, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0720, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0623, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.1043, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0359, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0891, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: tensor(1.0634, grad_fn=<NllLossBackward>)
epoch: 6 - train_loss: te

epoch: 7 - train_loss: tensor(1.0060, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0242, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9110, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9744, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0312, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0956, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0446, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9821, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0611, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9807, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.1266, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9725, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9910, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9350, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9791, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: te

epoch: 7 - train_loss: tensor(1.0058, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0305, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.8789, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9321, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.0094, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9417, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.9968, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(0.8866, grad_fn=<NllLossBackward>)
epoch: 7 - train_loss: tensor(1.2666, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: tensor(1.0420, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: tensor(1.0539, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: tensor(0.9923, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: tensor(0.9123, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: tensor(1.1079, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: tensor(1.0897, grad_fn=<NllLossBackward>)
epoch: 7 - valid_loss: te

epoch: 8 - train_loss: tensor(0.9349, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8634, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8367, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.9409, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.9359, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8967, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.9156, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.6989, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.9209, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.9164, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8754, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.9406, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8879, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8682, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: tensor(0.8690, grad_fn=<NllLossBackward>)
epoch: 8 - train_loss: te

epoch: 9 - train_loss: tensor(0.8255, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7576, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7890, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.9519, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8730, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8073, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8868, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8497, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7912, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8396, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8128, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7521, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8600, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8671, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.9091, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: te

epoch: 9 - train_loss: tensor(0.8679, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.6982, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.9406, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8142, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.9246, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7218, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.9061, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8366, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7777, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7405, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.9145, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8469, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.7719, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8261, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: tensor(0.8174, grad_fn=<NllLossBackward>)
epoch: 9 - train_loss: te

epoch: 10 - train_loss: tensor(0.7979, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.8126, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.6760, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.8290, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.7466, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.6988, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.8063, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.7954, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.8067, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.6617, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.8239, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.7690, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.7738, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.8046, grad_fn=<NllLossBackward>)
epoch: 10 - train_loss: tensor(0.7380, grad_fn=<NllLossBackward>)
epoch: 10 

epoch: 10 - valid_loss: tensor(0.8878, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(1.0045, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(0.7989, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(0.9821, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(0.9106, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(1.0237, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(0.9042, grad_fn=<NllLossBackward>)
epoch: 10 - valid_loss: tensor(1.3284, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7698, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7773, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7666, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7085, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7922, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7540, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.6180, grad_fn=<NllLossBackward>)
epoch: 11 

epoch: 11 - train_loss: tensor(0.7483, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.6880, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7013, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.6956, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7068, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7645, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7107, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.6839, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7172, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7935, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.6313, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7534, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.5977, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7711, grad_fn=<NllLossBackward>)
epoch: 11 - train_loss: tensor(0.7659, grad_fn=<NllLossBackward>)
epoch: 11 

epoch: 12 - train_loss: tensor(0.5912, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.5722, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.5887, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.7480, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.7186, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.7342, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6748, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.7917, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.5659, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6923, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6243, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6816, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6312, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6414, grad_fn=<NllLossBackward>)
epoch: 12 - train_loss: tensor(0.6410, grad_fn=<NllLossBackward>)
epoch: 12 

epoch: 12 - valid_loss: tensor(0.8273, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8780, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.9272, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.9988, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.7924, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8592, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.9496, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.9130, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8205, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8271, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8785, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8880, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8661, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(1.1211, grad_fn=<NllLossBackward>)
epoch: 12 - valid_loss: tensor(0.8217, grad_fn=<NllLossBackward>)
epoch: 12 

epoch: 13 - train_loss: tensor(0.6242, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6754, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6934, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6498, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.7064, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6255, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6497, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6034, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.5973, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6999, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6090, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.5426, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6697, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.6357, grad_fn=<NllLossBackward>)
epoch: 13 - train_loss: tensor(0.5960, grad_fn=<NllLossBackward>)
epoch: 13 

epoch: 14 - train_loss: tensor(0.5112, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5379, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4587, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5248, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4254, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5752, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5215, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5829, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.6339, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4431, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4883, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4778, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4698, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5863, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5238, grad_fn=<NllLossBackward>)
epoch: 14 

epoch: 14 - train_loss: tensor(0.6125, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4803, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5634, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.5433, grad_fn=<NllLossBackward>)
epoch: 14 - train_loss: tensor(0.4585, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8529, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8763, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8074, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8123, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.9390, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.7963, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8530, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8826, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.9498, grad_fn=<NllLossBackward>)
epoch: 14 - valid_loss: tensor(0.8567, grad_fn=<NllLossBackward>)
epoch: 14 

epoch: 15 - train_loss: tensor(0.4564, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4479, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.5027, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.5358, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4247, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4273, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.5086, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4002, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4318, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4580, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4570, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.5366, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.5092, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.5657, grad_fn=<NllLossBackward>)
epoch: 15 - train_loss: tensor(0.4235, grad_fn=<NllLossBackward>)
epoch: 15 

epoch: 16 - train_loss: tensor(0.3842, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4738, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4084, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.3663, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4049, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4224, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4682, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.3696, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4364, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4609, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.3730, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4001, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4657, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4264, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4515, grad_fn=<NllLossBackward>)
epoch: 16 

epoch: 16 - train_loss: tensor(0.4641, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4444, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4243, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4462, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4978, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.5549, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.5142, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4945, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4442, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4072, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4669, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4167, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4318, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4105, grad_fn=<NllLossBackward>)
epoch: 16 - train_loss: tensor(0.4691, grad_fn=<NllLossBackward>)
epoch: 16 

epoch: 17 - train_loss: tensor(0.4566, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3982, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3613, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.4517, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3630, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.4123, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3790, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3332, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3611, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3238, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.4370, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3200, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.2738, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.3904, grad_fn=<NllLossBackward>)
epoch: 17 - train_loss: tensor(0.4646, grad_fn=<NllLossBackward>)
epoch: 17 

epoch: 17 - valid_loss: tensor(0.9246, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(0.8001, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(0.8936, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(0.9453, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(0.9575, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(0.9147, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(1.2216, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(1.0315, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(1.1108, grad_fn=<NllLossBackward>)
epoch: 17 - valid_loss: tensor(0.9449, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3720, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.2995, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3009, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3175, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3100, grad_fn=<NllLossBackward>)
epoch: 18 

epoch: 18 - train_loss: tensor(0.2879, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3368, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3817, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3301, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3637, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3799, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3284, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3106, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.4044, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.2906, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3816, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3226, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3856, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.4000, grad_fn=<NllLossBackward>)
epoch: 18 - train_loss: tensor(0.3436, grad_fn=<NllLossBackward>)
epoch: 18 

epoch: 19 - train_loss: tensor(0.2446, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2076, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.3304, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.3215, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2213, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2412, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.3109, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.3253, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2305, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2111, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2361, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2644, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2893, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.3392, grad_fn=<NllLossBackward>)
epoch: 19 - train_loss: tensor(0.2863, grad_fn=<NllLossBackward>)
epoch: 19 

epoch: 19 - valid_loss: tensor(1.2120, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.2348, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.3642, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.2332, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.2412, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.1302, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.4901, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.1248, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.0291, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.3065, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.1768, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.3087, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.4313, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.3694, grad_fn=<NllLossBackward>)
epoch: 19 - valid_loss: tensor(1.2175, grad_fn=<NllLossBackward>)
epoch: 19 

In [9]:
import numpy as np
dataiter = iter(validloader)
data, labels = dataiter.next()
output = model(data)

_, preds_tensor = torch.max(output,1)
preds = np.squeeze(preds_tensor.numpy())

print ("Actual:", labels.numpy())
print ("Predicted:", preds)

Actual: [4 4 2 4 1 7 0 5 9 6 1 4 0 7 8 5 9 7 2 4 5 6 8 2 9 7 2 4 8 7 3 8 4 2 4 3 3
 5 1 0 6 3 5 5 5 8 6 8 3 6 0 0 5 5 4 5 9 7 9 9 3 8 4 9 0 5 6 6 8 6 4 6 8 4
 4 7 5 4 1 1 9 2 9 0 4 2 5 6 5 3 2 7 6 7 0 0 9 1 4 8 0 5 8 1 8 0 3 4 0 3 2
 2 5 2 9 5 8 7 5 6 1 8 5 3 9 1 4 3 4 2 0 9 6 9 8 6 2 4 9 2 7 9 1 0 7 8 0 3
 8 7 9 3 6 1 7 9 7 1 3 4 8 1 0 3 7 5 5 4 2 5 9 9 9 6 2 0 5 6 1 3 0 5 5 8 2
 3 0 3 4 8 2 4 4 7 0 7 5 9 2 7 7 4 0 5 6 3 0 0 7 6 4 6 9 8 8 2 9 6 5 4 9 5
 7 3 8 9 2 1 8 9 2 4 2 2 5 3 5 8 3 5 7 6 6 0 8 5 6 5 4 6 6 8 1 8 1 0]
Predicted: [3 3 3 7 1 7 0 5 9 6 1 4 0 7 8 5 9 7 2 4 6 4 8 2 9 5 5 8 8 7 5 8 4 3 3 3 3
 5 1 0 5 3 5 3 5 8 6 8 3 6 2 0 5 3 4 5 3 4 9 8 3 8 4 9 2 5 6 6 8 6 6 3 8 5
 2 3 5 2 1 1 9 0 9 1 4 2 3 6 0 3 2 7 6 4 0 0 1 3 4 8 9 3 3 1 8 3 5 3 0 5 2
 3 5 4 9 3 8 0 5 3 1 8 3 3 9 1 4 3 4 2 0 9 3 1 8 3 0 4 9 4 7 9 1 4 3 8 0 3
 8 7 9 3 3 3 2 9 3 1 3 3 0 8 0 3 7 3 7 6 2 5 9 9 0 6 2 0 3 3 1 5 0 5 3 8 3
 3 0 5 4 8 3 2 3 0 0 7 7 9 2 7 3 7 0 5 6 3 0 0 5 6 4 3 3 8 8 3 8 6 3 3 9 5
 7 3 9 9 2 

In [10]:
from sklearn.metrics import accuracy_score
ac = accuracy_score(labels,preds)
print('accuracy of model:', ac)

accuracy of model: 0.63671875
