* [PyTorch Tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
* [Do not need softmax](https://stackoverflow.com/questions/55675345/should-i-use-softmax-as-output-when-using-cross-entropy-loss-in-pytorch)
* I learned how to implement LeNet5
* I learned how to train net on GPU

In [23]:
%matplotlib inline

## Load and normalize the CIFAR10 training and test datasets using torchvision

In [120]:
from torch.utils.data.dataset import Dataset
from torchvision import transforms

import pandas as pd
import cv2 as cv

class AnimalDataset(Dataset):
    def __init__(self, root_path, subset, height, width):
        self.to_tensor = transforms.ToTensor()
        subset_txt = pd.read_csv(root_path + subset, sep=" ")
        self.np_subset_txt = np.array(subset_txt)
        self.count = len(self.np_subset_txt)
        self.height = height
        self.width = width
    
    def __getitem__(self, index):
        label = int(self.np_subset_txt[index][1])
        img = cv.imread(root_path + self.np_subset_txt[index][0])
        img_resize = cv.resize(img, (self.height, self.width))/255.0
        img_resize = img_resize.reshape(-1, self.height, self.width)
        img_resize_tensor = self.to_tensor(img_resize)
        return (img_resize, label)
    
    def __len__(self):
        return self.count

In [156]:
root_path = "C:/Users/USER/Desktop/Projects/Github_Repo/AI/DeepLearning/__HW1_DATA/"

Train_Dataset = AnimalDataset(root_path = root_path, subset = "train.txt", height = 30, width = 30)
Test_Dataset = AnimalDataset(root_path = root_path, subset = "test.txt", height = 30, width = 30)
Val_Dataset = AnimalDataset(root_path = root_path, subset = "val.txt", height = 30, width = 30)

In [157]:
Train_DataLoader = torch.utils.data.DataLoader(dataset=Train_Dataset, batch_size = 50, shuffle=True)
Test_DataLoader = torch.utils.data.DataLoader(dataset=Test_Dataset, batch_size = 50, shuffle=True)
Val_DataLoader = torch.utils.data.DataLoader(dataset=Val_Dataset, batch_size = 50, shuffle=True)

## Define a Convolutional Neural Network

In [197]:
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 50)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        #x = F.softmax(x)
        return x
    
net = Net()
net.to(device)

cuda:0


Net(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=50, bias=True)
)

## Define a loss function

In [198]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(net.parameters(), lr=1e-3)

## Train the network on the training data

In [199]:
import time

tic = time.time()
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(Train_DataLoader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        #inputs, labels = data
        #print(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs.float())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 2000 mini-batches
            print(outputs)
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

toc = time.time()
print(toc - tic)
print('Finished Training')

tensor([[-0.0954,  0.0469, -0.0784,  ...,  0.0276,  0.1167, -0.0799],
        [-0.0831,  0.0400, -0.0803,  ...,  0.0244,  0.1135, -0.0812],
        [-0.0898,  0.0623, -0.0695,  ...,  0.0297,  0.1313, -0.0844],
        ...,
        [-0.0942,  0.0864, -0.0524,  ...,  0.0413,  0.1587, -0.0842],
        [-0.0937,  0.0721, -0.0614,  ...,  0.0341,  0.1395, -0.0857],
        [-0.0826,  0.0395, -0.0810,  ...,  0.0230,  0.1175, -0.0860]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[1,   100] loss: 3.913
tensor([[-0.0258,  0.0468, -0.0071,  ...,  0.0402,  0.1416, -0.0536],
        [-0.0547,  0.0915,  0.0323,  ...,  0.0614,  0.1931, -0.0633],
        [-0.0502,  0.0746,  0.0078,  ...,  0.0503,  0.1579, -0.0612],
        ...,
        [-0.0301,  0.0559, -0.0165,  ...,  0.0363,  0.1419, -0.0617],
        [-0.0350,  0.0531, -0.0136,  ...,  0.0390,  0.1357, -0.0597],
        [-0.0358,  0.0701,  0.0092,  ...,  0.0485,  0.1616, -0.0573]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[1,   200] 

tensor([[ 1.4840, -1.0554, -0.9344,  ..., -0.0100, -0.2203,  0.6454],
        [-0.3504,  0.0183,  0.1281,  ...,  0.0045,  0.3858, -0.2173],
        [-0.4160, -0.3068, -0.2687,  ..., -0.1460, -0.3874, -0.1063],
        ...,
        [ 0.8206, -0.6665, -0.5566,  ...,  0.0057, -0.0388,  0.3380],
        [-1.5512,  0.2178,  0.2102,  ..., -0.2221, -0.2788, -0.5891],
        [ 0.4031, -0.8439, -0.7894,  ..., -0.2300, -0.8381,  0.2712]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[2,   500] loss: 3.802
tensor([[-0.7737, -0.2940, -0.3362,  ..., -0.2326, -0.7859, -0.1196],
        [-2.0464,  0.5071,  0.3827,  ..., -0.2380, -0.2988, -0.7366],
        [-0.0820, -0.6900, -0.6970,  ..., -0.2735, -1.0678,  0.2050],
        ...,
        [ 0.4306, -0.5740, -0.6416,  ..., -0.1033, -0.6142,  0.2570],
        [-0.7950, -0.1933, -0.2449,  ..., -0.2203, -0.6980, -0.1875],
        [-0.5328, -0.2514, -0.3155,  ..., -0.1710, -0.5512, -0.0644]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[2,   600] 

tensor([[-0.1999, -0.7753, -0.6989,  ..., -0.0176, -0.4756,  0.1152],
        [ 1.5463, -1.8096, -1.5810,  ...,  0.6889,  0.9401,  0.8899],
        [ 1.9126, -2.1327, -2.0298,  ...,  0.5734,  0.1190,  1.1770],
        ...,
        [ 1.9878, -2.0465, -1.9811,  ...,  0.5272,  0.2672,  1.1397],
        [-2.2974, -0.2910, -0.1398,  ..., -0.4662, -1.4821, -0.7529],
        [ 1.6975, -1.6804, -1.5352,  ...,  0.6159,  0.8043,  0.8940]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[3,  1000] loss: 3.722
tensor([[-2.3665, -0.2460, -0.1145,  ..., -0.4829, -1.5431, -0.7566],
        [-2.8641,  0.0863,  0.1734,  ..., -0.5934, -1.5383, -1.0855],
        [ 0.8361, -1.4657, -1.3562,  ...,  0.3425, -0.2470,  0.8028],
        ...,
        [-2.7221,  0.0045,  0.1171,  ..., -0.5668, -1.5121, -0.9888],
        [-2.8283, -0.0808,  0.0288,  ..., -0.6170, -1.8362, -0.9774],
        [ 1.3910, -1.6081, -1.4295,  ...,  0.6610,  0.8874,  0.9151]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[3,  1100] 

tensor([[ 2.5979, -2.9787, -3.2015,  ...,  0.8001,  0.8835,  1.3077],
        [-1.4666,  0.2462,  0.1179,  ..., -0.7558, -0.7017, -1.7715],
        [-1.4368, -0.2314, -0.2361,  ..., -0.6457, -1.0654, -1.3003],
        ...,
        [-0.7631, -1.2012, -1.1273,  ..., -0.0212, -0.5531,  0.0137],
        [-1.4660, -0.2053, -0.1180,  ..., -0.2451,  0.8069, -1.3763],
        [-1.8313, -0.4768, -0.3616,  ..., -0.5217, -1.2261, -0.8328]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[5,   200] loss: 3.656
tensor([[-0.9906, -1.0319, -0.9387,  ..., -0.2101, -1.1452,  0.0781],
        [-1.0807, -0.2162, -0.2451,  ..., -0.3976, -0.6342, -0.8562],
        [ 2.2738, -2.2999, -2.4906,  ...,  0.5006,  0.1126,  1.1713],
        ...,
        [-2.2041, -0.5995, -0.4765,  ..., -0.5474, -2.1067, -0.5874],
        [-1.0690, -1.0126, -0.9774,  ..., -0.1774, -1.6560,  0.0276],
        [-0.4372, -1.0617, -1.0050,  ...,  0.5174,  1.1414, -0.4131]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[5,   300] 

tensor([[-0.5836, -1.3868, -1.3398,  ..., -0.2059, -2.1462,  0.0847],
        [-0.1133, -1.4140, -1.4316,  ..., -0.1664, -1.5719,  0.4702],
        [-1.7413, -0.0332,  0.0355,  ..., -0.9395, -2.1807, -1.4845],
        ...,
        [ 1.4849, -2.3354, -2.3514,  ...,  0.1339, -1.8279,  0.6929],
        [ 0.9172, -1.2001, -1.2755,  ..., -0.5858, -1.7398, -0.2887],
        [-1.8736, -1.3789, -1.5211,  ...,  0.6243,  1.2971, -0.8691]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[6,   600] loss: 3.622
tensor([[-1.1813,  0.3208,  0.1991,  ..., -1.3961, -2.6653, -1.7199],
        [-0.6523, -2.5325, -2.9255,  ...,  1.4276,  2.0298,  0.6573],
        [-1.1662, -0.2683, -0.4791,  ..., -0.5247, -1.1670, -1.1838],
        ...,
        [-0.8209, -2.0132, -2.1048,  ...,  0.2683, -1.4745,  0.8154],
        [-2.3503, -0.4439, -0.4461,  ..., -0.7586, -2.5638, -0.8863],
        [-2.4316, -0.2057, -0.2388,  ..., -0.9683, -2.4099, -1.2796]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[6,   700] 

tensor([[-1.9610,  0.2647,  0.2927,  ..., -1.6511, -2.7608, -2.4324],
        [-0.7388, -1.7486, -1.9048,  ...,  0.1474, -1.0992,  0.2201],
        [-1.7508, -1.2386, -1.5823,  ..., -0.5232, -0.7916, -0.4988],
        ...,
        [-0.4188,  0.3298,  0.4570,  ..., -1.8789, -3.1071, -2.7126],
        [-1.8811, -1.2963, -1.3755,  ..., -0.6945, -2.6289, -0.4351],
        [-3.4431, -0.0350,  0.2492,  ..., -1.5217, -3.6171, -1.4015]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[7,  1100] loss: 3.573
tensor([[-3.8721, -0.6812, -0.7671,  ..., -0.8751, -2.6337, -1.0099],
        [-3.7336, -0.7839, -1.0354,  ..., -0.4413, -1.5103, -1.1957],
        [-2.0930,  0.5242,  0.5484,  ..., -1.1801, -2.6903, -2.4982],
        ...,
        [-3.3231,  1.7357,  1.9603,  ..., -2.0687, -3.6290, -3.8197],
        [ 0.6378, -1.0764, -1.1932,  ..., -0.8921, -1.0255, -0.4689],
        [-1.9222,  0.1789,  0.1342,  ..., -1.4419, -3.1527, -1.9207]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[7,  1200] 

tensor([[-1.2979, -1.5201, -1.6147,  ..., -0.4790, -2.1754,  0.0930],
        [-0.3741, -1.8232, -1.8293,  ..., -0.0527, -2.0093,  0.2798],
        [-1.7104, -1.9338, -2.1997,  ..., -0.2653, -1.4374,  0.4416],
        ...,
        [-0.1868, -0.4107, -0.4244,  ..., -1.3137, -2.6694, -1.6376],
        [-2.8742,  1.1797,  1.2013,  ..., -2.4091, -3.8342, -3.8847],
        [-2.7301, -0.1878, -0.1467,  ..., -0.9662, -3.3549, -1.8603]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[9,   400] loss: 3.531
tensor([[ 0.6668, -3.6502, -3.2358,  ...,  1.7021, -1.6898,  1.7760],
        [-3.0362, -0.0746, -0.3697,  ..., -1.5119, -2.5945, -2.1393],
        [-0.2559, -2.0755, -2.5030,  ...,  0.0411,  0.0121,  0.2133],
        ...,
        [-1.9341,  0.6713,  0.5508,  ..., -1.8397, -3.3981, -3.1178],
        [-1.7255,  0.4732,  0.2648,  ..., -2.0794, -2.9635, -2.9531],
        [-2.7185, -1.9472, -3.0457,  ...,  0.7734,  1.6276, -1.5927]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[9,   500] 

tensor([[-2.6713, -2.0365, -2.4885,  ..., -0.6928, -1.3061, -0.6584],
        [-2.4067, -1.7565, -2.2560,  ..., -1.0594, -2.0576, -0.4081],
        [-3.0741, -1.6535, -2.5361,  ..., -0.6819, -0.8390, -1.5058],
        ...,
        [-1.6454, -2.6830, -3.2202,  ...,  0.0855, -0.6901,  0.7876],
        [-3.1545,  2.4814,  3.2275,  ..., -2.5792, -4.3800, -5.6833],
        [-2.9251, -2.1631, -3.1917,  ...,  0.1398,  0.3748, -1.2166]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[10,   800] loss: 3.525
tensor([[-4.1696,  0.6921,  0.5924,  ..., -1.4762, -2.7497, -3.1183],
        [-2.3793, -1.4316, -1.9608,  ..., -1.0775, -1.4859, -0.9136],
        [-1.2701, -1.3008, -1.7267,  ..., -0.8131, -0.2324, -1.7782],
        ...,
        [-2.3221, -0.2370, -0.1039,  ..., -1.1392, -3.2681, -1.3651],
        [-2.6194, -1.5425, -1.7421,  ..., -0.5535, -2.0135, -0.0269],
        [-3.3845,  1.3901,  1.8981,  ..., -1.7288, -3.6488, -2.7712]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[10,   900

tensor([[ 0.5842, -1.5640, -1.1550,  ..., -0.7809, -2.4255, -0.4850],
        [ 1.8544, -1.6262, -0.9604,  ..., -0.3901, -1.4058, -2.2389],
        [-2.2229,  0.2434, -0.1931,  ..., -1.3265, -0.6541, -4.2789],
        ...,
        [-5.0041,  1.1007,  2.0960,  ..., -0.4600, -4.5912, -2.0589],
        [ 0.4704, -1.9166, -1.9213,  ...,  0.0578, -0.4755, -0.1174],
        [ 0.1399, -2.4013, -2.5835,  ..., -0.1366, -0.6374,  0.2649]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[11,  1200] loss: 3.495
tensor([[-2.8775, -2.8859, -4.7245,  ...,  1.5316,  2.8542, -2.7211],
        [-1.4426, -2.2524, -3.0176,  ...,  0.6081,  0.4127, -0.2091],
        [ 0.5665, -2.1801, -2.8010,  ...,  1.0581,  1.9426, -0.2835],
        ...,
        [ 0.8358, -0.7583, -0.5255,  ..., -1.4516, -1.6442, -2.1516],
        [-0.9096, -2.9888, -3.4004,  ..., -0.7252, -0.5794, -0.2619],
        [-3.1814, -1.9585, -2.1782,  ..., -0.2216, -2.3246,  0.3318]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[12,   100

tensor([[-1.0012, -1.6774, -1.7937,  ..., -1.5069, -2.7516, -0.2044],
        [-1.2064, -0.9937, -1.5410,  ..., -1.9497, -1.9981, -1.8673],
        [-0.7493, -0.2072, -0.2458,  ..., -0.6053, -2.7751, -2.6194],
        ...,
        [-3.9905, -0.7173, -0.6680,  ..., -1.2314, -3.8057, -1.6231],
        [-2.8252, -0.2768, -0.5955,  ..., -1.8993, -2.1799, -2.8642],
        [ 0.3539, -1.8361, -1.9103,  ..., -0.5579, -1.5510, -1.2808]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[13,   500] loss: 3.481
tensor([[-4.4325,  2.4953,  3.5751,  ..., -3.1309, -5.1248, -7.1245],
        [ 1.0584, -3.0071, -2.4387,  ..., -0.2816, -1.6973,  0.4456],
        [-3.3193, -1.8978, -2.3222,  ..., -1.7550, -2.2330, -1.2625],
        ...,
        [-5.2420, -0.9861, -1.3283,  ..., -1.9254, -2.6534, -2.8765],
        [-4.5558, -0.5430, -1.2849,  ..., -1.6649, -1.5881, -4.8739],
        [-1.9627, -0.6172, -0.7851,  ..., -2.1275, -2.9864, -1.7494]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[13,   600

tensor([[-2.4081, -1.1403, -1.4286,  ..., -1.7662, -2.6401, -1.1086],
        [-5.0034,  0.8417,  0.9892,  ..., -2.3091, -4.7698, -4.0861],
        [-1.5058, -1.5655, -1.7930,  ..., -1.0946, -1.5304, -0.9139],
        ...,
        [-0.1723, -2.7928, -3.6257,  ...,  0.1151,  1.1348, -1.3360],
        [-1.2760, -1.6891, -2.4347,  ..., -1.1457, -1.1953, -1.3679],
        [-2.3340, -2.6051, -3.8441,  ...,  0.6318,  1.2419, -2.1647]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[14,  1000] loss: 3.482
tensor([[-1.9145, -1.2925, -1.2794,  ..., -0.4530, -2.2343, -0.3658],
        [-0.1790, -0.6153, -0.4588,  ..., -2.2155, -2.6161, -2.4145],
        [ 1.7415, -3.7572, -3.1030,  ..., -0.4982, -0.7190,  0.4804],
        ...,
        [-2.7861, -3.1956, -4.5922,  ...,  1.4095,  2.4920, -3.2448],
        [-3.2540, -1.9472, -2.0116,  ..., -1.4213, -2.8694, -0.2231],
        [ 1.0355, -4.0571, -2.8084,  ...,  1.2624, -1.1202,  0.6210]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[14,  1100

tensor([[-2.1248, -4.9793, -5.9989,  ...,  1.5044,  1.8743, -1.0963],
        [-3.3921, -1.0714, -1.5536,  ..., -2.0308, -3.5624, -2.3550],
        [-2.4554, -4.0435, -5.6001,  ...,  2.2978,  3.6166, -2.9137],
        ...,
        [-2.3813, -0.6179, -1.0520,  ..., -2.2329, -3.6574, -2.4909],
        [-1.5122, -0.4791, -0.7373,  ..., -1.1547, -2.6371, -1.0868],
        [ 1.6843, -2.0518, -1.5788,  ..., -1.7134, -1.9625, -0.7257]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[16,   300] loss: 3.441
tensor([[-5.1635,  2.1154,  2.4088,  ..., -2.7107, -5.2428, -6.4620],
        [ 0.1641, -3.0578, -2.8567,  ..., -0.1816, -0.7184,  0.7236],
        [-1.4271, -0.2411, -0.8522,  ..., -1.2082, -1.6457, -2.0335],
        ...,
        [-3.2137, -3.2646, -3.8792,  ..., -1.8955, -3.1077, -1.4754],
        [-2.5780, -1.1827, -2.0826,  ..., -1.9799, -2.2589, -3.0257],
        [-0.0515, -0.9269, -1.1873,  ..., -1.3122, -1.5995, -1.3154]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[16,   400

tensor([[-0.6368, -1.5240, -2.6485,  ..., -0.1699,  0.6619, -2.3899],
        [-0.0794, -2.0207, -2.5364,  ..., -1.9332, -1.8873, -0.9427],
        [ 0.1214, -3.1461, -4.2616,  ...,  1.2883,  2.6382, -1.2549],
        ...,
        [ 0.4374, -2.7283, -3.8963,  ...,  0.9180,  2.1584, -1.3763],
        [-0.0113, -1.3524, -1.5852,  ..., -2.2028, -2.7487, -0.9655],
        [-4.2733, -4.1267, -4.5888,  ..., -0.4087, -1.7204,  0.3644]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[17,   700] loss: 3.456
tensor([[-3.0443, -1.2412, -1.4364,  ..., -1.3500, -3.0598, -1.1364],
        [-5.9566, -3.4499, -3.2643,  ...,  0.2835, -1.0803, -0.5555],
        [-0.0890, -1.4811, -1.7863,  ..., -1.8347, -1.2414, -2.1534],
        ...,
        [-4.5665,  1.3752,  1.8201,  ..., -2.9728, -5.4767, -6.1771],
        [-4.0164, -2.2109, -2.2930,  ..., -1.8061, -3.1776, -0.7715],
        [-3.6049,  0.2103,  0.6754,  ..., -1.2916, -3.9156, -2.0435]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[17,   800

tensor([[-0.3541, -3.0269, -3.2631,  ..., -1.1364, -1.5129,  0.3586],
        [-3.6358, -2.0430, -2.2599,  ..., -1.6387, -3.4573, -1.1417],
        [-2.1645, -2.4362, -2.7920,  ..., -1.0214, -2.8563, -0.3890],
        ...,
        [-1.1598, -3.0122, -3.5441,  ...,  0.1634, -0.4492,  0.2536],
        [-1.7891, -1.0543, -1.5046,  ..., -2.2301, -3.2594, -2.1537],
        [-1.6457, -1.8684, -2.0657,  ..., -1.5944, -3.2729, -1.1595]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[18,  1200] loss: 3.415
tensor([[-0.0432, -0.9294, -0.7829,  ..., -1.3074, -1.9577, -1.7161],
        [-2.5311,  0.6632,  0.6128,  ..., -1.2797, -2.0296, -2.9485],
        [-2.3831, -0.4702,  0.1501,  ..., -1.1047, -3.5582, -1.3027],
        ...,
        [-2.8825,  0.0568,  0.5637,  ..., -0.1793, -2.2855, -1.3581],
        [-3.4503,  0.9041,  1.3493,  ..., -2.5824, -4.0694, -3.8064],
        [-4.8279, -0.8082, -0.7341,  ..., -2.3536, -3.1609, -2.7963]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[19,   100

tensor([[-2.0608, -0.0363, -0.8667,  ..., -1.1507, -0.4577, -3.7195],
        [-4.5212, -2.8069, -2.7842,  ..., -0.8670, -2.0368, -0.4709],
        [-3.5236, -1.5282, -2.2190,  ..., -1.0709, -1.9392, -1.9187],
        ...,
        [ 1.3482, -4.5811, -3.4515,  ..., -1.8405, -1.9149, -0.1880],
        [-2.7974, -2.7782, -3.6302,  ...,  0.1645,  0.6143, -2.1055],
        [-2.9573,  1.5738,  2.0592,  ..., -2.4803, -3.6136, -4.8376]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[20,   500] loss: 3.427
tensor([[ 1.2619, -2.2299, -1.4171,  ..., -1.6536, -2.8483, -2.7688],
        [-4.7878, -4.0820, -4.6656,  ..., -0.6049, -1.1889, -2.3285],
        [-1.0283, -0.5410, -1.0341,  ..., -1.3224, -1.3868, -2.1295],
        ...,
        [ 1.8098, -2.4498, -2.0184,  ..., -2.3568, -2.2305, -1.4979],
        [-0.7686, -0.5670, -0.6981,  ..., -2.1931, -3.7604, -3.4374],
        [-3.0569, -0.1989, -0.2191,  ..., -2.4976, -4.4388, -5.0440]],
       device='cuda:0', grad_fn=<AddmmBackward>)
[20,   600

In [200]:
PATH = './test_net.pth'
torch.save(net.state_dict(), PATH)

## Test the network on the test data

In [201]:
net = Net()
net.load_state_dict(torch.load(PATH))

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in Test_DataLoader:
        #print(labels)
        images, labels = data
        # calculate outputs by running images through the network 
        outputs = net(images.float())
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        print(predicted)
        print(labels)
        #break

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

tensor([22, 39, 33, 48, 48, 38, 41, 33, 20,  1, 48,  2, 15, 44, 15, 49, 38,  2,
        34, 20, 38,  1,  0, 42,  4, 41,  0, 20, 17, 40, 41, 48, 40,  2, 33, 34,
        39, 36, 35, 33, 14, 40, 48,  2, 27, 28, 35, 40, 15, 33])
tensor([19, 24, 37, 14, 12, 35, 41, 24, 16,  5, 44, 11, 15, 44, 19, 41, 11, 14,
        34, 18, 43, 12, 27, 37,  9, 47,  7,  3,  4, 40, 35, 32,  9,  2, 14, 38,
        10, 31, 43, 34, 14, 17, 12, 33,  3,  8, 34, 19, 21,  8])
tensor([40,  4, 46, 48, 35, 33, 22, 33, 40, 22,  0, 35, 35, 44, 33, 38, 23,  1,
        27, 33, 36, 33, 34, 32,  2, 20, 22, 36, 20, 14, 14,  2, 20, 15, 33,  0,
        45, 42, 20, 15, 22, 22, 46, 38, 20, 15,  0, 27, 23, 48])
tensor([13,  4, 46, 31, 43, 19,  3, 36, 29, 18, 32, 42, 18, 46, 33, 15, 27, 37,
         1, 18, 43, 26, 30,  8,  2, 31,  6, 26, 21, 23, 23, 23, 47, 22, 13, 28,
         9, 15, 21, 36, 47, 24,  4, 34, 42, 16, 29, 27, 10,  6])
tensor([20, 48, 34, 40, 39, 37, 40,  7, 20, 33, 14, 16, 32, 33, 41,  0, 40,  2,
        46,  0, 39, 

In [202]:
net = Net()
net.load_state_dict(torch.load(PATH))

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in Val_DataLoader:
        #print(labels)
        images, labels = data
        # calculate outputs by running images through the network 
        outputs = net(images.float())
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        print(predicted)
        print(labels)
        #break

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

tensor([ 0, 20, 48, 41,  5, 33, 34,  0, 49, 22, 34, 34,  0, 33, 36, 46, 20, 45,
         3, 24, 48, 45, 36, 33, 45, 40, 49, 28, 46, 46,  1,  2, 48, 40, 41, 38,
        36, 44,  5,  8, 23, 38, 40, 23,  8, 25, 20, 34, 33, 40])
tensor([ 0, 20, 22, 25, 40,  6, 40,  5,  3, 42, 42, 34,  6, 32, 28, 39, 24, 18,
         7, 35, 38, 46, 20, 20, 45, 30, 46,  2, 49,  0, 11, 27, 13, 29, 39, 19,
        44, 38, 25, 32, 33, 35, 40, 23,  7, 10, 42, 49, 33, 19])
tensor([49, 41, 33, 44, 34, 14,  3, 36, 46, 49, 39, 35,  2, 11, 48, 40, 44, 49,
        48, 34,  3, 15, 38, 23, 38,  5, 34, 44, 25, 45,  0, 44,  7, 44,  2, 33,
        46,  4, 40, 20, 33, 19, 34, 28, 37, 38, 40, 33, 44, 15])
tensor([46, 17, 22, 27, 21, 13,  7, 33, 46, 24,  8, 17, 27,  5, 48,  6, 49, 47,
        48,  8, 27,  3, 40, 31, 37, 21, 30,  6, 44, 45, 45, 11, 28, 49, 44, 23,
        42,  4, 23, 42, 18, 22, 35, 26, 26, 10, 16, 25, 34, 16])
tensor([14, 23, 14, 48, 33, 44, 23, 45, 16, 23, 40, 39, 28, 34, 34,  2,  6, 33,
        28, 41, 49, 

In [206]:
from tqdm import tqdm
net = Net()
net.load_state_dict(torch.load(PATH))

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in tqdm(Train_DataLoader):
        #print(labels)
        images, labels = data
        # calculate outputs by running images through the network 
        outputs = net(images.float())
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        #print(predicted)
        #print(labels)
        #break

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

100%|██████████████████████████████████████████████████████████████████████████████| 1267/1267 [03:16<00:00,  6.45it/s]

Accuracy of the network on the test images: 11 %



