# CNN Training with Code Example - Neural Network Programming Course

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
import pdb

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True) 



<torch.autograd.grad_mode.set_grad_enabled at 0x7f5d783107f0>

In [2]:
print(torch.__version__)
print(torchvision.__version__)

1.1.0
0.3.0


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
#from plotcm import plot_confusion_matrix



In [4]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()


In [5]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        self.conv1 = nn.Conv2d(in_channels =1,out_channels =6,kernel_size =5)
        self.conv2 = nn.Conv2d(in_channels =6,out_channels =12,kernel_size =5)
        
        self.fc1 = nn.Linear(in_features = 12*4*4, out_features = 120)
        self.fc2 = nn.Linear(in_features = 120, out_features = 60)
        self.out = nn.Linear(in_features= 60, out_features=10)
        
    def forward(self, t):
    # (1) input layer
        t = t

    # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

    # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

    # (4) hidden linear layer
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)

    # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

    # (6) output layer
        t = self.out(t)
    #t = F.softmax(t, dim=1)

        return t






In [6]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [7]:
network = Network()

In [8]:
train_loader = torch.utils.data.DataLoader(train_set,batch_size=100)
batch = next(iter(train_loader))
images, labels = batch


# Calculating the loss

In [9]:
preds = network(images)
loss = F.cross_entropy(preds, labels) # Calculating the loss

loss.item()

2.3044040203094482

In [10]:
get_num_correct(preds,labels)

11

In [11]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.1908,  0.0962, -0.0513,  0.1063,  0.0319],
          [-0.0840,  0.1050,  0.1264, -0.1102,  0.1228],
          [-0.1420,  0.1460,  0.1768, -0.1604,  0.0758],
          [-0.1328,  0.0490, -0.0960, -0.1086, -0.0157],
          [ 0.1767, -0.1314, -0.0779, -0.0358,  0.1260]]],


        [[[ 0.1918,  0.1884, -0.1475, -0.1328, -0.1522],
          [-0.0654,  0.1168, -0.0211, -0.0960, -0.0739],
          [ 0.1730,  0.1818,  0.1907,  0.0132,  0.0421],
          [ 0.1160,  0.0551,  0.0607, -0.1401,  0.0008],
          [-0.0079,  0.0913,  0.0653,  0.0225,  0.1211]]],


        [[[ 0.1965,  0.1952, -0.1214, -0.0509, -0.1895],
          [ 0.1557, -0.1298,  0.1487, -0.0408, -0.1227],
          [ 0.1038, -0.0018, -0.1285,  0.1662, -0.0132],
          [-0.1725,  0.0317, -0.1515, -0.0632, -0.0965],
          [-0.0495,  0.1501, -0.1340,  0.1423,  0.1051]]],


        [[[-0.0137, -0.1997, -0.0523,  0.0998, -0.0355],
          [ 0.0449,  0.1636,  0.1795,  0.0786, -0.0324

In [12]:
print(network.conv1.weight.grad)

None


In [13]:
loss.backward()

In [14]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.1908,  0.0962, -0.0513,  0.1063,  0.0319],
          [-0.0840,  0.1050,  0.1264, -0.1102,  0.1228],
          [-0.1420,  0.1460,  0.1768, -0.1604,  0.0758],
          [-0.1328,  0.0490, -0.0960, -0.1086, -0.0157],
          [ 0.1767, -0.1314, -0.0779, -0.0358,  0.1260]]],


        [[[ 0.1918,  0.1884, -0.1475, -0.1328, -0.1522],
          [-0.0654,  0.1168, -0.0211, -0.0960, -0.0739],
          [ 0.1730,  0.1818,  0.1907,  0.0132,  0.0421],
          [ 0.1160,  0.0551,  0.0607, -0.1401,  0.0008],
          [-0.0079,  0.0913,  0.0653,  0.0225,  0.1211]]],


        [[[ 0.1965,  0.1952, -0.1214, -0.0509, -0.1895],
          [ 0.1557, -0.1298,  0.1487, -0.0408, -0.1227],
          [ 0.1038, -0.0018, -0.1285,  0.1662, -0.0132],
          [-0.1725,  0.0317, -0.1515, -0.0632, -0.0965],
          [-0.0495,  0.1501, -0.1340,  0.1423,  0.1051]]],


        [[[-0.0137, -0.1997, -0.0523,  0.0998, -0.0355],
          [ 0.0449,  0.1636,  0.1795,  0.0786, -0.0324

In [15]:
network.conv1.weight.grad

tensor([[[[ 4.9024e-04,  6.9344e-04,  8.4223e-04,  8.3232e-04,  8.9785e-04],
          [ 3.4066e-04,  5.0599e-04,  7.5409e-04,  9.6821e-04,  8.8736e-04],
          [ 6.9517e-04,  6.9694e-04,  6.5502e-04,  7.7019e-04,  9.1500e-04],
          [ 1.8306e-04,  3.1088e-04,  4.5200e-04,  5.5293e-04,  6.4700e-04],
          [ 4.3742e-04,  3.3034e-04,  6.1228e-04,  9.9026e-04,  9.7451e-04]]],


        [[[ 4.2756e-04,  8.1468e-04,  9.8366e-04,  1.1754e-03,  1.2567e-03],
          [ 2.3213e-04,  7.1739e-04,  1.0464e-03,  1.2814e-03,  9.8787e-04],
          [ 3.4908e-04,  4.7962e-04,  7.7512e-04,  9.6285e-04,  8.5428e-04],
          [ 2.2020e-04,  3.6870e-04,  6.6762e-04,  9.6910e-04,  1.1329e-03],
          [ 3.6153e-04,  3.1611e-04,  4.0052e-04,  7.4115e-04,  1.1938e-03]]],


        [[[ 2.4687e-04,  1.9379e-04,  4.4343e-05, -1.9443e-04, -2.2458e-04],
          [ 4.7968e-04,  2.4939e-04,  3.1384e-04,  1.0714e-04, -9.8859e-05],
          [ 5.9352e-04,  4.3288e-04,  3.8758e-04,  1.0905e-04,  4.53

In [16]:
network.conv1.weight.grad.shape

torch.Size([6, 1, 5, 5])

In [17]:
optimizer = optim.Adam(network.parameters(), lr=0.01)

In [18]:
print(optimizer)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
)


In [19]:
optimizer.step()

In [20]:
preds = network(images)
loss.item()

loss = F.cross_entropy(preds, labels)


get_num_correct(preds, labels)


19

In [21]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.1809,  0.0862, -0.0613,  0.0963,  0.0219],
          [-0.0939,  0.0951,  0.1164, -0.1202,  0.1128],
          [-0.1520,  0.1361,  0.1668, -0.1704,  0.0658],
          [-0.1428,  0.0390, -0.1060, -0.1186, -0.0257],
          [ 0.1667, -0.1414, -0.0879, -0.0458,  0.1160]]],


        [[[ 0.1818,  0.1784, -0.1575, -0.1428, -0.1622],
          [-0.0754,  0.1068, -0.0311, -0.1060, -0.0839],
          [ 0.1631,  0.1718,  0.1807,  0.0032,  0.0321],
          [ 0.1061,  0.0451,  0.0507, -0.1501, -0.0092],
          [-0.0179,  0.0813,  0.0553,  0.0125,  0.1111]]],


        [[[ 0.1865,  0.1853, -0.1314, -0.0409, -0.1795],
          [ 0.1457, -0.1398,  0.1387, -0.0507, -0.1128],
          [ 0.0938, -0.0118, -0.1385,  0.1562, -0.0231],
          [-0.1825,  0.0217, -0.1615, -0.0732, -0.1065],
          [-0.0595,  0.1401, -0.1436,  0.1323,  0.0951]]],


        [[[-0.0237, -0.2097, -0.0623,  0.0898, -0.0455],
          [ 0.0349,  0.1536,  0.1695,  0.0686, -0.0424

In [22]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader)) # Get Batch
images, labels = batch

preds = network(images) # Pass Batch
loss = F.cross_entropy(preds, labels) # Calculate Loss

loss.backward() # Calculate Gradients
optimizer.step() # Update Weights

print('loss1:', loss.item())
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())

loss1: 2.297677516937256
loss2: 2.273430347442627


# CNN Training Loop Explained - Neural Network Code Project

In [23]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

total_loss = 0
total_correct = 0

for batch in train_loader: # Get Batch
    images, labels = batch 

    preds = network(images) # Pass Batch
    loss = F.cross_entropy(preds, labels) # Calculate Loss

    optimizer.zero_grad()
    loss.backward() # Calculate Gradients
    optimizer.step() # Update Weights

    total_loss += loss.item()
    total_correct += get_num_correct(preds, labels)
    
print(
    "epoch:", 0, 
    "total_correct:", total_correct, 
    "loss:", total_loss
)

epoch: 0 total_correct: 47215 loss: 335.4593598395586


In [26]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.1)

for epoch in range(20):
    
    total_loss = 0
    total_correct = 0
    
    for batch in train_loader: # Get Batch
        images, labels = batch 

        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(
        "epoch", epoch, 
        "total_correct:", total_correct, 
        "loss:", total_loss
    )

epoch 0 total_correct: 6005 loss: 1455.555815935135
epoch 1 total_correct: 5990 loss: 1386.1377806663513
epoch 2 total_correct: 5977 loss: 1386.208979845047
epoch 3 total_correct: 5987 loss: 1386.239009141922
epoch 4 total_correct: 5978 loss: 1386.2530558109283
epoch 5 total_correct: 5978 loss: 1386.2599337100983
epoch 6 total_correct: 5980 loss: 1386.2634036540985
epoch 7 total_correct: 5980 loss: 1386.2652034759521
epoch 8 total_correct: 5980 loss: 1386.2661182880402
epoch 9 total_correct: 5980 loss: 1386.266589641571
epoch 10 total_correct: 5980 loss: 1386.266829252243
epoch 11 total_correct: 5980 loss: 1386.2669303417206
epoch 12 total_correct: 5980 loss: 1386.2670104503632
epoch 13 total_correct: 5980 loss: 1386.2670397758484
epoch 14 total_correct: 5980 loss: 1386.2670521736145
epoch 15 total_correct: 5980 loss: 1386.267074584961
epoch 16 total_correct: 5980 loss: 1386.2670829296112
epoch 17 total_correct: 5980 loss: 1386.2670905590057
epoch 18 total_correct: 5980 loss: 1386.2670