# PyTorch Tutorial 14 - Convolutional Neural Network (CNN)

In [2]:
import torch
import torch.nn as nn 
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [None]:
## Pooling downsamples the feature maps, which reduces the computational cost and helps to prevent overfitting

In [5]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
# NOTE: `is_available` has to be *called*. The bare function object is always
# truthy, which previously selected "cuda" unconditionally and had to be
# papered over by a hard-coded CPU override.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper params
num_epochs = 4
batch_size = 4
learning_rate = 0.001

# ToTensor converts the PIL images to tensors in the range [0, 1];
# Normalize with mean 0.5 and std 0.5 per channel then maps them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 is fetched into ./data together with the training split, so the
# test split does not request a download of its own.
train_dataset = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)
test_dataset = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=False, transform=transform
)

# Only the training data is shuffled; evaluation order does not matter.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

# Human-readable class names, indexed by the integer label.
classes = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

class ConvNet(nn.Module):
    """Small CNN mapping 3-channel images to 10 raw class scores.

    Two convolution + max-pool stages are followed by three fully connected
    layers. The first linear layer expects 16*5*5 features, which is what
    the convolutional stages produce for 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 5x5 kernels, no padding; both stages share
        # the same 2x2 / stride-2 max-pool module.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head; the hidden widths 120 and 84 are free choices,
        # the final width 10 is the number of classes.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the unnormalised class scores (logits) for a batch `x`.

        Shape comments below assume 32x32 inputs.
        """
        stage1 = self.pool(F.relu(self.conv1(x)))       # n, 6, 14, 14
        stage2 = self.pool(F.relu(self.conv2(stage1)))  # n, 16, 5, 5
        flat = stage2.view(-1, 16 * 5 * 5)              # n, 400
        hidden = F.relu(self.fc1(flat))                 # n, 120
        hidden = F.relu(self.fc2(hidden))               # n, 84
        # No softmax here: the logits are returned as-is and the loss
        # function is expected to apply the normalisation.
        return self.fc3(hidden)                         # n, 10
    

# Instantiate the network and move its parameters to the selected device.
model = ConvNet().to(device)

# Objective and optimiser. CrossEntropyLoss works on raw logits (it applies
# the softmax normalisation itself), so the model has no softmax layer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Number of mini-batches per epoch; only used for progress reporting.
n_total_steps = len(train_loader)

# ---- training ----
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Each batch is [batch_size, 3, 32, 32]; inputs and targets must live
        # on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # Forward pass: logits, then the loss against the integer labels.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of the current mini-batch every 2000 steps.
        if not (i + 1) % 2000:
            print(f"epoch: {epoch+1} / {num_epochs}, step: {i+1} / {n_total_steps}, loss: {loss.item():.4f}")

print("Finished Training")



# testing / evaluation
# Switch to inference mode. This network has no dropout or batch-norm layers,
# so it does not change the results here, but it keeps the evaluation correct
# if such layers are added later.
model.eval()

# Gradients are not needed for evaluation, so autograd tracking is disabled.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * len(classes)
    n_class_samples = [0] * len(classes)
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)  # ground-truth labels
        outputs = model(images)

        # torch.max over dim 1 returns (values, indices); the index of the
        # largest logit is the predicted class.
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)  # number of samples in this batch
        n_correct += (predicted == labels).sum().item()

        # Walk over the samples actually present in the batch rather than
        # range(batch_size): the last batch may be smaller, which would raise
        # an IndexError. tolist() also yields plain Python ints for indexing.
        for label, pred in zip(labels.tolist(), predicted.tolist()):
            if label == pred:
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples

    print(f"accuracy: {acc}%")

    for i in range(len(classes)):
        # A class that never occurs in the test data has no defined accuracy;
        # report that instead of dividing by zero.
        if n_class_samples[i] == 0:
            print(f"Accuracy of {classes[i]} : n/a (no samples)")
            continue
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f"Accuracy of {classes[i]} : {acc} %")


Files already downloaded and verified
epoch: 1 / 4, step: 2000 / 12500, loss: 2.3278
epoch: 1 / 4, step: 4000 / 12500, loss: 2.2952
epoch: 1 / 4, step: 6000 / 12500, loss: 2.2783
epoch: 1 / 4, step: 8000 / 12500, loss: 2.2313
epoch: 1 / 4, step: 10000 / 12500, loss: 2.1385
epoch: 1 / 4, step: 12000 / 12500, loss: 2.1999
epoch: 2 / 4, step: 2000 / 12500, loss: 2.4402
epoch: 2 / 4, step: 4000 / 12500, loss: 3.0848
epoch: 2 / 4, step: 6000 / 12500, loss: 2.3668
epoch: 2 / 4, step: 8000 / 12500, loss: 1.7316
epoch: 2 / 4, step: 10000 / 12500, loss: 1.7321
epoch: 2 / 4, step: 12000 / 12500, loss: 1.4863
epoch: 3 / 4, step: 2000 / 12500, loss: 2.3053
epoch: 3 / 4, step: 4000 / 12500, loss: 1.7960
epoch: 3 / 4, step: 6000 / 12500, loss: 1.9008
epoch: 3 / 4, step: 8000 / 12500, loss: 1.0502
epoch: 3 / 4, step: 10000 / 12500, loss: 1.0638
epoch: 3 / 4, step: 12000 / 12500, loss: 1.9130
epoch: 4 / 4, step: 2000 / 12500, loss: 1.6192
epoch: 4 / 4, step: 4000 / 12500, loss: 1.3394
epoch: 4 / 4, st