# CIFAR10 CNN
## Importing libraries 

In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib as plt
import torchvision
import torchsummary
import tqdm
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import random_split
from torch.utils.data.sampler import SubsetRandomSampler

## GPU: GTX 1660 Ti

In [2]:
# Check if we are using GPU as device, and later we can train using my GTX 1660 Ti
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
devNumber = torch.cuda.current_device()
print(f"Current Device: {devNumber}")
devName = torch.cuda.get_device_name(devNumber)
print(f"GPU name is {devName}")

Device: cuda
Current Device: 0
GPU name is NVIDIA GeForce GTX 1660 Ti


# Collecting the CIFAR10 Dataset from Torchvision
Hyperparameters to tune:
1. Batch size:
    Change the number of batches, 8, 16, 32, 64, 128


In [3]:
# Convert original CIFAR10 images to Pytorch tensors and normalize the pixels
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# HYPERPARAMETER: Batch size
batch_size = 4

# Create the training set and data loader to handle batches
train_set = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, num_workers=2
)

# Create the test set
test_set = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform
)

test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, num_workers=2
)

# Class labels for CIFAR10
classes = (
    'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'
)

Files already downloaded and verified
Files already downloaded and verified


### Shape and Show the CIFAR10 Images

In [4]:
print("Training Set:")
print(f"\t Samples: {train_set.data.shape}")
print(f"\t Labels: {len(train_set.targets)}")
print('Test Set:')
print(f"\t Samples: {test_set.data.shape}")
print(f"\t Labels: {len(test_set.targets)}")

def show_image(img):
    # Unnormalize the image
    img = img / 2 + 0.5
    # Convert the tensor to a numpy array
    npimg = img.numpy()
    # Change dimensions for display for matplotlib height width channel
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  
    plt.show()


# Get a batch of images
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Lets see the images
show_image(torchvision.utils.make_grid(images))

# Print the label for the image
print(' '.join(f'{train_set.classes[label]}' for label in labels))

Training Set:
	 Samples: (50000, 32, 32, 3)
	 Labels: 50000
Test Set:
	 Samples: (10000, 32, 32, 3)
	 Labels: 10000


AttributeError: module 'matplotlib' has no attribute 'imshow'

# Convolutional Neural Network
Hyperparameters to tune:
1. Number of Filters:
    Change the number of output channels in nn.Conv2d (e.g., from 6 to 12 or 16).
2. Kernel Size:
    Adjust the size of the convolution kernel (e.g., from 5x5 to 3x3 or 7x7).
3. Stride:
    Change the step size for sliding the kernel over the image (e.g., stride=1 or stride=2).
4. Padding:
    Add padding to preserve the spatial dimensions (e.g., padding=1).
5. Pool Size:
    Change the size of the pooling window (e.g., from 2x2 to 3x3).
6. Pooling Type:
    Try different pooling layers like nn.AvgPool2d instead of nn.MaxPool2d.
7. Stride in Pooling:
    Adjust the stride (e.g., stride=1 or stride=2).
8. Number of Neurons:
    Increase or decrease the number of neurons in fc1 and fc2 (e.g., 120 to 256 or 84 to 128).
9. Number of Layers:
    Add or remove fully connected layers to increase or decrease model capacity.
10. Different Activation Functions:
    Use relu, sigmoid, tanh, elu
11. Dropout and Rate:
    Add dropout and rates to layers to reduce overfitting

### Formula for Convolution Layer Shape (Assume Square)
$$K: \text{Kernel Size (square)}$$
$$S: \text{Stride Size (move by)}$$
$$P: \text{Padding Size (edges)}$$
$$Shape=floor(\frac{H-K+2P}{S})+1$$
### Formula for Pooling Layer Shape (Assume Square)
$$Shape=floor(\frac{H-K}{S})+1$$

In [5]:
def conv_output_shape(input_size, kernel_size, stride=1, padding=0):
    return (input_size - kernel_size + 2 * padding) // stride + 1

def pool_output_shape(input_size, kernel_size, stride=None):
    if stride is None:
        stride = kernel_size
    return (input_size - kernel_size) // stride + 1

example = conv_output_shape(input_size=32, kernel_size=5, stride=1, padding=0)
print(f"Convolution Output Size: {example}")
print(f"Pooling Output Size: {pool_output_shape(example, kernel_size=2)}")


Convolution Output Size: 28
Pooling Output Size: 14


In [89]:
# I will use the sequential method from NN in order to keep everything more organized
class ConvNN(nn.Module):
    def __init__(self):
        super().__init__()
        # Convolutional layers detect patterns like edges, textures, and shapes in the image
        self.convolution_1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        # Pooling reduces the size of the feature maps, decreasing computational cost and helping the network focus on the most important features
        self.convolution_2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.convolution_3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.convolution_4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.convolution_5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=4)
        )
        
        # Fully Connected Layers
        # self.fc_1 = nn.Sequential(
        #     nn.Flatten(),
        #     nn.Linear(512 * 4 * 4, 120),
        #     nn.ReLU()
        # )
        # self.fc_2 = nn.Sequential(
        #     nn.Linear(120, 84),
        #     nn.ReLU()
        # )
        self.fc_classify = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 10)
        )
        
    def forward(self, x):
        x = self.convolution_1(x)
        x = self.convolution_2(x)
        x = self.convolution_3(x)
        x = self.convolution_4(x)
        x = self.convolution_5(x)
        # x = self.fc_1(x)
        # x = self.fc_2(x)
        x = self.fc_classify(x)
        return x

### Loss Function and Optimizer
Hyperparameters to tune:
1. Optimizer:
    Try Adam, Adagrad
2. Adjust Learning Rate:
    Try .005, .0001, .0005, .01, .05

In [96]:
# Make a CNN object using the GPU
cnn = ConvNN().to(device)
# Get loss function and optimizer for it
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001, weight_decay=1e-4)
# Lets see some params
torchsummary.summary(cnn, (3, 32, 32), batch_size=32)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [32, 64, 32, 32]           1,792
       BatchNorm2d-2           [32, 64, 32, 32]             128
              ReLU-3           [32, 64, 32, 32]               0
            Conv2d-4          [32, 128, 32, 32]          73,856
       BatchNorm2d-5          [32, 128, 32, 32]             256
              ReLU-6          [32, 128, 32, 32]               0
         MaxPool2d-7          [32, 128, 16, 16]               0
            Conv2d-8          [32, 256, 16, 16]         295,168
       BatchNorm2d-9          [32, 256, 16, 16]             512
             ReLU-10          [32, 256, 16, 16]               0
        MaxPool2d-11            [32, 256, 8, 8]               0
           Conv2d-12            [32, 512, 8, 8]       1,180,160
      BatchNorm2d-13            [32, 512, 8, 8]           1,024
             ReLU-14            [32, 51

# Training the Convolutional Neural Network
Hyperparameters to tune:
1. Epochs
    Try 100

In [None]:
epochs = 5

for epoch in range(epochs):
    train_loss = 0.0
    for i, data in enumerate(tqdm.tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}")):
        # data is a list [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero out the parameter gradients
        optimizer.zero_grad()
        # Forward propogate
        outputs = cnn(inputs)
        # Get loss
        loss = criterion(outputs, labels)
        # Backward propogate
        loss.backward()
        optimizer.step()
        # Record loss
        train_loss += loss.item() 
         
        # Get Stats after every 2000 mini-batches, or 2000 * batch_size inputs
        if i % 2000 == 1999:    
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {train_loss / 2000:.3f}')
            train_loss = 0.0


print("Training complete!")

Epoch 1/5:  16%|█▌        | 2015/12500 [00:31<01:54, 91.18it/s]

[1,  2000] loss: 2.108


Epoch 1/5:  24%|██▍       | 3002/12500 [00:42<01:43, 91.90it/s]

In [92]:
# Saving the model
PATH = "./cifar_cnn.pth"
torch.save(cnn.state_dict(), PATH)

# Testing the Model

In [93]:
cnn_1 = ConvNN().to(device)
cnn_1.load_state_dict(torch.load(PATH, weights_only=True))

<All keys matched successfully>

### Get accuracy using test loader

In [94]:
def test_accuracy(net, testloader):
    num_correct = 0
    # Not training, only testing, so no gradient
    with torch.no_grad():
        # We are in evaluation mode using our CNN
        net.eval()
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            # classify our test images using our CNN
            outputs = net(images).to(device)
            # the class with the highest value is what we choose as prediction
            predicted = torch.max(outputs.data, 1)[1]
            # Check if labels match the predicted and sum up all correct and make it an int
            num_correct += (predicted == labels).sum().item()
    # Return the percentage correctly classified
    percent_correct = num_correct / len(testloader.dataset)
    percent_correct = percent_correct * 100
    print(f"Accuracy for entire test set: {percent_correct:.2f} %")
    
    

    
def test_accuracy_per_class(net, testloader):
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    # again no gradients needed
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images).to(device)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: {classname:5s} is {accuracy:.2f} %")

### Lets see the accuracy on test loader

In [95]:
test_accuracy(cnn_1, test_loader)
test_accuracy_per_class(cnn_1, test_loader)

Accuracy for entire test set: 79.49 %
Accuracy for class: plane is 78.00 %
Accuracy for class: car   is 93.50 %
Accuracy for class: bird  is 75.60 %
Accuracy for class: cat   is 69.70 %
Accuracy for class: deer  is 73.10 %
Accuracy for class: dog   is 68.30 %
Accuracy for class: frog  is 88.90 %
Accuracy for class: horse is 73.40 %
Accuracy for class: ship  is 87.30 %
Accuracy for class: truck is 87.10 %
