In [1]:
# CNN-->Convolutional neural network

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings as wn
wn.filterwarnings("ignore")
import seaborn as snb2

In [3]:
pip install torch

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install torchvision

Note: you may need to restart the kernel to use updated packages.


In [5]:
import torch as th

In [6]:
import torch.nn as nn
import torch.nn.functional as F

In [7]:
import torchvision as tv

In [8]:
import torchvision.transforms as tf

In [9]:
import torch.optim as op

In [10]:
# Device confuguration
Device=th.device("cuda" if th.cuda.is_available() else "cpu")
Device

device(type='cpu')

In [11]:
# Hyper-parameters
num_epochs=5
batch_size=4
learning_rate=0.01

In [12]:
# This transformation pipeline is commonly used when preparing images for input into a neural network model.
#  It ensures that the input data is in the correct format and range for training or inference, 
# which can improve the performance and stability of the model.


transform=tf.Compose(
    [tf.ToTensor(),
    tf.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

In [13]:
# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class


train_dataset=tv.datasets.CIFAR10(root="/data",train=True,transform=transform,
                                 download=True)

test_dataset=tv.datasets.CIFAR10(root="/data",train=True,transform=transform,
                                download=True)

Files already downloaded and verified
Files already downloaded and verified


In [14]:
train_dataset.data.shape

(50000, 32, 32, 3)

In [15]:
test_dataset.data.shape

(50000, 32, 32, 3)

In [16]:
train_loader = th.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)

test_loader = th.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False)

In [17]:
train_dataset.classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [18]:
classes="airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"

In [19]:
def imshow(img):
    img=img / 2+0.5
    npimg=img.numpy()
    plt.imshow(np.transpose(npimg,(1, 2, 0)))
    plt.show()

In [20]:
# get some random training images

dataiter=iter(train_loader)
images,label=next(dataiter)
images.shape

torch.Size([4, 3, 32, 32])

In [21]:
#  simple convolutional neural network (CNN) using PyTorch's nn.Sequential module. Let's break down the layers:

model = nn.Sequential(     
    nn.Conv2d(3, 6, 5),#This is a convolutional layer that takes an input with 3 channels (presumably RGB) and applies 6 filters (or kernels) of size 5x5 to produce 6 output channels.
    
    nn.ReLU(),#This is the activation function ReLU (Rectified Linear Unit), which introduces non-linearity to the network by applying the element-wise rectified linear function.
    
    nn.MaxPool2d(2, 2),#This is a max pooling layer that performs max pooling operation with a kernel size of 2x2 and a stride of 2. It downsamples the input spatially, reducing its dimensions by half.
    
    nn.Conv2d(6, 16, 5),#Another convolutional layer that takes 6 input channels (from the previous layer) and applies 16 filters of size 5x5.
    
    nn.ReLU(), #Another ReLU activation function.
    
    nn.MaxPool2d(2, 2),#Another max pooling layer with the same configuration as the previous one.
    
    nn.Flatten(),#This layer flattens the multi-dimensional tensor into a one-dimensional tensor, preparing the data for the fully connected layers.
    
    nn.Linear(16 * 5 * 5, 120),#This is a fully connected (dense) layer that takes the flattened input and produces an output of size 120.
    nn.ReLU(),#Another ReLU activation function.
    nn.Linear(120, 84),#Another fully connected layer with an output size of 84.
    nn.ReLU(),#Another ReLU activation function.
    nn.Linear(84, 10) #The final fully connected layer with an output size of 10, which is typically used for classification tasks where the output corresponds to class scores.
).to(Device) #The device variable should be previously defined as either 'cuda' for GPU or 'cpu' for CPU. This allows the model to utilize hardware acceleration if available.

In [22]:
model

Sequential(
  (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [23]:
criterion=nn.CrossEntropyLoss()
optimizer=op.SGD(model.parameters(),lr=learning_rate)

In [24]:
n_total_steps=len(train_loader)
for epoch in range(num_epochs):
    for i ,(images,label) in enumerate(train_loader):

        image=images.to(Device)
        labels=label.to(Device)
        
        
        #forward pass
        output=model(image)
        loss=criterion(output,label)
        
        
        #backward and optimizer
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 2000 ==0:
            print(f"Epoch [{epoch+1}/{num_epochs}],step [{i+1}/{n_total_steps}],Loss : {loss.item() : 4f}")

            
            
print('Finished Training')
PATH = './cnn.pth'
th.save(model.state_dict(), PATH) # A common PyTorch convention is to save models using either a .pt or .pth file extension.

Epoch [1/5],step [2000/12500],Loss :  2.130988
Epoch [1/5],step [4000/12500],Loss :  1.168796
Epoch [1/5],step [6000/12500],Loss :  1.812507
Epoch [1/5],step [8000/12500],Loss :  1.244899
Epoch [1/5],step [10000/12500],Loss :  1.112775
Epoch [1/5],step [12000/12500],Loss :  1.411159
Epoch [2/5],step [2000/12500],Loss :  1.472214
Epoch [2/5],step [4000/12500],Loss :  0.661800
Epoch [2/5],step [6000/12500],Loss :  0.968943
Epoch [2/5],step [8000/12500],Loss :  0.670709
Epoch [2/5],step [10000/12500],Loss :  1.347756
Epoch [2/5],step [12000/12500],Loss :  1.450011
Epoch [3/5],step [2000/12500],Loss :  0.977402
Epoch [3/5],step [4000/12500],Loss :  1.512808
Epoch [3/5],step [6000/12500],Loss :  1.248181
Epoch [3/5],step [8000/12500],Loss :  1.072377
Epoch [3/5],step [10000/12500],Loss :  0.890947
Epoch [3/5],step [12000/12500],Loss :  2.370414
Epoch [4/5],step [2000/12500],Loss :  0.915767
Epoch [4/5],step [4000/12500],Loss :  1.186538
Epoch [4/5],step [6000/12500],Loss :  1.404722
Epoch [

In [None]:
# It appears you're using a conditional statement to print out the training progress during each epoch. Let's break down what this code does:

# (i+1) % 2000 == 0: This checks if the current iteration index i+1 is a multiple of 2000. This condition is typically used to print the training progress every 2000 steps.

# Here's a breakdown of the placeholders in the f-string:

# {epoch+1}/{num_epochs}: Prints the current epoch number and the total number of epochs.
# {i+1}/{n_total_steps}: Prints the current step number and the total number of steps.
# {loss.item():.4f}: Prints the loss value with four decimal places.
    
# So, when (i+1) % 2000 == 0, the code will print out the training progress, including the epoch, step, and loss value.


In [25]:
with th.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]
    for images, labels in test_loader:
        images = images.to(Device)
        labels = labels.to(Device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = th.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        
        for i in range(batch_size):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

    for i in range(10):
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc} %')

Accuracy of the network: 66.696 %
Accuracy of airplane: 75.22 %
Accuracy of automobile: 80.38 %
Accuracy of bird: 50.68 %
Accuracy of cat: 49.22 %
Accuracy of deer: 63.0 %
Accuracy of dog: 54.2 %
Accuracy of frog: 68.6 %
Accuracy of horse: 76.66 %
Accuracy of ship: 75.0 %
Accuracy of truck: 74.0 %
