# IMAGE CLASSIFICATION CNN IN PYTORCH

In [4]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models, transforms


In [5]:
# Preprocessing shared by the train and test splits: convert each image to a
# tensor, then apply (x - 0.5) / 0.5 to every one of the three channels.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [7]:
# CIFAR-10 is downloaded into ./data when it is not already there; both splits
# go through the same `transform` pipeline.
train_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

# Training batches are reshuffled every epoch so SGD does not see a fixed order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
# Evaluation walks the test set in a fixed order: shuffling cannot change an
# aggregate metric and only makes per-batch results irreproducible.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)

100.0%


In [8]:
# Pull the first training sample; indexing the dataset yields an (image, label) pair.
image, label = train_data[0]

In [9]:
# Dimensions of the sampled image tensor (same torch.Size that image.size() returns).
image.shape

torch.Size([3, 32, 32])

In [10]:
# Human-readable label for each integer class index; the position in the list
# is the class index used to look a name up after prediction.
class_names = [
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

In [11]:
class NeuralNet(nn.Module):
    """Small LeNet-style CNN mapping (N, 3, 32, 32) image batches to 10 class logits.

    Per-sample shape walk-through for a 32x32 RGB input:

        conv1 (5x5 kernel, no padding)  (3, 32, 32)  -> (12, 28, 28)
        pool  (2x2, stride 2)           (12, 28, 28) -> (12, 14, 14)
        conv2 (5x5 kernel, no padding)  (12, 14, 14) -> (24, 10, 10)
        pool  (2x2, stride 2)           (24, 10, 10) -> (24, 5, 5)
        flatten                         (24, 5, 5)   -> 600
        fc1 -> fc2 -> fc3               600 -> 120 -> 84 -> 10
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. The pooling layer has no weights,
        # so one instance is reused after both convolutions.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)

        # Fully connected classifier head. The 24 * 5 * 5 = 600 inputs are the
        # flattened feature maps left after the second pooling step.
        self.fc1 = nn.Linear(in_features=24 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw (un-normalised) class scores of shape (N, 10) for batch `x`."""
        features = self.pool(F.relu(self.conv1(x)))         # -> (N, 12, 14, 14)
        features = self.pool(F.relu(self.conv2(features)))  # -> (N, 24, 5, 5)
        flat = features.flatten(start_dim=1)                # -> (N, 600), batch dim kept
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the last layer: the scores are returned as-is.
        return self.fc3(hidden)

In [12]:
# Freshly initialised model that the training loop will fit.
net = NeuralNet()

# Multi-class cross-entropy computed from the network outputs and the integer labels.
loss_function = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the network.
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)

In [13]:
# Run 30 full passes over the training set, reporting the mean batch loss after each pass.
for epoch in range(30):
    print(f'training epoch {epoch}...')

    epoch_loss = 0.0  # sum of the per-batch losses seen in this epoch

    for inputs, labels in train_loader:
        # Gradients accumulate across backward() calls, so clear them per batch.
        optimizer.zero_grad()

        batch_loss = loss_function(net(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()

    # Mean over the number of batches, not the number of samples.
    mean_loss = epoch_loss / len(train_loader)
    print(f'loss: {mean_loss:.4f}')

training epoch 0...
loss: 2.1473
training epoch 1...
loss: 1.6974
training epoch 2...
loss: 1.5022
training epoch 3...
loss: 1.3897
training epoch 4...
loss: 1.3046
training epoch 5...
loss: 1.2302
training epoch 6...
loss: 1.1575
training epoch 7...
loss: 1.0992
training epoch 8...
loss: 1.0476
training epoch 9...
loss: 1.0023
training epoch 10...
loss: 0.9623
training epoch 11...
loss: 0.9244
training epoch 12...
loss: 0.8915
training epoch 13...
loss: 0.8566
training epoch 14...
loss: 0.8262
training epoch 15...
loss: 0.7997
training epoch 16...
loss: 0.7708
training epoch 17...
loss: 0.7451
training epoch 18...
loss: 0.7162
training epoch 19...
loss: 0.6935
training epoch 20...
loss: 0.6695
training epoch 21...
loss: 0.6447
training epoch 22...
loss: 0.6258
training epoch 23...
loss: 0.6033
training epoch 24...
loss: 0.5791
training epoch 25...
loss: 0.5573
training epoch 26...
loss: 0.5398
training epoch 27...
loss: 0.5137
training epoch 28...
loss: 0.4955
training epoch 29...
los

In [14]:
# Persist only the learned parameters (the state dict), not the whole model object.
torch.save(obj=net.state_dict(), f='cifar10_neural_net.pth')

In [15]:
# Rebuild the architecture, then restore the parameters written to disk so the
# notebook can continue from the checkpoint instead of the in-memory model.
net = NeuralNet()
saved_weights = torch.load('cifar10_neural_net.pth')
net.load_state_dict(saved_weights)

<All keys matched successfully>

In [16]:
# Measure top-1 accuracy over the whole test loader.
net.eval()  # switch layers to inference behaviour

correct = 0
total = 0

with torch.no_grad():  # no gradients are needed while scoring
    for images, labels in test_loader:
        # Index of the largest score in each row is the predicted class.
        predicted = net(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total  # percentage of correctly classified images
print(f'Accuracy of the network on the 10000 test images: {accuracy:.2f}%')

Accuracy of the network on the 10000 test images: 68.90%


In [19]:
# Preprocessing for user-supplied images: force a 32x32 resolution first, then
# convert to a tensor and apply (x - 0.5) / 0.5 to each of the three channels.
new_transform = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

def load_image(image_path):
    """Load an image file and turn it into a single-item batch for the network.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (typically a file path).

    Returns:
        The tensor produced by ``new_transform`` with a leading batch
        dimension of size 1 added, i.e. shape (1, 3, 32, 32).
    """
    # The context manager releases the underlying file handle once the pixels
    # have been read, instead of leaving it open until garbage collection.
    with Image.open(image_path) as source:
        # Grayscale, palette and RGBA files would otherwise produce 1 or 4
        # channels, which the 3-channel Normalize step cannot process.
        rgb_image = source.convert('RGB')

    tensor = new_transform(rgb_image)
    return tensor.unsqueeze(0)  # prepend the batch dimension


# Files to classify, and their preprocessed single-image batches in the same order.
image_paths = ['IMG1.jpg', 'IMG2.jpeg', 'IMG3.jpg']
images = list(map(load_image, image_paths))


# Classify each prepared image and print the name of its highest-scoring class.
net.eval()
with torch.no_grad():  # inference only, so skip gradient tracking
    for img in images:
        # Each batch holds one image, so the argmax holds exactly one index.
        class_index = net(img).argmax(dim=1).item()
        print(f'Predicted class: {class_names[class_index]}')

Predicted class: dog
Predicted class: ship
Predicted class: frog
