<a href="https://www.kaggle.com/code/swish9/image-classification-with-cnn-pytorch?scriptVersionId=142902253" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

<h3>I'm learning to build a PyTorch-based Convolutional Neural Network (CNN) for image classification. Using the CIFAR-10 dataset, I'll create a model that can accurately classify images into 10 different categories. This project is all about hands-on experience in deep learning with PyTorch and image classification.</h3>

In [None]:
import matplotlib.pyplot as plt

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision 
from torchvision.transforms import transforms , ToTensor
from torchvision.datasets import ImageFolder as IF
from torch.utils.data import DataLoader
from torch.utils.data import random_split

In [None]:
# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Transform reference: https://pytorch.org/vision/stable/transforms.html
IMAGE_SIZE = 32

# Per-channel mean and standard deviation passed to Normalize
# (presumably the CIFAR-10 training-set statistics — confirm the source).
mean = [0.4914, 0.4822, 0.4465]
std = [0.247, 0.243, 0.261]

# Training pipeline: resize, PIL-level augmentations, tensor conversion,
# normalization, and finally random erasing on the normalized tensor.
composed_train = transforms.Compose([
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomRotation(degrees=20),
    transforms.RandomHorizontalFlip(p=0.1),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    # Three extra single-property jitters (brightness, contrast, saturation,
    # in that order), each applied independently with probability 0.1.
    *(
        transforms.RandomApply([transforms.ColorJitter(**{prop: 0.1})], p=0.1)
        for prop in ("brightness", "contrast", "saturation")
    ),
    transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
    transforms.RandomErasing(p=0.75, scale=(0.02, 0.1), value=1.0, inplace=False),
])

# Evaluation pipeline: deterministic preprocessing only (no augmentation).
composed_test = transforms.Compose([
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

**These lines define a sequence of image transformations to be applied to training data:**
* transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)): Resizes the image to a 32x32 pixel resolution.
* transforms.RandomRotation(20): Randomly rotates the image by an angle drawn from the range [-20, +20] degrees.
* transforms.RandomHorizontalFlip(0.1): Randomly flips the image horizontally with a probability of 0.1.
* transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1): Randomly scales brightness, contrast, and saturation, each by a factor drawn from [0.9, 1.1].
* transforms.RandomApply(...): Each of the three calls wraps a single-property ColorJitter (brightness, contrast, or saturation) and applies it with probability 0.1.
* transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.1): With probability 0.1, sharpens the image by a factor of 2.
* transforms.ToTensor(): Converts the image to a PyTorch tensor.
* transforms.Normalize(mean, std): Normalizes the image data using the provided mean and standard deviation.
* transforms.RandomErasing(p=0.75, scale=(0.02, 0.1), value=1.0, inplace=False): With probability 0.75, erases a random rectangle covering 2%–10% of the image area and fills it with the value 1.0 (applied to the already-normalized tensor).

In [None]:
# Root of the CIFAR-10 image folders on Kaggle, plus its train/test splits.
data_dir = "/kaggle/input/cifar10/cifar10"
train_dir = f"{data_dir}/train"
test_dir = f"{data_dir}/test"

In [None]:
# Read the class names, one per line, from the dataset's labels file.
file_path = "/kaggle/input/cifar10/cifar10/labels.txt"
with open(file_path, "r", encoding="utf-8") as file:
    # Strip surrounding whitespace and skip blank lines so a trailing newline
    # in the file cannot produce an empty class name.
    labels = [line.strip() for line in file if line.strip()]
# Bare expression so the notebook shows the parsed list as the cell output.
labels

# NOTE: torchvision's ImageFolder exposes the same class names as `dataset.classes`.

In [None]:
# Wrap the class-per-subfolder image directories as datasets.
# Only ToTensor() is applied here, so samples are un-normalized float tensors.
# NOTE(review): the composed_train / composed_test pipelines defined earlier
# are not used by these two datasets — confirm whether that is intended.
dataset = IF(root=train_dir, transform=ToTensor())
testing = IF(root=test_dir, transform=ToTensor())

In [None]:
# Shuffled mini-batches of 20 samples drawn from the training ImageFolder.
data_loader = DataLoader(
    dataset=dataset,
    batch_size=20,
    shuffle=True,
)

In [None]:
# Peek at the first training sample: indexing the ImageFolder returns an
# (image, class index) pair, with the image already converted by ToTensor().
img,label = dataset[0] # presumably an airplane image (first class folder) — confirm
print(img.shape,label)
# Bare expression so the notebook renders the raw tensor values as cell output.
img

In [None]:
# Define a function to display images
def show_images(images, labels, class_names=None):
    """Display a batch of images side by side in a single row.

    Args:
        images: Batch (or sequence) of image tensors, each laid out as
            (C, H, W).
        labels: Per-image labels, indexable in the same order as ``images``.
        class_names: Optional sequence mapping an integer label to a
            human-readable class name. When omitted, the raw label is shown
            in the title, exactly as before.
    """
    count = len(images)
    plt.figure(figsize=(20, 16))
    for index, image in enumerate(images):
        label = labels[index]
        if class_names is not None:
            label = class_names[int(label)]
        ax = plt.subplot(1, count, index + 1)
        ax.set_title(f"Label: {label}")
        # imshow expects (H, W, C); detach and move to CPU so tensors that
        # live on a GPU or carry gradients can still be plotted.
        plt.imshow(image.detach().cpu().permute(1, 2, 0))
        plt.axis("off")
    plt.show()

# Load a batch of data and preview it (one batch = 20 images, per the
# DataLoader's batch_size).
# NOTE(review): the loop variable `labels` rebinds the module-level `labels`
# list of class names read from labels.txt; after this cell it refers to the
# batch's label values instead.
for images, labels in data_loader:
    show_images(images, labels)
    break  # Display only the first batch of images

In [None]:
class MyResNet(nn.Module):
    """Small convolutional classifier for 3x32x32 images (e.g. CIFAR-10).

    Despite its name, the network has no residual connections; the name is
    kept so existing references keep working.

    Architecture:
        conv3x3(3->64)   + BatchNorm + ReLU
        conv3x3(64->64)  + BatchNorm + ReLU + 2x2 max-pool   (32x32 -> 16x16)
        conv3x3(64->128) + BatchNorm + ReLU + 2x2 max-pool   (16x16 -> 8x8)
        flatten -> Linear(128*8*8 -> 512) + ReLU -> Dropout(0.5)
        Linear(512 -> num_classes)

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super(MyResNet, self).__init__()

        # The convolutional layers (padding=1 keeps the spatial size unchanged)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        # The fully connected layers; fc1 expects 128 channels of 8x8 features
        self.fc1 = nn.Linear(128 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # dropout layer
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Float tensor of shape (batch, 3, 32, 32).
        """
        x = F.relu(self.bn1(self.conv1(x)))

        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2, 2)  # 32x32 -> 16x16

        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool2d(x, 2, 2)  # 16x16 -> 8x8, the size fc1 was built for

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 128 * 8 * 8), this can never fold spatial positions into
        # the batch axis: a wrong input size raises in fc1 instead of
        # silently producing extra rows.
        x = x.flatten(1)

        # Forward pass through fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [None]:
# Instantiate the model with 10 output classes (one per CIFAR-10 category).
# NOTE(review): the model is not moved to `device` here — confirm that the
# training code calls `model.to(device)` before feeding it batches on that device.
model = MyResNet(num_classes=10)

In [None]:
# Bare expression: the notebook displays the module's repr (its layer listing).
model

In [None]:
# Metric histories, one independent list each (presumably appended to once per
# epoch by the training loop — confirm against that code).
train_losses, validation_losses = [], []
train_accuracies, validation_accuracies = [], []

# Best-model tracking: start with no saved weights and an infinite loss so the
# first "lower than best" comparison against a real loss succeeds.
best_model_weights = None
best_validation_loss = float("inf")

In [None]:
# Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Halve the learning rate after every 10 calls to scheduler.step().
scheduler = StepLR(optimizer=optimizer, step_size=10, gamma=0.5)

In [None]:
# Training budget and early-stopping settings.
num_epochs = 50
early_stopping_patience = 5
# Presumably the running count of epochs without improvement, compared against
# early_stopping_patience by the training loop — confirm against that code.
patience = 0