<a href="https://colab.research.google.com/github/JoshBoii/Convolutional-Neural-Network-/blob/main/CIFAR_10_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Import necessary libraries and modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from itertools import product
import numpy as np

In [62]:
# Define hyper-parameters
num_classes = 10
batch_size = 128
num_epochs = 10

In [63]:
# Define hyperparameters to tune
num_blocks_values = [2, 3, 4]
num_convs_values = [1, 2, 3]
learning_rate_values = [0.0001, 0.001, 0.01]

# Create a list of all hyperparameter combinations
hyperparams = list(product(num_blocks_values, num_convs_values, learning_rate_values))

In [64]:
#Read the dataset and create dataloaders
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [65]:
# Load CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

Files already downloaded and verified
Files already downloaded and verified


In [66]:
# Create dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [67]:
# Define the model architecture - added complexity 
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, num_convs, stride=1):
        super(BasicBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        layers = []
        for i in range(num_convs - 1):
            layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(inplace=True))
        self.layers = nn.Sequential(*layers)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.layers(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

class CNN(nn.Module):
    def __init__(self, num_blocks, num_convs, num_classes):
        super(CNN, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layers = nn.ModuleList([BasicBlock(self.in_channels, 64, num_convs), 
                                     BasicBlock(64, 64, num_convs),
                                     BasicBlock(64, 64, num_convs)])

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        for layer in self.layers:
            out = layer(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)

        return out


The line `device = torch.device("cuda" if torch.cuda.is_available() else "cpu")` checks if a GPU with CUDA support is available on your system. If it is available, it will use the GPU ("cuda") as the device. If not, it will fall back to using the CPU ("cpu"). This allows the code to run on systems with or without GPU support.

In [68]:
# Define a function to train and evaluate the model with a given set of hyperparameters
def train_and_evaluate_model(num_blocks, num_convs, learning_rate):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define the model architecture
    model = CNN(num_blocks, num_convs, num_classes).to(device)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Add a learning rate scheduler
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    # Initialize lists for recording statistics
    train_losses = []
    train_accs = []
    test_accs = []

    # Training loop
    for epoch in range(num_epochs):
        train_loss = 0
        train_total = 0
        train_correct = 0

        # Step the scheduler
        scheduler.step()

        for batch_idx, (data, targets) in enumerate(train_loader):
            data = data.to(device)
            targets = targets.to(device)

            # Forward
            scores = model(data)
            loss = criterion(scores, targets)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Calculate accuracy
            _, predicted = scores.max(1)
            train_total += targets.size(0)
            train_correct += predicted.eq(targets).sum().item()

            # Update loss
            train_loss += loss.item()

            if (batch_idx+1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), loss.item()))

        # Record statistics
        train_losses.append(train_loss/len(train_loader))
        train_acc = 100 * train_correct / train_total
        train_accs.append(train_acc)

        # Evaluate model on test set
        with torch.no_grad():
            test_correct = 0
            test_total = 0
            for data, targets in test_loader:
                data = data.to(device)
                targets = targets.to(device)

                scores = model(data)
                _, predicted = scores.max(1)
                test_total += targets.size(0)
                test_correct += predicted.eq(targets).sum().item()

            test_acc = 100 * test_correct / test_total
            test_accs.append(test_acc)
            print('Epoch [{}/{}], Test Accuracy: {:.4f}%'
                  .format(epoch+1, num_epochs, test_acc))

    # Return the final test accuracy
    return test_accs[-1]


In [70]:
print("Device being used:", torch.cuda.get_device_name(device))


RuntimeError: ignored

In [69]:
# Perform grid search
best_accuracy = 0
best_hyperparams = None

for hyperparam_set in hyperparams:
    num_blocks, num_convs, learning_rate = hyperparam_set
    accuracy = train_and_evaluate_model(num_blocks, num_convs, learning_rate)
    print('Hyperparameters:', hyperparam_set)
    print('Accuracy:', accuracy)
    print()

    # Update best hyperparameters
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_hyperparams = hyperparam_set

print('Best hyperparameters:', best_hyperparams)
print('Best accuracy:', best_accuracy)

Epoch [1/10], Step [100/391], Loss: 1.9037
Epoch [1/10], Step [200/391], Loss: 1.7366
Epoch [1/10], Step [300/391], Loss: 1.7249
Epoch [1/10], Test Accuracy: 42.9100%


KeyboardInterrupt: ignored

In [None]:
# Train the final model using the best hyperparameters
best_num_blocks, best_num_convs, best_learning_rate = best_hyperparams
final_accuracy = train_and_evaluate_model(best_num_blocks, best_num_convs, best_learning_rate)
print('Final accuracy with best hyperparameters:', final_accuracy)

# Save the model (optional)
torch.save(model.state_dict(), 'best_model.pth')


In [None]:
# Load the saved model (optional)
loaded_model = CNN(best_num_blocks, best_num_convs, num_classes).to(device)
loaded_model.load_state_dict(torch.load('best_model.pth'))
loaded_model.eval()  # Set the model to evaluation mode

# Use the loaded model for inference (optional)
with torch.no_grad():
    # Example: Get predictions for a single image from the test set
    sample_image, sample_label = test_dataset[0]
    sample_image = sample_image.unsqueeze(0).to(device)  # Add a batch dimension and move to device
    scores = loaded_model(sample_image)
    _, predicted_label = scores.max(1)
    print('Predicted label:', predicted_label.item())
    print('True label:', sample_label)


In [None]:
# Print final test accuracy
print('Final Test Accuracy: {:.4f}%'.format(test_accs[-1]))

# Save accuracies to file
np.savetxt('train_accs.txt', train_accs)
np.savetxt('test_accs.txt', test_accs)


**END**