<a href="https://colab.research.google.com/github/JebinAbraham/DeepLearning-CIFAR10-ImageClassification/blob/main/Deep_Learning_Image_Classification_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 👈 resize to 224x224
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load CIFAR-10
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

classes = trainset.classes
print("Classes:", classes)

Using device: cuda


100%|██████████| 170M/170M [00:13<00:00, 12.7MB/s]


Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [3]:
# Flattened image size
input_size = 32 * 32 * 3
hidden_size = 512
num_classes = 10
sequence_length = 32

In [None]:
# Feedforward Neural Network
class FFN(nn.Module):
    def __init__(self):
        super(FFN, self).__init__()
        self.fc1 = nn.Linear(224 * 224 * 3, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
# Recurrent Neural Network
class RNNModel(nn.Module):
    def __init__(self):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(224 * 3, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # x shape: [B, 3, 224, 224]
        x = x.permute(0, 2, 3, 1)  # [B, 224, 224, 3]
        x = x.reshape(x.size(0), 224, -1)  # [B, 224, 3*224] = [B, seq_len, features]
        out, _ = self.rnn(x)  # out: [B, seq_len, hidden]
        out = out[:, -1, :]   # last time step
        return self.fc(out)   # [B, num_classes]

# LSTM
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size=224 * 3, hidden_size=512, batch_first=True)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        # Input: [B, 3, 224, 224]
        x = x.permute(0, 2, 3, 1)         # -> [B, 224, 224, 3]
        x = x.reshape(x.size(0), 224, -1) # -> [B, 224, 672]
        out, (hn, cn) = self.lstm(x)      # -> [B, 224, 512]
        out = out[:, -1, :]               # Use last time step -> [B, 512]
        out = self.fc(out)                # -> [B, 10]
        return out




In [4]:
def train(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)  # output should be [batch_size, 10]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
    return total_loss / len(loader), 100. * correct / total

def evaluate(model, loader, criterion):
    model.eval()
    loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss += criterion(outputs, labels).item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return loss / len(loader), 100. * correct / total

In [None]:
import torch.nn.functional as F
results = {}
models = {

    'RNN': RNNModel().to(device),
    'LSTM': LSTMModel().to(device),
    'FFN': FFN().to(device),
}

for name, model in models.items():
    print(f"\nTraining {name}...")
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    train_loss, train_acc = train(model, trainloader, optimizer, criterion)
    test_loss, test_acc = evaluate(model, testloader, criterion)

    results[name] = {
        "Train Loss": train_loss,
        "Train Acc": train_acc,
        "Test Loss": test_loss,
        "Test Acc": test_acc
    }

# Show results
import pandas as pd
df = pd.DataFrame(results).T
df


Training RNN...

Training LSTM...

Training FFN...


Unnamed: 0,Train Loss,Train Acc,Test Loss,Test Acc
RNN,2.195028,19.724,2.189314,18.88
LSTM,1.960481,28.266,1.839421,32.93
FFN,2.123849,33.174,1.688199,40.39


Fine Tunining

In [None]:
class TunedFFN(nn.Module):
    def __init__(self):
        super(TunedFFN, self).__init__()
        self.fc1 = nn.Linear(224 * 224 * 3, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.drop1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.drop2 = nn.Dropout(0.5)

        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.drop1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.drop2(x)
        return self.fc3(x)

In [5]:
class TunedLSTM(nn.Module):
    def __init__(self):
        super(TunedLSTM, self).__init__()
        self.lstm = nn.LSTM(224 * 3, 512, batch_first=True, dropout=0.3, num_layers=2)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = x.permute(0, 2, 3, 1)
        x = x.reshape(x.size(0), 224, -1)
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        return self.fc(out)

In [None]:
class TunedRNN(nn.Module):
    def __init__(self):
        super(TunedRNN, self).__init__()
        self.rnn = nn.RNN(224 * 3, 512, batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = x.permute(0, 2, 3, 1)
        x = x.reshape(x.size(0), 224, -1)
        out, _ = self.rnn(x)
        out = out[:, -1, :]
        return self.fc(out)

In [6]:
def train_finetuned(model, trainloader, testloader, name, epochs=10):
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        train_loss, train_acc = train(model, trainloader, optimizer, criterion)
        test_loss, test_acc = evaluate(model, testloader, criterion)
        scheduler.step()

        print(f"[{name}] Epoch {epoch+1}/{epochs} | Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}% | LR: {scheduler.get_last_lr()[0]:.5f}")

In [None]:
tuned_ffn = TunedFFN()
train_finetuned(tuned_ffn, trainloader, testloader, name="Tuned FFN", epochs=10)

[Tuned FFN] Epoch 1/10 | Train Acc: 34.81% | Test Acc: 43.10% | LR: 0.00100
[Tuned FFN] Epoch 2/10 | Train Acc: 39.61% | Test Acc: 44.81% | LR: 0.00100
[Tuned FFN] Epoch 3/10 | Train Acc: 40.23% | Test Acc: 44.24% | LR: 0.00100
[Tuned FFN] Epoch 4/10 | Train Acc: 40.10% | Test Acc: 45.06% | LR: 0.00100
[Tuned FFN] Epoch 5/10 | Train Acc: 39.84% | Test Acc: 44.19% | LR: 0.00050
[Tuned FFN] Epoch 6/10 | Train Acc: 41.95% | Test Acc: 47.16% | LR: 0.00050
[Tuned FFN] Epoch 7/10 | Train Acc: 42.26% | Test Acc: 46.88% | LR: 0.00050
[Tuned FFN] Epoch 8/10 | Train Acc: 42.01% | Test Acc: 46.29% | LR: 0.00050
[Tuned FFN] Epoch 9/10 | Train Acc: 42.50% | Test Acc: 46.77% | LR: 0.00050
[Tuned FFN] Epoch 10/10 | Train Acc: 42.11% | Test Acc: 47.66% | LR: 0.00025


In [None]:
# Instantiate fine-tuned LSTM model
tuned_lstm = TunedLSTM()

# Train it
train_finetuned(tuned_lstm, trainloader, testloader, name="Tuned LSTM", epochs=10)

In [None]:
# Instantiate fine-tuned RNN model
tuned_rnn = TunedRNN()

# Train it
train_finetuned(tuned_rnn, trainloader, testloader, name="Tuned RNN", epochs=10)