**Name: Atharva Admile**

Batch no: 06

**1. Image Classification Project**

• Objective:

Build an image classification model using traditional ML and deep learning
techniques.

• Dataset:

MNIST Digit Dataset

• Tasks:

Perform image preprocessing (resize, normalization)

Split dataset into train and test sets

Train at least 2 models (e.g., CNN and Logistic Regression)

Compare model performance

• Evaluation Metrics:

Accuracy

Confusion Matrix

Training vs Validation Accuracy Graph


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix, accuracy_score

# Load and preprocess MNIST.
# ToTensor converts each image to a float tensor; Normalize((0.5,), (0.5,))
# then applies (x - 0.5) / 0.5 to the single grayscale channel.
BATCH_SIZE = 64        # samples per mini-batch for all three loaders
TRAIN_FRACTION = 0.8   # share of the official training set kept for training
SPLIT_SEED = 42        # fixes the train/validation partition across runs

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Split train into train/val.
# The remainder (not a second rounded product) goes to validation so the two
# sizes always add up to len(trainset).
train_size = int(TRAIN_FRACTION * len(trainset))
val_size = len(trainset) - train_size
# A dedicated seeded generator makes the partition reproducible: without it
# every run validates on a different subset, so curves from separate runs
# cannot be compared.
train_dataset, val_dataset = random_split(
    trainset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation and test order is irrelevant
# to the accuracy/confusion-matrix metrics computed from them.
trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

# Logistic Regression model
class LR(nn.Module):
    """Multinomial logistic regression: a single linear layer over flattened pixels.

    The layer maps 28*28 = 784 input values to 10 raw class scores (logits).
    No softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(28*28, 10)

    def forward(self, x):
        """Return logits of shape (N, 10) for input holding N * 784 values.

        Args:
            x: Tensor whose elements can be regrouped into rows of 784,
                e.g. a (N, 1, 28, 28) image batch.

        Returns:
            Tensor of shape (N, 10) with one score per class.
        """
        # reshape instead of view: view raises RuntimeError on non-contiguous
        # input (e.g. a transposed or sliced batch), while reshape returns the
        # same result and only copies when it has to.
        x = x.reshape(-1, 28*28)
        return self.linear(x)

# CNN model
class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages, then two linear layers.

    Both convolutions use 3x3 kernels with padding 1, so only the two 2x2
    poolings shrink the spatial size. ``fc1`` expects 64 * 7 * 7 features per
    sample, which is what a 28x28 single-channel image yields after the two
    poolings. The output is 10 raw class scores (logits); no softmax is
    applied here.
    """

    # Per-sample (channels, height, width) that fc1 was sized for.
    _FEATURE_SHAPE = (64, 7, 7)

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return logits of shape (N, 10) for a batch of single-channel images.

        Args:
            x: Image tensor with one input channel whose spatial size reduces
                to 7x7 after the two pooling stages (e.g. (N, 1, 28, 28)).

        Returns:
            Tensor of shape (N, 10) with one score per class.

        Raises:
            ValueError: If the pooled feature map is not 64x7x7 per sample.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Without this check, a larger image would be silently regrouped by
        # the flatten below into *more* rows than there are samples (a
        # 2x1x56x56 batch would come back as 8 predictions).
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                f"CNN expects pooled features of shape {self._FEATURE_SHAPE} "
                f"per sample, got {tuple(x.shape[-3:])}; "
                "input images should be 1x28x28"
            )
        # reshape rather than view so a non-contiguous feature map is copied
        # instead of raising.
        x = x.reshape(-1, 64 * 7 * 7)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Training function
def train_model(model, optimizer, criterion, epochs=5, train_loader=None, val_loader=None):
    """Train ``model`` and record per-epoch training and validation accuracy.

    Each epoch runs one optimisation pass over the training batches, then one
    gradient-free pass over the validation batches, and prints both
    accuracies.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning a loss
            that supports ``.backward()``.
        epochs: Number of passes over the training data.
        train_loader: Iterable of ``(inputs, labels)`` batches used for
            training. Defaults to the module-level ``trainloader``.
        val_loader: Iterable of ``(inputs, labels)`` batches used for
            validation. Defaults to the module-level ``valloader``.

    Returns:
        ``(train_accs, val_accs)``: two lists with one accuracy in [0, 1] per
        epoch.

    Raises:
        ZeroDivisionError: If a loader yields no samples.
    """
    # Fall back to the notebook-level loaders so existing calls keep working.
    if train_loader is None:
        train_loader = trainloader
    if val_loader is None:
        val_loader = valloader

    train_accs = []
    val_accs = []
    for epoch in range(epochs):
        model.train()
        correct, total = 0, 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Predictions come from the forward pass made *before* this
            # batch's update, so training accuracy is a running figure over
            # the epoch, not a re-evaluation with the final weights.
            # detach() replaces the discouraged ``.data`` attribute.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        train_accs.append(correct / total)

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_accs.append(correct / total)
        print(f'Epoch {epoch+1}: Train Acc {train_accs[-1]:.4f}, Val Acc {val_accs[-1]:.4f}')
    return train_accs, val_accs

# Both models are optimised against the same cross-entropy loss.
criterion = nn.CrossEntropyLoss()

# Baseline: logistic regression trained with plain SGD (no momentum).
lr_model = LR()
optimizer_lr = optim.SGD(
    lr_model.parameters(),
    lr=0.01,
)
print("Training LR:")
lr_train, lr_val = train_model(
    model=lr_model,
    optimizer=optimizer_lr,
    criterion=criterion,
)

# Convolutional network: same learning rate, SGD with momentum 0.9.
# Built only after the baseline has finished training, as before.
cnn_model = CNN()
optimizer_cnn = optim.SGD(
    cnn_model.parameters(),
    lr=0.01,
    momentum=0.9,
)
print("\nTraining CNN:")
cnn_train, cnn_val = train_model(
    model=cnn_model,
    optimizer=optimizer_cnn,
    criterion=criterion,
)

# Evaluate on test
def evaluate(model, name):
    """Score ``model`` on the module-level ``testloader`` and print a short report.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        name: Label used as the prefix of the two printed lines.

    Returns:
        ``(acc, cm)``: the accuracy and confusion matrix computed by
        scikit-learn from the true and predicted labels.
    """
    model.eval()
    targets = []
    guesses = []
    with torch.no_grad():
        for batch_inputs, batch_labels in testloader:
            scores = model(batch_inputs)
            # Predicted class = index of the highest score in each row.
            batch_preds = scores.data.max(dim=1).indices
            targets.extend(batch_labels.numpy())
            guesses.extend(batch_preds.numpy())

    cm = confusion_matrix(targets, guesses)
    acc = accuracy_score(targets, guesses)
    print(f"{name} Test Accuracy: {acc:.4f}")
    print(f"{name} Confusion Matrix:\n{cm}")
    return acc, cm

# Held-out test metrics for each trained model.
lr_acc, lr_cm = evaluate(lr_model, "LR")
cnn_acc, cnn_cm = evaluate(cnn_model, "CNN")

# Plot Training vs Validation Accuracy: one line per (model, split) pair.
# Each history is a per-epoch list, so the x-axis is the zero-based epoch index.
plt.figure(figsize=(10, 5))
for history, legend_label in (
    (lr_train, 'LR Train'),
    (lr_val, 'LR Val'),
    (cnn_train, 'CNN Train'),
    (cnn_val, 'CNN Val'),
):
    plt.plot(history, label=legend_label)
plt.title('Training vs Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()