In [None]:
!pip install torch torchvision matplotlib opencv-pythonimport os; os.makedirs('model', exist_ok=True)

/bin/bash: -c: line 1: syntax error near unexpected token `'model','
/bin/bash: -c: line 1: `pip install torch torchvision matplotlib opencv-pythonimport os; os.makedirs('model', exist_ok=True)'


In [None]:
%%writefile CNNclassify.py
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import cv2
import os
import sys
import matplotlib.pyplot as plt

class CNN(nn.Module):
    """Three-stage convolutional image classifier.

    Every stage applies Conv2d -> BatchNorm2d -> ReLU -> 2x2 max-pooling.
    The convolutions use "same" padding, so only the pooling layers shrink
    the feature map (by a factor of two each).  The first fully connected
    layer is sized for 128 channels on a 4x4 grid, i.e. for 32x32 inputs.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output logits.
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()

        # Stage 1: 5x5 kernels, in_channels -> 32 feature maps.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 3x3 kernels, 32 -> 64 feature maps.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: 3x3 kernels, 64 -> 128 feature maps.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head; dropout is applied between the two linear layers.
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))

        # Flatten everything except the batch dimension.
        flat = x.view(x.shape[0], -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)

# Human-readable class names; the list position is the predicted class index.
MNIST_LABELS = list(map(str, range(10)))
CIFAR_LABELS = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    If ``optimizer`` is given, the model is switched to training mode and
    updated after every batch.  Otherwise the model is switched to eval mode
    and gradient tracking is disabled.

    The loss is averaged per sample: each batch mean is weighted by the batch
    size, so a smaller final batch does not skew the reported value.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.set_grad_enabled(training):
        for x, y in loader:
            x, y = x.to(device), y.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            if training:
                loss.backward()
                optimizer.step()

            n_samples = y.size(0)
            loss_sum += loss.item() * n_samples
            total += n_samples
            correct += (outputs.argmax(dim=1) == y).sum().item()

    if total == 0:
        # Empty loader: nothing to average.
        return 0.0, 0.0
    return loss_sum / total, 100 * correct / total


def train_model(dataset_name, epochs=15, batch_size=128, lr=0.001, test_batch_size=100):
    """Train the CNN on MNIST or CIFAR-10 and save its weights.

    MNIST images are resized to 32x32 and expanded to three channels so that
    both datasets feed the same network architecture.  After every epoch the
    train and test loss/accuracy are printed.  When training finishes the
    state dict is written to ``model/cnn_<dataset_name>.pth``.

    Args:
        dataset_name: ``"mnist"`` or ``"cifar"``; anything else prints a
            message and returns without side effects.
        epochs: number of passes over the training set.
        batch_size: mini-batch size used for training.
        lr: learning rate of the Adam optimizer.
        test_batch_size: mini-batch size used for evaluation.

    Returns:
        None.
    """
    # Both pipelines scale pixel values from [0, 1] to [-1, 1].
    if dataset_name == "mnist":
        transform = transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        dataset_cls = datasets.MNIST
    elif dataset_name == "cifar":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        dataset_cls = datasets.CIFAR10
    else:
        print("Unknown dataset. Use 'mnist' or 'cifar'.")
        return

    # Only touch the filesystem once the dataset name is known to be valid.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs("model", exist_ok=True)

    trainset = dataset_cls('./data', train=True, download=True, transform=transform)
    testset = dataset_cls('./data', train=False, download=True, transform=transform)
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    testloader = DataLoader(testset, batch_size=test_batch_size, shuffle=False)

    model = CNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(model, trainloader, criterion, device, optimizer)
        test_loss, test_acc = _run_epoch(model, testloader, criterion, device)

        print(f"Epoch {epoch+1:02d}: "
              f"Train Loss={train_loss:.4f}, Train Acc={train_acc:.2f}%, "
              f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.2f}%")

    torch.save(model.state_dict(), f"model/cnn_{dataset_name}.pth")
    print(f"Model saved to model/cnn_{dataset_name}.pth")

def _save_image_grid(images, out_path, dpi, cols=8):
    """Save ``images`` as a grid of axis-free panels in a single image file.

    ``images`` is a sequence of 2-D arrays (drawn in grayscale) or H x W x 3
    arrays (drawn as RGB) with values already scaled for display.  Panels are
    filled row by row; unused cells in the last row show an all-zero image.
    The figure is closed after saving so repeated calls do not accumulate
    open figures.
    """
    if len(images) == 0:
        return

    rows = int(np.ceil(len(images) / cols))
    # squeeze=False keeps ``axes`` two-dimensional even for a single row.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    blank = np.zeros(images[0].shape[:2])
    for idx, ax in enumerate(axes.flat):
        ax.axis('off')
        if idx < len(images):
            panel = images[idx]
            ax.imshow(panel, cmap=None if panel.ndim == 3 else 'gray')
        else:
            ax.imshow(blank, cmap='gray')
    fig.tight_layout()
    fig.savefig(out_path, bbox_inches='tight', dpi=dpi)
    plt.close(fig)


def test_model(img_path):
    """Classify one image with every saved model and visualize the first conv layer.

    The image is read with OpenCV, resized to 32x32, converted to RGB and
    scaled to [-1, 1].  For each checkpoint found under ``model/`` the
    function prints the predicted label and writes two files:

    * ``CONV_filters_<dataset>.png`` - the first-layer kernels, each
      min-max normalized on its own;
    * ``CONV_rslt_<dataset>.png`` - the first-layer outputs for this image,
      min-max normalized jointly over all channels.

    Args:
        img_path: path of the image file to classify.

    Returns:
        None.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    img = cv2.imread(img_path)
    if img is None:
        print("Image not found.")
        return

    img = cv2.resize(img, (32, 32))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # HWC uint8 -> 1xCxHxW float in [0, 1], then shifted and scaled to [-1, 1].
    t = torch.tensor(img.transpose(2, 0, 1)).float().unsqueeze(0) / 255.0
    t = ((t - 0.5) / 0.5).to(device)

    found_model = False
    for ds in ["mnist", "cifar"]:
        path = f"model/cnn_{ds}.pth"
        if not os.path.exists(path):
            continue
        found_model = True

        m = CNN().to(device)
        m.load_state_dict(torch.load(path, map_location=device))
        m.eval()

        # Inference only: no autograd graph is needed for either pass.
        with torch.no_grad():
            pred = m(t).argmax(dim=1)
            feature_maps = m.conv1(t).cpu().squeeze(0)

        lbl = (MNIST_LABELS if ds == "mnist" else CIFAR_LABELS)[pred.item()]
        print(f"Prediction using {ds.upper()} model → {lbl}")

        # First-layer kernels: RGB kernels are drawn in color, others in gray.
        kernels = m.conv1.weight.detach().cpu()
        filter_images = []
        for flt in kernels:
            if flt.shape[0] == 3:
                flt = flt.permute(1, 2, 0).numpy()
            else:
                flt = flt.squeeze(0).numpy()
            filter_images.append((flt - flt.min()) / (flt.max() - flt.min() + 1e-8))
        _save_image_grid(filter_images, f"CONV_filters_{ds}.png", dpi=300)

        # First-layer activations, scaled with one common min/max.
        feature_maps = (feature_maps - feature_maps.min()) / (feature_maps.max() - feature_maps.min() + 1e-8)
        _save_image_grid(list(feature_maps.numpy()), f"CONV_rslt_{ds}.png", dpi=1000)

    if not found_model:
        print("No trained model found in 'model/'. Train a model first.")

if __name__ == "__main__":
    # Command-line entry point:
    #   python CNNclassify.py train --mnist|--cifar
    #   python CNNclassify.py test <image_path>
    USAGE = (
        "Usage:\n"
        "  python CNNclassify.py train --mnist|--cifar\n"
        "  python CNNclassify.py test <image_path>"
    )

    # Both commands need exactly one argument after the command name.
    if len(sys.argv) < 3:
        print(USAGE)
        sys.exit(1)

    command, argument = sys.argv[1], sys.argv[2]
    if command == "train":
        # Accept "--mnist", "-mnist" or "mnist"; only leading dashes are removed.
        train_model(argument.lstrip("-").lower())
    elif command == "test":
        test_model(argument)
    else:
        print(f"Unknown command '{command}'.")
        print(USAGE)
        sys.exit(1)

Overwriting CNNclassify.py


In [None]:
# Train one model per dataset; each run prints per-epoch metrics and saves
# its weights to model/cnn_<dataset>.pth.
!python CNNclassify.py train --mnist
!python CNNclassify.py train --cifar

Epoch 01: Train Loss=0.1550, Train Acc=95.21%, Test Loss=0.0389, Test Acc=98.72%
Epoch 02: Train Loss=0.0593, Train Acc=98.19%, Test Loss=0.0320, Test Acc=98.90%
Epoch 03: Train Loss=0.0483, Train Acc=98.47%, Test Loss=0.0387, Test Acc=98.64%
Epoch 04: Train Loss=0.0413, Train Acc=98.79%, Test Loss=0.0223, Test Acc=99.25%
Epoch 05: Train Loss=0.0340, Train Acc=99.00%, Test Loss=0.0333, Test Acc=98.96%
Epoch 06: Train Loss=0.0302, Train Acc=99.11%, Test Loss=0.0272, Test Acc=99.18%
Epoch 07: Train Loss=0.0286, Train Acc=99.17%, Test Loss=0.0195, Test Acc=99.34%
Epoch 08: Train Loss=0.0243, Train Acc=99.27%, Test Loss=0.0315, Test Acc=99.09%
Epoch 09: Train Loss=0.0225, Train Acc=99.32%, Test Loss=0.0270, Test Acc=99.21%
Epoch 10: Train Loss=0.0197, Train Acc=99.41%, Test Loss=0.0237, Test Acc=99.41%
Epoch 11: Train Loss=0.0195, Train Acc=99.38%, Test Loss=0.0237, Test Acc=99.23%
Epoch 12: Train Loss=0.0168, Train Acc=99.47%, Test Loss=0.0204, Test Acc=99.35%
Epoch 13: Train Loss=0.0164,

In [None]:
# Colab-only: upload the image that the test cell will classify.
from google.colab import files
uploaded = files.upload()

Saving 10647746.png to 10647746.png


In [None]:
# Classify the uploaded image with every saved model; the script also writes
# CONV_filters_<dataset>.png and CONV_rslt_<dataset>.png for each model.
!python CNNclassify.py test 10647746.png

Prediction using MNIST model → 8
Prediction using CIFAR model → horse


In [None]:
# Bundle the script, the model directory and the CONV_rslt_* images into one
# archive, then download it (Colab only). CONV_filters_* images are not
# matched by the glob and are therefore not included.
!zip -r Mehta_Dharm_HW1.zip CNNclassify.py model CONV_rslt_*.png
from google.colab import files
files.download('Mehta_Dharm_HW1.zip')

updating: CNNclassify.py (deflated 69%)
updating: model/ (stored 0%)
updating: model/cnn_cifar.pth (deflated 7%)
updating: model/cnn_mnist.pth (deflated 7%)
updating: CONV_rslt_cifar.png (deflated 81%)
updating: CONV_rslt_mnist.png (deflated 81%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
def visualize_conv_layer(conv_output, dataset_name):
    """Save every channel of a conv layer's output as one square grid image.

    Each channel is min-max normalized on its own and drawn in grayscale.
    The grid is the smallest square that holds all channels (6x6 for 32
    channels); leftover cells show an all-zero image.  The figure is written
    to ``CONV_rslt_<dataset_name>.png`` and then closed.

    Args:
        conv_output: tensor holding the layer output for a single image; a
            leading batch dimension of size 1 is squeezed away
            (assumes shape (1, C, H, W) - TODO confirm against the caller).
        dataset_name: inserted into the output file name.

    Returns:
        None.
    """
    # Imported locally so this cell works on a fresh kernel: the notebook
    # itself never imports these modules (only the written script does).
    import matplotlib.pyplot as plt
    import numpy as np

    maps = conv_output.squeeze(0).cpu().detach().numpy()
    n_maps = len(maps)
    if n_maps == 0:
        return

    side = int(np.ceil(np.sqrt(n_maps)))
    # squeeze=False keeps ``axes`` two-dimensional even for a 1x1 grid.
    fig, axes = plt.subplots(side, side, figsize=(2 * side, 2 * side), squeeze=False)
    blank = np.zeros_like(maps[0])

    for idx, ax in enumerate(axes.flat):
        if idx < n_maps:
            # Normalize each channel to [0, 1] for display.
            channel = maps[idx]
            channel = (channel - channel.min()) / (channel.max() - channel.min() + 1e-8)
            ax.imshow(channel, cmap='gray')
        else:
            ax.imshow(blank, cmap='gray')
        ax.axis('off')

    fig.tight_layout()
    fig.savefig(f'CONV_rslt_{dataset_name}.png', bbox_inches='tight', dpi=100)
    plt.close(fig)
