In [None]:
import torch, torch.nn as nn
import numpy as np
import torchvision.transforms as transforms
from PIL import Image
from google.colab import drive, files

In [None]:
# Mount Google Drive at /content/drive; the checkpoint paths used later in this
# notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
class CNN(nn.Module):
    """Baseline four-stage convolutional classifier.

    Every stage is Conv3x3 -> BatchNorm -> ReLU -> 2x2 MaxPool, widening the
    channels 3 -> 16 -> 32 -> 64 -> 128. The resulting feature map is
    average-pooled to a fixed 4x4 grid, so the fully connected head does not
    depend on the spatial resolution of the input.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
    """

    def __init__(self, num_classes=3):
        super().__init__()

        # Modules are collected in one flat list so their nn.Sequential indices
        # (and therefore the state_dict keys) stay features.0 ... features.15.
        widths = (3, 16, 32, 64, 128)
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        self.features = nn.Sequential(*stages)

        # Fixed 4x4 spatial output regardless of the incoming feature-map size.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

        flat_dim = widths[-1] * 4 * 4
        self.classifier = nn.Sequential(
            nn.Linear(flat_dim, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits, one row per input sample."""
        pooled = self.adaptive_pool(self.features(x))
        return self.classifier(pooled.flatten(start_dim=1))


In [None]:
class CompactVGG(nn.Module):
    """VGG-style classifier: four double-conv blocks, global average pooling, linear head.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
    """

    @staticmethod
    def _double_conv(c_in: int, c_out: int) -> nn.Sequential:
        """Two Conv3x3 -> BatchNorm -> ReLU units followed by a 2x2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),  # halves height and width
        )

    def __init__(self, num_classes: int = 3):
        super().__init__()

        # One nested nn.Sequential per entry, so parameters keep the
        # features.<block>.<layer> naming. Each block halves the resolution
        # (e.g. 256 -> 128 -> 64 -> 32 -> 16 for a 256x256 input).
        channel_plan = [(3, 32), (32, 64), (64, 128), (128, 256)]
        self.features = nn.Sequential(
            *(self._double_conv(c_in, c_out) for c_in, c_out in channel_plan)
        )

        # Collapse each of the 256 channels to a single value.
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits, one row per input sample."""
        feats = self.features(x)
        pooled = torch.flatten(self.gap(feats), start_dim=1)
        return self.classifier(pooled)

In [None]:
class HybridModel(nn.Module):
    """CNN with a double-conv first stage followed by three single-conv stages.

    Stage 1 applies two Conv3x3 -> BatchNorm -> ReLU units before pooling;
    stages 2-4 apply one unit each. Every stage ends in a 2x2 max-pool, and the
    final 256-channel map is globally average-pooled before the linear head.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
    """

    def __init__(self, num_classes=3):
        super().__init__()

        def conv_unit(c_in, c_out):
            # Conv3x3 (same padding) -> BatchNorm -> ReLU
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            ]

        # Stage 1: two conv units at the highest resolution, then downsample.
        layers = conv_unit(3, 32) + conv_unit(32, 32) + [nn.MaxPool2d(kernel_size=2)]

        # Stages 2-4: one conv unit each, downsampling after every stage.
        for c_in, c_out in ((32, 64), (64, 128), (128, 256)):
            layers += conv_unit(c_in, c_out) + [nn.MaxPool2d(kernel_size=2)]

        # Flat Sequential keeps the module indices 0..18 used in the state_dict keys.
        self.features = nn.Sequential(*layers)

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits, one row per input sample."""
        feats = self.features(x)
        pooled = torch.flatten(self.gap(feats), start_dim=1)
        return self.classifier(pooled)

In [None]:
# Checkpoint files on the mounted Google Drive.
CNN_BASELINE  = '/content/drive/MyDrive/COMP6721/models/CNN_bs64_lr3e-04.pth'
COMPACT_VGG  = '/content/drive/MyDrive/COMP6721/models/CompactVGG_bs64_lr3e-04.pth'
CNN_HYBRID   = '/content/drive/MyDrive/COMP6721/models/HybridModel_bs64_lr3e-04.pth'
CNN_BEST     = '/content/drive/MyDrive/COMP6721/models/CNN_bs64_lr3e-04 best_from_grid_search.pth'

# Use the GPU when the runtime has one, otherwise fall back to the CPU instead
# of failing on a hard-coded 'cuda' device.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def load_model(model_cls, checkpoint_path, device=DEVICE):
    """Build ``model_cls()`` on ``device`` and restore its weights from a checkpoint.

    Args:
        model_cls: ``nn.Module`` subclass that can be constructed with no arguments.
        checkpoint_path: Path to a file holding a ``state_dict`` for ``model_cls``.
        device: Device the model and the loaded tensors are placed on.

    Returns:
        The model instance with the checkpoint weights loaded.

    Raises:
        RuntimeError: If the checkpoint keys or shapes do not match the model
            (``load_state_dict`` is strict by default).
    """
    model = model_cls().to(device)
    # map_location lets a checkpoint saved from a GPU be restored on a CPU-only
    # runtime; weights_only=True limits unpickling to tensors and plain containers.
    state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    return model


baseline_cnn_model = load_model(CNN, CNN_BASELINE)
compact_vgg_model = load_model(CompactVGG, COMPACT_VGG)
hybrid_model = load_model(HybridModel, CNN_HYBRID)
best_cnn_model = load_model(CNN, CNN_BEST)

<All keys matched successfully>

In [None]:
def load_image(path):
    """Read an image file and convert it to a normalized tensor.

    Args:
        path: Filename (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The RGB image as a tensor produced by ``ToTensor`` followed by
        per-channel normalization. No resizing or cropping is applied, so the
        spatial size follows the source image.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Per-channel mean / std (the standard ImageNet statistics).
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # Image.open is lazy and keeps the underlying file open; the context manager
    # releases it as soon as convert() has produced an independent RGB copy.
    with Image.open(path) as img:
        img_rgb = img.convert('RGB')
    return transform(img_rgb)

# Class names, looked up by the index of the largest model output.
# NOTE(review): the order presumably matches the label encoding used during
# training — confirm against the training notebook.
class_names = ['library-indoor', 'museum-indoor', 'shopping_mall-indoor']

def predict_single(path, model):
    """Classify one image file with ``model`` and print the predicted class name.

    The image is loaded with ``load_image``, given a batch dimension and moved
    to the device the model's parameters live on. The model is switched to
    eval mode and left in that mode afterwards.

    Args:
        path: Path of the image file to classify.
        model: ``nn.Module`` returning one row of class scores per input image.

    Returns:
        None. The result is printed.
    """
    # Follow the model's own device instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    X = load_image(path).unsqueeze(0).to(device)  # add the batch dimension

    model.eval()

    with torch.no_grad():
        output = model(X)

    # Extract a plain int so the list lookup does not rely on implicit
    # tensor-to-index conversion.
    pred_idx = int(output.argmax(dim=1).item())
    print(f"\nImage: {path}\nPrediction: **{class_names[pred_idx]}**")


In [None]:
uploaded = files.upload()           # choose an image
if not uploaded:
    # Nothing came back (e.g. the dialog was dismissed): fail with a clear
    # message rather than a bare StopIteration from next().
    raise RuntimeError("No file was uploaded; re-run this cell and choose an image.")
img_path = next(iter(uploaded))     # first filename

Saving Screenshot 2025-06-11 140246.png to Screenshot 2025-06-11 140246.png


In [None]:
# Classify the uploaded image with the CNN restored from the CNN_BEST checkpoint.
predict_single(img_path, best_cnn_model)


Image: Screenshot 2025-06-11 140246.png
Prediction: **museum-indoor**


In [None]:
# Classify the uploaded image with the CNN restored from the CNN_BASELINE checkpoint.
predict_single(img_path, baseline_cnn_model)


Image: Screenshot 2025-06-11 140246.png
Prediction: **museum-indoor**


In [None]:
# Classify the uploaded image with the CompactVGG restored from the COMPACT_VGG checkpoint.
predict_single(img_path, compact_vgg_model)


Image: Screenshot 2025-06-11 140246.png
Prediction: **museum-indoor**


In [None]:
# Classify the uploaded image with the HybridModel restored from the CNN_HYBRID checkpoint.
predict_single(img_path, hybrid_model)


Image: Screenshot 2025-06-11 140246.png
Prediction: **museum-indoor**
