In [None]:
# ===============================================================
# CELL 1 — MOUNT GOOGLE DRIVE AND DEFINE RSAN PROJECT PATHS
# ---------------------------------------------------------------
# Reuses the same project structure as your original notebook.
# - RSAN_ROOT:   /content/drive/MyDrive/RSAN_Project
# - MIT_ROOT:    raw MIT Indoor Scenes dataset
# - CLS_DATASET_DIR: 4-class processed dataset (train/val)
# - INDOOR_MODEL_DIR: where we'll save the new Places365+MIT model
# ===============================================================

from google.colab import drive
from pathlib import Path

# Make Google Drive visible under /content/drive; every path below lives there.
drive.mount('/content/drive')

# Project root on Drive (adjust folder name if needed).
RSAN_ROOT = Path("/content/drive/MyDrive/RSAN_Project")

# Raw MIT Indoor Scenes download: image tree plus the train/test split lists.
MIT_ROOT = RSAN_ROOT.joinpath("datasets", "MIT_Indoor_Scenes")
MIT_IMAGES_DIR = MIT_ROOT.joinpath("indoorCVPR_09", "Images")
TRAIN_LIST = MIT_ROOT.joinpath("TrainImages.txt")
TEST_LIST = MIT_ROOT.joinpath("TestImages.txt")

# Prepared classification dataset (train/val folders); later cells read their
# training and validation images from here.
CLS_DATASET_DIR = RSAN_ROOT.joinpath("datasets", "indoor_scenes_cls")

# Where trained indoor-classifier artifacts are written; created on demand.
INDOOR_MODEL_DIR = RSAN_ROOT.joinpath("models", "indoor_classification")
INDOOR_MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Quick status report so missing inputs are obvious before any later cell runs.
for _label, _value in (
    ("RSAN_ROOT:", RSAN_ROOT),
    ("MIT_IMAGES_DIR exists:", MIT_IMAGES_DIR.exists()),
    ("Train list exists:", TRAIN_LIST.exists()),
    ("Test list exists:", TEST_LIST.exists()),
    ("CLS_DATASET_DIR exists:", CLS_DATASET_DIR.exists()),
    ("INDOOR_MODEL_DIR:", INDOOR_MODEL_DIR),
):
    print(_label, _value)


In [None]:
# ================================================================
# CELL 2 — COUNT IMAGES PER CLASS IN 4-CLASS DATASET
# ---------------------------------------------------------------
# Expects:
#   CLS_DATASET_DIR/
#       train/
#           office/
#           hallway/
#           classroom/
#           lab/
#       val/
#           office/
#           hallway/
#           classroom/
#           lab/
# ================================================================

from collections import defaultdict

# Fail early with an actionable message when the prepared dataset is missing.
if not CLS_DATASET_DIR.exists():
    raise FileNotFoundError(
        f"{CLS_DATASET_DIR} does not exist. "
        "Make sure your 4-class dataset is created or update CLS_DATASET_DIR."
    )

# Suffixes counted as images, compared case-insensitively. Counting only
# "*.jpg" would skip files such as "x.JPG" or "x.png" and make these totals
# disagree with the dataset sizes reported by the ImageFolder-based cell.
# NOTE(review): the list is intended to mirror torchvision's IMG_EXTENSIONS;
# confirm against the installed torchvision version.
IMAGE_SUFFIXES = {
    ".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp",
}

# Maps (split, class_name) -> number of image files found for that class.
counts = defaultdict(int)

for split in ["train", "val"]:
    split_root = CLS_DATASET_DIR / split
    if not split_root.exists():
        raise FileNotFoundError(f"Missing split folder: {split_root}")
    for cls_dir in sorted(split_root.iterdir()):
        # Stray files next to the class folders are not classes.
        if not cls_dir.is_dir():
            continue
        # Walk the class folder recursively so nested images are counted too.
        n = sum(
            1
            for f in cls_dir.rglob("*")
            if f.is_file() and f.suffix.lower() in IMAGE_SUFFIXES
        )
        counts[(split, cls_dir.name)] = n

print("\nImage counts per class:")
for (split, cls_name), n in sorted(counts.items()):
    print(f"{split:5s} | {cls_name:10s} : {n}")


In [None]:
# ================================================================
# CELL 3 — IMPORT LIBRARIES & SET CONFIG
# ================================================================

import os
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

# Seed the Python, NumPy and PyTorch RNGs so head initialisation, shuffling and
# augmentation start from the same state on every Restart & Run All.
# (GPU kernels may still introduce small run-to-run differences.)
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Training hyperparameters
BATCH_SIZE = 32
NUM_EPOCHS = 40   # adjust as desired
LR = 1e-3         # learning rate for MIT head

# Channel statistics used for normalisation (ImageNet / Places style).
# Defined once so the train and val pipelines cannot drift apart.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: light augmentation, then tensor conversion + normalisation.
train_tf = transforms.Compose([
    # Randomly crop and resize: zooms in/out a bit while keeping 224×224
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),

    # Random horizontal flip
    transforms.RandomHorizontalFlip(),

    # Small random rotation (in degrees)
    transforms.RandomRotation(10),

    # Slight random changes in brightness/contrast/saturation
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
    ),

    # Convert to tensor
    transforms.ToTensor(),

    # Normalize with ImageNet / Places stats
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation / inference pipeline: deterministic resize + centre crop only.
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])


In [None]:
# ================================================================
# CELL 4 — BUILD DATASETS & DATALOADERS FROM 4-CLASS DATASET
# ================================================================

from pathlib import Path

train_root = CLS_DATASET_DIR / "train"
val_root   = CLS_DATASET_DIR / "val"

# ImageFolder derives the label index of each image from its class folder name.
train_ds = datasets.ImageFolder(train_root, transform=train_tf)
val_ds   = datasets.ImageFolder(val_root,   transform=val_tf)

# Both splits must produce the same class -> index mapping. If a class folder
# is missing or misspelled in one split, the indices shift and every validation
# metric is computed against the wrong labels without any error.
if val_ds.classes != train_ds.classes:
    raise ValueError(
        "Train and val splits define different classes: "
        f"train={train_ds.classes}, val={val_ds.classes}"
    )

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Class names in label-index order; reused by later cells for reports/plots.
class_names = train_ds.classes
NUM_MIT_CLASSES = len(class_names)

print("MIT indoor (specialized) classes:", class_names)
print("NUM_MIT_CLASSES:", NUM_MIT_CLASSES)
print("Train size:", len(train_ds), "Val size:", len(val_ds))


In [None]:
# ================================================================
# CELL 5 — LOAD RESNET50-PLACES365 (AUTO-DOWNLOAD IF NEEDED)
# ================================================================

from pathlib import Path
import os

PLACES_WEIGHTS_PATH = RSAN_ROOT / "models" / "resnet50_places365.pth.tar"
PLACES_WEIGHTS_PATH.parent.mkdir(parents=True, exist_ok=True)

PLACES_WEIGHTS_URL = (
    "http://places2.csail.mit.edu/models_places365/resnet50_places365.pth.tar"
)

# A zero-byte file is what an interrupted/failed download leaves behind, so it
# is treated the same as "not downloaded yet".
if PLACES_WEIGHTS_PATH.exists() and PLACES_WEIGHTS_PATH.stat().st_size > 0:
    print("Found existing weights at:", PLACES_WEIGHTS_PATH)
else:
    import subprocess

    print("Weights not found, downloading ResNet50-Places365...")
    # Download to a temporary name and rename only on success, so a failed
    # download can never be mistaken for valid weights on the next run.
    _partial_path = PLACES_WEIGHTS_PATH.with_name(PLACES_WEIGHTS_PATH.name + ".part")
    try:
        # check=True raises CalledProcessError when wget exits non-zero;
        # passing an argument list avoids shell quoting issues in the path.
        subprocess.run(
            ["wget", "-O", str(_partial_path), PLACES_WEIGHTS_URL],
            check=True,
        )
        _partial_path.replace(PLACES_WEIGHTS_PATH)
    finally:
        # Still present only if the download or rename failed.
        if _partial_path.exists():
            _partial_path.unlink()

def load_resnet50_places365(weights_path: Path):
    """Build a 365-class ResNet50 and load checkpoint weights into it.

    Args:
        weights_path: Path to the checkpoint file. It may contain either a
            bare state dict or a dict holding the state dict under the
            ``"state_dict"`` key.

    Returns:
        The torchvision ResNet50 with the checkpoint weights loaded (on CPU,
        since the checkpoint is mapped to CPU and the model is never moved).

    Raises:
        RuntimeError: from ``load_state_dict(strict=True)`` when checkpoint
            keys do not match the model exactly.
    """
    import torch
    import torchvision.models as models

    model = models.resnet50(num_classes=365)
    checkpoint = torch.load(str(weights_path), map_location="cpu")

    # Some checkpoints have 'state_dict', some don't
    state = checkpoint.get("state_dict", checkpoint)

    # Drop a leading 'module.' only. A plain str.replace would also delete the
    # substring from the middle of a key, silently renaming unrelated entries.
    prefix = "module."
    new_state = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state.items()
    }
    model.load_state_dict(new_state, strict=True)
    return model

# Load once for the rest of the notebook and switch to inference mode.
resnet_places = load_resnet50_places365(PLACES_WEIGHTS_PATH)
resnet_places.eval()
print("Loaded ResNet50-Places365.")


In [None]:
# ================================================================
# NEW CELL 6 — MULTI-HEAD MODEL: RESNET50 BACKBONE
# ---------------------------------------------------------------
# - Shared backbone: ResNet50 up to the final FC
# - Head 1: Places365 classifier (original fc, 365 classes)
# - Head 2: MIT indoor classifier (N classes from your dataset)
# ================================================================

import copy

class PlacesMITMultiHead(nn.Module):
    """Shared backbone feeding two linear classifiers.

    The backbone is the given network with its final ``fc`` layer replaced by
    an identity, so it outputs the features that ``fc`` used to consume.
    Those features go to:
      * ``places_head`` - the network's original ``fc`` layer, and
      * ``mit_head``    - a new linear layer with ``num_mit_classes`` outputs.

    Args:
        resnet_places: Network exposing a linear ``fc`` attribute (its
            ``in_features`` is read). It is copied, never modified.
        num_mit_classes: Number of outputs of the new head.
    """

    def __init__(self, resnet_places: nn.Module, num_mit_classes: int):
        super().__init__()
        # Work on a private copy. Stripping `fc` from the caller's object would
        # make a second construction from the same network (e.g. re-running
        # this cell) fail, because nn.Identity has no `in_features`.
        self.backbone = copy.deepcopy(resnet_places)

        # Original final FC: in_features -> number of original classes
        in_features = self.backbone.fc.in_features
        # The copied FC layer becomes the first head ...
        self.places_head = self.backbone.fc

        # ... and is replaced by identity so backbone(x) returns features
        self.backbone.fc = nn.Identity()

        # New head for MIT Indoor classes
        self.mit_head = nn.Linear(in_features, num_mit_classes)

    def forward(self, x):
        """Return ``(places_logits, mit_logits)`` computed from shared features."""
        feats = self.backbone(x)          # [B, in_features]
        places_logits = self.places_head(feats)
        mit_logits = self.mit_head(feats)
        return places_logits, mit_logits

# Wrap the pretrained network in the two-head module, then move it to DEVICE.
model = PlacesMITMultiHead(resnet_places, num_mit_classes=NUM_MIT_CLASSES)
model = model.to(DEVICE)
print(model)


In [None]:
# ================================================================
# NEW CELL 7 — FREEZE BACKBONE + PLACES HEAD (NO FORGETTING)
# ---------------------------------------------------------------
# We want:
#   - Places365 predictions identical to original ResNet50-Places365
#   - Only MIT indoor head is trained
# ================================================================

# Turn off gradients for everything inherited from the pretrained checkpoint:
# the feature extractor and the original Places365 classifier.
model.backbone.requires_grad_(False)
model.places_head.requires_grad_(False)

# The new MIT head is the only part that receives gradients.
model.mit_head.requires_grad_(True)

# Loss for the MIT head; the optimizer is handed the MIT head's parameters only.
criterion_mit = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.mit_head.parameters(), lr=LR)

# Sanity check: list what is actually trainable.
print("Trainable parameters:")
for name, p in model.named_parameters():
    if not p.requires_grad:
        continue
    print("  ", name, p.shape)


In [None]:
# ================================================================
# CELL 8 — TRAIN MIT INDOOR HEAD
# ================================================================

from tqdm.notebook import tqdm

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``, supervising the MIT head only.

    Args:
        model: Module whose forward returns ``(places_logits, mit_logits)``.
        loader: Iterable of ``(imgs, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss applied to ``(mit_logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and accuracy of
        the MIT head over the epoch; ``(0.0, 0.0)`` if the loader is empty.
    """
    model.train()
    # model.train() also switches *frozen* BatchNorm layers to training mode,
    # where every forward pass updates their running mean/variance even though
    # their weights have requires_grad=False. That would change the frozen
    # features (and the Places365 outputs) during training, so BatchNorm
    # layers whose own parameters are all frozen are kept in eval mode.
    for module in model.modules():
        if isinstance(module, nn.modules.batchnorm._BatchNorm):
            own_params = list(module.parameters(recurse=False))
            if own_params and not any(p.requires_grad for p in own_params):
                module.eval()

    running_loss = 0.0
    correct = 0
    total = 0

    for imgs, labels in tqdm(loader, desc="Train", leave=False):
        imgs, labels = imgs.to(device), labels.to(device)

        _, mit_logits = model(imgs)  # we only supervise MIT head
        loss = criterion(mit_logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch mean is per-sample.
        running_loss += loss.item() * imgs.size(0)
        preds = mit_logits.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # An empty loader would otherwise divide by zero.
    if total == 0:
        return 0.0, 0.0

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


def eval_model(model, loader, device):
    """Evaluate the MIT head of ``model`` on every batch of ``loader``.

    Args:
        model: Module whose forward returns ``(places_logits, mit_logits)``.
        loader: Iterable of ``(imgs, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``(acc, all_labels, all_preds)``: accuracy as a float, plus NumPy
        arrays of the true labels and argmax predictions in loader order.
        For an empty loader: ``(0.0, <empty array>, <empty array>)``.
    """
    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            _, mit_logits = model(imgs)
            preds = mit_logits.argmax(1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)
            all_labels.append(labels.cpu())
            all_preds.append(preds.cpu())

    # torch.cat rejects an empty list, so the empty case is answered explicitly.
    if not all_labels:
        return (
            0.0,
            torch.empty(0, dtype=torch.long).numpy(),
            torch.empty(0, dtype=torch.long).numpy(),
        )

    acc = correct / total if total > 0 else 0.0
    all_labels = torch.cat(all_labels).numpy()
    all_preds = torch.cat(all_preds).numpy()
    return acc, all_labels, all_preds


# Highest validation accuracy seen so far; a checkpoint is written on each improvement.
best_val_acc = 0.0

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n===== Epoch {epoch}/{NUM_EPOCHS} =====")

    # One pass of training followed by a full validation pass.
    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion_mit, DEVICE
    )
    val_acc, val_labels, val_preds = eval_model(model, val_loader, DEVICE)

    print(f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f}")
    print(f"Val acc:    {val_acc:.4f}")

    # Nothing to save unless this epoch strictly beats the previous best.
    if val_acc <= best_val_acc:
        continue

    best_val_acc = val_acc
    best_path = INDOOR_MODEL_DIR / "resnet_places365_mit_multihead_best11.pth"
    torch.save(model.state_dict(), best_path)
    print("✅ New best model saved to:", best_path)

print("\nBest validation accuracy:", best_val_acc)


In [None]:
# ================================================================
# CELL 9 — CONFUSION MATRIX & CLASSIFICATION REPORT
# ================================================================

from sklearn.metrics import ConfusionMatrixDisplay

# Reuse val_labels, val_preds from the last epoch.
# NOTE: these come from the final training epoch, which is not necessarily the
# epoch whose weights were saved as the best checkpoint.

# Pin the label ids to the full class list so rows/columns and report entries
# always line up with class_names, even if a class never occurs in the arrays.
label_ids = list(range(len(class_names)))

cm = confusion_matrix(val_labels, val_preds, labels=label_ids)
print("Classification report (MIT head):\n")
print(
    classification_report(
        val_labels, val_preds, labels=label_ids, target_names=class_names
    )
)

fig, ax = plt.subplots(figsize=(6, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(ax=ax, cmap="Blues", colorbar=False)
plt.xticks(rotation=45)
# Derive the class count from the data instead of hard-coding it in the title.
plt.title(f"MIT Indoor ({len(class_names)}-class) — Confusion Matrix")
plt.tight_layout()
plt.show()


In [None]:
# ================================================================
# CELL 10 — RELOAD BEST MODEL & PREDICT ON NEW IMAGES
# ---------------------------------------------------------------
# This gives you access to BOTH:
#   - Places365 classes  (365-way head)
#   - MIT indoor classes (4-way head)
# ================================================================

# Reload best weights (if not already in memory).
# The file name must match the one the training cell writes
# ("..._best11.pth"); loading "..._best.pth" would either fail or silently
# pick up a stale checkpoint from an earlier run.
best_path = INDOOR_MODEL_DIR / "resnet_places365_mit_multihead_best11.pth"
print("Loading best model from:", best_path)
model.load_state_dict(torch.load(best_path, map_location=DEVICE))
model.to(DEVICE)
model.eval()

# If you have a mapping for Places365 class indices -> names, load it here.
# For now we'll just output the argmax index for Places365.
def predict_image(img_path: Path):
    """
    Run the global ``model`` on a single image.

    The image is converted to RGB, preprocessed with ``val_tf`` and passed
    through both heads as a batch of one.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        dict with:
          - "places_idx": argmax index over the Places365 head outputs
          - "mit_idx": argmax index over your MIT indoor classes
          - "mit_class_name": ``class_names[mit_idx]``
          - "places_conf" / "mit_conf": softmax probability of each argmax
    """
    from PIL import Image

    # Close the file handle as soon as the pixels have been copied;
    # convert() returns an independent image.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert("RGB")
    x = val_tf(img).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        places_logits, mit_logits = model(x)
        places_probs = F.softmax(places_logits, dim=1)
        mit_probs = F.softmax(mit_logits, dim=1)

        places_idx = int(places_probs.argmax(1))
        mit_idx = int(mit_probs.argmax(1))

    return {
        "places_idx": places_idx,
        "mit_idx": mit_idx,
        "mit_class_name": class_names[mit_idx],
        "places_conf": float(places_probs[0, places_idx]),
        "mit_conf": float(mit_probs[0, mit_idx]),
    }

# Example: test on a random validation image
from random import choice

# Only class folders that actually contain .jpg files are candidates;
# choosing blindly from val_root could pick a stray file or an empty folder
# and crash with NotADirectoryError / "Cannot choose from an empty sequence".
_candidates = []
for _class_dir in sorted(val_root.iterdir()):
    if not _class_dir.is_dir():
        continue
    _jpgs = sorted(_class_dir.glob("*.jpg"))
    if _jpgs:
        _candidates.append((_class_dir, _jpgs))

if not _candidates:
    raise FileNotFoundError(f"No .jpg images found in class folders under {val_root}")

# Pick a class first, then an image within it.
sample_class_dir, _sample_images = choice(_candidates)
sample_img = choice(_sample_images)
print("Sample image:", sample_img)

pred = predict_image(sample_img)
print("\nPredictions:")
print("  Places365 index:", pred["places_idx"], f"(conf={pred['places_conf']:.3f})")
print("  MIT class:", pred["mit_class_name"], f"(conf={pred['mit_conf']:.3f})")

from IPython.display import display
from PIL import Image

# Render the image, then release the file handle.
with Image.open(sample_img) as _preview:
    display(_preview)


In [None]:
# ================================================================
# COMPARE MULTIPLE MODELS ON THE VALIDATION SET
# ---------------------------------------------------------------
# - Loads each .pth model you specify
# - Evaluates MIT head on val_loader
# - Prints accuracy + classification report
# - Shows confusion matrix
# ================================================================

import torch
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from pathlib import Path

# If you want to upload models from your local machine into this runtime,
# you can uncomment these two lines and upload .pth files:
#
# from google.colab import files
# uploaded = files.upload()  # after this, use Path(name) for each uploaded file
#
# But usually it's easier to put models in INDOOR_MODEL_DIR on Drive
# and list them in MODEL_FILES below.

def build_empty_model(num_mit_classes: int):
    """
    Construct a fresh multi-head model on DEVICE.

    The backbone is loaded from PLACES_WEIGHTS_PATH via
    ``load_resnet50_places365`` and wrapped in ``PlacesMITMultiHead`` with a
    new MIT head of ``num_mit_classes`` outputs. Callers are expected to load
    fine-tuned weights from a .pth file into the returned model.
    """
    places_backbone = load_resnet50_places365(PLACES_WEIGHTS_PATH)
    multihead = PlacesMITMultiHead(places_backbone, num_mit_classes=num_mit_classes)
    return multihead.to(DEVICE)

def evaluate_model_from_path(model_path: Path, label: str = None):
    """
    Evaluate one saved checkpoint on the validation loader.

    Builds a fresh model, loads the state_dict stored at `model_path`, runs
    the MIT head over `val_loader`, then prints the accuracy and the
    classification report and plots the confusion matrix.

    Args:
        model_path: Location of the saved state_dict (.pth).
        label: Name shown in the printed header and plot title; defaults to
            the file name of `model_path`.
    """
    label = model_path.name if label is None else label

    banner = "=============================="
    print("\n" + banner)
    print(f"Evaluating model: {label}")
    print("Path:", model_path)
    print(banner)

    # Fresh architecture, then overwrite its weights with the checkpoint.
    net = build_empty_model(NUM_MIT_CLASSES)
    net.load_state_dict(torch.load(str(model_path), map_location=DEVICE))
    net.eval()

    # Collect (true labels, predictions) per batch; only the MIT head is used.
    per_batch = []
    with torch.no_grad():
        for imgs, labels in val_loader:
            _, mit_logits = net(imgs.to(DEVICE))
            per_batch.append((labels.cpu(), mit_logits.argmax(1).cpu()))

    all_labels = torch.cat([truth for truth, _ in per_batch]).numpy()
    all_preds = torch.cat([guess for _, guess in per_batch]).numpy()

    # Accuracy
    acc = (all_labels == all_preds).mean()
    print(f"Validation accuracy: {acc:.4f}\n")

    # Classification report
    print("Classification report:")
    print(classification_report(all_labels, all_preds, target_names=class_names))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    fig, ax = plt.subplots(figsize=(6, 6))
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names).plot(
        ax=ax, cmap="Blues", colorbar=False
    )
    plt.xticks(rotation=45, ha="right")
    plt.title(f"Confusion Matrix — {label}")
    plt.tight_layout()
    plt.show()


# ------------------------------------------------------------
# EDIT THIS LIST: models you want to compare
# (they should all live in INDOOR_MODEL_DIR or give full Paths)
# ------------------------------------------------------------

# File names are resolved relative to INDOOR_MODEL_DIR.
MODEL_FILES = [
    "resnet_places365.pth",   # your current best
    # "resnet_places365_mit_multihead_augmented_best.pth",  # example extra
]

# Evaluate every listed checkpoint that exists; report the ones that do not.
for fname in MODEL_FILES:
    model_path = INDOOR_MODEL_DIR / fname
    if model_path.exists():
        evaluate_model_from_path(model_path, label=fname)
    else:
        print(f"\n⚠️ Skipping {fname} (file not found at {model_path})")
