In [None]:
# ===============================================================
# CELL 1 — MOUNT GOOGLE DRIVE AND DEFINE RSAN PROJECT PATHS
# ---------------------------------------------------------------
# - Mounts your Google Drive inside Colab
# - Defines RSAN_ROOT pointing to /MyDrive/RSAN_Project (1)
# - Defines paths to:
#       - Raw MIT Indoor Scenes dataset
#       - Processed multi-class dataset for YOLOv8-CLS
#       - Output models/metrics folder for indoor classification
# ===============================================================

from google.colab import drive
from pathlib import Path

# Make Google Drive visible to this Colab runtime.
drive.mount('/content/drive')

# Project root inside Drive; edit the folder name here if yours differs.
RSAN_ROOT = Path("/content/drive/MyDrive") / "RSAN_Project (1)"

# Everything dataset-related lives under <root>/datasets.
_datasets_dir = RSAN_ROOT / "datasets"

# Raw MIT Indoor Scenes images plus the two split listing files.
MIT_ROOT = _datasets_dir / "MIT_Indoor_Scenes"
MIT_IMAGES_DIR = MIT_ROOT.joinpath("indoorCVPR_09", "Images")
TRAIN_LIST = MIT_ROOT / "TrainImages.txt"
TEST_LIST = MIT_ROOT / "TestImages.txt"

# Folder-per-class dataset that the classifier is trained on.
CLS_DATASET_DIR = _datasets_dir / "indoor_scenes_cls"

# Destination for trained weights and evaluation artifacts (created if absent).
INDOOR_MODEL_DIR = RSAN_ROOT.joinpath("models", "indoor_classification")
INDOOR_MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved configuration. The two "list" rows print True/False
# depending on whether the split listing file is present.
for _label, _value in (
    ("RSAN_ROOT:", RSAN_ROOT),
    ("MIT_IMAGES_DIR:", MIT_IMAGES_DIR),
    ("Train list:", TRAIN_LIST.exists()),
    ("Test list:", TEST_LIST.exists()),
    ("Indoor model dir:", INDOOR_MODEL_DIR),
):
    print(_label, _value)

In [None]:
# ===============================================================
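# CELL 2 — BUILD MULTI-CLASS DATASET FOR YOLOv8-CLS (FIXED FOR THE DATASET)
# ---------------------------------------------------------------
# Source folder names are translated to target class names through
# CLASS_MAP (defined below). Several source folders may share one
# target class, for example:
#
#   corridor       -> hallway
#   livingroom     -> common_area
#   computerroom   -> computer_lab
#   auditorium     -> colloquium
#
# Images whose source folder is not a CLASS_MAP key are skipped.
#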
# ===============================================================

import os
import shutil

# Source folder name -> target class name.
# Keys are the first path component of each entry in the split listing files;
# values are the class folders created in the output dataset. Several keys may
# point at the same target class, which merges those source folders.
CLASS_MAP = {
    "cafeteria": "cafeteria",
    "bedroom": "bedroom",
    "bathroom": "bathroom",
    "kitchen": "kitchen",
    "common_area": "common_area",     # includes living room / common area
    "livingroom": "common_area",      # maps livingroom folder to common_area
    "computerroom": "computer_lab",   # rename for consistency
    "computer_lab": "computer_lab",
    "classroom": "classroom",
    # Was misspelled "restauranrt_kitchen", which could never match the
    # source folder name and silently dropped every image of that class.
    "restaurant_kitchen": "kitchen",
    "auditorium": "colloquium",
    "lobby": "lobby",
    "office": "office",
    "hallway": "hallway",
    "corridor": "hallway",            # corridor should map to hallway
}

print("Using Class Map:", CLASS_MAP)

# Start from an empty destination so files from an earlier build cannot linger.
if CLS_DATASET_DIR.exists():
    shutil.rmtree(CLS_DATASET_DIR)

# Create <dataset>/<split>/<class> for every distinct target class.
_target_classes = set(CLASS_MAP.values())
for split in ("train", "val"):
    for dst_class in _target_classes:
        CLS_DATASET_DIR.joinpath(split, dst_class).mkdir(parents=True, exist_ok=True)

# Function to process TrainImages.txt or TestImages.txt
def process_split(file_list_path, split, images_dir=None, dataset_dir=None, class_map=None):
    """Copy the images named in one split listing into the class-folder dataset.

    Every non-blank line of ``file_list_path`` is a '/'-separated path relative
    to the raw image root; its first component is the source class folder and
    its last component is the file name. Entries whose source class is not in
    the class map, or whose source file does not exist, are counted as skipped.

    Because several source classes can map to one target class, two different
    source images may share a file name. The second one is stored as
    ``<source_class>__<file name>`` instead of overwriting the first.

    Args:
        file_list_path: Text file with one relative image path per line.
        split: Sub-folder of the output dataset to fill (e.g. "train" or "val").
        images_dir: Root of the raw images. Defaults to ``MIT_IMAGES_DIR``.
        dataset_dir: Root of the output dataset. Defaults to ``CLS_DATASET_DIR``.
        class_map: Source-class -> target-class mapping. Defaults to ``CLASS_MAP``.

    Returns:
        None. A one-line summary with the copied/skipped counts is printed.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    images_dir = MIT_IMAGES_DIR if images_dir is None else Path(images_dir)
    dataset_dir = CLS_DATASET_DIR if dataset_dir is None else Path(dataset_dir)
    class_map = CLASS_MAP if class_map is None else class_map

    kept = 0
    skipped = 0
    claimed = {}        # (target class, stored file name) -> listing entry that owns it
    ready_dirs = set()  # target folders already created during this call

    with open(file_list_path, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f if line.strip()]

    for rel_path in lines:
        parts = rel_path.split('/')
        src_class = parts[0]
        filename = parts[-1]

        if src_class not in class_map:
            skipped += 1
            continue

        src_path = images_dir / rel_path
        if not src_path.exists():
            skipped += 1
            continue

        dst_class = class_map[src_class]

        # Keep the plain file name unless a *different* source image already
        # took it in this target class; then disambiguate with the source class.
        dst_name = filename
        if claimed.get((dst_class, dst_name), rel_path) != rel_path:
            dst_name = f"{src_class}__{filename}"
        claimed[(dst_class, dst_name)] = rel_path

        dst_dir = dataset_dir / split / dst_class
        if dst_dir not in ready_dirs:
            dst_dir.mkdir(parents=True, exist_ok=True)
            ready_dirs.add(dst_dir)

        shutil.copy(src_path, dst_dir / dst_name)
        kept += 1

    print(f"[{split}] Copied {kept} images. Skipped {skipped}.")

# Fill both splits: the train listing feeds "train" and the test listing
# feeds "val".
for _list_file, _split_name in ((TRAIN_LIST, "train"), (TEST_LIST, "val")):
    process_split(_list_file, _split_name)

print("üî• Dataset built successfully at:", CLS_DATASET_DIR)

In [None]:
# ================================================================
# CELL 3 — COUNT IMAGES PER CLASS
# ---------------------------------------------------------------
# Ensures all classes were created correctly and have enough images.
# ================================================================

from collections import defaultdict

# (split, class name) -> number of *.jpg files in that class folder.
counts = defaultdict(int)

for split in ["train", "val"]:
    for cls_dir in sorted((CLS_DATASET_DIR / split).iterdir()):
        # Only real class folders count; a stray file in the split folder
        # would otherwise be reported as a class with zero images.
        if not cls_dir.is_dir():
            continue
        counts[(split, cls_dir.name)] = len(list(cls_dir.glob("*.jpg")))

# Sorted so the listing is stable between runs and matches the re-count cell.
for (split, cls), n in sorted(counts.items()):
    print(f"{split:5s} | {cls:10s} : {n}")

In [None]:
# ================================================================
# CELL 4 — INSTALL YOLOv8 AND IMPORT LIBRARIES
# ================================================================

!pip install -q ultralytics

from ultralytics import YOLO
import torch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import json

# Report the installed PyTorch version and whether it can see a CUDA device.
for _label, _value in (
    ("PyTorch:", torch.__version__),
    ("CUDA Available:", torch.cuda.is_available()),
):
    print(_label, _value)

In [None]:
# ===============================================================
# CELL 5 — TRAIN YOLOv8-CLS CLASSIFIER (Optimized)
# ===============================================================

# Name of the run folder requested under runs/classify.
RUN_NAME = "indoor_cls_v3_optimized"

# Start from the pretrained "l" (large) YOLOv8 classification checkpoint.
model = YOLO("yolov8l-cls.pt")

# All training settings in one place, grouped by purpose.
# NOTE(review): confirm against the Ultralytics docs which of the augmentation
# keys below are actually honoured by the classification task.
_train_settings = {
    # Data and schedule
    "data": str(CLS_DATASET_DIR),
    "epochs": 100,
    "imgsz": 384,
    "batch": 32,
    # Optimizer and learning-rate schedule
    "lr0": 0.001,
    "lrf": 0.1,
    "optimizer": "AdamW",
    # Regularization / early stopping
    "weight_decay": 0.0005,
    "momentum": 0.95,
    "patience": 50,
    # Augmentation
    "mixup": 0.2,
    "flipud": 0.5,
    "fliplr": 0.2,
    "hsv_h": 0.015,
    "hsv_s": 0.7,
    "hsv_v": 0.4,
    "scale": 0.2,
    "degrees": 10,
    "translate": 0.10,
    "shear": 2.0,
    # Output location
    "project": "runs/classify",
    "name": RUN_NAME,
}

results = model.train(**_train_settings)

print(" Training complete!")

In [None]:
# ================================================================
# CELL 6 — VALIDATE TRAINED MODEL (YOLO BUILT-IN METRICS)
# ================================================================

# Run the model's built-in validation with no explicit arguments.
# NOTE(review): presumably this reuses the dataset and settings from training — confirm.
metrics = model.val()
# Bare expression so the notebook shows the metrics object as the cell output.
metrics

In [None]:
# ================================================================
# CELL 7 — CONFUSION MATRIX + METRICS.JSON
# ---------------------------------------------------------------
# Produces:
#   - confusion_matrix.png
#   - metrics.json (precision, recall, f1 scores)
# ================================================================

# Evaluate `model` on every validation image, then save a confusion-matrix
# plot and a JSON file with the per-class precision / recall / F1 report.

val_root = CLS_DATASET_DIR / "val"
val_class_names = sorted(d.name for d in val_root.iterdir() if d.is_dir())

# The label space is the validation folders plus any class the model knows
# that has no folder, so every prediction can be mapped to an index.
_model_names = model.names
_model_names = list(_model_names.values()) if isinstance(_model_names, dict) else list(_model_names)
class_names = val_class_names + [n for n in _model_names if n not in val_class_names]

class_to_idx = {cls: i for i, cls in enumerate(class_names)}
idx_to_class = {i: cls for cls, i in class_to_idx.items()}
label_ids = list(range(len(class_names)))

y_true, y_pred, paths = [], [], []

# Gather validation images together with their ground-truth index.
for cls in val_class_names:
    for img_path in (val_root / cls).glob("*.jpg"):
        paths.append(str(img_path))
        y_true.append(class_to_idx[cls])

if not paths:
    raise FileNotFoundError(f"No .jpg validation images found under {val_root}")

# Run inference. Predictions are translated through the predicted class *name*
# so they share the index space of y_true even if the model orders its classes
# differently from the sorted folder names.
for result in model(paths, stream=True):
    pred_name = result.names[int(result.probs.top1)]
    y_pred.append(class_to_idx[pred_name])

# Confusion matrix. Passing `labels` keeps it len(class_names) square even
# when a class never occurs in y_true or y_pred.
cm = confusion_matrix(y_true, y_pred, labels=label_ids)

# Plot
fig, ax = plt.subplots(figsize=(6,6))
im = ax.imshow(cm, cmap="Blues")
plt.colorbar(im)

ax.set_xticks(np.arange(len(class_names)))
ax.set_yticks(np.arange(len(class_names)))
ax.set_xticklabels(class_names, rotation=45)
ax.set_yticklabels(class_names)

# Annotate each cell; use white text on the darker half of the colour scale.
half_max = cm.max() / 2
for i in label_ids:
    for j in label_ids:
        ax.text(j, i, cm[i, j], ha="center", va="center",
                color="white" if cm[i, j] > half_max else "black")

plt.title("Confusion Matrix ‚Äî Indoor Classification")
plt.tight_layout()

cm_path = INDOOR_MODEL_DIR / "confusion_matrix.png"
plt.savefig(cm_path, dpi=200)
plt.close()

print("Saved confusion matrix to:", cm_path)

# Classification report. `labels` keeps it aligned with class_names, and
# zero_division=0 reports 0.0 for classes that have no samples or predictions.
report = classification_report(
    y_true,
    y_pred,
    labels=label_ids,
    target_names=class_names,
    output_dict=True,
    zero_division=0,
)

metrics_path = INDOOR_MODEL_DIR / "metrics.json"
with open(metrics_path, "w") as f:
    json.dump({
        "class_names": class_names,
        "confusion_matrix": cm.tolist(),
        "metrics": report
    }, f, indent=4)

print("Saved metrics.json to:", metrics_path)

In [None]:
# ================================================================
# CELL 8 — COPY BEST MODEL TO RSAN PROJECT MODELS FOLDER
# ================================================================

# Default location of the best checkpoint, rebuilt from the run name.
BEST_PT = Path("runs/classify") / RUN_NAME / "weights" / "best.pt"

# If this session's training left a trainer on `model`, prefer the checkpoint
# path it recorded. The run folder is not guaranteed to be named exactly
# RUN_NAME (a re-run may be saved under a suffixed folder), in which case the
# path above would point at weights from an older run.
_trainer = getattr(globals().get("model"), "trainer", None)
_trained_best = getattr(_trainer, "best", None)
if _trained_best is not None and Path(_trained_best).exists():
    BEST_PT = Path(_trained_best)

if not BEST_PT.exists():
    raise FileNotFoundError(f"No best.pt found at {BEST_PT}; run the training cell first.")

DEST_PT = INDOOR_MODEL_DIR / "NEWBEST.pt"

shutil.copy(BEST_PT, DEST_PT)
print("Saved trained model to:", DEST_PT)

In [None]:
# ================================================================
# CELL 9 — SANITY TEST ON SAMPLE IMAGE
# ================================================================

from PIL import Image

# Reload the exported weights to confirm the saved file is usable on its own.
indoor_model = YOLO(str(DEST_PT))

# Take the first .jpg of the first class that has one. Looking only at
# class_names[0] would fail with a bare IndexError if that folder is empty.
sample = next(
    (
        img
        for cls in class_names
        if (CLS_DATASET_DIR / "val" / cls).is_dir()
        for img in (CLS_DATASET_DIR / "val" / cls).glob("*.jpg")
    ),
    None,
)
if sample is None:
    raise FileNotFoundError(f"No .jpg images found under {CLS_DATASET_DIR / 'val'}")
print("Testing on:", sample)

result = indoor_model(sample)[0]
print("Predicted class:", result.names[int(result.probs.top1)])

# Bare expression so the notebook renders the sample image as the cell output.
Image.open(sample)

In [None]:
# =====================================================
# CELL 10 — MULTI-IMAGE TESTING FOR ALL CLASSES
# =====================================================

from ultralytics import YOLO
from pathlib import Path
from PIL import Image

# Load the exported classifier; DEST_PT is set by the cell that copies best.pt.
INDOOR_MODEL = YOLO(str(DEST_PT))

# Root of the validation split.
val_root = CLS_DATASET_DIR / "val"

# One sub-folder per class.
class_dirs = list(filter(Path.is_dir, val_root.iterdir()))

# Collected as (file name, true class, predicted class) for later inspection.
results = []

for cls_dir in class_dirs:
    class_name = cls_dir.name
    print(f"\nüîπ Testing class: {class_name}")

    for img_path in sorted(cls_dir.glob("*.jpg")):
        # Best effort: a failure on one image is reported and the loop goes on.
        try:
            prediction = INDOOR_MODEL(str(img_path))[0]
            pred = prediction.names[int(prediction.probs.top1)]
            results.append((img_path.name, class_name, pred))
            print(f"{img_path.name:40s} ‚Üí predicted: {pred}")
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

In [None]:
# =====================================================
# CELL 11 — SHOW MISCLASSIFIED IMAGES
# =====================================================

from IPython.display import display

# Keep only the (file name, true class, predicted class) entries from the
# multi-image test cell where the prediction disagrees with the folder label.
misclassified = [
    (filename, true_class, pred)
    for filename, true_class, pred in results
    if true_class != pred
]

print(f"\n‚ùå Total Misclassifications: {len(misclassified)}\n")

for filename, true_class, pred in misclassified:
    img_path = val_root / true_class / filename
    print(f"File: {filename}")
    print(f"True class: {true_class} ‚Üí Predicted: {pred}")

    # Show the image; the context manager releases the file handle afterwards
    # so a long list of misclassifications does not accumulate open files.
    with Image.open(img_path) as img:
        display(img)
    print("-" * 60)

In [None]:
# =====================================================
# CELL 12 — RUN MODEL ON *ALL* IMAGES AND SAVE CSV
# =====================================================

from ultralytics import YOLO
from pathlib import Path
import pandas as pd
from tqdm.notebook import tqdm

# Use the exported weights for this pass (this rebinds the global `model`).
model = YOLO(str(DEST_PT))

# One record per image, across both splits.
rows = []

for split in ("train", "val"):
    class_folders = (p for p in (CLS_DATASET_DIR / split).iterdir() if p.is_dir())
    for cls_dir in class_folders:
        for img_path in cls_dir.glob("*.jpg"):
            prediction = model(str(img_path))[0]
            best_idx = int(prediction.probs.top1)

            rows.append(dict(
                split=split,
                path=str(img_path),
                filename=img_path.name,
                true_class=cls_dir.name,
                pred_class=prediction.names[best_idx],
                conf=float(prediction.probs.top1conf),
            ))

# Write the table next to the other model artifacts.
import os
os.makedirs(INDOOR_MODEL_DIR, exist_ok=True)
pred_csv = INDOOR_MODEL_DIR / "dataset_predictions.csv"

df = pd.DataFrame(rows)
df.to_csv(pred_csv, index=False)

print(f"Saved predictions for {len(df)} images to:", pred_csv)
# Bare expression so the notebook previews the first rows as the cell output.
df.head()

In [None]:
# =====================================================
# CELL 12B — AUTO-FIX DATASET USING MODEL PREDICTIONS
# =====================================================

import shutil
from pathlib import Path

import pandas as pd

# Relabel images by moving them into the folder of the class the model
# predicted, for rows where the prediction is confident and disagrees with
# the current folder.
# NOTE(review): the CSV includes training images scored by a model that was
# trained on them — confirm that relabelling from those predictions is intended.

pred_csv = INDOOR_MODEL_DIR / "dataset_predictions.csv"
df = pd.read_csv(pred_csv)

# 1) Make a backup of the current dataset (only once; an existing backup is
#    left untouched and is not refreshed).
BACKUP_DIR = CLS_DATASET_DIR.parent / "indoor_scenes_cls_backup"

if not BACKUP_DIR.exists():
    print("üì¶ Creating backup of dataset at:", BACKUP_DIR)
    shutil.copytree(CLS_DATASET_DIR, BACKUP_DIR)
else:
    print(" Backup already exists at:", BACKUP_DIR)

# 2) Choose misclassified images with high confidence
CONF_THRESH = 0.90

candidates = df[(df["true_class"] != df["pred_class"]) &
                (df["conf"] >= CONF_THRESH)].copy()

print(f"\nFound {len(candidates)} high-confidence mislabels (conf ‚â• {CONF_THRESH}).")

# 3) Move each candidate image to the predicted class folder
moved = 0
conflicts = 0
for _, row in candidates.iterrows():
    split = row["split"]
    true_cls = row["true_class"]
    pred_cls = row["pred_class"]
    filename = row["filename"]

    old_path = Path(row["path"])
    new_path = CLS_DATASET_DIR / split / pred_cls / filename

    if old_path == new_path:
        continue

    if not old_path.exists():
        print("‚ö†Ô∏è Missing file (skipped):", old_path)
        continue

    # Never overwrite: a same-named image already in the target class would
    # otherwise be replaced by the move and lost.
    if new_path.exists():
        conflicts += 1
        print(f"Name conflict (skipped): {filename} already exists in {split}/{pred_cls}")
        continue

    # Ensure destination folder exists, then move the file
    new_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(old_path), str(new_path))
    moved += 1
    print(f"Moved: {filename} | {split} | {true_cls} ‚Üí {pred_cls}")

print(f"\n Auto-fix complete. Files moved: {moved}")
if conflicts:
    print(f"Skipped because of name conflicts: {conflicts}")

In [None]:
# =====================================================
# CELL 13 — RE-COUNT IMAGES PER CLASS AFTER FIX
# =====================================================

from collections import defaultdict

# (split, class name) -> number of *.jpg files now present in that folder.
counts = defaultdict(int)

for split in ("train", "val"):
    for cls_dir in filter(Path.is_dir, (CLS_DATASET_DIR / split).iterdir()):
        counts[(split, cls_dir.name)] = sum(1 for _ in cls_dir.glob("*.jpg"))

print("\nUpdated image counts per class:")
# Keys are unique, so sorting the keys gives the same order as sorting the items.
for key in sorted(counts):
    split, cls_name = key
    n = counts[key]
    print(f"{split:5s} | {cls_name:10s} : {n}")