In [1]:
#imports and paths
import sys
from pathlib import Path

# Make the project root importable so `from src...` resolves from this notebook.
# A notebook kernel has no `__file__`, so the fallback is the parent of the
# current working directory (one level up from 'notebooks').
ROOT = Path(__file__).resolve().parents[1] if "__file__" in globals() else Path.cwd().parent
# Guard the append so re-running the cell does not stack duplicate entries.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

print(" Added to sys.path:", ROOT)

import torch
from torch.utils.data import DataLoader, Subset
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import json
from sklearn.model_selection import train_test_split

from src.datasets import MultiLabelSceneDataset
from src.finetune import MultiLabelFineTune
from src.simclr import SimCLRModel

# Project layout, rooted in the current user's home directory.
BASE = Path.home().joinpath("projects/office-ssl")
CKPT_DIR = BASE.joinpath("checkpoints")

# Labeled data: image folder, annotation CSV and label-map JSON.
LABELED = BASE.joinpath("data/labeled")
IMG_DIR, ANNOT_CSV, LABEL_MAP = (
    LABELED.joinpath(leaf) for leaf in ("images", "annotations.csv", "label_map.json")
)


 Added to sys.path: /home/syed-mohammed-bilal/projects/office-ssl


  __import__("pkg_resources").declare_namespace(__name__)


In [2]:
import torch

# Select PyTorch's "medium" float32 matmul precision mode for the whole session.
torch.set_float32_matmul_precision("medium")

In [None]:
# Datasets and dataloaders
# One dataset object backs both splits; only the index lists differ.
train_ds = MultiLabelSceneDataset(IMG_DIR, ANNOT_CSV, LABEL_MAP)

# Fixed random_state keeps the 80/20 index split identical across kernel restarts.
indices = list(range(len(train_ds)))
train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42)
train_dl = DataLoader(Subset(train_ds, train_idx), batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
val_dl   = DataLoader(Subset(train_ds, val_idx), batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

# Read the label map through a context manager so the file handle is closed.
with open(LABEL_MAP) as label_map_file:
    n_label_entries = len(json.load(label_map_file))

print(f"Train: {len(train_idx)} | Val: {len(val_idx)} | Classes: {n_label_entries}")


In [None]:
# Load the SimCLR backbone
# Single device handle for this notebook. It has to exist before the first
# `.to(device)` call, otherwise a fresh kernel fails with NameError.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoints are ordered by file name and the last one is used.
# NOTE(review): name ordering is lexicographic — confirm epoch numbers are zero-padded.
simclr_ckpts = sorted(CKPT_DIR.glob("simclr-epoch*.ckpt"))
if not simclr_ckpts:
    raise FileNotFoundError(f"No 'simclr-epoch*.ckpt' checkpoint found in {CKPT_DIR}")
simclr_ckpt = simclr_ckpts[-1]
simclr = SimCLRModel.load_from_checkpoint(str(simclr_ckpt), strict=False)

# Only the backbone of the SimCLR model is reused downstream.
backbone = simclr.backbone.to(device)
backbone.eval()


In [None]:
# Build the fine-tuning model and warm-start it from an earlier fine-tune checkpoint if one exists
from src.finetune import MultiLabelFineTune

# Device for the checkpoint tensors and the assembled model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Context manager so the label-map file handle is closed after reading.
with open(LABEL_MAP) as label_map_file:
    label_map = json.load(label_map_file)
num_classes = len(label_map)

model = MultiLabelFineTune(
    backbone=backbone,
    num_classes=num_classes,
    lr=1e-4,
    freeze_backbone=False
)

# Load fine-tuned weights.
# The last checkpoint by file name is used.
# NOTE(review): that is the lexicographically last file, not necessarily the
# best-scoring one — confirm this is the intended checkpoint.
finetune_ckpts = sorted(CKPT_DIR.glob("finetune-*.ckpt"))
if finetune_ckpts:
    best_ckpt = finetune_ckpts[-1]
    ft_state = torch.load(best_ckpt, map_location=device)["state_dict"]

    # Keep only backbone and classifier tensors from the checkpoint.
    clean_state = {
        k: v for k, v in ft_state.items()
        if k.startswith(("backbone.", "classifier."))
    }

    # strict=False tolerates mismatches silently, so report them explicitly.
    load_result = model.load_state_dict(clean_state, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print("Missing keys:", load_result.missing_keys)
        print("Unexpected keys:", load_result.unexpected_keys)
else:
    # First run: nothing to warm-start from, keep the SimCLR-initialised model.
    best_ckpt = None
    print(f"No 'finetune-*.ckpt' found in {CKPT_DIR}; using the model without fine-tuned weights.")

model = model.to(device).eval()


In [None]:
# ft_ckpt = torch.load(best_ckpt, map_location=device)
# state = ft_ckpt["state_dict"]

# # remove prefix 'model.' or 'backbone.' if needed
# new_state = {}
# for k, v in state.items():
#     if k.startswith("backbone."):
#         new_state[k.replace("backbone.", "backbone.")] = v
#     elif k.startswith("classifier."):
#         new_state[k] = v
#     else:
#         new_state[k] = v

# model.load_state_dict(new_state, strict=False)


In [None]:
# Put the model on `device` in eval mode, then report where the backbone and
# classifier parameters actually live.
model = model.to(device)
model.eval()

for part_name, part in (("Backbone", model.backbone), ("Classifier", model.classifier)):
    print(f"{part_name} device:", next(part.parameters()).device)


In [None]:
# Convert AVIF files that carry a .jpg extension into real JPEGs (in place)
import subprocess

# Reuse the notebook-wide image directory instead of a second hard-coded path.
img_dir = IMG_DIR

converted = 0
# Snapshot the listing first: the loop creates and renames *.jpg files in this
# same directory, which must not feed back into a lazy glob.
for img_path in sorted(img_dir.glob("*.jpg")):
    # --brief drops the file name from the output, so a path that happens to
    # contain "AVIF" cannot trigger a false positive.
    result = subprocess.run(
        ["file", "--brief", str(img_path)],
        capture_output=True, text=True, check=True,
    )
    if "AVIF" not in result.stdout:
        continue

    out_path = img_path.with_name(img_path.stem + "_fixed.jpg")
    print(f"Converting {img_path.name} → {out_path.name}")
    subprocess.run(["heif-convert", str(img_path), str(out_path)], check=True)
    # Overwrite the mislabeled original with the converted JPEG.
    out_path.replace(img_path)
    converted += 1

print(f"Done! Converted {converted} AVIF images to JPEG.")


In [None]:
# Trainer + callbacks
logger = TensorBoardLogger("logs", name="finetune_multilabel")

# The monitored metric name contains "/", so automatic metric-name insertion is
# turned off and the filename placeholder uses the logged key exactly as it is
# monitored. A placeholder that does not match a logged key is not filled with
# the real metric value.
ckpt_cb = ModelCheckpoint(
    dirpath=str(CKPT_DIR),
    filename="finetune-{epoch:02d}-{val/mAP_mean:.4f}",
    save_top_k=3,
    monitor="val/mAP_mean",
    mode="max",
    auto_insert_metric_name=False
)
# Stop when the monitored validation metric has not improved for 8 checks.
early_stop = EarlyStopping(monitor="val/mAP_mean", mode="max", patience=8)
lr_monitor = LearningRateMonitor(logging_interval='epoch')

# GPU with 16-bit precision when CUDA is available, otherwise CPU with 32-bit.
trainer = Trainer(
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    devices=1,
    max_epochs=50,
    precision=16 if torch.cuda.is_available() else 32,
    callbacks=[ckpt_cb, early_stop, lr_monitor],
    logger=logger,
    log_every_n_steps=10
)


In [None]:
# #Load simCLR backbone
# # latest pretrain checkpoint
# device = 'cuda' if torch.cuda.is_available() else 'cpu'

# label_map = json.load(open(LABEL_MAP))
# num_classes = len(label_map)

# best_ckpt = sorted((CKPT_DIR).glob("finetune-*.ckpt"))[-1]
# print("Loading checkpoint:", best_ckpt)

# # Load model from checkpoint
# model = MultiLabelFineTune.load_from_checkpoint(
#     str(best_ckpt),
#     backbone=None,
#     num_classes=num_classes
# )

# # --- THE IMPORTANT PART ---
# # Move backbone AND classifier separately onto GPU
# model.backbone = model.backbone.to(device)
# model.classifier = model.classifier.to(device)
# model = model.to(device)
# model.eval()

# print("Backbone device:", next(model.backbone.parameters()).device)
# print("Classifier device:", next(model.classifier.parameters()).device)
# print("Model device:", next(model.parameters()).device)


In [None]:
# # Always define one `device` variable and reuse it so the model and every batch end up on the same device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

# x, y = next(iter(train_dl))
# x, y = x.to(device), y.to(device)
# with torch.no_grad():
#     loss = model.criterion(model(x), y)
# print(f"Initial loss: {loss.item():.4f}")


In [None]:
# Sanity check: loss on a single training batch before fitting
run_device = 'cuda' if torch.cuda.is_available() else 'cpu'

batch = next(iter(train_dl))
x, y = batch
with torch.no_grad():
    # Inputs and targets go to the same device string before the forward pass.
    logits = model(x.to(run_device))
    loss = model.criterion(logits, y.to(run_device))
print("Initial loss:", loss.item())


In [None]:
# Train: fit the model on the training loader, validating on the validation loader.
trainer.fit(model, train_dl, val_dl)

In [None]:
# Reload the best checkpoint recorded by the ModelCheckpoint callback
best_ckpt = ckpt_cb.best_model_path
print("Best checkpoint:", best_ckpt)

finetuned = MultiLabelFineTune.load_from_checkpoint(
    best_ckpt,
    backbone=backbone,
    num_classes=num_classes,
)
# Inference mode first, then move to the shared device.
finetuned.eval()
finetuned = finetuned.to(device)


In [None]:
# Compute metrics on the held-out validation split
import numpy as np
from sklearn.metrics import average_precision_score, f1_score, hamming_loss

# Score only the validation indices so the numbers are not inflated by samples
# the model was trained on.
dl = DataLoader(Subset(train_ds, val_idx), batch_size=32, shuffle=False, num_workers=4)

# Feed batches to whatever device the evaluated model actually lives on.
eval_device = next(finetuned.parameters()).device
finetuned.eval()

all_preds, all_targets = [], []
with torch.no_grad():
    for x, y in dl:
        # Sigmoid turns the per-class logits into independent probabilities.
        probs = torch.sigmoid(finetuned(x.to(eval_device)))
        all_preds.append(probs.cpu().numpy())
        all_targets.append(y.numpy())
preds = np.vstack(all_preds)
targets = np.vstack(all_targets)

# Classes without any positive sample have no defined AP; mark them NaN and
# let nanmean skip them.
aps = [average_precision_score(targets[:,i], preds[:,i]) if targets[:,i].sum() else np.nan
       for i in range(targets.shape[1])]

# Binarise once at the default 0.5 threshold for the threshold-based metrics.
pred_bin = (preds >= 0.5).astype(int)
print("Mean AP:", np.nanmean(aps))
print("Macro-F1:", f1_score(targets, pred_bin, average='macro', zero_division=0))
print("Hamming-loss:", hamming_loss(targets, pred_bin))


In [None]:
# Persist only the weights (state_dict) of the reloaded fine-tuned model.
state_path = CKPT_DIR / "finetune_best_state.pt"
torch.save(finetuned.state_dict(), state_path)

In [None]:
# No class passes 0.5 confidence, so look at how the predicted probabilities are distributed
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.hist(preds.ravel(), bins=50)
ax.set_title("Distribution of predicted probabilities")
ax.set_xlabel("p(class)")
ax.set_ylabel("count")
plt.show()


In [None]:
# Confidence calibration: pick, per class, the threshold with the highest F1
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
import numpy as np

best_thresh = []
for i in range(targets.shape[1]):
    class_targets = targets[:,i]
    if class_targets.sum() != 0:
        p, r, t = precision_recall_curve(class_targets, preds[:,i])
        # Small epsilon keeps the division defined where precision + recall is 0.
        f1_scores = 2 * p * r / (p + r + 1e-12)
        best_idx = np.nanargmax(f1_scores)
        # The curve has one more point than thresholds; fall back to 0.5 there.
        chosen = t[best_idx] if best_idx < len(t) else 0.5
    else:
        # No positives for this class: keep the default threshold.
        chosen = 0.5
    best_thresh.append(chosen)

print("Optimal thresholds per class:")
print(best_thresh)
print("Mean threshold:", np.mean(best_thresh))

# Recompute F1 with the per-class thresholds (broadcast across the class axis).
thresh_row = np.array(best_thresh)
pred_bin_opt = (preds >= thresh_row).astype(int)
print("F1 (optimal thresholds):", f1_score(targets, pred_bin_opt, average='macro', zero_division=0))


In [None]:
# Class frequencies: how often each label occurs in the annotation CSV
import pandas as pd

annot = pd.read_csv(ANNOT_CSV)
# The 'labels' column is split on ';' and flattened to one label per row.
label_lists = annot['labels'].str.split(';')
freqs = label_lists.explode().value_counts()
print(freqs)


In [None]:
# Per-class average-precision bar chart
import matplotlib.pyplot as plt
import numpy as np


from pathlib import Path
import json

# LABEL_MAP is the path defined in the setup cell; it is reused here rather
# than rebuilt from a second hard-coded copy.
with open(LABEL_MAP, "r") as f:
    label_map = json.load(f)

idx_to_name = {v:k for k,v in label_map.items()}

print("Loaded", len(label_map), "classes.")

# Order the names by class index rather than JSON key order, so bar i is
# labelled with the class whose AP sits at position i.
# NOTE(review): assumes label_map maps name -> integer column index — confirm
# against the dataset implementation.
names = [idx_to_name[idx] for idx in sorted(idx_to_name)]
if len(names) != len(aps):
    raise ValueError(f"Got {len(aps)} AP values for {len(names)} class names")

positions = np.arange(len(names))
plt.figure(figsize=(12,6))
plt.bar(positions, aps)
plt.xticks(positions, names, rotation=90)
plt.ylabel("Average Precision")
plt.title("Per-class AP after fine-tuning")
plt.show()


In [None]:
# Confusion-style heatmaps: one 2x2 matrix per class
import math

import seaborn as sns
from sklearn.metrics import multilabel_confusion_matrix

# Binarise with the per-class thresholds, then get one confusion matrix per class.
cms = multilabel_confusion_matrix(targets, (preds>=np.array(best_thresh)).astype(int))
# Rounded AP per class for the panel titles; undefined (NaN) APs are shown as 0.
mAPs = [round(a,2) if not np.isnan(a) else 0 for a in aps]

# Size the grid from the number of classes instead of assuming exactly 20.
n_panels = min(len(names), len(cms))
n_cols = 5
n_rows = max(1, math.ceil(n_panels / n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 3.5 * n_rows), squeeze=False)
for i, ax in enumerate(axes.flat):
    if i < n_panels:
        sns.heatmap(cms[i], annot=True, fmt='d', cbar=False, ax=ax)
        ax.set_title(f"{names[i]} (AP={mAPs[i]})")
    else:
        # Hide the unused cells of the last grid row.
        ax.set_visible(False)
plt.tight_layout()
plt.show()


In [None]:
# t-SNE visualization of embeddings
import inspect

from sklearn.manifold import TSNE
import numpy as np
import matplotlib.pyplot as plt
import torch

device = next(model.parameters()).device
print("Using device:", device)

# Images are pushed through one at a time, so make sure the model is in
# inference mode.
model.eval()

# Seeded sampling keeps the plot reproducible; the sample size is capped so
# datasets with fewer than 300 images do not raise.
rng = np.random.default_rng(42)
n_samples = min(300, len(train_ds))
subset_idx = rng.choice(len(train_ds), size=n_samples, replace=False)

embeddings = []
labels = []

with torch.no_grad():
    for idx in subset_idx:
        img, _ = train_ds[idx]

        # Add a batch dimension and move the image to the model's device
        x = img.unsqueeze(0).to(device)

        # 1) Extract backbone features
        feat = model.backbone(x)

        # A 4-D output is average-pooled to 1x1 and flattened to one vector per image.
        if feat.ndim == 4:
            feat = torch.nn.functional.adaptive_avg_pool2d(feat, (1,1)).reshape(feat.size(0), -1)

        embeddings.append(feat.cpu().numpy().squeeze())

        # 2) Get classifier logits on the same device
        logits = model(x)
        logits = logits.cpu().numpy().squeeze()

        # Colour each point by the class with the highest predicted logit
        # (the model's prediction, not the annotation).
        labels.append(np.argmax(logits))

embeddings = np.array(embeddings)
labels = np.array(labels)

print("Running t-SNE…")
# scikit-learn renamed TSNE's `n_iter` to `max_iter`; use whichever the
# installed version accepts.
tsne_params = inspect.signature(TSNE.__init__).parameters
iter_kwarg = "max_iter" if "max_iter" in tsne_params else "n_iter"
# Perplexity has to stay below the number of samples.
perplexity = min(30, max(1, n_samples - 1))
tsne = TSNE(
    n_components=2,
    perplexity=perplexity,
    learning_rate=200,
    random_state=42,
    **{iter_kwarg: 1500},
)
emb_2d = tsne.fit_transform(embeddings)

plt.figure(figsize=(10,10))
plt.scatter(emb_2d[:,0], emb_2d[:,1], c=labels, cmap='tab20', s=10)
plt.title("t-SNE visualization of fine-tuned embeddings")
plt.colorbar()
plt.show()