In [28]:
# ================================================================
# Manual Evaluation Script for DepthNet
# ================================================================
import os
import torch
import torch.nn.functional as F
import numpy as np
from torchvision import transforms
from PIL import Image
from skimage.metrics import structural_similarity as ssim
import matplotlib.pyplot as plt
from tqdm import tqdm
import json

from models.model import DepthNet

# ------------------------------------------------------------
# Config
# ------------------------------------------------------------
IMG_DIR = "data/train/image"        # source images
DEPTH_DIR = "data/train/depth"      # ground-truth depth maps
CHECKPOINT = "outputs/checkpoints/depthnet_epoch20.pth"

os.makedirs("outputs/plots", exist_ok=True)

# ------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------
def load_image_pairs(img_dir, depth_dir):
    img_files = sorted([f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.png'))])
    depth_files = sorted([f for f in os.listdir(depth_dir) if f.endswith('.png')])

    # Match files by name
    common = list(set(os.path.splitext(f)[0] for f in img_files) &
                  set(os.path.splitext(f)[0] for f in depth_files))

    image_paths = [os.path.join(img_dir, f"{n}.jpg") if os.path.exists(os.path.join(img_dir, f"{n}.jpg"))
                   else os.path.join(img_dir, f"{n}.png") for n in common]
    depth_paths = [os.path.join(depth_dir, f"{n}.png") for n in common]

    return image_paths, depth_paths


def compute_metrics(pred, gt):
    pred = pred.cpu().numpy().squeeze()
    gt = gt.cpu().numpy().squeeze()
    mae = np.mean(np.abs(pred - gt))
    mse = np.mean((pred - gt) ** 2)
    rmse = np.sqrt(mse)
    ssim_val = ssim(pred, gt, data_range=gt.max() - gt.min())
    return mae, mse, rmse, ssim_val


# ------------------------------------------------------------
# Load model
# ------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DepthNet().to(device)
model.load_state_dict(torch.load(CHECKPOINT, map_location=device, weights_only=True))
model.eval()
print(f"âœ… Loaded model from {CHECKPOINT}")

# ------------------------------------------------------------
# Load images
# ------------------------------------------------------------
transform = transforms.Compose([
    transforms.ToTensor()
])

image_paths, depth_paths = load_image_pairs(IMG_DIR, DEPTH_DIR)
print(f"[INFO] Found {len(image_paths)} imageâ€“depth pairs for evaluation.")

# Use a subset if dataset is huge
image_paths, depth_paths = image_paths[:50], depth_paths[:50]

# ------------------------------------------------------------
# Evaluation
# ------------------------------------------------------------
mae_list, mse_list, rmse_list, ssim_list = [], [], [], []

for i, (img_path, depth_path) in enumerate(tqdm(zip(image_paths, depth_paths), total=len(image_paths), desc="Evaluating")):
    img = transform(Image.open(img_path).convert("RGB")).unsqueeze(0).to(device)
    depth_gt = transform(Image.open(depth_path).convert("L")).to(device)

    with torch.no_grad():
        depth_pred = model(img)

    mae, mse, rmse, ssim_val = compute_metrics(depth_pred, depth_gt)
    mae_list.append(mae)
    mse_list.append(mse)
    rmse_list.append(rmse)
    ssim_list.append(ssim_val)

    # Save visualization for first few
    if i < 10:
        fig, axes = plt.subplots(1, 3, figsize=(12, 4))
        axes[0].imshow(img[0].permute(1, 2, 0).cpu())
        axes[0].set_title("Input Image"); axes[0].axis("off")

        axes[1].imshow(depth_gt[0].squeeze().cpu(), cmap="plasma")
        axes[1].set_title("Ground Truth"); axes[1].axis("off")

        axes[2].imshow(depth_pred[0].squeeze().cpu(), cmap="plasma")
        axes[2].set_title("Predicted Depth"); axes[2].axis("off")

        plt.tight_layout()
        plt.savefig(f"outputs/plots/manual_test_{i}.png")
        plt.close()

# ------------------------------------------------------------
# Save metrics
# ------------------------------------------------------------
metrics = {
    "MAE": float(np.mean(mae_list)),
    "MSE": float(np.mean(mse_list)),
    "RMSE": float(np.mean(rmse_list)),
    "SSIM": float(np.mean(ssim_list)),
}

with open("outputs/manual_metrics.json", "w") as f:
    json.dump(metrics, f, indent=4)

print("\nðŸ“Š Evaluation Results:")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

print("\nâœ… Plots saved to outputs/plots/")
print("âœ… Metrics saved to outputs/manual_metrics.json")


âœ… Loaded model from outputs/checkpoints/depthnet_epoch20.pth
[INFO] Found 250 imageâ€“depth pairs for evaluation.


Evaluating: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 50/50 [00:07<00:00,  6.46it/s]


ðŸ“Š Evaluation Results:
MAE: 0.2394
MSE: 0.1557
RMSE: 0.3938
SSIM: 0.2280

âœ… Plots saved to outputs/plots/
âœ… Metrics saved to outputs/manual_metrics.json





In [29]:
# ================================================================
# ðŸ“ˆ Plot and Save Evaluation Metrics
# ================================================================
import matplotlib.pyplot as plt

# Make sure outputs/plots exists
os.makedirs("outputs/plots", exist_ok=True)

# Metrics dictionary (already computed)
metrics = {
    "MAE": 0.2394,
    "MSE": 0.1557,
    "RMSE": 0.3938,
    "SSIM": 0.2280
}

# Create bar chart
plt.figure(figsize=(6, 4))
plt.bar(metrics.keys(), metrics.values(), color=['#4c72b0', '#55a868', '#c44e52', '#8172b2'])
plt.title("Depth Estimation Performance Metrics", fontsize=14)
plt.ylabel("Score", fontsize=12)
plt.ylim(0, max(metrics.values()) * 1.2)

# Annotate bars
for key, value in metrics.items():
    plt.text(key, value + 0.02, f"{value:.3f}", ha='center', fontsize=10)

# Save to outputs/plots
save_path = "outputs/plots/metrics_plot.png"
plt.tight_layout()
plt.savefig(save_path)
plt.close()

print(f"âœ… Metrics plot saved at: {save_path}")


âœ… Metrics plot saved at: outputs/plots/metrics_plot.png
