## Testing Model V1 on dataset 2

In [7]:
# === IMPORTS ===
import os
import json
import numpy as np
from tqdm.auto import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
import os, sys

sys.path.append("../")
from models.model_v1 import get_model

from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix,
)

print("Imports OK.")


Imports OK.


In [8]:
# === Config ===
# Raw string literal: in a normal string "\P" and "\c" are invalid escape
# sequences, which Python warns about (and plans to reject). The resulting
# value is unchanged.
# NOTE(review): absolute, machine-specific path — adjust for your environment.
DATA_DIR = r'F:\Projects\collaborative_cnn_team08'
TEST_DIR = os.path.join(DATA_DIR, 'TestDataset')  # folder holding the test images
MODEL_PATH = '../models/model_v1.pth'  # checkpoint to evaluate
OUTPUT_JSON = "../results/test_v1_user2.json"  # destination of the metrics report
BATCH_SIZE = 32
NUM_WORKERS = 4
IMAGE_SIZE = (224, 224)

# Create the folder that will receive OUTPUT_JSON. It is derived from the path
# itself so the two cannot drift apart; "." covers a bare filename.
os.makedirs(os.path.dirname(OUTPUT_JSON) or ".", exist_ok=True)
print('TEST_DIR =', TEST_DIR)
print('MODEL_PATH =', MODEL_PATH)

TEST_DIR = F:\Projects\collaborative_cnn_team08\TestDataset
MODEL_PATH = ../models/model_v1.pth


In [9]:
# === LOAD TEST DATASET ===
# Stop right away with a readable error if the dataset folder is missing.
if not os.path.isdir(TEST_DIR):
    raise RuntimeError(f"Test directory not found: {TEST_DIR}")

# Preprocessing applied to every image: resize to IMAGE_SIZE, then to tensor.
test_transform = transforms.Compose(
    [transforms.Resize(IMAGE_SIZE), transforms.ToTensor()]
)

# Dataset rooted at TEST_DIR, wrapped in a non-shuffling batch loader.
test_ds = datasets.ImageFolder(root=TEST_DIR, transform=test_transform)
test_loader = DataLoader(
    dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS
)

print("Classes:", test_ds.classes)
print("Number of test images:", len(test_ds))


Classes: ['cat', 'dog']
Number of test images: 1000


In [10]:
# === LOAD MODEL ===
# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Check for the checkpoint *before* building the model so that a wrong
# MODEL_PATH fails immediately instead of after model construction.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model not found at: {MODEL_PATH}")

# One output class per class found in the test dataset.
num_classes = len(test_ds.classes)
model = get_model(num_classes=num_classes, device=device)

# map_location places the stored tensors on the selected device.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source (consider weights_only=True where the installed PyTorch supports it).
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.eval()  # switch the model to evaluation mode before inference

print("Model loaded successfully.")


Using device: cpu
Model loaded successfully.


In [11]:
def compute_classification_metrics(y_true, y_pred, average="macro"):
    """
    Summarise classifier predictions as a dictionary of metrics.

    Args:
        y_true (list or np.array): Ground truth labels
        y_pred (list or np.array): Predicted labels
        average (str): Averaging mode forwarded to the F1, precision and
                       recall scorers, e.g. "macro", "micro", "weighted"

    Returns:
        dict: "accuracy", "f1", "precision" and "recall" as floats, plus
              "confusion_matrix" as a nested list (None if it could not
              be computed)
    """
    truth = np.array(y_true)
    predicted = np.array(y_pred)

    results = {"accuracy": float(accuracy_score(truth, predicted))}

    # These scorers share one call signature, so evaluate them in a loop;
    # insertion order fixes the key order of the returned dictionary.
    averaged_scorers = (
        ("f1", f1_score),
        ("precision", precision_score),
        ("recall", recall_score),
    )
    for key, scorer in averaged_scorers:
        results[key] = float(
            scorer(truth, predicted, average=average, zero_division=0)
        )

    # Best effort: a failure here is recorded as None rather than raised.
    try:
        results["confusion_matrix"] = confusion_matrix(truth, predicted).tolist()
    except Exception:
        results["confusion_matrix"] = None

    return results


def save_metrics(metrics: dict, path: str):
    """
    Save a dictionary of metrics to a JSON file.

    Missing parent directories of ``path`` are created first, and the data is
    serialised before the file is opened, so a serialisation error does not
    leave a truncated file behind.

    Args:
        metrics (dict): metrics dictionary; values must be JSON-serialisable
        path (str): output JSON file path

    Raises:
        TypeError: if ``metrics`` contains a value the json module cannot encode
        OSError: if the directory or the file cannot be created
    """
    # Serialise up front: if this raises, nothing has been written yet.
    payload = json.dumps(metrics, indent=2)

    parent = os.path.dirname(path)
    if parent:  # empty for a bare filename, which goes to the current directory
        os.makedirs(parent, exist_ok=True)

    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)


In [12]:
# === EVALUATE MODEL ===
# Collect ground-truth and predicted class indices over every test batch.
all_labels, all_preds = [], []

with torch.no_grad():  # inference only, so gradient tracking is disabled
    for imgs, labels in tqdm(test_loader, desc="Testing"):
        imgs, labels = imgs.to(device), labels.to(device)

        outputs = model(imgs)
        # argmax over dim 1 (presumably the class dimension — confirm against the model)
        preds = outputs.argmax(dim=1)

        all_labels += labels.cpu().tolist()
        all_preds += preds.cpu().tolist()

# Compute metrics, then attach dataset metadata to the same report
metrics = compute_classification_metrics(all_labels, all_preds)
metrics.update(num_test_images=len(test_ds), class_names=test_ds.classes)

print("Test Metrics:", metrics)

save_metrics(metrics, OUTPUT_JSON)
print("Saved test metrics at", OUTPUT_JSON)


Testing: 100%|██████████| 32/32 [00:20<00:00,  1.54it/s]


Test Metrics: {'accuracy': 0.765, 'f1': 0.7642339962538287, 'precision': 0.7684892867708744, 'recall': 0.7649999999999999, 'confusion_matrix': [[411, 89], [146, 354]], 'num_test_images': 1000, 'class_names': ['cat', 'dog']}
Saved test metrics at ../results/test_v1_user2.json
