## Run Next Cell only to create a groundtruth csv file from the testing set

In [33]:
# Script to create a CSV file with image filenames, labels, and paths from a dataset organized in class-specific folders.

import os
import csv
from pathlib import Path

# Root folder where cat/ and dog/ exist
DATA_ROOT = Path("/media/veer/Data/Projects/collaborative_cnn_team08/data/Data2/test_set")   # change this if needed

# Output CSV file
CSV_PATH = Path("../data/Data2/test_labels.csv")

rows = []

# Loop through class folders (e.g., cat, dog)
for class_name in ["cats", "dogs"]:
    class_path = DATA_ROOT / class_name

    # Make sure folder exists
    if not class_path.exists():
        print(f"Folder not found: {class_path}")
        continue

    # Loop through files inside class folder
    for filename in os.listdir(class_path):
        if filename.lower().endswith((".jpg", ".png", ".jpeg")):
            # save filename and label and image path
            rows.append([filename, class_name, str(class_path / filename)])

# Write to CSV
with open(CSV_PATH, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["filename", "label", "image_path"])  # header
    csvwriter.writerows(rows)




# Testing the prediction and ground truth with random sampling

In [None]:
# =========================
# Cell 1: Imports & Config
# =========================

import os
import sys
from pathlib import Path
from time import time

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

sys.path.append("../")
from models.model_v2 import CustomCNN
from utils.metrics import compute_metrics, print_metrics, save_metrics, load_metrics

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

PROJECT_ROOT = Path("../").resolve()

CSV_PATH = PROJECT_ROOT / "data" / "Data2" / "test_labels.csv"
MODEL_WEIGHTS_PATH = PROJECT_ROOT / "models" / "model_v2.pth"

BATCH_SIZE = 32
IMAGE_SIZE = 224

label_to_idx = {
    "cats": 0,
    "dogs": 1,
}
idx_to_label = {v: k for k, v in label_to_idx.items()}


Using device: cuda


In [None]:
# =========================
# Cell 2: Transform & Dataset
# =========================

val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Load CSV
df = pd.read_csv(CSV_PATH)
print("Total samples in CSV:", len(df))
print(df.head())

# Shuffle rows
df = df.sample(frac=1.0, random_state=42).reset_index(drop=True)
print("After shuffling:")
print(df.head())


class CSVDataset(Dataset):
    def __init__(self, dataframe, transform=None, label_to_idx=None):
        self.df = dataframe
        self.transform = transform
        self.label_to_idx = label_to_idx

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row["image_path"]
        label_str = row["label"]

        # convert label string to index
        if self.label_to_idx is not None:
            label = self.label_to_idx[label_str]
        else:
            label = int(label_str)

        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label, img_path


test_dataset = CSVDataset(df, transform=val_transform, label_to_idx=label_to_idx)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("Test samples:", len(test_dataset))


Total samples in CSV: 2023
       filename label                                         image_path
0  cat.4301.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
1  cat.4606.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
2  cat.4980.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
3  cat.4981.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
4  cat.4982.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
After shuffling:
       filename label                                         image_path
0  cat.4642.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
1  cat.4945.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
2  cat.4690.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
3  cat.4573.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
4  cat.4558.jpg  cats  /media/veer/Data/Projects/collaborative_cnn_te...
Test samples: 2023


In [30]:
# Load model
model = CustomCNN(num_classes=2).to(DEVICE)
model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH, map_location=DEVICE))
model.eval()

print("Model loaded successfully.")


Model loaded successfully.


In [36]:
# =========================
# Cell 3: Load Model & Weights
# =========================

num_classes = len(label_to_idx)

model = CustomCNN(num_classes=num_classes).to(DEVICE)

if MODEL_WEIGHTS_PATH.exists():
    state_dict = torch.load(MODEL_WEIGHTS_PATH, map_location=DEVICE)
    model.load_state_dict(state_dict)
    print(f"Loaded weights from {MODEL_WEIGHTS_PATH}")
else:
    raise FileNotFoundError(f"Model weights not found at {MODEL_WEIGHTS_PATH}")

model.eval()


Loaded weights from /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth


CustomCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (global_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
   

In [37]:
# =========================
# Cell 4: Inference on Test CSV
# =========================

all_labels = []
all_preds = []
all_paths = []

start_time = time()

with torch.no_grad():
    for images, labels, paths in test_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)

        all_labels.extend(labels.cpu().tolist())
        all_preds.extend(preds.cpu().tolist())
        all_paths.extend(paths)

total_time = time() - start_time
print(f"Inference completed in {total_time:.2f} seconds.")


Inference completed in 9.50 seconds.


In [38]:
# =========================
# Cell 5: Metrics & Report
# =========================

acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average="macro")
cm = confusion_matrix(all_labels, all_preds)

print(f"Test Accuracy: {acc:.4f}")
print(f"Test F1 (macro): {f1:.4f}")
print("Confusion Matrix (rows=true, cols=pred):")
print(cm)

target_names = [idx_to_label[i] for i in range(len(idx_to_label))]
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=target_names))


Test Accuracy: 0.8552
Test F1 (macro): 0.8549
Confusion Matrix (rows=true, cols=pred):
[[825 186]
 [107 905]]

Classification Report:
              precision    recall  f1-score   support

        cats       0.89      0.82      0.85      1011
        dogs       0.83      0.89      0.86      1012

    accuracy                           0.86      2023
   macro avg       0.86      0.86      0.85      2023
weighted avg       0.86      0.86      0.85      2023



In [None]:
# =========================
# Cell 6: Save results in json with accuracy, f1 score, confusion matrix
# =========================

import json
results = {
    "accuracy": acc,
    "f1_macro": f1,
    "confusion_matrix": cm.tolist(),
    "classification_report": classification_report(all_labels, all_preds, target_names=target_names, output_dict=True),
    "number_of_samples": len(all_labels),
}

RESULTS_PATH = PROJECT_ROOT / "results" / "test_results_v2.json"
with open(RESULTS_PATH, 'w') as f:
    json.dump(results, f, indent=4)
print(f"Results saved to {RESULTS_PATH}")



Results saved to /media/veer/Data/Projects/collaborative_cnn_team08/results/test_results_v2.json
