# Use-case Evaluation: Fine-tuned ResNet-50

This notebook evaluates the best-performing fine-tuned ResNet-50 model on an independent
use-case dataset that was not used during training, validation, or model selection.
The goal is to assess model generalization under realistic conditions.


Imports

In [30]:
import json
import os
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from PIL import ImageFile
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Let Pillow load image files whose data is truncated instead of raising,
# so that one damaged file does not abort the whole evaluation run.
ImageFile.LOAD_TRUNCATED_IMAGES = True


Paths & device

Set the dataset, model-checkpoint, and results paths, then select the compute device.

In [31]:
# ---- Paths ----
USECASE_DIR = Path("/home/shared-data/use_case_imgs")  # ðŸ‘ˆ adjust if needed

PROJECT_DIR = Path("/home/javid/corrosion-detector-submission")
MODEL_PATH  = PROJECT_DIR / "models" / "resnet50_finetuned_seed42_best.pth"
RESULTS_DIR = PROJECT_DIR / "results"

RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# ---- Device ----
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")


Using device: cuda


Transforms (MUST match training)

In [32]:
# Evaluation-time preprocessing. It has to mirror the pipeline used during
# training so the network receives inputs prepared the same way.
_preprocessing_steps = [
    # Force every image to a fixed 224x224 size.
    transforms.Resize((224, 224)),
    # Convert the image to a tensor.
    transforms.ToTensor(),
    # Per-channel normalization (presumably the ImageNet statistics — confirm
    # against the training notebook).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocessing_steps)


Load use-case dataset

In [33]:
# Build the evaluation dataset from the images under USECASE_DIR, applying the
# evaluation preprocessing to each one.
usecase_dataset = datasets.ImageFolder(root=USECASE_DIR, transform=transform)

# No shuffling: predictions stay in dataset order across runs.
usecase_loader = DataLoader(dataset=usecase_dataset, batch_size=32, shuffle=False)

# NOTE(review): label indices are derived from this folder's class names —
# confirm they map to the same indices the model was trained with.
class_names = usecase_dataset.classes

print(
    f"Use-case images: {len(usecase_dataset)}",
    f"Classes: {class_names}",
    sep="\n",
)


Use-case images: 73
Classes: ['corrosion', 'non_corrosion']


Load best fine-tuned model

In [34]:
# Rebuild the architecture without pretrained weights; the parameters come
# from the fine-tuned checkpoint loaded below.
model = models.resnet50(weights=None)

# Replace the stock classifier with the head used during fine-tuning. The
# layer layout has to match the checkpoint for load_state_dict() to accept it.
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.2),   # MUST match training
    nn.Linear(num_features, 2)
)

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)
model.to(DEVICE)
model.eval()  # evaluation behaviour: dropout is disabled for the forward passes

print("Best fine-tuned model loaded successfully.")


Best fine-tuned model loaded successfully.


7 — Collect predictions

In [35]:
# Run the model over the whole use-case set and record, for every image, the
# ground-truth class index and the arg-max predicted class index.
all_labels, all_preds = [], []

with torch.no_grad():  # gradients are not needed for evaluation
    for images, targets in usecase_loader:
        logits = model(images.to(DEVICE))
        batch_preds = torch.argmax(logits, dim=1).cpu().numpy()

        all_preds.extend(batch_preds)
        all_labels.extend(targets.numpy())

print("Predictions collected for use-case dataset.")


Predictions collected for use-case dataset.


Save predictions to CSV

In [36]:
# Persist the raw predictions: one row per image, ground-truth class index
# next to the predicted class index.
csv_path = RESULTS_DIR / "usecase_predictions_resnet50_finetuned.csv"

prediction_columns = {"true_label": all_labels, "predicted_label": all_preds}
df_preds = pd.DataFrame(prediction_columns)
df_preds.to_csv(csv_path, index=False)

print(f"Use-case predictions saved to: {csv_path}")


Use-case predictions saved to: /home/javid/corrosion-detector-submission/results/usecase_predictions_resnet50_finetuned.csv


8 — Metrics & clean output

8.1 — Accuracy

In [37]:
# Overall fraction of use-case images whose predicted class equals the label.
accuracy = accuracy_score(y_true=all_labels, y_pred=all_preds)

# Print a small banner with the accuracy as a percentage.
rule = "===================="
for line in (
    rule,
    "   USE-CASE TEST ACCURACY",
    rule,
    f"Accuracy: {accuracy*100:.2f}%",
    rule,
):
    print(line)


   USE-CASE TEST ACCURACY
Accuracy: 72.60%


8.2 — Classification report

In [38]:
# Per-class precision / recall / F1 on the use-case set.
#
# `labels` is pinned to every known class index. Left unset, scikit-learn
# infers the classes from the data, so a class that appears in neither
# all_labels nor all_preds would make `target_names` the wrong length and
# raise a ValueError.
report = classification_report(
    all_labels,
    all_preds,
    labels=list(range(len(class_names))),
    target_names=class_names,
    digits=4,
)

print("====================")
print("   CLASSIFICATION REPORT (USE-CASE TEST SET)")
print("====================")

print(report)


   CLASSIFICATION REPORT (USE-CASE TEST SET)
               precision    recall  f1-score   support

    corrosion     0.9524    0.6897    0.8000        58
non_corrosion     0.4194    0.8667    0.5652        15

     accuracy                         0.7260        73
    macro avg     0.6859    0.7782    0.6826        73
 weighted avg     0.8429    0.7260    0.7518        73



8.3 — Confusion matrix

In [39]:
# Confusion matrix over the use-case set (rows: true class, columns: predicted).
#
# `labels` is pinned to every known class index so the matrix always has one
# row/column per entry in class_names. Left unset, a class absent from both
# all_labels and all_preds would shrink the matrix and leave it out of step
# with `display_labels`.
cm = confusion_matrix(
    all_labels,
    all_preds,
    labels=list(range(len(class_names))),
)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

# Draw on an explicit Figure/Axes instead of relying on pyplot's current figure.
fig, ax = plt.subplots()
disp.plot(cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix – Use-case Test Set")

cm_path = RESULTS_DIR / "usecase_confusion_matrix_resnet50_finetuned.png"
# Save before plt.show() so the file is written from the fully drawn figure.
fig.savefig(cm_path, dpi=300, bbox_inches="tight")
plt.show()

print(f"Confusion matrix saved to: {cm_path}")


Confusion matrix saved to: /home/javid/corrosion-detector-submission/results/usecase_confusion_matrix_resnet50_finetuned.png
