<div class="alert alert-block alert-info">

----------
---------
# <b> ResNet-50 Fine-tuning for Corrosion Classification</b> 

--------------
----------------
</div>


This notebook fine-tunes a pretrained ResNet-50 model for binary corrosion classification.
A fixed train/validation/test split is used to ensure reproducibility and a fair comparison with the baseline model.

<div class="alert alert-block alert-info">

----------
---------
# <b> 1. Imports and Reproducibility</b> 

--------------
----------------
</div>

In [None]:
import os
import time
import json
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib
import matplotlib.pyplot as plt

from pathlib import Path
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, models, transforms
from PIL import ImageFile

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    classification_report
)

# Opt in to PIL's tolerant decoding so image files that are cut short still
# load rather than aborting the run (presumably some dataset files are
# incomplete — TODO confirm).
ImageFile.LOAD_TRUNCATED_IMAGES = True

seed = 42  # Seeds every RNG below and is embedded in the output file names.

# Seed each library's global RNG so weight init, dropout and shuffling repeat
# from run to run.
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# Seeding alone is not enough on GPU: cuDNN may autotune and pick
# non-deterministic kernels. Pin it to deterministic behaviour.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


<div class="alert alert-block alert-info">

----------
---------
# <b> 2. Paths, Hyperparameters, and Device Setup</b> 

--------------
----------------
</div>

In [None]:
# Input and output locations. The defaults are the original machine-specific
# paths; set CORROSION_DATA_DIR / CORROSION_PROJECT_DIR in the environment to
# run the notebook elsewhere without editing this cell.
DATA_DIR = Path(
    os.environ.get("CORROSION_DATA_DIR", "/home/shared-data/corrosion_images")
)

PROJECT_DIR = Path(
    os.environ.get("CORROSION_PROJECT_DIR", "/home/javid/corrosion-detector-submission")
)
SPLIT_PATH  = PROJECT_DIR / "data" / "fixed_split_indices.json"
MODELS_DIR  = PROJECT_DIR / "models"
RESULTS_DIR = PROJECT_DIR / "results"

# Create the output folders up front so later save calls do not fail on a
# missing directory.
for output_dir in (MODELS_DIR, RESULTS_DIR):
    output_dir.mkdir(parents=True, exist_ok=True)

# Checkpoint file for the best model; the seed is part of the file name.
MODEL_PATH = MODELS_DIR / f"resnet50_finetuned_seed{seed}_best.pth"

# Training hyperparameters
BATCH_SIZE = 64   # samples per mini-batch
EPOCHS = 40       # upper bound on the number of training epochs
PATIENCE = 7      # early-stopping patience, counted in epochs
LR = 1e-4         # learning rate handed to the optimizer
DROPOUT_P = 0.2   # dropout probability in the classifier head

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")


Using device: cuda


<div class="alert alert-block alert-info">

----------
---------
# <b> 3. Image Transformations</b> 

--------------
----------------
</div>

In [18]:
# Per-channel statistics handed to Normalize (presumably the standard ImageNet
# values matching the pretrained backbone — verify if the weights change).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Every image is resized to 224x224, converted to a tensor, then normalised.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform = transforms.Compose(preprocessing_steps)


<div class="alert alert-block alert-info">

----------
---------
# <b> 4. Dataset Loading and Splitting</b> 

--------------
----------------
</div>

##### **_4.1 Loading Dataset_**

In [19]:
print("Loading dataset...")

# ImageFolder is pointed at DATA_DIR and applies `transform` to each sample.
full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=transform)
class_names = full_dataset.classes

num_images = len(full_dataset)
print(f"Total images: {num_images}")
print(f"Classes: {class_names}")



Loading dataset...
Total images: 3999
Classes: ['corrosion', 'no_corrosion']


##### **_4.2 Loading Fixed Train / Val / Test Split_**

In [20]:
# Read the pre-computed split so every run uses the same train / val / test
# indices.
with open(SPLIT_PATH, "r", encoding="utf-8") as f:
    split = json.load(f)

train_indices = split["train"]
val_indices   = split["val"]
test_indices  = split["test"]

# Guard against data leakage: an index must not belong to more than one subset.
_train_set, _val_set, _test_set = set(train_indices), set(val_indices), set(test_indices)
if (_train_set & _val_set) or (_train_set & _test_set) or (_val_set & _test_set):
    raise ValueError(f"Train/val/test indices overlap in {SPLIT_PATH}")

print(
    f"Fixed splits → "
    f"Train: {len(train_indices)}, "
    f"Val: {len(val_indices)}, "
    f"Test: {len(test_indices)}"
)


Fixed splits → Train: 3199, Val: 400, Test: 400


##### **_4.3 Creating dataset subsets_**


In [21]:
# Wrap the full dataset in index-restricted views, one per split.
train_ds, val_ds, test_ds = (
    Subset(full_dataset, split_indices)
    for split_indices in (train_indices, val_indices, test_indices)
)


<div class="alert alert-block alert-info">

----------
---------
# <b> 5. DataLoaders</b> 

--------------
----------------
</div>

In [22]:
# Only the training loader reshuffles; validation and test keep a fixed order.
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=BATCH_SIZE)

eval_loader_kwargs = dict(shuffle=False, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_ds, **eval_loader_kwargs)
test_loader = DataLoader(dataset=test_ds, **eval_loader_kwargs)

print("Dataloaders ready.")


Dataloaders ready.


<div class="alert alert-block alert-info">

----------
---------
# <b> 6. Model Definition (ResNet-50)</b> 

--------------
----------------
</div>

In [None]:
# Reuse the device selected in the setup cell instead of detecting it a second
# time; `device` is kept as an alias so both spellings name the same object.
device = DEVICE
print("Using device:", device)

# Start from the ImageNet-pretrained ResNet-50 weights.
model = models.resnet50(
    weights=models.ResNet50_Weights.IMAGENET1K_V2
)

# Replace the original classifier with dropout followed by a 2-output linear
# layer (one logit per class).
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(DROPOUT_P),
    nn.Linear(num_features, 2)
)

# Fine-tune only layer3, layer4 and the new head; all other parameters are
# frozen. Matching is by substring of the parameter name.
TRAINABLE_TAGS = ("layer3", "layer4", "fc")
for name, param in model.named_parameters():
    param.requires_grad = any(tag in name for tag in TRAINABLE_TAGS)

model.to(DEVICE)

print("Model loaded and configured.")


Using device: cuda
Model loaded and configured.


<div class="alert alert-block alert-info">

----------
---------
# <b> 7. Loss Function and Optimizer</b> 

--------------
----------------
</div>

In [24]:
# Classification loss over the two class logits.
criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that were left trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=LR)

# Per-epoch metric log; the training loop appends one entry per key each epoch.
history = {
    metric: []
    for metric in ("epoch", "train_loss", "val_loss", "train_acc", "val_acc")
}


<div class="alert alert-block alert-info">

----------
---------
# <b> 8. Training Loop with Early Stopping</b> 

--------------
----------------
</div>

In [25]:
# Fine-tune for at most EPOCHS epochs. After every epoch the model is scored on
# the validation set; the weights are checkpointed whenever validation accuracy
# strictly improves, and training stops once PATIENCE consecutive epochs pass
# without an improvement.
best_val_acc = 0.0
patience_left = PATIENCE

start_time = time.time()

for epoch in range(EPOCHS):
    # ---- Training pass ----
    model.train()
    correct, total, loss_sum = 0, 0, 0.0

    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        outputs = model(x)
        loss = criterion(outputs, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion yields a batch mean; weight it by the batch size so the
        # epoch average stays exact when the last batch is smaller.
        loss_sum += loss.item() * x.size(0)
        correct += (outputs.argmax(1) == y).sum().item()
        total += y.size(0)

    train_acc = correct / total
    train_loss = loss_sum / total

    # ---- Validation pass (no gradients, eval mode) ----
    model.eval()
    v_correct, v_total, v_loss_sum = 0, 0, 0.0

    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            outputs = model(x)
            loss = criterion(outputs, y)

            v_loss_sum += loss.item() * x.size(0)
            v_correct += (outputs.argmax(1) == y).sum().item()
            v_total += y.size(0)

    val_acc = v_correct / v_total
    val_loss = v_loss_sum / v_total

    # Record this epoch's metrics (epochs are stored 1-based).
    history["epoch"].append(epoch + 1)
    history["train_loss"].append(train_loss)
    history["val_loss"].append(val_loss)
    history["train_acc"].append(train_acc)
    history["val_acc"].append(val_acc)

    print(
        f"Epoch {epoch+1:02d} | "
        f"Train Acc: {train_acc*100:.2f}% | "
        f"Val Acc: {val_acc*100:.2f}%"
    )

    if val_acc > best_val_acc:
        # New best: checkpoint the weights and reset the patience counter.
        best_val_acc = val_acc
        patience_left = PATIENCE
        torch.save(model.state_dict(), MODEL_PATH)
        print(" Model improved and saved.")
    else:
        patience_left -= 1
        print(f"No improvement. Patience left: {patience_left}")

        # `<=` rather than `==` so a PATIENCE of 0 (or below) still stops.
        if patience_left <= 0:
            print("Early stopping triggered.")
            break

# Report the wall-clock cost; start_time was previously recorded but never used.
elapsed_seconds = time.time() - start_time
print(
    f"Training finished in {elapsed_seconds / 60:.1f} min | "
    f"Best Val Acc: {best_val_acc*100:.2f}%"
)


Epoch 01 | Train Acc: 88.34% | Val Acc: 97.75%
 Model improved and saved.
Epoch 02 | Train Acc: 99.06% | Val Acc: 97.75%
No improvement. Patience left: 6
Epoch 03 | Train Acc: 99.78% | Val Acc: 98.75%
 Model improved and saved.
Epoch 04 | Train Acc: 99.91% | Val Acc: 98.25%
No improvement. Patience left: 6
Epoch 05 | Train Acc: 99.91% | Val Acc: 98.25%
No improvement. Patience left: 5
Epoch 06 | Train Acc: 99.84% | Val Acc: 98.75%
No improvement. Patience left: 4
Epoch 07 | Train Acc: 100.00% | Val Acc: 98.75%
No improvement. Patience left: 3
Epoch 08 | Train Acc: 100.00% | Val Acc: 99.00%
 Model improved and saved.
Epoch 09 | Train Acc: 100.00% | Val Acc: 99.00%
No improvement. Patience left: 6
Epoch 10 | Train Acc: 99.91% | Val Acc: 97.75%
No improvement. Patience left: 5
Epoch 11 | Train Acc: 100.00% | Val Acc: 98.25%
No improvement. Patience left: 4
Epoch 12 | Train Acc: 99.94% | Val Acc: 99.00%
No improvement. Patience left: 3
Epoch 13 | Train Acc: 100.00% | Val Acc: 98.75%
No improvement. Patience left: 2

##### **_8.1 Save Training History_**


In [26]:
# Persist the per-epoch metrics as a CSV in the results folder; the seed is
# part of the file name.
history_path = RESULTS_DIR / f"resnet50_finetuned_seed{seed}_training_history.csv"
history_df = pd.DataFrame.from_dict(history)
history_df.to_csv(history_path, index=False)

print(f"Training history saved to: {history_path}")


Training history saved to: /home/javid/corrosion-detector-submission/results/resnet50_finetuned_seed42_training_history.csv


##### **_8.2 Plotting Training & Validation Loss Curves_**


In [35]:
# Loss curves from the in-memory training history. Axis labels, title and grid
# are included so the saved figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history["epoch"], history["train_loss"], label="Train Loss")
ax.plot(history["epoch"], history["val_loss"], label="Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss")
ax.legend()
ax.grid(True)
fig.savefig(
    RESULTS_DIR / f"resnet50_finetuned_seed{seed}_loss_curve.png",
    dpi=300,
    bbox_inches="tight",
)
# Close the figure so it does not stay open in the kernel.
plt.close(fig)




In [37]:
# Rebuild the loss plot from the saved CSV so the figure can be regenerated
# without retraining. pandas and matplotlib are already imported in the
# notebook's import cell, so they are not re-imported here.

# Derive the file name from `seed` so it stays in sync with the cell that
# writes the CSV (the name was previously hard-coded to seed42).
history_path = RESULTS_DIR / f"resnet50_finetuned_seed{seed}_training_history.csv"

history_df = pd.read_csv(history_path)

plt.figure()
plt.plot(history_df["epoch"], history_df["train_loss"], label="Train Loss")
plt.plot(history_df["epoch"], history_df["val_loss"], label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation Loss")
plt.legend()
plt.grid(True)
plt.savefig(RESULTS_DIR / f"resnet50_finetuned_seed{seed}_loss_curve.png", dpi=300)
plt.close()



##### **_8.3 Plotting Training & Validation Accuracy Curves_**


In [39]:
# Accuracy curves from the in-memory training history, saved to the results
# folder and then closed.
fig, ax = plt.subplots()
epochs_axis = history["epoch"]
ax.plot(epochs_axis, history["train_acc"], label="Train Accuracy")
ax.plot(epochs_axis, history["val_acc"], label="Validation Accuracy")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.set_title("Training and Validation Accuracy")
ax.legend()
ax.grid(True)

accuracy_plot_path = RESULTS_DIR / f"resnet50_finetuned_seed{seed}_accuracy_curve.png"
fig.savefig(accuracy_plot_path, dpi=300, bbox_inches="tight")
plt.close(fig)



<div class="alert alert-block alert-info">

----------
---------
# <b> 9. Test Evaluation and Confusion Matrix</b> 

--------------
----------------
</div>

##### **_9.1 Loading the best model and collecting predictions_**


In [29]:
# Restore the checkpoint written during training and switch to inference mode.
best_state = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(best_state)
model.eval()

# Ground-truth labels and predicted class indices, accumulated batch by batch.
y_true, y_pred = [], []

with torch.no_grad():
    for images, labels in test_loader:
        logits = model(images.to(DEVICE))
        # argmax over dim 1 of the logits gives the predicted class index.
        batch_preds = logits.argmax(1).cpu().numpy()

        y_true.extend(labels.numpy())
        y_pred.extend(batch_preds)

print("Predictions collected for test set.")


Predictions collected for test set.


##### **_9.2 Test Accuracy + Precision / Recall / F1-score_**


In [30]:
# Overall test accuracy, expressed as a percentage.
test_acc = accuracy_score(y_true, y_pred) * 100
print(f"Test Accuracy: {test_acc:.2f}%\n")

# Per-class precision / recall / F1, labelled with the dataset's class names.
report_text = classification_report(
    y_true, y_pred, target_names=class_names, digits=4
)
print("Classification Report:")
print(report_text)


Test Accuracy: 98.25%

Classification Report:
              precision    recall  f1-score   support

   corrosion     0.9793    0.9844    0.9818       192
no_corrosion     0.9855    0.9808    0.9831       208

    accuracy                         0.9825       400
   macro avg     0.9824    0.9826    0.9825       400
weighted avg     0.9825    0.9825    0.9825       400



##### **_9.3 Confusion Matrix_**


In [31]:
# Confusion matrix of the test-set predictions.
cm = confusion_matrix(y_true, y_pred)

disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=class_names
)

disp.plot(cmap="Blues")
# The title previously contained a mis-encoded dash ("â€“"); use a real en dash.
plt.title("Confusion Matrix – Test Set")

# Build the path with the Path operator, like the other save calls in the notebook.
cm_path = RESULTS_DIR / f"resnet50_finetuned_seed{seed}_confusion_matrix.png"
plt.savefig(cm_path, dpi=300, bbox_inches="tight")
plt.show()

print(f"Confusion matrix saved to: {cm_path}")


Confusion matrix saved to: /home/javid/corrosion-detector-submission/results/resnet50_finetuned_seed42_confusion_matrix.png
