<a href="https://colab.research.google.com/github/Ahmed-M2020/zer0p_notebooks/blob/main/Experiment_0_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Models Evaluation

## Dependencies

In [None]:
# Standard libraries
import os
from pathlib import Path
import random
import copy

# Data handling and visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
import tqdm

# PyTorch Frameworks
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

# Metrics and Evaluation
from sklearn.metrics import roc_curve, auc, confusion_matrix

# Google Colab specific
from google.colab import drive

# Seed every random number generator the notebook imports so that a
# "Restart & Run All" reproduces the same shuffles and augmentations.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
# Mount Google Drive so the annotation table is reachable under /content/drive.
drive.mount('/content/drive')
file_path = '/content/drive/MyDrive/zerop/0_Auswertung_231115.csv'
# The annotation file is read as tab-separated, ISO-8859-1 encoded text.
df = pd.read_csv(file_path, sep='\t', encoding='ISO-8859-1')
# Only the last expression of a cell is rendered, so the preview has to be
# displayed explicitly; the shape stays the cell's result.
display(df.head())
df.shape

Mounted at /content/drive


(960, 10)

In [None]:

folder_path = Path('/content/drive/MyDrive/zerop/FTI Dataset 2023')
# Collect every PNG below the dataset folder (searched recursively).
all_image_paths = list(folder_path.rglob('*.png'))
# Extract base names from the list of full paths
base_names = [path.name for path in all_image_paths]
# Two files with the same name in different sub-folders would silently
# overwrite each other in the mapping below, so make that visible.
if len(set(base_names)) != len(base_names):
    print("Warning: duplicate image file names found; the last occurrence wins in path_mapping.")
# Create a mapping of base names to full paths
path_mapping = dict(zip(base_names, all_image_paths))
columns = ['File','Feature','Label']
# Work on an explicit copy so the column assignments below never touch `df`.
df2 = df.loc[:, columns].copy()
df2['absolute_path'] = df2['File'].map(path_mapping)
# Rows whose file was not found on disk get NaN here; fail early with a clear
# message instead of a confusing error when the image is opened later.
missing_files = df2.loc[df2['absolute_path'].isna(), 'File'].unique()
if len(missing_files) > 0:
    raise ValueError(f"{len(missing_files)} file(s) listed in the table were not found under {folder_path}: {list(missing_files[:5])}")
# Encode the textual labels as integers: good -> 1, bad -> 0.
df2['Label'] = df2['Label'].replace({'good': 1, 'bad': 0})
df2.head()

Unnamed: 0,File,Feature,Label,absolute_path
0,230523_Dataset01_Valeo_Nr01_0.png,dist.1,0,/content/drive/MyDrive/zerop/FTI Dataset 2023/...
1,230523_Dataset01_Valeo_Nr01_0.png,e.rought1,0,/content/drive/MyDrive/zerop/FTI Dataset 2023/...
2,230523_Dataset01_Valeo_Nr01_1.png,e.rought2,0,/content/drive/MyDrive/zerop/FTI Dataset 2023/...
3,230523_Dataset01_Valeo_Nr01_1.png,e.rought3,0,/content/drive/MyDrive/zerop/FTI Dataset 2023/...
4,230523_Dataset01_Valeo_Nr01_2.png,angle,1,/content/drive/MyDrive/zerop/FTI Dataset 2023/...


In [None]:
class CustomDataset(Dataset):
    """Dataset of grayscale images paired with their labels.

    Args:
        image_paths: Indexable collection of image file paths.
        labels: Indexable collection of labels, same length as ``image_paths``.
        transform: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform=None):
        # Reject mismatched inputs before anything is stored.
        if len(image_paths) != len(labels):
            raise ValueError("Length of image_paths and labels must be the same.")
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``; the image is opened in mode 'L'."""
        try:
            path, target = self.image_paths[idx], self.labels[idx]
            picture = Image.open(path).convert('L')
            sample = self.transform(picture) if self.transform else picture
            return sample, target
        except KeyError:
            # Report the offending index, then let the error propagate.
            print(f"KeyError occurred at index {idx}")
            raise

In [None]:
def split_balanced(data, target, test_size=0.20, seed=40):
    """Split samples into class-balanced train and test lists.

    Every class contributes the same number of samples to the train split and
    the same number to the test split. When the smallest class cannot fill
    the requested per-class quotas, both quotas are shrunk (keeping their
    ratio) so that the splits stay balanced; surplus samples of larger
    classes are left out.

    Args:
        data: pandas Series of samples (indexed positionally via ``.iloc``).
        target: pandas Series of class labels aligned with ``data``.
        test_size: Fraction of all samples when ``< 1``, otherwise an
            absolute number of test samples.
        seed: Seed of the private random generator used for shuffling. The
            global numpy random state is not modified.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as plain Python lists.

    Raises:
        ValueError: If ``data`` and ``target`` differ in length or are empty.
    """
    if len(data) != len(target):
        raise ValueError("data and target must have the same length.")

    classes = np.unique(target)
    if len(classes) == 0:
        raise ValueError("target must contain at least one sample.")

    # Private generator: the shuffle is reproducible without reseeding the
    # global numpy RNG as a side effect.
    rng = np.random.RandomState(seed)

    if test_size < 1:
        n_test = int(np.round(len(target) * test_size))
    else:
        n_test = int(test_size)
    n_train = max(0, len(target) - n_test)
    n_train_per_class = max(1, n_train // len(classes))
    n_test_per_class = max(1, n_test // len(classes))

    target_values = np.asarray(target)
    ixs = []
    for cl in classes:
        class_ix = np.where(target_values == cl)[0]
        rng.shuffle(class_ix)  # Randomly shuffle indices of each class
        ixs.append(class_ix)

    # A class smaller than the combined quota would otherwise put (almost)
    # all of its samples into train and few or none into test, which makes
    # the test split unbalanced. Shrink both quotas to what it can supply.
    smallest = min(len(class_ix) for class_ix in ixs)
    wanted = n_train_per_class + n_test_per_class
    if wanted > smallest:
        n_test_per_class = max(1, (smallest * n_test_per_class) // wanted)
        n_train_per_class = smallest - n_test_per_class

    ix_train = np.concatenate([x[:n_train_per_class] for x in ixs])
    ix_test = np.concatenate([x[n_train_per_class:(n_train_per_class + n_test_per_class)] for x in ixs])

    X_train = data.iloc[ix_train].tolist()
    X_test = data.iloc[ix_test].tolist()
    y_train = target.iloc[ix_train].tolist()
    y_test = target.iloc[ix_test].tolist()

    return X_train, X_test, y_train, y_test


In [None]:
# Split image paths and 0/1 labels into training and validation lists
# (split_balanced is called with its default test_size).
X_train, X_valid, labels_train, labels_valid = split_balanced(df2['absolute_path'], df2['Label'])

In [None]:
# Dataset without a transform: items are the grayscale PIL images themselves.
train_dataset = CustomDataset(X_train, labels_train)

In [None]:
def get_mean_std(data_loader):
    """Estimate the mean and standard deviation of pixel values.

    Each item contributes its own mean and mean-of-squares with equal weight,
    so the result equals the exact per-pixel statistics only when all items
    hold the same number of pixels.

    Args:
        data_loader: Iterable yielding tuples whose first element is an image.
            Non-tensor images are converted with ``transforms.ToTensor()``;
            tensors are used as they are.

    Returns:
        ``(mean, std)`` as 0-dimensional tensors.

    Raises:
        ValueError: If ``data_loader`` yields no items.
    """
    to_tensor = None  # built once, and only if a non-tensor item shows up
    sum_, squared_sum, batches = 0, 0, 0
    for data, *_ in data_loader:
        if not isinstance(data, torch.Tensor):
            if to_tensor is None:
                to_tensor = transforms.ToTensor()
            data = to_tensor(data)
        sum_ += torch.mean(data)
        squared_sum += torch.mean(data ** 2)
        batches += 1

    if batches == 0:
        raise ValueError("data_loader yielded no items; cannot compute mean and std.")

    mean = sum_ / batches
    # Var[x] = E[x^2] - E[x]^2; rounding can push this slightly below zero
    # for near-constant images, which would turn the square root into NaN.
    variance = torch.clamp(squared_sum / batches - mean ** 2, min=0.0)
    std = variance ** 0.5
    return mean, std

# Pixel statistics of the un-transformed training images; they feed the
# Normalize steps of the train and validation transforms.
mean, std = get_mean_std(train_dataset)
mean, std

(tensor(0.6803), tensor(0.1287))

In [None]:
# Training pipeline: resize, random flips and rotation for augmentation,
# then tensor conversion and normalisation with the training-set statistics.
transform_train = transforms.Compose([
    transforms.Resize((512,512)), # 512
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # transforms.functional.rotate(90),
    transforms.RandomRotation(degrees=(0,90)),  # Random rotation between 0 and 90 degrees
    # Augmentations below are currently disabled:
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
    # transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)),
    # transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
transform_valid = transforms.Compose([
    transforms.Resize((512,512)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.5], std=[0.5]),
    transforms.Normalize(mean=mean, std=std),

])

In [None]:
# Re-create the training dataset, this time with the augmenting transform,
# and build the validation dataset with the deterministic transform.
train_dataset = CustomDataset(X_train, labels_train, transform=transform_train)
valid_dataset = CustomDataset(X_valid, labels_valid, transform=transform_valid)
batch_size = 16

# Create data loaders
# Training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Sanity check: pull one training batch and print the shape of its labels.
train_iterator = iter(train_loader)
data = next(train_iterator)
print(data[1].shape)

torch.Size([16])


In [None]:
# Sanity check: pull one validation batch and print image and label shapes.
val_iterator = iter(valid_loader)
data = next(val_iterator)
print(data[0].shape, data[1].shape)

torch.Size([16, 1, 512, 512]) torch.Size([16])


In [None]:
# ResNet-18 adapted to single-channel input and a single sigmoid output.
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument — confirm against the installed version.
model = torchvision.models.resnet18(pretrained=True)
# The stem convolution is replaced by a new 1-input-channel layer, so this
# layer starts from freshly initialised weights rather than loaded ones.
# NOTE(review): kernel size, padding and bias must stay as they are for the
# saved checkpoints loaded elsewhere in the notebook to fit — TODO confirm.
model.conv1 = nn.Conv2d(1, 64, kernel_size=6, stride=2, padding=3, bias=True)
num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, 2)
# New head: one output unit followed by a sigmoid, i.e. a value in (0, 1).
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1),
    nn.Sigmoid()
)
model.to(device)



ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(6, 6), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2):

In [None]:
# model.load_state_dict(torch.load('/content/drive/MyDrive/Results/best_model_k6_91_86.pth'))

In [None]:
# for param in model.parameters():
#     param.requires_grad = False

# # Ensure the final layer's parameters are trainable
# for param in model.fc.parameters():
#     param.requires_grad = True


In [None]:
def model_train(model, train_loader, valid_loader, device, n_epochs, learning_rate, wd,
                min_train_acc=0.82):
    """Train a single-output binary classifier and track the best checkpoint.

    The model is expected to emit one value per sample that is compared with
    the 0/1 label; predictions use a 0.5 threshold on that value.

    Args:
        model: Network to train (updated in place).
        train_loader: Iterable of ``(images, labels)`` training batches.
        valid_loader: Iterable of ``(images, labels)`` validation batches.
        device: Device the batches are moved to.
        n_epochs: Number of passes over ``train_loader``.
        learning_rate: Adam learning rate.
        wd: Adam weight decay.
        min_train_acc: An epoch is only eligible as "best" when its training
            accuracy exceeds this value.

    Returns:
        ``(best_acc, best_weights)``: the highest eligible validation accuracy
        (as a fraction) and a deep copy of the matching ``state_dict``. If no
        epoch was eligible this is ``(-inf, None)``.

    Raises:
        ValueError: If ``valid_loader`` yields no samples.
    """
    # Mean squared error between the model output and the 0/1 label.
    # NOTE(review): nn.BCELoss is the usual pairing for a sigmoid head; MSE is
    # kept here so results stay comparable with earlier runs.
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=wd)

    # Hold the best model
    best_acc = -np.inf  # Init to negative infinity
    best_weights = None

    for epoch in range(n_epochs):
        model.train()
        running_loss = 0.0
        total_train = 0
        correct_train = 0
        train_acc = 0.0  # stays defined even if train_loader is empty
        with tqdm.tqdm(enumerate(train_loader), total=len(train_loader), unit="batch") as bar:
            bar.set_description(f"Epoch {epoch + 1}/{n_epochs}")
            for batch_idx, (imgs, labels) in bar:
                imgs, labels = imgs.to(device), labels.to(device)
                optimizer.zero_grad()
                # flatten() keeps the batch dimension even for a batch of one
                # sample, so outputs and labels always have matching shapes.
                outputs = model(imgs).flatten()
                loss = loss_fn(outputs, labels.float())
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                preds = (outputs >= 0.5).float()
                correct_train += (preds == labels).sum().item()
                total_train += labels.size(0)
                train_acc = correct_train / total_train

                # Show the running mean loss and accuracy on the progress bar
                bar.set_postfix(
                    loss=running_loss / (batch_idx + 1),
                    acc=train_acc,
                )

        # Validation
        model.eval()
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for imgs, labels in valid_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs).flatten()

                preds = (outputs >= 0.5).float()
                correct_val += (preds == labels).sum().item()
                total_val += labels.size(0)

        if total_val == 0:
            raise ValueError("valid_loader yielded no samples; cannot compute validation accuracy.")

        val_acc = correct_val / total_val
        print(f"Epoch {epoch + 1} Validation Accuracy: {val_acc * 100:.2f}%")

        # Only epochs that also fit the training data well enough may become
        # the best checkpoint.
        if val_acc > best_acc and train_acc > min_train_acc:
            best_acc = val_acc
            best_weights = copy.deepcopy(model.state_dict())
            print(f"Best accuracy ({best_acc * 100:.2f}%)")

    # Return best accuracy and weights
    return best_acc, best_weights

In [None]:
# Train the model
best_acc, best_weights = model_train(model, train_loader, valid_loader, device, n_epochs=14, learning_rate=0.00036, wd= 1e-3)
# Persist the checkpoint only when validation accuracy beats 86% (and a
# checkpoint was actually recorded).
if best_acc > 0.86 and best_weights is not None:
    accuracy_str = f'{best_acc * 100:.2f}'.replace('.', '_')
    save_path = f'/content/drive/MyDrive/Results/saved_models/best_model_{accuracy_str}.pth'
    # Create the target folder first so a long training run is not lost to a
    # missing directory.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(best_weights , save_path)
    # model.load_state_dict(best_weights)
    print(f"Saved the Best Accuracy: {best_acc * 100:.2f}%")

Epoch 1/14: 100%|██████████| 48/48 [01:35<00:00,  1.98s/batch, acc=0.812, loss=0.128]


Epoch 1 Validation Accuracy: 86.09%


Epoch 2/14: 100%|██████████| 48/48 [01:33<00:00,  1.95s/batch, acc=0.828, loss=0.123]


Epoch 2 Validation Accuracy: 68.70%
Best accuracy (68.70%)


Epoch 3/14: 100%|██████████| 48/48 [01:34<00:00,  1.97s/batch, acc=0.824, loss=0.122]


Epoch 3 Validation Accuracy: 78.26%
Best accuracy (78.26%)


Epoch 4/14: 100%|██████████| 48/48 [01:33<00:00,  1.95s/batch, acc=0.836, loss=0.12]


Epoch 4 Validation Accuracy: 36.52%


Epoch 5/14: 100%|██████████| 48/48 [01:33<00:00,  1.95s/batch, acc=0.81, loss=0.127]


Epoch 5 Validation Accuracy: 86.09%


Epoch 6/14: 100%|██████████| 48/48 [01:34<00:00,  1.97s/batch, acc=0.837, loss=0.124]


Epoch 6 Validation Accuracy: 81.74%
Best accuracy (81.74%)


Epoch 7/14: 100%|██████████| 48/48 [01:35<00:00,  1.99s/batch, acc=0.831, loss=0.124]


Epoch 7 Validation Accuracy: 84.35%
Best accuracy (84.35%)


Epoch 8/14: 100%|██████████| 48/48 [01:33<00:00,  1.94s/batch, acc=0.853, loss=0.115]


Epoch 8 Validation Accuracy: 69.57%


Epoch 9/14: 100%|██████████| 48/48 [01:34<00:00,  1.97s/batch, acc=0.836, loss=0.121]


Epoch 9 Validation Accuracy: 84.35%


Epoch 10/14: 100%|██████████| 48/48 [01:34<00:00,  1.97s/batch, acc=0.826, loss=0.13]


Epoch 10 Validation Accuracy: 75.65%


Epoch 11/14: 100%|██████████| 48/48 [01:34<00:00,  1.96s/batch, acc=0.862, loss=0.108]


Epoch 11 Validation Accuracy: 83.48%


Epoch 12/14: 100%|██████████| 48/48 [01:35<00:00,  1.98s/batch, acc=0.842, loss=0.123]


Epoch 12 Validation Accuracy: 73.91%


Epoch 13/14: 100%|██████████| 48/48 [01:35<00:00,  1.99s/batch, acc=0.855, loss=0.11]


Epoch 13 Validation Accuracy: 73.91%


Epoch 14/14: 100%|██████████| 48/48 [01:36<00:00,  2.01s/batch, acc=0.833, loss=0.12]


Epoch 14 Validation Accuracy: 71.30%


In [None]:
# Load a previously saved state dict from a fixed path and switch the model
# to evaluation mode.
# NOTE(review): the file name is hard-coded; it only matches the file written
# by the training cell when that run's best accuracy formats as "100_00".
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source.
trained_model_state_dict = torch.load('/content/drive/MyDrive/Results/saved_models/best_model_100_00.pth', map_location=device)
model.load_state_dict(trained_model_state_dict)
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(6, 6), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2):

In [None]:
class CustomDataset(Dataset):
    """Grayscale image dataset yielding ``(image, label, feature)`` triples.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, one per image.
        features: Sequence of per-sample feature values, one per image.
        transform: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``labels`` or ``features`` differ in length from
            ``image_paths``.
    """

    def __init__(self, image_paths, labels, features, transform=None):
        # Store plain lists so __getitem__ always indexes by position. A
        # pandas Series would be indexed by label, which fails as soon as its
        # index is not 0..n-1 (for example after filtering rows).
        self.image_paths = list(image_paths)
        self.labels = list(labels)
        self.features = list(features)
        self.transform = transform

        if len(self.image_paths) != len(self.labels):
            raise ValueError("Length of image_paths and labels must be the same.")
        if len(self.image_paths) != len(self.features):
            raise ValueError("Length of image_paths and features must be the same.")

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label, features)``."""
        try:
            img_path = self.image_paths[idx]
            label = self.labels[idx]
            features = self.features[idx]
            # The context manager closes the file handle once the grayscale
            # copy has been created.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert('L')

            if self.transform:
                image = self.transform(image)

            return image, label, features
        except (KeyError, IndexError) as exc:
            # Report the offending index, then let the error propagate.
            print(f"{type(exc).__name__} occurred at index {idx}")
            raise

In [None]:
# Dataset and loader over every row of df2, using the validation transform.
# NOTE(review): the train/validation split was drawn from the same df2, so
# this set includes the samples used for training.
_dataset = CustomDataset(df2['absolute_path'], df2['Label'], df2['Feature'], transform=transform_valid)
_loader = DataLoader(_dataset, batch_size=16, shuffle=False)

In [None]:
# Make the evaluation mode explicit so this cell does not depend on another
# cell having called model.eval() first.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels, _ in _loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # flatten() keeps one prediction per sample even for a batch of one;
        # 0.5 is the decision threshold on the model output.
        preds = (outputs.flatten() >= 0.5).float()
        total += labels.size(0)
        correct += (preds == labels).sum().item()

# Share of correctly classified samples, in percent.
overall_accuracy = 100 * correct / total

In [None]:
# Accuracy in percent over all samples served by _loader.
overall_accuracy

77.29166666666667