In [8]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Libraries and Data

In [14]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import glob
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, utils, models


import cv2
import plotly.subplots as sp
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_curve
# Training and validation phases are omitted for brevity
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
# Check if GPU is available and if not, use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [3]:
path = '/content/drive/Shareddrives/CS260cProject/chest_xray'
# train directory
train_folder=path+"/train/"
train_normal_dir=train_folder+"NORMAL/"
train_pneu_dir=train_folder+"PNEUMONIA/"
# test directory
test_folder=path+"/test/"
test_normal_dir=test_folder+"NORMAL/"
test_pneu_dir=test_folder+"PNEUMONIA/"
# validation directory
val_folder=path+"/val/"
val_normal_dir=val_folder+"NORMAL/"
val_pneu_dir=val_folder+"PNEUMONIA/"


In [4]:
# Train Dataset
train_class_names=os.listdir(train_folder)
print("Train class names: %s" % (train_class_names))
# print("\n")

# Validation Dataset
val_class_names=os.listdir(val_folder)
print("Validation class names: %s" % (val_class_names))

# Test Dataset
test_class_names=os.listdir(test_folder)
print("Test class names: %s" % (test_class_names))
# print("\n")

Train class names: ['.DS_Store', 'PNEUMONIA', 'NORMAL']
Validation class names: ['.DS_Store', 'NORMAL', 'PNEUMONIA']
Test class names: ['.DS_Store', 'NORMAL', 'PNEUMONIA']


# Data Extraction

In [11]:
# Data augmentation and normalization for training
# Just normalization for validation
# data_transforms = {
#     'train': transforms.Compose([
#         transforms.RandomResizedCrop(224),
#         transforms.RandomHorizontalFlip(),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ]),
#     'val': transforms.Compose([
#         transforms.Resize(256),
#         transforms.CenterCrop(224),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ]),
#     'test': transforms.Compose([
#         transforms.Resize(256),
#         transforms.CenterCrop(224),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ])
# }

# data_dir = path# Your dataset path
# image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val', 'test']}
# dataloaders = {x: DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=4) for x in ['train', 'val', 'test']}
# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
# class_names = image_datasets['train'].classes

from torch.utils.data import WeightedRandomSampler

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

data_dir = path 
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val', 'test']}

class_sample_counts = np.bincount(image_datasets['train'].targets)
class_weights = 1. / torch.tensor(class_sample_counts, dtype=torch.float)
class_weights_normalized = class_weights / class_weights.sum()
samples_weights = class_weights_normalized[image_datasets['train'].targets]
sampler = WeightedRandomSampler(weights=samples_weights, num_samples=len(samples_weights), replacement=True)

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=4, sampler=sampler, num_workers=4),
    'val': DataLoader(image_datasets['val'], batch_size=4, shuffle=True, num_workers=4),
    'test': DataLoader(image_datasets['test'], batch_size=4, shuffle=True, num_workers=4)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [None]:
class_names

['NORMAL', 'PNEUMONIA']

In [None]:
class_names

['NORMAL', 'PNEUMONIA']

# EDA

In [None]:
# Inspect the first 5 samples
# Create a mapping from numerical labels to class names
idx_to_class = {v: k for k, v in image_datasets['train'].class_to_idx.items()}

# Display images from the training dataset
for i in range(5):
    image, label = image_datasets['train'][i]
    
    # Convert the numerical label back to a class name
    label = idx_to_class[label]
    print(f'Image size: {image.size()}')
    print(f'Label: {label}')

Image size: torch.Size([3, 224, 224])
Label: NORMAL
Image size: torch.Size([3, 224, 224])
Label: NORMAL
Image size: torch.Size([3, 224, 224])
Label: NORMAL
Image size: torch.Size([3, 224, 224])
Label: NORMAL
Image size: torch.Size([3, 224, 224])
Label: NORMAL


In [None]:
# Get labels directly from the dataset
train_labels = image_datasets['train'].targets
val_labels = image_datasets['val'].targets
test_labels = image_datasets['test'].targets

# Convert numerical labels to their corresponding string labels if necessary
# The class_to_idx attribute is a dictionary that maps class names to numerical labels
idx_to_class = {v: k for k, v in image_datasets['train'].class_to_idx.items()}
train_labels = [idx_to_class[label] for label in train_labels]
val_labels = [idx_to_class[label] for label in val_labels]
test_labels = [idx_to_class[label] for label in test_labels]

fig = make_subplots(rows=1, cols=3, subplot_titles=('Train data', 'Validation data', 'Test data'))

# Add traces
fig.add_trace(go.Histogram(x=train_labels, nbinsx=2, name='Train'), row=1, col=1)
fig.add_trace(go.Histogram(x=val_labels, nbinsx=2, name='Validation'), row=1, col=2)
fig.add_trace(go.Histogram(x=test_labels, nbinsx=2, name='Test'), row=1, col=3)

# Update layout
fig.update_layout(height=400, width=1200, title_text="Diagnosis Distribution")

# Show plot
fig.show()


In [None]:
from torchvision.transforms import ToPILImage

to_pil = ToPILImage()

# Function to denormalize images
def denormalize(image):
    mean = torch.Tensor([0.485, 0.456, 0.406])
    std = torch.Tensor([0.229, 0.224, 0.225])
    image = image * std[...,None,None] + mean[...,None,None]
    image = image.clamp(0, 1)
    return image

# Convert tensor image to PIL image
def tensor_to_PIL(image):
    image = denormalize(image)
    image = ToPILImage()(image)
    return image

idx_to_class = {v: k for k, v in image_datasets['train'].class_to_idx.items()}
class_labels = [idx_to_class[label] for label in image_datasets['train'].targets]

pneumonia_indices = [i for i, label in enumerate(class_labels) if label == 'Pneumonia'][:4]
normal_indices = [i for i, label in enumerate(class_labels) if label == 'Normal'][:4]

plt.figure(figsize=(20,8))
for i, index in enumerate(pneumonia_indices):
    img, label = image_datasets['train'][index]
    img = tensor_to_PIL(img)
    plt.subplot(2,4,i+1)
    plt.axis('off')
    plt.imshow(img, cmap='gray')
    plt.title('Pneumonia')

for i, index in enumerate(normal_indices):
    img, label = image_datasets['train'][index]
    img = tensor_to_PIL(img)
    plt.subplot(2,4,4+i+1)
    plt.axis('off')
    plt.imshow(img, cmap='gray')
    plt.title('Normal')

plt.show()

<Figure size 2000x800 with 0 Axes>

# Model Training

## VGG16

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_curve

# Check if GPU is available and if not, use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained VGG16, and freeze the weights
vgg16 = models.vgg16(pretrained=True)
for param in vgg16.features.parameters():
    param.requires_grad = False

# Modify the classifier layer to match the number of classes in the dataset
num_features = vgg16.classifier[6].in_features
features = list(vgg16.classifier.children())[:-1]  # Remove last layer
features.extend([nn.Linear(num_features, len(class_names))])  # Add our layer with 2 outputs
vgg16.classifier = nn.Sequential(*features)  # Replace the model classifier

# Move the model to the device
vgg16 = vgg16.to(device)

# Define the loss and the optimizer
criterion = nn.CrossEntropyLoss()

# Only parameters of the final layer are being optimized
optimizer = optim.Adam(vgg16.classifier.parameters(), lr=0.001)

# Number of epochs
epochs = 7

# Training loop
for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch+1, epochs))
    print('-' * 10)

    for phase in ['train', 'val']:
        if phase == 'train':
            vgg16.train()  # Set model to training mode
        else:
            vgg16.eval()   # Set model to evaluate mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            # Track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = vgg16(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # Backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

        # Calculate precision, recall, and F1-score
        preds_cpu = preds.cpu().numpy()
        labels_cpu = labels.data.cpu().numpy()

        precision = precision_score(labels_cpu, preds_cpu)
        recall = recall_score(labels_cpu, preds_cpu)
        f1 = f1_score(labels_cpu, preds_cpu)

        # print('{} Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f}'.format(phase, precision, recall, f1))

# Training and validation phases are omitted for brevity
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score

# Test phase
vgg16.eval() # Set the model to evaluation mode
test_correct = 0
test_total = 0

test_preds = []
test_probs = []
test_labels = []

with torch.no_grad(): # We don't need gradients for the test phase
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = vgg16(inputs)
        _, preds = torch.max(outputs, 1)

        test_total += labels.size(0)
        test_correct += (preds == labels).sum().item()

        # Collect predictions and labels for test set
        test_preds.extend(preds.cpu().numpy())
        test_probs.extend(outputs[:, 1].cpu().numpy())  # Save the probability of the positive class
        test_labels.extend(labels.data.cpu().numpy())

print('-' * 10)
print('Accuracy on the test set: %d %%' % (100 * test_correct / test_total))

# Calculate precision, recall, and F1-score for the test set
test_preds = np.array(test_preds)
test_probs = np.array(test_probs) # Convert to numpy array
test_labels = np.array(test_labels)

test_precision = precision_score(test_labels, test_preds)
test_recall = recall_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds)
test_auc = roc_auc_score(test_labels, test_probs) # Here we compute the AUC score using the test labels and predicted probabilities

print('Test Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f} AUC: {:.4f}'.format(test_precision, test_recall, test_f1, test_auc))

# Get the best threshold for the precision-recall curve
precision, recall, thresholds = precision_recall_curve(test_labels, test_probs)

# Compute F1 score for each threshold
f1_scores = 2*recall*precision / (recall + precision)

# Get the threshold that gives the maximum F1 score
best_threshold = thresholds[np.argmax(f1_scores)]
print('Best Threshold: ', best_threshold)


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.


Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.



Epoch 1/7
----------
train Loss: 1.7606 Acc: 0.7563
val Loss: 0.5062 Acc: 0.8750
Epoch 2/7
----------
train Loss: 0.9417 Acc: 0.7964



Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.



val Loss: 0.1261 Acc: 0.9375
Epoch 3/7
----------
train Loss: 0.7269 Acc: 0.8177
val Loss: 0.6057 Acc: 0.7500
Epoch 4/7
----------
train Loss: 0.7712 Acc: 0.8183
val Loss: 0.3126 Acc: 0.8125
Epoch 5/7
----------
train Loss: 0.7238 Acc: 0.8209
val Loss: 0.3367 Acc: 0.9375
Epoch 6/7
----------
train Loss: 0.9031 Acc: 0.8181
val Loss: 0.1750 Acc: 0.9375
Epoch 7/7
----------



Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.



train Loss: 0.6214 Acc: 0.8455
val Loss: 0.2775 Acc: 0.8750
----------
Accuracy on the test set: 89 %
Test Precision: 0.8779 Recall: 0.9590 F1-score: 0.9167 AUC: 0.9558
Best Threshold:  0.25405788


## VGG19

In [None]:
# Training and validation phases are omitted for brevity

# Load pre-trained VGG16, and freeze the weights
vgg19 = models.vgg19(pretrained=True)
for param in vgg19.features.parameters():
  param.requires_grad = False

# Modify the classifier layer to match the number of classes in the dataset
num_features = vgg19.classifier[6].in_features
features = list(vgg19.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(num_features, len(class_names))]) # Add our layer with 2 outputs
vgg19.classifier = nn.Sequential(*features) # Replace the model classifier

# Move the model to the device
vgg19 = vgg19.to(device)

# Define the loss and the optimizer
criterion = nn.CrossEntropyLoss()

# Only parameters of the final layer are being optimized
optimizer = optim.Adam(vgg19.classifier.parameters(), lr=0.001)

# Number of epochs
epochs = 7

# Training loop
for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch+1, epochs))
    print('-' * 10)

    for phase in ['train', 'val']:
        if phase == 'train':
            vgg19.train()  # Set model to training mode
        else:
            vgg19.eval()   # Set model to evaluate mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            # Track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = vgg19(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # Backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))


# Training and validation phases are omitted for brevity

vgg19.eval() # Set the model to evaluation mode
test_correct = 0
test_total = 0

test_preds = []
test_probs = []
test_labels = []

with torch.no_grad(): # We don't need gradients for the test phase
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = vgg19(inputs)
        _, preds = torch.max(outputs, 1)

        test_total += labels.size(0)
        test_correct += (preds == labels).sum().item()

        # Collect predictions and labels for test set
        test_preds.extend(preds.cpu().numpy())
        test_probs.extend(outputs[:, 1].cpu().numpy())  # Save the probability of the positive class
        test_labels.extend(labels.data.cpu().numpy())

print('-' * 10)
print('Accuracy on the test set: %d %%' % (100 * test_correct / test_total))

# Calculate precision, recall, and F1-score for the test set
test_preds = np.array(test_preds)
test_probs = np.array(test_probs) # Convert to numpy array
test_labels = np.array(test_labels)

test_precision = precision_score(test_labels, test_preds)
test_recall = recall_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds)
test_auc = roc_auc_score(test_labels, test_probs) # Here we compute the AUC score using the test labels and predicted probabilities

print('Test Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f} AUC: {:.4f}'.format(test_precision, test_recall, test_f1, test_auc))

# Get the best threshold for the precision-recall curve
precision, recall, thresholds = precision_recall_curve(test_labels, test_probs)

# Compute F1 score for each threshold
f1_scores = 2*recall*precision / (recall + precision)

# Get the threshold that gives the maximum F1 score
best_threshold = thresholds[np.argmax(f1_scores)]
print('Best Threshold: ', best_threshold)


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.


Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG19_Weights.IMAGENET1K_V1`. You can also use `weights=VGG19_Weights.DEFAULT` to get the most up-to-date weights.



Epoch 1/7
----------
train Loss: 2.0270 Acc: 0.7602
val Loss: 0.4121 Acc: 0.8750
Epoch 2/7
----------
train Loss: 0.8214 Acc: 0.7991
val Loss: 0.3569 Acc: 0.8125
Epoch 3/7
----------
train Loss: 0.7511 Acc: 0.8179
val Loss: 0.9486 Acc: 0.6875
Epoch 4/7
----------
train Loss: 0.7281 Acc: 0.7972
val Loss: 0.6496 Acc: 0.7500
Epoch 5/7
----------
train Loss: 0.7796 Acc: 0.8079
val Loss: 0.3758 Acc: 0.8125
Epoch 6/7
----------
train Loss: 0.6482 Acc: 0.8183
val Loss: 0.5137 Acc: 0.8125
Epoch 7/7
----------
train Loss: 0.7351 Acc: 0.8131
val Loss: 0.4518 Acc: 0.6875
----------
Accuracy on the test set: 89 %
Test Precision: 0.8773 Recall: 0.9718 F1-score: 0.9221 AUC: 0.9688
Best Threshold:  0.14034997


## ResNet-50

### Transfer Learning

In [13]:
resnet50 = models.resnet50(pretrained=True)
for param in resnet50.parameters():
    param.requires_grad = False

# Modify the classifier layer to match the number of classes in the dataset
num_features = resnet50.fc.in_features
resnet50.fc = nn.Sequential(
    nn.Linear(num_features, 512),   # first linear layer
    nn.ReLU(),       

    nn.Linear(512, 256),   # first linear layer
    nn.ReLU(),                               
    
    nn.Linear(256, 128),        # second linear layer
    nn.ReLU(),                  
    
    nn.Linear(128, 2)           # third linear layer, output size = 2
)

# Move the model to the device
resnet50 = resnet50.to(device)

# Define the loss and the optimizer
criterion = nn.CrossEntropyLoss()

# Only parameters of the final layer are being optimized
optimizer = optim.SGD(resnet50.fc.parameters(), lr=0.001, momentum=0.3)

# Number of epochs
epochs = 12

# Training loop
for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch+1, epochs))
    print('-' * 10)

    for phase in ['train', 'val']:
        if phase == 'train':
            resnet50.train()  # Set model to training mode
        else:
            resnet50.eval()   # Set model to evaluate mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            # Track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = resnet50(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # Backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))



Epoch 1/12
----------
train Loss: 0.6840 Acc: 0.5991
val Loss: 0.6642 Acc: 0.7500
Epoch 2/12
----------
train Loss: 0.6290 Acc: 0.7659
val Loss: 0.5521 Acc: 0.6875
Epoch 3/12
----------
train Loss: 0.4849 Acc: 0.8002
val Loss: 0.6036 Acc: 0.7500
Epoch 4/12
----------
train Loss: 0.4344 Acc: 0.8016
val Loss: 0.5823 Acc: 0.6250
Epoch 5/12
----------
train Loss: 0.4143 Acc: 0.8160
val Loss: 0.7473 Acc: 0.7500
Epoch 6/12
----------
train Loss: 0.3985 Acc: 0.8265
val Loss: 0.6865 Acc: 0.7500
Epoch 7/12
----------
train Loss: 0.4012 Acc: 0.8225
val Loss: 0.6493 Acc: 0.7500
Epoch 8/12
----------
train Loss: 0.3920 Acc: 0.8269
val Loss: 0.7215 Acc: 0.7500
Epoch 9/12
----------
train Loss: 0.4010 Acc: 0.8196
val Loss: 0.6420 Acc: 0.7500
Epoch 10/12
----------
train Loss: 0.3888 Acc: 0.8288
val Loss: 0.7677 Acc: 0.7500
Epoch 11/12
----------
train Loss: 0.3992 Acc: 0.8257
val Loss: 0.7682 Acc: 0.6875
Epoch 12/12
----------
train Loss: 0.3912 Acc: 0.8292
val Loss: 0.5785 Acc: 0.7500
----------
Ac

NameError: ignored

In [15]:

# Training and validation phases are omitted for brevity

resnet50.eval() # Set the model to evaluation mode
test_correct = 0
test_total = 0

test_preds = []
test_probs = []
test_labels = []

with torch.no_grad(): # We don't need gradients for the test phase
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = resnet50(inputs)
        _, preds = torch.max(outputs, 1)

        test_total += labels.size(0)
        test_correct += (preds == labels).sum().item()

        # Collect predictions and labels for test set
        test_preds.extend(preds.cpu().numpy())
        test_probs.extend(torch.nn.functional.softmax(outputs, dim=1)[:, 1].cpu().numpy())  # Save the probability of the positive class
        test_labels.extend(labels.data.cpu().numpy())

print('-' * 10)
print('Accuracy on the test set: %d %%' % (100 * test_correct / test_total))

# Calculate precision, recall, and F1-score for the test set
test_preds = np.array(test_preds)
test_probs = np.array(test_probs) # Convert to numpy array
test_labels = np.array(test_labels)

test_precision = precision_score(test_labels, test_preds)
test_recall = recall_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds)
test_auc = roc_auc_score(test_labels, test_probs) # Here we compute the AUC score using the test labels and predicted probabilities

print('Test Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f} AUC: {:.4f}'.format(test_precision, test_recall, test_f1, test_auc))

# Get the best threshold for the precision-recall curve
precision, recall, thresholds = precision_recall_curve(test_labels, test_probs)

# Compute F1 score for each threshold
f1_scores = 2*recall*precision / (recall + precision)

# Get the threshold that gives the maximum F1 score
best_threshold = thresholds[np.argmax(f1_scores)]
print('Best Threshold: ', best_threshold)

----------
Accuracy on the test set: 85 %
Test Precision: 0.8889 Recall: 0.8821 F1-score: 0.8855 AUC: 0.9378
Best Threshold:  0.38178363


### Fine-tuning

In [None]:
resnet50 = models.resnet50(pretrained=True)

# Unfreeze the layers you want to fine-tune
for param in resnet50.layer3.parameters():
    param.requires_grad = True
for param in resnet50.layer4.parameters():
    param.requires_grad = True

# Modify the classifier layer to match the number of classes in the dataset
num_features = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_features, len(class_names))

# Move the model to the device
resnet50 = resnet50.to(device)

# Define the loss and the optimizer
criterion = nn.CrossEntropyLoss()

# Specify the parameters to optimize and their learning rate
optimizer = optim.SGD([
    {'params': resnet50.layer3.parameters(), 'lr': 0.001},
    {'params': resnet50.layer4.parameters(), 'lr': 0.001},
    {'params': resnet50.fc.parameters(), 'lr': 0.01}
], momentum=0.9)

# Number of epochs
epochs = 10

# Training loop
for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch+1, epochs))
    print('-' * 10)

    for phase in ['train', 'val']:
        if phase == 'train':
            resnet50.train()  # Set model to training mode
        else:
            resnet50.eval()   # Set model to evaluate mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            # Track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = resnet50(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # Backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

# Training and validation phases are omitted for brevity

resnet50.eval() # Set the model to evaluation mode
test_correct = 0
test_total = 0

test_preds = []
test_probs = []
test_labels = []

with torch.no_grad(): # We don't need gradients for the test phase
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = resnet50(inputs)
        _, preds = torch.max(outputs, 1)

        test_total += labels.size(0)
        test_correct += (preds == labels).sum().item()

        # Collect predictions and labels for test set
        test_preds.extend(preds.cpu().numpy())
        test_probs.extend(torch.nn.functional.softmax(outputs, dim=1)[:, 1].cpu().numpy())  # Save the probability of the positive class
        test_labels.extend(labels.data.cpu().numpy())

print('-' * 10)
print('Accuracy on the test set: %d %%' % (100 * test_correct / test_total))

# Calculate precision, recall, and F1-score for the test set
test_preds = np.array(test_preds)
test_probs = np.array(test_probs) # Convert to numpy array
test_labels = np.array(test_labels)

test_precision = precision_score(test_labels, test_preds)
test_recall = recall_score(test_labels, test_preds)
test_f1 = f1_score(test_labels, test_preds)
test_auc = roc_auc_score(test_labels, test_probs) # Here we compute the AUC score using the test labels and predicted probabilities

print('Test Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f} AUC: {:.4f}'.format(test_precision, test_recall, test_f1, test_auc))

# Get the best threshold for the precision-recall curve
precision, recall, thresholds = precision_recall_curve(test_labels, test_probs)

# Compute F1 score for each threshold
f1_scores = 2*recall*precision / (recall + precision)

# Get the threshold that gives the maximum F1 score
best_threshold = thresholds[np.argmax(f1_scores)]
print('Best Threshold: ', best_threshold)


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.


Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.



Epoch 1/10
----------
train Loss: 1.0052 Acc: 0.7653
val Loss: 0.5696 Acc: 0.6875
Epoch 2/10
----------
train Loss: 0.4107 Acc: 0.8388
val Loss: 0.6467 Acc: 0.6875
Epoch 3/10
----------
train Loss: 0.3221 Acc: 0.8735
val Loss: 0.4232 Acc: 0.8125
Epoch 4/10
----------
train Loss: 0.2454 Acc: 0.9024
val Loss: 0.2026 Acc: 0.9375
Epoch 5/10
----------
train Loss: 0.2210 Acc: 0.9187
val Loss: 0.1903 Acc: 0.9375
Epoch 6/10
----------
train Loss: 0.1996 Acc: 0.9260
val Loss: 0.2661 Acc: 0.9375
Epoch 7/10
----------
train Loss: 0.1911 Acc: 0.9306
val Loss: 0.3723 Acc: 0.8125
Epoch 8/10
----------
train Loss: 0.1773 Acc: 0.9337
val Loss: 0.2132 Acc: 0.9375
Epoch 9/10
----------
train Loss: 0.1619 Acc: 0.9377
val Loss: 0.2884 Acc: 0.9375
Epoch 10/10
----------
train Loss: 0.1575 Acc: 0.9442
val Loss: 0.0705 Acc: 1.0000
----------
Accuracy on the test set: 94 %
Test Precision: 0.9212 Recall: 0.9897 F1-score: 0.9543 AUC: 0.9893
Best Threshold:  0.8006818
