In [1]:
import torch
import re

from torchvision import models, datasets, transforms
import os
import time

In [2]:

data_dir = 'D:/NEUB/MachineLearning/ThesisProject/x-rayImages/chest_xray/chest_xray'

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomGrayscale(p=0.3),
        transforms.RandomHorizontalFlip(p=0.5),
        # transforms.Lambda(lambda img: transforms.functional.equalize(img)),
        # transforms.RandomAffine(degrees=0, scale=(1.0, 1.3)),  # Random scaling between 1.0 and 1.2
        # transforms.RandomAdjustSharpness(sharpness_factor=2),  # Contrast stretching
        transforms.RandomRotation(degrees=(-7, 7)),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.RandomCrop((180,180)),
        transforms.ToTensor(),
        transforms.Normalize([0.0020],[0.0010])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.0020],[0.0010])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.0020],[0.0010])
    ])
}

# Load the datasets and apply the transformations
img_datasets = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), transform=data_transforms[x])
    for x in ['train', 'val', 'test']
}

# Create data loaders
dataloaders = {
    x: torch.utils.data.DataLoader(img_datasets[x], batch_size=32, shuffle=True, num_workers=4)
    for x in ['train', 'val', 'test']
}

# Get dataset sizes
dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'val', 'test']}
print(dataset_sizes)

# Get class names
class_names = img_datasets['train'].classes
print(class_names)

# Get class names test
class_names_test = img_datasets['test'].classes
print(class_names_test)


{'train': 5216, 'val': 16, 'test': 624}
['NORMAL', 'PNEUMONIA']
['NORMAL', 'PNEUMONIA']


In [3]:
model = models.densenet161(pretrained = True)



In [4]:
# freezing the layers of the pre-trained model
i = 0
for freeze_parameters in model.parameters():
    freeze_parameters.requires_grad = False
    i+=1
print(i)
total_params = sum(p.numel() for p in model.parameters())
print(total_params)

484
28681000


In [5]:
import torch.nn as nn

In [6]:
# Replace the classifier with a new linear layer
num_input_features = model.classifier.in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.2),  # Add dropout regularization with 20% dropout rate
    nn.Linear(num_input_features, 2)
)


In [7]:
# define the loss function
# which helps to measures the error between the predicted and true labels for multiclass classification.
criteration = nn.CrossEntropyLoss()

In [8]:
# Define the aptimization loss
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

# select the Adam optimizer to update the weights of the linear layer during training.
# The optimizer takes the parameters of the model as input.

In [9]:
# set the divice 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# move the model to the device
model = model.to(device)

num_epochs = 10

best_loss = float('inf')
best_weights = None

# ...

for epoch in range(num_epochs):
    since = time.time()
    running_loss = 0.0
    running_corrects = 0
    val_loss = 0.0
    val_corrects = 0
    
    # set the model in training mode
    model.train()

    for inputs, labels in dataloaders['train']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        features = model(inputs)
        outputs = features

        # compute the loss
        loss = criteration(outputs, labels)

        # backward pass
        loss.backward()

        # update the parameter
        optimizer.step()

        # calculate training accuracy
        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    # calculate training loss and accuracy
    epoch_loss = running_loss / dataset_sizes['train']
    epoch_acc = running_corrects.double() / dataset_sizes['train']

    model.eval()

    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward pass
            features = model(inputs)
            outputs = features

            # calculate the loss
            val_loss += criteration(outputs, labels).item() * inputs.size(0)

            # calculate validation accuracy
            _, preds = torch.max(outputs, 1)
            val_corrects += torch.sum(preds == labels.data)

    # calculate validation loss and accuracy
    val_loss = val_loss / dataset_sizes['val']
    val_acc = val_corrects.double() / dataset_sizes['val']

    # Check if current validation loss is better than the best loss
    if val_loss < best_loss:
        best_loss = val_loss
        best_weights = model.state_dict()

    # Print epoch, training loss, training accuracy, validation loss, and validation accuracy
    print(f'Epoch {epoch+1}/{num_epochs}: Training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}, '
          f'Validation Loss: {val_loss:.4f} Acc: {val_acc:.4f}')

time_elapsed = time.time() - since
print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")

# Load the best weights
model.load_state_dict(best_weights)


Epoch 1/10: Training Loss: 0.2766 Acc: 0.8871, Validation Loss: 0.4307 Acc: 0.8125
Epoch 2/10: Training Loss: 0.2025 Acc: 0.9166, Validation Loss: 0.4975 Acc: 0.7500
Epoch 3/10: Training Loss: 0.1855 Acc: 0.9260, Validation Loss: 0.3287 Acc: 0.8125
Epoch 4/10: Training Loss: 0.1866 Acc: 0.9262, Validation Loss: 0.4870 Acc: 0.6875
Epoch 5/10: Training Loss: 0.1800 Acc: 0.9241, Validation Loss: 0.3115 Acc: 0.8750
Epoch 6/10: Training Loss: 0.1575 Acc: 0.9367, Validation Loss: 0.3891 Acc: 0.7500
Epoch 7/10: Training Loss: 0.1635 Acc: 0.9375, Validation Loss: 0.4709 Acc: 0.6875
Epoch 8/10: Training Loss: 0.1716 Acc: 0.9346, Validation Loss: 0.3366 Acc: 0.7500
Epoch 9/10: Training Loss: 0.1714 Acc: 0.9327, Validation Loss: 0.3644 Acc: 0.7500
Epoch 10/10: Training Loss: 0.1759 Acc: 0.9344, Validation Loss: 0.4083 Acc: 0.7500
Training complete in 12m 10s


<All keys matched successfully>

### Claculating Accuracy, Precision, Recall, confusion matrix

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score

In [12]:
# Calculate test accuracy, precision, and recall
test_corrects = 0
predicted_labels = []
true_labels = []
model.eval()

with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        features = model(inputs)
        outputs = features

        # Calculate test accuracy
        _, preds = torch.max(outputs, 1)
        test_corrects += torch.sum(preds == labels.data)

        # Store predicted and true labels for confusion matrix
        predicted_labels.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
    
# Calculate test accuracy
test_acc = test_corrects.double() / dataset_sizes['test']

# Calculate confusion matrix
confusion_mat = confusion_matrix(true_labels, predicted_labels)

# Calculate precision and recall
precision = precision_score(true_labels, predicted_labels, average='macro')
recall = recall_score(true_labels, predicted_labels, average='macro')

In [13]:
print(f'Test Accuracy: {test_acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print('Confusion Matrix:')
print(confusion_mat)

Test Accuracy: 0.8750
Precision: 0.8878
Recall: 0.8462
Confusion Matrix:
[[171  63]
 [ 15 375]]
