In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# force_remount = True asks Colab to mount again even if a mount already exists.
# NOTE(review): data_path in a later cell is the relative path "drive/My Drive/...",
# so it presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
import os
import torch
import random
import numpy as np
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import precision_score, recall_score, f1_score

importing Jupyter notebook from utils.ipynb


In [None]:
# Root folder handed to datasets.ImageFolder in the data-preparation cell.
# It is a relative path, so it resolves against the current working directory
# (presumably /content on Colab — TODO confirm).
data_path = "drive/My Drive/Chest XRay Images/"
# Fraction of the dataset to sample: num_sample = int(len(dataset) * sample_ratio).
# With the value 1 every image is kept.
sample_ratio = 1

In [None]:
# Set random seed for reproducibility (torch, numpy and the stdlib RNG are all seeded)
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Create data transforms.
# These must exist before ImageFolder is constructed: referencing
# `data_transforms` ahead of its definition raises NameError on a fresh kernel.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Per-channel mean/std (the values conventionally used with ImageNet-style models)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Get dataset from folder
dataset = datasets.ImageFolder(root=data_path, transform=data_transforms)

# Draw a random subset of the data without replacement.
# `sample_ratio` is the fraction kept; with sample_ratio = 1 this is simply a
# random permutation of every index in the dataset.
num_data = len(dataset)
num_sample = int(num_data * sample_ratio)
# Passing the int directly samples from np.arange(num_data), same as range(num_data)
indices = np.random.choice(num_data, num_sample, replace=False)

# Split the sampled indices into training (70%), test (20%) and validation (remainder) sets
num_train = int(num_sample * 0.7)
num_test = int(num_sample * 0.2)
num_val = num_sample - num_train - num_test

train_indices = indices[:num_train]
test_indices = indices[num_train:num_train + num_test]
val_indices = indices[num_train + num_test:]

# The three slices must partition the sampled indices exactly
assert len(train_indices) + len(test_indices) + len(val_indices) == num_sample
assert len(val_indices) == num_val

# Each sampler restricts its loader to one split of the shared dataset
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices)
test_sampler = torch.utils.data.sampler.SubsetRandomSampler(test_indices)
val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_indices)

# Create data loaders for training, test, and validation sets
batch_size = 32

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

print(train_loader, num_sample, num_train)

In [None]:
def evaluate_split(model, loader, criterion, device):
    """Score `model` on every batch of `loader` with gradients disabled.

    Args:
        model: the network to evaluate; it is switched to eval mode here.
        loader: iterable yielding (inputs, labels) batches.
        criterion: loss function called as criterion(outputs, labels).
        device: torch.device the batches are moved to before the forward pass.

    Returns:
        (mean_loss, recall, precision, fscore, y_true, y_pred), where the loss
        is averaged over the number of samples seen, the three metrics are
        macro-averaged, and y_true / y_pred are per-sample label lists.
    """
    model.eval()
    total_loss = 0.0
    num_seen = 0
    y_pred = []
    y_true = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # criterion returns a batch mean, so weight it by the batch size
            total_loss += loss.item() * inputs.size(0)
            num_seen += inputs.size(0)
            y_pred.extend(outputs.argmax(dim=1).cpu().numpy())
            y_true.extend(labels.cpu().numpy())

    mean_loss = total_loss / num_seen
    recall = recall_score(y_true, y_pred, average='macro')
    precision = precision_score(y_true, y_pred, average='macro')
    fscore = f1_score(y_true, y_pred, average='macro')
    return mean_loss, recall, precision, fscore, y_true, y_pred


# Use the GPU when the runtime has one; otherwise everything stays on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define ResNet18 model (randomly initialised: pretrained=False)
model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet18', pretrained=False)
num_classes = len(dataset.classes)
# Replace the classification head; read the input width from the existing layer
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Define loss function and optimizer (created after the model is on its device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train model on training set
num_epochs = 10

for epoch in range(num_epochs):
    # Explicitly (re-)enter training mode so batch-norm layers update their statistics
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)
    train_loss /= len(train_indices)
    print('Epoch: {}, Training Loss: {:.4f}'.format(epoch+1, train_loss))

# Evaluate model on test set
(test_loss, test_recall, test_precision, test_fscore,
 test_true, test_pred) = evaluate_split(model, test_loader, criterion, device)

print('Test Loss: {:.4f}, Test Recall: {:.4f}, Test Precision: {:.4f}, Test F-score: {:.4f}'.format(test_loss, test_recall, test_precision, test_fscore))

# Evaluate model on validation set
(val_loss, val_recall, val_precision, val_fscore,
 val_true, val_pred) = evaluate_split(model, val_loader, criterion, device)

print('Validation Loss: {:.4f}, Validation Recall: {:.4f}, Validation Precision: {:.4f}, Validation F-score: {:.4f}'.format(val_loss, val_recall, val_precision, val_fscore))


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.9.0


Epoch: 1, Training Loss: 0.2616
Epoch: 2, Training Loss: 0.1624
Epoch: 3, Training Loss: 0.1333
Epoch: 4, Training Loss: 0.1204
Epoch: 5, Training Loss: 0.1056
Epoch: 6, Training Loss: 0.0946
Epoch: 7, Training Loss: 0.1150
Epoch: 8, Training Loss: 0.0832
Epoch: 9, Training Loss: 0.0755
Epoch: 10, Training Loss: 0.0862
Test Loss: 0.2806, Test Recall: 0.8322, Test Precision: 0.9405, Test F-score: 0.8690
Validation Loss: 0.2470, Validation Recall: 0.8125, Validation Precision: 0.9456, Validation F-score: 0.8558
