<a href="https://colab.research.google.com/github/Lumb3/ML-Project-Pneumonia-Classifier-in-PyTorch/blob/main/Train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from google.colab import drive
drive.mount('/content/drive')
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from sklearn.metrics import accuracy_score
import multiprocessing

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show how many CPUs the runtime reports (the notebook displays the returned value)
multiprocessing.cpu_count()

2

In [63]:
# Pick the compute device: CUDA when a GPU is visible to PyTorch, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Custom dataset that reads chest X-ray images from class-named subdirectories
class PneumoniaDataset(Dataset):
    """Binary image dataset backed by 'NORMAL' and 'PNEUMONIA' folders.

    Every regular, non-hidden file found directly inside
    ``root_dir/NORMAL`` is a sample with label 0 and every one inside
    ``root_dir/PNEUMONIA`` is a sample with label 1.
    """

    # Sub-directory names; the position in this tuple is the integer label.
    CLASS_NAMES = ('NORMAL', 'PNEUMONIA')

    def __init__(self, root_dir, transform=None):
        """
        Collects the image paths and labels.
        Args:
            root_dir (str): Root directory containing 'NORMAL' and 'PNEUMONIA' subdirectories.
            transform (callable, optional): Transformation to apply to images.
        Raises:
            FileNotFoundError: If one of the class subdirectories does not exist.
        """
        self.root_dir = root_dir
        self.transform = transform  # Applied to each image in __getitem__
        self.image_paths = []       # File path of every sample
        self.labels = []            # Integer label of every sample (parallel to image_paths)

        for label, class_name in enumerate(self.CLASS_NAMES):
            class_dir = os.path.join(root_dir, class_name)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping reproducible between runs and machines.
            for image_name in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, image_name)
                # Hidden files (e.g. macOS '.DS_Store') and sub-directories are
                # not images and would make Image.open fail later on.
                if image_name.startswith('.') or not os.path.isfile(image_path):
                    continue
                self.image_paths.append(image_path)
                self.labels.append(label)

    def __len__(self):
        """
        Returns the total number of samples in the dataset.
        """
        return len(self.image_paths)

    def __getitem__(self, index):
        """
        Retrieves an image and its label at the specified index.
        Args:
            index (int): Index of the sample to retrieve.
        Returns:
            image: The RGB image, passed through ``transform`` when one was given.
            label (int): Corresponding label (0 for NORMAL, 1 for PNEUMONIA).
        """
        img_path = self.image_paths[index]
        label = self.labels[index]

        # The context manager closes the underlying file right away;
        # convert() returns an independent in-memory image, so it stays usable.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing pipeline shared by every split
imagenet_mean = [0.485, 0.456, 0.406]  # Per-channel mean of ImageNet
imagenet_std = [0.229, 0.224, 0.225]   # Per-channel std of ImageNet

preprocessing_steps = [
    transforms.Resize((224, 224)),     # Bring every image to 224x224
    transforms.ToTensor(),             # PIL image -> PyTorch tensor
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocessing_steps)

# Location of the dataset and loader settings, defined once so the three
# splits cannot drift apart.
DATA_ROOT = '/content/drive/MyDrive/CSV_files/chest_xray 2'
BATCH_SIZE = 32
# One loader worker per CPU so image decoding overlaps with training.
NUM_WORKERS = multiprocessing.cpu_count()

# Create dataset instances for training, testing, and validation
train_dataset = PneumoniaDataset(root_dir=os.path.join(DATA_ROOT, 'train'), transform=transform)
test_dataset = PneumoniaDataset(root_dir=os.path.join(DATA_ROOT, 'test'), transform=transform)
val_dataset = PneumoniaDataset(root_dir=os.path.join(DATA_ROOT, 'val'), transform=transform)

# Create data loaders for batching; only the training split is shuffled
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,
                         num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS)

# Start from a ResNet-18 initialised with the IMAGENET1K_V1 weights
pretrained_weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=pretrained_weights)

# Replace the final fully connected layer with a 2-output one (NORMAL and PNEUMONIA)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)

# Place the network on the selected device (GPU or CPU)
model = model.to(device)

def _collect_predictions(model, loader, device):
    """Run ``model`` over ``loader`` in eval mode without gradients.

    Args:
        model: Network mapping an image batch to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.
    Returns:
        (labels, preds): Two parallel lists of ints holding the true labels
        and the arg-max predictions, in loader order.
    """
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images.to(device))
            # Labels are only compared on the host, so they never need to
            # be moved to the device.
            all_labels.extend(labels.tolist())
            all_preds.extend(outputs.argmax(dim=1).tolist())
    return all_labels, all_preds


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    # ---- one optimisation pass over the training split ----
    model.train()
    running_loss = 0.0  # Sum of the per-batch losses of this epoch

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()  # .item() detaches the scalar from the graph

    # Reported loss is the mean over batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

    # ---- accuracy on the validation split after every epoch ----
    val_labels, val_preds = _collect_predictions(model, val_loader, device)
    val_accuracy = accuracy_score(val_labels, val_preds)
    print("Validation Accuracy: ", val_accuracy)


# ---- final accuracy on the held-out test split ----
test_labels, test_preds = _collect_predictions(model, test_loader, device)
test_accuracy = accuracy_score(test_labels, test_preds)
print("Test Accuracy: ", test_accuracy)

# Write only the model's state dict to 'pneumonia_classifier.pth'
trained_weights = model.state_dict()
torch.save(trained_weights, 'pneumonia_classifier.pth')

Epoch 1/10, Loss: 0.8823
Validation Accuracy:  0.0
Epoch 2/10, Loss: 0.6266
Validation Accuracy:  0.0
Epoch 3/10, Loss: 0.3913
Validation Accuracy:  0.875
Epoch 4/10, Loss: 0.1009
Validation Accuracy:  1.0
Epoch 5/10, Loss: 0.0262
Validation Accuracy:  1.0
Epoch 6/10, Loss: 0.0105
Validation Accuracy:  1.0
Epoch 7/10, Loss: 0.0060
Validation Accuracy:  1.0
Epoch 8/10, Loss: 0.0035
Validation Accuracy:  1.0
Epoch 9/10, Loss: 0.0021
Validation Accuracy:  1.0
Epoch 10/10, Loss: 0.0013
Validation Accuracy:  1.0
Validation Accuracy:  0.0
