<a href="https://colab.research.google.com/github/Gokul-Viswam/Pneumonia-resnet50/blob/main/PneumoniaMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle (1).json


{'kaggle (1).json': b'{"username":"gokulviswanathan","key":"b1ca58b5558c7b9448adb4e055fc9ecf"}'}

In [2]:
import os
import zipfile

# Make a kaggle directory and move the kaggle.json file there
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [3]:
# Download the dataset
!kaggle datasets download -d rijulshr/pneumoniamnist

Dataset URL: https://www.kaggle.com/datasets/rijulshr/pneumoniamnist
License(s): MIT
Downloading pneumoniamnist.zip to /content
  0% 0.00/3.39M [00:00<?, ?B/s]
100% 3.39M/3.39M [00:00<00:00, 659MB/s]


In [4]:
# Unzip the downloaded dataset
with zipfile.ZipFile("pneumoniamnist.zip", 'r') as zip_ref:
    zip_ref.extractall("pneumoniamnist")

In [5]:
# List the contents of the extracted directory
for root, dirs, files in os.walk("pneumoniamnist"):
    for file in files:
        print(os.path.join(root, file))

pneumoniamnist/pneumoniamnist.npz


In [6]:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import cv2
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Load the dataset
data = np.load("pneumoniamnist/pneumoniamnist.npz")
train_images, train_labels = data['train_images'], data['train_labels']
val_images, val_labels = data['val_images'], data['val_labels']
test_images, test_labels = data['test_images'], data['test_labels']

# Convert to float and scale
train_images = train_images.astype(np.float32)
val_images = val_images.astype(np.float32)
test_images = test_images.astype(np.float32)

In [7]:
# Custom PyTorch Dataset
class PneumoniaDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]

        # Resize to 224x224 and convert to 3 channels
        img = cv2.resize(img, (224, 224))
        img = np.stack([img]*3, axis=-1)  # (224,224,3)

        if self.transform:
            img = self.transform(img)

        label = int(self.labels[idx])
        return img, label


In [8]:
# Transform for ResNet
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  # normalize RGB channels
])


# Create datasets and dataloaders
train_dataset = PneumoniaDataset(train_images, train_labels, transform=transform)
val_dataset = PneumoniaDataset(val_images, val_labels, transform=transform)
test_dataset = PneumoniaDataset(test_images, test_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


# Load pretrained ResNet50 and modify final layer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 3)  # 3 classes
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 104MB/s]


In [9]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    acc = 100. * correct / total
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {running_loss:.4f} | Train Acc: {acc:.2f}%")

  label = int(self.labels[idx])


Epoch 1/5 | Loss: 16.7990 | Train Acc: 95.85%
Epoch 2/5 | Loss: 5.5438 | Train Acc: 98.61%
Epoch 3/5 | Loss: 4.8519 | Train Acc: 98.53%
Epoch 4/5 | Loss: 2.1451 | Train Acc: 99.41%
Epoch 5/5 | Loss: 1.9885 | Train Acc: 99.30%


In [10]:
# Evaluation
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.numpy())

# Classification report
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=['Normal', 'Pneumonia']))


  label = int(self.labels[idx])



Classification Report:
              precision    recall  f1-score   support

      Normal       0.99      0.56      0.72       234
   Pneumonia       0.79      1.00      0.88       390

    accuracy                           0.83       624
   macro avg       0.89      0.78      0.80       624
weighted avg       0.87      0.83      0.82       624

