In [1]:
!pip install scikit-learn pillow torch torchvision



In [20]:
import os
from PIL import Image
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset


from torchvision import transforms
from torchvision import models as torchvision_models 

from sklearn.metrics import accuracy_score



In [21]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [22]:
class PneumoniaDataset(Dataset):
    """Two-class chest X-ray dataset read from class-named sub-folders.

    ``root_dir`` is scanned for the sub-directories ``NORMAL`` (label 0) and
    ``PNEUMONIA`` (label 1). Every file in them whose name ends with
    ``.jpeg``, ``.jpg`` or ``.png`` (case-insensitive) becomes one sample;
    everything else (e.g. ``.DS_Store``) is ignored. A missing class
    directory is skipped silently, so a wrong ``root_dir`` yields an empty
    dataset rather than an error.

    Args:
        root_dir: Directory containing the class sub-folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    # Position in this tuple is the integer label assigned to the class.
    CLASS_NAMES = ('NORMAL', 'PNEUMONIA')
    # Lower-cased file-name suffixes that are treated as images.
    IMAGE_EXTENSIONS = ('.jpeg', '.jpg', '.png')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for class_index, class_name in enumerate(self.CLASS_NAMES):
            class_dir = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_dir):
                continue  # Skip if directory doesn't exist

            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping identical from run to run.
            for img_name in sorted(os.listdir(class_dir)):
                # Only files with an image extension are kept; hidden files
                # such as .DS_Store never match and are therefore dropped.
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(class_dir, img_name))
                    self.labels.append(class_index)

    def __len__(self):
        """Return the number of image files discovered under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB and, when a transform was
        supplied, passed through it.

        Raises:
            IndexError: If ``idx`` is out of range.
            RuntimeError: If the image file cannot be opened or decoded. The
                message names the offending file. The sample is not replaced
                by ``None`` because the default DataLoader collate function
                cannot batch ``None`` and would fail with an unrelated
                TypeError instead.
        """
        img_path = self.image_paths[idx]
        try:
            # The context manager closes the underlying file handle as soon as
            # the pixel data has been copied into the converted image.
            with Image.open(img_path) as img:
                image = img.convert("RGB")
        except Exception as e:
            raise RuntimeError(f"Error loading image {img_path}: {e}") from e

        if self.transform:
            image = self.transform(image)

        return image, self.labels[idx]

In [23]:
# Preprocessing for the ResNet-18 model
#
# The pipeline is built from the fully qualified `torchvision.transforms`
# module rather than the bare `transforms` name. This cell rebinds
# `transforms` to the composed pipeline (later cells pass it to the dataset as
# `transform=transforms`), so reaching the module through that same name would
# raise AttributeError as soon as the cell is executed a second time.

transforms = torchvision.transforms.Compose([
    # Fixed 224x224 input size for every image.
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor(),
    # Per-channel mean/std commonly used with ImageNet-pretrained weights.
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [24]:
def create_dataset(path, name):
    """Build a ``PneumoniaDataset`` for ``path``, or return ``None`` on failure.

    Args:
        path: Directory of the split; resolved to an absolute path first.
        name: Human-readable split name, used only in the printed messages.

    Returns:
        A ``PneumoniaDataset`` using the notebook-level ``transforms``
        pipeline, or ``None`` when the directory does not exist or any
        exception is raised while building the dataset (the reason is
        printed in both cases).
    """
    try:
        full_path = os.path.abspath(path)
        print(f"Attempting to load {name} dataset from: {full_path}")
        if os.path.exists(full_path):
            return PneumoniaDataset(root_dir=full_path, transform=transforms)
        print(f"Warning: {name} directory not found at {full_path}")
    except Exception as e:
        # Best effort: report the problem and fall through to the None return.
        print(f"Error creating {name} dataset: {e}")
    return None

In [25]:
# Sanity-check the working directory before loading any data: show where the
# notebook is running, what sits next to it, and (if present) what the
# chest_xray folder contains.
base_dir = os.path.abspath('chest_xray')

current_dir = os.getcwd()
print(f"\nCurrent working directory: {current_dir}")

current_dir_entries = os.listdir('.')
print(f"Base directory contents: {current_dir_entries}")

if os.path.exists(base_dir):
    base_dir_entries = os.listdir(base_dir)
    print(f"chest_xray contents: {base_dir_entries}")


Current working directory: /workspaces/Deep-Learning/1_Project
Base directory contents: ['Pneumonia_Classification.ipynb', 'Handwritten_digit_recognition.ipynb', 'chest_xray', 'data']
chest_xray contents: ['train', 'chest_xray', 'test']


In [26]:
# Load the dataset
#
# One PneumoniaDataset per split, all sharing the same preprocessing pipeline.

train_dataset = PneumoniaDataset(root_dir='chest_xray/chest_xray/train', transform=transforms)
test_dataset = PneumoniaDataset(root_dir='chest_xray/chest_xray/test', transform=transforms)
val_dataset = PneumoniaDataset(root_dir='chest_xray/chest_xray/val', transform=transforms)

# PneumoniaDataset skips class folders that do not exist, so a wrong path
# produces an empty dataset instead of an error. Fail here, with the path in
# the message, rather than later inside training or evaluation.
for split_name, split_dataset in (('train', train_dataset),
                                  ('test', test_dataset),
                                  ('val', val_dataset)):
    if len(split_dataset) == 0:
        raise FileNotFoundError(
            f"No images found for the {split_name} split under {split_dataset.root_dir!r}"
        )

# Only the training data is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [27]:
import torchvision.models as models

In [28]:
# Build the classifier: a ResNet-18 loaded with the 'IMAGENET1K_V1' weights
# whose final fully connected layer is swapped for a 2-way head.

model = models.resnet18(weights='IMAGENET1K_V1')
model.fc = nn.Linear(
    in_features=model.fc.in_features,  # keep the width feeding the original head
    out_features=2,                    # binary classification: Normal vs Pneumonia
)
model = model.to(device)

In [29]:
# Training objective and parameter update rule

# Cross-entropy over the two output logits.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the network (backbone and new head alike).
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [30]:

# Train the model; after each epoch print the mean training loss per batch
# and the accuracy on the validation split.

num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Sum of per-batch losses; divided by the batch count when reported.
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # ---- Validation pass ----
    model.eval()
    val_labels, val_preds = [], []

    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest logit in each row.
            predicted = outputs.argmax(dim=1)

            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(predicted.cpu().numpy())

    val_accuracy = accuracy_score(val_labels, val_preds)
    print(f"Validation Accuracy: {val_accuracy:.4f}")

Epoch [1/10], Loss: 0.1133
Validation Accuracy: 1.0000
Epoch [2/10], Loss: 0.0507
Validation Accuracy: 0.8125
Epoch [3/10], Loss: 0.0468
Validation Accuracy: 0.9375
Epoch [4/10], Loss: 0.0296
Validation Accuracy: 1.0000
Epoch [5/10], Loss: 0.0357
Validation Accuracy: 0.8750
Epoch [6/10], Loss: 0.0152
Validation Accuracy: 1.0000
Epoch [7/10], Loss: 0.0266
Validation Accuracy: 1.0000
Epoch [8/10], Loss: 0.0270
Validation Accuracy: 1.0000
Epoch [9/10], Loss: 0.0155
Validation Accuracy: 0.9375
Epoch [10/10], Loss: 0.0224
Validation Accuracy: 0.9375


In [None]:
# Evaluate the trained model on the test split and report its accuracy

model.eval()
test_labels, test_preds = [], []

with torch.no_grad():  # inference only, so gradient tracking is switched off
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)

        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(predicted.cpu().numpy())

test_accuracy = accuracy_score(test_labels, test_preds)
print(f"Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Persist the trained weights (state dict only, not the whole module object)

model_state = model.state_dict()
torch.save(model_state, 'pneumonia_classifier.pth')
print("Model saved as pneumonia_classifier.pth")