In [3]:
import numpy as np

data = np.load("pneumoniamnist.npz")
print(data.files)


['train_images', 'train_labels', 'val_images', 'val_labels', 'test_images', 'test_labels']


In [8]:
import numpy as np
from PIL import Image
import os

# Load the data
data = np.load("pneumoniamnist.npz")

# Class mapping
class_map = {
    0: "normal",
    1: "abnormal"
}

# Where to save extracted images
base_dir = "D:/PC FILES/image_dataset"

# Loop over train, val, test
for split in ["train", "val", "test"]:
    images = data[f"{split}_images"]
    labels = data[f"{split}_labels"]
    
    for idx, (img_arr, label) in enumerate(zip(images, labels)):
        label_name = class_map[int(label)]

        save_dir = os.path.join(base_dir, split, label_name)
        os.makedirs(save_dir, exist_ok=True)
        
        # Convert to PIL image, scale 0–255 and convert to RGB
        img = Image.fromarray((img_arr * 255).astype(np.uint8)).convert("RGB")
        
        # Save image
        img.save(os.path.join(save_dir, f"{split}_{idx}.png"))

print("Dataset organized.")


  label_name = class_map[int(label)]


Dataset organized.


In [12]:
from PIL import Image
import os

def check_images(root_dir):
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            try:
                img_path = os.path.join(subdir, file)
                with Image.open(img_path) as img:
                    img.verify()  # Raise error if corrupt
            except Exception as e:
                print(f"Corrupt image: {file} in {subdir} – {e}")

check_images("D:/PC FILES/image_dataset")  # or full path if needed


In [20]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [21]:
from torchvision import datasets
from collections import Counter
import os

#base_dir = "dataset"  # or data_dir = "dataset", use consistently

train_ds = datasets.ImageFolder(os.path.join(base_dir, "train"), transform=transform)

labels = [label for _, label in train_ds]
label_counts = Counter(labels)

class_names = train_ds.classes
for i, count in label_counts.items():
    print(f"{class_names[i]}: {count} images")


abnormal: 3494 images
normal: 388 images


In [9]:
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix

In [10]:
# 1. Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
# 2. Transforms with augmentation for train, simpler for val/test
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [20]:
# 3. Dataset and DataLoader
base_dir = "D:/PC FILES/image_dataset"

train_ds = datasets.ImageFolder(os.path.join(base_dir, "train"), transform=train_transform)
val_ds = datasets.ImageFolder(os.path.join(base_dir, "val"), transform=val_test_transform)
test_ds = datasets.ImageFolder(os.path.join(base_dir, "test"), transform=val_test_transform)

batch_size = 32

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)
test_loader = DataLoader(test_ds, batch_size=batch_size)

In [21]:
# 4. Calculate class weights for weighted loss
from collections import Counter

label_counts = Counter([label for _, label in train_ds])
weight_normal = 1.0 / label_counts[train_ds.class_to_idx['normal']]
weight_abnormal = 1.0 / label_counts[train_ds.class_to_idx['abnormal']]
class_weights = torch.FloatTensor([weight_normal, weight_abnormal]).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)

In [22]:
# 5. Load pretrained ResNet50, freeze layers, modify FC
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 2)
)
model = model.to(device)

optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-4)



In [23]:
# 6. Training function
def train(model, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs} — Training loss: {avg_loss:.4f}")

        evaluate(model, val_loader, "Validation")

In [24]:
# 7. Evaluation function
def evaluate(model, loader, name="Test"):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())

    print(f"\n{name} Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=train_ds.classes))
    print(f"{name} Confusion Matrix:")
    print(confusion_matrix(all_labels, all_preds))

In [None]:
# 8. Run training and final test evaluation
train(model, train_loader, val_loader, epochs=10)
evaluate(model, test_loader, "Test")

Epoch 1/10 — Training loss: 0.0858

Validation Classification Report:
              precision    recall  f1-score   support

    abnormal       0.74      1.00      0.85       389
      normal       0.00      0.00      0.00       135

    accuracy                           0.74       524
   macro avg       0.37      0.50      0.43       524
weighted avg       0.55      0.74      0.63       524

Validation Confusion Matrix:
[[389   0]
 [135   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Epoch 2/10 — Training loss: 0.0642

Validation Classification Report:
              precision    recall  f1-score   support

    abnormal       0.74      1.00      0.85       389
      normal       0.00      0.00      0.00       135

    accuracy                           0.74       524
   macro avg       0.37      0.50      0.43       524
weighted avg       0.55      0.74      0.63       524

Validation Confusion Matrix:
[[389   0]
 [135   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Epoch 3/10 — Training loss: 0.0565

Validation Classification Report:
              precision    recall  f1-score   support

    abnormal       0.74      1.00      0.85       389
      normal       0.00      0.00      0.00       135

    accuracy                           0.74       524
   macro avg       0.37      0.50      0.43       524
weighted avg       0.55      0.74      0.63       524

Validation Confusion Matrix:
[[389   0]
 [135   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Epoch 4/10 — Training loss: 0.0510

Validation Classification Report:
              precision    recall  f1-score   support

    abnormal       0.74      1.00      0.85       389
      normal       0.00      0.00      0.00       135

    accuracy                           0.74       524
   macro avg       0.37      0.50      0.43       524
weighted avg       0.55      0.74      0.63       524

Validation Confusion Matrix:
[[389   0]
 [135   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
