<a href="https://colab.research.google.com/github/Abdulrahman-kawas/training_Data-analysis/blob/main/X_ray_pretrained.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================
#  Install libraries and download a small dataset
# ==========================
# Install the Kaggle CLI plus the metrics/plotting packages imported further down.
!pip install kaggle scikit-learn matplotlib

from google.colab import files
# The shell commands below expect the uploaded file to be named kaggle.json
# and to sit in the current working directory.
files.upload()  # Upload your kaggle.json file

# Copy the credentials into ~/.kaggle and restrict the file to owner
# read/write (mode 600) before invoking the kaggle command.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the Chest X-ray Pneumonia dataset (~2GB) and extract it quietly
# into ./chest_xray_small, the directory the ImageFolder paths below read from.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
!unzip -q chest-xray-pneumonia.zip -d chest_xray_small

# ==========================
# Import libraries
# ==========================
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import torchvision.models as models

import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ==========================
#  Prepare data + RGB conversion + Data augmentation
# ==========================
def _make_transform(augment):
    """Build the preprocessing pipeline; `augment` adds a random horizontal flip."""
    steps = [
        transforms.Resize((224, 224)),
        # Emit three identical channels so the images fit ResNet18's 3-channel input.
        transforms.Grayscale(num_output_channels=3),
    ]
    if augment:
        # Data augmentation is applied to the training images only.
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5]))
    return transforms.Compose(steps)


train_transform = _make_transform(augment=True)
test_transform = _make_transform(augment=False)

# Each split is a directory tree with one sub-folder per class.
train_data = ImageFolder(
    "chest_xray_small/chest_xray/train",
    transform=train_transform,
)
test_data = ImageFolder(
    "chest_xray_small/chest_xray/test",
    transform=test_transform,
)

# Shuffle only the training batches; keep the test order fixed.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)

# ==========================
# Training and evaluation functions
# ==========================
def train_model(model, train_loader, criterion, optimizer, epochs=5):
    """Train `model` for `epochs` passes over `train_loader`.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer that updates the parameters of `model`.
        epochs: Number of full passes over `train_loader`.

    Returns:
        A list with the mean batch loss of every epoch, in order.

    Raises:
        ValueError: If `train_loader` yields no batches during an epoch.
    """
    # Move each batch to wherever the model's weights live instead of relying
    # solely on the module-level `device`; fall back to it only for a model
    # that exposes no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.train()
    epoch_losses = []
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        # Count batches while iterating so loaders without __len__ work and an
        # empty loader gives a clear error instead of a ZeroDivisionError.
        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        mean_loss = total_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f"Epoch [{epoch+1}/{epochs}] Loss: {mean_loss:.4f}")

    return epoch_losses

def evaluate_detailed(model, test_loader, class_names=("Normal", "Pneumonia")):
    """Print accuracy, confusion matrix, sensitivity/specificity, AUC and a
    classification report for a two-class model, and plot its ROC curve.

    Class index 1 is treated as the positive class; its softmax probability
    is the score used for the AUC and the ROC curve.

    Args:
        model: Network producing one logit per class; switched to eval mode.
        test_loader: Iterable yielding ``(images, labels)`` tensor batches.
        class_names: Display names for class indices 0 and 1, in that order.

    Raises:
        ValueError: If `test_loader` yields no samples.
    """
    model.eval()

    # Run on the device that holds the model's weights; fall back to the
    # module-level `device` only for a model that exposes no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    all_labels, all_preds, all_probs = [], [], []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(target_device))
            probs = torch.softmax(outputs, dim=1)[:, 1]  # Probability of Pneumonia class
            predicted = outputs.argmax(dim=1)

            # Labels are only compared on the host, so they never need to be
            # copied to the accelerator.
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    all_labels = np.array(all_labels)
    all_preds  = np.array(all_preds)
    all_probs  = np.array(all_probs)

    if all_labels.size == 0:
        raise ValueError("test_loader yielded no samples; nothing to evaluate")

    acc = (all_preds == all_labels).mean()
    print(f"\nAccuracy: {acc*100:.2f}%")

    # Pin the label set so the matrix is always 2x2, even when one class never
    # appears in the labels or predictions; otherwise the 4-way unpack fails.
    binary_labels = [0, 1]
    cm = confusion_matrix(all_labels, all_preds, labels=binary_labels)
    print("Confusion Matrix:\n", cm)

    tn, fp, fn, tp = cm.ravel()
    # A class with no true samples has no defined recall; report NaN rather
    # than dividing by zero.
    sensitivity = tp / (tp + fn) if (tp + fn) else float("nan")
    specificity = tn / (tn + fp) if (tn + fp) else float("nan")
    print(f"Sensitivity (Positive Recall): {sensitivity:.2f}")
    print(f"Specificity (Negative Recall): {specificity:.2f}")

    if np.unique(all_labels).size < 2:
        # roc_auc_score raises when only one class is present in the labels.
        print("AUC: undefined (only one class present in the labels)")
    else:
        auc = roc_auc_score(all_labels, all_probs)
        print(f"AUC: {auc:.3f}")

        fpr, tpr, _ = roc_curve(all_labels, all_probs)
        plt.figure()
        plt.plot(fpr, tpr, label=f"AUC = {auc:.3f}")
        plt.plot([0,1],[0,1],'k--')  # Chance-level diagonal for reference
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title("ROC Curve")
        plt.legend(loc="lower right")
        plt.show()

    report = classification_report(
        all_labels,
        all_preds,
        labels=binary_labels,
        target_names=list(class_names),
    )
    print("Classification Report:\n", report)

# ==========================
#  Create the ResNet18 model
# ==========================
def create_model():
    """Return an ImageNet-pretrained ResNet18 with a fresh 2-class head.

    The final fully connected layer is replaced by a new ``nn.Linear`` with
    two outputs, and the model is moved to the module-level `device`.
    """
    # `pretrained=True` is deprecated in newer torchvision releases in favour
    # of the `weights=` argument. Use the new API when the weights enum exists
    # and keep the legacy call for older installations. IMAGENET1K_V1 is the
    # checkpoint that `pretrained=True` selects.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    model.fc = nn.Linear(model.fc.in_features, 2)  # Binary classification
    return model.to(device)

# ==========================
# Baseline model
# ==========================
# The loss is shared with the oversampling experiment further down.
criterion = nn.CrossEntropyLoss()

# Baseline: fine-tune on the training set with plain shuffled batches.
baseline_model = create_model()
optimizer = optim.Adam(params=baseline_model.parameters(), lr=0.001)

print("🔹 Training Baseline Model...")
train_model(
    model=baseline_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=5,
)

print("🔹 Evaluating Baseline Model...")
evaluate_detailed(model=baseline_model, test_loader=test_loader)

# ==========================
#  Oversampling model
# ==========================
# Give every training image a sampling weight equal to the inverse of its
# class frequency, so drawing with replacement favours the rarer class.
targets = np.array(train_data.targets)
class_counts = np.bincount(targets)
class_weights = 1.0 / class_counts
weights = class_weights[targets]

# One "epoch" still draws as many samples as the training set contains.
sampler = WeightedRandomSampler(
    weights,
    num_samples=len(weights),
    replacement=True,
)
train_loader_os = DataLoader(train_data, sampler=sampler, batch_size=32)

# Fresh model and optimizer; the loss function is reused from the baseline run.
oversample_model = create_model()
optimizer_os = optim.Adam(params=oversample_model.parameters(), lr=0.001)

print("🔹 Training Oversampling Model...")
train_model(
    model=oversample_model,
    train_loader=train_loader_os,
    criterion=criterion,
    optimizer=optimizer_os,
    epochs=5,
)

print("🔹 Evaluating Oversampling Model...")
evaluate_detailed(model=oversample_model, test_loader=test_loader)


