In [23]:
import os
import torch
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
from PIL import Image
import numpy as np

# === CONFIG ===
image_dir = '../brain_tumor/data/images'
label_dir = '../brain_tumor/data/labels'
k_folds = 5
batch_size = 16
num_epochs = 10
learning_rate = 1e-4

# === Build List of Samples with Labels for Stratification ===
all_data = []
all_labels = []

for filename in sorted(os.listdir(label_dir)):
    if filename.endswith('.txt'):
        label_path = os.path.join(label_dir, filename)
        image_name = os.path.splitext(filename)[0] + '.jpg'  # change if you use .jpg
        image_path = os.path.join(image_dir, image_name)

        if os.path.exists(image_path):
            with open(label_path, 'r') as f:
                class_label = int(f.readline().split()[0])  # Read first value: 0 or 1
                all_data.append((image_path, label_path))
                all_labels.append(class_label)

print(f"Total samples: {len(all_data)}")
print(f"Class distribution: {sum(all_labels)} positive, {len(all_labels) - sum(all_labels)} negative")

Total samples: 878
Class distribution: 459 positive, 419 negative


In [25]:
import pandas as pd

# Load from CSV
csv_path = '../brain_tumor/data/mapped_data/mapped_data.csv'  
df = pd.read_csv(csv_path)

# Build lists for data + labels
all_data = []
all_labels = []

for _, row in df.iterrows():
    image_path = row['Image_Path']
    label_path = row['Label_Path']

    if os.path.exists(image_path) and os.path.exists(label_path):
        with open(label_path, 'r') as f:
            class_label = int(f.readline().split()[0])  # Read first value: 0 or 1
            all_data.append((image_path, label_path))
            all_labels.append(class_label)

print(f"Total samples: {len(all_data)}")
print(f"Class distribution: {sum(all_labels)} positive, {len(all_labels) - sum(all_labels)} negative")

Total samples: 4212
Class distribution: 2220 positive, 1992 negative


In [26]:

class TumorDataset(Dataset):
    def __init__(self, data_pairs, transform=None):
        self.data_pairs = data_pairs
        self.transform = transform

    def __len__(self):
        return len(self.data_pairs)

    def __getitem__(self, idx):
        image_path, label_path = self.data_pairs[idx]
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        with open(label_path, 'r') as f:
            label = list(map(float, f.readline().split()))
            label = torch.tensor(label, dtype=torch.float32)

        return image, label

# === Transform ===
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# === Define CNN Model ===
import torch.nn as nn

class TumorCNN(nn.Module):
    def __init__(self):
        super(TumorCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 56 * 56, 128), nn.ReLU(),
            nn.Linear(128, 5)  # [class_prob, x_center, y_center, width, height]
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

from sklearn.model_selection import train_test_split

# Reserve 20% as a final test set
train_val_data, test_data, train_val_labels, test_labels = train_test_split(
    all_data, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)


# === Stratified K-Fold Training ===
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(skf.split(train_val_data, train_val_labels)):
    print(f"\n🔁 Fold {fold+1}")

    train_data = [train_val_data[i] for i in train_idx]
    val_data = [train_val_data[i] for i in val_idx]

    train_dataset = TumorDataset(train_data, transform=transform)
    val_dataset = TumorDataset(val_data, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    model = TumorCNN()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion_class = nn.BCEWithLogitsLoss()
    criterion_bbox = nn.MSELoss()

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        for images, labels in train_loader:
            outputs = model(images)

            pred_class = outputs[:, 0]
            pred_bbox = outputs[:, 1:]
            true_class = labels[:, 0]
            true_bbox = labels[:, 1:]

            loss = criterion_class(pred_class, true_class) + criterion_bbox(pred_bbox, true_bbox)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f}")

    # 💾 Save trained model for this fold
    torch.save(model.state_dict(), f"model_fold_{fold+1}.pth")

    # ✅ === Validation Accuracy Calculation ===
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            pred_class = torch.sigmoid(outputs[:, 0])
            predicted = (pred_class > 0.5).float()
            true_class = labels[:, 0]

            correct += (predicted == true_class).sum().item()
            total += true_class.size(0)

    accuracy = correct / total
    print(f"✅ Fold {fold+1} Validation Accuracy: {accuracy:.4f}")


🔁 Fold 1
Epoch 1/10 - Loss: 0.7122
Epoch 2/10 - Loss: 0.5849
Epoch 3/10 - Loss: 0.4539
Epoch 4/10 - Loss: 0.3615
Epoch 5/10 - Loss: 0.3174
Epoch 6/10 - Loss: 0.2522
Epoch 7/10 - Loss: 0.2127
Epoch 8/10 - Loss: 0.1711
Epoch 9/10 - Loss: 0.1495
Epoch 10/10 - Loss: 0.1127
✅ Fold 1 Validation Accuracy: 0.9614

🔁 Fold 2
Epoch 1/10 - Loss: 0.7252
Epoch 2/10 - Loss: 0.6975
Epoch 3/10 - Loss: 0.6180
Epoch 4/10 - Loss: 0.5036
Epoch 5/10 - Loss: 0.3972
Epoch 6/10 - Loss: 0.3158
Epoch 7/10 - Loss: 0.2447
Epoch 8/10 - Loss: 0.1948
Epoch 9/10 - Loss: 0.1488
Epoch 10/10 - Loss: 0.1183
✅ Fold 2 Validation Accuracy: 0.9674

🔁 Fold 3
Epoch 1/10 - Loss: 0.7142
Epoch 2/10 - Loss: 0.6273
Epoch 3/10 - Loss: 0.4878
Epoch 4/10 - Loss: 0.3803
Epoch 5/10 - Loss: 0.3014
Epoch 6/10 - Loss: 0.2527
Epoch 7/10 - Loss: 0.1842
Epoch 8/10 - Loss: 0.1456
Epoch 9/10 - Loss: 0.1083
Epoch 10/10 - Loss: 0.0899
✅ Fold 3 Validation Accuracy: 0.9777

🔁 Fold 4
Epoch 1/10 - Loss: 0.7086
Epoch 2/10 - Loss: 0.5960
Epoch 3/10 - L

In [27]:
from sklearn.metrics import accuracy_score, f1_score, recall_score
import numpy as np

fold_accuracies = []
fold_f1s = []
fold_recalls = []

for fold, (train_idx, val_idx) in enumerate(skf.split(train_val_data, train_val_labels)):
    print(f"\n📂 Evaluating Fold {fold+1}")

    val_data = [train_val_data[i] for i in val_idx]
    val_dataset = TumorDataset(val_data, transform=transform)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Load model
    model = TumorCNN()
    model.load_state_dict(torch.load(f"model_fold_{fold+1}.pth"))
    model.eval()

    all_preds = []
    all_targets = []

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            pred_class = torch.sigmoid(outputs[:, 0])
            predicted = (pred_class > 0.5).float()
            true_class = labels[:, 0]

            all_preds.extend(predicted.numpy())
            all_targets.extend(true_class.numpy())

    # Metrics
    accuracy = accuracy_score(all_targets, all_preds)
    f1 = f1_score(all_targets, all_preds)
    recall = recall_score(all_targets, all_preds)

    fold_accuracies.append(accuracy)
    fold_f1s.append(f1)
    fold_recalls.append(recall)

    print(f"✅ Fold {fold+1} Accuracy: {accuracy:.4f} | F1: {f1:.4f} | Recall: {recall:.4f}")

# Final Summary
print("\n📊 Cross-Validation Results (from saved models):")
print(f"Mean Accuracy: {np.mean(fold_accuracies):.4f}")
print(f"Mean F1 Score: {np.mean(fold_f1s):.4f}")
print(f"Mean Recall: {np.mean(fold_recalls):.4f}")



📂 Evaluating Fold 1
✅ Fold 1 Accuracy: 0.9614 | F1: 0.9625 | Recall: 0.9408

📂 Evaluating Fold 2
✅ Fold 2 Accuracy: 0.9674 | F1: 0.9694 | Recall: 0.9803

📂 Evaluating Fold 3
✅ Fold 3 Accuracy: 0.9777 | F1: 0.9790 | Recall: 0.9859

📂 Evaluating Fold 4
✅ Fold 4 Accuracy: 0.9926 | F1: 0.9930 | Recall: 0.9972

📂 Evaluating Fold 5
✅ Fold 5 Accuracy: 0.9807 | F1: 0.9816 | Recall: 0.9775

📊 Cross-Validation Results (from saved models):
Mean Accuracy: 0.9760
Mean F1 Score: 0.9771
Mean Recall: 0.9763


In [28]:
class TumorTestDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        self.data = pd.read_csv(csv_path)
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx]["Image_Path"]
        label_path = self.data.iloc[idx]["Label_Path"]

        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        with open(label_path, 'r') as f:
            label = list(map(float, f.readline().split()))
            label = torch.tensor(label, dtype=torch.float32)

        return image, label


In [None]:
test_csv_path = "../brain_tumor/data/mapped_data/mapped_testing_data.csv"

test_dataset = TumorTestDataset(test_csv_path, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=8)

# Load the best model (e.g., Fold 4)
model = TumorCNN()
model.load_state_dict(torch.load("model_fold_4.pth"))
model.eval()


TumorCNN(
  (features): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=100352, out_features=128, bias=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=5, bias=True)
  )
)

In [46]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

all_bbox_preds = []
all_bbox_targets = []

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        pred_class = torch.sigmoid(outputs[:, 0])
        pred_bbox = outputs[:, 1:]
        
        true_class = labels[:, 0]
        true_bbox = labels[:, 1:]

        all_preds.extend((pred_class > 0.5).float().numpy())
        all_targets.extend(true_class.numpy())
        
        all_bbox_preds.extend(pred_bbox.numpy())
        all_bbox_targets.extend(true_bbox.numpy())

# Classification Metrics
acc = accuracy_score(all_targets, all_preds)
f1 = f1_score(all_targets, all_preds)
recall = recall_score(all_targets, all_preds)

# Bounding Box Metrics
mse = mean_squared_error(all_bbox_targets, all_bbox_preds)
mae = mean_absolute_error(all_bbox_targets, all_bbox_preds)

print("\n🧪 Final Test Set Results (Classification):")
print(f"Accuracy: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Recall: {recall:.4f}")

print("\n📦 Final Test Set Results (Bounding Box):")
print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")



🧪 Final Test Set Results (Classification):
Accuracy: 0.7614
F1 Score: 0.7742
Recall: 0.8090

📦 Final Test Set Results (Bounding Box):
MSE: 0.0334
MAE: 0.1341


In [47]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os

def export_prediction_report(model, dataset, save_folder="test_report", num_samples=10):
    os.makedirs(save_folder, exist_ok=True)
    model.eval()

    for i in range(min(num_samples, len(dataset))):
        image, label = dataset[i]
        image_batch = image.unsqueeze(0)
        
        with torch.no_grad():
            output = model(image_batch)
            pred_class_prob = torch.sigmoid(output[:, 0]).item()
            pred_class = 1 if pred_class_prob > 0.5 else 0
            pred_bbox = output[:, 1:].squeeze().numpy()

        true_class = int(label[0].item())
        true_bbox = label[1:].numpy()

        # Convert bbox from (xc, yc, w, h) to (x, y, w, h)
        def denorm_bbox(bbox):
            x_c, y_c, w, h = bbox
            x = (x_c - w/2) * 224
            y = (y_c - h/2) * 224
            return x, y, w * 224, h * 224

        pred_rect = denorm_bbox(pred_bbox)
        true_rect = denorm_bbox(true_bbox)

        img_np = image.permute(1, 2, 0).numpy()

        fig, ax = plt.subplots(1, 1, figsize=(5, 5))
        ax.imshow(img_np, cmap="gray")
        ax.axis("off")

        ax.add_patch(patches.Rectangle(
            (pred_rect[0], pred_rect[1]), pred_rect[2], pred_rect[3],
            edgecolor="green", linewidth=2, fill=False, label="Predicted"
        ))

        ax.add_patch(patches.Rectangle(
            (true_rect[0], true_rect[1]), true_rect[2], true_rect[3],
            edgecolor="red", linewidth=2, fill=False, label="Ground Truth"
        ))

        ax.set_title(f"Pred: {'Tumor' if pred_class else 'No Tumor'} ({pred_class_prob:.2f}) | "
                     f"Actual: {'Tumor' if true_class else 'No Tumor'}", fontsize=10)
        ax.legend()

        plt.tight_layout()
        save_path = os.path.join(save_folder, f"sample_{i+1}.png")
        plt.savefig(save_path)
        plt.close()
    
    print(f"✅ Saved {num_samples} prediction images to: {save_folder}")


In [48]:
# Save 10 test samples with predictions
export_prediction_report(model, test_dataset, save_folder="test_report", num_samples=10)


✅ Saved 10 prediction images to: test_report
