In [None]:
import nibabel as nib
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import transforms, models
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import seaborn as sns
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Define paths to your dataset
data_dir = "/kaggle/input/all-zendo-dataset/DATA/"
t2wi_dir = os.path.join(data_dir, "T2WI")
csv_path = "/kaggle/input/all-zendo-dataset/DATA/all_centers_combined.csv"

# Load the CSV file
df = pd.read_csv(csv_path)
df['image_name'] = df['image_name'].str.replace('.nii.gz', '.nii')
print("CSV Columns:", df.columns)
print("First few rows of CSV:")
print(df.head())

# Function to load a NIfTI file
def load_nifti(file_path):
    nifti = nib.load(file_path)
    return nifti.get_fdata()

# Pair T2WI images with their labels from the CSV
image_label_pairs = []
for t2wi_subfolder in os.listdir(t2wi_dir):
    subfolder_path = os.path.join(t2wi_dir, t2wi_subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    t2wi_files = [f for f in os.listdir(subfolder_path) if f.endswith(".nii")]
    if t2wi_files:
        t2wi_path = os.path.join(subfolder_path, t2wi_files[0])
        t2wi_id = t2wi_subfolder.split(".")[0]
        label_row = df[df['image_name'].str.replace('.nii', '') == t2wi_id]
        if not label_row.empty:
            label = label_row['label'].iloc[0]
            image_label_pairs.append((t2wi_path, label))
        else:
            print(f"No label found for T2WI ID {t2wi_id}")

print("Paired T2WI-Label pairs:", len(image_label_pairs))
for t2wi_path, label in image_label_pairs[:5]:
    print(f"T2WI: {t2wi_path}, Label: {label}")

# Compute class weights to handle imbalance
labels = [pair[1] for pair in image_label_pairs]
nmbic_count = labels.count(0)
mbic_count = labels.count(1)
total = nmbic_count + mbic_count
class_weights = torch.tensor([total / (2 * nmbic_count), total / (2 * mbic_count)]).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
print(f"Class Weights (NMBIC, MBIC): {class_weights}")

# Define data augmentation and transforms
train_transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=30, p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.Normalize(mean=0.5, std=0.5),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(256, 256),
    A.Normalize(mean=0.5, std=0.5),
    ToTensorV2(),
])

# Custom Dataset class for classification
class BladderCancerDataset(Dataset):
    def __init__(self, image_label_pairs, transform=None):
        self.image_label_pairs = image_label_pairs
        self.transform = transform
        self.slices = []
        for idx, (img_path, label) in enumerate(self.image_label_pairs):
            try:
                img = load_nifti(img_path)
                img = (img - img.min()) / (img.max() - img.min() + 1e-8)
                middle_slice = img.shape[2] // 2
                self.slices.append((idx, middle_slice, img, label))
            except Exception as e:
                print(f"Error loading {img_path}: {e}")

    def __len__(self):
        return len(self.slices)

    def __getitem__(self, idx):
        img_idx, z, img, label = self.slices[idx]
        img_slice = img[:, :, z]
        if self.transform:
            augmented = self.transform(image=img_slice)
            img_slice = augmented['image']
        else:
            img_slice = torch.FloatTensor(img_slice).unsqueeze(0)
        label = torch.LongTensor([label])
        return img_slice, label

    def get_pair_index(self, idx):
        return self.slices[idx][0]

# Create datasets with transforms
train_dataset = BladderCancerDataset(image_label_pairs, transform=train_transform)
val_dataset = BladderCancerDataset(image_label_pairs, transform=val_transform)

# Split at the pair level
pair_indices = list(range(len(image_label_pairs)))
train_pair_indices, val_pair_indices = train_test_split(pair_indices, test_size=0.2, random_state=42)
train_indices, val_indices = [], []
for slice_idx in range(len(train_dataset)):
    pair_idx = train_dataset.get_pair_index(slice_idx)
    if pair_idx in train_pair_indices:
        train_indices.append(slice_idx)
    elif pair_idx in val_pair_indices:
        val_indices.append(slice_idx)

# Create data loaders
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
train_loader = DataLoader(train_dataset, batch_size=8, sampler=train_sampler)
val_loader = DataLoader(val_dataset, batch_size=8, sampler=val_sampler)

# Use a pretrained ResNet50 model
model = models.resnet50(pretrained=True)
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.6),
    nn.Linear(512, 2)
)

# Move model to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss, optimizer, and scheduler with weight decay
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.00005, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

# Training loop with validation and early stopping
num_epochs = 100
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
best_val_loss = float('inf')
patience = 10
early_stop_counter = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    for images_batch, labels_batch in train_loader:
        images_batch, labels_batch = images_batch.to(device), labels_batch.to(device).squeeze(1)
        outputs = model(images_batch)
        loss = criterion(outputs, labels_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        train_total += labels_batch.size(0)
        train_correct += (predicted == labels_batch).sum().item()
    train_loss /= len(train_loader)
    train_accuracy = train_correct / train_total
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images_batch, labels_batch in val_loader:
            images_batch, labels_batch = images_batch.to(device), labels_batch.to(device).squeeze(1)
            outputs = model(images_batch)
            loss = criterion(outputs, labels_batch)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_total += labels_batch.size(0)
            val_correct += (predicted == labels_batch).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels_batch.cpu().numpy())
    val_loss /= len(val_loader)
    val_accuracy = val_correct / val_total
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
    
    scheduler.step(val_loss)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered!")
            break

# Load the best model
model.load_state_dict(torch.load("best_model.pth"))

# Plot training and validation accuracy/loss
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(range(1, len(train_accuracies) + 1), train_accuracies, label='Train Accuracy')
plt.plot(range(1, len(val_accuracies) + 1), val_accuracies, label='Val Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.grid()
plt.subplot(1, 2, 2)
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid()
plt.tight_layout()
plt.show()

# Generate and plot confusion matrix
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['NMBIC', 'MBIC'], yticklabels=['NMBIC', 'MBIC'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Visualize predictions on validation set
model.eval()
val_iter = iter(val_loader)
images, labels = next(val_iter)
images, labels = images.to(device), labels.to(device).squeeze(1)
with torch.no_grad():
    outputs = model(images)
    _, preds = torch.max(outputs, 1)
images, labels, preds = images.cpu(), labels.cpu(), preds.cpu()
num_samples = min(15, len(images))
plt.figure(figsize=(15, 4 * num_samples))
for i in range(num_samples):
    plt.subplot(num_samples, 2, 2 * i + 1)
    plt.imshow(images[i, 0], cmap="gray")
    plt.title(f"Label: {'MBIC' if labels[i] == 1 else 'NMBIC'}")
    plt.axis("off")
    plt.subplot(num_samples, 2, 2 * i + 2)
    plt.imshow(images[i, 0], cmap="gray")
    plt.title(f"Predicted: {'MBIC' if preds[i] == 1 else 'NMBIC'}")
    plt.axis("off")
plt.tight_layout()
plt.show()

In [None]:
import nibabel as nib
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import seaborn as sns
from monai.transforms import Compose, LoadImaged, Resize, RandFlipd, RandRotate90d, NormalizeIntensityd, ToTensord
from monai.networks.nets import BasicUNet
from monai.data import Dataset as MonaiDataset

# Define paths to your dataset
data_dir = "/kaggle/input/all-zendo-dataset/DATA/"
t2wi_dir = os.path.join(data_dir, "T2WI")
csv_path = "/kaggle/input/all-zendo-dataset/DATA/all_centers_combined.csv"

# Load the CSV file
df = pd.read_csv(csv_path)
df['image_name'] = df['image_name'].str.replace('.nii.gz', '.nii')
print("CSV Columns:", df.columns)
print("First few rows of CSV:")
print(df.head())

# Function to load a NIfTI file
def load_nifti(file_path):
    nifti = nib.load(file_path)
    return nifti.get_fdata()

# Pair T2WI images with their labels from the CSV
image_label_pairs = []
for t2wi_subfolder in os.listdir(t2wi_dir):
    subfolder_path = os.path.join(t2wi_dir, t2wi_subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    t2wi_files = [f for f in os.listdir(subfolder_path) if f.endswith(".nii")]
    if t2wi_files:
        t2wi_path = os.path.join(subfolder_path, t2wi_files[0])
        t2wi_id = t2wi_subfolder.split(".")[0]
        label_row = df[df['image_name'].str.replace('.nii', '') == t2wi_id]
        if not label_row.empty:
            label = label_row['label'].iloc[0]
            image_label_pairs.append((t2wi_path, label))
        else:
            print(f"No label found for T2WI ID {t2wi_id}")

print("Paired T2WI-Label pairs:", len(image_label_pairs))
for t2wi_path, label in image_label_pairs[:5]:
    print(f"T2WI: {t2wi_path}, Label: {label}")

# Compute class weights to handle imbalance
labels = [pair[1] for pair in image_label_pairs]
nmbic_count = labels.count(0)
mbic_count = labels.count(1)
total = nmbic_count + mbic_count
class_weights = torch.tensor([total / (2 * nmbic_count), total / (2 * mbic_count)]).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
print(f"Class Weights (NMBIC, MBIC): {class_weights}")

# Define 3D data augmentation and transforms using MONAI
train_transforms = Compose([
    LoadImaged(keys=["image"]),
    Resize(spatial_size=(64, 64, 64), keys=["image"]),
    RandFlipd(keys=["image"], prob=0.5, spatial_axis=0),
    RandFlipd(keys=["image"], prob=0.5, spatial_axis=1),
    RandRotate90d(keys=["image"], prob=0.5, max_k=3),
    NormalizeIntensityd(keys=["image"]),
    ToTensord(keys=["image", "label"]),
])

val_transforms = Compose([
    LoadImaged(keys=["image"]),
    Resize(spatial_size=(64, 64, 64), keys=["image"]),
    NormalizeIntensityd(keys=["image"]),
    ToTensord(keys=["image", "label"]),
])

# Custom Dataset class for 3D classification
class BladderCancerDataset(MonaiDataset):
    def __init__(self, image_label_pairs, transforms=None):
        self.data = [{"image": img_path, "label": label} for img_path, label in image_label_pairs]
        super().__init__(data=self.data, transform=transforms)

# Create datasets with transforms
train_dataset = BladderCancerDataset(image_label_pairs, transforms=train_transforms)
val_dataset = BladderCancerDataset(image_label_pairs, transforms=val_transforms)

# Split at the pair level
pair_indices = list(range(len(image_label_pairs)))
train_pair_indices, val_pair_indices = train_test_split(pair_indices, test_size=0.2, random_state=42)
train_sampler = SubsetRandomSampler(train_pair_indices)
val_sampler = SubsetRandomSampler(val_pair_indices)

# Create data loaders with small batch size
train_loader = DataLoader(train_dataset, batch_size=2, sampler=train_sampler, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=2, sampler=val_sampler, num_workers=0)

# Define a lightweight 3D CNN model
class Simple3DCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(Simple3DCNN, self).__init__()
        self.conv1 = nn.Conv3d(1, 16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool3d(2)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv3d(32, 64, kernel_size=3, padding=1)
        self.global_pool = nn.AdaptiveAvgPool3d(1)
        self.fc1 = nn.Linear(64, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Initialize model
model = Simple3DCNN(num_classes=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss, optimizer, and scheduler
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

# Gradient accumulation for memory efficiency
accumulation_steps = 4

# Training loop with validation and early stopping
num_epochs = 20  # Reduced for resource efficiency
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
best_val_loss = float('inf')
patience = 5
early_stop_counter = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    optimizer.zero_grad()
    for i, batch in enumerate(train_loader):
        images = batch["image"].to(device)
        labels = batch["label"].to(device).squeeze()
        outputs = model(images)
        loss = criterion(outputs, labels) / accumulation_steps
        loss.backward()
        train_loss += loss.item() * accumulation_steps
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        _, predicted = torch.max(outputs, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
    train_loss /= len(train_loader)
    train_accuracy = train_correct / train_total
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in val_loader:
            images = batch["image"].to(device)
            labels = batch["label"].to(device).squeeze()
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    val_loss /= len(val_loader)
    val_accuracy = val_correct / val_total
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
    
    scheduler.step(val_loss)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered!")
            break

# Load the best model
model.load_state_dict(torch.load("best_model.pth"))

# Plot training and validation accuracy/loss
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(range(1, len(train_accuracies) + 1), train_accuracies, label='Train Accuracy')
plt.plot(range(1, len(val_accuracies) + 1), val_accuracies, label='Val Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.grid()
plt.subplot(1, 2, 2)
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid()
plt.savefig('training_plots.png')
plt.close()

# Generate and plot confusion matrix
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['NMBIC', 'MBIC'], yticklabels=['NMBIC', 'MBIC'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')
plt.close()

# Visualize predictions on validation set (middle slice of 3D volume)
model.eval()
val_iter = iter(val_loader)
batch = next(val_iter)
images = batch["image"].to(device)
labels = batch["label"].to(device).squeeze()
with torch.no_grad():
    outputs = model(images)
    _, preds = torch.max(outputs, 1)
images, labels, preds = images.cpu(), labels.cpu(), preds.cpu()
num_samples = min(4, len(images))  # Reduced for resource efficiency
plt.figure(figsize=(15, 4 * num_samples))
for i in range(num_samples):
    middle_slice = images[i, 0, :, :, images.shape[4] // 2]  # Middle slice along z-axis
    plt.subplot(num_samples, 2, 2 * i + 1)
    plt.imshow(middle_slice, cmap="gray")
    plt.title(f"Label: {'MBIC' if labels[i] == 1 else 'NMBIC'}")
    plt.axis("off")
    plt.subplot(num_samples, 2, 2 * i + 2)
    plt.imshow(middle_slice, cmap="gray")
    plt.title(f"Predicted: {'MBIC' if preds[i] == 1 else 'NMBIC'}")
    plt.axis("off")
plt.tight_layout()
plt.savefig('predictions.png')
plt.close()



In [1]:
import os
import numpy as np
import tensorflow as tf
import keras
from keras import layers
import nibabel as nib
from scipy import ndimage
import pandas as pd
import matplotlib.pyplot as plt

# Define paths to your dataset
data_dir = "/kaggle/input/all-zendo-dataset/DATA/"
t2wi_dir = os.path.join(data_dir, "T2WI")
csv_path = "/kaggle/input/all-zendo-dataset/DATA/all_centers_combined.csv"

# Load the CSV file
df = pd.read_csv(csv_path)
df['image_name'] = df['image_name'].str.replace('.nii.gz', '.nii')

# Helper functions for preprocessing
def read_nifti_file(filepath):
    """Read and load volume"""
    scan = nib.load(filepath)
    scan = scan.get_fdata()
    return scan

def normalize(volume):
    """Normalize the volume"""
    min_hu = -1000
    max_hu = 400
    volume[volume < min_hu] = min_hu
    volume[volume > max_hu] = max_hu
    volume = (volume - min_hu) / (max_hu - min_hu)
    volume = volume.astype("float32")
    return volume

def resize_volume(img):
    """Resize across z-axis"""
    desired_depth = 64
    desired_width = 128
    desired_height = 128
    current_depth = img.shape[-1]
    current_width = img.shape[0]
    current_height = img.shape[1]
    depth = current_depth / desired_depth
    width = current_width / desired_width
    height = current_height / desired_height
    depth_factor = 1 / depth
    width_factor = 1 / width
    height_factor = 1 / height
    img = ndimage.rotate(img, 90, reshape=False)
    img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)
    return img

def process_scan(path):
    """Read and resize volume"""
    volume = read_nifti_file(path)
    volume = normalize(volume)
    volume = resize_volume(volume)
    return volume

# Load and pair T2WI images with labels
nmbic_scan_paths = []
mbic_scan_paths = []
for t2wi_subfolder in os.listdir(t2wi_dir):
    subfolder_path = os.path.join(t2wi_dir, t2wi_subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    t2wi_files = [f for f in os.listdir(subfolder_path) if f.endswith(".nii")]
    if t2wi_files:
        t2wi_path = os.path.join(subfolder_path, t2wi_files[0])
        t2wi_id = t2wi_subfolder.split(".")[0]
        label_row = df[df['image_name'].str.replace('.nii', '') == t2wi_id]
        if not label_row.empty:
            label = label_row['label'].iloc[0]
            if label == 0:
                nmbic_scan_paths.append(t2wi_path)
            else:
                mbic_scan_paths.append(t2wi_path)
        else:
            print(f"No label found for T2WI ID {t2wi_id}")

print(f"NMBIC scans: {len(nmbic_scan_paths)}")
print(f"MBIC scans: {len(mbic_scan_paths)}")

# Process scans
nmbic_scans = np.array([process_scan(path) for path in nmbic_scan_paths])
mbic_scans = np.array([process_scan(path) for path in mbic_scan_paths])

# Assign labels (0 for NMBIC, 1 for MBIC)
nmbic_labels = np.array([0 for _ in range(len(nmbic_scans))])
mbic_labels = np.array([1 for _ in range(len(mbic_scans))])

# Split data into training and validation (70-30)
x_train = np.concatenate((mbic_scans[:int(0.7 * len(mbic_scans))], nmbic_scans[:int(0.7 * len(nmbic_scans))]), axis=0)
y_train = np.concatenate((mbic_labels[:int(0.7 * len(mbic_labels))], nmbic_labels[:int(0.7 * len(nmbic_labels))]), axis=0)
x_val = np.concatenate((mbic_scans[int(0.7 * len(mbic_scans)):], nmbic_scans[int(0.7 * len(nmbic_scans)):]), axis=0)
y_val = np.concatenate((mbic_labels[int(0.7 * len(mbic_labels)):], nmbic_labels[int(0.7 * len(nmbic_labels)):]), axis=0)

print(f"Number of samples in train and validation are {x_train.shape[0]} and {x_val.shape[0]}.")

# Data augmentation
def rotate(volume):
    """Rotate the volume by a few degrees"""
    def scipy_rotate(volume):
        angles = [-20, -10, -5, 5, 10, 20]
        angle = np.random.choice(angles)
        volume = ndimage.rotate(volume, angle, reshape=False)
        volume[volume < 0] = 0
        volume[volume > 1] = 1
        return volume
    augmented_volume = tf.numpy_function(scipy_rotate, [volume], tf.float32)
    return augmented_volume

def train_preprocessing(volume, label):
    """Process training data by rotating and adding a channel."""
    volume = rotate(volume)
    volume = tf.expand_dims(volume, axis=3)
    return volume, label

def validation_preprocessing(volume, label):
    """Process validation data by only adding a channel."""
    volume = tf.expand_dims(volume, axis=3)
    return volume, label

# Define data loaders
train_loader = tf.data.Dataset.from_tensor_slices((x_train, y_train))
validation_loader = tf.data.Dataset.from_tensor_slices((x_val, y_val))

batch_size = 2
train_dataset = (
    train_loader.shuffle(len(x_train))
    .map(train_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(2)
)
validation_dataset = (
    validation_loader.shuffle(len(x_val))
    .map(validation_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(2)
)

# Visualize an augmented CT scan
data = train_dataset.take(1)
images, labels = list(data)[0]
images = images.numpy()
image = images[0]
print("Dimension of the MRI scan is:", image.shape)
plt.figure()
plt.imshow(np.squeeze(image[:, :, 30]), cmap="gray")
plt.savefig('sample_mri_scan.png')
plt.close()

# Visualize montage of slices
def plot_slices(num_rows, num_columns, width, height, data):
    """Plot a montage of MRI slices"""
    data = np.rot90(np.array(data))
    data = np.transpose(data)
    data = np.reshape(data, (num_rows, num_columns, width, height))
    rows_data, columns_data = data.shape[0], data.shape[1]
    heights = [slc[0].shape[0] for slc in data]
    widths = [slc.shape[1] for slc in data[0]]
    fig_width = 12.0
    fig_height = fig_width * sum(heights) / sum(widths)
    f, axarr = plt.subplots(
        rows_data,
        columns_data,
        figsize=(fig_width, fig_height),
        gridspec_kw={"height_ratios": heights},
    )
    for i in range(rows_data):
        for j in range(columns_data):
            axarr[i, j].imshow(data[i][j], cmap="gray")
            axarr[i, j].axis("off")
    plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)
    plt.savefig('mri_slices_montage.png')
    plt.close()

plot_slices(4, 10, 128, 128, image[:, :, :40])

# Define 3D CNN model
def get_model(width=128, height=128, depth=64):
    """Build a 3D convolutional neural network model."""
    inputs = keras.Input((width, height, depth, 1))
    x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(units=1, activation="sigmoid")(x)
    model = keras.Model(inputs, outputs, name="3dcnn")
    return model

# Build and compile model
model = get_model(width=128, height=128, depth=64)
initial_learning_rate = 0.0001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
    metrics=["acc"],
    run_eagerly=True,
)

# Define callbacks
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "3d_bladder_classification.keras", save_best_only=True
)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)

# Train the model
epochs = 100
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    shuffle=True,
    verbose=2,
    callbacks=[checkpoint_cb, early_stopping_cb],
)

# Plot model performance
fig, ax = plt.subplots(1, 2, figsize=(20, 3))
ax = ax.ravel()
for i, metric in enumerate(["acc", "loss"]):
    ax[i].plot(history.history[metric])
    ax[i].plot(history.history["val_" + metric])
    ax[i].set_title(f"Model {metric}")
    ax[i].set_xlabel("epochs")
    ax[i].set_ylabel(metric)
    ax[i].legend(["train", "val"])
plt.savefig('model_performance.png')
plt.close()

# Make predictions on a single MRI scan
model.load_weights("3d_bladder_classification.keras")
prediction = model.predict(np.expand_dims(x_val[0], axis=0))[0]
scores = [1 - prediction[0], prediction[0]]
class_names = ["NMBIC", "MBIC"]
for score, name in zip(scores, class_names):
    print(f"This model is {100 * score:.2f} percent confident that MRI scan is {name}")

2025-05-02 17:44:15.554255: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746207855.750275      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746207855.806680      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


NMBIC scans: 140
MBIC scans: 80
Number of samples in train and validation are 154 and 66.


I0000 00:00:1746208033.323191      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Dimension of the MRI scan is: (128, 128, 64, 1)
Epoch 1/100


I0000 00:00:1746208040.053168      31 cuda_dnn.cc:529] Loaded cuDNN version 90300


77/77 - 24s - 316ms/step - acc: 0.5195 - loss: 0.7545 - val_acc: 0.6364 - val_loss: 0.6581
Epoch 2/100
77/77 - 20s - 257ms/step - acc: 0.5714 - loss: 0.7323 - val_acc: 0.6364 - val_loss: 0.6907
Epoch 3/100
77/77 - 20s - 257ms/step - acc: 0.5844 - loss: 0.6903 - val_acc: 0.6364 - val_loss: 0.6583
Epoch 4/100
77/77 - 19s - 249ms/step - acc: 0.6104 - loss: 0.7198 - val_acc: 0.3636 - val_loss: 0.9983
Epoch 5/100
77/77 - 20s - 254ms/step - acc: 0.5584 - loss: 0.7036 - val_acc: 0.6364 - val_loss: 0.6782
Epoch 6/100
77/77 - 19s - 247ms/step - acc: 0.5844 - loss: 0.6846 - val_acc: 0.6364 - val_loss: 0.7025
Epoch 7/100
77/77 - 19s - 252ms/step - acc: 0.6104 - loss: 0.6865 - val_acc: 0.6364 - val_loss: 0.7736
Epoch 8/100
77/77 - 19s - 248ms/step - acc: 0.6234 - loss: 0.6758 - val_acc: 0.6364 - val_loss: 0.6688
Epoch 9/100
77/77 - 19s - 249ms/step - acc: 0.6429 - loss: 0.6559 - val_acc: 0.6364 - val_loss: 0.7816
Epoch 10/100
77/77 - 19s - 252ms/step - acc: 0.6104 - loss: 0.7005 - val_acc: 0.6515 

In [7]:
# Make predictions on a single MRI scan from a specified .nii file path
def predict_single_scan(model, nii_path):
    """Process and predict on a single .nii MRI scan."""
    # Process the scan
    volume = process_scan(nii_path)
    # Add batch and channel dimensions
    volume = np.expand_dims(volume, axis=0)  # Shape: (1, 128, 128, 64)
    volume = np.expand_dims(volume, axis=-1)  # Shape: (1, 128, 128, 64, 1)
    # Load best weights
    model.load_weights("3d_bladder_classification.keras")
    # Make prediction
    prediction = model.predict(volume)[0]
    scores = [1 - prediction[0], prediction[0]]
    class_names = ["NMBIC", "MBIC"]
    for score, name in zip(scores, class_names):
        print(f"This model is {100 * score:.2f} percent confident that MRI scan is {name}")

# Example usage: specify the path to your .nii file
nii_file_path = "/kaggle/input/testing/019 zhong guo ai.nii"  # Replace with actual .nii file path
predict_single_scan(model, nii_file_path)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
This model is 65.68 percent confident that MRI scan is NMBIC
This model is 34.32 percent confident that MRI scan is MBIC


In [3]:
import os
import numpy as np
import tensorflow as tf
import keras
from keras import layers
import nibabel as nib
from scipy import ndimage
import pandas as pd
import matplotlib.pyplot as plt

# Define paths to your dataset
data_dir = "/kaggle/input/all-zendo-dataset/DATA"
t2wi_dir = os.path.join(data_dir, "T2WI")


csv_path = "/kaggle/input/all-zendo-dataset/DATA/all_centers_combined.csv"

# Load the CSV file
df = pd.read_csv(csv_path)
# df = pd.read_excel(csv_path)
df['image_name'] = df['image_name'].str.replace('.nii.gz', '.nii')

# Helper functions for preprocessing
def read_nifti_file(filepath):
    """Read and load volume"""
    scan = nib.load(filepath)
    scan = scan.get_fdata()
    return scan

def normalize(volume):
    """Normalize the volume"""
    min_hu = -1000
    max_hu = 400
    volume[volume < min_hu] = min_hu
    volume[volume > max_hu] = max_hu
    volume = (volume - min_hu) / (max_hu - min_hu)
    volume = volume.astype("float32")
    return volume

def resize_volume(img):
    """Resize across z-axis"""
    desired_depth = 64
    desired_width = 128
    desired_height = 128
    current_depth = img.shape[-1]
    current_width = img.shape[0]
    current_height = img.shape[1]
    depth = current_depth / desired_depth
    width = current_width / desired_width
    height = current_height / desired_height
    depth_factor = 1 / depth
    width_factor = 1 / width
    height_factor = 1 / height
    img = ndimage.rotate(img, 90, reshape=False)
    img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)
    return img

def process_scan(path):
    """Read and resize volume"""
    volume = read_nifti_file(path)
    volume = normalize(volume)
    volume = resize_volume(volume)
    return volume

# Load and pair T2WI images with labels
nmbic_scan_paths = []
mbic_scan_paths = []
for t2wi_subfolder in os.listdir(t2wi_dir):
    subfolder_path = os.path.join(t2wi_dir, t2wi_subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    t2wi_files = [f for f in os.listdir(subfolder_path) if f.endswith(".nii")]
    if t2wi_files:
        t2wi_path = os.path.join(subfolder_path, t2wi_files[0])
        t2wi_id = t2wi_subfolder.split(".")[0]
        label_row = df[df['image_name'].str.replace('.nii', '') == t2wi_id]
        if not label_row.empty:
            label = label_row['label'].iloc[0]
            if label == 0:
                nmbic_scan_paths.append(t2wi_path)
            else:
                mbic_scan_paths.append(t2wi_path)
        else:
            print(f"No label found for T2WI ID {t2wi_id}")

print(f"NMBIC scans: {len(nmbic_scan_paths)}")
print(f"MBIC scans: {len(mbic_scan_paths)}")

# Process scans
nmbic_scans = np.array([process_scan(path) for path in nmbic_scan_paths])
mbic_scans = np.array([process_scan(path) for path in mbic_scan_paths])

# Assign labels (0 for NMBIC, 1 for MBIC)
nmbic_labels = np.array([0 for _ in range(len(nmbic_scans))])
mbic_labels = np.array([1 for _ in range(len(mbic_scans))])

# Split data into training and validation (70-30)
x_train = np.concatenate((mbic_scans[:int(0.7 * len(mbic_scans))], nmbic_scans[:int(0.7 * len(nmbic_scans))]), axis=0)
y_train = np.concatenate((mbic_labels[:int(0.7 * len(mbic_labels))], nmbic_labels[:int(0.7 * len(nmbic_labels))]), axis=0)
x_val = np.concatenate((mbic_scans[int(0.7 * len(mbic_scans)):], nmbic_scans[int(0.7 * len(nmbic_scans)):]), axis=0)
y_val = np.concatenate((mbic_labels[int(0.7 * len(mbic_labels)):], nmbic_labels[int(0.7 * len(nmbic_labels)):]), axis=0)

print(f"Number of samples in train and validation are {x_train.shape[0]} and {x_val.shape[0]}.")

# Data augmentation
def rotate(volume):
    """Rotate the volume by a few degrees"""
    def scipy_rotate(volume):
        angles = [-20, -10, -5, 5, 10, 20]
        angle = np.random.choice(angles)
        volume = ndimage.rotate(volume, angle, reshape=False)
        volume[volume < 0] = 0
        volume[volume > 1] = 1
        return volume
    augmented_volume = tf.numpy_function(scipy_rotate, [volume], tf.float32)
    return augmented_volume

def train_preprocessing(volume, label):
    """Process training data by rotating and adding a channel."""
    volume = rotate(volume)
    volume = tf.expand_dims(volume, axis=3)
    return volume, label

def validation_preprocessing(volume, label):
    """Process validation data by only adding a channel."""
    volume = tf.expand_dims(volume, axis=3)
    return volume, label

# Define data loaders
train_loader = tf.data.Dataset.from_tensor_slices((x_train, y_train))
validation_loader = tf.data.Dataset.from_tensor_slices((x_val, y_val))

batch_size = 2
train_dataset = (
    train_loader.shuffle(len(x_train))
    .map(train_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(2)
)
validation_dataset = (
    validation_loader.shuffle(len(x_val))
    .map(validation_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(2)
)

NMBIC scans: 140
MBIC scans: 80
Number of samples in train and validation are 154 and 66.


I0000 00:00:1746217848.624564      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [15]:
# # Visualize an augmented CT scan
# data = train_dataset.take(1)
# images, labels = list(data)[0]
# images = images.numpy()
# image = images[0]
# print("Dimension of the MRI scan is:", image.shape)
# plt.figure()
# plt.imshow(np.squeeze(image[:, :, 30]), cmap="gray")
# plt.close()

# # Visualize montage of slices
# def plot_slices(num_rows, num_columns, width, height, data):
#     """Plot a montage of MRI slices"""
#     data = np.rot90(np.array(data))
#     data = np.transpose(data)
#     data = np.reshape(data, (num_rows, num_columns, width, height))
#     rows_data, columns_data = data.shape[0], data.shape[1]
#     heights = [slc[0].shape[0] for slc in data]
#     widths = [slc.shape[1] for slc in data[0]]
#     fig_width = 12.0
#     fig_height = fig_width * sum(heights) / sum(widths)
#     f, axarr = plt.subplots(
#         rows_data,
#         columns_data,
#         figsize=(fig_width, fig_height),
#         gridspec_kw={"height_ratios": heights},
#     )
#     for i in range(rows_data):
#         for j in range(columns_data):
#             axarr[i, j].imshow(data[i][j], cmap="gray")
#             axarr[i, j].axis("off")
#     plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)
#     plt.savefig('mri_slices_montage.png')
#     plt.close()

# plot_slices(4, 10, 128, 128, image[:, :, :40])

# # Define 3D CNN model
# def get_model(width=128, height=128, depth=64):
#     """Build a 3D convolutional neural network model."""
#     inputs = keras.Input((width, height, depth, 1))
#     x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.GlobalAveragePooling3D()(x)
#     x = layers.Dense(units=512, activation="relu")(x)
#     x = layers.Dropout(0.3)(x)
#     outputs = layers.Dense(units=1, activation="sigmoid")(x)
#     model = keras.Model(inputs, outputs, name="3dcnn")
#     return model

# # Build and compile model
# model = get_model(width=128, height=128, depth=64)
# initial_learning_rate = 0.0001
# lr_schedule = keras.optimizers.schedules.ExponentialDecay(
#     initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
# )
# model.compile(
#     loss="binary_crossentropy",
#     optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
#     metrics=["acc"],
#     run_eagerly=True,
# )

# # Define callbacks
# checkpoint_cb = keras.callbacks.ModelCheckpoint(
#     "3d_bladder_classification.keras", save_best_only=True
# )
# early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)

# # Train the model
# epochs = 100
# history = model.fit(
#     train_dataset,
#     validation_data=validation_dataset,
#     epochs=epochs,
#     shuffle=True,
#     verbose=2,
#     callbacks=[checkpoint_cb, early_stopping_cb],
# )
# # Visualize an augmented CT scan
# data = train_dataset.take(1)
# images, labels = list(data)[0]
# images = images.numpy()
# image = images[0]
# print("Dimension of the MRI scan is:", image.shape)
# plt.figure()
# plt.imshow(np.squeeze(image[:, :, 30]), cmap="gray")
# plt.savefig('sample_mri_scan.png')
# plt.close()

# # Visualize montage of slices
# def plot_slices(num_rows, num_columns, width, height, data):
#     """Plot a montage of MRI slices"""
#     data = np.rot90(np.array(data))
#     data = np.transpose(data)
#     data = np.reshape(data, (num_rows, num_columns, width, height))
#     rows_data, columns_data = data.shape[0], data.shape[1]
#     heights = [slc[0].shape[0] for slc in data]
#     widths = [slc.shape[1] for slc in data[0]]
#     fig_width = 12.0
#     fig_height = fig_width * sum(heights) / sum(widths)
#     f, axarr = plt.subplots(
#         rows_data,
#         columns_data,
#         figsize=(fig_width, fig_height),
#         gridspec_kw={"height_ratios": heights},
#     )
#     for i in range(rows_data):
#         for j in range(columns_data):
#             axarr[i, j].imshow(data[i][j], cmap="gray")
#             axarr[i, j].axis("off")
#     plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)
#     plt.savefig('mri_slices_montage.png')
#     plt.close()

# plot_slices(4, 10, 128, 128, image[:, :, :40])

# # Define 3D CNN model
# def get_model(width=128, height=128, depth=64):
#     """Build a 3D convolutional neural network model."""
#     inputs = keras.Input((width, height, depth, 1))
#     x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
#     x = layers.MaxPool3D(pool_size=2)(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.GlobalAveragePooling3D()(x)
#     x = layers.Dense(units=512, activation="relu")(x)
#     x = layers.Dropout(0.3)(x)
#     outputs = layers.Dense(units=1, activation="sigmoid")(x)
#     model = keras.Model(inputs, outputs, name="3dcnn")
#     return model

# # Build and compile model
# model = get_model(width=128, height=128, depth=64)
# initial_learning_rate = 0.0001
# lr_schedule = keras.optimizers.schedules.ExponentialDecay(
#     initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
# )
# model.compile(
#     loss="binary_crossentropy",
#     optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
#     metrics=["acc"],
#     run_eagerly=True,
# )

# # Define callbacks
# checkpoint_cb = keras.callbacks.ModelCheckpoint(
#     "3d_bladder_classification.keras", save_best_only=True
# )
# early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)

# # Assume model has already been trained and history is available
# # If history is not available, you need to load it from a saved file or ensure training occurred
# # For demonstration, we'll assume history exists from previous training
# # If you have a saved history, you can load it, e.g., with pickle:
# # import pickle
# # with open('training_history.pkl', 'rb') as f:
# #     history = pickle.load(f)

# # Plot model performance (accuracy and loss)
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np
# from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix

# fig, ax = plt.subplots(1, 2, figsize=(20, 3))
# ax = ax.ravel()
# for i, metric in enumerate(["acc", "loss"]):
#     ax[i].plot(history.history[metric])
#     ax[i].plot(history.history["val_" + metric])
#     ax[i].set_title(f"Model {metric}")
#     ax[i].set_xlabel("epochs")
#     ax[i].set_ylabel(metric)
#     ax[i].legend(["train", "val"])
# plt.savefig('model_performance_acc_loss.png')
# plt.close()

# # Evaluate model on train and validation sets for final metrics
# model.load_weights("3d_bladder_classification.keras")

# # Get predictions and true labels for training set
# train_true = []
# train_pred = []
# for batch in train_dataset:
#     images, labels = batch
#     predictions = model.predict(images, verbose=0)
#     train_true.extend(labels.numpy().flatten())
#     train_pred.extend((predictions > 0.5).astype(int).flatten())

# train_true = np.array(train_true)
# train_pred = np.array(train_pred)

# # Get predictions and true labels for validation set
# val_true = []
# val_pred = []
# for batch in validation_dataset:
#     images, labels = batch
#     predictions = model.predict(images, verbose=0)
#     val_true.extend(labels.numpy().flatten())
#     val_pred.extend((predictions > 0.5).astype(int).flatten())

# val_true = np.array(val_true)
# val_pred = np.array(val_pred)

# # Compute final metrics
# train_precision = precision_score(train_true, train_pred, zero_division=0)
# train_recall = recall_score(train_true, train_pred, zero_division=0)
# train_accuracy = accuracy_score(train_true, train_pred)

# val_precision = precision_score(val_true, val_pred, zero_division=0)
# val_recall = recall_score(val_true, val_pred, zero_division=0)
# val_accuracy = accuracy_score(val_true, val_pred)

# print("\nFinal Training Set Metrics:")
# print(f"Precision: {train_precision:.4f}")
# print(f"Recall: {train_recall:.4f}")
# print(f"Accuracy: {train_accuracy:.4f}")

# print("\nFinal Validation Set Metrics:")
# print(f"Precision: {val_precision:.4f}")
# print(f"Recall: {val_recall:.4f}")
# print(f"Accuracy: {val_accuracy:.4f}")

# # Plot precision, recall, and accuracy
# fig, ax = plt.subplots(1, 3, figsize=(20, 3))
# ax = ax.ravel()
# metrics = ["precision", "recall", "accuracy"]
# train_metrics = [train_precision, train_recall, train_accuracy]
# val_metrics = [val_precision, val_recall, val_accuracy]

# for i, metric in enumerate(metrics):
#     ax[i].bar(['Train', 'Validation'], [train_metrics[i], val_metrics[i]], color=['blue', 'orange'])
#     ax[i].set_title(f"Model {metric.capitalize()}")
#     ax[i].set_ylabel(metric.capitalize())
#     ax[i].set_ylim(0, 1)
# plt.savefig('model_performance_metrics.png')
# plt.close()

# # Plot confusion matrix for validation set
# cm = confusion_matrix(val_true, val_pred)
# plt.figure(figsize=(6, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['NMBIC', 'MBIC'], yticklabels=['NMBIC', 'MBIC'])
# plt.xlabel('Predicted')
# plt.ylabel('True')
# plt.title('Confusion Matrix')
# plt.savefig('confusion_matrix.png')
# plt.close()

Dimension of the MRI scan is: (128, 128, 64, 1)
Epoch 1/100
41/41 - 11s - 259ms/step - acc: 0.6951 - loss: 0.6555 - val_acc: 0.2432 - val_loss: 0.7421
Epoch 2/100
41/41 - 11s - 258ms/step - acc: 0.7561 - loss: 0.6411 - val_acc: 0.7568 - val_loss: 0.6638
Epoch 3/100
41/41 - 11s - 265ms/step - acc: 0.7439 - loss: 0.6324 - val_acc: 0.7568 - val_loss: 0.5674
Epoch 4/100
41/41 - 11s - 257ms/step - acc: 0.7683 - loss: 0.6049 - val_acc: 0.7568 - val_loss: 0.6431
Epoch 5/100
41/41 - 11s - 259ms/step - acc: 0.7927 - loss: 0.5751 - val_acc: 0.7568 - val_loss: 0.9135
Epoch 6/100
41/41 - 11s - 262ms/step - acc: 0.7317 - loss: 0.6185 - val_acc: 0.7568 - val_loss: 0.8002
Epoch 7/100
41/41 - 11s - 256ms/step - acc: 0.7805 - loss: 0.5622 - val_acc: 0.7568 - val_loss: 0.9744
Epoch 8/100
41/41 - 10s - 255ms/step - acc: 0.7805 - loss: 0.5927 - val_acc: 0.7568 - val_loss: 1.1791
Epoch 9/100
41/41 - 11s - 261ms/step - acc: 0.7927 - loss: 0.5638 - val_acc: 0.7568 - val_loss: 1.1255
Epoch 10/100
41/41 - 10s 

  saveable.load_own_variables(weights_store.get(inner_path))



Final Training Set Metrics:
Precision: 0.0000
Recall: 0.0000
Accuracy: 0.7805

Final Validation Set Metrics:
Precision: 0.0000
Recall: 0.0000
Accuracy: 0.7568


In [4]:
"""
End‑to‑end script for:

1. Visualising one augmented MRI scan + a montage of slices (saved as PNGs).
2. Building, training and saving a 3‑D CNN on binary bladder‑cancer labels.
3. Plotting **training vs validation** curves for accuracy, precision, recall,
   AUC and loss.
4. Computing final set‑level metrics (precision, recall, accuracy, AUC),
   plotting them side‑by‑side, and drawing a confusion‑matrix heat‑map.

📌  Requirements
---------------------------------------------------------------------------
• TensorFlow 2.x  (incl. Keras API)          pip install tensorflow
• scikit‑learn                                pip install scikit-learn
• seaborn                                     pip install seaborn
• matplotlib                                  pip install matplotlib
• numpy                                       pip install numpy

The code assumes you already created two tf.data.Dataset objects:
    train_dataset       – yielding (volume, label) with shapes
                           (128,128,64,1) float32  and  () int32/float32
    validation_dataset  – same as above

Labels must be binary (0 = NMIBC, 1 = MIBC).

If your dataset pipeline is named differently, just replace the variables
`train_dataset` and `validation_dataset` before running.
---------------------------------------------------------------------------
"""

# --------------------------------------------------------------------------
# Imports
# --------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    precision_score,
    recall_score,
    accuracy_score,
    roc_auc_score,
    confusion_matrix,
)

# --------------------------------------------------------------------------
# 1.  Visualise one training scan + montage of 40 slices
# --------------------------------------------------------------------------
def plot_slices(num_rows, num_columns, width, height, data, out_path):
    """Create and save a montage of 2‑D slices from a 3‑D MRI volume."""
    data = np.rot90(np.array(data))            # rotate so cranial‑caudal reads L‑>R
    data = np.transpose(data)                  # swap axes for nicer montage
    data = np.reshape(data, (num_rows, num_columns, width, height))

    rows_data, columns_data = data.shape[:2]
    heights = [slc[0].shape[0] for slc in data]
    widths  = [slc.shape[1]    for slc in data[0]]

    fig_width  = 12.0
    fig_height = fig_width * sum(heights) / sum(widths)

    f, axarr = plt.subplots(
        rows_data,
        columns_data,
        figsize=(fig_width, fig_height),
        gridspec_kw={"height_ratios": heights},
    )

    for i in range(rows_data):
        for j in range(columns_data):
            axarr[i, j].imshow(data[i][j], cmap="gray")
            axarr[i, j].axis("off")

    plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)
    plt.savefig(out_path, dpi=200, bbox_inches="tight")
    plt.close()


# grab one mini‑batch and visualise
sample_batch = next(iter(train_dataset.take(1)))
volumes, labels = sample_batch
volumes = volumes.numpy()
sample_vol = volumes[0]

# single mid‑slice preview
plt.figure()
plt.imshow(np.squeeze(sample_vol[:, :, sample_vol.shape[2] // 2]), cmap="gray")
plt.axis("off")
plt.title("Sample mid‑slice")
plt.savefig("sample_mri_slice.png", dpi=200, bbox_inches="tight")
plt.close()

# montage of first 40 slices (4 × 10 grid)
plot_slices(
    num_rows=4,
    num_columns=10,
    width=128,
    height=128,
    data=sample_vol[:, :, :40],
    out_path="mri_slices_montage.png",
)

# --------------------------------------------------------------------------
# 2.  Define, compile and train the 3‑D CNN
# --------------------------------------------------------------------------
def build_3d_cnn(width=128, height=128, depth=64):
    inputs = keras.Input(shape=(width, height, depth, 1))

    x = layers.Conv3D(64, 3, activation="relu")(inputs)
    x = layers.MaxPool3D(2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(64, 3, activation="relu")(x)
    x = layers.MaxPool3D(2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(128, 3, activation="relu")(x)
    x = layers.MaxPool3D(2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(256, 3, activation="relu")(x)
    x = layers.MaxPool3D(2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(512, activation="relu")(x)
    x = layers.Dropout(0.3)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inputs, outputs, name="3d_cnn")


model = build_3d_cnn()

initial_lr   = 1e-4
lr_schedule  = keras.optimizers.schedules.ExponentialDecay(
    initial_lr, decay_steps=100_000, decay_rate=0.96, staircase=True
)

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
    loss="binary_crossentropy",
    metrics=[
        keras.metrics.BinaryAccuracy(name="accuracy"),
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
        keras.metrics.AUC(name="auc"),
    ],
    run_eagerly=False,
)

checkpoint_cb   = keras.callbacks.ModelCheckpoint(
    "3d_bladder_classification.keras", save_best_only=True
)
earlystop_cb    = keras.callbacks.EarlyStopping(
    monitor="val_auc", patience=15, restore_best_weights=True
)

EPOCHS = 100
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=EPOCHS,
    shuffle=True,
    callbacks=[checkpoint_cb, earlystop_cb],
    verbose=2,
)

# --------------------------------------------------------------------------
# 3.  Plot training vs validation curves
# --------------------------------------------------------------------------
plt.figure(figsize=(18, 10))

metrics_to_plot = ["accuracy", "precision", "recall", "auc", "loss"]
for idx, m in enumerate(metrics_to_plot, 1):
    plt.subplot(2, 3, idx)
    plt.plot(history.history[m], label=f"train_{m}")
    plt.plot(history.history[f"val_{m}"], label=f"val_{m}")
    plt.title(m.capitalize())
    plt.xlabel("Epoch")
    plt.ylabel(m.capitalize())
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.savefig("training_validation_curves.png", dpi=200)
plt.close()

# --------------------------------------------------------------------------
# 4.  Evaluate on train & validation sets for final metrics
# --------------------------------------------------------------------------
def evaluate_dataset(ds):
    y_true, y_pred_bin, y_prob = [], [], []
    for vol_batch, lbl_batch in ds:
        prob = model.predict(vol_batch, verbose=0).flatten()
        y_prob.extend(prob)
        y_pred_bin.extend((prob > 0.5).astype(int))
        y_true.extend(lbl_batch.numpy().astype(int))
    y_true      = np.array(y_true)
    y_pred_bin  = np.array(y_pred_bin)
    y_prob      = np.array(y_prob)
    prec  = precision_score(y_true, y_pred_bin, zero_division=0)
    rec   = recall_score(y_true, y_pred_bin, zero_division=0)
    acc   = accuracy_score(y_true, y_pred_bin)
    auc_  = roc_auc_score(y_true, y_prob)
    return prec, rec, acc, auc_


train_prec, train_rec, train_acc, train_auc = evaluate_dataset(train_dataset)
val_prec,   val_rec,   val_acc,   val_auc   = evaluate_dataset(validation_dataset)

print("\n=== Final Training‑set metrics ===")
print(f"Precision : {train_prec:.4f}")
print(f"Recall    : {train_rec:.4f}")
print(f"Accuracy  : {train_acc:.4f}")
print(f"AUC       : {train_auc:.4f}")

print("\n=== Final Validation‑set metrics ===")
print(f"Precision : {val_prec:.4f}")
print(f"Recall    : {val_rec:.4f}")
print(f"Accuracy  : {val_acc:.4f}")
print(f"AUC       : {val_auc:.4f}")

# bar‑chart comparison
bar_metrics      = ["Precision", "Recall", "Accuracy", "AUC"]
train_bar_values = [train_prec, train_rec, train_acc, train_auc]
val_bar_values   = [val_prec, val_rec, val_acc, val_auc]

plt.figure(figsize=(10, 4))
sns.barplot(x=bar_metrics, y=train_bar_values, color="steelblue", label="Train")
sns.barplot(x=bar_metrics, y=val_bar_values,   color="orange",    label="Validation")
plt.ylim(0, 1)
plt.title("Final metrics comparison")
plt.legend()
plt.savefig("final_metrics_barplot.png", dpi=200, bbox_inches="tight")
plt.close()

# --------------------------------------------------------------------------
# 5.  Confusion matrix on the validation set
# --------------------------------------------------------------------------
val_true_bin, val_pred_bin = [], []
for vol_batch, lbl_batch in validation_dataset:
    prob_batch = model.predict(vol_batch, verbose=0).flatten()
    val_true_bin.extend(lbl_batch.numpy().astype(int))
    val_pred_bin.extend((prob_batch > 0.5).astype(int))

cm = confusion_matrix(val_true_bin, val_pred_bin)
plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["NMIBC", "MIBC"],
    yticklabels=["NMIBC", "MIBC"],
)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion matrix (validation)")
plt.savefig("confusion_matrix.png", dpi=200, bbox_inches="tight")
plt.close()


Epoch 1/100


ValueError: as_list() is not defined on an unknown TensorShape.