In [18]:
!pip install numpy pandas matplotlib opencv-python pydicom scikit-image tensorflow


Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1


In [19]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


the path Should be update

In [20]:
 import os

# Path to the dataset folder in Google Drive
DATASET_PATH = "/content/drive/MyDrive/kaggle_3m"  # Update this if necessary

# Check dataset structure
print("Folders in dataset:", os.listdir(DATASET_PATH)[:5])  # Show first 5 patient folders


Folders in dataset: ['README.md', 'data.csv', 'TCGA_HT_8106_19970727', 'TCGA_HT_8114_19981030', 'TCGA_HT_8111_19980330']


In [21]:
import os
import numpy as np
import cv2
from skimage.transform import resize
import matplotlib.pyplot as plt
from tqdm import tqdm  # Optional for progress bar

# Define dataset path
DATASET_PATH = "/content/drive/MyDrive/kaggle_3m"
IMG_SIZE = (256, 256)  # Resize target

def load_dataset(dataset_path, img_size=(256, 256)):
    images = []
    masks = []
    filenames = []

    # Go through each patient folder
    for folder_name in tqdm(sorted(os.listdir(dataset_path))):
        folder_path = os.path.join(dataset_path, folder_name)
        if not os.path.isdir(folder_path):
            continue  # Skip files like README.md or CSVs

        # List all .tif files in the folder
        files = sorted(os.listdir(folder_path))
        img_files = [f for f in files if f.endswith(".tif") and "_mask" not in f]
        mask_files = [f for f in files if f.endswith("_mask.tif")]

        # Build dict for pairing
        img_dict = {f.replace(".tif", ""): f for f in img_files}
        mask_dict = {f.replace("_mask.tif", ""): f for f in mask_files}

        # Match images and masks by their base name
        common_keys = set(img_dict.keys()) & set(mask_dict.keys())

        for key in sorted(common_keys):
            img_path = os.path.join(folder_path, img_dict[key])
            mask_path = os.path.join(folder_path, mask_dict[key])

            # Load the image in grayscale or color as needed
            image = cv2.imread(img_path, cv2.IMREAD_COLOR)  # Use grayscale if needed
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

            if image is None or mask is None:
                continue  # Skip if loading fails

            # Resize both
            image = cv2.resize(image, img_size)
            mask = cv2.resize(mask, img_size)

            # Normalize image and threshold mask
            image = image.astype(np.float32) / 255.0
            mask = (mask > 0).astype(np.uint8)  # Convert mask to binary

            images.append(image)
            masks.append(mask)
            filenames.append(img_dict[key])

    images = np.array(images)
    masks = np.array(masks)[..., np.newaxis]  # Add channel dim for masks (N, H, W, 1)
    return images, masks, filenames

# Load the dataset
images, masks, filenames = load_dataset(DATASET_PATH)

# Display dataset info
print("✅ Total Scans Loaded:", len(images))
print("Image shape:", images.shape)
print("Mask shape:", masks.shape)
print("Unique mask values (sample):", np.unique(masks[0]))

# Visualize a few samples
def show_samples(images, masks, filenames, num=4):
    import random
    indices = random.sample(range(len(images)), num)
    plt.figure(figsize=(10, num * 3))
    for i, idx in enumerate(indices):
        plt.subplot(num, 2, i*2 + 1)
        plt.imshow(images[idx])
        plt.title(f"Image: {filenames[idx]}")
        plt.axis("off")

        plt.subplot(num, 2, i*2 + 2)
        plt.imshow(masks[idx].squeeze(), cmap="gray")
        plt.title("Mask")
        plt.axis("off")
    plt.tight_layout()
    plt.show()

100%|██████████| 112/112 [20:29<00:00, 10.97s/it]


✅ Total Scans Loaded: 3929
Image shape: (3929, 256, 256, 3)
Mask shape: (3929, 256, 256, 1)
Unique mask values (sample): [0]


In [22]:
print("Mask pixel sum (first 10):", [np.sum(mask) for mask in masks[:10]])


Mask pixel sum (first 10): [np.uint64(0), np.uint64(0), np.uint64(1426), np.uint64(2646), np.uint64(2765), np.uint64(2877), np.uint64(1952), np.uint64(1828), np.uint64(811), np.uint64(74)]


In [23]:
from sklearn.model_selection import train_test_split

# Split into training (80%), validation (10%), and test (10%) sets
X_train, X_temp, y_train, y_temp = train_test_split(images, masks, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print("Training set:", X_train.shape, y_train.shape)
print("Validation set:", X_val.shape, y_val.shape)
print("Test set:", X_test.shape, y_test.shape)


Training set: (3143, 256, 256, 3) (3143, 256, 256, 1)
Validation set: (393, 256, 256, 3) (393, 256, 256, 1)
Test set: (393, 256, 256, 3) (393, 256, 256, 1)


In [24]:
import tensorflow as tf

# Create a Python generator function
def data_generator(X, y):
    for img, mask in zip(X, y):
        yield img, mask

# Use the generator to create a TensorFlow dataset
def create_tf_dataset(X, y, batch_size=16):
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(X, y),
        output_signature=(
            tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(256, 256, 1), dtype=tf.uint8)
        )
    )
    dataset = dataset.shuffle(500).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Create datasets without consuming excessive RAM
train_dataset = create_tf_dataset(X_train, y_train)
val_dataset = create_tf_dataset(X_val, y_val)
test_dataset = create_tf_dataset(X_test, y_test)

print("✅ TF Datasets created successfully!")


✅ TF Datasets created successfully!


3D U-net model

In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF

# Edge Detection Module using Sobel Filter
class EdgeDetection(nn.Module):
    def __init__(self):
        super(EdgeDetection, self).__init__()
        sobel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32).unsqueeze(0).unsqueeze(0)
        sobel_y = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32).unsqueeze(0).unsqueeze(0)
        self.weight_x = nn.Parameter(sobel_x, requires_grad=False)
        self.weight_y = nn.Parameter(sobel_y, requires_grad=False)

    def forward(self, x):
        edge_x = F.conv2d(x, self.weight_x, padding=1)
        edge_y = F.conv2d(x, self.weight_y, padding=1)
        edges = torch.sqrt(edge_x ** 2 + edge_y ** 2)
        return edges

# U-Net Block with Edge Features
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)

# ED-U-Net Model
class ED_UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=5):
        super(ED_UNet, self).__init__()
        self.edge_detector = EdgeDetection()

        self.enc1 = DoubleConv(in_channels + 1, 64)  # Extra channel for edge map
        self.enc2 = DoubleConv(64, 128)
        self.enc3 = DoubleConv(128, 256)
        self.enc4 = DoubleConv(256, 512)

        self.pool = nn.MaxPool2d(2)
        self.bottleneck = DoubleConv(512, 1024)

        self.up1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec1 = DoubleConv(1024, 512)

        self.up2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec2 = DoubleConv(512, 256)

        self.up3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec3 = DoubleConv(256, 128)

        self.up4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec4 = DoubleConv(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        edges = self.edge_detector(x[:, :1, :, :])  # Apply edge detection to first channel
        x = torch.cat([x, edges], dim=1)  # Concatenate edges as extra channel

        enc1 = self.enc1(x)
        enc2 = self.enc2(self.pool(enc1))
        enc3 = self.enc3(self.pool(enc2))
        enc4 = self.enc4(self.pool(enc3))

        bottleneck = self.bottleneck(self.pool(enc4))

        up1 = self.up1(bottleneck)
        dec1 = self.dec1(torch.cat([up1, enc4], dim=1))

        up2 = self.up2(dec1)
        dec2 = self.dec2(torch.cat([up2, enc3], dim=1))

        up3 = self.up3(dec2)
        dec3 = self.dec3(torch.cat([up3, enc2], dim=1))

        up4 = self.up4(dec3)
        dec4 = self.dec4(torch.cat([up4, enc1], dim=1))

        return self.final_conv(dec4)

# Initialize Model
model = ED_UNet(in_channels=3, out_channels=5)  # 5 classes for multi-class segmentation
print(model)


ED_UNet(
  (edge_detector): EdgeDetection()
  (enc1): DoubleConv(
    (conv): Sequential(
      (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (enc2): DoubleConv(
    (conv): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (enc3): DoubleConv(
    (conv): Sequential(
  

In [26]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Convert TensorFlow dataset into PyTorch Dataset
class BrainSegmentationDataset(Dataset):
    def __init__(self, images, masks):
        self.images = images
        self.masks = masks

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx].transpose(2, 0, 1)  # (H, W, C) → (C, H, W)
        mask = self.masks[idx].squeeze()  # Remove extra dimension if needed

        image = torch.tensor(image, dtype=torch.float32)
        mask = torch.tensor(mask, dtype=torch.long)  # For multi-class segmentation

        return image, mask

# Convert datasets
train_dataset = BrainSegmentationDataset(X_train, y_train)
val_dataset = BrainSegmentationDataset(X_val, y_val)
test_dataset = BrainSegmentationDataset(X_test, y_test)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print("✅ PyTorch DataLoader created successfully!")


✅ PyTorch DataLoader created successfully!


here below i have added import torch.nn as np

In [29]:
 # Dice Loss (for better segmentation)
 class DiceLoss(nn.Module):
    def __init__(self):
        super(DiceLoss, self).__init__()
    def forward(self, preds, targets, smooth=1.0):
        preds = torch.softmax(preds, dim=1)  # Convert logits to probabilities
        targets = torch.nn.functional.one_hot(targets, num_classes=5).permute(0, 3, 1, 2)  # One-hot encode masks
        intersection = torch.sum(preds * targets, dim=(2, 3))
        union = torch.sum(preds + targets, dim=(2, 3))
        dice = (2.0 * intersection + smooth) / (union + smooth)
        return 1 - dice.mean()
 # Loss function (Dice + CrossEntropy)
 criterion = lambda preds, targets: 0.5 * nn.CrossEntropyLoss()(preds, targets) + 0.5 * DiceLoss()(preds, targets)

In [31]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple model (replace with your actual model)
class UNet(nn.Module):  # Example segmentation model
    def __init__(self, in_channels=3, num_classes=5):
        super(UNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, num_classes, kernel_size=3, padding=1)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.conv2(x)
        return x

# Instantiate the model
model = UNet()

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
import time
import torch
import torch.nn as nn

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    best_val_loss = float("inf")

    for epoch in range(epochs):
        start_time = time.time()

        # Training Phase
        model.train()
        train_loss = 0.0
        for images, masks in train_loader:
            images, masks = images.to(device), masks.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # Validation Phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, masks in val_loader:
                images, masks = images.to(device), masks.to(device)
                outputs = model(images)
                loss = criterion(outputs, masks)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        # Print results
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Time: {time.time() - start_time:.2f}s")

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "best_model.pth")

    print("✅ Training complete!")

# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10)


In [None]:
import torch

# Load trained model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ED_UNet(in_channels=3, out_channels=1).to(device)
model.load_state_dict(torch.load("ed_unet_brain_segmentation.pth", map_location=device))
model.eval()  # Set to inference mode

# Convert to FP16 (Half-Precision)
model.half()

# Example input tensor for testing
example_input = torch.randn(1, 3, 256, 256, dtype=torch.half, device=device)

# Run inference to check
with torch.no_grad():
    output = model(example_input)

print("Model successfully converted to FP16 & tested!")


In [None]:
# Convert model to TorchScript (Optimized for Deployment)
traced_model = torch.jit.trace(model, example_input)
torch.jit.save(traced_model, "ed_unet_quantized.pt")

print("TorchScript model saved: ed_unet_quantized.pt")


In [None]:
import onnx

# Convert model to ONNX format
onnx_model_path = "ed_unet_quantized.onnx"
torch.onnx.export(
    model, example_input, onnx_model_path,
    export_params=True, opset_version=11,
    do_constant_folding=True, input_names=['input'], output_names=['output']
)

# Check ONNX Model
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)

print(f"ONNX Model saved: {onnx_model_path}")


evaluation

In [None]:
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind

# Load optimized model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.jit.load("ed_unet_quantized.pt").to(device)
model.eval()

# Function to preprocess image for inference
def preprocess_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.0
    image = cv2.resize(image, (256, 256))
    image = torch.tensor(image).permute(2, 0, 1).unsqueeze(0).to(device)  # Convert to (1, C, H, W)
    return image.half()  # Use FP16 for optimized inference

# Inference Function
def predict_mask(image_path):
    image = preprocess_image(image_path)
    with torch.no_grad():
        pred_mask = model(image).squeeze().cpu().numpy()  # Convert back to NumPy
    pred_mask = (pred_mask > 0.5).astype(np.uint8)  # Apply thresholding
    return pred_mask


In [None]:
# Dice Score & IoU Calculation
def dice_score(y_true, y_pred):
    intersection = np.sum(y_true * y_pred)
    return (2. * intersection) / (np.sum(y_true) + np.sum(y_pred) + 1e-7)

def iou_score(y_true, y_pred):
    intersection = np.sum(y_true * y_pred)
    union = np.sum(y_true) + np.sum(y_pred) - intersection
    return intersection / (union + 1e-7)

# Evaluate on a Test Image
mask_true = cv2.imread("test_mask.tif", cv2.IMREAD_GRAYSCALE) / 255.0  # Ground truth mask
mask_pred = predict_mask("test_image.tif")  # Predicted mask

print(f"Dice Score: {dice_score(mask_true, mask_pred):.4f}")
print(f"IoU Score: {iou_score(mask_true, mask_pred):.4f}")


In [None]:
# Extract tumor pixel intensities from different patient groups
tumor_group1 = mask_pred[mask_true == 1].flatten()  # Example group 1
tumor_group2 = mask_pred[mask_true == 0].flatten()  # Example group 2

# Perform t-test (Compare segmented regions)
t_stat, p_value = ttest_ind(tumor_group1, tumor_group2, equal_var=False)

print(f"T-Test Statistic: {t_stat:.4f}, P-Value: {p_value:.4e}")
if p_value < 0.05:
    print("Significant difference in tumor segmentations between groups (p < 0.05).")
else:
    print("No significant difference in tumor segmentations.")


In [None]:
import seaborn as sns

# Heatmap of Predicted Mask
plt.figure(figsize=(10, 5))
sns.heatmap(mask_pred, cmap="jet", alpha=0.6)
plt.title("Heatmap of Segmented Region")
plt.show()

# Volumetric Plot (Histogram of segmented areas)
plt.hist(mask_pred.flatten(), bins=50, alpha=0.7, color="blue", label="Segmented Pixels")
plt.legend()
plt.title("Tumor Volume Distribution")
plt.xlabel("Intensity")
plt.ylabel("Frequency")
plt.show()
