In [None]:
pip install monai[all] nibabel pydicom scikit-learn


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from monai.transforms import (
    LoadImage, AddChannel, ScaleIntensity, EnsureType, Compose, Resize
)
from monai.networks.nets import UNet
from monai.data import Dataset, DataLoader
from monai.losses import DiceLoss
from monai.metrics import DiceMetric
from monai.inferers import sliding_window_inference
import torch.compile


In [4]:
# Paths
metadata_file_path = r"D:\PROJECTS_FINAL\Cancer Treatment Prediction\final stuff\manifest-1732777365016\metadata.csv"
base_dir = r"D:\PROJECTS_FINAL\Cancer Treatment Prediction\final stuff\manifest-1732777365016"

# Load metadata
metadata = pd.read_csv(metadata_file_path)
metadata['Absolute Path'] = metadata['File Location'].apply(lambda x: os.path.join(base_dir, x.lstrip(".\\")))

# Ensure data directory exists
processed_images_dir = os.path.join(base_dir, "processed_images")
os.makedirs(processed_images_dir, exist_ok=True)


In [5]:
from pydicom import dcmread
from PIL import Image

IMG_SIZE = (512, 512)
images, masks = [], []

for folder_path in tqdm(metadata['Absolute Path'], desc="Processing DICOM Folders"):
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}, skipping.")
        continue

    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if not file_name.endswith(".dcm"):
            continue

        try:
            dicom = dcmread(file_path)
            if 'PixelData' not in dicom:
                print(f"No PixelData in {file_path}, skipping.")
                continue

            pixel_array = dicom.pixel_array
            img = Image.fromarray(pixel_array)
            img = img.resize(IMG_SIZE)
            img = np.array(img) / 255.0  # Normalize

            # Placeholder for segmentation mask (Use actual mask if available)
            mask = np.zeros_like(img)  
            
            images.append(img)
            masks.append(mask)

        except Exception as e:
            print(f"Error processing file {file_path}: {e}")

# Convert to NumPy arrays
images = np.array(images)[..., None]  # Add channel dimension
masks = np.array(masks)[..., None]


Processing DICOM Folders:   0%|          | 0/20 [00:00<?, ?it/s]

In [1]:
X_train, X_test, y_train, y_test = train_test_split(images, masks, test_size=0.2, random_state=42)

# MONAI Dataset and DataLoader
train_data = [{"image": img, "label": mask} for img, mask in zip(X_train, y_train)]
test_data = [{"image": img, "label": mask} for img, mask in zip(X_test, y_test)]

train_transforms = Compose([
    LoadImage(image_only=True),
    AddChannel(),
    ScaleIntensity(),
    Resize((512, 512)),
    EnsureType()
])

test_transforms = Compose([
    AddChannel(),
    ScaleIntensity(),
    Resize((512, 512)),
    EnsureType()
])

train_dataset = Dataset(data=train_data, transform=train_transforms)
test_dataset = Dataset(data=test_data, transform=test_transforms)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4)


NameError: name 'train_test_split' is not defined

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = UNet(
    spatial_dims=2,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
).to(device)

loss_function = DiceLoss(sigmoid=True)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


In [None]:
num_epochs = 20
val_interval = 2
dice_metric = DiceMetric(include_background=False, reduction="mean")

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    model.train()
    epoch_loss = 0

    for batch_data in train_loader:
        inputs, labels = batch_data["image"].to(device), batch_data["label"].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1} average loss: {epoch_loss / len(train_loader)}")

    # Validation
    if (epoch + 1) % val_interval == 0:
        model.eval()
        with torch.no_grad():
            dice_scores = []
            for val_data in test_loader:
                val_inputs, val_labels = val_data["image"].to(device), val_data["label"].to(device)
                val_outputs = sliding_window_inference(val_inputs, (128, 128), 4, model)
                dice_score = dice_metric(val_outputs, val_labels)
                dice_scores.append(dice_score.item())
            print(f"Validation Dice Score: {np.mean(dice_scores)}")


In [None]:
model.eval()
for idx, test_sample in enumerate(test_loader):
    test_image = test_sample["image"].to(device)
    test_output = sliding_window_inference(test_image, (128, 128), 4, model)
    test_output = test_output.detach().cpu().numpy()

    # Display
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.title("Original Image")
    plt.imshow(test_image[0, 0].cpu(), cmap="gray")

    plt.subplot(1, 3, 2)
    plt.title("Ground Truth")
    plt.imshow(test_sample["label"][0, 0].cpu(), cmap="gray")

    plt.subplot(1, 3, 3)
    plt.title("Predicted Segmentation")
    plt.imshow(test_output[0, 0], cmap="gray")
    plt.show()


In [None]:
torch.save(model.state_dict(), "tumor_segmentation_unet.pth")
