In [2]:
import os
import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
from PIL import Image
from sklearn.model_selection import train_test_split
from monai.transforms import LoadImage, EnsureChannelFirst, ScaleIntensity, EnsureType, Compose, Resize
from monai.data import Dataset, DataLoader
from monai.networks.nets import UNet
from monai.losses import DiceLoss
from monai.inferers import sliding_window_inference
from pydicom import dcmread


In [3]:
# Paths
base_dir = r"D:\PROJECTS_FINAL\Cancer Treatment Prediction\final stuff\manifest-1732777365016"
metadata_file_path = os.path.join(base_dir, "metadata.csv")
output_dir = os.path.join(base_dir, "output_images_s1")
os.makedirs(output_dir, exist_ok=True)

# Load metadata
metadata = pd.read_csv(metadata_file_path)
metadata['Absolute Path'] = metadata['File Location'].apply(lambda x: os.path.join(base_dir, x.lstrip(".\\")))


In [5]:
import os
import pandas as pd
from pydicom import dcmread
from PIL import Image
from tqdm.auto import tqdm
import numpy as np

# Define paths
metadata_file_path = r"D:\\PROJECTS_FINAL\\Cancer Treatment Prediction\\final stuff\\manifest-1732777365016\\metadata.csv"
base_dir = r"D:\\PROJECTS_FINAL\\Cancer Treatment Prediction\\final stuff\\manifest-1732777365016"
processed_images_dir = os.path.join(base_dir, "breast_cancer_images_png_s44")

# Ensure output directory exists
os.makedirs(processed_images_dir, exist_ok=True)

# Load metadata
metadata = pd.read_csv(metadata_file_path)

# Update paths in the metadata to absolute paths
metadata['Absolute Path'] = metadata['File Location'].apply(lambda x: os.path.join(base_dir, x.lstrip(".\\")))

images, labels = [], []

# Process each folder listed in the metadata
for folder_path in tqdm(metadata['Absolute Path'], desc="Processing DICOM folders"):
    try:
        if not os.path.exists(folder_path):
            print(f"Folder not found: {folder_path}, skipping.")
            continue

        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)

            if not file_name.endswith(".dcm"):
                continue

            try:
                # Read DICOM file
                dicom = dcmread(file_path)
                if 'PixelData' not in dicom:
                    print(f"No PixelData in {file_path}, skipping.")
                    continue

                # Apply rescale slope and intercept if available
                pixel_array = dicom.pixel_array
                if hasattr(dicom, "RescaleSlope") and hasattr(dicom, "RescaleIntercept"):
                    slope = dicom.RescaleSlope
                    intercept = dicom.RescaleIntercept
                    pixel_array = pixel_array * slope + intercept

                # Normalize to 0-255 for PNG visualization
                pixel_array = np.clip(pixel_array, np.min(pixel_array), np.max(pixel_array))  # Clip to valid range

                # Check if the range is valid
                if np.max(pixel_array) == np.min(pixel_array):
                    print(f"Invalid range detected in {file_path}. Setting image to zeros.")
                    pixel_array = np.zeros_like(pixel_array)  # Avoid divide-by-zero
                else:
                    pixel_array = (pixel_array - np.min(pixel_array)) / (np.max(pixel_array) - np.min(pixel_array))  # Normalize to 0-1

                pixel_array = (pixel_array * 255).astype(np.uint8)  # Scale to 0-255

                # Resize the image to ensure consistent dimensions
                img = Image.fromarray(pixel_array).resize((512, 512))

                # Save image as PNG
                png_file_name = f"{os.path.basename(folder_path)}_{file_name.replace('.dcm', '.png')}"
                img.save(os.path.join(processed_images_dir, png_file_name))

                # Append to lists
                images.append(np.array(img))
                labels.append(0)  # Placeholder label

            except Exception as e:
                print(f"Error processing file {file_path}: {e}")

    except Exception as e:
        print(f"Error processing folder {folder_path}: {e}")

# Verify that all images have consistent shapes
image_shapes = [img.shape for img in images]
if len(set(image_shapes)) > 1:
    print(f"Inconsistent image shapes detected: {set(image_shapes)}")
    raise ValueError("All images must have the same shape.")

# Convert to NumPy arrays
X_images = np.array(images).reshape(-1, 512, 512, 1)  # Reshape to 4D array
y_images = np.array(labels)

# Print summary
print(f"Processed {len(X_images)} images.")
print(f"Processed {len(y_images)} labels.")


Processing DICOM folders:   0%|          | 0/20 [00:00<?, ?it/s]

invalid value encountered in divide
invalid value encountered in cast


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (6852,) + inhomogeneous part.

In [None]:
X_train, X_test, y_train, y_test = train_test_split(images, masks, test_size=0.2, random_state=42)

train_data = [{"image": img, "label": mask} for img, mask in zip(X_train, y_train)]
test_data = [{"image": img, "label": mask} for img, mask in zip(X_test, y_test)]

train_transforms = Compose([
    EnsureChannelFirst(),
    ScaleIntensity(),
    Resize((512, 512)),
    EnsureType()
])

test_transforms = Compose([
    EnsureChannelFirst(),
    ScaleIntensity(),
    Resize((512, 512)),
    EnsureType()
])


train_dataset = Dataset(data=train_data, transform=train_transforms)
test_dataset = Dataset(data=test_data, transform=test_transforms)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = UNet(
    spatial_dims=2,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
).to(device)

loss_function = DiceLoss(sigmoid=True)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


In [None]:
num_epochs = 10
val_interval = 2

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    model.train()
    epoch_loss = 0

    for batch_data in train_loader:
        inputs, labels = batch_data["image"].to(device), batch_data["label"].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1} average loss: {epoch_loss / len(train_loader)}")

    if (epoch + 1) % val_interval == 0:
        model.eval()
        with torch.no_grad():
            for val_data in test_loader:
                val_inputs, val_labels = val_data["image"].to(device), val_data["label"].to(device)
                val_outputs = sliding_window_inference(val_inputs, (128, 128), 4, model)
        print(f"Validation performed for epoch {epoch + 1}.")


In [None]:
model.eval()
characteristics = []

for idx, test_sample in enumerate(test_loader):
    test_image = test_sample["image"].to(device)
    test_output = sliding_window_inference(test_image, (128, 128), 4, model).detach().cpu().numpy()

    for i in range(len(test_image)):
        img = test_image[i, 0].cpu().numpy()
        pred = test_output[i, 0]

        # Binarize prediction
        pred_mask = (pred > 0.5).astype(np.uint8)
        labeled_mask = label(pred_mask)
        regions = regionprops(labeled_mask)

        # Draw bounding boxes
        for region in regions:
            minr, minc, maxr, maxc = region.bbox
            img[minr:maxr, [minc, maxc]] = 1  # Vertical Lines
            img[[minr, maxr], minc:maxc] = 1  # Horizontal Lines

            characteristics.append({
                "Image Index": idx,
                "Region Area": region.area,
                "Bounding Box": region.bbox,
                "Centroid": region.centroid,
            })

        # Save the image
        plt.imsave(os.path.join(output_dir, f"tumor_detected_{idx}.png"), img, cmap="gray")

# Save characteristics to CSV
pd.DataFrame(characteristics).to_csv(os.path.join(output_dir, "tumor_characteristics.csv"), index=False)


In [None]:
torch.save(model.state_dict(), os.path.join(output_dir, "tumor_segmentation_unet.pth"))
