In [1]:
import os
!mkdir -p /content/mha_images
!mkdir -p /content/mha_labels
!mkdir -p /content/CT_slices_small


In [3]:
!cp "/content/drive/MyDrive/PW1/PENGWIN_CT_train_images_part1/"*.mha /content/mha_images/
!cp "/content/drive/MyDrive/PW1/PENGWIN_CT_train_labels/"*.mha /content/mha_labels/


In [4]:
!pip install SimpleITK scipy


Collecting SimpleITK
  Downloading simpleitk-2.5.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.4 kB)
Downloading simpleitk-2.5.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (52.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SimpleITK
Successfully installed SimpleITK-2.5.3


In [5]:
import SimpleITK as sitk
import numpy as np
import scipy.ndimage

img_folder = "/content/mha_images"
label_folder = "/content/mha_labels"
save_folder = "/content/CT_slices_small"
os.makedirs(save_folder, exist_ok=True)

img_files = sorted([f for f in os.listdir(img_folder) if f.endswith(".mha")])
label_files = sorted([f for f in os.listdir(label_folder) if f.endswith(".mha")])


In [6]:
TARGET_SIZE = (128, 128)  # smaller for CPU

for i, (img_file, label_file) in enumerate(zip(img_files, label_files)):
    print(f"Processing volume {i+1}/{len(img_files)}: {img_file}")

    img_itk = sitk.ReadImage(os.path.join(img_folder, img_file))
    label_itk = sitk.ReadImage(os.path.join(label_folder, label_file))

    img_array = sitk.GetArrayFromImage(img_itk).astype(np.float32)
    label_array = sitk.GetArrayFromImage(label_itk).astype(np.float32)

    # Normalize image to [0,1]
    img_array /= np.max(img_array)

    for s in range(img_array.shape[0]):
        # Resize using scipy
        img_slice = scipy.ndimage.zoom(img_array[s], (TARGET_SIZE[0]/img_array.shape[1],
                                                      TARGET_SIZE[1]/img_array.shape[2]), order=1)
        label_slice = scipy.ndimage.zoom(label_array[s], (TARGET_SIZE[0]/label_array.shape[1],
                                                          TARGET_SIZE[1]/label_array.shape[2]), order=0)

        # Add channel dimension and save
        np.save(f"{save_folder}/CT_{i+1:03d}_slice_{s:03d}.npy", img_slice.astype(np.float32))
        np.save(f"{save_folder}/Label_{i+1:03d}_slice_{s:03d}.npy", label_slice.astype(np.float32))

    del img_array, label_array



Processing volume 1/50: 001.mha
Processing volume 2/50: 002.mha
Processing volume 3/50: 003.mha
Processing volume 4/50: 004.mha
Processing volume 5/50: 005.mha
Processing volume 6/50: 006.mha
Processing volume 7/50: 007.mha
Processing volume 8/50: 008.mha
Processing volume 9/50: 009.mha
Processing volume 10/50: 010.mha
Processing volume 11/50: 011.mha
Processing volume 12/50: 012.mha
Processing volume 13/50: 013.mha
Processing volume 14/50: 014.mha
Processing volume 15/50: 015.mha
Processing volume 16/50: 016.mha
Processing volume 17/50: 017.mha
Processing volume 18/50: 018.mha
Processing volume 19/50: 019.mha
Processing volume 20/50: 020.mha
Processing volume 21/50: 021.mha
Processing volume 22/50: 022.mha
Processing volume 23/50: 023.mha
Processing volume 24/50: 024.mha
Processing volume 25/50: 025.mha
Processing volume 26/50: 026.mha
Processing volume 27/50: 027.mha
Processing volume 28/50: 028.mha
Processing volume 29/50: 029.mha
Processing volume 30/50: 030.mha
Processing volume 3

In [7]:
import tensorflow as tf

slice_files = sorted([f for f in os.listdir(save_folder) if f.startswith("CT")])
label_files = sorted([f for f in os.listdir(save_folder) if f.startswith("Label")])

slice_paths = [os.path.join(save_folder, f) for f in slice_files]
label_paths = [os.path.join(save_folder, f) for f in label_files]

def load_slice(img_path, label_path):
    img = np.load(img_path)  # already normalized
    label = np.load(label_path)

    img = np.expand_dims(img, axis=-1)
    label = np.expand_dims(label, axis=-1)

    return tf.convert_to_tensor(img, dtype=tf.float32), tf.convert_to_tensor(label, dtype=tf.float32)

def tf_wrapper(img_path, label_path):
    img, label = tf.numpy_function(load_slice, [img_path, label_path], [tf.float32, tf.float32])
    img.set_shape([128, 128, 1])
    label.set_shape([128, 128, 1])
    return img, label


In [8]:
dataset = tf.data.Dataset.from_tensor_slices((slice_paths, label_paths))
dataset = dataset.map(tf_wrapper, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.shuffle(200).batch(2).prefetch(tf.data.AUTOTUNE)

In [9]:
from tensorflow.keras import layers, models

def build_small_unet(input_shape=(128,128,1)):
    inputs = layers.Input(shape=input_shape)

    # Encoder
    s1 = layers.Conv2D(16,3,padding='same',activation='relu')(inputs)
    p1 = layers.MaxPooling2D(2)(s1)

    s2 = layers.Conv2D(32,3,padding='same',activation='relu')(p1)
    p2 = layers.MaxPooling2D(2)(s2)

    # Bottleneck
    b = layers.Conv2D(64,3,padding='same',activation='relu')(p2)

    # Decoder
    d1 = layers.Conv2DTranspose(32,2,strides=2,padding='same')(b)
    d1 = layers.concatenate([d1,s2])
    d1 = layers.Conv2D(32,3,padding='same',activation='relu')(d1)

    d2 = layers.Conv2DTranspose(16,2,strides=2,padding='same')(d1)
    d2 = layers.concatenate([d2,s1])
    d2 = layers.Conv2D(16,3,padding='same',activation='relu')(d2)

    outputs = layers.Conv2D(1,1,activation='sigmoid')(d2)
    return models.Model(inputs, outputs)

model = build_small_unet()
model.summary()

In [11]:
def dice_loss(y_true, y_pred):
    smooth = 1e-6
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return 1 - ((2.*intersection + smooth)/(tf.reduce_sum(y_true_f)+tf.reduce_sum(y_pred_f)+smooth))

model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss=dice_loss,
              metrics=['accuracy'])

In [None]:
history = model.fit(dataset, epochs=10)  


Epoch 1/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1457s[0m 179ms/step - accuracy: 0.9126 - loss: -0.2647
Epoch 2/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1427s[0m 175ms/step - accuracy: 0.9583 - loss: -0.5250
Epoch 3/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1247s[0m 154ms/step - accuracy: 0.9644 - loss: -0.5751
Epoch 4/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1337s[0m 161ms/step - accuracy: 0.9671 - loss: -0.5921
Epoch 5/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1297s[0m 160ms/step - accuracy: 0.9687 - loss: -0.6036
Epoch 6/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1306s[0m 156ms/step - accuracy: 0.9702 - loss: -0.6044
Epoch 7/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1359s[0m 168ms/step - accuracy: 0.9710 - loss: -0.6308
Epoch 8/10
[1m8083/8083[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1400s[0m 168ms/step - accuracy: