In [1]:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nibabel as nib
import random
from scipy import ndimage
import time
import datetime

from tensorflow import keras
from tensorflow.keras.layers import Input, Conv3D, MaxPool3D, GlobalAveragePooling3D, Dense, Dropout, BatchNormalization, concatenate
from tensorflow.keras.models import Model, load_model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.utils import plot_model
from keras.metrics import AUC
from tensorflow.keras.regularizers import l2
from keras.utils import Sequence

2025-04-10 00:38:41.702625: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744245522.151629      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744245522.284965      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# The following code will only execute
# successfully when compression is complete

import kagglehub

# Download latest version
path = kagglehub.dataset_download("maximnaddaf/ct-multi-data")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/ct-multi-data


In [7]:
def count_directories(path):
    # List all items in the directory and filter for directories
    return len([d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))])


test = "/kaggle/input/ct-multi-data/Processed_CT_Scans/test/CT-0"
test_2  = "/kaggle/input/ct-multi-data/Processed_CT_Scans/test/CT-23"

train = "/kaggle/input/ct-multi-data/Processed_CT_Scans/train/CT-0"
train_2  = "/kaggle/input/ct-multi-data/Processed_CT_Scans/train/CT-23"

print(f"Number of directories in test (CT-0, CT-23): {count_directories(test)}, {count_directories(test_2)}")
print(f"Number of directories in train (CT-0, CT-23): {count_directories(train)}, {count_directories(train_2)}")

Number of directories in test (CT-0, CT-23): 172, 172
Number of directories in train (CT-0, CT-23): 860, 860


In [9]:
def load_nii(file_path):
    return nib.load(file_path).get_fdata()

class MultiModalDataset(Sequence):
    def __init__(self, patient_dirs, labels, batch_size, fold, shuffle=True):
        self.patient_dirs = patient_dirs
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.fold = fold
        self.indices = np.arange(len(self.patient_dirs))
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.patient_dirs) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_patient_dirs = [self.patient_dirs[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        # Load modalities
        X = self.__load_modalities(batch_patient_dirs)
        y = np.array(batch_labels)

        X = tuple(tf.convert_to_tensor(x, dtype=tf.float32) for x in X)

        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __load_modalities(self, patient_dirs):
        modalities = {'split_1': [], 'split_2': []}

        for patient_dir in patient_dirs:
            patient_name = os.path.basename(patient_dir)

            split_1_path = os.path.join(patient_dir, 'split_part_1.nii')
            split_2_path = os.path.join(patient_dir, 'split_part_2.nii')

            # Load each modality
            split_1 = load_nii(split_1_path)
            split_2 = load_nii(split_2_path)

            modalities['split_1'].append(split_1)
            modalities['split_2'].append(split_2)


        # Stack the modalities for each patient along the first axis (batch axis)
        X = [
            np.array(modalities['split_1']),
            np.array(modalities['split_2']),
        ]

        return X


def load_data_from_folder(base_dir):
    patient_dirs = []
    labels = []

    for class_name in ["CT-0", "CT-23"]:  # Only two known classes
        class_path = os.path.join(base_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        label = 0 if class_name == "CT-0" else 1

        for study_name in os.listdir(class_path):
            study_path = os.path.join(class_path, study_name)
            if os.path.isdir(study_path):
                patient_dirs.append(study_path)
                labels.append(label)

    return np.array(patient_dirs), np.array(labels)

# Load and shuffle train data
train_dirs, train_labels = load_data_from_folder("train")
train_indices = np.arange(len(train_dirs))
np.random.shuffle(train_indices)
train_dirs = train_dirs[train_indices]
train_labels = train_labels[train_indices]

# Load and shuffle test data
test_dirs, test_labels = load_data_from_folder("test")
test_indices = np.arange(len(test_dirs))
np.random.shuffle(test_indices)
test_dirs = test_dirs[test_indices]
test_labels = test_labels[test_indices]

# Create your dataset objects
train_dataset = MultiModalDataset(train_dirs, train_labels, batch_size=16, fold="train", shuffle=True)
test_dataset = MultiModalDataset(test_dirs, test_labels, batch_size=8, fold="test", shuffle=True)

X, y = train_dataset[0]

for modality, data in zip(['split_1', 'split_2'], X):
    print(f"Shape of {modality} modality: {data.shape}")

print(f"label: {y}")

Shape of split_1 modality: (0,)
Shape of split_2 modality: (0,)
label: []


I0000 00:00:1744246359.130069      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1744246359.130993      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5
