In [1]:
from IPython.display import clear_output
!pip install git+https://github.com/shijianjian/EfficientNet-PyTorch-3D
clear_output()

In [2]:
!pip install efficientnet_pytorch


Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l- done
Building wheels for collected packages: efficientnet_pytorch
  Building wheel for efficientnet_pytorch (setup.py) ... [?25l- done
[?25h  Created wheel for efficientnet_pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16428 sha256=d708d61305870133ab68490a15fe2ecfe2e69546dca8fd4eb914a60f30546bc4
  Stored in directory: /root/.cache/pip/wheels/03/3f/e9/911b1bc46869644912bda90a56bcf7b960f20b5187feea3baf
Successfully built efficientnet_pytorch
Installing collected packages: efficientnet_pytorch
Successfully installed efficientnet_pytorch-0.7.1


In [3]:
pip show pydicom


Name: pydicom
Version: 2.4.4
Summary: A pure Python package for reading and writing DICOM data
Home-page: 
Author: 
Author-email: Darcy Mason and contributors <darcymason@gmail.com>
License: 
Location: /opt/conda/lib/python3.10/site-packages
Requires: 
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os

# Specify your dataset directory
dataset_dir = "/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification"
# Get a list of all file names in the dataset directory
file_names = [f for f in os.listdir(dataset_dir) if os.path.isfile(os.path.join(dataset_dir, f))]

print(file_names)




['sample_submission.csv', 'train_labels.csv']


# Let's start with the first step: Data Preprocessing. Here's how you can load DICOM images from each patient directory and preprocess them using Python with the PyDICOM and OpenCV libraries:

In [5]:
import os
import cv2
import numpy as np
import pydicom
import glob

def load_dicom_images(patient_dir):
    """
    Load DICOM images from a patient directory.
    
    Parameters:
    - patient_dir: Path to the directory containing DICOM images.
    
    Returns:
    - images: List of loaded DICOM images.
    """
    images = []
    for filepath in glob.glob(os.path.join(patient_dir, '**', '*.dcm'), recursive=True):
        try:
            ds = pydicom.dcmread(filepath)
            img = ds.pixel_array
            images.append(img)
        except Exception as e:
            pass
    return images

def preprocess_images(images):
    """
    Preprocess a list of images.
​
    Parameters:
    - images: List of input images.
​
    Returns:
    - preprocessed_images: List of preprocessed images.
    """
    preprocessed_images = []
    for img in images:
        # Resize image to desired dimensions (e.g., 256x256)
        img = cv2.resize(img, (256, 256))
        # Normalize pixel values (if needed)
        img = img / 255.0
        preprocessed_images.append(img)
    return preprocessed_images

## Example usage:
dataset_dir = "/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/"
print("Files in dataset directory:", os.listdir(dataset_dir)[:5])  # Corrected line

# Debug print: Print first 5 characters of DICOM file paths
count = 0
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        if file.endswith(".dcm"):
            print(os.path.join(root, file))
            count += 1
            if count >= 5:
                break
    if count >= 5:
        break
        
images = load_dicom_images(dataset_dir)
print("Number of DICOM images loaded:", len(images))
preprocessed_images = preprocess_images(images)
print("Number of preprocessed images:", len(preprocessed_images))


Files in dataset directory: ['Image-273.dcm', 'Image-245.dcm', 'Image-365.dcm', 'Image-130.dcm', 'Image-98.dcm']
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-273.dcm
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-245.dcm
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-365.dcm
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-130.dcm
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-98.dcm
Number of DICOM images loaded: 400
Number of preprocessed images: 400


# The second step is Data Augmentation. Data augmentation is crucial for training robust machine learning models, especially when dealing with limited datasets. Here's how you can augment your preprocessed images using the ImageDataGenerator class from Keras

In [6]:
import os
import cv2
import numpy as np
import pydicom
import glob
import pandas as pd
from multiprocessing import Pool
from functools import partial
from tensorflow.keras.preprocessing.image import ImageDataGenerator


def load_labels(file_path):
    """
    Load MGMT values from the train_labels.csv file into a dictionary.
    
    Parameters:
    - file_path: Path to the train_labels.csv file.
    
    Returns:
    - labels_dict: Dictionary mapping patient IDs to MGMT values.
    """
    labels_df = pd.read_csv(file_path)
    labels_dict = dict(zip(labels_df['BraTS21ID'], labels_df['MGMT_value']))
    return labels_dict

def load_dicom(filepath):
    """
    Load and preprocess a single DICOM image.
    
    Parameters:
    - filepath: Path to the DICOM file.
    
    Returns:
    - img: Preprocessed image.
    """
    try:
        ds = pydicom.dcmread(filepath)
        img = ds.pixel_array
        img = cv2.resize(img, (256, 256))
        img = img / 255.0
        return img
    except Exception as e:
        return None

def load_dicom_batch(filepaths):
    """
    Load and preprocess a batch of DICOM images in parallel.
    
    Parameters:
    - filepaths: List of file paths to DICOM files.
    
    Returns:
    - batch_images: Batch of preprocessed images.
    """
    with Pool() as pool:
        batch_images = pool.map(load_dicom, filepaths)
    return np.array(batch_images)

def load_dicom_images_generator(dataset_dir, labels_dict, batch_size=256):
    """
    Generator function to load DICOM images from patient directories in batches.
    
    Parameters:
    - dataset_dir: Path to the directory containing patient directories.
    - labels_dict: Dictionary mapping patient IDs to MGMT values.
    - batch_size: Number of images to load per batch.
    
    Yields:
    - batch_images: Batch of preprocessed images.
    - batch_labels: Batch of MGMT labels.
    """
    for patient_dir in os.listdir(dataset_dir):
        patient_path = os.path.join(dataset_dir, patient_dir)
        if os.path.isdir(patient_path):
            filepaths = glob.glob(os.path.join(patient_path, '**', '*.dcm'), recursive=True)
            filepaths = filepaths[:batch_size]  # Select only first batch_size files
            batch_images = load_dicom_batch(filepaths)
            patient_id = os.path.basename(patient_path)
            mgmt_value = labels_dict.get(patient_id, -1)  # Default value of -1 if not found
            yield batch_images, mgmt_value

# Data augmentation using Keras ImageDataGenerator
data_augmenter = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# Example usage:
dataset_dir = "/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/"
labels_file = "/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"

# Load MGMT labels
labels_dict = load_labels(labels_file)

# Load DICOM images and labels using the generator
image_label_generator = load_dicom_images_generator(dataset_dir, labels_dict)

# Process images and labels in batches with data augmentation
for batch_index, (batch_images, batch_labels) in enumerate(image_label_generator):
    # Reshape batch_images to have rank 4
    batch_images = np.expand_dims(batch_images, axis=-1)  # Add channel dimension
    batch_images = np.repeat(batch_images, 3, axis=-1)  # Repeat grayscale image to 3 channels
    augmented_images = next(data_augmenter.flow(batch_images, batch_size=256, shuffle=False))
    print(f"Batch {batch_index + 1}: Augmented images - {augmented_images.shape}, Labels - {batch_labels}")



2024-05-15 16:12:11.654951: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-15 16:12:11.655052: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-15 16:12:11.785633: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Batch 1: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 2: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 3: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 4: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 5: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 6: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 7: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 8: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 9: Augmented images - (226, 256, 256, 3), Labels - -1
Batch 10: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 11: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 12: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 13: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 14: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 15: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 16: Augmented images - (256, 256, 256, 3), Labels - -1
Batch 17: Augmented images - (256