# MRI preprocessing

In [12]:
import os
import pandas as pd
from datetime import datetime

def find_latest_t1w_scan(base_path):
    participant_scans = {}
    for item in os.listdir(base_path):
        # Check if the folder name starts with 'sub-', indicating a participant folder
        if item.startswith('sub-'):
            participant_path = os.path.join(base_path, item)
            # Strip 'sub-' prefix to use as the dictionary key
            participant_id = item[4:]  # Removes the first four characters 'sub-'
            latest_date = None
            latest_file = None

            # Check each session folder within the participant directory
            for session_folder in os.listdir(participant_path):
                if session_folder.startswith('ses-'):
                    session_date = session_folder.split('-')[1]  # Extract the date from the session folder name
                    session_path = os.path.join(participant_path, session_folder)
                    anat_path = os.path.join(session_path, 'anat')  # 'anat' folder inside the session folder

                    if os.path.exists(anat_path) and os.path.isdir(anat_path):
                        # Iterate over all .nii files in the 'anat' directory
                        for file in os.listdir(anat_path):
                            if file.endswith('T1w.nii'):
                                file_date = datetime.strptime(session_date, "%Y%m%d")
                                # Update if this file's date is more recent
                                if latest_date is None or file_date > latest_date:
                                    latest_date = file_date
                                    latest_file = os.path.join(anat_path, file)

            if latest_file:
                participant_scans[participant_id] = latest_file

    return participant_scans

In [13]:
base_path = '../../data/raw/resectMap_nifti_only_20240430'
latest_scans = find_latest_t1w_scan(base_path)
#latest_scans.items()

In [14]:
# Load the prediction data
prediction_data = pd.read_csv('../../data/processed/label_df.csv', index_col=0)

# Convert latest_scans dictionary to DataFrame
scans_df = pd.DataFrame(list(latest_scans.items()), columns=['ParticipantID', 'ScanPath'])

# Merge the dataframes
prediction_data = prediction_data.rename(columns={"record_id" : "ParticipantID"})
final_data = prediction_data.merge(scans_df, on='ParticipantID', how='left')

In [15]:
final_data.to_csv('../../data/processed/MRI_file_path.csv', index=False)

In [16]:
import nibabel as nib

def load_mri(path):
    mri = nib.load(path)
    return mri.get_fdata()

In [17]:
import numpy as np

def preprocess_mri(data):
    # Normalize the data to [0, 1]
    data = (data - np.min(data)) / (np.max(data) - np.min(data))
    return data

In [18]:
subset_df = final_data.dropna(subset=['ScanPath']).head(10)

In [19]:
for _, row in subset_df.iterrows():
    mri_data = load_mri(row['ScanPath'])
    print(f"Participant ID: {row['ParticipantID']}, Shape: {mri_data.shape}")

Participant ID: RSCT000111, Shape: (192, 256, 256)
Participant ID: RSCT000208, Shape: (192, 256, 256)
Participant ID: RSCT000508, Shape: (240, 256, 256)
Participant ID: RSCT000749, Shape: (256, 256, 256)
Participant ID: RSCT001112, Shape: (192, 256, 256)
Participant ID: RSCT001207, Shape: (192, 256, 256)
Participant ID: RSCT001300, Shape: (192, 256, 256)
Participant ID: RSCT001425, Shape: (256, 256, 256)
Participant ID: RSCT001613, Shape: (192, 256, 256)
Participant ID: RSCT001733, Shape: (160, 256, 256)


In [20]:
# Problem : MRIs with different shapes -> need resize
if False:
    X = []  # Image data
    y = []  # Labels

    # Remove participants without any T1w MRI scans
    final_data = final_data.dropna(subset=['ScanPath'])

    for _, row in subset_df.iterrows():
        mri_data = load_mri(row['ScanPath'])
        mri_data = preprocess_mri(mri_data)
        X.append(mri_data)
        y.append(row['surg_engel'])

    X = np.array(X)  # Convert list to array for training
    y = np.array(y)

In [21]:
import nibabel as nib
import numpy as np
from scipy.ndimage import zoom

def resize_mri(data, new_shape=(64, 64, 64)):
    """ Resize the MRI to new_shape """
    # Calculate the zoom factors
    zoom_factors = np.array(new_shape) / np.array(data.shape)
    # Apply the zoom operation with bilinear interpolation
    return zoom(data, zoom_factors, order=1)  # order=1 (bilinear) is often a good trade-off


def preprocess_and_load_mris(df):
    X = []
    y = []

    for _, row in df.iterrows():
        mri_data = load_mri(row['ScanPath'])
        mri_data = preprocess_mri(mri_data)
        mri_data_resized = resize_mri(mri_data)
        X.append(mri_data_resized)
        y.append(row['surg_engel'])

    return np.array(X), np.array(y)

In [22]:
X, y = preprocess_and_load_mris(final_data)

TypeError: expected str, bytes or os.PathLike object, not float

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense

def create_model(input_shape):
    """ Create a 3D CNN model. """
    model = Sequential([
        Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape),
        MaxPooling3D(pool_size=(2, 2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
X = X[..., np.newaxis]  # Add a channel dimension, assuming X doesn't already have it

In [None]:
input_shape = X.shape[1:]
model = create_model(input_shape)


from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the model
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

In [None]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test loss: {loss}, Test accuracy: {accuracy}")

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense

model = Sequential([
    Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=(X.shape[1], X.shape[2], X.shape[3], 1)),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])


In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=10, batch_size=5)


In [None]:
predictions = model.predict(X)

In [None]:
from tensorflow.keras.utils import Sequence
import nibabel as nib
import numpy as np

class MRISequence(Sequence):
    def __init__(self, df, batch_size):
        self.df = df
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.df) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.df.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        X = []
        y = []
        for _, row in batch_x.iterrows():
            mri_data = load_mri(row['ScanPath'])
            mri_data = preprocess_mri(mri_data)
            mri_data_resized = resize_mri(mri_data)
            X.append(mri_data_resized)
            y.append(row['Outcome'])
        
        return np.array(X), np.array(y)

# Usage
batch_size = 2  # You can adjust the batch size
train_gen = MRISequence(df=subset_df, batch_size=batch_size)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense

def create_model(input_shape):
    # Create a Sequential model
    model = Sequential([
        Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape),
        MaxPooling3D(pool_size=(2, 2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    
    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Assuming your input shape from the preprocessed MRI data is known, e.g., (64, 64, 64, 1)
input_shape = (64, 64, 64, 1)
model = create_model(input_shape)


In [None]:
model.fit(train_gen, epochs=10)


## Print system information

In [None]:
import session_info

session_info.show()