# Introduction

This document is part of a project that compares the performance of deep learning models in classifying a disease or disorder using 3D and 2D brain images.
In this document, I train models using 3D brain images of people with and without autism.

The data is accessible here: **Autism Brain Imaging Data Exchange 1**: http://fcon_1000.projects.nitrc.org/indi/abide/abide_I.html

The original dataset contains 1112 images. However, because training models on 3D images requires more processing time and my computational capacity was limited, I reduced the data and included only 628 images.
The subset is evenly distributed between the two classes:
- **Autism**
- **NonAutism**

In [None]:

import os
import glob
from keras.utils import to_categorical
from keras.layers import Input,Dense,Flatten,Dropout,Conv3D,MaxPooling3D,Activation #merge,
from keras.models import Model,Sequential,load_model
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras import regularizers
from keras import backend as K
from keras import activations
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

import numpy as np
import numpy.random as rng
import scipy.misc
from skimage.transform import resize
from PIL import Image
import nibabel as nib #reading MR images
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle
import math
import glob
import time
from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

#!pip install --upgrade scipy==1.2.1 # to fix import imresize error in keras-vis library
#!pip install git+https://github.com/raghakot/keras-vis.git --upgrade   # Load Keras-vis library

## Loading images

Several medical and academic centers recruited participants. Therefore, after loading the images, I check the number of images, each corresponding to a participant, in each center. Note that the main directory has separate Autism and NonAutism folders.


In [None]:
def count_nii_files_center(center_folder):
    """Return the number of ``*.nii`` paths found anywhere under *center_folder*.

    The search descends into every sub-directory, so scans nested at any
    depth below the center's folder are included in the count.
    """
    search_pattern = os.path.join(center_folder, '**', '*.nii')
    # Count lazily instead of materialising the full list of matches.
    return sum(1 for _ in glob.iglob(search_pattern, recursive=True))

# Root directory of the cleaned data set; it holds one folder per class.
main_folder = 'clean_data'

autism_folder = os.path.join(main_folder, 'Autism')
nonautism_folder = os.path.join(main_folder, 'NonAutism')

# Every sub-directory of a class folder is treated as one recruiting center.
autism_centers = [entry.path for entry in os.scandir(autism_folder) if entry.is_dir()]
nonautism_centers = [entry.path for entry in os.scandir(nonautism_folder) if entry.is_dir()]

# Report the number of scans per center, autism group first.
for center_folder in autism_centers:
    center_name = os.path.basename(center_folder)
    nii_count = count_nii_files_center(center_folder)
    print(f"Autism - Recruiting Center: {center_name}, Number of .nii files: {nii_count}")

for center_folder in nonautism_centers:
    center_name = os.path.basename(center_folder)
    nii_count = count_nii_files_center(center_folder)
    print(f"Nonautism - Recruiting Center: {center_name}, Number of .nii files: {nii_count}")


## Checking the images' dimensions

To prepare the images for the model, we need to ensure that their dimensions are consistent.
Therefore, I check the dimensions here. The output shows that the dimensions vary across recruiting centers,
so the next challenge is to decide how to address this issue.

In [None]:

def get_image_dimensions(data_folder):
    """Print the dimensions of every ``*.nii`` image found under *data_folder*.

    The folder is searched recursively. One line is printed per image, giving
    its path and the shape of its data array. Nothing is returned.
    """
    pattern = os.path.join(data_folder, '**', '*.nii')
    image_paths = glob.glob(pattern, recursive=True)

    for image_path in image_paths:
        # The shape is available on the loaded image object itself, so there
        # is no need to call get_fdata(), which would read the whole volume
        # into memory as floating point just to inspect its dimensions.
        dimensions = nib.load(image_path).shape
        print(f"Image: {image_path}, Dimensions: {dimensions}")

# Root of the subset used in the experiments; it holds one folder per class.
data_root = 'AutismSubset_data'
autism_folder, nonautism_folder = (
    os.path.join(data_root, class_name) for class_name in ('Autism', 'NonAutism')
)

# Print a heading followed by the dimensions of each image, autism group first.
for heading, class_folder in (
    ("Autism Images:", autism_folder),
    ("NonAutism Images:", nonautism_folder),
):
    print(heading)
    get_image_dimensions(class_folder)


To address the inconsistency in the images' dimensions, I crop every image to the same fixed range of slices along each axis and then standardize it (z-score).

In [None]:

def preprocess_image(image_path):
    """Load a NIfTI volume, crop it to a fixed window and z-score standardize it.

    Parameters
    ----------
    image_path : str
        Path of the ``.nii`` file to read.

    Returns
    -------
    numpy.ndarray
        The cropped volume with its mean subtracted and divided by its
        standard deviation. If the crop is constant (standard deviation of
        zero) the mean-centred crop, i.e. all zeros, is returned instead.
    """
    img = nib.load(image_path).get_fdata()

    # Crop the same index window on all three axes (30:110, 60:126, 60:126),
    # i.e. at most 80 x 66 x 66 voxels. A volume smaller than the window
    # yields a correspondingly smaller crop.
    img = img[30:110, 60:126, 60:126]

    # Z-score standardization over the cropped voxels.
    mean = img.mean()
    std = img.std()
    centered = img - mean

    if std == 0:
        # A constant crop has no spread; dividing by zero would fill the
        # volume with NaN, which would then propagate into training.
        return centered

    return centered / std

def load_images_and_labels(data_folder, label):
    """Load and preprocess every ``anat_1/mprage.nii`` scan under *data_folder*.

    Parameters
    ----------
    data_folder : str
        Folder that is searched recursively for scans.
    label : int
        Class label assigned to every scan found in *data_folder*.

    Returns
    -------
    tuple[list, list]
        ``(images, labels)``: the preprocessed volumes and a list of the same
        length containing *label* once per volume. Both are empty when no
        scan matches.
    """
    # Only the file named 'mprage.nii' inside an 'anat_1' sub-folder is used.
    target_subfolder = 'anat_1'
    pattern = os.path.join(data_folder, '**', target_subfolder, 'mprage.nii')

    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # makes the sample order, and therefore any seeded split performed on the
    # result, reproducible across runs and machines.
    image_paths = sorted(glob.glob(pattern, recursive=True))

    images = [preprocess_image(image_path) for image_path in image_paths]
    labels = [label] * len(images)

    return images, labels

# Locate the two class folders of the subset.
data_root = 'AutismSubset_data'
autism_folder = os.path.join(data_root, 'Autism')
nonautism_folder = os.path.join(data_root, 'NonAutism')

# Autism scans are labelled 1; report how many were loaded and their shapes.
autism_images, autism_labels = load_images_and_labels(autism_folder, 1)
print("Autism Images Length:", len(autism_images))
for i, img in enumerate(autism_images):
    print(f"Autism Image {i + 1} Dimensions: {img.shape}")

# Non-autism scans are labelled 0; same report for this group.
nonautism_images, nonautism_labels = load_images_and_labels(nonautism_folder, 0)
print("Nonautism Images Length:", len(nonautism_images))
for i, img in enumerate(nonautism_images):
    print(f"Nonautism Image {i + 1} Dimensions: {img.shape}")

# Stack both groups into a single array along the sample axis, autism first.
images = np.concatenate(
    (np.asarray(autism_images), np.asarray(nonautism_images)),
    axis=0,
)

# Labels follow the same order as the images and are one-hot encoded.
labels = to_categorical(np.array(autism_labels + nonautism_labels))

print("Images Shape:", images.shape)
print("Labels Shape:", labels.shape)



In [None]:
# Inspect the one-hot encoded label array produced by to_categorical.
print(labels)

In [None]:
# Hold out 20% of the samples for validation. Stratifying on the labels keeps
# the class proportions the same in both partitions, and the fixed seed makes
# the split repeatable for a given sample order.
from sklearn.model_selection import train_test_split

train_X, valid_X, train_y, valid_y = train_test_split(
    images,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)


## 3D Convolutional Neural Network Model

In [None]:

# Training hyper-parameters.
batch_size = 10
epochs = 100

# Each sample is a single-channel volume; 80 x 66 x 66 is the size of the
# window cropped in preprocess_image.
volume_shape = (80, 66, 66, 1)
input_img = Input(shape=volume_shape)

def model(input_img):
    """Build the 3D CNN graph on top of *input_img* and return its output tensor.

    The network is three identical stages followed by a classifier. Each
    stage is: 5-filter 3x3x3 convolution ('same' padding) -> batch
    normalization -> 2x2x2 max pooling -> ReLU -> dropout (rate 0.5). The
    first convolution has no activation of its own, while the second and
    third also apply ReLU inside the convolution layer. The result is
    flattened and fed to a 2-unit softmax layer.
    """
    # Activation passed to the convolution of each stage, in order.
    conv_activations = (None, 'relu', 'relu')

    features = input_img
    for conv_activation in conv_activations:
        features = Conv3D(5, (3, 3, 3), padding='same', activation=conv_activation)(features)
        features = BatchNormalization()(features)
        features = MaxPooling3D(pool_size=(2, 2, 2))(features)
        features = Activation('relu')(features)
        features = Dropout(rate=0.5)(features)

    flattened = Flatten()(features)

    # Two output units, one per class.
    return Dense(2, activation='softmax')(flattened)

In [None]:
learning_rate = 0.001

# Wire the network onto the input tensor. Note that the name `model` is
# rebound here from the builder function to the resulting Model instance, so
# the cell defining the function has to be re-run before this one is re-run.
network_output = model(input_img)
model = Model(inputs=input_img, outputs=network_output)

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate),
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Stop training once the validation loss has gone 20 epochs without improving,
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

In [None]:
%%time

# Train on the training partition, using the held-out partition as validation
# data and the early-stopping callback to end training.
model_train = model.fit(
    train_X,
    train_y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(valid_X, valid_y),
    callbacks=[early_stopping],
)

In [None]:
# Save the trained model to 'Autism_model.h5' (path is relative to the
# current working directory).
model.save('Autism_model.h5')
