# Modules to download

In [None]:
# !pip install matplotlib nibabel numpy opencv-python scipy scikit-image antsx SimpleITK



In [2]:
# Importation for modules.
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import cv2
from scipy.ndimage import binary_fill_holes
from skimage.morphology import remove_small_objects, convex_hull_image
from skimage.segmentation import active_contour
from skimage.filters import gaussian
import ants
import SimpleITK as sitk
from helpers import *
from antspynet.utilities import brain_extraction
import os
import csv

### **Cleaning Demographic Data**
This process cleans the demographic data from the OASIS-2 dataset and stores the cleaned data as a CSV file in the current working directory (CWD).

**Note:** Replace the Dataset_folder variable (line 1) with the path to your demographic data excel file or use the cleaned_demographic_data.csv file aldredy present in this folder

In [None]:
Dataset_folder = "../oasis_db/OAS2_RAW_PART2/OAS2_RAW_PART2" #replace with path to your dataset folder
if not os.path.exists(Dataset_folder):
        print([])  

nifti_files = []
for item in os.listdir(Dataset_folder):
    item_path = os.path.join(Dataset_folder, item)
    if os.path.isdir(item_path):  # Check if it's a directory
        nifti_files.append(item)



header = None #storing header to write to new cleaned csv file
demographic_mri_ids = [] #stores mri mri_ids in demographic data
demographic_data = "oasis_longitudinal_demographics-8d83e569fa2e2d30.csv" #file path of uncleaned data
with open(demographic_data, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    header = next(reader)  # Read header
    column_index = 1  # index of "mri id" column

    for row in reader:
        try:
            demographic_mri_ids.append(row[column_index])
        except IndexError:
            print(f"Warning: Row has fewer columns than expected.")  # Handle missing data
print()
print(len(demographic_mri_ids))
print(len(nifti_files))

#finding intersection
demographic_mri_ids_set = set(demographic_mri_ids)
nifti_files_set = set(nifti_files)
final_data = demographic_mri_ids_set.intersection(nifti_files_set)
print(len(final_data) == len(nifti_files)) # len same as nifti files therfore all demographics data for nifti files available


#creating new csv with cleaned data
filtered_rows = []
with open(demographic_data, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    header = next(reader)  # Read header (if present)
    column_index = 1  # index of "mri id" column

    for row in reader:
        try:
            if row[column_index] in final_data:
                filtered_rows.append(row)
        except IndexError:
            print(f"Warning: Row has fewer columns than expected.")  # Handle missing data

output_filepath = "cleaned_demographic_data"
with open(output_filepath, 'w', newline='', encoding='utf-8') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(header)
    writer.writerows(filtered_rows)
    print(f"Filtered data written to: {output_filepath}")



373
164
True
Filtered data written to: cleaned_demographic_data


### **Functions for NIfTI Display and Processing**

This section includes functions to interactively explore 3D arrays, rescale array values, add suffixes to filenames, and overlay mask contours on 3D images for visualization.


In [2]:
# Function to display nifti files
from ipywidgets import interact
def explore_3D_array(arr: np.ndarray, cmap: str='gray'):
    def fn(SLICE):
        plt.figure(figsize=(7,7))
        plt.axis('off')
        plt.imshow(arr[SLICE,:,:], cmap=cmap)

    interact(fn, SLICE=(0, arr.shape[0]-1))

def add_suffix_to_filename(filename: str, suffix: str) ->str:
    if filename.endswith('.nifti.hdr'):
        result = filename.replace('.nifti.hdr', f'_{suffix}.nifti.hdr')
        return result
    else:
        raise RuntimeError('filename with unknown ext')

def rescale_linear(array: np.ndarray, new_min: int, new_max: int):
  minimum, maximum = np.min(array), np.max(array)
  m = (new_max - new_min) / (maximum - minimum)
  b = new_min - m * minimum
  return m * array + b

def explore_3D_array_with_mask_contour(arr: np.ndarray, mask: np.ndarray, thickness: int = 1):
  _arr = rescale_linear(arr,0,1)
  _mask = rescale_linear(mask,0,1)
  _mask = _mask.astype(np.uint8)

  def fn(SLICE):
    arr_rgb = cv2.cvtColor(_arr[SLICE, :, :], cv2.COLOR_GRAY2RGB)
    contours, _ = cv2.findContours(_mask[SLICE, :, :], cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    arr_with_contours = cv2.drawContours(arr_rgb, contours, -1, (0,1,0), thickness)

    plt.figure(figsize=(7,7))
    plt.imshow(arr_with_contours)

  interact(fn, SLICE=(0, arr.shape[0]-1))

### **Brain Extraction Function**

This function reads a NIfTI image, performs brain extraction using ANTs, applies the brain mask, and returns the masked brain image as a NumPy array, preserving metadata.


In [3]:
def extracting_brain(img_path):
    ant_img = ants.image_read(img_path, reorient='RAS')
    # explore_3D_array(arr=ant_img.numpy(), cmap='nipy_spectral')
    prob_brain_mask = brain_extraction(ant_img, modality="t1")
    brain_mask = ants.get_mask(prob_brain_mask, low_thresh=0.5)
    # explore_3D_array_with_mask_contour(ant_img.numpy(), brain_mask.numpy())
    # Apply the mask
    masked = ants.mask_image(ant_img, brain_mask)

    # Convert to NumPy array
    final_mask = masked.numpy()


    # Convert back to ANTs image (to preserve metadata)
    rotated_masked_ant = ants.from_numpy(final_mask, origin=masked.origin, spacing=masked.spacing, direction=masked.direction)

    # Visualize the rotated image
    # explore_3D_array(final_mask, cmap='gray')
    final_mask.shape
    return final_mask


### **MRI Image Preprocessing**

This function processes MRI images through a series of steps, including:
1. Grayscale conversion (if needed)
2. CLAHE (Contrast Limited Adaptive Histogram Equalization) for contrast enhancement
3. Gaussian and Median blurring for noise reduction
4. Sharpening using a custom kernel
5. Non-Local Means Denoising
6. Adding salt-and-pepper noise
7. Applying PCA to reduce dimensionality of the image slices

It processes both 2D slices and 3D volumes of MRI data.


In [8]:
from sklearn.decomposition import PCA
def preprocess_mri_image(img_array):
    """
    Preprocess an MRI image with the following steps:
    1. Convert to grayscale (if needed)
    2. Apply CLAHE for contrast enhancement
    3. Apply Gaussian blur
    4. Apply Median blur for noise reduction
    5. Apply sharpening
    6. Apply Non-Local Means Denoising
    7. Add salt-and-pepper noise at the end

    :param img_array: NumPy array of the MRI image
    :return: Processed NumPy array
    """
    img_array
    if len(img_array.shape) == 3:  # Check if it's a 3D array (e.g., 128, 256, 256)
        processed_slices = []
        for i in range((img_array.shape[0]//2)-10,(img_array.shape[0]//2)+10):  # Iterate through each slice
            slice_img = img_array[i]
            processed_slices.append(process_single_slice(slice_img))
        return np.array(processed_slices)

    else:
        return process_single_slice(img_array)

def process_single_slice(slice_img):
    """
    Process a single 2D slice of the MRI image.
    """
    slice_img = np.rot90(slice_img, k=3)
    # Ensure image is single-channel grayscale
    if len(slice_img.shape) == 3 and slice_img.shape[-1] == 3:  # Check if RGB
        slice_img = cv2.cvtColor(slice_img, cv2.COLOR_BGR2GRAY)

    # Normalize and convert to uint8
    slice_img = cv2.normalize(slice_img, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # Apply CLAHE (Adaptive Histogram Equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    slice_img = clahe.apply(slice_img)

    # Apply Gaussian Blur
    slice_img = cv2.GaussianBlur(slice_img, (5, 5), 0)

    # Apply sharpening filter
    sharpen_kernel = np.array([[0, -1, 0],
                               [-1, 5, -1],
                               [0, -1, 0]])
    slice_img = cv2.filter2D(slice_img, -1, sharpen_kernel)

    # Apply Non-Local Means Denoising
    slice_img = cv2.fastNlMeansDenoising(slice_img, None, h=10, templateWindowSize=7, searchWindowSize=21)

    # Add Salt-and-Pepper Noise
    slice_img = add_salt_and_pepper(slice_img)

    # Apply Median Blur
    slice_img = cv2.medianBlur(slice_img, 5)

    slice_img = apply_pca_to_slice(slice_img)

    return slice_img

def add_salt_and_pepper(img, salt_prob=0.02, pepper_prob=0.02):
    """
    Function to add salt-and-pepper noise to an image.
    """
    noisy_img = img.copy()
    num_salt = int(salt_prob * img.size)
    num_pepper = int(pepper_prob * img.size)

    # Add salt (white pixels)
    coords = [np.random.randint(0, i - 1, num_salt) for i in img.shape]
    noisy_img[tuple(coords)] = 255

    # Add pepper (black pixels)
    coords = [np.random.randint(0, i - 1, num_pepper) for i in img.shape]
    noisy_img[tuple(coords)] = 0

    return noisy_img


def apply_pca_to_slice(slice_img, n_components=115):
    """
    Apply PCA to a single 2D slice and return the transformed image (without reconstruction).

    :param slice_img: 2D slice of the MRI image
    :param n_components: Number of principal components to keep
    :return: PCA transformed slice (without reconstruction)
    """
    # Step 1: Reshape the slice to a 2D array (flatten it)
    # h, w = slice_img.shape
    # reshaped_slice = slice_img.reshape(h, w)

    # Step 2: Apply PCA to the reshaped slice
    pca = PCA(n_components=n_components)
    transformed_image = pca.fit_transform(slice_img)

    # reconstructed_image = pca.inverse_transform(transformed_image)
    # reconstructed_image = reconstructed_image.reshape(h, w)

    return transformed_image

In [None]:
final=extracting_brain("C:/Users/moksh/OneDrive/Desktop/Alzeimers/oasis_db/OAS2_RAW_PART2/OAS2_RAW_PART2/OAS2_0101_MR1/RAW/mpr-1.nifti.hdr")
final = preprocess_mri_image(final)
explore_3D_array(final)

interactive(children=(IntSlider(value=9, description='SLICE', max=19), Output()), _dom_classes=('widget-intera…

### **Preprocess and Store MRI Data**

This function walks through the dataset directory, processes MRI images by extracting the brain, and performs various preprocessing steps (such as denoising, contrast enhancement, and PCA). The processed images are saved as `.npy` files in corresponding directories which will then be fed into the CNN model.

This step takes approximately 30 minutes to run on a Ryzen 5 processor. Please run it only once to obtain the preprocessed data. Do not run it again after the initial execution.

**Note:** Replace the root_dir variable with the path to your oasis 2 dataset


In [10]:
def preprocess_and_store():
    root_dir = 'C:/Users/moksh/OneDrive/Desktop/Alzeimers/oasis_db/OAS2_RAW_PART2/OAS2_RAW_PART2' #replace with path to your dataset folder
    for root, dirs, files in os.walk(root_dir):
        # print(f"Current Directory: {root}")
        
        # print(f"Subdirectories: {dirs}")
        
        # print(f"Files: {files}")
        
        if "OLD" not in root:
            for file in files:
                
                file_path = os.path.join(root, file)
                if file_path.endswith("hdr"): 
                    new = file_path.split(os.sep)
                    folder = new[1]                
                    os.makedirs(folder,exist_ok=True)
                    print("Processing " + file + " in folder " + folder)                  
                    final_mask = extracting_brain(file_path)
                    processed_img = preprocess_mri_image(final_mask)                   
                    print("Done processing " + file + " in folder " + folder)
                    processed_file_name = file_path.split(os.sep)[-1]
                    np.save(folder+"/"+processed_file_name,processed_img)
                    break

preprocess_and_store()
# reconstructed_image = pca.inverse_transform(transformed_image)
# reconstructed_image = reconstructed_image.reshape(h, w)
# explore_3D_array(np.load("C:/Users/moksh/OneDrive/Desktop/Alzeimers/Alzeimers-detection/OAS2_0100_MR1/mpr-1.np.npy"))
    

Processing mpr-1.nifti.hdr in folder OAS2_0100_MR1
Done processing mpr-1.nifti.hdr in folder OAS2_0100_MR1
Processing mpr-1.nifti.hdr in folder OAS2_0100_MR2
Done processing mpr-1.nifti.hdr in folder OAS2_0100_MR2
Processing mpr-1.nifti.hdr in folder OAS2_0100_MR3
Done processing mpr-1.nifti.hdr in folder OAS2_0100_MR3
Processing mpr-1.nifti.hdr in folder OAS2_0101_MR1
Done processing mpr-1.nifti.hdr in folder OAS2_0101_MR1
Processing mpr-1.nifti.hdr in folder OAS2_0101_MR2
Done processing mpr-1.nifti.hdr in folder OAS2_0101_MR2
Processing mpr-1.nifti.hdr in folder OAS2_0101_MR3
Done processing mpr-1.nifti.hdr in folder OAS2_0101_MR3
Processing mpr-1.nifti.hdr in folder OAS2_0102_MR1
Done processing mpr-1.nifti.hdr in folder OAS2_0102_MR1
Processing mpr-1.nifti.hdr in folder OAS2_0102_MR2
Done processing mpr-1.nifti.hdr in folder OAS2_0102_MR2
Processing mpr-1.nifti.hdr in folder OAS2_0102_MR3
Done processing mpr-1.nifti.hdr in folder OAS2_0102_MR3
Processing mpr-1.nifti.hdr in folder 

# mapping demographic data to the preprocessed MRI data

In [7]:
import os
import pandas as pd

def mapDemographic():
    root_dir = os.getcwd() + "/processedWithPCA"
    demographic_file = "cleaned_demographic_data"

    df = pd.read_csv(demographic_file)

    ses_mean = df["SES"].mean()  
    mmse_mean = df["MMSE"].mean()  
    
    print(int(ses_mean), int(mmse_mean))

    df["SES"]= df["SES"].fillna(ses_mean)
    df["MMSE"] = df["MMSE"].fillna(mmse_mean)

    df["Group"] = df["Group"].apply(lambda x: 0 if x == "Nondemented" else 1)
    df["M/F"] = df["M/F"].apply(lambda x: 0 if x == "F" else 1)

    df = df.drop(columns=["Subject ID", "Hand"])
    print(df.head())

    mri_data = {}
    for _, row in df.iterrows():
        mri_id = row["MRI ID"]
        data = row.drop("MRI ID").values.tolist() 
        mri_data[mri_id] = data
    print(mri_data)
    match=0
    np.set_printoptions(threshold=np.inf)
    count=0
    for root, dirs, files in os.walk(root_dir):
        mri_id = os.path.basename(root)
        
        for file in files:
            file_path = os.path.join(root, file)
            if file_path.endswith(".npy"):
                mri = np.load(file_path)
                print(type(mri))
                print(mri.shape)
                mri = np.append(mri, mri_data[mri_id], axis=1)
                print("after appending data")
                print(mri.shape)
                print("done appending for one file")
                count+=1
        if count==1:
            break
        

    
        

# Run the function
mapDemographic()


2 27
          MRI ID  Group  Visit  MR Delay  M/F  Age  EDUC  SES  MMSE  CDR  \
0  OAS2_0100_MR1      0      1         0    0   77    11  4.0  29.0  0.0   
1  OAS2_0100_MR2      0      2      1218    0   80    11  4.0  30.0  0.0   
2  OAS2_0100_MR3      0      3      1752    0   82    11  4.0  30.0  0.0   
3  OAS2_0101_MR1      0      1         0    0   71    18  2.0  30.0  0.0   
4  OAS2_0101_MR2      0      2       952    0   74    18  2.0  30.0  0.0   

   eTIV   nWBV    ASF  
0  1583  0.777  1.108  
1  1586  0.757  1.107  
2  1590  0.760  1.104  
3  1371  0.769  1.280  
4  1400  0.752  1.254  
{'OAS2_0100_MR1': [0, 1, 0, 0, 77, 11, 4.0, 29.0, 0.0, 1583, 0.777, 1.108], 'OAS2_0100_MR2': [0, 2, 1218, 0, 80, 11, 4.0, 30.0, 0.0, 1586, 0.757, 1.107], 'OAS2_0100_MR3': [0, 3, 1752, 0, 82, 11, 4.0, 30.0, 0.0, 1590, 0.76, 1.104], 'OAS2_0101_MR1': [0, 1, 0, 0, 71, 18, 2.0, 30.0, 0.0, 1371, 0.769, 1.28], 'OAS2_0101_MR2': [0, 2, 952, 0, 74, 18, 2.0, 30.0, 0.0, 1400, 0.752, 1.254], 'OAS2_0101_M

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 3 dimension(s) and the array at index 1 has 1 dimension(s)