# Obtain Brain Data
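Counts the grey matter (GM), white matter (WM), and cerebrospinal fluid (CSF) pixels in each participant's segmented image and saves the counts, alongside the participant metadata, to a CSV file for ML classification (see ML Classification.ipynb).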

In [1]:
# Pathing Libraries
from pathlib import Path
import os
import os.path as op
import glob

# Image Libraries
import nibabel as nib
import numpy as np
import pandas as pd
import scipy.ndimage as ndi
from scipy import stats
import matplotlib.pyplot as plt


In [2]:
## Pathing
# Project root, relative to the folder this notebook is run from
path = '../'

# Each input folder sits under <path>/data
images_path = Path(path) / 'data' / 'images'
masks_path = Path(path) / 'data' / 'masks'
segs_path = Path(path) / 'data' / 'segs_refs'
metadata_path = Path(path) / 'data' / 'meta'

## Z-normalization using binary mask function
def z_normalize(img_data, mask_data):
    """Mask an image with a binarized mask and clean up NaNs.

    Args:
        img_data: Image values; multiplied element-wise by the binarized mask
            (assumes a NumPy array whose shape matches the mask -- TODO confirm).
        mask_data: Mask values; anything > 0 counts as inside the mask.

    Returns:
        The masked image after ``np.nan_to_num``. The z-score step below is
        applied only to the elements where the mask is 0.

    NOTE(review): the z-score is assigned to the ``mask_data==0`` elements,
    i.e. the ones that were just multiplied by 0. For finite inputs those are
    all zero, so the z-score is presumably NaN (zero variance) and is turned
    back into 0 by ``np.nan_to_num``; the in-mask values are left as they were.
    If in-mask normalization was intended, the selector probably should be
    ``mask_data==1``. Changing it alters the returned values, so check every
    caller that relies on in-mask values being unchanged first.
    """
    # Ensure the mask is binary
    mask_data = np.where(mask_data > 0, 1, 0)

    # Apply the mask to the image
    # (the product is a new array, so the caller's img_data is not modified)
    masked_image_data = img_data * mask_data

    # Calculate the z-score of the masked image
    # NOTE(review): this selects the elements where the mask is 0, not the
    # in-mask elements -- see the docstring.
    masked_image_data[mask_data==0] = stats.zscore(masked_image_data[mask_data==0])

    # Handle NaN values that might result from zscoring zero-valued elements
    masked_image_data = np.nan_to_num(masked_image_data)

    return masked_image_data


### Find Pixels
- Iterates through participants and obtains the number of GM, WM, & CSF pixels from segmented images

In [4]:
# Per-subject metadata: subject id, age and gender code
meta_data = pd.read_csv(Path(metadata_path, 'meta_data_all.csv'))

# One plain dict (= one table row) per subject. The table is built once after
# the loop rather than creating and concatenating a one-row DataFrame per subject.
all_data_list = []

# 'gender_code': 1 = male, 2 = female
for subj, age, gender in zip(meta_data['subject_id'], meta_data['age'], meta_data['gender_code']):
    print(subj, age, gender)

    # Loading in Segmented Data
    seg = nib.load(op.join(segs_path, f'sub-{subj}_T1w_seg.nii.gz'))
    seg_data = seg.get_fdata()

    # Loading in Mask Data
    mask = nib.load(op.join(masks_path, f'sub-{subj}_T1w_brain_mask.nii.gz'))
    mask_data = mask.get_fdata()

    # Keep the segmentation labels inside the brain mask and zero everything else.
    # The labels are categorical (1/2/3), so they must NOT be z-normalized:
    # rescaling them would make the equality tests below meaningless.
    masked_seg = np.where(mask_data > 0, seg_data, 0)

    # Count CSF (1), grey matter (2), and white matter (3) pixels
    num_ones = np.count_nonzero(masked_seg == 1)
    num_twos = np.count_nonzero(masked_seg == 2)
    num_threes = np.count_nonzero(masked_seg == 3)

    print(f'{subj} CSF: {num_ones} GM: {num_twos} WM: {num_threes}')

    # Collect this participant's row
    all_data_list.append({'subject': subj,
                          'age': age,
                          'gender': gender,
                          'CSF': num_ones,
                          'GM': num_twos,
                          'WM': num_threes})

# Build the table in one go. Passing `columns` fixes the column order and gives
# an empty table with the expected columns if the metadata has no rows.
data_table = pd.DataFrame(all_data_list,
                          columns=['subject', 'age', 'gender', 'CSF', 'GM', 'WM'])

CC110033 24 1
CC110033 CSF: 32255 GM: 88877 WM: 53097
CC110037 18 1
CC110037 CSF: 23522 GM: 93552 WM: 53059
CC110045 24 2
CC110045 CSF: 22655 GM: 97481 WM: 49497
CC110056 22 2
CC110056 CSF: 20673 GM: 86147 WM: 49316
CC110062 20 1
CC110062 CSF: 20466 GM: 110771 WM: 60077
CC110069 28 2
CC110069 CSF: 33716 GM: 87070 WM: 50853
CC110087 28 2
CC110087 CSF: 25119 GM: 87639 WM: 45359
CC110098 23 1
CC110098 CSF: 31622 GM: 101799 WM: 53795
CC110101 23 1
CC110101 CSF: 32199 GM: 105892 WM: 55422
CC110126 22 2
CC110126 CSF: 22748 GM: 100499 WM: 55779
CC110174 25 2
CC110174 CSF: 16458 GM: 75982 WM: 43297
CC110182 18 2
CC110182 CSF: 25082 GM: 92165 WM: 45913
CC110187 25 2
CC110187 CSF: 25661 GM: 90829 WM: 48961
CC110319 28 2
CC110319 CSF: 23648 GM: 105225 WM: 60920
CC110411 25 1
CC110411 CSF: 39304 GM: 107591 WM: 62531
CC110606 20 1
CC110606 CSF: 36672 GM: 107434 WM: 59233
CC112141 29 1
CC112141 CSF: 35605 GM: 95060 WM: 59303
CC120008 26 1
CC120008 CSF: 32610 GM: 107184 WM: 67643
CC120049 28 1
CC1200

In [None]:
# Preview the first five rows of the per-subject table.
# NOTE(review): the output saved with this cell shows an 'Unnamed: 0' column and
# MALE/FEMALE gender labels, while the table is built from `gender_code` (1/2) --
# the output looks stale; re-run the notebook to refresh it.
data_table.head(5)

Unnamed: 0,subject,age,gender,CSF,GM,WM
0,CC110033,24,MALE,32255,88877,53097
1,CC110037,18,MALE,23522,93552,53059
2,CC110045,24,FEMALE,22655,97481,49497
3,CC110056,22,FEMALE,20673,86147,49316
4,CC110062,20,MALE,20466,110771,60077


In [None]:
# Write the per-subject table to <path>/data/brain_data_code.csv, leaving out the row index
output_csv = Path(path, 'data', 'brain_data_code.csv')
data_table.to_csv(output_csv, index=False)