In [2]:
import numpy as np
import os
from nilearn import image
import nibabel as nib
import pandas as pd
from junifer.data import get_mask
import nibabel.processing as npr
from nilearn.maskers import NiftiMasker

It is highly recommended to configure Git before using DataLad. Set both 'user.name' and 'user.email' configuration variables.


In [3]:
# Build one full path per entry listed in the IXI "Guys" folder.
ixi_guys_dir = '/home/hsreekri/projects/IXI_CAT12.5/IXI/Guys/'
files = os.listdir(ixi_guys_dir)
base_paths = [ixi_guys_dir + str(entry) for entry in files]

In [4]:
# Helper functions

# Partially adapted from https://github.com/juaml/brainage_estimation/blob/main/brainage/calculate_features.py

def file_reader(file_name: str) -> np.ndarray:
    """Load the image at ``file_name`` with nibabel and return its data flattened to 1-D."""
    return nib.load(file_name).get_fdata().ravel()

def create_histograms(data: np.ndarray, bins: int) -> tuple:
    """Histogram ``data`` into ``bins`` bins.

    Returns:
        The ``(hist, bin_edges)`` pair produced by ``np.histogram``.
    """
    return np.histogram(data, bins=bins)

def binarize_3d(img, threshold):
    """Threshold an image into a 0/1 NIfTI image.

    Voxels whose value is strictly greater than ``threshold`` become 1, all
    others become 0. The affine and header of ``img`` are passed on to the
    new image.
    """
    above_threshold = img.get_fdata() > threshold
    binary_data = np.where(above_threshold, 1, 0)
    return nib.Nifti1Image(binary_data, img.affine, img.header)

def process_and_mask_image(sub_file: str, mask_file: str, smooth_fwhm: float, resample_size: float) -> np.ndarray:
    """
    Smooth a subject image, resample it onto a resampled mask grid, and
    return the subject values selected by the mask.

    Args:
        sub_file: Path of the subject image to load.
        mask_file: Path of the mask image to load.
        smooth_fwhm: Value passed to ``image.smooth_img`` as the smoothing FWHM.
        resample_size: Voxel size requested for the resampled mask; the same
            value is used for every axis of the mask.

    Returns:
        Flattened array of the resampled subject values at positions where the
        resampled mask is greater than 0.5.
    """
    subject_img = nib.load(sub_file)
    mask_img = nib.load(mask_file)

    # Smoothing is applied on the subject's original grid, before any resampling.
    smoothed_img = image.smooth_img(subject_img, smooth_fwhm)

    # One target voxel size per mask axis; order=1 is forwarded to the resampler.
    target_voxel_sizes = [resample_size for _ in mask_img.shape]
    resampled_mask_img = npr.resample_to_output(mask_img, target_voxel_sizes, order=1)

    # Put the subject on the resampled mask's grid so both arrays line up.
    resampled_subject_img = image.resample_to_img(
        smoothed_img, resampled_mask_img, interpolation="linear"
    )

    # The resampled mask is binarized at 0.5 and used as a boolean selector.
    selector = binarize_3d(resampled_mask_img, 0.5).get_fdata().astype(bool)
    selected_values = resampled_subject_img.get_fdata()[selector]

    return selected_values.ravel()


In [5]:
#Main function

def compute(
    base_paths: list,
    bins: int,
    smooth_fwhm: float,
    resample_size: float,
    mask_file: str = '/home/hsreekri/Julearn_predictions/CAT12_IXI555_MNI152_TMP_GS_GMprob0.2_clean.nii.gz',
) -> pd.DataFrame:
    """
    Build a table of per-subject intensity histograms.

    For every entry of ``base_paths`` the file
    ``<base_path>/mri/m0wp1<subject folder name>.nii.gz`` is looked up. When it
    and the mask both exist, the image is passed through
    ``process_and_mask_image`` and the resulting values are histogrammed with
    ``create_histograms``. Subjects whose file (or the mask) is missing are
    reported with ``print`` and skipped.

    Args:
        base_paths: Subject directories; the last path component is used as
            the subject ID (a trailing slash is tolerated).
        bins: Number of histogram bins, i.e. number of output columns.
        smooth_fwhm: Forwarded to ``process_and_mask_image``.
        resample_size: Forwarded to ``process_and_mask_image``.
        mask_file: Path of the mask image; defaults to the GM-prob-0.2 mask
            that was previously hard-coded in the function body.

    Returns:
        DataFrame indexed by subject ID with columns ``Bin_1`` .. ``Bin_<bins>``.
        If no subject could be processed, an empty frame with those columns
        is returned instead of raising.

    NOTE(review): ``create_histograms`` is called with only the data and the
    bin count, so bin edges are presumably derived from each subject's own
    value range; confirm that bins are meant to be compared across subjects.
    """
    bin_labels = [f'Bin_{i}' for i in range(1, bins + 1)]

    # The mask path does not change between subjects, so check it only once.
    mask_found = os.path.exists(mask_file)

    histograms = []
    subjects = []

    for base_path in base_paths:
        # normpath drops a trailing separator, which would otherwise make the
        # subject folder name come out as an empty string.
        sub_folder = os.path.basename(os.path.normpath(base_path))  # e.g., 'sub-IXI002'

        sub_file = os.path.join(base_path, 'mri', f'm0wp1{sub_folder}.nii.gz')

        if not (mask_found and os.path.exists(sub_file)):
            print(f"File not found: {sub_file} or {mask_file}")
            continue

        processed_data = process_and_mask_image(sub_file, mask_file, smooth_fwhm, resample_size)

        # Only the counts are kept; the bin edges are not stored.
        hist_data, _ = create_histograms(processed_data, bins)

        histograms.append(hist_data)
        subjects.append(sub_folder)

    # Passing the labels to the constructor (rather than assigning
    # df.columns afterwards) also works when no histogram was collected.
    return pd.DataFrame(histograms, index=subjects, columns=bin_labels)


In [7]:
# Usage
# TODO: confirm the resample size (2 is used for now).
# Test run: only the first 10 entries of base_paths are processed.
IXI_df = compute(base_paths[:10], bins=100, smooth_fwhm=0.2, resample_size=2)

In [8]:
# Display the histogram table returned by compute(): one row per subject, one column per bin.
IXI_df

Unnamed: 0,Bin_1,Bin_2,Bin_3,Bin_4,Bin_5,Bin_6,Bin_7,Bin_8,Bin_9,Bin_10,...,Bin_91,Bin_92,Bin_93,Bin_94,Bin_95,Bin_96,Bin_97,Bin_98,Bin_99,Bin_100
sub-IXI350,4530,2557,2446,2446,2380,2293,2371,2351,2458,2441,...,0,1,2,0,0,2,0,0,0,1
sub-IXI487,7271,3044,2673,2630,2574,2550,2389,2481,2419,2517,...,0,0,2,0,1,0,0,0,0,1
sub-IXI154,6989,2706,2412,2349,2297,2180,2236,2222,2195,2149,...,3,3,2,1,1,1,0,0,0,1
sub-IXI123,5167,2722,2387,2371,2262,2180,2262,2176,2106,2118,...,4,3,2,0,0,0,0,0,0,1
sub-IXI129,6185,2572,2318,2163,2169,2140,2095,1965,1991,2119,...,3,1,0,1,1,0,0,0,0,1
sub-IXI024,4229,2467,2257,2261,2218,2224,2193,2183,2283,2314,...,0,4,1,1,0,0,0,1,0,1
sub-IXI053,3882,2174,2059,2079,1988,1985,2095,2105,2100,2016,...,4,3,1,1,0,0,1,1,0,1
sub-IXI414,7745,2754,2419,2313,2166,2181,2119,2050,2124,2020,...,3,2,1,1,0,0,1,1,0,3
sub-IXI265,7194,2765,2433,2287,2269,2326,2164,2194,2189,2201,...,6,0,2,0,1,1,0,0,0,1
sub-IXI368,4552,2447,2376,2087,2145,2192,2133,2158,2078,2022,...,3,4,9,4,1,2,2,1,0,1
