Imports

In [None]:

import os
import glob
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import platform
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns

from sklearn.decomposition import PCA
# Identifier of the subject being processed; replace the placeholder with a real ID before running.
subject_id = '<subject_id>'


Prep Input and Output Parameters

In [None]:
## Paths Input Here
# Root of the BIDS dataset under which the per-subject output folder lives.
base_dir = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/rawdata'
#----------------------------------------------------------------DO NOT TOUCH----------------------------------------------------------------
# Build the analysis sub-path from `subject_id` (defined in the imports cell) instead of
# repeating the literal, so the subject only has to be set in one place.
analysis = f"{subject_id}/ses-01/single_subject_atrophy"
out_dir = os.path.join(base_dir, analysis)
print('I will save each file to: \n', out_dir)

# Nifti Import

**From Directory**
___

The section below explains the purpose and usage of the `segments_dict` that the import function builds:

---

## Neuroimaging File Extraction Dictionary

The `segments_dict` is a predefined dictionary structured to facilitate the extraction of specific types of neuroimaging files. Each key in the dictionary represents a distinct neuroimaging segment, and its associated value is another dictionary containing the following fields:

- **path**: This should be filled with the absolute path to the base directory containing the neuroimaging files for the corresponding segment. 
- **glob_name_pattern**: This is the string pattern that will be used to "glob" or search for the specific files within the provided path. It helps in identifying and extracting the desired files based on their naming conventions.

Here's a breakdown of the segments and their respective fields:

### 1. Cerebrospinal Fluid (CSF)
- **path**: Absolute path to the base directory containing CSF files.
- **glob_name_pattern**: File pattern to search for CSF files.

### 2. Grey Matter
- **path**: Absolute path to the base directory containing grey matter files.
- **glob_name_pattern**: File pattern to search for grey matter files.

### 3. White Matter
- **path**: Absolute path to the base directory containing white matter files.
- **glob_name_pattern**: File pattern to search for white matter files.

---

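**Instructions**: In the next cell, set `base_directory` and the three `*_glob_name_pattern` variables (grey matter, white matter, CSF). The import function uses them to fill the `path` and `glob_name_pattern` fields of each segment in `segments_dict`, so that the extraction process can locate and identify the appropriate neuroimaging files for further analysis.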

---

In [None]:
# Folder that is searched for the patient segment files.
base_directory = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/cat12/cat12_ultrafine-reg/CAT12.8.2_2170'
# One glob pattern per tissue class; each is passed together with base_directory to the importer.
# NOTE(review): mwp1/mwp2/mwp3 are presumably the CAT12 grey matter / white matter / CSF outputs -- confirm.
grey_matter_glob_name_pattern = '*/*/*/*mwp1*resampled*'
white_matter_glob_name_pattern = '*/*/*/*mwp2*resampled*'
csf_glob_name_pattern = '*/*/*/*mwp3*resampled*'

In [None]:
from calvin_utils.file_utils.import_matrices import import_matrices_from_folder

def import_dataframes_from_folders(base_directory, grey_matter_glob_name_pattern, white_matter_glob_name_pattern, csf_glob_name_pattern):
    """
    Load one table per tissue segment from a shared base directory.

    Each segment is read with `import_matrices_from_folder`, using the same
    `base_directory` and the glob pattern given for that segment. A short
    summary (row count, column count, column labels) is printed per segment.

    Parameters:
    - base_directory (str): Directory handed to the importer as `connectivity_path`.
    - grey_matter_glob_name_pattern (str): Glob pattern for the grey matter files.
    - white_matter_glob_name_pattern (str): Glob pattern for the white matter files.
    - csf_glob_name_pattern (str): Glob pattern for the cerebrospinal fluid files.

    Returns:
    - dict: Keys 'grey_matter', 'white_matter' and 'cerebrospinal_fluid', each
      mapped to whatever the importer returned for that segment.
    """
    pattern_by_segment = (
        ('grey_matter', grey_matter_glob_name_pattern),
        ('white_matter', white_matter_glob_name_pattern),
        ('cerebrospinal_fluid', csf_glob_name_pattern),
    )

    loaded = {}
    for segment, pattern in pattern_by_segment:
        table = import_matrices_from_folder(connectivity_path=base_directory, file_pattern=pattern)
        loaded[segment] = table

        # Rows are reported as voxels and columns as patients in the summary line.
        n_rows, n_cols = table.shape[0], table.shape[1]
        print(f'Imported data {segment} data with {n_rows} voxels and {n_cols} patients')
        print(f'These are the filenames per subject {table.columns}')
        print('--------------------------------')

    return loaded


In [None]:
# Load the patient grey matter, white matter and CSF tables using the directory and patterns set above.
dataframes_dict = import_dataframes_from_folders(base_directory, grey_matter_glob_name_pattern, white_matter_glob_name_pattern, csf_glob_name_pattern)

In [None]:
# #----------------------------------------------------------------DO NOT TOUCH----------------------------------------------------------------
# from calvin_utils.file_utils.import_matrices import import_matrices_from_folder

# segments_dict = {
#     'grey_matter': {'path': base_directory, 'glob_name_pattern': grey_matter_glob_name_pattern},
#     'white_matter': {'path': base_directory, 'glob_name_pattern': white_matter_glob_name_pattern},
#     'cerebrospinal_fluid': {'path': base_directory, 'glob_name_pattern': csf_glob_name_pattern}
# }

# dataframes_dict = {}
# for k, v in segments_dict.items():
#     dataframes_dict[k] = import_matrices_from_folder(connectivity_path=v['path'], file_pattern=v['glob_name_pattern']);
#     print(f'Imported data {k} data with {dataframes_dict[k].shape[0]} voxels and {dataframes_dict[k].shape[1]} patients')
#     print(f'These are the filenames per subject {dataframes_dict[k].columns}')
#     print('--------------------------------')

**Extract Subject ID From File Names**
Using the example filenames that have been printed above, please define a general string that:
1) Precedes the subject ID (`preceding_id`).
2) Follows the subject ID (`proceeding_id`).

In [None]:
# Text found immediately before the subject ID in each filename.
preceding_id = 'sub-'
# Text found immediately after the subject ID in each filename.
proceeding_id = '_ses'

In [None]:
def rename_dataframe_subjects(dataframes_dict, preceding_id, proceeding_id):
    """
    Relabel the subjects of every dataframe in `dataframes_dict`.

    Each dataframe is passed through `extract_and_rename_subject_id` with a
    split-command mapping of `{preceding_id: 1, proceeding_id: 0}`, and the
    result is stored back under the same key. The dictionary is therefore
    updated in place; the returned object is the very dictionary passed in.

    Parameters:
    - dataframes_dict (dict): Dataframes whose subjects should be renamed.
    - preceding_id (str): Delimiter mapped to 1 (presumably "keep the part after the split" -- confirm in calvin_utils).
    - proceeding_id (str): Delimiter mapped to 0 (presumably "keep the part before the split" -- confirm in calvin_utils).

    Returns:
    - dict: The same dictionary, now holding the renamed dataframes.
    """
    from calvin_utils.file_utils.dataframe_utilities import extract_and_rename_subject_id

    split_rules = {preceding_id: 1, proceeding_id: 0}

    # Only existing keys are reassigned, so iterating the dict while updating it is safe.
    for segment in dataframes_dict:
        renamed = extract_and_rename_subject_id(dataframe=dataframes_dict[segment], split_command_dict=split_rules)
        dataframes_dict[segment] = renamed
        print('Dataframe: ', segment)
        display(renamed)
        print('------------- \n')

    return dataframes_dict


In [None]:
# Relabel the patient columns by subject ID.
# NOTE: rename_dataframe_subjects updates dataframes_dict in place and returns it, so
# renamed_dfs and dataframes_dict refer to the same dictionary after this cell runs.
renamed_dfs = rename_dataframe_subjects(dataframes_dict, preceding_id, proceeding_id)

# Import Control Segments

In [None]:
# Folder that is searched for the control segment files.
# NOTE(review): this rebinds `base_directory`, which earlier pointed at the patient data;
# re-running the patient import cell after this one would read from the control folder.
base_directory = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/NIFTIS/true_control/cat_12_results/cat_12mri'
# One glob pattern per tissue class for the control cohort.
control_grey_matter_glob_name_pattern = '*mwp1*resampled*'
control_white_matter_glob_name_pattern = '*mwp2*resampled*'
control_csf_glob_name_pattern = '*mwp3*resampled*'

In [None]:
def import_control_dataframes(base_directory, control_grey_matter_glob_name_pattern, control_white_matter_glob_name_pattern, control_csf_glob_name_pattern):
    """
    Load one control-cohort table per tissue segment from a shared base directory.

    Each segment is read with `import_matrices_from_folder`, using the same
    `base_directory` and the glob pattern given for that segment. A short
    summary (row count, column count, last column label) is printed per segment.

    Parameters:
    - base_directory (str): Directory handed to the importer as `connectivity_path`.
    - control_grey_matter_glob_name_pattern (str): Glob pattern for the grey matter files.
    - control_white_matter_glob_name_pattern (str): Glob pattern for the white matter files.
    - control_csf_glob_name_pattern (str): Glob pattern for the cerebrospinal fluid files.

    Returns:
    - dict: Keys 'grey_matter', 'white_matter' and 'cerebrospinal_fluid', each
      mapped to whatever the importer returned for that segment.
    """
    from calvin_utils.file_utils.import_matrices import import_matrices_from_folder

    pattern_by_segment = (
        ('grey_matter', control_grey_matter_glob_name_pattern),
        ('white_matter', control_white_matter_glob_name_pattern),
        ('cerebrospinal_fluid', control_csf_glob_name_pattern),
    )

    loaded_controls = {}
    for segment, pattern in pattern_by_segment:
        table = import_matrices_from_folder(connectivity_path=base_directory, file_pattern=pattern)
        loaded_controls[segment] = table

        # Rows are reported as voxels and columns as patients in the summary line.
        n_rows, n_cols = table.shape[0], table.shape[1]
        print(f'Imported data {segment} data with {n_rows} voxels and {n_cols} patients')
        print(f'Example subject filename: {table.columns[-1]}')
        print('--------------------------------')

    return loaded_controls


In [None]:
# Load the control grey matter, white matter and CSF tables using the directory and patterns set above.
control_dataframes_dict = import_control_dataframes(base_directory, control_grey_matter_glob_name_pattern, control_white_matter_glob_name_pattern, control_csf_glob_name_pattern)

# Generate Z-Scored Atrophy Maps for Each Segment

In [None]:
from typing import Tuple
from nilearn import datasets
from nilearn import image
def threshold_probabilities(patient_df: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """
    Zero out every entry that is not strictly greater than `threshold`.

    Args:
    patient_df (pd.DataFrame): Values to be thresholded; the input is not modified.
    threshold (float): Entries must exceed this value to be kept.

    Returns:
    pd.DataFrame: A frame of the same shape with sub-threshold entries replaced by 0.
    """
    above_threshold = patient_df > threshold
    return patient_df.where(above_threshold, other=0)

def calculate_z_scores(control_df: pd.DataFrame, patient_df: pd.DataFrame, matter_type=None, threshold: float = 0.2) -> pd.DataFrame:
    """
    Calculate voxel-wise z-scores for each patient relative to the control group.

    Both inputs are first thresholded with `threshold_probabilities`, so that
    values not exceeding `threshold` are set to 0. The control mean and
    standard deviation are then taken across columns (one value per row), and
    every patient column is standardised against them.

    Args:
    control_df (pd.DataFrame): DataFrame where each column represents a control subject,
                               and each row represents flattened image data for a voxel.
    patient_df (pd.DataFrame): DataFrame where each column represents a patient,
                               and each row represents flattened image data for a voxel.
    matter_type: Currently unused; accepted for backward compatibility.
    threshold (float): Probability cut-off applied to both inputs before the
                       statistics are computed. Defaults to 0.2.

    Returns:
    patient_z_scores (pd.DataFrame): DataFrame of voxel-wise z-scores, one column per
                                     patient, calculated using the control mean and std.

    Note:
    Rows where the control standard deviation is 0 (for example rows that are 0 in
    every control after thresholding) are divided by zero and come out as NaN or
    +/-inf. They are left that way here; downstream code decides how to treat them.
    """
    # Mask both groups to values above the probability threshold.
    patient_df = threshold_probabilities(patient_df, threshold)
    control_df = threshold_probabilities(control_df, threshold)

    # Per-row (voxel) statistics across the control subjects.
    control_mean = control_df.mean(axis=1)
    control_std = control_df.std(axis=1)

    # Standardise all patient columns at once; axis=0 aligns the per-row
    # statistics with the row index, matching a column-by-column computation.
    patient_z_scores = patient_df.sub(control_mean, axis=0).div(control_std, axis=0)
    return patient_z_scores

In [None]:
def process_atrophy_dataframes(dataframes_dict, control_dataframes_dict, z_threshold=2):
    """
    Processes the provided dataframes to calculate z-scores and determine significant atrophy.

    For every key in `dataframes_dict`, the patient dataframe is z-scored against
    the control dataframe stored under the same key. A thresholded copy is then
    built in which values that do not pass the cut-off are replaced by 0:
    - 'cerebrospinal_fluid': keep z-scores greater than `z_threshold`.
    - every other key: keep z-scores less than `-z_threshold`.

    Parameters:
    - dataframes_dict (dict): Dictionary containing patient dataframes.
    - control_dataframes_dict (dict): Dictionary containing control dataframes; must
      contain every key present in `dataframes_dict`.
    - z_threshold (float): Magnitude of the z-score cut-off. Defaults to 2.

    Returns:
    - tuple: A tuple containing two dictionaries - atrophy_dataframes_dict (unthresholded
      z-scores) and significant_atrophy_dataframes_dict (thresholded z-scores).

    Raises:
    - KeyError: If a key of `dataframes_dict` is missing from `control_dataframes_dict`.
    """
    atrophy_dataframes_dict = {}
    significant_atrophy_dataframes_dict = {}

    for k, patient_df in dataframes_dict.items():
        z_scores = calculate_z_scores(control_df=control_dataframes_dict[k], patient_df=patient_df)

        # The sign of interest is reversed for CSF: only high positive z-scores are kept there.
        if k == 'cerebrospinal_fluid':
            is_significant = z_scores > z_threshold
        else:
            is_significant = z_scores < -z_threshold

        atrophy_dataframes_dict[k] = z_scores
        significant_atrophy_dataframes_dict[k] = z_scores.where(is_significant, 0)

        # Show the computed z-scores (not the raw input) so the result can be inspected.
        print('Dataframe: ', k)
        display(z_scores)
        print('------------- \n')

    return atrophy_dataframes_dict, significant_atrophy_dataframes_dict


In [None]:
# Z-score each patient against the controls for every segment, and build the thresholded maps as well.
unthresholded_atrophy_dataframes_dict, significant_atrophy_dataframes_dict = process_atrophy_dataframes(dataframes_dict, control_dataframes_dict)


**Derive Significant Atrophy Map**

In [None]:
import pandas as pd

def finalize_atrophy_dataframes(dataframes_dict):
    """
    Summates the absolute values of DataFrames within a dictionary
    and adds the summation as a new key-value pair with the key 'composite'.

    An existing 'composite' entry is ignored when summing and then overwritten,
    so calling this function again on the same dictionary gives the same result
    instead of folding the previous composite into the new one.

    Parameters:
    - dataframes_dict (dict): A dictionary containing DataFrames. It is updated in place.

    Returns:
    - dict: The input dictionary updated with the 'composite' key representing the
      summation of absolute values. If there is nothing to sum, 'composite' is an
      empty DataFrame.

    Example:
    >>> dfs = {
    ...     'a': pd.DataFrame({'col1': [-1, 2], 'col2': [3, -4]}),
    ...     'b': pd.DataFrame({'col1': [5, -6], 'col2': [-7, 8]})
    ... }
    >>> summed_dfs = finalize_atrophy_dataframes(dfs)
    >>> print(summed_dfs['composite'])
       col1  col2
    0     6    10
    1     8    12
    """
    composite_df = None
    for key, df in dataframes_dict.items():
        # Skip a composite left over from a previous call so it is not counted twice.
        if key == 'composite':
            continue

        abs_df = df.abs()  # abs() returns a new frame, so the input is never modified

        if composite_df is None or composite_df.empty:
            # Nothing accumulated yet: start from this frame.
            composite_df = abs_df
        else:
            composite_df = composite_df + abs_df

    if composite_df is None:
        composite_df = pd.DataFrame()

    # Add the composite DataFrame to the dictionary with key 'composite'
    dataframes_dict['composite'] = composite_df

    return dataframes_dict


In [None]:
# Add the 'composite' map to the thresholded results.
# NOTE: finalize_atrophy_dataframes updates its argument in place and returns it, so
# thresholded_atrophy_dataframes_dict and significant_atrophy_dataframes_dict are the same dictionary.
thresholded_atrophy_dataframes_dict = finalize_atrophy_dataframes(significant_atrophy_dataframes_dict)

**Save the Atrophy Results**

Save Raw Z-Scores

In [None]:
import os
from calvin_utils.nifti_utils.generate_nifti import view_and_save_nifti
from tqdm import tqdm

def save_nifti_to_bids(dataframes_dict, bids_base_dir, analysis='tissue_segment_z_scores', ses=None, dry_run=True):
    """
    Saves NIFTI images to a BIDS directory structure.

    Every column of every dataframe is written to
    `<bids_base_dir>/sub-<column>/ses-<ses>/<analysis>/` under the name
    `sub-<column>_<key>`, where `<key>` is the dataframe's dictionary key.

    Parameters:
    - dataframes_dict (dict): Dictionary containing dataframes with NIFTI data. Column
      labels are used as subject identifiers.
    - bids_base_dir (str): The base directory where the BIDS structure starts.
    - analysis (str): Name of the sub-folder created inside each session folder.
    - ses (str, optional): Session identifier. If None, defaults to '01'.
    - dry_run (bool): If True (the default), only print the target path of each image;
      no directory is created and nothing is saved. If False, create the output
      directory and hand the column to `view_and_save_nifti`.

    Example:
    >>> dfs = { ... }  # some dictionary with dataframes
    >>> save_nifti_to_bids(dfs, '/path/to/base/dir')                 # preview only
    >>> save_nifti_to_bids(dfs, '/path/to/base/dir', dry_run=False)  # actually save
    """
    # The session label does not depend on the loop variables, so resolve it once.
    ses_no = '01' if ses is None else ses

    for k in tqdm(dataframes_dict.keys()):
        for col in dataframes_dict[k].columns:

            # Define BIDS Directory Architecture
            sub_no = col
            out_dir = os.path.join(bids_base_dir, f'sub-{sub_no}', f'ses-{ses_no}', analysis)
            output_name = f'sub-{sub_no}_{k}'

            if dry_run:
                # A dry run must not touch the filesystem: report the target and move on.
                print(f'{out_dir}/{output_name}')
                continue

            # Initialize the save directory, then save the image into it.
            os.makedirs(out_dir, exist_ok=True)
            view_and_save_nifti(matrix=dataframes_dict[k][col],
                                out_dir=out_dir,
                                output_name=output_name)


In [None]:
# Root of the BIDS dataset that receives the per-subject output folders.
bids_base_dir = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/rawdata'
# Write the unthresholded z-score maps; dry_run=False means view_and_save_nifti is actually called.
save_nifti_to_bids(unthresholded_atrophy_dataframes_dict, bids_base_dir=bids_base_dir, analysis='tissue_segment_z_scores', dry_run=False);

# Save The Thresholded Data -- These are the Real Atrophy Seeds

In [None]:
# Write the thresholded maps (including 'composite') into their own analysis folder; dry_run=False means view_and_save_nifti is actually called.
save_nifti_to_bids(thresholded_atrophy_dataframes_dict, bids_base_dir=bids_base_dir, analysis='thresholded_tissue_segment_z_scores', dry_run=False);

All Done. Enjoy your atrophy seeds.

--Calvin