In [2]:
import os
import glob
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import platform
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns

from sklearn.decomposition import PCA

In [3]:
## Paths Input Here
# `analysis` names the sub-folder (next to path_1) that results are written into.
analysis = "z_score"
if platform.uname().system == 'Darwin': #------------------------------Mac OS X---------------------------------------------------------------
    path_1 = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/cat12/ROI_memory_roi_Vgm.csv'
    path_2 = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/NIFTIS/true_control/cat_12_results/roi_volumes/ROI_memory_roi_Vgm.csv'
    # clin_path = 'path to clinical values'
    out_dir = os.path.join(os.path.dirname(path_1), analysis)
    #out_dir = r'path to out dir here'
    #roi_names = '<path to roi name location>'
    print('I have set pathnames in the Mac style')
    print('I will save to:', out_dir)
else: #----------------------------------------------------------------Other platforms--------------------------------------------------------
    # No paths are configured for non-macOS systems. Fail here with an actionable
    # message instead of a NameError on `out_dir` a few lines further down.
    raise NotImplementedError(
        'No paths are configured for this platform. '
        'Define path_1, path_2 and out_dir in this branch before running the notebook.'
    )

# exist_ok=True makes the cell safe to re-run.
os.makedirs(out_dir, exist_ok=True)

I have set pathnames in the Mac style
I will save to: /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/cat12/z_score


## Import Niftis from a CSV File

In [None]:
import pandas as pd
import numpy as np
import nibabel as nib

def csv_of_nifti_filepaths_to_dataframe(csv_path: str) -> pd.DataFrame:
    '''
    Load every NIfTI file listed in a CSV and stack them into one binarized dataframe.

    The file paths are taken from the first column of the CSV. ``pd.read_csv`` is
    called with its defaults, so the first line of the CSV is consumed as a header
    and is NOT loaded as an image.

    Parameters:
    -----------
    csv_path : str
        Path to the CSV file containing paths to nifti files.

    Returns:
    --------
    pd.DataFrame
        One column per nifti file (in CSV order) and one row per voxel of the
        flattened image. Voxels with a value > 0 are set to 1; every other voxel,
        including NaN voxels, is set to 0. An empty dataframe is returned when the
        CSV lists no files.

    '''
    # Read the CSV file; the first column holds the image paths
    file_paths = pd.read_csv(csv_path)

    binarized_images = []
    # Select the column by position (.iloc) rather than with row[0], which relies on
    # the deprecated positional fallback of label-based Series indexing.
    for nifti_file_path in file_paths.iloc[:, 0]:
        # Load the image and flatten its voxel data to one dimension
        voxel_values = nib.load(nifti_file_path).get_fdata().flatten()

        # NaN > 0 evaluates to False, so NaN voxels land in the 0 class without a
        # separate replacement step.
        binarized_images.append(np.where(voxel_values > 0, 1, 0))

    # np.column_stack cannot stack an empty list
    if not binarized_images:
        return pd.DataFrame()

    # Each flattened image becomes one column
    return pd.DataFrame(np.column_stack(binarized_images))

# Import Niftis from a Folder

In [3]:
from calvin_utils.import_matrices import import_matrices_from_folder
#set file path to'' if you have specified the full path to the nifti file itself
# /sub-101/z_score_atrophy/CSF/sub-101_generated_nifti.nii
# /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/cat12/cat12_ultrafine-reg/CAT12.8.2_2170/sub-150/z_score_atrophy/white_matter
# /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/z_scored_segments
# Load every file matching the pattern below path_1 into df_1.
# NOTE(review): path_1 is assigned a CSV file path in the paths cell, and the recorded
# output of this cell shows the pattern being appended to a CSV path — presumably
# path_1 must point at a directory before this cell is run; confirm.
df_1 = import_matrices_from_folder(path_1, file_pattern='/*/z_score_atrophy/grey_matter/*.nii')
# /Users/cu135/Dropbox (Partners HealthCare)/memory/functional_networks/ferguson_2019_networks/control_lesions/auditory_hallucination_lesions/sub-08uNodau1/roi/sub-08uNodau1_lesionMask.nii.gz
df_1

I will search:  /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/cat12/cat12_ultrafine-reg/roi_volumes/ROI_memory_roi_Vgm.csv/*/z_score_atrophy/grey_matter/*.nii


In [4]:
# Load the files whose names contain "resampled" directly below path_2 into df_2.
# NOTE(review): the recorded output shows the pattern appended to a CSV path, so
# path_2 presumably has to be re-pointed at a directory first; confirm.
df_2 = import_matrices_from_folder(path_2, file_pattern='/*resampled*.nii')
df_2

I will search:  /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/NIFTIS/true_control/cat_12_results/roi_volumes/ROI_memory_roi_Vgm.csv/*resampled*.nii


## Downsample a Dataframe

In [5]:
import os
import glob
from nilearn import image as nli
from nilearn.image import resample_to_img
import nibabel as nib
import nibabel.processing
from tqdm import tqdm
from nimlab import datasets as nimds


def downsample_image(input_path, output_path):
    """
    Resample one image onto the nimlab "mni_icbm152" reference image and save it.

    Args:
    input_path (str): Filepath to the input image.
    output_path (str): Filepath to save the output image.
    """
    # Load the source image, then fetch the reference that defines the target grid,
    # resample, and write the result in a single step.
    nib.save(
        resample_to_img(nib.load(input_path), nimds.get_img("mni_icbm152")),
        output_path,
    )
    
# Notebook-level copy of the nimlab "mni_icbm152" image and its flattened voxel values.
# NOTE(review): the later cells that use these names re-create them first, so this
# pair looks redundant in this cell — confirm before removing.
mni_mask = nimds.get_img("mni_icbm152")
mask_data = mni_mask.get_fdata().flatten()

def downsample_to_mni152_images_in_folder(input_folder_pattern):
    """
    Run ``downsample_image`` on every file matched by a glob pattern.

    Each result is written next to its input with ``_resampled`` inserted before
    the extension; a trailing ``.gz`` is treated as part of a double extension
    (e.g. ``.nii.gz``).

    Args:
    input_folder_pattern (str): Glob pattern to find the input images.

    Returns:
    None. Progress and the last written path are printed.
    """
    # Find all input image filepaths
    input_filepaths = glob.glob(input_folder_pattern)
    print('Will search:, ', input_folder_pattern)

    if not input_filepaths:
        # With no matches the loop body never runs, so there is no output path to
        # report; say so instead of failing on an unbound variable.
        print('No images matched the pattern; nothing was downsampled.')
        return

    # Loop over each input image
    for input_path in tqdm(input_filepaths):
        # Build the output path, keeping double extensions such as .nii.gz intact
        base, ext = os.path.splitext(input_path)
        if ext == '.gz':
            base, inner_ext = os.path.splitext(base)
            ext = inner_ext + ext
        output_path = base + '_resampled' + ext

        # Downsample the image
        downsample_image(input_path, output_path)
    # Only the last written path is reported
    print('Downsampled images saved to: ' + output_path)


# Usage:
# downsample_to_mni152_images_in_folder('/path/to/your/images/*/*/anat/*mwp1*.nii')

In [None]:
# Resample every .nii file directly inside `directory_to_check`.
# NOTE: '*.nii' also matches files previously written with the '_resampled' suffix,
# so re-running this cell will resample those outputs again.
directory_to_check = path_2
file_pattern = '*.nii'
#----------------------------------------------------------------DO NOT TOUCH
downsample_to_mni152_images_in_folder(os.path.join(directory_to_check, file_pattern))

## Mask a Dataframe

In [None]:
from nimlab import datasets as nimds
mni_mask = nimds.get_img("mni_icbm152")
mask_data = mni_mask.get_fdata().flatten()
# Positions (in the flattened volume) of voxels with a mask value > 0
brain_indices = np.where(mask_data > 0)[0]


def _mask_to_brain(voxel_df):
    """Keep only the in-mask rows of a full-volume dataframe; pass an already masked one through."""
    if len(voxel_df) == brain_indices.size:
        # Already masked (e.g. this cell was run before); masking again would
        # index past the end of the frame.
        return voxel_df
    if len(voxel_df) != mask_data.size:
        raise ValueError(
            f'Expected {mask_data.size} rows (full volume) or {brain_indices.size} rows '
            f'(already masked), got {len(voxel_df)}.'
        )
    return voxel_df.iloc[brain_indices, :]


df_1 = _mask_to_brain(df_1)
df_2 = _mask_to_brain(df_2)

print('Dataframes have been masked such that their shapes are: ', df_1.shape, df_2.shape)

In [None]:
df_1

## Threshold a Dataframe

In [None]:
# Keep only values of df_1 that lie below threshold_2; every other value becomes 0.
# NOTE: threshold_matrix and threshold_1 are not used in this cell, and df_1 is
# overwritten in place, so the unthresholded data must be re-imported to undo this.
from calvin_utils.matrix_utilities import threshold_matrix 
threshold_1 = 2
threshold_2 = -2
# DataFrame.where keeps entries where the condition holds and writes 0 elsewhere.
df_1 = df_1.where(df_1 < threshold_2, 0)
# df_2.where(df_2 < threshold_2, 0)
df_1

## Manipulate Coordinates

In [None]:
# Convert Coordinates to Index
# Passes a coordinate triple and the affine of the nimlab "mni_icbm152" image to
# convert_coordinate_to_index and displays the result.
from calvin_utils.matrix_utilities import convert_coordinate_to_index
from nimlab import datasets as nimds
# The reference image supplies the affine used for the conversion
mni_mask = nimds.get_img("mni_icbm152")
mask_affine = mni_mask.affine

# presumably world-space (MNI, mm) coordinates — TODO confirm against the helper
coordinate_tuple = (-2,30,56)
index = convert_coordinate_to_index(coordinate_tuple, mask_affine)
index

In [None]:
#Convert Index to Coordinates
# Passes an index triple and the affine of the nimlab "mni_icbm152" image to
# convert_index_to_coordinate and displays the result.
from calvin_utils.matrix_utilities import convert_index_to_coordinate
from nimlab import datasets as nimds
# The reference image supplies the affine used for the conversion
mni_mask = nimds.get_img("mni_icbm152")
mask_affine = mni_mask.affine

index_tuple = (46, 78, 64)
# NOTE: despite its name, `index` receives the helper's output here (presumably a
# coordinate), and it overwrites the `index` set by the previous cell.
index = convert_index_to_coordinate(index_tuple, mask_affine)
index

In [None]:
#Convert Index (Voxel Coordinates) To Flat Array Index (1 dimensional array after running ___.flatten())
# Passes an index triple and the shape of the nimlab "mni_icbm152" image to
# index_in_flattened_nifti and displays the result.
from calvin_utils.matrix_utilities import index_in_flattened_nifti
from nimlab import datasets as nimds
# The reference image supplies the volume shape
mni_mask = nimds.get_img("mni_icbm152")
mask_shape = mni_mask.shape

index_tuple = (46, 78, 64)
index = index_in_flattened_nifti(index_tuple, mask_shape)
index

## Run FSL Cluster

In [None]:
from calvin_utils.run_fsl_cluster import run_fsl_cluster
run_fsl_cluster(path_1, outdir=out_dir)

## Generate Dice Coefficient

In [None]:
import pandas as pd
import numpy as np

def dice_coefficient(df1: pd.DataFrame, df2: pd.DataFrame) -> float:
    '''
    Calculates the Dice Coefficient between two masks, treating every non-zero value as "inside".

    Both inputs are flattened before comparison, so a Series of length n, an
    (n, 1) dataframe and a (1, n) array are all compared element by element
    instead of being broadcast against each other.

    Parameters:
    -----------
    df1 : pd.DataFrame, pd.Series or np.ndarray
        The first mask. Any non-zero entry counts as part of the mask.

    df2 : pd.DataFrame, pd.Series or np.ndarray
        The second mask, with the same number of elements as ``df1``.

    Returns:
    --------
    float
        2 * |A and B| / (|A| + |B|), a value between 0 and 1 where 1 is perfect
        overlap. NaN is returned when both masks are empty (0 / 0).

    Raises:
    -------
    ValueError
        If the two inputs do not contain the same number of elements.
    '''
    # Accept dataframes, series and arrays alike and compare them as flat vectors
    mask1 = np.asarray(df1).ravel()
    mask2 = np.asarray(df2).ravel()
    if mask1.size != mask2.size:
        raise ValueError(
            f'Masks must have the same number of elements, got {mask1.size} and {mask2.size}.'
        )

    # Number of non-zero elements in each mask
    total_nonzero = np.count_nonzero(mask1) + np.count_nonzero(mask2)
    if total_nonzero == 0:
        # Two empty masks: the ratio is undefined
        return float('nan')

    # Elements that are non-zero in both masks
    intersection = np.count_nonzero(np.logical_and(mask1, mask2))

    return (2 * intersection) / total_nonzero


In [None]:
from calvin_utils.matrix_utilities import threshold_matrix
from calvin_utils.fisher_z_transform import fisher_z_transform
from nimlab import datasets as nimds

# Set True to threshold each dataframe at a quantile of its own values before comparing
threshold=True
# Set True if df_1 is an R map that should be Fisher z-transformed
fish_transform=False
# Set True when df_1/df_2 each hold many maps (rows are summed before comparing);
# set False to compare exactly one map against one map
summate=False

if threshold:
    # Quantile used as the cut-off for both dataframes
    quantile_target =  0.95

    threshold_1 = np.quantile(df_1, quantile_target)
    threshold_2 = np.quantile(df_2, quantile_target)

    thresholded_df_1 = threshold_matrix(df_1, threshold = threshold_1, probability=False, direction='keep_greater')
    thresholded_df_2 = threshold_matrix(df_2, threshold = threshold_2, probability=False, direction='keep_greater')

    # Binarize the positive values that remain
    thresholded_df_1[thresholded_df_1 > 0] = 1
    thresholded_df_2[thresholded_df_2 > 0] = 1
else:
    # Without thresholding the maps are compared as loaded. Copies keep the
    # 'for_dice' column added below out of df_1/df_2.
    thresholded_df_1 = df_1.copy()
    thresholded_df_2 = df_2.copy()

# NOTE(review): this runs after thresholding and rebinds df_1 only, so it does not
# affect the Dice value computed below — confirm the intended placement.
if fish_transform:
    df_1 = fisher_z_transform(df_1)

if summate:
    # Collapse all maps of a dataframe into one column of per-voxel sums
    thresholded_df_1['for_dice'] = thresholded_df_1.sum(axis=1)
    thresholded_df_2['for_dice'] = thresholded_df_2.sum(axis=1)
else:
    # One map against one map: each dataframe must hold exactly one column
    for frame_name, frame in (('df_1', thresholded_df_1), ('df_2', thresholded_df_2)):
        if frame.shape[1] != 1:
            raise ValueError(
                f'{frame_name} holds {frame.shape[1]} maps; set summate=True or '
                'reduce it to a single column before computing the Dice coefficient.'
            )
    thresholded_df_1['for_dice'] = thresholded_df_1.iloc[:, 0]
    thresholded_df_2['for_dice'] = thresholded_df_2.iloc[:, 0]

#Dice Coefficient Calculation
#This can only compare TWO COLUMNS.
#Make sure you specify what column you want.
observed_dice_coefficient = dice_coefficient(thresholded_df_1['for_dice'], thresholded_df_2['for_dice'])
print('Dice coefficient:', observed_dice_coefficient)


In [None]:
# Permute the Dice Coefficient
# Builds a null distribution: for each permutation both thresholded dataframes are
# passed through brain_permutation and the Dice coefficient of the results is stored.
from calvin_utils.palm import brain_permutation
from tqdm import tqdm 

# Uses thresholded_df_1 / thresholded_df_2 from the previous cell.
# NOTE(review): those frames carry the extra 'for_dice' column added there, and
# reshape(1,-1) flattens every column into the permuted vector — confirm intended.
n_permutations = 1000
dice_coefficients = []
voxel_index = 0  # not used in this cell
for i in tqdm(range(n_permutations)):
    # Permute dataframes
    permuted_df_1 = brain_permutation(thresholded_df_1.copy().to_numpy().reshape(1,-1), looped_permutation=True)
    permuted_df_2 = brain_permutation(thresholded_df_2.copy().to_numpy().reshape(1,-1), looped_permutation=True)

    # Calculate the Dice coefficient for the permuted data (no re-thresholding happens here)
    permuted_dice_coefficient = dice_coefficient(permuted_df_1, permuted_df_2)

    # Store the Dice coefficient
    dice_coefficients.append(permuted_dice_coefficient)

# Convert the list to a numpy array
dice_coefficients = np.array(dice_coefficients)

In [None]:
#Same, but with multiprocessing
# NOTE: this import rebinds `dice_coefficient` to the calvin_utils implementation,
# replacing the function defined earlier in this notebook for all later cells.
# brain_permutation and tqdm are expected to be in scope from the previous cell.
import concurrent.futures
from calvin_utils.matrix_utilities import dice_coefficient

n_permutations = 1000
dice_coefficients = []
voxel_index = 0  # not used in this cell

with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
    # The permutations themselves run in this process, one per loop iteration;
    # only the dice_coefficient call is submitted to the worker pool.
    results = []
    for i in tqdm(range(n_permutations), desc="Jobs Launched"):
        permuted_df_1 = brain_permutation(thresholded_df_1.copy().to_numpy().reshape(1,-1), looped_permutation=True)
        permuted_df_2 = brain_permutation(thresholded_df_2.copy().to_numpy().reshape(1,-1), looped_permutation=True)
        
        result = executor.submit(dice_coefficient, permuted_df_1, permuted_df_2)
        results.append(result)
        
    progress_bar = tqdm(total=n_permutations, desc="Jobs Finalized")
    # Results are collected in completion order, not submission order
    for result in concurrent.futures.as_completed(results):
        
        # Store the Dice coefficient of the finished job
        # (dice_coefficients stays a plain list in this cell)
        permuted_dice_coefficient = result.result()
        dice_coefficients.append(permuted_dice_coefficient)
        
        #Update visualization
        progress_bar.update()
    progress_bar.close()

In [None]:
# Empirical one-sided p-value: the fraction of permuted Dice coefficients that are
# strictly greater than the observed one.
# np.asarray is required because the multiprocessing cell leaves dice_coefficients
# as a plain list, which cannot be compared with a float element-wise.
permuted_dice = np.asarray(dice_coefficients, dtype=float)
n_more_extreme = np.count_nonzero(permuted_dice > observed_dice_coefficient)
empirical_p = n_more_extreme / permuted_dice.size if permuted_dice.size else float('nan')
print('empiric p: ', empirical_p)

## Histogram of Lesion Incidence

In [None]:
def normalize(df):
    """Min-max rescale: subtract the minimum of `df` and divide by its (max - min) range."""
    lowest = df.min()
    value_range = df.max() - lowest
    return (df - lowest) / value_range

# Sum every voxel (row) across all maps (columns) of each dataset
summed_voxels = df_1.sum(axis=1)
summed_voxels2 = df_2.sum(axis=1)

# Each dataset is paired with its OWN voxel index, so the second curve stays correct
# even when df_1 and df_2 do not share an index.
summed_voxels_df = pd.DataFrame({'Voxel_Index': summed_voxels.index, 'Summed_Voxel_Value': summed_voxels.values})
summed_voxels_df2 = pd.DataFrame({'Voxel_Index': summed_voxels2.index, 'Summed_Voxel_Value': summed_voxels2.values})


# Rescale both curves with normalize() so they share one axis
summed_voxels_df['Normalized_Summed_Voxel_Value'] = normalize(summed_voxels_df['Summed_Voxel_Value'])
summed_voxels_df2['Normalized_Summed_Voxel_Value'] = normalize(summed_voxels_df2['Summed_Voxel_Value'])
plt.figure(figsize=(12, 6))

# Plot the first DataFrame with normalized values
plt.plot(summed_voxels_df['Voxel_Index'], summed_voxels_df['Normalized_Summed_Voxel_Value'], label='Dataset 1')

# Plot the second DataFrame with normalized values
plt.plot(summed_voxels_df2['Voxel_Index'], summed_voxels_df2['Normalized_Summed_Voxel_Value'], label='Dataset 2')

plt.xlabel('Voxel Index')
plt.ylabel('Normalized Summed Voxel Value')
plt.title('Normalized Summed Voxel Values vs. Voxel Index')
plt.grid(True)
plt.legend()
plt.show()



## Generate a Heatmap from a CSV

In [None]:
# Read the CSV at path_1 and draw its values directly as a heatmap.
df = pd.read_csv(path_1, index_col=False)
display(df)
# NOTE: the table is plotted as-is; no correlation matrix is computed in this cell.
import seaborn as sns
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(15, 13))
sns.heatmap(df, square=True, linewidths=.5, cbar_kws={"shrink": .5})
# Save the heatmap as both SVG and PNG in out_dir
save_dirsvg = os.path.join(out_dir, 'heatmap.svg')
save_dirpng = os.path.join(out_dir, 'heatmap.png')
fig.savefig(save_dirsvg)
fig.savefig(save_dirpng)
print(f'Fig saved to ', save_dirpng)

# Generate ROIs from a CSV of Coordinates

In [None]:
from calvin_utils.generate_nifti import read_coordinates_csv

coordinates_df = read_coordinates_csv(filename=path_1, radius=3)
coordinates_df

In [None]:
coordinates_df.to_csv(out_dir+'/coordinates_df.csv')

# Generate BIDS Directory from Subjects/Coordinates CSV

In [None]:
# Pass a subject/coordinate CSV to read_subject_coordinates_csv and save the dataframe
# it returns next to the input, with '_filepaths.csv' appended to the name.
# NOTE: the input path is hard-coded and written twice; both must be edited together.
from calvin_utils.generate_nifti import read_subject_coordinates_csv

file_path_df = read_subject_coordinates_csv('/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/VOSS_TMS/Memory_Change_TMS_SimonKwon_to_CalvinHoward.csv', radius=12, method='concentric')
file_path_df.to_csv('/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/VOSS_TMS/Memory_Change_TMS_SimonKwon_to_CalvinHoward' + '_filepaths.csv')

# Add Several Niftis Together

In [None]:
# Pass path_1 as the folder to add_matrices_together, then hand the result to
# view_and_save_nifti.
# NOTE(review): the output directory is path_1 itself, not out_dir — confirm that
# writing into the input folder is intended.
from calvin_utils.generate_nifti import add_matrices_together
from calvin_utils.generate_nifti import view_and_save_nifti

summed_matrix = add_matrices_together(folder=path_1)
summed_matrix_img = view_and_save_nifti(summed_matrix, out_dir=path_1)
summed_matrix_img

## Threshold Matrix by Another Matrix

In [None]:
# Call threshold_matrix_by_another with method='under_threshold' and threshold=0.05.
# NOTE(review): matrix_file_1 and matrix_file_2 are both path_1 here, so the matrix is
# thresholded by itself — presumably a placeholder; set the second path before use.
from calvin_utils.generate_nifti import threshold_matrix_by_another
thresholded_matrix = threshold_matrix_by_another(matrix_file_1=path_1, matrix_file_2=path_1, method='under_threshold', threshold=0.05)
thresholded_matrix

## Threshold a Matrix By a Percentile

In [None]:
# Threshold df_1 at a quantile of its own values and pass the result to view_and_save_nifti.
from calvin_utils.matrix_utilities import threshold_matrix
from calvin_utils.fisher_z_transform import fisher_z_transform
from calvin_utils.generate_nifti import view_and_save_nifti

fisher_transform = False
# Optional Fisher z-transform.
# NOTE: this overwrites df_1, so re-running the cell with the flag on transforms
# already transformed data.
if fisher_transform: 
    df_1 = fisher_z_transform(df_1)
    
# Quantile of all df_1 values that is used as the cut-off
quantile_target =  0.95

threshold_1 = np.quantile(df_1, quantile_target)

thresholded_df_1 = threshold_matrix(df_1, threshold = threshold_1, probability=False, direction='keep_greater')
# thresholded_df_1[thresholded_df_1 > 0] = 1
threhsodled_matrix_img = view_and_save_nifti(thresholded_df_1, out_dir=out_dir)
threhsodled_matrix_img

## Generate Matrix from a CSV

In [None]:
# Read a CSV into data_df; when it has exactly 225222 rows, write its last column
# into the in-mask voxels of the flattened "mni_icbm152" image.
from nimlab import datasets as nimds

data_df = pd.read_csv(path_2)
# presumably 225222 is the number of voxels with a value > 0 in the mni_icbm152
# mask — TODO confirm
if len(data_df) == 225222:
    mni_mask = nimds.get_img("mni_icbm152")
    mask_data = mni_mask.get_fdata().flatten()
    brain_indices = np.where(mask_data > 0)[0]
    # Only in-mask positions are overwritten; the remaining positions keep the
    # mask's own values.
    mask_data[brain_indices] = data_df.iloc[:,-1]
    data_df = pd.DataFrame(mask_data)
display(data_df)
print(np.min(data_df))
print(np.max(data_df))

# data_df = (1/data_df)/10000
# data_df = data_df/np.max(data_df)

In [None]:
from calvin_utils.generate_nifti import view_and_save_nifti
matrix_img = view_and_save_nifti(data_df, out_dir)
matrix_img

# Generate Z-Scores for VBM Atrophy Maps

In [None]:
from typing import Tuple
from nilearn import datasets
from nilearn import image
def threshold_probabilities(patient_df: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """Return a copy of `patient_df` in which every value that is not strictly above `threshold` is 0."""
    above_threshold = patient_df > threshold
    # mask() overwrites where the condition is True, hence the negation
    return patient_df.mask(~above_threshold, 0)

def calculate_z_scores(control_df: pd.DataFrame, patient_df: pd.DataFrame, matter_type=None) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Function to calculate voxel-wise mean, standard deviation for control group and z-scores for patient group.

    Both dataframes are first passed through ``threshold_probabilities`` with a
    threshold of 0.2, so values at or below 0.2 enter the statistics as 0. After the
    z-scores are computed, every voxel where the nimlab "mni_icbm152" image is not
    > 0 is set to 0. The dataframes must therefore have one row per voxel of that
    image (i.e. they must not have been reduced to in-brain rows beforehand).

    Args:
    control_df (pd.DataFrame): DataFrame where each column represents a control subject,
                               and each row represents flattened image data for a voxel.
    patient_df (pd.DataFrame): DataFrame where each column represents a patient,
                               and each row represents flattened image data for a voxel.
    matter_type: Accepted for interface compatibility but currently not used; the
                 tissue-specific masks below are commented out.

    Returns:
    control_mean: voxel-wise means calculated across the control group.
    control_std: voxel-wise standard deviations calculated across the control group.
    patient_z_scores (pd.DataFrame): voxel-wise z-scores for each patient using control mean and std.
    """
    # Imported here so the function does not depend on another cell having run first
    from nimlab import datasets as nimds

    # Mask the dataframes to only consider tissues over acceptable probability thresholds
    # Using p>0.2, as typical masking to MNI152 segments uses P > 0.2 for a given segment
    threshold = 0.2
    patient_df = threshold_probabilities(patient_df, threshold)
    control_df = threshold_probabilities(control_df, threshold)

    # Calculate mean and standard deviation for each voxel in control group
    control_mean = control_df.mean(axis=1)
    control_std = control_df.std(axis=1)

    # z = (patient - control mean) / control std for all patients at once, aligned on
    # the voxel index. NOTE: voxels with control_std == 0 produce NaN or +/-inf;
    # inside the brain mask these values are returned unchanged.
    patient_z_scores = patient_df.sub(control_mean, axis=0).div(control_std, axis=0)

    # Set values back into brain_mask
    # if matter_type == None:
    mni_mask = nimds.get_img("mni_icbm152")
    mask_data = mni_mask.get_fdata().flatten()

    def apply_mask(patient_column):
        # Keep z-scores where the reference image is > 0 and zero everything else
        return np.where(mask_data > 0, patient_column, 0)

    patient_z_scores = patient_z_scores.apply(apply_mask, axis=0)
    print('Not sure what matter class to mask to, returning mask within MNI152 space')
    # elif matter_type == 'grey_matter':
    #     mask_data = image.load_img('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_gm_mask_resampled.nii').get_fdata().flatten()
    #     patient_z_scores[mask_data < 0.2] = 0
    #     print('Masked to MNI152 Grey Matter')
    # elif matter_type == 'white_matter':
    #     mask_data = image.load_img('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_wm_mask_resampled.nii').get_fdata().flatten()
    #     patient_z_scores[mask_data < 0.2] = 0
    #     print('Masked to MNI152 White matter')
    # elif matter_type == 'CSF':
    #     mni_mask = nimds.get_img("mni_icbm152")
    #     mask_data = mni_mask.get_fdata().flatten()
    #     apply_mask = lambda patient_z_scores: np.where(mask_data > 0.2, patient_z_scores, 0)
    #     patient_z_scores = patient_z_scores.apply(apply_mask, axis=0)
    #     print('Masking within the MNI brain mask')
    # else:
    #     raise ValueError('Please select a valid matter_type: None, grey_matter, white_matter are currently supported')


    return control_mean, control_std, patient_z_scores


In [None]:
# Label for the tissue class being processed; it is reused by the saving cell below
# to build output folder names.
# NOTE: calculate_z_scores accepts matter_type but does not read it, so this value
# does not change the z-scores or the mask that is applied.
matter_type = 'csf'

#----------------------------------------------------------------DO NOT TOUCH ----------------------------------------------------------------

# df_2 is the control group, df_1 the patient group
control_mean, control_std, patient_z_score_df = calculate_z_scores(control_df=df_2, patient_df=df_1, matter_type=matter_type)

# Plot pairplot and display descriptive statistics
# print(patient_z_score_df.describe())
display(patient_z_score_df)
# patient_z_score_df.to_csv(os.path.join(out_dir, 'z_scores.csv'))


In [None]:
# Save the atrophy files
# The subject id is the part of each column name before this character
character_after_subject_id = '-'
#--------------------------------DO NOT TOUCH--------------------------------------------------------
from calvin_utils.generate_nifti import nifti_from_matrix
root_dir = out_dir
for patient in patient_z_score_df.columns:
    # str() lets non-string column labels (e.g. integers) be split as well
    subject = str(patient).split(character_after_subject_id)[0]
    # A dedicated name keeps the notebook-wide `out_dir` untouched: overwriting it
    # here would redirect every later cell into the last subject's folder and nest
    # the folders one level deeper each time this cell is re-run.
    subject_out_dir = os.path.join(root_dir, ('sub-'+subject+f'/z_score_atrophy/{matter_type}'))
    nifti_from_matrix(patient_z_score_df[patient], output_file=subject_out_dir, output_name=f'sub-{subject}')

    # /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/rawdata/sub-150/ses-01/anat

## Generate Z-Scores for Atrophy ROIs

In [4]:
from tqdm import tqdm

def compute_roi_z_scores(path_atrophy, path_control):
    """
    Function to compute z-scores for all brain regions for atrophy patients
    in comparison to control group.

    Every column of the atrophy CSV except 'names' is treated as a brain region and
    is z-scored against the mean and (sample) standard deviation of the column with
    the same name in the control CSV.

    Parameters:
    - path_atrophy: str, path to csv file for atrophy patients
    - path_control: str, path to csv file for control group

    Returns:
    - df_atrophy_z_scored: DataFrame indexed by 'names', atrophy patients data with
      z-scores in place of original values

    Raises:
    - KeyError: if a region column of the atrophy CSV is missing from the control
      CSV, or if the atrophy CSV has no 'names' column
    """

    # Load the data
    df_atrophy = pd.read_csv(path_atrophy)
    df_control = pd.read_csv(path_control)

    # Every column except the subject label is a brain region
    region_columns = [column for column in df_atrophy.columns if column != 'names']

    missing_in_control = [column for column in region_columns if column not in df_control.columns]
    if missing_in_control:
        raise KeyError(f'Regions missing from the control file: {missing_in_control}')

    # Control statistics for all regions at once
    control_mean = df_control[region_columns].mean()
    control_std = df_control[region_columns].std()

    # Vectorised z-score; mean and std are aligned to the region columns by name
    region_z_scores = (df_atrophy[region_columns] - control_mean) / control_std

    # Replace the original values with their z-scores in a copy
    df_atrophy_z_scored = df_atrophy.copy()
    for column in region_columns:
        df_atrophy_z_scored[column] = region_z_scores[column]

    # Set the index to 'names'
    return df_atrophy_z_scored.set_index('names')

In [15]:
# NOTE: path_1 and path_2 are re-pointed here, so cells that run afterwards see these
# ROI CSVs instead of the paths set in the configuration cell.
path_1 = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/cat12/ROI_mni_Cerebellum_Vgm.csv'
path_2 = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/NIFTIS/true_control/cat_12_results/roi_volumes/ROI_mni_Cerebellum_Vgm.csv'
# Used only to build the output file name
roi_name = 'Cerebellum'
roi_tissue = 'Vgm'
#----------------------------------------------------------------DO NOT CHANGE
# path_1 holds the patients, path_2 the controls
df_atrophy_z_scored = compute_roi_z_scores(path_1, path_2)
display(df_atrophy_z_scored)
# Written to <out_dir>/<roi_name>_<roi_tissue>_z_scores.csv
df_atrophy_z_scored.to_csv(os.path.join(out_dir + f'/{roi_name}_{roi_tissue}_z_scores.csv'))
print('saved to: ', out_dir)

100%|██████████| 2/2 [00:00<00:00, 3663.15it/s]


Unnamed: 0_level_0,ROI001
names,Unnamed: 1_level_1
glanat,3.272702
glanat,3.390599
glanat,4.522047
glanat,4.858367
glanat,2.805814
glanat,4.513402
glanat,3.633305
glanat,3.482619
glanat,3.587716
glanat,4.699325


saved to:  /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/cat12/z_score


# Extract XML File Data

In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

def parse_xml(file_path):
    """
    Read the Vgm, Vwm and Vcsf entries of the first ``memory_roi`` element in an XML file.

    Only ``memory_roi`` elements that are direct children of the document root are
    considered, and each value is looked up at ``data/<name>`` below that element.

    Args:
    file_path (str): Path to the XML file.

    Returns:
    tuple: ``(Vgm, Vwm, Vcsf)`` as the raw text of each element. An entry is None
    when its element is missing; ``(None, None, None)`` is returned when the file
    has no ``memory_roi`` element, so callers can always unpack three values.
    """
    # Parse XML file
    root = ET.parse(file_path).getroot()

    # First memory_roi directly below the root, if any
    memory_roi = root.find('memory_roi')
    if memory_roi is None:
        return None, None, None

    def _text_of(relative_path):
        """Text of the element at `relative_path` below memory_roi, or None if absent."""
        element = memory_roi.find(relative_path)
        return element.text if element is not None else None

    return _text_of('data/Vgm'), _text_of('data/Vwm'), _text_of('data/Vcsf')

def extract_data_from_xmls(directory):
    """
    Collect the values returned by ``parse_xml`` for every ``.xml`` file below a directory.

    Args:
    directory (str): Root folder; it is walked recursively.

    Returns:
    pd.DataFrame: One row per XML file with the columns 'Patient_ID' (file name
    without extension), 'GM', 'WM' and 'CSF'. Files for which ``parse_xml``
    yields nothing are kept, with None in the three value columns.
    """
    column_names = ['Patient_ID', 'GM', 'WM', 'CSF']

    # Rows are gathered in a list and the dataframe is built once at the end;
    # DataFrame.append no longer exists in pandas 2.x.
    records = []

    # Walk through the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.xml'):
                continue

            patient_id = os.path.splitext(file)[0]  # Use filename as patient ID
            file_path = os.path.join(root, file)

            # Tolerate a parser that returns None for a file without usable data
            volumes = parse_xml(file_path)
            if volumes is None:
                volumes = (None, None, None)
            Vgm, Vwm, Vcsf = volumes

            records.append({'Patient_ID': patient_id, 'GM': Vgm, 'WM': Vwm, 'CSF': Vcsf})

    return pd.DataFrame(records, columns=column_names)

# Directory containing XML files (hard-coded; searched recursively by extract_data_from_xmls)
directory = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/cat12/cat12_ultrafine-reg/CAT12.8.2_2170'

# Extract the data into a dataframe; saving to CSV is currently disabled below
data = extract_data_from_xmls(directory)
# data.to_csv('patient_data.csv', index=False)


## Statistics

In [None]:
df_1

In [None]:
from nilearn import datasets
# Load the MNI152 white matter mask, requested with resolution=2
white_matter_mask = datasets.load_mni152_wm_mask(resolution=2)

# Voxel data of the mask as an array.
# NOTE: this rebinds `mask_data`, a name other cells use for the flattened
# mni_icbm152 mask.
mask_data = white_matter_mask.get_fdata()


## Get Resources

In [None]:
# Write the nilearn MNI152 grey- and white-matter images (resolution=2) to disk.
# NOTE(review): the loaders called here are the *_template functions, while the
# variables and output files are named "mask" — confirm that templates, not masks,
# are what should be saved.
# `datasets` is expected to be in scope from an earlier `from nilearn import datasets`.
gray_matter_mask = datasets.load_mni152_gm_template(resolution=2)
gray_matter_mask.to_filename('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_gm_mask.nii')

w_matter_mask = datasets.load_mni152_wm_template(resolution=2)
w_matter_mask.to_filename('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_wm_mask.nii')

In [None]:
# All three nilearn modules are imported here so the cell also runs on a fresh kernel
from nilearn import datasets, image, plotting

mask_data = image.load_img('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_gm_mask_resampled.nii')
gray_matter_mask = datasets.load_mni152_gm_template(resolution=2)

# The viewer must be the LAST expression of the cell; Jupyter only renders the value
# of the final expression, so a statement after it would hide the view.
plotting.view_img(mask_data, cut_coords=(0,0,0), black_bg=False, opacity=.75, cmap='ocean_hot')

## Power Analysis of Overlap R Map Method

In [None]:
import numpy as np
from tqdm import tqdm

# Simulation: how often do n_maps independent random maps all exceed the threshold
# (in absolute value) at one voxel or more?
n_voxels = 200000  # number of voxels per map
n_iterations = 10000  # number of iterations
threshold = 0.3  # threshold for overlap
np.random.seed(0)  # set seed for reproducibility

# store results: {number of maps: fraction of iterations with an overlap}
results = {}

for n_maps in range(2, 14):  # for each number of maps from 2 to 13
    overlaps = 0  # counter for number of overlaps
    pbar = tqdm(total=n_iterations, desc=f'Processing {n_maps} maps')
    for _ in range(n_iterations):
        # generate n_maps random maps (normal, mean 0, standard deviation 0.2)
        maps = [np.random.normal(0, 0.2, n_voxels) for _ in range(n_maps)]
        # an overlap is a voxel where |value| > threshold in every map
        if np.any(np.all([np.abs(map) > threshold for map in maps], axis=0)):
            overlaps += 1
        pbar.update()
    pbar.close()
    false_positive_rate = overlaps / n_iterations
    results[n_maps] = false_positive_rate

# print results
for n_maps, rate in results.items():
    print(f'False positive rate for {n_maps} maps: {rate}')



In [None]:
results

## Calculate Damage Score

In [None]:
# These three labels are only used to build the output file name
damaging_thing = 'grey_matter'
things_damaged = 'hippocampus'
descriptor = 'all_patient'
#----------------------------------------------------------------DO NOT MODIFY!----------------------------------------------------------------
# Damage score of a subject (column of df_1) for a matrix (column of df_2):
# the sum over voxels of subject_value * matrix_value, counting only voxels where
# the matrix value is > 0.
damage_df = pd.DataFrame(index=df_1.columns, columns=df_2.columns, dtype=float)

for matrix in damage_df.columns:
    # Weights of this matrix with every non-positive voxel zeroed; computed once per
    # matrix instead of once per subject.
    positive_weights = df_2[matrix].where(df_2[matrix] > 0, 0)
    # Weight every subject's voxels and sum per subject (one value per df_1 column)
    damage_df[matrix] = df_1.mul(positive_weights, axis=0).sum(axis=0)

# Build the output location once; exist_ok makes the cell safe to re-run
damage_dir = out_dir + '/damage_scores'
os.makedirs(damage_dir, exist_ok=True)
damage_csv = damage_dir + f'/{descriptor}_{things_damaged}_damage_scores_{damaging_thing}.csv'

damage_df.to_csv(damage_csv)
print('saved to: ', damage_csv)

display(damage_df)

# Rename Niftis

In [None]:
import glob
import os
import shutil

# Suffix of the new file name; the result is '<sub_id>-<filename>'
filename = 'mwp1glanat.nii'
base_dir = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/neuroimaging'
# Which path component (counted from the end of the file path) holds the subject id
folder_to_name_by = -4
# When False the cell only prints what it would do
save = True

#----------------------------------------------------------------DO NOT MODIFY!----------------------------------------------------------------

# Recursively find every 'mwp1glanat_resampled.nii' below base_dir.
# NOTE(review): the searched name is hard-coded and differs from `filename` above —
# confirm that the resampled files are the intended inputs.
file_paths = glob.glob(os.path.join(base_dir, '**', 'mwp1glanat_resampled.nii'), recursive=True)
print(file_paths)
for file_path in file_paths:
    # Print the found file
    print(f'Found file: {file_path}')
    
    # Extract the sub-id from the chosen path component
    sub_id = file_path.split(os.sep)[folder_to_name_by]
    print(f'Extracted sub-id: {sub_id}')

    # Construct the new file path in the same folder as the source file
    new_file_path = os.path.join(os.path.dirname(file_path), f'{sub_id}-{filename}')
    print(f'Intended absolute filename: {new_file_path}')
    if save:
        # The file is copied, not moved: the original stays in place
        shutil.copy(file_path, new_file_path)
