In [None]:
import os
import glob
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import platform
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns

from sklearn.decomposition import PCA

## Import Niftis from a CSV File

In [None]:
import pandas as pd
import numpy as np
import nibabel as nib

def csv_of_nifti_filepaths_to_dataframe(csv_path: str) -> pd.DataFrame:
    '''
    Loads every nifti file listed in a CSV and returns them as binarized, flattened columns.

    The first column of the CSV is read as the list of nifti file paths. The CSV is parsed
    with the default ``pd.read_csv`` settings, so its first row is consumed as a header.
    Each image is loaded, flattened, and binarized: voxels strictly greater than zero
    become 1, and everything else (including NaNs) becomes 0.

    Parameters:
    -----------
    csv_path : str
        Path to the CSV file containing paths to nifti files.

    Returns:
    --------
    pd.DataFrame
        A dataframe where columns represent flattened nifti files (in CSV row order)
        and rows represent voxels.
        All values are zero, except for lesions which are binarized at 1.

    Raises:
    -------
    ValueError
        If the CSV lists no file paths. ``np.column_stack`` also raises ValueError when
        the flattened images do not all have the same length.

    '''
    # Read the CSV file
    file_paths = pd.read_csv(csv_path)

    # Select the path column by position. `row[0]` on a label-indexed row depends on the
    # deprecated positional fallback of Series.__getitem__; `.iloc` is explicit.
    nifti_file_paths = file_paths.iloc[:, 0]
    if nifti_file_paths.empty:
        raise ValueError(f"No nifti file paths found in: {csv_path}")

    # Collect one flattened, binarized array per nifti file
    nifti_data = []
    for nifti_file_path in nifti_file_paths:
        # Load the nifti file and flatten its data into a 1D array
        flattened_array = nib.load(nifti_file_path).get_fdata().flatten()

        # Replace NaNs with zeros so they are treated as background
        flattened_array[np.isnan(flattened_array)] = 0

        # Binarize: anything above zero is lesion (1), the rest is background (0)
        nifti_data.append(np.where(flattened_array > 0, 1, 0))

    # One column per nifti file, one row per voxel
    return pd.DataFrame(np.column_stack(nifti_data))

# Import Niftis from a Folder

In [31]:
from calvin_utils.file_utils.import_matrices import import_matrices_from_folder
#set file path to'' if you have specified the full path to the nifti file itself
path_1 = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri'
df_1 = import_matrices_from_folder(path_1, file_pattern='mwp20002.nii')
# /Users/cu135/Dropbox (Partners HealthCare)/memory/functional_networks/ferguson_2019_networks/control_lesions/auditory_hallucination_lesions/sub-08uNodau1/roi/sub-08uNodau1_lesionMask.nii.gz
df_1

I will search:  /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri/mwp20002.nii


Unnamed: 0,mwp20002.nii
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
1749348,0.0
1749349,0.0
1749350,0.0
1749351,0.0


In [28]:
path_2 = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri'
df_2 = import_matrices_from_folder(path_2, file_pattern='mwp30002.nii')
df_2

I will search:  /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri/mwp30002.nii


Unnamed: 0,mwp30002.nii
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
1749348,0.0
1749349,0.0
1749350,0.0
1749351,0.0


## Downsample a Dataframe

In [18]:
import os
import glob
from nilearn import image as nli
from nilearn.image import resample_to_img
import nibabel as nib
import nibabel.processing
from tqdm import tqdm
from nimlab import datasets as nimds


def downsample_image(input_path, output_path):
    """
    Resample a single image onto the grid of the "mni_icbm152" template and save it.

    Args:
    input_path (str): Filepath to the input image.
    output_path (str): Filepath to save the output image.
    """
    # Source image, then the template that defines the target grid
    source_img = nib.load(input_path)
    template_img = nimds.get_img("mni_icbm152")

    # Resample onto the template and write the result straight to disk
    nib.save(resample_to_img(source_img, template_img), output_path)
    
mni_mask = nimds.get_img("mni_icbm152")
mask_data = mni_mask.get_fdata().flatten()

def downsample_to_mni152_images_in_folder(input_folder_pattern):
    """
    Run `downsample_image` on every file matching a glob pattern.

    Each output is written next to its input, with '_resampled' inserted before the
    extension ('.gz' files keep their double extension, e.g. '.nii.gz').

    Args:
    input_folder_pattern (str): Glob pattern to find the input images.

    Returns:
    None. Progress and the last output path are printed.
    """
    # Find all input image filepaths
    input_filepaths = glob.glob(input_folder_pattern)
    print('Will search:, ', input_folder_pattern)

    # Nothing matched: report it instead of failing on an unbound `output_path` below
    if not input_filepaths:
        print('No images found matching: ' + input_folder_pattern)
        return

    # Loop over each input image
    for input_path in tqdm(input_filepaths):
        # Split off the extension; for '.gz' also peel the inner extension so that
        # 'img.nii.gz' becomes 'img_resampled.nii.gz' rather than 'img.nii_resampled.gz'
        base, ext = os.path.splitext(input_path)
        if ext == '.gz':
            base, ext2 = os.path.splitext(base)
            ext = ext2 + ext
        output_path = base + '_resampled' + ext

        # Downsample the image
        downsample_image(input_path, output_path)
    print('Downsampled images saved to: ' + output_path)


# Usage:
# downsample_to_mni152_images_in_folder('/path/to/your/images/*/*/anat/*mwp1*.nii')

In [None]:
directory_to_check = '/Users/cu135/Dropbox (Partners HealthCare)/resources/atlases/MNI_structures/cortex'
file_pattern = '*'
#----------------------------------------------------------------DO NOT TOUCH
downsample_to_mni152_images_in_folder(os.path.join(directory_to_check, file_pattern))

## Mask a Dataframe

In [None]:
from nimlab import datasets as nimds
mni_mask = nimds.get_img("mni_icbm152")
mask_data = mni_mask.get_fdata().flatten()
brain_indices = np.where(mask_data > 0)[0]
df_1 = df_1.iloc[brain_indices, :]
# df_2 = df_2.iloc[brain_indices, :]

# print('Dataframes have been masked such that their shapes are: ', df_1.shape, df_2.shape)

In [None]:
df_1

# Resample a High Resolution Nifti to Another Space

In [15]:
import subprocess
import os

def reslice_roi_with_flirt(roi_path, template_path, output_path=None):
    """
    Reslice an ROI NIfTI image to match the resolution and space of a whole-brain template
    using FSL's FLIRT, maintaining its correct spatial location.

    FLIRT is run with `-applyxfm -usesqform` and FSL's identity matrix, which is looked up
    under `$FSLDIR/etc/flirtsch/ident.mat` (falling back to `/usr/local/fsl` when FSLDIR
    is not set).

    Parameters:
    - roi_path (str): Path to the ROI NIfTI image.
    - template_path (str): Path to the whole-brain template NIfTI image.
    - output_path (str, optional): Path for the output resliced NIfTI image. If not provided,
      the output will be saved in the same directory as `roi_path` with '_resliced' appended to the filename.

    Returns:
    - str: Path to the resliced image if successful, None otherwise (FLIRT exited with a
      non-zero code, or the `flirt` executable could not be found). The error is printed.
    """
    if output_path is None:
        # Generate output path by appending '_resliced' to the ROI filename
        roi_dir, roi_filename = os.path.split(roi_path)
        roi_basename, roi_ext = os.path.splitext(roi_filename)
        if roi_ext == '.gz':
            # Peel the inner extension too, so 'x.nii.gz' -> 'x_resliced.nii.gz'
            roi_basename, _ = os.path.splitext(roi_basename)
            roi_ext = '.nii.gz'
        output_path = os.path.join(roi_dir, f"{roi_basename}_resliced{roi_ext}")

    # Locate the identity matrix in the active FSL installation rather than assuming
    # FSL lives in /usr/local/fsl
    fsl_dir = os.environ.get('FSLDIR') or '/usr/local/fsl'
    identity_matrix_path = os.path.join(fsl_dir, 'etc', 'flirtsch', 'ident.mat')

    # Construct the FLIRT command for reslicing
    flirt_command = [
        'flirt',
        '-in', roi_path,
        '-ref', template_path,
        '-out', output_path,
        '-applyxfm', '-usesqform',
        '-init', identity_matrix_path
    ]

    # Execute the FLIRT command; a missing executable is reported like any other failure
    try:
        result = subprocess.run(flirt_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError as error:
        print(f"Error reslicing ROI with FLIRT: {error}")
        return None

    # Check for errors in the FLIRT command execution
    if result.returncode != 0:
        print(f"Error reslicing ROI with FLIRT: {result.stderr.decode('utf-8')}")
        return None

    return output_path




In [16]:
image_to_resample_path = '/Users/cu135/Library/CloudStorage/OneDrive-Personal/OneDrive_Documents/MATLAB/leaddbs/templates/space/MNI152NLin2009bAsym/atlases/DISTAL Nano (Ewert 2017)/rh/STN.nii.gz'
reference_image_path = '/Users/cu135/Library/CloudStorage/OneDrive-Personal/OneDrive_Documents/Work/Software/Research/nimlab/nimlab/data/MNI152_T1_2mm_brain_mask.nii'

In [17]:
reslice_roi_with_flirt(roi_path=image_to_resample_path, template_path=reference_image_path)

'/Users/cu135/Library/CloudStorage/OneDrive-Personal/OneDrive_Documents/MATLAB/leaddbs/templates/space/MNI152NLin2009bAsym/atlases/DISTAL Nano (Ewert 2017)/rh/STN_resliced.nii.gz'

# Mask Nifti By Another with FSL

In [2]:
import subprocess
import os

def mask_nifti(mask_path, image_path):
    """
    Mask one NIfTI image by another using `fslmaths -mas`.

    The result is written next to `image_path`, with '_masked' appended to the filename
    (a '.gz' input is written with a '.nii.gz' extension).

    Parameters:
    - mask_path (str): Path to the mask image passed to `-mas`.
    - image_path (str): Path to the image to be masked.

    Returns:
    - str: Path to the masked image, or None if fslmaths exited with a non-zero code
      (its stderr is printed in that case).
    """
    # Derive '<name>_masked<ext>' in the same folder as the input image
    folder, filename = os.path.split(image_path)
    stem, suffix = os.path.splitext(filename)
    if suffix == '.gz':
        # Compressed input: drop the inner extension as well and use '.nii.gz'
        stem = os.path.splitext(stem)[0]
        suffix = '.nii.gz'
    output_path = os.path.join(folder, f"{stem}_masked{suffix}")

    # Run fslmaths, capturing its output streams
    completed = subprocess.run(
        ['fslmaths', image_path, '-mas', mask_path, output_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Success: hand back where the masked image was written
    if completed.returncode == 0:
        return output_path

    # Failure: surface fslmaths' own error text
    print(f"Error running fslmaths: {completed.stderr.decode('utf-8')}")
    return None


In [18]:
mask_path = '/Users/cu135/hires_backdrops/STN_mni152.nii'
image_path = '/Users/cu135/Dropbox (Partners HealthCare)/resources/published_networks/niftis/Memory Network T Conn.nii'

In [19]:
masked_image_path = mask_nifti(mask_path=mask_path, image_path=image_path)
print(f"Masked image saved to: {masked_image_path}")

Masked image saved to: /Users/cu135/Dropbox (Partners HealthCare)/resources/published_networks/niftis/Memory Network T Conn_masked.nii


## Threshold a Dataframe

In [None]:
from calvin_utils.matrix_utilities import threshold_matrix 
threshold_1 = 2
threshold_2 = -2
#This will make everything NOT meeting the condition 0 
df_1 = df_1.where(df_1 < threshold_2, 0)
# df_2.where(df_2 < threshold_2, 0)
df_1

## Manipulate Coordinates

In [None]:
# Convert Coordinates to Index
from calvin_utils.matrix_utilities import convert_coordinate_to_index
from nimlab import datasets as nimds
#Mask within the brain
mni_mask = nimds.get_img("mni_icbm152")
mask_affine = mni_mask.affine

coordinate_tuple = (-2,30,56)
index = convert_coordinate_to_index(coordinate_tuple, mask_affine)
index

In [None]:
#Convert Index to Coordinates
from calvin_utils.matrix_utilities import convert_index_to_coordinate
from nimlab import datasets as nimds
#Mask within the brain
mni_mask = nimds.get_img("mni_icbm152")
mask_affine = mni_mask.affine

index_tuple = (46, 78, 64)
index = convert_index_to_coordinate(index_tuple, mask_affine)
index

In [None]:
#Convert Index (Voxel Coordinates) To Flat Array Index (1 dimensional array after running ___.flatten())
from calvin_utils.matrix_utilities import index_in_flattened_nifti
from nimlab import datasets as nimds
#Mask within the brain
mni_mask = nimds.get_img("mni_icbm152")
mask_shape = mni_mask.shape

index_tuple = (46, 78, 64)
index = index_in_flattened_nifti(index_tuple, mask_shape)
index

## Run FSL Cluster

In [None]:
from calvin_utils.run_fsl_cluster import run_fsl_cluster
run_fsl_cluster(path_1, outdir=out_dir)

# Calculate Percentage of Nifti Within a Mask

In [None]:
import numpy as np
from tqdm import tqdm
from nimlab import datasets as nimds

def calculate_ratio(non_binary_image, binary_mask):
    """
    Calculate the ratio of nonzero values in the non-binary image that coincide with
    nonzero values in the binary mask, to the total number of nonzero values in the
    non-binary image.

    Overlap is decided with boolean logic rather than by multiplying the two arrays, so
    a NaN or inf voxel lying outside the mask is not counted as overlapping (NaN * 0 is
    NaN, which is nonzero). NaN voxels still count as nonzero in the image itself.

    Parameters:
    - non_binary_image: NumPy array representing the non-binary image
    - binary_mask: NumPy array representing the binary mask

    Returns:
    - ratio: The calculated ratio, or 0 when the non-binary image has no nonzero values

    Raises:
    - ValueError: If the two arrays do not have the same shape
    """

    # Ensure that both images have the same shape
    if non_binary_image.shape != binary_mask.shape:
        raise ValueError("The shapes of the non-binary image and the binary mask must be the same.")

    # Voxels where the image carries a value
    image_nonzero = non_binary_image != 0

    # Count the number of nonzero values in the non-binary image
    count_non_binary_nonzero = np.count_nonzero(image_nonzero)

    # Nothing to overlap: avoid dividing by zero
    if count_non_binary_nonzero == 0:
        return 0

    # Count the image's nonzero voxels that fall inside the mask
    count_masked_nonzero = np.count_nonzero(image_nonzero & (binary_mask != 0))

    return count_masked_nonzero / count_non_binary_nonzero

class PermutationTest:
    """
    Permutation test for the overlap between an image and a mask.

    The observed statistic is `calculate_ratio(non_binary_image, binary_mask)`. The null
    distribution is built by shuffling the mask's values among the voxels where
    `mni_mask > 0` and recomputing the ratio against the unshuffled image.

    Parameters:
    - non_binary_image: image object exposing `get_fdata()` (e.g. a loaded NIfTI)
    - binary_mask: image object exposing `get_fdata()`, the mask whose overlap is tested
    - mni_mask: image object exposing `get_fdata()`; voxels > 0 define where shuffling happens
    - n_permutations: number of shuffles performed by `permute_and_calculate`

    NOTE(review): the three images are assumed to share one shape; nothing here resamples them.
    """

    def __init__(self, non_binary_image, binary_mask, mni_mask, n_permutations=1000):
        self.non_binary_image = non_binary_image.get_fdata()
        self.binary_mask = binary_mask.get_fdata()
        self.mni_mask = mni_mask.get_fdata()
        self.n_permutations = n_permutations
        self.permuted_ratios = []
        self.observed_ratio = calculate_ratio(self.non_binary_image, self.binary_mask)

    def permute_and_calculate(self):
        """Append `n_permutations` shuffled-mask ratios to `self.permuted_ratios`."""
        # `calculate_p_value` stores the ratios as an ndarray; go back to a list so
        # this method can be called again and keep appending
        self.permuted_ratios = list(self.permuted_ratios)

        # Find the indices where the MNI mask is non-zero (same for every permutation)
        mni_indices = np.where(self.mni_mask > 0)

        for _ in tqdm(range(self.n_permutations)):
            # Extract the corresponding values from the binary mask. Fancy indexing
            # returns a copy, so the original mask is left untouched by the shuffle.
            values_to_permute = self.binary_mask[mni_indices]

            # Shuffle only these values
            np.random.shuffle(values_to_permute)

            # Create a new mask with the shuffled values placed back into their original positions
            shuffled_mask = np.zeros_like(self.binary_mask)
            shuffled_mask[mni_indices] = values_to_permute

            # Calculate the ratio using the shuffled mask and add it to the list
            permuted_ratio = calculate_ratio(self.non_binary_image, shuffled_mask)
            self.permuted_ratios.append(permuted_ratio)

    def calculate_p_value(self):
        """Return the fraction of permuted ratios that are >= the observed ratio."""
        self.permuted_ratios = np.array(self.permuted_ratios)
        p_value = np.mean(self.permuted_ratios >= self.observed_ratio)
        return p_value

    def return_percent_overlap(self):
        """Return the observed ratio computed at construction time."""
        return self.observed_ratio

Run the cell below to compute the percent overlap for a single image/mask pair, along with its permutation p-value.

In [None]:
import nibabel as nib
# Load the NIfTI files
non_binary_img = nib.load('/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/response_topology/voxelwise_glm/stim_by_age/corrected_p_vals/nilean_corrected_fpr_p_values.nii.gz')
binary_mask_img = nib.load('/Users/cu135/Dropbox (Partners HealthCare)/resources/published_networks/memory_network_thresholded/above_t7_below_tneg7.nii')
permutation_tester = PermutationTest(non_binary_img, binary_mask_img, nimds.get_img("mni_icbm152"), n_permutations=10000)

# Perform permutations and calculate ratios
permutation_tester.permute_and_calculate()
p_value = permutation_tester.calculate_p_value()
percent_overlap = permutation_tester.return_percent_overlap()
# Calculate the p-valuea
print(f'{percent_overlap} percent overlap (p={p_value})')

Run to evaluate the 

## Generate Dice Coefficient

In [1]:
import pandas as pd
import numpy as np

def dice_coefficient(df1: pd.DataFrame, df2: pd.DataFrame) -> float:
    '''
    Calculates the Dice Coefficient between two dataframes containing binary lesion masks.

    Any non-zero element counts as part of a mask. Numpy arrays are accepted as well as
    dataframes.

    Parameters:
    -----------
    df1 : pd.DataFrame
        The first dataframe, where columns represent flattened nifti files and rows represent voxels.
        All values are zero, except for lesions which are binarized at 1.

    df2 : pd.DataFrame
        The second dataframe, where columns represent flattened nifti files and rows represent voxels.
        All values are zero, except for lesions which are binarized at 1.

    Returns:
    --------
    float
        The Dice Coefficient, a value between 0 and 1, where 1 represents a perfect overlap.
        Returns 0.0 when both inputs contain no non-zero elements.

    Raises:
    -------
    ValueError
        If the two inputs do not have the same shape.

    '''
    # Check if in numpy array, and convert the dataframes to numpy arrays if required
    array1 = df1 if isinstance(df1, np.ndarray) else df1.to_numpy()
    array2 = df2 if isinstance(df2, np.ndarray) else df2.to_numpy()

    # Refuse mismatched shapes: numpy would otherwise broadcast them (e.g. (N, 1) against
    # (1, N) becomes N x N) and return a meaningless coefficient
    if array1.shape != array2.shape:
        raise ValueError(
            f"Inputs must have the same shape, got {array1.shape} and {array2.shape}."
        )

    # Calculate the intersection of non-zero elements
    intersection = np.count_nonzero(np.logical_and(array1, array2))

    # Calculate the total number of non-zero elements across both arrays
    total_elements = np.count_nonzero(array1) + np.count_nonzero(array2)

    # Two empty masks would divide 0 by 0; report no overlap instead of NaN
    if total_elements == 0:
        return 0.0

    # Calculate the Dice Coefficient
    return float(2 * intersection / total_elements)


In [2]:
from calvin_utils.file_utils.import_matrices import import_matrices_from_folder
#set file path to'' if you have specified the full path to the nifti file itself
path_1 = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri'
GM = import_matrices_from_folder(path_1, file_pattern='mwp10002.nii')
# /Users/cu135/Dropbox (Partners HealthCare)/memory/functional_networks/ferguson_2019_networks/control_lesions/auditory_hallucination_lesions/sub-08uNodau1/roi/sub-08uNodau1_lesionMask.nii.gz
WM = import_matrices_from_folder(path_1, file_pattern='mwp20002.nii')
CSF = import_matrices_from_folder(path_1, file_pattern='mwp30002.nii')
BRAIN = import_matrices_from_folder(path_1, file_pattern='wm0002.nii')

I will search:  /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri/mwp10002.nii
I will search:  /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri/mwp20002.nii
I will search:  /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri/mwp30002.nii
I will search:  /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/vbm_derivatives/mri/wm0002.nii


In [4]:
from calvin_utils.nifti_utils.matrix_utilities import threshold_matrix
from calvin_utils.statistical_utils.fisher_z_transform import fisher_z_transform
from nimlab import datasets as nimds

GM = np.where(GM > 0.2 , 1, 0)
WM = np.where(WM > 0.2, 1, 0)
CSF = np.where(CSF > 0.2 , 1, 0)
BRAIN = np.where(BRAIN > 0.4 , 1, 0)

print("GM-brain:", dice_coefficient(GM, BRAIN))
print("WM-brain:", dice_coefficient(WM, BRAIN))
print("CSF-brain:", dice_coefficient(CSF, BRAIN))


#Make sure you specify what column you want. 
# observed_dice_coefficient = dice_coefficient(mx_1, mx_2)
# print('Dice coefficient:', observed_dice_coefficient)


GM-brain: 0.7526834141831452
WM-brain: 0.6080165467944493
CSF-brain: 0.26480460520131166


In [15]:
# Permute the Dice Coefficient
from calvin_utils.permutation_analysis_utils.permutation_utils.palm import brain_permutation
from tqdm import tqdm 

# Assuming df_1 and df_2 are your original dataframes
n_permutations = 1000
dice_coefficients = []
voxel_index = 0
for i in tqdm(range(n_permutations)):
    # Permute dataframes
    permuted_df_1 = brain_permutation(thresholded_df_1.copy().to_numpy().reshape(1,-1), looped_permutation=True)
    permuted_df_2 = brain_permutation(thresholded_df_2.copy().to_numpy().reshape(1,-1), looped_permutation=True)

    # Threshold and calculate the Dice coefficient for the permuted dataframes
    permuted_dice_coefficient = dice_coefficient(permuted_df_1, permuted_df_2)

    # Store the Dice coefficient
    dice_coefficients.append(permuted_dice_coefficient)

# Convert the list to a numpy array
dice_coefficients = np.array(dice_coefficients)

  0%|          | 0/1000 [00:00<?, ?it/s]


NameError: name 'thresholded_df_1' is not defined

In [None]:
#Same, but with multiprocessing
import concurrent.futures
from calvin_utils.matrix_utilities import dice_coefficient

n_permutations = 1000
dice_coefficients = []
voxel_index = 0

with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
    #Begin submitting the masked data to the permutor
    results = []
    for i in tqdm(range(n_permutations), desc="Jobs Launched"):
        permuted_df_1 = brain_permutation(thresholded_df_1.copy().to_numpy().reshape(1,-1), looped_permutation=True)
        permuted_df_2 = brain_permutation(thresholded_df_2.copy().to_numpy().reshape(1,-1), looped_permutation=True)
        
        result = executor.submit(dice_coefficient, permuted_df_1, permuted_df_2)
        results.append(result)
        
    progress_bar = tqdm(total=n_permutations, desc="Jobs Finalized")
    for result in concurrent.futures.as_completed(results):
        
        #Input the permuted data into the array
        permuted_dice_coefficient = result.result()
        dice_coefficients.append(permuted_dice_coefficient)
        
        #Update visualization
        progress_bar.update()
    progress_bar.close()

In [None]:
# Empirical p-value: the proportion of permuted Dice coefficients at least as large as the
# observed one. `np.asarray` is needed because `dice_coefficients` may still be a plain list.
# NOTE(review): `observed_dice_coefficient` is only assigned in commented-out code in an
# earlier cell; it must be defined before this cell will run.
empiric_p = np.mean(np.asarray(dice_coefficients) >= observed_dice_coefficient)
print('empiric p: ', empiric_p)

## Histogram of Lesion Incidence

In [None]:
def normalize(df):
    """
    Min-max scale `df` so that its minimum maps to 0 and its maximum maps to 1.

    Uses `df.min()` and `df.max()`, so a DataFrame is scaled column by column.
    """
    lowest = df.min()
    value_range = df.max() - lowest
    return (df - lowest) / value_range

# Sum across columns so each voxel (row) gets one value per dataset
summed_voxels = df_1.sum(axis=1)
summed_voxels2 = df_2.sum(axis=1)

# Each dataset keeps its own voxel index; the two dataframes may not have the same rows
# (for example when only one of them has been masked)
summed_voxels_df = pd.DataFrame({'Voxel_Index': summed_voxels.index, 'Summed_Voxel_Value': summed_voxels.values})
summed_voxels_df2 = pd.DataFrame({'Voxel_Index': summed_voxels2.index, 'Summed_Voxel_Value': summed_voxels2.values})


# Min-max normalize so the two datasets share a 0-1 scale
summed_voxels_df['Normalized_Summed_Voxel_Value'] = normalize(summed_voxels_df['Summed_Voxel_Value'])
summed_voxels_df2['Normalized_Summed_Voxel_Value'] = normalize(summed_voxels_df2['Summed_Voxel_Value'])
plt.figure(figsize=(12, 6))

# Plot the first DataFrame with normalized values
plt.plot(summed_voxels_df['Voxel_Index'], summed_voxels_df['Normalized_Summed_Voxel_Value'], label='Dataset 1')

# Plot the second DataFrame with normalized values
plt.plot(summed_voxels_df2['Voxel_Index'], summed_voxels_df2['Normalized_Summed_Voxel_Value'], label='Dataset 2')

plt.xlabel('Voxel Index')
plt.ylabel('Normalized Summed Voxel Value')
plt.title('Normalized Summed Voxel Values vs. Voxel Index')
plt.grid(True)
plt.legend()
plt.show()



## Generate a Heatmap from a CSV

In [None]:
df = pd.read_csv(path_1, index_col=False)
display(df)
#Create heatmap of correlation matrix
import seaborn as sns
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(15, 13))
sns.heatmap(df, square=True, linewidths=.5, cbar_kws={"shrink": .5})
#Save the Elbow Plot Above
save_dirsvg = os.path.join(out_dir, 'heatmap.svg')
save_dirpng = os.path.join(out_dir, 'heatmap.png')
fig.savefig(save_dirsvg)
fig.savefig(save_dirpng)
print(f'Fig saved to ', save_dirpng)

# Generate ROIs from a CSV of Coordinates

In [None]:
from calvin_utils.generate_nifti import read_coordinates_csv

coordinates_df = read_coordinates_csv(filename=path_1, radius=3)
coordinates_df

In [None]:
coordinates_df.to_csv(out_dir+'/coordinates_df.csv')

# Generate BIDS Directory from Subjects/Coordinates CSV

In [None]:
from calvin_utils.generate_nifti import read_subject_coordinates_csv

file_path_df = read_subject_coordinates_csv('/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/VOSS_TMS/Memory_Change_TMS_SimonKwon_to_CalvinHoward.csv', radius=12, method='concentric')
file_path_df.to_csv('/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/VOSS_TMS/Memory_Change_TMS_SimonKwon_to_CalvinHoward' + '_filepaths.csv')

# Add Several Niftis Together

In [None]:
from calvin_utils.generate_nifti import add_matrices_together
from calvin_utils.generate_nifti import view_and_save_nifti

summed_matrix = add_matrices_together(folder=path_1)
summed_matrix_img = view_and_save_nifti(summed_matrix, out_dir=path_1)
summed_matrix_img

## Threshold a Matrix By another Matrix

In [None]:
path_1 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/derivatives/r_maps/r_map/Age/age_to_grey_matter.nii'
path_2 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_gm_mask_resampled.nii'

In [None]:
from calvin_utils.nifti_utils.generate_nifti import threshold_matrix_by_another
# Threshold the matrix at path_1 by the matrix at path_2. Previously path_1 was passed for
# both arguments, leaving path_2 (defined in the cell above) unused.
# NOTE(review): confirm which argument threshold_matrix_by_another treats as the thresholding image.
thresholded_matrix = threshold_matrix_by_another(matrix_file_1=path_1, matrix_file_2=path_2, method='over_threshold', threshold=0.2)
thresholded_matrix

# Thresholding Matrices with the threshold_matrix Function
Introduction
_____
Matrix thresholding is a common operation in many scientific applications, especially in fields like neuroscience and image processing. The threshold_matrix function provides a flexible way to threshold matrices based on different criteria.

### Key Functionalities

**Direction of thresholding:**

- Keep values above a certain threshold.
- Keep values below a certain threshold.
- Keep values between two thresholds.

**Methods to determine the threshold:**

- Raw values.
- Z-score (probability-based).
- Percentile of the matrix values.

**Setting the thresholded values:**

- To zero.
- To NaN (Not a Number).

### Usage

**Basic syntax:**

```python
threshold_matrix(matrix, threshold, method, direction, output)
```

**Parameters:**

- `matrix` (np.array): The input matrix you want to threshold.
- `threshold` (float or tuple): The threshold value(s). If a tuple, it is interpreted as a range.
- `method` (str): How to determine the threshold. Can be 'raw', 'probability', or 'percentile'.
- `direction` (str or tuple): Can be 'keep_above', 'keep_below', 'keep_between', 'exclude_between' or a tuple for a range (e.g., (5, 95)).
- `output` (str): What to set the thresholded values to. Can be 'zero' or 'nan'.
____
Examples

Keep values above a raw threshold of 0.5 and set the rest to NaN:

```python
result = threshold_matrix(your_matrix, threshold=0.5, method='raw', direction='keep_above', output='nan')
```

Keep values between the 5th and 95th percentiles of the matrix and set the rest to 0:

```python
result = threshold_matrix(your_matrix, threshold=(5, 95), method='percentile', direction=(5, 95), output='zero')
```

Keep values below a z-score threshold of 0.05 (using probability) and set the rest to 0:

```python
result = threshold_matrix(your_matrix, threshold=0.05, method='probability', direction='keep_below', output='zero')
```



In [None]:
out_dir = '/Users/cu135/Dropbox (Partners HealthCare)/resources/published_networks/reich_2022_thresholded'

In [None]:
# fisher_transform = True
# #Fisher transform 
# from calvin_utils.statistical_utils.fisher_z_transform import fisher_z_transform
# if fisher_transform: 
#     df_1 = fisher_z_transform(df_1)

In [None]:
df_1.fillna(0, inplace=True)


In [None]:
from calvin_utils.nifti_utils.matrix_utilities import threshold_matrix
from calvin_utils.nifti_utils.generate_nifti import view_and_save_nifti

thresholded_df_1 = threshold_matrix(df_1, threshold=95, method='percentile', direction='keep_above', output='zero', mask_mode=False)
threhsodled_matrix_img = view_and_save_nifti(thresholded_df_1, out_dir=out_dir)
threhsodled_matrix_img

## Generate Matrix from a CSV

In [None]:
from nimlab import datasets as nimds

data_df = pd.read_csv(path_2)
if len(data_df) == 225222:
    mni_mask = nimds.get_img("mni_icbm152")
    mask_data = mni_mask.get_fdata().flatten()
    brain_indices = np.where(mask_data > 0)[0]
    mask_data[brain_indices] = data_df.iloc[:,-1]
    data_df = pd.DataFrame(mask_data)
display(data_df)
print(np.min(data_df))
print(np.max(data_df))

# data_df = (1/data_df)/10000
# data_df = data_df/np.max(data_df)

In [None]:
from calvin_utils.generate_nifti import view_and_save_nifti
matrix_img = view_and_save_nifti(data_df, out_dir)
matrix_img

# Generate Z-Scores for VBM Atrophy Maps

In [None]:
from typing import Tuple
from nilearn import datasets
from nilearn import image
def threshold_probabilities(patient_df: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """
    Return a copy of `patient_df` in which every value not strictly greater than
    `threshold` is replaced by 0. The input dataframe is left unchanged.
    """
    above_threshold = patient_df > threshold
    return patient_df.where(above_threshold, 0)

def calculate_z_scores(control_df: pd.DataFrame, patient_df: pd.DataFrame, matter_type=None) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Function to calculate voxel-wise mean, standard deviation for control group and z-scores for patient group.

    Both dataframes are first thresholded at 0.2 (values not above 0.2 are set to 0).
    The z-scores are then set to 0 for every voxel where the "mni_icbm152" mask is not
    greater than 0.

    Args:
    control_df (pd.DataFrame): DataFrame where each column represents a control subject,
                               and each row represents flattened image data for a voxel.
    patient_df (pd.DataFrame): DataFrame where each column represents a patient,
                               and each row represents flattened image data for a voxel.
    matter_type: Currently ignored. Tissue-specific masking (grey matter / white matter /
                 CSF) is not implemented; the MNI152 brain mask is always used.

    Returns:
    control_mean (pd.Series): Voxel-wise means calculated across the control group.
    control_std (pd.Series): Voxel-wise standard deviations calculated across the control group.
    patient_z_scores (pd.DataFrame): DataFrame of voxel-wise z-scores calculated for each patient using control mean and std.

    NOTE(review): the number of rows must equal the number of voxels in the flattened MNI152
    mask, so the inputs must not have been masked down to brain voxels beforehand.
    NOTE(review): voxels where the control standard deviation is 0 produce inf/NaN z-scores;
    these are only zeroed when they fall outside the brain mask.
    """

    # Mask the dataframes to only consider tissues over acceptable probability thresholds
    # Using p>0.2, as typical masking to MNI152 segments uses P > 0.2 for a given segment
    threshold = 0.2
    patient_df = threshold_probabilities(patient_df, threshold)
    control_df = threshold_probabilities(control_df, threshold)

    # Calculate mean and standard deviation for each voxel in control group
    control_mean = control_df.mean(axis=1)
    control_std = control_df.std(axis=1)

    # Calculate z-scores for all patients at once (row-aligned on the voxel index) instead
    # of growing a DataFrame one column at a time
    patient_z_scores = patient_df.sub(control_mean, axis=0).div(control_std, axis=0)

    # Set values back into brain_mask: zero every voxel outside the MNI152 brain mask
    mni_mask = nimds.get_img("mni_icbm152")
    mask_data = mni_mask.get_fdata().flatten()
    inside_brain = (mask_data > 0)[:, np.newaxis]
    patient_z_scores = pd.DataFrame(
        np.where(inside_brain, patient_z_scores.to_numpy(), 0),
        index=patient_z_scores.index,
        columns=patient_z_scores.columns,
    )
    print('Not sure what matter class to mask to, returning mask within MNI152 space')
    # TODO: honour `matter_type` ('grey_matter', 'white_matter', 'CSF') by masking to the
    # corresponding resampled MNI152 tissue mask (thresholded at 0.2) instead of the whole
    # brain mask, and raise ValueError for unsupported values.

    return control_mean, control_std, patient_z_scores


In [None]:
matter_type = 'csf'

#----------------------------------------------------------------DO NOT TOUCH ----------------------------------------------------------------

control_mean, control_std, patient_z_score_df = calculate_z_scores(control_df=df_2, patient_df=df_1, matter_type=matter_type)

# Plot pairplot and display descriptive statistics
# print(patient_z_score_df.describe())
display(patient_z_score_df)
# patient_z_score_df.to_csv(os.path.join(out_dir, 'z_scores.csv'))


In [None]:
# Save the atrophy files
character_after_subject_id = '-'
#--------------------------------DO NOT TOUCH--------------------------------------------------------
from calvin_utils.generate_nifti import nifti_from_matrix
root_dir = out_dir
for patient in patient_z_score_df.columns:
    subject = patient.split(character_after_subject_id)[0]
    out_dir = os.path.join(root_dir, ('sub-'+subject+f'/z_score_atrophy/{matter_type}'))
    nifti_from_matrix(patient_z_score_df[patient], output_file=out_dir, output_name=f'sub-{subject}')
    
    # /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/rawdata/sub-150/ses-01/anat

## Generate Z-Scores for Atrophy ROIs

In [None]:
from tqdm import tqdm

def compute_roi_z_scores(path_atrophy, path_control):
    """
    Z-score every brain-region column of the atrophy table against the control table.

    For each column other than 'names', the control group's mean and standard
    deviation (pandas defaults, i.e. sample standard deviation) are computed and
    the atrophy values are transformed as (value - control_mean) / control_std.
    A zero control standard deviation is not special-cased, so it yields
    inf/NaN in that column.

    Parameters:
    - path_atrophy: str, path to csv file for atrophy patients; must contain a
      'names' column plus one column per brain region
    - path_control: str, path to csv file for control group; must contain every
      brain-region column present in the atrophy file

    Returns:
    - df_atrophy_z_scored: DataFrame indexed by 'names', with z-scores in place
      of the original values and columns in the atrophy file's order

    Raises:
    - KeyError: if the atrophy file has no 'names' column, or the control file
      lacks one of the atrophy file's brain-region columns
    """

    # Load the data
    df_atrophy = pd.read_csv(path_atrophy)
    df_control = pd.read_csv(path_control)

    # Fail early (KeyError) if the identifier column is missing
    subject_names = df_atrophy['names']

    # Every column except the identifier is treated as a brain region
    region_columns = [column for column in df_atrophy.columns if column != 'names']

    # Control statistics for all regions at once (one value per region)
    control_mean = df_control[region_columns].mean()
    control_std = df_control[region_columns].std()

    # Vectorised z-score: the per-region Series align on the dataframe's column labels,
    # replacing the per-element Python lambda that was applied column by column.
    df_atrophy_z_scored = (df_atrophy[region_columns] - control_mean) / control_std

    # Index the result by subject name without mutating in place
    return df_atrophy_z_scored.set_index(subject_names)

In [None]:
# path_1 is passed as the atrophy-patient table, path_2 as the control table.
path_1 = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/cat12/ROI_mni_Cerebellum_Vgm.csv'
path_2 = r'/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/ADNI/NIFTIS/true_control/cat_12_results/roi_volumes/ROI_mni_Cerebellum_Vgm.csv'
# roi_name and roi_tissue only affect the name of the CSV written below.
roi_name = 'Cerebellum'
roi_tissue = 'Vgm'
#----------------------------------------------------------------DO NOT CHANGE
df_atrophy_z_scored = compute_roi_z_scores(path_1, path_2)
display(df_atrophy_z_scored)
# Destination file sits directly inside out_dir
z_score_csv_path = out_dir + f'/{roi_name}_{roi_tissue}_z_scores.csv'
df_atrophy_z_scored.to_csv(z_score_csv_path)
print('saved to: ', out_dir)

# Extract XML File Data

In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

def parse_xml(file_path):
    """
    Read the Vgm, Vwm and Vcsf entries from the first 'memory_roi' element of an XML file.

    Only direct children of the document root named 'memory_roi' are considered,
    and only the first one is used.

    Parameters:
    - file_path: path of the XML file to parse

    Returns:
    - tuple (Vgm, Vwm, Vcsf) holding the text of the 'data/Vgm', 'data/Vwm' and
      'data/Vcsf' tags, or None when the root has no 'memory_roi' child
    """
    document_root = ET.parse(file_path).getroot()

    # First matching child only; any later memory_roi elements are ignored
    first_roi = document_root.find('memory_roi')
    if first_roi is None:
        return None

    volume_tags = ('data/Vgm', 'data/Vwm', 'data/Vcsf')
    return tuple(first_roi.find(tag_path).text for tag_path in volume_tags)

def extract_data_from_xmls(directory):
    """
    Collect the values returned by parse_xml for every '.xml' file under a directory.

    The directory is walked recursively. Each '.xml' file contributes one row whose
    Patient_ID is the file name without its extension. Files for which parse_xml
    returns None are reported with a printed message and skipped.

    Parameters:
    - directory: root folder to search for XML files

    Returns:
    - DataFrame with columns ['Patient_ID', 'GM', 'WM', 'CSF'], one row per parsed
      file (empty, but with those columns, when nothing was parsed)
    """
    column_names = ['Patient_ID', 'GM', 'WM', 'CSF']

    # Rows are gathered in a list and the frame is built once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic anyway.
    records = []

    # Walk through the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.xml'):
                continue

            patient_id = os.path.splitext(file)[0]  # Use filename as patient ID
            file_path = os.path.join(root, file)

            volumes = parse_xml(file_path)
            if volumes is None:
                # parse_xml found nothing to report; unpacking None would raise TypeError
                print(f'Skipping {file_path}: no memory_roi data found')
                continue

            Vgm, Vwm, Vcsf = volumes
            records.append({'Patient_ID': patient_id, 'GM': Vgm, 'WM': Vwm, 'CSF': Vcsf})

    return pd.DataFrame(records, columns=column_names)

# Directory containing XML files (searched recursively by extract_data_from_xmls)
directory = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_AD_DBS_FORNIX/cat12/cat12_ultrafine-reg/CAT12.8.2_2170'

# Extract the data into a dataframe; the CSV export below is currently disabled
data = extract_data_from_xmls(directory)
# data.to_csv('patient_data.csv', index=False)


## Statistics

In [None]:
# Show df_1 as this cell's output
df_1

In [None]:
from nilearn import datasets
# Fetch the MNI152 white matter mask.
# NOTE(review): this comment previously said "1mm", but the call passes resolution=2
# (presumably 2 mm; confirm against the nilearn documentation).
white_matter_mask = datasets.load_mni152_wm_mask(resolution=2)

# Pull the voxel values out of the mask image; this overwrites any earlier `mask_data`
mask_data = white_matter_mask.get_fdata()


## Get Resources

In [None]:
# Load the grey matter image via load_mni152_gm_template (resolution=2) and write it to disk.
# NOTE(review): the loader is the *template* function, yet the variable and the output
# file are named "mask" — confirm which one is intended.
gray_matter_mask = datasets.load_mni152_gm_template(resolution=2)
gray_matter_mask.to_filename('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_gm_mask.nii')

# Same for white matter: loaded with load_mni152_wm_template and saved under a "mask" file name.
w_matter_mask = datasets.load_mni152_wm_template(resolution=2)
w_matter_mask.to_filename('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_wm_mask.nii')

In [None]:
from nilearn import plotting

# Loaded first so that the viewer below can be the final expression of the cell.
gray_matter_mask = datasets.load_mni152_gm_template(resolution=2)

# Resampled grey matter image read from disk; `image` is expected to be nilearn's
# image module imported in an earlier cell (it is not imported here).
mask_data = image.load_img('/Users/cu135/Dropbox (Partners HealthCare)/resources/mni_spaces/6th_gen/mni_152_gm_mask_resampled.nii')

# Jupyter only renders the value of a cell's LAST expression. With an assignment after
# this call the interactive viewer was built but never shown, so it now comes last.
plotting.view_img(mask_data, cut_coords=(0,0,0), black_bg=False, opacity=.75, cmap='ocean_hot')

## Power Analysis of Overlap R Map Method

In [None]:
import numpy as np
from tqdm import tqdm

# Simulation settings
n_voxels = 200000  # voxels in every simulated map
n_iterations = 10000  # simulations run for each map count
threshold = 0.3  # absolute value a voxel must exceed to count towards an overlap
np.random.seed(0)  # fixed seed so repeated runs draw the same numbers

# false positive rate keyed by the number of maps
results = {}

for n_maps in range(2, 14):  # map counts 2 through 13
    overlaps = 0  # iterations in which at least one voxel overlapped
    pbar = tqdm(total=n_iterations, desc=f'Processing {n_maps} maps')
    for _ in range(n_iterations):
        # draw n_maps independent noise maps (mean 0, standard deviation 0.2)
        maps = [np.random.normal(0, 0.2, n_voxels) for _ in range(n_maps)]
        # per map: which voxels exceed the threshold in absolute value
        above_threshold = [np.abs(simulated_map) > threshold for simulated_map in maps]
        # a voxel overlaps when it exceeds the threshold in every map
        overlapping_voxels = np.all(above_threshold, axis=0)
        if np.any(overlapping_voxels):
            overlaps += 1
        pbar.update()
    pbar.close()
    false_positive_rate = overlaps / n_iterations
    results[n_maps] = false_positive_rate

# report the rate obtained for each map count
for n_maps, rate in results.items():
    print(f'False positive rate for {n_maps} maps: {rate}')



In [None]:
# Show the false positive rate stored for each number of maps
results

## Calculate Damage Score

In [None]:
# Labels used only to build the output file name
damaging_thing = 'grey_matter'
things_damaged = 'hippocampus'
descriptor = 'all_patient'
#----------------------------------------------------------------DO NOT MODIFY!----------------------------------------------------------------
# One row per df_1 column (subject) and one column per df_2 column (matrix)
damage_df = pd.DataFrame(index=df_1.columns, columns=df_2.columns)

for matrix in damage_df.columns:
    # These depend only on the matrix, so compute them once per matrix
    # instead of once per (subject, matrix) pair.
    matrix_weights = df_2[matrix]
    inside_matrix = matrix_weights > 0
    for subject in damage_df.index:
        # Mask the subject dataframe to the matrix at hand
        intersection = df_1[subject].where(inside_matrix, 0)
        # Weight the overlapping components by multiplication
        weighted_overlap = intersection * matrix_weights
        # Assess overall damage score
        damage = weighted_overlap.sum()
        damage_df.loc[subject, matrix] = damage

# exist_ok avoids the check-then-create race and makes the cell safe to re-run
damage_dir = out_dir + '/damage_scores'
os.makedirs(damage_dir, exist_ok=True)

# Build the destination once so the saved path and the printed path cannot drift apart
damage_csv_path = out_dir + f'/damage_scores/{descriptor}_{things_damaged}_damage_scores_{damaging_thing}.csv'
damage_df.to_csv(damage_csv_path)
print('saved to: ', damage_csv_path)

display(damage_df)

# Rename Niftis

In [None]:
import glob
import os
import shutil

# Settings
filename = 'mwp1glanat.nii'  # suffix of the copied file: '<sub_id>-<filename>'
base_dir = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/neuroimaging'
folder_to_name_by = -4  # index into the split path that supplies the subject id
save = True  # when False the cell only prints what it would do

#----------------------------------------------------------------DO NOT MODIFY!----------------------------------------------------------------

# Recursively collect every 'mwp1glanat_resampled.nii' below base_dir.
# NOTE(review): the searched name is hard-coded and differs from `filename` above — confirm this is intended.
search_pattern = os.path.join(base_dir, '**', 'mwp1glanat_resampled.nii')
file_paths = glob.glob(search_pattern, recursive=True)
print(file_paths)
for file_path in file_paths:
    # Report the match
    print(f'Found file: {file_path}')

    # The subject id is one of the folder names in the file's path
    path_components = file_path.split(os.sep)
    sub_id = path_components[folder_to_name_by]
    print(f'Extracted sub-id: {sub_id}')

    # The copy is placed next to the source file, prefixed with the subject id
    containing_folder = os.path.dirname(file_path)
    new_file_path = os.path.join(containing_folder, f'{sub_id}-{filename}')
    print(f'Intended absolute filename: {new_file_path}')
    if save:
        # Copy, not move: the original file is left in place
        shutil.copy(file_path, new_file_path)


# Create Graphics