In [None]:
import numpy as np
from scipy.stats import pearsonr

def correlate_npy_files(file1_path, file2_path):
    """
    Compute the Pearson correlation between two equally shaped ``.npy`` arrays.

    Both arrays are loaded, every non-finite entry (NaN, +inf, -inf) is
    replaced with 0, the arrays are flattened, and the element-wise Pearson
    correlation is returned.

    Parameters:
    -----------
    file1_path : str
        Path to the first ``.npy`` file.
    file2_path : str
        Path to the second ``.npy`` file.

    Returns:
    --------
    correlation : float
        Pearson correlation coefficient of the flattened arrays.
    p_value : float
        Two-sided p-value reported by ``scipy.stats.pearsonr``.

    Raises:
    -------
    ValueError
        If the two arrays do not have the same shape.
    """
    # Load the .npy files
    data1 = np.load(file1_path)
    data2 = np.load(file2_path)

    # Ensure the data shapes are compatible for correlation
    if data1.shape != data2.shape:
        raise ValueError("The shapes of the two datasets do not match.")

    flattened = []
    for label, data in (('data1', data1), ('data2', data2)):
        # Test for *any* non-finite value: the previous np.isnan-only check let
        # arrays containing inf (but no NaN) reach pearsonr unsanitised, even
        # though nan_to_num below is configured to zero out inf as well.
        if not np.isfinite(data).all():
            print(f'non-finite values in {label}')
            data = np.nan_to_num(data, nan=0, posinf=0, neginf=0)
            print(np.max(data), np.min(data), np.median(data))
        # pearsonr expects 1D inputs
        flattened.append(data.ravel())

    # Calculate the Pearson correlation coefficient
    correlation, p_value = pearsonr(flattened[0], flattened[1])

    return correlation, p_value

# Example usage: correlate two saved `niftis.npy` arrays (one under tmp_tnsr,
# one under tmp_ols) and print the resulting coefficient and p-value.
# NOTE(review): these are absolute, user-specific paths; the cell will fail on
# any machine where they do not exist.
file1_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_02/tmp_tnsr/sante_memory/niftis.npy'
file2_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_02/tmp_ols/sante_memory/niftis.npy'

correlation, p_value = correlate_npy_files(file1_path, file2_path)
print(f"Correlation: {correlation}, p-value: {p_value}")

In [None]:
import numpy as np
from scipy.stats import spearmanr
import time

def vectorized_rankdata(a):
    """
    Vectorized ranking function using NumPy.

    Ranks are 1-based. Tied values all receive the mean of the ranks they
    span (the "average" method), which is the convention Spearman's rho
    relies on; assigning arbitrary distinct ranks to ties biases the result.

    Parameters:
    -----------
    a : np.array
        1D input array (or sequence) to be ranked.

    Returns:
    --------
    ranks : np.array
        1D float array of ranks, in the same order as ``a``.

    Raises:
    -------
    ValueError
        If ``a`` is not one-dimensional.
    """
    values = np.asarray(a)
    if values.ndim != 1:
        raise ValueError("vectorized_rankdata expects a 1D array.")
    if values.size == 0:
        return np.empty(0, dtype=float)

    # Stable sort so equal values keep a deterministic relative order.
    order = np.argsort(values, kind='stable')
    sorted_values = values[order]

    # True wherever a new run of equal values begins in the sorted data.
    run_starts = np.r_[True, sorted_values[1:] != sorted_values[:-1]]
    # 1-based id of the tie group each sorted element belongs to.
    group_id = np.cumsum(run_starts)
    # Sorted-position boundaries of each tie group (with a closing sentinel).
    bounds = np.r_[np.flatnonzero(run_starts), values.size]

    # A group covering sorted positions [lo, hi) spans ranks lo+1 .. hi,
    # whose mean is (lo + hi + 1) / 2.
    mean_rank = 0.5 * (bounds[group_id] + bounds[group_id - 1] + 1)

    ranks = np.empty(values.size, dtype=float)
    ranks[order] = mean_rank
    return ranks

def _average_ranks_by_column(values):
    """Rank each column of a 2D array (1-based); ties receive their mean rank."""
    n_rows = values.shape[0]
    order = np.argsort(values, axis=0, kind='stable')
    sorted_values = np.take_along_axis(values, order, axis=0)
    positions = np.arange(1, n_rows + 1, dtype=float).reshape(-1, 1)

    # Mark the first and last sorted position of every run of equal values.
    run_starts = np.ones(sorted_values.shape, dtype=bool)
    run_starts[1:] = sorted_values[1:] != sorted_values[:-1]
    run_ends = np.ones(sorted_values.shape, dtype=bool)
    run_ends[:-1] = run_starts[1:]

    # Propagate each run's first position downwards and last position upwards,
    # so every element knows the rank range its tie group spans.
    first_pos = np.maximum.accumulate(np.where(run_starts, positions, 0.0), axis=0)
    last_pos = np.minimum.accumulate(
        np.where(run_ends, positions, n_rows + 1.0)[::-1], axis=0
    )[::-1]

    ranks = np.empty(values.shape, dtype=float)
    np.put_along_axis(ranks, order, (first_pos + last_pos) / 2.0, axis=0)
    return ranks


def vectorized_spearmanr(niftis, indep_var):
    """
    Calculate the Spearman rank-order correlation coefficient for each voxel
    in a fully vectorized manner.

    Rho is computed as the Pearson correlation of the (tie-averaged) ranks.
    The shortcut ``1 - 6 * sum(d^2) / (n * (n^2 - 1))`` is only exact when no
    values are tied, so it is not used here.

    Parameters:
    -----------
    niftis : np.array
        2D array where each row represents a subject and each column represents a voxel.
    indep_var : np.array
        Array representing the independent variable for each subject; it is
        flattened, so shapes ``(n,)`` and ``(n, 1)`` are both accepted.

    Returns:
    --------
    rho : np.array
        1D array of Spearman's rank correlation coefficients for each voxel.
        A voxel (or independent variable) with no variation yields NaN.

    Raises:
    -------
    ValueError
        If ``niftis`` is not 2D or its row count differs from ``len(indep_var)``.
    """
    niftis = np.asarray(niftis, dtype=float)
    indep_var = np.asarray(indep_var, dtype=float).ravel()
    if niftis.ndim != 2:
        raise ValueError("niftis must be a 2D array of shape (subjects, voxels).")
    if niftis.shape[0] != indep_var.shape[0]:
        raise ValueError("niftis and indep_var must have the same number of subjects.")

    # Number of observations
    n = niftis.shape[0]

    # Rank the data (ties share their average rank)
    ranked_niftis = _average_ranks_by_column(niftis)
    ranked_indep_var = _average_ranks_by_column(indep_var[:, np.newaxis])

    # Average ranks always have mean (n + 1) / 2, so centre with that constant.
    mean_rank = (n + 1) / 2.0
    centered_niftis = ranked_niftis - mean_rank
    centered_indep_var = ranked_indep_var - mean_rank

    covariance = np.sum(centered_niftis * centered_indep_var, axis=0)
    voxel_ss = np.sum(centered_niftis ** 2, axis=0)
    indep_ss = np.sum(centered_indep_var ** 2)

    # Constant inputs give 0/0; return NaN for those quietly.
    with np.errstate(divide='ignore', invalid='ignore'):
        rho = covariance / np.sqrt(voxel_ss * indep_ss)

    return rho

# Benchmark: compare vectorized_spearmanr against a per-voxel scipy loop on
# random data, reporting wall-clock time and agreement of the coefficients.

# Generate synthetic data
# Seeds NumPy's global RNG so the random inputs are reproducible.
np.random.seed(42)
n_observations = 100
n_voxels = 200000
niftis = np.random.rand(n_observations, n_voxels)
indep_var = np.random.rand(n_observations)

# Vectorized Spearman's Rho
start_time = time.time()
vectorized_rho = vectorized_spearmanr(niftis, indep_var)
vectorized_time = time.time() - start_time

# Looped Spearman's Rho using scipy.stats
# One spearmanr call per voxel column; index [0] keeps the coefficient only.
start_time = time.time()
looped_rho = np.zeros(n_voxels)
for i in range(n_voxels):
    looped_rho[i] = spearmanr(niftis[:, i], indep_var)[0]
looped_time = time.time() - start_time

# Compare the results
print(f"Vectorized Spearman's Rho Time: {vectorized_time:.4f} seconds")
print(f"Looped Spearman's Rho Time: {looped_time:.4f} seconds")
# Mean absolute difference between the two sets of coefficients.
print(f"Difference in Rho values: {np.mean(np.abs(vectorized_rho - looped_rho)):.6f}")

# Check if the results are close
print(f"Are the results close? {np.allclose(vectorized_rho, looped_rho)}")

In [None]:
import json
import numpy as np
import time
from scipy.stats import spearmanr
from calvin_utils.ccm_utils.npy_utils import DataLoader
from calvin_utils.ccm_utils.stat_utils import CorrelationCalculator



def _calculate_pearson_r_map(self, niftis, indep_var):
        X = indep_var
        Y = niftis
        X_BAR = X.mean(axis=0)[:, np.newaxis]
        Y_BAR = Y.mean(axis=0)[np.newaxis, :]
        X_C = X - X_BAR
        Y_C = Y - Y_BAR
        NUMERATOR = np.dot(X_C.T, Y_C)
        SST_X = np.sum((X - X_BAR)**2, axis=0)
        SST_Y = np.sum((Y - Y_BAR)**2, axis=0)
        DENOMINATOR = np.sqrt(SST_X * SST_Y)
        r = NUMERATOR / DENOMINATOR
        
        if self.verbose:
            print(f"Shape of X: {X.shape}")
            print(f"Shape of Y: {Y.shape}")
            print(f"Shape of X_BAR: {X_BAR.shape}")
            print(f"Shape of Y_BAR: {Y_BAR.shape}")
            print(f"Shape of X_C: {X_C.shape}")
            print(f"Shape of Y_C: {Y_C.shape}")
            print(f"Shape of NUMERATOR: {NUMERATOR.shape}")
            print(f"Shape of DENOMINATOR: {DENOMINATOR.shape}")
        return r
    
def _average_ranks_by_column(values):
    """Rank each column of a 2D array (1-based); ties receive their mean rank."""
    n_rows = values.shape[0]
    order = np.argsort(values, axis=0, kind='stable')
    sorted_values = np.take_along_axis(values, order, axis=0)
    positions = np.arange(1, n_rows + 1, dtype=float).reshape(-1, 1)

    # Mark the first and last sorted position of every run of equal values.
    run_starts = np.ones(sorted_values.shape, dtype=bool)
    run_starts[1:] = sorted_values[1:] != sorted_values[:-1]
    run_ends = np.ones(sorted_values.shape, dtype=bool)
    run_ends[:-1] = run_starts[1:]

    # Propagate each run's first position downwards and last position upwards,
    # so every element knows the rank range its tie group spans.
    first_pos = np.maximum.accumulate(np.where(run_starts, positions, 0.0), axis=0)
    last_pos = np.minimum.accumulate(
        np.where(run_ends, positions, n_rows + 1.0)[::-1], axis=0
    )[::-1]

    ranks = np.empty(values.shape, dtype=float)
    np.put_along_axis(ranks, order, (first_pos + last_pos) / 2.0, axis=0)
    return ranks


def vectorized_spearmanr(niftis, indep_var):
    """
    Calculate the Spearman rank-order correlation coefficient for each voxel
    in a fully vectorized manner.

    Rho is computed as the Pearson correlation of the (tie-averaged) ranks.
    The shortcut ``1 - 6 * sum(d^2) / (n * (n^2 - 1))`` is only exact when no
    values are tied, so it is not used here.

    Parameters:
    -----------
    niftis : np.array
        2D array where each row represents a subject and each column represents a voxel.
    indep_var : np.array
        Array representing the independent variable for each subject; it is
        flattened, so shapes ``(n,)`` and ``(n, 1)`` are both accepted.

    Returns:
    --------
    rho : np.array
        1D array of Spearman's rank correlation coefficients for each voxel.
        A voxel (or independent variable) with no variation yields NaN.

    Raises:
    -------
    ValueError
        If ``niftis`` is not 2D or its row count differs from ``len(indep_var)``.
    """
    niftis = np.asarray(niftis, dtype=float)
    indep_var = np.asarray(indep_var, dtype=float).ravel()
    if niftis.ndim != 2:
        raise ValueError("niftis must be a 2D array of shape (subjects, voxels).")
    if niftis.shape[0] != indep_var.shape[0]:
        raise ValueError("niftis and indep_var must have the same number of subjects.")

    # Number of observations
    n = niftis.shape[0]

    # Rank the data (ties share their average rank)
    ranked_niftis = _average_ranks_by_column(niftis)
    ranked_indep_var = _average_ranks_by_column(indep_var[:, np.newaxis])

    # Average ranks always have mean (n + 1) / 2, so centre with that constant.
    mean_rank = (n + 1) / 2.0
    centered_niftis = ranked_niftis - mean_rank
    centered_indep_var = ranked_indep_var - mean_rank

    covariance = np.sum(centered_niftis * centered_indep_var, axis=0)
    voxel_ss = np.sum(centered_niftis ** 2, axis=0)
    indep_ss = np.sum(centered_indep_var ** 2)

    # Constant inputs give 0/0; return NaN for those quietly.
    with np.errstate(divide='ignore', invalid='ignore'):
        rho = covariance / np.sqrt(voxel_ss * indep_ss)

    return rho

# Benchmark on real data: load one dataset via DataLoader, then compare
# vectorized_spearmanr with a per-voxel scipy.stats.spearmanr loop.

# Load the dataset
# NOTE(review): absolute, user-specific path; this cell fails where it is absent.
data_dict_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_02/tmp/dataset_dict.json'
data_loader = DataLoader(data_dict_path)

# Pick a dataset by its position in the JSON dictionary.
# NOTE(review): index [1] selects the SECOND key even though the variable is
# named `first_dataset_name` — confirm which dataset is intended.
with open(data_dict_path, 'r') as f:
    dataset_paths = json.load(f)
first_dataset_name = list(dataset_paths.keys())[1]
first_dataset = data_loader.load_dataset(first_dataset_name)

# presumably `niftis` is (subjects, voxels) and `indep_var` holds one value per
# subject, as vectorized_spearmanr documents — verify against DataLoader.
niftis = first_dataset['niftis']
indep_var = first_dataset['indep_var']

# Vectorized Spearman's Rho
start_time = time.time()
vectorized_rho = vectorized_spearmanr(niftis, indep_var)
vectorized_time = time.time() - start_time

# Looped Spearman's Rho using scipy.stats
# One spearmanr call per voxel column; index [0] keeps the coefficient only.
start_time = time.time()
looped_rho = np.zeros(niftis.shape[1])
for i in range(niftis.shape[1]):
    looped_rho[i] = spearmanr(niftis[:, i], indep_var)[0]
looped_time = time.time() - start_time

# Compare the results
print(f"Vectorized Spearman's Rho Time: {vectorized_time:.4f} seconds")
print(f"Looped Spearman's Rho Time: {looped_time:.4f} seconds")
# Mean absolute difference; this is NaN if either array contains NaN.
print(f"Difference in Rho values: {np.mean(np.abs(vectorized_rho - looped_rho)):.6f}")


In [None]:
# Check if the results are close
# NaNs in either array are replaced by 0 first, so positions that are NaN in
# both arrays compare as equal. Relies on `vectorized_rho` and `looped_rho`
# left in the kernel by the benchmark cell above.
print(f"Are the results close? {np.allclose(np.nan_to_num(vectorized_rho), np.nan_to_num(looped_rho))}")

In [None]:
import numpy as np
from scipy.stats import pearsonr
# Pearson correlation between the vectorized and looped rho maps (NaNs
# replaced by 0) as a summary of how well the two implementations agree.
# Relies on `vectorized_rho` and `looped_rho` from an earlier cell.
pearsonr(np.nan_to_num(vectorized_rho), np.nan_to_num(looped_rho))

In [None]:
import json
import numpy as np
import time
from calvin_utils.ccm_utils.npy_utils import DataLoader
from calvin_utils.ccm_utils.stat_utils import CorrelationCalculator



# Re-run only the vectorized implementation and compare it with the looped
# result that is still in the kernel.
# NOTE(review): this cell depends on hidden state — `vectorized_spearmanr`,
# `niftis`, `indep_var`, `looped_rho` and `looped_time` are all defined in
# earlier cells, so it fails on a fresh kernel unless those ran first.

# Vectorized Spearman's Rho
start_time2 = time.time()
vectorized_rho2 = vectorized_spearmanr(niftis, indep_var)
vectorized_time2 = time.time() - start_time2

# Disabled looped variant. NOTE(review): it stores spearmanr(...)[1], i.e. the
# p-value rather than rho, so re-enabling it as-is would compare rho with
# p-values.
# # Looped Spearman's Rho using scipy.stats
# start_time = time.time()
# looped_rho = np.zeros(niftis.shape[1])
# for i in range(niftis.shape[1]):
#     looped_rho[i] = spearmanr(niftis[:, i], indep_var)[1]
# looped_time = time.time() - start_time

# Compare the results
print(f"Vectorized Spearman's Rho Time: {vectorized_time2:.4f} seconds")
# `looped_time` is the timing measured in the earlier benchmark cell.
print(f"Looped Spearman's Rho Time: {looped_time:.4f} seconds")
print(f"Difference in Rho values: {np.mean(np.abs(vectorized_rho2 - looped_rho)):.6f}")

# Check if the results are close
print(f"Are the results close? {np.allclose(np.nan_to_num(vectorized_rho2), np.nan_to_num(looped_rho))}")

In [None]:
# Display the vectorized rho map computed in the previous cell.
vectorized_rho2

In [None]:
# Display the looped (scipy) rho map for side-by-side inspection.
looped_rho

Generate a GIF of the Optimization Process

In [None]:

import os
import subprocess
import imageio
from typing import List

class MRIcroGLSnapshotter:
    """
    A class that:
      1) Finds all NIfTI files in a given folder.
      2) Builds a Python-like MRIcroGL script to load each NIfTI and save a PNG snapshot.
      3) Executes MRIcroGL in batch mode to run that script.
      4) Reads the PNG frames and assembles them into a GIF.
    """

    def __init__(
        self,
        nifti_folder: str,
        mricrogl_exec: str,
        output_dir: str,
        script_name: str = "mricrogl_snapshot.py",
        gif_name: str = "snapshots.gif",
        milliseconds: int = 2
    ):
        """
        Resolve all paths, create the output folders and collect the NIfTI files.

        :param nifti_folder: Folder containing existing NIfTI files.
        :param mricrogl_exec: Full path to the MRIcroGL executable.
        :param output_dir: Folder where the MRIcroGL script, PNGs, and final GIF will be placed.
        :param script_name: Name of the generated MRIcroGL script.
        :param gif_name: Name of the final GIF to be created.
        :param milliseconds: Intended GIF timing. It is stored on the instance but
            is currently NOT passed to the GIF writer (see ``_assemble_gif``).
        :raises FileNotFoundError: If ``nifti_folder`` contains no ``.nii``/``.nii.gz`` file.
        """
        self.nifti_folder = os.path.abspath(nifti_folder)
        self.mricrogl_exec = os.path.abspath(mricrogl_exec)
        self.output_dir = os.path.abspath(output_dir)
        self.script_name = script_name
        self.gif_name = gif_name
        self.milliseconds = milliseconds

        # PNG frames live in a sub-folder of the output directory.
        self.png_dir = os.path.join(self.output_dir, "png_frames")
        os.makedirs(self.output_dir, exist_ok=True)
        os.makedirs(self.png_dir, exist_ok=True)

        # Gather NIfTI files sorted by name
        self.nifti_files = self._get_nifti_files()
        if not self.nifti_files:
            raise FileNotFoundError(f"No NIfTI files found in {self.nifti_folder}.")

    def _get_nifti_files(self) -> List[str]:
        """
        Returns a sorted list of .nii or .nii.gz files in self.nifti_folder.
        Adjust the pattern to match your naming scheme if necessary.
        """
        # NOTE(review): the sort is lexicographic, so un-padded counters order as
        # "iter_10" < "iter_2"; frames are only chronological if names are
        # zero-padded — confirm against the actual file names.
        return sorted(
            os.path.join(self.nifti_folder, name)
            for name in os.listdir(self.nifti_folder)
            if name.lower().endswith((".nii", ".nii.gz"))
        )

    def _build_mricrogl_script(self, view='left', cutout='sagittal'):
        """
        Creates a Python-like script for MRIcroGL in batch mode.
        Overlays each NIfTI file twice (positive and negative),
        sets overlay surface style, sets viewpoint, optional cutout,
        and saves a PNG for each map.

        :param view: One of ['front','back','left','right','top','bottom','front_left'].
        :param cutout: Either 'none' or 'sagittal'.
        :raises ValueError: If ``view`` or ``cutout`` is not one of the accepted values.
        """
        script_path = os.path.join(self.output_dir, self.script_name)
        # 1) Define standard viewpoints via azimuth/elevation
        view_dict = {
            'front':   (180, 0),
            'back':    (0, 0),
            'left':    (90, 0),
            'right':   (270, 0),
            'top':     (180, 90),
            'bottom':  (180, -90),
            'front_left': (135, 0)
        }
        if view not in view_dict:
            raise ValueError(f"Invalid view '{view}'. Choose from {list(view_dict.keys())}")

        azim, elev = view_dict[view]

        # 2) Determine the optional cutout command (a single sagittal example).
        clip_lines = []
        if cutout == 'sagittal':
            clip_lines.append("gl.cutout(0.52, 0, 1, 0, 1, 0)")
        elif cutout != 'none':
            raise ValueError(f"Invalid clip mode '{cutout}'. Choose 'none' or 'sagittal'.")

        # Zero-pad frame numbers widely enough that a lexicographic sort of the
        # PNG names (done in _assemble_gif) matches frame order; never below 3.
        pad_width = max(3, len(str(len(self.nifti_files) - 1)))

        lines = []
        lines.append("import gl")
        lines.append("gl.resetdefaults()")
        lines.append("gl.backcolor(255,255,255)")  # White background
        lines.append("gl.colorbarposition(0)")  # Colour bar

        # If you want the same background each time:
        lines.append("gl.loadimage('spm152')")
        lines.append("gl.overlaymaskwithbackground(1)") # Mask anything not on background

        # Paths are embedded with repr() so that quotes or backslashes in a path
        # still yield a valid string literal in the generated script.
        lines.append(f"png_folder   = {self.png_dir!r}")

        # Gather NIfTI filenames (already stored by this class)
        lines.append("nifti_files = [")
        for nifti_path in self.nifti_files:
            lines.append(f"    {nifti_path!r},")
        lines.append("]")

        # 3) Build loop
        lines.append("for idx, nifti in enumerate(nifti_files):")
        lines.append("    # Close any previous overlays to start fresh each iteration")
        lines.append("    gl.overlaycloseall()")

        #   3a) Load the same file TWICE: once for positive, once for negative
        #   Positive overlay
        lines.append("    gl.overlayload(nifti)")
        lines.append("    gl.colorname(1,'8redyell')")
        lines.append("    gl.minmax(1, 0.5, 0.00000001)")

        #   Negative overlay
        lines.append("    gl.overlayload(nifti)")
        lines.append("    gl.colorname(2,'6bluegrn')")
        lines.append("    gl.minmax(2, -0.5, -0.00000001)")

        #   3b) Switch to overlaySurface shader
        lines.append("    gl.shadername('overlaySurface')")

        #   3c) Optionally apply the cutout inside the loop
        for cline in clip_lines:
            lines.append(f"    {cline}")

        #   3d) Choose viewpoint
        lines.append(f"    gl.azimuthelevation({azim}, {elev})")

        #   3e) Save a PNG (the inner f-string is evaluated by MRIcroGL, not here)
        lines.append(f"    out_png = f'{{png_folder}}/frame_{{idx:0{pad_width}d}}.png'")
        lines.append("    gl.savebmp(out_png)")

        # 4) Quit after finishing the loop
        lines.append("gl.quit()")

        # 5) Write out the script
        with open(script_path, "w") as f:
            f.write("\n".join(lines))

    def _run_mricrogl_script(self):
        """
        Runs MRIcroGL, passing our generated script with -s.

        :raises subprocess.CalledProcessError: If MRIcroGL exits with a non-zero status.
        """
        script_path = os.path.join(self.output_dir, self.script_name)
        cmd = [self.mricrogl_exec, "-s", script_path]
        subprocess.run(cmd, check=True)

    def _assemble_gif(self):
        """
        Reads all PNGs from self.png_dir and assembles them into a single GIF.

        :raises FileNotFoundError: If self.png_dir contains no ``.png`` file.
        """
        # Every .png in the folder is used, in name order — including frames
        # left over from an earlier run with more NIfTI files.
        png_files = sorted(f for f in os.listdir(self.png_dir) if f.endswith(".png"))
        if not png_files:
            raise FileNotFoundError(f"No PNG frames found in {self.png_dir}.")

        frames = [imageio.imread(os.path.join(self.png_dir, png)) for png in png_files]

        out_gif_path = os.path.join(self.output_dir, self.gif_name)
        # The frame duration is deliberately left at the writer's default;
        # self.milliseconds is not applied here.
        imageio.mimsave(out_gif_path, frames) #, duration=self.milliseconds)
        print(f"GIF created at {out_gif_path}")

    def run_snapshot_pipeline(self, view='left', cutout='sagittal'):
        """
        Top-level method:
          1) Build the MRIcroGL script that loops over each NIfTI.
          2) Run that script in MRIcroGL (batch).
          3) Assemble all PNGs into a GIF.

        :param view: Camera viewpoint forwarded to ``_build_mricrogl_script``.
        :param cutout: Cutout mode forwarded to ``_build_mricrogl_script``.
        """
        self._build_mricrogl_script(view=view, cutout=cutout)
        self._run_mricrogl_script()
        self._assemble_gif()

In [None]:
# from calvin_utils.render_utils.mricrogl_utils import MRIcroGLSnapshotter

# Render every NIfTI in `iteration_maps` with MRIcroGL and stitch the frames
# into a GIF under the `gif` output folder.
# NOTE(review): absolute, user-specific paths; `milliseconds` is stored by the
# class but not applied, because the duration argument in _assemble_gif is
# commented out.
snapshot = MRIcroGLSnapshotter(nifti_folder='/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_07/train_memory_test_ad/conn/optimized_on_dataset_dict/iteration_maps',
        mricrogl_exec='/Applications/MRIcroGL.app/Contents/MacOS/MRIcroGL',
        output_dir='/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_07/train_memory_test_ad/conn/optimized_on_dataset_dict/gif',
        milliseconds = 8)
snapshot.run_snapshot_pipeline()
# Uncomment to rebuild the GIF from existing PNG frames without re-running MRIcroGL.
# snapshot._assemble_gif()

In [1]:
import pandas as pd


# Load the ADAS component scores table.
# NOTE(review): absolute, user-specific path — the cell fails where it is absent.
df = pd.read_csv('/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/resources/datasets/BIDS_AD_DBS_FORNIX/study_metadata/Fnm_ADvanceI_ADASComponents_ForCalvin.csv')

In [3]:
# List the available columns of the loaded table.
df.columns

Index(['PT', 'interval', 'ADAS11Score', 'ADAS13Score',
       'ADASCancellationCorrectScore', 'ADASCancellationIncorrectScore',
       'ADASCancellationReminders', 'ADASCancellationTotalScore',
       'ADASCircleCorrect', 'ADASCommandCorrect', 'ADASCommandIncorrect',
       'ADASCommandTotalScore', 'ADASConstPraxisTotalScore', 'ADASCubeCorrect',
       'ADASDelayedRecallTotalScore', 'ADASDiamondCorrect',
       'ADASIdeationPraxisTotalScore', 'ADASInstructionsScore',
       'ADASLanguageCompScore', 'ADASLanguageScore', 'ADASNamingCorrectScore',
       'ADASNamingTaskTotalScore', 'ADASNoData', 'ADASOrinetationTotalScore',
       'ADASRecognitionTable1Score', 'ADASRectanglesCorrect',
       'ADASWordFindingScore', 'ADASWordRecallTotalScore',
       'ADASWordRecallTrial1Score', 'ADASWordRecallTrial2Score',
       'ADASWordRecallTrial3Score', 'dADAS', 'db'],
      dtype='object')

In [6]:
# Count distinct values in the `PT` column (a NaN, if present, counts as one).
len(df.PT.unique())

83