In [1]:
out_dir = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_02'

# 01 - Get Datasets


**Data will be Imported from a CSV which is expected to be in this format**
- sub column contents MUST match the names of the neuroimaging files above. 
    - ID column 
```
+-----+----------------------------+--------------+--------------+--------------+
| sub | Nifti_File_Path            | Indep. Var.  | Covariate_N  | Dataset      |
+-----+----------------------------+--------------+--------------+--------------+
| 1   | /path/to/file1.nii.gz      | 0.5          | 1.2          | 1            |
| 2   | /path/to/file2.nii.gz      | 0.7          | 1.4          | 1            |
| 3   | /path/to/file3.nii.gz      | 0.6          | 1.5          | 2            |
| 4   | /path/to/file4.nii.gz      | 0.9          | 1.1          | 3            |
| ... | ...                        | ...          | ...          | ...          |
+-----+----------------------------+--------------+--------------+--------------+
```

In [2]:
input_csv_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/metadata/master_list_v3.csv'
sheet = None # Set to None if CSV

In [None]:
from calvin_utils.permutation_analysis_utils.statsmodels_palm import CalvinStatsmodelsPalm
# Instantiate the PalmPrepararation class
cal_palm = CalvinStatsmodelsPalm(input_csv_path=input_csv_path, output_dir=out_dir, sheet=sheet)
# Call the process_nifti_paths method
data_df = cal_palm.read_and_display_data()
display(data_df)

**Handle NANs**
- Set drop_nans=True is you would like to remove NaNs from data
- Provide a column name or a list of column names to remove NaNs from

In [11]:
drop_list = ['Nifti_File_Path', 'diagnosis']

In [None]:
data_df = cal_palm.drop_nans_from_columns(columns_to_drop_from=drop_list)
display(data_df)

**Drop Row Based on Value of Column**

Define the column, condition, and value for dropping rows
- column = 'your_column_name'
- condition = 'above'  # Options: 'equal', 'above', 'below'

In [17]:
column = 'Dataset'
condition = 'equal'
value = 'adni_memory'

In [None]:
data_df, other_df = cal_palm.drop_rows_based_on_value(column, condition, value)
data_df

**Standardize Data**
- Enter Columns you Don't want to standardize into a list

In [19]:
# Remove anything you don't want to standardize
cols_not_to_standardize = ['Dataset', 'Subject'] # ['Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group', 'Z_Scored_Subiculum_T_By_Origin_Group_'] #['Age']
group_col = 'Dataset' #Set to none if there are no specific groups

In [None]:
data_df = cal_palm.standardize_columns(cols_not_to_standardize, group_col)
data_df

# 02 - Import the Data into DataFrames, Control them, and Save them

In [20]:
dataset_col = 'Dataset'
nifti_path_col = 'Nifti_File_Path'
indep_var_col = 'percent_memory_improvement'
covariate_cols = ['diagnosis']

In [None]:
from calvin_utils.file_utils.import_functions import DatasetNiftiImporter
data_importer = DatasetNiftiImporter(df=data_df, dataset_col=dataset_col, nifti_col=nifti_path_col, indep_var_col=indep_var_col, covariate_cols=covariate_cols, out_dir=out_dir, regression_method='ols')

# 03 - Begin Analysis

In [8]:
import numpy as np
from scipy.stats import spearmanr
import json
from tqdm import tqdm

class DataLoader:
    def __init__(self, data_dict_path):
        with open(data_dict_path, 'r') as f:
            self.dataset_paths_dict = json.load(f)
    
    def load_dataset(self, dataset_name):
        paths = self.dataset_paths_dict[dataset_name]
        data = {
            'niftis': np.load(paths['niftis']),
            'indep_var': np.load(paths['indep_var']),
            'covariates': np.load(paths['covariates'])
        }
        return data
    
    @staticmethod
    def load_dataset_static(data_paths_dict, dataset_name):
        paths = data_paths_dict[dataset_name]

        data_dict = {
            'niftis': np.load(paths['niftis']),
            'indep_var': np.load(paths['indep_var']),
            'covariates': np.load(paths['covariates'])
        }
        return data_dict

class CorrelationCalculator:
    def __init__(self, method='pearson', verbose=False):
        self.method = method
        self.verbose = verbose

    def _calculate_pearson_r_map(self, niftis, indep_var):
        X = indep_var
        Y = niftis
        X_BAR = X.mean(axis=0)[:, np.newaxis]
        Y_BAR = Y.mean(axis=0)[np.newaxis, :]
        X_C = X - X_BAR
        Y_C = Y - Y_BAR
        NUMERATOR = np.dot(X_C.T, Y_C)
        SST_X = np.sum((X - X_BAR)**2, axis=0)
        SST_Y = np.sum((Y - Y_BAR)**2, axis=0)
        DENOMINATOR = np.sqrt(SST_X * SST_Y)
        r = NUMERATOR / DENOMINATOR
        
        if self.verbose:
            print(f"Shape of X: {X.shape}")
            print(f"Shape of Y: {Y.shape}")
            print(f"Shape of X_BAR: {X_BAR.shape}")
            print(f"Shape of Y_BAR: {Y_BAR.shape}")
            print(f"Shape of X_C: {X_C.shape}")
            print(f"Shape of Y_C: {Y_C.shape}")
            print(f"Shape of NUMERATOR: {NUMERATOR.shape}")
            print(f"Shape of DENOMINATOR: {DENOMINATOR.shape}")
        return r

    def _calculate_spearman_r_map(self, niftis, indep_var):
        '''Not easily broadcast, sorry!'''
        n_voxels = niftis.shape[1]
        rho = np.zeros(n_voxels)
        for i in tqdm(range(n_voxels), desc='Running Spearman Rho'):
            rho[i], _ = spearmanr(indep_var, niftis[:, i])
            
        if self.verbose:
            print(f"Shape of niftis: {niftis.shape}")
            print(f"Shape of rho: {rho.shape}")
        return rho
    
    def _process_data(self, data):
        if self.method == 'pearson':
            self.correlation_map = self._calculate_pearson_r_map(data['niftis'], data['indep_var'])
        elif self.method == 'spearman':
            self.correlation_map = self._calculate_spearman_r_map(data['niftis'], data['indep_var'])
    
    def process_all_datasets(self, data_dict):
        correlation_maps = {}
        for dataset_name in data_dict.keys():
            data = DataLoader.load_dataset_static(data_dict, dataset_name)
            self._process_data(data)
            correlation_maps[dataset_name] = self.correlation_map
        return correlation_maps


Enter the path to the dictionary generated by Step 2

In [9]:
dict_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_02/tmp/dataset_dict.json'

What correlation method would you like to use?
- Pearson is very fast, but depends on linearity and is sensitive to outliers
- Spearman is slower, but much more robust

In [12]:
correlation = 'pearson'

In [13]:
data_loader = DataLoader(dict_path)
correlation_calculator = CorrelationCalculator(method=correlation, verbose=False)
corr_map_dict = correlation_calculator.process_all_datasets(data_loader.dataset_paths_dict)

  r = NUMERATOR / DENOMINATOR


In [30]:
for k in corr_map_dict.keys():
    print(k)

grafmann_memory
manitoba_memory
kahana_memory
queensland_memory
sante_memory
fornix_memory


In [51]:
from nilearn import plotting, image
import nibabel as nib
import os
import numpy as np

class ConvergentMapGenerator:
    def __init__(self, corr_map_dict, data_loader, mask_path=None, out_dir=None):
        self.corr_map_dict = corr_map_dict
        self.data_loader = data_loader
        self.mask_path = mask_path
        self.out_dir = out_dir
        self._handle_nans()
        
    def _handle_nans(self):
        for key in self.corr_map_dict.keys():
            if np.isnan(self.corr_map_dict[key]).all():
                print(f"Warning: The correlation map for {key} contains only NaNs and will be excluded from the analysis.")
                del self.corr_map_dict[key]
            elif np.isnan(self.corr_map_dict[key]).any():
                self.corr_map_dict[key] = np.nan_to_num(self.corr_map_dict[key], nan=0, posinf=1, neginf=-1)
            else:
                continue
            
    def generate_weighted_average_r_map(self):
        weights = []
        for dataset_name in self.corr_map_dict.keys():
            data = self.data_loader.load_dataset(dataset_name)
            weights.append(data['niftis'].shape[0])
        weights = np.array(weights)
        r_maps = np.array(list(self.corr_map_dict.values()))
        return np.average(r_maps, axis=0, weights=weights)

    def generate_agreement_map(self):
        r_maps = np.array(list(self.corr_map_dict.values()))
        signs = np.sign(r_maps)
        agreement = np.all(signs == signs[0], axis=0)
        return agreement.astype(int)
    
    def _unmask_array(self, data_array, threshold=0):
        if self.mask_path is None:
            from nimlab import datasets as nimds
            mask = nimds.get_img("mni_icbm152")
        else:
            mask = nib.load(self.mask_path)

        mask_data = mask.get_fdata()
        mask_indices = mask_data.flatten() > threshold
        
        unmasked_array = np.zeros(mask_indices.shape)
        unmasked_array[mask_indices] = data_array.flatten()
        return unmasked_array.reshape(mask_data.shape), mask.affine

    def _save_map(self, map_data, file_name):
        unmasked_map, mask_affine = self._unmask_array(map_data)
        img = nib.Nifti1Image(unmasked_map, affine=mask_affine)
        if self.out_dir is not None:
            file_path = os.path.join(out_dir, 'convergence_map', file_name)
            nib.save(img, file_path)
        return img

    def _visualize_map(self, img, title):
        plotting.view_img(img, title=title).open_in_browser()
        
    def generate_and_save_maps(self):
        # Generate weighted average r map
        weighted_avg_map = self.generate_weighted_average_r_map()
        weighted_avg_img = self._save_map(weighted_avg_map, 'weighted_average_r_map.nii.gz')
        self._visualize_map(weighted_avg_img, 'Weighted Average R Map')

        # Generate agreement map
        agreement_map = self.generate_agreement_map()
        agreement_img = self._save_map(agreement_map, 'agreement_map.nii.gz')
        self._visualize_map(agreement_img, 'Agreement Map')

In [52]:
convergent_map_generator = ConvergentMapGenerator(corr_map_dict, data_loader)
convergent_map_generator.generate_and_save_maps()

  a.partition(kth, axis=axis, kind=kind, order=order)
  a.partition(kth, axis=axis, kind=kind, order=order)


In [None]:

    
r:
    def generate_weighted_average_r_map(self, r_maps, weights):
        return np.average(r_maps, axis=0, weights=weights)

    def generate_agreement_map(self, r_maps):
        signs = np.sign(r_maps)
        agreement = np.all(signs == signs[0], axis=0)
        return agreement.astype(int)
    
class MapSaver:
    def save_map(self, map_data, file_path):
        np.save(file_path, map_data)
        

import numpy as np
from scipy.spatial.distance import cosine
from scipy.stats import spearmanr, pearsonr

class LOOCVAnalyzer:
    def __init__(self, datasets, method='spearman'):
        """
        Initialize the LOOCVAnalyzer.

        Parameters:
        -----------
        datasets : list of dict
            List of datasets, where each dataset is a dictionary containing 'niftis' and 'indep_var'.
        method : str
            Correlation method to use ('spearman' or 'pearson').
        """
        self.datasets = datasets
        self.method = method

    def perform_loocv(self):
        """
        Perform Leave-One-Out Cross-Validation (LOOCV) analysis.

        Returns:
        --------
        list of tuple
            List of tuples containing the R-value and confidence intervals for each dataset.
        """
        results = []
        for i in range(len(self.datasets)):
            test_dataset = self.datasets[i]
            train_datasets = self.datasets[:i] + self.datasets[i+1:]
            convergent_map = self.generate_convergent_map(train_datasets)
            similarities = self.calculate_similarity(test_dataset['niftis'], convergent_map)
            r, ci_lower, ci_upper = self.correlate_similarity_with_outcomes(similarities, test_dataset['indep_var'])
            results.append((r, ci_lower, ci_upper))
        return results

    def generate_convergent_map(self, train_datasets):
        """
        Generate the convergent map using the training datasets.

        Parameters:
        -----------
        train_datasets : list of dict
            List of training datasets.

        Returns:
        --------
        np.array
            Convergent map.
        """
        # Placeholder for generating the convergent map
        # This should be implemented based on the chosen method (e.g., weighted average or agreement map)
        pass

    def calculate_similarity(self, patient_maps, convergent_map):
        """
        Calculate cosine similarity between patient maps and the convergent map.

        Parameters:
        -----------
        patient_maps : np.array
            Array of patient maps.
        convergent_map : np.array
            Convergent map.

        Returns:
        --------
        list of float
            List of cosine similarity values.
        """
        similarities = [1 - cosine(patient_map, convergent_map) for patient_map in patient_maps]
        return similarities

    def correlate_similarity_with_outcomes(self, similarities, indep_var):
        """
        Correlate similarity values with independent variables and calculate confidence intervals.

        Parameters:
        -----------
        similarities : list of float
            List of cosine similarity values.
        indep_var : np.array
            Array of independent variable values.

        Returns:
        --------
        tuple
            R-value, lower confidence interval, and upper confidence interval.
        """
        resampled_r = []
        for _ in range(1000):
            resampled_indices = np.random.choice(len(similarities), len(similarities), replace=True)
            resampled_similarities = np.array(similarities)[resampled_indices]
            resampled_indep_var = np.array(indep_var)[resampled_indices]
            if self.method == 'spearman':
                resampled_r.append(spearmanr(resampled_similarities, resampled_indep_var)[0])
            else:
                resampled_r.append(pearsonr(resampled_similarities, resampled_indep_var)[0])
        ci_lower = np.percentile(resampled_r, 2.5)
        ci_upper = np.percentile(resampled_r, 97.5)
        r = np.mean(resampled_r)
        return r, ci_lower, ci_upper