In [17]:
# Output directory for this analysis; passed to CalvinStatsmodelsPalm as output_dir in a later cell.
# NOTE(review): absolute, machine-specific path (external volume) — adjust before running elsewhere.
out_dir = '/Volumes/Expansion/datasets/Manitoba_PET/PET_R_Maps/autonocog_memory/similarity_to_lesion_memory_net'

# 01 - Get Datasets


**Data will be Imported from a CSV which is expected to be in this format**
- sub column contents MUST match the names of the neuroimaging files above. 
    - ID column 
```
+-----+----------------------------+--------------+--------------+--------------+
| sub | Nifti_File_Path            | Indep. Var.  | Covariate_N  | Dataset      |
+-----+----------------------------+--------------+--------------+--------------+
| 1   | /path/to/file1.nii.gz      | 0.5          | 1.2          | 1            |
| 2   | /path/to/file2.nii.gz      | 0.7          | 1.4          | 1            |
| 3   | /path/to/file3.nii.gz      | 0.6          | 1.5          | 2            |
| 4   | /path/to/file4.nii.gz      | 0.9          | 1.1          | 3            |
| ... | ...                        | ...          | ...          | ...          |
+-----+----------------------------+--------------+--------------+--------------+
```

In [18]:
# Location of the input spreadsheet described in the table above.
# NOTE(review): this is a placeholder path — replace it with the real file before running.
input_csv_path = 'path/to/your/csv_file.csv'
sheet = None # Set to None if CSV

In [None]:
from calvin_utils.permutation_analysis_utils.statsmodels_palm import CalvinStatsmodelsPalm
# Instantiate CalvinStatsmodelsPalm with the CSV path, output directory, and sheet configured above
cal_palm = CalvinStatsmodelsPalm(input_csv_path=input_csv_path, output_dir=out_dir, sheet=sheet)
# Call read_and_display_data and keep its return value as the working data_df
data_df = cal_palm.read_and_display_data()

**Handle NANs**
- Set drop_nans=True if you would like to remove NaNs from the data
- Provide a column name or a list of column names to remove NaNs from

In [22]:
# Columns from which NaNs will be removed; passed as columns_to_drop_from in the next cell.
drop_list = ['variable_of_interest']

In [None]:
# Remove NaNs from the columns named in drop_list (see drop_nans_from_columns) and rebind data_df.
data_df = cal_palm.drop_nans_from_columns(columns_to_drop_from=drop_list)
# Show the resulting data for visual inspection.
display(data_df)

**Drop Row Based on Value of Column**

Define the column, condition, and value for dropping rows
- column = 'your_column_name'
- condition = 'above'  # Options: 'equal', 'above', 'below'
- value = the value that entries in `column` are compared against

In [26]:
# Settings for the row-dropping step: the column to test, the comparison to apply, and the comparison value.
# NOTE(review): all three are empty-string placeholders here — confirm how
# drop_rows_based_on_value treats empty inputs before running the next cell as-is.
column = ''
condition = ''
value = ''

In [None]:
# drop_rows_based_on_value returns two objects; the first replaces data_df.
# NOTE(review): presumably other_df holds the rows that were dropped — confirm against the method.
data_df, other_df = cal_palm.drop_rows_based_on_value(column, condition, value)
# Last expression in the cell, so the notebook renders the result.
data_df

**Standardize Data**
- List the columns you do NOT want to standardize

In [28]:
# Columns listed here are excluded from standardization.
# Alternative lists kept from earlier use of this notebook:
#   ['Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group', 'Z_Scored_Subiculum_T_By_Origin_Group_']
#   ['Age']
cols_not_to_standardize = ['DX', 'SUBID']
# Grouping column for standardization; set to none if there are no specific groups.
# NOTE(review): the value here is an empty string, not None — confirm which one standardize_columns expects.
group_col = ''

In [None]:
# Standardize data_df, skipping cols_not_to_standardize; group_col is passed through to standardize_columns.
data_df = cal_palm.standardize_columns(cols_not_to_standardize, group_col)
# Last expression in the cell, so the notebook renders the result.
data_df

# 02 - Import the Data into DataFrames

In [31]:
# Names of the data_df columns consumed by the importer below.
dataset_col = 'Ds'                        # column identifying which dataset each row belongs to
nifti_path_col = 'path'                   # column holding the NIfTI file path for each row
indep_var_col = 'variable_of_interest'    # column holding the independent variable
covariate_cols = ['Age', 'sex']           # columns treated as covariates

In [None]:
import os
import shutil
from tempfile import mkdtemp

import numpy as np
import pandas as pd
import statsmodels.api as sm
import tensorly as tl
from tensorly.regression import CPRegressor

from calvin_utils.file_utils.import_functions import GiiNiiFileImport
from calvin_utils.nifti_utils.matrix_utilities import mask_dataframe
from calvin_utils.statistical_utils.regression_utils import RegressOutCovariates

class DatasetNiftiImporter(GiiNiiFileImport):
    """Import NIfTI data per dataset and regress covariates out of it.

    For every unique value in ``df[dataset_col]`` the importer:

    1. loads the NIfTI files listed in ``nifti_col`` through ``GiiNiiFileImport.import_matrices``,
    2. stores the imaging data, the independent variable and the covariates in
       ``self.data_dict[dataset]`` under the keys ``'niftis'``, ``'indep_var'`` and ``'covariates'``,
    3. masks the imaging data with ``mask_path`` and regresses the covariates out of both the
       masked imaging data and the independent variable,
    4. saves the residuals and the covariates as ``.npy`` files in a per-dataset sub-directory of
       a temporary directory (``self.tmp_dir``).

    All of this runs from ``__init__``. Call :meth:`cleanup` to delete the temporary directory.

    Args:
        df: DataFrame with one row per subject/image.
        dataset_col: Column of ``df`` that labels the dataset each row belongs to.
        nifti_col: Column of ``df`` holding the NIfTI file paths.
        indep_var_col: Column of ``df`` holding the independent variable.
        covariate_cols: List of columns of ``df`` to regress out.
        out_dir: Stored on the instance; not otherwise used by this class.
        mask_path: Mask passed to ``mask_dataframe``.
        regression_method: ``'tensor'`` (CP tensor regression) or ``'ols'`` (per-column OLS).

    Raises:
        ValueError: If ``regression_method`` is neither ``'tensor'`` nor ``'ols'``.
    """

    def __init__(self, df, dataset_col, nifti_col, indep_var_col, covariate_cols, out_dir, mask_path, regression_method='tensor'):
        # Fail before any file is read or any directory is created.
        if regression_method not in ('tensor', 'ols'):
            raise ValueError("Invalid regression method. Choose 'tensor' or 'ols'.")

        self.df = df
        self.dataset_col = dataset_col
        self.nifti_col = nifti_col
        self.indep_var_col = indep_var_col
        self.covariate_cols = covariate_cols
        self.out_dir = out_dir
        self.mask_path = mask_path
        self.regression_method = regression_method
        self.data_dict = {}
        self.tmp_dir = mkdtemp()
        try:
            self._prepare_data_dict()
        except Exception:
            # Do not leave an orphaned temporary directory behind when the import fails.
            self.cleanup()
            raise

    def _prepare_data_dict(self):
        """Populate ``self.data_dict`` for every dataset and process each one."""
        for dataset in self.df[self.dataset_col].unique():
            print("Importing dataset: ", dataset)
            dataset_df = self.df[self.df[self.dataset_col] == dataset]

            # Dataset labels may be numeric; os.path.join only accepts path-like components.
            dataset_dir = os.path.join(self.tmp_dir, str(dataset))
            os.makedirs(dataset_dir, exist_ok=True)

            nifti_paths = dataset_df[self.nifti_col].tolist()
            nifti_importer = GiiNiiFileImport(import_path=None, file_column=None, file_pattern=None)

            self.data_dict[dataset] = {
                'niftis': nifti_importer.import_matrices(nifti_paths),
                # Double brackets keep the independent variable two-dimensional (one column).
                'indep_var': dataset_df.loc[:, [self.indep_var_col]],
                'covariates': dataset_df.loc[:, self.covariate_cols],
            }

            self._process_dataset(dataset, dataset_dir)

    def _process_dataset(self, dataset, dataset_dir):
        """Mask one dataset, regress out its covariates, and save the results to ``dataset_dir``.

        Writes ``niftis.npy`` and ``indep_var.npy`` (residuals) and ``covariates.npy``.

        Raises:
            ValueError: If ``self.regression_method`` is neither ``'tensor'`` nor ``'ols'``.
        """
        niftis = self.data_dict[dataset]['niftis']
        indep_var = self.data_dict[dataset]['indep_var']
        covariates = self.data_dict[dataset]['covariates']

        # Mask the NIFTI files
        masked_niftis = mask_dataframe(niftis, mask_path=self.mask_path)

        if self.regression_method == 'tensor':
            regress = self.tensor_regression
        elif self.regression_method == 'ols':
            regress = self.ols_regression
        else:
            # Reachable only if regression_method was changed after construction.
            raise ValueError("Invalid regression method. Choose 'tensor' or 'ols'.")

        nifti_residuals = regress(masked_niftis.values, covariates.values)
        indep_residuals = regress(indep_var.values, covariates.values)

        # Save the residuals, independent variables, and covariates
        np.save(os.path.join(dataset_dir, 'niftis.npy'), nifti_residuals)
        np.save(os.path.join(dataset_dir, 'indep_var.npy'), indep_residuals)
        np.save(os.path.join(dataset_dir, 'covariates.npy'), covariates.values)

    def tensor_regression(self, endog, exog):
        """Return ``endog`` minus the prediction of a rank-1 CP regression fitted on ``exog``.

        Args:
            endog: Array-like response data.
            exog: Array-like predictor data.

        Returns:
            The residuals ``endog - model.predict(exog)``.
        """
        # Ensure endog and exog are tensors
        endog = tl.tensor(endog)
        exog = tl.tensor(exog)

        # CPRegressor's rank argument is named `weight_rank`
        # (`weight_ranks` belongs to TuckerRegressor and is rejected here).
        model = CPRegressor(weight_rank=1)
        model.fit(exog, endog)
        residuals = endog - model.predict(exog)

        return residuals

    def ols_regression(self, endog, exog):
        """Return the OLS residuals of each column of ``endog`` regressed on ``exog``.

        ``exog`` is used as given; no intercept column is added here.

        Args:
            endog: 2-D array; each column is fitted independently.
            exog: 2-D array of regressors with the same number of rows as ``endog``.

        Returns:
            Array with the shape of ``endog`` holding the residuals. Floating-point inputs keep
            their dtype; any other dtype is returned as float64.
        """
        endog = np.asarray(endog)
        # Residuals are real-valued: never store them in an integer buffer, which would truncate.
        out_dtype = endog.dtype if np.issubdtype(endog.dtype, np.floating) else np.float64
        residuals = np.zeros(endog.shape, dtype=out_dtype)
        for i in range(endog.shape[1]):
            model = sm.OLS(endog[:, i], exog).fit()
            residuals[:, i] = model.resid
        return residuals

    def cleanup(self):
        """Delete the temporary directory and everything saved in it. Safe to call repeatedly."""
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

# Example usage (placeholder paths and column names — replace them before running).
# The example_-prefixed names keep this demo from overwriting the notebook-level
# dataset_col / indep_var_col / covariate_cols / out_dir settings that other cells rely on.
df = pd.read_csv('path/to/csv_file.csv')
example_dataset_col = 'Dataset'
nifti_col = 'Nifti_File_Path'
example_indep_var_col = 'Indep. Var.'
example_covariate_cols = ['Covariate_1', 'Covariate_2', 'Covariate_3']
example_out_dir = 'path/to/output_directory'
mask_path = 'path/to/generic_mask.nii'

# Using tensor regression
importer_tensor = DatasetNiftiImporter(df, example_dataset_col, nifti_col, example_indep_var_col, example_covariate_cols, example_out_dir, mask_path, regression_method='tensor')
# Remove the temporary directory created by the importer.
importer_tensor.cleanup()

# Using OLS regression
importer_ols = DatasetNiftiImporter(df, example_dataset_col, nifti_col, example_indep_var_col, example_covariate_cols, example_out_dir, mask_path, regression_method='ols')
# Remove the temporary directory created by the importer.
importer_ols.cleanup()

In [52]:
# NOTE(review): this import rebinds the name DatasetNiftiImporter, replacing any class of the
# same name defined earlier in the notebook — confirm the library version is the intended one.
from calvin_utils.file_utils.import_functions import DatasetNiftiImporter
# Build the importer from data_df and the column names configured above.
data_importer = DatasetNiftiImporter(df=data_df, dataset_col=dataset_col, nifti_col=nifti_path_col, indep_var_col=indep_var_col, covariate_cols=covariate_cols)
# Per-dataset dictionary; the next section reads its 'niftis', 'indep_var' and 'covariates' entries.
data_dict = data_importer.data_dict

# 03 - Control for Covariates

Regress out a Covariate
- This will control for a covariate by regressing it out of BOTH the independent variable and the niftis. 
- This will leave the residuals as the resulting data
- Make sure to standardize the niftis (if needed), and definitely the indep var and covariates, before running this. 

In [None]:
from calvin_utils.statistical_utils.regression_utils import RegressOutCovariates
# For each dataset, replace the imaging data and then the independent variable with the
# output of regressing that dataset's covariates out of them.
for dataset in data_dict.keys():
    for _target_key in ('niftis', 'indep_var'):
        data_dict[dataset][_target_key] = RegressOutCovariates.regress_out_covariates_using_endog_exog(
            data_dict[dataset][_target_key],
            data_dict[dataset]['covariates'],
        )

# 04 - Begin Analysis