In [None]:
# Mount Google Drive (Colab-only) so the files under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import os
import pandas as pd
from pathlib import Path
import scipy.io
import h5py
import sklearn
import copy
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import scipy.stats as stats
%cd /content/drive/MyDrive/TESI/

# **MIND Values Harmonization**

In [None]:
# Root folder of the MIND matrices; the loading cell reads one sub-folder per site from it.
matrices_path = '/content/drive/MyDrive/TESI/MINDMatrices/NeuromorphometricsNEW'

In [None]:
# Build the covariates dataframe used for harmonization.
# Site names are recoded to 1-based integer ids (their position in `sites`)
# and the column is renamed to 'batch'.
# NOTE(review): rows are later matched to the loaded matrices purely by
# position, so the CSV is presumably ordered site-by-site like `sites` -- confirm.

strat_covars = pd.read_csv("MatchedDataNew.csv", sep=';')

sites = ['AOUV', 'FSL_Rome', 'JUH', 'MI_POLI_3T_3', 'OSR', 'PITTS', 'UBC']

site_to_number = {site: index for index, site in enumerate(sites, start=1)}

# Series.map turns every label missing from the dict into NaN; fail loudly
# instead of carrying an undefined batch id into the harmonization.
unknown_sites = strat_covars.loc[~strat_covars['Site'].isin(sites), 'Site'].unique()
if len(unknown_sites) > 0:
    raise ValueError(
        f"Unrecognized site label(s) in MatchedDataNew.csv: {list(unknown_sites)}"
    )

strat_covars['Site'] = strat_covars['Site'].map(site_to_number)

strat_covars = strat_covars.rename(columns={'Site': 'batch'})

In [None]:
# Load every subject's MIND similarity matrix and ROI sums, site by site,
# and flatten each matrix to its strict upper triangle.

flattened_matrices = []
mind_matrices = []
roi_values = []

for site in sites:
    site_path = os.path.join(matrices_path, site)
    if not os.path.isdir(site_path):
        # A skipped site shifts every later row relative to the covariates,
        # so make the gap visible instead of passing over it silently.
        print(f"WARNING: folder for site {site} not found at {site_path}; no matrices loaded for it.")
        continue

    # Keep only .mat files: any other directory entry would otherwise be
    # handed to loadmat. Sorting fixes the within-site subject order.
    mat_files = sorted(
        name for name in os.listdir(site_path)
        if name.lower().endswith('.mat')
    )
    print(f"Found {len(mat_files)} files in {site} folder.")

    for file in mat_files:
        file_path = os.path.join(site_path, file)

        mat_data = scipy.io.loadmat(file_path)

        mind_matrix = mat_data['mind_matrix']
        mind_matrices.append(mind_matrix)

        sum_roi_values = mat_data['sum_roi_values']
        roi_values.append(sum_roi_values)

        # k=1 excludes the diagonal, so only off-diagonal entries are kept.
        upper_triangle_indices = np.triu_indices(mind_matrix.shape[0], k=1)
        upper_triangle_flattened = mind_matrix[upper_triangle_indices]

        flattened_matrices.append(upper_triangle_flattened)


In [None]:
# Collapse each per-subject ROI array to 1-D, then stack the results as rows
# (one row per loaded file).
roi_values = list(map(lambda roi: roi.flatten(), roi_values))
roi_data = pd.DataFrame(data=roi_values)

In [None]:
# One row per loaded matrix, one column per upper-triangle entry.
data_raw = pd.DataFrame(flattened_matrices)

In [None]:
# Load TIV values
# The matrices are read site by site with file names sorted inside each site,
# so the TIV table is put in the same (Batch, SubjID) order.
# NOTE(review): assumes Batch ids follow the `sites` numbering and that SubjID
# sorts the same way as the .mat file names -- confirm.
tiv_data = pd.read_csv("StratiBip_covar.csv", sep=',')

# One two-key sort replaces the per-batch loop over the hard-coded ids 1..7.
# Unlike that loop, it also yields site-ordered rows when the CSV is not
# already grouped by Batch. The index is rebuilt because later cells drop
# rows by index label.
tiv_data = (
    tiv_data
    .sort_values(by=["Batch", "SubjID"])
    .reset_index(drop=True)
)

tiv_values = tiv_data.TIV

In [None]:
# Eliminate diagnosis = 2 (MDD) from every per-subject table.
# The tables are matched to each other purely by row position, so they must
# all have the same number of rows before index labels taken from one of them
# are used to drop rows from the others.
row_counts = {
    'data_raw': len(data_raw),
    'strat_covars': len(strat_covars),
    'roi_data': len(roi_data),
    'tiv_values': len(tiv_values),
}
if len(set(row_counts.values())) != 1:
    raise ValueError(
        f"Row counts differ between tables, cannot drop rows by position: {row_counts}"
    )

rows_to_delete = strat_covars[strat_covars['Dx'] == 2].index

# Drop the same labels everywhere, then renumber so the tables stay aligned
# on a fresh 0..n-1 index.
data_raw = data_raw.drop(rows_to_delete).reset_index(drop=True)
strat_covars = strat_covars.drop(rows_to_delete).reset_index(drop=True)
roi_data = roi_data.drop(rows_to_delete).reset_index(drop=True)
tiv_values = tiv_values.drop(rows_to_delete).reset_index(drop=True)

# Persist the filtered covariates.
strat_covars.to_csv("MatchedData01.csv", index=False)

print(data_raw.shape)
print(strat_covars.shape)
print(roi_data.shape)
tiv_values.shape

In [None]:
# Persist the pre-harmonization feature table to CSV (no index column).
data_raw.to_csv('data_raw_nmm_new.csv', index=False)

In [None]:
#harmonization
# Put the project folder first on the import path so that
# Confounder_Correction_Classes (presumably a module stored there) can be imported.
import sys
sys.path.insert(0,'/content/drive/MyDrive/TESI/')

import Confounder_Correction_Classes
from Confounder_Correction_Classes import ComBatHarmonization

In [None]:
# Configure ComBat over every column of the flattened MIND table, with
# 'Gender' as categorical and 'Age' as continuous covariate.
# NOTE(review): the meaning of the remaining arguments is defined in
# Confounder_Correction_Classes -- confirm there.
volumes_columns = np.arange(data_raw.shape[1])

feat_detail = {
    'volumes': {
        'id': volumes_columns,
        'categorical': ['Gender'],
        'continuous': ['Age'],
    },
}

combat_function = ComBatHarmonization(
    feat_detail=feat_detail,
    cv_method=None,
    ref_batch=None,
    regression_fit=0,
    feat_of_no_interest=None,
)

In [None]:
# Bundle features and covariates in the dict passed to the harmonizer,
# then fit and apply it in a single call.
data_dict = {
    'data': data_raw,
    'covariates': strat_covars,
}

data_harm = combat_function.fit_transform(data_dict)

In [None]:
# Wrap the harmonized output in a DataFrame and persist it to CSV.
# NOTE(review): fit_transform presumably returns an array-like that keeps the
# row order of data_raw -- confirm.
data_harm = pd.DataFrame(data_harm)
data_harm.to_csv('data_harm_nmm_new.csv', index=False)

# **ROI Volumes Harmonization**

In [None]:
# Normalize each ROI volume by the corresponding TIV
# (axis=0 matches tiv_values to the rows of roi_data by index label, so every
# column of a row is divided by that row's TIV).
roi_data_norm = roi_data.div(tiv_values, axis=0)

In [None]:
# Configure a fresh ComBat instance over every column of the TIV-normalized
# ROI table, again with 'Gender' (categorical) and 'Age' (continuous).
# This rebinds feat_detail and combat_function.
columns = np.arange(roi_data_norm.shape[1])

feat_detail = {
    'volumes': {
        'id': columns,
        'categorical': ['Gender'],
        'continuous': ['Age'],
    },
}

combat_function = ComBatHarmonization(
    feat_detail=feat_detail,
    cv_method=None,
    ref_batch=None,
    regression_fit=0,
    feat_of_no_interest=None,
)

In [None]:
# Bundle the normalized ROI volumes with the covariates, then fit and apply
# the harmonizer in a single call.
data_dict = {
    'data': roi_data_norm,
    'covariates': strat_covars,
}

roi_data_harm = combat_function.fit_transform(data_dict)

In [None]:
# Wrap the harmonized ROI output in a DataFrame and persist it to CSV.
# NOTE(review): fit_transform presumably returns an array-like that keeps the
# row order of roi_data_norm -- confirm.
roi_data_harm = pd.DataFrame(roi_data_harm)
roi_data_harm.to_csv('roi_data_harm_nmm_new.csv', index=False)