In [None]:
## IMPORTANT!

# Before running the notebook, set the number of CPU workers to use for the
# computations; pick a value that fits the number of cores on this machine.
# NOTE(review): n_jobs is not referenced in the cells visible here - confirm
# whether it is still needed.
n_jobs= 50
#libraries
import pandas as pd
import numpy as np
import os
import sys
import shutil
import glob
import joblib
import warnings
from datetime import date, datetime

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
import scipy.stats as st

from nilearn import image as nli
from nilearn import plotting

#from mne.viz import plot_connectivity_circle
from mne_connectivity.viz import plot_connectivity_circle

import copy

#%matplotlib inline
#import matplotlib_inline
#matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

##### Path variables

In [None]:
# Root directories of the prediction results, one per dataset/target.
# The loading cell globs 'layer1/*/*/<fold>/train1_results.csv' and
# 'layer2/*/*/*/<fold>/train1_results.csv' underneath each of these.
# Presumably: ya = HCP Young Adult, a = HCP Aging, d* = Dunedin study
# (IQ45 / IQch / IQres targets) - inferred from the path names, confirm.
path_ya = '/media/hcs-psy-narun/Alina/HCP_results/sep_2023_rerun_v5/hcp_ya/results/totalIQ/'
path_a = '/media/hcs-psy-narun/Alina/HCP_results/aug_rerun_v5/hcp_aging/results/totalIQ/'
path_d = '/media/hcs-psy-narun/Alina/HCP_results/aug_rerun_v5/dud_study/results/IQ45_adj/'
path_d_ch = '/media/hcs-psy-narun/Alina/HCP_results/aug_rerun_v5/dud_study/results/IQch_adj/'
path_d_res = '/media/hcs-psy-narun/Alina/HCP_results/aug_rerun_v5/dud_study/results/IQres_adj/'

In [None]:
# Root directories of the observed target tables, one per dataset/target.
# Each entry of these directories is treated as a fold name, and the first
# file matching '<fold>/*_train1*' is read as the observed values of that fold.
# The trailing '/' matters: paths are built by plain string concatenation.
path_ya_obs = '/media/hcs-psy-narun/Alina/ready_tables_new_enhanced_REST-acomcor/main_set/totalIQ/'
path_a_obs = '/media/data/HCPAging/data/New_MLTabs_OneTrain_latest_short/totalIQ/'
path_d_obs = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/IQ45_adj/'
path_d_ch_obs = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/IQch_adj/'
path_d_res_obs = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/IQres_adj/'

In [None]:
# Output directory for the CSV files written by the save cell (metric tables
# and per-dataset prediction tables). File names are appended by plain string
# concatenation, so the trailing '/' is required.
path_out = '/media/hcs-psy-narun/Alina/HCP_results/predicted_values_for_datasets_REST_acomcor/'

##### Load data

In [None]:
# Load the observed target tables.
# Result layout: targ_obs[dataset][fold] -> DataFrame read from the first file
# matching '<fold>/*_train1*' inside that dataset's observed-data directory.
observed_dirs = {
    'YA': path_ya_obs,
    'A': path_a_obs,
    'DUD': path_d_obs,
    'DUD_ch': path_d_ch_obs,
    'DUD_res': path_d_res_obs,
}

targ_obs = {}

for key, path in observed_dirs.items():
    fold_names = sorted(os.listdir(path))
    targ = {}
    for fold in fold_names:
        # Only the first glob hit is used; an empty match raises IndexError.
        matches = glob.glob(path+fold+'/*_train1*')
        targ[fold] = pd.read_csv(matches[0], index_col=0)

    targ_obs[key] = targ

In [None]:
#load predicted data
# For every dataset, read the per-fold 'train1_results.csv' files found under
# the 'layer1/' and 'layer2/' result trees into nested dicts:
#     layer1[dataset][fold][name] / layer2[dataset][fold][name]
# where `name` joins the directory levels between 'layerN/' and the fold
# directory with '_' (two levels for layer1, three for layer2).
layer1 = {}
layer2 = {}

for path_obs, key, path in zip([path_ya_obs, path_a_obs, path_d_obs, path_d_ch_obs, path_d_res_obs],
                               ['YA', 'A', 'DUD', 'DUD_ch', 'DUD_res'],
                               [path_ya, path_a, path_d, path_d_ch, path_d_res]):
    l1={}
    l2={}
    # Fold names come from the observed-data directory so that predictions
    # and observed targets are keyed by the same folds.
    for fold in sorted(os.listdir(path_obs)):
        l1[fold] = {}
        l2[fold] = {}
        for file in sorted(glob.glob(path+'layer1/*/*/'+fold+'/train1_results.csv')):
            parts = file.split('/')
            # read_csv(..., squeeze=True) was removed in pandas 2.0;
            # DataFrame.squeeze("columns") is the documented replacement and
            # turns a single-column frame into a Series.
            l1[fold][parts[-4]+'_'+parts[-3]] = pd.read_csv(file, index_col=0).squeeze("columns")
        for file in sorted(glob.glob(path+'layer2/*/*/*/'+fold+'/train1_results.csv')):
            parts = file.split('/')
            l2[fold][parts[-5]+'_'+parts[-4]+'_'+parts[-3]] = pd.read_csv(file, index_col=0).squeeze("columns")
    layer1[key] = l1
    layer2[key] = l2

In [None]:
#concat the layers into one tab for each mod
# For every dataset build one table with a column per layer1/layer2 model,
# plus 'y_obs' (observed target) and 'fold', with all folds stacked row-wise:
#     val_pred[dataset] -> DataFrame indexed like the observed tables.

val_pred = {}

for path_obs, KEY in zip([path_ya_obs, path_a_obs, path_d_obs, path_d_ch_obs, path_d_res_obs],
                         ['YA', 'A', 'DUD', 'DUD_ch', 'DUD_res']):

    # Collect the per-fold tables and concatenate once at the end: growing a
    # DataFrame with pd.concat inside the loop copies all previous rows on
    # every iteration and starts from an empty frame, which newer pandas
    # versions warn about.
    fold_tables = []

    for fold in sorted(os.listdir(path_obs)):

        observed = targ_obs[KEY][fold]
        y_obs = observed.values.ravel()

        a = pd.DataFrame(layer1[KEY][fold])
        b = pd.DataFrame(layer2[KEY][fold])
        ab = pd.concat([a, b], axis=1)
        # Observed values are attached positionally, then the observed index
        # replaces the prediction index.
        # NOTE(review): this assumes prediction rows are in the same order as
        # the observed rows - confirm against the code that wrote the results.
        ab['y_obs'] = y_obs
        ab['fold'] = np.full(y_obs.shape, fold)
        ab.index = observed.index

        fold_tables.append(ab)

    # pd.concat raises on an empty list; keep the original "no folds -> empty
    # DataFrame" outcome.
    val_pred[KEY] = pd.concat(fold_tables, axis=0) if fold_tables else pd.DataFrame()

##### Calculate performance metrics (R², MAE, Pearson correlation)

In [None]:
# Silence ConvergenceWarning explicitly; when no warning options were passed
# to the interpreter, silence every other warning as well (PYTHONWARNINGS is
# set in the environment too).
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

# Per-dataset outputs:
#   R2 / MAE / COR -> DataFrame (rows: model name, columns: fold)
#   Results        -> DataFrame with the across-fold mean of each metric
R2, MAE, COR, Results = {}, {}, {}, {}

dataset_keys = ['YA', 'A', 'DUD', 'DUD_ch', 'DUD_res']
observed_paths = [path_ya_obs, path_a_obs, path_d_obs, path_d_ch_obs, path_d_res_obs]

for path_obs, KEY in zip(observed_paths, dataset_keys):

    r2, mae, corr = {}, {}, {}

    for fold in sorted(os.listdir(path_obs)):
        y_true = targ_obs[KEY][fold]
        y_true_flat = y_true.values.ravel()

        fold_r2, fold_mae, fold_corr = {}, {}, {}
        # layer1 models are scored first, then layer2; a layer2 model sharing
        # a name with a layer1 model would overwrite its entry.
        for layer in (layer1, layer2):
            for key, y_pred in layer[KEY][fold].items():
                fold_r2[key] = r2_score(y_true, y_pred)
                fold_mae[key] = mean_absolute_error(y_true, y_pred)
                # pearsonr returns (statistic, p-value); keep the statistic only
                fold_corr[key] = pearsonr(y_true_flat, y_pred.values.ravel())[0]

        r2[fold] = fold_r2
        mae[fold] = fold_mae
        corr[fold] = fold_corr

    r2 = pd.DataFrame(r2)
    mae = pd.DataFrame(mae)
    corr = pd.DataFrame(corr)

    # Average each model's metric over the folds
    mean_by_metric = {
        'r2': r2.T.mean(),
        'mae': mae.T.mean(),
        'corr': corr.T.mean(),
    }
    results = pd.DataFrame(mean_by_metric)

    R2[KEY] = r2
    MAE[KEY] = mae
    COR[KEY] = corr
    Results[KEY] = results
    

##### Save

In [None]:


# Write, for every dataset, the per-fold metric tables (R2, MAE, COR), the
# across-fold mean table and the combined prediction table as CSV files
# into path_out.
# DataFrame.to_csv does not create missing directories, so make sure the
# output directory exists before writing.
os.makedirs(path_out, exist_ok=True)

for KEY in ['YA', 'A', 'DUD', 'DUD_ch', 'DUD_res']:

    R2[KEY].to_csv(path_out+'R2_'+KEY+'.csv')
    MAE[KEY].to_csv(path_out+'MAE_'+KEY+'.csv')
    COR[KEY].to_csv(path_out+'COR_'+KEY+'.csv')
    Results[KEY].to_csv(path_out+'Results_mean_'+KEY+'.csv')
    val_pred[KEY].to_csv(path_out+'pred_vals_'+KEY+'.csv')