# Stacked Machine Learning

In [1]:
## IMPORTANT !

# Set the number of parallel workers before running the notebook.
# The value is passed to GridSearchCV (n_jobs) and should match the number of CPU cores available.
n_jobs= 40

### Load libraries

In [2]:
#libraries
import pandas as pd
import numpy as np
import os
import sys
import shutil
import glob
import joblib
import warnings
from datetime import date, datetime

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
import scipy.stats as st

from nilearn import image as nli
from nilearn import plotting

from mne.viz import plot_connectivity_circle

### Load functions

In [3]:
def control_features(table_in, control, index):
    """Regress the control variables out of every column of ``table_in``.

    Both tables are first restricted to ``index``. Each column is z-scored,
    a linear regression on the z-scored control variables is fitted, and the
    residuals of that regression replace the column.

    Parameters
    ----------
    table_in : pd.DataFrame or pd.Series
        Rows are subjects, columns are features. A Series (e.g. the target)
        is handled as a single column named ``'0'``.
    control : pd.DataFrame or pd.Series
        Control variables (such as age and sex), indexed like ``table_in``.
    index : array-like
        Subject labels used to fit the adjustment models.

    Returns
    -------
    df_table : pd.DataFrame
        Residuals, indexed by ``index``, one column per input column.
    dct_std_y_models : dict
        Column name -> StandardScaler fitted on that column.
    std_model : StandardScaler
        Scaler fitted on the control variables.
    dct_lin_models : dict
        Column name -> LinearRegression fitted on that column.
    """
    # restrict both tables to the requested subjects, in the same row order
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)
    ind = table_in.index

    # a Series carries a single unnamed column; it is stored under the key '0'
    if table_in.ndim == 1:
        named_columns = [('0', table_in)]
    else:
        named_columns = ((col, table_in[col]) for col in table_in.columns)

    # The control matrix is identical for every column, so it is scaled once
    # here instead of being refit inside the loop.
    X = control.values
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    std_model = StandardScaler()
    std_model.fit(X)
    X = std_model.transform(X)

    dct_table = {}
    dct_lin_models = {}
    dct_std_y_models = {}

    for col, column in named_columns:
        # standardize the column that is being adjusted
        raw = column.values.reshape(-1, 1)
        std_model_y = StandardScaler()
        std_model_y.fit(raw)
        y = std_model_y.transform(raw).ravel()

        # fit column ~ controls and keep what the controls cannot explain
        model = LinearRegression()
        model.fit(X, y)

        dct_table[col] = y - model.predict(X)
        dct_lin_models[col] = model
        dct_std_y_models[col] = std_model_y

    df_table = pd.DataFrame(dct_table, index=ind)

    return df_table, dct_std_y_models, std_model, dct_lin_models

In [4]:
def re_control_features(table_in, control, index, dct_std_y_models, std_model, dct_lin_models):
    """Apply previously fitted adjustment models to the subjects in ``index``.

    Counterpart of ``control_features``: nothing is fitted here. Each column
    is z-scored with its stored scaler, the stored regression predicts it from
    the scaled control variables, and the residual is returned.

    Parameters
    ----------
    table_in : pd.DataFrame or pd.Series
        Rows are subjects, columns are features. A Series is handled as a
        single column whose models are stored under the key ``'0'``.
    control : pd.DataFrame or pd.Series
        Control variables, indexed like ``table_in``.
    index : array-like
        Subject labels to adjust.
    dct_std_y_models : dict
        Column name -> fitted scaler for that column.
    std_model : fitted scaler
        Scaler for the control variables.
    dct_lin_models : dict
        Column name -> fitted regression for that column.

    Returns
    -------
    pd.DataFrame
        Residuals, indexed by ``index``, one column per input column.
    """
    # restrict both tables to the requested subjects, in the same row order
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)
    ind = table_in.index

    # a Series carries a single unnamed column; its models live under '0'
    if table_in.ndim == 1:
        named_columns = [('0', table_in)]
    else:
        named_columns = ((col, table_in[col]) for col in table_in.columns)

    # The scaled control matrix does not depend on the column, so it is
    # computed once instead of once per column.
    X = control.values
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    X = std_model.transform(X)

    dct_table = {}
    for col, column in named_columns:
        # standardize with the scaler fitted for this column
        y = dct_std_y_models[col].transform(column.values.reshape(-1, 1)).ravel()

        # subtract what the stored regression predicts from the controls
        dct_table[col] = y - dct_lin_models[col].predict(X)

    df_table = pd.DataFrame(dct_table, index=ind)

    return df_table

In [5]:
def elnet(X, y):
    """Tune and fit an ElasticNet on ``X`` / ``y`` with a cross-validated grid search.

    Subjects with a missing target are dropped from both ``y`` and ``X``.
    The grid covers 70 log-spaced alphas between 0.1 and 100 and 25 evenly
    spaced l1 ratios between 0 and 1, scored with 5-fold cross-validation.
    The number of parallel workers is read from the notebook-level ``n_jobs``.

    NOTE(review): ``cv=5`` does not receive the family groups used by the
    outer GroupKFold, so the inner hyper-parameter search presumably splits
    without regard to families - confirm this is intended.

    Returns
    -------
    tuple
        (best alpha, best l1_ratio, best cross-validated score, MSE,
        Pearson correlation, best estimator, predictions, MAE); the error
        metrics and the correlation are computed on the same subjects the
        model was fitted on.
    """
    # keep only the subjects whose target is observed
    target = y.dropna()
    design = X.loc[target.index, :].values
    target_vec = target.values.reshape(-1, 1).ravel()

    # single-step pipeline, so the grid keys carry the 'elasticnet__' prefix
    search = GridSearchCV(
        Pipeline([('elasticnet', ElasticNet(random_state=42))]),
        {'elasticnet__alpha': np.logspace(-1, 2, 70),
         'elasticnet__l1_ratio': np.linspace(0, 1, 25)},
        cv=5,
        n_jobs=n_jobs,
    )
    search.fit(design, target_vec)

    # predictions of the refitted best model for the fitting subjects
    fitted = search.predict(design)

    cv_score = search.best_score_
    best = search.best_params_
    best_model = search.best_estimator_
    mse = mean_squared_error(target, fitted)
    mae = mean_absolute_error(target, fitted)
    observed = np.array(target.values.reshape(-1, 1).ravel(), dtype=float)
    corr, _ = pearsonr(observed, np.array(fitted, dtype=float))

    return (best['elasticnet__alpha'], best['elasticnet__l1_ratio'],
            cv_score, mse, corr, best_model, fitted, mae)

In [6]:
def reaply_ElNet(X, y, model):
    """Evaluate an already fitted model on ``X`` / ``y`` without refitting.

    Subjects with a missing target are dropped; ``X`` is reindexed to the
    remaining subjects.

    Returns
    -------
    tuple
        (predictions, observed target without NaN, subject labels as array,
        ``model.score`` on this data, MSE, Pearson correlation, MAE).
    """
    # observed targets only; the feature rows follow the same subjects
    y_real = y.dropna()
    ind_y = np.array(y_real.index)
    design = X.reindex(index=y_real.index).values
    observed = y_real.values.reshape(-1, 1).ravel()

    # predictions from the supplied model
    y_pred = model.predict(design)

    # performance on this data
    bacc = model.score(design, observed)
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(observed, dtype=float), np.array(y_pred, dtype=float))

    return y_pred, y_real, ind_y, bacc, mse, corr, mae

### Path to the tables folder

In [7]:
# Folder holding the input CSV tables; the per-target output folders are created inside it as well.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path='/media/data/HCPAging/data/MLTablesMultCon/'

### Load tables

In [8]:
# silence every warning from here on
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# demography table
demo = pd.read_csv(path+'demography.csv', index_col=0)

# table of target variables
targ = pd.read_csv(path+'cognition.csv', index_col=0)

# modality name -> CSV file inside `path`
feature_files = {
    # task contrasts
    'carit1': 'carit-con1.csv',
    'carit3': 'carit-con3.csv',
    'carit4': 'carit-con4.csv',

    'face1': 'FACENAME_group_table_3EV_con1.csv',
    'face2': 'FACENAME_group_table_3EV_con2.csv',
    'face3': 'FACENAME_group_table_3EV_con3.csv',
    'face4': 'FACENAME_group_table_3EV_con4.csv',
    'face5': 'FACENAME_group_table_3EV_con5.csv',
    'face6': 'FACENAME_group_table_3EV_con6.csv',

    'vism': 'vism.csv',

    # task functional connectivity
    'carit_FC': 'CARIT_taskFC.csv',
    'face_FC': 'FACENAME_task_FC_3EV.csv',
    'vism_FC': 'VISMOTOR_taskFC.csv',

    # structural tables
    'cort': 'cort.csv',
    'surf': 'surf.csv',
    'subc': 'subc.csv',
    'VolBrain': 'VolBrain.csv',

    # resting state
    'rest': 'rest_hpass.csv',
}

# feature tables as a dictionary, read in the order listed above
features = {
    key: pd.read_csv(path+file_name, index_col=0)
    for key, file_name in feature_files.items()
}

# table with movements (mean relative displacement Movement_RelativeRMS_mean.txt)
movements = pd.read_csv(path+'movements.csv', index_col=0)



In [9]:
# Build the table of control variables (integer-coded sex and age).
# LabelEncoder expects a 1-D input, so the 'sex' column is passed as a Series;
# a one-column DataFrame only works through an implicit conversion that
# raises a DataConversionWarning.
sex_coded = pd.Series(LabelEncoder().fit_transform(demo['sex']), index=demo.index, name='sex')

control = pd.DataFrame({'sex':sex_coded, 'age':demo['interview_age']})

In [10]:
# Show the control-variable table (integer-coded sex and age for each subject).
control

Unnamed: 0_level_0,sex,age
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
HCA6002236,0,558
HCA6018857,0,436
HCA6030645,0,544
HCA6047359,1,640
HCA6051047,0,725
...,...,...
HCA9943504,0,742
HCA9947411,1,459
HCA9953406,0,567
HCA9956008,0,492


In [11]:
# report the (rows, columns) shape of every feature table
for key, table in features.items():
    print(key, table.shape)

carit1 (504, 379)
carit3 (504, 379)
carit4 (504, 379)
face1 (504, 379)
face2 (504, 379)
face3 (504, 379)
face4 (504, 379)
face5 (504, 379)
face6 (504, 379)
vism (504, 379)
carit_FC (504, 71631)
face_FC (504, 71631)
vism_FC (504, 71631)
cort (504, 148)
surf (504, 148)
subc (504, 19)
VolBrain (504, 5)
rest (504, 71631)


##### Group 5-fold cross-validation (GroupKFold, grouped by family ID)

In [13]:
# Imports used only by the warning filters of this cell.
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
# Hide scikit-learn convergence warnings.
simplefilter("ignore", category=ConvergenceWarning)

# Unless warning options were passed to the interpreter, silence all warnings
# in this process and export PYTHONWARNINGS as well
# (presumably so that the parallel worker processes stay quiet too - TODO confirm).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"
    
    
    
    
    

# Connectivity tables that are reduced to 75 principal components before modelling.
fc_keys = ['rest', 'carit_FC', 'face_FC', 'vism_FC']

# Row labels of the performance tables (fitted models / re-applied models).
fit_perf_rows = ['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio']
eval_perf_rows = ['best score r2', 'mse', 'mae','corr']

# Fixed modality sets for the stacked models; set1 (all modalities) is built inside the fold.
set2 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism']
set3 = ['cort', 'subc', 'surf', 'rest', 'VolBrain']
set4 = ['carit_FC', 'face_FC', 'vism_FC']
set5 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism', 'carit_FC', 'face_FC', 'vism_FC']
set6 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
set7 = ['carit_FC', 'face_FC', 'vism_FC', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
set8 = ['carit_FC', 'face_FC', 'vism_FC', 'rest']

# One complete stacked-model run per target column.
for COL in targ.columns:
    # To run a single target only, replace the loop header by an assignment such as
    # COL = 'nih_fluidcogcomp_unadjusted' and dedent the body.
    y = targ[COL]

    print(y.name)

    # Output folder for this target. exist_ok keeps the cell re-runnable after an
    # interrupted run; files of an earlier run are overwritten.
    nmf = path+'output_5cv_AllAdj_STDstackFeatures_'+y.name
    os.makedirs(nmf, exist_ok=True)

    # Folds are built from the family ID so that the members of one family
    # stay on the same side of the train/test split.
    group_kfold = GroupKFold(n_splits=5)
    for i, (train_index, test_index) in enumerate(
            group_kfold.split(demo, groups=demo['family_user_def_id'])):

        print(' ')
        print('started to calculate the Fold #', i)
        print(datetime.now())
        print(' ')

        # directory for this fold and its model sub-folders
        path_out = os.path.join(nmf, 'Fold_'+str(i))
        adjustment_dir = os.path.join(path_out, 'adjustment_models')
        standardization_dir = os.path.join(path_out, 'standartization_models')
        pca_dir = os.path.join(path_out, 'PCA_models')
        pca_std_dir = os.path.join(path_out, 'PCA_standardization_models')
        for folder in (path_out, adjustment_dir, standardization_dir, pca_dir, pca_std_dir):
            os.makedirs(folder, exist_ok=True)

        # positional fold indices -> subject labels
        train_index = np.array(demo.iloc[train_index].index) #for training all models
        test_index = np.array(demo.iloc[test_index].index) #for final test


        ### 1st level ################################################################################

        #### Fit the single-modality models on train_index ########################################

        print('start 1st level ', datetime.now())

        # regress sex and age out of the target
        y_res1, std_targ_y, std_targ_X, linreg_targ = control_features(y, control, train_index)

        # regress sex and age out of every modality
        features_res1 = {}
        std_feat_y_dct = {}
        std_feat_X_dct = {}
        linreg_feat_dct = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())

            mod_res, std_f_y, std_f_X, linreg_f = control_features(features[key], control, y_res1.index)

            features_res1[key] = mod_res
            std_feat_y_dct[key] = std_f_y
            std_feat_X_dct[key] = std_f_X
            linreg_feat_dct[key] = linreg_f

        # save the adjustment models (target, then features)
        joblib.dump(std_targ_y, os.path.join(adjustment_dir, 'target_std_model_y.sav'))
        joblib.dump(std_targ_X, os.path.join(adjustment_dir, 'target_std_model_X.sav'))
        joblib.dump(linreg_targ, os.path.join(adjustment_dir, 'target_linreg.sav'))
        joblib.dump(std_feat_y_dct, os.path.join(adjustment_dir, 'features_std_model_y.sav'))
        joblib.dump(std_feat_X_dct, os.path.join(adjustment_dir, 'features_std_model_X.sav'))
        joblib.dump(linreg_feat_dct, os.path.join(adjustment_dir, 'features_linreg.sav'))

        # standardize the residuals and keep the scalers for the later levels
        std_models_features = {}
        for key in features_res1.keys():
            print('standartize ', key, datetime.now())
            std_model = StandardScaler()
            std_model.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(std_model.transform(features_res1[key].values),
                                              index=features_res1[key].index,
                                              columns=features_res1[key].columns)
            std_models_features[key] = std_model
        std_model_target = StandardScaler()
        std_model_target.fit(y_res1.values.reshape(-1, 1))
        y_res1 = pd.DataFrame(std_model_target.transform(y_res1.values.reshape(-1, 1)),
                              index=y_res1.index)

        # save the standardization models
        joblib.dump(std_model_target, os.path.join(standardization_dir, 'target_std_model.sav'))
        joblib.dump(std_models_features, os.path.join(standardization_dir, 'features_std_model.sav'))

        # save the tables before PCA
        y_res1.to_csv(os.path.join(path_out, 'target_y_train1.csv'))
        for key in features_res1.keys():
            features_res1[key].to_csv(os.path.join(path_out, str(key)+'_train1.csv'))

        # PCA on the rest and task connectivity tables
        PCA_models = {}
        for key in fc_keys:
            print('reduction ', key, datetime.now())
            model_PCA = PCA(n_components=75, random_state=11)
            model_PCA.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(model_PCA.transform(features_res1[key].values),
                                              index=features_res1[key].index)
            PCA_models[key] = model_PCA
        joblib.dump(PCA_models, os.path.join(pca_dir, 'PCA_model.sav'))

        # standardize the principal components
        std_PC_feature_models = {}
        for key in fc_keys:
            print('standartize PC table ', key, datetime.now())
            std_PC_model = StandardScaler()
            std_PC_model.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(std_PC_model.transform(features_res1[key].values),
                                              index=features_res1[key].index,
                                              columns=features_res1[key].columns)
            std_PC_feature_models[key] = std_PC_model
            features_res1[key].to_csv(os.path.join(path_out, key+'_PCA75_train1.csv'))
        joblib.dump(std_PC_feature_models, os.path.join(pca_std_dir, 'std_PCA_model.sav'))

        # one ElasticNet per modality
        dict_tasks = {}
        dict_elnet_model = {}
        dict_ypred1 = {}
        for key in list(features_res1.keys()):

            print('start ', str(key), datetime.now())   #print start time of calculations

            bpar1, bpar2, acc, mse, corr, model, y_pred1, mae = elnet(features_res1[key], y_res1) #ML
            dict_tasks[key] = acc, mse, mae, corr, bpar1, bpar2
            dict_elnet_model[key] = model
            dict_ypred1[key] = y_pred1
        df_tasks = pd.DataFrame(dict_tasks, index=fit_perf_rows)
        df_y_pred1 = pd.DataFrame(dict_ypred1, index=y_res1.index)

        # save the first-level models, their performance and their predictions
        for key in dict_elnet_model.keys():
            joblib.dump(dict_elnet_model[key], os.path.join(path_out, str(key)+'_elnet_model.sav'))
        df_tasks.to_csv(os.path.join(path_out, '1level_train_perf_elnet.csv'))
        df_y_pred1.to_csv(os.path.join(path_out, '1level_train_y_pred_singleML.csv'))


        ### 2nd level ################################################################################
        print(' ')
        print('start 2nd level ', datetime.now())

        #### L2 Re-apply the single-modality models ################################################
        # NOTE(review): this level re-uses train_index, i.e. the same subjects the
        # first-level models were fitted on, so the stacked models below are trained
        # on in-sample first-level predictions. Confirm that no separate second-level
        # split is intended.

        print('Checking single ML on test1 data ', datetime.now())

        # re-apply the fitted sex/age adjustment to the target
        y_res2 = re_control_features(y, control, train_index,
                                     std_targ_y, std_targ_X, linreg_targ)

        # re-apply the fitted sex/age adjustment to every modality
        features_res2 = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())

            features_res2[key] = re_control_features(features[key], control, y_res2.index,
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

        # standardize with the first-level scalers
        for key in features_res2.keys():
            print('standartize ', key, datetime.now())
            features_res2[key] = pd.DataFrame(std_models_features[key].transform(features_res2[key].values),
                                              index=features_res2[key].index,
                                              columns=features_res2[key].columns)
        y_res2 = pd.DataFrame(std_model_target.transform(y_res2.values.reshape(-1, 1)),
                              index=y_res2.index)

        # save the tables before PCA
        y_res2.to_csv(os.path.join(path_out, 'target_y_train2.csv'))
        for key in features_res2.keys():
            features_res2[key].to_csv(os.path.join(path_out, str(key)+'_train2.csv'))

        # project the connectivity tables onto the first-level components
        for key in fc_keys:
            print('reduction ', key, datetime.now())
            features_res2[key] = pd.DataFrame(PCA_models[key].transform(features_res2[key].values),
                                              index=features_res2[key].index)

        # standardize the components with the first-level scalers
        for key in fc_keys:
            print('standartize PCA ', key, datetime.now())
            features_res2[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res2[key].values),
                                              index=features_res2[key].index,
                                              columns=features_res2[key].columns)
            features_res2[key].to_csv(os.path.join(path_out, key+'_PCA75_train2.csv'))

        # predictions of the single-modality models; they are the stacking features
        dict_y_pred2 = {}
        dict_y_pred2_per = {}
        for key in list(features_res2.keys()):
            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res2[key], y_res2, dict_elnet_model[key]) #ML
            dict_y_pred2[key] = y_pred
            dict_y_pred2_per[key] = bacc, mse, mae, corr

        df_y_pred2 = pd.DataFrame(dict_y_pred2, index=ind_y)
        df_y_pred2_per = pd.DataFrame(dict_y_pred2_per, index=eval_perf_rows)

        df_y_pred2_per.to_csv(os.path.join(path_out, '2level_test1_perf_elnet.csv'))
        df_y_pred2.to_csv(os.path.join(path_out, '2level_test1_y_pred_singleML.csv'))


        #### L2 Fit the stacked models #############################################################

        print('Calculating stacked ML on test1 data ', datetime.now())

        set1 = list(df_y_pred2.columns) #all existing modalities
        stack_sets = [set1, set2, set3, set4, set5, set6, set7, set8]

        dict_st_perf1 = {}
        dict_st_models = {}
        dict_st_ypred1 = {}
        dct_std_mod_for_stack = {}
        dct_std_tab_for_stack = {}
        dct_std_tab_before_for_stack = {}

        for s, set_n in enumerate(stack_sets, start=1):
            set_name = 'set'+str(s)
            print('set '+str(s), datetime.now())

            st_features = df_y_pred2.loc[:,set_n]
            dct_std_tab_before_for_stack[set_name] = st_features

            # standardize the first-level predictions before stacking
            stack_std_model = StandardScaler().fit(st_features.values)
            dct_std_mod_for_stack[set_name] = stack_std_model

            std_st_features = pd.DataFrame(stack_std_model.transform(st_features.values),
                                           index=st_features.index, columns=st_features.columns)
            dct_std_tab_for_stack[set_name] = std_st_features

            bpar1, bpar2, acc, mse, corr, model, y_pred3, mae = elnet(std_st_features, y_res2) #ML

            dict_st_perf1[set_name] = acc, mse, mae, corr, bpar1, bpar2
            dict_st_models[set_name] = model
            dict_st_ypred1[set_name] = y_pred3

        df_st_perf1 = pd.DataFrame(dict_st_perf1, index=fit_perf_rows)
        df_st_ypred1 = pd.DataFrame(dict_st_ypred1, index=y_res2.index)

        # save the stacked models and their scalers
        for key in dict_st_models.keys():
            joblib.dump(dict_st_models[key], os.path.join(path_out, str(key)+'_stacked_model.sav'))
        for key in dct_std_mod_for_stack.keys():
            joblib.dump(dct_std_mod_for_stack[key], os.path.join(path_out, str(key)+'_stacked_STD_model.sav'))

        # performance and predictions
        df_st_perf1.to_csv(os.path.join(path_out, '2level_test1_perf_stacked.csv'))
        df_st_ypred1.to_csv(os.path.join(path_out, '2level_test1_y_pred_stacked.csv'))
        # One file per set: a shared file name would make every set overwrite the
        # previous one and leave only the last set on disk.
        for key in dct_std_tab_for_stack.keys():
            dct_std_tab_for_stack[key].to_csv(
                os.path.join(path_out, key+'_2level_stack_y_feature_tab_STD.csv'))
            dct_std_tab_before_for_stack[key].to_csv(
                os.path.join(path_out, key+'_2level_stack_y_feature_tab_beforeSTD.csv'))


        ### 3rd level ################################################################################
        print(' ')
        print('start 3rd level ', datetime.now())

        #### L3 Test the single-modality models on test_index ######################################

        print('Checking single ML on test2 data ', datetime.now())

        # re-apply the fitted sex/age adjustment to the held-out target
        y_res3 = re_control_features(y, control, test_index,
                                     std_targ_y, std_targ_X, linreg_targ)

        # re-apply the fitted sex/age adjustment to every held-out modality
        features_res3 = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())

            features_res3[key] = re_control_features(features[key], control, y_res3.index,
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

        # standardize with the first-level scalers
        for key in features_res3.keys():
            print('standartize ', key, datetime.now())
            features_res3[key] = pd.DataFrame(std_models_features[key].transform(features_res3[key].values),
                                              index=features_res3[key].index,
                                              columns=features_res3[key].columns)
        y_res3 = pd.DataFrame(std_model_target.transform(y_res3.values.reshape(-1, 1)),
                              index=y_res3.index)

        # save the tables before PCA
        y_res3.to_csv(os.path.join(path_out, 'target_y_test.csv'))
        for key in features_res3.keys():
            features_res3[key].to_csv(os.path.join(path_out, str(key)+'_test.csv'))

        # project the connectivity tables onto the first-level components
        for key in fc_keys:
            print('reduction ', key, datetime.now())
            features_res3[key] = pd.DataFrame(PCA_models[key].transform(features_res3[key].values),
                                              index=features_res3[key].index)

        # standardize the components with the first-level scalers
        for key in fc_keys:
            print('standartize PCA ', key, datetime.now())
            features_res3[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res3[key].values),
                                              index=features_res3[key].index,
                                              columns=features_res3[key].columns)
            features_res3[key].to_csv(os.path.join(path_out, key+'_PCA75_test.csv'))

        # held-out predictions of the single-modality models
        dict_y_pred3 = {}
        dict_y_pred3_per = {}
        for key in list(features_res3.keys()):
            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res3[key], y_res3, dict_elnet_model[key]) #ML
            dict_y_pred3[key] = y_pred
            dict_y_pred3_per[key] = bacc, mse, mae, corr

        df_y_pred3 = pd.DataFrame(dict_y_pred3, index=ind_y)
        df_y_pred3_per = pd.DataFrame(dict_y_pred3_per, index=eval_perf_rows)

        df_y_pred3_per.to_csv(os.path.join(path_out, '3level_test2_perf_elnet.csv'))
        df_y_pred3.to_csv(os.path.join(path_out, '3level_test2_y_pred_singleML.csv'))


        #### L3 Test the stacked models on test_index ##############################################

        print('Calculating stacked ML on test2 data ', datetime.now())

        dict_st_perf2 = {}
        dict_st_ypred2 = {}
        dct_std3_tab_for_stack = {}
        dct_std3_tab_before_for_stack = {}

        for s, set_n in enumerate(stack_sets, start=1):
            set_name = 'set'+str(s)

            ftrs = df_y_pred3.loc[:, set_n]
            dct_std3_tab_before_for_stack[set_name] = ftrs

            # scale with the scaler fitted for this set at the second level
            std_ftrs = pd.DataFrame(dct_std_mod_for_stack[set_name].transform(ftrs.values),
                                    index=ftrs.index, columns=ftrs.columns)
            dct_std3_tab_for_stack[set_name] = std_ftrs

            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(std_ftrs, y_res3, dict_st_models[set_name]) #ML
            dict_st_ypred2[set_name] = y_pred
            dict_st_perf2[set_name] = bacc, mse, mae, corr

        df_st_ypred2 = pd.DataFrame(dict_st_ypred2, index=ind_y)
        df_st_perf2 = pd.DataFrame(dict_st_perf2, index=eval_perf_rows)

        # performance and predictions
        df_st_perf2.to_csv(os.path.join(path_out, '3level_test2_perf_stacked.csv'))
        df_st_ypred2.to_csv(os.path.join(path_out, '3level_test2_y_pred_stacked.csv'))
        # one file per set, for the same reason as at the second level
        for key in dct_std3_tab_for_stack.keys():
            dct_std3_tab_for_stack[key].to_csv(
                os.path.join(path_out, key+'_3level_stack_y_feature_tab_STD.csv'))
            dct_std3_tab_before_for_stack[key].to_csv(
                os.path.join(path_out, key+'_3level_stack_y_feature_tab_beforeSTD.csv'))

        print(' ')
        print('finished to calculate the Fold #', i)
        print(datetime.now())

    print(' ')
    print('finished the MODEL '+COL)
    print(datetime.now())

nih_totalcogcomp_unadjusted
 
started to calculate the Fold # 0
2022-12-07 18:53:04.660373
 
start 1st level  2022-12-07 18:53:04.662875
controlling  carit1 2022-12-07 18:53:04.734847
controlling  carit3 2022-12-07 18:53:05.048092
controlling  carit4 2022-12-07 18:53:05.342019
controlling  face1 2022-12-07 18:53:05.634957
controlling  face2 2022-12-07 18:53:05.911570
controlling  face3 2022-12-07 18:53:06.165350
controlling  face4 2022-12-07 18:53:06.420774
controlling  face5 2022-12-07 18:53:06.675079
controlling  face6 2022-12-07 18:53:06.930198
controlling  vism 2022-12-07 18:53:07.187347
controlling  carit_FC 2022-12-07 18:53:07.446629
controlling  face_FC 2022-12-07 18:53:57.046364
controlling  vism_FC 2022-12-07 18:54:51.827971
controlling  cort 2022-12-07 18:55:55.496251
controlling  surf 2022-12-07 18:55:55.863934
controlling  subc 2022-12-07 18:55:56.041258
controlling  VolBrain 2022-12-07 18:55:56.065193
controlling  rest 2022-12-07 18:55:56.072270
standartize  carit1 2022-12

controlling  face6 2022-12-07 19:14:28.034045
controlling  vism 2022-12-07 19:14:28.284658
controlling  carit_FC 2022-12-07 19:14:28.537821
controlling  face_FC 2022-12-07 19:15:24.413972
controlling  vism_FC 2022-12-07 19:16:21.407774
controlling  cort 2022-12-07 19:17:23.665551
controlling  surf 2022-12-07 19:17:23.973672
controlling  subc 2022-12-07 19:17:24.120738
controlling  VolBrain 2022-12-07 19:17:24.140083
controlling  rest 2022-12-07 19:17:24.145830
standartize  carit1 2022-12-07 19:20:04.361692
standartize  carit3 2022-12-07 19:20:04.370340
standartize  carit4 2022-12-07 19:20:04.372635
standartize  face1 2022-12-07 19:20:04.374305
standartize  face2 2022-12-07 19:20:04.375465
standartize  face3 2022-12-07 19:20:04.376626
standartize  face4 2022-12-07 19:20:04.377786
standartize  face5 2022-12-07 19:20:04.378986
standartize  face6 2022-12-07 19:20:04.380154
standartize  vism 2022-12-07 19:20:04.381321
standartize  carit_FC 2022-12-07 19:20:04.382814
standartize  face_FC 202

standartize  face_FC 2022-12-07 19:42:10.506033
standartize  vism_FC 2022-12-07 19:42:10.734234
standartize  cort 2022-12-07 19:42:10.961947
standartize  surf 2022-12-07 19:42:10.963133
standartize  subc 2022-12-07 19:42:10.963954
standartize  VolBrain 2022-12-07 19:42:10.964549
standartize  rest 2022-12-07 19:42:10.965133
reduction  rest 2022-12-07 19:46:48.946230
reduction  carit_FC 2022-12-07 19:46:49.953112
reduction  face_FC 2022-12-07 19:46:50.922724
reduction  vism_FC 2022-12-07 19:46:51.877975
standartize PC table  rest 2022-12-07 19:46:53.211671
standartize PC table  carit_FC 2022-12-07 19:46:53.246311
standartize PC table  face_FC 2022-12-07 19:46:53.279771
standartize PC table  vism_FC 2022-12-07 19:46:53.313306
start  carit1 2022-12-07 19:46:53.357629
start  carit3 2022-12-07 19:47:01.524931
start  carit4 2022-12-07 19:47:07.875442
start  face1 2022-12-07 19:47:14.414889
start  face2 2022-12-07 19:47:20.707469
start  face3 2022-12-07 19:47:27.636756
start  face4 2022-12-07 

standartize PC table  rest 2022-12-07 20:09:26.428853
standartize PC table  carit_FC 2022-12-07 20:09:26.538609
standartize PC table  face_FC 2022-12-07 20:09:26.573281
standartize PC table  vism_FC 2022-12-07 20:09:26.611588
start  carit1 2022-12-07 20:09:26.651836
start  carit3 2022-12-07 20:09:33.747548
start  carit4 2022-12-07 20:09:39.867967
start  face1 2022-12-07 20:09:46.544335
start  face2 2022-12-07 20:09:53.048632
start  face3 2022-12-07 20:09:59.584872
start  face4 2022-12-07 20:10:05.939614
start  face5 2022-12-07 20:10:12.356254
start  face6 2022-12-07 20:10:18.572113
start  vism 2022-12-07 20:10:25.291122
start  carit_FC 2022-12-07 20:10:31.618413
start  face_FC 2022-12-07 20:10:37.253021
start  vism_FC 2022-12-07 20:10:43.247083
start  cort 2022-12-07 20:10:48.835274
start  surf 2022-12-07 20:10:55.433085
start  subc 2022-12-07 20:11:03.555410
start  VolBrain 2022-12-07 20:11:09.776866
start  rest 2022-12-07 20:11:15.736382
 
start 2nd level  2022-12-07 20:11:23.151366


start  face5 2022-12-07 20:31:53.137713
start  face6 2022-12-07 20:32:01.863484
start  vism 2022-12-07 20:32:07.905617
start  carit_FC 2022-12-07 20:32:14.694014
start  face_FC 2022-12-07 20:32:20.782545
start  vism_FC 2022-12-07 20:32:27.300878
start  cort 2022-12-07 20:32:34.468984
start  surf 2022-12-07 20:32:40.754065
start  subc 2022-12-07 20:32:47.544844
start  VolBrain 2022-12-07 20:32:54.865502
start  rest 2022-12-07 20:33:01.953279
 
start 2nd level  2022-12-07 20:33:07.608354
Checking single ML on test1 data  2022-12-07 20:33:07.608393
controlling  carit1 2022-12-07 20:33:07.610317
controlling  carit3 2022-12-07 20:33:07.683029
controlling  carit4 2022-12-07 20:33:07.748606
controlling  face1 2022-12-07 20:33:07.815234
controlling  face2 2022-12-07 20:33:07.882018
controlling  face3 2022-12-07 20:33:07.948435
controlling  face4 2022-12-07 20:33:08.014796
controlling  face5 2022-12-07 20:33:08.086735
controlling  face6 2022-12-07 20:33:08.152886
controlling  vism 2022-12-07 20

start  rest 2022-12-07 20:56:52.547643
 
start 2nd level  2022-12-07 20:57:00.330897
Checking single ML on test1 data  2022-12-07 20:57:00.330951
controlling  carit1 2022-12-07 20:57:00.333062
controlling  carit3 2022-12-07 20:57:00.407494
controlling  carit4 2022-12-07 20:57:00.476301
controlling  face1 2022-12-07 20:57:00.545922
controlling  face2 2022-12-07 20:57:00.614433
controlling  face3 2022-12-07 20:57:00.683088
controlling  face4 2022-12-07 20:57:00.751937
controlling  face5 2022-12-07 20:57:00.820188
controlling  face6 2022-12-07 20:57:00.888768
controlling  vism 2022-12-07 20:57:00.957763
controlling  carit_FC 2022-12-07 20:57:01.026069
controlling  face_FC 2022-12-07 20:57:15.734146
controlling  vism_FC 2022-12-07 20:57:31.225790
controlling  cort 2022-12-07 20:57:48.556134
controlling  surf 2022-12-07 20:57:48.613595
controlling  subc 2022-12-07 20:57:48.650190
controlling  VolBrain 2022-12-07 20:57:48.655414
controlling  rest 2022-12-07 20:57:48.657363
standartize  carit

controlling  vism 2022-12-07 21:20:22.819446
controlling  carit_FC 2022-12-07 21:20:22.908317
controlling  face_FC 2022-12-07 21:20:41.748837
controlling  vism_FC 2022-12-07 21:20:59.227316
controlling  cort 2022-12-07 21:21:14.717278
controlling  surf 2022-12-07 21:21:14.769822
controlling  subc 2022-12-07 21:21:14.800979
controlling  VolBrain 2022-12-07 21:21:14.805659
controlling  rest 2022-12-07 21:21:14.807479
standartize  carit1 2022-12-07 21:21:31.731700
standartize  carit3 2022-12-07 21:21:31.738234
standartize  carit4 2022-12-07 21:21:31.744119
standartize  face1 2022-12-07 21:21:31.750508
standartize  face2 2022-12-07 21:21:31.756750
standartize  face3 2022-12-07 21:21:31.759212
standartize  face4 2022-12-07 21:21:31.760046
standartize  face5 2022-12-07 21:21:31.760779
standartize  face6 2022-12-07 21:21:31.761451
standartize  vism 2022-12-07 21:21:31.762135
standartize  carit_FC 2022-12-07 21:21:31.762774
standartize  face_FC 2022-12-07 21:21:31.874622
standartize  vism_FC 2

standartize  vism_FC 2022-12-07 21:43:15.917488
standartize  cort 2022-12-07 21:43:16.008082
standartize  surf 2022-12-07 21:43:16.008840
standartize  subc 2022-12-07 21:43:16.009356
standartize  VolBrain 2022-12-07 21:43:16.009730
standartize  rest 2022-12-07 21:43:16.010133
reduction  rest 2022-12-07 21:48:14.468791
reduction  carit_FC 2022-12-07 21:48:14.597241
reduction  face_FC 2022-12-07 21:48:14.706880
reduction  vism_FC 2022-12-07 21:48:14.807819
standartize PCA  rest 2022-12-07 21:48:14.909710
standartize PCA  carit_FC 2022-12-07 21:48:14.950705
standartize PCA  face_FC 2022-12-07 21:48:14.986918
standartize PCA  vism_FC 2022-12-07 21:48:15.020510
Calculating stacked ML on test1 data  2022-12-07 21:48:15.119580
set 1 2022-12-07 21:48:15.119964
set 2 2022-12-07 21:48:21.665258
set 3 2022-12-07 21:48:29.295454
set 4 2022-12-07 21:48:35.977923
set 5 2022-12-07 21:48:43.880922
set 6 2022-12-07 21:48:51.425094
set 7 2022-12-07 21:48:58.282857
set 8 2022-12-07 21:49:04.515007
 
star

Calculating stacked ML on test1 data  2022-12-07 22:09:30.814508
set 1 2022-12-07 22:09:30.814834
set 2 2022-12-07 22:09:38.299182
set 3 2022-12-07 22:09:44.543516
set 4 2022-12-07 22:09:50.882347
set 5 2022-12-07 22:09:56.778789
set 6 2022-12-07 22:10:02.891310
set 7 2022-12-07 22:10:08.672118
set 8 2022-12-07 22:10:16.389237
 
start 3rd level  2022-12-07 22:10:24.859913
Checking single ML on test2 data  2022-12-07 22:10:24.860399
controlling  carit1 2022-12-07 22:10:24.862202
controlling  carit3 2022-12-07 22:10:24.935357
controlling  carit4 2022-12-07 22:10:25.002847
controlling  face1 2022-12-07 22:10:25.072038
controlling  face2 2022-12-07 22:10:25.140540
controlling  face3 2022-12-07 22:10:25.209108
controlling  face4 2022-12-07 22:10:25.276580
controlling  face5 2022-12-07 22:10:25.344929
controlling  face6 2022-12-07 22:10:25.418873
controlling  vism 2022-12-07 22:10:25.484332
controlling  carit_FC 2022-12-07 22:10:25.550520
controlling  face_FC 2022-12-07 22:10:39.469650
contr

controlling  face1 2022-12-07 22:31:38.818193
controlling  face2 2022-12-07 22:31:38.886349
controlling  face3 2022-12-07 22:31:38.955099
controlling  face4 2022-12-07 22:31:39.024967
controlling  face5 2022-12-07 22:31:39.093918
controlling  face6 2022-12-07 22:31:39.164566
controlling  vism 2022-12-07 22:31:39.234449
controlling  carit_FC 2022-12-07 22:31:39.304050
controlling  face_FC 2022-12-07 22:31:53.219658
controlling  vism_FC 2022-12-07 22:32:07.673331
controlling  cort 2022-12-07 22:32:21.056350
controlling  surf 2022-12-07 22:32:21.100687
controlling  subc 2022-12-07 22:32:21.126744
controlling  VolBrain 2022-12-07 22:32:21.130682
controlling  rest 2022-12-07 22:32:21.132145
standartize  carit1 2022-12-07 22:32:34.948448
standartize  carit3 2022-12-07 22:32:34.954707
standartize  carit4 2022-12-07 22:32:34.960420
standartize  face1 2022-12-07 22:32:34.965524
standartize  face2 2022-12-07 22:32:34.969654
standartize  face3 2022-12-07 22:32:34.970172
standartize  face4 2022-12

controlling  vism_FC 2022-12-07 22:53:32.362851
controlling  cort 2022-12-07 22:53:46.693813
controlling  surf 2022-12-07 22:53:46.739278
controlling  subc 2022-12-07 22:53:46.765768
controlling  VolBrain 2022-12-07 22:53:46.769739
controlling  rest 2022-12-07 22:53:46.771110
standartize  carit1 2022-12-07 22:54:00.688814
standartize  carit3 2022-12-07 22:54:00.694815
standartize  carit4 2022-12-07 22:54:00.700385
standartize  face1 2022-12-07 22:54:00.705652
standartize  face2 2022-12-07 22:54:00.710510
standartize  face3 2022-12-07 22:54:00.711031
standartize  face4 2022-12-07 22:54:00.711518
standartize  face5 2022-12-07 22:54:00.711996
standartize  face6 2022-12-07 22:54:00.712483
standartize  vism 2022-12-07 22:54:00.712973
standartize  carit_FC 2022-12-07 22:54:00.713444
standartize  face_FC 2022-12-07 22:54:00.738159
standartize  vism_FC 2022-12-07 22:54:00.761992
standartize  cort 2022-12-07 22:54:00.785293
standartize  surf 2022-12-07 22:54:00.785975
standartize  subc 2022-12-

reduction  rest 2022-12-07 23:16:34.059376
reduction  carit_FC 2022-12-07 23:16:34.163998
reduction  face_FC 2022-12-07 23:16:34.250923
reduction  vism_FC 2022-12-07 23:16:34.342900
standartize PCA  rest 2022-12-07 23:16:34.432301
standartize PCA  carit_FC 2022-12-07 23:16:34.449353
standartize PCA  face_FC 2022-12-07 23:16:34.462799
standartize PCA  vism_FC 2022-12-07 23:16:34.482180
Calculating stacked ML on test2 data  2022-12-07 23:16:34.586786
 
finished to calculate the Fold # 1
2022-12-07 23:16:34.764166
 
started to calculate the Fold # 2
2022-12-07 23:16:34.764797
 
start 1st level  2022-12-07 23:16:34.766371
controlling  carit1 2022-12-07 23:16:35.261223
controlling  carit3 2022-12-07 23:16:35.875314
controlling  carit4 2022-12-07 23:16:36.138565
controlling  face1 2022-12-07 23:16:36.397909
controlling  face2 2022-12-07 23:16:36.658251
controlling  face3 2022-12-07 23:16:36.921141
controlling  face4 2022-12-07 23:16:37.209609
controlling  face5 2022-12-07 23:16:37.577016
con

controlling  carit1 2022-12-07 23:37:50.000207
controlling  carit3 2022-12-07 23:37:50.571771
controlling  carit4 2022-12-07 23:37:50.868231
controlling  face1 2022-12-07 23:37:51.154927
controlling  face2 2022-12-07 23:37:51.404740
controlling  face3 2022-12-07 23:37:51.827856
controlling  face4 2022-12-07 23:37:52.243576
controlling  face5 2022-12-07 23:37:52.617975
controlling  face6 2022-12-07 23:37:53.043083
controlling  vism 2022-12-07 23:37:53.427055
controlling  carit_FC 2022-12-07 23:37:53.753384
controlling  face_FC 2022-12-07 23:38:54.153952
controlling  vism_FC 2022-12-07 23:39:55.355858
controlling  cort 2022-12-07 23:40:48.151343
controlling  surf 2022-12-07 23:40:48.393625
controlling  subc 2022-12-07 23:40:48.533882
controlling  VolBrain 2022-12-07 23:40:48.551420
controlling  rest 2022-12-07 23:40:48.556503
standartize  carit1 2022-12-07 23:43:21.127001
standartize  carit3 2022-12-07 23:43:21.135126
standartize  carit4 2022-12-07 23:43:21.137168
standartize  face1 2022

controlling  face_FC 2022-12-08 00:00:03.312759
controlling  vism_FC 2022-12-08 00:01:02.767750
controlling  cort 2022-12-08 00:01:56.650152
controlling  surf 2022-12-08 00:01:56.856454
controlling  subc 2022-12-08 00:01:56.998063
controlling  VolBrain 2022-12-08 00:01:57.016155
controlling  rest 2022-12-08 00:01:57.021541
standartize  carit1 2022-12-08 00:04:30.559747
standartize  carit3 2022-12-08 00:04:30.566699
standartize  carit4 2022-12-08 00:04:30.568427
standartize  face1 2022-12-08 00:04:30.570068
standartize  face2 2022-12-08 00:04:30.571685
standartize  face3 2022-12-08 00:04:30.573275
standartize  face4 2022-12-08 00:04:30.574831
standartize  face5 2022-12-08 00:04:30.576450
standartize  face6 2022-12-08 00:04:30.578104
standartize  vism 2022-12-08 00:04:30.579731
standartize  carit_FC 2022-12-08 00:04:30.581396
standartize  face_FC 2022-12-08 00:04:30.811849
standartize  vism_FC 2022-12-08 00:04:31.039331
standartize  cort 2022-12-08 00:04:31.265935
standartize  surf 2022-