# Stacked Machine Learning

In [13]:
## IMPORTANT!

# Set the number of parallel jobs (CPU cores) before launching anything else.
# Pick a value that fits the number of cores of the machine running the notebook.
n_jobs = 40

### Load libraries

In [2]:
#libraries
import pandas as pd
import numpy as np
import os
import sys
import shutil
import glob
import joblib
import warnings
from datetime import date, datetime

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
import scipy.stats as st

from nilearn import image as nli
from nilearn import plotting

from mne.viz import plot_connectivity_circle

### Load functions

In [3]:
def control_features(table_in, control, index):
    """Regress the control variables out of every column of ``table_in``.

    Each column is z-scored, a linear regression of that column on the
    z-scored control variables is fitted, and the residuals are kept.

    Parameters
    ----------
    table_in : pd.DataFrame or pd.Series
        Rows are subjects, columns are features. A Series (e.g. a target) is
        handled as a one-column table whose column is named ``'0'``.
    control : pd.DataFrame or pd.Series
        Control variables (e.g. age, sex, movement), indexed by subject.
    index : array-like
        Subject labels to fit on; both tables are reindexed to it.
        NOTE(review): labels missing from either table become NaN rows after
        ``reindex``; the estimators are expected to reject NaN - confirm that
        the inputs are complete.

    Returns
    -------
    df_table : pd.DataFrame
        Residuals of the standardized columns, indexed by ``index``.
    dct_std_y_models : dict
        Column name -> StandardScaler fitted on that column.
    std_model : StandardScaler
        Scaler fitted on the control variables.
    dct_lin_models : dict
        Column name -> LinearRegression fitted on the standardized data.
    """
    # A Series is treated as a single-column table named '0'.
    if len(table_in.values.shape) == 1:
        table_in = table_in.to_frame(name='0')

    # Shrink both tables to the local train index.
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)

    # The control matrix is identical for every column, so it is standardized
    # once here instead of once per column. This also guarantees that
    # ``std_model`` exists when ``table_in`` has no columns.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    std_model = StandardScaler()
    std_model.fit(X)
    X = std_model.transform(X)

    dct_table = {}
    dct_lin_models = {}
    dct_std_y_models = {}

    for col in table_in.columns:
        # Standardize the current column (regression target).
        y_raw = table_in[col].values.reshape(-1, 1)
        std_model_y = StandardScaler()
        std_model_y.fit(y_raw)
        y = std_model_y.transform(y_raw).ravel()

        # Fit the confound model and keep what it cannot explain.
        model = LinearRegression()
        model.fit(X, y)
        y_res = y - model.predict(X)

        dct_table[col] = y_res
        dct_lin_models[col] = model
        dct_std_y_models[col] = std_model_y

    df_table = pd.DataFrame(dct_table, index=table_in.index)

    return df_table, dct_std_y_models, std_model, dct_lin_models

In [4]:
def re_control_features(table_in, control, index, dct_std_y_models, std_model, dct_lin_models):
    """Apply previously fitted confound-adjustment models to new subjects.

    Counterpart of ``control_features``: nothing is fitted here. Each column
    is scaled with its stored scaler, predicted from the scaled control
    variables with its stored linear model, and the residual is returned.

    Parameters
    ----------
    table_in : pd.DataFrame or pd.Series
        Rows are subjects, columns are features. A Series is handled as a
        one-column table whose column is named ``'0'``.
    control : pd.DataFrame or pd.Series
        Control variables, indexed by subject.
    index : array-like
        Subject labels to process; both tables are reindexed to it.
    dct_std_y_models : dict
        Column name -> fitted scaler (object with ``transform``).
    std_model : object with ``transform``
        Scaler previously fitted on the control variables.
    dct_lin_models : dict
        Column name -> fitted regression model (object with ``predict``).

    Returns
    -------
    pd.DataFrame
        Residuals, indexed by ``index``, one column per input column.

    Raises
    ------
    KeyError
        If a column of ``table_in`` has no entry in the model dictionaries.
    """
    # A Series is treated as a single-column table named '0'.
    if len(table_in.values.shape) == 1:
        table_in = table_in.to_frame(name='0')

    # Shrink both tables to the requested index.
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)

    # The control matrix does not depend on the column, so it is transformed
    # once here instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    X = std_model.transform(X)

    dct_table = {}
    for col in table_in.columns:
        # Scale the column with the scaler fitted on the training data.
        y = dct_std_y_models[col].transform(table_in[col].values.reshape(-1, 1))
        y = y.reshape(-1, 1).ravel()

        # Residual with respect to the previously fitted linear model.
        dct_table[col] = y - dct_lin_models[col].predict(X)

    df_table = pd.DataFrame(dct_table, index=table_in.index)

    return df_table

In [5]:
def elnet(X, y):
    """Tune and fit an ElasticNet on ``X`` -> ``y`` with a 5-fold grid search.

    Subjects whose target is NaN are dropped and the feature rows are selected
    by the remaining target labels. The grid covers 70 ``alpha`` values
    (``np.logspace(-1, 2, 70)``) and 25 ``l1_ratio`` values
    (``np.linspace(0, 1, 25)``); the search runs with the module-level
    ``n_jobs`` workers.

    Parameters
    ----------
    X : pd.DataFrame
        Features, rows indexed by subject.
    y : pd.Series
        Target, indexed by subject.

    Returns
    -------
    tuple
        ``(best alpha, best l1_ratio, best cross-validated score, mse, corr,
        best estimator, y_pred, mae)``. ``mse``, ``mae``, ``corr`` and
        ``y_pred`` are computed on the same rows the search was fitted on.
    """
    # Keep only the subjects with an observed target.
    y_real = y.dropna()
    X_values = X.loc[y_real.index, :].values
    y_values = y_real.values.reshape(-1, 1).ravel()

    # Single-step pipeline, so the grid keys carry the 'elasticnet__' prefix.
    pipeline = Pipeline([('elasticnet', ElasticNet(random_state=42))])
    search_space = {
        'elasticnet__alpha': np.logspace(-1, 2, 70),
        'elasticnet__l1_ratio': np.linspace(0, 1, 25),
    }

    gm_cv = GridSearchCV(pipeline, search_space, cv=5, n_jobs=n_jobs)
    gm_cv.fit(X_values, y_values)

    # Predictions of the selected model on the data it was fitted on.
    y_pred = gm_cv.predict(X_values)

    best = gm_cv.best_params_
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(y_values, dtype=float),
                       np.array(y_pred, dtype=float))

    return (best['elasticnet__alpha'], best['elasticnet__l1_ratio'],
            gm_cv.best_score_, mse, corr, gm_cv.best_estimator_, y_pred, mae)

In [6]:
def reaply_ElNet(X, y, model):
    """Score an already fitted model on the subjects that have a target.

    Parameters
    ----------
    X : pd.DataFrame
        Features, rows indexed by subject; reindexed to the non-NaN target
        labels.
    y : pd.Series
        Target, indexed by subject; NaN entries are dropped.
    model : fitted estimator
        Object providing ``predict`` and ``score``.

    Returns
    -------
    tuple
        ``(y_pred, y_real, ind_y, bacc, mse, corr, mae)`` where ``y_real`` is
        the target without NaN, ``ind_y`` its labels as an array and ``bacc``
        the value of ``model.score``.
    """
    # Keep only the subjects with an observed target.
    y_real = y.dropna()
    ind_y = np.array(y_real.index)

    features_arr = X.reindex(index=y_real.index).values
    target_arr = y_real.values.reshape(-1, 1).ravel()

    y_pred = model.predict(features_arr)

    # Performance of the fixed model on this subset.
    bacc = model.score(features_arr, target_arr)
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(target_arr, dtype=float),
                       np.array(y_pred, dtype=float))

    return y_pred, y_real, ind_y, bacc, mse, corr, mae

### Path to the tables folder

In [7]:
# Folder holding the input CSV tables; the output folders are created inside it too.
path = '/media/data/HCPAging/data/MLTablesMultCon/'

### Load tables

In [8]:
# Silence every warning from here on.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Demography table
demo = pd.read_csv(path + 'demography.csv', index_col=0)

# Table of targets
targ = pd.read_csv(path + 'cognition.csv', index_col=0)

# Feature tables: modality key -> CSV file inside `path`
feature_files = {
    'carit1': 'carit-con1.csv',
    'carit3': 'carit-con3.csv',
    'carit4': 'carit-con4.csv',

    'face1': 'FACENAME_group_table_3EV_con1.csv',
    'face2': 'FACENAME_group_table_3EV_con2.csv',
    'face3': 'FACENAME_group_table_3EV_con3.csv',
    'face4': 'FACENAME_group_table_3EV_con4.csv',
    'face5': 'FACENAME_group_table_3EV_con5.csv',
    'face6': 'FACENAME_group_table_3EV_con6.csv',

    'vism': 'vism.csv',

    'carit_FC': 'CARIT_taskFC.csv',
    'face_FC': 'FACENAME_task_FC_3EV.csv',
    'vism_FC': 'VISMOTOR_taskFC.csv',

    'cort': 'cort.csv',
    'surf': 'surf.csv',
    'subc': 'subc.csv',
    'VolBrain': 'VolBrain.csv',

    'rest': 'rest_hpass.csv',
}

# Every table is read with its first column as the index; the dictionary keeps
# the order listed above.
features = {
    key: pd.read_csv(path + file_name, index_col=0)
    for key, file_name in feature_files.items()
}

# Table with movements (mean relative displacement, Movement_RelativeRMS_mean.txt)
movements = pd.read_csv(path + 'movements.csv', index_col=0)



In [9]:
# Create the table of controlling (confound) parameters.
# LabelEncoder expects a 1-D array of labels, so the 'sex' column is passed as
# a Series rather than as a one-column DataFrame (which only works through a
# column-vector conversion that emits a warning).
sex_coded = pd.Series(LabelEncoder().fit_transform(demo['sex']), index=demo.index, name='sex')

# Only sex is used as a control variable here.
control = pd.DataFrame({'sex': sex_coded})

##### Leave-P-groups-out cross-validation via 5-fold GroupKFold (grouped by family ID)

In [14]:
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning

# Hide the ConvergenceWarning messages raised by scikit-learn estimators.
simplefilter(action="ignore", category=ConvergenceWarning)

# When no warning options were passed to the interpreter, silence all warnings
# in this process and export the same setting through the environment
# (presumably so that worker processes inherit it - TODO confirm).
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"
    
    
    

for COL in targ.columns:
    # Every column of the target table is modelled in turn. To run a single
    # target instead, replace this loop with e.g.
    # COL = 'nih_fluidcogcomp_unadjusted'.
    y = targ[COL]

    print(y.name)

    # Output folder for this target. exist_ok=True lets an interrupted run be
    # restarted without deleting folders by hand (existing files are overwritten).
    nmf = path+'output_5cv_sexAdj_noStdTarg_STDstackFeatures_'+y.name
    os.makedirs(nmf, exist_ok=True)

    # Modalities that are reduced with PCA before modelling (rest and task FC).
    fc_keys = ['rest', 'carit_FC', 'face_FC', 'vism_FC']

    # 5 folds; subjects sharing a family id are always kept in the same fold.
    group_kfold = GroupKFold(n_splits=5)
    fold_splits = group_kfold.split(demo, groups=demo['family_user_def_id'])
    for i, (train_index, test_index) in enumerate(fold_splits):

        print(' ')
        print('started to calculate the Fold #', i)
        print(datetime.now())
        print(' ')

        # Directory for this fold
        path_out = str(nmf+'/Fold_'+str(i))
        os.makedirs(path_out, exist_ok=True)

        # Global indices: positional fold indices -> subject labels
        train_index = np.array(demo.iloc[train_index].index) #for training all models
        test_index = np.array(demo.iloc[test_index].index) #for final test

        ### 1st level ################################################################################

        #### Calculations of single ML models on training index ####################################

        print('start 1st level ', datetime.now())

        # Target restricted to the training subjects
        y_res1 = y.reindex(index=train_index)

        # Regress the control variables out of every modality and keep the
        # fitted adjustment models for re-use on other subsets.
        features_res1 = {}
        std_feat_y_dct = {}
        std_feat_X_dct = {}
        linreg_feat_dct = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())

            mod_res, std_f_y, std_f_X, linreg_f = control_features(features[key], control, y_res1.index)

            features_res1[key] = mod_res
            std_feat_y_dct[key] = std_f_y
            std_feat_X_dct[key] = std_f_X
            linreg_feat_dct[key] = linreg_f

        # Save the adjustment models
        os.makedirs(path_out+'/adjustment_models', exist_ok=True)
        joblib.dump(std_feat_y_dct, (path_out+'/adjustment_models'+'/features_std_model_y.sav'))
        joblib.dump(std_feat_X_dct, (path_out+'/adjustment_models'+'/features_std_model_X.sav'))
        joblib.dump(linreg_feat_dct, (path_out+'/adjustment_models'+'/features_linreg.sav'))

        # Standardize the adjusted features and keep the scalers
        std_models_features = {}
        for key in features_res1.keys():
            print('standartize ', key, datetime.now())
            std_model = StandardScaler()
            std_model.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(std_model.transform(features_res1[key].values),
                                              index=features_res1[key].index,
                                              columns=features_res1[key].columns)
            std_models_features[key] = std_model

        # Save the scalers
        os.makedirs(path_out+'/standartization_models', exist_ok=True)
        joblib.dump(std_models_features, (path_out+'/standartization_models'+'/features_std_model.sav'))

        # Save target and feature tables before PCA
        y_res1.to_csv(path_out+'/target_y_train1.csv')
        for key in features_res1.keys():
            features_res1[key].to_csv(path_out+'/'+str(key)+'_train1.csv')

        # PCA (75 components) for rest and task FC
        PCA_models = {}
        for key in fc_keys:
            print('reduction ', key, datetime.now())
            model_PCA = PCA(n_components=75, random_state=11)
            model_PCA.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(model_PCA.transform(features_res1[key].values),
                                              index=features_res1[key].index)
            PCA_models[key] = model_PCA
        # Save the PCA models
        os.makedirs(path_out+'/PCA_models', exist_ok=True)
        joblib.dump(PCA_models, (path_out+'/PCA_models'+'/PCA_model.sav'))

        # Standardize the principal components again
        std_PC_feature_models = {}
        for key in fc_keys:
            print('standartize PC table ', key, datetime.now())
            std_PC_model = StandardScaler()
            std_PC_model.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(std_PC_model.transform(features_res1[key].values),
                                              index=features_res1[key].index,
                                              columns=features_res1[key].columns)
            std_PC_feature_models[key] = std_PC_model
            # Save the PCA tables
            features_res1[key].to_csv(path_out+'/'+key+'_PCA75_train1.csv')
        # Save the scalers of the principal components
        os.makedirs(path_out+'/PCA_standardization_models', exist_ok=True)
        joblib.dump(std_PC_feature_models, (path_out+'/PCA_standardization_models'+'/std_PCA_model.sav'))

        # ElasticNet for every modality on the training subjects (1st level)
        dict_tasks = {}
        dict_elnet_model = {}
        dict_ypred1 = {}

        for key in list(features_res1.keys()):

            print('start ', str(key), datetime.now())   #print start time of calculations

            bpar1, bpar2, acc, mse, corr, model, y_pred1, mae = elnet(features_res1[key], y_res1) #ML
            dict_tasks[key] = acc, mse, mae, corr, bpar1, bpar2
            dict_elnet_model[key] = model
            dict_ypred1[key] = y_pred1
        df_tasks = pd.DataFrame(dict_tasks, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
        # elnet() drops subjects with a missing target, so the predictions are
        # indexed by the non-missing targets (same as y_res1.index when the
        # target has no NaN).
        df_y_pred1 = pd.DataFrame(dict_ypred1, index=y_res1.dropna().index)

        ### Save outputs from this step (models and performance of all modalities)

        # models
        for key in dict_elnet_model.keys():
            joblib.dump(dict_elnet_model[key], (path_out+'/'+str(key)+'_elnet_model.sav'))

        # model performance
        df_tasks.to_csv(path_out+'/1level_train_perf_elnet.csv')

        # first-level predictions
        df_y_pred1.to_csv(path_out+'/1level_train_y_pred_singleML.csv')

        ### 2nd level ################################################################################
        print(' ')
        print('start 2nd level ', datetime.now())

        #### L2 Testing single ML models ##########################################################
        # NOTE(review): the training part is not split 60/40 here, so this
        # level re-uses train_index; the single-modality models are therefore
        # re-applied to the subjects they were fitted on.

        print('Checking single ML on train 40prc data ', datetime.now())

        # Target restricted to the training subjects
        y_res2 = y.reindex(index=train_index)

        # Apply the stored adjustment models to every modality
        features_res2 = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())

            features_res2[key] = re_control_features(features[key], control, y_res2.index,
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

        # Standardize with the stored scalers
        for key in features_res2.keys():
            print('standartize ', key, datetime.now())
            features_res2[key] = pd.DataFrame(std_models_features[key].transform(features_res2[key].values),
                                              index=features_res2[key].index,
                                              columns=features_res2[key].columns)

        # Save target and feature tables before PCA
        y_res2.to_csv(path_out+'/target_y_train2.csv')
        for key in features_res2.keys():
            features_res2[key].to_csv(path_out+'/'+str(key)+'_train2.csv')

        # Stored PCA models for rest and task FC
        for key in fc_keys:
            print('reduction ', key, datetime.now())
            features_res2[key] = pd.DataFrame(PCA_models[key].transform(features_res2[key].values),
                                              index=features_res2[key].index)

        # Stored scalers of the principal components
        for key in fc_keys:
            print('standartize PCA ', key, datetime.now())
            features_res2[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res2[key].values),
                                              index=features_res2[key].index,
                                              columns=features_res2[key].columns)
            # Save the standardized PC table
            features_res2[key].to_csv(path_out+'/'+key+'_PCA75_train2.csv')

        # Apply the trained single-modality ElasticNet models
        dict_y_pred2 = {}
        dict_y_pred2_per = {}
        for key in list(features_res2.keys()):
            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res2[key], y_res2, dict_elnet_model[key]) #ML
            dict_y_pred2[key] = y_pred
            dict_y_pred2_per[key] = bacc, mse, mae, corr

        df_y_pred2 = pd.DataFrame(dict_y_pred2, index=ind_y)
        df_y_pred2_per = pd.DataFrame(dict_y_pred2_per, index=['best score r2', 'mse', 'mae','corr'])

        ### Save outputs from this step

        # model performance
        df_y_pred2_per.to_csv(path_out+'/2level_test1_perf_elnet.csv')

        # single-modality predictions
        df_y_pred2.to_csv(path_out+'/2level_test1_y_pred_singleML.csv')

        #### L2 Calculating stacked ML models #####################################################

        print('Calculating stacked ML on train 40prc data ', datetime.now())

        # Sets of modalities for the stacked models
        set2 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism']
        set3 = ['cort', 'subc', 'surf', 'rest', 'VolBrain']

        set4 = ['carit_FC', 'face_FC', 'vism_FC']
        set5 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism', 'carit_FC', 'face_FC', 'vism_FC']
        set6 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
        set7 = ['carit_FC', 'face_FC', 'vism_FC', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
        set8 = ['carit_FC', 'face_FC', 'vism_FC', 'rest']

        set1 = list(df_y_pred2.columns) #all existing modalities

        stack_sets = [set1, set2, set3, set4, set5, set6, set7, set8]

        dict_st_perf1 = {}
        dict_st_models = {}
        dict_st_ypred1 = {}
        dct_std_mod_for_stack = {}
        dct_std_tab_for_stack = {}
        dct_std_tab_before_for_stack = {}

        for s, set_n in enumerate(stack_sets, start=1):
            print('set '+str(s), datetime.now())
            set_name = 'set'+str(s)

            # Single-modality predictions are the features of the stacked model
            st_features = df_y_pred2.loc[:, set_n]
            dct_std_tab_before_for_stack[set_name] = st_features

            stack_std_model = StandardScaler().fit(st_features.values)
            dct_std_mod_for_stack[set_name] = stack_std_model

            std_st_features = pd.DataFrame(stack_std_model.transform(st_features.values),
                                           index=st_features.index, columns=st_features.columns)
            dct_std_tab_for_stack[set_name] = std_st_features

            bpar1, bpar2, acc, mse, corr, model, y_pred3, mae = elnet(std_st_features, y_res2) #ML

            dict_st_perf1[set_name] = acc, mse, mae, corr, bpar1, bpar2
            dict_st_models[set_name] = model
            dict_st_ypred1[set_name] = y_pred3

        df_st_perf1 = pd.DataFrame(dict_st_perf1, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
        # Predictions exist only for subjects with a non-missing target.
        df_st_ypred1 = pd.DataFrame(dict_st_ypred1, index=y_res2.dropna().index)

        ### Save outputs from this step

        # models
        for key in dict_st_models.keys():
            joblib.dump(dict_st_models[key], (path_out+'/'+str(key)+'_stacked_model.sav'))
        for key in dct_std_mod_for_stack.keys():
            joblib.dump(dct_std_mod_for_stack[key], (path_out+'/'+str(key)+'_stacked_STD_model.sav'))

        # performance and prediction
        df_st_perf1.to_csv(path_out+'/2level_test1_perf_stacked.csv')
        df_st_ypred1.to_csv(path_out+'/2level_test1_y_pred_stacked.csv')
        # One file per set: the set key is part of the file name so that the
        # tables of different sets do not overwrite each other.
        for key in dct_std_tab_for_stack.keys():
            dct_std_tab_for_stack[key].to_csv(path_out+'/2level_stack_y_feature_tab_STD_'+str(key)+'.csv')
            dct_std_tab_before_for_stack[key].to_csv(path_out+'/2level_stack_y_feature_tab_beforeSTD_'+str(key)+'.csv')

        ### 3rd level ################################################################################
        print(' ')
        print('start 3rd level ', datetime.now())

        #### L3 Testing single ML models on test_index #############################################

        print('Checking single ML on test data ', datetime.now())

        # Target restricted to the held-out test subjects
        y_res3 = y.reindex(index=test_index)

        # Apply the stored adjustment models to every modality
        features_res3 = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())

            features_res3[key] = re_control_features(features[key], control, y_res3.index,
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

        # Standardize with the stored scalers
        for key in features_res3.keys():
            print('standartize ', key, datetime.now())
            features_res3[key] = pd.DataFrame(std_models_features[key].transform(features_res3[key].values),
                                              index=features_res3[key].index,
                                              columns=features_res3[key].columns)

        # Save target and feature tables before PCA
        y_res3.to_csv(path_out+'/target_y_test.csv')
        for key in features_res3.keys():
            features_res3[key].to_csv(path_out+'/'+str(key)+'_test.csv')

        # Stored PCA models for rest and task FC
        for key in fc_keys:
            print('reduction ', key, datetime.now())
            features_res3[key] = pd.DataFrame(PCA_models[key].transform(features_res3[key].values),
                                              index=features_res3[key].index)

        # Stored scalers of the principal components
        for key in fc_keys:
            print('standartize PCA ', key, datetime.now())
            features_res3[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res3[key].values),
                                              index=features_res3[key].index,
                                              columns=features_res3[key].columns)
            # Save the standardized PC table
            features_res3[key].to_csv(path_out+'/'+key+'_PCA75_test.csv')

        # Apply the trained single-modality ElasticNet models
        dict_y_pred3 = {}
        dict_y_pred3_per = {}
        for key in list(features_res3.keys()):
            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res3[key], y_res3, dict_elnet_model[key]) #ML
            dict_y_pred3[key] = y_pred
            dict_y_pred3_per[key] = bacc, mse, mae, corr

        df_y_pred3 = pd.DataFrame(dict_y_pred3, index=ind_y)
        df_y_pred3_per = pd.DataFrame(dict_y_pred3_per, index=['best score r2', 'mse', 'mae','corr'])

        ### Save outputs from this step

        # model performance
        df_y_pred3_per.to_csv(path_out+'/3level_test2_perf_elnet.csv')

        # single-modality predictions
        df_y_pred3.to_csv(path_out+'/3level_test2_y_pred_singleML.csv')

        #### L3 Testing stacked ML models on test_index #############################################

        print('Calculating stacked ML on test2 data ', datetime.now())

        # Apply the trained stacked ElasticNet models to the test subjects
        dict_st_perf2 = {}
        dict_st_ypred2 = {}

        dct_std3_tab_for_stack = {}
        dct_std3_tab_before_for_stack = {}

        for s, set_n in enumerate(stack_sets, start=1):
            set_name = 'set'+str(s)

            ftrs = df_y_pred3.loc[:, set_n]
            dct_std3_tab_before_for_stack[set_name] = ftrs

            # Scale with the scaler fitted on the level-2 stacking features
            std_ftrs = pd.DataFrame(dct_std_mod_for_stack[set_name].transform(ftrs.values),
                                    index=ftrs.index, columns=ftrs.columns)
            dct_std3_tab_for_stack[set_name] = std_ftrs

            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(std_ftrs, y_res3, dict_st_models[set_name]) #ML
            dict_st_ypred2[set_name] = y_pred
            dict_st_perf2[set_name] = bacc, mse, mae, corr

        df_st_ypred2 = pd.DataFrame(dict_st_ypred2, index=ind_y)
        df_st_perf2 = pd.DataFrame(dict_st_perf2, index=['best score r2', 'mse', 'mae','corr'])

        ### Save outputs from this step

        # performance and prediction
        df_st_perf2.to_csv(path_out+'/3level_test2_perf_stacked.csv')
        df_st_ypred2.to_csv(path_out+'/3level_test2_y_pred_stacked.csv')
        # One file per set (see the level-2 tables): the set key is part of the name.
        for key in dct_std3_tab_for_stack.keys():
            dct_std3_tab_for_stack[key].to_csv(path_out+'/3level_stack_y_feature_tab_STD_'+str(key)+'.csv')
            dct_std3_tab_before_for_stack[key].to_csv(path_out+'/3level_stack_y_feature_tab_beforeSTD_'+str(key)+'.csv')

        print(' ')
        print('finished to calculate the Fold #', i)
        print(datetime.now())

    print(' ')
    print('finished the MODEL '+COL)
    print(datetime.now())

nih_totalcogcomp_unadjusted
 
started to calculate the Fold # 0
2022-12-07 18:44:50.636494
 
start 1st level  2022-12-07 18:44:50.638914
controlling  carit1 2022-12-07 18:44:50.639435
controlling  carit3 2022-12-07 18:44:50.978467
controlling  carit4 2022-12-07 18:44:51.255432
controlling  face1 2022-12-07 18:44:51.535400
controlling  face2 2022-12-07 18:44:51.818921
controlling  face3 2022-12-07 18:44:52.099681
controlling  face4 2022-12-07 18:44:52.349802
controlling  face5 2022-12-07 18:44:52.587039
controlling  face6 2022-12-07 18:44:52.823596
controlling  vism 2022-12-07 18:44:53.061072
controlling  carit_FC 2022-12-07 18:44:53.298635
controlling  face_FC 2022-12-07 18:45:41.304875
controlling  vism_FC 2022-12-07 18:46:28.964811
controlling  cort 2022-12-07 18:47:17.282011
controlling  surf 2022-12-07 18:47:17.403323
controlling  subc 2022-12-07 18:47:17.503104
controlling  VolBrain 2022-12-07 18:47:17.516345
controlling  rest 2022-12-07 18:47:17.520365
standartize  carit1 2022-12

controlling  face6 2022-12-07 19:06:07.011368
controlling  vism 2022-12-07 19:06:07.256155
controlling  carit_FC 2022-12-07 19:06:07.498633
controlling  face_FC 2022-12-07 19:06:56.008293
controlling  vism_FC 2022-12-07 19:07:45.264971
controlling  cort 2022-12-07 19:08:35.234901
controlling  surf 2022-12-07 19:08:35.367750
controlling  subc 2022-12-07 19:08:35.470931
controlling  VolBrain 2022-12-07 19:08:35.484622
controlling  rest 2022-12-07 19:08:35.488777
standartize  carit1 2022-12-07 19:11:07.618238
standartize  carit3 2022-12-07 19:11:07.626719
standartize  carit4 2022-12-07 19:11:07.629000
standartize  face1 2022-12-07 19:11:07.630488
standartize  face2 2022-12-07 19:11:07.631648
standartize  face3 2022-12-07 19:11:07.632809
standartize  face4 2022-12-07 19:11:07.633968
standartize  face5 2022-12-07 19:11:07.635134
standartize  face6 2022-12-07 19:11:07.636306
standartize  vism 2022-12-07 19:11:07.637476
standartize  carit_FC 2022-12-07 19:11:07.638653
standartize  face_FC 202

standartize  face_FC 2022-12-07 19:32:53.745895
standartize  vism_FC 2022-12-07 19:32:53.992008
standartize  cort 2022-12-07 19:32:54.234729
standartize  surf 2022-12-07 19:32:54.236016
standartize  subc 2022-12-07 19:32:54.236838
standartize  VolBrain 2022-12-07 19:32:54.237424
standartize  rest 2022-12-07 19:32:54.238009
reduction  rest 2022-12-07 19:37:52.931354
reduction  carit_FC 2022-12-07 19:37:54.282330
reduction  face_FC 2022-12-07 19:37:55.691254
reduction  vism_FC 2022-12-07 19:37:57.028678
standartize PC table  rest 2022-12-07 19:37:58.803336
standartize PC table  carit_FC 2022-12-07 19:37:58.843764
standartize PC table  face_FC 2022-12-07 19:37:58.883139
standartize PC table  vism_FC 2022-12-07 19:37:58.922142
start  carit1 2022-12-07 19:37:59.014598
start  carit3 2022-12-07 19:38:11.205059
start  carit4 2022-12-07 19:38:20.864704
start  face1 2022-12-07 19:38:30.844819
start  face2 2022-12-07 19:38:40.791598
start  face3 2022-12-07 19:38:50.386853
start  face4 2022-12-07 

standartize PC table  rest 2022-12-07 20:00:52.437518
standartize PC table  carit_FC 2022-12-07 20:00:52.480743
standartize PC table  face_FC 2022-12-07 20:00:52.515371
standartize PC table  vism_FC 2022-12-07 20:00:52.552128
start  carit1 2022-12-07 20:00:52.595170
start  carit3 2022-12-07 20:01:03.940292
start  carit4 2022-12-07 20:01:13.754960
start  face1 2022-12-07 20:01:24.894248
start  face2 2022-12-07 20:01:36.040013
start  face3 2022-12-07 20:01:45.486741
start  face4 2022-12-07 20:01:57.052398
start  face5 2022-12-07 20:02:07.795409
start  face6 2022-12-07 20:02:17.480768
start  vism 2022-12-07 20:02:27.181160
start  carit_FC 2022-12-07 20:02:37.164661
start  face_FC 2022-12-07 20:02:45.524981
start  vism_FC 2022-12-07 20:02:51.759778
start  cort 2022-12-07 20:02:57.955714
start  surf 2022-12-07 20:03:06.484235
start  subc 2022-12-07 20:03:13.133930
start  VolBrain 2022-12-07 20:03:20.183348
start  rest 2022-12-07 20:03:27.635022
 
start 2nd level  2022-12-07 20:03:33.819892


start  face5 2022-12-07 20:24:00.488699
start  face6 2022-12-07 20:24:09.794039
start  vism 2022-12-07 20:24:19.587827
start  carit_FC 2022-12-07 20:24:29.880487
start  face_FC 2022-12-07 20:24:35.911879
start  vism_FC 2022-12-07 20:24:41.862125
start  cort 2022-12-07 20:24:47.689289
start  surf 2022-12-07 20:24:53.852496
start  subc 2022-12-07 20:25:00.230449
start  VolBrain 2022-12-07 20:25:07.257721
start  rest 2022-12-07 20:25:14.824991
 
start 2nd level  2022-12-07 20:25:20.953454
Checking single ML on test1 data  2022-12-07 20:25:20.953480
controlling  carit1 2022-12-07 20:25:20.954213
controlling  carit3 2022-12-07 20:25:21.025025
controlling  carit4 2022-12-07 20:25:21.090187
controlling  face1 2022-12-07 20:25:21.156109
controlling  face2 2022-12-07 20:25:21.222231
controlling  face3 2022-12-07 20:25:21.287378
controlling  face4 2022-12-07 20:25:21.352504
controlling  face5 2022-12-07 20:25:21.418923
controlling  face6 2022-12-07 20:25:21.484078
controlling  vism 2022-12-07 20

start  rest 2022-12-07 20:47:59.832893
 
start 2nd level  2022-12-07 20:48:06.234501
Checking single ML on test1 data  2022-12-07 20:48:06.234536
controlling  carit1 2022-12-07 20:48:06.235324
controlling  carit3 2022-12-07 20:48:06.320853
controlling  carit4 2022-12-07 20:48:06.404560
controlling  face1 2022-12-07 20:48:06.489106
controlling  face2 2022-12-07 20:48:06.570781
controlling  face3 2022-12-07 20:48:06.652542
controlling  face4 2022-12-07 20:48:06.736698
controlling  face5 2022-12-07 20:48:06.819746
controlling  face6 2022-12-07 20:48:06.901614
controlling  vism 2022-12-07 20:48:06.985021
controlling  carit_FC 2022-12-07 20:48:07.066816
controlling  face_FC 2022-12-07 20:48:22.099295
controlling  vism_FC 2022-12-07 20:48:36.931409
controlling  cort 2022-12-07 20:48:50.746071
controlling  surf 2022-12-07 20:48:50.794541
controlling  subc 2022-12-07 20:48:50.821134
controlling  VolBrain 2022-12-07 20:48:50.825143
controlling  rest 2022-12-07 20:48:50.826752
standartize  carit

controlling  vism 2022-12-07 21:10:36.750137
controlling  carit_FC 2022-12-07 21:10:36.836047
controlling  face_FC 2022-12-07 21:10:55.186689
controlling  vism_FC 2022-12-07 21:11:14.074655
controlling  cort 2022-12-07 21:11:32.574638
controlling  surf 2022-12-07 21:11:32.632276
controlling  subc 2022-12-07 21:11:32.666942
controlling  VolBrain 2022-12-07 21:11:32.672007
controlling  rest 2022-12-07 21:11:32.673928
standartize  carit1 2022-12-07 21:11:51.134977
standartize  carit3 2022-12-07 21:11:51.141355
standartize  carit4 2022-12-07 21:11:51.147436
standartize  face1 2022-12-07 21:11:51.153807
standartize  face2 2022-12-07 21:11:51.159888
standartize  face3 2022-12-07 21:11:51.162699
standartize  face4 2022-12-07 21:11:51.163740
standartize  face5 2022-12-07 21:11:51.164591
standartize  face6 2022-12-07 21:11:51.165458
standartize  vism 2022-12-07 21:11:51.166327
standartize  carit_FC 2022-12-07 21:11:51.167210
standartize  face_FC 2022-12-07 21:11:51.282572
standartize  vism_FC 2

standartize  vism_FC 2022-12-07 21:33:56.040588
standartize  cort 2022-12-07 21:33:56.134562
standartize  surf 2022-12-07 21:33:56.135340
standartize  subc 2022-12-07 21:33:56.135896
standartize  VolBrain 2022-12-07 21:33:56.136322
standartize  rest 2022-12-07 21:33:56.136695
reduction  rest 2022-12-07 21:38:46.988731
reduction  carit_FC 2022-12-07 21:38:47.146704
reduction  face_FC 2022-12-07 21:38:47.272499
reduction  vism_FC 2022-12-07 21:38:47.376434
standartize PCA  rest 2022-12-07 21:38:47.479860
standartize PCA  carit_FC 2022-12-07 21:38:47.595863
standartize PCA  face_FC 2022-12-07 21:38:47.632626
standartize PCA  vism_FC 2022-12-07 21:38:47.667867
Calculating stacked ML on test1 data  2022-12-07 21:38:47.734566
set 1 2022-12-07 21:38:47.735156
set 2 2022-12-07 21:38:56.236215
set 3 2022-12-07 21:39:03.791735
set 4 2022-12-07 21:39:11.646535
set 5 2022-12-07 21:39:18.957145
set 6 2022-12-07 21:39:26.852349
set 7 2022-12-07 21:39:32.751912
set 8 2022-12-07 21:39:38.450228
 
star

set 2 2022-12-07 22:00:04.378460
set 3 2022-12-07 22:00:10.757497
set 4 2022-12-07 22:00:18.790199
set 5 2022-12-07 22:00:26.632484
set 6 2022-12-07 22:00:34.614580
set 7 2022-12-07 22:00:41.040153
set 8 2022-12-07 22:00:46.705029
 
start 3rd level  2022-12-07 22:00:53.336950
Checking single ML on test2 data  2022-12-07 22:00:53.337384
controlling  carit1 2022-12-07 22:00:53.338246
controlling  carit3 2022-12-07 22:00:53.407229
controlling  carit4 2022-12-07 22:00:53.470116
controlling  face1 2022-12-07 22:00:53.534867
controlling  face2 2022-12-07 22:00:53.599323
controlling  face3 2022-12-07 22:00:53.663843
controlling  face4 2022-12-07 22:00:53.726653
controlling  face5 2022-12-07 22:00:53.790030
controlling  face6 2022-12-07 22:00:53.855213
controlling  vism 2022-12-07 22:00:53.917992
controlling  carit_FC 2022-12-07 22:00:53.982621
controlling  face_FC 2022-12-07 22:01:07.855660
controlling  vism_FC 2022-12-07 22:01:21.437003
controlling  cort 2022-12-07 22:01:35.790189
controllin

controlling  face2 2022-12-07 22:22:11.782363
controlling  face3 2022-12-07 22:22:11.845299
controlling  face4 2022-12-07 22:22:11.907761
controlling  face5 2022-12-07 22:22:11.969994
controlling  face6 2022-12-07 22:22:12.032754
controlling  vism 2022-12-07 22:22:12.095355
controlling  carit_FC 2022-12-07 22:22:12.158314
controlling  face_FC 2022-12-07 22:22:26.464675
controlling  vism_FC 2022-12-07 22:22:39.792118
controlling  cort 2022-12-07 22:22:53.992703
controlling  surf 2022-12-07 22:22:54.039070
controlling  subc 2022-12-07 22:22:54.065759
controlling  VolBrain 2022-12-07 22:22:54.069734
controlling  rest 2022-12-07 22:22:54.071138
standartize  carit1 2022-12-07 22:23:07.910395
standartize  carit3 2022-12-07 22:23:07.916237
standartize  carit4 2022-12-07 22:23:07.921570
standartize  face1 2022-12-07 22:23:07.926705
standartize  face2 2022-12-07 22:23:07.931952
standartize  face3 2022-12-07 22:23:07.932764
standartize  face4 2022-12-07 22:23:07.933253
standartize  face5 2022-12

controlling  cort 2022-12-07 22:44:09.780861
controlling  surf 2022-12-07 22:44:09.827798
controlling  subc 2022-12-07 22:44:09.854180
controlling  VolBrain 2022-12-07 22:44:09.858121
controlling  rest 2022-12-07 22:44:09.859606
standartize  carit1 2022-12-07 22:44:23.657910
standartize  carit3 2022-12-07 22:44:23.663863
standartize  carit4 2022-12-07 22:44:23.669200
standartize  face1 2022-12-07 22:44:23.674603
standartize  face2 2022-12-07 22:44:23.679746
standartize  face3 2022-12-07 22:44:23.680473
standartize  face4 2022-12-07 22:44:23.680938
standartize  face5 2022-12-07 22:44:23.681416
standartize  face6 2022-12-07 22:44:23.681919
standartize  vism 2022-12-07 22:44:23.682368
standartize  carit_FC 2022-12-07 22:44:23.682831
standartize  face_FC 2022-12-07 22:44:23.709635
standartize  vism_FC 2022-12-07 22:44:23.734967
standartize  cort 2022-12-07 22:44:23.759562
standartize  surf 2022-12-07 22:44:23.760207
standartize  subc 2022-12-07 22:44:23.760628
standartize  VolBrain 2022-12

reduction  rest 2022-12-07 23:06:56.940976
reduction  carit_FC 2022-12-07 23:06:57.010612
reduction  face_FC 2022-12-07 23:06:57.073728
reduction  vism_FC 2022-12-07 23:06:57.129503
standartize PCA  rest 2022-12-07 23:06:57.181242
standartize PCA  carit_FC 2022-12-07 23:06:57.197736
standartize PCA  face_FC 2022-12-07 23:06:57.211159
standartize PCA  vism_FC 2022-12-07 23:06:57.224055
Calculating stacked ML on test2 data  2022-12-07 23:06:57.262559
 
finished to calculate the Fold # 1
2022-12-07 23:06:57.354398
 
started to calculate the Fold # 2
2022-12-07 23:06:57.354860
 
start 1st level  2022-12-07 23:06:57.356537
controlling  carit1 2022-12-07 23:06:57.753024
controlling  carit3 2022-12-07 23:06:58.329154
controlling  carit4 2022-12-07 23:06:58.581045
controlling  face1 2022-12-07 23:06:58.831456
controlling  face2 2022-12-07 23:06:59.077874
controlling  face3 2022-12-07 23:06:59.323973
controlling  face4 2022-12-07 23:06:59.573829
controlling  face5 2022-12-07 23:06:59.821349
con

controlling  carit1 2022-12-07 23:28:11.355742
controlling  carit3 2022-12-07 23:28:11.846863
controlling  carit4 2022-12-07 23:28:12.098588
controlling  face1 2022-12-07 23:28:12.350914
controlling  face2 2022-12-07 23:28:12.604339
controlling  face3 2022-12-07 23:28:12.875421
controlling  face4 2022-12-07 23:28:13.152836
controlling  face5 2022-12-07 23:28:13.414159
controlling  face6 2022-12-07 23:28:13.675110
controlling  vism 2022-12-07 23:28:13.936251
controlling  carit_FC 2022-12-07 23:28:14.199149
controlling  face_FC 2022-12-07 23:29:04.441177
controlling  vism_FC 2022-12-07 23:29:54.761260
controlling  cort 2022-12-07 23:30:44.041737
controlling  surf 2022-12-07 23:30:44.215642
controlling  subc 2022-12-07 23:30:44.342535
controlling  VolBrain 2022-12-07 23:30:44.358323
controlling  rest 2022-12-07 23:30:44.362985
standartize  carit1 2022-12-07 23:33:12.471414
standartize  carit3 2022-12-07 23:33:12.479260
standartize  carit4 2022-12-07 23:33:12.480859
standartize  face1 2022

controlling  face_FC 2022-12-07 23:50:23.375291
controlling  vism_FC 2022-12-07 23:51:12.358329
controlling  cort 2022-12-07 23:52:00.890656
controlling  surf 2022-12-07 23:52:01.051759
controlling  subc 2022-12-07 23:52:01.176073
controlling  VolBrain 2022-12-07 23:52:01.192122
controlling  rest 2022-12-07 23:52:01.197086
standartize  carit1 2022-12-07 23:54:31.582079
standartize  carit3 2022-12-07 23:54:31.590392
standartize  carit4 2022-12-07 23:54:31.592068
standartize  face1 2022-12-07 23:54:31.593767
standartize  face2 2022-12-07 23:54:31.595426
standartize  face3 2022-12-07 23:54:31.597030
standartize  face4 2022-12-07 23:54:31.598638
standartize  face5 2022-12-07 23:54:31.600254
standartize  face6 2022-12-07 23:54:31.601791
standartize  vism 2022-12-07 23:54:31.603481
standartize  carit_FC 2022-12-07 23:54:31.605071
standartize  face_FC 2022-12-07 23:54:31.840330
standartize  vism_FC 2022-12-07 23:54:32.069125
standartize  cort 2022-12-07 23:54:32.297029
standartize  surf 2022-