# Stacked Machine Learning

In [1]:
## IMPORTANT!

# Set the number of parallel workers first, before launching anything else.
# Choose a value that suits the number of CPU cores of the machine;
# it is passed to the hyper-parameter grid search as its n_jobs argument.
n_jobs = 10

### Load libraries

In [2]:
#libraries
import pandas as pd
import numpy as np
import os
import sys
import shutil
import glob
import joblib
import warnings
from datetime import date, datetime

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
import scipy.stats as st

from nilearn import image as nli
from nilearn import plotting

from mne.viz import plot_connectivity_circle

### Load functions

In [3]:
def control_features(table_in, control, index):
    """Regress the control variables out of every column of ``table_in``.

    Both inputs are first restricted to ``index``. Every column is z-scored
    with its own ``StandardScaler``, the control variables are z-scored once,
    and an ordinary least-squares model ``column ~ control`` is fitted.
    The residuals of that fit are returned together with all fitted models,
    so the same adjustment can be re-applied to other subjects with
    ``re_control_features``.

    Parameters
    ----------
    table_in : pd.DataFrame or pd.Series
        Rows are subjects, columns are features. A Series (e.g. a target) is
        treated as a single column stored under the key ``'0'``.
    control : pd.DataFrame or pd.Series
        Control variables (e.g. sex, age, movement), indexed by subject.
    index : array-like
        Subject labels to keep; both tables are reindexed to it.

    Returns
    -------
    df_table : pd.DataFrame
        Residuals, one column per input column, indexed by ``index``.
    dct_std_y_models : dict
        Column name -> ``StandardScaler`` fitted on that column.
    std_model : StandardScaler
        Scaler fitted on the control variables.
    dct_lin_models : dict
        Column name -> fitted ``LinearRegression``.
    """
    # a 1-D value array means a Series was passed (single target column)
    is_series = len(table_in.values.shape) == 1

    #shrink data to local train index
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)
    ind = table_in.index

    # The control matrix is identical for every column, so it is reshaped and
    # standardized once instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    std_model = StandardScaler()
    std_model.fit(X)
    X = std_model.transform(X)

    col_names = ['0'] if is_series else table_in.columns

    dct_table = {}
    dct_lin_models = {}
    dct_std_y_models = {}

    for col in col_names:
        column = table_in if is_series else table_in[col]
        y = column.values.reshape(-1, 1)

        #standardize the column (target of the adjustment regression)
        std_model_y = StandardScaler()
        std_model_y.fit(y)
        y = std_model_y.transform(y).ravel()

        #fit column ~ control and keep what the controls cannot explain
        model = LinearRegression()
        model.fit(X, y)
        y_pred = model.predict(X)

        dct_table[col] = y - y_pred
        dct_lin_models[col] = model
        dct_std_y_models[col] = std_model_y

    df_table = pd.DataFrame(dct_table, index=ind)

    return df_table, dct_std_y_models, std_model, dct_lin_models

In [4]:
def re_control_features(table_in, control, index, dct_std_y_models, std_model, dct_lin_models):
    """Apply a previously fitted control adjustment to another set of subjects.

    Nothing is fitted here: the scalers and linear models produced by
    ``control_features`` are only applied. Each column is scaled with its
    stored scaler, the stored linear model predicts it from the scaled
    control variables, and the residual is returned.

    Parameters
    ----------
    table_in : pd.DataFrame or pd.Series
        Rows are subjects, columns are features. A Series is treated as a
        single column looked up under the key ``'0'``.
    control : pd.DataFrame or pd.Series
        Control variables, indexed by subject.
    index : array-like
        Subject labels to keep; both tables are reindexed to it.
    dct_std_y_models : dict
        Column name -> fitted scaler exposing ``transform``.
    std_model : object
        Fitted scaler for the control variables, exposing ``transform``.
    dct_lin_models : dict
        Column name -> fitted model exposing ``predict``.

    Returns
    -------
    pd.DataFrame
        Residuals, one column per input column, indexed by ``index``.
    """
    # a 1-D value array means a Series was passed (single target column)
    is_series = len(table_in.values.shape) == 1

    #shrink data to the requested index
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)
    ind = table_in.index

    # The control matrix is identical for every column, so it is reshaped and
    # scaled once with the stored scaler instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    X = std_model.transform(X)

    col_names = ['0'] if is_series else table_in.columns

    dct_table = {}
    for col in col_names:
        column = table_in if is_series else table_in[col]

        #standardize the column with its stored scaler
        y = dct_std_y_models[col].transform(column.values.reshape(-1, 1))
        y = y.reshape(-1, 1).ravel()

        #predict with the stored linear model and keep the residual
        y_pred = dct_lin_models[col].predict(X)
        dct_table[col] = y - y_pred

    df_table = pd.DataFrame(dct_table, index=ind)

    return df_table

In [5]:
def elnet(X, y):
    """Grid-search an ElasticNet on (X, y) and report its in-sample fit.

    Subjects whose target is NaN are dropped and the matching rows of ``X``
    are selected by label. ``alpha`` and ``l1_ratio`` are tuned with a 5-fold
    ``GridSearchCV`` that runs with the notebook-level ``n_jobs`` workers.
    The error metrics and the correlation are computed on the same data the
    model was fitted on.

    Parameters
    ----------
    X : pd.DataFrame
        Feature table indexed by subject.
    y : pd.Series
        Target indexed by subject.

    Returns
    -------
    tuple
        ``(best alpha, best l1_ratio, best grid-search score, mse, corr,
        best estimator, y_pred, mae)``
    """
    #keep only subjects with an observed target and pick their feature rows
    y_real = y.dropna()
    X = X.loc[y_real.index, :]

    #plain arrays for sklearn
    features = X.values
    target = y_real.values.reshape(-1, 1).ravel()

    #single-step pipeline; the step name prefixes the grid parameter names
    pipeline = Pipeline([('elasticnet', ElasticNet(random_state=42))])

    #hyperparameter space
    parameters = {
        'elasticnet__alpha': np.logspace(-1, 2, 70),
        'elasticnet__l1_ratio': np.linspace(0, 1, 25),
    }

    #tune and fit on the training data
    gm_cv = GridSearchCV(pipeline, parameters, cv=5, n_jobs=n_jobs)
    gm_cv.fit(features, target)

    #in-sample prediction with the refitted best model
    y_pred = gm_cv.predict(features)

    #metrics
    best = gm_cv.best_params_
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(target, dtype=float), np.array(y_pred, dtype=float))

    return (best['elasticnet__alpha'], best['elasticnet__l1_ratio'], gm_cv.best_score_,
            mse, corr, gm_cv.best_estimator_, y_pred, mae)

In [6]:
def reaply_ElNet(X, y, model):
    """Apply an already fitted model to (X, y) and score its predictions.

    Subjects whose target is NaN are dropped and ``X`` is reindexed to the
    remaining subjects. Nothing is fitted here.

    Parameters
    ----------
    X : pd.DataFrame
        Feature table indexed by subject.
    y : pd.Series
        Target indexed by subject.
    model : fitted estimator
        Must expose ``predict`` and ``score``.

    Returns
    -------
    tuple
        ``(y_pred, y_real, ind_y, bacc, mse, corr, mae)`` where ``y_real`` is
        the NaN-free target, ``ind_y`` its index as an array and ``bacc`` the
        value of ``model.score``.
    """
    #keep only subjects with an observed target and align the features to them
    y_real = y.dropna()
    ind_y = np.array(y_real.index)  # index labels as a separate array

    #plain arrays for sklearn
    features = X.reindex(index=y_real.index).values
    target = y_real.values.reshape(-1, 1).ravel()

    #predict with the supplied model
    y_pred = model.predict(features)

    #metrics
    bacc = model.score(features, target)
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(target, dtype=float), np.array(y_pred, dtype=float))

    return y_pred, y_real, ind_y, bacc, mse, corr, mae

### Path to the tables folder

In [7]:
# Folder that holds the input tables; the output folders are created inside it.
# Set the MLTABLES_PATH environment variable to use another location without
# editing the notebook. os.path.join(..., '') guarantees a trailing separator,
# which is required because file names are appended with plain string
# concatenation (path+'file.csv').
path = os.path.join(os.environ.get('MLTABLES_PATH', '/media/data/HCPAging/data/MLTablesMultCon/'), '')

### Load tables

In [8]:
# Silence all Python warnings from here on.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

#demography
demo = pd.read_csv(path+'demography.csv', index_col=0)

#targets table
targ = pd.read_csv(path+'cognition.csv', index_col=0)

#feature tables: modality key -> CSV file name (the key order is kept in `features`)
feature_files = {
    'carit1': 'carit-con1.csv',
    'carit3': 'carit-con3.csv',
    'carit4': 'carit-con4.csv',

    'face1': 'FACENAME_group_table_3EV_con1.csv',
    'face2': 'FACENAME_group_table_3EV_con2.csv',
    'face3': 'FACENAME_group_table_3EV_con3.csv',
    'face4': 'FACENAME_group_table_3EV_con4.csv',
    'face5': 'FACENAME_group_table_3EV_con5.csv',
    'face6': 'FACENAME_group_table_3EV_con6.csv',

    'vism': 'vism.csv',

    'carit_FC': 'CARIT_taskFC.csv',
    'face_FC': 'FACENAME_task_FC_3EV.csv',
    'vism_FC': 'VISMOTOR_taskFC.csv',

    'cort': 'cort.csv',
    'surf': 'surf.csv',
    'subc': 'subc.csv',
    'VolBrain': 'VolBrain.csv',

    'rest': 'rest_hpass.csv',
}

#features tables as dictionary, each read with its first column as the index
features = {key: pd.read_csv(path+file_name, index_col=0)
            for key, file_name in feature_files.items()}

#table with movements (mean relative displacement Movement_RelativeRMS_mean.txt)
movements = pd.read_csv(path+'movements.csv', index_col=0)



In [9]:
#create the table of control variables that are regressed out of the features
# LabelEncoder expects a 1-D array of labels, so the 'sex' column is passed
# as a Series rather than as a one-column DataFrame.
sex_coded = pd.Series(LabelEncoder().fit_transform(demo['sex']), index=demo.index, name='sex')

# only sex is used as a control variable here
control = pd.DataFrame({'sex': sex_coded})

##### Leave-groups-out cross-validation based on grouped n-fold CV (GroupKFold on family ID)

In [15]:
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
# ElasticNet convergence warnings are silenced for the whole run
simplefilter("ignore", category=ConvergenceWarning)

# Unless warning options were given on the command line, silence everything.
# PYTHONWARNINGS is exported as well, presumably so that worker processes
# started later inherit the setting (TODO confirm).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

# The script is written so it can be run over every column of the target table.
# To do that, uncomment the for loop below, comment out the COL assignment and
# indent the rest of the cell under the loop.
#for COL in targ.columns:
COL = 'interview_age'#'nih_fluidcogcomp_unadjusted'

# Select the single target column: the code below needs a pd.Series
# (y.name, y.dropna(), 1-D values), not the whole target table.
y = targ[COL]

print(y.name)

###make folder for outputs
nmf=path+'output_5cv_sexAdj_noStdTarg_STDstackFeatures_'+y.name
os.mkdir(nmf)

# Modalities (rest and task functional connectivity) that are reduced with PCA
fc_keys = ['rest', 'carit_FC', 'face_FC', 'vism_FC']

# Outer cross-validation: 5 folds, grouped by family ID so that members of the
# same family are never split between the training and the test part.
group_kfold = GroupKFold(n_splits=5) #number of folds
for i, (train_index, test_index) in enumerate(group_kfold.split(demo, groups=demo['family_user_def_id'])):

    print(' ')
    print('started to calculate the Fold #', i)
    print(datetime.now())
    print(' ')

    ###create directory for specific Fold
    path_out = str(nmf+'/Fold_'+str(i))
    os.mkdir(path_out)

    ###Global indices: convert positional fold indices into subject labels
    train_index = np.array(demo.iloc[train_index].index) #for training all models
    test_index = np.array(demo.iloc[test_index].index) #for final test

    # An earlier version split train_index 60/40 into local index_train/index_test
    # (train_test_split(train_index, test_size=0.4, random_state=42)): the first
    # part trained the modality models, the second tested them and trained the
    # stacking. That split is disabled; the whole training part is used for both.

    ### 1st level ################################################################################

    #### Calculations of single ML models on training index ####################################

    print('start 1st level ', datetime.now())

    #reindex y (target); the target itself is not adjusted for the controls
    y_res1 = y.reindex(index=train_index)

    #control modalities: regress the control variables out of every feature
    features_res1 = {}
    std_feat_y_dct = {}
    std_feat_X_dct = {}
    linreg_feat_dct = {}
    for key in features.keys():
        print('controlling ', key, datetime.now())

        mod_res, std_f_y, std_f_X, linreg_f = control_features(features[key], control, y_res1.index)

        features_res1[key] = mod_res
        std_feat_y_dct[key] = std_f_y
        std_feat_X_dct[key] = std_f_X
        linreg_feat_dct[key] = linreg_f

    #save adjustment models
    os.mkdir(path_out+'/adjustment_models')
    joblib.dump(std_feat_y_dct, (path_out+'/adjustment_models'+'/features_std_model_y.sav'))
    joblib.dump(std_feat_X_dct, (path_out+'/adjustment_models'+'/features_std_model_X.sav'))
    joblib.dump(linreg_feat_dct, (path_out+'/adjustment_models'+'/features_linreg.sav'))

    ###standardize the residualized features and keep the fitted scalers
    std_models_features = {}
    for key in features_res1.keys():
        print('standartize ', key, datetime.now())
        std_model = StandardScaler()
        std_model.fit(features_res1[key].values)
        features_res1[key] = pd.DataFrame(std_model.transform(features_res1[key].values),
                                          index=features_res1[key].index,
                                          columns=features_res1[key].columns)
        std_models_features[key] = std_model

    #save the scalers
    os.mkdir(path_out+'/standartization_models')
    joblib.dump(std_models_features,  (path_out+'/standartization_models'+'/features_std_model.sav'))

    #save target and feature tables before PCA
    y_res1.to_csv(path_out+'/target_y_train1.csv')
    for key in features_res1.keys():
        features_res1[key].to_csv(path_out+'/'+str(key)+'_train1.csv')

    #PCA models for rest and task FC (75 components each)
    PCA_models = {}
    for key in fc_keys:
        print('reduction ', key, datetime.now())
        model_PCA =  PCA(n_components=75, random_state=11)
        model_PCA.fit(features_res1[key].values)
        features_res1[key] = pd.DataFrame(model_PCA.transform(features_res1[key].values),
                                          index=features_res1[key].index)
        PCA_models[key] = model_PCA
    #save PCA models
    os.mkdir(path_out+'/PCA_models')
    joblib.dump(PCA_models,  (path_out+'/PCA_models'+'/PCA_model.sav'))

    #standardize the principal-component tables again
    std_PC_feature_models = {}
    for key in fc_keys:
        print('standartize PC table ', key, datetime.now())
        std_PC_model = StandardScaler()
        std_PC_model.fit(features_res1[key].values)
        features_res1[key] = pd.DataFrame(std_PC_model.transform(features_res1[key].values),
                                          index=features_res1[key].index,
                                          columns=features_res1[key].columns)
        std_PC_feature_models[key] = std_PC_model
        #save PCA tables
        features_res1[key].to_csv(path_out+'/'+key+'_PCA75_train1.csv')

    #save std PCA models
    os.mkdir(path_out+'/PCA_standardization_models')
    joblib.dump(std_PC_feature_models,  (path_out+'/PCA_standardization_models'+'/std_PCA_model.sav'))

    #Launch ElasticNet for every modality on the training index (1st level)
    dict_tasks={}
    dict_elnet_model={}
    dict_ypred1={}

    for key in list(features_res1.keys()):

        print('start ', str(key), datetime.now())   #print start time of calculations

        bpar1, bpar2, acc, mse, corr, model, y_pred1, mae = elnet(features_res1[key], y_res1) #ML
        dict_tasks[key] = acc, mse, mae, corr, bpar1, bpar2
        dict_elnet_model[key] = model
        dict_ypred1[key] = y_pred1
    df_tasks = pd.DataFrame(dict_tasks, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
    df_y_pred1 = pd.DataFrame(dict_ypred1, index=y_res1.index)

    ###Save outputs from this step (models and all mod. perf.)

    #models
    for key in dict_elnet_model.keys():
        joblib.dump(dict_elnet_model[key], (path_out+'/'+str(key)+'_elnet_model.sav'))

    #model performance
    df_tasks.to_csv(path_out+'/1level_train_perf_elnet.csv')

    #first-level predictions on the training index
    df_y_pred1.to_csv(path_out+'/1level_train_y_pred_singleML.csv')

    ### 2st level ################################################################################
    print(' ')
    print('start 2nd level ', datetime.now())

    #### L2 Testing single ML models on 40pr of training (in no 60/40 splitting it just repeats previous)###################
    # NOTE(review): without the 60/40 split this level reuses train_index, so the
    # stacked models are trained on in-sample first-level predictions - confirm
    # that this is intended.

    print('Checking single ML on train 40prc data ', datetime.now())

    #reindex y (target)
    y_res2 = y.reindex(index=train_index)

    #control modalities with the adjustment models fitted at the 1st level
    features_res2 = {}
    for key in features.keys():
        print('controlling ', key, datetime.now())

        features_res2[key] = re_control_features(features[key], control, y_res2.index,
                                                 std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

    ###standardize with the scalers fitted at the 1st level
    for key in features_res2.keys():
        print('standartize ', key, datetime.now())
        features_res2[key] = pd.DataFrame(std_models_features[key].transform(features_res2[key].values),
                                          index=features_res2[key].index,
                                          columns=features_res2[key].columns)

    #save target and feature tables before PCA
    y_res2.to_csv(path_out+'/target_y_train2.csv')
    for key in features_res2.keys():
        features_res2[key].to_csv(path_out+'/'+str(key)+'_train2.csv')

    #apply the fitted PCA models to rest and task FC
    for key in fc_keys:
        print('reduction ', key, datetime.now())
        features_res2[key] = pd.DataFrame(PCA_models[key].transform(features_res2[key].values),
                                          index=features_res2[key].index)

    #apply the fitted PC scalers
    for key in fc_keys:
        print('standartize PCA ', key, datetime.now())
        features_res2[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res2[key].values),
                                          index=features_res2[key].index,
                                          columns=features_res2[key].columns)
        #save std pc table
        features_res2[key].to_csv(path_out+'/'+key+'_PCA75_train2.csv')

    #apply trained single-modality ElasticNet models to this subset
    dict_y_pred2={}
    dict_y_pred2_per={}
    for key in list(features_res2.keys()):
        y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res2[key], y_res2,
                                                                   dict_elnet_model[key]) #ML
        dict_y_pred2[key] = y_pred
        dict_y_pred2_per[key] = bacc, mse, mae, corr

    df_y_pred2 = pd.DataFrame(dict_y_pred2, index=ind_y)
    df_y_pred2_per = pd.DataFrame(dict_y_pred2_per, index=['best score r2', 'mse', 'mae','corr'])

    ###Save outputs from this step (models and all mod. perf.)

    #model performance
    df_y_pred2_per.to_csv(path_out+'/2level_test1_perf_elnet.csv')

    #single-modality predictions used as stacking features
    df_y_pred2.to_csv(path_out+'/2level_test1_y_pred_singleML.csv')

    #### L2 Calculating stacked ML models on 40prc #############################################

    print('Calculating stacked ML on train 40prc data ', datetime.now())

    #modality sets for the different stacked models
    set2 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism']
    set3 = ['cort', 'subc', 'surf', 'rest', 'VolBrain']

    set4 = ['carit_FC', 'face_FC', 'vism_FC']
    set5 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism', 'carit_FC', 'face_FC', 'vism_FC']
    set6 = ['carit1', 'carit3', 'carit4', 'face1', 'face2', 'face3', 'face4', 'face5', 'face6', 'vism', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
    set7 = ['carit_FC', 'face_FC', 'vism_FC', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
    set8 = ['carit_FC', 'face_FC', 'vism_FC', 'rest']

    set1 = list(df_y_pred2.columns) #all existing modalities

    dict_st_perf1={}
    dict_st_models={}
    dict_st_ypred1={}
    dct_std_mod_for_stack = {}        #set name -> scaler of the stacking features
    dct_std_tab_for_stack = {}        #set name -> standardized stacking features
    dct_std_tab_before_for_stack = {} #set name -> stacking features before standardization

    for s, set_n in enumerate([set1, set2, set3, set4, set5, set6, set7, set8], start=1):
        print('set '+str(s), datetime.now())
        set_key = 'set'+str(s)

        #standardize the stacking feature table and keep the scaler for the test part
        st_features = df_y_pred2.loc[:,set_n]
        dct_std_tab_before_for_stack[set_key] = st_features

        stack_std_model = StandardScaler().fit(st_features.values)
        dct_std_mod_for_stack[set_key] = stack_std_model

        std_st_features = pd.DataFrame(stack_std_model.transform(st_features.values),
                                       index=st_features.index, columns=st_features.columns)
        dct_std_tab_for_stack[set_key] = std_st_features

        bpar1, bpar2, acc, mse, corr, model, y_pred3, mae = elnet(std_st_features, y_res2) #ML

        dict_st_perf1[set_key] = acc, mse, mae, corr, bpar1, bpar2
        dict_st_models[set_key] = model
        dict_st_ypred1[set_key] = y_pred3

    df_st_perf1 = pd.DataFrame(dict_st_perf1, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
    df_st_ypred1 = pd.DataFrame(dict_st_ypred1, index=y_res2.index)

    ###Save outputs from this step (models and all mod. perf.)

    #models
    for key in dict_st_models.keys():
        joblib.dump(dict_st_models[key], (path_out+'/'+str(key)+'_stacked_model.sav'))
    for key in dct_std_mod_for_stack.keys():
        joblib.dump(dct_std_mod_for_stack[key], (path_out+'/'+str(key)+'_stacked_STD_model.sav'))

    #performance and prediction
    df_st_perf1.to_csv(path_out+'/2level_test1_perf_stacked.csv')
    df_st_ypred1.to_csv(path_out+'/2level_test1_y_pred_stacked.csv')
    # one file per set: the set name is part of the file name, otherwise every
    # set would overwrite the table written for the previous one
    for key in dct_std_tab_for_stack.keys():
        dct_std_tab_for_stack[key].to_csv(path_out+'/2level_stack_y_feature_tab_STD_'+str(key)+'.csv')
        dct_std_tab_before_for_stack[key].to_csv(path_out+'/2level_stack_y_feature_tab_beforeSTD_'+str(key)+'.csv')

    ### 3rd level ################################################################################
    print(' ')
    print('start 3rd level ', datetime.now())

    #### L3 Testing single ML models on test_index #############################################

    print('Checking single ML on test data ', datetime.now())

    #reindex y (target) to the held-out subjects
    y_res3 = y.reindex(index=test_index)

    #control modalities with the adjustment models fitted at the 1st level
    features_res3 = {}
    for key in features.keys():
        print('controlling ', key, datetime.now())

        features_res3[key] = re_control_features(features[key], control, y_res3.index,
                                                 std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

    ###standardize with the scalers fitted at the 1st level
    for key in features_res3.keys():
        print('standartize ', key, datetime.now())
        features_res3[key] = pd.DataFrame(std_models_features[key].transform(features_res3[key].values),
                                          index=features_res3[key].index,
                                          columns=features_res3[key].columns)

    #save target and feature tables before PCA
    y_res3.to_csv(path_out+'/target_y_test.csv')
    for key in features_res3.keys():
        features_res3[key].to_csv(path_out+'/'+str(key)+'_test.csv')

    #apply the fitted PCA models to rest and task FC
    for key in fc_keys:
        print('reduction ', key, datetime.now())
        features_res3[key] = pd.DataFrame(PCA_models[key].transform(features_res3[key].values),
                                          index=features_res3[key].index)

    #apply the fitted PC scalers
    for key in fc_keys:
        print('standartize PCA ', key, datetime.now())
        features_res3[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res3[key].values),
                                          index=features_res3[key].index,
                                          columns=features_res3[key].columns)
        #save std pc table
        features_res3[key].to_csv(path_out+'/'+key+'_PCA75_test.csv')

    #apply trained single-modality ElasticNet models to the test subjects
    dict_y_pred3={}
    dict_y_pred3_per={}
    for key in list(features_res3.keys()):
        y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res3[key], y_res3, dict_elnet_model[key]) #ML
        dict_y_pred3[key] = y_pred
        dict_y_pred3_per[key] = bacc, mse, mae, corr

    df_y_pred3 = pd.DataFrame(dict_y_pred3, index=ind_y)
    df_y_pred3_per = pd.DataFrame(dict_y_pred3_per, index=['best score r2', 'mse', 'mae','corr'])

    ###Save outputs from this step (models and all mod. perf.)

    #model performance
    df_y_pred3_per.to_csv(path_out+'/3level_test2_perf_elnet.csv')

    #single-modality predictions on the test subjects
    df_y_pred3.to_csv(path_out+'/3level_test2_y_pred_singleML.csv')

    #### L3 Testing stacked ML models on test_index #############################################

    print('Calculating stacked ML on test2 data ', datetime.now())

    #apply the trained stacked ElasticNet models to the test subjects
    dict_st_perf2={}
    dict_st_ypred2={}

    dct_std3_tab_for_stack = {}        #set name -> standardized stacking features
    dct_std3_tab_before_for_stack = {} #set name -> stacking features before standardization

    for s, set_n in enumerate([set1, set2, set3, set4, set5, set6, set7, set8], start=1):
        set_key = 'set'+str(s)

        #standardize the stacking features with the scaler fitted at the 2nd level
        ftrs = df_y_pred3.loc[:, set_n]
        dct_std3_tab_before_for_stack[set_key] = ftrs

        std_ftrs = pd.DataFrame(dct_std_mod_for_stack[set_key].transform(ftrs.values),
                                index=ftrs.index,columns=ftrs.columns)
        dct_std3_tab_for_stack[set_key] = std_ftrs
        #ML
        y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(std_ftrs, y_res3, dict_st_models[set_key]) #ML
        dict_st_ypred2[set_key] = y_pred
        dict_st_perf2[set_key] = bacc, mse, mae, corr

    df_st_ypred2 = pd.DataFrame(dict_st_ypred2, index=ind_y)
    df_st_perf2 = pd.DataFrame(dict_st_perf2, index=['best score r2', 'mse', 'mae','corr'])

    ###Save outputs from this step (models and all mod. perf.)

    #performance and prediction
    df_st_perf2.to_csv(path_out+'/3level_test2_perf_stacked.csv')
    df_st_ypred2.to_csv(path_out+'/3level_test2_y_pred_stacked.csv')
    # one file per set (see the 2nd level): the set name is part of the file name
    for key in dct_std3_tab_for_stack.keys():
        dct_std3_tab_for_stack[key].to_csv(path_out+'/3level_stack_y_feature_tab_STD_'+str(key)+'.csv')
        dct_std3_tab_before_for_stack[key].to_csv(path_out+'/3level_stack_y_feature_tab_beforeSTD_'+str(key)+'.csv')

    print(' ')
    print('finished to calculate the Fold #', i)
    print(datetime.now())

print(' ')
print('finished the MODEL '+COL)
print(datetime.now())

interview_age
 
started to calculate the Fold # 0
2022-12-07 19:24:27.446778
 
start 1st level  2022-12-07 19:24:27.449032
controlling  carit1 2022-12-07 19:24:27.449646
controlling  carit3 2022-12-07 19:24:27.814023
controlling  carit4 2022-12-07 19:24:28.099683
controlling  face1 2022-12-07 19:24:28.383499
controlling  face2 2022-12-07 19:24:28.667085
controlling  face3 2022-12-07 19:24:28.950079
controlling  face4 2022-12-07 19:24:29.233704
controlling  face5 2022-12-07 19:24:29.517774
controlling  face6 2022-12-07 19:24:29.802120
controlling  vism 2022-12-07 19:24:30.087152
controlling  carit_FC 2022-12-07 19:24:30.376357
controlling  face_FC 2022-12-07 19:25:27.623276
controlling  vism_FC 2022-12-07 19:26:20.517218
controlling  cort 2022-12-07 19:27:10.143686
controlling  surf 2022-12-07 19:27:10.326902
controlling  subc 2022-12-07 19:27:10.457284
controlling  VolBrain 2022-12-07 19:27:10.472760
controlling  rest 2022-12-07 19:27:10.477449
standartize  carit1 2022-12-07 19:29:51.1

controlling  face6 2022-12-07 19:50:13.116741
controlling  vism 2022-12-07 19:50:13.469623
controlling  carit_FC 2022-12-07 19:50:13.838628
controlling  face_FC 2022-12-07 19:51:19.995871
controlling  vism_FC 2022-12-07 19:52:29.351394
controlling  cort 2022-12-07 19:53:36.983087
controlling  surf 2022-12-07 19:53:37.224399
controlling  subc 2022-12-07 19:53:37.473360
controlling  VolBrain 2022-12-07 19:53:37.492518
controlling  rest 2022-12-07 19:53:37.498132
standartize  carit1 2022-12-07 19:56:18.226010
standartize  carit3 2022-12-07 19:56:18.234376
standartize  carit4 2022-12-07 19:56:18.236175
standartize  face1 2022-12-07 19:56:18.237730
standartize  face2 2022-12-07 19:56:18.239343
standartize  face3 2022-12-07 19:56:18.241010
standartize  face4 2022-12-07 19:56:18.242710
standartize  face5 2022-12-07 19:56:18.244379
standartize  face6 2022-12-07 19:56:18.246004
standartize  vism 2022-12-07 19:56:18.247475
standartize  carit_FC 2022-12-07 19:56:18.248947
standartize  face_FC 202

standartize  face_FC 2022-12-07 20:21:30.608064
standartize  vism_FC 2022-12-07 20:21:30.885329
standartize  cort 2022-12-07 20:21:31.148245
standartize  surf 2022-12-07 20:21:31.149335
standartize  subc 2022-12-07 20:21:31.150020
standartize  VolBrain 2022-12-07 20:21:31.150456
standartize  rest 2022-12-07 20:21:31.150864
reduction  rest 2022-12-07 20:26:39.410254
reduction  carit_FC 2022-12-07 20:26:40.499834
reduction  face_FC 2022-12-07 20:26:41.450674
reduction  vism_FC 2022-12-07 20:26:42.389169
standartize PC table  rest 2022-12-07 20:26:43.608166
standartize PC table  carit_FC 2022-12-07 20:26:43.646114
standartize PC table  face_FC 2022-12-07 20:26:43.679108
standartize PC table  vism_FC 2022-12-07 20:26:43.711920
start  carit1 2022-12-07 20:26:43.750335
start  carit3 2022-12-07 20:27:12.912243
start  carit4 2022-12-07 20:27:42.553619
start  face1 2022-12-07 20:28:11.147574
start  face2 2022-12-07 20:28:39.069272
start  face3 2022-12-07 20:29:06.993568
start  face4 2022-12-07 

standartize PC table  rest 2022-12-07 20:53:34.441435
standartize PC table  carit_FC 2022-12-07 20:53:34.589607
standartize PC table  face_FC 2022-12-07 20:53:34.628736
standartize PC table  vism_FC 2022-12-07 20:53:34.660856
start  carit1 2022-12-07 20:53:34.698915
start  carit3 2022-12-07 20:54:04.089968
start  carit4 2022-12-07 20:54:35.022808
start  face1 2022-12-07 20:55:05.098306
start  face2 2022-12-07 20:55:35.295332
start  face3 2022-12-07 20:56:05.947435
start  face4 2022-12-07 20:56:36.636609
start  face5 2022-12-07 20:57:08.128682
start  face6 2022-12-07 20:57:35.845304
start  vism 2022-12-07 20:58:09.476940
start  carit_FC 2022-12-07 20:58:48.466984
start  face_FC 2022-12-07 20:58:53.351691
start  vism_FC 2022-12-07 20:58:58.137143
start  cort 2022-12-07 20:59:02.875127
start  surf 2022-12-07 20:59:17.763210
start  subc 2022-12-07 20:59:32.076236
start  VolBrain 2022-12-07 20:59:36.994323
start  rest 2022-12-07 20:59:41.074081
 
start 2nd level  2022-12-07 20:59:45.447399


start  face5 2022-12-07 21:24:36.477773
start  face6 2022-12-07 21:25:03.134987
start  vism 2022-12-07 21:25:31.824952
start  carit_FC 2022-12-07 21:26:02.433302
start  face_FC 2022-12-07 21:26:06.858338
start  vism_FC 2022-12-07 21:26:11.319782
start  cort 2022-12-07 21:26:16.085625
start  surf 2022-12-07 21:26:28.483891
start  subc 2022-12-07 21:26:40.034927
start  VolBrain 2022-12-07 21:26:44.516888
start  rest 2022-12-07 21:26:48.063719
 
start 2nd level  2022-12-07 21:26:52.766363
Checking single ML on test1 data  2022-12-07 21:26:52.766421
controlling  carit1 2022-12-07 21:26:52.768273
controlling  carit3 2022-12-07 21:26:52.852371
controlling  carit4 2022-12-07 21:26:52.921290
controlling  face1 2022-12-07 21:26:52.991154
controlling  face2 2022-12-07 21:26:53.058189
controlling  face3 2022-12-07 21:26:53.124652
controlling  face4 2022-12-07 21:26:53.189636
controlling  face5 2022-12-07 21:26:53.256067
controlling  face6 2022-12-07 21:26:53.321675
controlling  vism 2022-12-07 21