# Stacked Machine Learning

In [2]:
## IMPORTANT!

# Set the number of parallel jobs (CPU workers) first, before launching anything else.
# Choose a value that suits the number of cores of the machine running the notebook;
# it is passed to GridSearchCV(n_jobs=...) inside elnet().
n_jobs= 50

### Load libraries

In [3]:
#libraries
import pandas as pd
import numpy as np
import os
import sys
import shutil
import glob
import joblib
import warnings
from datetime import date, datetime

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
import scipy.stats as st

from nilearn import image as nli
from nilearn import plotting

#from mne.viz import plot_connectivity_circle

### Load functions

In [14]:
def control_features(table_in, control, index):
    """Regress the control variables out of every column of ``table_in``.

    Both tables are first restricted to ``index``. Each column of ``table_in``
    is z-scored, a linear regression of that z-scored column on the z-scored
    control variables is fitted, and the residual (z-scored column minus the
    regression prediction) is kept.

    Parameters
    ----------
    table_in : pd.Series or pd.DataFrame
        Table of features (rows - subjects, columns - features) or a single
        target Series. A Series is handled as a one-column table named '0'.
    control : pd.Series or pd.DataFrame
        Control variables (e.g. age, sex and/or movements).
    index : array-like
        Row labels (the local train index) used to reindex both inputs.

    Returns
    -------
    df_table : pd.DataFrame
        Residuals, one column per input column, indexed by ``index``.
    dct_std_y_models : dict
        Column name -> StandardScaler fitted on that column.
    std_model : StandardScaler
        Scaler fitted on the control variables.
    dct_lin_models : dict
        Column name -> LinearRegression fitted on (scaled controls, scaled column).
    """
    # shrink data to the local train index
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)
    ind = table_in.index

    # a Series (the target) is processed as a single column keyed '0'
    if len(table_in.values.shape) == 1:
        table_in = table_in.to_frame(name='0')

    # The control matrix is identical for every column, so it is reshaped and
    # standardized once here instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    std_model = StandardScaler()
    std_model.fit(X)
    X = std_model.transform(X)

    dct_table = {}
    dct_lin_models = {}
    dct_std_y_models = {}

    for col in table_in.columns:
        # standardize the column (target of the confound regression)
        y = table_in[col].values.reshape(-1, 1)
        std_model_y = StandardScaler()
        std_model_y.fit(y)
        y = std_model_y.transform(y).ravel()

        # fit the confound model and keep what it cannot explain
        model = LinearRegression()
        model.fit(X, y)
        y_res = y - model.predict(X)

        dct_table[col] = y_res
        dct_lin_models[col] = model
        dct_std_y_models[col] = std_model_y

    df_table = pd.DataFrame(dct_table, index=ind)

    return df_table, dct_std_y_models, std_model, dct_lin_models

In [15]:
def re_control_features(table_in, control, index, dct_std_y_models, std_model, dct_lin_models):
    """Apply previously fitted confound-adjustment models to new rows.

    Nothing is fitted here: every column of ``table_in`` is scaled with its
    stored scaler, the control variables are scaled with ``std_model``, and
    the stored linear model's prediction is subtracted from the scaled column.

    Parameters
    ----------
    table_in : pd.Series or pd.DataFrame
        Table of features (rows - subjects, columns - features) or a single
        target Series. A Series is handled as a one-column table named '0'.
    control : pd.Series or pd.DataFrame
        Control variables (e.g. age, sex and/or movements).
    index : array-like
        Row labels used to reindex both inputs.
    dct_std_y_models : dict
        Column name -> fitted scaler for that column (key '0' for a Series).
    std_model : fitted scaler
        Scaler for the control variables.
    dct_lin_models : dict
        Column name -> fitted linear model (key '0' for a Series).

    Returns
    -------
    pd.DataFrame
        Residuals, one column per input column, indexed by ``index``.
    """
    # shrink data to the requested index
    table_in = table_in.reindex(index=index)
    control = control.reindex(index=index)
    ind = table_in.index

    # a Series (the target) is processed as a single column keyed '0'
    if len(table_in.values.shape) == 1:
        table_in = table_in.to_frame(name='0')

    # The control matrix is identical for every column, so it is reshaped and
    # scaled with the previous std model once instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    X = std_model.transform(X)

    dct_table = {}
    for col in table_in.columns:
        # standardize the column with its previous scaler
        y = dct_std_y_models[col].transform(table_in[col].values.reshape(-1, 1)).ravel()

        # predict with the previous LinReg model and keep the residual
        dct_table[col] = y - dct_lin_models[col].predict(X)

    df_table = pd.DataFrame(dct_table, index=ind)

    return df_table

In [16]:
def elnet(X, y):
    """Tune an ElasticNet with a 5-fold grid search and report in-sample metrics.

    Rows with a missing target are dropped and ``X`` is restricted to the
    remaining row labels. X is neither imputed nor standardized here.
    The grid covers 70 log-spaced alphas (1e-1 .. 1e2) and 25 l1_ratio values
    in [0, 1]; the search runs with the notebook-level ``n_jobs`` setting.

    Parameters
    ----------
    X : pd.DataFrame
        Feature table (rows - subjects).
    y : pd.Series or pd.DataFrame
        Target values sharing row labels with ``X``.

    Returns
    -------
    tuple
        (best alpha, best l1_ratio, best cross-validated score, MSE, Pearson r,
        best estimator, predictions, MAE). MSE, r, MAE and the predictions are
        computed on the same rows the search was fitted on.
    """
    # keep only subjects that have a target value, and their feature rows
    y_real = y.dropna()
    feature_matrix = X.loc[y_real.index, :].values
    target_vec = y_real.values.reshape(-1, 1).ravel()

    # pipeline with a single ElasticNet step, tuned over alpha and l1_ratio
    search = GridSearchCV(
        Pipeline([('elasticnet', ElasticNet(random_state=42))]),
        {'elasticnet__alpha': np.logspace(-1, 2, 70),
         'elasticnet__l1_ratio': np.linspace(0, 1, 25)},
        cv=5,
        n_jobs=n_jobs,
    )
    search.fit(feature_matrix, target_vec)

    # predictions of the refitted best model on the data it was trained on
    y_pred = search.predict(feature_matrix)

    best = search.best_params_
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(target_vec, dtype=float), np.array(y_pred, dtype=float))

    return (best['elasticnet__alpha'], best['elasticnet__l1_ratio'], search.best_score_,
            mse, corr, search.best_estimator_, y_pred, mae)

In [17]:
def reaply_ElNet(X, y, model):
    """Apply an already fitted model to (X, y) and compute its performance.

    Rows with a missing target are dropped and ``X`` is reindexed to the
    remaining row labels. X is neither imputed nor standardized here.

    Parameters
    ----------
    X : pd.DataFrame
        Feature table (rows - subjects).
    y : pd.Series or pd.DataFrame
        Observed target values sharing row labels with ``X``.
    model : fitted estimator
        Object providing ``predict(X)`` and ``score(X, y)``.

    Returns
    -------
    tuple
        (predictions, observed target after dropna, its row labels as an
        array, ``model.score`` value, MSE, Pearson r, MAE).
    """
    # keep only subjects that have a target value, and their feature rows
    y_real = y.dropna()
    ind_y = np.array(y_real.index)  # row labels as a separate variable
    feature_matrix = X.reindex(index=y_real.index).values
    target_vec = y_real.values.reshape(-1, 1).ravel()

    # predict with the supplied model
    y_pred = model.predict(feature_matrix)

    # performance metrics
    bacc = model.score(feature_matrix, target_vec)
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(target_vec, dtype=float), np.array(y_pred, dtype=float))

    return y_pred, y_real, ind_y, bacc, mse, corr, mae

### Path to the tables folder

In [18]:
# Folder that holds all input tables; the output folders are created inside it as well.
# Keep the trailing slash: file names are appended by plain string concatenation (path+'file.csv').
path='/media/hcs-psy-narun/Alina/HCP_YA/MLtables_cope/'

### Load tables

In [19]:
# Silence all warnings from here on (note: this also hides data-conversion
# and convergence warnings raised by the libraries used below).
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# demography (first CSV column becomes the row index, as for every table below)
demo = pd.read_csv(path+'demographics_table.csv', index_col=0)

# targets table
targ = pd.read_csv(path+'cognition_table.csv', index_col=0)

# features tables: modality key -> file name inside `path`
# (the key order is kept, it defines the order of modalities everywhere below)
feature_files = {
    'emo': 'emo_table.csv',
    'gam': 'gam_table.csv',
    'lan': 'lan_table.csv',
    'mot': 'mot_table.csv',
    'rel': 'rel_table.csv',
    'soc': 'soc_table.csv',
    'wm': 'wm_table.csv',

    'gam_FC': 'Task_FC_GAMBLING_group_z_full.csv',
    'lan_FC': 'Task_FC_LANGUAGE_group_z_full.csv',
    'mot_FC': 'Task_FC_MOTOR_group_z_full.csv',
    'rel_FC': 'Task_FC_RELATIONAL_group_z_full.csv',
    'soc_FC': 'Task_FC_SOCIAL_group_z_full.csv',
    'wm_FC': 'Task_FC_WM_group_z_full.csv',

    'cort': 'cort_table.csv',
    'subc': 'subc_table.csv',
    'surf': 'surf_table.csv',
    'VolBrain': 'VolBrain_table.csv',
    'rest': 'rest_hpass.csv',
}

# features tables as dictionary: modality key -> DataFrame
features = {key: pd.read_csv(path+file_name, index_col=0)
            for key, file_name in feature_files.items()}


# table with movements (mean relative displacement Movement_RelativeRMS_mean.txt)
movements = pd.read_csv(path+'movement_table.csv', index_col=0)

# create table with 2 controlling parameters: gender and age
# LabelEncoder expects a 1-D array of labels, so the 'Gender' column is passed
# as a Series rather than as a one-column DataFrame.
sex_coded = pd.Series(LabelEncoder().fit_transform(demo['Gender']), index=demo.index, name='Gender')
control = pd.concat([sex_coded, demo.loc[:, ['Age_in_Yrs']]], axis=1)

In [20]:
# Show the (rows, columns) shape of every loaded feature table.
for key, table in features.items():
    print(key, table.shape)
    

emo (882, 379)
gam (882, 379)
lan (882, 379)
mot (882, 379)
rel (882, 379)
soc (882, 379)
wm (882, 379)
gam_FC (873, 71631)
lan_FC (873, 71631)
mot_FC (873, 71631)
rel_FC (873, 71631)
soc_FC (873, 71631)
wm_FC (873, 71631)
cort (882, 148)
subc (882, 19)
surf (882, 148)
VolBrain (882, 5)
rest (882, 71631)


In [21]:
# Shrink all tables to the same subjects: those with a non-missing
# 'CogTotalComp_Unadj' score. Every table is reindexed to that subject list.
yy = targ['CogTotalComp_Unadj'].dropna()

demo, movements, control = (
    demo.reindex(index=yy.index),
    movements.reindex(index=yy.index),
    control.reindex(index=yy.index),
)
for key, table in features.items():
    features[key] = table.reindex(index=yy.index)

In [22]:
# Show the (rows, columns) shape of every feature table after reindexing.
for key, table in features.items():
    print(key, table.shape)

emo (873, 379)
gam (873, 379)
lan (873, 379)
mot (873, 379)
rel (873, 379)
soc (873, 379)
wm (873, 379)
gam_FC (873, 71631)
lan_FC (873, 71631)
mot_FC (873, 71631)
rel_FC (873, 71631)
soc_FC (873, 71631)
wm_FC (873, 71631)
cort (873, 148)
subc (873, 19)
surf (873, 148)
VolBrain (873, 5)
rest (873, 71631)


##### Leave-P-groups-out via grouped N-fold CV (8-fold GroupKFold on Family_ID)

In [23]:
# Stacked-model pipeline, run once per cross-validation fold:
#   1st level - fit confound adjustment, scalers, PCA and one ElasticNet per modality on train_index
#   2nd level - re-apply those models to train_index and fit stacked ElasticNets on the
#               per-modality predictions (one stacked model per predefined set of modalities)
#   3rd level - apply everything to the held-out test_index and store performance
# All fitted models, intermediate tables, predictions and metrics are written to
# <path>/output_new_enh_newADJ_<target>/Fold_<i>/.
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning

# hide ConvergenceWarning messages (presumably raised by the ElasticNet grid search)
simplefilter("ignore", category=ConvergenceWarning)

# Unless warning options were given on the command line, silence all warnings and
# export PYTHONWARNINGS (presumably so that parallel worker processes inherit it).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

#for COL in targ.columns:
COL = 'CogTotalComp_Unadj'  #the script adapted to be launched on table of target variables. To launch in that way you need to uncomment for loop and comment this row with col variable
y = targ[COL]

print(y.name)

###make folder for outputs
# os.mkdir raises FileExistsError if the folder already exists, so results of a
# previous run are never silently overwritten.
nmf=path+'output_new_enh_newADJ_'+y.name
os.mkdir(nmf)

# modalities whose confound table additionally gets the matching column of `movements`
task_keys = ['emo', 'gam', 'lan', 'mot', 'rel', 'soc', 'wm']
# modalities that are reduced to 75 principal components before modelling
pca_keys = ['rest', 'gam_FC', 'lan_FC', 'mot_FC', 'rel_FC', 'soc_FC', 'wm_FC']

# folds are built on Family_ID groups, so one family never appears in both train and test
group_kfold = GroupKFold(n_splits=8)
for i, (train_index, test_index) in enumerate(group_kfold.split(demo, groups=demo['Family_ID'])):

    print(' ')
    print('started to calculate the Fold #', i)
    print(datetime.now())
    print(' ')

    ###create directory for specific Fold
    path_out = str(nmf+'/Fold_'+str(i))
    os.mkdir(path_out)

    ###Global indices (positional fold indices converted to subject labels)
    train_index = np.array(demo.iloc[train_index].index) #for training all models
    test_index = np.array(demo.iloc[test_index].index) #for final test

    ###Split global train_Gindex to local indices
    #index_train, index_test = train_test_split(train_index, test_size=0.4, random_state=42)

    ###Local indices
    #index_train = np.array(sorted(index_train)) #for training modalities models
    #index_test = np.array(sorted(index_test)) #for testing modalities and training RF


    ### 1st level ################################################################################

    #### Calculations of single ML models on train_index ####################################

    print('start 1st level ', datetime.now())

    #control for vars control table with sorting to train_index

    #control y (target)
    y_res1, std_targ_y, std_targ_X, linreg_targ = control_features(y, control, train_index)


    #control modalities
    features_res1 = {}
    std_feat_y_dct = {}
    std_feat_X_dct = {}
    linreg_feat_dct = {}
    for key in features.keys():
        print('controlling ', key, datetime.now())

        # task modalities are additionally controlled for their movement column
        confounds = pd.concat([control, movements[key]], axis=1) if key in task_keys else control
        mod_res, std_f_y, std_f_X, linreg_f = control_features(features[key], confounds, y_res1.index)

        features_res1[key] = mod_res
        std_feat_y_dct[key] = std_f_y
        std_feat_X_dct[key] = std_f_X
        linreg_feat_dct[key] = linreg_f

    #save adjustment models
    os.mkdir(path_out+'/adjustment_models')
    #target models
    joblib.dump(std_targ_y, (path_out+'/adjustment_models'+'/target_std_model_y.sav'))
    joblib.dump(std_targ_X, (path_out+'/adjustment_models'+'/target_std_model_X.sav'))
    joblib.dump(linreg_targ, (path_out+'/adjustment_models'+'/target_linreg.sav'))
    #features model
    joblib.dump(std_feat_y_dct, (path_out+'/adjustment_models'+'/features_std_model_y.sav'))
    joblib.dump(std_feat_X_dct, (path_out+'/adjustment_models'+'/features_std_model_X.sav'))
    joblib.dump(linreg_feat_dct, (path_out+'/adjustment_models'+'/features_linreg.sav'))


    ###standardize before model and keep std models
    #features
    std_models_features = {}
    for key in features_res1.keys():
        print('standartize ', key, datetime.now())
        std_model = StandardScaler()
        std_model.fit(features_res1[key].values)
        features_res1[key] = pd.DataFrame(std_model.transform(features_res1[key].values),
                                          index=features_res1[key].index,
                                          columns=features_res1[key].columns)
        std_models_features[key] = std_model
    #target
    std_model_target = StandardScaler()
    std_model_target.fit(y_res1.values.reshape(-1, 1))
    y_res1 = pd.DataFrame(std_model_target.transform(y_res1.values.reshape(-1, 1)),
                          index=y_res1.index)

    #save
    os.mkdir(path_out+'/standartization_models')
    #target
    joblib.dump(std_model_target,  (path_out+'/standartization_models'+'/target_std_model.sav'))
    #features
    joblib.dump(std_models_features,  (path_out+'/standartization_models'+'/features_std_model.sav'))


    #save features table before PCA
    y_res1.to_csv(path_out+'/target_y_train1.csv')
    for key in features_res1.keys():
        features_res1[key].to_csv(path_out+'/'+str(key)+'_train1.csv')


    #PCA models to rest and task FC
    PCA_models = {}
    for key in pca_keys:
        print('reduction ', key, datetime.now())
        model_PCA =  PCA(n_components=75, random_state=11)
        model_PCA.fit(features_res1[key].values)
        features_res1[key] = pd.DataFrame(model_PCA.transform(features_res1[key].values),
                                          index=features_res1[key].index)
        PCA_models[key] = model_PCA
    #save PCA models
    os.mkdir(path_out+'/PCA_models')
    joblib.dump(PCA_models,  (path_out+'/PCA_models'+'/PCA_model.sav'))


    #apply new std to PCA features again
    std_PC_feature_models = {}
    for key in pca_keys:
        print('standartize PC table ', key, datetime.now())
        std_PC_model = StandardScaler()
        std_PC_model.fit(features_res1[key].values)
        features_res1[key] = pd.DataFrame(std_PC_model.transform(features_res1[key].values),
                                          index=features_res1[key].index,
                                          columns=features_res1[key].columns)
        std_PC_feature_models[key] = std_PC_model
        #save PCA tables
        features_res1[key].to_csv(path_out+'/'+key+'_PCA75_train1.csv')
    #save std PCA models
    os.mkdir(path_out+'/PCA_standardization_models')
    joblib.dump(std_PC_feature_models,  (path_out+'/PCA_standardization_models'+'/std_PCA_model.sav'))


    #Launch ElasticNet for all task(modalities) on train_index (1st level)

    dict_tasks={}
    dict_elnet_model={}
    dict_ypred1={}

    for key in list(features_res1.keys()):

        print('start ', str(key), datetime.now())   #print start time of calculations

        bpar1, bpar2, acc, mse, corr, model, y_pred1, mae = elnet(features_res1[key], y_res1) #ML
        dict_tasks[key] = acc, mse, mae, corr, bpar1, bpar2
        dict_elnet_model[key] = model
        dict_ypred1[key] = y_pred1
    df_tasks = pd.DataFrame(dict_tasks, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
    df_y_pred1 = pd.DataFrame(dict_ypred1, index=y_res1.index)


    ###Save outputs from this step (models and all mod. perf.)

    #models
    for key in dict_elnet_model.keys():
        joblib.dump(dict_elnet_model[key], (path_out+'/'+str(key)+'_elnet_model.sav'))

    #model performance
    df_tasks.to_csv(path_out+'/1level_train_perf_elnet.csv')

    #list of first level targets (observed and predicted)
    df_y_pred1.to_csv(path_out+'/1level_train_y_pred_singleML.csv')



    ### 2nd level ################################################################################
    print(' ')
    print('start 2nd level ', datetime.now())

    #### L2 Testing single ML models on train_index #############################################
    # NOTE(review): this level re-applies the fitted models to train_index, i.e. to the same
    # subjects used at the 1st level, so the stacked models below are trained on in-sample
    # first-level predictions. The commented-out train_test_split above suggests a separate
    # local subset was once used here - confirm that the current behaviour is intended.

    print('Checking single ML on test1 data ', datetime.now())

    #control for vars in control table with sorting to train_index

    #control y (target)
    y_res2 = re_control_features(y, control, train_index,
                                 std_targ_y, std_targ_X, linreg_targ)

    #control modalities
    features_res2 = {}
    for key in features.keys():
        print('controlling ', key, datetime.now())

        confounds = pd.concat([control, movements[key]], axis=1) if key in task_keys else control
        features_res2[key] = re_control_features(features[key], confounds, y_res2.index,
                                                 std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

    ###standardize with the std models kept from the 1st level
    #features
    for key in features_res2.keys():
        print('standartize ', key, datetime.now())
        features_res2[key] = pd.DataFrame(std_models_features[key].transform(features_res2[key].values),
                                          index=features_res2[key].index,
                                          columns=features_res2[key].columns)
    #target
    y_res2 = pd.DataFrame(std_model_target.transform(y_res2.values.reshape(-1, 1)),
                          index=y_res2.index)

    #save features table before PCA
    y_res2.to_csv(path_out+'/target_y_train2.csv')
    for key in features_res2.keys():
        features_res2[key].to_csv(path_out+'/'+str(key)+'_train2.csv')


    #PCA models to rest and task FC
    for key in pca_keys:
        print('reduction ', key, datetime.now())
        features_res2[key] = pd.DataFrame(PCA_models[key].transform(features_res2[key].values),
                                          index=features_res2[key].index)


    #apply the std models of the PCA features again
    for key in pca_keys:
        print('standartize PCA ', key, datetime.now())
        features_res2[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res2[key].values),
                                          index=features_res2[key].index,
                                          columns=features_res2[key].columns)
        #save std pc table
        features_res2[key].to_csv(path_out+'/'+key+'_PCA75_train2.csv')


    #apply trained single models ElasticNet to the level-2 data (train_index)

    dict_y_pred2={}
    dict_y_pred2_per={}
    for key in list(features_res2.keys()):
        y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res2[key], y_res2, dict_elnet_model[key]) #ML
        dict_y_pred2[key] = y_pred
        dict_y_pred2_per[key] = bacc, mse, mae, corr

    df_y_pred2 = pd.DataFrame(dict_y_pred2, index=ind_y)
    df_y_pred2_per = pd.DataFrame(dict_y_pred2_per, index=['best score r2', 'mse', 'mae','corr'])


    ###Save outputs from this step (models and all mod. perf.)

    #model performance
    df_y_pred2_per.to_csv(path_out+'/2level_test1_perf_elnet.csv')

    #list of first level targets (observed and predicted)
    df_y_pred2.to_csv(path_out+'/2level_test1_y_pred_singleML.csv')



    #### L2 Calculating stacked ML models on the level-2 predictions ################################

    print('Calculating stacked ML on test1 data ', datetime.now())


    #identifying sets for several stacked models
    set2 = ['emo', 'gam', 'lan', 'mot', 'rel', 'soc', 'wm']
    set3 = ['cort', 'subc', 'surf', 'rest', 'VolBrain']

    set4 = ['gam_FC', 'lan_FC', 'mot_FC', 'rel_FC', 'soc_FC', 'wm_FC']
    set5 = ['emo', 'gam', 'lan', 'mot', 'rel', 'soc', 'wm', 'gam_FC', 'lan_FC', 'mot_FC', 'rel_FC', 'soc_FC', 'wm_FC']
    set6 = ['emo', 'gam', 'lan', 'mot', 'rel', 'soc', 'wm', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
    set7 = ['gam_FC', 'lan_FC', 'mot_FC', 'rel_FC', 'soc_FC', 'wm_FC', 'cort', 'subc', 'surf', 'rest', 'VolBrain']
    set8 = ['gam_FC', 'lan_FC', 'mot_FC', 'rel_FC', 'soc_FC', 'wm_FC', 'rest']

    set1 = list(df_y_pred2.columns) #all existing modalities

    #for preset sets
    dict_st_perf1={}
    dict_st_models={}
    dict_st_ypred1={}
    dct_std_mod_for_stack = {}         # set name -> scaler of the stacked features
    dct_std_tab_for_stack = {}         # set name -> standardized stacked features
    dct_std_tab_before_for_stack = {}  # set name -> stacked features before standardization

    for s, set_n in enumerate([set1, set2, set3, set4, set5, set6, set7, set8], start=1):
        print('set '+str(s), datetime.now())
        set_name = 'set'+str(s)

        # stacked features = per-modality predictions of the selected modalities
        st_features = df_y_pred2.loc[:,set_n]
        dct_std_tab_before_for_stack[set_name] = st_features

        stack_std_model = StandardScaler().fit(st_features.values)
        dct_std_mod_for_stack[set_name] = stack_std_model

        std_st_features = pd.DataFrame(stack_std_model.transform(st_features.values),
                                       index=st_features.index, columns=st_features.columns)
        dct_std_tab_for_stack[set_name] = std_st_features

        bpar1, bpar2, acc, mse, corr, model, y_pred3, mae = elnet(std_st_features, y_res2) #ML

        dict_st_perf1[set_name] = acc, mse, mae, corr, bpar1, bpar2
        dict_st_models[set_name] = model
        dict_st_ypred1[set_name] = y_pred3

    df_st_perf1 = pd.DataFrame(dict_st_perf1, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
    df_st_ypred1 = pd.DataFrame(dict_st_ypred1, index=y_res2.index)

    ###Save outputs from this step (models and all mod. perf.)

    #models
    for key in dict_st_models.keys():
        joblib.dump(dict_st_models[key], (path_out+'/'+str(key)+'_stacked_model.sav'))
    for key in dct_std_mod_for_stack.keys():
        joblib.dump(dct_std_mod_for_stack[key], (path_out+'/'+str(key)+'_stacked_STD_model.sav'))

    #performance and prediction
    df_st_perf1.to_csv(path_out+'/2level_test1_perf_stacked.csv')
    df_st_ypred1.to_csv(path_out+'/2level_test1_y_pred_stacked.csv')
    # One file per set: the set name is part of the file name, otherwise every set
    # would be written to the same file and only the last one would survive.
    for key in dct_std_tab_for_stack.keys():
        dct_std_tab_for_stack[key].to_csv(path_out+'/'+str(key)+'_2level_stack_y_feature_tab_STD.csv')
        dct_std_tab_before_for_stack[key].to_csv(path_out+'/'+str(key)+'_2level_stack_y_feature_tab_beforeSTD.csv')



    ### 3rd level ################################################################################
    print(' ')
    print('start 3rd level ', datetime.now())


    #### L3 Testing single ML models on test_index #############################################

    print('Checking single ML on test2 data ', datetime.now())

    #control for vars in control table with sorting to test_index

    #control y (target)
    y_res3 = re_control_features(y, control, test_index,
                                 std_targ_y, std_targ_X, linreg_targ)

    #control modalities
    features_res3 = {}
    for key in features.keys():
        print('controlling ', key, datetime.now())

        confounds = pd.concat([control, movements[key]], axis=1) if key in task_keys else control
        features_res3[key] = re_control_features(features[key], confounds, y_res3.index,
                                                 std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

    ###standardize with the std models kept from the 1st level
    #features
    for key in features_res3.keys():
        print('standartize ', key, datetime.now())
        features_res3[key] = pd.DataFrame(std_models_features[key].transform(features_res3[key].values),
                                          index=features_res3[key].index,
                                          columns=features_res3[key].columns)
    #target
    y_res3 = pd.DataFrame(std_model_target.transform(y_res3.values.reshape(-1, 1)),
                          index=y_res3.index)

    #save features table before PCA
    y_res3.to_csv(path_out+'/target_y_test.csv')
    for key in features_res3.keys():
        features_res3[key].to_csv(path_out+'/'+str(key)+'_test.csv')


    #PCA models to rest and task FC
    for key in pca_keys:
        print('reduction ', key, datetime.now())
        features_res3[key] = pd.DataFrame(PCA_models[key].transform(features_res3[key].values),
                                          index=features_res3[key].index)


    #apply the std models of the PCA features again
    for key in pca_keys:
        print('standartize PCA ', key, datetime.now())
        features_res3[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res3[key].values),
                                          index=features_res3[key].index,
                                          columns=features_res3[key].columns)
        #save std pc table
        features_res3[key].to_csv(path_out+'/'+key+'_PCA75_test.csv')


    #apply trained single models ElasticNet to new data , test_index

    dict_y_pred3={}
    dict_y_pred3_per={}
    for key in list(features_res3.keys()):
        y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res3[key], y_res3, dict_elnet_model[key]) #ML
        dict_y_pred3[key] = y_pred
        dict_y_pred3_per[key] = bacc, mse, mae, corr

    df_y_pred3 = pd.DataFrame(dict_y_pred3, index=ind_y)
    df_y_pred3_per = pd.DataFrame(dict_y_pred3_per, index=['best score r2', 'mse', 'mae','corr'])


    ###Save outputs from this step (models and all mod. perf.)

    #model performance
    df_y_pred3_per.to_csv(path_out+'/3level_test2_perf_elnet.csv')

    #list of first level targets (observed and predicted)
    df_y_pred3.to_csv(path_out+'/3level_test2_y_pred_singleML.csv')


    #### L3 Testing stacked ML models on test_index #############################################

    print('Calculating stacked ML on test2 data ', datetime.now())

    #apply trained stacked models ElasticNet to new data , test_index

    #for preset sets
    dict_st_perf2={}
    dict_st_ypred2={}

    dct_std3_tab_for_stack = {}         # set name -> standardized stacked features (test)
    dct_std3_tab_before_for_stack = {}  # set name -> stacked features before standardization (test)

    for s, set_n in enumerate([set1, set2, set3, set4, set5, set6, set7, set8], start=1):
        set_name = 'set'+str(s)

        ftrs = df_y_pred3.loc[:, set_n]
        dct_std3_tab_before_for_stack[set_name] = ftrs

        # scale with the scaler fitted for this set at the 2nd level
        std_ftrs = pd.DataFrame(dct_std_mod_for_stack[set_name].transform(ftrs.values),
                                index=ftrs.index,columns=ftrs.columns)
        dct_std3_tab_for_stack[set_name] = std_ftrs

        y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(std_ftrs, y_res3, dict_st_models[set_name]) #ML
        dict_st_ypred2[set_name] = y_pred
        dict_st_perf2[set_name] = bacc, mse, mae, corr

    df_st_ypred2 = pd.DataFrame(dict_st_ypred2, index=ind_y)
    df_st_perf2 = pd.DataFrame(dict_st_perf2, index=['best score r2', 'mse', 'mae','corr'])

    ###Save outputs from this step (models and all mod. perf.)

    #performance and prediction
    df_st_perf2.to_csv(path_out+'/3level_test2_perf_stacked.csv')
    df_st_ypred2.to_csv(path_out+'/3level_test2_y_pred_stacked.csv')
    # One file per set (see the 2nd level): the set name is part of the file name.
    for key in dct_std3_tab_for_stack.keys():
        dct_std3_tab_for_stack[key].to_csv(path_out+'/'+str(key)+'_3level_stack_y_feature_tab_STD.csv')
        dct_std3_tab_before_for_stack[key].to_csv(path_out+'/'+str(key)+'_3level_stack_y_feature_tab_beforeSTD.csv')


    print(' ')
    print('finished to calculate the Fold #', i)
    print(datetime.now())

print(' ')
print('finished the MODEL '+COL)
print(datetime.now())

CogTotalComp_Unadj
 
started to calculate the Fold # 0
2023-06-26 14:43:39.812995
 
start 1st level  2023-06-26 14:43:39.816229
controlling  emo 2023-06-26 14:43:39.866430
controlling  gam 2023-06-26 14:43:40.185750
controlling  lan 2023-06-26 14:43:40.542737
controlling  mot 2023-06-26 14:43:40.835369
controlling  rel 2023-06-26 14:43:41.137240
controlling  soc 2023-06-26 14:43:41.431130
controlling  wm 2023-06-26 14:43:41.723710
controlling  gam_FC 2023-06-26 14:43:42.024360
controlling  lan_FC 2023-06-26 14:44:33.227179
controlling  mot_FC 2023-06-26 14:45:24.928572
controlling  rel_FC 2023-06-26 14:46:16.875012
controlling  soc_FC 2023-06-26 14:47:08.109594
controlling  wm_FC 2023-06-26 14:48:00.743442
controlling  cort 2023-06-26 14:48:54.130955
controlling  subc 2023-06-26 14:48:54.258058
controlling  surf 2023-06-26 14:48:54.272050
controlling  VolBrain 2023-06-26 14:48:54.380323
controlling  rest 2023-06-26 14:48:54.385052
standartize  emo 2023-06-26 14:52:42.627339
standartize

Calculating stacked ML on test2 data  2023-06-26 15:35:04.481751
 
finished to calculate the Fold # 0
2023-06-26 15:35:04.643495
 
started to calculate the Fold # 1
2023-06-26 15:35:04.643900
 
start 1st level  2023-06-26 15:35:04.651001
controlling  emo 2023-06-26 15:35:05.514942
controlling  gam 2023-06-26 15:35:06.234800
controlling  lan 2023-06-26 15:35:06.531580
controlling  mot 2023-06-26 15:35:06.835137
controlling  rel 2023-06-26 15:35:07.132846
controlling  soc 2023-06-26 15:35:07.432528
controlling  wm 2023-06-26 15:35:07.733241
controlling  gam_FC 2023-06-26 15:35:08.033839
controlling  lan_FC 2023-06-26 15:35:57.796835
controlling  mot_FC 2023-06-26 15:36:50.080625
controlling  rel_FC 2023-06-26 15:37:42.283983
controlling  soc_FC 2023-06-26 15:38:34.091479
controlling  wm_FC 2023-06-26 15:39:27.174836
controlling  cort 2023-06-26 15:40:19.551496
controlling  subc 2023-06-26 15:40:19.713575
controlling  surf 2023-06-26 15:40:19.732736
controlling  VolBrain 2023-06-26 15:40:

 
finished to calculate the Fold # 1
2023-06-26 16:26:21.822520
 
started to calculate the Fold # 2
2023-06-26 16:26:21.823201
 
start 1st level  2023-06-26 16:26:21.828156
controlling  emo 2023-06-26 16:26:22.692526
controlling  gam 2023-06-26 16:26:23.347572
controlling  lan 2023-06-26 16:26:23.645287
controlling  mot 2023-06-26 16:26:23.932562
controlling  rel 2023-06-26 16:26:24.221807
controlling  soc 2023-06-26 16:26:24.513596
controlling  wm 2023-06-26 16:26:24.803689
controlling  gam_FC 2023-06-26 16:26:25.094353
controlling  lan_FC 2023-06-26 16:27:15.162085
controlling  mot_FC 2023-06-26 16:28:06.403996
controlling  rel_FC 2023-06-26 16:28:58.062312
controlling  soc_FC 2023-06-26 16:29:51.396343
controlling  wm_FC 2023-06-26 16:30:43.961793
controlling  cort 2023-06-26 16:31:35.727786
controlling  subc 2023-06-26 16:31:35.892694
controlling  surf 2023-06-26 16:31:35.911078
controlling  VolBrain 2023-06-26 16:31:36.038868
controlling  rest 2023-06-26 16:31:36.043434
standartiz

 
finished to calculate the Fold # 2
2023-06-26 17:18:34.363653
 
started to calculate the Fold # 3
2023-06-26 17:18:34.363906
 
start 1st level  2023-06-26 17:18:34.368958
controlling  emo 2023-06-26 17:18:35.216643
controlling  gam 2023-06-26 17:18:35.868153
controlling  lan 2023-06-26 17:18:36.301138
controlling  mot 2023-06-26 17:18:36.653083
controlling  rel 2023-06-26 17:18:37.005540
controlling  soc 2023-06-26 17:18:37.357304
controlling  wm 2023-06-26 17:18:37.709424
controlling  gam_FC 2023-06-26 17:18:38.028973
controlling  lan_FC 2023-06-26 17:19:28.826041
controlling  mot_FC 2023-06-26 17:20:21.075006
controlling  rel_FC 2023-06-26 17:21:13.386542
controlling  soc_FC 2023-06-26 17:22:11.125534
controlling  wm_FC 2023-06-26 17:23:04.688004
controlling  cort 2023-06-26 17:23:57.043979
controlling  subc 2023-06-26 17:23:57.238615
controlling  surf 2023-06-26 17:23:57.262895
controlling  VolBrain 2023-06-26 17:23:57.408972
controlling  rest 2023-06-26 17:23:57.414567
standartiz

 
finished to calculate the Fold # 3
2023-06-26 18:12:09.764493
 
started to calculate the Fold # 4
2023-06-26 18:12:09.764947
 
start 1st level  2023-06-26 18:12:09.770440
controlling  emo 2023-06-26 18:12:10.619018
controlling  gam 2023-06-26 18:12:11.294719
controlling  lan 2023-06-26 18:12:11.587314
controlling  mot 2023-06-26 18:12:11.877780
controlling  rel 2023-06-26 18:12:12.167227
controlling  soc 2023-06-26 18:12:12.458431
controlling  wm 2023-06-26 18:12:12.749755
controlling  gam_FC 2023-06-26 18:12:13.042354
controlling  lan_FC 2023-06-26 18:13:03.870172
controlling  mot_FC 2023-06-26 18:13:55.645020
controlling  rel_FC 2023-06-26 18:14:49.770449
controlling  soc_FC 2023-06-26 18:15:45.811962
controlling  wm_FC 2023-06-26 18:16:39.533696
controlling  cort 2023-06-26 18:17:31.571342
controlling  subc 2023-06-26 18:17:31.732657
controlling  surf 2023-06-26 18:17:31.746867
controlling  VolBrain 2023-06-26 18:17:31.850493
controlling  rest 2023-06-26 18:17:31.855094
standartiz

Calculating stacked ML on test2 data  2023-06-26 19:04:14.365660
 
finished to calculate the Fold # 4
2023-06-26 19:04:14.727698
 
started to calculate the Fold # 5
2023-06-26 19:04:14.728390
 
start 1st level  2023-06-26 19:04:14.733864
controlling  emo 2023-06-26 19:04:15.782765
controlling  gam 2023-06-26 19:04:16.721695
controlling  lan 2023-06-26 19:04:17.179465
controlling  mot 2023-06-26 19:04:17.621230
controlling  rel 2023-06-26 19:04:18.014422
controlling  soc 2023-06-26 19:04:18.392717
controlling  wm 2023-06-26 19:04:18.771457
controlling  gam_FC 2023-06-26 19:04:19.185057
controlling  lan_FC 2023-06-26 19:05:25.139093
controlling  mot_FC 2023-06-26 19:06:32.047794
controlling  rel_FC 2023-06-26 19:07:39.278983
controlling  soc_FC 2023-06-26 19:08:36.175078
controlling  wm_FC 2023-06-26 19:09:29.296281
controlling  cort 2023-06-26 19:10:21.917730
controlling  subc 2023-06-26 19:10:22.084727
controlling  surf 2023-06-26 19:10:22.103118
controlling  VolBrain 2023-06-26 19:10:

 
finished to calculate the Fold # 5
2023-06-26 19:56:59.878793
 
started to calculate the Fold # 6
2023-06-26 19:56:59.879247
 
start 1st level  2023-06-26 19:56:59.882419
controlling  emo 2023-06-26 19:57:00.728121
controlling  gam 2023-06-26 19:57:01.389572
controlling  lan 2023-06-26 19:57:01.684859
controlling  mot 2023-06-26 19:57:01.980792
controlling  rel 2023-06-26 19:57:02.276051
controlling  soc 2023-06-26 19:57:02.570642
controlling  wm 2023-06-26 19:57:02.865020
controlling  gam_FC 2023-06-26 19:57:03.160413
controlling  lan_FC 2023-06-26 19:57:53.448688
controlling  mot_FC 2023-06-26 19:58:44.842954
controlling  rel_FC 2023-06-26 19:59:36.300735
controlling  soc_FC 2023-06-26 20:00:27.940397
controlling  wm_FC 2023-06-26 20:01:20.250801
controlling  cort 2023-06-26 20:02:12.036486
controlling  subc 2023-06-26 20:02:12.231639
controlling  surf 2023-06-26 20:02:12.252915
controlling  VolBrain 2023-06-26 20:02:12.399267
controlling  rest 2023-06-26 20:02:12.404351
standartiz

 
finished to calculate the Fold # 6
2023-06-26 20:49:20.852607
 
started to calculate the Fold # 7
2023-06-26 20:49:20.853484
 
start 1st level  2023-06-26 20:49:20.857828
controlling  emo 2023-06-26 20:49:21.727286
controlling  gam 2023-06-26 20:49:22.480335
controlling  lan 2023-06-26 20:49:22.829279
controlling  mot 2023-06-26 20:49:23.175446
controlling  rel 2023-06-26 20:49:23.522387
controlling  soc 2023-06-26 20:49:23.838221
controlling  wm 2023-06-26 20:49:24.138850
controlling  gam_FC 2023-06-26 20:49:24.446129
controlling  lan_FC 2023-06-26 20:50:16.523490
controlling  mot_FC 2023-06-26 20:51:07.719954
controlling  rel_FC 2023-06-26 20:52:00.033749
controlling  soc_FC 2023-06-26 20:52:51.836383
controlling  wm_FC 2023-06-26 20:53:43.031527
controlling  cort 2023-06-26 20:54:35.657172
controlling  subc 2023-06-26 20:54:36.000846
controlling  surf 2023-06-26 20:54:36.019239
controlling  VolBrain 2023-06-26 20:54:36.133481
controlling  rest 2023-06-26 20:54:36.137922
standartiz

Calculating stacked ML on test2 data  2023-06-26 21:41:32.123228
 
finished to calculate the Fold # 7
2023-06-26 21:41:32.244052
 
finished the MODEL CogTotalComp_Unadj
2023-06-26 21:41:32.244284
