# Stacked Machine Learning

In [1]:
## IMPORTANT !

# Set the number of parallel jobs before running the notebook.
# Pick a value that fits the number of CPU cores available on this machine;
# it is passed as `n_jobs` to the grid search inside `elnet`.
n_jobs = 50

### Load libraries

In [2]:
#libraries
import pandas as pd
import numpy as np
import os
import sys
import shutil
import glob
import joblib
import warnings
from datetime import date, datetime

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
import scipy.stats as st

from nilearn import image as nli
from nilearn import plotting

from mne.viz import plot_connectivity_circle

### Load functions

In [3]:
def control_features(table_in, control, index): 
    """Regress the control variables out of every column of ``table_in``.

    Each column is z-scored, a linear regression on the z-scored control
    variables is fitted, and the residuals (in z-score units of the column)
    are kept. All fitted objects are returned so that the same adjustment
    can later be re-applied to other subjects with ``re_control_features``.

    Parameters
    ----------
    table_in : pd.Series or pd.DataFrame
        Rows are subjects, columns are features. A Series (e.g. the target)
        is treated as a single column stored under the key ``'0'``.
    control : pd.Series or pd.DataFrame
        Control variable(s), indexed by subject.
    index : array-like
        Subject labels to keep; both tables are reindexed to it.

    Returns
    -------
    df_table : pd.DataFrame
        Residuals, one column per input column, indexed by ``index``.
    dct_std_y_models : dict
        Column key -> StandardScaler fitted on that column.
    std_model : StandardScaler
        Scaler fitted on the control variables.
    dct_lin_models : dict
        Column key -> LinearRegression fitted on the scaled controls.
    """
    # Shrink both tables to the requested subjects.
    table_in = table_in.reindex(index = index)
    control = control.reindex(index = index)
    ind = table_in.index

    # A Series has 1-D values; handle it as a one-column table keyed '0'.
    is_series = len(table_in.values.shape) == 1
    col_names = ['0'] if is_series else table_in.columns

    # The control matrix is the same for every column, so it is reshaped and
    # standardised once instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    std_model = StandardScaler()
    std_model.fit(X)
    X = std_model.transform(X)

    dct_table = {}
    dct_lin_models = {}
    dct_std_y_models = {}

    for col in col_names:
        y = table_in if is_series else table_in[col]
        y_2d = y.values.reshape(-1, 1)

        # Standardise the column, keeping the scaler for later re-use.
        std_model_y = StandardScaler()
        std_model_y.fit(y_2d)
        y_std = std_model_y.transform(y_2d).ravel()

        # Fit controls -> column and keep what the controls cannot explain.
        model = LinearRegression()
        model.fit(X, y_std)
        y_res = y_std - model.predict(X)

        dct_table[col] = y_res
        dct_lin_models[col] = model
        dct_std_y_models[col] = std_model_y

    df_table = pd.DataFrame(dct_table, index = ind)

    return df_table, dct_std_y_models, std_model, dct_lin_models

In [4]:
def re_control_features(table_in, control, index, dct_std_y_models, std_model, dct_lin_models):
    """Re-apply a previously fitted control adjustment to (new) subjects.

    Nothing is fitted here: every column is scaled with its stored scaler,
    the stored regression predicts it from the scaled control variables, and
    the residual is returned.

    Parameters
    ----------
    table_in : pd.Series or pd.DataFrame
        Rows are subjects, columns are features. A Series is treated as a
        single column looked up under the key ``'0'``.
    control : pd.Series or pd.DataFrame
        Control variable(s), indexed by subject.
    index : array-like
        Subject labels to keep; both tables are reindexed to it.
    dct_std_y_models : dict
        Column key -> fitted scaler (``transform``) for that column.
    std_model : object
        Fitted scaler (``transform``) for the control variables.
    dct_lin_models : dict
        Column key -> fitted regression (``predict``) for that column.

    Returns
    -------
    pd.DataFrame
        Residuals, one column per input column, indexed by ``index``.
    """
    # Shrink both tables to the requested subjects.
    table_in = table_in.reindex(index = index)
    control = control.reindex(index = index)
    ind = table_in.index

    # A Series has 1-D values; handle it as a one-column table keyed '0'.
    is_series = len(table_in.values.shape) == 1
    col_names = ['0'] if is_series else table_in.columns

    # The control matrix does not depend on the column, so it is reshaped and
    # scaled once instead of once per column.
    X = control.values
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    X = std_model.transform(X)

    dct_table = {}
    for col in col_names:
        y = table_in if is_series else table_in[col]

        # Scale with the stored scaler, then subtract the stored model's prediction.
        y_std = dct_std_y_models[col].transform(y.values.reshape(-1, 1))
        y_std = y_std.reshape(-1, 1).ravel()
        y_pred = dct_lin_models[col].predict(X)

        dct_table[col] = y_std - y_pred

    df_table = pd.DataFrame(dct_table, index = ind)

    return df_table

In [5]:
def elnet(X, y):
    """Tune and fit an ElasticNet on ``X`` -> ``y`` with a grid search.

    Subjects with a missing target are dropped and ``X`` is aligned to the
    remaining ones. The grid covers 70 log-spaced ``alpha`` values between
    1e-1 and 1e2 and 25 evenly spaced ``l1_ratio`` values between 0 and 1,
    scored with 5-fold cross-validation. The number of parallel jobs is read
    from the notebook-level ``n_jobs``.

    Returns
    -------
    tuple
        ``(best alpha, best l1_ratio, best CV score, mse, corr,
        best estimator, y_pred, mae)``. ``mse``, ``mae`` and ``corr`` compare
        the target with predictions for the same rows used for fitting.
    """
    # Keep only subjects with an observed target and line the features up with them.
    y_real = y.dropna()
    X_values = X.loc[y_real.index, :].values
    y_values = y_real.values.reshape(-1, 1).ravel()

    # Single-step pipeline, so the hyperparameters carry the 'elasticnet__' prefix.
    search = GridSearchCV(
        Pipeline([('elasticnet', ElasticNet(random_state=42))]),
        {'elasticnet__alpha': np.logspace(-1, 2, 70),
         'elasticnet__l1_ratio': np.linspace(0, 1, 25)},
        cv=5,
        n_jobs=n_jobs,
    )
    search.fit(X_values, y_values)

    # Predictions of the selected model for the rows it was fitted on.
    y_pred = search.predict(X_values)

    best_params = search.best_params_
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(y_values, dtype=float), np.array(y_pred, dtype=float))

    return (best_params['elasticnet__alpha'],
            best_params['elasticnet__l1_ratio'],
            search.best_score_,
            mse,
            corr,
            search.best_estimator_,
            y_pred,
            mae)

In [6]:
def reaply_ElNet(X, y, model):
    """Apply an already fitted model to ``X`` and score it against ``y``.

    Subjects with a missing target are dropped and ``X`` is reindexed to the
    remaining ones; the model is not refitted.

    Returns
    -------
    tuple
        ``(y_pred, y_real, ind_y, bacc, mse, corr, mae)`` where ``y_real`` is
        the target without missing values, ``ind_y`` its index as an array
        and ``bacc`` the value of ``model.score``.
    """
    # Observed targets only; their index defines which feature rows are used.
    y_real = y.dropna()
    ind_y = np.array(y_real.index)

    X_values = X.reindex(index=y_real.index).values
    y_values = y_real.values.reshape(-1, 1).ravel()

    # Predict first, then collect the performance metrics.
    y_pred = model.predict(X_values)
    bacc = model.score(X_values, y_values)
    mse = mean_squared_error(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    corr, _ = pearsonr(np.array(y_values, dtype=float), np.array(y_pred, dtype=float))

    return y_pred, y_real, ind_y, bacc, mse, corr, mae

### Path to the tables folder

In [7]:
# Folder with the input CSV tables; the output folders are created inside it too.
# Keep the trailing slash: file names are appended by plain string concatenation.
path = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/'

### Load tables

In [8]:
# Silence every Python warning from here on.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# info.csv: the first data column is the demographic variable (coded as sex
# below), all remaining columns are the prediction targets.
info_table = pd.read_csv(path+'info.csv', index_col=0)

#demography
demo = info_table.iloc[:,0]

#targets table
targ = info_table.iloc[:,1:]

#features tables as dictionary (modality name -> table); the insertion order
#below is the order in which the modalities are processed later on
features = {
    name: pd.read_csv(path+file_name, index_col=0)
    for name, file_name in [
        ('stroop', 'stroop.csv'),
        ('faces', 'faces.csv'),
        ('facename', 'facename.csv'),
        ('mid', 'mid.csv'),

        ('stroop_FC', 'stroop_FC.csv'),
        ('faces_FC', 'faces_FC.csv'),
        ('facename_FC', 'facename_FC.csv'),
        ('mid_FC', 'mid_FC.csv'),

        ('cort', 'cort_thck.csv'),
        ('surf', 'cort_area.csv'),
        ('subc', 'subc_vol.csv'),
        ('rest', 'rest.csv'),
        ('brainVol', 'total_vol.csv'),
    ]
}

#table with movements (mean relative displacement Movement_RelativeRMS_mean.txt)
movements = pd.read_csv(path+'movements.csv', index_col=0)

#single control variable: biological sex, integer-coded with LabelEncoder
sex_coded = pd.Series(
    LabelEncoder().fit_transform(demo.values),
    index=demo.index,
    name='sex',
)

control = sex_coded

In [9]:
import copy
# Keep an untouched deep copy of the feature tables; the modelling loop
# restores `features` from this backup after each target has been processed.
features2 = copy.deepcopy(features)

In [10]:
# Show the names of the target variables; the modelling loop runs once per column.
targ.columns

Index(['fsiq45', 'ChildhdIQ'], dtype='object')

##### Nested cross-validation: outer 7-fold GroupKFold, inner 5-fold grid search

In [11]:
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
# Ignore scikit-learn convergence warnings (presumably raised by the ElasticNet
# fits during the grid search).
simplefilter("ignore", category=ConvergenceWarning)

# When no -W option was passed to the interpreter, ignore all warnings in this
# process and export PYTHONWARNINGS as well (presumably so that the parallel
# worker processes started for the grid search inherit the setting).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"
    
    
    
    
    

for COL in targ.columns:
    # One complete cross-validated run per target column. To run a single
    # target only, replace this loop header by e.g. COL = 'ChildhdIQ' and
    # dedent the body.
    #
    # Per outer fold the pipeline has three stages:
    #   1st level - fit the adjustment, scaling, PCA and one ElasticNet per
    #               modality on train_index;
    #   2nd level - re-apply the fitted transforms to train_index, predict with
    #               the modality models and train the stacked ElasticNets on
    #               those predictions;
    #   3rd level - re-apply everything to test_index and evaluate the single
    #               and stacked models.
    y = targ[COL]

    print(y.name)

    ###make folder for outputs
    # os.mkdir raises FileExistsError when the folder is already there, so a
    # second run needs the old output folder to be moved or removed first.
    nmf=path+'output_newADJ_OneTrain_stackSTD_5cv_'+y.name
    os.mkdir(nmf)

    i=0

    # Outer split: 7 folds, grouped by the subject labels in demo.index.
    group_kfold = GroupKFold(n_splits=7)
    for train_index, test_index in group_kfold.split(demo, groups=demo.index): 

        print(' ')
        print('started to calculate the Fold #', i)
        print(datetime.now())
        print(' ')

        ###create directory for specific Fold
        os.mkdir(nmf+'/Fold_'+str(i)) 
        path_out = str(nmf+'/Fold_'+str(i))

        ###Global indices: convert positions returned by the splitter to subject labels
        train_index = np.array(demo.iloc[train_index].index) #for training all models
        test_index = np.array(demo.iloc[test_index].index) #for final test

        # NOTE: the training subjects are not split any further (an earlier
        # 60/40 train_test_split is disabled), so the 1st and the 2nd level
        # both work on train_index.


        ### 1st level ################################################################################

        #### Calculations of single ML models on train_index #################################### 

        print('start 1st level ', datetime.now())

        #regress the variables in `control` out of the target, fitted on train_index
        y_res1, std_targ_y, std_targ_X, linreg_targ = control_features(y, control, train_index)


        #control modalities: task modalities are additionally adjusted for
        #their movement column, all other modalities for `control` only
        features_res1 = {}
        std_feat_y_dct = {}
        std_feat_X_dct = {}
        linreg_feat_dct = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())
            
            if key in ['stroop', 'faces', 'facename', 'mid']:
                control_t = pd.concat([control, movements[key]], axis=1)
                mod_res, std_f_y, std_f_X, linreg_f = control_features(features[key], control_t, y_res1.index)
            else:
                mod_res, std_f_y, std_f_X, linreg_f = control_features(features[key], control, y_res1.index)

            features_res1[key] = mod_res
            std_feat_y_dct[key] = std_f_y
            std_feat_X_dct[key] = std_f_X
            linreg_feat_dct[key] = linreg_f

        #save adjustment models
        os.mkdir(path_out+'/adjustment_models')
        #target models
        joblib.dump(std_targ_y, (path_out+'/adjustment_models'+'/target_std_model_y.sav'))
        joblib.dump(std_targ_X, (path_out+'/adjustment_models'+'/target_std_model_X.sav'))
        joblib.dump(linreg_targ, (path_out+'/adjustment_models'+'/target_linreg.sav'))
        #features model
        joblib.dump(std_feat_y_dct, (path_out+'/adjustment_models'+'/features_std_model_y.sav'))
        joblib.dump(std_feat_X_dct, (path_out+'/adjustment_models'+'/features_std_model_X.sav'))
        joblib.dump(linreg_feat_dct, (path_out+'/adjustment_models'+'/features_linreg.sav'))


        ###standardize the residuals before modelling and keep the scalers
        #features
        std_models_features = {}
        for key in features_res1.keys():
            print('standartize ', key, datetime.now())
            std_model = StandardScaler()
            std_model.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(std_model.transform(features_res1[key].values),
                                              index=features_res1[key].index, 
                                              columns=features_res1[key].columns)
            std_models_features[key] = std_model
        #target
        std_model_target = StandardScaler()
        std_model_target.fit(y_res1.values.reshape(-1, 1))
        y_res1 = pd.DataFrame(std_model_target.transform(y_res1.values.reshape(-1, 1)),
                              index=y_res1.index)

        #save 
        os.mkdir(path_out+'/standartization_models')
        #target
        joblib.dump(std_model_target,  (path_out+'/standartization_models'+'/target_std_model.sav'))
        #features
        joblib.dump(std_models_features,  (path_out+'/standartization_models'+'/features_std_model.sav'))


        #save features table before PCA
        y_res1.to_csv(path_out+'/target_y_train1.csv')
        for key in features_res1.keys():
            features_res1[key].to_csv(path_out+'/'+str(key)+'_train1.csv')


        #PCA (75 components) for rest and task FC, fitted on the training subjects
        PCA_models = {}
        for key in ['rest', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']:
            print('reduction ', key, datetime.now())
            model_PCA =  PCA(n_components=75, random_state=11)
            model_PCA.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(model_PCA.transform(features_res1[key].values), 
                                              index=features_res1[key].index)
            PCA_models[key] = model_PCA
        #save PCA models
        os.mkdir(path_out+'/PCA_models')
        joblib.dump(PCA_models,  (path_out+'/PCA_models'+'/PCA_model.sav'))


        #standardize the principal-component scores again
        std_PC_feature_models = {}
        for key in ['rest', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']:
            print('standartize PC table ', key, datetime.now())
            std_PC_model = StandardScaler()
            std_PC_model.fit(features_res1[key].values)
            features_res1[key] = pd.DataFrame(std_PC_model.transform(features_res1[key].values),
                                              index=features_res1[key].index, 
                                              columns=features_res1[key].columns)
            std_PC_feature_models[key] = std_PC_model
            #save PCA tables
            features_res1[key].to_csv(path_out+'/'+key+'_PCA75_train1.csv')
        #save std PCA models
        os.mkdir(path_out+'/PCA_standardization_models')
        joblib.dump(std_PC_feature_models,  (path_out+'/PCA_standardization_models'+'/std_PCA_model.sav'))


        #Launch ElasticNet for every modality on the training subjects (1st level)

        dict_tasks={}
        dict_elnet_model={}
        dict_ypred1={}

        for key in list(features_res1.keys()):

            print('start ', str(key), datetime.now())   #print start time of calculations

            bpar1, bpar2, acc, mse, corr, model, y_pred1, mae = elnet(features_res1[key], y_res1) #ML
            dict_tasks[key] = acc, mse, mae, corr, bpar1, bpar2 
            dict_elnet_model[key] = model
            dict_ypred1[key] = y_pred1
        df_tasks = pd.DataFrame(dict_tasks, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
        df_y_pred1 = pd.DataFrame(dict_ypred1, index=y_res1.index)


        ###Save outputs from this step (models and all mod. perf.)

        #models
        for key in dict_elnet_model.keys():
            joblib.dump(dict_elnet_model[key], (path_out+'/'+str(key)+'_elnet_model.sav'))

        #model performance
        df_tasks.to_csv(path_out+'/1level_train_perf_elnet.csv')

        #first-level predictions of every modality model
        df_y_pred1.to_csv(path_out+'/1level_train_y_pred_singleML.csv')


        ### 2nd level ################################################################################
        print(' ')
        print('start 2nd level ', datetime.now())

        #### L2 Re-applying single ML models on train_index #########################################

        print('Checking single ML on test1 data ', datetime.now())

        #re-apply the fitted adjustment to the target; note that this uses
        #train_index again (there is no separate "test1" subset in this version)
        y_res2 = re_control_features(y, control, train_index, 
                                     std_targ_y, std_targ_X, linreg_targ)

        #control modalities with the fitted adjustment models
        features_res2 = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())
            if key in ['stroop', 'faces', 'facename', 'mid']:
                control_t = pd.concat([control, movements[key]], axis=1)
                features_res2[key] = re_control_features(features[key], control_t, y_res2.index, 
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])
            else:
                features_res2[key] = re_control_features(features[key], control, y_res2.index, 
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

        ###standardize with the scalers fitted at the 1st level
        #features
        for key in features_res2.keys():
            print('standartize ', key, datetime.now())
            features_res2[key] = pd.DataFrame(std_models_features[key].transform(features_res2[key].values),
                                              index=features_res2[key].index, 
                                              columns=features_res2[key].columns)
        #target
        y_res2 = pd.DataFrame(std_model_target.transform(y_res2.values.reshape(-1, 1)),
                              index=y_res2.index) 

        #save features table before PCA
        y_res2.to_csv(path_out+'/target_y_train2.csv')
        for key in features_res2.keys():
            features_res2[key].to_csv(path_out+'/'+str(key)+'_train2.csv')            


        #apply the fitted PCA models to rest and task FC
        for key in ['rest', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']:
            print('reduction ', key, datetime.now())
            features_res2[key] = pd.DataFrame(PCA_models[key].transform(features_res2[key].values), 
                                      index=features_res2[key].index)


        #apply the fitted scalers to the principal-component scores
        for key in ['rest', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']:
            print('standartize PCA ', key, datetime.now())
            features_res2[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res2[key].values),
                                              index=features_res2[key].index, 
                                              columns=features_res2[key].columns)
            #save std pc table
            features_res2[key].to_csv(path_out+'/'+key+'_PCA75_train2.csv')


        #apply the trained single ElasticNet models

        dict_y_pred2={}
        dict_y_pred2_per={}
        for key in list(features_res2.keys()):
            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res2[key], y_res2, dict_elnet_model[key]) #ML
            dict_y_pred2[key] = y_pred
            dict_y_pred2_per[key] = bacc, mse, mae, corr

        df_y_pred2 = pd.DataFrame(dict_y_pred2, index=ind_y)
        df_y_pred2_per = pd.DataFrame(dict_y_pred2_per, index=['best score r2', 'mse', 'mae','corr'])


        ###Save outputs from this step (models and all mod. perf.)

        #model performance
        df_y_pred2_per.to_csv(path_out+'/2level_test1_perf_elnet.csv')

        #predictions of every modality model
        df_y_pred2.to_csv(path_out+'/2level_test1_y_pred_singleML.csv')   


        #### L2 Calculating stacked ML models on the modality predictions ###########################

        print('Calculating stacked ML on test1 data ', datetime.now())    


        #modality subsets used as inputs of the individual stacked models
        set2 = ['stroop', 'faces', 'facename', 'mid']
        set3 = ['cort', 'surf', 'subc','brainVol', 'rest']
        set4 = ['stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']
        set5 = ['stroop', 'faces', 'facename', 'mid', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']
        set6 = ['stroop', 'faces', 'facename', 'mid', 'cort', 'surf', 'subc','brainVol', 'rest']
        set7 = ['stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC', 'cort', 'surf', 'subc','brainVol', 'rest']
        set8 = ['stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC', 'rest']
        set1 = list(df_y_pred2.columns) #all existing modalities


        #for the preset sets
        dict_st_perf1={}
        dict_st_models={}
        dict_st_ypred1={}
        dct_std_mod_for_stack = {}         #scaler per set
        dct_std_tab_for_stack = {}         #standardized stacking features per set
        dct_std_tab_before_for_stack = {}  #raw stacking features per set

        s=1
        for set_n in [set1, set2, set3, set4, set5, set6, set7, set8]:
            print('set '+str(s), datetime.now())

            #stacking features = predictions of the modality models in this set
            st_features = df_y_pred2.loc[:,set_n]
            dct_std_tab_before_for_stack['set'+str(s)] = st_features

            stack_std_model = StandardScaler().fit(st_features.values) 
            dct_std_mod_for_stack['set'+str(s)] = stack_std_model

            std_st_features = pd.DataFrame(stack_std_model.transform(st_features.values), 
                                           index=st_features.index, columns=st_features.columns) 
            dct_std_tab_for_stack['set'+str(s)] = std_st_features

            bpar1, bpar2, acc, mse, corr, model, y_pred3, mae = elnet(std_st_features, y_res2) #ML

            dict_st_perf1['set'+str(s)] = acc, mse, mae, corr, bpar1, bpar2 
            dict_st_models['set'+str(s)] = model
            dict_st_ypred1['set'+str(s)] = y_pred3
            s+=1

        df_st_perf1 = pd.DataFrame(dict_st_perf1, index=['best score r2', 'mse', 'mae','corr', 'best alpha', 'best l1_ratio'])
        df_st_ypred1 = pd.DataFrame(dict_st_ypred1, index=y_res2.index)        

        ###Save outputs from this step (models and all mod. perf.)

        #models
        for key in dict_st_models.keys():
            joblib.dump(dict_st_models[key], (path_out+'/'+str(key)+'_stacked_model.sav'))
        for key in dct_std_mod_for_stack.keys():
            joblib.dump(dct_std_mod_for_stack[key], (path_out+'/'+str(key)+'_stacked_STD_model.sav'))

        #performance and prediction
        df_st_perf1.to_csv(path_out+'/2level_test1_perf_stacked.csv')
        df_st_ypred1.to_csv(path_out+'/2level_test1_y_pred_stacked.csv')
        #one file per set: the set key is part of the file name, otherwise every
        #set would be written to the same file and only the last one would survive
        for key in dct_std_tab_for_stack.keys():
            dct_std_tab_for_stack[key].to_csv(path_out+'/'+str(key)+'_2level_stack_y_feature_tab_STD.csv')
            dct_std_tab_before_for_stack[key].to_csv(path_out+'/'+str(key)+'_2level_stack_y_feature_tab_beforeSTD.csv')


        ### 3rd level ################################################################################
        print(' ')
        print('start 3rd level ', datetime.now())


        #### L3 Testing single ML models on test_index #############################################

        print('Checking single ML on test2 data ', datetime.now())

        #re-apply the fitted adjustment to the held-out subjects (test_index)
        y_res3 = re_control_features(y, control, test_index, 
                                     std_targ_y, std_targ_X, linreg_targ)

        #control modalities with the fitted adjustment models
        features_res3 = {}
        for key in features.keys():
            print('controlling ', key, datetime.now())
            if key in ['stroop', 'faces', 'facename', 'mid']:
                control_t = pd.concat([control, movements[key]], axis=1)
                features_res3[key] = re_control_features(features[key], control_t, y_res3.index, 
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])
            else:
                features_res3[key] = re_control_features(features[key], control, y_res3.index, 
                                                     std_feat_y_dct[key], std_feat_X_dct[key], linreg_feat_dct[key])

        ###standardize with the scalers fitted at the 1st level
        #features
        for key in features_res3.keys():
            print('standartize ', key, datetime.now())
            features_res3[key] = pd.DataFrame(std_models_features[key].transform(features_res3[key].values),
                                              index=features_res3[key].index, 
                                              columns=features_res3[key].columns)
        #target
        y_res3 = pd.DataFrame(std_model_target.transform(y_res3.values.reshape(-1, 1)),
                              index=y_res3.index) 

        #save features table before PCA
        y_res3.to_csv(path_out+'/target_y_test.csv')
        for key in features_res3.keys():
            features_res3[key].to_csv(path_out+'/'+str(key)+'_test.csv')            


        #apply the fitted PCA models to rest and task FC
        for key in ['rest', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']:
            print('reduction ', key, datetime.now())
            features_res3[key] = pd.DataFrame(PCA_models[key].transform(features_res3[key].values), 
                                      index=features_res3[key].index)


        #apply the fitted scalers to the principal-component scores
        for key in ['rest', 'stroop_FC', 'faces_FC', 'facename_FC', 'mid_FC']:
            print('standartize PCA ', key, datetime.now())
            features_res3[key] = pd.DataFrame(std_PC_feature_models[key].transform(features_res3[key].values),
                                              index=features_res3[key].index, 
                                              columns=features_res3[key].columns)
            #save std pc table
            features_res3[key].to_csv(path_out+'/'+key+'_PCA75_test.csv')  


        #apply the trained single ElasticNet models to the held-out subjects

        dict_y_pred3={}
        dict_y_pred3_per={}
        for key in list(features_res3.keys()):
            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(features_res3[key], y_res3, dict_elnet_model[key]) #ML
            dict_y_pred3[key] = y_pred
            dict_y_pred3_per[key] = bacc, mse, mae, corr

        df_y_pred3 = pd.DataFrame(dict_y_pred3, index=ind_y)
        df_y_pred3_per = pd.DataFrame(dict_y_pred3_per, index=['best score r2', 'mse', 'mae','corr'])


        ###Save outputs from this step (models and all mod. perf.)

        #model performance
        df_y_pred3_per.to_csv(path_out+'/3level_test2_perf_elnet.csv')

        #predictions of every modality model
        df_y_pred3.to_csv(path_out+'/3level_test2_y_pred_singleML.csv')        


        #### L3 Testing stacked ML models on test_index #############################################

        print('Calculating stacked ML on test2 data ', datetime.now()) 

        #apply the trained stacked ElasticNet models to the held-out subjects

        #for the preset sets
        dict_st_perf2={}
        dict_st_ypred2={}

        dct_std3_tab_for_stack = {}         #standardized stacking features per set
        dct_std3_tab_before_for_stack = {}  #raw stacking features per set

        s=1
        for set_n in [set1, set2, set3, set4, set5, set6, set7, set8]:

            ftrs = df_y_pred3.loc[:, set_n]
            dct_std3_tab_before_for_stack['set'+str(s)] = ftrs

            #scale with the scaler fitted for this set at the 2nd level
            std_ftrs = pd.DataFrame(dct_std_mod_for_stack['set'+str(s)].transform(ftrs.values), 
                                    index=ftrs.index,columns=ftrs.columns)
            dct_std3_tab_for_stack['set'+str(s)] = std_ftrs

            y_pred, y_real, ind_y, bacc, mse, corr, mae = reaply_ElNet(std_ftrs, y_res3, dict_st_models[('set'+str(s))]) #ML
            dict_st_ypred2[('set'+str(s))] = y_pred
            dict_st_perf2[('set'+str(s))] = bacc, mse, mae, corr
            s+=1

        df_st_ypred2 = pd.DataFrame(dict_st_ypred2, index=ind_y)
        df_st_perf2 = pd.DataFrame(dict_st_perf2, index=['best score r2', 'mse', 'mae','corr'])        

        ###Save outputs from this step (models and all mod. perf.)

        #performance and prediction
        df_st_perf2.to_csv(path_out+'/3level_test2_perf_stacked.csv')
        df_st_ypred2.to_csv(path_out+'/3level_test2_y_pred_stacked.csv') 
        #one file per set (set key in the file name) so that the sets do not
        #overwrite each other
        for key in dct_std3_tab_for_stack.keys():
            dct_std3_tab_for_stack[key].to_csv(path_out+'/'+str(key)+'_3level_stack_y_feature_tab_STD.csv')
            dct_std3_tab_before_for_stack[key].to_csv(path_out+'/'+str(key)+'_3level_stack_y_feature_tab_beforeSTD.csv') 


        print(' ')
        print('finished to calculate the Fold #', i)
        print(datetime.now())

        i+=1

    print(' ')
    print('finished the MODEL '+COL)
    print(datetime.now())


    # Restore the feature tables from the untouched backup and take a fresh
    # backup for the next target.
    features = features2
    features2 = copy.deepcopy(features)

fsiq45
 
started to calculate the Fold # 0
2023-01-26 22:31:36.839605
 
start 1st level  2023-01-26 22:31:36.841334
controlling  stroop 2023-01-26 22:31:36.895483
controlling  faces 2023-01-26 22:31:37.242338
controlling  facename 2023-01-26 22:31:37.612097
controlling  mid 2023-01-26 22:31:37.892079
controlling  stroop_FC 2023-01-26 22:31:38.180354
controlling  faces_FC 2023-01-26 22:32:25.184514
controlling  facename_FC 2023-01-26 22:33:13.697367
controlling  mid_FC 2023-01-26 22:34:01.593486
controlling  cort 2023-01-26 22:34:51.015424
controlling  surf 2023-01-26 22:34:51.167276
controlling  subc 2023-01-26 22:34:51.287895
controlling  rest 2023-01-26 22:34:51.303470
controlling  brainVol 2023-01-26 22:35:40.577844
standartize  stroop 2023-01-26 22:37:45.139464
standartize  faces 2023-01-26 22:37:45.147044
standartize  facename 2023-01-26 22:37:45.149267
standartize  mid 2023-01-26 22:37:45.151493
standartize  stroop_FC 2023-01-26 22:37:45.153780
standartize  faces_FC 2023-01-26 22

standartize PC table  mid_FC 2023-01-26 23:19:39.365286
start  stroop 2023-01-26 23:19:39.418645
start  faces 2023-01-26 23:19:48.309014
start  facename 2023-01-26 23:19:56.833787
start  mid 2023-01-26 23:20:04.008462
start  stroop_FC 2023-01-26 23:20:12.394185
start  faces_FC 2023-01-26 23:20:19.753018
start  facename_FC 2023-01-26 23:20:27.758026
start  mid_FC 2023-01-26 23:20:35.264474
start  cort 2023-01-26 23:20:42.794451
start  surf 2023-01-26 23:20:51.710161
start  subc 2023-01-26 23:21:00.152565
start  rest 2023-01-26 23:21:07.300149
start  brainVol 2023-01-26 23:21:15.314538
 
start 2nd level  2023-01-26 23:21:23.198730
Checking single ML on test1 data  2023-01-26 23:21:23.199211
controlling  stroop 2023-01-26 23:21:23.200851
controlling  faces 2023-01-26 23:21:23.301455
controlling  facename 2023-01-26 23:21:23.395480
controlling  mid 2023-01-26 23:21:23.489320
controlling  stroop_FC 2023-01-26 23:21:23.583782
controlling  faces_FC 2023-01-26 23:21:37.361453
controlling  face

standartize  cort 2023-01-26 23:54:53.149235
standartize  surf 2023-01-26 23:54:53.150138
standartize  subc 2023-01-26 23:54:53.150779
standartize  rest 2023-01-26 23:54:53.151223
standartize  brainVol 2023-01-26 23:54:53.292419
reduction  rest 2023-01-27 00:05:03.296776
reduction  stroop_FC 2023-01-27 00:05:03.493643
reduction  faces_FC 2023-01-27 00:05:03.644162
reduction  facename_FC 2023-01-27 00:05:03.801023
reduction  mid_FC 2023-01-27 00:05:03.963375
standartize PCA  rest 2023-01-27 00:05:04.124816
standartize PCA  stroop_FC 2023-01-27 00:05:04.183820
standartize PCA  faces_FC 2023-01-27 00:05:04.241437
standartize PCA  facename_FC 2023-01-27 00:05:04.292624
standartize PCA  mid_FC 2023-01-27 00:05:04.339106
Calculating stacked ML on test1 data  2023-01-27 00:05:04.436169
set 1 2023-01-27 00:05:04.436670
set 2 2023-01-27 00:05:13.504910
set 3 2023-01-27 00:05:18.894839
set 4 2023-01-27 00:05:26.948283
set 5 2023-01-27 00:05:34.770451
set 6 2023-01-27 00:05:42.229489
set 7 2023-0

controlling  brainVol 2023-01-27 00:39:41.679743
standartize  stroop 2023-01-27 00:39:41.686779
standartize  faces 2023-01-27 00:39:41.692085
standartize  facename 2023-01-27 00:39:41.697161
standartize  mid 2023-01-27 00:39:41.701885
standartize  stroop_FC 2023-01-27 00:39:41.702348
standartize  faces_FC 2023-01-27 00:39:41.727657
standartize  facename_FC 2023-01-27 00:39:41.753089
standartize  mid_FC 2023-01-27 00:39:41.778229
standartize  cort 2023-01-27 00:39:41.803539
standartize  surf 2023-01-27 00:39:41.804168
standartize  subc 2023-01-27 00:39:41.804523
standartize  rest 2023-01-27 00:39:41.804846
standartize  brainVol 2023-01-27 00:39:41.830952
reduction  rest 2023-01-27 00:41:21.980929
reduction  stroop_FC 2023-01-27 00:41:22.049085
reduction  faces_FC 2023-01-27 00:41:22.121742
reduction  facename_FC 2023-01-27 00:41:22.210896
reduction  mid_FC 2023-01-27 00:41:22.274629
standartize PCA  rest 2023-01-27 00:41:22.337444
standartize PCA  stroop_FC 2023-01-27 00:41:22.349835
st

controlling  mid_FC 2023-01-27 01:16:10.646004
controlling  cort 2023-01-27 01:17:00.042845
controlling  surf 2023-01-27 01:17:00.207196
controlling  subc 2023-01-27 01:17:00.344868
controlling  rest 2023-01-27 01:17:00.362202
controlling  brainVol 2023-01-27 01:17:49.778302
standartize  stroop 2023-01-27 01:19:54.532768
standartize  faces 2023-01-27 01:19:54.540810
standartize  facename 2023-01-27 01:19:54.543103
standartize  mid 2023-01-27 01:19:54.545397
standartize  stroop_FC 2023-01-27 01:19:54.547674
standartize  faces_FC 2023-01-27 01:19:54.903202
standartize  facename_FC 2023-01-27 01:19:55.256597
standartize  mid_FC 2023-01-27 01:19:55.606021
standartize  cort 2023-01-27 01:19:55.951590
standartize  surf 2023-01-27 01:19:55.952993
standartize  subc 2023-01-27 01:19:55.953915
standartize  rest 2023-01-27 01:19:55.954534
standartize  brainVol 2023-01-27 01:19:56.301357
reduction  rest 2023-01-27 01:29:14.589793
reduction  stroop_FC 2023-01-27 01:29:15.855574
reduction  faces_FC 

start  mid_FC 2023-01-27 02:02:55.003840
start  cort 2023-01-27 02:03:02.912274
start  surf 2023-01-27 02:03:11.463386
start  subc 2023-01-27 02:03:19.922396
start  rest 2023-01-27 02:03:27.492585
start  brainVol 2023-01-27 02:03:34.883187
 
start 2nd level  2023-01-27 02:03:42.760769
Checking single ML on test1 data  2023-01-27 02:03:42.760987
controlling  stroop 2023-01-27 02:03:42.762764
controlling  faces 2023-01-27 02:03:42.862862
controlling  facename 2023-01-27 02:03:42.956891
controlling  mid 2023-01-27 02:03:43.055754
controlling  stroop_FC 2023-01-27 02:03:43.150119
controlling  faces_FC 2023-01-27 02:03:57.906055
controlling  facename_FC 2023-01-27 02:04:11.316428
controlling  mid_FC 2023-01-27 02:04:25.033311
controlling  cort 2023-01-27 02:04:38.241464
controlling  surf 2023-01-27 02:04:38.288068
controlling  subc 2023-01-27 02:04:38.313112
controlling  rest 2023-01-27 02:04:38.317006
controlling  brainVol 2023-01-27 02:04:51.806214
standartize  stroop 2023-01-27 02:04:51.

reduction  faces_FC 2023-01-27 02:47:43.228281
reduction  facename_FC 2023-01-27 02:47:43.363992
reduction  mid_FC 2023-01-27 02:47:43.500331
standartize PCA  rest 2023-01-27 02:47:43.635943
standartize PCA  stroop_FC 2023-01-27 02:47:43.687189
standartize PCA  faces_FC 2023-01-27 02:47:43.736226
standartize PCA  facename_FC 2023-01-27 02:47:43.786042
standartize PCA  mid_FC 2023-01-27 02:47:43.835294
Calculating stacked ML on test1 data  2023-01-27 02:47:43.930746
set 1 2023-01-27 02:47:43.931217
set 2 2023-01-27 02:47:52.971255
set 3 2023-01-27 02:48:00.878804
set 4 2023-01-27 02:48:11.264038
set 5 2023-01-27 02:48:19.802364
set 6 2023-01-27 02:48:27.425223
set 7 2023-01-27 02:48:34.731041
set 8 2023-01-27 02:48:44.024618
 
start 3rd level  2023-01-27 02:48:52.311756
Checking single ML on test2 data  2023-01-27 02:48:52.311789
controlling  stroop 2023-01-27 02:48:52.313210
controlling  faces 2023-01-27 02:48:52.412134
controlling  facename 2023-01-27 02:48:52.505366
controlling  mid 

reduction  rest 2023-01-27 03:24:15.832640
reduction  stroop_FC 2023-01-27 03:24:15.899106
reduction  faces_FC 2023-01-27 03:24:15.957462
reduction  facename_FC 2023-01-27 03:24:16.013370
reduction  mid_FC 2023-01-27 03:24:16.074009
standartize PCA  rest 2023-01-27 03:24:16.136165
standartize PCA  stroop_FC 2023-01-27 03:24:16.148627
standartize PCA  faces_FC 2023-01-27 03:24:16.159790
standartize PCA  facename_FC 2023-01-27 03:24:16.170942
standartize PCA  mid_FC 2023-01-27 03:24:16.182397
Calculating stacked ML on test2 data  2023-01-27 03:24:16.234746
 
finished to calculate the Fold # 1
2023-01-27 03:24:16.450323
 
started to calculate the Fold # 2
2023-01-27 03:24:16.450653
 
start 1st level  2023-01-27 03:24:16.451827
controlling  stroop 2023-01-27 03:24:17.176341
controlling  faces 2023-01-27 03:24:17.547995
controlling  facename 2023-01-27 03:24:17.825353
controlling  mid 2023-01-27 03:24:18.104061
controlling  stroop_FC 2023-01-27 03:24:18.384486
controlling  faces_FC 2023-01-

standartize  cort 2023-01-27 04:02:46.790688
standartize  surf 2023-01-27 04:02:46.792088
standartize  subc 2023-01-27 04:02:46.793073
standartize  rest 2023-01-27 04:02:46.793716
standartize  brainVol 2023-01-27 04:02:47.142296
reduction  rest 2023-01-27 04:12:03.730134
reduction  stroop_FC 2023-01-27 04:12:04.852156
reduction  faces_FC 2023-01-27 04:12:05.979842
reduction  facename_FC 2023-01-27 04:12:07.090505
reduction  mid_FC 2023-01-27 04:12:08.188411
standartize PC table  rest 2023-01-27 04:12:09.736343
standartize PC table  stroop_FC 2023-01-27 04:12:09.790755
standartize PC table  faces_FC 2023-01-27 04:12:09.839035
standartize PC table  facename_FC 2023-01-27 04:12:09.888476
standartize PC table  mid_FC 2023-01-27 04:12:09.942456
start  stroop 2023-01-27 04:12:10.060192
start  faces 2023-01-27 04:12:20.610329
start  facename 2023-01-27 04:12:29.095293
start  mid 2023-01-27 04:12:37.778735
start  stroop_FC 2023-01-27 04:12:46.408258
start  faces_FC 2023-01-27 04:12:53.880651
s

controlling  mid_FC 2023-01-27 04:47:12.702664
controlling  cort 2023-01-27 04:47:26.360756
controlling  surf 2023-01-27 04:47:26.407235
controlling  subc 2023-01-27 04:47:26.432176
controlling  rest 2023-01-27 04:47:26.436043
controlling  brainVol 2023-01-27 04:47:39.478985
standartize  stroop 2023-01-27 04:47:39.491330
standartize  faces 2023-01-27 04:47:39.497374
standartize  facename 2023-01-27 04:47:39.503482
standartize  mid 2023-01-27 04:47:39.505606
standartize  stroop_FC 2023-01-27 04:47:39.506523
standartize  faces_FC 2023-01-27 04:47:39.646735
standartize  facename_FC 2023-01-27 04:47:39.787278
standartize  mid_FC 2023-01-27 04:47:39.928211
standartize  cort 2023-01-27 04:47:40.068639
standartize  surf 2023-01-27 04:47:40.069542
standartize  subc 2023-01-27 04:47:40.070108
standartize  rest 2023-01-27 04:47:40.070543
standartize  brainVol 2023-01-27 04:47:40.210820
reduction  rest 2023-01-27 04:57:55.025933
reduction  stroop_FC 2023-01-27 04:57:55.177515
reduction  faces_FC 

 
start 3rd level  2023-01-27 05:31:21.277763
Checking single ML on test2 data  2023-01-27 05:31:21.278286
controlling  stroop 2023-01-27 05:31:21.280085
controlling  faces 2023-01-27 05:31:21.382120
controlling  facename 2023-01-27 05:31:21.472729
controlling  mid 2023-01-27 05:31:21.564627
controlling  stroop_FC 2023-01-27 05:31:21.654966
controlling  faces_FC 2023-01-27 05:31:35.100351
controlling  facename_FC 2023-01-27 05:31:47.787557
controlling  mid_FC 2023-01-27 05:32:00.814833
controlling  cort 2023-01-27 05:32:13.884286
controlling  surf 2023-01-27 05:32:13.927250
controlling  subc 2023-01-27 05:32:13.951381
controlling  rest 2023-01-27 05:32:13.955084
controlling  brainVol 2023-01-27 05:32:27.071120
standartize  stroop 2023-01-27 05:32:27.078249
standartize  faces 2023-01-27 05:32:27.083586
standartize  facename 2023-01-27 05:32:27.088622
standartize  mid 2023-01-27 05:32:27.093024
standartize  stroop_FC 2023-01-27 05:32:27.093496
standartize  faces_FC 2023-01-27 05:32:27.11

Calculating stacked ML on test2 data  2023-01-27 06:06:34.197757
 
finished to calculate the Fold # 6
2023-01-27 06:06:34.306040
 
finished the MODEL ChildhdIQ
2023-01-27 06:06:34.306122


### Copy files to new directories

In [14]:
# Root folder of the ML tables and the two existing result folders
# (adult IQ "fsiq45" and childhood IQ "ChildhdIQ") that later cells read from.
path = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/'
path_adult = path+'output_test_newADJ_fsiq45/'
path_child = path+'output_test_newADJ_ChildhdIQ/'

# Output tree for the re-derived targets, one sub-folder per target variant.
# os.makedirs(..., exist_ok=True) creates the parent folder as needed and keeps
# this cell re-runnable; plain os.mkdir raised FileExistsError on a second run.
for target_dir in ['IQch_raw', 'IQ45_raw', 'IQres_raw', 'IQres_adj']:
    os.makedirs(path+'output_test_newADJ_new_targets/'+target_dir, exist_ok=True)

In [15]:
# Names of all entries in the adult-IQ result folder, in lexicographic order;
# later cells iterate over this list as the cross-validation folds.
folds = os.listdir(path_adult)
folds.sort()
print(folds)

['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6']


In [16]:
# info.csv holds the demography column first and the target columns after it.
# Read the file once and slice it, instead of parsing the same CSV twice.
dirs='/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/'
info = pd.read_csv(dirs+'info.csv', index_col=0)

# targets table: every column after the first one
targ = info.iloc[:,1:]

# demography: the first column only
demo = info.iloc[:,0]

# single controlling parameter: the demography column label-encoded to integers
# and exposed under the name 'sex'
sex_coded = pd.Series(LabelEncoder().fit_transform(demo.values), index=demo.index, name='sex')

# covariate(s) used when adjusting the targets in later cells
control = sex_coded

In [19]:
# For every fold, export the raw (unadjusted) adult and childhood IQ values
# using exactly the subject split of the existing adult-IQ run.
for fold in folds:
    # Only the subject index of each split file is used; the values are ignored.
    ind_t1 = pd.read_csv(path_adult+fold+'/target_y_train1.csv', index_col=0).index
    ind_t2 = pd.read_csv(path_adult+fold+'/target_y_train2.csv', index_col=0).index
    ind_tt = pd.read_csv(path_adult+fold+'/target_y_test.csv', index_col=0).index

    # (column in `targ`, output sub-folder) pairs -- one export per target.
    for column, out_name in [('fsiq45', 'IQ45_raw'), ('ChildhdIQ', 'IQch_raw')]:
        out_dir = path+'output_test_newADJ_new_targets/'+out_name+'/'+fold
        # exist_ok=True: re-running the cell overwrites the CSVs instead of
        # failing with FileExistsError as os.mkdir did.
        os.makedirs(out_dir, exist_ok=True)

        for inds, sset in zip([ind_t1, ind_t2, ind_tt], ['train1', 'train2', 'test']):
            targ[column].reindex(index=inds).to_csv(out_dir+'/target_y_'+sset+'.csv')

### Adjusting adult IQ for childhood IQ (residualization)

In [20]:
# Residualise adult IQ (fsiq45) on childhood IQ within each fold:
#   y_res = adult_IQ - LinearRegression(standardised child_IQ)
#
# The scaler and the regression are fitted on train1 ONLY and then applied
# unchanged to train2 and test. The previous version re-fitted both on every
# subset (including test), which leaks test-set information into the target and
# contradicted its own "Fit to the training set" comment.
for fold in folds:
    iq45_dir = path+'output_test_newADJ_new_targets/IQ45_raw/'+fold
    iqch_dir = path+'output_test_newADJ_new_targets/IQch_raw/'+fold
    res_dir = path+'output_test_newADJ_new_targets/IQres_raw/'+fold
    # exist_ok=True keeps the cell re-runnable (os.mkdir raised on a 2nd run).
    os.makedirs(res_dir, exist_ok=True)

    iqch_scaler = None
    iq_adjust_model = None

    # 'train1' must stay first: it is the subset the models are fitted on.
    for sset in ['train1', 'train2', 'test']:

        # Exact file names instead of glob(...)[0], which silently picked an
        # arbitrary file when more than one matched. `.squeeze('columns')`
        # replaces read_csv(squeeze=True), which was removed in pandas 2.0.
        y = pd.read_csv(iq45_dir+'/target_y_'+sset+'.csv', index_col=0).squeeze('columns')
        X = pd.read_csv(iqch_dir+'/target_y_'+sset+'.csv', index_col=0).squeeze('columns')
        indx = X.index

        #reshaping data: one feature column for sklearn, flat target vector
        X = X.values.reshape(-1, 1)
        y = y.values.ravel()

        if sset == 'train1':
            #Standardize X and fit the adjustment model on the training set
            iqch_scaler = StandardScaler().fit(X)
            iq_adjust_model = LinearRegression().fit(iqch_scaler.transform(X), y)

        #apply the train1-fitted models to the current subset
        y_pred = iq_adjust_model.predict(iqch_scaler.transform(X))

        y_res = pd.Series(y - y_pred, index=indx)

        #save
        y_res.to_csv(res_dir+'/target_y_'+sset+'.csv')


In [21]:
#
#
# For every fold: adjust the residualised IQ target for the covariates in
# `control`, standardise it, and store both the fitted models and the adjusted
# targets. All models are fitted on train1 and re-applied to train2 / test.
for fold in folds:
    raw_dir = path+'output_test_newADJ_new_targets/IQres_raw/'+fold
    adj_dir = path+'output_test_newADJ_new_targets/IQres_adj/'+fold
    adjustment_dir = adj_dir+'/adjustment_models'
    std_dir = adj_dir+'/standartization_models'

    # exist_ok=True keeps the cell re-runnable; os.mkdir raised FileExistsError
    # on a second execution.
    for out_dir in (adj_dir, adjustment_dir, std_dir):
        os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): index_col=0 without squeezing yields one-column DataFrames,
    # so control_features does not take its `pd.Series` branch -- confirm the
    # returned objects are what the code below expects.
    train1 = pd.read_csv(raw_dir+'/target_y_train1.csv', index_col=0)
    train2 = pd.read_csv(raw_dir+'/target_y_train2.csv', index_col=0)
    test = pd.read_csv(raw_dir+'/target_y_test.csv', index_col=0)

    #control y (target) for the covariates held in `control`
    y_res1, std_targ_y, std_targ_X, linreg_targ = control_features(train1, control, train1.index)

    #save adjustment models (target)
    joblib.dump(std_targ_y, (adjustment_dir+'/target_std_model_y.sav'))
    joblib.dump(std_targ_X, (adjustment_dir+'/target_std_model_X.sav'))
    joblib.dump(linreg_targ, (adjustment_dir+'/target_linreg.sav'))

    ###standardize before model and keep std models
    #target: scaler is fitted on the adjusted train1 target only
    std_model_target = StandardScaler()
    std_model_target.fit(y_res1.values.reshape(-1, 1))
    y_res1 = pd.DataFrame(std_model_target.transform(y_res1.values.reshape(-1, 1)),
                          index=y_res1.index)

    #save the target standardisation model
    joblib.dump(std_model_target, (std_dir+'/target_std_model.sav'))

    #save y_res1
    y_res1.to_csv(adj_dir+'/target_y_train1.csv')

    for subset, sset in zip([train2, test], ['train2', 'test']):

        #control y (target) with the models fitted on train1
        y_res2 = re_control_features(subset, control, subset.index,
                                     std_targ_y, std_targ_X, linreg_targ)

        ###standardize with the train1-fitted scaler (no re-fitting)
        y_res2 = pd.DataFrame(std_model_target.transform(y_res2.values.reshape(-1, 1)),
                              index=y_res2.index)

        y_res2.to_csv(adj_dir+'/target_y_'+sset+'.csv')

    
    

In [27]:
# Output tree for the single-training-set layout: one folder for the modality
# tables and one per adjusted target. exist_ok=True makes the cell re-runnable;
# the original calls raised FileExistsError once the folders existed.
for main_set_dir in ['ML_modalities', 'IQ45_adj', 'IQch_adj']:
    os.makedirs('/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/'+main_set_dir+'/',
                exist_ok=True)

In [28]:
# Copy the per-fold modality tables (train1 and test) of the adult-IQ run into
# the OneTrain layout. Target files, the 'rest' table and the *_FC tables are
# not copied; their paths are printed instead.
pth = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/'
pth2 = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/ML_modalities/'


# First seven sorted entries -- assumes these are Fold_0..Fold_6 (TODO confirm
# nothing else in the folder sorts before them).
folds = sorted(os.listdir(pth))[:7]
print(folds)

for fold in folds:
    # exist_ok=True: a re-run overwrites the copies instead of raising.
    os.makedirs(pth2+fold, exist_ok=True)

    for sset in ['train1', 'test']:
        files=sorted(glob.glob(pth+fold+'/*'+sset+'.csv'))
        for file in files:
            # Match on the file name only. Testing the full path would make
            # every file match as soon as a directory name contained one of
            # these substrings.
            fname = os.path.basename(file)
            if 'target' in fname or 'rest_t' in fname or '_FC_t' in fname:
                print(file)
            else:
                shutil.copyfile(file, pth2+fold+'/'+fname)
        print()

['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6']
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/facename_FC_train1.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/faces_FC_train1.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/mid_FC_train1.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/rest_train1.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/stroop_FC_train1.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/target_y_train1.csv

/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/facename_FC_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_0/faces_FC_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fs

/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/target_y_train1.csv

/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/facename_FC_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/faces_FC_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/mid_FC_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/rest_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/stroop_FC_test.csv
/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/Fold_6/target_y_test.csv



In [29]:
# Copy the per-fold target files (train1 and test) of the adult-IQ run into the
# OneTrain layout.
pth = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_fsiq45/'
pth2 = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/IQ45_adj/'


# First seven sorted entries -- assumes these are Fold_0..Fold_6 (TODO confirm
# nothing else in the folder sorts before them).
folds = sorted(os.listdir(pth))[:7]
print(folds)

for fold in folds:
    # exist_ok=True: a re-run overwrites the copies instead of raising.
    os.makedirs(pth2+fold, exist_ok=True)

    for sset in ['train1', 'test']:
        files=sorted(glob.glob(pth+fold+'/*'+sset+'.csv'))
        for file in files:
            # Match on the file name only, so a directory whose name contains
            # 'target' cannot make every file in it qualify.
            fname = os.path.basename(file)
            if 'target' in fname:
                shutil.copyfile(file, pth2+fold+'/'+fname)


['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6']


In [30]:
# Copy the per-fold target files (train1 and test) of the childhood-IQ run into
# the OneTrain layout.
pth = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/MLTabs/output_test_newADJ_ChildhdIQ/'
pth2 = '/media/data/Dunedin_Study_Data_Narun_P_Jan2022/New_MLTabs_OneTrain/main_set/IQch_adj/'


# First seven sorted entries -- assumes these are Fold_0..Fold_6 (TODO confirm
# nothing else in the folder sorts before them).
folds = sorted(os.listdir(pth))[:7]
print(folds)

for fold in folds:
    # exist_ok=True: a re-run overwrites the copies instead of raising.
    os.makedirs(pth2+fold, exist_ok=True)

    for sset in ['train1', 'test']:
        files=sorted(glob.glob(pth+fold+'/*'+sset+'.csv'))
        for file in files:
            # Match on the file name only, so a directory whose name contains
            # 'target' cannot make every file in it qualify.
            fname = os.path.basename(file)
            if 'target' in fname:
                shutil.copyfile(file, pth2+fold+'/'+fname)


['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6']
