In [1]:
from IPython.display import display
import numpy as np
import pandas as pd
import os
import pickle
from pathlib import Path
from sklearn.linear_model import ElasticNet

# stacked model

In [5]:
##############################
# Configuration for the stacked-model section.
use_reg = 'eNet'

# When True the '<modality>_<set>_std.csv' feature files are read,
# otherwise the plain '<modality>_<set>.csv' files.
use_sd_files = True
# Blank placeholders: fill these in before running the notebook.
path_input = ''       # directory that contains one 'Fold*' sub-directory per fold
path_input_para = ''  # prefix prepended to the pickle file names opened by later cells
path_output = ''      # created below; later cells prepend it to the CSV names they write

Path(path_output).mkdir(parents=True, exist_ok=True)

## load data -layer1 ############################
target={} #dict for target:   target[fold][set]             -> DataFrame
features={} #dict for features: features[fold][set][modality] -> DataFrame


def add_suffix(x):
    """Return the feature-file suffix: '_std.csv' when `x` is truthy, else '.csv'.

    The previous lambda ignored its argument and read the global
    `use_sd_files`; every call site passes that flag, so results are unchanged.
    """
    return '_std.csv' if x else '.csv'


layer1_sets = ['train1', 'train2', 'test']
layer1_mods = ['wm', 'lan', 'rel', 'mot', 'emo', 'soc', 'gam', 'rest-pca75', 'surf', 'VolBrain', 'subc', 'cort']

for fold in sorted(os.listdir(path_input)):
    # Only directory entries whose name contains 'Fold' are treated as folds.
    if 'Fold' not in fold:
        continue
    # os.path.join works whether or not path_input ends with a separator,
    # matching what os.listdir above already accepts.
    fold_dir = os.path.join(path_input, fold)

    # Targets are read without a header row; the first column becomes the index.
    target[fold] = {
        sett: pd.read_csv(os.path.join(fold_dir, 'target_y_' + sett + '.csv'), index_col=0, header=None)
        for sett in layer1_sets
    }

    # One feature table per (set, modality); the first column becomes the index.
    features[fold] = {
        sett: {
            mod: pd.read_csv(os.path.join(fold_dir, mod + '_' + sett + add_suffix(use_sd_files)), index_col=0)
            for mod in layer1_mods
        }
        for sett in layer1_sets
    }

In [None]:
#load layer1 para and train
# For every modality, refit an ElasticNet on the 'train1' split of each fold using the
# hyper-parameters stored in the layer-1 pickle, then display and save the coefficients
# (one column per fold) as 'coef_<modality>.csv'.
fir_reg ='eNet'
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open(path_input_para+fir_reg+'_layer1_output.pkl', 'rb') as f:
    dic = pickle.load(f)

fold_names = [f'Fold_{i}' for i in range(8)]

for mod in ['wm', 'lan', 'rest-pca75', 'rel', 'mot', 'emo', 'soc', 'surf', 'VolBrain', 'subc', 'cort', 'gam']:
    coef_by_fold = {}
    for fold in fold_names:
        # Stored hyper-parameters for this modality / fold.
        best = dic[mod]['best_para'][fold]
        reg = ElasticNet(alpha=best['alpha'], l1_ratio=best['l1_ratio'], max_iter=best['max_iter'])
        reg.fit(features[fold]['train1'][mod], target[fold]['train1'])
        coef_by_fold[fold] = reg.coef_
    # Columns follow fold order; rows keep the default integer index.
    coef = pd.DataFrame(coef_by_fold)
    display(coef)
    coef.to_csv(path_output+'coef_'+mod+'.csv')

In [None]:
#layer2 coef
# Refit the layer-2 (stacking) ElasticNet for each set of modalities. The inputs are the
# layer-1 'train2_pred' predictions of the selected modalities, one column per modality;
# the coefficients (rows = modalities, columns = folds) are saved as 'coef_<set>.csv'.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open(path_input_para+'eNet_layer1_output.pkl', 'rb') as f:
    data = pickle.load(f)

layer2_folds = ['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6', 'Fold_7']

# Modalities that feed each stacked model (insertion order = processing order).
layer2_modality_sets = {
    'all': ['wm', 'lan', 'rest-pca75', 'rel', 'mot', 'emo', 'soc', 'surf', 'VolBrain', 'subc', 'cort', 'gam'],
    'task': ['wm', 'lan', 'rel', 'mot', 'emo', 'soc', 'gam'],
    'non-task': ['rest-pca75','surf', 'VolBrain', 'subc', 'cort'],
    'top-task': ['wm', 'lan', 'rel'],
}

# The train2 targets do not depend on the modality set, so each fold's file is read
# once here instead of once per (modality set, fold) pair.
y_train_by_fold = {
    fold: pd.read_csv(os.path.join(path_input, fold, 'target_y_train2.csv'), index_col=0, header=None).values[:,0]
    for fold in layer2_folds
}

for stacked_moda, use_moda in layer2_modality_sets.items():
    with open(path_input_para+'layer2_'+stacked_moda+'_'+'eNet_eNet.pkl', 'rb') as f:
        dic = pickle.load(f)

    coef = pd.DataFrame(index=use_moda)
    for fold in layer2_folds:
        # One column of layer-1 predictions per modality, in use_moda order.
        X_train = np.hstack([data[moda]['train2_pred'][fold].reshape(-1,1) for moda in use_moda])
        best = dic['best_para'][fold]
        reg = ElasticNet(alpha=best['alpha'], l1_ratio=best['l1_ratio'], max_iter=best['max_iter'])
        reg.fit(X_train, y_train_by_fold[fold])
        coef[fold] = reg.coef_
    coef.to_csv(path_output+'coef_'+stacked_moda+'.csv')


# reliability test

In [8]:
# Locations used by the reliability-test cells -- blank placeholders, fill in before running.
# Each value is prepended verbatim to a file name, so include any trailing separator.
path_output = ''       # prefix of the 'coef_<name>.csv' files that get written
path_input_para = ''   # prefix of the 'retest_layer*_output.pkl' pickles
path_input = ''        # prefix of the target / feature CSV files

In [None]:
#load layer1 input data
# Uses `add_suffix` and `use_sd_files` defined earlier in the notebook.
retest_sets = ['train1','train2','test1','test2']
retest_mods = ['wm', 'lan', 'rel', 'mot', 'emo', 'soc', 'gam', 'rest-pca75', 'surf', 'VolBrain', 'subc', 'cort']

# target[set] -> DataFrame (no header row, first column used as index)
target = {
    sett: pd.read_csv(path_input+'target_y_'+sett+'.csv', index_col=0, header=None)
    for sett in retest_sets
}
# features[modality][set] -> DataFrame (first column used as index)
features = {
    mod: {
        sett: pd.read_csv(path_input+mod+'_'+sett+add_suffix(use_sd_files), index_col=0)
        for sett in retest_sets
    }
    for mod in retest_mods
}

#load layer1 para
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open(path_input_para+'retest_layer1_output.pkl', 'rb') as f:
    dic = pickle.load(f)

# The train1 target vector is the same for every modality.
y_train = target['train1'].values[:,0]

for moda in ['wm', 'lan', 'rest-pca75', 'rel', 'mot', 'emo', 'soc', 'surf', 'VolBrain', 'subc', 'cort', 'gam']:
    X_train = features[moda]['train1']
    best = dic['eNet'][moda]['best_para']

    reg = ElasticNet(alpha=best['alpha'], l1_ratio=best['l1_ratio'], max_iter=best['max_iter'])
    reg.fit(X_train, y_train)

    # Single 'coef' column with the default integer index.
    coef = pd.DataFrame({'coef': reg.coef_})
    display(coef)
    coef.to_csv(path_output+'coef_'+moda+'.csv')
    

In [None]:
#load layer2 input data
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open(path_input_para+'retest_layer1_output.pkl', 'rb') as f:
    data = pickle.load(f)
y_train = pd.read_csv(path_input+'target_y_train2.csv', index_col=0, header=None)

#load layer2 para
with open(path_input_para+'retest_layer2_output.pkl', 'rb') as f:
    dic = pickle.load(f)

# Modalities that feed each stacked model (insertion order = processing order).
retest_modality_sets = {
    'all': ['wm', 'lan', 'rest-pca75', 'rel', 'mot', 'emo', 'soc', 'surf', 'VolBrain', 'subc', 'cort', 'gam'],
    'task': ['wm', 'lan', 'rel', 'mot', 'emo', 'soc', 'gam'],
    'non-task': ['rest-pca75','surf', 'VolBrain', 'subc', 'cort'],
    'top-task': ['wm', 'lan', 'rel'],
}

for stacked_moda, use_moda in retest_modality_sets.items():
    # One column of layer-1 'train2_pred' predictions per modality, in use_moda order.
    X_train = np.hstack([data['eNet'][moda]['train2_pred'].reshape(-1,1) for moda in use_moda])

    best = dic['eNet']['eNet'][stacked_moda]['best_para']
    reg = ElasticNet(alpha=best['alpha'], l1_ratio=best['l1_ratio'], max_iter=best['max_iter'])
    reg.fit(X_train, y_train)

    # Rows are labelled with the modality each coefficient belongs to.
    coef = pd.DataFrame({'coef': reg.coef_}, index=use_moda)
    display(coef)
    coef.to_csv(path_output+'coef_'+stacked_moda+'.csv')

# flat model

In [2]:
# Locations used by the flat-model cells -- blank placeholders, fill in before running.
path_output = ''        # prefix of the 'coef_<set>.csv' files that get written
path_input_train = ''   # prefix of the per-fold '*_trainFlat*' CSV directories
path_input_test = ''    # prefix of the per-fold '*_test*' CSV directories
path_input_para = ''    # prefix of the '<reg>-<set>_flat_output.pkl' pickles

In [3]:
## load data ############################
use_reg = 'eNet'
# When True the '*_std.csv' feature files are read, otherwise the plain '*.csv' files.
use_sd_files = True

target={} #dict for target:   target[fold]['train'|'test']             -> DataFrame
features={} #dict for features: features[fold]['train'|'test'][modality] -> DataFrame


def add_suffix(x):
    """Return the feature-file suffix: '_std.csv' when `x` is truthy, else '.csv'.

    The previous version was hard-coded to `'_std.csv' if True else '.csv'`, so
    switching `use_sd_files` off had no effect on which files were read.
    """
    return '_std.csv' if x else '.csv'


flat_mods = ['wm', 'lan', 'rel', 'mot', 'emo', 'soc', 'gam', 'rest-pca75', 'surf', 'VolBrain', 'subc', 'cort']

for fold in ['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6', 'Fold_7']:
    # os.path.join works whether or not the configured paths end with a separator.
    train_dir = os.path.join(path_input_train, fold)
    test_dir = os.path.join(path_input_test, fold)
    suffix = add_suffix(use_sd_files)

    # Targets are read without a header row; the first column becomes the index.
    target[fold] = {
        'train': pd.read_csv(os.path.join(train_dir, 'target_y_trainFlat.csv'), index_col=0, header=None),
        'test': pd.read_csv(os.path.join(test_dir, 'target_y_test.csv'), index_col=0, header=None),
    }

    # One feature table per modality for each split; the first column becomes the index.
    features[fold] = {
        'train': {
            mod: pd.read_csv(os.path.join(train_dir, mod + '_trainFlat' + suffix), index_col=0)
            for mod in flat_mods
        },
        'test': {
            mod: pd.read_csv(os.path.join(test_dir, mod + '_test' + suffix), index_col=0)
            for mod in flat_mods
        },
    }

In [None]:
# Refit the flat ElasticNet (all selected modalities' features concatenated side by side)
# for each modality set and fold with the stored hyper-parameters, then display and save
# the coefficients (rows = folds, columns = '<modality>-<feature>') as 'coef_<set>.csv'.
fir_reg ='eNet'

flat_folds = ['Fold_0', 'Fold_1', 'Fold_2', 'Fold_3', 'Fold_4', 'Fold_5', 'Fold_6', 'Fold_7']

# Modalities that feed each flat model (insertion order = processing order).
flat_modality_sets = {
    'all': ['wm', 'lan', 'rest-pca75', 'rel', 'mot', 'emo', 'soc', 'surf', 'VolBrain', 'subc', 'cort', 'gam'],
    'task': ['wm', 'lan', 'rel', 'mot', 'emo', 'soc', 'gam'],
    'non-task': ['rest-pca75','surf', 'VolBrain', 'subc', 'cort'],
    'top-task': ['wm', 'lan', 'rel'],
}

for stacked_moda, use_moda in flat_modality_sets.items():
    print('\n-- stacked_moda:',stacked_moda,'--')

    # Coefficient labels in concatenation order, taken from the Fold_0 test tables.
    colnames = [
        mod+'-'+col
        for mod in use_moda
        for col in features['Fold_0']['test'][mod].columns
    ]

    #load para and train
    #############################################################
    # NOTE: pickle.load can execute arbitrary code -- only open files you trust.
    with open(path_input_para+fir_reg+'-'+stacked_moda+'_flat_output.pkl', 'rb') as f:
        dic = pickle.load(f)

    coef = pd.DataFrame(columns=colnames,dtype=object)
    for fold in flat_folds:
        y_train = target[fold]['train'].values[:,0]

        # Concatenate the modalities' training features column-wise in one call
        # (replaces the incremental hstack with a first-iteration special case).
        X_train_f = np.hstack([features[fold]['train'][moda] for moda in use_moda])

        best = dic['best_para'][fold]
        reg = ElasticNet(alpha=best['alpha'], l1_ratio=best['l1_ratio'], max_iter=best['max_iter'])
        reg.fit(X_train_f,y_train)
        coef.loc[fold] = reg.coef_

    display(coef)
    coef.to_csv(path_output+'coef_'+stacked_moda+'.csv')