In [46]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, NMF, KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import MinMaxScaler
import random, os

from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, KFold, LeaveOneOut # or StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
import pingouin as pg 
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import MultiTaskLasso

OUTDATED_IGNORE=1

## 1. Read in the subjects

In [48]:
def get_clinic(iFile='../../DerivedData/Global.csv'):
    clinic = pd.read_csv('../../DerivedData/Global.csv', sep=';')
    to_keep = ['ParticipantID', 'Cognitive-CompositeScore', 'Communication-CompositeScore', 'Motor-CompositeScore','Cognitive-ScaledScore',
                 'ReceptiveCom-ScaledScore','ExpressiveCom-ScaledScore','FineMotor-ScaledScore','GrossMotor-ScaledScore', 'IMDScore', 
              'Sex', 'Gabirth', 'Pregnancy-size',
              'ParenteralNutrition>21d','FetalGrowthRestriction',
             'PretermMorbiditiesx4', 'BirthWeight','Apgar1', 'Apgar5', 'Apgar10' ]
    clinic= clinic[to_keep]
    
    clinic.rename( columns ={
                'ParticipantID' : 'subject_id',
                'Cognitive-CompositeScore' : 'Cognitive',
                'Communication-CompositeScore' : 'Language', 
                'Motor-CompositeScore' : 'Motor',
                'Gabirth': 'birth_age', 
                'Pregnancy-size' : 'MultiPreg',
                'ParenteralNutrition>21d': 'ParenteralNutrition_21d',
                'PretermMorbiditiesx4': 'Morbidities', 
                'FetalGrowthRestriction': 'FGR'
                }, inplace=True)
    
    for col in ['IMDScore', 'birth_age','BirthWeight']:
        clinic[col] = clinic[col].astype(str)
        clinic[col] = clinic[col].apply(lambda x: x.replace(',','.'))
        clinic[col] = clinic[col].astype(np.float16)
        
    clinic.loc[clinic['Sex'] == 'Female', 'Sex_cat'] = 1
    clinic.loc[clinic['Sex'] == 'Male', 'Sex_cat'] = 0
    
    clinic.loc[clinic['ParenteralNutrition_21d'] == 'Yes', 'ParenteralNutrition_21d_cat'] = 1
    clinic.loc[clinic['ParenteralNutrition_21d'] == 'No', 'ParenteralNutrition_21d_cat'] = 0
      
    #clinic.loc[clinic['ChronicLungDisease'] == 'Yes', 'ChronicLungDisease_cat'] = 1
    #clinic.loc[clinic['ChronicLungDisease'] == 'No', 'ChronicLungDisease_cat'] = 0
    
    clinic.loc[clinic['Morbidities'] == 'Yes', 'Morbidities_cat'] = 1
    clinic.loc[clinic['Morbidities'] == 'No', 'Morbidities_cat'] = 0
    
    clinic.loc[clinic['FGR'] == 'Yes', 'FGR_cat'] = 1
    clinic.loc[clinic['FGR'] == 'No', 'FGR_cat'] = 0

    clinic['MultiPreg'] = clinic['MultiPreg'] -1
    
    return clinic

def _get_meanWM(group):
    

    ex_WM = pd.read_csv('../../DerivedData/extreme_pairs_mean_diffusion_metrics_over_WM.csv', index_col=0)
    mod_WM = pd.read_csv('../../DerivedData/moderate_pairs_mean_diffusion_metrics_over_WM.csv', index_col=0)

    WM = pd.concat([ex_WM, mod_WM])
    WM.rename(columns={'matched_ID' : 'control_ID'}, inplace=True)
        
    cols = [col for col in WM.columns if group in col]
    cols = [col for col in cols if 'post' not in col]
        
    WM = WM[cols]
    WM = WM.set_axis(['meanWM_'+col.split('_')[1] for col in cols] , axis=1, inplace=False)
    WM.reset_index(drop=True, inplace=True)
    WM.rename(columns={'meanWM_ID': 'subject_id'}, inplace =True)
    
    return WM 
        
def import_WM():
    
    preterm_WM = _get_meanWM('preterm')
    control_WM = _get_meanWM('control')

    WM = pd.concat([preterm_WM, control_WM])
    return WM

In [49]:
# read in the metric files saved as csv
controls = pd.read_csv('../../DerivedData/extracted_diffusion_metrics_control_group_mergedLR.csv', index_col=0)
preterms = pd.read_csv('../../DerivedData/extracted_diffusion_metrics_preterm_group_mergedLR.csv', index_col=0)

### create pairing - as there is fewer preterms, use their IDs to find matches with controls 
matched = pd.read_csv('../../DerivedData/subject_matching.csv', index_col=0)
matched = matched[matched['preterm_ID'].isin(preterms['subject_id'].values)]

#get matched controls
controls = controls[controls.subject_id.isin(matched.matched_ID_with_outcome.values)]

### get IDs for evaluations 
preterm_ids = preterms.subject_id.values
control_ids = controls.subject_id.values

### concatenate the two 
df = pd.concat([preterms, controls])
df = pd.merge(df, get_clinic(), how="inner", on=["subject_id"])

## get WM 
df = pd.merge(df, import_WM(), how='inner', on=['subject_id'])

### subjects for the PCA

In [50]:
miss_ids = list(df[df['Cognitive'].isna()].subject_id.values)

ct = df[df['subject_id'].isin(miss_ids) & df['subject_id'].isin(control_ids)].subject_id.values
pt = df[df['subject_id'].isin(miss_ids) & df['subject_id'].isin(preterm_ids)].subject_id.values

req = len(pt) - len(ct)
ids = df[~df['subject_id'].isin(miss_ids) & df['subject_id'].isin(control_ids)].subject_id.values

print('Required number of random controls to select: {}'.format(req))

np.random.seed(42)
random_controls = random.choices(population=ids, k=req)
miss_ids.extend( random_controls)

print('Final number of subjects for PCA: {}'.format(len(miss_ids)))


Required number of random controls to select: 9
Final number of subjects for PCA: 30


In [51]:
### fix miss_Ids so it's reproducible
miss_ids = ['CC00997BN25','CC00301XX04','CC00632XX14','CC00889BN24',
         'CC00525XX14', 'CC00621XX11', 'CC00747XX22', 'CC00326XX13',
         'CC00576XX16','CC00385XX15','CC00889AN24','CC01038XX16',
         'CC01005XX07','CC01077XX14','CC00805XX13','CC00427XX15',
         'CC01042XX12','CC00383XX13','CC00653XX10','CC01014XX08',
         'CC00178XX14','CC00082XX09','CC00150AN02','CC00091XX10',
         'CC00111XX04','CC00716XX15','CC00584XX16','CC00667XX16',
         'CC00566XX14','CC00477XX16']

## Split data 
Into:
1. data for PCA (~30 subjects)
2. hold-out 25 % of the remaining
3. train test (for LOOCV)


In [52]:
from sklearn.model_selection import train_test_split

df_pca = df[df.subject_id.isin(miss_ids)].copy()

df_sub = df[~df.subject_id.isin(miss_ids)].copy()
df_sub = df_sub.sample(frac=1).reset_index(drop=True)

df_pred, df_hold = train_test_split(df_sub, test_size=0.25)

print('Nmber of subjects for PCA: {}'.format(len(df_pca)))
print('Number of subjects in hold-out set: {}'.format(len(df_hold)))
print('Number of subjects in train-out set: {}'.format(len(df_pred)))


Nmber of subjects for PCA: 30
Number of subjects in hold-out set: 22
Number of subjects in train-out set: 66


## Global settings

In [83]:
# global settings 
seed=42
N=50

ndependent = ['Motor', 'Language', 'Cognitive']
#independent = ['Cognitive-ScaledScore',
#       'ReceptiveCom-ScaledScore', 'ExpressiveCom-ScaledScore',
#       'FineMotor-ScaledScore', 'GrossMotor-ScaledScore']
sss = LeaveOneOut()

## 2. Set up scalers, inputers and PCA

In [84]:
def get_scaling(arr_train, arr_test, scaling_strategy, return_scaler=False):
    
    if scaling_strategy == 'min_max':
        scl = MinMaxScaler()
    else: 
        scl = StandardScaler()
        
    scl.fit(arr_train)
    
    if return_scaler == True:
        return scl.transform(arr_train), scl.transform(arr_test), scl
    else:
        return scl.transform(arr_train), scl.transform(arr_test)
    
def get_imputation(arr_train, arr_test, return_medians=False):
    

    if len(arr_train[0]) == 1:
        #print('I am in the worng loop')
        md = np.nanmedian(arr_train)
        arr_train[np.where(np.isnan(arr_train))] = md
        arr_test[np.where(np.isnan(arr_test))] = md
        
        if return_medians == True:
            return arr_train, arr_test, [md]
        else: 
            return arr_train, arr_test
    
    else:
        mds = []
        for col in range(len(arr_train[0])):
            
            md = np.nanmedian(arr_train[:,col])
            #print(md)
            arr_train[:,col][np.where(np.isnan(arr_train[:,col]))] = md
            arr_test[:,col][np.where(np.isnan(arr_test[:,col]))] = md
            
            mds.append(md)
        
        if return_medians == True:
            return arr_train, arr_test, mds
        else: 
            return arr_train, arr_test        

        
def get_pca(X_array, thr = 0.95, return_pca = True):
    
    ### PCA 
    pcs= min(len(X_array), len(X_array[0])) -1 
    pca = PCA(n_components=pcs).fit(X_array)
    num_pcs = np.argwhere( np.cumsum(pca.explained_variance_ratio_)>thr)[0][0] + 1
    print('Number of PCs selected: {}'.format(num_pcs))
    
    if return_pca == True:
        
        return pca, num_pcs
def run_dummy_regression(X_train, X_test, y_train, y_test):
    
    y_pred = np.zeros_like(y_test[0])
    y_test = y_test
    
    #print(y_pred)
    
    for i in range(len(y_train[0])):
        y_pred[i] = _dummy(X_train = X_train, X_test = X_test, y_train = y_train[:,i], y_test = y_test[:,i])
        
    return y_pred, y_test


def _dummy(X_train, X_test, y_train, y_test):
    
    reg = DummyRegressor(strategy='mean')
    reg.fit(X_train, y_train)
    y_pred_test = reg.predict(X_test)
    
    return y_pred_test[0]#, y_test[0]
   

def run_LR_regression(X_train, X_test, y_train, y_test):
    
    y_pred = np.zeros_like(y_test[0])
    y_test = y_test
    
    for i in range(len(y_train[0])):
        y_pred[i] = _LR(X_train = X_train, X_test = X_test, y_train = y_train[:,i], y_test = y_test[:,i])
        
    return y_pred, y_test    
    
           
def _LR(X_train, X_test, y_train, y_test):
    
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    y_pred_test = reg.predict(X_test)
    
    return y_pred_test[0]#, y_test[0]
   

def run_GB_regression(X_train, X_test, y_train, y_test, dct):
    
    y_pred = np.zeros_like(y_test[0])
    y_test = y_test
    
    for i in range(len(y_train[0])):
        y_pred[i] = _GB(X_train = X_train, X_test = X_test, y_train = y_train[:,i], y_test = y_test[:,i], dct=dct)
        
    return y_pred, y_test
    
def _GB(X_train, X_test, y_train, y_test, dct):
    
    reg = GradientBoostingRegressor()
    if dct != None:
        reg.set_params(**dct)
    reg.fit(X_train, y_train)
    y_pred_test = reg.predict(X_test)
    
    return y_pred_test[0]#, y_test[0]
      

def run_LASSO_mutlitask(X_train, X_test, y_train, y_test, dct):
    
    reg = MultiTaskLasso(max_iter=10000)
    if dct != None:
        reg.set_params(**dct)
    reg.fit(X_train , y_train)
    y_pred_test = reg.predict(X_test )
    
    return y_pred_test[0], y_test[0]

In [85]:

def run_LOOCV(X, y, method, scaler=None, imputer=None, pca=None, num_pcs=None, dct=None):
    
    sss = LeaveOneOut()
    
    results = np.zeros_like(y)
    true = np.zeros_like(y)
    
    i = 0
    for train_index, test_index in sss.split(X):
        
        X_train, X_test = X[train_index], X[test_index] 
        y_train, y_test = y[train_index], y[test_index]
        
        if imputer !=None:
            
            for col in range(len(imputer)):
                X_train[:,col][np.where(np.isnan(X_train[:,col]))] = imputer[col]
                X_test[:,col][np.where(np.isnan(X_test[:,col]))] = imputer[col]
    
        if scaler != None:
            
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)
            
        if pca != None: 
            
            X_train = pca.transform(X_train)[:,:num_pcs]
            X_test = pca.transform(X_test)[:,:num_pcs]
            
    
        if method == 'dummy':
            y_pred, _ = run_dummy_regression(X_train, X_test, y_train, y_test)
            #print(y_pred)
            results[i,:] = y_pred
            
            
        if method == 'lr':
            y_pred, _ = run_LR_regression(X_train, X_test, y_train, y_test)
            #print(y_pred)
            results[i,:] = y_pred 
            
            
        if method == 'gb':
            y_pred, _ = run_GB_regression(X_train, X_test, y_train, y_test, dct)
            #print(y_pred)
            results[i,:] = y_pred    
            
        if method == 'multi':
            y_pred, _ = run_LASSO_mutlitask(X_train, X_test, y_train, y_test, dct)
            #print(y_pred)
            results[i,:] = y_pred  
            
        true[i,:] = y_test
        
        i = i + 1
    return results, true


def evaluate_LOOCV(prediction, true, outcomes):
    
    dct = {}
    for i, score in enumerate(outcomes):
        
        dct[score] = {}
        dct[score]['r2'] = r2_score(y_true = true[:,i], y_pred = prediction[:,i].astype(int))
        dct[score]['mae'] = mean_absolute_error(y_true = true[:,i], y_pred = prediction[:,i].astype(int))
        dct[score]['corr'] = np.corrcoef(prediction[:,i].astype(int), true[:,i])[0,1]
        
    return dct
        

    
    
def run_VALIDATION(X_train, y_train, X_test, y_test, method, scaler=None, imputer=None, pca=None, num_pcs=None, dct=None):

    results = np.zeros_like(y_test)
    true = np.zeros_like(y_test)
      
    if imputer !=None:
            
        for col in range(len(imputer)):
            X_train[:,col][np.where(np.isnan(X_train[:,col]))] = imputer[col]
            X_test[:,col][np.where(np.isnan(X_test[:,col]))] = imputer[col]
    
    if scaler != None:
            
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
            
    if pca != None: 
            
        X_train = pca.transform(X_train)[:,:num_pcs]
        X_test = pca.transform(X_test)[:,:num_pcs]
        
    
    results = np.zeros_like(y_test)
    
    if method == 'dummy':
        y_pred, _ = run_dummy_regression(X_train, X_test, y_train, y_test)
        #print(y_pred)
        results[:] = y_pred
            
            
    if method == 'lr':
        y_pred, _ = run_LR_regression(X_train, X_test, y_train, y_test)
        #print(y_pred)
        results[:] = y_pred 
            
            
    if method == 'gb':
        y_pred, _ = run_GB_regression(X_train, X_test, y_train, y_test, dct)
        #print(y_pred)
        results[:] = y_pred    
            
    if method == 'multi':
        y_pred, _ = run_LASSO_mutlitask(X_train, X_test, y_train, y_test, dct)
        #print(y_pred)
        results[:] = y_pred  
             
    true = y_test    
        
    return results, true
    
def evaluate_VALIDATION(prediction, true, outcomes):
    
    dct = {}
    for i, score in enumerate(outcomes):
        
        dct[score] = {}
        dct[score]['r2'] = r2_score(y_true = true[:,i], y_pred = prediction[:,i].astype(int))
        dct[score]['mae'] = mean_absolute_error(y_true = true[:,i], y_pred = prediction[:,i].astype(int))
        dct[score]['corr'] = np.corrcoef(prediction[:,i].astype(int), true[:,i])[0,1]
        
    return dct    


    
### incorporate validation into the pipeline here
        
def run_pipeline(X, y, X_hold, y_hold, X_pca, outcomes, run_pca = False, opt_itr = 15):
    
    output_test = {}
    output_valid = {}
    
    X_pca, _, imputer = get_imputation(X_pca, X_pca, return_medians=True)
    X_pca, _, scaler = get_scaling(X_pca, X_pca, scaling_strategy='standard', return_scaler=True)
    
    pca=None
    num_pcs=None
    
    if run_pca != False:
        pca, num_pcs = get_pca(X_pca, thr = 0.95, return_pca = True)
        
    print('Num PCs: {}'.format(num_pcs))   
    for model in ['dummy', 'lr', 'gb', 'multi']:
        #print(model)
        
        if model == 'gb':
            
            gb_opt_settings = optimise_GBR(X, y,  method=model, outcomes=outcomes, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs,  itr=25)
        
            print('best GB: {}'.format(gb_opt_settings))
            
            prediction, true = run_LOOCV(X, y, method=model, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs, dct=gb_opt_settings)     
            results = evaluate_LOOCV(prediction=prediction, true=true, outcomes=outcomes)
            output_test[model] = results
            
        elif model == 'multi':
            
            ml_opt_settings = optimise_MULTI(X, y,  method=model,  outcomes=outcomes, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs, itr=15)
            print('best MULTI: {}'.format(ml_opt_settings))
        
            prediction, true = run_LOOCV(X, y, method=model, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs, dct=ml_opt_settings)     
            results = evaluate_LOOCV(prediction=prediction, true=true, outcomes=outcomes)
            output_test[model] = results
            
        else:
            
            prediction, true = run_LOOCV(X, y, method=model, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs)     
            results = evaluate_LOOCV(prediction=prediction, true=true, outcomes=outcomes)
            output_test[model] = results
            
            
    ### validation 
    
    for model in ['dummy', 'lr', 'gb', 'multi']:
        if model == 'gb':
            prediction, true = run_VALIDATION(X_train=X, y_train=y, X_test=X_hold, y_test=y_hold, 
                                              method=model, scaler=scaler,
                                              imputer=imputer,  pca=pca, num_pcs=num_pcs, dct=gb_opt_settings)
            results = evaluate_VALIDATION(prediction=prediction, true=true, outcomes=outcomes)
            output_valid[model] = results 
            
        elif model == 'multi':
            prediction, true = run_VALIDATION(X_train=X, y_train=y, X_test=X_hold, y_test=y_hold, 
                                              method=model, scaler=scaler,
                                              imputer=imputer,  pca=pca, num_pcs=num_pcs, dct=ml_opt_settings)
            results = evaluate_VALIDATION(prediction=prediction, true=true, outcomes=outcomes)
            output_valid[model] = results 
            
        else: 
            prediction, true = run_VALIDATION(X_train=X, y_train=y, X_test=X_hold, y_test=y_hold, 
                                              method=model, scaler=scaler,
                                              imputer=imputer,  pca=pca, num_pcs=num_pcs)
            results = evaluate_VALIDATION(prediction=prediction, true=true, outcomes=outcomes)
            output_valid[model] = results 
    
    return output_test, output_valid
        

    
def optimise_GBR(X, y,  method,outcomes,  scaler=None, imputer=None, 
                                     pca=None, num_pcs=None, itr=15):
    
    space = {}
    space['loss']= ['absolute_error', 'squared_error', 'huber']
    space['learning_rate'] = [ 0.001, 0.01, 0.1, 1]
    space['n_estimators']= [5,10,25,50]
    space['max_depth']= [3,5,10,25]
    
    res = []
    hyper = []
    
    for it in range(itr):
                
        ### get parameters
        params = {}
        
        for key in space.keys():
            params[key] = random.choices(population=space[key], k=1)[0]
            
        #print('Optimization {}/{}'.format(it, itr))
        
        prediction, true = run_LOOCV(X, y, method=method, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs, dct=params)     
        results = evaluate_LOOCV(prediction=prediction, true=true, outcomes=outcomes)
        
        res.append(results[outcomes[1]]['mae'])
        hyper.append(params)
        
    minimum = np.nanargmin(np.asarray(res))
    
    return hyper[minimum]


    
def optimise_MULTI(X, y,  method, outcomes,  scaler=None, imputer=None, 
                                     pca=None, num_pcs=None, itr=15):
    
    space = {}
    space['alpha'] = [0.001, 0.01, 0.01, 1, 10, 100]
    space['fit_intercept'] = [True, False]
    
    res = []
    hyper = []
    
    for it in range(itr):
                
        ### get parameters
        params = {}
        
        for key in space.keys():
            params[key] = random.choices(population=space[key], k=1)[0]
            
        #print('Optimization {}/{}'.format(it, itr))
        
        prediction, true = run_LOOCV(X, y, method=method, scaler=scaler, imputer=imputer, 
                                     pca=pca, num_pcs=num_pcs, dct=params)     
        results = evaluate_LOOCV(prediction=prediction, true=true, outcomes=outcomes)
        
        res.append(results[outcomes[1]]['mae'])
        hyper.append(params)
        
    minimum = np.nanargmin(np.asarray(res))
    
    return hyper[minimum]

In [86]:
def write_results(input_name, dct):

    for key in dct.keys():
        cols = [ outcome+'_'+metric for outcome in dct[key].keys() for metric in dct[key][outcome].keys() ]

    df_write = pd.DataFrame(columns = ['name', 'model'] + cols)
    for i, key in enumerate(dct.keys()):  
    
        df_write.loc[i] = [input_name, key] + [ dct[key][outcome][metric] for outcome in dct[key].keys() for metric in dct[key][outcome].keys() ]
    
    return df_write


In [87]:
##  

pair_names = [
    'M1-Brainstem', 'S1-Brainstem','Paracentral-Brainstem',
    'M1-Caud', 'S1-Caud','Paracentral-Caud',
    'M1-Lenti', 'S1-Lenti','Paracentral-Lenti', 
    'M1-Thalfus', 'S1-Thalfus','Paracentral-Thalfus', 
    'M1L-M1R', 'S1L-S1R',
    'S1-M1' ]

metrics = ['AD', 'RD', 'FA', 'MD', 'NDI', 'ODI']

all_bundles = [col + '_' + metric for col in pair_names for metric in metrics]

## features 
sets =  [['AD', 'RD'], ['FA', 'MD'], ['NDI', 'ODI']]

features1 = [col + '_' + metric for col in pair_names for metric in sets[0]]
features2 = [col + '_' + metric for col in pair_names for metric in sets[1]]
features3 = [col + '_' + metric for col in pair_names for metric in sets[2]]

features4 = 'IMDScore'

features5 = ['IMDScore', 'birth_age', 'Sex_cat', 'MultiPreg', 
                'ParenteralNutrition_21d_cat', 
                'Morbidities_cat', 'FGR_cat','BirthWeight','Apgar1', 'Apgar5', 'Apgar10']

norm_settings = {
    
    'model1' : {
                    'features' : ['IMDScore'],
                    'pca' : False
            
                },
    
    'model2' : {
                    'features' : ['IMDScore', 'birth_age', 'Sex_cat', 'MultiPreg', 
                'ParenteralNutrition_21d_cat', 
                'Morbidities_cat', 'FGR_cat','BirthWeight','Apgar1', 'Apgar5', 'Apgar10'],
                    'pca' : False
            
                },
    
    'model3' : {
                    'features' : ['IMDScore', 'birth_age', 'Sex_cat', 'MultiPreg', 
                'ParenteralNutrition_21d_cat', 'Morbidities_cat', 'FGR_cat','BirthWeight','Apgar1', 'Apgar5', 'Apgar10'],
                    'pca' : True
            
                },
    
    
    'model4' : {
                    'features' : ['meanWM_AD', 'meanWM_RD', 'meanWM_MD',
                           'meanWM_FA', 'meanWM_NDI', 'meanWM_ODI'],
                    'pca' : False
            
                },
    
    'model5' : {
                    'features' : ['meanWM_AD', 'meanWM_RD', 'meanWM_MD',
                           'meanWM_FA', 'meanWM_NDI', 'meanWM_ODI'],
                    'pca' : True
            
                },
    
    'model6' : {
                    'features' : ['meanWM_AD', 'meanWM_RD', 'meanWM_MD',
                   'meanWM_FA', 'meanWM_NDI', 'meanWM_ODI', 'IMDScore'],
                    'pca' : False
            
                },
    
    'model7' : {
                    'features' : ['meanWM_AD', 'meanWM_RD', 'meanWM_MD',
                   'meanWM_FA', 'meanWM_NDI', 'meanWM_ODI', 'IMDScore'],
                    'pca' : True
            
                },
    
    'model8' : {
                    'features' : ['meanWM_AD', 'meanWM_RD', 'meanWM_MD',
                   'meanWM_FA', 'meanWM_NDI', 'meanWM_ODI', 'IMDScore', 'birth_age', 'Sex_cat', 'MultiPreg', 
                'ParenteralNutrition_21d_cat', 'Morbidities_cat', 'FGR_cat','BirthWeight','Apgar1', 'Apgar5', 'Apgar10'],
                    'pca' : False
            
                },
    
    'model9' : {
                    'features' : ['meanWM_AD', 'meanWM_RD', 'meanWM_MD',
                   'meanWM_FA', 'meanWM_NDI', 'meanWM_ODI', 'IMDScore', 'birth_age', 'Sex_cat', 'MultiPreg', 
                'ParenteralNutrition_21d_cat', 'Morbidities_cat', 'FGR_cat','BirthWeight','Apgar1', 'Apgar5', 'Apgar10'],
                    'pca' : True
            
                },
    
    'model10' : {
                    'features' : all_bundles,
                    'pca' : True
            
                },
    'model11' : {
                    'features' :  list(np.append(all_bundles, ['IMDScore'])),
                    'pca' : True
            
                },
    
    'model12' : {
                    'features' :  list(np.append(all_bundles, ['IMDScore', 'birth_age', 'Sex_cat', 'MultiPreg', 
                'ParenteralNutrition_21d_cat', 'Morbidities_cat', 'FGR_cat','BirthWeight','Apgar1', 'Apgar5', 'Apgar10'])),
                    'pca' : True
            
                },
    
    
    'model16' : {
                    'features' :  features1,
                    'pca' : True
            
                },
    
    'model19' : {
                    'features' :  features2,
                    'pca' : True
            
                },
    
    'model22' : {
                    'features' :  features3,
                    'pca' : True
            
                },
    
}

In [88]:
for input_name in norm_settings:
    print(input_name)
    
    in_features = norm_settings[input_name]['features']
    pca_setting = norm_settings[input_name]['pca']
    print('PCA:', pca_setting)
    
    X_pca = df_pca[in_features].values
    X = df_pred[in_features].values
    y = df_pred[independent].values
    X_hold = df_hold[in_features].values
    y_hold =  df_hold[independent].values
    
    output_test, output_valid = run_pipeline(X=X, y=y, X_hold=X_hold, y_hold=y_hold, 
                                             X_pca=X_pca, outcomes=independent, run_pca = pca_setting)
    
    
    ### write out test
    to_write = write_results(input_name=input_name, dct=output_test)
    output_path_test='composite_outcome_prediction_hyper_opt_results.csv'
    to_write.to_csv(output_path_test, mode='a', header=not os.path.exists(output_path_test))
    
    ### write out validation
    to_write = write_results(input_name=input_name, dct=output_valid)
    output_path_val='composite_outcome_prediction_validaion_results.csv'
    to_write.to_csv(output_path_val, mode='a', header=not os.path.exists(output_path_val))
    
    
    print('{} DONE'.format(input_name))
    

model1
PCA: False
Num PCs: None


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.1, 'n_estimators': 5, 'max_depth': 3}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 0.01, 'fit_intercept': True}
model1 DONE
model2
PCA: False
Num PCs: None
best GB: {'loss': 'absolute_error', 'learning_rate': 0.01, 'n_estimators': 25, 'max_depth': 25}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 1, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model2 DONE
model3
PCA: True
Number of PCs selected: 7
Num PCs: 7


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.01, 'n_estimators': 5, 'max_depth': 25}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 1, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model3 DONE
model4
PCA: False
Num PCs: None


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.001, 'n_estimators': 10, 'max_depth': 3}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 0.01, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model4 DONE
model5
PCA: True
Number of PCs selected: 2
Num PCs: 2


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.01, 'n_estimators': 25, 'max_depth': 10}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 100, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


model5 DONE
model6
PCA: False
Num PCs: None


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.1, 'n_estimators': 25, 'max_depth': 10}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 0.01, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model6 DONE
model7
PCA: True
Number of PCs selected: 3
Num PCs: 3


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.001, 'n_estimators': 10, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 10, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


model7 DONE
model8
PCA: False
Num PCs: None


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.1, 'n_estimators': 25, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 1, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model8 DONE
model9
PCA: True
Number of PCs selected: 8
Num PCs: 8


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'squared_error', 'learning_rate': 0.01, 'n_estimators': 25, 'max_depth': 3}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 100, 'fit_intercept': True}
model9 DONE
model10
PCA: True
Number of PCs selected: 10
Num PCs: 10


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.1, 'n_estimators': 5, 'max_depth': 3}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 100, 'fit_intercept': True}
model10 DONE
model11
PCA: True
Number of PCs selected: 11
Num PCs: 11


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.01, 'n_estimators': 5, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 100, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model11 DONE
model12
PCA: True
Number of PCs selected: 13
Num PCs: 13


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.001, 'n_estimators': 50, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 100, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model12 DONE
model16
PCA: True
Number of PCs selected: 6
Num PCs: 6


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.1, 'n_estimators': 25, 'max_depth': 3}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 100, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

model16 DONE
model19
PCA: True
Number of PCs selected: 7
Num PCs: 7
best GB: {'loss': 'absolute_error', 'learning_rate': 0.01, 'n_estimators': 5, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 10, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


model19 DONE
model22
PCA: True
Number of PCs selected: 10
Num PCs: 10


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.001, 'n_estimators': 5, 'max_depth': 3}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 100, 'fit_intercept': True}
model22 DONE


### additionally, first run PCA on the three sets separatelly!

In [91]:
settings = ['model13', 'model14', 'model15',
            'model17', 'model18',
            'model20', 'model21', 
            'model23', 'model24']

settings = [
            'model17', 'model18',
            'model20', 'model21', 
            'model23', 'model24']

settings = ['model13', 'model14', 'model15']
    
#settings = [ 'model14']
for input_name in settings:
    print(input_name)
    
    
    if input_name == 'model13':
        ft = [features1, features2, features3]
        
    elif input_name == 'model14':
        ft = [features1, features2, features3]
        
    elif input_name == 'model15':
        ft = [features1, features2, features3, features5]
    
    elif input_name == 'model17':
        ft = [features1]
        
    elif input_name == 'model18':
        ft = [features1, features5]
        
    elif input_name == 'model20':
        ft = [features2]
        
    elif input_name == 'model21':
        ft = [features2, features5]
    
    elif input_name == 'model23':
        ft = [features3]
        
    elif input_name == 'model24':
        ft = [features3, features5]
        
        
        
    X = np.zeros((len(df_pred),len(df_pred.columns)))
    y =  df_pred[independent].values
        
    X_hold = np.zeros((len(df_hold),len(df_hold.columns)))
    y_hold = df_hold[independent].values
        
    X_pca = np.zeros((len(df_pca),len(df_pca.columns)))
        
    cum_pcs = 0
    for i, f in enumerate(ft):
            #print(f)
            X_for_PCA =  df_pca[f].values
            X_for_PCA, _, mds = get_imputation(arr_train=X_for_PCA, arr_test=X_for_PCA, return_medians=True)
            X_for_PCA, _, scl = get_scaling(arr_train=X_for_PCA, arr_test=X_for_PCA, scaling_strategy='standard', return_scaler=True)
    
            ### PCA
            print('{} subset'.format(i+1))
            pcs= min(len(X_for_PCA), len(X_for_PCA[0])) -1 
            pca = PCA(n_components=pcs).fit(X_for_PCA)
            num_pcs = np.argwhere( np.cumsum(pca.explained_variance_ratio_)>0.95)[0][0] + 1
            print('Number of PCs selected: {}'.format(num_pcs))
        
            Xpt = df_pred[f].values
            Xtst = df_hold[f].values
    
            for col in range(len(f)):
                Xpt[:,col][np.where(np.isnan(Xpt[:,col]))] = mds[col]
                Xtst[:,col][np.where(np.isnan(Xtst[:,col]))] = mds[col]
        
            X_for_PCA = pca.transform(X_for_PCA)[:,:num_pcs]
            
            Xpt = scl.transform(Xpt)
            Xpt = pca.transform(Xpt)[:,:num_pcs]
    
            Xtst = scl.transform(Xtst)
            Xtst = pca.transform(Xtst)[:,:num_pcs]
    

            X[:, cum_pcs: cum_pcs+ num_pcs] = Xpt
            X_pca[:,cum_pcs: cum_pcs+ num_pcs ] = X_for_PCA
            X_hold[:, cum_pcs: cum_pcs+ num_pcs] = Xtst
            cum_pcs = cum_pcs+ num_pcs
        
        
    if input_name in ['model14', 'model17', 'model20', 'model23']:
            
            X[:,cum_pcs] = df_pred[features4].values
            X_hold[:,cum_pcs] = df_hold[features4].values
            X_pca[:,cum_pcs] = df_pca[features4].values
        
            X = X[:, :cum_pcs+1]
            X_hold = X_hold[:, :cum_pcs+1]
            X_pca = X_pca[:, :cum_pcs+1]
            print('IMD score added')
            
            run_pca = False
    else:
            
            X = X[:, :cum_pcs]
            X_hold = X_hold[:, :cum_pcs]
            X_pca = X_pca[:, :cum_pcs]
            run_pca= True
        
    output_test, output_valid = run_pipeline(X=X, y=y, X_hold=X_hold, y_hold=y_hold, 
                                             X_pca=X_pca, outcomes=independent, run_pca = run_pca)
    
    
    ### write out test
    to_write = write_results(input_name=input_name, dct=output_test)
    output_path_test='composite_outcome_prediction_hyper_opt_results.csv'
    to_write.to_csv(output_path_test, mode='a', header=not os.path.exists(output_path_test))
    
    ### write out validation
    to_write = write_results(input_name=input_name, dct=output_valid)
    output_path_val='composite_outcome_prediction_validaion_results.csv'
    to_write.to_csv(output_path_val, mode='a', header=not os.path.exists(output_path_val))
    
    
    print('{} DONE'.format(input_name))

model13
1 subset
Number of PCs selected: 6
2 subset
Number of PCs selected: 7
3 subset
Number of PCs selected: 10
Number of PCs selected: 12
Num PCs: 12


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.001, 'n_estimators': 50, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stdd

best MULTI: {'alpha': 10, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


model13 DONE
model14
1 subset
Number of PCs selected: 6
2 subset
Number of PCs selected: 7
3 subset
Number of PCs selected: 10
IMD score added
Num PCs: None


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'absolute_error', 'learning_rate': 0.01, 'n_estimators': 50, 'max_depth': 25}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,


  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  random,
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 1, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


model14 DONE
model15
1 subset
Number of PCs selected: 6
2 subset
Number of PCs selected: 7
3 subset
Number of PCs selected: 10
4 subset
Number of PCs selected: 7
Number of PCs selected: 16
Num PCs: 16


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best GB: {'loss': 'huber', 'learning_rate': 0.001, 'n_estimators': 25, 'max_depth': 5}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


best MULTI: {'alpha': 100, 'fit_intercept': True}


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


model15 DONE


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


In [90]:
#from sklearn.model_selection import KFold
#kf = KFold(n_splits = 4, shuffle = True, random_state = 42)
#ist(kf.split(df))