# Neural Network with 2 hidden layers

### Load Modules

In [1]:
# Set the Random Seed
seed_value= 2022

import os
os.environ['PYTHONHASHSEED']=str(seed_value)
import numpy  as np
import pandas  as pd
import datetime
import random
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.formula.api as smf
import tensorflow as tf
from keras.models     import Sequential, load_model
from keras.layers     import Activation, Dense, LeakyReLU
from keras.optimizers import Adam
from keras.callbacks  import EarlyStopping, ModelCheckpoint
from keras.regularizers import l1
from tensorflow.keras.losses import MeanSquaredError

session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)




In [3]:
def reset_seeds(seed_value=2022):
    os.environ['PYTHONHASHSEED']=str(seed_value)
    np.random.seed(seed_value)
    random.seed(seed_value)
    tf.random.set_seed(seed_value)
reset_seeds()

### Load Dataset - only traditional features

In [4]:
# Load monthly firm characteristics raw data
df = pd.read_parquet('C:/Users/rafae/Documents/HSG/Master Thesis/Data/Final/data07_model_input.parquet')
df = df.sort_values(by=['YM', 'permno'])
df = df.set_index(['year', 'YM', 'permno'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gvkey,reprisk_id,prc,vol,mve_m,absacc,acc,aeavol,age,agr,...,sic2_73,sic2_75,sic2_78,sic2_79,sic2_80,sic2_81,sic2_82,sic2_83,sic2_87,sic2_99
year,YM,permno,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2007,2007-01,10025,11903,37172,45.320000,8086.0,3.700557e+05,0.698728,-0.745547,-0.646819,0.457506,-0.979644,...,0,0,0,0,0,0,0,0,0,0
2007,2007-01,10026,12825,12684,39.689999,7613.0,7.653725e+05,0.577608,-0.635623,-0.393384,0.457506,0.118575,...,0,0,0,0,0,0,0,0,0,0
2007,2007-01,10042,12139,4832,0.720000,26008.0,3.598898e+04,0.990840,-0.989822,-0.894148,0.457506,-0.989822,...,0,0,0,0,0,0,0,0,0,0
2007,2007-01,10078,12136,1719,6.130000,11333293.0,2.390900e+07,0.654962,-0.711959,-0.128753,0.905344,-0.147074,...,0,0,0,0,0,0,0,0,0,0
2007,2007-01,10104,12142,4413,16.430000,7234361.0,8.892640e+07,-0.014758,-0.107379,0.780153,0.905344,0.770992,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021,2021-12,93304,184167,91339,36.750000,183303.0,1.695898e+06,-0.855522,0.706269,-0.871045,-0.500896,0.613134,...,0,0,0,0,0,0,0,0,0,0
2021,2021-12,93373,184323,74074,3.020000,528712.0,2.065325e+05,-0.213134,0.143881,0.663284,-0.500896,-0.875821,...,0,0,0,0,0,0,0,0,0,0
2021,2021-12,93374,184899,64442,74.510002,159495.0,8.587073e+06,-0.514030,0.423284,-0.223881,-0.500896,0.328955,...,0,0,0,0,0,0,0,0,0,0
2021,2021-12,93423,10567,22547,39.490002,254917.0,3.661156e+06,0.242985,-0.303881,0.560597,0.143881,-0.584478,...,0,0,0,1,0,0,0,0,0,0


In [5]:
# Select only relevant columns for X and Y (exclude industry & reprisk rating dummy for now)
info_vars = ['YM', 'year', 'permno', 'gvkey', 'reprisk_id', 'prc', 'vol', 'mve_m']
X_vars = ['absacc', 'acc', 'aeavol', 'age', 'agr', 'baspread', 'beta', 'betasq', 'bm', 'bm_ia', 'cash', 'cashdebt',
          'cashpr', 'cfp', 'cfp_ia', 'chatoia', 'chcsho', 'chempia', 'chinv', 'chmom', 'chpmia', 'chtx', 'cinvest',
          'convind', 'currat', 'depr', 'divi', 'divo', 'dolvol', 'dy', 'ear', 'egr', 'ep', 'gma', 'grcapx', 'grltnoa',
          'herf', 'hire', 'idiovol', 'ill', 'indmom', 'invest', 'lev', 'lgr', 'maxret', 'mom12m', 'mom1m', 'mom36m',
          'mom6m', 'ms', 'mve', 'mve_ia', 'nincr', 'operprof', 'orgcap', 'pchcapx_ia', 'pchcurrat', 'pchdepr',
          'pchgm_pchsale', 'pchquick', 'pchsale_pchinvt', 'pchsale_pchrect', 'pchsale_pchxsga', 'pchsaleinv', 'pctacc',
          'pricedelay', 'ps', 'quick', 'rd', 'rd_mve', 'rd_sale', 'realestate', 'retvol', 'roaq', 'roavol', 'roeq',
          'roic', 'rsup', 'salecash', 'saleinv', 'salerec', 'secured', 'securedind', 'sgr', 'sin', 'sp', 'std_dolvol',
          'std_turn', 'stdacc', 'stdcf', 'tang', 'tb', 'turn', 'zerotrade']
sic2_vars = [col for col in df if col.startswith('sic2')]
reprisk_vars = ['country_sector_average', 'country_sector_average_01', 'current_rri', 'current_rri_01',
                'peak_rri', 'peak_rri_01', 'trend_rri', 'trend_rri_01']
reprisk_rating_vars = [col for col in df if col.startswith('reprisk_rating')]
Y_vars = ['ret', 'ret_wins', 'ret_ex']

# X
X = df[X_vars + sic2_vars]

# Y
Y = df[['ret_ex']]

X.shape, Y.shape

((338288, 162), (338288, 1))

### Hyperparameter Optimization: 4-Fold CV (12y/4 = 3y) and 3y Test

In [6]:
# Training (12y - 80%) and Test set (3y - 20%)
X_trai = X.loc['2007':'2018']
Y_trai = Y.loc['2007':'2018']

X_test = X.loc['2019':'2021']
Y_test = Y.loc['2019':'2021']

In [7]:
# 4-Fold cross validation (9y training and 3y validation)
K_FOLDs = 4
YEARS = list(X_trai.index.unique(level='year').astype('str')) 
TOT = len(YEARS)
TRA = int(TOT* (K_FOLDs-1) / K_FOLDs)
OFF = TOT - TRA

for FOLD in range(K_FOLDs):
    VALI = YEARS[(FOLD*OFF):((FOLD+1)*OFF)]
    TRAI = [x for x in YEARS if x not in VALI]
    print(VALI, TRAI)    

['2007', '2008', '2009'] ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']
['2010', '2011', '2012'] ['2007', '2008', '2009', '2013', '2014', '2015', '2016', '2017', '2018']
['2013', '2014', '2015'] ['2007', '2008', '2009', '2010', '2011', '2012', '2016', '2017', '2018']
['2016', '2017', '2018'] ['2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']


In [8]:
# Model
model_name = 'NN2_trad'

def create_NN2(l_rate=0.01, l1_pen=0.0001):
    model = Sequential()
    model.add(Dense(32,
                    activation='relu',
                    input_dim=X_trai.shape[1],
                    kernel_regularizer=l1(l1_pen)))
    model.add(Dense(16,
                    activation='relu',
                    kernel_regularizer=l1(l1_pen)))
    model.add(Dense(1))
    model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=l_rate))
    return model

In [9]:
# Hyperparameter optimization
Y_val_preds = Y_trai.copy()
Y_test_preds = Y_test.copy()
results = []

earlyStopping  = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')

model_ix = 0
for l1_pen in [0.001, 0.0005, 0.0001, 0.00005, 0.00001]:
        for l_rate in [0.01, 0.005, 0.001]:
            for ensemble in list(range(0,10)):
                print("•", model_ix, ':',  end=' ')
                    
                for FOLD in range(K_FOLDs):
                    VALI = YEARS[(FOLD*OFF):((FOLD+1)*OFF)]
                    TRAI = [x for x in YEARS if x not in VALI]
                        
                    mcp_save = ModelCheckpoint(
                        '%s/%s_%d_%d.h5'%(model_name, model_name, model_ix, FOLD),
                        save_best_only=True, monitor='val_loss', mode='min', save_weights_only=True
                    )

                    reset_seeds(ensemble*10000)
                        
                    NN_model = create_NN2(l_rate, l1_pen)
                        
                    history = NN_model.fit(
                        X_trai.loc[TRAI], Y_trai.loc[TRAI],
                        validation_data=(X_trai.loc[VALI], Y_trai.loc[VALI]),
                        epochs=200, verbose=0, shuffle=True, batch_size=2000,
                        callbacks=[earlyStopping, mcp_save]
                    )
                    tra_loss = np.min(history.history['loss'])
                    val_loss = np.min(history.history['val_loss'])

                    results.append({
                        'model_ix' :model_ix,
                        'l_rate'   :l_rate,
                        'l1_pen'   :l1_pen,
                        'fold'     :FOLD,
                        'tra_loss' :tra_loss,
                        'val_loss' :val_loss
                    })
                    
                    # Calculate validation predictions
                    best_model = create_NN2(l_rate, l1_pen)
                    best_model.load_weights(r'%s/%s_%d_%d.h5'%(model_name, model_name, model_ix, FOLD));
                    Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])
                    
                    # Calculate predictions for test data, if FOLD = 0
                    if FOLD==0:
                        Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)

                model_ix += 1

• 0 : Epoch 8: early stopping
Epoch 8: early stopping
Epoch 8: early stopping
Epoch 10: early stopping
• 1 : Epoch 7: early stopping
Epoch 14: early stopping
Epoch 12: early stopping
Epoch 7: early stopping
• 2 : Epoch 8: early stopping
Epoch 7: early stopping
Epoch 7: early stopping
Epoch 7: early stopping
• 3 : Epoch 8: early stopping
Epoch 8: early stopping
Epoch 7: early stopping
Epoch 7: early stopping
• 4 : Epoch 8: early stopping
Epoch 11: early stopping
Epoch 9: early stopping
Epoch 8: early stopping
• 5 : Epoch 8: early stopping
Epoch 8: early stopping
Epoch 9: early stopping
Epoch 8: early stopping
• 6 : Epoch 8: early stopping
Epoch 9: early stopping
Epoch 7: early stopping
Epoch 7: early stopping
• 7 : Epoch 8: early stopping
Epoch 8: early stopping
Epoch 8: early stopping
Epoch 8: early stopping
• 8 : Epoch 12: early stopping
Epoch 9: early stopping
Epoch 14: early stopping
Epoch 9: early stopping
• 9 : Epoch 8: early stopping
Epoch 9: early stopping
Epoch 9: early stoppin

Epoch 15: early stopping
Epoch 14: early stopping
Epoch 13: early stopping
• 22 : Epoch 17: early stopping
Epoch 16: early stopping
Epoch 16: early stopping
Epoch 17: early stopping
• 23 : Epoch 20: early stopping
Epoch 21: early stopping
Epoch 18: early stopping
Epoch 23: early stopping
• 24 : Epoch 22: early stopping
Epoch 24: early stopping
Epoch 22: early stopping
Epoch 29: early stopping
• 25 : Epoch 20: early stopping
Epoch 23: early stopping
Epoch 29: early stopping
Epoch 20: early stopping
• 26 : Epoch 26: early stopping
Epoch 26: early stopping
Epoch 33: early stopping
Epoch 25: early stopping
• 27 : Epoch 22: early stopping
Epoch 24: early stopping
Epoch 21: early stopping
Epoch 25: early stopping
• 28 : Epoch 13: early stopping
Epoch 13: early stopping
Epoch 16: early stopping
Epoch 17: early stopping
• 29 : Epoch 28: early stopping
Epoch 28: early stopping
Epoch 24: early stopping
Epoch 28: early stopping
• 30 : Epoch 10: early stopping
Epoch 9: early stopping
Epoch 9: earl

Epoch 9: early stopping
Epoch 9: early stopping
Epoch 11: early stopping
• 43 : Epoch 10: early stopping
Epoch 15: early stopping
Epoch 10: early stopping
Epoch 11: early stopping
• 44 : Epoch 15: early stopping
Epoch 11: early stopping
Epoch 12: early stopping
Epoch 12: early stopping
• 45 : Epoch 12: early stopping
Epoch 10: early stopping
Epoch 11: early stopping
Epoch 11: early stopping
• 46 : Epoch 15: early stopping
Epoch 14: early stopping
Epoch 11: early stopping
Epoch 11: early stopping
• 47 : Epoch 13: early stopping
Epoch 11: early stopping
Epoch 11: early stopping
Epoch 12: early stopping
• 48 : Epoch 10: early stopping
Epoch 10: early stopping
Epoch 8: early stopping
Epoch 9: early stopping
• 49 : Epoch 13: early stopping
Epoch 14: early stopping
Epoch 13: early stopping
Epoch 12: early stopping
• 50 : Epoch 38: early stopping
Epoch 30: early stopping
Epoch 30: early stopping
Epoch 30: early stopping
• 51 : Epoch 24: early stopping
Epoch 25: early stopping
Epoch 19: early 

• 63 : Epoch 14: early stopping
Epoch 15: early stopping
Epoch 13: early stopping
Epoch 13: early stopping
• 64 : Epoch 15: early stopping
Epoch 15: early stopping
Epoch 14: early stopping
Epoch 16: early stopping
• 65 : Epoch 22: early stopping
Epoch 18: early stopping
Epoch 16: early stopping
Epoch 15: early stopping
• 66 : Epoch 20: early stopping
Epoch 14: early stopping
Epoch 15: early stopping
Epoch 15: early stopping
• 67 : Epoch 13: early stopping
Epoch 15: early stopping
Epoch 14: early stopping
Epoch 16: early stopping
• 68 : Epoch 14: early stopping
Epoch 12: early stopping
Epoch 9: early stopping
Epoch 9: early stopping
• 69 : Epoch 13: early stopping
Epoch 17: early stopping
Epoch 16: early stopping
Epoch 16: early stopping
• 70 : Epoch 20: early stopping
Epoch 21: early stopping
Epoch 24: early stopping
Epoch 21: early stopping
• 71 : Epoch 15: early stopping
Epoch 19: early stopping
Epoch 13: early stopping
Epoch 14: early stopping
• 72 : Epoch 13: early stopping
Epoch 2

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 17: early stopping
Epoch 22: early stopping
Epoch 28: early stopping
• 100 : Epoch 29: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 21: early stopping
Epoch 28: early stopping
Epoch 30: early stopping
• 101 : Epoch 23: early stopping
   1/1929 [..............................] - ETA: 1:30

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 19: early stopping
Epoch 18: early stopping
Epoch 20: early stopping
• 102 : Epoch 27: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 23: early stopping
Epoch 22: early stopping
Epoch 24: early stopping
• 103 : Epoch 27: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 34: early stopping
Epoch 23: early stopping
Epoch 23: early stopping
• 104 : Epoch 26: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 23: early stopping
Epoch 27: early stopping
Epoch 24: early stopping
• 105 : Epoch 22: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 21: early stopping
Epoch 27: early stopping
Epoch 21: early stopping
• 106 : Epoch 30: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 26: early stopping
Epoch 21: early stopping
Epoch 26: early stopping
• 107 : Epoch 31: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 26: early stopping
Epoch 28: early stopping
Epoch 23: early stopping
• 108 : Epoch 15: early stopping
   1/1929 [..............................] - ETA: 1:07

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 13: early stopping
Epoch 16: early stopping
Epoch 23: early stopping
• 109 : Epoch 29: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 27: early stopping
Epoch 31: early stopping
Epoch 28: early stopping
• 110 : Epoch 60: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 45: early stopping
Epoch 63: early stopping
Epoch 60: early stopping
• 111 : Epoch 42: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 33: early stopping
Epoch 41: early stopping
Epoch 39: early stopping
• 112 : Epoch 47: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 44: early stopping
Epoch 50: early stopping
Epoch 39: early stopping
• 113 : Epoch 57: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 45: early stopping
Epoch 52: early stopping
Epoch 52: early stopping
• 114 : Epoch 57: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 45: early stopping
Epoch 60: early stopping
Epoch 52: early stopping
• 115 : Epoch 45: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 52: early stopping
Epoch 58: early stopping
Epoch 45: early stopping
• 116 : Epoch 59: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 37: early stopping
Epoch 53: early stopping
Epoch 45: early stopping
• 117 : Epoch 66: early stopping
  24/1929 [..............................] - ETA: 4s  

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 55: early stopping
Epoch 56: early stopping
Epoch 54: early stopping
• 118 : Epoch 37: early stopping


  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 32: early stopping
Epoch 40: early stopping
Epoch 40: early stopping
• 119 : Epoch 49: early stopping


  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 62: early stopping
Epoch 71: early stopping
Epoch 57: early stopping
• 120 : Epoch 21: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 17: early stopping
Epoch 36: early stopping
Epoch 23: early stopping
• 121 : Epoch 22: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 20: early stopping
Epoch 22: early stopping
Epoch 24: early stopping
• 122 : Epoch 26: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 23: early stopping
Epoch 22: early stopping
Epoch 28: early stopping
• 123 : Epoch 26: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 20: early stopping
Epoch 28: early stopping
Epoch 37: early stopping
• 124 : Epoch 17: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 21: early stopping
Epoch 23: early stopping
Epoch 19: early stopping
• 125 : Epoch 22: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 19: early stopping
Epoch 28: early stopping
Epoch 31: early stopping
• 126 : Epoch 30: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 20: early stopping
Epoch 26: early stopping
Epoch 26: early stopping
• 127 : Epoch 19: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 24: early stopping
Epoch 31: early stopping
Epoch 24: early stopping
• 128 : Epoch 17: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 12: early stopping
Epoch 17: early stopping
Epoch 21: early stopping
• 129 : Epoch 19: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 17: early stopping
Epoch 29: early stopping
Epoch 36: early stopping
• 130 : Epoch 36: early stopping


  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 23: early stopping
Epoch 29: early stopping
Epoch 29: early stopping
• 131 : Epoch 23: early stopping


  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 22: early stopping
Epoch 17: early stopping
Epoch 22: early stopping
• 132 : Epoch 27: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 23: early stopping
Epoch 29: early stopping
Epoch 20: early stopping
• 133 : Epoch 27: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 24: early stopping
Epoch 21: early stopping
Epoch 24: early stopping
• 134 : Epoch 26: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 29: early stopping
Epoch 30: early stopping
Epoch 19: early stopping
• 135 : Epoch 31: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 37: early stopping
Epoch 37: early stopping
Epoch 31: early stopping
• 136 : Epoch 30: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 20: early stopping
Epoch 32: early stopping
Epoch 28: early stopping
• 137 : Epoch 29: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 27: early stopping
Epoch 36: early stopping
Epoch 25: early stopping
• 138 : Epoch 23: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 21: early stopping
Epoch 26: early stopping
Epoch 21: early stopping
• 139 : Epoch 37: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 19: early stopping
Epoch 38: early stopping
Epoch 26: early stopping
• 140 : Epoch 60: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 45: early stopping
Epoch 48: early stopping
Epoch 43: early stopping
• 141 : Epoch 44: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 30: early stopping
Epoch 45: early stopping
Epoch 36: early stopping
• 142 : Epoch 47: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 34: early stopping
Epoch 45: early stopping
Epoch 39: early stopping
• 143 : Epoch 60: early stopping


  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 35: early stopping
Epoch 42: early stopping
Epoch 41: early stopping
• 144 : Epoch 41: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 45: early stopping
Epoch 41: early stopping
Epoch 44: early stopping
• 145 : Epoch 45: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 37: early stopping
Epoch 45: early stopping
Epoch 47: early stopping
• 146 : Epoch 59: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 46: early stopping
Epoch 41: early stopping
Epoch 48: early stopping
• 147 : Epoch 66: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 42: early stopping
Epoch 45: early stopping
Epoch 51: early stopping
• 148 : Epoch 48: early stopping
   1/1929 [..............................] - ETA: 1:00

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 32: early stopping
Epoch 33: early stopping
Epoch 28: early stopping
• 149 : Epoch 65: early stopping
   1/1929 [..............................] - ETA: 30s

  Y_val_preds.loc[VALI, '%s_%d'%(model_name, model_ix)] = best_model.predict(X_trai.loc[VALI])




  Y_test_preds.loc[['2019','2020', '2021'], '%s_%d'%(model_name, model_ix)] = best_model.predict(X_test)


Epoch 55: early stopping
Epoch 45: early stopping
Epoch 48: early stopping


In [10]:
# Save Y_val_preds
Y_val_preds.to_csv(r'%s/%s_val_preds.csv'%(model_name, model_name))
Y_val_preds

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ret_ex,NN2_trad_0,NN2_trad_1,NN2_trad_2,NN2_trad_3,NN2_trad_4,NN2_trad_5,NN2_trad_6,NN2_trad_7,NN2_trad_8,...,NN2_trad_140,NN2_trad_141,NN2_trad_142,NN2_trad_143,NN2_trad_144,NN2_trad_145,NN2_trad_146,NN2_trad_147,NN2_trad_148,NN2_trad_149
year,YM,permno,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2007,2007-01,10025,-0.031894,0.011511,0.010265,0.011895,0.007963,0.010351,0.010074,0.009212,0.008278,0.004905,...,0.014709,0.013735,0.015137,0.017624,0.025608,0.031536,0.014618,0.010143,0.016104,0.013342
2007,2007-01,10026,-0.042317,0.011511,0.010265,0.011895,0.007963,0.010351,0.010073,0.009212,0.008278,0.004897,...,0.013402,0.011478,0.011170,0.009705,0.010831,0.013463,0.007600,0.008071,0.007851,0.010496
2007,2007-01,10042,-0.125751,0.011511,0.010265,0.011895,0.007963,0.010351,0.010069,0.009212,0.008278,0.004893,...,-0.023655,-0.046739,-0.020375,-0.009305,-0.013054,-0.010110,-0.020202,-0.022547,-0.033088,-0.029064
2007,2007-01,10078,-0.080607,0.011511,0.010265,0.011895,0.007963,0.010351,0.010073,0.009212,0.008277,0.004895,...,0.015101,0.006162,0.007734,0.012032,0.003991,0.016626,0.007027,0.006293,0.010678,0.011064
2007,2007-01,10104,-0.046341,0.011511,0.010265,0.011895,0.007963,0.010351,0.010074,0.009212,0.008277,0.004893,...,0.014709,0.009012,0.007695,0.013807,0.017185,0.009850,0.008612,0.009344,0.013053,0.017028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018,2018-12,93420,0.086508,0.007405,0.004575,0.010017,0.004956,0.006808,0.007098,0.006236,0.005537,0.004084,...,-0.047416,0.010884,-0.015578,-0.041345,0.007947,-0.024384,-0.030272,-0.001859,-0.039689,-0.013780
2018,2018-12,93422,0.466817,0.007405,0.004575,0.010017,0.004956,0.006808,0.007098,0.006236,0.005537,0.004084,...,-0.060749,0.006642,-0.041883,-0.012960,-0.005954,-0.026264,-0.011008,-0.034243,-0.024282,-0.014388
2018,2018-12,93423,0.105036,0.007405,0.004575,0.010017,0.004956,0.006808,0.007098,0.006236,0.005537,0.004084,...,0.008266,-0.000921,0.005983,0.010616,-0.002082,0.005450,0.011181,0.003295,0.007356,0.001800
2018,2018-12,93429,-0.048712,0.007405,0.004575,0.010017,0.004956,0.006808,0.007098,0.006236,0.005537,0.004084,...,0.011372,0.011003,0.004800,0.013533,0.005680,0.012094,0.019665,-0.002922,0.012965,0.009839


In [11]:
# Save Y_test_preds
Y_test_preds.to_csv(r'%s/%s_test_preds.csv'%(model_name, model_name))
Y_test_preds

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ret_ex,NN2_trad_0,NN2_trad_1,NN2_trad_2,NN2_trad_3,NN2_trad_4,NN2_trad_5,NN2_trad_6,NN2_trad_7,NN2_trad_8,...,NN2_trad_140,NN2_trad_141,NN2_trad_142,NN2_trad_143,NN2_trad_144,NN2_trad_145,NN2_trad_146,NN2_trad_147,NN2_trad_148,NN2_trad_149
year,YM,permno,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2019,2019-01,10026,0.004225,0.011511,0.010265,0.011895,0.007963,0.010351,0.010074,0.009212,0.008279,0.004895,...,0.011137,0.009537,0.008004,0.006528,0.011920,0.006303,0.005997,0.002399,0.006609,0.010488
2019,2019-01,10104,0.036026,0.011511,0.010265,0.011895,0.007963,0.010351,0.010071,0.009212,0.008278,0.004906,...,0.001408,0.010083,0.007149,0.014437,0.005109,0.008852,0.007119,0.001449,0.007096,0.010157
2019,2019-01,10107,0.075381,0.011511,0.010265,0.011895,0.007963,0.010351,0.010074,0.009212,0.008278,0.004896,...,0.014709,0.008990,0.008857,0.010166,0.014434,0.006693,0.008441,0.009491,0.009728,0.011276
2019,2019-01,10138,0.072777,0.011511,0.010265,0.011895,0.007963,0.010351,0.010073,0.009212,0.008278,0.004895,...,0.013777,0.009500,0.005389,0.009839,0.011254,0.007971,0.004971,0.008319,0.009426,0.010524
2019,2019-01,10145,0.076596,0.011511,0.010265,0.011895,0.007963,0.010351,0.010073,0.009212,0.008278,0.004900,...,0.009680,0.008903,0.007258,0.008701,0.013430,0.011732,0.007045,0.001612,0.008828,0.009287
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021,2021-12,93304,-0.096386,0.011511,0.010265,0.011895,0.007963,0.010351,0.010072,0.009212,0.008277,0.004895,...,0.012452,0.010854,0.009769,0.014506,0.013393,0.007730,0.014032,0.009849,0.013256,-0.002604
2021,2021-12,93373,-0.019481,0.011511,0.010265,0.011895,0.007963,0.010351,0.010075,0.009212,0.008277,0.004893,...,-0.009344,-0.007708,-0.005936,0.005287,0.003187,0.002504,-0.036110,-0.043229,-0.072548,-0.023095
2021,2021-12,93374,-0.047552,0.011511,0.010265,0.011895,0.007963,0.010351,0.010072,0.009212,0.008277,0.004903,...,0.009387,0.008697,0.006769,0.010154,0.014935,0.006744,0.008072,0.001911,0.009124,0.011435
2021,2021-12,93423,-0.072569,0.011511,0.010265,0.011895,0.007963,0.010351,0.010071,0.009212,0.008277,0.004892,...,0.011000,-0.000045,0.009666,0.023509,0.015797,0.022272,0.009060,-0.009113,0.013146,0.017847


In [12]:
# Result overview
table = pd.DataFrame(results)
table = table.groupby(['model_ix', 'l_rate', 'l1_pen']).mean().sort_values('val_loss')
table.to_csv(r'%s/%s_results.csv'%(model_name, model_name))

np.sqrt(table[['tra_loss', 'val_loss']].head(20)) * 100

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tra_loss,val_loss
model_ix,l_rate,l1_pen,Unnamed: 3_level_1,Unnamed: 4_level_1
81,0.001,0.0001,12.989207,12.954304
85,0.001,0.0001,12.989086,12.956401
89,0.001,0.0001,12.988984,12.95762
112,0.001,5e-05,12.974317,12.958335
84,0.001,0.0001,12.99098,12.961921
87,0.001,0.0001,12.990828,12.962749
113,0.001,5e-05,12.97172,12.965642
121,0.01,1e-05,12.981387,12.965732
117,0.001,5e-05,12.977291,12.967521
88,0.001,0.0001,12.991679,12.967552


## Model Selection: Maximize SR on LS-Portfolio on validation data

In [13]:
# Model
model_name = 'NN2_trad'

# Get predictions
val = pd.read_csv(r'%s/%s_val_preds.csv'%(model_name, model_name), index_col=['year', 'YM', 'permno'])
test = pd.read_csv(r'%s/%s_test_preds.csv'%(model_name, model_name), index_col=['year', 'YM', 'permno'])

# Get overview table
table = pd.read_csv(r'%s/%s_results.csv'%(model_name, model_name)).sort_values(['model_ix'])

# Extract hyperparameters
list_l_rate = list(pd.unique(table['l_rate']))
list_l1_pen = list(pd.unique(table['l1_pen']))

In [14]:
# Loop over hyperparameter combinations to get ensemble predictions for each hyperparameter combination
val_results = []
hyper_comb = 0
for l_rate in list_l_rate:
    for l1_pen in list_l1_pen:
        # Get list of relevant model_ix
        list_model_ix = table[(table['l_rate'] == l_rate) & (table['l1_pen'] == l1_pen)]
        list_model_ix = list(list_model_ix['model_ix'])
        # Get list of column names
        list_col_names = []
        for ele in list_model_ix:
            list_col_names.append('%s_%d'%(model_name, ele))
        # Select relevant returns and average predictions
        val_ret = val[list_col_names]
        val_ret = val_ret.mean(axis=1)
        # Append ensemble return prediction to true returns
        comb = val[['ret_ex']].copy()
        comb['ret_pred'] = val_ret
        comb = comb.reset_index()
        # Sort the data by predicted returns and divide the data into quintiles
        comb['quintile'] = comb.groupby(['YM'])['ret_pred'].transform(lambda x: pd.qcut(x.rank(method='first'), 5, labels=np.arange(1,6)))
        # Calculate the mean return for each YM
        comb_mean = comb.groupby(['YM', 'quintile']).agg(ret_ex = ('ret_ex', 'mean'))
        # Add LS-Strategy
        comb_mean = comb_mean[['ret_ex']].unstack().add_prefix('Q')
        comb_mean.columns = comb_mean.columns.droplevel(0)
        comb_mean.columns.name = None
        comb_mean['LS'] = comb_mean['Q5'] - comb_mean['Q1']
        # Calculate the average return, standard deviation and Sharpe Ratio (annualized) per Quintile
        summary = pd.DataFrame()
        summary['mean'] = comb_mean.mean()
        summary['std'] = comb_mean.std()
        summary['SR'] = summary['mean'] / summary['std'] * np.sqrt(12)
        # Append results
        val_results.append({
            'hyper_comb':hyper_comb,
            'l_rate'    :l_rate,
            'l1_pen'    :l1_pen,
            'SR_Q1'     :summary.loc['Q1','SR'],
            'SR_Q5'     :summary.loc['Q5','SR'],
            'SR_LS'     :summary.loc['LS','SR'],
            'Mean_Q1'   :summary.loc['Q1','mean'],
            'Mean_Q5'   :summary.loc['Q5','mean'],
            'Mean_LS'   :summary.loc['LS','mean'],
        })
        hyper_comb += 1
        
# Save results
val_results = pd.DataFrame(val_results)
val_results.to_csv(r'results/{}_val.csv'.format(model_name))

In [15]:
# Select best hyperparemeters (max. SR_LS)
val_results = val = pd.read_csv(r'results/{}_val.csv'.format(model_name))
val_results = val_results.set_index(['l_rate', 'l1_pen']).sort_values(['SR_LS'], ascending=False)
l_rate_opt, l1_pen_opt = tuple([x for x in val_results.iloc[0].name[0:]])
print('Optimal l_rate:', l_rate_opt)
print('Optimal l1_pen:', l1_pen_opt)
val_results

Optimal l_rate: 0.001
Optimal l1_pen: 1e-05


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,hyper_comb,SR_Q1,SR_Q5,SR_LS,Mean_Q1,Mean_Q5,Mean_LS
l_rate,l1_pen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.001,1e-05,14,14,0.08819,0.442484,0.82003,0.001927,0.008234,0.006307
0.005,1e-05,9,9,0.059802,0.421488,0.6167,0.001384,0.007287,0.005904
0.001,5e-05,13,13,0.033631,0.55706,0.51519,0.000816,0.008111,0.007295
0.005,5e-05,8,8,0.055401,0.621244,0.507178,0.001359,0.008665,0.007306
0.01,5e-05,3,3,0.057433,0.592869,0.446298,0.001404,0.008062,0.006658
0.01,1e-05,4,4,0.058926,0.363742,0.419169,0.001406,0.005966,0.00456
0.001,0.0005,11,11,0.26174,0.46156,0.411561,0.004789,0.007579,0.00279
0.01,0.0005,1,1,0.245685,0.392133,0.399862,0.004452,0.006938,0.002486
0.005,0.0001,7,7,0.105644,0.616552,0.353338,0.002522,0.007638,0.005117
0.001,0.0001,12,12,0.127254,0.571299,0.334417,0.003081,0.00783,0.004749


## Out-of-Sample: Get Predictions of model with best hyperparameter combination

In [16]:
# Get list of model_ix's with best hyperparameter combinations and make ensemble predictions
list_model_ix = table[(table['l_rate'] == l_rate_opt) & (table['l1_pen'] == l1_pen_opt)]
list_model_ix = list(list_model_ix['model_ix'])

# Get list of column names
list_col_names = []
for ele in list_model_ix:
    list_col_names.append('%s_%d'%(model_name, ele))
    
# Select relevant returns and average predictions
test_ret = test[list_col_names].copy()
test_ret['y_pred'] = test_ret.mean(axis=1)

# Save predictions
test_ret = test_ret.reset_index()
test_ret = test_ret[['YM', 'permno', 'y_pred']]
test_ret.to_csv(r'results/{}_predictions.csv'.format(model_name), index=False)