In [22]:
# import libraries and load data
import sys

import pandas as pd
import numpy as np
import sklearn as skl
import tensorflow as tf
import scikeras as sck
from tqdm import tqdm

from sklearn.model_selection import PredefinedSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.regularizers import l1, l2, l1_l2
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping
from collections import defaultdict

#Import the data
return_data = pd.read_pickle('returns_chars_panel.pkl')
return_data['date'] = pd.to_datetime(return_data['date'])
print(return_data)

macro_data = pd.read_pickle('macro_timeseries.pkl')
macro_data['date'] = pd.to_datetime(macro_data['date'])
print(macro_data)

original_data = pd.merge(return_data, macro_data, how='inner', on='date')
print(original_data)

              date  permno  excess_ret       ret     rfree     mvel1  \
0       1986-02-01   10000   -0.262610 -0.257143  0.005467 -0.375440   
1       1986-03-01   10000    0.360335  0.365385  0.005050 -0.496811   
2       1986-04-01   10000   -0.103717 -0.098592  0.005125 -0.401783   
3       1986-05-01   10000   -0.227831 -0.222656  0.005175 -0.435735   
4       1986-06-01   10000   -0.009883 -0.005025  0.004858 -0.534203   
...            ...     ...         ...       ...       ...       ...   
3739444 2016-08-01   93436   -0.097265 -0.097023  0.000242  0.948225   
3739445 2016-09-01   93436   -0.037915 -0.037640  0.000275  0.940011   
3739446 2016-10-01   93436   -0.031253 -0.030878  0.000375  0.936380   
3739447 2016-11-01   93436   -0.042553 -0.042128  0.000425  0.936096   
3739448 2016-12-01   93436    0.127822  0.128247  0.000425  0.929911   

             beta    betasq     chmom    dolvol  ...    stdacc     stdcf  \
0        0.000000  0.000000  0.000000  0.000000  ...  0.000

**Preparation for Grid Search**

In [23]:
#Split a small portion of data for experiments
exp_size = 0.00001
n_exp = int(original_data.shape[0] * exp_size)
exp_data = original_data.iloc[:n_exp, ] 
X_exp = exp_data.drop(['ret','excess_ret','rfree','permno','date'], axis=1)
y_exp = exp_data['excess_ret']
print(X_exp)
print(y_exp)
X = X_exp.values
y = y_exp.values

       mvel1      beta    betasq     chmom    dolvol   idiovol    indmom  \
0  -0.375440  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
1  -0.700991  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
2  -0.438120  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
3   0.008634  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
4  -0.919092  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
5   0.158299  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
6   0.020147  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
7  -0.705469  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
8  -0.376719  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
9  -0.527023  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
10 -0.793412  0.000000  0.000000  0.000000  0.000000  0.000000  0.305196   
11 -0.003838  0.743782  0.743782  0.382145  0.338629  0.384147 -0.776019   
12  0.513911

In [24]:
#Split the data manually (keep the data sequence in time-series)
train_size = 0.8
test_size = 1 - train_size

n_observations = X.shape[0]
n_train = int(n_observations * train_size)
X_train, X_test = X[:n_train, ], X[n_train:, ]
y_train, y_test = y[:n_train, ], y[n_train:, ]

In [25]:
#Standardise the data
standard_scaler = skl.preprocessing.StandardScaler()
standard_scaler.fit(X_train)
X_train = standard_scaler.transform(X_train)
X_test = standard_scaler.transform(X_test)

```<br><font color='black'>
Define a hyperparameter space for neural networks with 3 hidden layers```
```complex_param_grid_nn_3 = {
                 'model__optimizer': ['adam', 'sgd', 'rmsprop', 'adagrad'],
                 'model__learning_rate': [0.1, 0.01, 0.001],
                 'model__activation_func': ['relu', 'tanh', 'sigmoid', 'elu'],
                 #'batch_size': [32, 64, 128, 256], #Don't know how to grid search this
                 #'epochs': [10, 20, 50], #Don't know how to grid search this
                 'model__neurons_n': [[64, 32, 16], [128, 64, 32], [256, 128, 64]],
                 'model__dropout_rate': [0.0, 0.2, 0.5],
                 'model__regularize_terms': [None,
                                    l1(0.1), l1(0.01), l1(0.001),
                                    l2(0.1), l2(0.01), l2(0.001),
                                    l1_l2(l1=0.1, l2=0.1), l1_l2(l1=0.01, l2=0.01),
                                    l1_l2(l1=0.001, l2=0.001)]
                 #'loss_func': ['mse', 'mae', 'mape', 'msle', tf.keras.losses.Huber()], #Should be the same as the 'scoring' input of GridSearchCV
                 #'metrics_func': [['mae'], ['mse'], ['mape'], ['msle'], [rmse_metric]] #Meaningless hyperparameter: Do not influence model performance
                 #'callbacks': [[early_stopping]] #Dangerous
                 }```

In [26]:
#Index-generating function for rolling-window time-series cross validation
each_window_size = int(0.2 * X_train.shape[0])
n_train_window = int(0.75 * each_window_size)
n_test_window = each_window_size - n_train_window

In [27]:
def rolling_window_index_generator(X_train, train_window_size=n_train_window, test_window_size=n_test_window, step_size=1):
    n_samples = X_train.shape[0]
    indices = np.arange(n_samples)
    for start in range(0, n_samples - train_window_size - test_window_size + 1, step_size):
        train_end = start + train_window_size
        test_end = train_end + test_window_size

        if test_end <= n_samples:
            train_indices = indices[start:train_end]
            test_indices = indices[train_end:test_end]


            yield train_indices, test_indices

In [28]:
#Implement rolling-window time-series cross validation for hyperparameter grid search
val_scores_dict = defaultdict(list)

#Get a function to transfer the list types in params.items() to tuple types
#for further use when we use these tuples as the keys of val_scores_dict
def make_hashable(params):
    hashable_params = {}
    for key, value in params.items():
        if isinstance(value, list):
            hashable_params[key] = tuple(value)
        else:
            hashable_params[key] = value
    return hashable_params

**Neural Network model with 2 hidden layers**

In [29]:
simple_param_grid_nn_2 = {
    'model__optimizer': ['sgd', 'adam'],
    'model__learning_rate': [ 0.01, 0.001],
    'model__activation_func': ['relu', 'sigmoid'],
    'model__neurons_n': [[64, 32], [128, 64], [256, 128]],
    'model__dropout_rate': [0.0, 0.5],
    'model__regularize_terms': [None, l1(0.01), l2(0.01)]
}

In [30]:
#Neural Network 2 Function
def neural_net_2(input_shape, optimizer, learning_rate, activation_func, neurons_n, dropout_rate, regularize_terms, loss_func='mse', metrics_func=['mae']):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(neurons_n[0], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(neurons_n[1], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    optimizer_instance = tf.keras.optimizers.get(optimizer)
    optimizer_instance.learning_rate = learning_rate
    model.compile(optimizer=optimizer_instance, loss=loss_func, metrics=metrics_func)
    return model

In [31]:
val_scores_dict_2 = defaultdict(list)

for train_indices, test_indices in tqdm(rolling_window_index_generator(X_train=X_train, step_size=n_train_window), file=sys.stdout):
    X_train_cv, y_train_cv = X_train[train_indices], y_train[train_indices]
    X_val_cv, y_val_cv = X_train[test_indices], y_train[test_indices]

    current_fold = np.zeros(X_train_cv.shape[0] + X_val_cv.shape[0])
    current_fold[:X_train_cv.shape[0]] = -1
    current_fold[X_train_cv.shape[0]:] = 0
    ps = skl.model_selection.PredefinedSplit(current_fold)

    X_combined = np.vstack((X_train_cv, X_val_cv))
    y_combined = np.concatenate((y_train_cv, y_val_cv))

    NN_2 = KerasRegressor(model=neural_net_2, input_shape=(X_combined.shape[1],), verbose=2)
    grid_2 = skl.model_selection.GridSearchCV(estimator=NN_2, param_grid=simple_param_grid_nn_2, scoring='neg_mean_squared_error', cv=ps, error_score='raise')
    grid_2.fit(X_combined, y_combined)

    for params, mean_score in zip(grid_2.cv_results_['params'], grid_2.cv_results_['mean_test_score']):
        hashable_params = make_hashable(params)
        val_scores_dict_2[tuple(hashable_params.items())].append(mean_score)

mean_val_scores_2 = {params: np.mean(scores) for params, scores in val_scores_dict_2.items()}
best_params_2 = min(mean_val_scores_2, key=mean_val_scores_2.get)
optimised_params_2 = dict(best_params_2)
print(f'Optimised hyperparameters for 2 layers: {optimised_params_2} & Average validation score: {mean_val_scores_2[best_params_2]}')


0it [00:00, ?it/s]

1/1 - 1s - loss: 0.1068 - mae: 0.2574 - 642ms/epoch - 642ms/step
1/1 - 0s - 68ms/epoch - 68ms/step
1/1 - 0s - loss: 8.9929 - mae: 0.1965 - 472ms/epoch - 472ms/step
1/1 - 0s - 53ms/epoch - 53ms/step
1/1 - 0s - loss: 1.2632 - mae: 0.1960 - 374ms/epoch - 374ms/step
1/1 - 0s - 51ms/epoch - 51ms/step
1/1 - 1s - loss: 0.0429 - mae: 0.1907 - 595ms/epoch - 595ms/step
1/1 - 0s - 55ms/epoch - 55ms/step
1/1 - 1s - loss: 8.9527 - mae: 0.1276 - 585ms/epoch - 585ms/step
1/1 - 0s - 48ms/epoch - 48ms/step
1/1 - 1s - loss: 1.4203 - mae: 0.4237 - 551ms/epoch - 551ms/step
1/1 - 0s - 43ms/epoch - 43ms/step
1/1 - 0s - loss: 0.0748 - mae: 0.1743 - 328ms/epoch - 328ms/step
1/1 - 0s - 49ms/epoch - 49ms/step
1/1 - 0s - loss: 17.9455 - mae: 0.0839 - 398ms/epoch - 398ms/step
1/1 - 0s - 43ms/epoch - 43ms/step
1/1 - 0s - loss: 2.2015 - mae: 0.3793 - 330ms/epoch - 330ms/step
1/1 - 0s - 42ms/epoch - 42ms/step
1/1 - 1s - loss: 0.0224 - mae: 0.1035 - 514ms/epoch - 514ms/step
1/1 - 0s - 44ms/epoch - 44ms/step
1/1 - 1s 

In [32]:
#Train models with optimised hyperparameters
NN_2 = neural_net_2(input_shape=(X_train.shape[1],),
                    optimizer=optimised_params_2['model__optimizer'],
                    learning_rate=optimised_params_2['model__learning_rate'],
                    activation_func=optimised_params_2['model__activation_func'],
                    dropout_rate=optimised_params_2['model__dropout_rate'],
                    neurons_n=optimised_params_2['model__neurons_n'],
                    regularize_terms=optimised_params_2['model__regularize_terms'])

In [33]:
X_NN = original_data.drop(['ret','excess_ret','rfree','permno','date'], axis=1)
y_NN = original_data['excess_ret']
date = original_data['date']

In [36]:
def expanding_window_indices(start_date, end_date, step_size='1Y', val_size='4Y', test_size='1Y'):
    date_range = pd.date_range(start=start_date, end=end_date, freq=step_size)
    indices = []
    for end_val in date_range:
        start = pd.Timestamp(start_date)
        end_train = end_val - pd.DateOffset(years=int(test_size[:-1])) - pd.DateOffset(years=int(val_size[:-1]))
        if end_train < start:
            continue
        train_mask = (date >= start) & (date <= end_train)
        val_mask = (date > end_train) & (date <= (end_train + pd.DateOffset(years=int(val_size[:-1]))))
        test_mask = (date > (end_train + pd.DateOffset(years=int(val_size[:-1])))) & (date <= end_val)
        if test_mask.any():
            indices.append((train_mask, val_mask, test_mask))
    return indices

# Generate the expanding window indices
expanding_indices = expanding_window_indices('1986-02-01', '2016-12-01')

In [37]:
r2_oos_2 = []

for train_mask, val_mask, test_mask in tqdm(expanding_indices):
    X_train, y_train = X_NN.loc[train_mask].values, y_NN.loc[train_mask].values
    X_val, y_val = X_NN.loc[val_mask].values, y_NN.loc[val_mask].values
    X_test, y_test = X_NN.loc[test_mask].values, y_NN.loc[test_mask].values
    
    NN2_history = NN_2.fit(X_train, y_train, epochs=100, batch_size=10000,
                           validation_data=(X_val, y_val), verbose=0,
                           callbacks=[EarlyStopping(patience=2, restore_best_weights=True)])
    
    predictions = NN_2.predict(X_test)
    ss_res = np.sum((y_test - predictions.T) ** 2)
    ss_tot = np.sum((y_test) ** 2)
    r2_out_of_sample = 1 - (ss_res / ss_tot)
    r2_oos_2.append(r2_out_of_sample)

r2_oos_2_mean = np.mean(r2_oos_2)
print(r2_oos_2_mean)

  0%|          | 0/25 [00:00<?, ?it/s]



  4%|▍         | 1/25 [00:21<08:32, 21.35s/it]



  8%|▊         | 2/25 [00:50<09:59, 26.05s/it]



 12%|█▏        | 3/25 [01:37<13:02, 35.59s/it]

**Neural Network model with 3 hidden layers**

In [None]:
simple_param_grid_nn_3 = {
                 'model__optimizer': ['adam', 'sgd'],
                 'model__learning_rate': [0.01, 0.001],
                 'model__activation_func': ['relu', 'sigmoid'],
                 'model__neurons_n': [[64, 32, 16], [128, 64, 32]],
                 'model__dropout_rate': [0.0, 0.5],
                 'model__regularize_terms': [None, l1(0.01), l2(0.01)]
                 }

In [None]:
#Neural Network 3 Function
def neural_net_3(input_shape, optimizer, learning_rate, activation_func, neurons_n, dropout_rate, regularize_terms, loss_func='mse', metrics_func=['mae']):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(neurons_n[0], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(neurons_n[1], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(neurons_n[2], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    optimizer_instance = tf.keras.optimizers.get(optimizer)
    optimizer_instance.learning_rate = learning_rate
    model.compile(optimizer=optimizer_instance, loss=loss_func, metrics=metrics_func)

    return model

In [None]:
for train_indices, test_indices in tqdm(rolling_window_index_generator(X_train=X_train, step_size=n_train_window), file=sys.stdout):  # n_test_window?
    X_train_cv, y_train_cv = X_train[train_indices], y_train[train_indices]
    
    X_val_cv, y_val_cv = X_train[test_indices], y_train[test_indices]

    current_fold = np.zeros(X_train_cv.shape[0] + X_val_cv.shape[0])
    current_fold[:X_train_cv.shape[0]] = -1 #-1 indicates training set
    current_fold[X_train_cv.shape[0]:] = 0 #0 indicates validation set
    ps = skl.model_selection.PredefinedSplit(current_fold)

    X_combined = np.vstack((X_train_cv, X_val_cv))
    y_combined = np.concatenate((y_train_cv, y_val_cv))

    NN_3 = KerasRegressor(model=neural_net_3, input_shape=(X_combined.shape[1],), verbose=2)
    #print("Available parameters for NN_3: ", NN_3.get_params().keys())
    grid = skl.model_selection.GridSearchCV(estimator=NN_3, param_grid=simple_param_grid_nn_3, scoring='neg_mean_squared_error', cv=ps, error_score='raise')
    grid.fit(X_combined, y_combined)

    #Record the best hyperparameters on average by validation scores across all validation observations (e.g., 1 validation set) within 1 rolling window
    for params, mean_score in zip(grid.cv_results_['params'], grid.cv_results_['mean_test_score']):
        hashable_params = make_hashable(params)
        val_scores_dict[tuple(hashable_params.items())].append(mean_score)


0it [00:00, ?it/s]1/1 - 3s - loss: 0.0833 - mae: 0.2260 - 3s/epoch - 3s/step
1/1 - 0s - 213ms/epoch - 213ms/step
1/1 - 1s - loss: 9.8324 - mae: 0.1371 - 1s/epoch - 1s/step
1/1 - 0s - 129ms/epoch - 129ms/step
1/1 - 1s - loss: 1.4962 - mae: 0.1939 - 1s/epoch - 1s/step
1/1 - 0s - 118ms/epoch - 118ms/step
1/1 - 1s - loss: 0.1356 - mae: 0.3137 - 598ms/epoch - 598ms/step
1/1 - 0s - 119ms/epoch - 119ms/step
1/1 - 1s - loss: 9.9193 - mae: 0.1768 - 625ms/epoch - 625ms/step
1/1 - 0s - 80ms/epoch - 80ms/step
1/1 - 1s - loss: 1.4645 - mae: 0.1375 - 846ms/epoch - 846ms/step
1/1 - 0s - 107ms/epoch - 107ms/step
1/1 - 1s - loss: 0.0178 - mae: 0.1090 - 1s/epoch - 1s/step
1/1 - 0s - 83ms/epoch - 83ms/step
1/1 - 1s - loss: 20.6079 - mae: 0.3571 - 1s/epoch - 1s/step
1/1 - 0s - 101ms/epoch - 101ms/step
1/1 - 1s - loss: 2.5413 - mae: 0.2590 - 1s/epoch - 1s/step
1/1 - 0s - 90ms/epoch - 90ms/step
1/1 - 1s - loss: 0.0316 - mae: 0.1736 - 616ms/epoch - 616ms/step
1/1 - 0s - 84ms/epoch - 84ms/step
1/1 - 1s - loss

In [None]:
#Get the best hyperparamters on average by validation scores across all rolling windows (e.g., all validation sets)
mean_val_scores = {params: np.mean(scores) for params,scores in val_scores_dict.items()}
best_params = min(mean_val_scores, key=mean_val_scores.get)
optimised_params = dict(best_params)
print(f'Optimised hyperparameters: {optimised_params} & Average validation score: {mean_val_scores[best_params]}')

Optimised hyperparameters: {'model__activation_func': 'relu', 'model__dropout_rate': 0.0, 'model__learning_rate': 0.001, 'model__neurons_n': (64, 32, 16), 'model__optimizer': 'sgd', 'model__regularize_terms': <keras.src.regularizers.L2 object at 0x0000013D4E8B1070>} & Average validation score: -1.740365930766274


In [None]:
#Train models with optimised hyperparameters

NN_3 = neural_net_3(input_shape=(X_train.shape[1],),
                    optimizer=optimised_params['model__optimizer'],
                    learning_rate=optimised_params['model__learning_rate'],
                    activation_func=optimised_params['model__activation_func'],
                    dropout_rate=optimised_params['model__dropout_rate'],
                    neurons_n=optimised_params['model__neurons_n'],
                    regularize_terms=optimised_params['model__regularize_terms'])

Epoch 1/10
1/1 - 1s - loss: 1.5440 - mae: 0.2218 - val_loss: 1.7271 - val_mae: 0.3760 - 1s/epoch - 1s/step
Epoch 2/10
1/1 - 0s - loss: 1.5410 - mae: 0.2193 - val_loss: 1.7271 - val_mae: 0.3760 - 37ms/epoch - 37ms/step
Epoch 3/10
1/1 - 0s - loss: 1.5382 - mae: 0.2169 - val_loss: 1.7272 - val_mae: 0.3761 - 38ms/epoch - 38ms/step
Epoch 4/10
1/1 - 0s - loss: 1.5355 - mae: 0.2145 - val_loss: 1.7272 - val_mae: 0.3761 - 36ms/epoch - 36ms/step
Epoch 5/10
1/1 - 0s - loss: 1.5330 - mae: 0.2122 - val_loss: 1.7273 - val_mae: 0.3760 - 41ms/epoch - 41ms/step
Epoch 6/10
1/1 - 0s - loss: 1.5305 - mae: 0.2100 - val_loss: 1.7274 - val_mae: 0.3760 - 46ms/epoch - 46ms/step
Epoch 7/10
1/1 - 0s - loss: 1.5283 - mae: 0.2079 - val_loss: 1.7274 - val_mae: 0.3759 - 62ms/epoch - 62ms/step
Epoch 8/10
1/1 - 0s - loss: 1.5261 - mae: 0.2058 - val_loss: 1.7275 - val_mae: 0.3758 - 61ms/epoch - 61ms/step
Epoch 9/10
1/1 - 0s - loss: 1.5240 - mae: 0.2038 - val_loss: 1.7275 - val_mae: 0.3757 - 65ms/epoch - 65ms/step
Epoch

In [None]:
# make 20 years of training data
date = original_data['date']
training = (date <= '2006-01') # selects 
X_train, y_train = X_NN.loc[training].values, y_NN.loc[training].values 

# make 10 years of validation data
validation = (date > '2002-01') & (date <= '2006-01') 
X_val, y_val = X_NN.loc[validation].values, y_NN.loc[validation].values 

# make test data
test = (date > '2006-01') 
X_test, y_test = X_NN.loc[test].values, y_NN.loc[test].values 

NN3_history = NN_3.fit(X_train, y_train, epochs=100, batch_size=10000, validation_data = (X_val, y_val), verbose=0,  callbacks = [EarlyStopping(patience = 2, restore_best_weights=True)])
predictions = NN_3.predict(X_test)





KeyboardInterrupt: 

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
ss_res = np.sum((y_test - predictions.T) ** 2)
ss_tot = np.sum((y_test) ** 2)
r2_out_of_sample = 1 - (ss_res / ss_tot)

In [None]:
print(r2_out_of_sample)

0.0010841183019766332


In [None]:
r2_oos_3 = []

for train_mask, val_mask, test_mask in tqdm(expanding_indices):
    X_train, y_train = X_NN.loc[train_mask].values, y_NN.loc[train_mask].values
    X_val, y_val = X_NN.loc[val_mask].values, y_NN.loc[val_mask].values
    X_test, y_test = X_NN.loc[test_mask].values, y_NN.loc[test_mask].values
    
    NN3_history = NN_3.fit(X_train, y_train, epochs=100, batch_size=10000,
                           validation_data=(X_val, y_val), verbose=0,
                           callbacks=[EarlyStopping(patience=2, restore_best_weights=True)])
    
    predictions = NN_3.predict(X_test)
    ss_res = np.sum((y_test - predictions.T) ** 2)
    ss_tot = np.sum((y_test) ** 2)
    r2_out_of_sample = 1 - (ss_res / ss_tot)
    r2_oos_3.append(r2_out_of_sample)

r2_oos_3_mean = np.mean(r2_oos_3)
print(r2_oos_3_mean)

**Neural Network model with 4 hidden layers**

In [None]:
simple_param_grid_nn_4 = {
    'model__optimizer': ['sgd', 'adam', 'rmsprop', 'adagrad'],
    'model__learning_rate': [0.1, 0.01, 0.001, 0.0001],
    'model__activation_func': ['relu', 'tanh', 'sigmoid', 'elu'],
    'model__neurons_n': [[256, 128, 64, 32], [128, 64, 32, 16], [512, 256, 128, 64]],
    'model__dropout_rate': [0.0, 0.2, 0.5],
    'model__regularize_terms': [None, l1(0.1), l1(0.01), l1(0.001), l2(0.1), l2(0.01), l2(0.001), l1_l2(l1=0.1, l2=0.1), l1_l2(l1=0.01, l2=0.01), l1_l2(l1=0.001, l2=0.001)]
}

In [None]:
def neural_net_4(input_shape, optimizer, learning_rate, activation_func, neurons_n, dropout_rate, regularize_terms, loss_func='mse', metrics_func=['mae']):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(neurons_n[0], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(neurons_n[1], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(neurons_n[2], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(neurons_n[3], activation=activation_func, kernel_regularizer=regularize_terms))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    optimizer_instance = tf.keras.optimizers.get(optimizer)
    optimizer_instance.learning_rate = learning_rate
    model.compile(optimizer=optimizer_instance, loss=loss_func, metrics=metrics_func)
    return model

In [None]:
val_scores_dict_4 = defaultdict(list)

for train_indices, test_indices in tqdm(rolling_window_index_generator(X_train=X_train, step_size=n_train_window), file=sys.stdout):
    X_train_cv, y_train_cv = X_train[train_indices], y_train[train_indices]
    X_val_cv, y_val_cv = X_train[test_indices], y_train[test_indices]

    current_fold = np.zeros(X_train_cv.shape[0] + X_val_cv.shape[0])
    current_fold[:X_train_cv.shape[0]] = -1
    current_fold[X_train_cv.shape[0]:] = 0
    ps = skl.model_selection.PredefinedSplit(current_fold)

    X_combined = np.vstack((X_train_cv, X_val_cv))
    y_combined = np.concatenate((y_train_cv, y_val_cv))

    NN_4 = KerasRegressor(model=neural_net_4, input_shape=(X_combined.shape[1],), verbose=2)
    grid_2 = skl.model_selection.GridSearchCV(estimator=NN_4, param_grid=simple_param_grid_nn_4, scoring='neg_mean_squared_error', cv=ps, error_score='raise')
    grid_2.fit(X_combined, y_combined)

    for params, mean_score in zip(grid_2.cv_results_['params'], grid_2.cv_results_['mean_test_score']):
        hashable_params = make_hashable(params)
        val_scores_dict_4[tuple(hashable_params.items())].append(mean_score)

mean_val_scores_4 = {params: np.mean(scores) for params, scores in val_scores_dict_4.items()}
best_params_4 = min(mean_val_scores_4, key=mean_val_scores_4.get)
optimised_params_4 = dict(best_params_4)
print(f'Optimised hyperparameters for 2 layers: {optimised_params_4} & Average validation score: {mean_val_scores_4[best_params_4]}')


In [None]:
NN_4 = neural_net_4(input_shape=(X_train.shape[1],),
                    optimizer=optimised_params_4['model__optimizer'],
                    learning_rate=optimised_params_4['model__learning_rate'],
                    activation_func=optimised_params_4['model__activation_func'],
                    dropout_rate=optimised_params_4['model__dropout_rate'],
                    neurons_n=optimised_params_4['model__neurons_n'],
                    regularize_terms=optimised_params_4['model__regularize_terms'])

In [None]:
r2_oos_4 = []

for train_mask, val_mask, test_mask in tqdm(expanding_indices):
    X_train, y_train = X_NN.loc[train_mask].values, y_NN.loc[train_mask].values
    X_val, y_val = X_NN.loc[val_mask].values, y_NN.loc[val_mask].values
    X_test, y_test = X_NN.loc[test_mask].values, y_NN.loc[test_mask].values
    
    NN4_history = NN_4.fit(X_train, y_train, epochs=100, batch_size=10000,
                           validation_data=(X_val, y_val), verbose=0,
                           callbacks=[EarlyStopping(patience=2, restore_best_weights=True)])
    
    predictions = NN_4.predict(X_test)
    ss_res = np.sum((y_test - predictions.T) ** 2)
    ss_tot = np.sum((y_test) ** 2)
    r2_out_of_sample = 1 - (ss_res / ss_tot)
    r2_oos_4.append(r2_out_of_sample)

r2_oos_4_mean = np.mean(r2_oos_4)
print(r2_oos_4_mean)