In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint

In [30]:
# Read the feature table, then the train/test LSTM inputs and their label
# files, in that order.
feature_df, X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_name)
    for csv_name in (
        'features.csv',
        'train_lstm.csv',
        'test_lstm.csv',
        'train_labels.csv',
        'test_labels.csv',
    )
)

# Report the shape of every split right after loading.
shape_report = ', '.join(
    f'{label}: {frame.shape}'
    for label, frame in (
        ('X_train', X_train),
        ('X_test', X_test),
        ('y_train', y_train),
        ('y_test', y_test),
    )
)
print(shape_report)

X_train: (3416, 41), X_test: (855, 41), y_train: (3416, 2), y_test: (855, 2)


In [31]:
import pandas_market_calendars as mcal

# The feature/label files are parsed as ISO dates (year-month-day); the LSTM
# input files are parsed as day/month/year. Parse each group with its own
# format and move the parsed column into the index (in place).
frames_by_date_format = (
    ('%Y-%m-%d', (feature_df, y_train, y_test)),
    ('%d/%m/%Y', (X_train, X_test)),
)
for date_format, frames in frames_by_date_format:
    for df in frames:
        df['date'] = pd.to_datetime(df['date'], format=date_format)
        df.set_index('date', inplace=True)

# Custom business-day offset built from the NYSE calendar's regular and
# ad-hoc holidays, with a Monday-Friday week mask.
nyse = mcal.get_calendar('NYSE')
bday_us = pd.offsets.CustomBusinessDay(
    holidays=nyse.adhoc_holidays,
    calendar=nyse.regular_holidays,
    weekmask="1111100",
)

# Conform every frame to that frequency; for the labels keep only the
# 'ERP_Sign' column, which turns them into Series.
feature_df, X_train, X_test = (
    frame.asfreq(bday_us) for frame in (feature_df, X_train, X_test)
)
y_train, y_test = (
    labels.asfreq(bday_us)['ERP_Sign'] for labels in (y_train, y_test)
)

print(
    f'X_train: {X_train.shape}, X_test: {X_test.shape}, '
    f'y_train: {y_train.shape}, y_test: {y_test.shape}'
)

X_train: (3416, 40), X_test: (855, 40), y_train: (3416,), y_test: (855,)


In [53]:
# Stack the train and test feature frames row-wise (train rows first) into a
# single frame covering both periods.
data_df = pd.concat((X_train, X_test), axis=0)

In [20]:
# Column labels of the feature table.
model_vars = feature_df.columns

# Forecast horizons to evaluate, expressed as a number of rows ahead.
fcst_horizon = list(range(5, 21, 5))  # [5, 10, 15, 20]

# Number of lagged rows fed to the model for each prediction.
max_lags = 10

In [73]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.metrics import accuracy_score, average_precision_score, f1_score, precision_score, recall_score, roc_auc_score
from sktime.utils.plotting import plot_series

tf.random.set_seed(100)

N_EPOCHS = 200          # training epochs per forecast horizon
PROB_THRESHOLD = 0.5    # probabilities strictly above this are labelled 1


def _lagged_windows(frame, first, last, n_lags, horizon):
    """Stack one look-back window per target row in ``range(first, last)``.

    The window for target row ``i`` is rows ``[i - n_lags - horizon, i - horizon)``
    of ``frame``: ``n_lags`` consecutive rows ending ``horizon`` rows before the
    target. ``first`` must be at least ``n_lags + horizon`` so that no window
    starts at a negative position.

    Returns an array of shape ``(last - first, n_lags, frame.shape[1])``.
    """
    values = frame.to_numpy()
    return np.array(
        [values[i - n_lags - horizon:i - horizon] for i in range(first, last)]
    )


def _build_lstm_clf(n_steps, n_features):
    """Compile four stacked 50-unit LSTM layers (dropout 0.2 each) with a sigmoid output."""
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(n_steps, n_features)))
    model.add(Dropout(0.2))
    for _ in range(2):
        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))
    # The last recurrent layer returns only its final state for the dense head.
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
    return model


def _predict_prob_and_label(model, windows):
    """Run ``model.predict`` once; return (flat probabilities, thresholded 0/1 labels)."""
    prob = model.predict(windows).ravel()
    return prob, (prob > PROB_THRESHOLD).astype(int)


def _classification_scores(y_true, y_pred, y_prob):
    """Return the evaluation metrics for one horizon, keyed by result column name."""
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'F1 Score': f1_score(y_true, y_pred),
        'PR AUC': average_precision_score(y_true, y_prob),
        'Precision': precision_score(y_true, y_pred),
        'Sensitivity': recall_score(y_true, y_pred),
        'Specificity': recall_score(y_true, y_pred, pos_label=0),
        'ROC AUC': roc_auc_score(y_true, y_prob),
    }


def _record_scores(results_df, row, scores):
    """Write ``scores`` into row ``row`` of ``results_df``, one column per metric."""
    for column, value in scores.items():
        results_df.loc[row, column] = value


def _save_forecast_plot(y_fit, y_holdout, y_hat, hat_label, y_label, stem):
    """Plot actual train/test labels against ``y_hat`` and save the figure as ``<stem>.png``."""
    plot_series(
        y_fit, y_holdout, y_hat,
        labels=['y_train', 'y_test', hat_label],
        x_label='Date', y_label=y_label,
    )
    plt.title(stem)
    plt.tight_layout()
    plt.savefig(f'{stem}.png')
    plt.close()


# One result row per forecast horizon; metric columns are filled in the loop.
lstm_train_df = pd.DataFrame(
    [(f'h{h}', 'lstm_clf') for h in fcst_horizon], columns=['fh', 'clf']
)
lstm_test_df = lstm_train_df.copy()

# Per-date predictions / probabilities, one column per horizon.
lstm_pred_df = pd.DataFrame().reindex(data_df.index)
lstm_prob_df = pd.DataFrame().reindex(data_df.index)

n_train = len(X_train)
for k, h in enumerate(fcst_horizon):
    p = max_lags + h

    # Training windows and their targets (positional, so the first p rows
    # have no complete window and are dropped).
    X_train_h = _lagged_windows(X_train, p, n_train, max_lags, h)
    y_train_h = y_train.iloc[p:].to_numpy()

    lstm_clf = _build_lstm_clf(X_train_h.shape[1], X_train_h.shape[2])
    lstm_clf.fit(X_train_h, y_train_h, epochs=N_EPOCHS)

    y_prob_train, y_pred_train = _predict_prob_and_label(lstm_clf, X_train_h)
    # Rows are keyed by the horizon position k, matching the 'fh' rows above.
    _record_scores(
        lstm_train_df, k,
        _classification_scores(y_train_h, y_pred_train, y_prob_train),
    )

    # Test windows are cut from the combined frame so the earliest ones can
    # reach back into the end of the training period.
    X_test_h = _lagged_windows(data_df, n_train, len(data_df), max_lags, h)
    y_prob_test, y_pred_test = _predict_prob_and_label(lstm_clf, X_test_h)
    _record_scores(
        lstm_test_df, k,
        _classification_scores(y_test, y_pred_test, y_prob_test),
    )

    # Re-attach dates to the raw arrays for storage and time series plotting.
    ytrain_h = y_train.iloc[p:]
    y_pred = pd.concat([
        pd.Series(y_pred_train, index=ytrain_h.index),
        pd.Series(y_pred_test, index=y_test.index),
    ])
    y_prob = pd.concat([
        pd.Series(y_prob_train, index=ytrain_h.index),
        pd.Series(y_prob_test, index=y_test.index),
    ])

    lstm_pred_df[(f'h{h}', 'lstm_clf')] = y_pred
    lstm_prob_df[(f'h{h}', 'lstm_clf')] = y_prob

    # Actual ERP sign labels against the predicted binary labels.
    _save_forecast_plot(
        ytrain_h, y_test, y_pred,
        hat_label='y_pred', y_label='ERP Sign (Binary)',
        stem=f'erp_binary_lstm_clf_h{h}',
    )

    # Actual ERP sign labels against the predicted probabilities.
    _save_forecast_plot(
        ytrain_h, y_test, y_prob,
        hat_label='y_prob', y_label='Pr(ERP Sign)',
        stem=f'erp_prob_lstm_clf_h{h}',
    )

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [105]:
# Write the metric tables and the per-date estimates to CSV, one file each.
csv_outputs = {
    'erp_lstm_train_results.csv': lstm_train_df,
    'erp_lstm_test_results.csv': lstm_test_df,
    'erp_lstm_pred_estimates.csv': lstm_pred_df,
    'erp_lstm_prob_estimates.csv': lstm_prob_df,
}
for csv_name, frame in csv_outputs.items():
    frame.to_csv(csv_name)