In [1]:
import bqplot.pyplot as plt
import pandas as pd
from bqplot.traits import convert_to_date
from bqplot import ColorScale
import datetime
import pickle
import numpy as np

In [2]:
# Load the date axis used for the prediction curves. The `with` block closes
# the file handle deterministically instead of leaving it to the garbage
# collector.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../predictions/extended_dates.pkl', 'rb') as extended_dates_file:
    extended_dates = pickle.load(extended_dates_file)

In [3]:
# --- Data -------------------------------------------------------------------
# Merged CSV read into `df`; its `Date` column supplies the x-axis for the
# observed series.
df_path = "../merged_data/CAPE_BAAFFM_BCI_CCI_CLI_IJC_PMIC_T10Y2Y_T10Y3M_vs_USRESCD_GDP.csv"
df = pd.read_csv(df_path)
dates = convert_to_date(df["Date"].tolist(), fmt='%Y-%m-%d')

# --- Feature names ----------------------------------------------------------
features = [
    'CAPE',
    "BAAFFM",
    "BCI",
    "CCI",
    "CLI",
    "IJC",
    "PMIC",
    "T10Y2Y",
    "T10Y3M",
]

# --- Models and targets -----------------------------------------------------
# plot() combines a model name and a target name to build the prediction file
# name, and uses the target name to select a column of `df`.
model_names_classification = [
    "Logistic Regression",
    "Penalized SVM",
    "Random Forest",
    "LSTM",
]
target_variables_classification = ["USRECD"]

model_names_regression = ["Linear Regression", "LSTM"]
target_variables_regression = ["GDP_rate", "GDP_abs"]

# Overview

We explored two different tasks, classification and regression.

For the classification task, the goal is to predict, from the current feature values, whether the US will be in a recession 180 days later. Our models performed well on this task.

For the regression task, the goal is to predict the exact GDP growth rate and the exact GDP value. Our models performed poorly on this task, so we do not focus on it.

# Comparisons of model performance

## Validation

We validated our models using `TimeSeriesSplit` with 3 splits. The following graphs compare the different models.

In [4]:
# Validation: average AUC ROC per classifier for the USRECD target.
x = ['Logistic Regression', 'Penalized SVM', 'Random Forest', 'LSTM']
y = [0.820, 0.702, 0.830, 0.828]

plt.figure()
plt.title('Average AUC ROC, USRECD')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('Average AUC ROC')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

In [5]:
# Validation: average accuracy per classifier for the USRECD target.
x = ['Logistic Regression', 'Penalized SVM', 'Random Forest', 'LSTM']
y = [0.897, 0.560, 0.905, 0.905]

plt.figure()
plt.title('Average accuracy, USRECD')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('Average accuracy')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

In [6]:
# Validation: R2 score per regressor for the GDP growth rate target.
# Under the usual definition of R2, a negative score means the model fits
# worse than always predicting the mean.
x = ['Linear Regression', 'LSTM']
y = [-1.554, -0.424]

plt.figure()
plt.title('R2 Score, GDP growth rate')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('R2 score')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

In [7]:
# Validation: R2 score per regressor for the absolute GDP value target.
x = ['Linear Regression', 'LSTM']
y = [-5.763, -68.165]

plt.figure()
plt.title('R2 Score, GDP absolute value')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('R2 score')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

## Test

We evaluated our models on a held-out test set that was never used for training. The following graphs compare the different models. For reference, we also include results from naive baseline methods.

In [8]:
# Test set: AUC ROC per classifier for the USRECD target, followed by the
# three baseline methods.
x = ['Logistic Regression', 'Penalized SVM', 'Random Forest', 'LSTM', 'Baseline #1', 'Baseline #2', 'Baseline #3']
y = [0.948, 0.952, 0.905, 0.843, 0.5, 0.490, 0.5]

plt.figure()
plt.title('AUC ROC, USRECD')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('AUC ROC')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

In [9]:
# Test set: accuracy per classifier for the USRECD target, followed by the
# three baseline methods.
# NOTE(review): Penalized SVM shows 0.162 accuracy here but 0.952 AUC ROC in
# the test AUC chart -- confirm the value (and the decision threshold used).
x = ['Logistic Regression', 'Penalized SVM', 'Random Forest', 'LSTM', 'Baseline #1', 'Baseline #2', 'Baseline #3']
y = [0.962, 0.162, 0.962, 0.960, 0.962, 0.869, 0.962]

plt.figure()
plt.title('Accuracy, USRECD')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('Accuracy')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

In [10]:
# Test set: R2 score per regressor for the GDP growth rate target.
x = ['Linear Regression', 'LSTM']
y = [0.341, -0.035]

plt.figure()
plt.title('R2 Score, GDP growth rate')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('R2 score')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

In [11]:
# Test set: R2 score per regressor for the absolute GDP value target.
x = ['Linear Regression', 'LSTM']
y = [-55.2, -512.3]

plt.figure()
plt.title('R2 Score, GDP absolute value')
plt.bar(x, y)
# Axis labels are applied to the axes of the most recently drawn mark, so
# they have to come after plt.bar().
plt.xlabel('Model')
plt.ylabel('R2 score')
plt.show()

VBox(children=(Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale())], fi…

# Predictions

The following graphs show our models' predictions (red) plotted together with the observed values of the target variable (blue).

In [12]:
def plot(model_name, target_variable):
    """Plot a model's predictions together with the observed target series.

    Loads the pickled predictions stored at
    ``../predictions/{model_name}_{target_variable}.pkl`` and draws them in
    red over ``extended_dates``. The ``target_variable`` column of the
    notebook-level ``df`` is drawn in blue over ``dates``.

    Parameters
    ----------
    model_name : str
        Model name exactly as it appears in the prediction file name.
    target_variable : str
        Column of ``df`` to plot; also part of the prediction file name.

    Raises
    ------
    FileNotFoundError
        If no prediction file exists for the given model/target pair.
    KeyError
        If ``target_variable`` is not a column of ``df``.
    """
    predictions_path = f'../predictions/{model_name}_{target_variable}.pkl'
    # Use a context manager so the file handle is closed right after loading.
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(predictions_path, 'rb') as predictions_file:
        predictions = pickle.load(predictions_file)

    plt.figure()
    plt.title(model_name + ' ' + target_variable)
    # `colors` is passed as a list (the form bqplot documents), and each line
    # gets a legend entry so the two series can be told apart.
    plt.plot(
        x=dates,
        y=df[target_variable],
        colors=['blue'],
        labels=['Actual'],
        display_legend=True,
    )
    plt.plot(
        x=convert_to_date(extended_dates, fmt='%Y-%m-%d'),
        y=predictions,
        colors=['red'],
        labels=['Predicted'],
        display_legend=True,
    )
    plt.show()

In [13]:
# One figure per (classification model, classification target) pair.
for model_name in model_names_classification:
    for target_variable in target_variables_classification:
        plot(model_name, target_variable)

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

In [14]:
# One figure per (regression model, regression target) pair.
for model_name in model_names_regression:
    for target_variable in target_variables_regression:
        plot(model_name, target_variable)

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…

VBox(children=(Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_m…