In [99]:
### Random Forest Forecasting Model ###

In [100]:
## Import Relevant Libraries
import pandas as pd
import numpy as np
from Forecasters import TSDatasetGenerator, RFForecaster
from Forecasters.XGBForecaster import XGBForecaster
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report

In [101]:
## Change director to import custom dependencies and libraries
os.chdir('/Users/elisha/Desktop/EC4308 Week 13/EC4308 Project Code')

In [102]:
from Forecasters import TSDatasetGenerator
from Forecasters.RFForecaster import RFForecaster

In [103]:
## Load datasets
# Without DATE col
train_data = pd.read_csv('rec_train.csv').drop(columns = ['Unnamed: 0', 'DATE'])
train_data.head()

Unnamed: 0,Is_Recession,PAYEMS,FEDFUNDS,3MTB_SECONDARYMKT,GS1,GS5,GS10,CPI,DEBT_PUB,SP500,INDPRO,UNRATE,10Y3MTH_SPREAD,FEDFUNDS_ROLMEAN3,INDPRO_ROLMEAN3,UNRATE_ROLMEAN3,PAYEMS_3MTHCHANGE,10Y3MTH_SPREAD_3MTHCHANGE,INDPRO_3MTHCHANGE
0,0,275.0,0.23,0.02,0.03,0.06,0.03,-0.623053,0.0,-4.93,0.3323,0.1,0.01,0.1,0.286133,0.033333,6.6e-05,-5.722222,0.003078
1,0,399.0,0.27,-0.14,0.04,0.08,0.03,0.309598,0.0,-1.39,0.1662,-0.1,0.17,0.17,0.184633,0.0,-4.4e-05,0.329365,-0.009071
2,0,190.0,0.13,0.3,0.2,0.2,0.21,-0.000956,0.35328,-1.14,0.1938,0.0,-0.09,0.21,0.230767,0.0,-0.000923,0.585165,0.003938
3,0,206.0,0.23,0.16,0.37,0.33,0.2,0.306743,0.0,-6.5,0.0277,0.0,0.04,0.21,0.129233,-0.033333,-0.001142,0.164835,-0.008827
4,0,136.0,-0.13,0.41,0.28,0.0,-0.04,-0.615385,0.0,-0.54,0.3323,-0.1,-0.45,0.076667,0.1846,-0.033333,-0.00418,-2.470046,0.004638


In [104]:
test_data = pd.read_csv('rec_train.csv').drop(columns = ['Unnamed: 0', 'DATE'])
test_data.head()

Unnamed: 0,Is_Recession,PAYEMS,FEDFUNDS,3MTB_SECONDARYMKT,GS1,GS5,GS10,CPI,DEBT_PUB,SP500,INDPRO,UNRATE,10Y3MTH_SPREAD,FEDFUNDS_ROLMEAN3,INDPRO_ROLMEAN3,UNRATE_ROLMEAN3,PAYEMS_3MTHCHANGE,10Y3MTH_SPREAD_3MTHCHANGE,INDPRO_3MTHCHANGE
0,0,275.0,0.23,0.02,0.03,0.06,0.03,-0.623053,0.0,-4.93,0.3323,0.1,0.01,0.1,0.286133,0.033333,6.6e-05,-5.722222,0.003078
1,0,399.0,0.27,-0.14,0.04,0.08,0.03,0.309598,0.0,-1.39,0.1662,-0.1,0.17,0.17,0.184633,0.0,-4.4e-05,0.329365,-0.009071
2,0,190.0,0.13,0.3,0.2,0.2,0.21,-0.000956,0.35328,-1.14,0.1938,0.0,-0.09,0.21,0.230767,0.0,-0.000923,0.585165,0.003938
3,0,206.0,0.23,0.16,0.37,0.33,0.2,0.306743,0.0,-6.5,0.0277,0.0,0.04,0.21,0.129233,-0.033333,-0.001142,0.164835,-0.008827
4,0,136.0,-0.13,0.41,0.28,0.0,-0.04,-0.615385,0.0,-0.54,0.3323,-0.1,-0.45,0.076667,0.1846,-0.033333,-0.00418,-2.470046,0.004638


In [105]:
## Create_datasets
dataset_generator = TSDatasetGenerator()

In [106]:
## Auxiliary function to generate Classification Report
def genClassificationRep(y_true, y_pred):
    return pd.DataFrame(classification_report(y_true, y_pred, output_dict=True))

In [119]:
## Grid Search Candidate Parameters
grid_param = {
    'n_estimators': [100, 300],
    'criterion': ['gini'],
    'bootstrap': [True],
    'max_depth': [2, 4],
    'class_weight': ['balanced_subsample', 'balanced'],
    'random_state': [42]
}

In [114]:
## Auxiliary function to train models and log results for various combinations of h, k and l
def runBehemoth(grid_params, data, target_feature, n_splits, model='RF', h=1, k_range=[], l_range=[]):
    results = []
    label_index = []

    # Instantiate dataset generator
    dataset_generator = TSDatasetGenerator()

    # Loop over all combinations of k and l
    for k in k_range:
        for l in l_range:
            # Instantiate model
            if model == 'XGB':
                selected_model = XGBForecaster()
            elif model == 'RF':
                selected_model = RFForecaster()
            else:
                print('Please select a valid model: RF or XGB')
                return None

            # Label for output (h, k, l)
            res_label = "(h=%d, k=%d, l=%d)" % (h, k, l)
            print("Running grid search for the dataset with params: " + res_label)
            # Create datasets
            train = dataset_generator.fit_transform(data, target_feature, h, k, l)
            train_X = train.drop(columns=['Target Feature'])
            train_y = train['Target Feature']

            # Fit model
            fitted_model = selected_model.fit(train_X, train_y)
            fitted_model.grid_search_CV(grid_params, n_splits=n_splits)

            # Log results
            result = {}
            result['Best score (log-loss)'] = fitted_model.getBestScore()
            result['Best params'] = fitted_model.getBestParams()
            result['Best model'] = fitted_model.getBestModel()
            result['Feature Importance'] = fitted_model.getFeatureImportance()
            results.append(result)
            label_index.append(res_label)

    labels_index = pd.DataFrame(label_index, columns=['Dataset'])
    results_final = pd.DataFrame(results)
    collected_results = pd.concat([labels_index, results_final], axis=1)
    return collected_results        

In [120]:
## 1 Step Ahead Forecast
one_step_results = runBehemoth(grid_param,
                               train_data,
                               'Is_Recession',
                               300,
                               k_range=[1, 2, 3],
                               l_range=[1, 2, 3])

  0%|          | 1/300 [00:00<00:48,  6.14it/s]

Running grid search for the dataset with params: (h=1, k=1, l=1)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:42<00:00,  7.03it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:08<00:00,  2.42it/s]
  0%|          | 1/900 [00:00<02:07,  7.04it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:14<00:00,  6.67it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [08:45<00:00,  2.28it/s]
  0%|          | 1/1500 [00:00<02:54,  8.58it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [02:58<00:00,  8.41it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:24<00:00,  2.88it/s]
  0%|          | 1/2100 [00:00<04:02,  8.65it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:26<00:00,  7.89it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [14:49<00:00,  2.70it/s]
  0%|          | 1/300 [00:00<00:40,  7.38it/s]

Running grid search for the dataset with params: (h=1, k=1, l=2)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:42<00:00,  6.98it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:11<00:00,  2.39it/s]
  0%|          | 1/900 [00:00<02:05,  7.14it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:20<00:00,  6.43it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [08:57<00:00,  2.23it/s]
  0%|          | 1/1500 [00:00<02:58,  8.39it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:00<00:00,  8.29it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:30<00:00,  2.85it/s]
  0%|          | 1/2100 [00:00<04:13,  8.27it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:34<00:00,  7.66it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [15:12<00:00,  2.63it/s]
  0%|          | 1/300 [00:00<00:42,  7.08it/s]

Running grid search for the dataset with params: (h=1, k=1, l=3)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:43<00:00,  6.87it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [05:11<00:00,  1.93it/s]  
  0%|          | 1/900 [00:00<02:09,  6.93it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:21<00:00,  6.38it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [09:34<00:00,  2.09it/s]
  0%|          | 1/1500 [00:00<03:11,  7.83it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:07<00:00,  7.99it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [11:18<00:00,  2.65it/s]
  0%|          | 1/2100 [00:00<04:22,  8.00it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:56<00:00,  7.07it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [15:57<00:00,  2.51it/s]
  0%|          | 1/300 [00:00<00:38,  7.87it/s]

Running grid search for the dataset with params: (h=1, k=2, l=1)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [03:38<00:00,  1.37it/s]  
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:16<00:00,  2.34it/s]
  0%|          | 1/900 [00:00<01:58,  7.58it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:17<00:00,  6.57it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [08:46<00:00,  2.28it/s]
  0%|          | 1/1500 [00:00<02:42,  9.24it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [02:57<00:00,  8.45it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:33<00:00,  2.84it/s]
  0%|          | 1/2100 [00:00<03:51,  9.07it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:37<00:00,  7.58it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [15:39<00:00,  2.56it/s]
  0%|          | 1/300 [00:00<00:42,  7.07it/s]

Running grid search for the dataset with params: (h=1, k=2, l=2)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:46<00:00,  6.39it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:31<00:00,  2.21it/s]
  0%|          | 1/900 [00:00<01:58,  7.57it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:17<00:00,  6.57it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [2:14:49<00:00,  6.74s/it]     
  0%|          | 1/1500 [00:00<02:46,  8.99it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:02<00:00,  8.22it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:36<00:00,  2.83it/s]
  0%|          | 1/2100 [00:00<04:41,  7.46it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:40<00:00,  7.48it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [15:55<00:00,  2.51it/s]
  0%|          | 1/300 [00:00<00:40,  7.31it/s]

Running grid search for the dataset with params: (h=1, k=2, l=3)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:45<00:00,  6.64it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:26<00:00,  2.25it/s]
  0%|          | 1/900 [00:00<02:04,  7.22it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:26<00:00,  6.13it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [09:31<00:00,  2.10it/s]
  0%|          | 1/1500 [00:00<02:49,  8.83it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:03<00:00,  8.18it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [16:15<00:00,  1.85it/s]  
  0%|          | 1/2100 [00:00<04:12,  8.31it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:49<00:00,  7.25it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [16:23<00:00,  2.44it/s]
  0%|          | 1/300 [00:00<00:42,  7.01it/s]

Running grid search for the dataset with params: (h=1, k=3, l=1)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:43<00:00,  6.91it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:13<00:00,  2.37it/s]
  0%|          | 1/900 [00:00<02:02,  7.35it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:25<00:00,  6.20it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [09:08<00:00,  2.19it/s]
  0%|          | 1/1500 [00:00<02:52,  8.69it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:04<00:00,  8.15it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:44<00:00,  2.79it/s]
  0%|          | 1/2100 [00:00<04:18,  8.13it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:36<00:00,  7.59it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [15:16<00:00,  2.62it/s]
  0%|          | 1/300 [00:00<00:39,  7.48it/s]

Running grid search for the dataset with params: (h=1, k=3, l=2)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:45<00:00,  6.62it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:19<00:00,  2.31it/s]
  0%|          | 1/900 [00:00<02:01,  7.38it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:17<00:00,  6.52it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [08:51<00:00,  2.26it/s]
  0%|          | 1/1500 [00:00<02:46,  8.99it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:05<00:00,  8.09it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:49<00:00,  2.77it/s]
  0%|          | 1/2100 [00:00<03:58,  8.79it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:33<00:00,  7.67it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [15:10<00:00,  2.64it/s]
  0%|          | 1/300 [00:00<00:38,  7.69it/s]

Running grid search for the dataset with params: (h=1, k=3, l=3)
Dropping any existing invalid observations
Recursive cross val data generated, number of split: 300


100%|██████████| 300/300 [00:43<00:00,  6.92it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 600/600 [04:20<00:00,  2.30it/s]
  0%|          | 1/900 [00:00<02:05,  7.14it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 900/900 [02:22<00:00,  6.30it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1200/1200 [09:07<00:00,  2.19it/s]
  0%|          | 1/1500 [00:00<02:47,  8.96it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1500/1500 [03:09<00:00,  7.91it/s]
  0%|          | 0/1800 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 1800/1800 [10:49<00:00,  2.77it/s]
  0%|          | 1/2100 [00:00<04:06,  8.51it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2100/2100 [04:38<00:00,  7.54it/s]
  0%|          | 0/2400 [00:00<?, ?it/s]

Recursive cross val data generated, number of split: 300


100%|██████████| 2400/2400 [16:59<00:00,  2.35it/s]


In [121]:
## Write out results
one_step_results.to_csv('one_step_rf_results.csv', index=False)

In [None]:
## 3 Step Ahead Forecast
three_step_results = runBehemoth(grid_param,
                                 train_data,
                                 'Is_Recession',
                                 300,
                                 k_range=[1, 2, 3, 4, 5, 6],
                                 l_range=[1, 2, 3, 4, 5, 6])

In [116]:
## Write out results
three_step_results.to_csv('three_step_rf_results.csv', index=False)

AttributeError: 'NoneType' object has no attribute 'to_csv'

In [None]:
## 6 Step Ahead Forecast
six_step_results = runBehemoth(grid_param,
                               train_data,
                               'Is_Recession',
                               300,
                               k_range=[1, 2, 3, 4, 5, 6],
                               l_range=[1, 2, 3, 4, 5, 6])

In [None]:
## Write out results
six_step_results.to_csv('six_step_rf_results.csv', index=False)

In [None]:
## 12 Step Ahead Forecast
twelve_step_results = runBehemoth(grid_param,
                                  train_data,
                                  'Is_Recession',
                                  300,
                                  k_range=[1, 2, 3, 4, 5, 6],
                                  l_range=[1, 2, 3, 4, 5, 6])

In [None]:
## Write out results
twelve_step_results.to_csv('twelve_step_rf_results.csv', index=False)