In [50]:
import os
import sys
import pandas as pd
import numpy as np

# For notebooks — get the current working directory
notebook_dir = os.getcwd()
project_dir = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(project_dir)

# Then import your module
#import Utils.functions as data_viz
import Utils.file_io as file_io

from dotenv import load_dotenv
load_dotenv()


False

In [51]:
import re
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, average_precision_score, balanced_accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Input, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K
import tensorflow as tf
from keras.metrics import AUC, Precision, Recall

In [52]:
#File imports and initialization
#classifier_models = ['logistic', 'xgb', 'balanced_rf', 'easy_ensemble', 'rf']

file_path = f"{project_dir}/Notebooks/Dataset/data_features"

df_features_daily = file_io.input_csv(f"{file_path}_daily")
df_features_weekly = file_io.input_csv(f"{file_path}_weekly")
df_features_monthly = file_io.input_csv(f"{file_path}_monthly")

#recessions = pd.read_csv(f"{project_dir}/Dataset/recession_periods.csv")
#recessions = file_io.input_csv(f"{project_dir}/Dataset/recession_periods")
recessions = pd.read_csv(f"{project_dir}/Dataset/recession_periods.csv1")

dict_features = {'Daily': df_features_daily,
                 'Weekly': df_features_weekly,
                 'Monthly': df_features_monthly
} 

train_test_split = pd.to_datetime('2015-01-01')
split_at = train_test_split

export_config = {'Print Out For all models': 
                  {'save': False},
                 'Save Probability Plots as PNG': 
                  {'save': False},
                 'Export AUC Report to CSV': 
                  {'save': False}
}

DataFrame loaded from /workspaces/RecessionPredictionML/Notebooks/Dataset/data_features_daily.csv
DataFrame loaded from /workspaces/RecessionPredictionML/Notebooks/Dataset/data_features_weekly.csv
DataFrame loaded from /workspaces/RecessionPredictionML/Notebooks/Dataset/data_features_monthly.csv


In [53]:
# Refactor
def binary_focal_loss(gamma=2.0, alpha=0.25):
    def focal_loss(y_true, y_pred):
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1. - eps)
        
        cross_entropy = - (y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred))
        p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
        alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
        modulating_factor = tf.pow(1.0 - p_t, gamma)

        loss = alpha_factor * modulating_factor * cross_entropy
        return tf.reduce_mean(loss)
    
    return focal_loss

def make_sequences(X, y=None, seq_len=12):
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_len):
        X_seq.append(X.iloc[i:i+seq_len].values)
        if y is not None:
            y_seq.append(y.iloc[i+seq_len])
    X_seq = np.array(X_seq)
    return (X_seq, np.array(y_seq)) if y is not None else (X_seq, None)

def get_class_weight(y_train):
    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
    class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}
    alpha = class_weights[1] / (class_weights[0] + class_weights[1])
    return class_weights_dict, alpha
    
    
def scaling_features(train,test, scaler_func = StandardScaler()):
    scaler = scaler_func
    X_train = train
    X_test = test
    n_train, seq_len, n_feat = X_train.shape
    n_test,      _   ,  _    = X_test.shape
    if scaler_func is None:
        return X_train, X_test, seq_len, n_feat

    X_train_scaled = scaler.fit_transform(X_train.reshape(-1, n_feat)).reshape(n_train, seq_len, n_feat)
    X_test_scaled  = scaler.transform    (X_test.reshape(-1, n_feat)).reshape(n_test, seq_len, n_feat)

    return X_train_scaled, X_test_scaled, seq_len, n_feat

def layer_sizes(str_var):
    str_var = 'LSTM_4_2'
    parts = str_var.split('_')
    int_list = [int(x) for x in parts[1:]]
    model_type = parts[0]
    return int_list, model_type
    

In [None]:


#X_train_scaled, X_test_scaled, seq_len, n_feat = scaling_features(X_train, X_test) #one time calculation per time_freq?

#class_weights_dict, alpha = compute_class_weight(y_train) # one time calculation per time_freq?

def LSTM_Model_init(model_name, seq_len, n_feat, alpha):

    layer_size, model_type = layer_sizes(model_name)

    is_bi = model_type.upper().startswith("BILSTM")

    #Build model
    model = Sequential()
    model.add(Input(shape=(seq_len, n_feat)))
    

    for i, units in enumerate(layer_size):
        return_seq = (i < len(layer_size) - 1)
        lstm_layer = LSTM(units, return_sequences=return_seq)

        if is_bi:
            # wrap in Bidirectional
            model.add(Bidirectional(lstm_layer))
        else:
            model.add(lstm_layer)
        
        model.add(Dropout(0.3))
    # stuff
    model.add(Dense(1, activation="sigmoid"))

    model.compile(
        optimizer="adam",
        loss=binary_focal_loss(gamma=2.0, alpha=alpha),
        #metrics=["accuracy"]
        metrics=[#"Precision", "Recall", #"AUC"]
                    AUC(name='AUC-ROC'),
                    AUC(name='AUC-PR', curve='PR')]
    )

    return model

#early_stop = EarlyStopping(monitor="val_Recall", patience=10, restore_best_weights=True, mode='max')

def LSTM_Model_train(train, early_stop, model, class_weights, verbose = 0):
    X_train_scaled, y_train = train[0], train[1]
    callback_list = [early_stop] if early_stop is not None else []
    history = model.fit(
        X_train_scaled, y_train,
        validation_split=0.2,
        epochs=50, batch_size=32,
        callbacks=callback_list,
        class_weight=class_weights,
        verbose=verbose
    )
    print('Done with training')
    epochs = len(history.history['loss'])
    return model, epochs

def LSTM_Model_evaluate(model, verbose=0, threshold = None):

    

    X_all_scaled = np.concatenate([X_train_scaled, X_test_scaled], axis=0) #pd.concat([X_train_scaled, X_test_scaled])
    probs_all = model.predict(X_all_scaled, verbose=verbose).reshape(-1)
    n_test = len(y_test)
    probs_pred = probs_all[-n_test:]
    if threshold is None:
        threshold = 0.001
    y_pred = (probs_pred >= threshold).astype(int)

    #precision, recall, thresholds = precision_recall_curve(y_test, probs)
    #cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=["No Recession", "Recession"], output_dict=True, zero_division=0)
    #report["auc_pr"] = auc(recall, precision)
    report["ap_score"]=  average_precision_score(y_test, probs_all)
    report["auc_roc"]= roc_auc_score(y_test, probs_all)

    epochs = len(history.history['loss'])
    return probs_all, report, epochs




In [55]:
LSTM_models = ['LSTM_4_2', 'LSTM_4_4']
LSTM_results = {}
early_stop = EarlyStopping(monitor="val_Recall", patience=10, restore_best_weights=True, mode='max')

In [56]:
for time_freq, df in dict_features.items():
    X = df.drop(columns='recession')
    y = df['recession'] 

    # Train-test split
    #split_at = pd.to_datetime('2015-01-01')
    X_train, X_test = X[X.index < split_at], X[X.index >= split_at]
    y_train, y_test = y[y.index < split_at].astype(int), y[y.index >= split_at].astype(int)

    # Convert to sequences
    seq_len = 32
    X_train_seq, y_train_seq = make_sequences(X_train, y_train, seq_len)
    X_test_seq, y_test_seq = make_sequences(X_test, y_test, seq_len)

    X_train_scaled, X_test_scaled, _, n_feat = scaling_features(X_train_seq, X_test_seq) #one time calculation per time_freq?
    class_weights_dict, alpha = get_class_weight(y_train_seq) # one time calculation per time_freq?

    LSTM_results[time_freq] = {}
    for model_name in LSTM_models:
        LSTM_results[time_freq][model_name] = {}
        print(model_name)
        model_init = LSTM_Model_init(model_name, seq_len, n_feat, alpha)
        print('Done with Init')
        probs, report, epochs = LSTM_Model_evaluate(train = [X_train_scaled,y_train_seq], test = [X_test_scaled, y_test_seq],
                                  model = model_init, early_stop = early_stop, class_weights = class_weights_dict, 
                                  verbose=0, threshold = None)
        
        LSTM_results[time_freq][model_name]['probs'] = probs
        LSTM_results[time_freq][model_name]['report'] = report
        LSTM_results[time_freq][model_name]['epochs'] = epochs


LSTM_4_2
Done with Init


  current = self.get_monitor_value(logs)


Done with training


ValueError: Found input variables with inconsistent numbers of samples: [2756, 11535]