In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)
import matplotlib.ticker as mtick

In [None]:
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score, confusion_matrix, r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Raw indicator panel: parse the `date` column and replace whatever index the
# parquet file carried with a fresh 0..n-1 range.
indicators = (
    pd.read_parquet("../../data/indicators/US/all_indicators_raw_outer.parquet", engine="pyarrow")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .reset_index(drop=True)
)

In [None]:
# NBER recession table with parsed dates, keeping only rows dated 1962-01-01 or later.
nber_recessions = (
    pd.read_parquet("../../data/indicators/US/nber_recession.parquet")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .loc[lambda frame: frame["date"] >= "1962-01-01"]
)

In [None]:
# Top-500 index table with parsed dates.
us_top_500 = pd.read_parquet(
    "../../data/indicators/US/us_top_500.parquet", engine="pyarrow"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

# Outer join keeps every date present in either table.
data = indicators.merge(us_top_500, on=["date"], how="outer")

In [None]:
# Index the merged panel by date; later cells filter on `data.index`.
data = data.set_index("date")

In [None]:
# Period-over-period change of each labour series, computed between consecutive
# non-missing observations; the result is aligned back onto `data` by index.
for labour_col in ("unemployment", "initial_claims"):
    data[f"{labour_col}_change"] = data[labour_col].dropna().pct_change()


In [None]:
# Weekly market-cap returns for weeks anchored on Wednesday, Thursday and Friday.
# The resampled series is aligned back onto `data` by index, so values land only
# on rows whose date matches a weekly bin label.
for weekday in ("wed", "thu", "fri"):
    weekly_last = data["market_cap_usd"].resample(f"W-{weekday.upper()}").last()
    data[f"trr_w_{weekday}"] = weekly_last.pct_change()

In [None]:
import os
# Names of the files in the pre-test Markov date folder.
# NOTE(review): `markov_date_files` is not referenced again in the visible part of this notebook.
markov_date_files = os.listdir("../../time_periods/model_train_ready_before_test")

In [None]:
# Date window used for the training-period baselines below.
# Later cells rebind both names.
min_date, max_date = (pd.Timestamp(day) for day in ("1962-01-01", "2019-12-31"))

## Training dates

In [None]:
# Dates from the Markov recession training file, and the rows of `data` falling on them.
markov_rec_dates = pd.read_csv(
    "../../time_periods/model_train_ready_before_test/markov_rec_dates_train_2020_order1_4_10_smooth_5yr_avg.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
markov_rec = data.loc[data.index.isin(markov_rec_dates["date"])].assign(name="markov_rec")

In [None]:
# Dates from the NBER recession file, and the rows of `data` falling on them.
nber_rec_dates = pd.read_csv(
    "../../time_periods/model_train_ready/nber_recession_dates.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
nber_rec = data.loc[data.index.isin(nber_rec_dates["date"])].assign(name="nber_rec")

In [None]:
# Dates from the S&P 500 bear file, and the rows of `data` falling on them.
sp500_bear_dates = pd.read_csv(
    "../../time_periods/model_train_ready/bear_dates_sp500.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
sp500_bear = data.loc[data.index.isin(sp500_bear_dates["date"])].assign(name="sp500_bear")

In [None]:
# Dates from the long-horizon (3/6/12) bear return-filter file, and the matching rows of `data`.
filter_bear_dates_1 = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bear_m_long_3_6_12.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
filter_bear_1 = data.loc[data.index.isin(filter_bear_dates_1["date"])].assign(name="filter_bear_1")

In [None]:
# Dates from the short-horizon (2/3) bear return-filter file, and the matching rows of `data`.
filter_bear_dates_2 = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bear_m_short_2_3.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
filter_bear_2 = data.loc[data.index.isin(filter_bear_dates_2["date"])].assign(name="filter_bear_2")

In [None]:
# Dates from the EPU recession file, and the rows of `data` falling on them.
epu_rec_dates = pd.read_csv(
    "../../time_periods/model_train_ready/EPU_rec_2yr.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
epu_rec = data.loc[data.index.isin(epu_rec_dates["date"])].assign(name="epu_rec")

In [None]:
# Dates from the Markov expansion training file, and the rows of `data` falling on them.
markov_exp_dates = pd.read_csv(
    "../../time_periods/model_train_ready_before_test/markov_exp_dates_train_2020_order1_4_10_smooth_5yr_avg.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
markov_exp = data.loc[data.index.isin(markov_exp_dates["date"])].assign(name="markov_exp")

In [None]:
# Dates from the NBER expansion file, and the rows of `data` falling on them.
nber_exp_dates = pd.read_csv(
    "../../time_periods/model_train_ready/nber_expansion_dates.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
nber_exp = data.loc[data.index.isin(nber_exp_dates["date"])].assign(name="nber_exp")

In [None]:
# Dates from the S&P 500 bull file, and the rows of `data` falling on them.
sp500_bull_dates = pd.read_csv(
    "../../time_periods/model_train_ready/bull_dates_sp500.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
sp500_bull = data.loc[data.index.isin(sp500_bull_dates["date"])].assign(name="sp500_bull")

In [None]:
# Dates from the S&P 500 non-bear file, and the rows of `data` falling on them.
sp500_non_bear_dates = pd.read_csv(
    "../../time_periods/model_train_ready/non_bear_dates_sp500.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
sp500_non_bear = data.loc[data.index.isin(sp500_non_bear_dates["date"])].assign(name="sp500_non_bear")

In [None]:
# Dates from the long-horizon (3/6/12) bull return-filter file, and the matching rows of `data`.
filter_bull_dates_1 = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bull_m_long_3_6_12.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
filter_bull_1 = data.loc[data.index.isin(filter_bull_dates_1["date"])].assign(name="filter_bull_1")

In [None]:
# Dates from the short-horizon (2/3) bull return-filter file, and the matching rows of `data`.
filter_bull_dates_2 = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bull_m_short_2_3.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
filter_bull_2 = data.loc[data.index.isin(filter_bull_dates_2["date"])].assign(name="filter_bull_2")

In [None]:
# Dates from the EPU expansion file, and the rows of `data` falling on them.
epu_exp_dates = pd.read_csv(
    "../../time_periods/model_train_ready/EPU_exp_2yr.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
epu_exp = data.loc[data.index.isin(epu_exp_dates["date"])].assign(name="epu_exp")

In [None]:
# Dates from the S&P 500 flat file, and the rows of `data` falling on them.
sp500_flat_dates = pd.read_csv(
    "../../time_periods/model_train_ready/flat_dates_sp500.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
sp500_flat = data.loc[data.index.isin(sp500_flat_dates["date"])].assign(name="sp500_flat")

In [None]:
# Unfiltered copy of `data`, labelled "all_dates"; it serves as the baseline group.
all_dates = data.assign(name="all_dates")

In [None]:
# Stack every labelled period frame; the `name` column tells the groups apart,
# so the date index of the result contains duplicates.
all_periods = pd.concat(
    (
        markov_rec, nber_rec, sp500_bear, filter_bear_1, filter_bear_2, epu_rec,
        markov_exp, nber_exp, sp500_bull, sp500_non_bear, filter_bull_1, filter_bull_2, epu_exp,
        sp500_flat,
        all_dates,
    )
)

In [None]:
# Express each return column relative to its mean over the "all_dates" rows that
# lie strictly between min_date and max_date.
baseline_rows = (
    (all_periods.index > min_date)
    & (all_periods.index < max_date)
    & (all_periods["name"] == "all_dates")
)
for return_col in ("trr_1_n", "trr_w_wed", "trr_w_thu", "trr_w_fri"):
    baseline_mean = all_periods[baseline_rows][return_col].mean()
    all_periods[f"{return_col}_rel"] = all_periods[return_col] - baseline_mean

## Test results

In [None]:
# Test-period dates from the Markov recession file, with `date` parsed to datetimes.
markov_test_pred_rec_dates = pd.read_csv(
    "../../time_periods/model_test_ready/markov_rec_dates_test_all_years_order1_4_10_5yr_avg.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))


In [None]:
# Test-period dates from the Markov expansion file, with `date` parsed to datetimes.
markov_test_pred_exp_dates = pd.read_csv(
    "../../time_periods/model_test_ready/markov_exp_dates_test_all_years_order1_4_10_5yr_avg.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Test-period dates from the LSTM NBER-recession class file (run ba4da75c).
lstm_test_pred_rec_dates = pd.read_csv(
    "../../time_periods/model_test_ready/nber_recession_dates_class_lstm_ba4da75c.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Test-period dates from the LSTM bear class file (run 9046df4a).
lstm_test_pred_bear_dates = pd.read_csv(
    "../../time_periods/model_test_ready/bear_dates_qbear_class_lstm_9046df4a.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Test-period dates from the LSTM bull class file (run f241ab59).
lstm_test_pred_bull_dates = pd.read_csv(
    "../../time_periods/model_test_ready/bull_dates_qbull_class_lstm_f241ab59.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Test-period dates from the LSTM NBER non-recession class file (run ba4da75c).
lstm_test_pred_non_rec_dates = pd.read_csv(
    "../../time_periods/model_test_ready/nber_non_recession_dates_class_lstm_ba4da75c.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Test-period dates from the bear market-cap-change LSTM class file.
lstm_test_pred_mc_change_class_bear = pd.read_csv(
    "../../time_periods/model_test_ready/bear_lstm_mc_change_class.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Test-period dates from the bull market-cap-change LSTM class file.
lstm_test_pred_mc_change_class_bull = pd.read_csv(
    "../../time_periods/model_test_ready/bull_lstm_mc_change_class.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

# Result testing

In [None]:
# Entries of the LSTM regime results folder; the classification cell below
# filters these by a run id contained in the name.
# os.listdir returns entries in arbitrary order.
result_dirs = os.listdir("../../results/regime/lstm")

In [None]:
# Display the directory listing as the cell output.
result_dirs

In [None]:
#Classification
#
# For every result directory whose name contains the run id below, load its
# test_results.csv, score `pred_class` against `real_class`, and finally report
# the best-scoring run for each metric.

recall_scores = {}
precision_scores = {}
acc_scores = {}
f1_scores = {}

print_all = True

for directory in result_dirs:
    if "c44a4142" not in directory:
        continue

    # The last 8 characters of the directory name are the dictionary key.
    # NOTE(review): directories sharing the same final 8 characters overwrite
    # each other in the score dictionaries.
    run_key = directory[-8:]
    print(run_key)

    current_results = pd.read_csv(f"../../results/regime/lstm/{directory}/test_results.csv")
    real_class = current_results["real_class"]
    pred_class = current_results["pred_class"]

    # Each metric is computed once and reused for both storage and printing.
    recall_scores[run_key] = recall_score(real_class, pred_class)
    precision_scores[run_key] = precision_score(real_class, pred_class)
    acc_scores[run_key] = accuracy_score(real_class, pred_class)
    f1_scores[run_key] = f1_score(real_class, pred_class)

    if print_all:
        with open(f"../../results/regime/lstm/{directory}/summary.txt") as f:
            print(f.read())
        print(confusion_matrix(real_class, pred_class))
        print("Recall", recall_scores[run_key])
        print("Precision", precision_scores[run_key])
        print("Accuracy", acc_scores[run_key])
        print("F1", f1_scores[run_key])
        print()

# Best run per metric. max() raises ValueError on an empty dict, so the case
# where no directory matched is reported explicitly instead.
for metric_label, metric_scores in (
    ("Recall", recall_scores),
    ("Precision", precision_scores),
    ("Accuracy", acc_scores),
    ("F1", f1_scores),
):
    print(metric_label)
    if metric_scores:
        best_run = max(metric_scores, key=metric_scores.get)
        print(best_run, metric_scores[best_run])
    else:
        print("no matching result directories")

In [None]:
# Test results of the market-cap-change class run c44a4142, with `date` parsed to datetimes.
mc_change_class_results = pd.read_csv(
    "../../results/regime/lstm/mc_change_class_train_before_1980_win_std_3_scale_log_retrained_c44a4142/test_results.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Split dates for the bear model, as Timestamps in chronological order.
train_split_dates_bear = [
    pd.Timestamp(day)
    for day in (
        "1975-04-01", "1983-02-15", "1985-02-01", "1988-06-07",
        "1991-04-10", "1999-04-10", "2000-11-25",
        "2003-09-12", "2009-09-10", "2011-01-10",
        "2012-04-04", "2016-08-12", "2019-06-28",
        # currently excluded: "2020-09-28", "2023-04-15", "2024-05-01",
    )
]

In [None]:
# Split dates for the bull model, as Timestamps in chronological order.
train_split_dates_bull = list(
    map(
        pd.Timestamp,
        (
            '1976-01-15', '1981-05-26', '1983-12-23',
            '1988-02-25', '1990-04-09', '1994-08-01',
            '1999-01-17', '2000-09-24', '2004-09-01',
            '2005-08-28', '2006-11-08', '2008-01-17',
            '2010-10-23', '2011-11-02', '2015-11-19',
            '2018-07-25', '2019-03-20', '2020-08-19',
            '2022-07-03',
        ),
    )
)

In [None]:
def _shade_consecutive_date_runs(ax, dates, verbose=False, **span_kwargs):
    """Shade one vertical span per run of calendar-consecutive dates.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    dates : pandas Series of Timestamps, compared pairwise in the order given.
    verbose : when True, print the (start, end) of every span drawn.
    **span_kwargs : forwarded unchanged to ``ax.axvspan``.

    A run is a stretch of dates where each one is exactly one day after its
    predecessor. Every run is drawn from its first date to its last date plus
    one day. An empty series draws nothing.
    """
    if len(dates) == 0:
        return

    one_day = pd.DateOffset(days=1)
    run_start = dates.iloc[0]
    for previous_day, next_day in zip(dates.iloc[:-1], dates.iloc[1:]):
        if next_day - one_day == previous_day:
            continue  # still inside the current run
        run_end = previous_day + one_day
        if verbose:
            print(run_start, run_end)
        ax.axvspan(run_start, run_end, **span_kwargs)
        run_start = next_day

    # Close the final run on the LAST date. Closing it on the second-to-last
    # date would cut the run one day short and, when the last date stands
    # alone, produce a reversed span covering the gap before it.
    run_end = dates.iloc[-1] + one_day
    if verbose:
        print(run_start, run_end)
    ax.axvspan(run_start, run_end, **span_kwargs)


fig, ax = plt.subplots(1, figsize=(12, 4), sharex=False)

min_year = 1980
max_year = 2023

data_copy = data.copy()

# NOTE: this rebinds the notebook-wide min_date / max_date.
min_date = pd.Timestamp(f"{min_year}-01-01")
max_date = pd.Timestamp(f"{max_year}-12-31")

data_display = data_copy[(data_copy.index >= min_date) & (data_copy.index <= max_date)]

market_cap = data_display["market_cap_usd"].dropna()

market_cap.plot(ax=ax, alpha=0.5, color="tab:orange", label="Index Market Cap (Log)", logy=True, linewidth=2)

colors = ["tab:blue", "tab:red", "tab:green", "tab:orange", "tab:purple", "tab:brown", "tab:pink", "tab:olive", "tab:cyan"]

# Reference ("true") periods: full-height brown spans.
date_file_true = sp500_bull_dates.copy()
date_file_true = date_file_true[date_file_true["date"] >= min_date]
date_file_true = date_file_true[date_file_true["date"] <= max_date]
_shade_consecutive_date_runs(ax, date_file_true["date"], verbose=True, facecolor='tab:brown', alpha=0.5)

# Model-predicted periods: blue spans covering the middle 80% of the axes height.
current_date_file = lstm_test_pred_mc_change_class_bull.copy()
current_date_file = current_date_file[current_date_file["date"] >= min_date]
current_date_file = current_date_file[current_date_file["date"] <= max_date]
_shade_consecutive_date_runs(ax, current_date_file["date"], facecolor='blue', alpha=0.3, ymin=0.1, ymax=0.9)

# Optional dashed vertical markers; disabled while `axvlines` is None.
axvlines = None

if axvlines is not None:
    for line in axvlines:
        ax.axvline(line, color="black", linestyle="--", linewidth=2)

# Not used by this cell; kept because other cells may reference it.
nber_start_ann_dates = ["1980-06-03", "1982-01-06", "1991-04-25", "2001-11-26", "2008-12-01"]

plt.xlim(left=min_date, right=max_date)

ax.tick_params(axis='both', which='major', labelsize=14)

# Hide y ticks and the x-axis label.
ax.axes.get_yaxis().set_ticks([])

ax.axes.get_xaxis().set_label_text('')

plt.tight_layout()

In [None]:
# Write the current timeline figure to the "bear" PDF.
# NOTE(review): the timeline cell above reads `sp500_bull_dates` and
# `lstm_test_pred_mc_change_class_bull`, so on a top-to-bottom run this stores the
# bull figure under the bear filename. Re-point that cell at the bear inputs before running this.
fig.savefig("../../figures/LSTM_model_bear_mc_change_timeline.pdf", dpi=3000)

In [None]:
# Write the current timeline figure to the "bull" PDF; this matches the inputs
# the timeline cell above uses (`sp500_bull_dates`, `lstm_test_pred_mc_change_class_bull`).
fig.savefig("../../figures/LSTM_model_bull_mc_change_timeline.pdf", dpi=3000)

In [None]:
# Write the current timeline figure to the "NBER recessions" PDF.
# NOTE(review): the timeline cell above is built from the bull date files, so on a
# top-to-bottom run this stores the bull figure under the NBER filename.
fig.savefig("../../figures/LSTM_model_NBER_recessions_timeline.pdf", dpi=3000)

## Figures

In [None]:
# Display label for every test-period file, keyed by file name without extension.
test_file_name_dict = dict(
    [
        # LSTM class models
        ('nber_recession_dates_class_lstm_ba4da75c', "NBER Recession Class LSTM"),
        ('nber_non_recession_dates_class_lstm_ba4da75c', "NBER Non-Recession Class LSTM"),
        ('bear_dates_qbear_class_lstm_9046df4a', "Bear Class LSTM"),
        ('bull_dates_qbull_class_lstm_f241ab59', "Bull Class LSTM"),
        ('non_bear_dates_qbear_class_lstm_9046df4a', "Non-Bear Class LSTM"),
        ('non_bull_dates_bqull_class_lstm_f241ab59', "Non-Bull Class LSTM"),
        # Markov models
        ('markov_rec_dates_test_all_years_order1_4_10_5yr_avg', "Markov Recession"),
        ('markov_exp_dates_test_all_years_order1_4_10_5yr_avg', "Markov Expansion"),
        # Market-cap-change LSTM
        ('bear_lstm_mc_change_class', "Bear MC Change LSTM"),
        ('bull_lstm_mc_change_class', "Bull MC Change LSTM"),
        # Return filters
        ('return_filter_bear_m_long_3_6_12', "Negative Filter (LT)"),
        ('return_filter_bear_m_short_2_3', "Negative Filter (ST)"),
        ('return_filter_bull_m_long_3_6_12', "Positive Filter (LT)"),
        ('return_filter_bull_m_short_2_3', "Positive Filter (ST)"),
    ]
)

In [None]:
# Same labels as `test_file_name_dict`, in the order the figures draw them:
# the recession / bear / non-bull group first, then the expansion / bull / non-bear group.
test_file_name_dict_order = dict(
    [
        ('nber_recession_dates_class_lstm_ba4da75c', "NBER Recession Class LSTM"),
        ('bear_dates_qbear_class_lstm_9046df4a', "Bear Class LSTM"),
        ('non_bull_dates_bqull_class_lstm_f241ab59', "Non-Bull Class LSTM"),
        ('markov_rec_dates_test_all_years_order1_4_10_5yr_avg', "Markov Recession"),
        ('bear_lstm_mc_change_class', "Bear MC Change LSTM"),
        ('return_filter_bear_m_long_3_6_12', "Negative Filter (LT)"),
        ('return_filter_bear_m_short_2_3', "Negative Filter (ST)"),
    ]
    + [
        ('nber_non_recession_dates_class_lstm_ba4da75c', "NBER Non-Recession Class LSTM"),
        ('bull_dates_qbull_class_lstm_f241ab59', "Bull Class LSTM"),
        ('non_bear_dates_qbear_class_lstm_9046df4a', "Non-Bear Class LSTM"),
        ('markov_exp_dates_test_all_years_order1_4_10_5yr_avg', "Markov Expansion"),
        ('return_filter_bull_m_long_3_6_12', "Positive Filter (LT)"),
        ('return_filter_bull_m_short_2_3', "Positive Filter (ST)"),
        ('bull_lstm_mc_change_class', "Bull MC Change LSTM"),
    ]
)

In [None]:
# Display label for each training-period file, keyed by file name without extension.
train_file_name_dict = dict(
    [
        ('bear_dates_sp500', "Qualitative Bear"),
        ('bull_dates_sp500', "Qualitative Bull"),
        ('nber_recession_dates', "NBER Recession"),
        ('nber_expansion_dates', "NBER Expansion"),
    ]
)

In [None]:
# Every entry of the test-period folder; each one is later read as a CSV and looked
# up in `test_file_name_dict`. os.listdir returns entries in arbitrary order.
all_test_files = os.listdir("../../time_periods/model_test_ready/")

In [None]:
# Date window for the test-period baselines (rebinds the notebook-wide names).
min_date, max_date = (pd.Timestamp(day) for day in ("1980-01-01", "2023-12-31"))

In [None]:
# Seed the stacked test-period frame with an unfiltered, labelled copy of `data`.
all_test_periods = data.assign(name="all_dates", proper_name="All Dates")

In [None]:
# Append one labelled slice of `data` per test-period CSV to `all_test_periods`.
# Slices are collected in a list and concatenated once, because calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
# NOTE: re-running this cell appends the same slices again; re-run the cell
# that seeds `all_test_periods` first.
period_frames = [all_test_periods]

# sorted(): os.listdir order is arbitrary, so fix it for reproducible row order.
for test_file in sorted(all_test_files):
    if not test_file.endswith(".csv"):
        # Hidden files or sub-folders in the directory are not date files.
        continue

    period_name = test_file.split(".")[0]
    if period_name not in test_file_name_dict:
        # Keep the data rather than aborting on a bare KeyError, but make the
        # missing label visible.
        print(f"no display name for {period_name!r}; using the file name as label")

    current_dates = pd.read_csv(f"../../time_periods/model_test_ready/{test_file}")
    current_dates["date"] = pd.to_datetime(current_dates["date"])

    # Rows of `data` whose date appears in this file.
    current_test_period = data[data.index.isin(current_dates["date"])].copy()
    current_test_period["name"] = period_name
    current_test_period["proper_name"] = test_file_name_dict.get(period_name, period_name)
    period_frames.append(current_test_period)

all_test_periods = pd.concat(period_frames)



In [None]:
# Append the four reference ("training") periods to `all_test_periods`, labelled
# through `train_file_name_dict`.
train_period_frames = []
for train_file in ("bear_dates_sp500.csv", "bull_dates_sp500.csv", "nber_recession_dates.csv", "nber_expansion_dates.csv"):
    period_key = train_file.split(".")[0]
    period_dates = pd.to_datetime(
        pd.read_csv(f"../../time_periods/model_train_ready/{train_file}")["date"]
    )
    train_period_frames.append(
        data.loc[data.index.isin(period_dates)].assign(
            name=period_key,
            proper_name=train_file_name_dict[period_key],
        )
    )

all_test_periods = pd.concat([all_test_periods, *train_period_frames])

In [None]:
# Express each return column relative to its mean over the "all_dates" rows that
# lie strictly between min_date and max_date.
baseline_rows = (
    (all_test_periods.index > min_date)
    & (all_test_periods.index < max_date)
    & (all_test_periods["name"] == "all_dates")
)
for return_col in ("trr_1_n", "trr_w_fri"):
    baseline_mean = all_test_periods[baseline_rows][return_col].mean()
    all_test_periods[f"{return_col}_rel"] = all_test_periods[return_col] - baseline_mean


In [None]:
# One box per test period (in `test_file_name_dict_order` order) for `feature`,
# without fliers, with the mean drawn as a line.
fig, ax = plt.subplots(1, 1, figsize=(15, 8))

min_date = pd.Timestamp("1962-01-01")
max_date = pd.Timestamp("2019-12-31")

feature = "trr_1_n"

order = list(test_file_name_dict_order.keys())
labels = list(test_file_name_dict_order.values())

# Rows strictly inside the date window; independent of the period being drawn.
in_window = (all_test_periods.index > min_date) & (all_test_periods.index < max_date)
blue_line = {"color": "tab:blue", 'lw': 2}

for i, name in enumerate(order):
    period_values = all_test_periods[in_window & (all_test_periods["name"] == name)][feature].dropna()
    ax.boxplot(
        period_values,
        positions=[i],
        labels=[labels[i]],
        widths=0.5,
        showfliers=False,
        showmeans=True,
        meanline=True,
        whiskerprops=dict(blue_line),
        flierprops=dict(blue_line),
        boxprops=dict(blue_line),
        medianprops={"color": "tab:orange", 'lw': 2},
        capprops=dict(blue_line),
        meanprops={'lw': 2},
    )

ax.axhline(y=0, color="black", linestyle="--", alpha=0.5)
ax.tick_params(axis='both', which='major', labelsize=18)
ax.set_xticklabels(labels, rotation=45, ha='right')
#ax.set_ylim(-0.05, 0.05)
plt.tight_layout()
# Values are fractions; show them as percentages.
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))


In [None]:
# Write the current boxplot figure to the "unemployment_change_with_outliers" PDF.
# NOTE(review): the boxplot cell above currently has feature = "trr_1_n" and
# showfliers=False, so on a top-to-bottom run the file content will not match this filename.
fig.savefig("../../figures/train_periods_boxplot_unemployment_change_with_outliers.pdf", dpi=3000)

In [None]:
# Write the current boxplot figure to the "trr_1_n_rel_no_outliers" PDF.
# NOTE(review): the boxplot cell above plots feature = "trr_1_n" (not "trr_1_n_rel");
# confirm the intended column before saving under this name.
fig.savefig("../../figures/train_periods_boxplot_trr_1_n_rel_no_outliers.pdf", dpi=3000)

In [None]:
# Plot order and labels for the bar charts below: the recession / bear / non-bull
# group first, then the expansion / bull / non-bear group. This re-declares the
# same mapping that is defined earlier in the notebook.
test_file_name_dict_order = dict(
    [
        ('nber_recession_dates_class_lstm_ba4da75c', "NBER Recession Class LSTM"),
        ('bear_dates_qbear_class_lstm_9046df4a', "Bear Class LSTM"),
        ('non_bull_dates_bqull_class_lstm_f241ab59', "Non-Bull Class LSTM"),
        ('markov_rec_dates_test_all_years_order1_4_10_5yr_avg', "Markov Recession"),
        ('bear_lstm_mc_change_class', "Bear MC Change LSTM"),
        ('return_filter_bear_m_long_3_6_12', "Negative Filter (LT)"),
        ('return_filter_bear_m_short_2_3', "Negative Filter (ST)"),
    ]
    + [
        ('nber_non_recession_dates_class_lstm_ba4da75c', "NBER Non-Recession Class LSTM"),
        ('bull_dates_qbull_class_lstm_f241ab59', "Bull Class LSTM"),
        ('non_bear_dates_qbear_class_lstm_9046df4a', "Non-Bear Class LSTM"),
        ('markov_exp_dates_test_all_years_order1_4_10_5yr_avg', "Markov Expansion"),
        ('return_filter_bull_m_long_3_6_12', "Positive Filter (LT)"),
        ('return_filter_bull_m_short_2_3', "Positive Filter (ST)"),
        ('bull_lstm_mc_change_class', "Bull MC Change LSTM"),
    ]
)

In [None]:
# Grouped bars: mean (blue) and median (orange) of `feature` for every test period
# inside the date window.
fig, ax = plt.subplots(1, 1, figsize=(18, 8))

min_date = pd.Timestamp("1980-01-01")
max_date = pd.Timestamp("2023-12-31")

feature = "trr_w_fri_rel"

# Optional outlier filters that were tried here (all currently disabled):
#   all_test_periods["initial_claims_change"] < 1
#   all_test_periods["trr_w_fri_rel"] < 1
#   all_test_periods["trr_w_fri_rel"] > -0.5
all_periods_current = all_test_periods.copy()

order = list(test_file_name_dict_order.keys())
labels = list(test_file_name_dict_order.values())

# Rows strictly inside the date window; independent of the period being drawn.
in_window = (all_periods_current.index > min_date) & (all_periods_current.index < max_date)

for i, name in enumerate(order):
    # Back-slash hatch for bear / recession / non-bull periods (but not their
    # "non_bear" / "non_rec" complements); dotted hatch for flat / all-dates.
    downturn_like = "bear" in name or "rec" in name or "non_bull" in name
    is_complement = "non_bear" in name or "non_rec" in name
    hatch = "\\\\" if downturn_like and not is_complement else None
    if "flat" in name or "all_dates" in name:
        hatch = '..'

    period_values = all_periods_current[in_window & (all_periods_current["name"] == name)][feature].dropna()
    period_mean = period_values.mean()
    period_median = period_values.median()

    # Only the first pair of bars carries legend labels.
    mean_label = {"label": "Mean"} if i == 0 else {}
    median_label = {"label": "Median"} if i == 0 else {}

    bar1 = ax.bar(height=period_mean, x=i - 0.2, width=0.4,
                  color="tab:blue", edgecolor="black", hatch=hatch, **mean_label)
    bar2 = ax.bar(height=period_median, x=i + 0.2, width=0.4,
                  color="tab:orange", edgecolor="black", hatch=hatch, **median_label)

    # A zero-height median bar is invisible, so annotate it with its value.
    if period_median == 0:
        ax.bar_label(bar2, padding=3, fontsize=16)

ax.axhline(y=0, color="black", lw=1)
ax.tick_params(axis='both', which='major', labelsize=18, labelbottom=True)
plt.xticks(range(len(labels)))
ax.set_xticklabels(labels, rotation=35, ha='right')

ax.grid(axis='y')

# Legend swatches are drawn without the hatch inherited from the first bars.
ax.legend(fontsize=18)
leg = ax.get_legend()
for handle in leg.legend_handles[:2]:
    handle.set_hatch("")
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=2))

plt.tight_layout()


In [None]:
#two time periods:
# Grouped bars comparing the mean of `feature` per test period across two date
# windows, optionally relative to the "all_dates" mean of the same window.

fig, ax = plt.subplots(1, 1, figsize=(18, 8))

min_date_1 = pd.Timestamp("1980-01-01")
max_date_1 = pd.Timestamp("2002-12-31")

min_date_2 = pd.Timestamp("2003-01-01")
max_date_2 = pd.Timestamp("2023-12-31")

feature = "initial_claims_change"

# Rows with initial_claims_change >= 1 (or missing) are dropped before averaging.
all_periods_current = all_test_periods.copy()[all_test_periods["initial_claims_change"] < 1]

#all_periods_current = all_test_periods.copy()

order = list(test_file_name_dict_order.keys())# + ["bear_dates_sp500", "bull_dates_sp500", "nber_recession_dates", "nber_expansion_dates"]
labels = list(test_file_name_dict_order.values())# + ["Qualitative Bear", "Qualitative Bull", "NBER Recession", "NBER Expansion"]

use_relative = True

# Window masks (strict bounds) and the per-window "all_dates" baselines.
in_window_1 = (all_periods_current.index > min_date_1) & (all_periods_current.index < max_date_1)
in_window_2 = (all_periods_current.index > min_date_2) & (all_periods_current.index < max_date_2)
is_all_dates = all_periods_current["name"] == "all_dates"

avg_1 = all_periods_current[is_all_dates & in_window_1][feature].dropna().mean()
avg_2 = all_periods_current[is_all_dates & in_window_2][feature].dropna().mean()

for i, name in enumerate(order):
    # Back-slash hatch for bear / recession / non-bull periods (but not their
    # "non_bear" / "non_rec" complements); dotted hatch for flat / all-dates.
    downturn_like = "bear" in name or "rec" in name or "non_bull" in name
    is_complement = "non_bear" in name or "non_rec" in name
    hatch = "\\\\" if downturn_like and not is_complement else None
    if "flat" in name or "all_dates" in name:
        hatch = '..'

    is_period = all_periods_current["name"] == name
    value_1 = all_periods_current[in_window_1 & is_period][feature].dropna().mean()
    value_2 = all_periods_current[in_window_2 & is_period][feature].dropna().mean()
    if use_relative:
        value_1 = value_1 - avg_1
        value_2 = value_2 - avg_2

    # Only the first pair of bars carries legend labels.
    first_window_label = {"label": "1980-2002"} if i == 0 else {}
    second_window_label = {"label": "2003-2023"} if i == 0 else {}

    bar1 = ax.bar(height=value_1, x=i - 0.2, width=0.4,
                  color="tab:blue", edgecolor="black", hatch=hatch, **first_window_label)
    bar2 = ax.bar(height=value_2, x=i + 0.2, width=0.4,
                  color="tab:orange", edgecolor="black", hatch=hatch, **second_window_label)

ax.axhline(y=0, color="black", lw=1)
ax.tick_params(axis='both', which='major', labelsize=18, labelbottom=True)
plt.xticks(range(len(labels)))
ax.set_xticklabels(labels, rotation=35, ha='right')

ax.grid(axis='y')

# Legend swatches are drawn without the hatch inherited from the first bars.
ax.legend(fontsize=18)
leg = ax.get_legend()
for handle in leg.legend_handles[:2]:
    handle.set_hatch("")
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=2))

plt.tight_layout()


In [None]:
# Write the current bar figure to the single-window "trr_w_fri_rel" PDF.
# NOTE(review): on a top-to-bottom run `fig` has already been rebound by the
# two-time-period cell (feature = "initial_claims_change"), so this would store
# that figure under the trr_w_fri_rel filename. Run this right after the mean/median cell instead.
fig.savefig("../../figures/test_periods_barplot_trr_w_fri_rel.pdf", dpi=3000)

In [None]:
# Write the current two-window bar figure to the "trr_w_fri_rel" comparison PDF.
# NOTE(review): the two-time-period cell currently has feature = "initial_claims_change";
# switch it to "trr_w_fri_rel" before saving under this name.
fig.savefig("../../figures/test_periods_barplot_trr_w_fri_rel_time_period_comparison.pdf", dpi=3000)

In [None]:
# Write the current two-window bar figure to the "initial_claims" comparison PDF;
# this matches the two-time-period cell as written (feature = "initial_claims_change", use_relative = True).
fig.savefig("../../figures/test_periods_barplot_initial_claims_relative_change_time_period_comparison.pdf", dpi=3000)

# Statistics

In [None]:
# Treat one date file as predictions and another as ground truth, label every
# business day in the window 0/1 for each, and print the classification metrics.
current_pred_dates = filter_bull_dates_2.copy()
#current_pred_dates = filter_bull_dates_2.copy()
#current_true_dates = nber_rec_dates.copy()
current_true_dates = sp500_bear_dates.copy()

current_min_date = pd.Timestamp("1980-01-01")
current_max_date = pd.Timestamp("2023-12-31")

# One row per business day (freq="B") in the window.
results_df = pd.DataFrame(
    {"date": pd.date_range(start=current_min_date, end=current_max_date, freq="B")}
)

# 1 when the day appears in the corresponding date file, otherwise 0.
results_df["real_class"] = results_df["date"].isin(current_true_dates["date"]).astype("int64")
results_df["pred_class"] = results_df["date"].isin(current_pred_dates["date"]).astype("int64")

for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Recall:", recall_score),
    ("Precision:", precision_score),
    ("F1:", f1_score),
):
    print(metric_label, metric_fn(results_df["real_class"], results_df["pred_class"]))

# Share of business days flagged by the prediction file.
flagged_days = int((results_df["pred_class"] == 1).sum())
print("b.days chosen:", flagged_days / len(results_df))