In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Let pandas render up to 1000 columns and 1000 rows when a frame is displayed.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, 1000)

In [None]:
# Read the raw indicator table, coerce `date` to datetime and
# replace whatever index the file carried with a fresh 0..n-1 index.
indicators = (
    pd.read_parquet(
        "../../data/indicators/US/all_indicators_raw_outer.parquet",
        engine="pyarrow",
    )
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .reset_index(drop=True)
)

In [None]:
# Read the NBER recession table and keep only rows dated 1962-01-01 or later.
nber_recessions = (
    pd.read_parquet("../../data/indicators/US/nber_recession.parquet")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .loc[lambda frame: frame["date"] >= "1962-01-01"]
)

In [None]:
# Read the top-500 table and coerce `date` to datetime.
us_top_500 = pd.read_parquet(
    "../../data/indicators/US/us_top_500.parquet",
    engine="pyarrow",
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

# Outer join on `date`: keeps every date present in either table.
data = indicators.merge(us_top_500, how="outer", on=["date"])

In [None]:
# Seed NumPy's global random number generator.
np.random.seed(49)

# Use the `date` column as the index. This consumes the column, so the cell
# cannot be run a second time without first re-running the merge cell.
data = data.set_index("date")

# Change in Market Cap

In [None]:
# Last available market cap of each calendar month ("ME" = month-end bins),
# shifted forward by one day so that each value is stamped on the first day
# of the following month. Computed once and reused for every horizon.
monthly_market_cap = (
    data["market_cap_usd"]
    .dropna()
    .resample("ME")
    .last()
    .shift(1, freq="D")
)

# Percentage change of the monthly series over each look-back horizon
# (in months). Assignment aligns on the date index of `data`.
for months in (1, 2, 3, 4, 6, 12, 24, 36, 48, 60, 72):
    data[f"change_{months}m"] = monthly_market_cap.pct_change(months)

In [None]:
# Dates of the long-horizon (3/6/12) bear return filter, with `date` as datetime.
negative_filter_lt = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bear_m_long_3_6_12.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Dates of the short-horizon (2/3) bear return filter, with `date` as datetime.
negative_filter_st = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bear_m_short_2_3.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Dates of the long-horizon (3/6/12) bull return filter, with `date` as datetime.
# NOTE(review): not referenced by the plotting cell visible in this notebook.
positive_filter_lt = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bull_m_long_3_6_12.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# Dates of the short-horizon (2/3) bull return filter, with `date` as datetime.
# NOTE(review): not referenced by the plotting cell visible in this notebook.
positive_filter_st = pd.read_csv(
    "../../time_periods/model_train_ready/return_filter_bull_m_short_2_3.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
# NBER recession dates used for the grey shading in the figure below.
nber_rec_dates = pd.read_csv(
    "../../time_periods/model_train_ready/nber_recession_dates.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:
def _shade_consecutive_runs(ax, dates, **span_kwargs):
    """Shade every run of consecutive calendar days in ``dates`` on ``ax``.

    A run is a maximal stretch in which each date is exactly one day after
    the previous one. Every run is drawn with ``ax.axvspan`` from its first
    date to one day past its last date. Nothing is drawn for empty input.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw the spans on.
    dates : datetime-like Series
        Dates to group into runs; processed in their stored order
        (assumed ascending - TODO confirm against the CSV contents).
    **span_kwargs
        Forwarded unchanged to ``ax.axvspan`` (colour, alpha, ymin/ymax, ...).
    """
    dates = pd.Series(pd.to_datetime(dates)).reset_index(drop=True)
    if dates.empty:
        return

    one_day = pd.Timedelta(days=1)
    # A new run starts wherever the gap to the previous date is not exactly
    # one day; the first element (gap is NaT) always opens a run.
    run_id = (dates.diff() != one_day).cumsum()
    for _, run in dates.groupby(run_id):
        # Each run, including the last one, is closed at its own final date,
        # so the trailing span is neither cut short nor inverted.
        ax.axvspan(run.iloc[0], run.iloc[-1] + one_day, **span_kwargs)


fig, ax = plt.subplots(1, figsize=(12, 4), sharex=False)

data_copy = data.copy()
min_date = pd.Timestamp("1962-01-01")
resample_freq = "MS"

# Rows strictly after `min_date`; also reused by the filter cells below.
data_display = data_copy[data_copy.index > min_date]

# First available market-cap value of every month, drawn on a log scale.
market_cap = data_display["market_cap_usd"].dropna().resample(resample_freq).first()
ax.plot(market_cap, alpha=0.5, color="tab:orange", linewidth=2)
ax.set_yscale('log')

# Grey, full-height bands: NBER recession dates.
_shade_consecutive_runs(ax, nber_rec_dates['date'], facecolor='grey', alpha=0.5)

# Red band in the upper part of the axes (50%-90% of its height): long-horizon bear filter.
_shade_consecutive_runs(
    ax, negative_filter_lt['date'], facecolor='red', alpha=0.3, ymin=0.5, ymax=0.9
)

# Red band in the lower part of the axes (10%-50% of its height): short-horizon bear filter.
_shade_consecutive_runs(
    ax, negative_filter_st['date'], facecolor='red', alpha=0.3, ymin=0.1, ymax=0.5
)

# Not referenced anywhere else in this cell.
fill_height = 4.5*10**13

ax.tick_params(axis='both', which='major', labelsize=14)

# Hide the y ticks and clear the x-axis label.
ax.yaxis.set_ticks([])
ax.xaxis.set_label_text('')

ax.set_xlim(min_date, pd.Timestamp("2023-12-31"))

plt.tight_layout()

In [None]:
# NOTE(review): the plotting cell above shades only the negative (bear)
# filters, yet `fig` is written here under the "positive" file name.
# Confirm the figure was re-drawn with the positive filters before running this.
fig.savefig(
    "../../figures/return-filter-positive-lt-and-st.pdf",
    dpi=3000,
)

In [None]:
# Write the figure from the plotting cell (negative LT/ST filter shading) to PDF.
fig.savefig(
    "../../figures/return-filter-negative-lt-and-st.pdf",
    dpi=3000,
)

### Negative filter (LT)

In [None]:
# One row per month: the first non-null value of every column in that month.
bear_filtered = data_display.resample("MS").first()

# A month is flagged (class 1) when the market-cap change over ANY of the
# 3-, 6- or 12-month horizons is negative.
bear_filtered["class"] = 0
for horizon_col in ("change_3m", "change_6m", "change_12m"):
    # NaN < 0 evaluates to False, so months without a value for this horizon
    # are left untouched. The mask shares the frame's index, so no implicit
    # re-alignment of the indexer is involved.
    bear_filtered.loc[bear_filtered[horizon_col] < 0, "class"] = 1

# Expand the monthly flag to daily frequency, carrying each month's value forward.
bear_dates = bear_filtered["class"].resample("D").ffill()

In [None]:
# Persist the flagged days as a single-column (`date`) CSV.
# NOTE(review): the load cell near the top reads this file name from
# time_periods/model_train_ready/, while this writes to time_periods/ -
# confirm how the file gets from one location to the other.
pd.DataFrame({"date": bear_dates.loc[bear_dates == 1].index}).to_csv(
    "../../time_periods/return_filter_bear_m_long_3_6_12.csv",
    index=False,
)

### Negative filter (ST)

In [None]:
# One row per month: the first non-null value of every column in that month.
bear_filtered = data_display.resample("MS").first()

# A month is flagged (class 1) when the market-cap change over EITHER the
# 2- or the 3-month horizon is negative.
bear_filtered["class"] = 0
for horizon_col in ("change_2m", "change_3m"):
    # NaN < 0 evaluates to False, so months without a value for this horizon
    # are left untouched. The mask shares the frame's index, so no implicit
    # re-alignment of the indexer is involved.
    bear_filtered.loc[bear_filtered[horizon_col] < 0, "class"] = 1

# Expand the monthly flag to daily frequency, carrying each month's value forward.
bear_dates = bear_filtered["class"].resample("D").ffill()

In [None]:
# Persist the flagged days as a single-column (`date`) CSV.
# NOTE(review): the load cell near the top reads this file name from
# time_periods/model_train_ready/, while this writes to time_periods/ -
# confirm how the file gets from one location to the other.
pd.DataFrame({"date": bear_dates.loc[bear_dates == 1].index}).to_csv(
    "../../time_periods/return_filter_bear_m_short_2_3.csv",
    index=False,
)

### Positive filter (LT)

In [None]:
# One row per month: the first non-null value of every column in that month.
bull_filtered = data_display.resample("MS").first()

# A month is flagged (class 1) when the market-cap change over ANY of the
# 3-, 6- or 12-month horizons is positive. The 12-month horizon is tested on
# `change_12m` itself, mirroring the negative long-horizon filter.
bull_filtered["class"] = 0
for horizon_col in ("change_3m", "change_6m", "change_12m"):
    # NaN > 0 evaluates to False, so months without a value for this horizon
    # are left untouched. The mask shares the frame's index, so no implicit
    # re-alignment of the indexer is involved.
    bull_filtered.loc[bull_filtered[horizon_col] > 0, "class"] = 1

# Expand the monthly flag to daily frequency, carrying each month's value forward.
bull_dates = bull_filtered["class"].resample("D").ffill()

In [None]:
# Persist the flagged days as a single-column (`date`) CSV.
# NOTE(review): the load cell near the top reads this file name from
# time_periods/model_train_ready/, while this writes to time_periods/ -
# confirm how the file gets from one location to the other.
pd.DataFrame({"date": bull_dates.loc[bull_dates == 1].index}).to_csv(
    "../../time_periods/return_filter_bull_m_long_3_6_12.csv",
    index=False,
)

### Positive filter (ST)

In [None]:
# One row per month: the first non-null value of every column in that month.
bull_filtered = data_display.resample("MS").first()

# A month is flagged (class 1) when the market-cap change over EITHER the
# 2- or the 3-month horizon is positive.
bull_filtered["class"] = 0
for horizon_col in ("change_2m", "change_3m"):
    # NaN > 0 evaluates to False, so months without a value for this horizon
    # are left untouched. The mask shares the frame's index, so no implicit
    # re-alignment of the indexer is involved.
    bull_filtered.loc[bull_filtered[horizon_col] > 0, "class"] = 1

# Expand the monthly flag to daily frequency, carrying each month's value forward.
bull_dates = bull_filtered["class"].resample("D").ffill()

In [None]:
# Persist the flagged days as a single-column (`date`) CSV.
# NOTE(review): the load cell near the top reads this file name from
# time_periods/model_train_ready/, while this writes to time_periods/ -
# confirm how the file gets from one location to the other.
pd.DataFrame({"date": bull_dates.loc[bull_dates == 1].index}).to_csv(
    "../../time_periods/return_filter_bull_m_short_2_3.csv",
    index=False,
)