In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Raise both pandas display limits so wide/long frames are shown without truncation.
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, 1000)

In [None]:
# Load the NBER recession indicator, parse its "date" column and keep rows
# dated 1962-01-01 or later.
nber_recessions = (
    pd.read_parquet("../../data/indicators/US/nber_recession.parquet")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .loc[lambda frame: frame["date"] >= "1962-01-01"]
)

In [None]:
# Read the US top-500 indicator file (pyarrow engine) and parse its "date" column.
us_top_500 = pd.read_parquet(
    "../../data/indicators/US/us_top_500.parquet", engine="pyarrow"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

# `data` is a second name for the same frame (no copy is made here).
data = us_top_500

In [None]:
# Index `data` by its "date" column.
# The original used `set_index(..., inplace=True)`, which (a) raised KeyError when the
# cell was executed a second time because the column had already been moved into the
# index, and (b) silently mutated `us_top_500`, which `data` aliases. The guarded,
# non-mutating form below is safe to re-run and leaves `us_top_500` untouched.
if data.index.name != "date":
    data = data.set_index("date")

# Fix the legacy global NumPy seed.
np.random.seed(49)

In [None]:
# Bear-market periods: a ";"-separated CSV whose period_start / period_end
# columns are parsed to datetimes.
bear_periods = pd.read_csv(
    "../../time_periods/bear_periods_sp500.csv", delimiter=";"
).assign(
    period_start=lambda frame: pd.to_datetime(frame["period_start"]),
    period_end=lambda frame: pd.to_datetime(frame["period_end"]),
)

In [None]:
# Individual bear dates, one row per date, with the "date" column parsed to datetimes.
bear_dates = pd.read_csv("../../time_periods/bear_dates_sp500.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

In [None]:
# Bull-market periods: a ";"-separated CSV whose period_start / period_end
# columns are parsed to datetimes.
bull_periods = pd.read_csv(
    "../../time_periods/bull_periods_sp500.csv", delimiter=";"
).assign(
    period_start=lambda frame: pd.to_datetime(frame["period_start"]),
    period_end=lambda frame: pd.to_datetime(frame["period_end"]),
)

In [None]:
# Individual bull dates, one row per date, with the "date" column parsed to datetimes.
bull_dates = pd.read_csv("../../time_periods/bull_dates_sp500.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

In [None]:
# Individual NBER recession dates, with the "date" column parsed to datetimes.
nber_rec_dates = pd.read_csv(
    "../../time_periods/model_train_ready/nber_recession_dates.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [None]:


# One wide panel that carries the market-cap line and every overlay drawn below.
fig, ax = plt.subplots(1, figsize=(12, 4), sharex=False)

# Work on a copy so `data` itself is never touched by this cell.
data_copy = data.copy()

# Plot window; both bounds are exclusive in the mask below.
min_date = pd.Timestamp("1962-01-01")
max_date = pd.Timestamp("2024-01-01")

# Weekly frequency with weeks ending on Friday.
resample_freq = "W-FRI"

# Restrict to the window, then take the first value in each weekly bin.
inside_window = (data_copy.index > min_date) & (data_copy.index < max_date)
market_cap = (
    data_copy.loc[inside_window, "market_cap_usd"]
    .resample(resample_freq)
    .first()
)
#market_cap = data_copy[data_copy.index > min_date]["market_cap_usd"]

#ax2 = ax.twinx()
# Draw the series on a logarithmic y-axis.
market_cap.plot(
    ax=ax,
    alpha=0.5,
    color="tab:orange",
    label="Index Market Cap (Log)",
    logy=True,
    linewidth=2,
)


def _shade_consecutive_days(axis, dates, **span_kwargs):
    """Shade one vertical span per run of consecutive calendar days.

    Parameters
    ----------
    axis : matplotlib Axes
        Axes that receives the ``axvspan`` patches.
    dates : pandas Series (or other sequence) of datetimes
        Individual days, in the order they should be scanned. A new run starts
        whenever a value is not exactly one day after its predecessor.
    **span_kwargs
        Forwarded unchanged to ``axis.axvspan`` (facecolor, alpha, ymin, ymax, ...).

    Notes
    -----
    Each span runs from the first day of a run to one day past its last day.
    This replaces three hand-rolled index loops whose closing ``axvspan`` call
    reused the leaked loop variable ``i`` (the second-to-last position): the
    final run was cut one day short, an isolated last date produced a reversed
    span, and inputs with fewer than two dates hit an undefined or stale ``i``.
    Empty input draws nothing.
    """
    days = pd.to_datetime(pd.Series(dates)).reset_index(drop=True)
    # A gap of anything other than one day (including the NaT produced for the
    # first element) opens a new run; the cumulative sum labels the runs.
    run_labels = days.diff().ne(pd.Timedelta(days=1)).cumsum()
    for _, run in days.groupby(run_labels):
        axis.axvspan(run.iloc[0], run.iloc[-1] + pd.DateOffset(days=1), **span_kwargs)


# NBER recession dates: grey bands over the full height of the axes.
_shade_consecutive_days(ax, nber_rec_dates['date'], facecolor='grey', alpha=0.5)


#for row in bear_periods.iterrows():
#    criterion = (market_cap.index > row[1]["period_start"]) & (market_cap.index < row[1]["period_end"])
#    ax.fill_between(market_cap.index, 10**14, where= criterion, facecolor='red', alpha=0.3)
    
#for row in bull_periods.iterrows():
#    criterion = (market_cap.index > row[1]["period_start"]) & (market_cap.index < row[1]["period_end"])
#    ax.fill_between(market_cap.index, 10**14, where= criterion, facecolor='blue', alpha=0.3)

# Bear dates: red bands in the upper part of the axes (50%-90% of the height).
_shade_consecutive_days(ax, bear_dates['date'], facecolor='red', alpha=0.3, ymin=0.5, ymax=0.9)

# Bull dates: blue bands in the lower part of the axes (10%-50% of the height).
_shade_consecutive_days(ax, bull_dates['date'], facecolor='blue', alpha=0.3, ymin=0.1, ymax=0.5)



#ax.legend(fontsize=14)
#plt.tight_layout()

#axes[0].grid()
#axes[1].grid()

# Use a larger font for the major tick labels on both axes.
ax.tick_params(which='major', axis='both', labelsize=14)

# Hide the y-axis ticks of `ax`.
ax.yaxis.set_ticks([])

# Blank out the x-axis label.
ax.xaxis.set_label_text('')

def _draw_announcement_lines(axis, announcement_dates, color, label):
    """Draw one dashed vertical line per date string in `announcement_dates`.

    Only the first line receives `label`, so the series contributes a single
    legend entry. Replaces two copy-pasted loops that differed only in colour
    and label text.
    """
    for position, day in enumerate(announcement_dates):
        legend_entry = {"label": label} if position == 0 else {}
        axis.axvline(pd.Timestamp(day), color=color, linestyle="--", alpha=0.5, lw=2, **legend_entry)


# Optional overlay: dates labelled as NBER recession-end announcements (off by default).
nber_end_lines = False
nber_end_ann_dates = ["1981-07-08", "1983-07-08", "1992-12-22", "2003-07-17", "2010-09-20"]

if nber_end_lines:
    # Legend label typo "Accouncement" corrected to "Announcement".
    _draw_announcement_lines(ax, nber_end_ann_dates, color="red", label="NBER Rec. End Announcement")

# Optional overlay: dates labelled as NBER recession-start announcements (off by default).
nber_start_lines = False
nber_start_ann_dates = ["1980-06-03", "1982-01-06", "1991-04-25", "2001-11-26", "2008-12-01"]

if nber_start_lines:
    _draw_announcement_lines(ax, nber_start_ann_dates, color="green", label="NBER Rec. Start Announcement")

bear = True
training_lines = True

# Candidate split dates are each period's end shifted forward by six months;
# only candidates strictly after 1974-01-01 are kept.
_split_lag = pd.DateOffset(months=6)
_earliest_split = pd.Timestamp("1974-01-01")

training_split_dates_bear = [
    shifted
    for shifted in (period_end + _split_lag for period_end in bear_periods["period_end"])
    if shifted > _earliest_split
]
# Hand-picked exclusion; `.remove` raises ValueError if the date is absent.
training_split_dates_bear.remove(pd.Timestamp("2000-11-25"))

training_split_dates_bull = [
    shifted
    for shifted in (period_end + _split_lag for period_end in bull_periods["period_end"])
    if shifted > _earliest_split
]
# Hand-picked exclusions, removed in this order; each must be present or
# `.remove` raises ValueError.
for _excluded_day in (
    "1988-02-25",
    "1999-01-17",
    "2004-09-01",
    "2006-11-08",
    "2010-10-23",
    "2011-11-02",
    "2018-07-25",
    "2019-03-20",
    "2020-08-19",
):
    training_split_dates_bull.remove(pd.Timestamp(_excluded_day))
#training_split_dates.remove(pd.Timestamp("2000-11-25"))


if training_lines:
    # Bear splits: red dashed lines in the upper band (50%-90% of the axes height).
    for split_day in training_split_dates_bear:
        ax.axvline(pd.Timestamp(split_day), color="red", linestyle="--", alpha=0.6, lw=2, ymax=0.9, ymin=0.5)
    # Bull splits: blue dashed lines in the lower band (10%-50% of the axes height).
    for split_day in training_split_dates_bull:
        ax.axvline(pd.Timestamp(split_day), color="blue", linestyle="--", alpha=0.6, lw=2, ymax=0.5, ymin=0.1)

plt.tight_layout()
#ax.legend(fontsize=14)

In [None]:
# Daily calendar from 1961-01-01 through 2025-01-01 (both inclusive), as a
# one-column frame named "date".
all_dates = pd.DataFrame(
    {"date": pd.date_range(start="1961-01-01", end="2025-01-01", freq="D")}
)

#### Bear/bull periods are recorded in the bear_periods/bull_periods CSVs and expanded into individual dates here

### Bear periods to bear/non-bear dates

In [None]:
# (start, end) pairs, one per bear period.
date_intervals = list(
    zip(
        pd.to_datetime(bear_periods["period_start"]),
        pd.to_datetime(bear_periods["period_end"]),
    )
)

# Flag every calendar day that falls inside at least one period (both ends
# inclusive). A single boolean mask replaces the earlier pd.concat of per-period
# slices, which emitted duplicate rows when periods overlapped and raised
# ValueError when there were no periods at all. For chronologically ordered,
# non-overlapping periods the result is unchanged.
in_bear_period = pd.Series(False, index=all_dates.index)
for period_start, period_end in date_intervals:
    in_bear_period |= all_dates["date"].between(period_start, period_end)

bear_dates = all_dates[in_bear_period]

# Every calendar day that is not a bear date.
non_bear_dates = all_dates[~in_bear_period]


In [None]:
# Persist the bear / non-bear date tables as CSV, without the index column.
for _table, _destination in (
    (bear_dates, "../../time_periods/bear_dates_sp500.csv"),
    (non_bear_dates, "../../time_periods/non_bear_dates_sp500.csv"),
):
    _table.to_csv(_destination, index=False)

### Bull periods to bull/non-bull dates

In [None]:
# Read the bull-period table (";"-separated) and parse both period boundary
# columns to datetimes.
bull_periods = pd.read_csv(
    "../../time_periods/bull_periods_sp500.csv", delimiter=";"
).assign(
    period_start=lambda frame: pd.to_datetime(frame["period_start"]),
    period_end=lambda frame: pd.to_datetime(frame["period_end"]),
)

In [None]:
# (start, end) pairs, one per bull period.
date_intervals = list(
    zip(
        pd.to_datetime(bull_periods["period_start"]),
        pd.to_datetime(bull_periods["period_end"]),
    )
)

# Flag every calendar day that falls inside at least one period (both ends
# inclusive). A single boolean mask replaces the earlier pd.concat of per-period
# slices, which emitted duplicate rows when periods overlapped and raised
# ValueError when there were no periods at all. For chronologically ordered,
# non-overlapping periods the result is unchanged.
in_bull_period = pd.Series(False, index=all_dates.index)
for period_start, period_end in date_intervals:
    in_bull_period |= all_dates["date"].between(period_start, period_end)

bull_dates = all_dates[in_bull_period]

# Every calendar day that is not a bull date.
non_bull_dates = all_dates[~in_bull_period]

In [None]:
# Persist the bull / non-bull date tables as CSV, without the index column.
for _table, _destination in (
    (bull_dates, "../../time_periods/bull_dates_sp500.csv"),
    (non_bull_dates, "../../time_periods/non_bull_dates_sp500.csv"),
):
    _table.to_csv(_destination, index=False)

### Flat dates

In [None]:
# Flat dates are the days present in both the non-bull and the non-bear tables
# (inner join on "date"); the result is written to CSV without the index column.
flat_dates = non_bull_dates.merge(non_bear_dates, on="date", how="inner")
flat_dates.to_csv("../../time_periods/flat_dates_sp500.csv", index=False)