In [None]:
import pandas as pd
import matplotlib.dates as mdates
# Let pandas display up to 100 columns before it starts eliding wide frames.
pd.options.display.max_columns = 100

In [None]:
# Both period tables share one layout: semicolon-separated, with the
# "monday_start"/"monday_end" columns written as day.month.year.
week_csv_options = {
    "delimiter": ";",
    "parse_dates": ["monday_start", "monday_end"],
    "date_format": "%d.%m.%Y",
}
bear_weeks = pd.read_csv("details/bear_weeks.csv", **week_csv_options)
bull_weeks = pd.read_csv("details/bull_weeks.csv", **week_csv_options)

In [None]:
# Move every period boundary from the Monday stored in the CSV to the Friday
# of the same week.
#
# The previous version added a fixed 4 days on each execution, so re-running
# this cell pushed the boundaries a further 4 days every time. Snapping to
# "Monday of the date's week + 4 days" gives the same result for Monday inputs
# and leaves already-shifted (Friday) values unchanged, which makes the cell
# safe to re-run.
# NOTE(review): a boundary that is not a Monday now lands on the Friday of its
# own week instead of "date + 4 days" - confirm the CSVs only contain Mondays.
for weeks in (bear_weeks, bull_weeks):
    for column in ("monday_start", "monday_end"):
        # No-op when read_csv already parsed the column; parses day.month.year
        # strings otherwise.
        boundary = pd.to_datetime(weeks[column], format="%d.%m.%Y")
        # dt.weekday is 0 for Monday, so this subtracts back to the week's Monday.
        week_monday = boundary - pd.to_timedelta(boundary.dt.weekday, unit="D")
        weeks[column] = week_monday + pd.Timedelta(days=4)

In [None]:
# S&P 500 return series from the CRSP index file: keep only the date and
# return columns, parse the dates and order the rows chronologically.
crsp_index = (
    pd.read_csv("data/crsp_index.zip")
    .rename(columns={"DlyCalDt": "date", "sprtrn": "sp500_return"})
    .loc[:, ["date", "sp500_return"]]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(by="date")
    .reset_index(drop=True)
)

# Put a zero-return row dated 1999-12-31 in front so the cumulative series
# starts at 0 (presumably the file begins in January 2000 - TODO confirm).
crsp_seed = pd.DataFrame({"date": [pd.to_datetime("1999-12-31")], "sp500_return": [0]})
crsp_index = pd.concat([crsp_seed, crsp_index])

# Compound the per-row returns into a cumulative return.
crsp_index["total_return"] = crsp_index["sp500_return"].add(1).cumprod().sub(1)

In [None]:
# China index return series: keep only the date and return columns, parse the
# dates and order the rows chronologically.
china_index = (
    pd.read_csv("data/wrds_index_china.zip")
    .rename(columns={"portret": "china_return"})
    .loc[:, ["date", "china_return"]]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(by="date")
    .reset_index(drop=True)
)

# Put a zero-return row dated 1999-12-31 in front so the cumulative series
# starts at 0 (presumably the file begins in January 2000 - TODO confirm).
china_seed = pd.DataFrame({"date": [pd.to_datetime("1999-12-31")], "china_return": [0]})
china_index = pd.concat([china_seed, china_index])

# Compound the per-row returns into a cumulative return.
china_index["total_return"] = china_index["china_return"].add(1).cumprod().sub(1)

In [None]:
# Germany index return series: keep only the date and return columns, parse
# the dates and order the rows chronologically.
germany_index = (
    pd.read_csv("data/wrds_index_germany.zip")
    .rename(columns={"portret": "germany_return"})
    .loc[:, ["date", "germany_return"]]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(by="date")
    .reset_index(drop=True)
)

# Put a zero-return row dated 1999-12-31 in front so the cumulative series
# starts at 0 (presumably the file begins in January 2000 - TODO confirm).
germany_seed = pd.DataFrame({"date": [pd.to_datetime("1999-12-31")], "germany_return": [0]})
germany_index = pd.concat([germany_seed, germany_index])

# Compound the per-row returns into a cumulative return.
germany_index["total_return"] = germany_index["germany_return"].add(1).cumprod().sub(1)

In [None]:
# Results table of the XGBoost run used for the test-period plot. The path is
# split over several literals purely for readability; adjacent string literals
# are concatenated into the same single path.
test_data = pd.read_parquet(
    "data/"
    "xgboost_Global_2023-12-08-15-27-52_detached_bull_trained_no_validation"
    "_min_vol_5_1000000_min_mcap_percentile_na_0.6_min_mcap_percentile_global_0.65"
    "/results.parquet",
    engine="pyarrow",
)

In [None]:
# Liquidity floor: rows need a `volume_usd_5` strictly above this value.
volume_usd_5_min = 1_000_000

# Per-date market-cap quantile a stock has to exceed to be kept, for the
# USD/CAD universe and for all other currencies respectively.
# NOTE(review): the results directory loaded into `test_data` is named
# "...min_mcap_percentile_na_0.6_min_mcap_percentile_global_0.65", i.e. the
# opposite assignment of these two values - confirm which one is intended.
min_market_cap_percentile_na = 0.65
min_market_cap_percentile_global = 0.6

In [None]:
# North-American universe: rows quoted in USD or CAD.
na_data = test_data[test_data["currency"].isin(["USD", "CAD"])]

# Per-date market-cap cutoff, broadcast back onto every row of that date.
# This replaces `groupby("date").apply(<frame filter>)`, which operates on the
# grouping column (deprecated in pandas 2.2); once grouping columns are
# excluded the "date" column is lost and the groupby below fails.
na_cap_cutoff = na_data.groupby("date")["market_cap_usd"].transform(
    lambda caps: caps.quantile(min_market_cap_percentile_na)
)
na_data = na_data[
    (na_data["market_cap_usd"] > na_cap_cutoff)
    & (na_data["volume_usd_5"] > volume_usd_5_min)
]
# Same row order as before: grouped by ascending date, original order within a date.
na_data = na_data.sort_values("date", kind="stable").reset_index(drop=True)

# Equal-weighted mean of `trr_5` per date.
na_data_returns = na_data.groupby(["date"])["trr_5"].mean().reset_index()
# Compound the mean over 5 periods.
# NOTE(review): presumably `trr_5` is a per-day figure that is turned into a
# 5-day return here - confirm against the definition of `trr_5`.
na_data_returns["trr_5"] = (1 + na_data_returns["trr_5"]).pow(5) - 1
# Cumulative return across dates.
na_data_returns["total_return"] = (na_data_returns["trr_5"] + 1).cumprod() - 1

In [None]:
# Rest-of-world universe: every row that is not quoted in USD or CAD.
row_data = test_data[~test_data["currency"].isin(["USD", "CAD"])]

# Per-date market-cap cutoff, broadcast back onto every row of that date.
# This replaces `groupby("date").apply(<frame filter>)`, which operates on the
# grouping column (deprecated in pandas 2.2); once grouping columns are
# excluded the "date" column is lost and the groupby below fails.
row_cap_cutoff = row_data.groupby("date")["market_cap_usd"].transform(
    lambda caps: caps.quantile(min_market_cap_percentile_global)
)
row_data = row_data[
    (row_data["market_cap_usd"] > row_cap_cutoff)
    & (row_data["volume_usd_5"] > volume_usd_5_min)
]
# Same row order as before: grouped by ascending date, original order within a date.
row_data = row_data.sort_values("date", kind="stable").reset_index(drop=True)

# Equal-weighted mean of `trr_5` per date.
row_data_returns = row_data.groupby(["date"])["trr_5"].mean().reset_index()
# Compound the mean over 5 periods.
# NOTE(review): presumably `trr_5` is a per-day figure that is turned into a
# 5-day return here - confirm against the definition of `trr_5`.
row_data_returns["trr_5"] = (1 + row_data_returns["trr_5"]).pow(5) - 1
# Cumulative return across dates.
row_data_returns["total_return"] = (row_data_returns["trr_5"] + 1).cumprod() - 1

In [None]:
# Training data plot: cumulative index returns with the bull/bear periods shaded.
# NOTE: `plt` is the matplotlib Axes returned by DataFrame.plot, not the
# matplotlib.pyplot module; the name is kept as it was.
plt = china_index.plot(x="date", y="total_return", figsize=(20, 10), label="China Index", color="tab:purple", linewidth=2)
plt.plot(germany_index["date"], germany_index["total_return"], label="Germany Index", color="tab:orange", linewidth=2)
plt.plot(crsp_index["date"], crsp_index["total_return"], label="S&P 500", color="tab:blue", linewidth=2)

alpha = 0.4

# Shade every period. Only the first span of each kind carries a legend label,
# so the legend shows a single "Bull Period" and a single "Bear Period" entry.
# Iterating by position (instead of testing for index label 0) also copes with
# an empty table or a non-default index.
for periods, colour, legend_label in ((bull_weeks, "g", "Bull Period"), (bear_weeks, "r", "Bear Period")):
    for position, (start, end) in enumerate(zip(periods["monday_start"], periods["monday_end"])):
        plt.axvspan(start, end, facecolor=colour, alpha=alpha, label=legend_label if position == 0 else "_nolegend_")

plt.tick_params(axis='both', which='major', labelsize=16)
plt.set_xlabel("")
plt.set_ylabel("Index average return", fontsize=20)

# One major tick per year on the date axis.
plt.xaxis.set_major_locator(mdates.YearLocator())

# Signed percentage tick labels ("+12.00%", "0.00%", "-3.50%"). A formatter is
# evaluated for whatever ticks the axis ends up with, unlike set_yticklabels on
# a get_yticks() snapshot, whose fixed labels go stale when the ticks change.
# Passing a plain callable requires matplotlib >= 3.3.
plt.yaxis.set_major_formatter(lambda value, _pos: f"{value:+,.2%}" if value != 0 else f"{value:,.2%}")

plt.grid(True, which="both", axis="both")

# Displayed window; the commented line is the alternative (pre-2012) window.
#plt.set_xlim(pd.Timestamp('2000-01-01'), pd.Timestamp('2011-12-31'))
plt.set_xlim(pd.Timestamp('2012-01-01'), pd.Timestamp('2021-12-31'))
plt.legend(fontsize=20);


#plt.figure.savefig("figures/BullAndBearPeriodsAfter2012.pdf", dpi=3000, bbox_inches='tight')


In [None]:
# Test data plot: cumulative average returns of the NA and ROW universes with
# the bull/bear periods shaded.
# NOTE: `plt` is the matplotlib Axes returned by DataFrame.plot, not the
# matplotlib.pyplot module; the name is kept as it was.
plt = na_data_returns.plot(x="date", y="total_return", figsize=(20, 10), label="NA Average Returns", color="tab:purple", linewidth=2)
plt.plot(row_data_returns["date"], row_data_returns["total_return"], label="ROW Average Returns", color="tab:blue", linewidth=2)

alpha = 0.4

# Shade every period. Only the first span of each kind carries a legend label,
# so the legend shows a single "Bull Period" and a single "Bear Period" entry.
# Iterating by position (instead of testing for index label 0) also copes with
# an empty table or a non-default index.
for periods, colour, legend_label in ((bull_weeks, "g", "Bull Period"), (bear_weeks, "r", "Bear Period")):
    for position, (start, end) in enumerate(zip(periods["monday_start"], periods["monday_end"])):
        plt.axvspan(start, end, facecolor=colour, alpha=alpha, label=legend_label if position == 0 else "_nolegend_")

plt.tick_params(axis='both', which='major', labelsize=16)
plt.set_xlabel("")
plt.set_ylabel("")

# Major tick every second month, minor tick every month, labelled like "Jan. 22".
plt.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
plt.xaxis.set_minor_locator(mdates.MonthLocator(interval=1))
plt.xaxis.set_major_formatter(mdates.DateFormatter('%b. %y'))

# Vertical marker at the approximate bull/bear boundary of the test period.
plt.axvline(x=pd.Timestamp('2022-10-14'), color="tab:orange", linestyle="-", linewidth=7, label="Rough bull/bear split")

# Signed percentage tick labels ("+12.00%", "0.00%", "-3.50%"). A formatter is
# evaluated for whatever ticks the axis ends up with, unlike set_yticklabels on
# a get_yticks() snapshot, whose fixed labels go stale when the ticks change.
# Passing a plain callable requires matplotlib >= 3.3.
plt.yaxis.set_major_formatter(lambda value, _pos: f"{value:+,.2%}" if value != 0 else f"{value:,.2%}")

plt.grid(True, which="both", axis="both")
plt.set_xlim(pd.Timestamp('2022-01-01'), pd.Timestamp('2023-08-30'))

plt.legend(fontsize=20);


#plt.figure.savefig("figures/BullAndBearPeriodsTesting.pdf", dpi=3000, bbox_inches='tight')