Strategies
<small>

This notebook defines the four strategies that will be incorporated into the Black-Litterman framework:
- Momentum
- Short term reversal
- Short interest
- ARIMA-GARCH

Each strategy is backtested on the in-sample data. The weight DataFrame produced by each strategy is saved as a CSV file for later use.
</small>

In [22]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from arch import arch_model
import pmdarima
from utils.backtest import Backtest

In [23]:
# Data preprocessing
def _read_lagged_csv(path, **read_kwargs):
    """Read a CSV indexed by its parsed 'Date' column and shift every column down one row.

    After the shift, the row labelled with a given date carries the values
    recorded on the previous row of the file.
    """
    frame = pd.read_csv(path, index_col='Date', parse_dates=True, **read_kwargs)
    return frame.shift(1)


price_df = _read_lagged_csv('data/price.csv')
volume_df = _read_lagged_csv('data/volume.csv')
# Short interest is the only input read with an explicit float dtype.
short_interest_df = _read_lagged_csv('data/short_interest.csv', dtype=float)
benchmark_df = _read_lagged_csv('data/benchmark.csv')

In [None]:
# Momentum
# Composite score = 21-row return + 105-row return + 231-row return, where the
# two longer horizons are additionally lagged by 21 rows.
short_momentum = price_df.pct_change(periods=21, fill_method=None)
medium_momentum, long_momentum = (
    price_df.pct_change(periods=lookback, fill_method=None).shift(periods=21)
    for lookback in (105, 231)
)
score_dates = long_momentum.index
score_df = short_momentum.loc[score_dates] + medium_momentum.loc[score_dates] + long_momentum

# Start from an all-zero weight grid and flag the selected columns date by date.
weight_df = pd.DataFrame(0.0, index=score_df.index, columns=score_df.columns)
for date in score_df.index:
    # Step 1: keep the columns whose volume is at or above the 80th percentile.
    day_volume = volume_df.loc[date]
    liquid_symbols = day_volume[day_volume >= day_volume.quantile(0.8)].index

    # Step 2: within that subset, keep scores at or above the 80th percentile.
    liquid_scores = score_df.loc[date, liquid_symbols]
    winner_symbols = liquid_scores[liquid_scores >= liquid_scores.quantile(0.8)].index

    weight_df.loc[date, winner_symbols] = 1

# Equal-weight the flagged columns; dates with no selection become all-NaN
# (0 / 0) and are dropped.
row_totals = weight_df.sum(axis=1)
weight_df = weight_df.div(row_totals, axis=0).dropna(how='all')

# Keep only the last available row of each calendar month.
weight_df = weight_df.groupby(pd.Grouper(freq='ME')).tail(1)
weight_df.to_csv('weight/momentum.csv')

backtest = Backtest(weight_df)
backtest.run(transaction_cost=0)
backtest.show()

In [None]:
# Short term reversal
# Each month: among the columns in the bottom decile of month-over-month change
# in average volume, equally weight those in the bottom decile of monthly return.
return_df = price_df.groupby(pd.Grouper(freq='ME')).tail(1).pct_change(fill_method=None).dropna(how="all")
volume_change_df = volume_df.groupby(pd.Grouper(freq='ME')).mean().pct_change(fill_method=None).dropna(how="all")

# Align the volume changes to the return dates by calendar month rather than by
# position. A positional relabel (`volume_change_df.index = return_df.index`)
# raises if the two frames end up with different lengths and silently pairs the
# wrong months if the two dropna() calls removed different rows.
volume_change_df.index = volume_change_df.index.to_period('M')
volume_change_df = volume_change_df.reindex(return_df.index.to_period('M'))
volume_change_df.index = return_df.index

# Start from an all-zero grid; every row is filled in by the loop below.
weight_df = pd.DataFrame(0.0, index=return_df.index, columns=return_df.columns)

for date in weight_df.index:
    # Columns whose volume change is at or below the 10th percentile.
    vol = volume_change_df.loc[date]
    bottom_vol = vol[vol <= vol.quantile(0.1)]

    # Within that subset, columns whose monthly return is at or below the 10th percentile.
    r = return_df.loc[date, bottom_vol.index]
    bottom_r_symbols = r[r <= r.quantile(0.1)].index

    weight_df.loc[date, bottom_r_symbols] = 1

# Equal-weight the flagged columns; months with no selection become all-NaN
# (0 / 0) and are dropped.
weight_df = weight_df.div(weight_df.sum(axis=1), axis=0).dropna(how='all')
weight_df.to_csv('weight/reversal.csv')

backtest = Backtest(weight_df)
backtest.run()
backtest.show()

In [None]:
# Short interest
# Scale each short-interest row by the mean volume observed over the window
# that ends on that row's date, then hold the columns with the lowest ratio.
short_ratio_df = short_interest_df.copy()
report_dates = short_ratio_df.index
# The first window opens at the first volume date; every later window opens the
# calendar day after the previous report date.
window_starts = pd.Index([volume_df.index[0]]).append(report_dates[:-1] + pd.Timedelta(days=1))
for window_start, report_date in zip(window_starts, report_dates):
    window_mean_volume = volume_df.loc[window_start:report_date].mean()
    short_ratio_df.loc[report_date] = short_ratio_df.loc[report_date].div(window_mean_volume)

# Flag (1/0) the columns strictly below each row's 5th percentile.
row_cutoff = short_ratio_df.quantile(0.05, axis=1)
selected_df = short_ratio_df.lt(row_cutoff, axis=0).astype(int)

# Equal-weight the flagged columns; rows with no selection become all-NaN
# (0 / 0) and are dropped.
selected_count = selected_df.sum(axis=1)
weight_df = selected_df.div(selected_count, axis=0).dropna(how="all")
weight_df.to_csv('weight/shortinterest.csv')

backtest = Backtest(weight_df)
backtest.run()
backtest.show()

In [None]:
# ARIMA-GARCH
# This block of code takes around 2 hours to run
#
# Fitting stage: for every month-end rebalancing date, take the preceding
# 500 rows of returns, pick one representative column for each of the leading
# principal components of their covariance, and fit an ARIMA model (via
# pmdarima.auto_arima) plus a GARCH(1,1) model on the ARIMA residuals for each
# picked column. Columns that are not picked get None in the result lists.
return_df = price_df.pct_change(fill_method=None)

LOOKBACK_ROWS = 500   # length of the rolling estimation window, in rows
N_COMPONENTS = 20     # number of leading principal components to represent


def _select_pc_stocks(cov, n_components):
    """Return positional column indices representing the leading principal components.

    For each of the `n_components` eigenvectors with the largest eigenvalues,
    the column with the largest absolute loading that has not already been
    chosen is selected, so the returned indices are distinct.

    Parameters
    ----------
    cov : square array-like (e.g. the DataFrame returned by ``DataFrame.cov()``)
    n_components : int
        Capped at the number of columns in `cov`.

    Raises
    ------
    numpy.linalg.LinAlgError
        If `cov` contains NaN or infinite entries.
    """
    cov_values = np.asarray(cov, dtype=float)
    if not np.isfinite(cov_values).all():
        raise np.linalg.LinAlgError("Covariance matrix must not contain infs or NaNs")

    # A covariance matrix is symmetric, so eigh applies and guarantees real
    # eigenpairs. Eigenvectors are the COLUMNS of the returned matrix.
    eigval, eigvec = np.linalg.eigh(cov_values)
    order = eigval.argsort()[::-1]          # largest eigenvalue first
    eigvec = eigvec[:, order]

    selected = []
    for i in range(min(n_components, eigvec.shape[1])):
        loadings = np.abs(eigvec[:, i])     # i-th eigenvector, not i-th row
        # Exclude earlier picks in place so argmax still returns an index into
        # the original column order (indexing with a boolean mask would return
        # a position inside the shortened array instead).
        loadings[selected] = -np.inf
        selected.append(int(np.argmax(loadings)))
    return selected


arima_results, garch_results = [], []
month_end = return_df.iloc[LOOKBACK_ROWS:].groupby(pd.Grouper(freq='ME')).tail(1).index
for date in tqdm(month_end):
    current_index = return_df.index.get_loc(date)

    # For each rebalancing period, conduct PCA to reduce dimensionality.
    # The window stops one row before the rebalancing date itself.
    r = return_df.iloc[current_index - LOOKBACK_ROWS:current_index]
    pc_stock = set(_select_pc_stocks(r.cov(), N_COMPONENTS))

    # Fit ARIMA-GARCH models for each period (selected columns only)
    arima_res = []
    for i, col in enumerate(return_df.columns):
        series = r[col].dropna().values if i in pc_stock else ()
        arima_res.append(pmdarima.auto_arima(series) if len(series) > 0 else None)
    arima_results.append(arima_res)

    garch_res = []
    for res in arima_res:
        if res is None:
            garch_res.append(None)
            continue
        model = arch_model(res.resid(), vol='Garch', p=1, q=1, lags=1)
        garch_res.append(model.fit(disp="off"))
    garch_results.append(garch_res)

# forecast with the fitted model
# For every rebalancing date and every fitted column, average the 21-step-ahead
# ARIMA mean forecasts and the 21-step-ahead GARCH variance forecasts into one
# number each. Columns without a fitted model get NaN.
mean_forecast = []
var_forecast = []
for arima, garch in tqdm(zip(arima_results, garch_results), total=len(arima_results)):
    mean_forecast.append([
        np.mean(res.predict(n_periods=21)) if res is not None else np.nan
        for res in arima
    ])
    # `.variance` is a DataFrame with one column per horizon step. Take the mean
    # of its last row explicitly instead of np.mean(DataFrame), whose result
    # (per-column Series vs. single scalar) depends on the pandas version.
    var_forecast.append([
        res.forecast(horizon=21).variance.iloc[-1].mean() if res is not None else np.nan
        for res in garch
    ])

mean_df = pd.DataFrame(mean_forecast, index=month_end, columns=return_df.columns)
variance_df = pd.DataFrame(var_forecast, index=month_end, columns=return_df.columns)

# Calculate the weights
# Signal = forecast mean / forecast variance. Negative signals are floored at
# zero and missing signals are treated as zero before normalising.
signal_df = (mean_df / variance_df).clip(lower=0).fillna(0)

# Scale each row to sum to one; rows whose signals are all zero become all-NaN
# (0 / 0) and are dropped.
signal_total = signal_df.sum(axis=1)
weight_df = signal_df.div(signal_total, axis=0).dropna(how='all')
weight_df.to_csv('weight/timeseries.csv')

backtest = Backtest(weight_df)
backtest.run()
backtest.show()