In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from collections import namedtuple
from datetime import date, timedelta

from algo.sde.ornstein_uhlenbeck_optimisation import OptimiserOU
from algo.sde.ornstein_uhlenbeck_parameters import HedgeParamsOU, ModelParamsOU
from etl.yfinance_data import get_pairs_data
from execution.positions import compute_positions, compute_returns
from performance.sharpe import sharpe_ratio_log


sns.set_style("darkgrid")

In [7]:
## Get all data

# 730 days is the max, includes today.
num_data_full = 24*729

# Size of initial training set. Note: 23 trading days per month.
# num_train_initial = 24*23*12  # 1 year
num_train_initial = 24*23*6  # 6 months

# Size of each test period. Note: 1 futures trading week = 6 days. TODO: start on Sunday?
num_test_window = 24*6


# Short ranges for testing implementation
# num_data_full = 200
# num_train_initial = 48
# num_test_window = 48


# Instruments and dates
ticker1 = "BZ=F"
ticker2 = "CL=F"
end_date = date.today()
start_date = end_date - timedelta(hours=num_data_full)
df_full = get_pairs_data(ticker1, ticker2, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"), interval="1h")

print(f"\nDates Requested: {start_date} to {end_date}")
print(f"Dates Received: {df_full.index[0]} to {df_full.index[-1]}\n")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Dates Requested: 2022-11-16 to 2022-11-24
Dates Received: 2022-11-15 19:00:00-05:00 to 2022-11-23 18:00:00-05:00



In [8]:
## Fit model on initial training data

# Model Input Parameters
dt = 1   # Let one unit of time be equivalent to one step in the data.
A = 1.0  # For every $A long/short in asset1, we go $B short/long in asset2.

# Entry/Exit Signals - Z-score
z_entry = 1.0
z_exit = 0.25

# TODO: back-calculate (data_size_full-train_size_initial) % test_size_window
start_train_index = 0
end_train_index = start_train_index + num_train_initial
end_test_index = end_train_index + num_test_window

In [9]:
all_returns = []

while end_test_index <= num_data_full:

    # Build datasets
    df_train = df_full.iloc[start_train_index : end_train_index].copy()
    
    # Test window, e.g. 1 week.
    # start_test_index = start_train_index + num_train_initial
    # end_test_index = start_test_index+num_test_window
    # df_test = df_full.iloc[start_test_index : end_test_index]
    # start_test_index = start_train_index + num_train_initial
    # end_test_index = start_test_index+num_test_window
    df_test = df_full.iloc[end_train_index : end_test_index].copy()

    # Train Model
    optimiser_train = OptimiserOU(A=A, dt=dt)
    hp_train, _ = optimiser_train.optimise(asset1=df_train["S1"].to_numpy(), asset2=df_train["S2"].to_numpy())
    alpha = hp_train.alpha
    beta = hp_train.beta

    ## Evaluate performance - TODO: add cointegration pre-trade checks.

    # Compute spreads using (alpha, beta) calibrated in training.
    df_train["spread"] = alpha*df_train["S1"] - beta*df_train["S2"]
    df_test["spread"] = alpha*df_test["S1"] - beta*df_test["S2"]

    # Equiv. df_full.iloc[start_train_index : end_test_index]
    df_train_test = pd.concat([df_train, df_test])
    
    # Temporary join to roll the expanding metrics
    df_train_test["zscore_expanding"] = (df_train_test["spread"] - df_train_test["spread"].expanding().mean()) / df_train_test["spread"].expanding().std()

    df_train = df_train_test.iloc[0:len(df_train)].copy()
    df_test = df_train_test.iloc[len(df_train):].copy()


    # Plot expanding
    # xmin=df_train.index[0]
    # xmax=df_test.index[-1]
    # plt.figure(figsize=(12, 4))
    # plt.plot(df_train.index, df_train["zscore_expanding"], color="blue", label="spread_train")
    # plt.plot(df_test.index, df_test["zscore_expanding"], color="orange", label="spread_test")
    # plt.hlines(z_entry, label="short", colors="red", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.hlines(-z_entry, label="long", colors="green", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.hlines(z_exit, label="exit", colors="blue", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.hlines(-z_exit, label="exit", colors="blue", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.legend()
    # plt.title("Expanding Metrics on Z-Score")
    # plt.show()
    
    ## Save plots: name contains date of W/C.

    ## Do trades

    # Generate entry/exit signals
    threshold_col = "zscore_expanding"
    df_test["long"] = 1.0 * (df_test[threshold_col] <= -z_entry)
    df_test["short"] = 1.0 * (df_test[threshold_col] >= z_entry)
    df_test["exit"] = 1.0 * (np.abs(df_test[threshold_col]) <= z_exit)
    
    df_test = df_test.pipe(compute_positions)
    df_test = df_test.pipe(compute_returns)

    # df_test[["returns_cml", "returns_cml_S1", "returns_cml_S2"]].plot()
    # plt.title("Test Set")
    # plt.show()
    
    # sharpe_ratio = sharpe_ratio_log(df_test)
    # sharpe_ratio_annual = sharpe_ratio*np.sqrt(23*252)
    # print(f"Sharpe Ratio Test Annual = {sharpe_ratio_annual}")
    
    # df_all_returns = pd.concat([df_all_returns, df_test])
    all_returns.extend(df_test["total"].tolist())
    
    ## After test window num_data points elapses, add to training set
    end_train_index += num_test_window
    end_test_index += num_test_window
    
    
df_all_returns = pd.DataFrame(columns=["total"])
df_all_returns["total"] = all_returns
sharpe_ratio = sharpe_ratio_log(df_all_returns, colname="total")
sharpe_ratio_annual = sharpe_ratio*np.sqrt(23*252)
print(f"Sharpe Ratio Test Annual = {sharpe_ratio_annual}")

48
54
54
Sharpe Ratio Test Annual = 7.853438386890733


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
