In [5]:
import pandas as pd
import yfinance as yf

tickers = ["SPY"]
data = yf.download(tickers, start="2010-01-01", end="2020-12-31")["Adj Close"]
retuns = data.pct_change().dropna()

[*********************100%%**********************]  1 of 1 completed


In [2]:
data

Date
2010-01-04     86.860092
2010-01-05     87.090019
2010-01-06     87.151329
2010-01-07     87.519218
2010-01-08     87.810448
                 ...    
2020-12-23    350.511139
2020-12-24    351.874725
2020-12-28    354.897614
2020-12-29    354.220551
2020-12-30    354.725891
Name: Adj Close, Length: 2768, dtype: float64

In [3]:
retuns

Date
2010-01-05    0.002647
2010-01-06    0.000704
2010-01-07    0.004221
2010-01-08    0.003328
2010-01-11    0.001396
                ...   
2020-12-23    0.000899
2020-12-24    0.003890
2020-12-28    0.008591
2020-12-29   -0.001908
2020-12-30    0.001427
Name: Adj Close, Length: 2767, dtype: float64

### 2. Compute Variance-Covariance Matrix

Next, we need to understand how the returns of different assets move in relation to each other. We use a statistical model called GARCH DCC to compute a matrix that captures these relationships. This matrix helps us understand the risk associated with holding different combinations of assets.

In [14]:
import pandas as pd
import numpy as np
import yfinance as yf
from arch import arch_model
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [29]:
def fetch_data(tickers, start_date, end_date):
    data = yf.download(tickers, start=start_date, end=end_date)
    return data['Adj Close']

tickers = ['SPY']
start_date = '2010-01-01'
end_date = '2023-01-01'

data = fetch_data(tickers, start_date, end_date)
returns = data.pct_change().dropna()


[*********************100%%**********************]  1 of 1 completed


In [25]:
returns = returns.astype(float)

In [30]:
def fit_garch(returns):
    models = []
    residuals = []
    for column in returns.columns:
        model = arch_model(returns[column], vol='Garch', p=1, q=1)
        res = model.fit(disp='off')
        models.append(res)
        residuals.append(res.resid)
    residuals = np.array(residuals).T
    return models, residuals

models, residuals = fit_garch(returns)


AttributeError: 'Series' object has no attribute 'columns'

In [18]:
def fit_dcc(residuals):
    T, N = residuals.shape
    Qbar = np.cov(residuals, rowvar=False)
    Qt = Qbar.copy()
    Rt = np.zeros((T, N, N))
    a = 0.01
    b = 0.98

    for t in range(T):
        rt = residuals[t].reshape(-1, 1)
        Qt = (1 - a - b) * Qbar + a * (rt @ rt.T) + b * Qt
        Rt[t] = Qt / np.sqrt(np.diag(Qt).reshape(-1, 1) @ np.diag(Qt).reshape(1, -1))

    return Rt

Rt = fit_dcc(residuals)


In [19]:
def compute_cov_matrix(models, Rt):
    T, N, _ = Rt.shape
    cov_matrices = np.zeros((T, N, N))
    for t in range(T):
        D = np.diag([model.conditional_volatility[t] for model in models])
        cov_matrices[t] = D @ Rt[t] @ D
    return cov_matrices

cov_matrices = compute_cov_matrix(models, Rt)


  D = np.diag([model.conditional_volatility[t] for model in models])


In [20]:
def compute_nrp_weights(cov_matrix):
    inv_var = 1 / np.diag(cov_matrix)
    weights = inv_var / np.sum(inv_var)
    return weights

def compute_hrp_weights(cov_matrix):
    weights = np.ones(len(cov_matrix)) / len(cov_matrix)
    return weights


In [21]:
def compute_features(returns, cov_matrix, weights_nrp, weights_hrp):
    features = {}
    features['mean_return'] = np.mean(returns, axis=0)
    features['std_return'] = np.std(returns, axis=0)
    features['nrp_volatility'] = np.sqrt(np.dot(weights_nrp.T, np.dot(cov_matrix, weights_nrp)))
    features['hrp_volatility'] = np.sqrt(np.dot(weights_hrp.T, np.dot(cov_matrix, weights_hrp)))
    return pd.Series(features)

features = compute_features(returns, cov_matrices[-1], compute_nrp_weights(cov_matrices[-1]), compute_hrp_weights(cov_matrices[-1]))


In [22]:
lookback_period = 252  # One year of daily returns
X = []  # Feature vectors
y = []  # Sharpe ratio spreads

for t in range(len(returns) - lookback_period):
    historical_returns = returns.iloc[t:t+lookback_period]
    cov_matrix = cov_matrices[t]
    weights_nrp = compute_nrp_weights(cov_matrix)
    weights_hrp = compute_hrp_weights(cov_matrix)
    features = compute_features(historical_returns, cov_matrix, weights_nrp, weights_hrp)
    
    # Debugging to ensure features are numeric
    for key, value in features.items():
        print(f"Feature {key}: {value}, Type: {type(value)}")
    
    X.append(features)
    sharpe_nrp = np.mean(historical_returns @ weights_nrp) / np.std(historical_returns @ weights_nrp)
    sharpe_hrp = np.mean(historical_returns @ weights_hrp) / np.std(historical_returns @ weights_hrp)
    y.append(sharpe_hrp - sharpe_nrp)

X = pd.DataFrame(X).astype(float)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBRegressor(objective='reg:squarederror')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print('Test MSE:', mean_squared_error(y_test, y_pred))


Feature mean_return: Ticker
AGG    0.000238
QQQ    0.000806
SPY    0.000594
dtype: float64, Type: <class 'pandas.core.series.Series'>
Feature std_return: Ticker
AGG    0.002367
QQQ    0.012140
SPY    0.011242
dtype: float64, Type: <class 'pandas.core.series.Series'>
Feature nrp_volatility: 0.0022314623828830807, Type: <class 'numpy.float64'>
Feature hrp_volatility: 0.005492109369001057, Type: <class 'numpy.float64'>
Feature mean_return: Ticker
AGG    0.000222
QQQ    0.000803
SPY    0.000581
dtype: float64, Type: <class 'pandas.core.series.Series'>
Feature std_return: Ticker
AGG    0.002351
QQQ    0.012140
SPY    0.011241
dtype: float64, Type: <class 'pandas.core.series.Series'>
Feature nrp_volatility: 0.002610926596352736, Type: <class 'numpy.float64'>
Feature hrp_volatility: 0.06440445796685804, Type: <class 'numpy.float64'>
Feature mean_return: Ticker
AGG    0.000205
QQQ    0.000861
SPY    0.000599
dtype: float64, Type: <class 'pandas.core.series.Series'>
Feature std_return: Ticker
A

ValueError: setting an array element with a sequence.