In [3]:
# basic
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import time
import glob
import os
from numpy.linalg import inv
from bidict import bidict

# user-defined
import sys
sys.path.append('/Users/cheng/Google Drive/PhD/Research/Portfolio Selection via TBN/codes/')
from module import *
from module.backtesting import *
from module.agent_network import *
from module.environment import market_envrionment
from module.data_handler import *
import tensorflow as tf
tf.config.run_functions_eagerly(True)

# visulization
# import igraph
# import cairocffi
# import cairo
import matplotlib.pyplot as plt

# 
from scipy.sparse.csgraph import minimum_spanning_tree

## Notation
$y_{it}$: the return on stock $i$ during period $t$, with $1 \leq i \leq N, 1 \leq t \leq T$

$\bar{y}$: the sample average of the returns of stock $i$, that is $\bar{y}_{i}=T^{-1} \sum_{t=1}^{T} y_{i t}$

$\Sigma$: the population (or true) covariance matrix

$\sigma_{ij}$: entries of $\Sigma$

$S$: the sample covariance matrix

$s_{ij}$: entries of $S$

$r_{i j}=\frac{s_{i j}}{\sqrt{s_{i i} s_{j j}}}$

$\bar{r}=\frac{2}{(N-1) N} \sum_{i=1}^{N-1} \sum_{j=i+1}^{N} r_{i j}$


## 1. estimator for $\pi_T$
$\hat{\pi}_{T}:=\sum_{i=1}^{N} \sum_{j=1}^{N} \hat{\pi}_{i j}^{T} \quad$

$\hat{\pi}_{i j}=\frac{1}{T} \sum_{t=1}^{T}\left\{\left(y_{i t}-\bar{y}_{i}\right)\left(y_{j t}-\bar{y}_{j} .\right)-s_{i j}\right\}^{2}$


In [23]:
def get_pi(stock_returns):

    # init
    # symbols = stock_returns.columns
    y = stock_returns.values
    y_hat = np.average(y, axis=0)
    s = stock_returns.cov().values
    T = stock_returns.shape[0]
    N = stock_returns.shape[1]
    pi = []

    # calculation
    for i in range(N):
        for j in range(N):
            pi_ij = []
            for t in range(T):
                tmp = (y[t, j] - y_hat[j]) * (y[t, i] - y_hat[i]) - s[i, j]
                pi_ij.append(tmp ** 2)
            pi_ij = np.average(pi_ij)
            pi.append(pi_ij)
    pi = np.sum(pi)
    
    return pi

In [25]:
get_pi(x)

0.021773118708330652

## 2 estimator for $\gamma_T$
$\hat{\gamma}_{T}:=\sum_{i=1}^{N} \sum_{j=1}^{N}\left(f_{i j}^{T}-s_{i j}^{T}\right)^{2}$

In [48]:
def get_gamma(stock_returns, shrink_target_cor):

    # init
    # symbols = stock_returns.columns
    f = shrink_target_cor.values
    s = stock_returns.cov().values
    N = stock_returns.shape[1]
    D = x.std().values # (non-annualized) std
    f = np.diag(D) @ f @ np.diag(D)
    gamma_ij = []

    # calculation
    for i in range(N):
        for j in range(N):
            tmp = f[i, j] - s[i, j]
            gamma_ij.append(tmp ** 2)
    gamma = np.sum(gamma_ij)
    
    return gamma

In [49]:
get_gamma(x, y)

0.0005534544826441096

## 3. estimator for $\rho_T$
$\hat{\rho}=\sum_{i=1}^{N} \hat{\pi}_{i i}+\sum_{i=1}^{N} \sum_{j=1, j \neq i}^{N} \frac{\bar{r}}{2}\left(\sqrt{\frac{s_{j j}}{s_{i i}}} \hat{\vartheta}_{i i, i j}+\sqrt{\frac{s_{i i}}{s_{j j}} \hat{\vartheta}_{j j, i j}}\right)$

where

$\hat{\vartheta}_{i i, i j}=\frac{1}{T} \sum^{T}\left\{\left(y_{i t}-\bar{y}_{i}\right)^{2}-s_{i i}\right\}\left\{\left(y_{i t}-\bar{y}_{i} \cdot\right)\left(y_{j t}-\bar{y}_{j}\right)-s_{i j}\right\}$

$\hat{\vartheta}_{j j, i j}=\frac{1}{T} \sum^{T}\left\{\left(y_{j t}-\bar{y}_{j}\right)^{2}-s_{j j}\right\}\left\{\left(y_{i t}-\bar{y}_{i} .\right)\left(y_{j t}-\bar{y}_{j}\right)-s_{i j}\right\}$

In [66]:
def get_rho(stock_returns):
    # init
    y = stock_returns.values
    y_hat = np.average(y, axis=0)
    s = stock_returns.cov().values
    T = stock_returns.shape[0]
    N = stock_returns.shape[1]
    theta_i = np.zeros([N, N])
    theta_j = np.zeros([N, N])
    theta = np.zeros([N, N])
    r = np.zeros([N, N])
    pi = np.zeros([N, N])

    for i in range(N):
        for j in range(N):
            theta_ii = []
            theta_jj = []
            pi_ij = []
            for t in range(T):
                tmp = (y[t, j] - y_hat[j]) * (y[t, i] - y_hat[i]) - s[i, j]
                
                tmp_ii = ((y[t, i] - y_hat[i]) ** 2 - s[i, i]) * ((y[t, i] - y_hat[i]) * (y[t, j] - y_hat[j]) - s[i, j])
                tmp_jj = ((y[t, j] - y_hat[j]) ** 2 - s[j, j]) * ((y[t, i] - y_hat[i]) * (y[t, j] - y_hat[j]) - s[i, j])

                theta_ii.append(tmp_ii)
                theta_jj.append(tmp_jj)
                pi_ij.append(tmp ** 2)

            pi_ij = np.average(pi_ij)
            pi[i, j] = pi_ij

            r_ij = s[i, j] / np.sqrt(s[i, i] * s[j, j])
            r[i, j] = r_ij

            theta_ii = np.average(theta_ii)
            theta_jj = np.average(theta_jj)

            theta_i[i, j] = theta_ii
            theta_j[i, j] = theta_jj

    r_sum_off_diag = np.sum(r - np.diag(np.diag(r))) / 2
    r_bar = (2 / ((N - 1) * N)) * r_sum_off_diag

    for i in range(N):
        for j in range(N):
            tmp = (r_bar / 2) * (np.sqrt(s[j, j] / s[i, i]) * theta_i[i, j] + np.sqrt(s[i, i] / s[j, j]) * theta_j[i, j])
            theta[i, j] = tmp

    np.fill_diagonal(theta, 0)
    rho = np.sum(np.diag(pi)) + np.sum(theta)

    return rho

In [65]:
get_rho(x)

0.0028740878014524613

## 4. shrinkage intensity $\kappa$

$\hat{\kappa}=\frac{\hat{\pi}-\hat{\rho}}{\hat{\gamma}}$


In [71]:
def get_kappa(stock_returns, shrink_target):
    pi = get_pi(stock_returns)
    rho = get_rho(stock_returns)
    gamma = get_gamma(stock_returns, shrink_target)
    #T = stock_returns.shape[0]

    # bound
    #gamma_tilde = (pi - rho) / (T * nu)
    #gamma = np.max([gamma_tilde, 0])
    #gamma = np.min([gamma, 1])
    kappa = (pi - rho) / gamma

    return kappa 

In [68]:
bt = vectorized_backtesting(150)
db = data_handler()
x = bt.stocks_returns_aggregate.loc[2017]
y = bt.tbn_combined.loc[2017]

In [72]:
kappa = get_kappa(x, y)

In [74]:
kappa_list = [get_kappa(bt.stocks_returns_aggregate.loc[year], bt.tbn_combined.loc[year]) for year in range(1996, 2017)]
kappa_array = np.array(kappa_list) / 252
kappa_df = pd.DataFrame(kappa_array, index=range(1996, 2017))

## 5. Backtesting

In [98]:
class Shrink_to_TBN(vectorized_backtesting):
    '''
    Linear shrinkage method(ledoit) that shrink to TBN
    '''
    def get_portfolio(self, year):
        alpha = kappa_df.loc[year - 1].values[0] #unsafe
        covariance_shrunk = self.get_shrank_cov(correlation_matrix=self.correlation_aggregate.loc[year - 1].values,\
                                                shrink_target=self.tbn_combined.loc[year - 1].values,\
                                                volatility_vector=self.volatility_aggregate.loc[year - 1].values,
                                                a=alpha)
        portfolio = self.get_GMVP(covariance_matrix = covariance_shrunk)
        return portfolio

In [94]:
def get_backtesting_performance_vector(strategy_object: object) -> np.array:
    '''
    Given a portfolio stratgy, the Sharpe ratio and average turnover are returned
    '''

    # backtesting period
    year_start = 2001
    year_end = 2017

    # get backtesting time series
    portfolio_returns_vector = strategy_object.get_portfolio_daily_return(year_start, year_end)
    sharpe_ratio = strategy_object.get_sharpe_ratio()
    turnover_vector = strategy_object.get_turn_over_for_each_period()

    # get backtesting performance indicator
    #sharpe_ratio = np.mean(sharpe_ratio_vector)
    turnover = np.mean(turnover_vector)

    return np.array([sharpe_ratio, turnover])

In [85]:
import sys
sys.path.append('/Users/cheng/Google Drive/PhD/Research/Portfolio Selection via TBN/codes/')
from module.strategy import *
from itertools import chain
import nonlinshrink as nls
from sklearn.covariance import LedoitWolf
from sklearn.covariance import OAS
from scipy.stats import moment
import numpy as np
import pandas as pd

from module.data_handler import data_handler

In [101]:
# strategy agents
sample_size = 150
shrink_0_pct = Shrink_0(sample_size)
shrink_50_pct = Shrink_50(sample_size)
shrink_100_pct = Shrink_100(sample_size)
shrink_ledoit = Linear_shrink_ledoit(sample_size)

shrink_0_pct_tbn = Shrink_0_tbn(sample_size)
shrink_50_pct_tbn = Shrink_50_tbn(sample_size)
shrink_100_pct_tbn = Shrink_100_tbn(sample_size)
shrink_tbn = Shrink_to_TBN(sample_size)

# strategy vector
strategy_dict = {'shrink 0 pct': shrink_0_pct, 
                       'shrink 50 pct': shrink_50_pct,
                       'shrink 100 pct': shrink_100_pct, 
                       'shrink ledoit': shrink_ledoit, 
                       'shrink 0 pct tbn': shrink_0_pct_tbn, 
                       'shrink 50 pct tbn': shrink_50_pct_tbn, 
                       'shrink 100 pct tbn': shrink_100_pct_tbn, 
                       'shrink_tbn': shrink_tbn
}

# backtest strategies
backtest_performance_dict = {strategy_name: get_backtesting_performance_vector(strategy) for strategy_name, strategy in strategy_dict.items()}
backtest_performance_df = pd.DataFrame(backtest_performance_dict).T
backtest_performance_df.columns = ['Sharpe ratio', 'Turnover']
backtest_performance_df

Unnamed: 0,Sharpe ratio,Turnover
shrink 0 pct,0.803892,3.486161
shrink 50 pct,0.698904,0.242921
shrink 100 pct,0.677564,0.0
shrink ledoit,0.869363,2.248338
shrink 0 pct tbn,0.803892,3.486161
shrink 50 pct tbn,0.907298,0.980764
shrink 100 pct tbn,0.728605,0.242698
shrink_tbn,0.840168,3.009054


In [104]:
file_name = 'performance_table_150_new'
label = 'tbl:sample 150 new'
caption = 'Backtesting performance table on 150 companies from S\&P 500 new'

output_file_path = '/Users/cheng/Dropbox/Apps/Overleaf/Weekly Report Cheng/' + '/' + file_name + '.tex'
caption = r'\textbf{' + file_name + r'} \\ ' + caption
                
latex_table = backtest_performance_df.to_latex(output_file_path, 
                        float_format="%.3f", 
                        caption=caption, 
                        label=label,
                        position = 'h!')