# experiment.py
import os
import time
from multiprocessing import Pool, cpu_count

import numpy as np
import pandas as pd
import scipy
import scipy.linalg
from tqdm import tqdm

from config import Configuration
from utils import *
from model import *
class DR_Markowitz_Experiment(object):
    """Run portfolio experiments with ``DistRobustMarkowitz``.

    The class reads price and implied-volatility CSV files located under
    ``data_path``, builds the stack of per-sample matrices ``A`` that is
    handed to ``DistRobustMarkowitz`` together with the training returns,
    and evaluates the fitted weights (``model.beta``) on one held-out
    return vector, either from the data files or from simulated Gaussian
    samples.

    NOTE(review): ``DistRobustMarkowitz`` is not defined in this file; it is
    presumably provided by ``from model import *`` -- confirm.
    """

    # End offset (relative to ``month``) of the implied-volatility slice.
    # With the default 12-month window this yields 121 - 12 = 109 entries,
    # which matches the default ``self.n``.
    _VOL_SLICE_END = 121

    def __init__(self, data_path):
        """Store the data root and the default experiment dimensions.

        Args:
            data_path: Directory that contains the ``Data`` folder. A
                trailing path separator is accepted but not required.
        """
        self.data_path = data_path
        self.window = 12  # Try to evaluate annual return
        self.n = 109  # self.n: length of training periods
        self.d = 20  # self.d: total number of assets

    def _data_file(self, *parts):
        """Return the path of a data file located below ``self.data_path``."""
        return os.path.join(self.data_path, *parts)

    def load_returns(self, seed, month):
        """Load train/test prices for one (seed, month) pair as returns.

        Side effect: ``self.n`` and ``self.d`` are overwritten with the
        shape of the training return matrix.

        Args:
            seed: Identifier used in the ``seed<seed>`` directory name.
            month: Identifier used in the ``NDDataPlay<month>.csv`` file name.

        Returns:
            A tuple ``(returns_train, returns_test)``. ``returns_train`` has
            one row per training price row minus ``self.window`` rows;
            ``returns_test`` is the return of the ``x`` column of the test
            file relative to the last training price row.
        """
        window = self.window
        file_name = 'NDDataPlay' + str(month) + '.csv'
        seed_dir = 'seed' + str(seed)
        train_path = self._data_file('Data', 'DataW', 'Data', seed_dir, file_name)
        test_path = self._data_file('Data', 'DataW', 'Datatest', seed_dir, file_name)

        prices_train_pd = pd.read_csv(train_path, delimiter=',', index_col='Unnamed: 0')
        prices_test_pd = pd.read_csv(test_path, delimiter=',', index_col='Unnamed: 0')

        n_prices = prices_train_pd.shape[0]  # number of training price rows
        prices_train = prices_train_pd.astype(np.float32).values
        prices_test = prices_test_pd['x'].astype(np.float32).values

        # Return over a horizon of ``window`` rows (the annual return for
        # monthly prices and the default window of 12).
        past = prices_train[:n_prices - window, :]
        returns_train = (prices_train[window:, :] - past) / past

        # Held-out return measured against the last training price row.
        last_price = prices_train[-1, :]
        returns_test = (prices_test - last_price) / last_price

        self.n, self.d = returns_train.shape
        return returns_train, returns_test

    def load_implied_vol(self, month):
        """Return the slice of the ``Open`` column of ``Data/VIX.csv``.

        The slice is positional and covers rows ``month + self.window`` up
        to (excluding) ``month + 121``; it is shorter when the file has
        fewer rows.

        Args:
            month: Offset of the slice within the file.

        Returns:
            A pandas Series with the selected ``Open`` values.
        """
        vol_data = pd.read_csv(self._data_file('Data', 'VIX.csv'), delimiter=',')
        start = month + self.window
        stop = month + self._VOL_SLICE_END
        # ``.iloc`` makes the positional meaning of the slice explicit.
        return vol_data['Open'].iloc[start:stop]

    @staticmethod
    def _mahalanobis_from_data(vol_train, returns_train, method):
        """Build the ``(d, d, T)`` matrix stack from already-loaded data.

        ``T`` is the number of volatility entries and ``d`` the number of
        columns of ``returns_train``. Slice ``k`` equals

        * ``'local'``: ``mean(vol) / vol[k] * I``
        * ``'covariance'``: ``mean(vol) / vol[k] * inv(cov(returns_train))``
        * anything else: ``I``

        Raises:
            ValueError: If ``vol_train`` is empty.
        """
        vol = np.asarray(vol_train, dtype=float)
        if vol.size == 0:
            raise ValueError('need at least one implied-volatility value')

        if method == 'covariance':
            # Loop-invariant: invert the sample covariance once, not per slice.
            base = np.linalg.inv(np.cov(returns_train.T))
        else:
            base = np.eye(returns_train.shape[1])

        if method in ('local', 'covariance'):
            # The mean is taken on the original object so that its handling
            # of missing values is unchanged.
            scales = np.mean(vol_train) / vol
        else:
            scales = np.ones(vol.size)

        # Broadcasting yields the same array as stacking scale * base on axis 2.
        return base[:, :, np.newaxis] * scales[np.newaxis, np.newaxis, :]

    def get_mahalanobis_matrix(self, seed, month, method='local'):
        """Load the data for (seed, month) and build the matrix stack.

        Side effect: ``self.n`` and ``self.d`` are updated by
        ``load_returns``.

        Args:
            seed: Passed to ``load_returns``.
            month: Passed to ``load_returns`` and ``load_implied_vol``.
            method: ``'local'``, ``'covariance'`` or any other value for
                identity matrices.

        Returns:
            Array of shape ``(d, d, T)``; see ``_mahalanobis_from_data``.
        """
        vol_train = self.load_implied_vol(month)
        returns_train, _ = self.load_returns(seed, month)
        return self._mahalanobis_from_data(vol_train, returns_train, method)

    @staticmethod
    def _fit_and_evaluate(train, test, A):
        """Fit ``DistRobustMarkowitz`` on ``train``/``A``; return ``test . beta``."""
        model = DistRobustMarkowitz(X=train, A=A)
        model.optimize(verbose=False, grad_method='analytic')
        return test.dot(model.beta)

    def one_step_portfolio_return(self, seed, month, method='local'):
        """Fit on the training returns of (seed, month) and score the test return.

        Returns:
            ``returns_test.dot(model.beta)`` for the fitted model.
        """
        # Load each input once and reuse it for both the matrix stack and the fit.
        returns_train, returns_test = self.load_returns(seed, month)
        vol_train = self.load_implied_vol(month)
        A = self._mahalanobis_from_data(vol_train, returns_train, method)
        return self._fit_and_evaluate(returns_train, returns_test, A)

    def _run_parallel(self, func, jobs):
        """Run ``func(*args)`` for every tuple in ``jobs`` on a process pool.

        A progress bar advances once per successfully completed job.
        Results are returned in submission order; an exception raised by a
        job is re-raised when its result is collected.
        """
        jobs = list(jobs)
        progress = tqdm(total=len(jobs), leave=False)
        pool = Pool(cpu_count())
        try:
            pending = [
                pool.apply_async(func, args=args,
                                 callback=lambda _result: progress.update())
                for args in jobs
            ]
            pool.close()
            pool.join()
        finally:
            # Harmless after a clean close/join; stops the workers if
            # submitting or joining failed.
            pool.terminate()
            progress.close()
        return [job.get() for job in pending]

    def portfolio_return(self, method='local'):
        """Compute one-step returns for the first 100 seeds and every test month.

        Returns:
            Array of shape ``(len(seeds), testmonth)``; row ``i`` holds the
            returns of seed ``i`` for months ``1 .. testmonth``.
        """
        seed_df = pd.read_csv(self._data_file('Data', 'LinHistogramDataTangent.csv'),
                              delimiter=',')
        seeds = seed_df['Seed'][0:100]
        # testmonth is the number of months in the time period 2000-2017.
        # In our case testmonth = 12 * 17 = 204.
        testmonth_df = pd.read_csv(self._data_file('Data', 'Nummonth.csv'), delimiter=',')
        testmonth = int(testmonth_df['x'][0])

        jobs = [(seed, month, method)
                for seed in seeds
                for month in range(1, testmonth + 1)]
        results = self._run_parallel(self.one_step_portfolio_return, jobs)
        return np.array(results).reshape(len(seeds), testmonth)

    def one_step_simulated_return(self, mean, cov, method='constant', seed=None):
        """Fit on simulated Gaussian returns and score one simulated test draw.

        The matrix stack is built from the data files of seed 1, month 1.
        Training and test samples are ``sqrtm(cov) . z + mean`` with
        standard normal ``z``.

        Args:
            mean: Mean vector added to every sample.
            cov: Covariance matrix; its size fixes the number of assets.
            method: Forwarded to ``get_mahalanobis_matrix``.
            seed: Optional seed for NumPy's global generator. With the
                default ``None`` the generator is reseeded from fresh
                entropy, so parallel workers do not share a stream.

        Returns:
            ``exp_test.dot(model.beta)`` for the fitted model.
        """
        A = self.get_mahalanobis_matrix(1, 1, method=method)
        # A millisecond wall-clock seed collides between workers that start
        # at the same time; seed(None) draws fresh entropy instead.
        np.random.seed(seed)
        exp_sqrt_cov = scipy.linalg.sqrtm(cov)
        n_assets = exp_sqrt_cov.shape[0]
        # One training sample per matrix in the stack.
        # NOTE(review): assumes the model pairs sample k with A[:, :, k] -- confirm.
        n_samples = A.shape[2]
        exp_train = exp_sqrt_cov.dot(np.random.randn(n_assets, n_samples)).T + mean
        exp_test = exp_sqrt_cov.dot(np.random.randn(n_assets)).T + mean
        return self._fit_and_evaluate(exp_train, exp_test, A)

    def simulated_portfolio_return(self, mean, cov, method='constant', num_exp=1000):
        """Repeat ``one_step_simulated_return`` ``num_exp`` times in parallel.

        Args:
            mean: Forwarded to ``one_step_simulated_return``.
            cov: Forwarded to ``one_step_simulated_return``.
            method: Forwarded to ``one_step_simulated_return``.
            num_exp: Number of independent experiments (default 1000).

        Returns:
            List with one simulated one-step return per experiment.
        """
        jobs = [(mean, cov, method)] * num_exp
        return self._run_parallel(self.one_step_simulated_return, jobs)