In [1]:
import os
import torch
import numpy as np
import pandas as pd
from torcheval.metrics import R2Score
from utils.metrics import metric

In [8]:
# Directory containing the per-window result CSVs; process them in name order.
test_result_dir = "./long_short_pred_results"
fnames = sorted(os.listdir(test_result_dir))

In [26]:
def calc_r2(preds, trues):
    """Compute the R2 score of `preds` against `trues` with torcheval's R2Score.

    Both arguments are flattened (via their ``.flatten()`` method) and
    converted to tensors before being passed to the metric, so inputs of any
    matching shape are scored as one flat sequence.

    Returns whatever ``R2Score.compute()`` yields for that single update.
    """
    flat_pred, flat_true = (torch.tensor(arr.flatten()) for arr in (preds, trues))

    scorer = R2Score()
    scorer.update(flat_pred, flat_true)
    return scorer.compute()

In [52]:
# Build one row per result file:
#   [first month in the file, R2 for each month ..., R2 over the whole file]
r2_rows = []
cols = ['yearmonth', 'm1', 'm2', 'm3', 'all']
# Number of per-month score columns sitting between 'yearmonth' and 'all'.
n_month_cols = len(cols) - 2

for fn in fnames:
    res_df = pd.read_csv(os.path.join(test_result_dir, fn))
    months = sorted(res_df['yearmonth'].unique())
    if len(months) > n_month_cols:
        # Fail here with the file name instead of later with a column-count error.
        raise ValueError(
            "{} covers {} months but only {} month columns are defined".format(
                fn, len(months), n_month_cols
            )
        )

    r2_row = [months[0]]

    for month in months:
        in_month = res_df['yearmonth'] == month
        preds = res_df.loc[in_month, 'mean'].to_numpy()
        trues = res_df.loc[in_month, 'exret'].to_numpy()

        # calc_r2 yields a 0-dim tensor; store a plain float so the resulting
        # table sorts and serialises numerically rather than as tensor objects.
        r2_row.append(float(calc_r2(preds, trues)))

    # Pad files with fewer months so the whole-file score always lands in 'all'
    # instead of sliding into one of the month columns.
    r2_row.extend([np.nan] * (n_month_cols - len(months)))

    preds = res_df['mean'].to_numpy()
    trues = res_df['exret'].to_numpy()
    r2_row.append(float(calc_r2(preds, trues)))

    r2_rows.append(r2_row)


In [57]:
# One row per result file; write the table ordered by the whole-file score, highest first.
all_res = pd.DataFrame(r2_rows, columns=cols)
(
    all_res
    .sort_values(by='all', ascending=False)
    .to_csv('r2_monthly_res.csv', index=False)
)

In [42]:
# Load a single result file and pull out the predicted ('mean') and
# realised ('exret') columns as NumPy arrays for ad-hoc inspection.
res_df = pd.read_csv("./long_short_pred_results/results_2014-11-15_2015-01-15.csv")
preds, trues = res_df['mean'].to_numpy(), res_df['exret'].to_numpy()


In [47]:
# Distinct 'yearmonth' values present in the loaded file, in ascending order.
sorted(res_df['yearmonth'].drop_duplicates())

['2014-11', '2014-12', '2015-01']

In [43]:
# Flatten predictions and targets into tensors, then display the targets.
input, target = (torch.tensor(arr.flatten()) for arr in (preds, trues))
target

tensor([-3.3200e-02, -1.4000e-02, -1.0900e-02, -1.4959e-03,  8.7218e-03,
        -8.0100e-02,  1.3120e-01,  1.4914e-02, -2.2079e-02, -3.2838e-02,
        -4.0000e-04,  5.6600e-02, -2.0000e-03,  4.2194e-04, -1.3700e-02,
        -4.5000e-03, -5.6598e-02, -6.0649e-02, -5.1700e-02, -1.9200e-02,
        -1.2600e-02, -1.5000e-02, -4.8588e-04, -1.1248e-02, -7.6500e-04,
        -4.6300e-02, -6.1000e-02,  1.6300e-02, -2.1800e-02,  2.4530e-02,
        -6.0000e-02, -1.7700e-02, -5.2000e-03, -2.4000e-02,  2.0400e-02,
         3.5030e-02, -5.6172e-03, -1.3000e-02,  1.1700e-02,  6.6000e-03,
         5.0072e-02,  7.0000e-03,  7.0000e-03, -2.8981e-03, -2.2100e-02,
        -5.8800e-02, -5.9200e-02,  7.3000e-03, -3.5800e-02, -3.5800e-02,
        -3.7500e-02, -3.7500e-02,  3.1200e-02, -3.5066e-02,  2.4000e-03,
        -3.5600e-02,  7.0000e-03, -1.6620e-02,  4.1413e-02, -1.5800e-02,
        -7.1000e-03, -4.9600e-02, -1.0900e-02,  5.9000e-03, -4.3675e-02,
         3.6312e-02,  1.8800e-02,  2.4000e-03, -1.0

In [44]:
# R2 over the whole of the currently loaded `preds` / `trues`, computed through
# the shared calc_r2 helper instead of repeating its body inline.
print(calc_r2(preds, trues))

tensor(-0.0819)


In [29]:
# 23 features, sample data, multivariate-predict single, on excess return
# NOTE(review): `test_res_path2` is not defined in any cell visible here; it must
# already exist in the kernel for this cell to run — confirm where it is set.
preds, trues = (
    np.load('results/{}/pred.npy'.format(test_res_path2)),
    np.load('results/{}/true.npy'.format(test_res_path2)),
)
# Score only index -1 along axis 2; the first two returned values are kept as mae and mse.
mae, mse, _, _, _ = metric(preds[:, :, -1], trues[:, :, -1])

In [30]:
# R2 for each index along axis 2 of the loaded arrays, computed through the
# shared calc_r2 helper (which flattens each slice before scoring).
for i in range(preds.shape[2]):
    print(calc_r2(preds[:, :, i], trues[:, :, i]))

tensor(-0.0621)


In [35]:
# 10 features, sample data, multivariate-predict multi, on excess return
#
# Column names used to label the per-column scores: ten feature columns followed
# by 'exret' (11 entries in total).
# Candidates currently left out: 'ret', 'PTFSBD', 'PTFSFX', 'PTFSCOM', 'em',
# 'sp500', 'sizespread', 'bondmkt', 'creditspread', 'SMB', 'HML', 'RF', 'mom',
# 'con', 'ipg', 'tfp', 'term', 'def', 'dei', 'mkt', 'lab'.
add_cols = [
    'aum',
    'SENT_',
    'confeature',
    'tfpfeature',
    'ipgfeature',
    'termfeature',
    'deffeature',
    'deifeature',
    'mktfeature',
    'labfeature',
    'exret',
]

# NOTE(review): `test_res_path3` is not defined in any cell visible here; it must
# already exist in the kernel for this cell to run — confirm where it is set.
preds, trues = (
    np.load('results/{}/pred.npy'.format(test_res_path3)),
    np.load('results/{}/true.npy'.format(test_res_path3)),
)
# Score only index -1 along axis 2; the first two returned values are kept as mae and mse.
mae, mse, _, _, _ = metric(preds[:, :, -1], trues[:, :, -1])

In [36]:
# R2 for each index along axis 2, printed next to the matching name in `add_cols`.
# Uses the shared calc_r2 helper (which flattens each slice before scoring).
for i in range(preds.shape[2]):
    print(calc_r2(preds[:, :, i], trues[:, :, i]), add_cols[i])

tensor(0.7751) aum
tensor(0.6990) SENT_
tensor(-0.0483) confeature
tensor(0.0119) tfpfeature
tensor(-0.0597) ipgfeature
tensor(0.4115) termfeature
tensor(0.0093) deffeature
tensor(-0.0888) deifeature
tensor(-0.0922) mktfeature
tensor(0.1613) labfeature
tensor(-0.0634) exret
