In [None]:
import sys
from pathlib import Path

# Make the project root importable so the notebook can use the project's modules.
# The root is taken to be the parent of the current working directory
# (i.e. the notebook is expected to run from a folder directly below the root).
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
# Guard the append so that re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [None]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import torchmetrics as tm
import torch
import json
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sys,os

from src.portfolios.portfolio_new import Portfolio

In [None]:
# Root folder of the saved experiment results, given relative to the working directory.
resultsPath = '../saved/results_old/'

In [None]:
# Folders of the GuEnsemble run, all rooted at `resultsPath`:
# the run folder itself, its predicted returns and its trial metadata.
GuEnsemblePath = f'{resultsPath}GuEnsemble/'
predicted_ret_path = f'{GuEnsemblePath}predicted_returns/'
trial_info_path = f'{GuEnsemblePath}trial_info/'

In [None]:
# Collect the regular files (sub-directories are skipped) found in each results folder.
# os.listdir returns entries in arbitrary order, so the names are sorted: this keeps
# `predicted_ret_files[0]` and the index-based column suffixes stable across runs.
predicted_ret_files = sorted(
    f for f in listdir(predicted_ret_path) if isfile(join(predicted_ret_path, f))
)
trial_info_files = sorted(
    f for f in listdir(trial_info_path) if isfile(join(trial_info_path, f))
)

In [None]:
import models.neural_net.metric as metric
import data.data_preprocessing as dp

# R2 of the first prediction file, first with the `ret` column stored in the file,
# then again with `ret` re-attached from the CRSP data.
df = pd.read_csv(join(predicted_ret_path, predicted_ret_files[0]), index_col=0)
print(f'R2 with predicted_ret file: {metric.calc_r2(df)}')

crsp = dp.load_crsp()
# DataFrame.merge returns a new frame, so the result has to be assigned back;
# otherwise the second R2 is computed on a frame that no longer has a `ret` column.
df = df.drop(columns='ret').merge(
    crsp[['permno', 'yyyymm', 'ret']], on=['permno', 'yyyymm'], how='left'
)
metric.calc_r2(df)

In [None]:
# Assemble one wide frame from all prediction files. The first file contributes
# every column (its `predicted_ret` renamed to `predicted_ret_0`); each later file
# contributes only its last column, renamed to `predicted_ret_<idx>` when that
# column is `predicted_ret` (assumed to be the prediction column — TODO confirm).
# The pieces are gathered in a list and concatenated once, instead of growing
# `returns` with one concat per file, which re-copies the accumulated frame each time.
frames = []
for idx, file in enumerate(predicted_ret_files):
    df = pd.read_csv(join(predicted_ret_path, file), index_col=0)
    df = df.rename({'predicted_ret': 'predicted_ret_' + str(idx)}, axis=1)
    # Only the first file keeps its non-prediction columns.
    frames.append(df if idx == 0 else df.iloc[:, -1])

# pd.concat raises on an empty list; keep the empty-frame result when no files exist.
returns = pd.concat(frames, axis=1) if frames else pd.DataFrame()

In [None]:
# Ensemble forecast: row-wise mean over every column from position 3 onwards.
# NOTE(review): assumes the first three columns are not predictions — confirm.
ensemble_members = returns.iloc[:, 3:]
returns['predicted_ret'] = ensemble_members.mean(axis=1)

In [None]:
# Keep only the identifier columns, the realised return and the ensemble forecast.
portfolio_columns = ['permno', 'yyyymm', 'ret', 'predicted_ret']
returns = returns[portfolio_columns]

In [None]:
import torchmetrics
import torch

# Rank correlation between the ensemble forecast and the realised returns.
# Convert through NumPy first: torch.tensor() applied directly to a pandas Series
# converts it element by element, which is slow and depends on the Series index.
preds = torch.tensor(returns['predicted_ret'].to_numpy())
target = torch.tensor(returns['ret'].to_numpy())

spear = torchmetrics.SpearmanCorrCoef()
# torchmetrics metrics are called as (preds, target); Spearman's coefficient is
# symmetric, so the value is unchanged by using the documented order.
spearman = spear(preds, target)

In [None]:
# Report the Spearman coefficient as a plain float, formatted to four decimals.
print(f'Spearman correlation coefficient: {spearman.item():.4f}')

In [None]:
# "Tableau 20" palette as 0-255 RGB triples, rescaled to the [0, 1] range
# that matplotlib accepts.
tableau20_rgb = [
    (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
    (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
    (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
]
tableau20 = [(r / 255., g / 255., b / 255.) for r, g, b in tableau20_rgb]

# Plots usually read best at ~1.33x wider than tall, e.g. (10, 7.5) or (12, 9).
plt.figure(figsize=(8, 5.5))
ax = plt.subplot(111)

# Drop the top/right frame lines (chartjunk); keep the bottom/left ones.
for side, visible in (("top", False), ("bottom", True), ("right", False), ("left", True)):
    ax.spines[side].set_visible(visible)

ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

plt.title("Stock returns distribution (actual and forecasted)\n", fontsize=22)

# Overlaid histograms of forecasted and realised returns on a shared binning.
bins = np.linspace(-30, 30, 100)
x1 = returns['predicted_ret']
x2 = returns['ret']
for series, label, colour in ((x1, 'NN4', tableau20[3]), (x2, 'Actual', tableau20[5])):
    plt.hist(series, bins, alpha=0.5, label=label, color=colour)

# Scientific notation on the count axis, explicit tick positions and labels.
plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
plt.xticks(range(-30, 30, 5), fontsize=13)
plt.yticks(range(0, 120000, 20000), fontsize=13)
plt.legend(loc='upper right', fontsize=14)
plt.xlabel("Monthly stock returns (%)", fontsize=15)
plt.ylim(1, 90000)

plt.tight_layout()
plt.show()

# bbox_inches="tight" removes all the extra whitespace on the edges of your plot.
# plt.savefig("Returns_distribution.png", bbox_inches="tight")


# Portfolio calculation

In [None]:
# Build a Portfolio from the ensemble forecasts with weighting='VW'
# (presumably value-weighted — confirm against the Portfolio class).
portfolioVW = Portfolio(returns, weighting='VW')
# Alternative weighting='EW' variant, currently disabled.
# portfolioEW = Portfolio(returns, weighting='EW')

In [None]:
# Keep a handle on the portfolio's `returns` attribute for the market comparison.
pret = portfolioVW.returns

In [None]:
# NOTE(review): this imports from `portfolios.…` whereas Portfolio is imported from
# `src.portfolios.…` — confirm that both package roots are on sys.path.
from portfolios.market_portfolio import MarketPortfolio
# Instantiate the market portfolio; its `mkt_returns` attribute is read later.
mp = MarketPortfolio()

In [None]:
mkt = mp.mkt_returns
# Left-join the market returns onto the portfolio returns by month (`yyyymm`).
# The merge starts from `portfolioVW.returns` rather than from `pret` itself, so
# re-running this cell does not merge the market columns in a second time (which
# would add suffixed duplicates and shift the positional columns read afterwards).
pret = portfolioVW.returns.merge(mkt, on='yyyymm', how='left')

In [None]:
# T is the number of non-null observations in the second column of `pret`.
# `count()` returns a Series labelled by column name, so the positional lookup is
# made explicit with `.iloc[1]`; a bare `[1]` relies on the deprecated
# integer-as-position fallback.
T = pret.count().iloc[1]

# Compounded excess return over the last column of `pret`, scaled by 12/T.
# NOTE(review): assumes the last three columns are (long, long-short, market)
# returns in percent and that rows are monthly — confirm against the merged frame.
market_ret = pret.iloc[:, -1]
alpha_market_long = (((pret.iloc[:, -3] - market_ret) / 100 + 1) ** (12 / T)).prod() - 1
alpha_market_long_short = (((pret.iloc[:, -2] - market_ret) / 100 + 1) ** (12 / T)).prod() - 1
        

In [None]:
# Same compounded product as in `alpha_market_long_short`, shown before the final `- 1`.
(((pret.iloc[:,-2] - pret.iloc[:,-1])/100+1)**(12/T)).prod()

In [None]:
# Display the market-adjusted long-short figure computed earlier.
alpha_market_long_short

In [None]:
# Plot the portfolio's cumulative returns; path=None presumably means the figure
# is shown without being saved — TODO confirm in Portfolio.plot_cumulative_returns.
portfolioVW.plot_cumulative_returns(path=None)

In [None]:
# Run the portfolio's FF5FM regression with method='long'.
# NOTE(review): FF5FM presumably stands for Fama-French five factors plus momentum — confirm.
portfolioVW.regress_on_FF5FM(method='long')

In [None]:
# NOTE(review): this calls a leading-underscore (private by convention) method
# directly; prefer a public accessor if Portfolio offers one.
portfolioVW._calculate_metrics()