# ESG Active RL Portfolio Management

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from stable_baselines3 import DQN, DDPG

from alphaQ.utils import download_ticker_data, train_test_split, plot_episodes, sharpe, save_to_s3, load_from_s3
from alphaQ.env import PortfolioEnv
from alphaQ.agent.features import FeatureExtractor
from alphaQ.agent.callbacks import EvalCallback
from alphaQ.agent.utils import AgentStrategy, load_model, display_attributes
from alphaQ.eval import evalu8, evaluate_baselines

import config
import config as cfg
from config import MODELS, MODEL_PARAMS

# Silence every warning category so library chatter does not flood cell output.
warnings.filterwarnings('ignore')

# Matplotlib defaults applied to the figures drawn in this notebook:
# both tick-label sizes share one value, the remaining entries are set individually.
rcParams = dict.fromkeys(('xtick.labelsize', 'ytick.labelsize'), 12)
rcParams.update({
    'figure.figsize': (18, 9),
    'legend.fontsize': 13,
    'axes.labelsize': 14,
})
plt.rcParams.update(rcParams)

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Label for this run; it becomes part of the best-model save path further down.
EXP_NAME = 'esg_dqn_low_lr'

# Notebook-level seed.
# NOTE(review): the visible cells seed the agent from config.RANDOM_SEED, not
# from this value -- confirm which one is meant to be authoritative.
RANDOM_SEED = 42

# Key of the algorithm to train; used to index config.ACTION_SPACE, MODELS
# and MODEL_PARAMS.
model = "dqn"

# Number of pre-training episodes (multiplied by the episode length to get the
# total training timesteps).
# NOTE(review): the earlier note suggested 20 for DDPG and 30 for DQN, which
# does not match the current value of 50 -- confirm.
train_episodes = 50

In [None]:
# Data source definitions and (optional) refresh of the price data stored on S3.
# Loading and splitting the data happens in a later cell.

# S3 object keys under which the price panels are stored.
esg_key = 'tradeGPT/prices/esg.pkl'
market_key = 'tradeGPT/prices/market.pkl'  # SPY

# Assets traded by the agent.
tickers = ["MSFT", "NVDA", "GOOGL", "LLY"]  # MSCI USA ESG Leaders Index (USD)

# Set to True to download fresh prices and save them to S3 under the keys
# above. Left off by default so that re-running the notebook reuses the
# stored copies.
REFRESH_DATA = False

if REFRESH_DATA:
    # OHLC prices for the ESG basket; rows containing missing values are dropped.
    data = download_ticker_data(
        tickers,
        start=config.START,
        end=config.END,
        columns=['Open', 'High', 'Low', 'Close'],
    ).dropna()
    save_to_s3(data, esg_key)

    # Benchmark prices, selected at the index labels of the ESG panel so both
    # frames cover the same rows.
    market = download_ticker_data(
        'SPY',
        start=config.START,
        end=config.END,
        columns=['Adj Close'],
    ).loc[data.index]
    save_to_s3(market, market_key)

In [None]:
# Fetch the stored ESG price panel from S3 and cut it into train / validation /
# test segments (train_years=12 is forwarded to the splitter).
data = load_from_s3(esg_key)
train, val, test = train_test_split(data, train_years=12)

# Repeat the same load-and-split for the market benchmark so each price
# segment has a matching benchmark segment.
market = load_from_s3(market_key)
market_train, market_val, market_test = train_test_split(market, train_years=12)

In [None]:
# Action-space type registered in config for the selected algorithm.
action_space_type = config.ACTION_SPACE[model]

# Rendering switches are written onto the shared config module.
config.RENDER_ENV = False
config.RENDER_FREQ = 2

# Upper-case alias kept for any code that refers to TICKERS; derived from
# `tickers` so the two can never drift apart.
TICKERS = list(tickers)  # MSCI USA ESG Leaders Index (USD)
# tickers = [""]  # S&P Global Clean Energy Index (argue that this is down recently, we need a way to actively trade these stocks)

# Constructor arguments shared by the train, validation and test environments.
_env_kwargs = {
    'tickers': tickers,
    'window_length': config.WINDOW_LENGTH,
    'trading_cost': config.COMMISSION_RATE,
    'action_space_type': action_space_type,
}

# Training environment.
# FIX: previously received the full `market` frame; it now gets `market_train`
# so the benchmark segment matches the `train` prices, the same way the
# validation and test environments pair `val`/`market_val` and
# `test`/`market_test`.
env = PortfolioEnv(
    prices=train,
    market_prices=market_train,
    render=config.RENDER_ENV,
    **_env_kwargs,
)

# Validation environment (used by the evaluation callback during training).
val_env = PortfolioEnv(
    prices=val,
    market_prices=market_val,
    render=config.RENDER_ENV,
    render_mode='val',
    **_env_kwargs,
)

# Held-out test environment; no render arguments are passed, so the
# environment's own defaults apply.
test_env = PortfolioEnv(
    prices=test,
    market_prices=market_test,
    **_env_kwargs,
)

In [None]:
# Parameter bundle registered in MODEL_PARAMS for the selected algorithm.
model_params = MODEL_PARAMS[model]

# Arguments for the custom feature extractor.
# NOTE(review): the literals 4 and 5 in features_dim look tied to the number
# of tickers (4) and tickers + 1 (5) -- confirm before changing the asset list.
extractor_kwargs = {
    'features_dim': 16 * model_params['multiplier'] * 4 + 5,
    'multiplier': model_params['multiplier'],
}

# Neural-network configuration handed to the agent as policy_kwargs.
policy_kwargs = dict(
    features_extractor_class=FeatureExtractor,
    features_extractor_kwargs=extractor_kwargs,
    net_arch=model_params['net_arch'],
    # Add 'weight_decay': 5e-9 here to use ridge regularisation.
    optimizer_kwargs={},
)

# Start from an empty dict and layer the algorithm's hyperparameters, then its
# exploration parameters, on top (later keys win on collision).
model_kwargs = {}
for section in ('hyperparams', 'exploration'):
    model_kwargs.update(model_params[section])

# Action noise (for DDPG): the config stores a key into config.ACTION_NOISE
# plus a 'noise_sigma' value; replace the key with a constructed noise object.
if 'action_noise' in model_kwargs:
    # Size of the last axis of the action space.
    n_actions = env.action_space.shape[-1]
    # 'noise_sigma' is consumed here, so it is removed from the kwargs that
    # are later forwarded to the agent.
    sigma = model_kwargs.pop('noise_sigma')
    noise_factory = config.ACTION_NOISE[model_kwargs['action_noise']]
    model_kwargs['action_noise'] = noise_factory(
        mean=np.zeros(n_actions),
        sigma=sigma * np.ones(n_actions),
    )

# Last expression of the cell: shows the configured noise (None when unset).
model_kwargs.get('action_noise')

In [None]:
# Early stopping is more or less already implemented. Plan: cap training at 20 episodes and
# note as a limitation that much longer runs could be tried; in practice, training is very unstable.

In [None]:
# Train a fresh agent (no checkpoint is loaded in this cell).

# Where the evaluation callback stores the best model for this experiment.
best_model_save_path = f'{config.SAVE_PATH}/{EXP_NAME}/best_model'

# Number of rows in the training prices minus the observation window length.
episode_length = env.prices.shape[0] - config.WINDOW_LENGTH

# Evaluation callback: one episode on the validation environment every
# `episode_length` steps (presumably once per training episode -- confirm
# against the callback implementation).
callback_kwargs = {
    'eval_env': val_env,
    'n_eval_episodes': 1,
    'eval_freq': episode_length,
    'log_path': config.LOG_PATH,
    'best_model_save_path': best_model_save_path,
    'verbose': config.CALLBACK_VERBOSE_LEVEL,
    'warn': False,
}
eval_callback = EvalCallback(**callback_kwargs)

# Build the agent from the class registered for the selected algorithm.
# NOTE(review): the seed comes from config.RANDOM_SEED; the notebook-level
# RANDOM_SEED constant is not used here -- confirm that this is intended.
agent_cls = MODELS[model]
agent = agent_cls(
    env=env,
    policy_kwargs=policy_kwargs,
    verbose=config.TRAIN_VERBOSE_LEVEL,
    seed=config.RANDOM_SEED,
    **model_kwargs,
)

# Training budget: the configured number of episodes times the episode length.
total_timesteps = train_episodes * episode_length
agent.learn(total_timesteps=total_timesteps, callback=eval_callback)

In [None]:
# Learning curves: plot the per-episode records kept by the training
# environment, then those kept by the validation environment.
train_curve = pd.DataFrame(env.record.episodes)
train_curve.plot()

val_curve = pd.DataFrame(val_env.record.episodes)
val_curve.plot()