In [None]:
# NEED TO INSTALL THE FOLLOWING WITH PIP FIRST:

"""
wrds,
swig,
finrl==0.3.5
elegantrl==0.3.3
git+https://github.com/AI4Finance-Foundation/FinRL.git
"""

In [None]:


from stable_baselines3.common.logger import configure


import sys
sys.path.append("../FinRL")



In [None]:
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
)
# Hand the four FinRL output locations to check_and_make_directories
# (presumably it creates any that are missing — confirm in finrl.main).
_output_dirs = [DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR]
check_and_make_directories(_output_dirs)



In [None]:
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader

# Ticker universe used by every later cell.
tickers_list = ['MSFT', 'AAPL', 'CAT', 'CSCO', 'NKE']

# Download the raw price history for those tickers over the whole study window.
price_downloader = YahooDownloader(
    start_date='2012-01-01',
    end_date='2023-10-31',
    ticker_list=tickers_list,
)
df = price_downloader.fetch_data()

In [None]:
# Echo the ticker universe that was passed to YahooDownloader.
print(tickers_list)

In [None]:
# (rows, columns) of the raw download.
df.shape

In [None]:
# Show the raw download ordered by date. The sorted copy is only displayed; `df` itself is unchanged.
# Disabled alternative preview: df.sort_values(['date','tic'],ignore_index=True).head()
df.sort_values(by='date', ignore_index=True)

In [None]:
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split


# Technical indicators requested for every ticker.
indicator_names = [
    'macd', 'boll_ub', 'boll_lb', 'rsi_30',
    'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma',
]

# Technical indicators, VIX and turbulence are switched on; user-defined features are off.
feature_engineer = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=indicator_names,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)

# Engineered feature table derived from the raw download.
states_df = feature_engineer.preprocess_data(df)

In [None]:
from itertools import product
import pandas as pd
# Every calendar day between the first and last date present in the feature table, as strings.
first_day, last_day = states_df['date'].min(), states_df['date'].max()
dates = list(pd.date_range(first_day, last_day).astype(str))

# Full (date x ticker) grid, so every ticker has a row for every date that is kept.
date_ticker_grid = pd.DataFrame(list(product(dates, tickers_list)), columns=["date", "tic"])
observed_dates = states_df['date']

preprocessed_df = (
    date_ticker_grid
    .merge(states_df, how="left", on=["date", "tic"])
    # Drop calendar days that never occur in the feature table.
    .loc[lambda frame: frame['date'].isin(observed_dates)]
    .sort_values(['date', 'tic'])
    # Rows introduced by the grid that have no features are filled with 0.
    .fillna(0)
)



In [None]:
# Summary statistics of the gap-filled feature table (sanity check after the fillna(0) step).
preprocessed_df.describe()

In [None]:
# Chronological split: '2020-07-01' is the boundary passed to both data_split calls
# (end of the training window, start of the test window).
split_date = '2020-07-01'
train = data_split(preprocessed_df, '2012-01-01', split_date)
test = data_split(preprocessed_df, split_date, '2023-10-31')


In [None]:
# Display the training split produced by data_split('2012-01-01', '2020-07-01').
train

In [None]:
# Display the held-out split produced by data_split('2020-07-01', '2023-10-31').
test

In [None]:
stock_size = len(tickers_list)

# Must list the same indicators that are passed to StockTradingEnv as tech_indicator_list.
state_indicators = [
    'macd',
    'boll_ub',
    'boll_lb',
    'rsi_30',
    'cci_30',
    'dx_30',
    'close_30_sma',
    'close_60_sma',
]

# Length of the observation vector expected by StockTradingEnv.
# NOTE(review): layout taken from FinRL's StockTradingEnv — confirm against the installed version:
#   1                                      cash balance
#   + stock_size                           current close price of each stock
#   + stock_size                           number of shares held of each stock
#   + len(state_indicators) * stock_size   technical indicators, one set per stock
# With 5 tickers and 8 indicators: 1 + 2*5 + 8*5 = 51.
state_space = 1 + 2 * stock_size + len(state_indicators) * stock_size

stock_size, state_space

In [None]:
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv


# Constructor arguments for the training environment.
train_env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,  # STARTING AMOUNT HERE
    "num_stock_shares": [0] * stock_size,
    "buy_cost_pct": [.1/100] * stock_size,  # Transaction fee percent of buys per stock
    "sell_cost_pct": [.1/100] * stock_size,
    "state_space": state_space,
    "stock_dim": stock_size,  # Stock dimensions
    "tech_indicator_list": [
        'macd', 'boll_ub', 'boll_lb', 'rsi_30',
        'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma',
    ],
    "action_space": stock_size,
    "reward_scaling": 1e-4,
}
train_gym_env = StockTradingEnv(df=train, **train_env_kwargs)

# get_sb_env() returns a pair: the Stable-Baselines-compatible environment and its initial observations.
# NOTE(review): in FinRL this is a DummyVecEnv wrapper (single process, sequential stepping) and the
# observations come from calling reset() on it — confirm against the installed FinRL version.
training_environment, initial_observations = train_gym_env.get_sb_env()
"""
The training process involves observing stock price change, taking an action and reward's calculation. By interacting with the market environment, the agent will eventually derive a trading strategy that may maximize (expected) rewards.

Our market environment, based on OpenAI Gym, simulates stock markets with historical market data.
"""


In [None]:
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, get_baseline


In [None]:

"""
# Automatically build models from list of model names
agent = DRLAgent(training_environment)
models=[]
def log_model(model_name):
    print(model_name)
    model = agent.get_model(f"{model_name}")
    model.set_logger(configure(RESULTS_DIR + f'/{model_name}', ["stdout", "csv", "tensorboard"]))
    models.append((model_name, model))

model_names=['a2c'
            ,'ddpg'
            # ,'ppo'
            # ,'sac'
            ]
for model_name in model_names:
    log_model(model_name)
print(models)

"""

In [None]:
# Baseline A2C hyperparameters. Not used below; kept for comparison runs.
a2c_params = {
    "n_steps": 5,
    "ent_coef": 0.01,
    "learning_rate": 7e-4,
    "gamma": 0.99,
    "gae_lambda": 0.95
}
# THESE PARAMS WERE OBTAINED AFTER OPTUNA
# NOTE(review): these values match the "Best trial" recorded in the Optuna cell further down. That
# study appears to have maximised an objective that returned the *negated* cumulative return, so
# they may belong to the worst trial rather than the best — re-run the tuning before trusting them.
a2c_tuned_params = {
    "n_steps": 1,
    "ent_coef": 0.0755882482216129,
    "learning_rate": 2.637065887731285e-05,
    "gamma": 0.9048260592925886,
    "gae_lambda": 0.9717236074963396
}

agent = DRLAgent(training_environment)
a2c_model = agent.get_model("a2c", model_kwargs=a2c_tuned_params)

# Send training logs to stdout, CSV and TensorBoard under RESULTS_DIR/a2c.
tmp_path = RESULTS_DIR + '/a2c'
new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
a2c_model.set_logger(new_logger_a2c)

# Train with the agent created above (same pattern as the PPO cell) instead of building a second DRLAgent.
trained_a2c = agent.train_model(model=a2c_model,
                                tb_log_name='a2c',
                                total_timesteps=50000)


# A2C HYPERPARAMS MOST IMPORTANT
# learning_rate: How quickly the model learns from new experience. Too small and it learns slowly; too large and it may fail to converge.
# n_steps: Number of environment steps collected before each update. More steps give larger batches but delay learning from recent experience.
# gamma: Discount factor. Higher values make the agent weight long-term rewards more.
# gae_lambda: Bias/variance trade-off of the advantage estimate (GAE). Values closer to 1 give lower bias but higher variance; smaller values give lower variance but more bias.
# ent_coef: Entropy coefficient. Higher values encourage more exploration (more random actions).
# max_grad_norm: Gradient clipping limit for stability. Usually does not need much tuning.


In [None]:
# Baseline PPO hyperparameters. Not used below; kept for comparison runs.
ppo_params = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}

# Hyperparameters actually used for training.
# NOTE(review): presumably produced by the PPO Optuna cell further down — verify how they were obtained.
ppo_tuned_params = {
    "n_steps": 232,
    "ent_coef": 0.08005421293955037,
    "learning_rate": 0.0002058992300570136,
    "batch_size": 238,
}

agent = DRLAgent(env=training_environment)
model_ppo = agent.get_model("ppo", model_kwargs=ppo_tuned_params)

# Send training logs to stdout, CSV and TensorBoard under RESULTS_DIR/ppo.
ppo_log_dir = RESULTS_DIR + '/ppo'
new_logger_ppo = configure(ppo_log_dir, ["stdout", "csv", "tensorboard"])
model_ppo.set_logger(new_logger_ppo)

trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name='ppo',
    total_timesteps=50000,
)

# DDPG HYPERPARAMS MOST IMPORTANT (reference notes only; no DDPG model is trained in this cell)
# learning_rate: How quickly the model learns from new data. Too low learns slowly, too high can destabilize training.
# buffer_size: Size of the replay buffer holding experiences. Larger buffers allow longer-term learning but cost memory.
# batch_size: Size of the batch sampled from the replay buffer per update. Too small may underutilize GPU/CPU resources.
# tau: Weighting between older and newer Q-network weights during the target update (stability vs plasticity).
# train_freq: How frequently the model trains. Balance between learning from more data and more frequent updates.
# gradient_steps: Number of gradient steps per training update. More may increase stability.


In [None]:
# Rows inside the training window: 2012-01-01 <= date < 2020-07-01.
in_train_window = (preprocessed_df['date'] >= '2012-01-01') & (preprocessed_df['date'] < '2020-07-01')
processed_risk_dfs = preprocessed_df[in_train_window]

# Keep the first row of each date. The result still carries the vix and turbulence columns
# alongside the technical indicators (presumably vix/turbulence are identical across tickers on a date).
risk_df = processed_risk_dfs.drop_duplicates(subset=['date'])

# List the available columns, one per line.
for column_name in risk_df.columns:
    print(column_name)

In [None]:
# Distribution of the vix column over the training window.
risk_df.vix.describe()

In [None]:
# 99.6th percentile of vix in the training window; compare with the turbulence_threshold
# given to the trading environment, which uses 'vix' as its risk indicator column.
risk_df.vix.quantile(0.996)

In [None]:
# Distribution of the turbulence column over the training window.
risk_df.turbulence.describe()

In [None]:
# 99.6th percentile of turbulence in the training window (candidate cut-off if turbulence were the risk indicator).
risk_df.turbulence.quantile(0.996)

In [None]:


# TESTING OVER HERE
# Same settings as the training environment, plus a risk rule: 'vix' is the risk indicator
# column and 70 is passed as turbulence_threshold.
trade_env_kwargs = {
    "turbulence_threshold": 70,
    "risk_indicator_col": 'vix',
    "hmax": 100,
    "initial_amount": 1000000,  # STARTING AMOUNT HERE
    "num_stock_shares": [0] * stock_size,
    "buy_cost_pct": [.1/100] * stock_size,  # Transaction fee percent of buys per stock
    "sell_cost_pct": [.1/100] * stock_size,
    "state_space": state_space,
    "stock_dim": stock_size,  # Stock dimensions
    "tech_indicator_list": [
        'macd', 'boll_ub', 'boll_lb', 'rsi_30',
        'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma',
    ],
    "action_space": stock_size,
    "reward_scaling": 1e-4,
}
trading_environment = StockTradingEnv(df=test, **trade_env_kwargs)

# Stable-Baselines-compatible wrapper and its initial observations.
env_trade, obs_trade = trading_environment.get_sb_env()

In [None]:
# Backtest each trained agent on the held-out trading environment.
# DDPG is disabled because no `trained_ddpg` model is produced in the visible cells:
#   df_account_value_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(
#       model=trained_ddpg, environment=trading_environment)

# A2C: account-value history and the actions taken.
testing_a2c_model = trained_a2c
a2c_backtest = DRLAgent.DRL_prediction(
    environment=trading_environment,
    model=testing_a2c_model,
)
df_account_value_a2c, df_actions_a2c = a2c_backtest

# PPO: same evaluation on the same environment object.
testing_ppo_model = trained_ppo
ppo_backtest = DRLAgent.DRL_prediction(
    environment=trading_environment,
    model=testing_ppo_model,
)
df_account_value_ppo, df_actions_ppo = ppo_backtest



In [None]:
# Inspect the trained A2C model and its backtest output.
# A dedicated name is used so the `a2c_params` hyperparameter dict is not overwritten by model weights.
a2c_model_parameters = testing_a2c_model.get_parameters()
print("A2C parameter groups:", list(a2c_model_parameters))

# Bare expressions in the middle of a cell are not displayed, so print the actions preview
# explicitly and leave the account-value preview as the cell's last expression.
print(df_actions_a2c.head())
df_account_value_a2c.head()

In [None]:

# Backtest statistics of the A2C account-value curve, wrapped in a DataFrame for later lookups.
a2c_backtest_stats = backtest_stats(account_value=df_account_value_a2c)
a2c_performance_stats = pd.DataFrame(a2c_backtest_stats)
# Optional CSV export, disabled (`datetime` is not imported in the visible cells):
# a2c_performance_stats.to_csv("./"+RESULTS_DIR+"/a2c_performance_stats"+datetime.datetime.now().strftime('%Y%m%d-%Hh%M')+'.csv')

In [None]:
# Backtest statistics of the PPO account-value curve, wrapped in a DataFrame.
ppo_backtest_stats = backtest_stats(account_value=df_account_value_ppo)
ppo_performance_stats = pd.DataFrame(ppo_backtest_stats)


In [None]:
# Pull the A2C cumulative return out of the stats table (first column of that row).
a2c_cumulative_row = a2c_performance_stats.loc["Cumulative returns"]
cumulative_return_value = a2c_cumulative_row.iloc[0]
print("Cumulative Returns Value:", cumulative_return_value)


In [None]:
# NASDAQ-100 baseline over the span of the A2C backtest (dates at row labels 0 and len-1).
a2c_first_date = df_account_value_a2c.at[0, 'date']
a2c_last_date = df_account_value_a2c.at[len(df_account_value_a2c) - 1, 'date']
baseline_df = get_baseline(ticker="^NDX", start=a2c_first_date, end=a2c_last_date)

# Backtest statistics computed on the baseline's close price.
stats = backtest_stats(baseline_df, value_col_name='close')


In [None]:
# NASDAQ-100 baseline over the span of the PPO backtest (dates at row labels 0 and len-1).
ppo_first_date = df_account_value_ppo.at[0, 'date']
ppo_last_date = df_account_value_ppo.at[len(df_account_value_ppo) - 1, 'date']
baseline_df = get_baseline(ticker="^NDX", start=ppo_first_date, end=ppo_last_date)

# Backtest statistics computed on the baseline's close price.
stats = backtest_stats(baseline_df, value_col_name='close')


In [None]:
# Dates covered by the A2C backtest.
df_account_value_a2c['date']

In [None]:
# Date at row label 0 of the A2C backtest (used as the baseline start above).
df_account_value_a2c.loc[0,'date']

In [None]:
# Date at row label len-1 of the A2C backtest (used as the baseline end above).
df_account_value_a2c.loc[len(df_account_value_a2c)-1,'date']

In [None]:
import numpy as np

# Master switch for the hyperparameter-search cells below; they only run when this is True.
tune_hyper_params =False

# BE CAREFUL: THIS CAN TAKE HOURS. RUN ONLY IF NEEDED

def evaluate_model(model, trading_environment):
    """Backtest ``model`` on ``trading_environment`` and return its performance table.

    Calls ``DRLAgent.DRL_prediction`` (which returns a pair), passes the first element
    (the account-value history) to ``backtest_stats`` and discards the second.

    Returns:
        The ``backtest_stats`` result wrapped in a ``pandas.DataFrame``.
    """
    account_values, _unused_actions = DRLAgent.DRL_prediction(
        model=model,
        environment=trading_environment,
    )
    return pd.DataFrame(backtest_stats(account_value=account_values))

# Candidate values for the exhaustive A2C grid search (3 x 3 x 3 = 27 training runs).
n_steps_values = [5, 10, 15]
ent_coef_values = [0.01, 0.1, 0.2]
learning_rate_values = [1e-4, 5e-4, 1e-3]

best_performance = float('-inf')
best_params = None
if tune_hyper_params:
    # NOTE(review): candidates are scored on `trading_environment`, which is built from the `test`
    # split that is also used for the final backtest, so the selected result is optimistic.
    # `product` comes from the `from itertools import product` import earlier in the notebook.
    for n_steps, ent_coef, learning_rate in product(n_steps_values, ent_coef_values, learning_rate_values):
        candidate_params = {
            "n_steps": n_steps,
            "ent_coef": ent_coef,
            "learning_rate": learning_rate,
            "gamma": 0.99,
            "gae_lambda": 0.95
        }

        # Loop-local names keep the search from overwriting the `trained_a2c` model and the
        # `a2c_params` dict created by earlier cells.
        grid_agent = DRLAgent(training_environment)
        candidate_model = grid_agent.get_model("a2c", model_kwargs=candidate_params)
        candidate_trained = grid_agent.train_model(model=candidate_model, tb_log_name='a2c', total_timesteps=50000)

        perf_stats = evaluate_model(candidate_trained, trading_environment)
        candidate_return = perf_stats.loc["Cumulative returns"].iloc[-1]

        if candidate_return > best_performance:
            best_performance = candidate_return
            best_params = candidate_params

    # Report only when a search actually ran; otherwise this would print None / -inf.
    print("Best Hyperparameters:", best_params)
    print("Best Performance (Cumulative Return):", best_performance)

#TOOK 57 MINS:
# Best Hyperparameters: {'n_steps': 15, 'ent_coef': 0.2, 'learning_rate': 0.0005, 'gamma': 0.99, 'gae_lambda': 0.95}
# Best Performance (Cumulative Return): 0.822119795194425



In [None]:
import optuna
from optuna import Trial

def objective(trial: "Trial", model_name=None):
    """Optuna objective for A2C: train with sampled hyperparameters and score on the test split.

    Args:
        trial: Optuna trial used to sample ``n_steps``, ``ent_coef``, ``learning_rate``,
            ``gamma`` and ``gae_lambda``.
        model_name: Unused; kept so existing callers keep working.

    Returns:
        The cumulative return of the trained model on a fresh trading environment.
        Higher is better, so the study must be created with ``direction="maximize"``
        (as the notebook does). The previous version returned the negated value, which
        under ``direction="maximize"`` made Optuna select the *worst* trial.
    """
    # Define the search space for hyperparameters
    a2c_params = {
        "n_steps": trial.suggest_int("n_steps", 1, 10),
        "ent_coef": trial.suggest_float("ent_coef", 0.001, 0.1),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3),
        "gamma": trial.suggest_float("gamma", 0.9, 0.999),
        "gae_lambda": trial.suggest_float("gae_lambda", 0.9, 0.999)
    }

    # Train the A2C model with the current set of hyperparameters
    agent = DRLAgent(training_environment)
    a2c_model = agent.get_model("a2c", model_kwargs=a2c_params)
    trained_a2c = agent.train_model(
        model=a2c_model,
        tb_log_name='a2c',
        total_timesteps=50000
    )

    # Evaluate on a fresh environment so no state leaks between trials.
    # NOTE(review): this scores on `test`, the same split used for the final backtest,
    # so tuned results are optimistic; a separate validation split would avoid that.
    trading_environment = StockTradingEnv(df=test, turbulence_threshold=70, risk_indicator_col='vix', hmax=100,
                                          initial_amount=1000000, num_stock_shares=[0] * stock_size,
                                          buy_cost_pct=[.1/100] * stock_size, sell_cost_pct=[.1/100] * stock_size,
                                          state_space=state_space, stock_dim=stock_size,
                                          tech_indicator_list=['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30',
                                                               'close_30_sma', 'close_60_sma'],
                                          action_space=stock_size, reward_scaling=1e-4)

    perf_stats = evaluate_model(trained_a2c, trading_environment)

    # Return the metric itself; the study direction ("maximize") handles the optimisation sense.
    return perf_stats.loc["Cumulative returns"].iloc[-1]

if tune_hyper_params:
    # Run 50 trials of `objective`, asking Optuna to maximise the value it returns.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50)

    # Report the winning trial: its objective value and the sampled hyperparameters.
    best_trial = study.best_trial
    print("Best trial:")
    print("Value: ", best_trial.value)
    print("Params: ")
    for param_name, param_value in best_trial.params.items():
        print(f"    {param_name}: {param_value}")

    # Output recorded from an earlier run (kept verbatim):
    """ 
    Best trial:
Value:  0.12409658470314999
Params: 
    n_steps: 1
    ent_coef: 0.0755882482216129
    learning_rate: 2.637065887731285e-05
    gamma: 0.9048260592925886
    gae_lambda: 0.9717236074963396

    """


In [None]:
import optuna.visualization as optuna_viz

if tune_hyper_params:
    # Plot parameter importances.
    # The figure is created inside an `if` block, so the notebook does not auto-display it
    # (only a cell's last top-level expression is shown); call .show() explicitly.
    optuna_viz.plot_param_importances(study).show()


In [None]:
if tune_hyper_params:
    # The figure is created inside an `if` block, so it must be shown explicitly.
    optuna_viz.plot_optimization_history(study).show()
# This plot tells us that Optuna made the score converge to the minimum after only a few trials.



In [None]:
import optuna
from stable_baselines3.common.noise import NormalActionNoise

from optuna import Trial

def objective(trial: "Trial"):
    """Optuna objective for PPO: train with sampled hyperparameters and score on the test split.

    This definition replaces the earlier A2C ``objective`` in the notebook namespace.

    Args:
        trial: Optuna trial used to sample ``n_steps``, ``ent_coef``, ``learning_rate``
            and ``batch_size``.

    Returns:
        The cumulative return of the trained model on a fresh trading environment.
        Higher is better, so the study must be created with ``direction="maximize"``
        (as the notebook does). The previous version returned the negated value, which
        under ``direction="maximize"`` made Optuna select the *worst* trial.
    """
    # Define the search space for hyperparameters
    ppo_params = {
        "n_steps": trial.suggest_int("n_steps", 16, 512),
        "ent_coef": trial.suggest_float("ent_coef", 0.01, 0.1),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3),
        "batch_size": trial.suggest_int("batch_size", 32, 256),
    }

    # Train the PPO model with the current set of hyperparameters
    agent = DRLAgent(training_environment)
    ppo_model = agent.get_model("ppo", model_kwargs=ppo_params)
    trained_ppo = agent.train_model(
        model=ppo_model,
        tb_log_name='ppo',
        total_timesteps=50000
    )

    # Evaluate on a fresh environment so no state leaks between trials.
    # NOTE(review): this scores on `test`, the same split used for the final backtest,
    # so tuned results are optimistic; a separate validation split would avoid that.
    trading_environment = StockTradingEnv(df=test, turbulence_threshold=70, risk_indicator_col='vix', hmax=100,
                                          initial_amount=1000000, num_stock_shares=[0] * stock_size,
                                          buy_cost_pct=[.1/100] * stock_size, sell_cost_pct=[.1/100] * stock_size,
                                          state_space=state_space, stock_dim=stock_size,
                                          tech_indicator_list=['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30',
                                                               'close_30_sma', 'close_60_sma'],
                                          action_space=stock_size, reward_scaling=1e-4)

    perf_stats = evaluate_model(trained_ppo, trading_environment)

    # Return the metric itself; the study direction ("maximize") handles the optimisation sense.
    return perf_stats.loc["Cumulative returns"].iloc[-1]
if tune_hyper_params:
    # Run 50 trials of `objective`, asking Optuna to maximise the value it returns.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50)

    # Report the winning trial: its objective value and the sampled hyperparameters.
    best_trial = study.best_trial
    print("Best trial:")
    print("Value: ", best_trial.value)
    print("Params: ")
    for param_name, param_value in best_trial.params.items():
        print(f"    {param_name}: {param_value}")



In [None]:
if tune_hyper_params:
    # The figure is created inside an `if` block, so the notebook does not auto-display it;
    # call .show() explicitly.
    optuna_viz.plot_param_importances(study).show()


In [None]:
if tune_hyper_params:
    # The figure is created inside an `if` block, so it must be shown explicitly.
    optuna_viz.plot_optimization_history(study).show()


In [None]:
# Account-value history of the PPO backtest.
df_account_value_ppo

In [None]:
# S&P 500: ^GSPC
# Dow Jones Index: ^DJI
# NASDAQ 100: ^NDX
df_ndx_ = get_baseline(
        ticker="^NDX",
        start = '2020-07-01',
        end = '2023-10-31')
stats = backtest_stats(df_ndx_, value_col_name = 'close')

# INITIAL AMOUNT HERE! Keep equal to the `initial_amount` given to the trading environment.
baseline_initial_amount = 1000000

# Rescale the index so it starts at the same capital as the agents.
# .iloc[0] selects the first row by position; Series[0] is ambiguous between label and position.
# NOTE(review): the two columns are paired by row label, which assumes the baseline and the A2C
# backtest cover the same trading days in the same order — confirm.
df_ndx = pd.DataFrame()
df_ndx['date'] = df_account_value_a2c['date']
df_ndx['account_value'] = df_ndx_['close'] / df_ndx_['close'].iloc[0] * baseline_initial_amount


In [None]:
# NDX baseline rescaled to the agents' starting capital.
df_ndx

In [None]:
# CAN DO BEFORE TUNING AND AFTER TUNING PARAMS

import matplotlib.pyplot as plt
%matplotlib inline

# Plotting code

fig, ax = plt.subplots(figsize=(20, 12))

# Plot against the actual dates so the 'Date' axis label is truthful
# (previously the integer row index was used on the x-axis).
curves = [
    ('A2C', df_account_value_a2c),
    # ('DDPG', df_account_value_ddpg),
    ('PPO', df_account_value_ppo),
    ('NDX', df_ndx),
]
for curve_label, curve_df in curves:
    ax.plot(pd.to_datetime(curve_df['date']), curve_df['account_value'], label=curve_label)

ax.set_title('A2C vs PPO Performance')
ax.set_xlabel('Date')
ax.set_ylabel('Account Value')
ax.legend()

# Add grid lines
ax.grid(True)

plt.show()
