# INTRODUCTION
1. In this tutorial, we will tune the hyperparameters of Stable-Baselines3 models using Optuna.
2. The default model hyperparameters may not be adequate for your custom portfolio or custom state space. Reinforcement learning algorithms are sensitive to hyperparameters, hence tuning is an important step.
3. Hyperparameters are tuned against an objective, which is either maximized or minimized. Here we tune our hyperparameters to maximize the Sharpe ratio.

In [None]:
#Installing FinRL
%%capture
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

In [None]:
#Installing Optuna
%%capture
!pip3 install optuna

In [None]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
from thop import profile
%matplotlib inline
from finrl.apps import config
from finrl.neo_finrl.preprocessor.yahoodownloader import YahooDownloader
from finrl.neo_finrl.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.neo_finrl.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.neo_finrl.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.drl_agents.rllib.models import DRLAgent as DRLAgent_rllib
from finrl.neo_finrl.data_processor import DataProcessor


from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint

import sys
sys.path.append("../FinRL-Library")

import itertools


Module "zipline.assets" not found; multipliers will not be applied to position notionals.



In [None]:
import os

# Create every working directory named in the FinRL config.
# `exist_ok=True` makes the cell safe to re-run and removes the
# check-then-create race of `os.path.exists` followed by `os.makedirs`.
for _dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir_name, exist_ok=True)

## COLLECTING DATA AND PREPROCESSING

In [None]:
# Download the price history of a custom ticker list through FinRL's YahooDownloader
ticker_list = ['AAPL', 'GOOGL', 'MSFT', 'FB']
yahoo_downloader = YahooDownloader(
    start_date='2009-01-01',
    end_date='2021-07-06',
    ticker_list=ticker_list,
)
df = yahoo_downloader.fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (11737, 8)


In [None]:
# Every optional feature is switched off in this tutorial.
# To add technical indicators, VIX or the turbulence factor to the dataframe,
# set use_technical_indicator=True, use_vix=True and use_turbulence=True
# and change the state space accordingly.
fe = FeatureEngineer(
    use_technical_indicator=False,
    tech_indicator_list=[],
    use_vix=False,
    use_turbulence=False,
    user_defined_feature=False,
)

processed = fe.preprocess_data(df)

In [None]:
# Build one row per (calendar day, ticker) pair between the first and last date
list_ticker = processed["tic"].unique().tolist()
calendar_days = pd.date_range(processed['date'].min(), processed['date'].max())
list_date = list(calendar_days.astype(str))
combination = list(itertools.product(list_date, list_ticker))

# Left-join the processed data onto that grid; pairs without data become NaN rows
date_ticker_grid = pd.DataFrame(combination, columns=["date", "tic"])
processed_full = date_ticker_grid.merge(processed, on=["date", "tic"], how="left")

# Keep only dates that occur in `processed`, order the rows, then replace
# the remaining missing values with 0
is_known_date = processed_full['date'].isin(processed['date'])
processed_full = processed_full[is_known_date]
processed_full = processed_full.sort_values(['date', 'tic']).fillna(0)

# Preview the first rows with a fresh 0..n index
processed_full.sort_values(['date', 'tic'], ignore_index=True).head(5)

Unnamed: 0,date,tic,open,high,low,close,volume,day
0,2009-01-02,AAPL,3.067143,3.251429,3.041429,2.782837,746015200.0,4.0
1,2009-01-02,GOOGL,154.454453,161.071075,152.902908,160.820816,7213779.0,4.0
2,2009-01-02,MSFT,19.530001,20.4,19.370001,15.370625,50084000.0,4.0
3,2009-01-05,AAPL,3.3275,3.435,3.311071,2.900283,1181608000.0,0.0
4,2009-01-05,GOOGL,160.66066,165.785782,157.657654,164.189194,9768222.0,0.0


In [None]:
# Split the data by date range: an earlier window for training/tuning and a
# later window for trading (evaluation); then print the row count of each
train = data_split(processed_full, '2009-01-01', '2020-07-01')
trade = data_split(processed_full, '2020-07-01', '2021-07-06')
print(len(train), len(trade), sep="\n")

8679
762


In [None]:
# Number of distinct tickers in the training data
stock_dimension = len(train["tic"].unique())
# One scalar plus two entries per stock
# (presumably cash balance + price and holdings per stock -- TODO confirm against StockTradingEnv)
state_space = 2 * stock_dimension + 1
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 3, State Space: 7


In [None]:
# Keyword arguments shared by the training and the trading environment
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=[],  # no technical indicators were computed above
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Gym-compatible environment built on the training data
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

In [None]:
# Get the Stable-Baselines3 environment from the training gym environment
# (only the first element of the returned pair is needed here) ...
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

# ... and create the training agent on top of it
agent = DRLAgent(env=env_train)

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [None]:
# Gym environment built on the trading-period data, with no turbulence threshold
e_trade_gym = StockTradingEnv(
    df=trade,
    turbulence_threshold=None,
    **env_kwargs,
)

## TUNING HYPERPARAMETERS USING OPTUNA
1. Go to this [link](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py); there you will find all the hyperparameters that can be tuned for each model.
2. For your model, pick the hyperparameters you want to optimize and return them as a dictionary.
3. Optuna has a feature called hyperparameter importance, which lets you identify the hyperparameters that matter most for tuning.
4. By default, Optuna uses [TPESampler](https://www.youtube.com/watch?v=tdwgR1AqQ8Y) to sample hyperparameters from the search space.

In [None]:
def sample_ppo_params(trial: "optuna.Trial"):
    """Sample one PPO hyperparameter configuration from the search space.

    Args:
        trial: Optuna trial used to draw the values.

    Returns:
        dict with the keys ``n_steps``, ``ent_coef``, ``learning_rate`` and
        ``batch_size``, ready to be passed as ``model_kwargs``.
    """
    # Number of environment steps collected before each policy update
    # (the rollout length, not the episode length); categorical choice
    n_steps = trial.suggest_categorical("n_steps", [512, 1024, 2048])
    # Entropy coefficient (exploration vs. exploitation), sampled log-uniformly.
    # `suggest_float(..., log=True)` replaces the deprecated `suggest_loguniform`.
    ent_coef = trial.suggest_float("ent_coef", 0.0001, 0.1, log=True)
    # Learning rate, sampled log-uniformly over five orders of magnitude
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
    # Minibatch size; categorical choice
    batch_size = trial.suggest_categorical("batch_size", [64, 128])

    return {
        "n_steps": n_steps,
        "ent_coef": ent_coef,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
    }


In [None]:
#Calculate the Sharpe ratio
#This is our objective for tuning
def calculate_sharpe(df):
    """Return the annualised Sharpe ratio of an account-value history.

    Args:
        df: DataFrame with an ``account_value`` column. A ``daily_return``
            column (period-over-period percentage change) is added to it
            in place.

    Returns:
        ``sqrt(252) * mean(daily_return) / std(daily_return)``, or ``0.0``
        when the returns have zero standard deviation (e.g. the account
        value never changes). The result is NaN when the standard deviation
        itself is NaN, e.g. with fewer than two returns.
    """
    df['daily_return'] = df['account_value'].pct_change(1)
    daily_return = df['daily_return']
    volatility = daily_return.std()
    # A zero standard deviation used to leave `sharpe` unassigned and raise
    # UnboundLocalError; report a neutral ratio instead.
    if volatility == 0:
        return 0.0
    return (252 ** 0.5) * daily_return.mean() / volatility

In [None]:
from IPython.display import clear_output
         
def objective(trial: optuna.Trial):
    """Optuna objective: train PPO with sampled hyperparameters and score it.

    The trial proposes a hyperparameter set, a PPO model is trained with it
    for a short budget, and the Sharpe ratio of the resulting account value
    over the trading period is returned as the value to optimise.
    """
    # Let the trial suggest a hyperparameter set from the specified ranges
    ppo_kwargs = sample_ppo_params(trial)
    candidate_model = agent.get_model("ppo", model_kwargs=ppo_kwargs)

    # Short training budget of 5000 timesteps; increase it for a better comparison
    trained_ppo = agent.train_model(
        model=candidate_model,
        tb_log_name='ppo',
        total_timesteps=5000,
    )
    clear_output(wait=True)

    # Account value over the trading period for this hyperparameter set
    account_values, _actions = DRLAgent.DRL_prediction(
        model=trained_ppo,
        environment=e_trade_gym,
    )

    # The Sharpe ratio of that account value is the trial's score
    return calculate_sharpe(account_values)

# Create the study; the direction is 'maximize' because the objective
# returns the Sharpe ratio and a higher value is better.
# NOTE(review): a pruner stops unpromising trials based on reported
# intermediate values; `objective` in this notebook does not appear to
# report any, so the HyperbandPruner presumably has no effect here -- confirm.
study = optuna.create_study(
    pruner=optuna.pruners.HyperbandPruner(),
    direction='maximize',
)

# Increase n_trials to scan the search space more thoroughly.
# A trial that raises ValueError (presumably e.g. when the model diverges)
# is recorded as failed instead of aborting the whole study.
study.optimize(
    objective,
    n_trials=10,
    catch=(ValueError,),
)

[32m[I 2021-09-18 21:02:34,015][0m Trial 9 finished with value: 2.277115685639976 and parameters: {'n_steps': 1024, 'ent_coef': 0.004601798561824062, 'learning_rate': 8.0085709602842e-05, 'batch_size': 64}. Best is trial 9 with value: 2.277115685639976.[0m


hit end!


In [None]:
# List every trial of the study.
# The `values` entry of each FrozenTrial is the objective value, i.e. the Sharpe ratio.
# In the run recorded below, trial number 9 has the highest Sharpe ratio.
study.trials

[FrozenTrial(number=0, values=[0.8781024156065699], datetime_start=datetime.datetime(2021, 9, 18, 20, 58, 50, 705018), datetime_complete=datetime.datetime(2021, 9, 18, 20, 59, 15, 580389), params={'n_steps': 512, 'ent_coef': 0.0013871852601235754, 'learning_rate': 0.01604305201570951, 'batch_size': 64}, distributions={'n_steps': CategoricalDistribution(choices=(512, 1024, 2048)), 'ent_coef': LogUniformDistribution(high=0.1, low=0.0001), 'learning_rate': LogUniformDistribution(high=1.0, low=1e-05), 'batch_size': CategoricalDistribution(choices=(64, 128))}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=0, state=TrialState.COMPLETE, value=None),
 FrozenTrial(number=1, values=[0.5811463453222786], datetime_start=datetime.datetime(2021, 9, 18, 20, 59, 15, 582235), datetime_complete=datetime.datetime(2021, 9, 18, 20, 59, 35, 435872), params={'n_steps': 1024, 'ent_coef': 0.04359802358207561, 'learning_rate': 1.876729323504833e-05, 'batch_size': 128}, distributions={'n_steps

In [None]:
# Get the hyperparameters of the best trial (the one with the highest Sharpe ratio)
study.best_params

{'batch_size': 64,
 'ent_coef': 0.004601798561824062,
 'learning_rate': 8.0085709602842e-05,
 'n_steps': 1024}

In [None]:
# Build a PPO model from the best hyperparameters found by the study
# and train it for the full budget of 50000 timesteps
tuned_model_ppo = agent.get_model("ppo", model_kwargs=study.best_params)
trained_ppo = agent.train_model(
    model=tuned_model_ppo,
    tb_log_name='ppo',
    total_timesteps=50000,
)

{'n_steps': 1024, 'ent_coef': 0.004601798561824062, 'learning_rate': 8.0085709602842e-05, 'batch_size': 64}
Using cuda device
Logging to tensorboard_log/ppo/ppo_29
-----------------------------
| time/              |      |
|    fps             | 318  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 1024 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 267          |
|    iterations           | 2            |
|    time_elapsed         | 7            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0046637743 |
|    clip_fraction        | 0.022        |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.26        |
|    explained_variance   | 0.000894     |
|    learning_rate        | 8.01e-05     |
|    loss                 | 3.99         |
|    n_updates          

In [None]:
# Account value and actions of the tuned model over the trading period
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    environment=e_trade_gym,
    model=tuned_model_ppo,
)

hit end!


In [None]:
# Backtesting with our tuned model
print("==============Get Backtest Results===========")
# Timestamp used to give the results file a unique name
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all_tuned = backtest_stats(account_value=df_account_value_tuned)
# Wrap the tuned statistics themselves. The previous code wrapped
# `perf_stats_all`, which belongs to the non-tuned model and is not even
# defined yet when this cell first runs.
perf_stats_all_tuned = pd.DataFrame(perf_stats_all_tuned)
perf_stats_all_tuned.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_tuned_"+now+'.csv')

Annual return          0.397420
Cumulative returns     0.401136
Annual volatility      0.163037
Sharpe ratio           2.142516
Calmar ratio           5.586195
Stability              0.910848
Max drawdown          -0.071143
Omega ratio            1.526346
Sortino ratio          3.770178
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.279125
Daily value at risk   -0.019155
dtype: float64


**You can see the sharpe ratio is 2.14** 

In [None]:
# Baseline: train PPO with the default hyperparameters (config.PPO_PARAMS)
# for the same 50000 timesteps as the tuned model
non_tuned_model_ppo = agent.get_model("ppo", model_kwargs=config.PPO_PARAMS)
trained_ppo = agent.train_model(
    model=non_tuned_model_ppo,
    tb_log_name='ppo',
    total_timesteps=50000,
)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 64}
Using cuda device
Logging to tensorboard_log/ppo/ppo_30
-----------------------------
| time/              |      |
|    fps             | 317  |
|    iterations      | 1    |
|    time_elapsed    | 6    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 266          |
|    iterations           | 2            |
|    time_elapsed         | 15           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0044475133 |
|    clip_fraction        | 0.0453       |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.25        |
|    explained_variance   | -0.000302    |
|    learning_rate        | 0.00025      |
|    loss                 | 6.37         |
|    n_updates            | 10           |
|    poli

In [None]:
# Account value and actions of the non-tuned model over the trading period
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=e_trade_gym,
    model=non_tuned_model_ppo,
)

hit end!


In [None]:
# Backtest the model trained with the default (non-tuned) hyperparameters
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

# Compute the performance statistics and keep them as a DataFrame
perf_stats_all = pd.DataFrame(backtest_stats(account_value=df_account_value))
# Saving is left disabled for the baseline run:
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.200346
Cumulative returns     0.202087
Annual volatility      0.147013
Sharpe ratio           1.320698
Calmar ratio           2.507303
Stability              0.724229
Max drawdown          -0.079905
Omega ratio            1.280327
Sortino ratio          1.954077
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.011580
Daily value at risk   -0.017751
dtype: float64


**You can see the sharpe ratio is 1.32, less than our tuned model (2.14)**

In [None]:
# Plot the objective value (Sharpe ratio) of every trial in the study.
# In the recorded run the Sharpe ratio improves as the trials progress;
# more trials can be run for further optimization if you can afford them.
from optuna.visualization import plot_optimization_history
plot_optimization_history(study)

In [None]:
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
# Plot the hyperparameter importances estimated from the study's trials.
# NOTE(review): the original note says ent_coef is the most important one;
# that reflects one particular run and may change between runs -- confirm.
plot_param_importances(study)

## FURTHER WORKS

1.   You can tune more critical hyperparameters
2.   Multi-objective hyperparameter optimization using Optuna. Here we can maximize Sharpe and simultaneously minimize Volatility in our account value to tune our hyperparameters

