# INTRODUCTION
1. In this tutorial, we will tune hyperparameters for Stable-Baselines3 models using Optuna.
2. The default model hyperparameters may not be adequate for your custom portfolio or custom state space. Reinforcement learning algorithms are sensitive to hyperparameters, hence tuning is an important step.
3. Hyperparameters are tuned based on an objective, which needs to be maximized or minimized. ***In this version, the ratio of average winning to losing trade value is used as the objective.***

In [None]:
# Notebook-wide settings. Nothing is installed by this cell; it only records
# the configuration and prints what is expected to be installed.
clb = True  # Colab status flag
print(f'Preparing for colab: {clb}')
pkgs = ['FinRL', 'optuna', 'Ray/rllib','plotly','ipywidgets']
if clb:
    print(f'Installing packages: {pkgs}')

# Price data handling
dl_prices = False
read_data = True

# Tuning / training budget
n_trials = 25
total_timesteps = 25000

# Settings handed to the study's logging callback
lc_threshold = 1e-5
lc_patience = 15
lc_trial_number = 5

# Objective selection and its per-trial history
tp_metric = 'avgwl'  # average trade win $/loss $
tpm_hist = {}  # record tp metric values for trials

In [None]:
from google.colab import files
#!pip install unzip
import shutil
import os
from os.path import exists

def upload_prices():
  """Open the Colab file-upload dialog and return its result.

  Returns:
    Whatever `files.upload()` returns (previously the value was bound to a
    local variable and thrown away, so callers could not see what was
    uploaded).
  """
  return files.upload()

download_data=True
read_data=True

# check if prices available
if exists("/content/DOW_prices_20211229-20h35.csv"):
  print('prices already loaded')
  download_data=False
  read_data=True
  try:
    os.remove("/content/DOW_prices_20211229-20h35.csv.zip")
  except:
    print('Not found')
  try:
    shutil.rmtree("/content/__MACOSX")
  except:
    print('Not found')

elif exists("/content/DOW_prices_20211229-20h35.csv.zip"):
  print('Found zipped prices')
  ! unzip -uq "/content/DOW_prices_20211229-20h35.csv.zip" -d "/content"
  download_data = False
  read_data=True
  
else:
  download_data = False
  uploaded = files.upload() 
! unzip -uq "/content/DOW_prices_20211229-20h35.csv.zip" -d "/content"
  


In [None]:
import pandas as pd
# Load prices from the local CSV when no download was requested and reading is enabled.
if not download_data and read_data:
  df = pd.read_csv('/content/DOW_prices_20211229-20h35.csv',index_col=0)
  print(df.shape)
  
# Otherwise fetch the prices with YahooDownloader.
# NOTE(review): `config` and `YahooDownloader` are imported in a later cell,
# so this branch fails with NameError if the cells are run strictly in order.
# If neither branch runs, `df` is never defined.
elif download_data:
  #Custom ticker list dataframe download
  ticker_list = config.DOW_30_TICKER
  df = YahooDownloader(start_date = '2009-01-01',
                       end_date = '2021-10-01',
                       ticker_list = ticker_list).fetch_data()

  

In [None]:
 %%capture
   
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git
!pip install optuna
!pip install -U "ray[rllib]"
!pip install plotly
!pip install ipywidgets
!pip install -U kaleido

In [None]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
%matplotlib inline
from finrl.apps import config
from optuna.integration import PyTorchLightningPruningCallback
from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.finrl_meta.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.drl_agents.rllib.models import DRLAgent as DRLAgent_rllib
from finrl.finrl_meta.data_processor import DataProcessor
import joblib
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint
import kaleido

import sys
sys.path.append("../FinRL-Library")

import itertools

import torch
# Prefer the GPU when torch can see one, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Torch device: {device}')


In [None]:
# Run nvidia-smi through the shell and join its output lines into one string.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# presumably nvidia-smi output contains 'failed' when no GPU is attached — TODO confirm
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
import os
# Create the working directories named in the FinRL config.
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory between the check and the call.
for _dir in (config.DATA_SAVE_DIR,
             config.TRAINED_MODEL_DIR,
             config.TENSORBOARD_LOG_DIR,
             config.RESULTS_DIR):
    os.makedirs("./" + _dir, exist_ok=True)

## COLLECTING DATA AND PREPROCESSING

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Optionally save the price frame under a timestamped name in the results directory.
if dl_prices:
  now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
  df.to_csv(f"./{config.RESULTS_DIR}/DOW_prices_{now}.csv")

In [None]:
# Feature engineering: technical indicators, VIX and turbulence are switched on
# through the keyword flags below; user-defined features are switched off.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)

processed = fe.preprocess_data(df)

In [None]:
# Build every (calendar day, ticker) pair between the first and last processed date.
list_date = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
list_ticker = processed["tic"].unique().tolist()
combination = list(itertools.product(list_date, list_ticker))

# Left-join the processed rows onto the full grid so missing pairs show up as NaN.
processed_full = (
    pd.DataFrame(combination, columns=["date", "tic"])
    .merge(processed, on=["date", "tic"], how="left")
)
# Keep only dates that occur in the processed data, order the rows, zero-fill the gaps.
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date', 'tic']).fillna(0)
# Preview expression; its result is discarded because it is not the cell's last line.
processed_full.sort_values(['date','tic'],ignore_index=True).head(5)

processed_full.to_csv('processed_full.csv')

In [None]:
# Split the processed data by date into a training set and a trading (evaluation) set.
# NOTE(review): the trade window starts on 2020-05-01, before the training
# window ends on 2020-07-01, so the two date ranges overlap — confirm this is intended.
train = data_split(processed_full, '2009-01-01','2020-07-01')
trade = data_split(processed_full, '2020-05-01','2021-10-01')
print(len(train))
print(len(trade))

In [None]:
# State size: 1 + 2 entries per stock + one entry per (technical indicator, stock) pair.
stock_dimension = len(train['tic'].unique())
state_space = 1 + stock_dimension * (2 + len(config.TECHNICAL_INDICATORS_LIST))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

In [None]:
# Keyword arguments shared by the training and trading environments.
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Gym-compatible environment over the training data.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

In [None]:
# Instantiate the training environment
# and the training agent that wraps it.
# get_sb_env() returns two values; only the first (the environment) is kept.
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))
agent = DRLAgent(env = env_train)

In [None]:
# Instantiate the trading environment over the trade split,
# with the same kwargs as training and turbulence_threshold set to None.
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = None, **env_kwargs)

## TUNING HYPERPARAMETERS USING OPTUNA
1. Go to this [link](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py), where you will find all the hyperparameters that can be tuned for each model.
2. For your model, pick the hyperparameters you want to optimize and return them as a dictionary.
3. Optuna provides a hyperparameter importance feature, which lets you identify the hyperparameters that matter most for tuning.
4. By default, Optuna uses [TPESampler](https://www.youtube.com/watch?v=tdwgR1AqQ8Y) to sample hyperparameters from the search space. 

In [None]:
def sample_ddpg_params(trial:optuna.Trial):
  """Sample the three core DDPG hyperparameters for one Optuna trial.

  Args:
    trial: Optuna trial used to draw the suggestions.

  Returns:
    dict with the keys "buffer_size", "learning_rate" and "batch_size".
  """
  # Size of the replay buffer
  buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
  # Log-scaled draw; suggest_float(log=True) replaces the deprecated suggest_loguniform.
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
  batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])

  return {"buffer_size": buffer_size,
          "learning_rate": learning_rate,
          "batch_size": batch_size}

In [None]:
def sample_ddpg_params_all(trial:optuna.Trial,
                           # fixed values from previous study
                           learning_rate=0.0103,
                           batch_size=128,
                           buffer_size=int(1e6)):
    """Sample the remaining DDPG hyperparameters for one Optuna trial.

    learning_rate, batch_size and buffer_size are passed through unchanged;
    gamma, tau, train_freq and the network size are drawn from the trial.
    gradient_steps is always set equal to train_freq.

    Returns:
        dict of model keyword arguments plus a "policy_kwargs" entry holding
        the chosen net_arch.
    """
    # Layer sizes per architecture name ("medium" is defined but not offered to the trial).
    # NOTE: Add "verybig" to net_arch when tuning HER (see TD3)
    layer_sizes = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [512, 512],
    }

    gamma = trial.suggest_categorical("gamma", [0.94, 0.96, 0.98])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.08, 0.1, 0.12])
    train_freq = trial.suggest_categorical("train_freq", [512,768,1024])

    # Noise settings are still drawn from the trial, but they are not part of
    # the returned hyperparameters.
    trial.suggest_categorical("noise_type", ["ornstein-uhlenbeck", "normal", None])
    trial.suggest_categorical("noise_std", [.1,.2,.3] )

    arch_name = trial.suggest_categorical("net_arch", ["small", "big"])

    hyperparams = dict(
        batch_size=batch_size,
        buffer_size=buffer_size,
        gamma=gamma,
        gradient_steps=train_freq,
        learning_rate=learning_rate,
        tau=tau,
        train_freq=train_freq,
        policy_kwargs=dict(net_arch=layer_sizes[arch_name]),
    )
    print(hyperparams.keys())
    return hyperparams


# TRADE PERFORMANCE CODE
Follows in cells below


In [None]:
# MAIN METHOD (called from the objective)
# TODO Long-term create DI with multiple metrics
# TODO immediate: set threshold in logging c/b
def calc_trade_perf_metric(df_actions, 
                           df_prices_trade,
                           tp_metric,
                           dbg=False,
                           dummy=False
                           ):
    """Compute the selected trade-performance metric for a set of actions.

    Args:
        df_actions: actions produced by the model on the trade data.
        df_prices_trade: price rows covering the same period.
        tp_metric: name of the metric passed on to calc_trade_metric.
        dbg: when True, drop into pdb after the data is prepared.
        dummy: when True, return a uniform random number in [0, 2) instead
            of the metric (the full pipeline still runs first).

    Returns:
        The metric value. Unless dummy is set, it is also printed and
        stored in the module-level tpm_hist under the next counter key.
    """
    actions_prepped, prices_prepped, all_tics = prep_data(df_actions.copy(),
                                                          df_prices_trade.copy())
    # actions predicted by trained model on trade data
    actions_prepped.to_csv('df_actions.csv')

    if dbg:
        import pdb; pdb.set_trace()

    buysell_tics, buyonly_tics = segregate_tics_type(actions_prepped)

    # include sync n samples (that is, dates)
    actions_synced, prices_synced, portfolio_tics = sync_tickers(
        actions_prepped.copy(), prices_prepped.copy(), all_tics)

    perf_data = calc_trade_init_vals(actions_synced, prices_synced, tics=portfolio_tics)

    # Buy-only tickers first, then tickers with sales; merge both result sets.
    perf_results = calc_trade_perf(calc_pnl_buyonly(perf_data, buyonly_tics))
    perf_results.update(calc_trade_perf(calc_pnl_buysell(perf_data, buysell_tics)))
    results_frame = pd.DataFrame.from_dict(perf_results, orient='index')

    if dummy:
        # Temp return dummy metric
        return np.random.uniform(0,2)

    m = calc_trade_metric(results_frame, tp_metric)
    print(f'{tp_metric}: {m}')
    tpm_hist[str(len(tpm_hist)+1)] = m
    return m
        


In [None]:

# Supporting methods

def calc_trade_metric(df,metric='avgwl'):
    """Reduce per-ticker trade statistics to a single scalar metric.

    Args:
        df: frame with one row per ticker. The columns read here are
            '# wins', '# losses', 'wins total value' and
            'losses total value'.
        metric: metric name; only 'avgwl' (average win / average loss) is
            implemented.

    Returns:
        abs(average win / average loss) across all tickers. With no losing
        trades the ratio is undefined, so the best value recorded so far in
        tpm_hist is returned, or 25 when the history is empty. With losing
        trades but no winning trades the result is 0.0.

    Raises:
        ValueError: if `metric` is not a supported name.
    """
    #TO DO formalize methods and max/min vals
    if metric != 'avgwl':
        # Previously fell through to `return m` with m unbound.
        raise ValueError(f'Unsupported trade performance metric: {metric!r}')

    tpm_mult = 1.0
    avgwl_no_losses = 25

    n_losses = sum(df['# losses'])
    if n_losses == 0:
        try:
            return max(tpm_hist.values())*tpm_mult
        except ValueError:
            # tpm_hist is empty: no earlier trial to borrow a value from
            return avgwl_no_losses

    n_wins = sum(df['# wins'])
    if n_wins == 0:
        # No winning trades: the average win is zero, avoid 0/0 -> NaN.
        return 0.0

    avg_w = sum(df['wins total value'])/n_wins
    avg_l = sum(df['losses total value'])/n_losses
    return abs(avg_w/avg_l)

def prep_data(df_actions,
              df_prices_trade):
    """Put actions and prices on datetime indices and pivot prices by ticker.

    Args:
        df_actions: actions frame whose index holds date-like values. Its
            index is replaced in place with a DatetimeIndex.
        df_prices_trade: long-format prices with at least the columns
            'date', 'close' and 'tic'. Not modified.

    Returns:
        (df_actions, df_prices, tics): the actions frame, a frame of close
        prices with one column per ticker indexed by normalized date, and
        the sorted array of unique tickers.
    """
    print(f'Prices raw file shape {df_prices_trade.shape}')
    print(f'Actions file shape {df_actions.shape}')
    # Explicit copy so that adding 'Date' is not an assignment on a slice.
    df = df_prices_trade[['date','close','tic']].copy()
    df['Date'] = pd.to_datetime(df['date'])
    df = df.set_index('Date')
    # set indices on both df to datetime
    # (the deprecated infer_datetime_format flag is no longer passed)
    df_actions.index = pd.to_datetime(df_actions.index)
    tics = np.unique(df.tic)
    n_tics = len(tics)
    print(f'tics: {tics} n_tics: {n_tics}')
    # One close-price series per ticker, assembled column-wise.
    dategr = df.groupby('tic')
    p_d = {t: dategr.get_group(t).loc[:, 'close'] for t in tics}
    df_prices = pd.DataFrame.from_dict(p_d)
    # Drop the time-of-day component from the price index.
    df_prices.index = df_prices.index.normalize()
    print(f'Price file shape {df_prices.shape}')
    return df_actions, df_prices, tics


# prepares for integrating action and price files
def link_prices_actions(df_a,
                        df_p):
    """Suffix the action columns with '_a' and the price columns with '_p'.

    Both frames are renamed in place and returned as (df_a, df_p).
    """
    for frame, suffix in ((df_a, '_a'), (df_p, '_p')):
        frame.columns = [name + suffix for name in frame.columns]
    return df_a, df_p

def sync_tickers(df_actions,df_tickers_p,tickers):
    """Align the action and price frames on dates and on portfolio tickers.

    Args:
        df_actions: actions frame, one column per traded ticker.
        df_tickers_p: prices frame, one column per ticker in `tickers`.
        tickers: all tickers present in the price data.

    Returns:
        (df_actions, df_tickers_p, tickers_prtfl): both frames restricted to
        the dates they share, with '_a' / '_p' column suffixes, price columns
        limited to tickers that appear in the actions, and the list of
        portfolio tickers. Both input frames are modified in place.
    """
    # Some DOW30 components may not be included in portfolio
    # passed tickers includes all DOW30 components
    # Keep only dates present in BOTH frames. Dropping per frame handles
    # extra dates on either side (or both), whatever the frame lengths are.
    common_dates = df_actions.index.intersection(df_tickers_p.index)
    df_actions.drop(index=df_actions.index.difference(common_dates), inplace=True)
    df_tickers_p.drop(index=df_tickers_p.index.difference(common_dates), inplace=True)

    df_actions, df_tickers_p = link_prices_actions(df_actions,df_tickers_p)

    # identify any DOW components not in portfolio and drop their price columns
    action_cols = set(df_actions.columns)
    drop_cols = [t + '_p' for t in tickers if t + '_a' not in action_cols]
    df_tickers_p.drop(columns=drop_cols,inplace=True)

    # Tickers in portfolio: strip only the trailing '_a' so that tickers
    # containing an underscore keep their full name.
    tickers_prtfl = [c[:-len('_a')] for c in df_actions.columns]
    return df_actions,df_tickers_p, tickers_prtfl

def segregate_tics_type(df_actions):
    """Split the action columns into tickers with sales and buy-only tickers.

    Returns:
        (tics_buysell, tics_buyonly): two sorted arrays. A ticker counts as
        buy/sell when its column minimum is negative; every other column is
        buy-only.
    """
    column_min = df_actions.apply(min)
    # tics with redemptions (i.e., negative share values)
    tics_buysell = np.sort(column_min[column_min < 0].index.values)
    leftover = set(df_actions.columns) ^ set(tics_buysell)
    tics_buyonly = np.array(sorted(leftover))
    return tics_buysell,tics_buyonly

def calc_trade_init_vals(dfa,dfp,tics):
    """Collect actions, prices and entry values for every ticker.

    Expects the '_a' / '_p' suffixed columns in dfa / dfp.

    Returns:
        dict mapping ticker -> {'actions', 'prices', 'init_values'}, where
        init_values is the element-wise product of actions and prices.
    """
    def _record(tic):
        shares = dfa[tic + '_a'].values
        quotes = dfp[tic + '_p'].values
        return {'actions': shares,
                'prices': quotes,
                'init_values': np.multiply(shares, quotes)}

    return {tic: _record(tic) for tic in tics}

def calc_pnl_buyonly(perf_data,tics_buyonly):
    """PnL per action for tickers that were never sold.

    Every position is assumed to be held to the end of the period, so each
    action is valued at the last price and compared with its entry value.
    Tickers with redemptions are processed separately.

    Returns:
        dict mapping ticker -> array of PnL values, one per action.
    """
    tics_buyonly.sort()
    pnl_d = {}
    for tic in tics_buyonly:
        record = perf_data[tic]
        # value all actions at the final price of the period
        closing_values = np.multiply(record['prices'][-1], record['actions'])
        pnl_d[tic] = np.array(np.subtract(closing_values, record['init_values']))
    return pnl_d

def calc_pnl_for_open_positions(acts,prices):
    """Value still-open lots at the final price and return their PnL.

    Args:
        acts: 1-D array of share counts; entries > 0 are treated as open lots.
        prices: 1-D array of prices aligned with acts.

    Returns:
        An empty list when no entry of acts is positive; otherwise a 1-D
        array with one PnL value per open lot (shares * final price minus
        shares * entry price).
    """
    acts = np.asarray(acts)
    prices = np.asarray(prices)
    # flatnonzero yields 1-D positions. argwhere would yield a (k, 1) index
    # array and therefore (k, 1)-shaped PnL, which cannot be combined with
    # scalar PnL values into one flat array.
    open_pos_arg = np.flatnonzero(acts > 0)
    if len(open_pos_arg) == 0:
        return []
    fp = prices[-1]
    mkt_vals_open = np.multiply(acts[open_pos_arg], prices[open_pos_arg])
    # mkt val at end of testing period
    # treat as trades for purposes of calculating pnl at end of testing period
    mkt_vals_final = np.multiply(fp, acts[open_pos_arg])
    pnl = np.subtract(mkt_vals_final, mkt_vals_open)
    print(f'Market value of open positions at end of testing {pnl}')
    return pnl

def calc_pnl_buysell(perf_dict, tics_buysell):
    """PnL per closed lot (plus open lots) for tickers that have sales.

    Sales are matched first-in-first-out against earlier purchases: each
    sale closes the earliest purchase that still has shares left, then
    moves on to the next purchase until the sale is fully matched or no
    open purchase remains. Lots still open afterwards are valued at the
    final price via calc_pnl_for_open_positions.

    Args:
        perf_dict: ticker -> {'actions': array, 'prices': array, ...}.
        tics_buysell: tickers to process.

    Returns:
        dict mapping ticker -> 1-D array of PnL values.
    """
    pnl_all = {}
    for tic in tics_buysell:
        pnl_t = []
        tic_data = perf_dict[tic]
        acts = tic_data['actions']
        prices = tic_data['prices']
        cs = np.cumsum(acts)
        # working copy: lots are reduced as sales close them
        acts_rev = acts.copy()
        # sale steps are those where the cumulative position decreases
        args_s = [i + 1 for i in range(len(cs) - 1) if cs[i + 1] < cs[i]]
        for s in args_s:
            # sales actions are negative; shares of this sale not yet matched
            remaining = abs(acts_rev[s])
            for b in range(s):
                if remaining <= 0:
                    break
                if acts_rev[b] <= 0:
                    continue  # not a purchase, or already fully closed
                # may be less than the lot size (partial close) or less than
                # the sale size (sale spans several lots)
                acts_shares = min(remaining, acts_rev[b])
                # mv of shares when purchased / when sold
                mv_p = abs(acts_shares * prices[b])
                mv_s = abs(acts_shares * prices[s])
                pnl_t.append(mv_s - mv_p)
                # reduce the lot and the outstanding sale by the matched shares
                acts_rev[b] -= acts_shares
                acts_rev[s] += acts_shares
                remaining -= acts_shares
        print(f'PnL for tic {tic} {pnl_t}')
        pnl_op = calc_pnl_for_open_positions(acts_rev,prices)
        # ravel keeps the combined list flat whatever shape the helper returns
        pnl_t.extend(np.ravel(pnl_op))
        pnl_all[tic]=np.array(pnl_t)
    return pnl_all

def calc_trade_perf(pnl_d):
    """Summarise wins and losses per ticker.

    Args:
        pnl_d: dict mapping ticker -> array of PnL values.

    Returns:
        dict mapping ticker -> statistics dict. Zero PnL values count as
        neither win nor loss; averages are 0 when there are no wins/losses.
    """
    def _average(values):
        return np.mean(values) if len(values) != 0 else 0

    perf_results = {}
    for tic, pnl in pnl_d.items():
        winners = pnl[pnl > 0]
        losers = pnl[pnl < 0]
        perf_results[tic] = {
            '# trades': len(winners) + len(losers),
            '# wins': len(winners),
            '# losses': len(losers),
            'wins total value': np.sum(winners),
            'wins avg value': _average(winners),
            'losses total value': np.sum(losers),
            'losses avg value': _average(losers),
        }
    return perf_results

In [None]:
#Calculate the Sharpe ratio
#Alternative objective for tuning (see the commented-out section in objective)
def calculate_sharpe(df):
  """Return the annualised Sharpe ratio of df['account_value'].

  The ratio is sqrt(252) * mean(daily return) / std(daily return).

  Side effect: adds (or overwrites) the 'daily_return' column on df.

  Returns:
    The Sharpe ratio, or 0 when the standard deviation of the returns is
    zero or undefined (fewer than two returns).
  """
  df['daily_return'] = df['account_value'].pct_change(1)
  volatility = df['daily_return'].std()
  # std() is NaN with fewer than two returns; NaN != 0 is True, so the
  # undefined case has to be tested explicitly.
  if pd.isna(volatility) or volatility == 0:
    return 0
  return (252**0.5)*df['daily_return'].mean()/volatility

## CALLBACKS
1. The callback stops the study once the improvement in the best objective value falls below a given threshold.
2. It only starts checking after a minimum number of trials (`trial_number`) has been reached, not before.
3. It waits until the threshold condition has been met more than `patience` times before stopping.

In [None]:
class LoggingCallback:
    """Optuna callback that stops the study when the best value stops improving.

    After every trial the study's best value is compared with the best
    value recorded after the previous trial. Trials where the change is
    below `threshold` are collected in `cb_list`; once more than
    `patience` such trials have been seen, the study is stopped.
    """

    def __init__(self,threshold,trial_number,patience):
        '''
        threshold: float, minimum change in the best objective value that counts as improvement
        trial_number: int, minimum trial number before the check is applied
        patience: int, number of below-threshold trials tolerated before stopping
        '''
        self.threshold = threshold
        self.trial_number  = trial_number
        self.patience = patience
        print(f'Callback threshold {self.threshold}, \
            trial_number {self.trial_number}, \
            patience {self.patience}')
        self.cb_list = [] #Trials list for which threshold is reached

    def __call__(self, study: "optuna.study.Study", frozen_trial: "optuna.trial.FrozenTrial"):
        try:
            current_best = study.best_value
        except ValueError:
            # No completed trial yet (e.g. the first trials failed), so
            # there is no best value to compare.
            return

        # Read the value stored after the PREVIOUS trial before overwriting
        # it. Storing first made the two values always identical, so every
        # trial looked like "no improvement".
        previous_best_value = study.user_attrs.get("previous_best_value", None)
        study.set_user_attr("previous_best_value", current_best)

        # Nothing to compare on the first call; no checks before the minimum trial number.
        if previous_best_value is None or frozen_trial.number <= self.trial_number:
            return

        # Checking that the previous and current best values have the same sign
        same_sign = previous_best_value * current_best >= 0
        # Checking for the threshold condition
        stalled = abs(previous_best_value - current_best) < self.threshold
        if not (same_sign and stalled):
            return

        self.cb_list.append(frozen_trial.number)
        # If threshold is achieved for the patience amount of time
        if len(self.cb_list) > self.patience:
            print('The study stops now...')
            print('With number',frozen_trial.number ,'and value ',frozen_trial.value)
            print('The previous and current best values are {} and {} respectively'
                    .format(previous_best_value, current_best))
            study.stop()

In [None]:
from IPython.display import clear_output
import sys   

# Directory that receives one saved model per trial.
os.makedirs("models",exist_ok=True)

def objective(trial:optuna.Trial):
  """Train a DDPG model with sampled hyperparameters and score it.

  The model is trained for total_timesteps, saved under
  models/ddpg_<trial number>.pth, run on the trading environment, and the
  resulting actions are scored with calc_trade_perf_metric.

  Returns:
    The trade-performance metric selected by tp_metric.
  """
  # Trial will suggest a set of hyperparameters from the specified range
  hyperparameters = sample_ddpg_params_all(trial)
  print(f'HP params from objective: {hyperparameters.keys()}')

  # policy_kwargs is handed to get_model separately, so take it out of the
  # model kwargs (None when it was not sampled).
  has_policy_kwargs = 'policy_kwargs' in hyperparameters
  policy_kwargs = hyperparameters.pop('policy_kwargs', None)
  if has_policy_kwargs:
    print(f'pol kw args {policy_kwargs}')

  model_ddpg = agent.get_model("ddpg",
                               policy_kwargs=policy_kwargs,
                               model_kwargs=hyperparameters)
  # You can increase total_timesteps for better comparison
  trained_ddpg = agent.train_model(model=model_ddpg,
                                   tb_log_name="ddpg",
                                   total_timesteps=total_timesteps)
  trained_ddpg.save(f'models/ddpg_{trial.number}.pth')
  clear_output(wait=True)

  # For the given hyperparameters, run the trained model over the trading period
  df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_ddpg,
    environment=e_trade_gym)

  # Sharpe-based alternative (disabled):
  #   sharpe = calculate_sharpe(df_account_value)
  #   return sharpe

  # Calculate trade performance metric
  # TODO allow selection of metric
  return calc_trade_perf_metric(df_actions,trade,tp_metric)

#Create a study object and specify the direction as 'maximize':
#the value returned by objective (the trade performance metric) is maximized.
#A HyperbandPruner is attached to the study.
#NOTE(review): objective does not report intermediate values, so the pruner
#presumably never prunes a trial here — confirm.
#You can also use a multivariate sampler
#sampler = optuna.samplers.TPESampler(multivariate=True,seed=42)
sampler = optuna.samplers.TPESampler()
study = optuna.create_study(study_name="ddpg_study",direction='maximize',
                            sampler = sampler, pruner=optuna.pruners.HyperbandPruner())

#Early-stopping callback configured from the notebook-level lc_* settings
logging_callback = LoggingCallback(threshold=
                                   lc_threshold,
                                   patience=lc_patience,
                                   trial_number=lc_trial_number)
#You can increase the n_trials for a better search space scanning
#ValueError is listed in `catch`, so a trial raising it does not abort the study
study.optimize(objective, n_trials=n_trials,catch=(ValueError,),callbacks=[logging_callback])

In [None]:
# Persist the study object to disk in the working directory.
joblib.dump(study, "final_ddpg_study__.pkl")

In [None]:
#Get the best hyperparameters found by the study and show the config defaults for comparison
print('Hyperparameters after tuning',study.best_params)
print('Hyperparameters before tuning',config.DDPG_PARAMS)

In [None]:
# Display the best trial of the study.
study.best_trial

In [None]:
# Reload the model file that objective saved for the best trial.
from stable_baselines3 import DDPG
tuned_model_ddpg = DDPG.load('models/ddpg_{}.pth'.format(study.best_trial.number),env=env_train)

In [None]:
#Trading period account value and actions with the tuned model
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    model=tuned_model_ddpg, 
    environment = e_trade_gym)

In [None]:
# Save the tuned model's actions under a timestamped name in the results directory.
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
df_actions_tuned.to_csv("./"+config.RESULTS_DIR+"/tuned_actions_" +now+ '.csv')

In [None]:
#Backtesting with our tuned model
print("==============Get Backtest Results===========")
print("==============Pruned Model===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all_tuned = backtest_stats(account_value=df_account_value_tuned)
perf_stats_all_tuned = pd.DataFrame(perf_stats_all_tuned)
perf_stats_all_tuned.columns = ['Value']
# add trade performance metric as an extra 'Trade_Perf' row
tpm = calc_trade_perf_metric(df_actions_tuned,trade,tp_metric)
trp_metric = {'Value':tpm}
df2 = pd.DataFrame(trp_metric,index=['Trade_Perf'])
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way
perf_stats_all_tuned = pd.concat([perf_stats_all_tuned, df2])
perf_stats_all_tuned.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_tuned_"+now+'.csv')

In [None]:
#Now train with the untuned hyperparameters
#Default config.DDPG_PARAMS, same total_timesteps as the tuning trials
non_tuned_model_ddpg = agent.get_model("ddpg",model_kwargs = config.DDPG_PARAMS )
trained_ddpg = agent.train_model(model=non_tuned_model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=total_timesteps)

In [None]:
# Run the default-hyperparameter model on the trading environment.
df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym)

In [None]:
#Backtesting for the untuned (default) hyperparameters
print("==============Get Backtest Results===========")
print("============Default Hyperparameters==========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
# Saving of these stats is currently disabled:
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

In [None]:
#Plot how the objective value (the trade performance metric) evolves over the trials
#Certainly you can afford more number of trials for further optimization
from optuna.visualization import plot_optimization_history
fig = plot_optimization_history(study)
# The figure is also written as a PNG into the results directory
fig.write_image("./"+config.RESULTS_DIR+"/opt_hist.png")
fig.show()

In [None]:
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
#Hyperparameter importance
#NOTE(review): the earlier remark that ent_coef is the most important does not
#apply here — ent_coef is not among the parameters sampled in this notebook.
fig = plot_param_importances(study)
fig.write_image("./"+config.RESULTS_DIR+"/params_importances.png")
fig.show()

## FURTHER WORKS

1.   You can tune more critical hyperparameters
2.   Multi-objective hyperparameter optimization using Optuna. Here we can maximize Sharpe and simultaneously minimize Volatility in our account value to tune our hyperparameters



In [None]:
# Plot the empirical distribution function of the study's objective values.
plot_edf(study)

In [None]:
# Download the pickled study from the Colab runtime (uses `files` from google.colab).
files.download('/content/final_ddpg_study__.pkl')