# Environment Setup

## Google Colab Installation

### Install Python Environment

In [1]:
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Code specific to Google Colab
    print("Running in Google Colab")

    # Additional setup commands for Colab
    !pip install neuralforecast
    !pip install gymnasium
    !pip install QuantStats
else:
    # Code for other environments (e.g., VS Code)
    print("Running in another environment (e.g., VS Code)")

Running in another environment (e.g., VS Code)


### Install RL Libraries

In [2]:
if IN_COLAB:
    # Retrieve the project modules (environment, utilities, agents, rewards)
    # from the MADDQN GitHub repository into /content so that the package
    # imports in the next cell can resolve them.
    # NOTE(review): every URL has a double slash after the host; presumably this
    # interacts with `--cut-dirs=4` — confirm before normalising the URLs.
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/environments/stockenv.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/cleandata.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/data.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/epsilon_decay.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/agentperform.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/prob_evaluate.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agents/ddqn.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agents/random.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agents/baseagent.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/rewards/stockmarket.py            
    # Move all directories and files from content/raw.githubusercontent.com to content/
    !mv /content/raw.githubusercontent.com/* /content/

    # Delete the raw.githubusercontent.com directory
    !rm -rf /content/raw.githubusercontent.com

# Activate Python Libraries & Random Seed

In [3]:
import os
import sys
import torch
import optuna
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import utilities.agentperform as agentperform
import utilities.cleandata as cln 
from utilities.epsilon_decay import linear_decay
from utilities.data import UniStockEnvDataStruct, TimesNetProcessing
from agents.ddqn import DdqnAgent
from agents.random import RandomAgent
from rewards.stockmarket import future_profit, risk_reward
from environments.stockenv import ContinuousOHLCVEnv
from datetime import datetime
from neuralforecast.core import NeuralForecast
from neuralforecast.models import TimesNet
from neuralforecast.losses.numpy import mae, mse
import logging


# 
# Attach no-op handlers to two PyTorch Lightning loggers.
# NOTE(review): presumably meant to quiet their console messages — confirm
# that a NullHandler alone has that effect in the target environment.
logging.getLogger("pytorch_lightning.utilities.rank_zero").addHandler(logging.NullHandler())
logging.getLogger("pytorch_lightning.accelerators.cuda").addHandler(logging.NullHandler())
# Set before any NeuralForecast call is made; per the original author this prevents a warning.
os.environ['NIXTLA_ID_AS_COL'] = '1' # Prevent Warning 

def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, the CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode with benchmarking turned off.

    Args:
        seed: Integer seed applied to all generators.
    """
    # CPU-side generators: Python, NumPy, PyTorch (in that order).
    for seed_generator in (random.seed, np.random.seed, torch.manual_seed):
        seed_generator(seed)

    # Nothing GPU-specific to configure without CUDA.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers multi-GPU setups

    # Force deterministic cuDNN kernels and disable auto-tuning.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False



# Pick the device handed to the agents: the string 'cpu' when CUDA is
# unavailable, otherwise the index of the current CUDA device.
if not torch.cuda.is_available():
    device = 'cpu'
    print("CUDA (GPU support) is not available. PyTorch is running on CPU.")
else:
    device = torch.cuda.current_device()
    print(f"GPU Name: {torch.cuda.get_device_name(device)}")


def decimal_to_text(decimal_number, scale=1000):
    """Return ``decimal_number * scale`` truncated to an integer, as text.

    Used to embed a decimal hyperparameter (e.g. a learning rate) in a
    file or directory name without a decimal point.

    The multiplication is done in exact decimal arithmetic on the number's
    shortest string form, so binary floating-point error cannot shave a unit
    off the result (``1.001 * 1000`` is ``1000.9999999999999`` as a float and
    would otherwise truncate to ``1000``).

    Args:
        decimal_number: Number to convert (int, float or anything whose
            ``str()`` is a valid decimal literal).
        scale: Multiplier applied before truncation. Defaults to 1000, i.e.
            three decimal places are kept; values smaller than ``1 / scale``
            all map to ``"0"``.

    Returns:
        The truncated (towards zero) scaled value as a string.

    Raises:
        ValueError / OverflowError: for NaN / infinite input.
    """
    from decimal import Decimal

    scaled = Decimal(str(decimal_number)) * Decimal(str(scale))
    return str(int(scaled))

CUDA (GPU support) is not available. PyTorch is running on CPU.


# RL Environment Setup

## Parameters & CSV Locations

In [4]:
RANDOM_SEED = 42
set_seed(RANDOM_SEED)

# Project root. `pwd` and `export_path` are read by later cells in every
# environment, so they must be defined in Colab too (previously they only
# existed outside Colab, which raised NameError further down).
if IN_COLAB:
    # The Colab cells work relative to the current working directory.
    pwd = os.getcwd()
else:
    pwd = "C:/programming/MADDQN"
    # Make the project packages importable; guard against duplicates on re-run.
    if pwd not in sys.path:
        sys.path.append(pwd)

# Output Path Location for CSV export
export_path = pwd + "/output_data"

# Input Data Location, File Name, Stock name for labels
# Base URL that the per-stock CSV file names below are resolved against.
input_url = 'https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/input_data'

# Short label used throughout the notebook -> CSV file name under `input_url`.
stock_inputs ={'DJI':'^DJI_daily.csv',
               'NDAQ': '^IXIC_daily.csv',
               'SP500': '^SPX_daily.csv',
               'AAPL': 'AAPL_daily.csv',
               'AMZN': 'AMZN_daily.csv',
               'GOOGL': 'GOOGL_daily.csv',
               'MSFT': 'MSFT_daily.csv',
               'SINE': 'sine_wave_daily.csv',
               'FORD': 'F_daily.csv',
               'JNJ': 'JNJ_daily.csv',
               'NEE': 'NEE_daily.csv',
               'PFE': 'PFE_daily.csv',
               'TSLA': 'TSLA_daily.csv',
               'COKE': 'COKE_daily.csv',
               'PG': 'PG_daily.csv'}

# Training Inputs
trn_keys = ['DJI','NDAQ','SP500']

# Validation Inputs
# Same list object as `trn_keys` (an alias, not a copy): validation runs on
# the training tickers.
val_keys = trn_keys

# Testing Inputs
tst_keys = ['AAPL','AMZN','GOOGL','MSFT','FORD','JNJ','NEE','PFE','TSLA','COKE','PG']

window_size = 28 # Needs to match the size Timesnet is trained on
price_based_on = 'close'
# NOTE(review): `columns` is not referenced elsewhere in the visible notebook.
columns = ['open','high','low','close','volume']


# Metrics Interested in
# Keys of the per-test metrics dictionary that get aggregated and compared.
metrics = ['n_trades','n_wins', 'win_percentage','cumulative_return','sortino','max_drawdown','sharpe', 'trade_dur_avg']

# Ranking direction per available metric, passed as `rank_order` to the
# significance tests.
# NOTE(review): presumably 'max' means larger is better and 'min' smaller is
# better — confirm against prob_evaluate.
aval_metrics_rank_dic = {'n_trades':'max','n_wins': 'max' ,'n_losses':'max','win_percentage':'max','cumulative_return':'max', 
                 'sortino':'max','max_drawdown':'min', 'sharpe':'max', 'trade_dur_avg':'max', 'trade_dur_min':'max',
                 'trade_dur_max':'max','buy_hold':'max'}
## See agentperform.py -> results dictionary for options





## RL Environment Generation

In [5]:
# Per-ticker data structures and trading environments, keyed by stock label.
env_data = {}
env = {}

for stock, file in stock_inputs.items():
    # Only tickers that take part in training, validation or testing get an
    # environment.
    if stock not in set(trn_keys + val_keys + tst_keys):
        continue

    # Load the price history and wrap it in the rolling-window data structure.
    df = cln.YAHOO_csv_input(file, input_url)
    data_dic = UniStockEnvDataStruct(df, price_based_on, window_size)
    env_data[stock] = data_dic

    # Build the trading environment from the windowed OHLCV and price arrays.
    env[stock] = ContinuousOHLCVEnv(
        name=stock,
        ohlcv_data=data_dic['rw_raw_env'],
        stock_price_data=data_dic['rw_raw_price_env'],
        commission_rate=0.005,
    )

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")

# Workbench Setup

## Parameters

In [6]:
hyperparam_discovery = False                # Limited exploratory hyperparameter discovery for the RL agent
n_prediction = 5                            # Number of price predictions in the future by TimesNet (required for RL agent's input layer)
train_tn_model = False                      # Train the TimesNet preprocessing model (processed every cycle)
import_tn_model = False                     # Import a saved TimesNet preprocessing model (processed every cycle)
import_tn_csvs = True                       # Use imported CSVs from the preprocessing model (no processing, straight to RL agent)
tn_path = pwd + '/gen_data/timesnet/'       # Location the TimesNet model is saved to / loaded from
no_tn_preprocessing = False                 # Skip TimesNet preprocessing (agents get no state-modification function)

timesnet = TimesNetProcessing(env_data)

# TimesNet preprocessing mode -> function applied to the environment state.
env_mod_func_dic = {'train': timesnet.process,
               'import':timesnet.process,
               'csv':timesnet.csv_process,
               'none': None}

# Exactly one mode flag must be set. Counting the flags is required here: a
# chained XOR is also true when three of the four flags are set.
tn_mode_flags = {'train': train_tn_model,
                 'import': import_tn_model,
                 'csv': import_tn_csvs,
                 'none': no_tn_preprocessing}
selected_tn_modes = [mode for mode, enabled in tn_mode_flags.items() if enabled]

if len(selected_tn_modes) != 1:
    raise ValueError("Exactly one TimesNet Preprocessing Option must be selected "
                     f"(selected: {selected_tn_modes})")

env_mod_func = env_mod_func_dic[selected_tn_modes[0]]
        
        



## Metric Function

In [7]:
def metric_function(env):
    """Return the portfolio value change recorded in the environment's last step.

    Args:
        env: Object exposing ``step_info``, a sequence of per-step dictionaries
            with the keys 'New Portfolio Value' and 'Portfolio Value'.

    Returns:
        'New Portfolio Value' minus 'Portfolio Value' of the final entry.
    """
    last_step = env.step_info[-1]
    return last_step['New Portfolio Value'] - last_step['Portfolio Value']

# TimesNet Preprocessing

## Training

### Parameters

In [8]:
if train_tn_model:
    # TimesNet forecaster used for preprocessing; only built when training
    # is requested.
    model = TimesNet(
        h=n_prediction,                 # Forecast horizon
        input_size=window_size,         # Length of batches
        batch_size=1,                   # Number of timeseries to predict
        # futr_exog_list=remaining_columns,  (disabled)
        hidden_size=128,                # Size of embedding for embedding and encoders
        dropout=0.40,                   # Dropout for embeddings
        conv_hidden_size=3,             # Channels for the inception block
        top_k=5,                        # Top number of periods from FFT considered
        num_kernels=13,                 # Number of kernels for the inception block
        encoder_layers=3,               # Number of encoders
        max_steps=1000,                 # Number of training steps
        early_stop_patience_steps=10,   # Early stoppage on validation
        val_check_steps=100,            # Validation check every X steps
        windows_batch_size=150,         # Number of windows in training epoch
        num_workers_loader=7,
        learning_rate=0.0003,
        random_seed=RANDOM_SEED,
    )

### Code Execution

In [9]:
# Fit the TimesNet model on every training ticker, then save it to `tn_path`.
if train_tn_model:
  nf = NeuralForecast(models=[model], freq='d')
  # NOTE(review): this `results` name is reused later for the agent test results.
  results = {}
  for key in trn_keys:
    # NOTE(review): `env[key]` is the ContinuousOHLCVEnv built in the
    # environment-generation cell, not a DataFrame — confirm this is the
    # intended `df` argument. Also confirm that `val_size=0.2` is accepted as
    # a fraction and that `fit` returns something worth storing.
    results[key] = nf.fit(df=env[key],val_size=0.2)

  # Persist the fitted model (and dataset) so it can be loaded instead of retrained.
  nf.save(path= tn_path,
          model_index=None,
          overwrite=True,
          save_dataset=True)

## Load Model

In [10]:
if import_tn_model:
    # Resolve the model directory: under the working directory in Colab,
    # otherwise the same `tn_path` the training cell saves to. (Previously
    # nothing was loaded outside Colab, leaving `nf` undefined.)
    if IN_COLAB:
        model_path = os.path.join(os.getcwd(), 'gen_data', 'timesnet')
    else:
        model_path = tn_path

    # Fail early with a clear message when the saved model is missing.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model path {model_path} does not exist.")

    nf = NeuralForecast.load(path=model_path)

## CSV Upload

In [11]:
if import_tn_csvs:
    # Location of the pre-generated TimesNet CSVs: the GitHub repository when
    # running in Colab, the local project checkout otherwise.
    csv_path = ('https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/gen_data/csvs/'
                if IN_COLAB
                else pwd +'/gen_data/csvs/')

    # Stock label -> TimesNet CSV file name ("<label>_tn.csv").
    stock_tn = {
        ticker: f'{ticker}_tn.csv'
        for ticker in ('DJI', 'NDAQ', 'SP500',
                       'AAPL', 'AMZN', 'GOOGL', 'MSFT',
                       'FORD', 'JNJ', 'NEE', 'PFE',
                       'TSLA', 'COKE', 'PG')
    }



# Exploratory Hyperparameterization

## Interval Setup

In [12]:
if hyperparam_discovery:
    # (start, end) date strings for the hyperparameter-search splits.
    hyp_training_range = ('2007-01-01','2010-12-31')
    hyp_validation_range = ('2013-01-01', '2014-12-31')
    hyp_testing_range = ('2016-01-01', '2017-12-31')

    # The same ranges as datetime objects.
    hyp_trn_dt_range = [datetime.strptime(day, "%Y-%m-%d") for day in hyp_training_range]
    hyp_val_dt_range = [datetime.strptime(day, "%Y-%m-%d") for day in hyp_validation_range]
    hyp_tst_dt_range = [datetime.strptime(day, "%Y-%m-%d") for day in hyp_testing_range]

    # Stock label -> rolling-window indices covering each split.
    hyp_trn_idx, hyp_val_idx, hyp_tst_idx = {}, {}, {}

    for stock, file in stock_inputs.items():
        # A ticker may belong to several splits; each membership is checked
        # independently.
        if stock in trn_keys:
            hyp_trn_idx[stock] = env_data[stock].gen_rw_idxs(hyp_trn_dt_range)
        if stock in val_keys:
            hyp_val_idx[stock] = env_data[stock].gen_rw_idxs(hyp_val_dt_range)
        if stock in tst_keys:
            hyp_tst_idx[stock] = env_data[stock].gen_rw_idxs(hyp_tst_dt_range)

    display(hyp_trn_idx,hyp_val_idx,hyp_tst_idx)

## Parameter Search & Code Execution

In [13]:
# Optuna hyperparameter search for the DDQN agent; runs only when
# `hyperparam_discovery` is enabled.
if hyperparam_discovery:
    
    # For Objective function, need to create agent name before to link agent with environment
    agent_name = 'hyp_discovery_agent'
    agent_path = export_path + '/' + agent_name
    # Name of the early-stopping metric handed to the agent's `train` call.
    metric = 'val_tot_r'
    # Upper bound for the replay-buffer size: DJI training span plus the window
    # size, rounded to the nearest hundred, minus 10.
    # NOTE(review): manually derived from the DJI indices only; may not hold for other tickers.
    max_len_buf = np.round(hyp_trn_idx['DJI'][1] - hyp_trn_idx['DJI'][0] + window_size, -2) -10 # manual input, could be error here if 
    print(f'Max Mem Length: {max_len_buf}')
           
    def objective(trial):
        """Optuna objective: build, train and test one DDQN agent configuration.

        Samples network, optimizer and replay-memory hyperparameters from
        `trial`, trains a single agent sequentially on every environment in
        `trn_keys`, tests it on every environment in `tst_keys`, and exports
        per-ticker step data (CSV) and a trading graphic (PNG) into a
        directory named after the hyperparameter combination.

        Returns:
            Mean over the test tickers of the mean 'tot_r' column of the
            agent's testing episodic data.
        """
    
        # Define the hyperparameters to search over
        
        ## NN hyperparameters
        sug_hidden_layers = trial.suggest_int('hidden_layers', low=1, high=3)
        sug_hidden_size = trial.suggest_int('hidden_size', low=64, high=512, step=64)
        sug_update_q_freq = trial.suggest_int('update_q_freq',low=1,high=5)
        sug_update_tgt_freq = trial.suggest_int('update_tgt_freq',low=5,high=15)
        
        ## Activation Function Passing
        # Optuna samples the name; the module instance is looked up afterwards.
        activation_functions = {
        'LRELUd': nn.LeakyReLU(),
        'LRELUs02': nn.LeakyReLU(negative_slope=0.2),
        'GELU': nn.GELU(),
        'TANH': nn.Tanh(),
        'SELU':nn.SELU(),
        'SILU': nn.SiLU()
        }
        sug_activation_function_name = trial.suggest_categorical('activation_function', list(activation_functions.keys()))
        sug_activation_function = activation_functions[sug_activation_function_name]
        
        # The string literal below is disabled code (reward-function search); it has no effect.
        """
        ## Reward Function Passing
        reward_functions = {
        'profit': future_profit(None,5),
        'risk': risk_reward(None,5),
        }
        sug_reward_function_name = trial.suggest_categorical('reward_function', list(reward_functions.keys()))
        sug_reward_function = reward_functions[sug_reward_function_name]
        """
        ## Optimizer hyperparameters
        sug_opt_lre = trial.suggest_categorical('opt_lre',[0.0001,0.0005,0.001, 0.005, 0.01, 0.05, 0.1])
        sug_gamma = trial.suggest_float('gamma',low=0.90,high=0.99,step=0.01)
        ## Memory Replay hyperparameters
        # The batch size is bounded above by the buffer size sampled for this trial.
        sug_buffer_size = trial.suggest_int('buffer_size',low=100,high=max_len_buf,step=10)
        sug_batch_size = trial.suggest_int('batch_size',low=10,high=sug_buffer_size,step=5)
        
        # Saving Setup
        ## Current Parameter Values:
        cur_n_fcl = trial.params['hidden_layers']
        cur_fcl_size = trial.params['hidden_size']
        cur_q_freq = trial.params['update_q_freq']
        cur_tgt_freq = trial.params['update_tgt_freq']
        cur_act_func = trial.params['activation_function']
        #cur_rwd_func = trial.params['reward_function']
        # NOTE(review): with the default x1000 scaling, the learning rates 0.0001
        # and 0.0005 both become "0", so their runs share the same LR suffix.
        cur_lre = decimal_to_text(trial.params['opt_lre'])
        cur_buf_size = trial.params['buffer_size']
        cur_bat_size = trial.params['batch_size']
        
        ## Create Notation for Hyperparameter Setup    
        # gamma is not part of the name, so trials differing only in gamma share a directory.
        test_name = (f'{cur_n_fcl}FC{cur_fcl_size}_{cur_act_func}_' +
                    f'BT{cur_bat_size}BF{cur_buf_size}_Q{cur_q_freq}_' +
                    f'TGT{cur_tgt_freq}_LR{cur_lre}')
        
        ## Create Dir to save results
        test_name_path =  agent_path + '/' + test_name 
        if not os.path.exists(test_name_path):
            os.makedirs(test_name_path)
            print(f"Directory '{test_name_path}' created successfully.")
        else:
            print(f"Directory '{test_name_path}' already exists.")
        
        # Create Agent with hyperparameters  
        best_agent = DdqnAgent(name=agent_name,
                            environment=None,
                            reward_function = future_profit,
                            reward_params = {'n':5},
                            env_state_mod_func = env_mod_func,     
                            input_size= 11,
                            hidden_size= sug_hidden_size, 
                            output_size=3, 
                            activation_function = sug_activation_function,
                            num_hidden_layers = sug_hidden_layers,                  
                            buffer_size= sug_buffer_size, 
                            batch_size = sug_batch_size,
                            alpha = sug_opt_lre,
                            gamma = sug_gamma,
                            opt_wgt_dcy = 0.01,
                            dropout_rate = 0.25,                
                            device = device)
        
        # Training Model
        # The same agent instance is trained on each training environment in turn.
        for key, rl_env in env.items():
            
            if key in trn_keys:
                rl_env.add_agent(agent_name)
                rl_env.set_decision_agent(agent_name)
                if import_tn_csvs:
                    timesnet.upload_csv(f'{csv_path}/{stock_tn[key]}')    #Requires outside variable         
                best_agent.set_environment(rl_env)
                best_agent.train(start_idx=hyp_trn_idx[key][0],
                            end_idx=hyp_trn_idx[key][1],
                            training_episodes= 1,
                            epsilon_decya_func= linear_decay,
                            initial_epsilon= 0.9,
                            final_epsilon= 0.1,
                            update_q_freq= sug_update_q_freq,
                            update_tgt_freq= sug_update_tgt_freq,
                            save_path = export_path,
                            val_start_idx = hyp_val_idx[key][0],
                            val_end_idx = hyp_val_idx[key][1],
                            metric_func= metric_function,
                            min_training_episodes = 1, 
                            early_stop = True,
                            stop_metric = metric,
                            stop_patience = 3,
                            stop_delta = 0.001)
                rl_env.remove_agent(agent_name)

        # Test Model
        
        # One score per test ticker; their mean is the trial's objective value.
        scores = []
        for key, rl_env in env.items():
        
            if key in tst_keys:
                rl_env.add_agent(agent_name)
                rl_env.set_decision_agent(agent_name)
                if import_tn_csvs:
                    timesnet.upload_csv(f'{csv_path}/{stock_tn[key]}')    #Requires outside variable              
                best_agent.set_environment(rl_env)              
                best_agent.test(start_idx = hyp_tst_idx[key][0],
                            end_idx = hyp_tst_idx[key][1],
                            metric_func= metric_function, 
                            testing_episodes=1)
                rl_env.remove_agent(agent_name)

                ## Save Test Metric Result(s) into 
                ddqn_tst = best_agent.get_testing_episodic_data()
                score = ddqn_tst['tot_r'].mean()
                scores.append(score)
        
                ## Export Test data
                # Environment and agent step data side by side in one frame.
                a = rl_env.get_step_data()
                b = best_agent.get_step_data()
                combined_df = pd.concat([a,b],axis=1)
                tst_df_file_name  = f'TST-{key}' + test_name + '.csv'
                # Despite the `trn_` prefix this is the path of the *test* CSV.
                trn_df_save_path = test_name_path + '/' + tst_df_file_name
                combined_df.to_csv(trn_df_save_path)

                ## Generate Trading Graphic
                # Same path as the CSV with the '.csv' suffix swapped for '.png'.
                tst_graph_file_name = trn_df_save_path[:-4] + '.png'
                agentperform.agent_stock_performance(env[key].stock_price_data[hyp_tst_idx[key][0]:hyp_tst_idx[key][1]][:,-1,0], # Selecting all batches, last price of window, closing price
                                                    combined_df['Env Action'].to_numpy(),
                                                    key,
                                                    best_agent.get_name(),
                                                    display_graph=False,
                                                    save_graphic=True,
                                                    path_file=tst_graph_file_name)

        mean = np.mean(scores)
        return mean

    # Maximise the mean test score over 100 trials.
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    print("Best value: ", study.best_value)
    print("Best params: ", study.best_params)
        
    
        

    
    

# Agent Setup

## Parameters

In [14]:
# Agent Type Setup: agent name -> agent class.
agent_classes = dict.fromkeys(('profit', 'risk'), DdqnAgent)

# Multi-agent setup: decision agent -> list of agents attached to the
# environment together with it (for multi agent, the key is the decision agent).
agent_setup = {
    'profit': ['profit'],
    'risk': ['risk'],
    # 'final': ['profit', 'risk'],
    # 'macro': 'macro',
    # 'opt': ['profit', 'risk', 'macro'],
}

agent_name_list = list(agent_classes)

# Constructor keyword arguments per agent. Both agents share the same network
# and optimizer settings and differ only in name and reward function.
agent_params = {
    name: {
        'name': name,
        'environment': None,
        'reward_function': reward_fn,
        'reward_params': {'n':5},
        'env_state_mod_func': env_mod_func,
        'input_size': 11,
        'hidden_size': 256,
        'output_size':3,
        'activation_function': nn.Tanh(),
        'num_hidden_layers': 2,
        'buffer_size': 150,
        'batch_size': 30,
        'alpha': 0.005,
        'gamma':0.97,
        'opt_wgt_dcy': 0.01,
        'dropout_rate': 0.25,
        'device': device,
    }
    for name, reward_fn in zip(agent_name_list, (future_profit, risk_reward))
}


## Agent Generation

In [15]:
# Instantiate every configured agent: agent name -> agent instance.
agents_dic = {}

for agent_name, agent_class in agent_classes.items():
    selected_agent = agent_class(**agent_params[agent_name])
    agents_dic.update({agent_name: selected_agent})
            


# Agent Training

In [16]:
# (start, end) date strings for the training, validation and testing splits.
training_range = ('2007-01-01','2009-12-31')
validation_range = ('2011-01-01', '2011-12-31')
testing_range = ('2012-01-01', '2012-12-31')

# The same ranges as datetime objects.
trn_dt_range = [datetime.strptime(day, "%Y-%m-%d") for day in training_range]
val_dt_range = [datetime.strptime(day, "%Y-%m-%d") for day in validation_range]
tst_dt_range = [datetime.strptime(day, "%Y-%m-%d") for day in testing_range]

# Stock label -> rolling-window indices covering each split.
trn_idx, val_idx, tst_idx = {}, {}, {}

for stock, file in stock_inputs.items():
    # A ticker may belong to several splits; each membership is checked
    # independently.
    if stock in trn_keys:
        trn_idx[stock] = env_data[stock].gen_rw_idxs(trn_dt_range)
    if stock in val_keys:
        val_idx[stock] = env_data[stock].gen_rw_idxs(val_dt_range)
    if stock in tst_keys:
        tst_idx[stock] = env_data[stock].gen_rw_idxs(tst_dt_range)


In [17]:
# Keyword arguments shared by every agent `train` call (the start/end and
# validation indices are supplied per ticker at the call site).
training_params = dict(
    training_episodes=10,
    epsilon_decya_func=linear_decay,
    initial_epsilon=0.9,
    final_epsilon=0.1,
    update_q_freq=1,
    update_tgt_freq=15,
    save_path=export_path,
    metric_func=metric_function,
    min_training_episodes=2,
    early_stop=True,
    stop_metric='Q1_loss',
    stop_patience=2,
    stop_delta=0.001,
)

In [18]:
# Train every agent setup on every training ticker, one environment at a time.
for decision_agent, agents_in_setup in agent_setup.items():
    for key in trn_keys:
        rl_env = env[key]

        # Using Csvs
        if import_tn_csvs:
            timesnet.upload_csv(f'{csv_path}/{stock_tn[key]}')

        # Setup agents with environment
        for agent in agents_in_setup:
            rl_env.add_agent(agent)
            agents_dic[agent].set_environment(rl_env)
        rl_env.set_decision_agent(decision_agent)

        # Train sub-agents first. Names are compared by value (`!=`); an
        # identity check (`is not`) on strings only works by accident of interning.
        for agent in agents_in_setup:
            if agent != decision_agent:
                agents_dic[agent].train(start_idx=trn_idx[key][0],
                                        end_idx=trn_idx[key][1],
                                        val_start_idx=val_idx[key][0],
                                        val_end_idx=val_idx[key][1],
                                        **training_params)

        # Train Decision Agent
        agents_dic[decision_agent].train(start_idx=trn_idx[key][0],
                                         end_idx=trn_idx[key][1],
                                         val_start_idx=val_idx[key][0],
                                         val_end_idx=val_idx[key][1],
                                         **training_params)

        # Detach the agents so the environment can be reused by the next setup.
        for agent in agents_in_setup:
            rl_env.remove_agent(agent)
            agents_dic[agent].set_environment(None)

        # Disabled: export training session data to CSV.
        # ddqn_trn = best_ddqn_agent.get_training_episodic_data()
        # ddqn_trn.to_csv('test.csv')
        # display(ddqn_trn)
        # env[key].remove_agent(best_ddqn_agent.get_name())

DJI ENV: Agent profit added
DJI ENV: Agent profit assigned as decision agent

profit: Training Initialized on DJI[0:728] -> Validation on DJI[1008:1232]
profit: EP 5 of 10 Finished -> ΔQ1 = 1.02, ΔQ2 = 0.68 | ∑R = 16.05, μR = 0.02 σR = 2.36 | Loss: Q1_loss = 1.02 -> EarlyStopping counter: 2 out of 2                                                                                                       
profit: Early Stoppage on EP 5 -> Best QNet Loaded from EP 3
profit: Training finished on DJI[0:728]

DJI ENV: Agent profit removed
NDAQ ENV: Agent profit added
NDAQ ENV: Agent profit assigned as decision agent

profit: Training Initialized on NDAQ[0:728] -> Validation on NDAQ[1008:1232]
profit: EP 4 of 10 Finished -> ΔQ1 = 0.89, ΔQ2 = 1.18 | ∑R = 33.92, μR = 0.05 σR = 2.80 | Loss: Q1_loss = 0.89 -> EarlyStopping counter: 2 out of 2                                                                                                       
profit: Early Stoppage on EP 4 -> Best QNet Loaded from 

In [19]:
# NOTE(review): `display()` is called with no arguments; this looks like a leftover placeholder cell.
display()

# Agent Testing

## Parameters

In [20]:
# Keyword arguments for `test`, keyed by agent class. The two entries differ
# only in the number of testing episodes.
testing_params = {
    agent_cls: {
        'metric_func': metric_function,
        'metric_func_arg': {},
        'testing_episodes': n_episodes,
    }
    for agent_cls, n_episodes in ((DdqnAgent, 3), (RandomAgent, 100))
}


## Execution

In [21]:
# Run every agent setup on every test ticker and collect trading metrics.
# Resulting nesting: results[stock][agent][test_interval][test_num] -> metrics dict.
# NOTE(review): `result_dic_struct` is not referenced elsewhere in the visible
# notebook; it appears to document the nesting above.
result_dic_struct = ['stock','agent','test_interval','test_num']
results = {}

for key in tst_keys:
    
    # Init Record[Stock]
    results[key] = {}
    # Test interval label "<start_idx>:<end_idx>".
    test_key = f'{tst_idx[key][0]}:{tst_idx[key][1]}'
    # Price series for the test interval: index -1 of each window, column 0.
    stock_price_data = env_data[key]['rw_raw_price_env'][tst_idx[key][0]:tst_idx[key][1],-1,0]
    rl_env = env[key]

    for decision_agent, agents_in_setup in agent_setup.items():
        
        # Init Record[Stock][Agent]
        results[key][decision_agent] = {}
        
        # Init Record[Stock][Agent][test_interval]
        results[key][decision_agent][test_key] = {}   # Different Test Keys will need loop
        
        # Using Csvs       
        if import_tn_csvs:
            timesnet.upload_csv(f'{csv_path}/{stock_tn[key]}')
        
        # Setup agents with environment  
        # The set removes duplicates when the decision agent is also listed in its own setup.
        for agent in set([decision_agent] + agents_in_setup):
            rl_env.add_agent(agent)
            agents_dic[agent].set_environment(rl_env)
        rl_env.set_decision_agent(decision_agent)
               
        # Test Decision Agent
        # Test parameters are selected by the decision agent's class.
        params = testing_params[agent_classes[decision_agent]]
        agents_dic[decision_agent].test(start_idx=tst_idx[key][0],
                                        end_idx=tst_idx[key][1],
                                        **params)
        test_results = agents_dic[decision_agent].get_testing_episodic_data()
        trade_actions_per_test = test_results['tst_actions']
               
        # One metrics dictionary per entry of 'tst_actions'.
        for idx, action_set in enumerate(trade_actions_per_test):
            test_metrics = agentperform.agent_stock_performance(stock_price_ts=np.array(stock_price_data),
                                                                trade_ts=np.array(action_set),
                                                                stock_name=key,
                                                                agent_name=decision_agent,
                                                                display_graph=False, 
                                                                save_graphic= False,
                                                                path_file = None)
            # Stock and agent are already encoded in the nesting keys.
            del test_metrics['stock']
            del test_metrics['agent_name']                                                                     
            results[key][decision_agent][test_key][idx] = test_metrics
        
        # Remove Agent
        for agent in set([decision_agent] + agents_in_setup):
            rl_env.remove_agent(agent)
            agents_dic[agent].set_environment(None)

display(results)

AAPL ENV: Agent profit added
AAPL ENV: Agent profit assigned as decision agent

profit: Testing Initialized on AAPL[1260:1482]
profit - AAPL[1260:1482] - Testing Finished - EP - 3 of 3-> ∑R = 550.63, μR = 2.48, σR = 1560.83
profit: Testing Complete on AAPL[1260:1482]

AAPL ENV: Agent profit removed
AAPL ENV: Agent risk added
AAPL ENV: Agent risk assigned as decision agent

risk: Testing Initialized on AAPL[1260:1482]
risk - AAPL[1260:1482] - Testing Finished - EP - 3 of 3-> ∑R = 0.00, μR = 0.00, σR = 0.00
risk: Testing Complete on AAPL[1260:1482]

AAPL ENV: Agent risk removed
AMZN ENV: Agent profit added
AMZN ENV: Agent profit assigned as decision agent

profit: Testing Initialized on AMZN[1260:1482]
profit - AMZN[1260:1482] - Testing Finished - EP - 3 of 3-> ∑R = 12657.01, μR = 57.01, σR = 1765.93
profit: Testing Complete on AMZN[1260:1482]

AMZN ENV: Agent profit removed
AMZN ENV: Agent risk added
AMZN ENV: Agent risk assigned as decision agent

risk: Testing Initialized on AMZN[1260

{'AAPL': {'profit': {'1260:1482': {0: {'n_trades': 12,
     'n_wins': 5,
     'n_losses': 7,
     'win_percentage': 41.66666666666667,
     'cumulative_return': 1.1337761512922764,
     'sortino': 7.576785390926368,
     'max_drawdown': -8.781273057841886,
     'sharpe': 3.041354017056678,
     'trade_dur_avg': 12.333333333333334,
     'trade_dur_min': 1,
     'trade_dur_max': 48,
     'buy_hold': 1.0329171396140748},
    1: {'n_trades': 12,
     'n_wins': 5,
     'n_losses': 7,
     'win_percentage': 41.66666666666667,
     'cumulative_return': 1.1337761512922764,
     'sortino': 7.576785390926368,
     'max_drawdown': -8.781273057841886,
     'sharpe': 3.041354017056678,
     'trade_dur_avg': 12.333333333333334,
     'trade_dur_min': 1,
     'trade_dur_max': 48,
     'buy_hold': 1.0329171396140748},
    2: {'n_trades': 12,
     'n_wins': 5,
     'n_losses': 7,
     'win_percentage': 41.66666666666667,
     'cumulative_return': 1.1337761512922764,
     'sortino': 7.576785390926368,
  

# Aggregating Test Results

In [22]:
# Aggregate the per-episode test metrics into (mean, std) per agent, stock,
# test interval and metric:
#   aggerate_results[agent][stock][test_key][metric] -> (mean, std)
aggerate_results = {}
for agent in agent_name_list:
    aggerate_results[agent] = {}
    # Number of test episodes recorded for this agent's class.
    n_tests = testing_params[agent_classes[agent]]['testing_episodes']

    for stock in tst_keys:
        test_key = f'{tst_idx[stock][0]}:{tst_idx[stock][1]}'
        per_test = results[stock][agent][test_key]

        # One row per test episode, one column per metric. The reshape keeps
        # the array two-dimensional even when there are no episodes.
        values = np.array(
            [[per_test[test_num][key] for key in metrics] for test_num in range(n_tests)],
            dtype=float,
        ).reshape(-1, len(metrics))

        # Statistics are computed once per stock, after all episodes have been
        # collected (not once per episode).
        means_for_metrics = np.mean(values, axis=0)
        std_for_metrics = np.std(values, axis=0)

        aggerate_results[agent][stock] = {
            test_key: {
                metric: (means_for_metrics[idx], std_for_metrics[idx])
                for idx, metric in enumerate(metrics)
            }
        }

# Per metric: a (dataset x agent) table of rounded mean scores plus the
# per-agent average across datasets.
#   summarized_aggerate_results[metric] -> (DataFrame, [(agent, mean), ...])
summarized_aggerate_results = {}

for metric in metrics:
    model_list = []
    dataset_name = []
    scores = []

    for agent, per_stock in aggerate_results.items():
        model_list.append(agent)
        score_list = []
        for stock, per_test in per_stock.items():
            for test, stats in per_test.items():
                # Dataset label "<stock>-<test interval>", recorded once.
                run_name = f'{stock}-{test}'
                if run_name not in dataset_name:
                    dataset_name.append(run_name)
                # Element 0 of the stored tuple is the mean.
                score_list.append(np.round(stats[metric][0], 2))
        scores.append(score_list)

    # Rows are datasets, columns are agents.
    df = pd.DataFrame(np.array(scores).T, columns=model_list)
    df['dataset'] = dataset_name

    # Move the 'dataset' column to the front.
    df = df[['dataset'] + [col for col in df.columns if col != 'dataset']]

    model_means = list(zip(model_list, df[model_list].mean()))

    summarized_aggerate_results[metric] = (df, model_means)


display(summarized_aggerate_results)


{'n_trades': (            dataset  profit  risk
  0    AAPL-1260:1482    12.0   0.0
  1    AMZN-1260:1482    15.0   0.0
  2   GOOGL-1260:1482    18.0   0.0
  3    MSFT-1260:1482    18.0   0.0
  4    FORD-1260:1482    23.0   0.0
  5     JNJ-1260:1482    15.0   0.0
  6     NEE-1260:1482    19.0   0.0
  7     PFE-1260:1482    18.0   0.0
  8      TSLA-382:604    23.0   0.0
  9    COKE-1260:1482    25.0   0.0
  10     PG-1260:1482    15.0   0.0,
  [('profit', 18.272727272727273), ('risk', 0.0)]),
 'n_wins': (            dataset  profit  risk
  0    AAPL-1260:1482     5.0   0.0
  1    AMZN-1260:1482     8.0   0.0
  2   GOOGL-1260:1482     7.0   0.0
  3    MSFT-1260:1482     6.0   0.0
  4    FORD-1260:1482    12.0   0.0
  5     JNJ-1260:1482     6.0   0.0
  6     NEE-1260:1482     8.0   0.0
  7     PFE-1260:1482    12.0   0.0
  8      TSLA-382:604     8.0   0.0
  9    COKE-1260:1482    10.0   0.0
  10     PG-1260:1482     6.0   0.0,
  [('profit', 8.0), ('risk', 0.0)]),
 'win_percentage': (   

# Significance Testing

In [23]:
from utilities import prob_evaluate

# Run the rank-based significance tests once per metric and show every
# intermediate result. All inputs (`metrics`, `summarized_aggerate_results`,
# `model_list`, `aval_metrics_rank_dic`) come from earlier cells.
# NOTE(review): the saved output shows `inf` statistics together with a warning
# on the `iman_davenport_stat` division for several metrics -- presumably when
# one agent is ranked first on every dataset; confirm inside `prob_evaluate`.
stat_level = 0.95  # passed to both tests; presumably a confidence level -- TODO confirm

for metric in metrics:
    # First element of the stored tuple is the per-dataset score table.
    metric_scores_df = summarized_aggerate_results[metric][0]
    test = prob_evaluate.generate_rank_array_from_dataframe(
        metric_scores_df,
        model_list,
        equal_rank_behav="mean",
        rank_order=aval_metrics_rank_dic[metric],
    )
    display(test)

    # Omnibus test over all agents.
    stat, critical_f_value, reject_null_hypo = prob_evaluate.iman_davenport_test(test, stat_level)
    display(stat, critical_f_value, reject_null_hypo)

    # Pairwise comparison between agents.
    results1 = prob_evaluate.nemenyi_test(test, stat_level, model_list)
    display(results1)

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]])

  iman_davenport_stat = ((N-1)*chi_square)/(N*(k-1)-chi_square)


inf

4.9646027437307145

True

[(('profit', 'risk'), 1.0, 0.5909622353724167, True)]

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]])

  iman_davenport_stat = ((N-1)*chi_square)/(N*(k-1)-chi_square)


inf

4.9646027437307145

True

[(('profit', 'risk'), 1.0, 0.5909622353724167, True)]

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]])

  iman_davenport_stat = ((N-1)*chi_square)/(N*(k-1)-chi_square)


inf

4.9646027437307145

True

[(('profit', 'risk'), 1.0, 0.5909622353724167, True)]

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]])

  iman_davenport_stat = ((N-1)*chi_square)/(N*(k-1)-chi_square)


inf

4.9646027437307145

True

[(('profit', 'risk'), 1.0, 0.5909622353724167, True)]

array([[1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2],
       [2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1]])

2.6041666666666523

4.9646027437307145

False

[(('profit', 'risk'), 0.4545454545454546, 0.5909622353724167, False)]

array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

  iman_davenport_stat = ((N-1)*chi_square)/(N*(k-1)-chi_square)


inf

4.9646027437307145

True

[(('profit', 'risk'), 1.0, 0.5909622353724167, True)]

array([[1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2],
       [2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1]])

2.6041666666666523

4.9646027437307145

False

[(('profit', 'risk'), 0.4545454545454546, 0.5909622353724167, False)]

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]])

  iman_davenport_stat = ((N-1)*chi_square)/(N*(k-1)-chi_square)


inf

4.9646027437307145

True

[(('profit', 'risk'), 1.0, 0.5909622353724167, True)]

# Final Hyperparameter Tuning (tentative)

In [24]:
# The agent name is fixed before the objective function is defined so that each
# environment can be linked to the agent ahead of time.
# NOTE: `export_path` and `environments` are not defined in this cell and must
# already exist in the kernel; the saved output of this cell is a NameError for
# `environments`.
metric = 'val_ror'
agent_name = 'REWARD_DDQN_AGENT'
agent_path = '/'.join((export_path, agent_name))

# Register the agent with every environment and make it the decision agent.
for key, env in environments.items():
    env.add_agent(agent_name)
    env.set_decision_agent(agent_name)

def objective(trial):
    """Optuna objective: train and test one DDQN configuration.

    Samples network, optimizer and replay-buffer hyperparameters from ``trial``,
    builds a ``DdqnAgent`` with them, trains it on every environment whose key
    is in ``trn_keys`` and tests it on every environment whose key is in
    ``tst_keys``. Training data, test step data and a trading graphic are
    written under ``agent_path/<test_name>``.

    Reads these names from the notebook namespace: ``agent_name``,
    ``agent_path``, ``export_path``, ``metric``, ``environments``, ``trn_keys``,
    ``tst_keys``, ``training_range``, ``validation_range``, ``testing_range``,
    ``ALPHA``, ``GAMMA``, ``device``, ``future_profit``, ``flatten_state``,
    ``linear_decay``, ``decimal_to_text``, ``DdqnAgent`` and ``agentperform``.

    Args:
        trial: The Optuna trial used to suggest hyperparameter values.

    Returns:
        ``np.mean`` of the per-environment mean ``'Total Reward'`` collected
        during testing.
    """
    # ---- Hyperparameter search space (suggestion order kept as-is) ----
    n_layers = trial.suggest_int('hidden_layers', low=1, high=3)
    layer_width = trial.suggest_int('hidden_size', low=256, high=1280, step=64)
    q_update_every = trial.suggest_int('update_q_freq', low=1, high=5)
    tgt_update_every = trial.suggest_int('update_tgt_freq', low=10, high=50, step=10)

    # Activations are chosen by name so the trial stores a plain string.
    activation_functions = {
        'RELU': nn.ReLU(),
        'LRELU': nn.LeakyReLU(),
        'GELU': nn.GELU(),
        'TANH': nn.Tanh(),
    }
    activation_name = trial.suggest_categorical('activation_function',
                                                list(activation_functions.keys()))
    activation = activation_functions[activation_name]

    learning_rate = trial.suggest_float('opt_lre', 0.0001, 0.1, log=True)
    replay_capacity = trial.suggest_int('buffer_size', low=100, high=1500, step=100)
    replay_batch = trial.suggest_int('batch_size', low=10, high=150, step=10)

    # ---- Name of this configuration, built from the values the trial recorded ----
    chosen = trial.params
    lr_text = decimal_to_text(chosen['opt_lre'])
    test_name = (
        f"{chosen['hidden_layers']}FC{chosen['hidden_size']}_{chosen['activation_function']}_"
        f"BT{chosen['batch_size']}BF{chosen['buffer_size']}_Q{chosen['update_q_freq']}_"
        f"TGT{chosen['update_tgt_freq']}_LR{lr_text}"
    )

    # ---- Output directory for this configuration ----
    test_name_path = agent_path + '/' + test_name
    if os.path.exists(test_name_path):
        print(f"Directory '{test_name_path}' already exists.")
    else:
        os.makedirs(test_name_path)
        print(f"Directory '{test_name_path}' created successfully.")

    # ---- Agent built from the sampled hyperparameters ----
    agent = DdqnAgent(name=agent_name,
                      environment=None,  # attached per environment below
                      reward_function=future_profit,
                      reward_params={'n': 5},
                      env_state_mod_func=flatten_state,
                      input_size=13,
                      hidden_size=layer_width,
                      output_size=3,
                      activation_function=activation,
                      num_hidden_layers=n_layers,
                      buffer_size=replay_capacity,
                      batch_size=replay_batch,
                      opt_lr=learning_rate,
                      alpha=ALPHA,
                      gamma=GAMMA,
                      opt_wgt_dcy=0.0,
                      dropout_rate=0.25,
                      device=device)

    # ---- Training: the same agent is trained on each training environment in turn ----
    for key, env in environments.items():
        if key not in trn_keys:
            continue

        agent.set_environment(env)
        # `epsilon_decya_func` is spelled exactly as in the original call;
        # presumably it matches the keyword in DdqnAgent.train -- verify.
        agent.train(start_idx=training_range[0],
                    end_idx=training_range[1],
                    training_episodes=100,
                    epsilon_decya_func=linear_decay,
                    initial_epsilon=0.9,
                    final_epsilon=0.1,
                    update_q_freq=q_update_every,
                    update_tgt_freq=tgt_update_every,
                    save_path=export_path,
                    val_start_idx=validation_range[0],
                    val_end_idx=validation_range[1],
                    early_stop=True,
                    stop_metric=metric,
                    stop_patience=20,
                    stop_delta=0.001)

        # Export the episodic training data for this environment to CSV.
        training_log = agent.get_training_episodic_data()
        training_log.to_csv(f'{test_name_path}/TRN-{key}{test_name}.csv')

    # ---- Testing: one episode per test environment ----
    scores = []
    for key, env in environments.items():
        if key not in tst_keys:
            continue

        agent.set_environment(env)
        agent.test(start_idx=testing_range[0],
                   end_idx=testing_range[1],
                   testing_episodes=1)

        # Score for this environment is the mean total reward over test episodes.
        testing_log = agent.get_testing_episodic_data()
        scores.append(testing_log['Total Reward'].mean())

        # Environment and agent step data side by side, exported to CSV.
        combined_df = pd.concat([env.get_step_data(), agent.get_step_data()], axis=1)
        tst_csv_path = f'{test_name_path}/TST-{key}{test_name}.csv'
        combined_df.to_csv(tst_csv_path)

        # Trading graphic saved next to the CSV: same name with a .png extension.
        tst_graph_file_name = tst_csv_path[:-4] + '.png'
        agentperform.agent_stock_performance(
            env.stock_price_data[testing_range[0]:testing_range[1]],
            combined_df['Env Action'].to_numpy(),
            key,
            agent.get_name(),
            display_graph=True,
            save_graphic=True,
            path_file=tst_graph_file_name)

    return np.mean(scores)

# Search for the hyperparameter set that maximises the value returned by
# `objective`, then report the best trial.
N_TRIALS = 100  # number of objective evaluations

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=N_TRIALS)

print("Best value: ", study.best_value)
print("Best params: ", study.best_params)


NameError: name 'environments' is not defined