# Environment Setup

## Google Colab Installation

### Install Python Environment

In [1]:
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Code specific to Google Colab
    print("Running in Google Colab")

    # Additional setup commands for Colab
    !pip install neuralforecast
    !pip install gymnasium
    !pip install QuantStats
else:
    # Code for other environments (e.g., VS Code)
    print("Running in another environment (e.g., VS Code)")

Running in another environment (e.g., VS Code)


### Install RL Libraries

In [2]:
# Colab only: fetch the project's Python modules from GitHub so the package-style
# imports in the next cell (utilities.*, agents.*, rewards.*, environments.*) can be resolved.
if IN_COLAB:
    # Retrieve required files (each call saves below /content because of -P /content)
    # NOTE(review): the '//' after the host name and --cut-dirs=4 presumably work together to keep
    # the last folder (environments/, utilities/, ...) in the saved path - confirm before "fixing" the URL.
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/environments/stockenv.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/cleandata.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/data.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/epsilon_decay.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/agentperform.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/prob_evaluate.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agents/ddqn.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agents/random.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agents/baseagent.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/rewards/stockmarket.py            
    # Move all directories and files from content/raw.githubusercontent.com to content/
    # NOTE(review): re-running this cell in the same session may fail here if the
    # target folders already exist under /content - verify.
    !mv /content/raw.githubusercontent.com/* /content/

    # Delete the raw.githubusercontent.com directory
    !rm -rf /content/raw.githubusercontent.com

# Activate Python Libraries & Random Seed

In [3]:
import os
import sys
import torch
import optuna
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import utilities.agentperform as agentperform
import utilities.cleandata as cln 
from utilities.epsilon_decay import linear_decay
from utilities.data import UniStockEnvDataStruct, TimesNetProcessing, ModifyDDQNAgentState
from utilities import prob_evaluate
from agents.ddqn import DdqnAgent
from agents.random import RandomAgent
from rewards.stockmarket import future_profit, risk_reward, zero_reward
from environments.stockenv import ContinuousOHLCVEnv
from datetime import datetime
from neuralforecast.core import NeuralForecast
from neuralforecast.models import TimesNet
from neuralforecast.losses.numpy import mae, mse
import logging
from sklearn import preprocessing

# Attach a no-op handler to each of the PyTorch Lightning loggers listed below
for _lightning_logger in ("pytorch_lightning.utilities.rank_zero",
                          "pytorch_lightning.accelerators.cuda"):
    logging.getLogger(_lightning_logger).addHandler(logging.NullHandler())

# Prevent Warning
os.environ['NIXTLA_ID_AS_COL'] = '1'

def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    When CUDA is available the CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode with benchmarking turned off.

    Args:
        seed: Value handed to every generator.
    """
    # CPU-side generators: Python, NumPy, PyTorch
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # Nothing more to do without a GPU
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case
    # Force deterministic cuDNN behaviour
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False



# Pick the compute device: the current CUDA device index when a GPU is usable, otherwise 'cpu'
if not torch.cuda.is_available():
    device = 'cpu'
    print("CUDA (GPU support) is not available. PyTorch is running on CPU.")
else:
    device = torch.cuda.current_device()
    print(f"GPU Name: {torch.cuda.get_device_name(device)}")


def decimal_to_text(decimal_number):
    """Return ``decimal_number`` scaled by 1000 and truncated, as a digit string.

    The result contains no decimal point, e.g. ``0.005 -> "5"`` and
    ``0.1 -> "100"``, which makes it usable inside file and directory names.

    The scaling is carried out in decimal arithmetic on the number's string
    form. Multiplying the binary float directly can land just below the
    intended integer (``1.001 * 1000 == 1000.9999999999999``), and truncation
    would then drop a unit.

    Args:
        decimal_number: Number to convert (float or int).

    Returns:
        str: The integer part of ``decimal_number * 1000``. Values below
        ``0.001`` all map to ``"0"``, so they are not distinguishable.
    """
    from decimal import Decimal

    # str() yields the shortest repr of the float, i.e. the value as it was written
    scaled = Decimal(str(decimal_number)) * 1000
    # int() truncates toward zero, the same rounding mode as before
    return str(int(scaled))

CUDA (GPU support) is not available. PyTorch is running on CPU.


# RL Environment Setup

## Parameters & CSV Locations

In [4]:
RANDOM_SEED = 42
set_seed(RANDOM_SEED)

# Project root. Later cells use `pwd` and `export_path` unconditionally, so both
# must exist in every environment (previously they were undefined in Colab).
if IN_COLAB:
    # Same root the Colab model-loading cell uses (os.getcwd())
    pwd = os.getcwd()
else:
    pwd = "C:/programming/MADDQN"
    # NOTE(review): the project imports in the import cell run before this append;
    # presumably the notebook is started from the repository root - confirm.
    sys.path.append(pwd)

# Output Path Location for CSV export
export_path = pwd + "/output_data/test001/"

# Remote folder that holds the input CSV files
input_url = 'https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/input_data'

# Stock label -> CSV file name inside `input_url`
stock_inputs = {
    'DJI': '^DJI_daily.csv',
    'NDAQ': '^IXIC_daily.csv',
    'SP500': '^SPX_daily.csv',
    'AAPL': 'AAPL_daily.csv',
    'AMZN': 'AMZN_daily.csv',
    'GOOGL': 'GOOGL_daily.csv',
    'MSFT': 'MSFT_daily.csv',
    'SINE': 'sine_wave_daily.csv',
    'FORD': 'F_daily.csv',
    'JNJ': 'JNJ_daily.csv',
    'NEE': 'NEE_daily.csv',
    'PFE': 'PFE_daily.csv',
    'TSLA': 'TSLA_daily.csv',
    'COKE': 'COKE_daily.csv',
    'PG': 'PG_daily.csv',
}

# Stocks used for training (further candidates: 'NDAQ', 'SP500')
trn_keys = ['DJI']

# Validation reuses the training list (same list object, not a copy)
val_keys = trn_keys

# Stocks used for testing
# (further candidates: 'MSFT', 'FORD', 'JNJ', 'NEE', 'PFE', 'TSLA', 'COKE', 'PG')
tst_keys = ['AAPL', 'AMZN', 'GOOGL']

# Needs to match the size TimesNet is trained on
window_size = 28
price_based_on = 'close'
columns = ['open', 'high', 'low', 'close', 'volume']

# Metrics of interest
metrics = [
    'n_trades',
    'n_wins',
    'win_percentage',
    'cumulative_return',
    'sortino',
    'max_drawdown',
    'sharpe',
    'trade_dur_avg',
]

# Ranking direction per available metric: only 'max_drawdown' is ranked by 'min'.
# See agentperform.py -> results dictionary for options
aval_metrics_rank_dic = {
    metric_name: ('min' if metric_name == 'max_drawdown' else 'max')
    for metric_name in ('n_trades', 'n_wins', 'n_losses', 'win_percentage',
                        'cumulative_return', 'sortino', 'max_drawdown', 'sharpe',
                        'trade_dur_avg', 'trade_dur_min', 'trade_dur_max', 'buy_hold')
}

# Column-wise standard scaling settings
env_mod_parms = dict(columns=columns,
                     scaling_type='col',
                     scaler_func=preprocessing.StandardScaler())



## RL Environment Generation

In [5]:
env_data = {}
env = {}

# Only the stocks referenced by the training / validation / testing lists are loaded
_stocks_in_use = set(trn_keys + val_keys + tst_keys)

for stock, file in stock_inputs.items():
    if stock not in _stocks_in_use:
        continue

    # Import the CSV and wrap it in the data structure the environment reads from
    df = cln.YAHOO_csv_input(file, input_url)
    env_data[stock] = UniStockEnvDataStruct(df, columns, price_based_on, window_size)

    # One trading environment per stock
    env[stock] = ContinuousOHLCVEnv(
        name=stock,
        ohlcv_data=env_data[stock]['rw_raw_env'],
        stock_price_data=env_data[stock]['rw_raw_price_env'],
        commission_rate=0.005,
    )

# Workbench Setup

## Parameters

In [6]:
# --- Output directory for this run ---
case_name = '/test000'
save_path_root = f'{pwd}{case_name}'
# exist_ok=False: raises FileExistsError when the run folder already exists
os.makedirs(save_path_root, exist_ok=False)

# --- TimesNet ---
# Number of future price predictions made by TimesNet (required for the RL agent's input layer)
n_prediction = 5

# Option 1: train the TimesNet preprocessing model (processed every cycle)
train_tn_model = False

# Option 2: import a TimesNet preprocessing model (processed every cycle)
import_tn_model = False
tn_model_path = f'{pwd}/gen_data/timesnet/'

# Option 3: use CSVs exported by the preprocessing model (no processing, straight to RL agent)
import_tn_csvs = True
tn_csvs_path = f'{pwd}/gen_data/csvs/'

# Option 4: no modification of the environment state before it is input to the agent
no_tn_preprocessing = False

# --- Limited exploratory hyperparameter discovery for a single RL agent ---
hyperparam_discovery = False

# --- Traditional training / testing ---
# Train agent(s)
train_agent = True
# Test agent(s)
test_agent = True

                         

In [7]:
# Exactly one of the four preprocessing options must be enabled.
# A chained XOR is True for any odd number of True flags, so it would also
# accept three options at once; count the enabled flags explicitly instead.
_tn_option_flags = (train_tn_model, import_tn_model, import_tn_csvs, no_tn_preprocessing)
if sum(bool(flag) for flag in _tn_option_flags) != 1:
    raise ValueError("Only one preprocessing options can and must be selected")
        

## Metric Function

Function that generates a metric from the environment; the metric is used during the validation phase of training and during the testing phase of the model.


In [8]:
def metric_function(env):
    """Return the portfolio-value change recorded in the environment's last step.

    Args:
        env: Object whose ``step_info`` is a sequence of per-step dicts holding
            the keys ``'New Portfolio Value'`` and ``'Portfolio Value'``.

    Returns:
        ``'New Portfolio Value'`` minus ``'Portfolio Value'`` of the final entry.
    """
    final_step = env.step_info[-1]
    return final_step['New Portfolio Value'] - final_step['Portfolio Value']

# TimesNet Preprocessing

## Training

### Parameters

In [9]:
if train_tn_model:

    # TimesNet settings collected in one place, then unpacked into the constructor
    timesnet_kwargs = dict(
        h=n_prediction,                 # Forecast horizon
        input_size=window_size,         # Length of Batches
        batch_size=1,                   # Number of timeseries to predict
        # futr_exog_list=remaining_columns,  (not used)
        hidden_size=128,                # Size of embedding for embedding and encoders
        dropout=0.40,                   # Dropout for embeddings
        conv_hidden_size=3,             # Channels for the inception block
        top_k=5,                        # Top num of periods from FFT considered
        num_kernels=13,                 # Number of kernels for the inception block
        encoder_layers=3,               # Num of encoders
        max_steps=1000,                 # Number of training steps
        early_stop_patience_steps=10,   # Early stoppage on validation
        val_check_steps=100,            # Val check every X steps
        windows_batch_size=150,         # Number of windows in training epoch
        num_workers_loader=7,
        learning_rate=0.0003,
        random_seed=RANDOM_SEED,
    )
    model = TimesNet(**timesnet_kwargs)

### Code Execution

In [10]:
# Fit the TimesNet model on every training stock, then save it to tn_model_path.
if train_tn_model:
  nf = NeuralForecast(models=[model], freq='d')
  results = {}
  for key in trn_keys:
    # NOTE(review): env[key] is the ContinuousOHLCVEnv built in the environment-generation
    # cell, not a DataFrame - confirm what `fit` is expected to receive as `df`.
    # NOTE(review): val_size is given as the fraction 0.2 - verify that `fit` accepts a
    # fraction here and that its return value is worth storing in `results`.
    results[key] = nf.fit(df=env[key],val_size=0.2)

  # Save once, after the loop over all training stocks has finished
  nf.save(path= tn_model_path,
          model_index=None,
          overwrite=True,
          save_dataset=True)

## Load Model

In [11]:
if import_tn_model:
    # Colab keeps the saved model under the working directory; every other
    # environment uses the configured tn_model_path. (Previously nothing was
    # loaded outside Colab, so `nf` was silently left undefined.)
    if IN_COLAB:
        model_path = os.path.join(os.getcwd(), 'gen_data', 'timesnet')
    else:
        model_path = tn_model_path

    # Fail early with a clear message when the saved model is missing
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model path {model_path} does not exist.")

    nf = NeuralForecast.load(path=model_path)

## CSV Upload

In [12]:
if import_tn_csvs:
    env_mod_func_dic = {}

    # Location of the CSV files: GitHub when running in Colab, local folder otherwise
    if IN_COLAB:
        csv_path = 'https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/gen_data/csvs/'
    else:
        csv_path = tn_csvs_path

    # Stock label -> CSV file name. NOTE(review): 'SINE' has no entry here, so
    # adding it to the train/validation/test keys raises KeyError below.
    stock_tn = {'DJI': 'DJI_tn.csv',
                'NDAQ': 'NDAQ_tn.csv',
                'SP500': 'SP500_tn.csv',
                'AAPL': 'AAPL_tn.csv',
                'AMZN': 'AMZN_tn.csv',
                'GOOGL': 'GOOGL_tn.csv',
                'MSFT': 'MSFT_tn.csv',
                'FORD': 'FORD_tn.csv',
                'JNJ': 'JNJ_tn.csv',
                'NEE': 'NEE_tn.csv',
                'PFE': 'PFE_tn.csv',
                'TSLA': 'TSLA_tn.csv',
                'COKE': 'COKE_tn.csv',
                'PG': 'PG_tn.csv'}

    # Both csv_path variants end with '/'; strip it so the joined URL/path
    # contains a single separator instead of '//'.
    csv_root = csv_path.rstrip('/')

    # One state-modification object per stock in use
    for stock in set(trn_keys + val_keys + tst_keys):
        import_csv_path = f'{csv_root}/{stock_tn[stock]}'
        temp_env_mod_fuc = ModifyDDQNAgentState(uni_data=env_data[stock],
                                                columns=columns,
                                                csv_import=import_tn_csvs,
                                                csv_path=import_csv_path,
                                                scaling_type='col',
                                                scaler_func=preprocessing.StandardScaler())
        env_mod_func_dic[stock] = temp_env_mod_fuc



## Direct
The current direct default is to take the last entry of the rolling window and convert it to a list before it is input to the agent. The agent's experience memory is currently list-based.

In [13]:
if no_tn_preprocessing:
    # One state-modification object per stock in use, with CSV import and scaling disabled
    env_mod_func_dic = {
        stock: ModifyDDQNAgentState(uni_data=env_data[stock],
                                    columns=columns,
                                    csv_import=None,
                                    csv_path=None,
                                    scaling_type=None,
                                    scaler_func=None)
        for stock in set(trn_keys + val_keys + tst_keys)
    }

# Exploratory Hyperparameterization

## Interval Setup

In [14]:
if hyperparam_discovery:
    _hyp_date_format = "%Y-%m-%d"

    # Training interval
    hyp_training_range = ('2007-01-01', '2020-12-31')
    hyp_trn_dt_range = [datetime.strptime(day, _hyp_date_format) for day in hyp_training_range]

    # Validation interval
    hyp_validation_range = ('2021-01-01', '2021-12-31')
    hyp_val_dt_range = [datetime.strptime(day, _hyp_date_format) for day in hyp_validation_range]

    # Testing interval
    hyp_testing_range = ('2021-01-01', '2023-12-31')
    hyp_tst_dt_range = [datetime.strptime(day, _hyp_date_format) for day in hyp_testing_range]

    hyp_trn_idx, hyp_val_idx, hyp_tst_idx = {}, {}, {}

    # Walk the stocks in the order of `stock_inputs`; a stock only receives an
    # index entry for the roles (train / validation / test) it is listed under.
    for stock in stock_inputs:
        if stock in trn_keys:
            hyp_trn_idx[stock] = env_data[stock].gen_rw_idxs(hyp_trn_dt_range)
        if stock in val_keys:
            hyp_val_idx[stock] = env_data[stock].gen_rw_idxs(hyp_val_dt_range)
        if stock in tst_keys:
            hyp_tst_idx[stock] = env_data[stock].gen_rw_idxs(hyp_tst_dt_range)

    display(hyp_trn_idx, hyp_val_idx, hyp_tst_idx)

## Parameter Search & Code Execution

In [15]:
if hyperparam_discovery:

    # The agent name is fixed before the objective is defined because every
    # environment registers the agent under this name.
    agent_name = 'hyp_discovery_agent'
    # export_path already ends with '/'; strip it to avoid a doubled separator
    agent_path = export_path.rstrip('/') + '/' + agent_name
    metric = 'val_tot_r'
    # Upper bound of the replay-buffer search range, derived from the DJI training window.
    # NOTE(review): 'DJI' is hard-coded (manual input), so it must be one of trn_keys.
    max_len_buf = np.round(hyp_trn_idx['DJI'][1] - hyp_trn_idx['DJI'][0] + window_size, -2) -10
    print(f'Max Mem Length: {max_len_buf}')

    def objective(trial):
        """Optuna objective: train a DDQN agent with sampled hyperparameters and score it.

        The agent is trained on every stock in ``trn_keys`` and tested on every
        stock in ``tst_keys``. Per test stock the step data is exported as CSV
        and a trading graphic is saved under a directory named after the
        sampled hyperparameters.

        Args:
            trial: Optuna trial used to sample the hyperparameters.

        Returns:
            Mean over the test stocks of the mean ``'tot_r'`` value of the
            agent's testing episodic data.
        """
        # ---- Hyperparameters to search over ----
        ## NN hyperparameters
        sug_hidden_layers = trial.suggest_int('hidden_layers', low=1, high=3)
        sug_hidden_size = trial.suggest_int('hidden_size', low=64, high=512, step=64)
        sug_update_q_freq = trial.suggest_int('update_q_freq', low=1, high=5)
        sug_update_tgt_freq = trial.suggest_int('update_tgt_freq', low=5, high=15)

        ## Activation function: the name is sampled, the module is looked up
        activation_functions = {
            'LRELUd': nn.LeakyReLU(),
            'LRELUs02': nn.LeakyReLU(negative_slope=0.2),
            'GELU': nn.GELU(),
            'TANH': nn.Tanh(),
            'SELU': nn.SELU(),
            'SILU': nn.SiLU(),
        }
        sug_activation_function_name = trial.suggest_categorical('activation_function',
                                                                 list(activation_functions.keys()))
        sug_activation_function = activation_functions[sug_activation_function_name]

        # The reward function is not part of the search; future_profit is always used below.

        ## Optimizer hyperparameters
        sug_opt_lre = trial.suggest_categorical('opt_lre', [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])
        sug_gamma = trial.suggest_float('gamma', low=0.90, high=0.99, step=0.01)
        ## Memory Replay hyperparameters (the batch can never exceed the buffer)
        sug_buffer_size = trial.suggest_int('buffer_size', low=100, high=max_len_buf, step=10)
        sug_batch_size = trial.suggest_int('batch_size', low=10, high=sug_buffer_size, step=5)

        # ---- Saving setup ----
        ## Notation for this hyperparameter setup, used in directory and file names
        cur_lre = decimal_to_text(sug_opt_lre)
        test_name = (f'{sug_hidden_layers}FC{sug_hidden_size}_{sug_activation_function_name}_' +
                     f'BT{sug_batch_size}BF{sug_buffer_size}_Q{sug_update_q_freq}_' +
                     f'TGT{sug_update_tgt_freq}_LR{cur_lre}')

        ## Create Dir to save results
        test_name_path = agent_path + '/' + test_name
        if not os.path.exists(test_name_path):
            os.makedirs(test_name_path)
            print(f"Directory '{test_name_path}' created successfully.")
        else:
            print(f"Directory '{test_name_path}' already exists.")

        # ---- Agent with the sampled hyperparameters ----
        # env_state_mod_func is assigned per stock inside the loops below, the same
        # way the regular training loop does it (the former reference to an
        # undefined `env_mod_func` raised NameError).
        best_agent = DdqnAgent(name=agent_name,
                               environment=None,
                               reward_function=future_profit,
                               reward_params={'n': 5},
                               env_state_mod_func=None,
                               input_size=11,
                               hidden_size=sug_hidden_size,
                               output_size=3,
                               activation_function=sug_activation_function,
                               num_hidden_layers=sug_hidden_layers,
                               buffer_size=sug_buffer_size,
                               batch_size=sug_batch_size,
                               alpha=sug_opt_lre,
                               gamma=sug_gamma,
                               opt_wgt_dcy=0.01,
                               dropout_rate=0.25,
                               device=device)

        # ---- Training ----
        for key, rl_env in env.items():
            if key not in trn_keys:
                continue

            rl_env.add_agent(agent_name)
            rl_env.set_decision_agent(agent_name)
            best_agent.set_environment(rl_env)
            # Per-stock state modifier built by the CSV-upload / direct cells; replaces
            # the call on the undefined `timesnet` object.
            best_agent.set_env_stat_modify_func(env_mod_func_dic[key].process)
            best_agent.train(start_idx=hyp_trn_idx[key][0],
                             end_idx=hyp_trn_idx[key][1],
                             training_episodes=30,
                             epsilon_decya_func=linear_decay,
                             initial_epsilon=0.9,
                             final_epsilon=0.1,
                             update_q_freq=sug_update_q_freq,
                             update_tgt_freq=sug_update_tgt_freq,
                             save_path=export_path,
                             val_start_idx=hyp_val_idx[key][0],
                             val_end_idx=hyp_val_idx[key][1],
                             metric_func=metric_function,
                             min_training_episodes=1,
                             early_stop=True,
                             stop_metric=metric,
                             stop_patience=15,
                             stop_delta=0.001)
            rl_env.remove_agent(agent_name)

        # ---- Testing ----
        scores = []
        for key, rl_env in env.items():
            if key not in tst_keys:
                continue

            rl_env.add_agent(agent_name)
            rl_env.set_decision_agent(agent_name)
            best_agent.set_environment(rl_env)
            best_agent.set_env_stat_modify_func(env_mod_func_dic[key].process)
            best_agent.test(start_idx=hyp_tst_idx[key][0],
                            end_idx=hyp_tst_idx[key][1],
                            metric_func=metric_function,
                            testing_episodes=1)
            rl_env.remove_agent(agent_name)

            ## Collect the test metric for this stock
            ddqn_tst = best_agent.get_testing_episodic_data()
            scores.append(ddqn_tst['tot_r'].mean())

            ## Export Test data (environment and agent step data side by side)
            env_step_df = rl_env.get_step_data()
            agent_step_df = best_agent.get_step_data()
            combined_df = pd.concat([env_step_df, agent_step_df], axis=1)
            tst_df_file_name = f'TST-{key}' + test_name + '.csv'
            tst_df_save_path = test_name_path + '/' + tst_df_file_name
            combined_df.to_csv(tst_df_save_path)

            ## Generate Trading Graphic (same file name with a .png extension)
            tst_graph_file_name = tst_df_save_path[:-4] + '.png'
            # Selecting all batches, last price of window, closing price
            tst_prices = env[key].stock_price_data[hyp_tst_idx[key][0]:hyp_tst_idx[key][1]][:, -1, 0]
            agentperform.agent_stock_performance(tst_prices,
                                                 combined_df['Env Action'].to_numpy(),
                                                 key,
                                                 best_agent.get_name(),
                                                 display_graph=False,
                                                 save_graphic=True,
                                                 path_file=tst_graph_file_name)

        mean = np.mean(scores)
        return mean

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    print("Best value: ", study.best_value)
    print("Best params: ", study.best_params)
        
    
        

    
    

# Agent Setup

## Parameters

In [16]:
# Agent Type Setup
agent_classes = {'profit': DdqnAgent,
                 'risk': DdqnAgent,
                 'random':RandomAgent}

# Mul
agent_setup = {'profit': ['profit'],
                 'risk': ['risk'],
                 'random': ['random']}
                 #final': ['profit', 'risk'], for multi agent key is decision agent
                 #'macro': 'macro', 
                 #'opt': ['profit', 'risk', 'macro']}
                 
agent_name_list = list(agent_classes.keys())

agents_to_train = ['profit', 'risk']
agents_to_import = {'agent_name': 'path_to_model'}

agent_params = {
    agent_name_list[0]:{
        'name': agent_name_list[0],
        'environment': None,
        'reward_function': future_profit,
        'reward_params': {'n':5},
        'env_state_mod_func': None,  #Is Set in Training Loop
        'input_size': 11,
        'hidden_size': 256,
        'output_size':3,
        'activation_function': nn.ELU(),
        'num_hidden_layers': 2,
        'buffer_size': 330,
        'batch_size': 75,
        'alpha': 0.0005,
        'gamma':0.96,
        'opt_wgt_dcy': 0,
        'dropout_rate': 0.25,
        'device': device
    },
    agent_name_list[1]:{
        'name': agent_name_list[1],
        'environment': None,
        'reward_function': risk_reward,
        'reward_params': {'n':5},
        'env_state_mod_func': None, #Is Set in Training Loop
        'input_size': 11,
        'hidden_size': 256,
        'output_size':3,
        'activation_function': nn.LeakyReLU(),
        'num_hidden_layers': 2,
        'buffer_size': 330,
        'batch_size': 75,
        'alpha': 0.005,
        'gamma':0.97,
        'opt_wgt_dcy': 0.0,
        'dropout_rate': 0.25,
        'device': device
    },
        agent_name_list[2]:{
        'name': agent_name_list[2],
        'environment': None,
        'reward_function': zero_reward,
        'reward_params': {},
        }}

    

## Agent Generation

In [17]:
# Build one agent instance per entry of `agent_classes` from its parameter set
agents_dic = {}

for agent_name, agent_class in agent_classes.items():
    agents_dic[agent_name] = agent_class(**agent_params[agent_name])
            


# Agent Training

In [18]:
# Training Inputs
training_range = ('2007-01-01','2010-12-31')
trn_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in training_range]

# Validation Inputs
validation_range = ('2011-01-01', '2011-12-31')
val_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in validation_range]

# Testing Inputs
testing_range = ('2011-01-01', '2013-12-31')
tst_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in testing_range]

# Scaling Inputs - **Needs to be thought about with how ModifyDDQNAgentState Class works##
scaling_range = ('2007-01-01','2010-12-31')
scale_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in scaling_range]


trn_idx = {}
val_idx = {}
tst_idx = {}
scale_idx = {}

for stock, file in stock_inputs.items():
    if stock in set(trn_keys + val_keys + tst_keys):
        if stock in trn_keys:
            trn_idx[stock] = env_data[stock].gen_rw_idxs(trn_dt_range)
        if stock in val_keys:
            val_idx[stock] = env_data[stock].gen_rw_idxs(val_dt_range)
        if stock in tst_keys:
            tst_idx[stock] = env_data[stock].gen_rw_idxs(tst_dt_range)
        
        scale_idx[stock] = env_data[stock].gen_idxs(scale_dt_range)
 



In [19]:
# Keyword arguments shared by every agent `train` call in the training loop
training_params = dict(
    training_episodes=5,
    epsilon_decya_func=linear_decay,
    initial_epsilon=0.9,
    final_epsilon=0.1,
    update_q_freq=1,
    update_tgt_freq=5,
    save_path=export_path,
    metric_func=metric_function,
    min_training_episodes=2,
    early_stop=True,
    stop_metric='val_tot_r',
    stop_patience=7,
    stop_delta=0.001,
)

In [20]:
if train_agent:
    # Keep only the setups whose decision agent is scheduled for training
    filtered_agents = {
        decision_agent: agents_in_setup
        for decision_agent, agents_in_setup in agent_setup.items()
        if decision_agent in agents_to_train
    }

    for decision_agent, agents_in_setup in filtered_agents.items():
        for key in trn_keys:
            rl_env = env[key]

            # Setup agents with environment
            for agent in agents_in_setup:
                rl_env.add_agent(agent)
                agents_dic[agent].set_environment(rl_env)
            rl_env.set_decision_agent(decision_agent)

            # Train sub-agents (every agent of the setup except the decision agent)
            for agent in agents_in_setup:
                # Compare names by value; `is not` only worked by accident of string interning
                if agent != decision_agent:
                    agents_dic[agent].set_env_stat_modify_func(env_mod_func_dic[key].process)
                    agents_dic[agent].train(start_idx=trn_idx[key][0],
                                            end_idx=trn_idx[key][1],
                                            val_start_idx=val_idx[key][0],
                                            val_end_idx=val_idx[key][1],
                                            **training_params)
                    # Save agent: directory + agent name, the same target form
                    # the decision agent uses below
                    save_agent_path = save_path_root + f'/{agent}/'
                    os.makedirs(save_agent_path, exist_ok=True)
                    agents_dic[agent].export_Q_nn(save_agent_path + agent)

            # Train Decision Agent
            agents_dic[decision_agent].set_env_stat_modify_func(env_mod_func_dic[key].process)
            agents_dic[decision_agent].train(start_idx=trn_idx[key][0],
                                             end_idx=trn_idx[key][1],
                                             val_start_idx=val_idx[key][0],
                                             val_end_idx=val_idx[key][1],
                                             **training_params)

            # Save Agent
            save_agent_path = save_path_root + f'/{decision_agent}/'
            os.makedirs(save_agent_path, exist_ok=True)
            file_root_name = f'{key}_TRN_{trn_idx[key][0]}-{trn_idx[key][1]}'
            agents_dic[decision_agent].export_Q_nn(save_agent_path + decision_agent)

            # Step-level record: rows present in both the environment and the agent data
            env_data_record = rl_env.get_step_data()
            agent_data_record = agents_dic[decision_agent].get_step_data()
            training_record = pd.concat([env_data_record, agent_data_record], axis=1, join='inner')
            training_record.to_csv(f'{save_agent_path}{file_root_name}_step_data.csv')

            # Episode-level record
            episodic_training_record = agents_dic[decision_agent].get_training_episodic_data()
            episodic_training_record.to_csv(f'{save_agent_path}{file_root_name}_epi_data.csv')

            # Remove Agent: detach every agent so the environment can be reused by the next setup
            for agent in agents_in_setup:
                rl_env.remove_agent(agent)
                agents_dic[agent].set_environment(None)

DJI ENV: Agent profit added
DJI ENV: Agent profit assigned as decision agent

profit: Training Initialized on DJI[0:980] -> Validation on DJI[1008:1232]
profit: EP 5 of 5 Finished -> ΔQ1 = 0.38, ΔQ2 = 0.36 | ∑R = 19.32, μR = 0.02 σR = 2.11 | Max: val_tot_r = 45.84 -> Validation loss decreased (269000.0000 --> 2000.0000).  Saving model ...                                                               
profit: Training finished on DJI[0:980]

profit: Q-Network Exported to file "C:/programming/MADDQN/test000/profit/profit"
DJI ENV: Agent profit removed
DJI ENV: Agent risk added
DJI ENV: Agent risk assigned as decision agent

risk: Training Initialized on DJI[0:980] -> Validation on DJI[1008:1232]
risk: EP 5 of 5 Finished -> ΔQ1 = 49841220.00, ΔQ2 = 52776588.00 | ∑R = 48.45, μR = 0.05 σR = 0.64 | Max: val_tot_r = -518.65 -> EarlyStopping counter: 3 out of 7                                                                                        
risk: Training finished on DJI[0:980]

risk: Q

# Agent Testing

## Parameters

In [21]:
# Keyword arguments for the agent `test` call, looked up by agent class:
# DdqnAgent is tested for 1 episode, RandomAgent for 100 episodes.
testing_params = {
    DdqnAgent: dict(metric_func=metric_function,
                    metric_func_arg={},
                    testing_episodes=1),
    RandomAgent: dict(metric_func=metric_function,
                      metric_func_arg={},
                      testing_episodes=100),
}


## Testing

In [22]:
if test_agent:
    # Nesting order of the `results` dictionary built below:
    # results[stock][agent][test_interval][test_num] -> metrics dict.
    result_dic_struct = ['stock','agent','test_interval','test_num']
    results = {}

    for key in tst_keys:
        
        # Init Record[Stock]
        results[key] = {}
        test_key = f'{tst_idx[key][0]}:{tst_idx[key][1]}'
        # Price series over the test interval.
        # NOTE(review): the trailing [-1, 0] presumably selects the most recent
        # row of each rolling window and the first price column - confirm.
        stock_price_data = env_data[key]['rw_raw_price_env'][tst_idx[key][0]:tst_idx[key][1],-1,0]
        rl_env = env[key]

        for decision_agent, agents_in_setup in agent_setup.items():

            # Init Record[Stock][Agent]
            results[key][decision_agent] = {}
            
            # Init Record[Stock][Agent][test_interval]
            results[key][decision_agent][test_key] = {}   # Different Test Keys will need loop
                        
            # Register the decision agent and every supporting agent with the environment
            for agent in set([decision_agent] + agents_in_setup):
                rl_env.add_agent(agent)
                agents_dic[agent].set_environment(rl_env)
            rl_env.set_decision_agent(decision_agent)
            
            # Enable randomness if the agent is a RandomAgent: draw a fresh seed
            # so its test episodes are not tied to the global seed.
            if isinstance(agents_dic[decision_agent], RandomAgent):
                new_random_seed = random.randint(1, 10**9)
                set_seed(new_random_seed)
            
            # Output location for this agent's test artefacts
            save_agent_path = save_path_root + f'/{decision_agent}/'
            os.makedirs(save_agent_path, exist_ok=True)
            file_root_name = f'{key}_TST_{tst_idx[key][0]}-{tst_idx[key][1]}' 
            
            # Test Decision Agent
            params = testing_params[agent_classes[decision_agent]]
            if not isinstance(agents_dic[decision_agent], RandomAgent):
                # Attach the state-modify function to the agent under test.
                # This previously indexed with `agent`, the variable left over
                # from the set-iteration loop above, whose final value is
                # arbitrary whenever the setup contains supporting agents.
                agents_dic[decision_agent].set_env_stat_modify_func(env_mod_func_dic[key].process)
            agents_dic[decision_agent].test(start_idx=tst_idx[key][0],
                                            end_idx=tst_idx[key][1],
                                            **params)
            
            # Generate Testing Records
            env_data_record = rl_env.get_step_data()
            agent_data_record = agents_dic[decision_agent].get_step_data()
            # Inner join on the index keeps only rows present in both records
            test_record =  pd.concat([env_data_record, agent_data_record], axis=1, join='inner')
            test_record.to_csv(f'{save_agent_path}{file_root_name}_step_data.csv')
            episodic_testing_record = agents_dic[decision_agent].get_testing_episodic_data()
            episodic_testing_record.to_csv(f'{save_agent_path}{file_root_name}_epi_data.csv')    
            
            trade_actions_per_test = episodic_testing_record['tst_actions']
                
            # One performance report (and saved figure) per test episode
            for idx, action_set in enumerate(trade_actions_per_test):
                # NOTE(review): `{[idx]}` formats a one-element list, so the file
                # name ends in e.g. `_[0]`; kept as-is to avoid renaming outputs.
                file_root_name = f'{key}_TST_{tst_idx[key][0]}-{tst_idx[key][1]}_{[idx]}'
                test_metrics = agentperform.agent_stock_performance(stock_price_ts=np.array(stock_price_data),
                                                                    trade_ts=np.array(action_set),
                                                                    stock_name=key,
                                                                    agent_name=decision_agent,
                                                                    display_graph=False, 
                                                                    save_graphic= True,
                                                                    path_file = f'{save_agent_path}{file_root_name}.png')
                # Stock and agent are already encoded in the `results` keys
                del test_metrics['stock']
                del test_metrics['agent_name']                                                                     
                results[key][decision_agent][test_key][idx] = test_metrics
                       
            # Detach every agent so the environment is clean for the next setup
            for agent in set([decision_agent] + agents_in_setup):
                rl_env.remove_agent(agent)
                agents_dic[agent].set_environment(None)

    display(results)
    # Restore the global seed that RandomAgent testing may have replaced
    set_seed(RANDOM_SEED)

AAPL ENV: Agent profit added
AAPL ENV: Agent profit assigned as decision agent

profit: Testing Initialized on AAPL[1008:1734]
profit - AAPL[1008:1734] - Testing Finished - EP - 1 of 1-> ∑R = 55109.87, μR = 75.91, σR = 2460.96
profit: Testing Complete on AAPL[1008:1734]



  res = returns.mean() / downside


AAPL ENV: Agent profit removed
AAPL ENV: Agent risk added
AAPL ENV: Agent risk assigned as decision agent

risk: Testing Initialized on AAPL[1008:1734]
risk - AAPL[1008:1734] - Testing Finished - EP - 1 of 1-> ∑R = 55109.87, μR = 75.91, σR = 2460.96
risk: Testing Complete on AAPL[1008:1734]



  res = returns.mean() / downside


AAPL ENV: Agent risk removed
AAPL ENV: Agent random added
AAPL ENV: Agent random assigned as decision agent
random: Testing Initialized on AAPL[1008:1734]
random: EP 100 of 100 Finished -> ∑R = -74968.68, μR = -103.26, σR = 726.95                                                                                                                             
random: Testing Complete on AAPL[1008:1734]
AAPL ENV: Agent random removed
AMZN ENV: Agent profit added
AMZN ENV: Agent profit assigned as decision agent

profit: Testing Initialized on AMZN[1008:1734]
profit - AMZN[1008:1734] - Testing Finished - EP - 1 of 1-> ∑R = 104931.74, μR = 144.53, σR = 2560.22
profit: Testing Complete on AMZN[1008:1734]

AMZN ENV: Agent profit removed
AMZN ENV: Agent risk added
AMZN ENV: Agent risk assigned as decision agent

risk: Testing Initialized on AMZN[1008:1734]
risk - AMZN[1008:1734] - Testing Finished - EP - 1 of 1-> ∑R = 109720.39, μR = 151.13, σR = 2560.41
risk: Testing Complete on AMZN[1008:1734]



  res = returns.mean() / downside


AMZN ENV: Agent risk removed
AMZN ENV: Agent random added
AMZN ENV: Agent random assigned as decision agent
random: Testing Initialized on AMZN[1008:1734]
random: EP 100 of 100 Finished -> ∑R = -84519.14, μR = -116.42, σR = 690.32                                                                                                                             
random: Testing Complete on AMZN[1008:1734]
AMZN ENV: Agent random removed
GOOGL ENV: Agent profit added
GOOGL ENV: Agent profit assigned as decision agent

profit: Testing Initialized on GOOGL[1008:1734]
profit - GOOGL[1008:1734] - Testing Finished - EP - 1 of 1-> ∑R = 65657.56, μR = 90.44, σR = 1740.69
profit: Testing Complete on GOOGL[1008:1734]

GOOGL ENV: Agent profit removed
GOOGL ENV: Agent risk added
GOOGL ENV: Agent risk assigned as decision agent

risk: Testing Initialized on GOOGL[1008:1734]
risk - GOOGL[1008:1734] - Testing Finished - EP - 1 of 1-> ∑R = 78499.04, μR = 108.13, σR = 1744.24
risk: Testing Complete on GOOGL[1008

  res = returns.mean() / downside


GOOGL ENV: Agent risk removed
GOOGL ENV: Agent random added
GOOGL ENV: Agent random assigned as decision agent
random: Testing Initialized on GOOGL[1008:1734]
random: EP 100 of 100 Finished -> ∑R = -67462.66, μR = -92.92, σR = 805.11                                                                                                                              
random: Testing Complete on GOOGL[1008:1734]
GOOGL ENV: Agent random removed


{'AAPL': {'profit': {'1008:1734': {0: {'n_trades': 1,
     'n_wins': 1,
     'n_losses': 0,
     'win_percentage': 100.0,
     'cumulative_return': 1.5639810426540284,
     'sortino': 0,
     'max_drawdown': 0.0,
     'sharpe': 0,
     'trade_dur_avg': 725.0,
     'trade_dur_min': 725,
     'trade_dur_max': 725,
     'buy_hold': 1.5639810426540284}}},
  'risk': {'1008:1734': {0: {'n_trades': 1,
     'n_wins': 1,
     'n_losses': 0,
     'win_percentage': 100.0,
     'cumulative_return': 1.5639810426540284,
     'sortino': 0,
     'max_drawdown': 0.0,
     'sharpe': 0,
     'trade_dur_avg': 725.0,
     'trade_dur_min': 725,
     'trade_dur_max': 725,
     'buy_hold': 1.5639810426540284}}},
  'random': {'1008:1734': {0: {'n_trades': 185,
     'n_wins': 96,
     'n_losses': 87,
     'win_percentage': 51.891891891891895,
     'cumulative_return': 1.2609993470349468,
     'sortino': 1.442859198393216,
     'max_drawdown': -32.45801210631164,
     'sharpe': 0.9979068380577096,
     'trade_du

# Aggregating Test Results

In [23]:
if test_agent:
    # Stage 1: per agent / stock / test interval, reduce the per-episode
    # metrics in `results` to a (mean, std) pair for every metric.
    aggerate_results = {}
    for agent in agent_name_list:
        aggerate_results[agent] = {}
        for stock in tst_keys:
            aggerate_results[agent][stock] = {}
            test_key = f'{tst_idx[stock][0]}:{tst_idx[stock][1]}'
            aggerate_results[agent][stock][test_key] = {}

            # Collect one row of metric values per test episode
            episode_rows = []
            for test_num in range(testing_params[agent_classes[agent]]['testing_episodes']):
                values_array = [results[stock][agent][test_key][test_num][metric_name] for metric_name in metrics]
                episode_rows.append(np.array(values_array))

            # Stack once and reduce once after the loop, rather than
            # re-stacking and recomputing the statistics on every episode.
            # The leading empty (0, n_metrics) array keeps the stack
            # well-formed when there are no episodes, so the statistics are
            # always derived from this agent/stock and never from stale values.
            values = np.vstack([np.empty((0, len(metrics)))] + episode_rows)
            means_for_metrics = np.mean(values, axis=0)
            std_for_metrics = np.std(values, axis=0)
            
            for idx,metric in enumerate(metrics):
                aggerate_results[agent][stock][test_key][metric] = (means_for_metrics[idx],std_for_metrics[idx])

    # Stage 2: one DataFrame per metric with a row per dataset (stock-interval)
    # and a column per agent holding the mean score rounded to 2 decimals.
    summarized_aggerate_results = {}

    for metric in metrics:
        model_list = []
        dataset_name = []
        scores = []
        for agent in aggerate_results.keys():
            
            model_list.append(agent)
            score_list = []
            for stock in aggerate_results[agent].keys():
                for test in aggerate_results[agent][stock].keys():
                    run_name = stock + "-" + test
                    if run_name not in dataset_name:
                        dataset_name.append(run_name)
                    # Element 0 of the (mean, std) tuple is the mean
                    score = aggerate_results[agent][stock][test][metric][0]
                    score_list.append(np.round(score,2))
            scores.append(score_list)

        # `scores` is agent-major; transpose so rows are datasets
        score_array = np.array(scores).T

        df = pd.DataFrame(score_array,columns=model_list)
        df['dataset'] = dataset_name

        # Move the 'dataset' column to the front
        column_order = ['dataset'] + [col for col in df.columns if col != 'dataset']
        df = df[column_order]
        summarized_aggerate_results[metric] = df

        
        # Export aggregate data to CSV, with an extra 'mean' row across datasets
        means = df[model_list].mean()
        model_means = {model: means[model] for model in model_list}
        model_means.update({'dataset': 'mean'})
        df_export = df.copy()
        df_export.loc[len(df)] = model_means
        df_export.to_csv(f'{save_path_root}/{metric}_agg_data.csv')
        
        


    display(summarized_aggerate_results)


{'n_trades':            dataset  profit  risk  random
 0   AAPL-1008:1734     1.0   1.0  182.96
 1   AMZN-1008:1734     2.0   1.0  181.32
 2  GOOGL-1008:1734     8.0   1.0  182.05,
 'n_wins':            dataset  profit  risk  random
 0   AAPL-1008:1734     1.0   1.0   92.13
 1   AMZN-1008:1734     1.0   1.0   93.44
 2  GOOGL-1008:1734     5.0   1.0   94.57,
 'win_percentage':            dataset  profit   risk  random
 0   AAPL-1008:1734   100.0  100.0   50.34
 1   AMZN-1008:1734    50.0  100.0   51.55
 2  GOOGL-1008:1734    62.5  100.0   51.95,
 'cumulative_return':            dataset  profit  risk  random
 0   AAPL-1008:1734    1.56  1.56    1.28
 1   AMZN-1008:1734    2.09  2.11    1.54
 2  GOOGL-1008:1734    1.79  1.80    1.37,
 'sortino':            dataset  profit  risk  random
 0   AAPL-1008:1734    0.00   0.0    1.53
 1   AMZN-1008:1734  460.84   0.0    2.39
 2  GOOGL-1008:1734  501.23   0.0    2.16,
 'max_drawdown':            dataset  profit  risk  random
 0   AAPL-1008:1734  

# Significance Testing

In [35]:
if test_agent:
    # For every metric: rank the agents per dataset, run the Iman-Davenport
    # test on the rank array, then the pairwise Nemenyi test, and export the
    # pairwise results to CSV.
    # `model_list` is the agent ordering left behind by the aggregation cell,
    # so this cell must be run after it.
    for metric in metrics:
        display(metric)
        display(summarized_aggerate_results[metric])
        test = prob_evaluate.generate_rank_array_from_dataframe(summarized_aggerate_results[metric],
                                                                model_list,equal_rank_behav="mean",
                                                                rank_order=aval_metrics_rank_dic[metric])
        display(test)
        stat, critical_f_value, reject_null_hypo = prob_evaluate.iman_davenport_test(test,0.95,arr_order='cols')
        display(stat, critical_f_value, reject_null_hypo)

        results_raw = prob_evaluate.nemenyi_test(test,0.95,model_list)
        df_export = pd.DataFrame(results_raw, columns=['agent1_agent2', 'nemenyI_stat', 'nemeny_threshold', 'reject_null_hypo'])

        # Expand the 'agent1_agent2' tuple into separate columns.
        # The index must be df_export's own. The stale `df` from the
        # aggregation cell only lines up when the number of datasets happens
        # to equal the number of agent pairs.
        df_export[['agent1', 'agent2']] = pd.DataFrame(df_export['agent1_agent2'].tolist(), index=df_export.index)
        
        # Drop the original 'agent1_agent2' column
        df_export = df_export.drop(columns=['agent1_agent2'])
        
        new_column_order = ['agent1', 'agent2', 'nemenyI_stat', 'nemeny_threshold', 'reject_null_hypo']
        df_export = df_export[new_column_order]
        df_export.to_csv(f'{save_path_root}/{metric}_sig_test.csv')
        
        # Show the pairwise table just exported, not the leftover `df`
        display(df_export)

'n_trades'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,1.0,1.0,182.96,3,3,1
1,AMZN-1008:1734,2.0,1.0,181.32,2,3,1
2,GOOGL-1008:1734,8.0,1.0,182.05,2,3,1


array([[3, 2, 2],
       [3, 3, 3],
       [1, 1, 1]])

-4.769230769230766

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'n_wins'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,1.0,1.0,92.13,3,3,1
1,AMZN-1008:1734,1.0,1.0,93.44,3,3,1
2,GOOGL-1008:1734,5.0,1.0,94.57,2,3,1


array([[3, 3, 2],
       [3, 3, 3],
       [1, 1, 1]])

-3.285714285714286

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'win_percentage'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,100.0,100.0,50.34,2,2,3
1,AMZN-1008:1734,50.0,100.0,51.55,3,1,2
2,GOOGL-1008:1734,62.5,100.0,51.95,2,1,3


array([[2, 3, 2],
       [2, 1, 1],
       [3, 2, 3]])

-13.999999999999979

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'cumulative_return'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,1.56,1.56,1.28,2,2,3
1,AMZN-1008:1734,2.09,2.11,1.54,2,1,3
2,GOOGL-1008:1734,1.79,1.8,1.37,2,1,3


array([[2, 2, 2],
       [2, 1, 1],
       [3, 3, 3]])

-7.142857142857138

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'sortino'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,0.0,0.0,1.53,3,3,1
1,AMZN-1008:1734,460.84,0.0,2.39,1,3,2
2,GOOGL-1008:1734,501.23,0.0,2.16,1,3,2


array([[3, 1, 1],
       [3, 3, 3],
       [1, 2, 2]])

-9.19999999999998

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'max_drawdown'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,0.0,0.0,-29.93,1,1,3
1,AMZN-1008:1734,-2.72,0.0,-25.03,2,1,3
2,GOOGL-1008:1734,-0.72,0.0,-19.76,2,1,3


array([[1, 2, 2],
       [1, 1, 1],
       [3, 3, 3]])

1.272727272727275

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'sharpe'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,0.0,0.0,0.95,3,3,1
1,AMZN-1008:1734,10.7,0.0,1.42,1,3,2
2,GOOGL-1008:1734,6.32,0.0,1.29,1,3,2


array([[3, 1, 1],
       [3, 3, 3],
       [1, 2, 2]])

-9.19999999999998

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False


'trade_dur_avg'

Unnamed: 0,dataset,profit,risk,random,profit_rank,risk_rank,random_rank
0,AAPL-1008:1734,725.0,725.0,2.0,2,2,3
1,AMZN-1008:1734,362.0,725.0,2.02,2,1,3
2,GOOGL-1008:1734,89.75,725.0,2.0,2,1,3


array([[2, 2, 2],
       [2, 1, 1],
       [3, 3, 3]])

-7.142857142857138

6.944271909999155

False

Unnamed: 0,agent1,agent2,nemenyI_stat,nemeny_threshold,reject_null_hypo
0,profit,risk,0.666667,1.913051,False
1,profit,random,1.0,1.913051,False
2,risk,random,1.666667,1.913051,False
