In [1]:
import os
import numpy as np
import cleandata
import stockenv 
import sys
import pandas as pd
import agentperform
import torch
import torch.nn as nn
import optuna
from agents.ddqn import DDQN


# Project root; it is also put on sys.path so project-local modules can be resolved.
pwd = "C:/programming/MADDQN"
sys.path.append(pwd)

# Folder holding the input CSV files listed in `stock_inputs`.
import_path = f"{pwd}/input_data"

# Folder that receives every exported CSV / graphic.
export_path = f"{pwd}/output_data"

# Pick the compute device: the current CUDA device index when a GPU is usable,
# otherwise the string 'cpu'.
if not torch.cuda.is_available():
    device = 'cpu'
    print("CUDA (GPU support) is not available. PyTorch is running on CPU.")
else:
    device = torch.cuda.current_device()
    print(f"GPU Name: {torch.cuda.get_device_name(device)}")


# Short label -> CSV file name (relative to `import_path`).
# Insertion order matters: later loops iterate this dict in this order.
stock_inputs = {
    'DJI': '^DJI_daily.csv',
    'NDAQ': '^IXIC_daily.csv',
    'SP500': '^SPX_daily.csv',
    'AAPL': 'AAPL_daily.csv',
    'AMZN': 'AMZN_daily.csv',
    'GOOGL': 'GOOGL_daily.csv',
    'MSFT': 'MSFT_daily.csv',
    'SINE': 'sine_wave_daily.csv',
    'FORD': 'F_daily.csv',
    'JNJ': 'JNJ_daily.csv',
    'NEE': 'NEE_daily.csv',
    'PFE': 'PFE_daily.csv',
    'TSLA': 'TSLA_daily.csv',
    'USIDX': '^USIDX_daily.csv',
}

# Training inputs: which labels are trained on, and the row ranges used for
# training and validation.
trn_keys = ['DJI', 'NDAQ', 'SP500']
training_range = (0, 2500)
validation_range = (2500, 2750)

# Passed to the agent constructor as `alpha` and `gamma`.
ALPHA = 0.1
GAMMA = 0.9

# Testing inputs: every label is tested, on the rows after the validation range.
# `tst_keys` is a live view of the dict keys, not a copy.
tst_keys = stock_inputs.keys()
testing_range = (2750, 3000)

# One trading environment per entry of `stock_inputs`, keyed by its short label.
environments = dict()

for name, file in stock_inputs.items():
    # Load the CSV for this label from the input folder.
    temp_df = cleandata.YAHOO_csv_input(file, import_path)

    # The normaliser is handed the training row range only
    # (presumably so its statistics are fitted on training rows — TODO confirm).
    temp_norm_df = cleandata.normalize_df_ohlcv_by_row_range(temp_df, *training_range)

    # The environment gets the normalised frame as its OHLCV input and the
    # un-normalised 'close' column as its price series.
    environments[name] = stockenv.ContinuousOHLCVEnv(
        name,
        ohlcv_data=temp_norm_df.to_numpy(),
        stock_price_data=temp_df['close'].to_numpy(),
        commission_rate=0.005,
    )


# Define a reward function outside the Environment class
def norm_min_1(norm_num):
    """
    Map a zero-centred value to a strictly positive magnitude that is never below 1.

    The mapping is symmetric in the sign of the input and continuous:

    - ``|norm_num| > 2``  ->  ``|norm_num|``
    - ``|norm_num| <= 2`` ->  ``1 + 0.5 * |norm_num|``

    So an input of 0 gives 1, an input of +/-2 gives 2, and larger inputs
    (e.g. +/-6) are returned as their absolute value. Because the result is
    always >= 1 it is safe to use as a divisor.

    Parameters:
    - norm_num: Scalar number to be mapped.

    Returns:
    - The mapped magnitude (always >= 1).

    Raises:
    - ValueError: If ``norm_num`` is NaN. NaN fails every ordering comparison,
      so without this guard the function would fall through and return None.
    """
    # NaN is the only value that is not equal to itself.
    if norm_num != norm_num:
        raise ValueError("norm_min_1 received NaN and cannot normalize it")

    magnitude = abs(norm_num)
    if magnitude > 2:
        return magnitude
    return 0.5 * magnitude + 1

def MADDQN_return_reward(env):
    """
    Reward based on the price change over the next ``n`` (= 5) steps.

    The reward is the fractional change between the price at the current step
    and the price ``n`` steps later, multiplied by ``env.position``, minus an
    opportunity cost of ``0.0002 * (1 - env.position)``; the result is scaled
    by 100.

    Args:
    - env: Environment object exposing ``stock_price_data``, ``ohlcv_raw_data``,
      ``current_step`` and ``position``.

    Returns:
    - float: ``100 * (position * pct_change - opportunity_cost)``.

    Raises:
    - ValueError: If there is no price ``n`` steps after the current step.
    """
    n = 5  # How many days in the future

    future_step = env.current_step + n

    # Reading index `future_step` needs strictly more than `future_step`
    # prices, hence `<=` on the series that is actually indexed. The looser
    # check on the OHLCV array is kept so nothing that used to be rejected is
    # now accepted.
    if len(env.ohlcv_raw_data) < future_step or len(env.stock_price_data) <= future_step:
        raise ValueError("Not enough OHLCV data for the future prices")

    current_price = env.stock_price_data[env.current_step]
    future_price = env.stock_price_data[future_step]
    position = env.position

    reward = ((future_price - current_price) / current_price) * position
    # Assuming risk-free return of 5% / 252 trading days; a non-zero cost is
    # used because np.mean() in the agents counts zeros.
    opp_cost = 0.0002 * (1 - position)

    return (reward - opp_cost) * 100

def return_reward(env):
    """
    Reward comparing the current value of column 3 with its mean over the
    window of ``n`` (= 10) rows that starts at the current step.

    Both values are passed through ``norm_min_1`` before the fractional change
    is taken. The change is multiplied by ``env.position``, an opportunity cost
    of ``0.0002 * (1 - env.position)`` is subtracted, and the result is scaled
    by 100.

    Args:
    - env: Environment object exposing ``ohlcv_raw_data`` (2-D, indexed as
      ``[row, column]``), ``current_step`` and ``position``.

    Returns:
    - float: The scaled reward.

    Raises:
    - ValueError: If fewer than ``current_step + n`` rows are available.
    """
    n = 10  # How many days in the future
    step = env.current_step

    # Column 3 is presumably the close in O-H-L-C-V order — TODO confirm.
    baseline = norm_min_1(env.ohlcv_raw_data[step, 3])

    # Check if there are enough elements for the future prices
    if len(env.ohlcv_raw_data) < step + n:
        raise ValueError("Not enough OHLCV data for the future prices")

    window_mean = env.ohlcv_raw_data[step:step + n, 3].mean()
    target = norm_min_1(window_mean)

    position = env.position
    relative_change = (target - baseline) / baseline
    reward = relative_change * position

    # Assuming risk-free return of 5% / 252 trading days
    opp_cost = 0.0002 * (1 - position)

    return 100 * (reward - opp_cost)

def risk_reward(env):
    """
    Mean-over-standard-deviation reward for the window of ``n`` (= 33) rows
    that starts at the current step.

    Each value of column 3 in the window is converted to a fractional change
    relative to the value at the current step. The reward is the mean of those
    changes divided by their standard deviation, multiplied by ``env.position``.

    Args:
    - env: Environment object exposing ``ohlcv_raw_data`` (2-D, indexed as
      ``[row, column]``), ``current_step`` and ``position``.

    Returns:
    - float: ``(mean / std) * position``, or ``0.0`` when the standard
      deviation is zero.

    Raises:
    - ValueError: If fewer than ``current_step + n`` rows are available.
    """
    n = 33  # How many days in the future

    current_price = env.ohlcv_raw_data[env.current_step, 3]

    # Check if there are enough elements for the future prices
    if len(env.ohlcv_raw_data) < env.current_step + n:
        raise ValueError("Not enough OHLCV data for the future prices")

    window_prices = env.ohlcv_raw_data[env.current_step:env.current_step + n, 3]
    position = env.position

    rewards = (window_prices - current_price) / current_price

    rewards_mean = np.mean(rewards)
    rewards_std = np.std(rewards)

    # The window starts at the current step, so its first change is always 0.
    # A zero standard deviation therefore means every change is 0 and the
    # ratio would be 0/0 = NaN; report "no signal" instead.
    if rewards_std == 0:
        return 0.0

    return (rewards_mean / rewards_std) * position
   
def linear_decay(initial_epsilon, final_epsilon, current_epoch, total_epochs):
    """
    Linearly interpolate epsilon between its initial and final value.

    ``current_epoch`` is 1-based: epoch 1 yields ``initial_epsilon`` and epoch
    ``total_epochs`` yields ``final_epsilon``. The interpolated value is
    rounded to 3 decimals.

    Args:
    - initial_epsilon: Epsilon at the first epoch.
    - final_epsilon: Epsilon at the last epoch.
    - current_epoch: Epoch for which epsilon is requested.
    - total_epochs: Number of epochs in the schedule.

    Returns:
    - ``initial_epsilon`` unchanged when both bounds are equal,
      ``final_epsilon`` unchanged when there is a single epoch, otherwise the
      rounded interpolated value.

    Raises:
    - ValueError: If the rounded value is above ``initial_epsilon`` or below
      ``final_epsilon``.
    """
    # Constant schedule: nothing to interpolate.
    if initial_epsilon == final_epsilon:
        return initial_epsilon

    # A single epoch would make the per-epoch step a division by zero.
    if total_epochs == 1:
        return final_epsilon

    step_per_epoch = (final_epsilon - initial_epsilon) / (total_epochs - 1)
    epsilon = np.round((initial_epsilon - step_per_epoch) + (step_per_epoch * current_epoch), 3)

    if epsilon < final_epsilon or epsilon > initial_epsilon:
        raise ValueError(f'Epsilon value ({epsilon}) out of valid range ({initial_epsilon}:{final_epsilon})')

    return epsilon

def decimal_to_text(decimal_number):
    """
    Turn a number into a text label by shifting the decimal point three places
    to the right and dropping whatever fraction remains.

    Examples: ``0.25 -> "250"``, ``1.005 -> "1005"``, ``0.0015 -> "1"``.
    Anything whose magnitude is below 0.001 therefore becomes ``"0"``.

    The shift is done in decimal arithmetic on the number's printed form.
    Multiplying the binary float directly can land just under the intended
    integer (``1.005 * 1000 == 1004.9999999999999``), and truncating that
    produces an off-by-one label.

    Args:
    - decimal_number: Number to convert.

    Returns:
    - str: The integer part of ``decimal_number * 1000`` as text.
    """
    from decimal import Decimal, InvalidOperation

    try:
        shifted = Decimal(str(decimal_number)) * 1000
    except InvalidOperation:
        # Printed form is not a plain decimal literal; fall back to native
        # multiplication for such types.
        shifted = decimal_number * 1000

    # int() truncates toward zero.
    return str(int(shifted))

CUDA (GPU support) is not available. PyTorch is running on CPU.


  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")

In [2]:
# The objective function builds its agent under a fixed name, so that name is
# registered with every environment (and made its decision agent) up front.
agent_name = 'REWARD_DDQN_AGENT'
agent_path = f'{export_path}/{agent_name}'  # root folder for this agent's exports
metric = 'val_ror'  # passed to training as the early-stopping metric

for key, env in environments.items():
    env.add_agent(agent_name)
    env.set_decision_agent(agent_name)

def _trial_label(params):
    """Encode one trial's hyperparameters into the label used for its folder and files."""
    # NOTE(review): decimal_to_text keeps three decimal places, so every learning
    # rate below 0.001 is labelled "LR0" although the search range starts at 0.0001.
    return (f"{params['hidden_layers']}FC{params['hidden_size']}_{params['activation_function']}_" +
            f"BT{params['batch_size']}BF{params['buffer_size']}_Q{params['update_q_freq']}_" +
            f"TGT{params['update_tgt_freq']}_LR{decimal_to_text(params['opt_lre'])}")


def _ensure_directory(path):
    """Create `path` when it is missing and print which case applied."""
    if os.path.exists(path):
        print(f"Directory '{path}' already exists.")
    else:
        # exist_ok guards against the folder appearing between the check and the call.
        os.makedirs(path, exist_ok=True)
        print(f"Directory '{path}' created successfully.")


def _train_and_export(agent, key, env, update_q_freq, update_tgt_freq, test_name, test_name_path):
    """Train `agent` on `env` over the training range and write the episodic log to CSV."""
    agent.set_environment(env)
    agent.train(start_idx=training_range[0],
                end_idx=training_range[1],
                training_epsidoes=100,
                epsilon_decya_func=linear_decay,
                initial_epsilon=0.9,
                final_epsilon=0.1,
                update_q_freq=update_q_freq,
                update_tgt_freq=update_tgt_freq,
                save_path=export_path,
                val_start_idx=validation_range[0],
                val_end_idx=validation_range[1],
                early_stop=True,
                stop_metric=metric,
                stop_patience=20,
                stop_delta=0.001)

    ## Export Training Session Data to CSV
    training_log = agent.get_training_episodic_data()
    training_csv_path = test_name_path + '/' + f'TRN-{key}' + test_name + '.csv'
    training_log.to_csv(training_csv_path)


def _test_and_export(agent, key, env, test_name, test_name_path):
    """Run one test episode of `agent` on `env`, export step data and graphic, return the score."""
    agent.set_environment(env)
    agent.test(start_idx=testing_range[0],
               end_idx=testing_range[1],
               testing_epsidoes=1)

    ## Score for this environment: mean of the 'Total Reward' column
    testing_log = agent.get_testing_episodic_data()
    score = testing_log['Total Reward'].mean()

    ## Export Test data: environment and agent step data side by side
    combined_df = pd.concat([env.get_step_data(), agent.get_step_data()], axis=1)
    testing_csv_path = test_name_path + '/' + f'TST-{key}' + test_name + '.csv'
    combined_df.to_csv(testing_csv_path)

    ## Generate Trading Graphic next to the CSV (same name, .png extension)
    graphic_path = testing_csv_path[:-4] + '.png'
    agentperform.agent_stock_performance(env.stock_price_data[testing_range[0]:testing_range[1]],
                                         combined_df['Env Action'].to_numpy(),
                                         key,
                                         agent.get_name(),
                                         display_graph=True,
                                         save_graphic=True,
                                         path_file=graphic_path)
    return score


def objective(trial):
    """
    Optuna objective: train a DDQN agent with the trial's hyperparameters and
    return its mean test score.

    Steps:
    1. Sample network, optimizer and replay-memory hyperparameters from `trial`.
    2. Create (or reuse) an output folder named after those hyperparameters
       under `agent_path`.
    3. Train one agent, in turn, on every environment whose key is in
       `trn_keys`, exporting each training log to CSV.
    4. Test the agent on every environment whose key is in `tst_keys`,
       exporting step data and a trading graphic for each.

    Args:
    - trial: Optuna trial used to sample the hyperparameters.

    Returns:
    - float: Mean over the tested environments of the mean 'Total Reward'.
    """
    # Define the hyperparameters to search over.
    # The order of the suggest_* calls is kept as-is.

    ## NN hyperparameters
    sug_hidden_layers = trial.suggest_int('hidden_layers', low=1, high=3)
    sug_hidden_size = trial.suggest_int('hidden_size', low=64, high=256, step=64)
    sug_update_q_freq = trial.suggest_int('update_q_freq', low=1, high=5)
    sug_update_tgt_freq = trial.suggest_int('update_tgt_freq', low=10, high=50, step=10)

    ## Function Passing: the trial picks a name, the dict supplies the module
    activation_functions = {
        'RELU': nn.ReLU(),
        'LRELU': nn.LeakyReLU(),
        'GELU': nn.GELU(),
        'TANH': nn.Tanh()
    }
    sug_activation_function_name = trial.suggest_categorical('activation_function',
                                                             list(activation_functions.keys()))
    sug_activation_function = activation_functions[sug_activation_function_name]

    ## Optimizer hyperparameters
    sug_opt_lre = trial.suggest_float('opt_lre', 0.0001, 0.1, log=True)

    ## Memory Replay hyperparameters
    sug_buffer_size = trial.suggest_int('buffer_size', low=100, high=1500, step=100)
    sug_batch_size = trial.suggest_int('batch_size', low=10, high=150, step=10)

    # Saving Setup: folder named after the sampled hyperparameters
    test_name = _trial_label(trial.params)
    test_name_path = agent_path + '/' + test_name
    _ensure_directory(test_name_path)

    # Create Agent with hyperparameters; the environment is attached later,
    # once per environment, via set_environment.
    best_ddqn_agent = DDQN(name=agent_name,
                           environment=None,
                           reward_function=MADDQN_return_reward,
                           input_size=6,
                           hidden_size=sug_hidden_size,
                           output_size=3,
                           activation_function=sug_activation_function,
                           num_hidden_layers=sug_hidden_layers,
                           buffer_size=sug_buffer_size,
                           batch_size=sug_batch_size,
                           opt_lr=sug_opt_lre,
                           alpha=ALPHA,
                           gamma=GAMMA,
                           opt_wgt_dcy=0.0,
                           dropout_rate=0.25,
                           device=device)

    # Training Model: the same agent instance is trained on each training environment
    for key, env in environments.items():
        if key in trn_keys:
            _train_and_export(best_ddqn_agent, key, env,
                              sug_update_q_freq, sug_update_tgt_freq,
                              test_name, test_name_path)

    # Test Model: one score per tested environment
    scores = []
    for key, env in environments.items():
        if key in tst_keys:
            scores.append(_test_and_export(best_ddqn_agent, key, env, test_name, test_name_path))

    mean = np.mean(scores)
    return mean

# Run the hyperparameter search: an in-memory study that maximises the value
# returned by `objective` over 100 trials, then report the best trial.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print(f"Best value:  {study.best_value}")
print(f"Best params:  {study.best_params}")

[I 2024-04-22 20:38:33,060] A new study created in memory with name: no-name-3c65f414-753f-48f4-933a-c74659028e7a


DJI ENV: Agent REWARD_DDQN_AGENT added
DJI ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
NDAQ ENV: Agent REWARD_DDQN_AGENT added
NDAQ ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
SP500 ENV: Agent REWARD_DDQN_AGENT added
SP500 ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
AAPL ENV: Agent REWARD_DDQN_AGENT added
AAPL ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
AMZN ENV: Agent REWARD_DDQN_AGENT added
AMZN ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
GOOGL ENV: Agent REWARD_DDQN_AGENT added
GOOGL ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
MSFT ENV: Agent REWARD_DDQN_AGENT added
MSFT ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
SINE ENV: Agent REWARD_DDQN_AGENT added
SINE ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
FORD ENV: Agent REWARD_DDQN_AGENT added
FORD ENV: Agent REWARD_DDQN_AGENT assigned as decision agent
JNJ ENV: Agent REWARD_DDQN_AGENT added
JNJ ENV: Agent REWARD_DDQN_AGENT assigned as decis

[W 2024-04-22 20:39:12,053] Trial 0 failed with parameters: {'hidden_layers': 3, 'hidden_size': 256, 'update_q_freq': 1, 'update_tgt_freq': 30, 'activation_function': 'TANH', 'opt_lre': 0.00010869154827561221, 'buffer_size': 500, 'batch_size': 10} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\beckm\AppData\Local\Temp\ipykernel_22552\589820524.py", line 85, in objective
    best_ddqn_agent.train(start_idx=training_range[0],
  File "c:\Programming\MADDQN\agents\ddqn.py", line 194, in train
    tot_reward, mean_reward, std_reward, loss = self._play_episode(epsilon, update_q_freq, update_tgt_freq, 'training')
                                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Programming\MA

KeyboardInterrupt: 