# Workbench Setup

## Google Colab Installation

In [1]:
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Code specific to Google Colab
    print("Running in Google Colab")

    # Additional setup commands for Colab
    !pip install neuralforecast
    !pip install gymnasium
else:
    # Code for other environments (e.g., VS Code)
    print("Running in another environment (e.g., VS Code)")

Running in another environment (e.g., VS Code)


In [2]:
if IN_COLAB:
    # Retrive required files
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/environments/stockenv.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/cleandata.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/data.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/epsilon_decay.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/agentperform.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agent/ddqn.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/agent/random.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/reward/stockmarket.py            
    # Move all directories and files from content/raw.githubusercontent.com to content/
    !mv /content/raw.githubusercontent.com/* /content/

    # Delete the raw.githubusercontent.com directory
    !rm -rf /content/raw.githubusercontent.com

## Software Environment Setup

In [3]:
import os
import sys
import torch
import optuna
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import utilities.agentperform as agentperform
import utilities.cleandata as cln 
from utilities.epsilon_decay import linear_decay
from utilities.data import UniStockEnvDataStruct, TimesNetProcessing
from agents.ddqn import DdqnAgent
from rewards.stockmarket import future_profit, risk_reward
from environments.stockenv import ContinuousOHLCVEnv
from datetime import datetime
import logging
# Set the Nixtla flag first; per the original note this prevents a warning.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Attach a no-op handler to the two PyTorch Lightning loggers named below.
for _lightning_logger_name in ("pytorch_lightning.utilities.rank_zero",
                               "pytorch_lightning.accelerators.cuda"):
    logging.getLogger(_lightning_logger_name).addHandler(logging.NullHandler())

def set_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, the CUDA generators are seeded too and cuDNN is
    switched to deterministic, non-benchmarking mode.

    Args:
        seed: Seed value passed unchanged to each generator.
    """
    # CPU-side generators: Python, NumPy, PyTorch (in that order).
    for seed_generator in (random.seed, np.random.seed, torch.manual_seed):
        seed_generator(seed)

    # Nothing more to do without a GPU.
    if not torch.cuda.is_available():
        return

    # Current CUDA device, then every CUDA device (multi-GPU).
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Force deterministic cuDNN behaviour.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False



# Pick the compute device: 'cpu' when CUDA is unavailable, otherwise the
# value returned by torch.cuda.current_device().
if not torch.cuda.is_available():
    device = 'cpu'
    print("CUDA (GPU support) is not available. PyTorch is running on CPU.")
else:
    device = torch.cuda.current_device()
    print(f"GPU Name: {torch.cuda.get_device_name(device)}")


def decimal_to_text(decimal_number, scale=1000):
    """Render a decimal number as integer text, e.g. for use in file names.

    The number is multiplied by ``scale`` and rounded to the nearest integer.
    Rounding (rather than truncating) keeps binary floating-point error from
    shifting the label down by one: ``1.005 * 1000`` evaluates to
    ``1004.9999999999999``, which truncation would label "1004".

    Args:
        decimal_number: The value to convert.
        scale: Multiplier applied before rounding. The default of 1000 keeps
            three decimal places; pass a larger value to distinguish numbers
            smaller than 0.001, which otherwise all map to "0" or "1".

    Returns:
        The scaled, rounded value as a string.
    """
    # int(...) guarantees a plain Python int even for NumPy scalar inputs.
    scaled_value = int(round(decimal_number * scale))
    return str(scaled_value)

CUDA (GPU support) is not available. PyTorch is running on CPU.


## RL Environment Setup

In [33]:
RANDOM_SEED = 42
set_seed(RANDOM_SEED)

# Project root used to build the local paths in this notebook.
# `pwd` and `export_path` are read by later cells regardless of where the
# notebook runs, so they are defined for Colab as well.
if IN_COLAB:
    # The Colab setup cell downloads the project files into /content.
    pwd = "/content"
else:
    # Default checkout location; set the MADDQN_HOME environment variable to
    # point at a different local copy of the project.
    pwd = os.environ.get("MADDQN_HOME", "C:/programming/MADDQN")
    sys.path.append(pwd)

# Output Path Location for CSV export
export_path = pwd + "/output_data"

# Input Data Location, File Name, Stock name for labels
input_url = 'https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/input_data'

stock_inputs ={'DJI':'^DJI_daily.csv',
               'NDAQ': '^IXIC_daily.csv',
               'SP500': '^SPX_daily.csv',
               'AAPL': 'AAPL_daily.csv',
               'AMZN': 'AMZN_daily.csv',
               'GOOGL': 'GOOGL_daily.csv',
               'MSFT': 'MSFT_daily.csv',
               'SINE': 'sine_wave_daily.csv',
               'FORD': 'F_daily.csv',
               'JNJ': 'JNJ_daily.csv',
               'NEE': 'NEE_daily.csv',
               'PFE': 'PFE_daily.csv',
               'TSLA': 'TSLA_daily.csv',
               'COKE': 'COKE_daily.csv',
               'PG': 'PG_daily.csv'}

# Training Inputs
trn_keys = ['DJI']#,'NDAQ','SP500']
training_range = ('2007-01-01','2019-12-31')
trn_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in training_range]

# Validation Inputs
val_keys = trn_keys
validation_range = ('2021-01-01', '2022-12-31')
val_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in validation_range]

# Testing Inputs
# NOTE(review): the testing range is identical to the validation range above -- confirm this is intended.
tst_keys = trn_keys#['AAPL','AMZN','GOOGL','MSFT','FORD','JNJ','NEE','PFE','TSLA','COKE','PG']
testing_range = ('2021-01-01', '2022-12-31')
tst_dt_range = [datetime.strptime(dt_str, "%Y-%m-%d") for dt_str in testing_range]

window_size = 28 # Needs to match the size Timesnet is trained on
price_based_on = 'close'
columns = ['open','high','low','close','volume']

## RL Environment Generation

In [34]:
env_data = {}
env = {}

for stock, file in stock_inputs.items():
    if stock in set(trn_keys + val_keys + tst_keys):
        # Import
        df = cln.YAHOO_csv_input(file, input_url)
        data_dic = UniStockEnvDataStruct(df,price_based_on,window_size)
        env_data[stock] = data_dic
        env[stock] = ContinuousOHLCVEnv(name=stock,
                                        ohlcv_data = env_data[stock]['rw_raw_env'] ,
                                        stock_price_data= env_data[stock]['rw_raw_price_env'],
                                        commission_rate

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [35]:
# Index ranges per stock for the training, validation and testing date windows.
trn_idx, val_idx, tst_idx = {}, {}, {}

# Walk the stocks in `stock_inputs` order; for each one, fill whichever
# split(s) it belongs to (training first, then validation, then testing).
for stock in stock_inputs:
    for split_keys, split_dt_range, split_idx in ((trn_keys, trn_dt_range, trn_idx),
                                                  (val_keys, val_dt_range, val_idx),
                                                  (tst_keys, tst_dt_range, tst_idx)):
        if stock in split_keys:
            split_idx[stock] = env_data[stock].gen_rw_idxs(split_dt_range)


[datetime.datetime(2007, 1, 1, 0, 0), datetime.datetime(2019, 12, 31, 0, 0)]

{'DJI': (0, 3244)}

[datetime.datetime(2021, 1, 1, 0, 0), datetime.datetime(2022, 12, 31, 0, 0)]

{'DJI': (3525, 4000)}

[datetime.datetime(2021, 1, 1, 0, 0), datetime.datetime(2022, 12, 31, 0, 0)]

{'DJI': (3525, 4000)}

# Agent Setup

## Preprocessing

### Timesnet Preprocessing Module (Option)

In [36]:
# Folder handed to TimesNetProcessing together with the environment data.
tn_path = f"{pwd}/gen_data/timesnet/"
timesnet = TimesNetProcessing(env_data, tn_path)

c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\utilities\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
Seed set to 42


### Using CSV export from Timesnet Preprocessing Model (Option)

In [37]:
# Location of the exported TimesNet CSV files. The training cell builds each
# file location as `csv_path + stock_tn[key]`, so `csv_path` must end with a
# slash in both branches.
if IN_COLAB:
    # Input Data Location, File Name, Stock name for labels
    # NOTE(review): this points at `input_data`, while the local branch reads
    # from `gen_data/csvs` -- confirm the *_tn.csv files are published here.
    csv_path = 'https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/input_data/'
else:
    csv_path  = pwd +'/gen_data/csvs/'

# Stock label -> TimesNet CSV file name
stock_tn ={'DJI':'DJI_tn.csv',
               'NDAQ': 'NDAQ_tn.csv',
               'SP500': 'SP500_tn.csv',
               'AAPL': 'AAPL_tn.csv',
               'AMZN': 'AMZN_tn.csv',
               'GOOGL': 'GOOGL_tn.csv',
               'MSFT': 'MSFT_tn.csv',
               'FORD': 'FORD_tn.csv',
               'JNJ': 'JNJ_tn.csv',
               'NEE': 'NEE_tn.csv',
               'PFE': 'PFE_tn.csv',
               'TSLA': 'TSLA_tn.csv',
               'COKE': 'COKE_tn.csv',
               'PG': 'PG_tn.csv'}

# `tn_path` comes from the "Timesnet Preprocessing Module" cell, which must
# have been run before this one.
timesnet = TimesNetProcessing(env_data,tn_path)

c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\utilities\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
Seed set to 42


## DDQN Setup

In [38]:
# Hand-configured DDQN agent. It is created without an environment; the
# training cell attaches one with `set_environment` before calling `train`.
# NOTE(review): `opt_lr` is 0 here, whereas the tuning cell searches
# 1e-4..1e-1 for the same argument -- confirm this value is intended.
best_ddqn_agent = DdqnAgent(
    # Identity, reward and state preprocessing
    name='profit',
    environment=None,
    reward_function=future_profit,
    reward_params={'n': 5},
    env_state_mod_func=timesnet.csv_process,
    # Network shape
    input_size=11,
    hidden_size=256,
    output_size=3,
    activation_function=nn.ReLU(),
    num_hidden_layers=2,
    dropout_rate=0.25,
    # Replay memory
    buffer_size=100,
    batch_size=20,
    # Learning settings
    opt_lr=0,
    alpha=0.001,
    gamma=0.9,
    opt_wgt_dcy=0.0,
    device=device,
)

## Train DDQN

In [39]:
# Train the agent on every environment whose key is listed in `trn_keys`.
for key, rl_env in env.items():

    # Skip environments that are not part of the training set.
    if key not in trn_keys:
        continue

    # Load this stock's TimesNet CSV before training on it.
    timesnet.upload_csv(csv_path + stock_tn[key])

    # Register the agent with the environment and make it the decision agent.
    # (`rl_env` is the same object as `env[key]`.)
    profit_agent_name = best_ddqn_agent.get_name()
    rl_env.add_agent(profit_agent_name)
    rl_env.set_decision_agent(profit_agent_name)
    best_ddqn_agent.set_environment(rl_env)

    trn_span = trn_idx[key]
    val_span = val_idx[key]
    best_ddqn_agent.train(
        start_idx=trn_span[0],
        end_idx=trn_span[1],
        training_episodes=100,
        epsilon_decya_func=linear_decay,  # keyword spelled as the call site has it
        initial_epsilon=0.9,
        final_epsilon=0.1,
        update_q_freq=1,
        update_tgt_freq=10,
        save_path=export_path,
        val_start_idx=val_span[0],
        val_end_idx=val_span[1],
        early_stop=False,
        stop_metric='val_ror',
        stop_patience=20,
        stop_delta=0.001,
    )

    ## Export Training Session Data to CSV
    ddqn_trn = best_ddqn_agent.get_training_episodic_data()
    ddqn_trn.to_csv('test.csv')
    display(ddqn_trn)

    # Detach the agent again once its training run is finished.
    rl_env.remove_agent(profit_agent_name)

DJI ENV: Agent profit added
DJI ENV: Agent profit assigned as decision agent
profit: Training Initialized on DJI[0:3244] -> Validation on DJI[3525:4000]
profit: EP 16 of 100 Finished -> ΔQ1 = 0.11, ΔQ2 = 0.04 | ∑R = 151.97, μR = 0.05 σR = 1.64                                                                                                              

KeyboardInterrupt: 

In [16]:
# Show the step-level records of the DJI environment and of the agent, then
# export the environment's step data to CSV.
dji_env = env['DJI']
display(dji_env.get_step_data())
display(best_ddqn_agent.get_step_data())
dji_env.csv_export_step_data("DJI_training.csv")

Unnamed: 0,Step,idx,Portfolio Value,Cash,Stock Value,Stock Holdings,Stock Price,State,Available Actions,Env Action,New Portfolio Value,New Cash,New Stock Value,New Stock Holdings,New Commission Cost,Total Commission Cost
0,1,81,100000.00000,100000.00000,0.00,0,13424.39,"([[13062.75, 13137.76, 13041.3, 13136.14, 2489...","(H, B)",B,99530.14635,5559.41635,93970.73,7,469.85365,469.85365
1,2,82,99530.14635,5559.41635,93974.72,7,13424.96,"([[13133.94, 13256.33, 13130.53, 13211.88, 251...","(S, H)",S,99064.26275,99064.26275,0.00,0,469.87360,939.72725
2,3,83,99064.26275,99064.26275,0.00,0,13295.01,"([[13206.65, 13246.82, 13196.03, 13241.38, 247...","(H, B)",B,98598.93740,5533.86740,93065.07,7,465.32535,1405.05260
3,4,84,98598.93740,5533.86740,94376.45,7,13482.35,"([[13243.08, 13284.53, 13228.78, 13264.62, 236...","(S, H)",S,99438.43515,99438.43515,0.00,0,471.88225,1876.93485
4,5,85,99438.43515,99438.43515,0.00,0,13553.73,"([[13264.13, 13317.69, 13260.8, 13312.97, 2061...","(H, B)",B,98964.05460,4087.94460,94876.11,7,474.38055,2351.31540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,228,308,35487.90405,35487.90405,0.00,0,13058.20,"([[12531.79, 12531.95, 12376.7, 12422.86, 2350...","(H, B)",B,35357.32205,9240.92205,26116.40,2,130.58200,60741.70795
228,229,309,35357.32205,9240.92205,25939.08,2,12969.54,"([[12421.88, 12476.76, 12293.34, 12302.46, 235...","(S, H)",S,35050.30665,35050.30665,0.00,0,129.69540,60871.40335
229,230,310,35050.30665,35050.30665,0.00,0,13020.83,"([[12303.92, 12382.16, 12196.87, 12216.4, 2090...","(H, B)",B,34920.09835,8878.43835,26041.66,2,130.20830,61001.61165
230,231,311,34920.09835,8878.43835,25628.70,2,12814.35,"([[12215.92, 12326.47, 12176.11, 12262.89, 273...","(S, H)",S,34378.99485,34378.99485,0.00,0,128.14350,61129.75515


Unnamed: 0,profit Action,profit Action Type,profit Q_Val Sell,profit Q_Val Hold,profit Q_Val Buy,profit Reward
0,B,Best,0.010607,-0.075722,-0.048623,1.602233
1,S,Best,0.029220,-0.080427,-0.008248,-0.020000
2,B,Best,0.023580,-0.055169,-0.027450,2.560434
3,S,Best,0.039352,-0.069941,0.010973,-0.020000
4,B,Best,-0.000628,-0.138312,0.044113,-0.058213
...,...,...,...,...,...,...
227,B,Best,-0.015168,-0.171995,0.083201,-2.391754
228,S,Best,0.028471,-0.133362,0.032458,-0.020000
229,B,Best,-0.016968,-0.145591,0.056702,-1.448832
230,S,Best,0.041449,-0.117988,-0.004962,-0.020000


DJI: Step data exported to DJI_training.csv


## Hypertuning

In [None]:
# The agent name is fixed before the objective runs so that the agent can be
# linked to every environment up front.
agent_name = 'REWARD_DDQN_AGENT'
agent_path = export_path + '/' + agent_name
metric = 'val_ror'

# Hyperparameters that are not searched over; same values as the hand-configured
# agent in the "DDQN Setup" cell.
ALPHA = 0.001
GAMMA = 0.9

# `env` is the {stock: environment} dict built in the environment-generation
# cell. It is aliased (not rebound) so the loops below do not overwrite it.
environments = env

for rl_env in environments.values():
    rl_env.add_agent(agent_name)
    rl_env.set_decision_agent(agent_name)


def objective(trial):
    """Optuna objective: train and test one DDQN agent for a sampled configuration.

    Samples network, optimizer and replay-memory hyperparameters from ``trial``,
    trains an agent on every environment in ``trn_keys``, tests it on every
    environment in ``tst_keys`` and writes the training/testing CSVs and the
    trading graphic into a per-configuration directory under ``agent_path``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The mean, over the tested environments, of the mean 'Total Reward'
        of the testing episodes.
    """
    # Define the hyperparameters to search over

    ## NN hyperparameters
    sug_hidden_layers = trial.suggest_int('hidden_layers', low=1, high=3)
    sug_hidden_size = trial.suggest_int('hidden_size', low=256, high=1280, step=64)
    sug_update_q_freq = trial.suggest_int('update_q_freq', low=1, high=5)
    sug_update_tgt_freq = trial.suggest_int('update_tgt_freq', low=10, high=50, step=10)

    ## Function Passing: Optuna samples the name, the module is looked up here
    activation_functions = {
        'RELU': nn.ReLU(),
        'LRELU': nn.LeakyReLU(),
        'GELU': nn.GELU(),
        'TANH': nn.Tanh(),
    }
    sug_activation_function_name = trial.suggest_categorical('activation_function',
                                                             list(activation_functions.keys()))
    sug_activation_function = activation_functions[sug_activation_function_name]

    ## Optimizer hyperparameters
    sug_opt_lre = trial.suggest_float('opt_lre', 0.0001, 0.1, log=True)

    ## Memory Replay hyperparameters
    # NOTE(review): batch_size (up to 150) can exceed buffer_size (from 100) --
    # confirm the agent's replay sampling handles that combination.
    sug_buffer_size = trial.suggest_int('buffer_size', low=100, high=1500, step=100)
    sug_batch_size = trial.suggest_int('batch_size', low=10, high=150, step=10)

    # Saving Setup
    ## Create Notation for Hyperparameter Setup
    # Scientific notation keeps learning rates below 1e-3 distinguishable
    # (an integer "value * 1000" label maps all of them to "0").
    lr_label = f'{sug_opt_lre:.2e}'
    test_name = (f'{sug_hidden_layers}FC{sug_hidden_size}_{sug_activation_function_name}_' +
                 f'BT{sug_batch_size}BF{sug_buffer_size}_Q{sug_update_q_freq}_' +
                 f'TGT{sug_update_tgt_freq}_LR{lr_label}')

    ## Create Dir to save results
    test_name_path = agent_path + '/' + test_name
    if not os.path.exists(test_name_path):
        os.makedirs(test_name_path)
        print(f"Directory '{test_name_path}' created successfully.")
    else:
        print(f"Directory '{test_name_path}' already exists.")

    # Create Agent with hyperparameters
    # NOTE(review): the state preprocessing and input size mirror the agent in
    # the "DDQN Setup" cell (`timesnet.csv_process`, 11 inputs) -- confirm this
    # is the state representation to tune.
    trial_agent = DdqnAgent(name=agent_name,
                            environment=None,
                            reward_function=future_profit,
                            reward_params={'n': 5},
                            env_state_mod_func=timesnet.csv_process,
                            input_size=11,
                            hidden_size=sug_hidden_size,
                            output_size=3,
                            activation_function=sug_activation_function,
                            num_hidden_layers=sug_hidden_layers,
                            buffer_size=sug_buffer_size,
                            batch_size=sug_batch_size,
                            opt_lr=sug_opt_lre,
                            alpha=ALPHA,
                            gamma=GAMMA,
                            opt_wgt_dcy=0.0,
                            dropout_rate=0.25,
                            device=device)

    # Training Model
    for key, rl_env in environments.items():
        if key not in trn_keys:
            continue

        # Load this stock's TimesNet CSV, as the training cell does.
        timesnet.upload_csv(csv_path + stock_tn[key])
        trial_agent.set_environment(rl_env)

        # Start/end arguments are row indices (from trn_idx / val_idx),
        # not the date strings in training_range / validation_range.
        trial_agent.train(start_idx=trn_idx[key][0],
                          end_idx=trn_idx[key][1],
                          training_episodes=100,
                          epsilon_decya_func=linear_decay,
                          initial_epsilon=0.9,
                          final_epsilon=0.1,
                          update_q_freq=sug_update_q_freq,
                          update_tgt_freq=sug_update_tgt_freq,
                          save_path=export_path,
                          val_start_idx=val_idx[key][0],
                          val_end_idx=val_idx[key][1],
                          early_stop=True,
                          stop_metric=metric,
                          stop_patience=20,
                          stop_delta=0.001)

        ## Export Training Session Data to CSV
        ddqn_trn = trial_agent.get_training_episodic_data()
        trn_df_file_name = f'TRN-{key}' + test_name + '.csv'
        trn_df_save_path = test_name_path + '/' + trn_df_file_name
        ddqn_trn.to_csv(trn_df_save_path)

    # Test Model
    scores = []
    for key, rl_env in environments.items():
        if key not in tst_keys:
            continue

        timesnet.upload_csv(csv_path + stock_tn[key])
        trial_agent.set_environment(rl_env)
        tst_start, tst_end = tst_idx[key][0], tst_idx[key][1]
        trial_agent.test(start_idx=tst_start,
                         end_idx=tst_end,
                         testing_episodes=1)

        ## Save Test Metric Result(s)
        ddqn_tst = trial_agent.get_testing_episodic_data()
        scores.append(ddqn_tst['Total Reward'].mean())

        ## Export Test data: environment and agent step records side by side
        env_steps = rl_env.get_step_data()
        agent_steps = trial_agent.get_step_data()
        combined_df = pd.concat([env_steps, agent_steps], axis=1)
        tst_df_file_name = f'TST-{key}' + test_name + '.csv'
        tst_df_save_path = test_name_path + '/' + tst_df_file_name
        combined_df.to_csv(tst_df_save_path)

        ## Generate Trading Graphic (same path as the CSV, with a .png extension)
        tst_graph_file_name = tst_df_save_path[:-4] + '.png'
        agentperform.agent_stock_performance(rl_env.stock_price_data[tst_start:tst_end],
                                             combined_df['Env Action'].to_numpy(),
                                             key,
                                             trial_agent.get_name(),
                                             display_graph=True,
                                             save_graphic=True,
                                             path_file=tst_graph_file_name)

    return np.mean(scores)


# TPE is Optuna's default sampler; seeding it makes the sequence of suggested
# hyperparameters repeatable, in line with the notebook's explicit seeding.
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=100)

print("Best value: ", study.best_value)
print("Best params: ", study.best_params)