# RL Test Bench

In [2]:
import sys
pwd = "P:/dsv/DAMII/Practical Project/rl_stock_agents"
sys.path.append(pwd)

import os
import cleandata
import numpy as np
import matplotlib.pyplot as plt
from agents import Discrete_QtabAgent, Discrete_Random_Agent
from agentperform import agent_stock_performance
import logging

# Configure logging: append every record at DEBUG level or above to
# 'example.log' (a relative path, so it lands in the current working directory)
logging.basicConfig(
    level=logging.DEBUG,
    filename='example.log',
)

## Workbench Configuration

In [3]:
# Input Data Location, File Name, Stock name for labels
import_path = "P:/dsv/DAMII/Practical Project/rl_stock_agents/input_data"

# CSV file per stock. The analysis loop zips this list with stock_name_list,
# so both lists must keep the same length and the same order.
stock_list =["amzn_daily.csv", "appl_daily.csv","tsla_daily.csv","f_daily.csv",
             "pfe_daily.csv", "coke_daily.csv","brk_daily.csv","nee_daily.csv",
             "jnj_daily.csv","pg_daily.csv"]

# Labels used as result keys, in graph titles and in image file names
stock_name_list = ['AMZN','APPL','TSLA','F','PFE','COKE','BRK','NEE','JNJ','PG']


# Configuration for Graphic Output
save_graphic_option = True       # forwarded as save_graphic= to agent_stock_performance
output_graphic_option = False    # forwarded as display_graph= to agent_stock_performance
main_path = "P:/dsv/DAMII/Practical Project/rl_stock_agents/test_bench"
# Keep the trailing slash: image file names are appended to save_path by
# plain string concatenation.
test_dir = '/_bin_sanity_check/'
save_path = main_path + test_dir

## Create the output directory (and any missing parents). exist_ok=True makes
## this a no-op when it already exists, with no check-then-create race.
os.makedirs(save_path, exist_ok=True)

# Common Model Parameters
# Each range is a list of [start, end] row-index pairs; the run loop formats
# them as half-open intervals "[start:end)".
training_range = [[0, 2000]]
test_range = [[2000, 2518]]
epochs = 100        # handed to the trained agents as 'num_training_episodes'
num_of_tests = 5    # number of repeated test runs per agent / stock / test range

## Key of the agent_stock_performance result that gets recorded per test run.
## See agentperform.py -> results dictionary for options.
## The current setting "n_trades" has its ranks based on min; that is a manual
## change in prob_evaluate.py (line 92). A feature is still needed to avoid
## this manual change.
metric = "n_trades"


# Agent Declarations

def _qtab_params(bins_per_feature, bin_padding):
    """Build the constructor keyword arguments for one Q-table agent variant.

    The Q-table variants on this bench differ only in ``bins_per_feature``
    and ``bin_padding``; the index ranges, episode count and the
    epsilon / alpha / gamma values are shared by all of them.
    """
    return {
        'bins_per_feature': bins_per_feature,
        'bin_padding': bin_padding,
        'training_idxs': training_range,
        'num_training_episodes': epochs,
        'testing_idxs': test_range,
        'epsilon': 0.1,
        'alpha': 0.1,
        'gamma': 0.9,
    }


# Display name -> agent class. The display name is also the key into
# agent_params and appears in result keys, graph labels and file names.
agent_classes = {
    'Qtab_[2,2,2,2,2]': Discrete_QtabAgent,
    'Qtab_[50,50,50,50,50]': Discrete_QtabAgent,
    'Qtab_[50,50,50,500,300]': Discrete_QtabAgent,
    'Random_Agent': Discrete_Random_Agent,
}

agent_list = [*agent_classes]

# Agents named here skip the training step of the run loop
no_train_agents = ['Random_Agent']

# Display name -> keyword arguments passed to the agent constructor
agent_params = {
    'Qtab_[2,2,2,2,2]': _qtab_params([2, 2, 2, 2, 2], 0.0),
    'Qtab_[50,50,50,50,50]': _qtab_params([50, 50, 50, 50, 50], 0.25),
    'Qtab_[50,50,50,500,300]': _qtab_params([50, 50, 50, 500, 300], 0.25),
    'Random_Agent': {
        'bins_per_feature': [50, 50, 50, 500, 300],
        'bin_padding': 0.25,
        'training_idxs': training_range,
        'testing_idxs': test_range,
    },
}
    

# stock_list and stock_name_list are defined once, near the top of this cell.
# (A verbatim second copy used to live here and silently overrode any edit
# made to the first one.) The analysis loop zips the two lists, so a length
# mismatch would silently drop stocks -- fail loudly instead.
if len(stock_list) != len(stock_name_list):
    raise ValueError(
        f'stock_list has {len(stock_list)} entries but stock_name_list has '
        f'{len(stock_name_list)}; they must pair up one-to-one')









## Code for Analysis, shouldn't have to be touched

In [4]:
# Run every configured agent on every stock.
# Layout: results[stock_name][agent_name][test_key] is a list holding one
# (metric value, trade sequence) tuple per repeated test run.
results = {}

for file_name, stock_name in zip(stock_list, stock_name_list):
    logging.info(stock_name)
    results[stock_name] = {}

    # Import File (loaded once per stock; every agent is built from it)
    df_ohlcv = cleandata.NASDAQ_csv_input(file_name, import_path)

    for agent_name, agent_class in agent_classes.items():
        # Agents listed in no_train_agents skip the training step
        needs_training = agent_name not in no_train_agents

        selected_agent = agent_class(df_ohlcv, **agent_params[agent_name])

        # Output Status + Debugging Log
        update_text = f'{agent_name}:{stock_name} - Initialized'
        print(update_text)
        logging.info(update_text)

        results[stock_name][agent_name] = {}

        # Training and Testing Pairs
        for (train_start_idx, train_end_idx), (test_start_idx, test_end_idx) \
                in zip(selected_agent.training_range, selected_agent.test_range):

            selected_agent.reset()  # Wipe out any internal values

            if needs_training:
                train_key = f'[{train_start_idx}:{train_end_idx})'
                graph_train_key = 'TRN' + train_key

                ### Output Status (printed before training starts)
                update_text = f'{agent_name}:{stock_name} - Training: {train_key}'
                print(update_text)

                ### Train Agent
                selected_agent.train(train_start_idx,
                                     train_end_idx,
                                     initial_epsilon=0.75,
                                     final_epsilon=0.1)

                ### Log Training (after it finished), plus the last step record
                logging.info(update_text)
                logging.info(selected_agent.env.step_info[-1:])

            ## Testing Section

            # Marker for Output Status, Debugging, and Return
            test_key = f'[{test_start_idx}:{test_end_idx})'

            # For Graph Title
            graph_test_key = 'TST' + test_key

            # Debugging Log
            logging.info(test_key)

            # Initializing Results Dictionary for Test Keys
            results[stock_name][agent_name][test_key] = []

            # Graph title and image-file stem are identical for every repeated
            # test run, so build them once per train/test pair.
            if needs_training:
                graph_label = f'{agent_name}$_{{{graph_train_key},{graph_test_key}}}$'
                image_file_stem = (f'{agent_name}-{stock_name}'
                                   f'-TRN_{train_start_idx}_{train_end_idx}'
                                   f'-TST_{test_start_idx}_{test_end_idx}')
            else:
                graph_label = f'{agent_name}$_{{{graph_test_key}}}$'
                image_file_stem = (f'{agent_name}-{stock_name}'
                                   f'-TST_{test_start_idx}_{test_end_idx}')

            for idx in range(1, num_of_tests + 1):

                graph_image_file = f'{image_file_stem}-{idx}.png'
                # save_path already ends with a slash
                image_file_path = save_path + graph_image_file

                # Output Status. Built from adjacent f-strings rather than a
                # backslash continuation inside the literal, which used to
                # embed the source indentation in the message.
                update_text = (f'{agent_name}:{stock_name} - '
                               f'Testing: {test_key} - Test #: {idx}')
                print(update_text)

                logging.info(update_text)

                selected_agent.test(test_start_idx, test_end_idx)

                # One action per recorded environment step of this test run
                trades_list = [entry['Action']
                               for entry in selected_agent.env.step_info]
                trade_seq = np.array(trades_list)

                logging.info(selected_agent.env.step_info[-2:])
                logging.info(trade_seq)

                test_result = agent_stock_performance(
                    df_ohlcv['close'].iloc[test_start_idx:test_end_idx].to_numpy(),
                    trade_seq, stock_name, graph_label,
                    display_graph=output_graphic_option,
                    save_graphic=save_graphic_option,
                    path_file=image_file_path)

                results[stock_name][agent_name][test_key].append(
                    (test_result[metric], trade_seq))

Qtab_[2,2,2,2,2]:AMZN - Initialized
Qtab_[2,2,2,2,2]:AMZN - Training: [0:2000)
Qtab_[2,2,2,2,2]:AMZN -                      Testing: [2000:2518) - Test #: 1
Qtab_[2,2,2,2,2]:AMZN -                      Testing: [2000:2518) - Test #: 2
Qtab_[2,2,2,2,2]:AMZN -                      Testing: [2000:2518) - Test #: 3
Qtab_[2,2,2,2,2]:AMZN -                      Testing: [2000:2518) - Test #: 4
Qtab_[2,2,2,2,2]:AMZN -                      Testing: [2000:2518) - Test #: 5
Qtab_[50,50,50,50,50]:AMZN - Initialized
Qtab_[50,50,50,50,50]:AMZN - Training: [0:2000)
Qtab_[50,50,50,50,50]:AMZN -                      Testing: [2000:2518) - Test #: 1
Qtab_[50,50,50,50,50]:AMZN -                      Testing: [2000:2518) - Test #: 2
Qtab_[50,50,50,50,50]:AMZN -                      Testing: [2000:2518) - Test #: 3
Qtab_[50,50,50,50,50]:AMZN -                      Testing: [2000:2518) - Test #: 4
Qtab_[50,50,50,50,50]:AMZN -                      Testing: [2000:2518) - Test #: 5
Qtab_[50,50,50,500,300]:AMZ

# Aggregating Test Bench Data

In [5]:
import pandas as pd

# Keys under which the run loop stored each test range, e.g. "[2000:2518)"
test_keys = [f"[{bounds[0]}:{bounds[1]})" for bounds in test_range]


def _mean_and_std(runs):
    """Return (mean, std) of the metric values of a list of test runs.

    Each run is a (metric value, trade sequence) tuple; only the metric
    value is aggregated.
    """
    metric_values = [run[0] for run in runs]
    return (np.mean(metric_values), np.std(metric_values))


# aggerate_results[agent][stock][test_key] -> (mean, std) over the repeated runs
aggerate_results = {
    agent_key: {
        stock_key: {
            test_key: _mean_and_std(results[stock_key][agent_key][test_key])
            for test_key in test_keys
        }
        for stock_key in stock_name_list
    }
    for agent_key in agent_list
}

display(aggerate_results)

# Flatten into one table: a column per agent, a row per "<stock>-<test_key>" run
model_list = list(aggerate_results)

# Ordered, de-duplicated run names (dict.fromkeys keeps first-seen order)
dataset_name = list(dict.fromkeys(
    stock_key + "-" + test_key
    for per_stock in aggerate_results.values()
    for stock_key, per_test in per_stock.items()
    for test_key in per_test
))

# Mean score of every run, rounded to 2 decimals; one inner list per agent
scores = [
    [np.round(mean_std[0], 2)
     for per_test in per_stock.values()
     for mean_std in per_test.values()]
    for per_stock in aggerate_results.values()
]

# Transpose so that agents become columns and runs become rows
score_array = np.array(scores).T

df = pd.DataFrame(score_array, columns=model_list)
df['dataset'] = dataset_name

display(df)

# Average score of each agent across all runs
display(df[model_list].mean())


{'Qtab_[2,2,2,2,2]': {'AMZN': {'[2000:2518)': (29.4, 18.94835085172322)},
  'APPL': {'[2000:2518)': (24.0, 2.280350850198276)},
  'TSLA': {'[2000:2518)': (15.8, 10.703270528207723)},
  'F': {'[2000:2518)': (19.8, 4.664761515876241)},
  'PFE': {'[2000:2518)': (22.8, 5.67097875150313)},
  'COKE': {'[2000:2518)': (14.6, 3.826225293941798)},
  'BRK': {'[2000:2518)': (38.2, 21.460661685977907)},
  'NEE': {'[2000:2518)': (27.4, 2.33238075793812)},
  'JNJ': {'[2000:2518)': (246.2, 1.1661903789690602)},
  'PG': {'[2000:2518)': (13.8, 2.3151673805580453)}},
 'Qtab_[50,50,50,50,50]': {'AMZN': {'[2000:2518)': (24.6, 4.758150901348127)},
  'APPL': {'[2000:2518)': (13.4, 1.624807680927192)},
  'TSLA': {'[2000:2518)': (12.4, 1.9595917942265424)},
  'F': {'[2000:2518)': (23.6, 5.642694391866354)},
  'PFE': {'[2000:2518)': (14.4, 1.4966629547095764)},
  'COKE': {'[2000:2518)': (14.0, 1.8973665961010275)},
  'BRK': {'[2000:2518)': (16.0, 1.2649110640673518)},
  'NEE': {'[2000:2518)': (25.4, 5.043808085

Unnamed: 0,"Qtab_[2,2,2,2,2]","Qtab_[50,50,50,50,50]","Qtab_[50,50,50,500,300]",Random_Agent,dataset
0,29.4,24.6,33.6,130.6,AMZN-[2000:2518)
1,24.0,13.4,12.4,133.4,APPL-[2000:2518)
2,15.8,12.4,13.4,129.2,TSLA-[2000:2518)
3,19.8,23.6,25.0,129.8,F-[2000:2518)
4,22.8,14.4,21.0,126.8,PFE-[2000:2518)
5,14.6,14.0,31.6,129.6,COKE-[2000:2518)
6,38.2,16.0,13.2,123.8,BRK-[2000:2518)
7,27.4,25.4,25.2,132.2,NEE-[2000:2518)
8,246.2,11.8,24.4,128.0,JNJ-[2000:2518)
9,13.8,12.6,29.4,130.6,PG-[2000:2518)


Qtab_[2,2,2,2,2]            45.20
Qtab_[50,50,50,50,50]       16.82
Qtab_[50,50,50,500,300]     22.92
Random_Agent               129.40
dtype: float64

# Significance

In [6]:
import prob_evaluate

# Level handed to both tests below as their second positional argument.
# NOTE(review): presumably a confidence level (1 - alpha) -- confirm in prob_evaluate.
confidence = 0.95

# Rank array built from the per-run mean scores of each agent
test = prob_evaluate.generate_rank_array_from_dataframe(
    df, model_list, equal_rank_behav="mean")
print(test)

# Iman-Davenport test over the rank array
stat, critical_f_value, reject_null_hypo = prob_evaluate.iman_davenport_test(
    test, confidence)

print(f'f-score: {stat:.3f}',
      f'f-critical: {critical_f_value:.3f}',
      f'Significant: {reject_null_hypo}',
      sep='\n')

# Nemenyi test on the same rank array, labelled with the agent names
results1 = prob_evaluate.nemenyi_test(test, confidence, model_list)
display(results1)

[[3 2 2 4 2 3 2 2 1 3]
 [4 3 4 3 4 4 3 3 4 4]
 [2 4 3 2 3 2 4 4 3 2]
 [1 1 1 1 1 1 1 1 2 1]]
f-score: 18.108
f-critical: 2.960
Significant: True


[(('Qtab_[2,2,2,2,2]', 'Qtab_[50,50,50,50,50]'),
  1.2000000000000002,
  1.4820581410097693,
  False),
 (('Qtab_[2,2,2,2,2]', 'Qtab_[50,50,50,500,300]'),
  0.5,
  1.4820581410097693,
  False),
 (('Qtab_[2,2,2,2,2]', 'Random_Agent'),
  1.2999999999999998,
  1.4820581410097693,
  False),
 (('Qtab_[50,50,50,50,50]', 'Qtab_[50,50,50,500,300]'),
  0.7000000000000002,
  1.4820581410097693,
  False),
 (('Qtab_[50,50,50,50,50]', 'Random_Agent'), 2.5, 1.4820581410097693, True),
 (('Qtab_[50,50,50,500,300]', 'Random_Agent'),
  1.7999999999999998,
  1.4820581410097693,
  True)]