# Prepare the market data for the ensemble strategy presented in 'Deep Reinforcement Learning for Automated Stock Trading: An Ensemble Strategy'

Note: Initially, the 'agent_environment_data.csv' file was used for the ensemble strategy. Because of the retraining and validation windows, the agent then traded only until 2020-04-06. The market data is therefore extended so that the agent trades at least until 2020-06-12. Afterwards, the agent's portfolio history is read only up to 2020-06-12 to match the end date of the testing data.

## Import Libraries

In [1]:
import pandas as pd
import sys 
sys.path.append('/Applications/Studium/Master/Masterarbeit/AADRL/')
from preprocessing.preprocessor import Preprocessor


## Read market data and create artificial sentiment data (to run the preprocessor)

In [2]:
# Load the raw market data. The CSV has a two-row column header (rows 0 and 1
# become a column MultiIndex) and its first column is used as the row index.
data = pd.read_csv(
    r'/Applications/Studium/Master/Masterarbeit/AADRL/data/market_data.csv',
    index_col=0,
    header=[0, 1],
)

In [3]:
# Ticker symbols are the unique values of the second column level of the market data.
unique_tics = data.columns.get_level_values(1).unique()

# Two parallel level arrays for the sentiment columns: a constant 'sentiment'
# label on the first level and the ticker symbols on the second level.
multindex = [['sentiment'] * len(unique_tics), unique_tics]

In [4]:
# Placeholder sentiment values: one row per market-data date, one zero per ticker.
# Every row is built as its own list, so the rows do not share state.
data_sentiments = [[0] * len(unique_tics) for _ in data.index]

In [5]:
# Build the artificial (all-zero) sentiment dataframe. The existing
# Preprocessor class is constructed with a sentiment table, so a placeholder
# with the same dates and tickers as the market data is supplied.
tuples = list(zip(multindex[0], multindex[1]))  # ('sentiment', <ticker>) pairs
index = pd.MultiIndex.from_tuples(tuples)

sentiments = pd.DataFrame(
    data=data_sentiments,
    index=data.index,
    columns=index,
)

In [6]:
# Keep the same start date as 'agent_environment_data.csv'; the data is only
# being extended at the end, so earlier rows are discarded.
start_mask = sentiments.index >= '2009-06-16'
sentiments = sentiments.loc[start_mask]

## Create the final market data

In [7]:
# Define the preprocessor from the market data and the placeholder (all-zero)
# sentiment table. Preprocessor is the project class imported from
# preprocessing.preprocessor; its constructor is not shown in this notebook.
preprocessor = Preprocessor(data, sentiments)

In [8]:
# Merge market data and sentiments.
# The private merge helper is called directly and its result is stored on the
# instance; the final data is later read back from `merged_datasets`.
# NOTE(review): presumably the add_* indicator methods operate on this
# attribute - confirm against the Preprocessor implementation.
preprocessor.merged_datasets = preprocessor._merge_datasets()

In [9]:
# Calculate technical indicators (MACD, RSI, ADX, CCI).
# The return values are not used, so each call is expected to update the
# preprocessor in place.
# NOTE(review): presumably each method adds its indicator to
# `preprocessor.merged_datasets` - verify in the Preprocessor class.
preprocessor.add_MACD()
preprocessor.add_RSI()
preprocessor.add_ADX()
preprocessor.add_CCI()

In [10]:
# Discard every row (axis=0) that contains at least one missing value.
final_data = preprocessor.merged_datasets.dropna(axis=0, how='any')

In [11]:
# Reshape the wide market data into the long format read by the ensemble
# strategy: one row per (date, ticker) and one column per feature.

# Flatten the frame to one row per (feature, ticker, date); after reset_index
# the value sits in the column labelled 0.
df_unstacked = final_data.unstack().reset_index()

# Turn the feature names ('level_0') back into columns, keyed by date and
# ticker ('level_1').
df_pivot = df_unstacked.pivot_table(
    index=['Date', 'level_1'],
    columns='level_0',
    values=0,
).reset_index()

# Apply the target column names: rename the key columns, lower-case every
# column label, then shorten the adjusted close price to 'adjcp'.
df_pivot = df_pivot.rename(columns={'level_1': 'tic', 'Date': 'datadate'})
df_pivot.columns = df_pivot.columns.str.lower()
df_pivot = df_pivot.rename(columns={'adj close': 'adjcp'})

# Store dates as strings without '-' separators (e.g. '2009-06-16' -> '20090616').
# regex=False makes the literal replacement explicit: the default of `regex`
# differs between pandas 1.x and 2.x, so the result no longer depends on it.
df_pivot['datadate'] = (
    df_pivot['datadate'].astype(str).str.replace('-', '', regex=False)
)

In [13]:
# Write the prepared data to disk (note: this file is now the input of the
# ensemble strategy). The raw price/volume columns and the placeholder
# sentiment column are dropped before saving.
columns_to_drop = ['open', 'close', 'high', 'low', 'volume', 'sentiment']
df_pivot.drop(columns=columns_to_drop).to_csv(
    r'/Applications/Studium/Master/Masterarbeit/AADRL/evaluate_agents/prepared_data_ensemble.csv'
)