# Experiment 02

This experiment reproduces, as closely as possible, the experiment proposed by Martinez (https://www.researchgate.net/publication/221533996_From_an_artificial_neural_network_to_a_stock_market_day-trading_system_A_case_study_on_the_BMF_BOVESPA).

In Summary:
- Each node of the ANN implements a logistic function.
- 3 classic layers:
    - Up to 33 nodes in the input layer
    - 2 nodes in the output layer
    - The hidden layer has sqrt(# input nodes * # output nodes) nodes
- 100,000 epochs
- Constant learning rate equal to 0.01
- The momentum of the back-propagation algorithm is 0.8


In [1]:
import numpy as np
import pandas as pd

import Utils 
from Utils import calculateEMA, calculateBB, addPreviousDays, removeColumns, getPeriod, mape, mae, createDate, getDate, printResult

import matplotlib.pyplot as plt
import matplotlib.dates as mpdates
import mplfinance as mpf
from mplfinance.original_flavor import candlestick_ohlc

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import r2_score

### Constants and Parameters

In [2]:
# Column names assigned to the raw CSV frames right after loading.
dfColumns = ['date','open', 'high', 'low','close','tickvol','volume','spread']
# Price columns handed to calculateEMA, calculateBB and addPreviousDays.
newColumns = ['open', 'high', 'low', 'close']
# 15-feature input set: today's open, high/low of the 5 previous days, EMA of high/low, UB/LB of close.
col15 = ['open','d-5_high','d-5_low','d-4_high','d-4_low','d-3_high','d-3_low','d-2_high','d-2_low','d-1_high','d-1_low','EMA_high','EMA_low','UB_close','LB_close']
# 25-feature input set: today's open, OHLC of the 5 previous days, EMA of high/low, UB/LB of close.
col25 = ['open','d-5_open','d-5_high','d-5_low','d-5_close','d-4_open','d-4_high','d-4_low','d-4_close','d-3_open','d-3_high','d-3_low','d-3_close','d-2_open','d-2_high','d-2_low','d-2_close','d-1_open','d-1_high','d-1_low','d-1_close','EMA_high' ,'EMA_low','UB_close','LB_close']
# Day count passed to calculateEMA, calculateBB and addPreviousDays.
DAYS = 5

In [3]:
# Bounds of the evaluation period (passed to getPeriod); dates use the 'YYYY.MM.DD' text format of the CSV.
TEST_BEGIN = '2020.06.01'
TEST_END = '2021.06.01'

# Training window used by exp02: rows whose index label is in [i - WINDOW, i) for a test chunk starting at i.
WINDOW = 128
# Step between the starts of consecutive test chunks in exp02 (also the chunk length in rows).
DRIFT = 10

# Number of times test30timesExp02 repeats exp02 before averaging.
TIMES = 10

def safeDate(df, begin, period):
    """Return the date stored ``period * 3`` index labels before ``begin``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column; index labels must support integer
        subtraction (e.g. the default RangeIndex).
    begin : str
        Value looked up in the ``date`` column; the first match is used.
    period : int
        Base offset; the function steps back three times this many labels.

    Raises
    ------
    IndexError
        If ``begin`` is not present in ``df['date']`` or the shifted label
        does not exist in the index.
    """
    # Label of the first row whose date equals `begin`.
    beginLabel = df.index[(df['date'] == begin).to_numpy()][0]

    # Step back three periods and read the date stored at that label.
    targetLabel = beginLabel - period*3
    matches = df.loc[df.index == targetLabel, 'date']

    return matches.iloc[0]

### Auxiliary functions

In [4]:
def Normalize(df):
    """Return a copy of ``df`` with every non-date column scaled by twice its maximum.

    Each column other than ``date`` is divided by ``2 * column.max()``, so a
    column with a positive maximum peaks at 0.5 after scaling.

    The input frame is NOT modified: earlier versions wrote the scaled values
    back into ``df``, which silently changed the frame held by the calling
    cell. The return value is unchanged in shape and meaning.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``date`` column plus numeric columns, including
        ``high`` and ``low``.

    Returns
    -------
    tuple
        ``(normalized, maxVal)`` where ``normalized`` is the scaled copy and
        ``maxVal`` is the Series of the raw ``high`` and ``low`` maxima.
    """
    # Raw maxima of the target columns, captured before scaling.
    maxVal = df[['high', 'low']].max()

    # assumes removeColumns returns a new frame without 'date' and leaves df untouched - TODO confirm
    values = removeColumns(df,['date'])
    normalized = values/(values.max()*2)

    # Work on a copy so the caller's frame keeps its original prices.
    result = df.copy()
    result.loc[:, result.columns != 'date'] = normalized

    return result, maxVal

### Opening the dataset

In [5]:
# PETR4 daily quotes: tab-separated file; rename the columns and drop the volume/spread fields.
df = pd.read_csv('../Data/PETR4_Daily.csv', sep = "\t")
df.columns = dfColumns
df = removeColumns(df, ['tickvol', 'volume','spread'])

# First date kept in the working frames: the row 3 * WINDOW index labels before TEST_BEGIN (see safeDate).
beginDate = safeDate(df, TEST_BEGIN, WINDOW)

# VALE3 daily quotes, prepared the same way as PETR4.
df2 = pd.read_csv('../Data/VALE3_Daily.csv', sep = "\t")
df2.columns = dfColumns
df2 = removeColumns(df2, ['tickvol', 'volume','spread'])

# Preview the last 290 PETR4 rows.
df.tail(290)

Unnamed: 0,date,open,high,low,close
1049,2020.03.31,13.16,14.07,13.16,13.54
1050,2020.04.01,13.01,14.08,12.89,13.84
1051,2020.04.02,14.90,16.01,14.52,15.01
1052,2020.04.03,15.77,15.83,14.44,14.84
1053,2020.04.06,15.53,15.58,14.69,15.26
...,...,...,...,...,...
1334,2021.05.26,25.91,26.18,25.65,26.09
1335,2021.05.27,26.08,26.25,25.77,25.91
1336,2021.05.28,26.20,27.02,26.15,26.99
1337,2021.05.31,26.96,27.20,26.78,26.87


In [6]:
#PETR4
# The three helpers come from Utils; judging by the columns shown in the frames displayed
# in later cells they add EMA_*, UB_*/LB_* and d-N_* columns for each name in newColumns
# - TODO confirm against Utils.
#PETR4
df = calculateEMA(df,DAYS,newColumns)
df = calculateBB(df,DAYS,newColumns)
df = addPreviousDays(df,DAYS, newColumns)

#VALE3
df2 = calculateEMA(df2,DAYS,newColumns)
df2 = calculateBB(df2,DAYS,newColumns)
df2 = addPreviousDays(df2,DAYS, newColumns)

In [7]:
##df = getPeriod(df, '2020.03.02','2021.04.30', True)
##initialDf = getPeriod(df, '2020.03.02','2021.04.30', True)

In [8]:
# Restrict both tickers to beginDate..TEST_END; both use the beginDate derived from the PETR4 frame.
# NOTE(review): the third argument (True) presumably resets the index - confirm against Utils.getPeriod.
initialDf_PETR4 = getPeriod(df, beginDate,TEST_END, True)
initialDf_VALE3 = getPeriod(df2, beginDate,TEST_END, True)

In [9]:
# Display the PETR4 working frame (prices plus derived feature columns).
initialDf_PETR4

Unnamed: 0,date,open,high,low,close,EMA_open,EMA_high,EMA_low,EMA_close,UB_open,...,d-3_low,d-3_close,d-4_open,d-4_high,d-4_low,d-4_close,d-5_open,d-5_high,d-5_low,d-5_close
0,2016.10.05,12.87,13.28,12.84,13.22,12.592550,12.853439,12.479983,12.747775,13.018191,...,12.19,12.31,12.52,12.61,12.18,12.18,12.06,12.56,11.90,12.56
1,2016.10.06,13.34,13.70,13.26,13.70,12.841700,13.135626,12.739988,13.065183,13.556441,...,12.35,12.67,12.27,12.49,12.19,12.31,12.52,12.61,12.18,12.18
2,2016.10.07,13.81,13.95,13.58,13.84,13.164467,13.407084,13.019992,13.323456,14.137587,...,12.54,12.72,12.40,12.67,12.35,12.67,12.27,12.49,12.19,12.31
3,2016.10.10,14.08,14.30,14.07,14.27,13.469645,13.704723,13.369995,13.638970,14.549197,...,12.84,13.22,12.68,12.81,12.54,12.72,12.40,12.67,12.35,12.67
4,2016.10.11,14.16,14.20,13.87,13.96,13.699763,13.869815,13.536663,13.745980,14.736011,...,13.26,13.70,12.87,13.28,12.84,13.22,12.68,12.81,12.54,12.72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1144,2021.05.26,25.91,26.18,25.65,26.09,26.114791,26.354219,25.760657,26.042263,26.608719,...,25.94,25.95,26.15,26.28,25.72,25.93,25.85,26.35,25.80,26.15
1145,2021.05.27,26.08,26.25,25.77,25.91,26.103194,26.319479,25.763771,25.998175,26.604087,...,25.99,26.39,26.08,26.31,25.94,25.95,26.15,26.28,25.72,25.93
1146,2021.05.28,26.20,27.02,26.15,26.99,26.135462,26.552986,25.892514,26.328784,26.617551,...,25.78,25.84,26.28,26.52,25.99,26.39,26.08,26.31,25.94,25.95
1147,2021.05.31,26.96,27.20,26.78,26.87,26.410308,26.768657,26.188343,26.509189,27.147657,...,25.65,26.09,26.48,26.62,25.78,25.84,26.28,26.52,25.99,26.39


In [10]:
# Display the VALE3 working frame (prices plus derived feature columns).
initialDf_VALE3

Unnamed: 0,date,open,high,low,close,EMA_open,EMA_high,EMA_low,EMA_close,UB_open,...,d-3_low,d-3_close,d-4_open,d-4_high,d-4_low,d-4_close,d-5_open,d-5_high,d-5_low,d-5_close
0,2016.10.05,15.14,15.25,14.99,15.12,15.176550,15.340787,14.926749,15.094690,15.550217,...,14.95,15.08,15.41,15.59,15.13,15.23,14.97,15.42,14.94,15.40
1,2016.10.06,15.10,15.19,14.98,15.07,15.151033,15.290525,14.944499,15.086460,15.463304,...,15.03,15.32,15.25,15.30,14.95,15.08,15.41,15.59,15.13,15.23
2,2016.10.07,15.21,15.47,15.00,15.24,15.170689,15.350350,14.962999,15.137640,15.450359,...,14.86,14.94,15.11,15.47,15.03,15.32,15.25,15.30,14.95,15.08
3,2016.10.10,15.44,16.16,15.37,16.13,15.260459,15.620233,15.098666,15.468427,15.572090,...,14.99,15.12,15.41,15.49,14.86,14.94,15.11,15.47,15.03,15.32
4,2016.10.11,15.98,16.17,15.58,15.74,15.500306,15.803489,15.259111,15.558951,16.100856,...,14.98,15.07,15.14,15.25,14.99,15.12,15.41,15.49,14.86,14.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1144,2021.05.26,106.37,110.32,106.03,110.20,109.166674,111.131617,107.622606,109.807900,113.973861,...,108.50,109.40,111.70,112.10,110.20,111.11,112.10,112.78,109.84,112.25
1145,2021.05.27,110.80,112.09,109.77,110.99,109.711116,111.451078,108.338404,110.201933,113.405981,...,107.51,109.78,111.11,111.25,108.50,109.40,111.70,112.10,110.20,111.11
1146,2021.05.28,111.11,112.75,109.42,111.59,110.177411,111.884052,108.698936,110.664622,113.405981,...,106.78,107.05,108.00,110.14,107.51,109.78,111.11,111.25,108.50,109.40
1147,2021.05.31,112.50,114.89,112.50,114.78,110.951607,112.886035,109.965957,112.036415,114.811204,...,106.03,110.20,110.28,110.89,106.78,107.05,108.00,110.14,107.51,109.78


#### Experiment 02: Reproducing the Experiment proposed by Martinez

In [11]:
def getTrainPeriod(df, begin, period):
    """Return the first and last dates of the training span that precedes ``begin``.

    The span ends on the row one index label before the first row dated
    ``begin`` and starts ``period`` labels before that end row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column; index labels must support integer
        subtraction (e.g. the default RangeIndex).
    begin : str
        Date that marks the first row after the training span.
    period : int
        Distance, in index labels, between the first and last training rows.

    Returns
    -------
    tuple
        ``(first_date, last_date)`` of the training span.

    Raises
    ------
    IndexError
        If ``begin`` or either computed label is missing from the frame.
    """
    # Label of the row immediately before the first occurrence of `begin`.
    lastLabel = df.index[(df['date'] == begin).to_numpy()][0] - 1
    firstLabel = lastLabel - period

    dates = df['date']
    lastMatches = dates[dates.index == lastLabel]
    firstMatches = dates[dates.index == firstLabel]

    return firstMatches.iloc[0], lastMatches.iloc[0]

def exp02(df, stock, inputColumns = []):
    """Walk-forward evaluation of an MLP that predicts the daily high and low.

    The frame is normalized with ``Normalize`` and the rows returned by
    ``getPeriod(df, TEST_BEGIN, TEST_END)`` are split into consecutive chunks
    of ``DRIFT`` rows. For every chunk a fresh ``MLPRegressor`` is trained on
    the rows whose index label lies in ``[i - WINDOW, i)`` (``i`` being the
    chunk's first label) and then predicts the chunk's ``high`` and ``low``.
    The predictions of all chunks are gathered and scored once with ``mape``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``date``, ``high``, ``low``, ``close`` and the feature
        columns. Rows are selected by comparing ``df.index`` with integers,
        so the index must hold consecutive integer labels.
    stock : str
        Ticker label. Currently unused; kept so existing callers keep working.
    inputColumns : list of str, optional
        Feature columns fed to the network. When empty, every column except
        ``date``, ``high``, ``low`` and ``close`` is used.

    Returns
    -------
    tuple
        ``(errorMax, errorMin)``: the values returned by ``mape`` for the
        ``high`` and ``low`` predictions over the whole test period.

    Raises
    ------
    ValueError
        If the test period selects no rows.
    """
    df, _ = Normalize(df)

    df_test_aux = getPeriod(df, TEST_BEGIN, TEST_END)
    if len(df_test_aux) == 0:
        raise ValueError('No rows between TEST_BEGIN and TEST_END')

    indexBegin = df_test_aux.index[0]
    indexEnd = df_test_aux.index[-1]

    targetColumns = ['high', 'low']
    chunks = []

    # indexEnd + 1: range() excludes its stop value, so without it the last
    # test row was skipped whenever the period length was 1 modulo DRIFT.
    for i in range(indexBegin, indexEnd + 1, DRIFT):
        # Never let the final chunk run past the end of the test period.
        lastTest = min(i + DRIFT - 1, indexEnd)

        df_test = df[(df.index >= i) & (df.index <= lastTest)]
        df_train = df[(df.index >= i-WINDOW) & (df.index < i)]

        if not inputColumns:
            # Default feature set: everything except the date and same-day targets/close.
            x_train = df_train.drop(['date', 'high', 'low', 'close'], axis=1)
            x_test = df_test.drop(['date', 'high', 'low', 'close'], axis=1)
        else:
            x_train = df_train[inputColumns]
            x_test = df_test[inputColumns]

        y_train = df_train[targetColumns]
        y_test = df_test[targetColumns]

        # MaxAbsScaler divides by scale_; doubling it maps the training
        # features into [-0.5, 0.5] instead of [-1, 1].
        scaler = MaxAbsScaler()
        scaler.fit(x_train)
        scaler.max_abs_ = scaler.max_abs_*2
        scaler.scale_ = scaler.scale_*2
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        # NOTE(review): scikit-learn only uses `momentum` when solver='sgd';
        # with the default solver this value is ignored. Confirm whether
        # solver='sgd' is wanted to match the article.
        # NOTE(review): 8 ~ sqrt(33*2); the 15- and 25-feature runs use the
        # same hidden size - confirm this is intended.
        MLP = MLPRegressor(hidden_layer_sizes = (8,), #sqrt(33*2) according to the article
                   momentum = 0.8,
                   max_iter = 100000,
                   activation = 'logistic',
                   learning_rate = 'constant',
                   learning_rate_init = 0.01)

        MLP.fit(x_train, y_train)
        y_pred = MLP.predict(x_test)

        # Keep the true values next to their predictions for this chunk.
        chunk = y_test.copy()
        chunk['high_pred'] = y_pred[:,0]
        chunk['low_pred'] = y_pred[:,1]
        chunks.append(chunk)

    # One concat instead of DataFrame.append in the loop (removed in pandas 2.0).
    predDf = pd.concat(chunks, ignore_index=True)

    # Score the accumulated predictions; the previous code read the undefined
    # names y_testFix / y_predFix here.
    errorMax = mape(predDf['high'], predDf['high_pred'].to_numpy())
    errorMin = mape(predDf['low'], predDf['low_pred'].to_numpy())

    return errorMax, errorMin

def test30timesExp02(df,stock, phrase, columnInput = []):
    """Repeat ``exp02`` ``TIMES`` times and print the mean and spread of its errors.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame forwarded to ``exp02``.
    stock : str
        Ticker label forwarded to ``exp02``.
    phrase : str
        Heading printed before the results.
    columnInput : list of str, optional
        Feature columns forwarded to ``exp02``; when it equals ``[]`` the
        call relies on ``exp02``'s own default.

    Notes
    -----
    Only element ``[0]`` of each value returned by ``exp02`` is collected.
    Nothing is returned; results are reported through ``print`` and
    ``printResult``.
    """
    # Forward the column list only when one was actually supplied.
    extraArgs = () if columnInput == [] else (columnInput,)

    highErrors = []
    lowErrors = []
    for _ in range(TIMES):
        runHigh, runLow = exp02(df, stock, *extraArgs)
        highErrors.append(runHigh[0])
        lowErrors.append(runLow[0])

    highErrors = np.array(highErrors)
    lowErrors = np.array(lowErrors)

    print(phrase)
    printResult((highErrors.mean(), highErrors.std()), 'High')
    printResult((lowErrors.mean(), lowErrors.std()), 'Low')
    
# Run the repeated experiment for each ticker with the 15-, 25- and default
# (all remaining columns) feature sets; each call prints its own summary.
test30timesExp02(initialDf_PETR4, 'PETR4', 'PETR4 15 Variables', col15) 
test30timesExp02(initialDf_PETR4, 'PETR4', 'PETR4 25 Variables', col25) 
test30timesExp02(initialDf_PETR4, 'PETR4', 'PETR4 33 Variables') 
print('\n')
test30timesExp02(initialDf_VALE3, 'VALE3', 'VALE3 15 Variables', col15) 
test30timesExp02(initialDf_VALE3, 'VALE3', 'VALE3 25 Variables', col25) 
test30timesExp02(initialDf_VALE3, 'VALE3', 'VALE3 33 Variables') 

PETR4 15 Variables
High: 7.69 +- 4.2 %
Low: 11.51 +- 5.28 %
PETR4 25 Variables
High: 11.79 +- 5.75 %
Low: 8.55 +- 3.53 %
PETR4 33 Variables
High: 10.78 +- 5.22 %
Low: 11.8 +- 6.74 %


VALE3 15 Variables
High: 15.79 +- 8.56 %
Low: 15.06 +- 5.91 %
VALE3 25 Variables
High: 10.1 +- 5.62 %
Low: 19.48 +- 10.79 %
VALE3 33 Variables
High: 17.38 +- 7.58 %
Low: 14.53 +- 8.77 %
