## Random Forest aplicado à previsão de retornos

Análise de indicadores técnicos na previsão dos retornos da ação da Petrobras (PETR4)

In [256]:
### Importando bibliotecas
import pandas as pd
import numpy as np
import ta  ### biblioteca para analise de indicadores técnicos
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error as MSE
import matplotlib.pyplot as plt


In [257]:
# Load the price/volume spreadsheet: skip the first 3 rows of the sheet,
# use the first column as the index and ask pandas to parse it as dates.
df = pd.read_excel(
    'economatica.xlsx',
    skiprows=3,
    index_col=0,
    parse_dates=True,
).rename(columns={'Volume$': 'Volume'})  # 'Volume$' -> 'Volume' (later accessed as df.Volume)
df.head()

Unnamed: 0_level_0,Q Negs,Q Títs,Volume,Fechamento,Abertura,Mínimo,Máximo,Médio
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,13531,13303600,493660216,23.135237,22.905869,22.82528,23.135237,23.005055
2010-01-05,22782,21396400,794327759,22.936865,23.172432,22.812881,23.203428,23.011254
2010-01-06,18647,18720600,697345692,23.246822,22.812881,22.812881,23.246822,23.091843
2010-01-07,12720,10964600,408386356,23.029852,23.104242,22.980259,23.215826,23.091843
2010-01-08,14192,14624200,542061948,22.905869,23.036051,22.850076,23.178631,22.980259


In [258]:
# Simple period-over-period return of the closing price.
df['Retornos'] = df['Fechamento'].pct_change()

# Momentum indicators from the `ta` package.
df['Kama'] = ta.momentum.KAMAIndicator(
    close=df['Fechamento'],
    window=21,
).kama()
df['ROC'] = ta.momentum.ROCIndicator(
    close=df['Fechamento'],
    window=12,
).roc()
df['RSI'] = ta.momentum.RSIIndicator(
    close=df['Fechamento'],
    window=14,
).rsi()
df['Stoch'] = ta.momentum.StochasticOscillator(
    close=df['Fechamento'],
    high=df['Máximo'],
    low=df['Mínimo'],
    window=14,
    smooth_window=3,
).stoch()

# Volume indicators from the `ta` package.
df['Chaikin_money'] = ta.volume.ChaikinMoneyFlowIndicator(
    high=df['Máximo'],
    low=df['Mínimo'],
    close=df['Fechamento'],
    volume=df['Volume'],
    window=20,
).chaikin_money_flow()
df['Force_index'] = ta.volume.ForceIndexIndicator(
    close=df['Fechamento'],
    volume=df['Volume'],
    window=13,
).force_index()

# Position of the close inside the row's low-high range.
# NOTE: the denominator is zero whenever Máximo equals Mínimo; this is not guarded here.
df['Normal'] = (df['Fechamento'] - df['Mínimo']) / (df['Máximo'] - df['Mínimo'])


In [259]:
# Discard every row that contains at least one missing value
# (e.g. the first row of 'Retornos', which has no previous close).
df = df.dropna(axis=0, how='any')

# Feature matrix: raw market columns plus the engineered indicators.
# The last row is cut off because it has no following row to supply a target.
X = df[[
    'Q Negs', 'Q Títs', 'Volume',
    'Fechamento', 'Abertura', 'Mínimo', 'Máximo', 'Médio',
    'Kama', 'ROC', 'RSI', 'Stoch',
    'Chaikin_money', 'Force_index', 'Normal',
]].iloc[:-1]

# Target: return of the close from each row to the next one, stored on the
# current row; the trailing NaN produced by the shift is dropped.
y = (
    df['Fechamento']
    .pct_change()
    .shift(-1)
    .dropna()
)
# Alternative classification target (direction of the next close), kept for reference:
#y = np.where(df['Fechamento'].shift(-1) > df['Fechamento'], 1, -1)

In [260]:
# Hold out 20% of the rows for testing. shuffle=False keeps the original row
# order, so the split is not randomised.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [261]:
# Single regression tree with a fixed seed for reproducibility.
# NOTE(review): min_samples_leaf is given as a float; per the scikit-learn
# docs a float is read as a fraction of the training samples, which strongly
# limits the number of leaves regardless of max_depth — confirm this is intended.
dt = DecisionTreeRegressor(
    random_state=3,
    min_samples_leaf=0.15,
    max_depth=8,
)

In [262]:
# Train the tree on the training portion of the data.
dt.fit(X=X_train, y=y_train)

DecisionTreeRegressor(max_depth=8, min_samples_leaf=0.15, random_state=3)

In [263]:
# Predicted next-row returns for the held-out rows.
y_predic = dt.predict(X=X_test)

In [264]:
# Mean squared error on the test set.
# The metric's signature is (y_true, y_pred); the arguments are passed by
# keyword so the ground truth and the predictions cannot be swapped. MSE itself
# is symmetric, so the value is unchanged, but an asymmetric metric dropped in
# here would otherwise be silently wrong.
mse_dt = MSE(y_true=y_test, y_pred=y_predic)

In [265]:
# Root mean squared error: square root of the test-set MSE.
mse_dt ** 0.5

0.03389337040777027

In [269]:
# Display the raw test-set predictions. The visible output repeats a small set
# of distinct values.
y_predic

array([ 0.00012175,  0.00012175,  0.00012175,  0.00012175,  0.00012175,
        0.00012175,  0.00012175,  0.00123107,  0.00123107,  0.00123107,
       -0.0032152 ,  0.00012175, -0.0032152 ,  0.00012175, -0.0032152 ,
       -0.0032152 ,  0.00012175, -0.0032152 ,  0.00012175, -0.0032152 ,
        0.0001352 ,  0.0001352 , -0.0032152 ,  0.00012175,  0.00123107,
       -0.0032152 , -0.0032152 ,  0.00123107,  0.00012175,  0.00012175,
        0.00012175,  0.00123107,  0.00012175,  0.00123107,  0.00012175,
       -0.0032152 ,  0.00123107,  0.00123107, -0.0032152 ,  0.00123107,
        0.00012175,  0.00012175,  0.00123107,  0.00012175, -0.0032152 ,
       -0.0032152 , -0.0032152 , -0.0032152 ,  0.0001352 ,  0.0001352 ,
       -0.0032152 , -0.0032152 ,  0.0001352 ,  0.0001352 ,  0.00012175,
       -0.0032152 ,  0.00012175,  0.00012175, -0.0032152 , -0.0032152 ,
        0.00012175, -0.0032152 ,  0.00012175,  0.00012175,  0.00012175,
        0.00012175, -0.0032152 , -0.0032152 ,  0.00012175,  0.00