In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error


plt.style.use('bmh')

In [5]:
def stock_predict(**params):
    pre_path = 'data/stock/'
    pos_path = '.SA.csv'
    ticket = params['path']
    dfs = pd.read_csv(pre_path+params['path']+pos_path,index_col='Date')
    dfs.dropna(inplace=True)

    dfs.head()
    df=dfs[['Close']]
    #future_days = 100
    df['prediction']=df[['Close']].shift(-params['future_days'])
    X = np.array(df.drop(['prediction'],1))[:-params['future_days']]
    y = np.array(df['prediction'])[:-params['future_days']]
    x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
    print('limites de teste:')

    print(x_train.shape)
    print(x_test.shape)
    #regressores
    tree= DecisionTreeRegressor().fit(x_train,y_train)
    lr = LinearRegression().fit(x_train,y_train)
    rf = RandomForestRegressor(n_estimators=params['n_estimators'],max_depth=params['max_depth']).fit(x_train,y_train)
    #últimas linhas de x do futuro
    x_future = df.drop(['prediction'],1)[:-params['future_days']]
    x_future = x_future.tail(params['future_days'])
    x_future = np.array(x_future)
    tree_prediction = tree.predict(x_future)
    lr_prediction = lr.predict(x_future)
    rf_prediction = rf.predict(x_future)

    print('testando modelo linear...')
    predictions = lr_prediction

    valid = df[X.shape[0]:]
    valid['Predictions'] = predictions
    df_fig = df.copy()
    df_fig = df_fig.join(valid.add_prefix('lr_'))
    #df_fig.index = df.index

    fig1 = px.line(df_fig,x=df_fig.index,y=['Close','lr_Close','lr_Predictions'],title=f"{ticket}",color_discrete_sequence=['blue','red','black'])

    error_lr = mean_squared_error(valid['Close'], predictions)
    print('testando modelo árvore de decisão')
    predictions = tree_prediction

    valid = df[X.shape[0]:]
    valid['Predictions'] = predictions
    df_fig = df.copy()
    df_fig = df_fig.join(valid.add_prefix('tree_'))
    #df_fig.index = df.index
    fig2 = px.line(df_fig,x=df_fig.index,y=['Close','tree_Close','tree_Predictions'],title=f"{ticket}",color_discrete_sequence=['blue','red','purple'])

    error_tree = mean_squared_error(valid['Close'], predictions)
    print('testando modelo random forest...')
    predictions = rf_prediction

    valid = df[X.shape[0]:]
    valid['Predictions'] = predictions
    df_fig = df.copy()
    df_fig = df_fig.join(valid.add_prefix('rf_'))

    fig3 = px.line(df_fig,x=df_fig.index,y=['Close','rf_Close','rf_Predictions'],title=f"{ticket}",color_discrete_sequence=['blue','red','yellow'])

    error_rf = mean_squared_error(valid['Close'], predictions)

    fig = fig1.add_trace(fig2.data[2]).add_trace(fig3.data[2])
    fig.show()
    print('erro linear:',error_lr)
    print('erro da árvore de decisão:',error_tree)
    print('erro de random forest:',error_rf)

    print('O modelo mais funcional para esta ação foi:',min([error_lr,error_tree,error_rf]))


In [6]:
params={'path':'CSNA3','n_estimators':400,'max_depth':190,'future_days':300}
for ticket in ['CSNA3','KEPL3','FLRY3','KLBN4','UNIP3','ITSA4','TAEE4','QUAL3','MGLU3','BBAS3']:
    params['path'] = ticket
    stock_predict(**params)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



limites de teste:
(4320, 1)
(1080, 1)



In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo linear...
testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 85.23205079585256
erro da árvore de decisão: 11.047423416470519
erro de random forest: 10.744643443246378
O modelo mais funcional para esta ação foi: 10.744643443246378
limites de teste:
(4236, 1)
(1059, 1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo linear...
testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 1499.838517429188
erro da árvore de decisão: 430.2261977469705
erro de random forest: 621.6223526396789
O modelo mais funcional para esta ação foi: 430.2261977469705
limites de teste:
(2386, 1)
(597, 1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



testando modelo linear...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 37.46120862087237
erro da árvore de decisão: 16.35249570144697
erro de random forest: 18.817894967764648
O modelo mais funcional para esta ação foi: 16.35249570144697




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



limites de teste:
(2776, 1)
(695, 1)



In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



testando modelo linear...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 0.7985436056869755
erro da árvore de decisão: 0.13089560632842023
erro de random forest: 0.11968718771028666
O modelo mais funcional para esta ação foi: 0.11968718771028666




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



limites de teste:
(4412, 1)
(1104, 1)



In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



testando modelo linear...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 670.5753819507547
erro da árvore de decisão: 82.15353581850538
erro de random forest: 95.27200832193984
O modelo mais funcional para esta ação foi: 82.15353581850538




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



limites de teste:
(4388, 1)
(1098, 1)



In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



testando modelo linear...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 0.5455305036709561
erro da árvore de decisão: 0.8905220252618956
erro de random forest: 0.8612196144898053
O modelo mais funcional para esta ação foi: 0.5455305036709561
limites de teste:
(840, 1)
(211, 1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



testando modelo linear...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 1.688258300721415
erro da árvore de decisão: 0.2826452197136138
erro de random forest: 0.29241586465330577
O modelo mais funcional para esta ação foi: 0.2826452197136138




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



limites de teste:
(2086, 1)
(522, 1)



In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo linear...
testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 137.43165497588672
erro da árvore de decisão: 63.30473518888975
erro de random forest: 65.85736020631576
O modelo mais funcional para esta ação foi: 63.30473518888975
limites de teste:
(2119, 1)
(530, 1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo linear...
testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 56.866604526096225
erro da árvore de decisão: 13.381844870557421
erro de random forest: 16.75680150722145
O modelo mais funcional para esta ação foi: 13.381844870557421
limites de teste:
(4351, 1)
(1088, 1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo linear...
testando modelo árvore de decisão




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



testando modelo random forest...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



erro linear: 42.578855340092154
erro da árvore de decisão: 37.45405606424317
erro de random forest: 36.81654142596389
O modelo mais funcional para esta ação foi: 36.81654142596389
