In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor 

In [15]:
# Load the raw dataset from a CSV file located relative to the notebook's working directory
df = pd.read_csv("cotacao_dolar.csv")

In [17]:
# Preview the first rows to sanity-check the columns that were loaded
df.head()

Unnamed: 0,saldoMercado,saldoMercado_2,Valor
0,211.18,129.34,0.79
1,0.0,-530.69,0.61
2,0.0,0.0,1.81
3,0.0,0.0,3.7
4,348.55,-331.49,2.1


In [20]:
# Pairwise correlation between the columns, to see how each predictor relates to `Valor`
df.corr()

Unnamed: 0,saldoMercado,saldoMercado_2,Valor
saldoMercado,1.0,0.090796,0.93431
saldoMercado_2,0.090796,1.0,0.115964
Valor,0.93431,0.115964,1.0


In [21]:
# Summary statistics per column (count, mean, spread, quartiles, extremes)
df.describe()

Unnamed: 0,saldoMercado,saldoMercado_2,Valor
count,212739.0,212739.0,212739.0
mean,840.92774,-8.001396,2.532518
std,1373.510947,364.06209,3.121715
min,-16564.97,-19902.63,0.01
25%,0.0,-77.4,0.6
50%,243.32,-29.54,1.55
75%,1252.425,50.85,3.13
max,107909.6,8227.13,246.68


In [23]:
from sklearn.model_selection import train_test_split

In [25]:
# Predictor columns (feature selection)
atributos = ['saldoMercado', 'saldoMercado_2']
# Column to be predicted
atrib_prev = ['Valor']

# Feature matrix: only the magnitude of each balance is kept, the sign is discarded
X = np.abs(df.loc[:, atributos].values)
# Target selected with a list, so it stays a single-column 2-D array
Y = df.loc[:, atrib_prev].values

# Fraction of the rows held out for testing
split_test_size = 0.3

# Split into training and test sets with a fixed seed
X_treino, X_teste, Y_treino, Y_teste = train_test_split(
    X,
    Y,
    test_size=split_test_size,
    random_state=42,
)


In [31]:
# Fit both regressors on the training split.
# - The target is flattened to 1-D: Y_treino is built from a list selector and is
#   therefore an (n, 1) column vector, which makes scikit-learn's forest emit a
#   DataConversionWarning before flattening it internally.
# - random_state is fixed (same seed as the train/test split) so that the fitted
#   models, and the scores computed from them, are reproducible across runs.
y_treino_1d = np.ravel(Y_treino)
modelTreeRegressor = DecisionTreeRegressor(random_state=42).fit(X_treino, y_treino_1d)
modelForestRegressor = RandomForestRegressor(random_state=42).fit(X_treino, y_treino_1d)

  


In [34]:
# Score of the decision tree on the held-out test split
modelTreeRegressor.score(X_teste,Y_teste)

0.8355654455190136

In [33]:
# Score of the random forest on the same held-out test split, for comparison with the tree
modelForestRegressor.score(X_teste,Y_teste)

0.9068992816020158

In [35]:
# Predictions of the fitted random forest for the test features
previsto = modelForestRegressor.predict(X_teste)
# Actual target values for the same test rows, aliased for the comparison table
real = Y_teste

In [43]:
# Column data for the comparison table: forecasts next to the actual values
# (each row of `real` is indexed with [0] to take its single element)
d = {
    "Previsto": list(previsto),
    "Real": [linha[0] for linha in real],
}
df_resultado = pd.DataFrame(d)

# Signed error: forecast minus actual
df_resultado["diff"] = df_resultado["Previsto"] - df_resultado["Real"]

# Absolute deviation of the forecast from the actual value, in percent
df_resultado["porcen"] = (df_resultado["Previsto"] / df_resultado["Real"] * 100 - 100).abs()

# Round for display and discard rows containing missing values
df_resultado = df_resultado.round(2).dropna()

In [45]:
# Preview the forecast-versus-actual comparison table
df_resultado.head()

Unnamed: 0,Previsto,Real,diff,porcen
0,2.09,2.09,0.0,0.0
1,0.98,0.04,0.94,2356.58
2,1.58,1.7,-0.12,6.78
3,0.14,0.05,0.09,180.8
4,3.29,3.25,0.04,1.33


In [48]:
# Name of the chosen machine-learning model (used as a prefix on every report line)
model_name = "RandomForestRegressor"

# Column totals: signed error, absolute error, actual values, forecasts and row count.
# Plain Series reductions are used instead of aggregate([...])[col][0]: the latter
# indexes a label-indexed Series by position, which newer pandas deprecates.
total_diff = df_resultado['diff'].sum()
total_absolute_diff = df_resultado['diff'].abs().sum()
total_real = df_resultado['Real'].sum()
total_prev = df_resultado['Previsto'].sum()
total_reg = df_resultado['Real'].count()

# Per-row percentage error relative to the actual value.
# NOTE: this is (Real - Previsto), the opposite sign of the `diff` column
# (Previsto - Real), so ME and MPE have opposite signs for the same bias.
percent_diff = ((df_resultado['Real'] - df_resultado['Previsto']) / df_resultado['Real']) * 100

# Discard rows whose percentage is undefined (infinite or NaN, e.g. Real == 0)
percent_diff = percent_diff.replace([np.inf, -np.inf], np.nan).dropna()

# Number of rows that actually contribute to the percentage metrics; the
# percentage means are divided by this count, not by the full row count,
# so dropped rows do not bias MPE / MAPE towards zero.
total_percent_reg = percent_diff.count()

# Sum of the absolute percentage errors
total_percent_absolute_diff = percent_diff.abs().sum()

# Sum of the signed percentage errors
total_percent_diff = percent_diff.sum()

prefix = "[" + model_name + "] - "
separator = "----------------------------------------------------------"

print("SE - Sum of Error: ")
print(prefix + "Total Real:  %.2f" % (total_real))
print(prefix + "Total Prev:  %.2f" % (total_prev))
print(prefix + "SE Final:  %.2f" % (total_diff))
print(prefix + "Total Porcentagem de diferença do previsto x real:  {:.2f}%".format(np.absolute((total_diff / total_real) * 100)))
print(separator)

print("ME - Mean Error:  ")
print(prefix + "Total Registros: %.f" % (total_reg))
print(prefix + "Total Diff : %.f" % (total_diff))
print(prefix + "ME Final : %.6f" % (total_diff / total_reg))
print(separator)

print("MAE - Mean Absolute Error:  ")
print(prefix + "Total Registros: %.f" % (total_reg))
print(prefix + "Total Absolute Diff : %.f" % (total_absolute_diff))
# Label fixed: this section reports the MAE, not the ME
print(prefix + "MAE Final : %.6f" % (total_absolute_diff / total_reg))
print(separator)

print("MPE - Mean Percentage Error:  ")
print(prefix + "Total Registros: %.f" % (total_percent_reg))
print(prefix + "Total percent Diff: %.f" % (total_percent_diff))
print(prefix + "MPE Final : {:.2f}%".format(total_percent_diff / total_percent_reg))
print(separator)

# Acronym fixed: Mean Absolute Percentage Error is MAPE
print("MAPE - Mean Absolute Percentage Error:  ")
print(prefix + "Total Registros: %.f" % (total_percent_reg))
print(prefix + "Total percent Absolute Diff: %.f" % (total_percent_absolute_diff))
print(prefix + "MAPE Final : {:.2f}%".format(total_percent_absolute_diff / total_percent_reg))

SE - Sum of Error: 
[RandomForestRegressor] - Total Real:  161922.24
[RandomForestRegressor] - Total Prev:  162500.39
[RandomForestRegressor] - SE Final:  578.11
[RandomForestRegressor] - Total Porcentagem de diferença do previsto x real:  0.36%
----------------------------------------------------------
ME - Mean Error:  
[RandomForestRegressor] - Total Registros: 63822
[RandomForestRegressor] - Total Diff : 578
[RandomForestRegressor] - ME Final : 0.009058
----------------------------------------------------------
MAE - Mean Absolute Error:  
[RandomForestRegressor] - Total Registros: 63822
[RandomForestRegressor] - Total Absolute Diff : 26926
[RandomForestRegressor] - ME Final : 0.421887
----------------------------------------------------------
MPE - Mean Percentage Error:  
[RandomForestRegressor] - Total Registros: 63822
[RandomForestRegressor] - Total percent Diff: -12973334
[RandomForestRegressor] - MPE Final : -203.27%
----------------------------------------------------------
