In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import time

data_path = '../src/data/raw/GBPUSD/GBPUSD15.csv'

df = pd.read_csv(data_path)

df.columns = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']
df['date'] = pd.to_datetime(df['date'], format='%Y.%m.%d')

# Filtrar os dados para manter apenas as linhas a partir de 2022
df = df[df['date'] >= '2022-01-01']
df['Target'] = (df['close'] > df['open']).astype(int)
df


Unnamed: 0,date,time,open,high,low,close,volume,Target
570178,2022-01-03,00:00,1.35256,1.35268,1.35238,1.35246,50,0
570179,2022-01-03,00:15,1.35243,1.35306,1.35215,1.35246,94,1
570180,2022-01-03,00:30,1.35267,1.35336,1.35237,1.35286,153,1
570181,2022-01-03,00:45,1.35284,1.35319,1.35265,1.35281,95,0
570182,2022-01-03,01:00,1.35302,1.35328,1.35250,1.35254,231,0
...,...,...,...,...,...,...,...,...
633721,2024-07-19,22:45,1.29118,1.29123,1.29101,1.29109,907,0
633722,2024-07-19,23:00,1.29108,1.29119,1.29101,1.29104,692,0
633723,2024-07-19,23:15,1.29103,1.29122,1.29100,1.29120,445,1
633724,2024-07-19,23:30,1.29120,1.29124,1.29097,1.29100,253,0


In [7]:
# Selecionar features e target
features = df[['open', 'high', 'low', 'close']]
target = df['Target']

# Dividir os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Instanciar o modelo RandomForestClassifier
model = RandomForestClassifier(n_estimators=10, random_state=42)

start_time = time.time()

# Treinar o modelo
model.fit(X_train, y_train)

# Fazer previsões no conjunto de teste
y_pred = model.predict(X_test)

# Calcular o tempo de execução
end_time = time.time()
execution_time = end_time - start_time

# Avaliar a performance do modelo
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
print(f"Tempo de execução: {execution_time} segundos")

Accuracy: 0.9325727773406767
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.94      0.93      6350
           1       0.94      0.92      0.93      6360

    accuracy                           0.93     12710
   macro avg       0.93      0.93      0.93     12710
weighted avg       0.93      0.93      0.93     12710

Tempo de execução: 37.1348512172699 segundos
