In [1]:
import sys
from pathlib import Path

# Caminho absoluto do diretório raiz do projeto
sys.path.append(str(Path().resolve().parent))


In [2]:
import sqlite3
from pathlib import Path
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from indicators.rsi import adicionar_rsi
from indicators.macd import adicionar_macd
from indicators.bollinger import adicionar_bbands
from patterns.engolfo import engolfo_de_alta, engolfo_de_baixa

In [8]:
def carregar_dados():
    # Caminho absoluto até o arquivo sinais.db
    caminho_db = Path().resolve().parent / 'data' / 'sinais.db'
    
    # Conecta e carrega os dados
    conn = sqlite3.connect(str(caminho_db))
    df = pd.read_sql('SELECT * FROM candles ORDER BY open_time', conn, parse_dates=['open_time'])
    conn.close()
    return df
    
def preparar_dados(df):
    print("Antes do preparo:", len(df))
    df = adicionar_rsi(adicionar_macd(adicionar_bbands(df)))
    df['engolfo_alta'] = False
    df['engolfo_baixa'] = False
    for i in range(1, len(df)):
        df.loc[df.index[i], 'engolfo_alta'] = engolfo_de_alta(df.iloc[i], df.iloc[i-1])
        df.loc[df.index[i], 'engolfo_baixa'] = engolfo_de_baixa(df.iloc[i], df.iloc[i-1])
    df['target'] = (df['close'].shift(-1) > df['close']).astype(int)
    df = df.dropna()
    print("Depois do preparo:", len(df))
    return df


def treinar_modelo(df):
    features = ['open','high','low','close','volume','rsi','macd','macd_signal','macd_diff','bb_high','bb_low','engolfo_alta','engolfo_baixa']
    X = df[features]
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    modelo = RandomForestClassifier(n_estimators=100, random_state=42)
    modelo.fit(X_train, y_train)
    pred = modelo.predict(X_test)
    print('Accuracy:', accuracy_score(y_test, pred))
    print('Precision:', precision_score(y_test, pred))
    print('Recall:', recall_score(y_test, pred))
    print('F1:', f1_score(y_test, pred))
    print('Matriz de confusao:\n', confusion_matrix(y_test, pred))
    Path('../models').mkdir(exist_ok=True)
    joblib.dump(modelo, '../models/modelo_binario.pkl')
    joblib.dump(scaler, '../models/escalador.pkl')
    return modelo, scaler

df = carregar_dados()
df = preparar_dados(df)
treinar_modelo(df)

Antes do preparo: 8647
Depois do preparo: 8614
Accuracy: 0.51015670342426
Precision: 0.49563046192259674
Recall: 0.4743130227001195
F1: 0.48473748473748474
Matriz de confusao:
 [[482 404]
 [440 397]]


(RandomForestClassifier(random_state=42), StandardScaler())

In [4]:
df_raw = carregar_dados()
print(len(df_raw))
df_raw.head()


8640


Unnamed: 0,id,ativo,open_time,open,high,low,close,volume
0,1,BTCUSDT,2025-05-19 23:55:00,105600.01,105614.65,105551.15,105573.74,20.35815
1,2,BTCUSDT,2025-05-20 00:00:00,105573.73,105700.99,105554.31,105700.99,50.24604
2,3,BTCUSDT,2025-05-20 00:05:00,105700.99,105988.0,105681.6,105970.52,131.77018
3,4,BTCUSDT,2025-05-20 00:10:00,105970.51,106000.0,105939.56,105999.99,137.24583
4,5,BTCUSDT,2025-05-20 00:15:00,105999.99,106499.0,105999.99,106495.32,536.40351
