In [2]:
import pandas as pd 
import numpy as np
import  matplotlib.pyplot as plt
import utils_eda as ut


In [3]:
def adicionar_features():
    df = ut.leitura_csv()
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    df['Return'] = df['Close'].pct_change()

    df['SMA5'] = df['Close'].rolling(5).mean()
    df['SMA20'] = df['Close'].rolling(20).mean()
    df['SMA50'] = df['Close'].rolling(50).mean()
    df['SMA200'] = df['Close'].rolling(200).mean()

    df['EMA10'] = df['Close'].ewm(span=10, adjust=False).mean()
    df['EMA20'] = df['Close'].ewm(span=20, adjust=False).mean()

    df['RSI14'] = ut.calculo_RSI(df['Close'])
    df['MACD'] = ut.calculate_MACD(df['Close'])

    df['Momentum_5'] = df['Close'] / df['Close'].shift(5) - 1
    df['Momentum_10'] = df['Close'] / df['Close'].shift(10) - 1
    df['Momentum_20'] = df['Close'] / df['Close'].shift(20) - 1

    df['Volatility_20'] = df['Return'].rolling(20).std()

    df['Price_Range'] = df['High'] - df['Low']
    df['Price_Change'] = df['Close'] - df['Open']
    df['Upper_Shadow'] = df['High'] - df[['Open', 'Close']].max(axis=1)
    df['Lower_Shadow'] = df[['Open', 'Close']].min(axis=1) - df['Low']

    df['BB_middle'] = df['Close'].rolling(20).mean()
    df['BB_std'] = df['Close'].rolling(20).std()
    df['BB_width'] = (4 * df['BB_std']) / df['BB_middle']

    df['Volume_MA5'] = df['Volume'].rolling(5).mean()
    df['Volume_MA20'] = df['Volume'].rolling(20).mean()
    df['Volume_Ratio'] = df['Volume'] / df['Volume_MA20']

    df['ROC_10'] = df['Close'].pct_change(10)

    df['ATR_pct'] = ut.calcular_ATR_percentual(df, period=14)

    return df


In [4]:
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np
import pandas as pd
import joblib

def treinar_modelo_compra(
    horizontes=[3, 5, 10],
    movimentos_pct=[0.005, 0.01, 0.015]
):
    df_base = adicionar_features()

    melhor_auc = 0
    melhor_config = None
    melhor_modelo = None
    melhor_threshold = None
    melhor_scaler = None
    melhor_imputer = None
    melhor_features = None

    for horizonte in horizontes:
        for x_pct in movimentos_pct:

            df = df_base.copy()

            retorno_futuro = (df['Close'].shift(-horizonte) / df['Close']) - 1
            df['Target'] = (retorno_futuro > x_pct).astype(int)

            df = df.dropna().iloc[200:].copy()

            features = df.select_dtypes(include=[np.number]).columns.tolist()
            features.remove('Target')

            X = df[features]
            y = df['Target']

            imputer = SimpleImputer(strategy='mean')
            scaler = StandardScaler()

            X = imputer.fit_transform(X)
            X = scaler.fit_transform(X)

            split = int(len(X) * 0.8)

            X_train, X_test = X[:split], X[split:]
            y_train, y_test = y[:split], y[split:]

            model = XGBClassifier(
                n_estimators=300,
                max_depth=5,
                learning_rate=0.05,
                subsample=0.8,
                colsample_bytree=0.8,
                eval_metric='logloss',
                tree_method='hist',
                random_state=42
            )

            model.fit(X_train, y_train)

            probs = model.predict_proba(X_test)[:, 1]

            auc = roc_auc_score(y_test, probs)

            if auc > melhor_auc:
                fpr, tpr, thresholds = roc_curve(y_test, probs)
                idx = np.argmax(tpr - fpr)

                melhor_auc = auc
                melhor_threshold = thresholds[idx]
                melhor_config = {
                    "horizonte": horizonte,
                    "movimento_minimo": x_pct
                }
                melhor_modelo = model
                melhor_scaler = scaler
                melhor_imputer = imputer
                melhor_features = features

            print(f"H={horizonte} | x_pct={x_pct:.3f} | AUC={auc:.4f}")


    joblib.dump(melhor_modelo, "../models/ml_compra/modelo_compra.pkl")
    joblib.dump(melhor_threshold, "../models/ml_compra/threshold_compra.pkl")
    joblib.dump(melhor_scaler, "../models/ml_compra/scaler_compra.pkl")
    joblib.dump(melhor_imputer, "../models/ml_compra/imputer_compra.pkl")
    joblib.dump(melhor_features, "../models/ml_compra/features_compra.pkl")
    joblib.dump(melhor_config, "../models/ml_compra/config_compra.pkl")

    print("\nMELHOR MODELO")
    print(f"AUC-ROC: {melhor_auc:.4f}")
    print(melhor_config)

    return melhor_auc, melhor_config, melhor_threshold


In [5]:
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np
import pandas as pd
import joblib

def treinar_modelo_queda(
    horizontes=[3, 5, 10],
    movimentos_pct=[0.005, 0.01, 0.015]
):
    df_base = adicionar_features()

    melhor_auc = 0
    melhor_config = None
    melhor_modelo = None
    melhor_threshold = None


    for horizonte in horizontes:
        for x_pct in movimentos_pct:

            df = df_base.copy()

            retorno_futuro = (df['Close'].shift(-horizonte) / df['Close']) - 1
            df['Target'] = (retorno_futuro < -x_pct).astype(int)

            df = df.dropna().iloc[200:].copy()

            features = df.select_dtypes(include=[np.number]).columns.tolist()
            features.remove('Target')

            X = df[features]
            y = df['Target']

            imputer = SimpleImputer(strategy='mean')
            scaler = StandardScaler()

            X = imputer.fit_transform(X)
            X = scaler.fit_transform(X)

            split = int(len(X) * 0.8)

            X_train, X_test = X[:split], X[split:]
            y_train, y_test = y[:split], y[split:]

            model = XGBClassifier(
                n_estimators=300,
                max_depth=5,
                learning_rate=0.05,
                subsample=0.8,
                colsample_bytree=0.8,
                eval_metric='logloss',
                tree_method='hist',
                random_state=42
            )

            model.fit(X_train, y_train)

            probs = model.predict_proba(X_test)[:, 1]

            auc = roc_auc_score(y_test, probs)

            if auc > melhor_auc:
                fpr, tpr, thresholds = roc_curve(y_test, probs)
                idx = np.argmax(tpr - fpr)

                melhor_auc = auc
                melhor_threshold = thresholds[idx]
                melhor_config = {
                    "horizonte": horizonte,
                    "movimento_minimo": x_pct
                }
                melhor_modelo = model
                melhor_modelo = model
                melhor_scaler = scaler
                melhor_imputer = imputer
                melhor_features = features
            print(f"H={horizonte} | x_pct={x_pct:.3f} | AUC={auc:.4f}")


    joblib.dump(melhor_modelo, "../models/ml_queda/modelo_queda.pkl")
    joblib.dump(melhor_threshold, "../models/ml_queda/threshold_queda.pkl")
    joblib.dump(melhor_scaler, "../models/ml_queda/scaler_queda.pkl")
    joblib.dump(melhor_imputer, "../models/ml_queda/imputer_queda.pkl")
    joblib.dump(melhor_features, "../models/ml_queda/features_queda.pkl")
    joblib.dump(melhor_config, "../models/ml_queda/config_queda.pkl")
    
    print("\nMELHOR MODELO")
    print(f"AUC-ROC: {melhor_auc:.4f}")
    print(melhor_config)

    return melhor_auc, melhor_config, melhor_threshold


In [6]:
treinar_modelo_compra()
treinar_modelo_queda()

H=3 | x_pct=0.005 | AUC=0.5575
H=3 | x_pct=0.010 | AUC=0.5756
H=3 | x_pct=0.015 | AUC=0.5899
H=5 | x_pct=0.005 | AUC=0.5515
H=5 | x_pct=0.010 | AUC=0.5639
H=5 | x_pct=0.015 | AUC=0.5853
H=10 | x_pct=0.005 | AUC=0.6110
H=10 | x_pct=0.010 | AUC=0.6204
H=10 | x_pct=0.015 | AUC=0.6350

MELHOR MODELO
AUC-ROC: 0.6350
{'horizonte': 10, 'movimento_minimo': 0.015}
H=3 | x_pct=0.005 | AUC=0.5296
H=3 | x_pct=0.010 | AUC=0.4756
H=3 | x_pct=0.015 | AUC=0.5104
H=5 | x_pct=0.005 | AUC=0.5206
H=5 | x_pct=0.010 | AUC=0.5141
H=5 | x_pct=0.015 | AUC=0.4879
H=10 | x_pct=0.005 | AUC=0.5372
H=10 | x_pct=0.010 | AUC=0.5754
H=10 | x_pct=0.015 | AUC=0.5536

MELHOR MODELO
AUC-ROC: 0.5754
{'horizonte': 10, 'movimento_minimo': 0.01}


(0.5754301075268817,
 {'horizonte': 10, 'movimento_minimo': 0.01},
 np.float32(0.5998786))