<a href="https://colab.research.google.com/github/Lucas-Siade/Data-Science/blob/main/Projeto%20Final/Modelo%20-%20Tend%C3%AAncia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Bibliotecas**

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

import joblib

## **Funções**

### **Carregar os Dados**

In [9]:
def carregarDados(path):
    dataset = pd.read_csv(path, sep="\t")

    dataset = dataset.drop(columns=["<SPREAD>"])
    dataset = dataset.rename(columns={
        "<DATE>":"DATE",
        "<TIME>":"TIME",
        "<OPEN>":"OPEN",
        "<CLOSE>":"CLOSE",
        "<HIGH>":"HIGH",
        "<LOW>":"LOW",
        "<TICKVOL>":"TICKVOL",
        "<VOL>":"VOL"
    })

    dataset["TIMESTAMP"] = pd.to_datetime(dataset["DATE"] + " " + dataset["TIME"], format="%Y.%m.%d %H:%M:%S")
    dataset.drop(columns=["DATE", "TIME"], inplace=True)
    dataset = dataset[["TIMESTAMP", "OPEN", "HIGH", "LOW", "CLOSE", "TICKVOL", "VOL"]]

    return dataset

### **Adicionar minutos anteriores**

In [10]:
def adicionarAnteriores(dataset, min):
    datasetCopy = dataset.copy()

    for i in range(1, min + 1):
        for coluna in ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL', 'VOL']:
            datasetCopy[f'{coluna}_{i}'] = datasetCopy[coluna].shift(i)

    return datasetCopy

### **Classificação**

In [11]:
def classificar(dataset):
    datasetCopy = dataset.copy()
    datasetCopy['VARIACAO'] = 'NEUTRO'

    for i in range(1, len(datasetCopy)):
        if datasetCopy.loc[i, 'CLOSE'] > datasetCopy.loc[i - 1, 'CLOSE']:
            datasetCopy.loc[i, 'VARIACAO'] = 'ALTA'
        elif datasetCopy.loc[i, 'CLOSE'] < datasetCopy.loc[i - 1, 'CLOSE']:
            datasetCopy.loc[i, 'VARIACAO'] = 'QUEDA'

    datasetCopy = datasetCopy.drop(index=0).reset_index(drop=True)

    while 'NEUTRO' in datasetCopy['VARIACAO'].values:
        for i in range(len(datasetCopy) - 1):
            if datasetCopy.loc[i, 'VARIACAO'] == 'NEUTRO':
                datasetCopy.loc[i, 'VARIACAO'] = datasetCopy.loc[i + 1, 'VARIACAO']

    return datasetCopy

### **Remover valores do minuto atual**

In [12]:
def removerValores(dataset):
    colunas = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL', 'VOL']

    return dataset.drop(columns=[col for col in colunas])

### **Treino do Modelo**

In [13]:
def treinar(dataset, tamanho_bloco=5000):
    dataset = dataset.copy()
    dataset['VARIACAO_BIN'] = dataset['VARIACAO'].map({'ALTA': 1, 'QUEDA': 0})

    colunasX = [col for col in dataset.columns if any(p in col for p in ['OPEN_', 'HIGH_', 'LOW_', 'CLOSE_', 'TICKVOL_', 'VOL_'])]
    X = dataset[colunasX].values
    Y = dataset['VARIACAO_BIN'].values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    booster = None
    resultados = []
    n_blocos = len(X) // tamanho_bloco

    for i in range(n_blocos):
        inicio = i * tamanho_bloco
        fim = inicio + tamanho_bloco
        X_bloco = X[inicio:fim]
        Y_bloco = Y[inicio:fim]

        dtrain = xgb.DMatrix(X_bloco, label=Y_bloco)

        params = {
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'max_depth': 8,
            'learning_rate': 0.1,
            'subsample': 0.8,
            'colsample_bytree': 0.8
        }

        booster = xgb.train(params, dtrain, num_boost_round=10, xgb_model=booster)

    X_teste = X[-(len(X) // 5):]
    Y_teste = Y[-(len(Y) // 5):]

    dtest = xgb.DMatrix(X_teste)
    Y_pred = (booster.predict(dtest) > 0.5).astype(int)

    joblib.dump(booster, "/content/drive/MyDrive/Colab Notebooks/Data Science/Projeto Final/modelTendFinal.joblib")
    joblib.dump(scaler, "/content/drive/MyDrive/Colab Notebooks/Data Science/Projeto Final/scalerTendFinal.save")

    print(f"\n✅ Modelo salvo em: /content/drive/MyDrive/Colab Notebooks/Data Science/Projeto Final/modelTendFinal.joblib")

    return confusion_matrix(Y_teste, Y_pred), classification_report(Y_teste, Y_pred, output_dict=True)

## **Código**

In [14]:
path = "/content/drive/MyDrive/Colab Notebooks/Data Science/Projeto Final/Dados.csv"
dataset = carregarDados(path)

resultados = {}
janela = 2

print(f"📊 Avaliando modelo com {janela} minutos anteriores...")
dataset = adicionarAnteriores(dataset, janela)
dataset = dataset.dropna().reset_index(drop=True)
dataset = classificar(dataset)
dataset = removerValores(dataset)

display(dataset.head())

matriz, relatorio = treinar(dataset)

print("Matriz de Confusão:")
print(matriz)

print("Acurácia:", relatorio['accuracy'])
print("Precisão:", relatorio['1']['precision'])

📊 Avaliando modelo com 2 minutos anteriores...


Unnamed: 0,TIMESTAMP,OPEN_1,HIGH_1,LOW_1,CLOSE_1,TICKVOL_1,VOL_1,OPEN_2,HIGH_2,LOW_2,CLOSE_2,TICKVOL_2,VOL_2,VARIACAO
0,2020-10-15 11:52:00,98595.0,98600.0,98535.0,98580.0,4572.0,17663.0,98570.0,98625.0,98570.0,98600.0,5483.0,19111.0,ALTA
1,2020-10-15 11:53:00,98575.0,98620.0,98535.0,98615.0,3536.0,13143.0,98595.0,98600.0,98535.0,98580.0,4572.0,17663.0,QUEDA
2,2020-10-15 11:54:00,98620.0,98625.0,98575.0,98610.0,3729.0,13702.0,98575.0,98620.0,98535.0,98615.0,3536.0,13143.0,ALTA
3,2020-10-15 11:55:00,98610.0,98655.0,98590.0,98655.0,5040.0,17128.0,98620.0,98625.0,98575.0,98610.0,3729.0,13702.0,QUEDA
4,2020-10-15 11:56:00,98650.0,98665.0,98610.0,98615.0,4021.0,13669.0,98610.0,98655.0,98590.0,98655.0,5040.0,17128.0,ALTA



✅ Modelo salvo em: /content/drive/MyDrive/Colab Notebooks/Data Science/Projeto Final/modelTendFinal.joblib
Matriz de Confusão:
[[25898  6874]
 [23516  9463]]
Acurácia: 0.5378017064379249
Precisão: 0.5792373140723511
