In [6]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [26]:
price_data = pd.read_parquet(f'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet')
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.290425,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.275703,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.268352,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.270189,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.275703,6445,ABEV3.SA


In [27]:
import numpy as np
import pandas as pd

# Transformar e ordenar os dados
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Criar a coluna de mudança de preço
price_data['change_in_price'] = price_data['Close'].diff()

# Máscara para identificar a virada entre os tickers
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)
price_data['change_in_price'] = np.where(mask == True, np.nan, price_data['change_in_price'])

price_data[price_data.isna().any(axis = 1)]

# Função para suavização exponencial
def exponential_smoothing(data, alpha):
    smoothed = np.zeros(len(data))
    smoothed[0] = data[0]
    for t in range(1, len(data)):
        smoothed[t] = alpha * data[t] + (1 - alpha) * smoothed[t-1]
    return smoothed

def exponential_smoothing_Verify(data, alpha_verify):
    smoothed = np.zeros(len(data))
    smoothed[0] = data[0]
    for t in range(1, len(data)):
        smoothed[t] = alpha * data[t] + (1 - alpha) * smoothed[t-1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    target = np.sign(data.shift(-d) - data)
    return target

# Função para calcular o target
def calculate_target_verify(data, d):
    target = np.sign(data.shift(-d) - data)
    target[target == 0] = -1
    return target

def obv(group):
    Volume = group['Volume']
    change = group['Smoothed_Close'].diff()

    prev_obv = 0
    obv_values = []

    for i, j in zip(change, Volume):
        if i > 0:
            current_obv = prev_obv + j
        elif i < 0:
            current_obv = prev_obv - j
        else:
            current_obv = prev_obv

        prev_obv = current_obv
        obv_values.append(current_obv)

    return pd.Series(obv_values, index=group.index)

# Lista de valores alpha
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]
 

# Período para calcular o target, RSI e estocástico
d = 10
n = 14
e = 9

# Loop para aplicar a suavização exponencial, calcular o target, o RSI e o estocástico
for alpha in alpha_list:
    col_name_smoothed = f"Smoothed_Close_{alpha}"
    col_name_prediction = f"Prediction_{alpha}"
    col_name_k_percent = f"k_percent_{alpha}"
    col_name_macd = f"MACD_{alpha}"
    col_name_macd_ema = f"MACD_EMA_{alpha}"
    col_name_price_rate_of_change = f"Price_Rate_Of_Change_{alpha}"
    col_name_obv = f"OBV_{alpha}"
    col_name_r_percent = f"r_percent_{alpha}"

    # Suavização exponencial
    price_data[col_name_smoothed] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Cálculo do target
    price_data[col_name_prediction] = price_data.groupby('Ticker')[col_name_smoothed].transform(
        lambda x: calculate_target(x, d)
    )
    price_data[col_name_prediction] = np.where(mask, np.nan, price_data[col_name_prediction])

    # Aplicar o cálculo do OBV para cada Ticker
    obv_groups = price_data.groupby('Ticker').apply(lambda group: obv(group.rename(columns={col_name_smoothed: 'Smoothed_Close'})))
    price_data[col_name_obv] = obv_groups.reset_index(level=0, drop=True)

    # Calculando o Price Rate of Change (ROC)
    price_data[col_name_price_rate_of_change] = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.pct_change(periods=e))
    price_data[col_name_price_rate_of_change] = np.where(mask, np.nan, price_data[col_name_price_rate_of_change])

    # Calcular low_14 e high_14 para o estocástico
    low_14 = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
    high_14 = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())

    # Adicionar low_14 e high_14 ao DataFrame
    price_data['low_14'] = low_14
    price_data['high_14'] = high_14

    # Calcular o %K para o estocástico
    price_data[col_name_k_percent] = 100 * ((price_data[col_name_smoothed] - low_14) / (high_14 - low_14))

    # Aplicando a máscara para evitar cálculos na virada dos tickers
    price_data[col_name_k_percent] = np.where(mask, np.nan, price_data[col_name_k_percent])

    # Cálculo do MACD
    ema_26 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26

    # Cálculo da EMA do MACD
    ema_9_macd = macd.ewm(span=9).mean()

    # Adicionar MACD e MACD_EMA ao DataFrame
    price_data[col_name_macd] = macd
    price_data[col_name_macd_ema] = ema_9_macd

    # Cálculo do r_percent
    r_percent = ((high_14 - price_data[col_name_smoothed]) / (high_14 - low_14)) * (-100)
    price_data[col_name_r_percent] = r_percent

# Coluna de verificação (sem suavização, alpha = 1)
alpha_verify = 1
price_data['Smoothed_Close_1'] = price_data.groupby('Ticker')['Close'].transform(
    lambda x: exponential_smoothing_Verify(x.values, alpha_verify)
)
price_data['Smoothed_Close_1'] = np.where(mask, np.nan, price_data['Smoothed_Close_1'])

price_data['Verify'] = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda x: calculate_target_verify(x, d)
)
price_data['Verify'] = np.where(mask, np.nan, price_data['Verify'])

# Cálculo do RSI
up_df = price_data[['Ticker', 'change_in_price']].copy()
down_df = price_data[['Ticker', 'change_in_price']].copy()

up_df['change_in_price'] = up_df['change_in_price'].where(up_df['change_in_price'] > 0, 0)
down_df['change_in_price'] = down_df['change_in_price'].where(down_df['change_in_price'] < 0, 0).abs()

ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())

relative_strength = ewma_up / ewma_down
price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['up_days'] = up_df['change_in_price']
price_data['down_days'] = down_df['change_in_price']


# Visualizar o DataFrame atualizado
price_data.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.290425,985,ABEV3.SA,,0.520882,,0,,,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,0.520882,,0,,,0.0,0.0,,,,,0.0,0.0
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.275703,227,ABEV3.SA,-0.026404,0.518242,-1.0,-227,,,,,-5.9e-05,-3.3e-05,,0.515601,-1.0,-227,,,-0.000118,-6.6e-05,,0.512961,-1.0,-227,,,-0.000178,-9.9e-05,,0.51032,-1.0,-227,,,-0.000237,-0.000132,,0.50768,-1.0,-227,,,-0.000296,-0.000165,,0.50504,-1.0,-227,,,-0.000355,-0.000197,,0.502399,1.0,-227,,,-0.000415,-0.00023,,0.499759,1.0,-227,,,-0.000474,-0.000263,,0.497118,1.0,-227,,,-0.000533,-0.000296,,0.497118,1.0,0.0,0.0,0.026404
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.268352,1137,ABEV3.SA,-0.013185,0.514547,-1.0,-1364,,,,,-0.00019,-9.7e-05,,0.50874,-1.0,-1364,,,-0.000364,-0.000188,,0.50346,-1.0,-1364,,,-0.000521,-0.000272,,0.498709,1.0,-1364,,,-0.000662,-0.000349,,0.494486,1.0,-1364,,,-0.000787,-0.000419,,0.490792,1.0,-1364,,,-0.000895,-0.000483,,0.487625,1.0,-1364,,,-0.000987,-0.00054,,0.484986,1.0,-1364,,,-0.001062,-0.000591,,0.482876,1.0,-1364,,,-0.001121,-0.000634,,0.482876,1.0,0.0,0.0,0.013185
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.270189,606,ABEV3.SA,0.003296,0.511551,-1.0,-1970,,,,,-0.000353,-0.000184,,0.503909,-1.0,-1970,,,-0.000641,-0.000341,,0.497799,1.0,-1970,,,-0.000869,-0.000474,,0.493061,1.0,-1970,,,-0.001043,-0.000584,,0.489538,1.0,-1970,,,-0.001169,-0.000673,,0.48707,1.0,-1970,,,-0.001252,-0.000744,,0.4855,1.0,-1970,,,-0.001299,-0.000797,,0.484668,1.0,-1970,,,-0.001315,-0.000836,,0.484418,1.0,-758,,,-0.001305,-0.000862,,0.484418,1.0,9.538366,0.003296,0.0
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.275703,6445,ABEV3.SA,0.009889,0.509844,-1.0,-8415,,,,,-0.000502,-0.000279,,0.502023,-1.0,-8415,,,-0.000853,-0.000494,,0.496803,1.0,-8415,,,-0.001079,-0.000654,,0.493628,1.0,4475,,,-0.001206,-0.000769,,0.492008,1.0,4475,,,-0.001254,-0.000846,,0.491515,1.0,4475,,,-0.001244,-0.000892,,0.491785,1.0,4475,,,-0.001192,-0.000915,,0.492516,1.0,4475,,,-0.001112,-0.000918,,0.493472,1.0,5687,,,-0.001018,-0.000908,,0.493472,1.0,31.99422,0.009889,0.0


#### Se quiser rodar para um Ticker.

In [69]:
price_data = price_data[price_data['Ticker'].isin(['ABEV3.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
5730,2024-12-26,12.18,12.24,11.99,12.14,12.14,21166800,ABEV3.SA,-0.08,12.916441,-1.0,-2832724966,-0.037254,11.99,14.53,36.474081,0.071183,0.104943,-63.525919,12.67507,-1.0,-2455355020,-0.0817,26.971287,0.028984,0.124149,-73.028713,12.46737,-1.0,-2630572746,-0.107626,18.794106,-0.027737,0.108392,-81.205894,12.338429,-1.0,-1929375396,-0.119863,13.717703,-0.069958,0.088349,-86.282297,12.260293,-1.0,-1675676378,-0.124889,10.641471,-0.098665,0.071533,-89.358529,12.211341,-1.0,-1363339260,-0.126352,8.714229,-0.118478,0.058476,-91.285771,12.180295,-1.0,-1697107248,-0.125988,7.491945,-0.132654,0.04838,-92.508055,12.160864,-1.0,-1607037908,-0.124603,6.726938,-0.143117,0.040457,-93.273062,12.148651,-1.0,-1542087046,-0.122615,6.246131,-0.151022,0.034127,-93.753869,12.14,-1.0,25.409653,0.0,0.08
5731,2024-12-27,12.16,12.19,11.97,12.07,12.07,37624000,ABEV3.SA,-0.070001,12.831797,-1.0,-2870348966,-0.041303,11.97,14.53,33.663951,0.043745,0.092703,-66.336049,12.554056,-1.0,-2492979020,-0.08111,22.814694,-0.017453,0.095829,-77.185306,12.348159,-1.0,-2668196746,-0.099313,14.771826,-0.080376,0.070639,-85.228174,12.231058,-1.0,-1966999396,-0.104035,10.197552,-0.123255,0.046028,-89.802448,12.165146,-1.0,-1713300378,-0.102534,7.622898,-0.150932,0.02704,-92.377102,12.126536,-1.0,-1400963260,-0.098464,6.11469,-0.169311,0.012919,-93.88531,12.103088,-1.0,-1734731248,-0.093457,5.198754,-0.182036,0.002297,-94.801246,12.088173,-1.0,-1644661908,-0.088261,4.616106,-0.191183,-0.005871,-95.383894,12.077865,-1.0,-1579711046,-0.083249,4.213462,-0.197982,-0.012295,-95.786538,12.07,-1.0,24.226632,0.0,0.070001
5732,2024-12-30,12.06,12.16,11.74,11.74,11.74,38907400,ABEV3.SA,-0.33,12.722617,-1.0,-2909256366,-0.047292,11.74,14.5,35.602088,0.01304,0.07677,-64.397912,12.391245,-1.0,-2531886420,-0.085231,23.595839,-0.066624,0.063338,-76.404161,12.165711,-1.0,-2707104146,-0.100223,15.424325,-0.135255,0.02946,-84.575675,12.034634,-1.0,-2005906796,-0.103416,10.675168,-0.179276,0.000967,-89.324832,11.952573,-1.0,-1752207778,-0.102491,7.701931,-0.20712,-0.019792,-92.298069,11.894614,-1.0,-1439870660,-0.100827,5.601978,-0.225709,-0.034807,-94.398022,11.848926,-1.0,-1773638648,-0.099711,3.946615,-0.238926,-0.045948,-96.053385,11.809634,-1.0,-1683569308,-0.099578,2.522991,-0.248883,-0.054473,-97.477009,11.773786,-1.0,-1618618446,-0.100514,1.224149,-0.256775,-0.061191,-98.775851,11.74,-1.0,19.331009,0.0,0.33
5733,2025-01-02,11.72,11.89,11.49,11.6,11.6,33417900,ABEV3.SA,-0.139999,12.610356,-1.0,-2942674266,-0.052049,11.49,14.5,37.221127,-0.020121,0.057392,-62.778873,12.232996,-1.0,-2565304320,-0.087404,24.684261,-0.117013,0.027268,-75.315739,11.995998,-1.0,-2740522046,-0.099167,16.810569,-0.190249,-0.014482,-83.189431,11.860781,-1.0,-2039324696,-0.100751,12.318306,-0.234993,-0.046225,-87.681694,11.776287,-1.0,-1785625678,-0.099586,9.511194,-0.262844,-0.068402,-90.488806,11.717846,-1.0,-1473288560,-0.098305,7.569641,-0.281425,-0.08413,-92.430359,11.674678,-1.0,-1807056548,-0.097531,6.135495,-0.294676,-0.095693,-93.864505,11.641927,-1.0,-1716987208,-0.097185,5.047422,-0.304631,-0.104505,-94.952578,11.617379,-1.0,-1652036346,-0.096974,4.231867,-0.312389,-0.111431,-95.768133,11.6,-1.0,17.590948,0.0,0.139999
5734,2025-01-03,11.49,11.55,11.27,11.33,11.33,41365900,ABEV3.SA,-0.27,12.48232,-1.0,-2984040166,-0.060241,11.27,14.36,39.233659,-0.056086,0.034696,-60.766341,12.052397,-1.0,-2606670220,-0.096772,25.320278,-0.169565,-0.012098,-74.679722,11.796199,-1.0,-2781887946,-0.109828,17.029069,-0.247106,-0.061007,-82.970931,11.648468,-1.0,-2080690596,-0.11444,12.248158,-0.292904,-0.095561,-87.751842,11.553143,-1.0,-1826991578,-0.117365,9.163202,-0.321308,-0.118983,-90.836798,11.485138,-1.0,-1514654460,-0.120462,6.962393,-0.340433,-0.135391,-93.037607,11.433403,-1.0,-1848422448,-0.123938,5.288122,-0.354243,-0.147403,-94.711878,11.392385,-1.0,-1758353108,-0.127624,3.960678,-0.364743,-0.156553,-96.039322,11.358738,-1.0,-1693402246,-0.131362,2.87176,-0.373033,-0.163751,-97.12824,11.33,-1.0,14.655374,0.0,0.27


In [14]:
price_data = price_data[price_data['Ticker'].isin(['ITUB4.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
23673,2024-12-26,30.85,31.23,30.790001,31.09,31.072144,20545700,ITUB4.SA,0.200001,32.217769,-1.0,37246667915,-0.038074,30.75,33.75,48.925627,-0.655505,-0.57435,-51.074373,31.545086,-1.0,21228548627,-0.046533,26.50285,-0.756948,-0.672238,-73.49715,31.292998,-1.0,15935575115,-0.052746,18.099919,-0.793729,-0.705454,-81.900081,31.175426,-1.0,15231191869,-0.057262,14.180872,-0.813629,-0.722939,-85.819128,31.118273,-1.0,15675423859,-0.060316,12.275755,-0.82511,-0.734227,-87.724245,31.089857,-1.0,14115789487,-0.062437,11.328567,-0.831667,-0.742004,-88.671433,31.076009,-1.0,9742431293,-0.064021,10.866955,-0.83542,-0.747531,-89.133045,31.071534,-1.0,7043805985,-0.065247,10.717809,-0.837589,-0.751572,-89.282191,31.075732,-1.0,8825774253,-0.066139,10.857735,-0.838787,-0.75461,-89.142265,31.09,-1.0,34.747663,0.200001,0.0
23674,2024-12-27,31.15,31.23,30.780001,30.780001,30.762321,13608600,ITUB4.SA,-0.309999,32.073992,-1.0,37233059315,-0.039461,30.75,33.75,44.133067,-0.6755,-0.59458,-55.866933,31.392069,-1.0,21214940027,-0.047677,21.402285,-0.776651,-0.693121,-78.597715,31.139099,-1.0,15921966515,-0.052625,12.96995,-0.812249,-0.726813,-87.03005,31.017256,-1.0,15217583269,-0.055315,8.908532,-0.830362,-0.744424,-91.091468,30.949137,-1.0,15661815259,-0.056367,6.637889,-0.840286,-0.755439,-93.362111,30.903943,-1.0,14102180887,-0.056429,5.131441,-0.845944,-0.762792,-94.868559,30.868803,-1.0,9728822693,-0.055898,3.960102,-0.849449,-0.767915,-96.039898,30.838307,1.0,7030197385,-0.054969,2.94358,-0.851887,-0.771635,-97.05642,30.809574,1.0,8812165653,-0.05375,1.985794,-0.853794,-0.774447,-98.014206,30.780001,1.0,31.115092,0.0,0.309999
23675,2024-12-30,30.870001,31.07,30.73,30.73,30.712349,28377300,ITUB4.SA,-0.050001,31.939593,-1.0,37204682015,-0.039656,30.73,33.75,40.05275,-0.69419,-0.614502,-59.94725,31.259655,-1.0,21186562727,-0.046466,17.538249,-0.793799,-0.713256,-82.461751,31.016369,1.0,15893589215,-0.049341,9.482425,-0.827293,-0.746909,-90.517575,30.902353,1.0,15189205969,-0.049862,5.70708,-0.843175,-0.764174,-94.29292,30.839568,1.0,15633437959,-0.04901,3.628098,-0.85134,-0.774619,-96.371902,30.799577,1.0,14073803587,-0.047539,2.303889,-0.855814,-0.781396,-97.696111,30.771641,1.0,9700445393,-0.045869,1.378843,-0.858511,-0.786034,-98.621157,30.751661,1.0,7001820085,-0.04422,0.717271,-0.860293,-0.789367,-99.282729,30.737957,1.0,8783788353,-0.042728,0.263491,-0.861535,-0.791864,-99.736509,30.73,1.0,30.521267,0.0,0.050001
23676,2025-01-02,30.65,30.85,30.24,30.57,30.57,25643400,ITUB4.SA,-0.16,31.802633,-1.0,37179038615,-0.039301,30.24,33.75,44.519475,-0.711847,-0.633971,-55.480525,31.121724,1.0,21160919327,-0.044418,25.120339,-0.809192,-0.732444,-74.879661,30.882458,1.0,15867945815,-0.045404,18.303654,-0.840334,-0.765594,-81.696346,30.769412,1.0,15163562569,-0.044382,15.082966,-0.85421,-0.782181,-84.917034,30.704784,1.0,15607794559,-0.042554,13.241712,-0.861051,-0.791906,-86.758288,30.661831,1.0,14048160187,-0.040641,12.017972,-0.864783,-0.798074,-87.982028,30.630492,1.0,9674801993,-0.038961,11.125133,-0.867087,-0.802245,-88.874867,30.606332,1.0,6976176685,-0.037624,10.436814,-0.868668,-0.805227,-89.563186,30.586795,1.0,8758144953,-0.036636,9.880217,-0.86984,-0.807459,-90.119783,30.57,1.0,28.512154,0.0,0.16
23677,2025-01-03,30.59,30.620001,29.82,29.82,29.82,35339100,ITUB4.SA,-0.75,31.60437,-1.0,37143699515,-0.041748,29.82,33.75,45.403823,-0.733385,-0.653854,-54.596177,30.861379,1.0,21125580227,-0.04839,26.498197,-0.832798,-0.752515,-73.501803,30.563721,1.0,15832606715,-0.051109,18.924194,-0.866401,-0.785755,-81.075806,30.389647,1.0,15128223469,-0.052428,14.494842,-0.883416,-0.802428,-85.505158,30.262392,1.0,15572455459,-0.053553,11.256796,-0.894138,-0.812352,-88.743204,30.156732,1.0,14012821087,-0.055012,8.568253,-0.902248,-0.818909,-91.431747,30.063147,1.0,9639462893,-0.056922,6.186963,-0.909183,-0.823632,-93.813037,29.977266,1.0,6940837585,-0.059229,4.001691,-0.915513,-0.827284,-95.998309,29.896679,1.0,8722805853,-0.061829,1.951134,-0.921486,-0.830264,-98.048866,29.82,1.0,21.026134,0.0,0.75


In [20]:
price_data = price_data[price_data['Ticker'].isin(['MGLU3.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
27001,2024-12-26,7.03,7.08,6.52,6.53,6.53,28600000,MGLU3.SA,-0.45,7.994798,-1.0,-1169154463,-0.128782,6.52,9.67,46.818986,-0.383847,-0.287826,-53.181014,7.368741,-1.0,-716152579,-0.18223,26.944161,-0.503875,-0.355139,-73.055839,7.074503,-1.0,-925329623,-0.208827,17.603261,-0.573843,-0.405919,-82.396739,6.918078,-1.0,-832043561,-0.225299,12.637383,-0.613102,-0.438338,-87.362617,6.821353,-1.0,-864533033,-0.238056,9.566756,-0.636397,-0.459701,-90.433244,6.749692,-1.0,-392522369,-0.249597,7.29181,-0.651245,-0.474401,-92.70819,6.688044,-1.0,-360855379,-0.260758,5.334738,-0.661594,-0.484948,-94.665262,6.631049,-1.0,-307563071,-0.271577,3.525372,-0.669536,-0.492831,-96.474628,6.577997,-1.0,-326182079,-0.281764,1.841176,-0.676132,-0.498953,-98.158824,6.53,-1.0,25.729006,0.0,0.45
27002,2024-12-27,6.66,6.73,6.28,6.51,6.51,32270000,MGLU3.SA,-0.02,7.846318,-1.0,-1201424463,-0.137475,6.28,9.53,48.19441,-0.417262,-0.313713,-51.80559,7.196993,-1.0,-748422579,-0.189949,28.215164,-0.546698,-0.393451,-71.784836,6.905152,-1.0,-957599623,-0.212932,19.235442,-0.617087,-0.448153,-80.764558,6.754847,-1.0,-864313561,-0.224472,14.610661,-0.655058,-0.481682,-85.389339,6.665677,-1.0,-896803033,-0.230849,11.866965,-0.677194,-0.503199,-88.133035,6.605877,-1.0,-424792369,-0.234181,10.026978,-0.691293,-0.517779,-89.973022,6.563413,-1.0,-393125379,-0.235047,8.720408,-0.701072,-0.528173,-91.279592,6.53421,-1.0,-339833071,-0.233493,7.821842,-0.7083,-0.535924,-92.178158,6.5168,-1.0,-358452079,-0.229497,7.286146,-0.713805,-0.541923,-92.713854,6.51,-1.0,25.521949,0.0,0.02
27003,2024-12-30,6.54,6.79,6.41,6.5,6.5,23979900,MGLU3.SA,-0.01,7.711686,-1.0,-1225404363,-0.143833,6.28,9.53,44.051891,-0.449426,-0.340855,-55.948109,7.057594,-1.0,-772402479,-0.193206,23.925977,-0.585139,-0.431788,-76.074023,6.783606,-1.0,-981579523,-0.211326,15.495577,-0.653632,-0.489249,-84.504423,6.652908,-1.0,-888293461,-0.217857,11.474087,-0.688596,-0.523065,-88.525913,6.582838,-1.0,-920782933,-0.219408,9.318095,-0.708048,-0.544169,-90.681905,6.542351,-1.0,-448772269,-0.218391,8.072326,-0.719859,-0.558195,-91.927674,6.519024,-1.0,-417105279,-0.215902,7.35458,-0.727553,-0.568049,-92.64542,6.506842,-1.0,-363812971,-0.212717,6.979749,-0.732782,-0.575296,-93.020251,6.50168,-1.0,-382431979,-0.209598,6.820917,-0.736393,-0.580817,-93.179083,6.5,-1.0,25.403999,0.0,0.01
27004,2025-01-02,6.5,6.57,6.25,6.43,6.43,28463800,MGLU3.SA,-0.07,7.583518,-1.0,-1253868163,-0.146241,6.25,9.53,40.656034,-0.479728,-0.36863,-59.343966,6.932075,-1.0,-800866279,-0.189246,20.794984,-0.618601,-0.469151,-79.205016,6.677524,-1.0,-1010043323,-0.200191,13.034282,-0.683277,-0.528054,-86.965718,6.563745,-1.0,-916757261,-0.200285,9.565389,-0.714137,-0.561279,-90.434611,6.506419,-1.0,-949246733,-0.196398,7.817654,-0.730249,-0.581385,-92.182346,6.47494,-1.0,-477236069,-0.191052,6.857934,-0.739413,-0.594439,-93.142066,6.456707,-1.0,-445569079,-0.185398,6.302046,-0.744981,-0.603436,-93.697954,6.445368,-1.0,-392276771,-0.180082,5.95635,-0.748516,-0.60994,-94.04365,6.437168,-1.0,-410895779,-0.175418,5.706337,-0.750844,-0.614823,-94.293663,6.43,-1.0,24.489873,0.0,0.07
27005,2025-01-03,6.43,6.56,6.15,6.26,6.26,20272000,MGLU3.SA,-0.17,7.451166,-1.0,-1274140163,-0.147586,6.15,9.53,38.496038,-0.50856,-0.396616,-61.503962,6.79766,-1.0,-821138279,-0.184358,19.161549,-0.648491,-0.505019,-80.838451,6.552267,-1.0,-1030315323,-0.1896,11.901394,-0.708709,-0.564185,-88.098606,6.442247,-1.0,-937029261,-0.18582,8.646357,-0.735702,-0.596164,-91.353643,6.38321,-1.0,-969518733,-0.179882,6.899691,-0.74915,-0.614938,-93.100309,6.345976,-1.0,-497508069,-0.174094,5.798111,-0.756595,-0.62687,-94.201889,6.319012,-1.0,-465841079,-0.169301,5.000361,-0.761129,-0.634974,-94.999639,6.297074,-1.0,-412548771,-0.165751,4.351295,-0.764143,-0.640781,-95.648705,6.277717,-1.0,-431167779,-0.163379,3.778607,-0.766329,-0.645124,-96.221393,6.26,-1.0,22.246682,0.0,0.17


In [33]:
price_data = price_data[price_data['Ticker'].isin(['PETR4.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days,Match
17832,2024-12-26,35.630001,36.0,35.599998,35.77,35.77,22920700,PETR4.SA,-1.09,37.870973,1.0,-250627041755,-0.027331,35.599998,40.759998,44.011126,0.271358,0.394534,-55.988874,37.336865,1.0,-122024321851,-0.058592,33.660196,0.117387,0.38409,-66.339804,36.886411,1.0,31986151317,-0.076656,24.930474,-0.023458,0.314571,-75.069526,36.593466,1.0,-128314624547,-0.08797,19.253248,-0.116173,0.257524,-80.746752,36.39186,1.0,81612954693,-0.095903,15.346153,-0.177489,0.21568,-84.653847,36.237691,1.0,189333312965,-0.101988,12.358373,-0.22017,0.184899,-87.641627,36.107921,1.0,190386918349,-0.106976,9.843451,-0.251683,0.161676,-90.156549,35.990618,1.0,294170670181,-0.111272,7.570145,-0.276282,0.143632,-92.429855,35.879235,1.0,212829417981,-0.115132,5.411561,-0.296445,0.129211,-94.588439,35.77,1.0,21.194492,0.0,1.09,1
17833,2024-12-27,36.0,36.0,35.610001,35.66,35.66,24167200,PETR4.SA,-0.110001,37.649875,1.0,-250651208955,-0.03153,35.599998,40.759998,39.726295,0.199699,0.355567,-60.273705,37.001492,1.0,-122048489051,-0.060793,27.16072,0.004863,0.308245,-72.83928,36.518488,1.0,31961984117,-0.074687,17.800177,-0.149991,0.221659,-82.199823,36.22008,1.0,-128338791747,-0.080939,12.017076,-0.247798,0.156459,-87.982924,36.02593,1.0,81588787493,-0.083218,8.254485,-0.311362,0.110271,-91.745515,35.891076,1.0,189309145765,-0.083132,5.64104,-0.355209,0.076877,-94.35896,35.794376,1.0,190362751149,-0.081442,3.767008,-0.387169,0.051907,-96.232992,35.726123,1.0,294146502981,-0.078543,2.444283,-0.411444,0.032617,-97.555717,35.681923,1.0,212805250781,-0.074657,1.587692,-0.430356,0.017298,-98.412308,35.66,1.0,20.526365,0.0,0.110001,1
17834,2024-12-30,35.779999,36.369999,35.77,36.189999,36.189999,22355600,PETR4.SA,0.529999,37.503888,1.0,-250673564555,-0.033356,35.599998,40.759998,36.897077,0.129634,0.310381,-63.102923,36.839193,1.0,-122070844651,-0.058717,24.015398,-0.0963,0.227336,-75.984602,36.419941,1.0,31939628517,-0.067501,15.890357,-0.255279,0.126271,-84.109643,36.208047,1.0,-128361147347,-0.068865,11.78389,-0.349058,0.055356,-88.21611,36.107964,1.0,81611143093,-0.066931,9.844298,-0.406156,0.006986,-90.155702,36.07043,1.0,189331501365,-0.063546,9.116883,-0.442653,-0.027029,-90.883117,36.071312,1.0,190385106749,-0.059673,9.133981,-0.466815,-0.051838,-90.866019,36.097224,1.0,294168858581,-0.055893,9.636146,-0.483048,-0.070516,-90.363854,36.139191,1.0,212827606381,-0.05261,10.44947,-0.49389,-0.08494,-89.55053,36.189999,1.0,32.377384,0.529999,0.0,1
17835,2025-01-02,36.419998,37.09,36.189999,36.77,36.77,30046800,PETR4.SA,0.580002,37.430499,1.0,-250703611355,-0.03311,35.599998,40.759998,35.474815,0.067408,0.261786,-64.525185,36.825354,1.0,-122100891451,-0.053278,23.74721,-0.175564,0.146756,-76.25279,36.524959,1.0,31969675317,-0.056724,17.925587,-0.326482,0.035721,-82.074413,36.432828,1.0,-128331100547,-0.053878,16.140117,-0.406485,-0.037012,-83.859883,36.438982,1.0,81641189893,-0.049063,16.259378,-0.44939,-0.084289,-83.740622,36.490172,-1.0,189361548165,-0.04404,17.251428,-0.472635,-0.11615,-82.748572,36.560394,-1.0,190415153549,-0.039555,18.612315,-0.484881,-0.138446,-81.387685,36.635445,1.0,294198905381,-0.03588,20.066795,-0.490709,-0.154555,-79.933205,36.70692,1.0,212857653181,-0.033026,21.451959,-0.49275,-0.166502,-78.548041,36.77,1.0,43.092658,0.580002,0.0,1
17836,2025-01-03,36.880001,37.040001,36.32,36.380001,36.380001,23314200,PETR4.SA,-0.389999,37.325449,1.0,-250726925555,-0.034795,35.599998,40.759998,33.438966,0.009508,0.21133,-66.561034,36.736284,1.0,-122124205651,-0.052657,22.021034,-0.242771,0.06885,-77.978966,36.481471,1.0,31946361117,-0.054761,17.08281,-0.382017,-0.047827,-82.91719,36.411698,1.0,-128354414747,-0.052384,15.730602,-0.44853,-0.119316,-84.269398,36.409492,-1.0,81617875693,-0.049597,15.687854,-0.480494,-0.16353,-84.312146,36.424069,-1.0,189338233965,-0.047699,15.970369,-0.496013,-0.192123,-84.029631,36.434119,-1.0,190391839349,-0.046972,16.165125,-0.503583,-0.211474,-83.834875,36.43109,1.0,294175591181,-0.047298,16.106423,-0.50742,-0.225128,-83.893577,36.412693,1.0,212834338981,-0.048434,15.749893,-0.509713,-0.235144,-84.250107,36.380001,1.0,38.374848,0.0,0.389999,1


In [None]:
price_data = price_data[price_data['Ticker'].isin(['VALE3.SA'])]
price_data.tail()

In [14]:
price_data.to_excel('C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx', index=False)

#### Se quiser normal só pular etapa anterior.

In [29]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Para cada alpha, criar as variáveis X_Cols e Y_Cols e treinar o modelo
for alpha in alpha_list:
    # Definindo as colunas de X e Y dinamicamente
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']]
    
    Y_Cols = price_data[f'Prediction_{alpha}']

    # Dividir os dados em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Criar o classificador Random Forest
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Ajustar o modelo
    rand_frst_clf.fit(X_train, y_train)

    # Fazer previsões
    y_pred = rand_frst_clf.predict(X_test)

    # Imprimir a acurácia
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.2f}')
    print('---')


Alpha: 0.1
Correct Prediction (%): 84.20
---
Alpha: 0.2
Correct Prediction (%): 76.60
---
Alpha: 0.3
Correct Prediction (%): 73.76
---
Alpha: 0.4
Correct Prediction (%): 71.65
---
Alpha: 0.5
Correct Prediction (%): 72.31
---
Alpha: 0.6
Correct Prediction (%): 70.52
---
Alpha: 0.7
Correct Prediction (%): 68.94
---
Alpha: 0.8
Correct Prediction (%): 69.66
---
Alpha: 0.9
Correct Prediction (%): 70.32
---


In [30]:
import warnings

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}'
    price_data[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    price_data['Match'] = (price_data[f'Prediction_{alpha}'] == price_data['Verify']).astype(int)  
    accuracy = price_data['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo: 52.38%

Precisão para Alpha: 0.2
Precisão do modelo: 52.79%

Precisão para Alpha: 0.3
Precisão do modelo: 54.03%

Precisão para Alpha: 0.4
Precisão do modelo: 55.80%

Precisão para Alpha: 0.5
Precisão do modelo: 56.92%

Precisão para Alpha: 0.6
Precisão do modelo: 57.37%

Precisão para Alpha: 0.7
Precisão do modelo: 58.34%

Precisão para Alpha: 0.8
Precisão do modelo: 57.93%

Precisão para Alpha: 0.9
Precisão do modelo: 91.52%


In [31]:
import warnings
from datetime import timedelta

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Filtrar últimos 30 dias
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)]

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha}',
                           f'r_percent_{alpha}',
                           f'Price_Rate_Of_Change_{alpha}',
                           f'MACD_{alpha}',
                           f'MACD_EMA_{alpha}',
                           f'OBV_{alpha}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}' para os últimos 30 dias
    last_30_days[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha}'] == last_30_days['Verify']).astype(int)  # 1 para igual, 0 para diferente
    accuracy = last_30_days['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo nos últimos 30 dias: 47.37%

Precisão para Alpha: 0.2
Precisão do modelo nos últimos 30 dias: 57.89%

Precisão para Alpha: 0.3
Precisão do modelo nos últimos 30 dias: 84.21%

Precisão para Alpha: 0.4
Precisão do modelo nos últimos 30 dias: 57.89%

Precisão para Alpha: 0.5
Precisão do modelo nos últimos 30 dias: 47.37%

Precisão para Alpha: 0.6
Precisão do modelo nos últimos 30 dias: 36.84%

Precisão para Alpha: 0.7
Precisão do modelo nos últimos 30 dias: 36.84%

Precisão para Alpha: 0.8
Precisão do modelo nos últimos 30 dias: 94.74%

Precisão para Alpha: 0.9
Precisão do modelo nos últimos 30 dias: 94.74%


#### Time Series com Verify

In [34]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Criar listas para armazenar os resultados gerais
all_predictions = []
all_verify_values = []
all_actual_values = []
all_accuracy_scores_ytest = []
all_accuracy_scores_verify = []

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X), o target (y) e a verificação (verify) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha}',
                    f'r_percent_{alpha}',
                    f'Price_Rate_Of_Change_{alpha}',
                    f'MACD_{alpha}',
                    f'MACD_EMA_{alpha}',
                    f'OBV_{alpha}']]
    y = price_data[f'Prediction_{alpha}']
    verify = price_data['Verify']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y, verify], axis=1).dropna()
    X = data.iloc[:, :-2]
    y = data.iloc[:, -2]
    verify = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0, oob_score=True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Armazenar as predições e valores reais para análise posterior
        all_predictions.extend(y_pred)
        all_verify_values.extend(y_verify.values)
        all_actual_values.extend(y_test.values)

        # Calcular acurácia para y_test e verify
        accuracy_ytest = accuracy_score(y_test, y_pred) * 100.0
        accuracy_verify = accuracy_score(y_verify, y_pred) * 100.0

        # Armazenar os resultados das acurácias
        all_accuracy_scores_ytest.append(accuracy_ytest)
        all_accuracy_scores_verify.append(accuracy_verify)

        # Exibir resultados por fold
        print(f'Fold {fold}:')
        print(f' - Correct Prediction (y_test): {accuracy_ytest:.2f}%')
        print(f' - Correct Prediction (Verify): {accuracy_verify:.2f}%')
        fold += 1

    print('---')

# Resultados gerais
print("Resumo geral:")
print(f'Média de acurácia (y_test): {np.mean(all_accuracy_scores_ytest):.2f}%')
print(f'Média de acurácia (Verify): {np.mean(all_accuracy_scores_verify):.2f}%')


Alpha: 0.1
Fold 1:
 - Correct Prediction (y_test): 88.39%
 - Correct Prediction (Verify): 57.84%
Fold 2:
 - Correct Prediction (y_test): 81.94%
 - Correct Prediction (Verify): 50.40%
Fold 3:
 - Correct Prediction (y_test): 89.98%
 - Correct Prediction (Verify): 50.99%
Fold 4:
 - Correct Prediction (y_test): 87.90%
 - Correct Prediction (Verify): 51.98%
Fold 5:
 - Correct Prediction (y_test): 88.79%
 - Correct Prediction (Verify): 48.71%
---
Alpha: 0.2
Fold 1:
 - Correct Prediction (y_test): 88.19%
 - Correct Prediction (Verify): 60.42%
Fold 2:
 - Correct Prediction (y_test): 80.16%
 - Correct Prediction (Verify): 48.51%
Fold 3:
 - Correct Prediction (y_test): 90.87%
 - Correct Prediction (Verify): 47.62%
Fold 4:
 - Correct Prediction (y_test): 88.79%
 - Correct Prediction (Verify): 54.17%
Fold 5:
 - Correct Prediction (y_test): 90.77%
 - Correct Prediction (Verify): 49.90%
---
Alpha: 0.3
Fold 1:
 - Correct Prediction (y_test): 78.27%
 - Correct Prediction (Verify): 53.17%
Fold 2:
 - Co

#### Time Series sem Verify

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X) e o target (Y) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha}',
                    f'r_percent_{alpha}',
                    f'Price_Rate_Of_Change_{alpha}',
                    f'MACD_{alpha}',
                    f'MACD_EMA_{alpha}',
                    f'OBV_{alpha}']]
    y = price_data[f'Prediction_{alpha}']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y], axis=1).dropna()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0,oob_score = True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Calcular e imprimir a acurácia para cada divisão
        accuracy = accuracy_score(y_test, y_pred) * 100.0
        print(f'Fold {fold}: Correct Prediction (%): {accuracy:.2f}')
        fold += 1

    print('---')


Alpha: 0.1
Fold 1: Correct Prediction (%): 60.13
Fold 2: Correct Prediction (%): 70.62
Fold 3: Correct Prediction (%): 60.34
Fold 4: Correct Prediction (%): 67.68
Fold 5: Correct Prediction (%): 71.88
---
Alpha: 0.2
Fold 1: Correct Prediction (%): 49.53
Fold 2: Correct Prediction (%): 55.82
Fold 3: Correct Prediction (%): 56.87
Fold 4: Correct Prediction (%): 57.61
Fold 5: Correct Prediction (%): 56.66
---
Alpha: 0.3
Fold 1: Correct Prediction (%): 46.38
Fold 2: Correct Prediction (%): 51.63
Fold 3: Correct Prediction (%): 52.47
Fold 4: Correct Prediction (%): 55.19
Fold 5: Correct Prediction (%): 55.40
---
Alpha: 0.4
Fold 1: Correct Prediction (%): 45.54
Fold 2: Correct Prediction (%): 52.68
Fold 3: Correct Prediction (%): 53.73
Fold 4: Correct Prediction (%): 51.52
Fold 5: Correct Prediction (%): 49.32
---
Alpha: 0.5
Fold 1: Correct Prediction (%): 45.33
Fold 2: Correct Prediction (%): 48.27
Fold 3: Correct Prediction (%): 54.04
Fold 4: Correct Prediction (%): 51.31
Fold 5: Correct P

#### Salvar modelo 

In [8]:
# Salvar o modelo treinado
joblib.dump(rand_frst_clf, 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl')

['C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl']

In [None]:

# Agrupando por Ticker e pegando a primeira e a última linha de cada grupo
start_end_dates = price_data.groupby('Ticker').agg(
    start_date=('Date', 'first'),  # Pegando a primeira data (início)
    end_date=('Date', 'last')      # Pegando a última data (fim)
).reset_index()

# Mostrando todas as colunas das linhas com as datas de início e fim para cada Ticker
result = pd.DataFrame()

for ticker in start_end_dates['Ticker']:
    # Obtendo as linhas de data de início e de fim para cada Ticker
    start_row = price_data[(price_data['Ticker'] == ticker) & (price_data['Date'] == start_end_dates[start_end_dates['Ticker'] == ticker]['start_date'].values[0])]
    end_row = price_data[(price_data['Ticker'] == ticker) & (price_data['Date'] == start_end_dates[start_end_dates['Ticker'] == ticker]['end_date'].values[0])]
    
    # Concatenando as linhas de data de início e fim
    result = pd.concat([result, start_row, end_row])

result
