In [66]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [67]:
# Load the price history from the local parquet export and preview the first rows
parquet_path = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet'
price_data = pd.read_parquet(parquet_path)
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.284432,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.270014,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.262814,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.264614,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.270014,6445,ABEV3.SA


In [45]:
import numpy as np
import pandas as pd

# Parse the dates and order the rows by ticker, then chronologically
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Row-over-row change of the close, computed inside each ticker so that the
# first row of a ticker is NaN instead of the difference against the last
# close of the previous ticker in the sorted frame.
price_data['change_in_price'] = price_data.groupby('Ticker')['Close'].diff()

# True on the first row of every ticker (the ticker differs from the row above)
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)

# Exponential smoothing helper
def exponential_smoothing(data, alpha):
    """Return the simple exponential smoothing of ``data``.

    The first output equals the first observation; every later value is
    ``alpha * data[t] + (1 - alpha) * smoothed[t - 1]``.

    Parameters
    ----------
    data : array-like
        One-dimensional sequence of numbers (list, NumPy array or pandas Series).
    alpha : float
        Weight given to the newest observation.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``data``; empty when ``data`` is empty.
    """
    # Work on a positional float array so a pandas Series whose index does not
    # start at 0 is read by position rather than by label.
    values = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(values))
    if len(values) == 0:
        # Nothing to seed the recursion with.
        return smoothed
    smoothed[0] = values[0]
    for t in range(1, len(values)):
        smoothed[t] = alpha * values[t] + (1 - alpha) * smoothed[t - 1]
    return smoothed

# Target label helper
def calculate_target(data, d):
    """Label every position with the direction of the move ``d`` rows ahead.

    Parameters
    ----------
    data : pandas.Series
        Series of values to compare against their own future.
    d : int
        Number of rows to look ahead.

    Returns
    -------
    pandas.Series
        ``1`` where the value ``d`` rows later is higher, ``-1`` where it is
        lower or equal, and NaN where the difference cannot be computed
        (for example the last ``d`` rows).
    """
    future_minus_now = data.shift(-d) - data
    direction = np.sign(future_minus_now)
    # An unchanged value counts as a down move.
    return direction.mask(direction == 0, -1)

# On-balance volume helper
def obv(group, smoothed_col):
    """Accumulate signed volume following the direction of ``smoothed_col``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to process; must contain ``'Volume'`` and ``smoothed_col``.
    smoothed_col : str
        Name of the column whose row-over-row change decides the sign.

    Returns
    -------
    pandas.Series
        Running total indexed like ``group``. The total starts at 0, grows by
        the row's volume when the column rose, shrinks by it when the column
        fell, and is carried forward unchanged otherwise (including NaN changes).
    """
    price_moves = group[smoothed_col].diff()

    running_total = 0
    history = []
    for move, traded in zip(price_moves, group['Volume']):
        if move > 0:
            running_total = running_total + traded
        elif move < 0:
            running_total = running_total - traded
        # Zero or NaN move: keep the previous total.
        history.append(running_total)

    return pd.Series(history, index=group.index)

# Smoothing factors to evaluate
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# d: how many rows ahead the target looks; n: window of the rolling low/high
# (also used further down this cell as the EWM span of the RSI);
# e: lag, in rows, of the price rate of change
d = 10
n = 14
e = 9

# The rolling low/high only depend on the raw Low/High columns, not on alpha,
# so they are computed once here instead of on every loop iteration.
low_14 = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
high_14 = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())

# For every alpha: smooth the close, then derive the target and the indicators from it
for alpha in alpha_list:
    col_name_smoothed = f"Smoothed_Close_{alpha}"
    col_name_prediction = f"Prediction_{alpha}"
    col_name_k_percent = f"k_percent_{alpha}"
    col_name_macd = f"MACD_{alpha}"
    col_name_macd_ema = f"MACD_EMA_{alpha}"
    col_name_price_rate_of_change = f"Price_Rate_Of_Change_{alpha}"
    col_name_obv = f"OBV_{alpha}"
    col_name_r_percent = f"r_percent_{alpha}"

    # Exponential smoothing of the close, ticker by ticker
    price_data[col_name_smoothed] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Blank the first row of every ticker
    price_data[col_name_smoothed] = np.where(mask, np.nan, price_data[col_name_smoothed])

    # Target: direction of the smoothed close d rows ahead
    price_data[col_name_prediction] = price_data.groupby('Ticker')[col_name_smoothed].transform(
        lambda x: calculate_target(x, d)
    )
    price_data[col_name_prediction] = np.where(mask, np.nan, price_data[col_name_prediction])

    # On-balance volume driven by the smoothed close
    obv_groups = price_data.groupby('Ticker').apply(obv, smoothed_col=col_name_smoothed)
    price_data[col_name_obv] = obv_groups.reset_index(level=0, drop=True)

    # Price rate of change over e rows
    price_data[col_name_price_rate_of_change] = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.pct_change(periods=e))
    price_data[col_name_price_rate_of_change] = np.where(mask, np.nan, price_data[col_name_price_rate_of_change])

    # Store the rolling low/high; assigned at this point so the columns keep
    # their original position in the frame.
    price_data['low_14'] = low_14
    price_data['high_14'] = high_14

    # Stochastic oscillator %K
    price_data[col_name_k_percent] = 100 * ((price_data[col_name_smoothed] - low_14) / (high_14 - low_14))

    # Blank the first row of every ticker
    price_data[col_name_k_percent] = np.where(mask, np.nan, price_data[col_name_k_percent])

    # MACD: 12-span EMA minus 26-span EMA of the smoothed close, per ticker
    ema_26 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26

    # Signal line: 9-span EMA of the MACD, also computed per ticker so the
    # average of one ticker does not carry over into the next one.
    ema_9_macd = macd.groupby(price_data['Ticker']).transform(lambda x: x.ewm(span=9).mean())

    # Store MACD and its signal line
    price_data[col_name_macd] = macd
    price_data[col_name_macd_ema] = ema_9_macd

    # Williams %R
    r_percent = ((high_14 - price_data[col_name_smoothed]) / (high_14 - low_14)) * (-100)
    price_data[col_name_r_percent] = r_percent

# Reference columns without smoothing (alpha = 1)
alpha = 1
raw_smoothed = price_data.groupby('Ticker')['Close'].transform(
    lambda closes: exponential_smoothing(closes.values, alpha)
)
# Blank the first row of every ticker
price_data['Smoothed_Close_1'] = np.where(mask, np.nan, raw_smoothed)

# Target computed from the unsmoothed series, used later as the reference label
verify_target = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda series: calculate_target(series, d)
)
price_data['Verify'] = np.where(mask, np.nan, verify_target)

# RSI: split the price change into gains and (absolute) losses
changes = price_data['change_in_price']
up_df = price_data[['Ticker']].assign(change_in_price=changes.where(changes > 0, 0))
down_df = price_data[['Ticker']].assign(change_in_price=changes.where(changes < 0, 0).abs())

# Exponentially weighted mean with span n, applied ticker by ticker
span_n_mean = lambda s: s.ewm(span=n).mean()
ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(span_n_mean)
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(span_n_mean)

relative_strength = ewma_up / ewma_down
price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['up_days'] = up_df['change_in_price']
price_data['down_days'] = down_df['change_in_price']

# Drop every row that still has a NaN in any column
price_data = price_data.dropna()

# Preview the last rows of the updated frame
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
11757,2024-12-26,54.849998,55.400002,54.400002,55.009998,55.009998,13155800,VALE3.SA,0.16,56.505553,-1.0,2615895548,-0.037093,53.330002,60.189999,46.29086,-0.818763,-0.682099,-53.70914,55.480276,-1.0,2210130270,-0.053182,31.345124,-0.989594,-0.804075,-68.654876,55.063495,-1.0,335066462,-0.063026,25.269591,-1.08372,-0.857817,-74.730409,54.907717,-1.0,743919266,-0.067973,22.99877,-1.141483,-0.89846,-77.00123,54.875543,-1.0,671816026,-0.070039,22.529758,-1.173487,-0.928296,-77.470242,54.894808,-1.0,412217338,-0.070428,22.810601,-1.189711,-0.94957,-77.189399,54.929836,-1.0,604148042,-0.069784,23.321205,-1.197007,-0.964704,-76.678795,54.964064,-1.0,-997892342,-0.068496,23.820162,-1.199513,-0.975599,-76.179838,54.990965,-1.0,-832181038,-0.066853,24.212304,-1.199625,-0.983593,-75.787696,55.009998,-1.0,38.934135,0.16,0.0
11758,2024-12-27,54.73,55.080002,54.459999,54.740002,54.740002,16635300,VALE3.SA,-0.269997,56.328998,-1.0,2599260248,-0.036864,53.330002,60.189999,43.717168,-0.856589,-0.716997,-56.282832,55.332221,-1.0,2193494970,-0.049589,29.186887,-1.03108,-0.849476,-70.813113,54.966447,-1.0,318431162,-0.054749,23.854897,-1.117711,-0.909796,-76.145103,54.840631,-1.0,727283966,-0.054982,22.020839,-1.163699,-0.951508,-77.979161,54.807772,-1.0,655180726,-0.052739,21.541851,-1.185256,-0.979688,-78.458149,54.801924,-1.0,395582038,-0.049363,21.456606,-1.194099,-0.998476,-78.543394,54.796952,-1.0,587512742,-0.045552,21.384122,-1.197019,-1.011167,-78.615878,54.784814,-1.0,-1014527642,-0.041707,21.207187,-1.197522,-1.019984,-78.792813,54.765098,-1.0,-848816338,-0.038062,20.919779,-1.197284,-1.026331,-79.080221,54.740002,-1.0,36.806222,0.0,0.269997
11759,2024-12-30,54.900002,55.189999,54.549999,54.549999,54.549999,11250900,VALE3.SA,-0.190002,56.151098,-1.0,2588009348,-0.035577,53.330002,60.189999,41.123874,-0.890655,-0.751729,-58.876126,55.175777,-1.0,2182244070,-0.044532,26.906355,-1.064313,-0.892443,-73.093645,54.841513,-1.0,307180262,-0.045622,22.033696,-1.141571,-0.956151,-77.966304,54.724378,-1.0,716033066,-0.042657,20.326194,-1.177118,-0.99663,-79.673806,54.678886,-1.0,643929826,-0.038353,19.663039,-1.19125,-1.022,-80.336961,54.650769,-1.0,384331138,-0.034012,19.253179,-1.195987,-1.037978,-80.746821,54.624085,-1.0,576261842,-0.03021,18.864195,-1.197177,-1.048369,-81.135805,54.596962,-1.0,-1025778542,-0.027171,18.468819,-1.1973,-1.055447,-81.531181,54.571509,-1.0,-860067238,-0.024938,18.097782,-1.197249,-1.060515,-81.902218,54.549999,-1.0,35.242227,0.0,0.190002
11760,2025-01-02,54.709999,55.099998,54.23,54.25,54.25,17623900,VALE3.SA,-0.299999,55.960988,-1.0,2570385448,-0.034877,53.330002,59.939999,39.803145,-0.92236,-0.785855,-60.196855,54.990622,-1.0,2164620170,-0.041373,25.122852,-1.092991,-0.932553,-74.877148,54.664059,-1.0,289556362,-0.04053,20.182416,-1.161412,-0.997203,-79.817584,54.534627,-1.0,698409166,-0.036996,18.224291,-1.189353,-1.035174,-81.775709,54.464443,-1.0,626305926,-0.033372,17.162505,-1.199478,-1.057496,-82.837495,54.410308,-1.0,366707238,-0.030603,16.343516,-1.203018,-1.070986,-83.656484,54.362226,-1.0,558637942,-0.028877,15.6161,-1.204547,-1.079605,-84.3839,54.319392,-1.0,-1043402442,-0.028073,14.968095,-1.205625,-1.085483,-85.031905,54.282151,-1.0,-877691138,-0.027963,14.404683,-1.20666,-1.089744,-85.595317,54.25,-1.0,32.709994,0.0,0.299999
11761,2025-01-03,53.900002,54.0,52.880001,53.240002,53.240002,23608700,VALE3.SA,-1.009998,55.68889,-1.0,2546776748,-0.036456,52.880001,59.59,41.861238,-0.958395,-0.820363,-58.138762,54.640498,-1.0,2141011470,-0.043294,26.236911,-1.130935,-0.972229,-73.763089,54.236842,-1.0,265947662,-0.043682,20.221175,-1.197801,-1.037322,-79.778825,54.016777,-1.0,674800466,-0.042626,16.941518,-1.226695,-1.073479,-83.058482,53.852222,-1.0,602697226,-0.042242,14.48914,-1.241093,-1.094215,-85.51086,53.708124,-1.0,343098538,-0.042929,12.341626,-1.250832,-1.106955,-87.658374,53.576669,1.0,535029242,-0.044479,10.382531,-1.25926,-1.115536,-89.617469,53.45588,1.0,-1067011142,-0.046549,8.582397,-1.267292,-1.121844,-91.417603,53.344217,1.0,-901299838,-0.048841,6.918265,-1.275103,-1.126816,-93.081735,53.240002,1.0,25.572295,0.0,1.009998


In [68]:
import numpy as np
import pandas as pd

# Parse the dates and order the rows by ticker, then chronologically
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Row-over-row change of the close, computed inside each ticker so that the
# first row of a ticker is NaN instead of the difference against the last
# close of the previous ticker in the sorted frame.
price_data['change_in_price'] = price_data.groupby('Ticker')['Close'].diff()

# True on the first row of every ticker (the ticker differs from the row above)
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)

# Exponential smoothing helper
def exponential_smoothing(data, alpha):
    """Return the simple exponential smoothing of ``data``.

    The first output equals the first observation; every later value is
    ``alpha * data[t] + (1 - alpha) * smoothed[t - 1]``.

    Parameters
    ----------
    data : array-like
        One-dimensional sequence of numbers (list, NumPy array or pandas Series).
    alpha : float
        Weight given to the newest observation.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``data``; empty when ``data`` is empty.
    """
    # Work on a positional float array so a pandas Series whose index does not
    # start at 0 is read by position rather than by label.
    values = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(values))
    if len(values) == 0:
        # Nothing to seed the recursion with.
        return smoothed
    smoothed[0] = values[0]
    for t in range(1, len(values)):
        smoothed[t] = alpha * values[t] + (1 - alpha) * smoothed[t - 1]
    return smoothed

# Target label helper
def calculate_target(data, d):
    """Label every position with the direction of the move ``d`` rows ahead.

    Parameters
    ----------
    data : pandas.Series
        Series of values to compare against their own future.
    d : int
        Number of rows to look ahead.

    Returns
    -------
    pandas.Series
        ``1`` where the value ``d`` rows later is higher, ``-1`` where it is
        lower or equal, and NaN where the difference cannot be computed
        (for example the last ``d`` rows).
    """
    future_minus_now = data.shift(-d) - data
    direction = np.sign(future_minus_now)
    # An unchanged value counts as a down move.
    return direction.mask(direction == 0, -1)

def obv(group):
    """Accumulate signed volume following the direction of ``'Smoothed_Close'``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to process; must contain ``'Volume'`` and ``'Smoothed_Close'``.

    Returns
    -------
    pandas.Series
        Running total indexed like ``group``. The total starts at 0, grows by
        the row's volume when ``'Smoothed_Close'`` rose, shrinks by it when it
        fell, and is carried forward unchanged otherwise (including NaN changes).
    """
    price_moves = group['Smoothed_Close'].diff()

    running_total = 0
    history = []
    for move, traded in zip(price_moves, group['Volume']):
        if move > 0:
            running_total = running_total + traded
        elif move < 0:
            running_total = running_total - traded
        # Zero or NaN move: keep the previous total.
        history.append(running_total)

    return pd.Series(history, index=group.index)

# Smoothing factors to evaluate
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# d: how many rows ahead the target looks; n: window of the rolling low/high
# (also used further down this cell as the EWM span of the RSI);
# e: lag, in rows, of the price rate of change
d = 10
n = 14
e = 9

# The rolling low/high only depend on the raw Low/High columns, not on alpha,
# so they are computed once here instead of on every loop iteration.
low_14 = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
high_14 = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())

# For every alpha: smooth the close, then derive the target and the indicators from it
for alpha in alpha_list:
    col_name_smoothed = f"Smoothed_Close_{alpha}"
    col_name_prediction = f"Prediction_{alpha}"
    col_name_k_percent = f"k_percent_{alpha}"
    col_name_macd = f"MACD_{alpha}"
    col_name_macd_ema = f"MACD_EMA_{alpha}"
    col_name_price_rate_of_change = f"Price_Rate_Of_Change_{alpha}"
    col_name_obv = f"OBV_{alpha}"
    col_name_r_percent = f"r_percent_{alpha}"

    # Exponential smoothing of the close, ticker by ticker
    price_data[col_name_smoothed] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Blank the first row of every ticker
    price_data[col_name_smoothed] = np.where(mask, np.nan, price_data[col_name_smoothed])

    # Target: direction of the smoothed close d rows ahead
    price_data[col_name_prediction] = price_data.groupby('Ticker')[col_name_smoothed].transform(
        lambda x: calculate_target(x, d)
    )
    price_data[col_name_prediction] = np.where(mask, np.nan, price_data[col_name_prediction])

    # On-balance volume per ticker; obv() reads a column literally named
    # 'Smoothed_Close', so the current alpha's column is renamed for the call.
    obv_groups = price_data.groupby('Ticker').apply(lambda group: obv(group.rename(columns={col_name_smoothed: 'Smoothed_Close'})))
    price_data[col_name_obv] = obv_groups.reset_index(level=0, drop=True)

    # Price rate of change over e rows
    price_data[col_name_price_rate_of_change] = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.pct_change(periods=e))
    price_data[col_name_price_rate_of_change] = np.where(mask, np.nan, price_data[col_name_price_rate_of_change])

    # Store the rolling low/high; assigned at this point so the columns keep
    # their original position in the frame.
    price_data['low_14'] = low_14
    price_data['high_14'] = high_14

    # Stochastic oscillator %K
    price_data[col_name_k_percent] = 100 * ((price_data[col_name_smoothed] - low_14) / (high_14 - low_14))

    # Blank the first row of every ticker
    price_data[col_name_k_percent] = np.where(mask, np.nan, price_data[col_name_k_percent])

    # MACD: 12-span EMA minus 26-span EMA of the smoothed close, per ticker
    ema_26 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26

    # Signal line: 9-span EMA of the MACD, also computed per ticker so the
    # average of one ticker does not carry over into the next one.
    ema_9_macd = macd.groupby(price_data['Ticker']).transform(lambda x: x.ewm(span=9).mean())

    # Store MACD and its signal line
    price_data[col_name_macd] = macd
    price_data[col_name_macd_ema] = ema_9_macd

    # Williams %R
    r_percent = ((high_14 - price_data[col_name_smoothed]) / (high_14 - low_14)) * (-100)
    price_data[col_name_r_percent] = r_percent

# Reference columns without smoothing (alpha = 1)
alpha = 1
raw_smoothed = price_data.groupby('Ticker')['Close'].transform(
    lambda closes: exponential_smoothing(closes.values, alpha)
)
# Blank the first row of every ticker
price_data['Smoothed_Close_1'] = np.where(mask, np.nan, raw_smoothed)

# Target computed from the unsmoothed series, used later as the reference label
verify_target = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda series: calculate_target(series, d)
)
price_data['Verify'] = np.where(mask, np.nan, verify_target)

# RSI: split the price change into gains and (absolute) losses
changes = price_data['change_in_price']
up_df = price_data[['Ticker']].assign(change_in_price=changes.where(changes > 0, 0))
down_df = price_data[['Ticker']].assign(change_in_price=changes.where(changes < 0, 0).abs())

# Exponentially weighted mean with span n, applied ticker by ticker
span_n_mean = lambda s: s.ewm(span=n).mean()
ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(span_n_mean)
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(span_n_mean)

relative_strength = ewma_up / ewma_down
price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['up_days'] = up_df['change_in_price']
price_data['down_days'] = down_df['change_in_price']

# Drop every row that still has a NaN in any column
price_data = price_data.dropna()

# Preview the last rows of the updated frame
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
11757,2024-12-26,54.849998,55.400002,54.400002,55.009998,55.009998,13155800,VALE3.SA,0.16,56.505553,-1.0,2615895548,-0.037093,53.330002,60.189999,46.29086,-0.818763,-0.682099,-53.70914,55.480276,-1.0,2210130270,-0.053182,31.345124,-0.989594,-0.804075,-68.654876,55.063495,-1.0,335066462,-0.063026,25.269591,-1.08372,-0.857817,-74.730409,54.907717,-1.0,743919266,-0.067973,22.99877,-1.141483,-0.89846,-77.00123,54.875543,-1.0,671816026,-0.070039,22.529758,-1.173487,-0.928296,-77.470242,54.894808,-1.0,412217338,-0.070428,22.810601,-1.189711,-0.94957,-77.189399,54.929836,-1.0,604148042,-0.069784,23.321205,-1.197007,-0.964704,-76.678795,54.964064,-1.0,-997892342,-0.068496,23.820162,-1.199513,-0.975599,-76.179838,54.990965,-1.0,-832181038,-0.066853,24.212304,-1.199625,-0.983593,-75.787696,55.009998,-1.0,38.934135,0.16,0.0
11758,2024-12-27,54.73,55.080002,54.459999,54.740002,54.740002,16635300,VALE3.SA,-0.269997,56.328998,-1.0,2599260248,-0.036864,53.330002,60.189999,43.717168,-0.856589,-0.716997,-56.282832,55.332221,-1.0,2193494970,-0.049589,29.186887,-1.03108,-0.849476,-70.813113,54.966447,-1.0,318431162,-0.054749,23.854897,-1.117711,-0.909796,-76.145103,54.840631,-1.0,727283966,-0.054982,22.020839,-1.163699,-0.951508,-77.979161,54.807772,-1.0,655180726,-0.052739,21.541851,-1.185256,-0.979688,-78.458149,54.801924,-1.0,395582038,-0.049363,21.456606,-1.194099,-0.998476,-78.543394,54.796952,-1.0,587512742,-0.045552,21.384122,-1.197019,-1.011167,-78.615878,54.784814,-1.0,-1014527642,-0.041707,21.207187,-1.197522,-1.019984,-78.792813,54.765098,-1.0,-848816338,-0.038062,20.919779,-1.197284,-1.026331,-79.080221,54.740002,-1.0,36.806222,0.0,0.269997
11759,2024-12-30,54.900002,55.189999,54.549999,54.549999,54.549999,11250900,VALE3.SA,-0.190002,56.151098,-1.0,2588009348,-0.035577,53.330002,60.189999,41.123874,-0.890655,-0.751729,-58.876126,55.175777,-1.0,2182244070,-0.044532,26.906355,-1.064313,-0.892443,-73.093645,54.841513,-1.0,307180262,-0.045622,22.033696,-1.141571,-0.956151,-77.966304,54.724378,-1.0,716033066,-0.042657,20.326194,-1.177118,-0.99663,-79.673806,54.678886,-1.0,643929826,-0.038353,19.663039,-1.19125,-1.022,-80.336961,54.650769,-1.0,384331138,-0.034012,19.253179,-1.195987,-1.037978,-80.746821,54.624085,-1.0,576261842,-0.03021,18.864195,-1.197177,-1.048369,-81.135805,54.596962,-1.0,-1025778542,-0.027171,18.468819,-1.1973,-1.055447,-81.531181,54.571509,-1.0,-860067238,-0.024938,18.097782,-1.197249,-1.060515,-81.902218,54.549999,-1.0,35.242227,0.0,0.190002
11760,2025-01-02,54.709999,55.099998,54.23,54.25,54.25,17623900,VALE3.SA,-0.299999,55.960988,-1.0,2570385448,-0.034877,53.330002,59.939999,39.803145,-0.92236,-0.785855,-60.196855,54.990622,-1.0,2164620170,-0.041373,25.122852,-1.092991,-0.932553,-74.877148,54.664059,-1.0,289556362,-0.04053,20.182416,-1.161412,-0.997203,-79.817584,54.534627,-1.0,698409166,-0.036996,18.224291,-1.189353,-1.035174,-81.775709,54.464443,-1.0,626305926,-0.033372,17.162505,-1.199478,-1.057496,-82.837495,54.410308,-1.0,366707238,-0.030603,16.343516,-1.203018,-1.070986,-83.656484,54.362226,-1.0,558637942,-0.028877,15.6161,-1.204547,-1.079605,-84.3839,54.319392,-1.0,-1043402442,-0.028073,14.968095,-1.205625,-1.085483,-85.031905,54.282151,-1.0,-877691138,-0.027963,14.404683,-1.20666,-1.089744,-85.595317,54.25,-1.0,32.709994,0.0,0.299999
11761,2025-01-03,53.900002,54.0,52.880001,53.240002,53.240002,23608700,VALE3.SA,-1.009998,55.68889,-1.0,2546776748,-0.036456,52.880001,59.59,41.861238,-0.958395,-0.820363,-58.138762,54.640498,-1.0,2141011470,-0.043294,26.236911,-1.130935,-0.972229,-73.763089,54.236842,-1.0,265947662,-0.043682,20.221175,-1.197801,-1.037322,-79.778825,54.016777,-1.0,674800466,-0.042626,16.941518,-1.226695,-1.073479,-83.058482,53.852222,-1.0,602697226,-0.042242,14.48914,-1.241093,-1.094215,-85.51086,53.708124,-1.0,343098538,-0.042929,12.341626,-1.250832,-1.106955,-87.658374,53.576669,1.0,535029242,-0.044479,10.382531,-1.25926,-1.115536,-89.617469,53.45588,1.0,-1067011142,-0.046549,8.582397,-1.267292,-1.121844,-91.417603,53.344217,1.0,-901299838,-0.048841,6.918265,-1.275103,-1.126816,-93.081735,53.240002,1.0,25.572295,0.0,1.009998


In [34]:
# Show a few alpha = 0.3 columns for every ticker on one reference date
columns_to_check = ['Date','Close','Ticker','OBV_0.3','Smoothed_Close_0.3','MACD_0.3','MACD_EMA_0.3']
on_reference_date = price_data['Date'] == '2024-12-27'
price_data.loc[on_reference_date, columns_to_check]

Unnamed: 0,Date,Close,Ticker,OBV_0.3,Smoothed_Close_0.3,MACD_0.3,MACD_EMA_0.3
5731,2024-12-27,12.07,ABEV3.SA,-2668196746,12.348159,-0.080376,0.070639
23674,2024-12-27,30.780001,ITUB4.SA,15921966515,31.139099,-0.812249,-0.726813
27002,2024-12-27,6.51,MGLU3.SA,-957599623,6.905152,-0.617087,-0.448153
17833,2024-12-27,35.66,PETR4.SA,31961984117,36.518488,-0.149991,0.221659
11758,2024-12-27,54.740002,VALE3.SA,318431162,54.966447,-1.117711,-0.909796


#### Se quiser rodar para um único ticker, execute apenas uma das células de filtro abaixo.

In [69]:
# Keep only the ABEV3.SA rows and preview the last ones
selected_tickers = ['ABEV3.SA']
is_selected = price_data['Ticker'].isin(selected_tickers)
price_data = price_data[is_selected]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
5730,2024-12-26,12.18,12.24,11.99,12.14,12.14,21166800,ABEV3.SA,-0.08,12.916441,-1.0,-2832724966,-0.037254,11.99,14.53,36.474081,0.071183,0.104943,-63.525919,12.67507,-1.0,-2455355020,-0.0817,26.971287,0.028984,0.124149,-73.028713,12.46737,-1.0,-2630572746,-0.107626,18.794106,-0.027737,0.108392,-81.205894,12.338429,-1.0,-1929375396,-0.119863,13.717703,-0.069958,0.088349,-86.282297,12.260293,-1.0,-1675676378,-0.124889,10.641471,-0.098665,0.071533,-89.358529,12.211341,-1.0,-1363339260,-0.126352,8.714229,-0.118478,0.058476,-91.285771,12.180295,-1.0,-1697107248,-0.125988,7.491945,-0.132654,0.04838,-92.508055,12.160864,-1.0,-1607037908,-0.124603,6.726938,-0.143117,0.040457,-93.273062,12.148651,-1.0,-1542087046,-0.122615,6.246131,-0.151022,0.034127,-93.753869,12.14,-1.0,25.409653,0.0,0.08
5731,2024-12-27,12.16,12.19,11.97,12.07,12.07,37624000,ABEV3.SA,-0.070001,12.831797,-1.0,-2870348966,-0.041303,11.97,14.53,33.663951,0.043745,0.092703,-66.336049,12.554056,-1.0,-2492979020,-0.08111,22.814694,-0.017453,0.095829,-77.185306,12.348159,-1.0,-2668196746,-0.099313,14.771826,-0.080376,0.070639,-85.228174,12.231058,-1.0,-1966999396,-0.104035,10.197552,-0.123255,0.046028,-89.802448,12.165146,-1.0,-1713300378,-0.102534,7.622898,-0.150932,0.02704,-92.377102,12.126536,-1.0,-1400963260,-0.098464,6.11469,-0.169311,0.012919,-93.88531,12.103088,-1.0,-1734731248,-0.093457,5.198754,-0.182036,0.002297,-94.801246,12.088173,-1.0,-1644661908,-0.088261,4.616106,-0.191183,-0.005871,-95.383894,12.077865,-1.0,-1579711046,-0.083249,4.213462,-0.197982,-0.012295,-95.786538,12.07,-1.0,24.226632,0.0,0.070001
5732,2024-12-30,12.06,12.16,11.74,11.74,11.74,38907400,ABEV3.SA,-0.33,12.722617,-1.0,-2909256366,-0.047292,11.74,14.5,35.602088,0.01304,0.07677,-64.397912,12.391245,-1.0,-2531886420,-0.085231,23.595839,-0.066624,0.063338,-76.404161,12.165711,-1.0,-2707104146,-0.100223,15.424325,-0.135255,0.02946,-84.575675,12.034634,-1.0,-2005906796,-0.103416,10.675168,-0.179276,0.000967,-89.324832,11.952573,-1.0,-1752207778,-0.102491,7.701931,-0.20712,-0.019792,-92.298069,11.894614,-1.0,-1439870660,-0.100827,5.601978,-0.225709,-0.034807,-94.398022,11.848926,-1.0,-1773638648,-0.099711,3.946615,-0.238926,-0.045948,-96.053385,11.809634,-1.0,-1683569308,-0.099578,2.522991,-0.248883,-0.054473,-97.477009,11.773786,-1.0,-1618618446,-0.100514,1.224149,-0.256775,-0.061191,-98.775851,11.74,-1.0,19.331009,0.0,0.33
5733,2025-01-02,11.72,11.89,11.49,11.6,11.6,33417900,ABEV3.SA,-0.139999,12.610356,-1.0,-2942674266,-0.052049,11.49,14.5,37.221127,-0.020121,0.057392,-62.778873,12.232996,-1.0,-2565304320,-0.087404,24.684261,-0.117013,0.027268,-75.315739,11.995998,-1.0,-2740522046,-0.099167,16.810569,-0.190249,-0.014482,-83.189431,11.860781,-1.0,-2039324696,-0.100751,12.318306,-0.234993,-0.046225,-87.681694,11.776287,-1.0,-1785625678,-0.099586,9.511194,-0.262844,-0.068402,-90.488806,11.717846,-1.0,-1473288560,-0.098305,7.569641,-0.281425,-0.08413,-92.430359,11.674678,-1.0,-1807056548,-0.097531,6.135495,-0.294676,-0.095693,-93.864505,11.641927,-1.0,-1716987208,-0.097185,5.047422,-0.304631,-0.104505,-94.952578,11.617379,-1.0,-1652036346,-0.096974,4.231867,-0.312389,-0.111431,-95.768133,11.6,-1.0,17.590948,0.0,0.139999
5734,2025-01-03,11.49,11.55,11.27,11.33,11.33,41365900,ABEV3.SA,-0.27,12.48232,-1.0,-2984040166,-0.060241,11.27,14.36,39.233659,-0.056086,0.034696,-60.766341,12.052397,-1.0,-2606670220,-0.096772,25.320278,-0.169565,-0.012098,-74.679722,11.796199,-1.0,-2781887946,-0.109828,17.029069,-0.247106,-0.061007,-82.970931,11.648468,-1.0,-2080690596,-0.11444,12.248158,-0.292904,-0.095561,-87.751842,11.553143,-1.0,-1826991578,-0.117365,9.163202,-0.321308,-0.118983,-90.836798,11.485138,-1.0,-1514654460,-0.120462,6.962393,-0.340433,-0.135391,-93.037607,11.433403,-1.0,-1848422448,-0.123938,5.288122,-0.354243,-0.147403,-94.711878,11.392385,-1.0,-1758353108,-0.127624,3.960678,-0.364743,-0.156553,-96.039322,11.358738,-1.0,-1693402246,-0.131362,2.87176,-0.373033,-0.163751,-97.12824,11.33,-1.0,14.655374,0.0,0.27


In [None]:
# Keep only the ITUB4.SA rows and preview the last ones
selected_tickers = ['ITUB4.SA']
is_selected = price_data['Ticker'].isin(selected_tickers)
price_data = price_data[is_selected]
price_data.tail()

In [None]:
# Keep only the MGLU3.SA rows and preview the last ones
price_data = price_data[price_data['Ticker'].isin(['MGLU3.SA'])]
# tail must be called; the bare attribute only displays the bound method
price_data.tail()

In [None]:
# Keep only the PETR4.SA rows and preview the last ones
price_data = price_data[price_data['Ticker'].isin(['PETR4.SA'])]
# tail must be called; the bare attribute only displays the bound method
price_data.tail()

In [None]:
# Keep only the VALE3.SA rows and preview the last ones
price_data = price_data[price_data['Ticker'].isin(['VALE3.SA'])]
# tail must be called; the bare attribute only displays the bound method
price_data.tail()

In [14]:
# Write the current frame to an Excel workbook, leaving the index out
excel_path = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx'
price_data.to_excel(excel_path, index=False)

#### Para rodar normalmente (com todos os tickers), basta pular a etapa anterior.

In [57]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Fit and score one random forest per smoothing factor
for alpha in alpha_list:
    # Feature columns for this alpha; RSI is the same column for every alpha
    feature_names = [
        'RSI',
        f'k_percent_{alpha}',
        f'r_percent_{alpha}',
        f'Price_Rate_Of_Change_{alpha}',
        f'MACD_{alpha}',
        f'MACD_EMA_{alpha}',
        f'OBV_{alpha}',
    ]
    X_Cols = price_data[feature_names]
    Y_Cols = price_data[f'Prediction_{alpha}']

    # Hold out part of the rows for testing, with a fixed seed.
    # NOTE(review): the rows are split without regard to date while the target
    # looks d rows ahead, so neighbouring rows can land on both sides of the
    # split - confirm this is the intended evaluation protocol.
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # The classifier is rebuilt on every iteration, so after the loop
    # rand_frst_clf holds the model fitted for the last alpha only.
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)
    rand_frst_clf.fit(X_train, y_train)

    # Score on the held-out rows
    y_pred = rand_frst_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0

    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.2f}')
    print('---')


Alpha: 0.1
Correct Prediction (%): 85.19
---
Alpha: 0.2
Correct Prediction (%): 78.20
---
Alpha: 0.3
Correct Prediction (%): 76.52
---
Alpha: 0.4
Correct Prediction (%): 74.14
---
Alpha: 0.5
Correct Prediction (%): 73.52
---
Alpha: 0.6
Correct Prediction (%): 73.17
---
Alpha: 0.7
Correct Prediction (%): 70.72
---
Alpha: 0.8
Correct Prediction (%): 72.40
---
Alpha: 0.9
Correct Prediction (%): 70.16
---


In [58]:
import warnings

# Silence every warning for the rest of the kernel session.
# NOTE(review): a blanket "ignore" also hides pandas / scikit-learn warnings that
# may point at real problems; consider narrowing the filter.
warnings.filterwarnings("ignore")

# For each alpha, run the classifier on that alpha's feature columns over the whole
# frame and measure how often the output equals the 'Verify' column.
# NOTE(review): rand_frst_clf is rebound on every training iteration, so here it is
# only the most recently fitted classifier, yet it is applied to the feature columns
# of every alpha - confirm that this is intended.
for alpha in alpha_list:
    # Feature columns for this alpha, passed as a plain NumPy array (no column names)
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']].values

    # Model output goes to a dedicated column: f'Prediction_{alpha}' is the label
    # column that the training cells read, so it must not be overwritten with
    # predictions (doing so would make any re-training learn the model's own output).
    predicted_col = f'Model_Prediction_{alpha}'
    price_data[predicted_col] = rand_frst_clf.predict(X_Cols)

    # 1 where the model output equals 'Verify', 0 otherwise; the mean is the hit rate
    price_data['Match'] = (price_data[predicted_col] == price_data['Verify']).astype(int)
    accuracy = price_data['Match'].mean()

    # Report the hit rate for this alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo: 55.96%

Precisão para Alpha: 0.2
Precisão do modelo: 60.61%

Precisão para Alpha: 0.3
Precisão do modelo: 65.03%

Precisão para Alpha: 0.4
Precisão do modelo: 67.46%

Precisão para Alpha: 0.5
Precisão do modelo: 70.74%

Precisão para Alpha: 0.6
Precisão do modelo: 75.01%

Precisão para Alpha: 0.7
Precisão do modelo: 80.55%

Precisão para Alpha: 0.8
Precisão do modelo: 84.41%

Precisão para Alpha: 0.9
Precisão do modelo: 91.42%


In [None]:
import warnings
from datetime import timedelta

# Silence every warning for the rest of the kernel session.
# NOTE(review): a blanket "ignore" also hides warnings that may point at real problems.
warnings.filterwarnings("ignore")

# Rows whose Date lies within 30 calendar days of the most recent Date.
# .copy() makes last_30_days an independent frame, so the column assignments below
# change only this frame and are not writes into a slice of price_data.
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)].copy()

# For each alpha, run the classifier on that alpha's feature columns and measure how
# often the output equals the 'Verify' column within the selected window.
# NOTE(review): rand_frst_clf is whichever classifier was fitted last in the kernel;
# it is applied here to the feature columns of every alpha - confirm that is intended.
for alpha in alpha_list:
    # Feature columns for this alpha, passed as a plain NumPy array
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha}',
                           f'r_percent_{alpha}',
                           f'Price_Rate_Of_Change_{alpha}',
                           f'MACD_{alpha}',
                           f'MACD_EMA_{alpha}',
                           f'OBV_{alpha}']].values

    # Replace this alpha's 'Prediction' column with the model output
    # (in the copied window only; price_data itself is left untouched)
    last_30_days[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # 1 where the model output equals 'Verify', 0 otherwise; the mean is the hit rate
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha}'] == last_30_days['Verify']).astype(int)
    accuracy = last_30_days['Match'].mean()

    # Report the hit rate for this alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')


#### Time Series com Verify

In [70]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Number of folds produced by TimeSeriesSplit
n_splits = 5

# Accumulators shared by every alpha and every fold
all_predictions, all_verify_values, all_actual_values = [], [], []
all_accuracy_scores_ytest, all_accuracy_scores_verify = [], []

# Time-series cross-validation, repeated for each alpha
for alpha in alpha_list:
    feature_names = ['RSI',
                     f'k_percent_{alpha}',
                     f'r_percent_{alpha}',
                     f'Price_Rate_Of_Change_{alpha}',
                     f'MACD_{alpha}',
                     f'MACD_EMA_{alpha}',
                     f'OBV_{alpha}']
    n_features = len(feature_names)

    # Features, target and 'Verify' side by side; rows with any NaN are dropped
    data = pd.concat(
        [price_data[feature_names], price_data[f'Prediction_{alpha}'], price_data['Verify']],
        axis=1,
    ).dropna()

    # Column layout of `data`: the features first, then the target, then 'Verify'
    X = data.iloc[:, :n_features]
    y = data.iloc[:, n_features]
    verify = data.iloc[:, n_features + 1]

    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    for fold, (train_index, test_index) in enumerate(tscv.split(X), start=1):
        # Positional train / test selection for this fold
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # Fresh Random Forest for every fold, fitted on the training rows only
        rand_frst_clf = RandomForestClassifier(
            n_estimators=100,
            criterion="gini",
            random_state=0,
            oob_score=True,
        ).fit(X_train, y_train)

        y_pred = rand_frst_clf.predict(X_test)

        # Keep predictions and both reference series for later analysis
        all_predictions.extend(y_pred)
        all_verify_values.extend(y_verify.values)
        all_actual_values.extend(y_test.values)

        # Accuracy (in %) against the fold's target and against 'Verify'
        accuracy_ytest = accuracy_score(y_test, y_pred) * 100.0
        accuracy_verify = accuracy_score(y_verify, y_pred) * 100.0
        all_accuracy_scores_ytest.append(accuracy_ytest)
        all_accuracy_scores_verify.append(accuracy_verify)

        # Per-fold report
        print(f'Fold {fold}:')
        print(f' - Correct Prediction (y_test): {accuracy_ytest:.2f}%')
        print(f' - Correct Prediction (Verify): {accuracy_verify:.2f}%')

    print('---')

# Overall summary: mean over every fold of every alpha
print("Resumo geral:")
print(f'Média de acurácia (y_test): {np.mean(all_accuracy_scores_ytest):.2f}%')
print(f'Média de acurácia (Verify): {np.mean(all_accuracy_scores_verify):.2f}%')


Alpha: 0.1
Fold 1:
 - Correct Prediction (y_test): 60.13%
 - Correct Prediction (Verify): 49.95%
Fold 2:
 - Correct Prediction (y_test): 70.62%
 - Correct Prediction (Verify): 51.42%
Fold 3:
 - Correct Prediction (y_test): 60.34%
 - Correct Prediction (Verify): 45.02%
Fold 4:
 - Correct Prediction (y_test): 67.68%
 - Correct Prediction (Verify): 56.03%
Fold 5:
 - Correct Prediction (y_test): 71.88%
 - Correct Prediction (Verify): 51.10%
---
Alpha: 0.2
Fold 1:
 - Correct Prediction (y_test): 49.53%
 - Correct Prediction (Verify): 47.64%
Fold 2:
 - Correct Prediction (y_test): 55.82%
 - Correct Prediction (Verify): 53.83%
Fold 3:
 - Correct Prediction (y_test): 56.87%
 - Correct Prediction (Verify): 44.49%
Fold 4:
 - Correct Prediction (y_test): 57.61%
 - Correct Prediction (Verify): 51.94%
Fold 5:
 - Correct Prediction (y_test): 56.66%
 - Correct Prediction (Verify): 48.48%
---
Alpha: 0.3
Fold 1:
 - Correct Prediction (y_test): 46.38%
 - Correct Prediction (Verify): 46.17%
Fold 2:
 - Co

#### Time Series sem Verify

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Number of folds produced by TimeSeriesSplit
n_splits = 5

# Time-series cross-validation, repeated for each alpha
for alpha in alpha_list:
    feature_names = ['RSI',
                     f'k_percent_{alpha}',
                     f'r_percent_{alpha}',
                     f'Price_Rate_Of_Change_{alpha}',
                     f'MACD_{alpha}',
                     f'MACD_EMA_{alpha}',
                     f'OBV_{alpha}']
    n_features = len(feature_names)

    # Features and target side by side; rows with any NaN are dropped
    data = pd.concat(
        [price_data[feature_names], price_data[f'Prediction_{alpha}']],
        axis=1,
    ).dropna()

    # Column layout of `data`: the features first, the target last
    X = data.iloc[:, :n_features]
    y = data.iloc[:, n_features]

    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    for fold, (train_index, test_index) in enumerate(tscv.split(X), start=1):
        # Positional train / test selection for this fold
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]

        # Fresh Random Forest for every fold, fitted on the training rows only
        rand_frst_clf = RandomForestClassifier(
            n_estimators=100,
            criterion="gini",
            random_state=0,
            oob_score=True,
        ).fit(X_train, y_train)

        y_pred = rand_frst_clf.predict(X_test)

        # Accuracy (in %) for this fold
        accuracy = accuracy_score(y_test, y_pred) * 100.0
        print(f'Fold {fold}: Correct Prediction (%): {accuracy:.2f}')

    print('---')


Alpha: 0.1
Fold 1: Correct Prediction (%): 60.13
Fold 2: Correct Prediction (%): 70.62
Fold 3: Correct Prediction (%): 60.34
Fold 4: Correct Prediction (%): 67.68
Fold 5: Correct Prediction (%): 71.88
---
Alpha: 0.2
Fold 1: Correct Prediction (%): 49.53
Fold 2: Correct Prediction (%): 55.82
Fold 3: Correct Prediction (%): 56.87
Fold 4: Correct Prediction (%): 57.61
Fold 5: Correct Prediction (%): 56.66
---
Alpha: 0.3
Fold 1: Correct Prediction (%): 46.38
Fold 2: Correct Prediction (%): 51.63
Fold 3: Correct Prediction (%): 52.47
Fold 4: Correct Prediction (%): 55.19
Fold 5: Correct Prediction (%): 55.40
---
Alpha: 0.4
Fold 1: Correct Prediction (%): 45.54
Fold 2: Correct Prediction (%): 52.68
Fold 3: Correct Prediction (%): 53.73
Fold 4: Correct Prediction (%): 51.52
Fold 5: Correct Prediction (%): 49.32
---
Alpha: 0.5
Fold 1: Correct Prediction (%): 45.33
Fold 2: Correct Prediction (%): 48.27
Fold 3: Correct Prediction (%): 54.04
Fold 4: Correct Prediction (%): 51.31
Fold 5: Correct P

#### Salvar modelo 

In [8]:
# Persist the fitted classifier to disk.
# NOTE(review): rand_frst_clf is rebound inside every training loop of this notebook,
# so the object written here is whichever model was fitted last in the kernel -
# confirm that this is the one meant to be saved.
joblib.dump(
    value=rand_frst_clf,
    filename='C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl',
)

['C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl']