In [19]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [27]:
# Read the price history from the local parquet file and preview the first rows.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
parquet_path = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet'
price_data = pd.read_parquet(parquet_path)
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.282051,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.267754,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.260614,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.262399,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.267754,6445,ABEV3.SA


In [28]:
import numpy as np
import pandas as pd

# Parse the dates and order the rows by ticker, then chronologically within each ticker
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Row-over-row change in Close, computed inside each ticker. A plain .diff() on the
# whole column would make the first row of every ticker the difference against the
# previous ticker's last close; grouped, that first row is NaN instead.
price_data['change_in_price'] = price_data.groupby('Ticker')['Close'].diff()

# True on the first row of each ticker (the row where the ticker changes)
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)

# Função para suavização exponencial
def exponential_smoothing(data, alpha):
    """Apply simple exponential smoothing to a 1-D sequence.

    The first output equals the first input; every later output is
    ``alpha * data[t] + (1 - alpha) * smoothed[t - 1]``.

    Parameters
    ----------
    data : array-like
        Values to smooth (NumPy array, list or pandas Series). The input is
        read positionally, so a Series with any index is accepted.
    alpha : float
        Smoothing factor; ``alpha = 1`` reproduces the input unchanged.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``data`` (empty for empty input).
    """
    # Positional float view of the input, independent of any pandas index labels
    values = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(values))

    # Nothing to seed the recursion with: return the empty result
    if len(values) == 0:
        return smoothed

    smoothed[0] = values[0]
    for t in range(1, len(values)):
        smoothed[t] = alpha * values[t] + (1 - alpha) * smoothed[t - 1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    """Label each row with the direction of the move ``d`` rows ahead.

    Returns a Series holding 1 where the value ``d`` rows later is higher and
    -1 where it is lower or equal. Rows without a value ``d`` rows ahead stay NaN.
    """
    future_move = data.shift(-d) - data
    direction = np.sign(future_move)
    # A flat move (sign 0) is counted as a down move
    return direction.mask(direction == 0, -1)

# Função para calcular o OBV
def obv(group, smoothed_col):
    """Compute the On-Balance Volume of one group of rows.

    Starting from 0, the row's ``Volume`` is added when ``smoothed_col`` rose
    relative to the previous row, subtracted when it fell, and the running
    total is carried forward unchanged otherwise (including rows whose
    difference is NaN, such as the first row).

    Returns a Series of running totals indexed like ``group``.
    """
    deltas = group[smoothed_col].diff()

    running = 0
    history = []
    for delta, traded in zip(deltas, group['Volume']):
        if delta > 0:
            running += traded
        elif delta < 0:
            running -= traded
        # NaN or zero delta: keep the previous total
        history.append(running)

    return pd.Series(history, index=group.index)

# Smoothing factors to evaluate
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# d: look-ahead (in rows) used for the target
# n: rolling window for the stochastic %K / %R and span of the RSI averages
# e: look-back (in rows) for the price rate of change
d = 1
n = 14
e = 9

# Rolling low/high per ticker do not depend on alpha, so compute them once
low_14 = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
high_14 = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())

# For every alpha: smooth Close, then derive the target and the indicators from it
for alpha in alpha_list:
    col_name_smoothed = f"Smoothed_Close_{alpha}"
    col_name_prediction = f"Prediction_{alpha}"
    col_name_k_percent = f"k_percent_{alpha}"
    col_name_macd = f"MACD_{alpha}"
    col_name_macd_ema = f"MACD_EMA_{alpha}"
    col_name_price_rate_of_change = f"Price_Rate_Of_Change_{alpha}"
    col_name_obv = f"OBV_{alpha}"
    col_name_r_percent = f"r_percent_{alpha}"

    # Exponential smoothing of Close, per ticker
    price_data[col_name_smoothed] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Blank the first row of each ticker (dropped by the final dropna)
    price_data[col_name_smoothed] = np.where(mask, np.nan, price_data[col_name_smoothed])

    # Target: direction of the smoothed close d rows ahead
    price_data[col_name_prediction] = price_data.groupby('Ticker')[col_name_smoothed].transform(
        lambda x: calculate_target(x, d)
    )
    price_data[col_name_prediction] = np.where(mask, np.nan, price_data[col_name_prediction])

    # OBV on the smoothed close. obv() is called once per ticker and the pieces are
    # concatenated, which avoids depending on how groupby.apply assembles per-group
    # Series results; the assignment aligns on the original row index.
    obv_by_ticker = [
        obv(ticker_rows, smoothed_col=col_name_smoothed)
        for _, ticker_rows in price_data.groupby('Ticker')
    ]
    price_data[col_name_obv] = pd.concat(obv_by_ticker)

    # Price rate of change over e rows
    price_data[col_name_price_rate_of_change] = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.pct_change(periods=e))
    price_data[col_name_price_rate_of_change] = np.where(mask, np.nan, price_data[col_name_price_rate_of_change])

    # Expose the rolling low/high as columns (assigned here so the column order
    # of the resulting frame is the same as before)
    price_data['low_14'] = low_14
    price_data['high_14'] = high_14

    # Stochastic %K of the smoothed close within the raw rolling low/high range
    price_data[col_name_k_percent] = 100 * ((price_data[col_name_smoothed] - low_14) / (high_14 - low_14))

    # Blank the first row of each ticker
    price_data[col_name_k_percent] = np.where(mask, np.nan, price_data[col_name_k_percent])

    # MACD: difference between the 12- and 26-span EMAs of the smoothed close
    ema_26 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26

    # Signal line: 9-span EMA of the MACD, computed per ticker so that one ticker's
    # history does not carry over into the next ticker's values
    ema_9_macd = macd.groupby(price_data['Ticker']).transform(lambda x: x.ewm(span=9).mean())

    # Store MACD and its signal line
    price_data[col_name_macd] = macd
    price_data[col_name_macd_ema] = ema_9_macd

    # Williams %R of the smoothed close
    r_percent = ((high_14 - price_data[col_name_smoothed]) / (high_14 - low_14)) * (-100)
    price_data[col_name_r_percent] = r_percent

# Verification column (no smoothing, alpha = 1): direction of the raw close
alpha = 1
price_data['Smoothed_Close_1'] = price_data.groupby('Ticker')['Close'].transform(
    lambda x: exponential_smoothing(x.values, alpha)
)
price_data['Smoothed_Close_1'] = np.where(mask, np.nan, price_data['Smoothed_Close_1'])

price_data['Verify'] = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda x: calculate_target(x, d)
)
price_data['Verify'] = np.where(mask, np.nan, price_data['Verify'])

# RSI from the per-ticker EWMAs of gains and losses
up_df = price_data[['Ticker', 'change_in_price']].copy()
down_df = price_data[['Ticker', 'change_in_price']].copy()

# Gains keep positive changes, losses keep the magnitude of negative changes; all else is 0
up_df['change_in_price'] = up_df['change_in_price'].where(up_df['change_in_price'] > 0, 0)
down_df['change_in_price'] = down_df['change_in_price'].where(down_df['change_in_price'] < 0, 0).abs()

ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())

relative_strength = ewma_up / ewma_down
price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['up_days'] = up_df['change_in_price']
price_data['down_days'] = down_df['change_in_price']

# Drop every row that has a NaN in any column (indicator warm-up rows, ticker
# boundaries and rows without a look-ahead target)
price_data = price_data.dropna()

# Preview the updated frame
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
11746,2024-12-23,54.669998,55.23,54.400002,54.849998,54.849998,18587900,VALE3.SA,0.23,56.671726,-1.0,2629051348,-0.033972,53.330002,60.189999,48.713205,-0.776551,-0.647933,-51.286795,55.597846,-1.0,2223286070,-0.050189,33.058965,-0.937323,-0.757695,-66.941035,55.086422,-1.0,348222262,-0.062138,25.603806,-1.034453,-0.801341,-74.396194,54.839529,1.0,730763466,-0.069891,22.004779,-1.103354,-0.837704,-77.995221,54.741087,1.0,658660226,-0.074958,20.56976,-1.148298,-0.866998,-79.43024,54.722023,1.0,399061538,-0.078361,20.291868,-1.176264,-0.889535,-79.708132,54.74279,1.0,590992242,-0.080604,20.594585,-1.19311,-0.906628,-79.405415,54.780328,1.0,-1011048142,-0.081926,21.141787,-1.202857,-0.919621,-78.858213,54.819666,1.0,-845336838,-0.082466,21.715229,-1.208161,-0.929585,-78.284771,54.849998,1.0,37.065454,0.23,0.0
11747,2024-12-26,54.849998,55.400002,54.400002,55.009998,55.009998,13155800,VALE3.SA,0.16,56.505553,-1.0,2615895548,-0.037093,53.330002,60.189999,46.29086,-0.818763,-0.682099,-53.70914,55.480276,-1.0,2210130270,-0.053182,31.345124,-0.989594,-0.804075,-68.654876,55.063495,-1.0,335066462,-0.063026,25.269591,-1.08372,-0.857817,-74.730409,54.907717,-1.0,743919266,-0.067973,22.99877,-1.141483,-0.89846,-77.00123,54.875543,-1.0,671816026,-0.070039,22.529758,-1.173487,-0.928296,-77.470242,54.894808,-1.0,412217338,-0.070428,22.810601,-1.189711,-0.94957,-77.189399,54.929836,-1.0,604148042,-0.069784,23.321205,-1.197007,-0.964704,-76.678795,54.964064,-1.0,-997892342,-0.068496,23.820162,-1.199513,-0.975599,-76.179838,54.990965,-1.0,-832181038,-0.066853,24.212304,-1.199625,-0.983593,-75.787696,55.009998,-1.0,38.934135,0.16,0.0
11748,2024-12-27,54.73,55.080002,54.459999,54.740002,54.740002,16635300,VALE3.SA,-0.269997,56.328998,-1.0,2599260248,-0.036864,53.330002,60.189999,43.717168,-0.856589,-0.716997,-56.282832,55.332221,-1.0,2193494970,-0.049589,29.186887,-1.03108,-0.849476,-70.813113,54.966447,-1.0,318431162,-0.054749,23.854897,-1.117711,-0.909796,-76.145103,54.840631,-1.0,727283966,-0.054982,22.020839,-1.163699,-0.951508,-77.979161,54.807772,-1.0,655180726,-0.052739,21.541851,-1.185256,-0.979688,-78.458149,54.801924,-1.0,395582038,-0.049363,21.456606,-1.194099,-0.998476,-78.543394,54.796952,-1.0,587512742,-0.045552,21.384122,-1.197019,-1.011167,-78.615878,54.784814,-1.0,-1014527642,-0.041707,21.207187,-1.197522,-1.019984,-78.792813,54.765098,-1.0,-848816338,-0.038062,20.919779,-1.197284,-1.026331,-79.080221,54.740002,-1.0,36.806222,0.0,0.269997
11749,2024-12-30,54.900002,55.189999,54.549999,54.549999,54.549999,11250900,VALE3.SA,-0.190002,56.151098,-1.0,2588009348,-0.035577,53.330002,60.189999,41.123874,-0.890655,-0.751729,-58.876126,55.175777,-1.0,2182244070,-0.044532,26.906355,-1.064313,-0.892443,-73.093645,54.841513,-1.0,307180262,-0.045622,22.033696,-1.141571,-0.956151,-77.966304,54.724378,-1.0,716033066,-0.042657,20.326194,-1.177118,-0.99663,-79.673806,54.678886,-1.0,643929826,-0.038353,19.663039,-1.19125,-1.022,-80.336961,54.650769,-1.0,384331138,-0.034012,19.253179,-1.195987,-1.037978,-80.746821,54.624085,-1.0,576261842,-0.03021,18.864195,-1.197177,-1.048369,-81.135805,54.596962,-1.0,-1025778542,-0.027171,18.468819,-1.1973,-1.055447,-81.531181,54.571509,-1.0,-860067238,-0.024938,18.097782,-1.197249,-1.060515,-81.902218,54.549999,-1.0,35.242227,0.0,0.190002
11750,2025-01-02,54.709999,55.099998,54.23,54.25,54.25,17623900,VALE3.SA,-0.299999,55.960988,-1.0,2570385448,-0.034877,53.330002,59.939999,39.803145,-0.92236,-0.785855,-60.196855,54.990622,-1.0,2164620170,-0.041373,25.122852,-1.092991,-0.932553,-74.877148,54.664059,-1.0,289556362,-0.04053,20.182416,-1.161412,-0.997203,-79.817584,54.534627,-1.0,698409166,-0.036996,18.224291,-1.189353,-1.035174,-81.775709,54.464443,-1.0,626305926,-0.033372,17.162505,-1.199478,-1.057496,-82.837495,54.410308,-1.0,366707238,-0.030603,16.343516,-1.203018,-1.070986,-83.656484,54.362226,-1.0,558637942,-0.028877,15.6161,-1.204547,-1.079605,-84.3839,54.319392,-1.0,-1043402442,-0.028073,14.968095,-1.205625,-1.085483,-85.031905,54.282151,-1.0,-877691138,-0.027963,14.404683,-1.20666,-1.089744,-85.595317,54.25,-1.0,32.709994,0.0,0.299999


#### Opcional: execute a célula abaixo para rodar com um único ticker.

In [16]:
# Optional step: keep only the ABEV3.SA rows.
# NOTE: this rebinds price_data, so the other tickers are gone until the
# feature-engineering cell is run again.
is_selected_ticker = price_data['Ticker'].isin(['ABEV3.SA'])
price_data = price_data.loc[is_selected_ticker]
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
13,2000-02-22,0.485255,0.485255,0.485255,0.485255,0.262759,75,ABEV3.SA,-0.029034,0.503659,1.0,15473,-0.012131,0.468107,0.527474,59.884943,-0.001126,-0.000787,-40.115057,0.499179,1.0,27757,-0.005664,52.339618,-0.001025,-0.000797,-47.660382,0.497965,1.0,27757,0.00234,50.294528,-0.000682,-0.000597,-49.705472,0.497261,1.0,40647,0.00736,49.107952,-0.000361,-0.000375,-50.892048,0.496289,1.0,40647,0.008701,47.470848,-0.000112,-0.000178,-52.529152,0.494839,1.0,28363,0.006764,45.029143,6.3e-05,-1.3e-05,-54.970857,0.492914,1.0,28363,0.002296,41.785331,0.000172,0.000122,-58.214669,0.490611,1.0,28363,-0.003869,37.90588,0.000223,0.000231,-62.09412,0.488041,1.0,29575,-0.011005,33.57772,0.000227,0.000318,-66.42228,0.485255,1.0,43.244009,0.0,0.029034
14,2000-02-23,0.494478,0.494478,0.494478,0.494478,0.267754,455,ABEV3.SA,0.009223,0.502741,1.0,15018,-0.01733,0.468107,0.527474,58.338476,-0.001247,-0.000883,-41.661524,0.498239,1.0,27302,-0.017499,50.755749,-0.001123,-0.000865,-49.244251,0.496919,1.0,27302,-0.017954,48.532252,-0.000785,-0.000637,-51.467748,0.496148,1.0,40192,-0.021726,47.232882,-0.000502,-0.000401,-52.767118,0.495384,1.0,40192,-0.028166,45.945562,-0.000307,-0.000205,-54.054438,0.494623,1.0,27908,-0.035993,44.663823,-0.000185,-4.9e-05,-55.336177,0.494009,1.0,28818,-0.04404,43.629792,-0.000115,7.3e-05,-56.370208,0.493705,1.0,28818,-0.051448,43.117397,-7.6e-05,0.000167,-56.882603,0.493834,1.0,30030,-0.057701,43.33602,-5.1e-05,0.000241,-56.66398,0.494478,1.0,47.299662,0.009223,0.0
15,2000-02-24,0.487885,0.487885,0.487885,0.487885,0.264184,5005,ABEV3.SA,-0.006593,0.501255,1.0,10013,-0.022635,0.468107,0.527474,55.836108,-0.001411,-0.000993,-44.163892,0.496168,1.0,22297,-0.028124,47.267558,-0.001306,-0.000956,-52.732442,0.494209,1.0,22297,-0.033722,43.967014,-0.001014,-0.000715,-56.032986,0.492843,1.0,35187,-0.041108,41.665646,-0.000798,-0.000483,-58.334354,0.491634,1.0,35187,-0.048989,39.630177,-0.000672,-0.000301,-60.369823,0.49058,1.0,22903,-0.05611,37.854405,-0.000609,-0.000165,-62.145595,0.489722,1.0,23813,-0.061754,36.409293,-0.000584,-6.4e-05,-63.590707,0.489049,1.0,23813,-0.0657,35.275314,-0.000577,1.3e-05,-64.724686,0.48848,1.0,25025,-0.068084,34.316915,-0.000578,7.1e-05,-65.683085,0.487885,1.0,44.667003,0.0,0.006593
16,2000-02-25,0.4747,0.4747,0.4747,0.4747,0.257044,3033,ABEV3.SA,-0.013185,0.4986,1.0,6980,-0.025578,0.468107,0.527474,51.363045,-0.001678,-0.001134,-48.636955,0.491875,1.0,19264,-0.032965,40.035143,-0.001689,-0.001107,-59.964857,0.488356,1.0,19264,-0.039332,34.108555,-0.001529,-0.000882,-65.891445,0.485586,1.0,32154,-0.045671,29.441581,-0.001452,-0.000683,-70.558419,0.483167,1.0,32154,-0.050811,25.36783,-0.001454,-0.000539,-74.63217,0.481052,1.0,19870,-0.054089,21.805052,-0.001502,-0.00044,-78.194948,0.479207,1.0,20780,-0.055488,18.696626,-0.00157,-0.000374,-81.303374,0.47757,1.0,20780,-0.055385,15.939449,-0.001646,-0.000329,-84.060551,0.476078,1.0,21992,-0.054294,13.426626,-0.001722,-0.000298,-86.573374,0.4747,1.0,39.583155,0.0,0.013185
17,2000-02-29,0.468107,0.468107,0.468107,0.468107,0.253474,11602,ABEV3.SA,-0.006593,0.49555,1.0,-4622,-0.023218,0.468107,0.527474,46.226741,-0.00205,-0.001321,-53.773259,0.487121,1.0,7662,-0.026799,32.028114,-0.002254,-0.001342,-67.971886,0.482281,1.0,7662,-0.028202,23.875988,-0.00228,-0.001168,-76.124012,0.478594,1.0,20552,-0.028309,17.664949,-0.002366,-0.001027,-82.335051,0.475637,1.0,20552,-0.02647,12.683915,-0.002499,-0.00094,-87.316085,0.473285,1.0,8268,-0.02272,8.722021,-0.002644,-0.000891,-91.277979,0.471437,1.0,9178,-0.0176,5.608988,-0.002784,-0.000867,-94.391012,0.47,1.0,9178,-0.011775,3.18789,-0.002909,-0.000857,-96.81211,0.468904,1.0,10390,-0.005795,1.342663,-0.003018,-0.000854,-98.657337,0.468107,1.0,37.143972,0.0,0.006593


#### Para rodar com todos os tickers, basta pular a etapa anterior.

In [29]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Fitted classifier for each alpha. Without this, every iteration overwrites
# rand_frst_clf and only the model for the last alpha is still available afterwards.
models_by_alpha = {}

# For each alpha, build the feature matrix and target, then fit and score a model
for alpha in alpha_list:
    # Feature and target columns for this alpha
    feature_columns = ['RSI',
                       f'k_percent_{alpha}',
                       f'r_percent_{alpha}',
                       f'Price_Rate_Of_Change_{alpha}',
                       f'MACD_{alpha}',
                       f'MACD_EMA_{alpha}',
                       f'OBV_{alpha}']
    X_Cols = price_data[feature_columns]
    Y_Cols = price_data[f'Prediction_{alpha}']

    # Train/test split.
    # NOTE(review): train_test_split shuffles rows by default, so test rows are
    # interleaved in time with training rows; confirm this is intended for
    # time-ordered price data.
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Random Forest classifier
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Fit and remember the model for this alpha
    rand_frst_clf.fit(X_train, y_train)
    models_by_alpha[alpha] = rand_frst_clf

    # Predict on the held-out rows
    y_pred = rand_frst_clf.predict(X_test)

    # Report accuracy as a percentage
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.2f}')
    print('---')


Alpha: 0.1
Correct Prediction (%): 85.96
---
Alpha: 0.2
Correct Prediction (%): 78.45
---
Alpha: 0.3
Correct Prediction (%): 72.53
---
Alpha: 0.4
Correct Prediction (%): 68.02
---
Alpha: 0.5
Correct Prediction (%): 62.58
---
Alpha: 0.6
Correct Prediction (%): 58.40
---
Alpha: 0.7
Correct Prediction (%): 53.38
---
Alpha: 0.8
Correct Prediction (%): 51.52
---
Alpha: 0.9
Correct Prediction (%): 51.04
---


In [30]:
import warnings

# Silence every warning from here on (applies to the whole session)
warnings.filterwarnings("ignore")

# NOTE(review): rand_frst_clf is whatever classifier was fitted last (the final alpha
# of the training loop), yet it is applied below to every alpha's feature set.
# The features are passed as a bare NumPy array, so column names are not checked.

# For each alpha, predict from that alpha's features and compare with 'Verify'
for alpha in alpha_list:
    # Feature matrix as a NumPy array
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']].values

    # Keep the predictions in a local array. Writing them into
    # price_data[f'Prediction_{alpha}'] would replace the training target with
    # model output and change the result of re-running the training cell.
    predicted = rand_frst_clf.predict(X_Cols)

    # Share of rows where the prediction equals 'Verify'
    accuracy = (predicted == price_data['Verify'].values).mean()

    # Report the score for this alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo: 51.14%

Precisão para Alpha: 0.2
Precisão do modelo: 51.53%

Precisão para Alpha: 0.3
Precisão do modelo: 54.05%

Precisão para Alpha: 0.4
Precisão do modelo: 54.70%

Precisão para Alpha: 0.5
Precisão do modelo: 58.12%

Precisão para Alpha: 0.6
Precisão do modelo: 62.18%

Precisão para Alpha: 0.7
Precisão do modelo: 66.94%

Precisão para Alpha: 0.8
Precisão do modelo: 73.72%

Precisão para Alpha: 0.9
Precisão do modelo: 84.62%


In [None]:
import warnings
from datetime import timedelta

# Silence every warning from here on (applies to the whole session)
warnings.filterwarnings("ignore")

# Rows from the last 30 calendar days before the latest date in the data.
# .copy() makes this an independent frame, so the column assignments below
# are not chained assignments on a slice of price_data.
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)].copy()

# NOTE(review): rand_frst_clf is whatever classifier was fitted last, and it is
# applied to every alpha's feature set below.

# For each alpha, predict from that alpha's features and compare with 'Verify'
for alpha in alpha_list:
    # Feature matrix as a NumPy array
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha}',
                           f'r_percent_{alpha}',
                           f'Price_Rate_Of_Change_{alpha}',
                           f'MACD_{alpha}',
                           f'MACD_EMA_{alpha}',
                           f'OBV_{alpha}']].values

    # Store the predictions for the last 30 days in the local copy
    last_30_days[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # 1 where the prediction equals 'Verify', 0 otherwise
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha}'] == last_30_days['Verify']).astype(int)
    accuracy = last_30_days['Match'].mean()  # share of matches

    # Report the score for this alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')


#### Salvar modelo 

In [8]:
# Persist the classifier currently bound to rand_frst_clf (the most recently fitted one)
model_path = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl'
joblib.dump(rand_frst_clf, model_path)

['C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl']

#### Time Series. Não funcionou.

In [None]:
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier

# Number of folds for the time-series split
n_splits = 5

# One summary entry per alpha
overall_results = []

# TimeSeriesSplit cuts the rows positionally and expects them in chronological order.
# price_data is ordered by ticker first, so split on a date-ordered view instead
# (stable sort keeps the existing relative order of rows sharing a date).
ts_data = price_data.sort_values('Date', kind='mergesort')

# Fixed class order so the report works even when a fold contains a single class
class_labels = [-1.0, 1.0]
target_names = ['Down Day', 'Up Day']

for alpha in alpha_list:
    print(f"\n=== Alpha: {alpha} ===")

    # Features, target and verification column for this alpha
    X_Cols = ts_data[['RSI',
                      f'k_percent_{alpha}',
                      f'r_percent_{alpha}',
                      f'Price_Rate_Of_Change_{alpha}',
                      f'MACD_{alpha}',
                      f'MACD_EMA_{alpha}',
                      f'OBV_{alpha}']]
    Y_Cols = ts_data[f'Prediction_{alpha}']
    verify = ts_data['Verify']

    # Random Forest classifier (refitted on every fold)
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Expanding-window splitter
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # Per-alpha accumulators
    accuracy_scores_ytest = []
    accuracy_scores_verify = []
    predictions = []
    verify_values = []
    actual_y_test_values = []
    report_dict = {}

    for fold, (train_index, test_index) in enumerate(tscv.split(X_Cols), start=1):
        print(f"  Fold {fold}:")

        # Train/test rows for this fold
        X_train, X_test = X_Cols.iloc[train_index], X_Cols.iloc[test_index]
        y_train, y_test = Y_Cols.iloc[train_index], Y_Cols.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # Fit on the past, predict the following block
        rand_frst_clf.fit(X_train, y_train)
        y_pred = rand_frst_clf.predict(X_test)

        # Keep predictions and reference values across folds
        predictions.extend(y_pred)
        verify_values.extend(y_verify.values)
        actual_y_test_values.extend(y_test.values)

        # Accuracy against the smoothed target and against the unsmoothed 'Verify'
        accuracy_ytest = accuracy_score(y_test, y_pred)
        accuracy_verify = accuracy_score(y_verify, y_pred)
        accuracy_scores_ytest.append(accuracy_ytest)
        accuracy_scores_verify.append(accuracy_verify)

        print(f"    Accuracy with Y_test (Prediction): {accuracy_ytest:.2f}")
        print(f"    Accuracy with Verify: {accuracy_verify:.2f}")

        # Classification report. Explicit labels keep target_names aligned with the
        # classes even if one class is absent from this fold; zero_division=0 reports
        # 0 for metrics that are undefined in that case.
        report = classification_report(
            y_true=y_test,
            y_pred=y_pred,
            labels=class_labels,
            target_names=target_names,
            output_dict=True,
            zero_division=0,
        )

        # Report as a DataFrame, tagged with this fold's accuracies
        report_flat = pd.DataFrame(report).transpose()
        report_flat['accuracy_ytest'] = accuracy_ytest
        report_flat['accuracy_verify'] = accuracy_verify

        report_dict[fold] = report_flat

    # Stack the per-fold reports, keyed by fold number
    final_report_df = pd.concat(report_dict.values(), keys=report_dict.keys())

    # Mean accuracy over the folds
    overall_accuracy_ytest = sum(accuracy_scores_ytest) / len(accuracy_scores_ytest)
    overall_accuracy_verify = sum(accuracy_scores_verify) / len(accuracy_scores_verify)

    print("\n  Médias das Acurácias:")
    print(f"    Média da acurácia com Y_test (Prediction): {overall_accuracy_ytest:.2f}")
    print(f"    Média da acurácia com Verify: {overall_accuracy_verify:.2f}")

    # Summary for this alpha
    overall_results.append({
        'alpha': alpha,
        'accuracy_ytest_mean': overall_accuracy_ytest,
        'accuracy_verify_mean': overall_accuracy_verify,
        'final_report': final_report_df
    })

# Final summary across alphas
print("\n=== Resumo Final ===")
for result in overall_results:
    print(f"Alpha: {result['alpha']}")
    print(f"  Média da acurácia com Y_test (Prediction): {result['accuracy_ytest_mean']:.2f}")
    print(f"  Média da acurácia com Verify: {result['accuracy_verify_mean']:.2f}")
    print("  ---")
