In [2]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [8]:
price_data = pd.read_parquet(f'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet')
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.273909,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.260024,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.25309,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.254824,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.260024,6445,ABEV3.SA


In [9]:
import numpy as np
import pandas as pd

# Transformar e ordenar os dados
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Criar a coluna de mudança de preço
price_data['change_in_price'] = price_data['Close'].diff()

# Máscara para identificar a virada entre os tickers
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)

# Função para suavização exponencial
def exponential_smoothing(data, alpha):
    smoothed = np.zeros(len(data))
    smoothed[0] = data[0]
    for t in range(1, len(data)):
        smoothed[t] = alpha * data[t] + (1 - alpha) * smoothed[t-1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    target = np.sign(data.shift(-d) - data)
    target[target == 0] = -1
    return target

# Função para calcular o OBV
def obv(group, smoothed_col):
    Volume = group['Volume']
    change = group[smoothed_col].diff()

    prev_obv = 0
    obv_values = []

    for i, j in zip(change, Volume):
        if i > 0:
            current_obv = prev_obv + j
        elif i < 0:
            current_obv = prev_obv - j
        else:
            current_obv = prev_obv

        prev_obv = current_obv
        obv_values.append(current_obv)

    return pd.Series(obv_values, index=group.index)

# Lista de valores alpha
alpha_list = [0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00]

# Período para calcular o target, RSI e estocástico
d = 30
n = 14
e = 9

# Loop para aplicar a suavização exponencial, calcular o target, o RSI e o estocástico
for alpha in alpha_list:
    col_name_smoothed = f"Smoothed_Close_{alpha}"
    col_name_prediction = f"Prediction_{alpha}"
    col_name_k_percent = f"k_percent_{alpha}"
    col_name_macd = f"MACD_{alpha}"
    col_name_macd_ema = f"MACD_EMA_{alpha}"
    col_name_price_rate_of_change = f"Price_Rate_Of_Change_{alpha}"
    col_name_obv = f"OBV_{alpha}"
    col_name_r_percent = f"r_percent_{alpha}"

    # Suavização exponencial
    price_data[col_name_smoothed] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Aplicando a máscara para evitar cálculos na virada dos tickers
    price_data[col_name_smoothed] = np.where(mask, np.nan, price_data[col_name_smoothed])

    # Cálculo do target
    price_data[col_name_prediction] = price_data.groupby('Ticker')[col_name_smoothed].transform(
        lambda x: calculate_target(x, d)
    )
    price_data[col_name_prediction] = np.where(mask, np.nan, price_data[col_name_prediction])

    # Calculando o OBV usando a suavização exponencial
    obv_groups = price_data.groupby('Ticker').apply(obv, smoothed_col=col_name_smoothed)
    price_data[col_name_obv] = obv_groups.reset_index(level=0, drop=True)

    # Calculando o Price Rate of Change (ROC)
    price_data[col_name_price_rate_of_change] = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.pct_change(periods=e))
    price_data[col_name_price_rate_of_change] = np.where(mask, np.nan, price_data[col_name_price_rate_of_change])

    # Calcular low_14 e high_14 para o estocástico
    low_14 = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
    high_14 = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())

    # Adicionar low_14 e high_14 ao DataFrame
    price_data['low_14'] = low_14
    price_data['high_14'] = high_14

    # Calcular o %K para o estocástico
    price_data[col_name_k_percent] = 100 * ((price_data[col_name_smoothed] - low_14) / (high_14 - low_14))

    # Aplicando a máscara para evitar cálculos na virada dos tickers
    price_data[col_name_k_percent] = np.where(mask, np.nan, price_data[col_name_k_percent])

    # Cálculo do MACD
    ema_26 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26

    # Cálculo da EMA do MACD
    ema_9_macd = macd.ewm(span=9).mean()

    # Adicionar MACD e MACD_EMA ao DataFrame
    price_data[col_name_macd] = macd
    price_data[col_name_macd_ema] = ema_9_macd

    # Cálculo do r_percent
    r_percent = ((high_14 - price_data[col_name_smoothed]) / (high_14 - low_14)) * (-100)
    price_data[col_name_r_percent] = r_percent

# Coluna de verificação (sem suavização, alpha = 1)
alpha = 1
price_data['Smoothed_Close_1'] = price_data.groupby('Ticker')['Close'].transform(
    lambda x: exponential_smoothing(x.values, alpha)
)
price_data['Smoothed_Close_1'] = np.where(mask, np.nan, price_data['Smoothed_Close_1'])

price_data['Verify'] = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda x: calculate_target(x, d)
)
price_data['Verify'] = np.where(mask, np.nan, price_data['Verify'])

# Cálculo do RSI
up_df = price_data[['Ticker', 'change_in_price']].copy()
down_df = price_data[['Ticker', 'change_in_price']].copy()

up_df['change_in_price'] = up_df['change_in_price'].where(up_df['change_in_price'] > 0, 0)
down_df['change_in_price'] = down_df['change_in_price'].where(down_df['change_in_price'] < 0, 0).abs()

ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())

relative_strength = ewma_up / ewma_down
price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['up_days'] = up_df['change_in_price']
price_data['down_days'] = down_df['change_in_price']

# Remover as linhas com NaN
price_data = price_data.dropna()

# Visualizar o DataFrame atualizado
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.93,Prediction_0.93,OBV_0.93,Price_Rate_Of_Change_0.93,low_14,high_14,k_percent_0.93,MACD_0.93,MACD_EMA_0.93,r_percent_0.93,Smoothed_Close_0.94,Prediction_0.94,OBV_0.94,Price_Rate_Of_Change_0.94,k_percent_0.94,MACD_0.94,MACD_EMA_0.94,r_percent_0.94,Smoothed_Close_0.95,Prediction_0.95,OBV_0.95,Price_Rate_Of_Change_0.95,k_percent_0.95,MACD_0.95,MACD_EMA_0.95,r_percent_0.95,Smoothed_Close_0.96,Prediction_0.96,OBV_0.96,Price_Rate_Of_Change_0.96,k_percent_0.96,MACD_0.96,MACD_EMA_0.96,r_percent_0.96,Smoothed_Close_0.97,Prediction_0.97,OBV_0.97,Price_Rate_Of_Change_0.97,k_percent_0.97,MACD_0.97,MACD_EMA_0.97,r_percent_0.97,Smoothed_Close_0.98,Prediction_0.98,OBV_0.98,Price_Rate_Of_Change_0.98,k_percent_0.98,MACD_0.98,MACD_EMA_0.98,r_percent_0.98,Smoothed_Close_0.99,Prediction_0.99,OBV_0.99,Price_Rate_Of_Change_0.99,k_percent_0.99,MACD_0.99,MACD_EMA_0.99,r_percent_0.99,Smoothed_Close_1.0,Prediction_1.0,OBV_1.0,Price_Rate_Of_Change_1.0,k_percent_1.0,MACD_1.0,MACD_EMA_1.0,r_percent_1.0,Smoothed_Close_1,Verify,RSI,up_days,down_days
11711,2024-11-05,62.950001,63.110001,61.959999,62.119999,61.570454,16368800,VALE3.SA,-0.549999,62.155383,-1.0,-1004078138,0.045966,59.150002,63.439999,70.055568,0.437742,0.384501,-29.944432,62.150707,-1.0,-1060971338,0.046073,69.946558,0.437688,0.384656,-30.053442,62.145905,-1.0,-894599174,0.046178,69.834633,0.437625,0.384807,-30.165367,62.140978,-1.0,-802961846,0.04628,69.719776,0.437552,0.384954,-30.280224,62.135924,-1.0,-881815046,0.046381,69.601971,0.43747,0.385097,-30.398029,62.130743,-1.0,-728272046,0.04648,69.481205,0.437378,0.385236,-30.518795,62.125435,-1.0,-855556446,0.046577,69.35747,0.437277,0.385371,-30.64253,62.119999,-1.0,-974928446,0.046672,69.230756,0.437166,0.385502,-30.769244,62.119999,-1.0,53.903458,0.0,0.549999
11712,2024-11-06,61.299999,61.630001,60.740002,61.419998,60.876644,19726800,VALE3.SA,-0.700001,61.471475,-1.0,-1023804938,0.030006,59.150002,63.439999,54.11364,0.372202,0.382041,-45.88636,61.463841,-1.0,-1080698138,0.029842,53.935681,0.371494,0.382024,-46.064319,61.456294,-1.0,-914325974,0.029675,53.759757,0.370787,0.382003,-46.240243,61.448837,-1.0,-822688646,0.029506,53.585953,0.37008,0.38198,-46.414047,61.441476,-1.0,-901541846,0.029335,53.414358,0.369375,0.381953,-46.585642,61.434213,-1.0,-747998846,0.029162,53.24506,0.368671,0.381923,-46.75494,61.427053,-1.0,-875283246,0.028987,53.078148,0.367969,0.381891,-46.921852,61.419998,-1.0,-994655246,0.028811,52.91371,0.367269,0.381856,-47.08629,61.419998,-1.0,45.492083,0.0,0.700001
11713,2024-11-07,62.02,63.990002,62.0,63.560001,62.997715,38173300,VALE3.SA,2.140003,63.413805,-1.0,-985631638,0.02967,59.150002,63.990002,88.095101,0.471555,0.399944,-11.904899,63.434232,-1.0,-1042524838,0.029657,88.51715,0.472581,0.400135,-11.48285,63.454816,-1.0,-876152674,0.029647,88.942445,0.473621,0.400327,-11.057555,63.475555,-1.0,-784515346,0.02964,89.370933,0.474675,0.400519,-10.629067,63.496446,-1.0,-863368546,0.029636,89.802561,0.475743,0.400711,-10.197439,63.517486,-1.0,-709825546,0.029636,90.237272,0.476826,0.400904,-9.762728,63.538672,-1.0,-837109946,0.029639,90.675005,0.477923,0.401097,-9.324995,63.560001,-1.0,-956481946,0.029645,91.115696,0.479033,0.401291,-8.884304,63.560001,-1.0,64.843702,2.140003,0.0
11714,2024-11-08,61.990002,62.049999,59.700001,60.630001,60.093636,48764700,VALE3.SA,-2.93,60.824867,-1.0,-1034396338,-0.031289,59.150002,63.990002,34.604664,0.337497,0.387454,-65.395336,60.798255,-1.0,-1091289538,-0.031931,34.054821,0.336117,0.387332,-65.945179,60.771242,-1.0,-924917374,-0.032573,33.496699,0.334717,0.387205,-66.503301,60.743823,-1.0,-833280046,-0.033215,32.930199,0.333297,0.387074,-67.069801,60.715994,-1.0,-912133246,-0.033857,32.355224,0.331857,0.38694,-67.644776,60.687751,-1.0,-758590246,-0.034498,31.771677,0.330395,0.386802,-68.228323,60.659088,-1.0,-885874646,-0.03514,31.179467,0.328912,0.38666,-68.820533,60.630001,-1.0,-1005246646,-0.035782,30.578502,0.327407,0.386514,-69.421498,60.630001,-1.0,41.543422,0.0,2.93
11715,2024-11-11,59.75,59.869999,58.650002,58.650002,58.131153,24483700,VALE3.SA,-1.98,58.802242,-1.0,-1058880038,-0.061702,58.650002,63.990002,2.850948,0.067271,0.323418,-97.149052,58.778897,-1.0,-1115773238,-0.062068,2.413768,0.064282,0.322722,-97.586232,58.756064,-1.0,-949401074,-0.062421,1.98618,0.061321,0.322028,-98.01382,58.733754,-1.0,-857763746,-0.062762,1.568406,0.058386,0.321337,-98.431594,58.711981,-1.0,-936616946,-0.063089,1.16067,0.055479,0.320648,-98.83933,58.690757,-1.0,-783073946,-0.063404,0.763202,0.052601,0.319962,-99.236798,58.670092,-1.0,-910358346,-0.063707,0.376233,0.049751,0.319278,-99.623767,58.650002,-1.0,-1029730346,-0.063996,0.0,0.046931,0.318598,-100.0,58.650002,-1.0,32.45121,0.0,1.98


#### Se quiser rodar para um Ticker.

In [10]:
price_data = price_data[price_data['Ticker'].isin(['ABEV3.SA'])]
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.93,Prediction_0.93,OBV_0.93,Price_Rate_Of_Change_0.93,low_14,high_14,k_percent_0.93,MACD_0.93,MACD_EMA_0.93,r_percent_0.93,Smoothed_Close_0.94,Prediction_0.94,OBV_0.94,Price_Rate_Of_Change_0.94,k_percent_0.94,MACD_0.94,MACD_EMA_0.94,r_percent_0.94,Smoothed_Close_0.95,Prediction_0.95,OBV_0.95,Price_Rate_Of_Change_0.95,k_percent_0.95,MACD_0.95,MACD_EMA_0.95,r_percent_0.95,Smoothed_Close_0.96,Prediction_0.96,OBV_0.96,Price_Rate_Of_Change_0.96,k_percent_0.96,MACD_0.96,MACD_EMA_0.96,r_percent_0.96,Smoothed_Close_0.97,Prediction_0.97,OBV_0.97,Price_Rate_Of_Change_0.97,k_percent_0.97,MACD_0.97,MACD_EMA_0.97,r_percent_0.97,Smoothed_Close_0.98,Prediction_0.98,OBV_0.98,Price_Rate_Of_Change_0.98,k_percent_0.98,MACD_0.98,MACD_EMA_0.98,r_percent_0.98,Smoothed_Close_0.99,Prediction_0.99,OBV_0.99,Price_Rate_Of_Change_0.99,k_percent_0.99,MACD_0.99,MACD_EMA_0.99,r_percent_0.99,Smoothed_Close_1.0,Prediction_1.0,OBV_1.0,Price_Rate_Of_Change_1.0,k_percent_1.0,MACD_1.0,MACD_EMA_1.0,r_percent_1.0,Smoothed_Close_1,Verify,RSI,up_days,down_days
13,2000-02-22,0.485255,0.485255,0.485255,0.485255,0.255174,75,ABEV3.SA,-0.029034,0.487228,1.0,26087,-0.013258,0.468107,0.527474,32.208622,0.00022,0.00034,-67.791378,0.486953,1.0,26087,-0.014017,31.745182,0.000217,0.000347,-68.254818,0.486676,1.0,26087,-0.01478,31.278115,0.000214,0.000354,-68.721885,0.486396,1.0,26087,-0.015547,30.807338,0.000211,0.000361,-69.192662,0.486115,1.0,26087,-0.016318,30.332747,0.000207,0.000367,-69.667253,0.485831,1.0,26087,-0.017092,29.854217,0.000203,0.000374,-70.145783,0.485544,1.0,26087,-0.01787,29.371599,0.000199,0.00038,-70.628401,0.485255,1.0,30637,-0.018652,28.884728,0.000194,0.000386,-71.115272,0.485255,1.0,43.244009,0.0,0.029034
14,2000-02-23,0.494478,0.494478,0.494478,0.494478,0.260024,455,ABEV3.SA,0.009223,0.493971,1.0,26542,-0.05931,0.468107,0.527474,43.56546,-4.4e-05,0.00026,-56.43454,0.494026,1.0,26542,-0.059818,43.65977,-4.2e-05,0.000266,-56.34023,0.494088,1.0,26542,-0.060311,43.763168,-3.9e-05,0.000272,-56.236832,0.494155,1.0,26542,-0.060789,43.875758,-3.7e-05,0.000278,-56.124242,0.494227,1.0,26542,-0.061253,43.99765,-3.4e-05,0.000283,-56.00235,0.494305,1.0,26542,-0.061702,44.128955,-3.1e-05,0.000289,-55.871045,0.494389,1.0,26542,-0.062136,44.269789,-2.9e-05,0.000294,-55.730211,0.494478,1.0,31092,-0.062555,44.420276,-2.6e-05,0.0003,-55.579724,0.494478,1.0,47.299662,0.009223,0.0
15,2000-02-24,0.487885,0.487885,0.487885,0.487885,0.256557,5005,ABEV3.SA,-0.006593,0.488311,1.0,21537,-0.068542,0.468107,0.527474,34.032339,-0.000579,8.6e-05,-65.967661,0.488253,1.0,21537,-0.068671,33.935491,-0.000579,9.1e-05,-66.064509,0.488195,1.0,21537,-0.06879,33.837211,-0.00058,9.5e-05,-66.162789,0.488136,1.0,21537,-0.068899,33.737231,-0.00058,0.0001,-66.262769,0.488075,1.0,21537,-0.068997,33.635278,-0.000581,0.000104,-66.364722,0.488013,1.0,21537,-0.069087,33.531076,-0.000581,0.000109,-66.468924,0.48795,1.0,21537,-0.069167,33.424343,-0.000582,0.000113,-66.575657,0.487885,1.0,26087,-0.069238,33.314793,-0.000583,0.000117,-66.685207,0.487885,1.0,44.667003,0.0,0.006593
16,2000-02-25,0.4747,0.4747,0.4747,0.4747,0.249623,3033,ABEV3.SA,-0.013185,0.475653,1.0,18504,-0.053849,0.468107,0.527474,12.710363,-0.001745,-0.000291,-87.289637,0.475513,1.0,18504,-0.053692,12.475284,-0.001752,-0.000289,-87.524716,0.475375,1.0,18504,-0.053532,12.24207,-0.00176,-0.000287,-87.75793,0.475237,1.0,18504,-0.053369,12.010753,-0.001767,-0.000284,-87.989247,0.475101,1.0,18504,-0.053203,11.781377,-0.001775,-0.000283,-88.218623,0.474966,1.0,18504,-0.053034,11.553995,-0.001782,-0.000281,-88.446005,0.474833,1.0,18504,-0.052864,11.328672,-0.00179,-0.000279,-88.671328,0.4747,1.0,23054,-0.052692,11.105483,-0.001797,-0.000277,-88.894517,0.4747,1.0,39.583155,0.0,0.013185
17,2000-02-29,0.468107,0.468107,0.468107,0.468107,0.246157,11602,ABEV3.SA,-0.006593,0.468635,1.0,6902,-0.004027,0.468107,0.527474,0.889725,-0.003047,-0.000855,-99.110275,0.468551,1.0,6902,-0.003443,0.748517,-0.003056,-0.000855,-99.251483,0.46847,1.0,6902,-0.002861,0.612103,-0.003066,-0.000855,-99.387897,0.468392,1.0,6902,-0.002283,0.48043,-0.003075,-0.000855,-99.51957,0.468317,1.0,6902,-0.001707,0.353441,-0.003084,-0.000856,-99.646559,0.468244,1.0,6902,-0.001135,0.23108,-0.003092,-0.000856,-99.76892,0.468174,1.0,6902,-0.000566,0.113287,-0.003101,-0.000856,-99.886713,0.468107,1.0,11452,0.0,0.0,-0.00311,-0.000857,-100.0,0.468107,1.0,37.143972,0.0,0.006593


#### Se quiser normal só pular etapa anterior.

In [11]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Para cada alpha, criar as variáveis X_Cols e Y_Cols e treinar o modelo
for alpha in alpha_list:
    # Definindo as colunas de X e Y dinamicamente
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']]
    
    Y_Cols = price_data[f'Prediction_{alpha}']

    # Dividir os dados em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Criar o classificador Random Forest
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Ajustar o modelo
    rand_frst_clf.fit(X_train, y_train)

    # Fazer previsões
    y_pred = rand_frst_clf.predict(X_test)

    # Imprimir a acurácia
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.2f}')
    print('---')


Alpha: 0.93
Correct Prediction (%): 76.95
---
Alpha: 0.94
Correct Prediction (%): 75.83
---
Alpha: 0.95
Correct Prediction (%): 75.05
---
Alpha: 0.96
Correct Prediction (%): 74.98
---
Alpha: 0.97
Correct Prediction (%): 73.72
---
Alpha: 0.98
Correct Prediction (%): 74.77
---
Alpha: 0.99
Correct Prediction (%): 74.28
---
Alpha: 1.0
Correct Prediction (%): 75.19
---


In [12]:
import warnings

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}'
    price_data[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    price_data['Match'] = (price_data[f'Prediction_{alpha}'] == price_data['Verify']).astype(int)  
    accuracy = price_data['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo: {accuracy:.2%}')



Precisão para Alpha: 0.93
Precisão do modelo: 85.73%

Precisão para Alpha: 0.94
Precisão do modelo: 86.17%

Precisão para Alpha: 0.95
Precisão do modelo: 85.94%

Precisão para Alpha: 0.96
Precisão do modelo: 87.12%

Precisão para Alpha: 0.97
Precisão do modelo: 87.92%

Precisão para Alpha: 0.98
Precisão do modelo: 87.57%

Precisão para Alpha: 0.99
Precisão do modelo: 88.36%

Precisão para Alpha: 1.0
Precisão do modelo: 93.80%


In [13]:
import warnings
from datetime import timedelta

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Filtrar últimos 30 dias
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)]

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha}',
                           f'r_percent_{alpha}',
                           f'Price_Rate_Of_Change_{alpha}',
                           f'MACD_{alpha}',
                           f'MACD_EMA_{alpha}',
                           f'OBV_{alpha}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}' para os últimos 30 dias
    last_30_days[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha}'] == last_30_days['Verify']).astype(int)  # 1 para igual, 0 para diferente
    accuracy = last_30_days['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')



Precisão para Alpha: 0.93
Precisão do modelo nos últimos 30 dias: 77.78%

Precisão para Alpha: 0.94
Precisão do modelo nos últimos 30 dias: 77.78%

Precisão para Alpha: 0.95
Precisão do modelo nos últimos 30 dias: 88.89%

Precisão para Alpha: 0.96
Precisão do modelo nos últimos 30 dias: 77.78%

Precisão para Alpha: 0.97
Precisão do modelo nos últimos 30 dias: 77.78%

Precisão para Alpha: 0.98
Precisão do modelo nos últimos 30 dias: 77.78%

Precisão para Alpha: 0.99
Precisão do modelo nos últimos 30 dias: 77.78%

Precisão para Alpha: 1.0
Precisão do modelo nos últimos 30 dias: 88.89%


#### Salvar modelo 

In [14]:
# Salvar o modelo treinado
joblib.dump(rand_frst_clf, 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model_Ambev.pkl')

['C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model_Ambev.pkl']