In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [2]:
price_data = pd.read_parquet(f'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet')
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.290425,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.275703,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.268352,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.270189,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.275703,6445,ABEV3.SA


In [3]:
import numpy as np
import pandas as pd

# Configuração inicial
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Cálculo da mudança no preço
price_data['change_in_price'] = price_data['Close'].diff()
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)
price_data['change_in_price'] = np.where(mask, np.nan, price_data['change_in_price'])

# Função de suavização exponencial
def exponential_smoothing(data, alpha):
    smoothed = np.zeros(len(data))
    smoothed[0] = data[0]
    for t in range(1, len(data)):
        smoothed[t] = alpha * data[t] + (1 - alpha) * smoothed[t-1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    return np.sign(data.shift(-d) - data)

# Função para calcular o target e ajustar valores zero para -1
def calculate_target_verify(data, d):
    target = np.sign(data.shift(-d) - data)
    target[target == 0] = -1
    return target

# Função OBV ajustada
def obv(group, smoothed_col):
    Volume = group['Volume']
    change = group[smoothed_col].diff()
    prev_obv = 0
    obv_values = []
    for i, j in zip(change, Volume):
        if i > 0:
            current_obv = prev_obv + j
        elif i < 0:
            current_obv = prev_obv - j
        else:
            current_obv = prev_obv
        prev_obv = current_obv
        obv_values.append(current_obv)
    return pd.Series(obv_values, index=group.index)

# Lista de alphas e parâmetros gerais
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]
n = 14
d = 10
e = 9

# Loop para cálculos por alpha
for alpha in alpha_list:
    smoothed_col = f'Smoothed_Close_{alpha:.2f}'
    target_col = f'Prediction_{alpha:.2f}'
    obv_col = f'OBV_{alpha:.2f}'
    price_rate_col = f'Price_Rate_Of_Change_{alpha:.2f}'
    macd_col = f'MACD_{alpha:.2f}'
    macd_ema_col = f'MACD_EMA_{alpha:.2f}'
    low_col = f'low_14_{alpha:.2f}'
    high_col = f'high_14_{alpha:.2f}'
    k_percent_col = f'k_percent_{alpha:.2f}'
    r_percent_col = f'r_percent_{alpha:.2f}'
    
    # Suavização exponencial
    price_data[smoothed_col] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )
    
    # Target
    price_data[target_col] = price_data.groupby('Ticker')[smoothed_col].transform(
        lambda x: calculate_target(x, d)
    )
    
    # OBV
    obv_groups = price_data.groupby('Ticker').apply(lambda group: obv(group, smoothed_col))
    price_data[obv_col] = obv_groups.reset_index(level=0, drop=True)
    
    # Price Rate of Change
    price_data[price_rate_col] = price_data.groupby('Ticker')[smoothed_col].transform(
        lambda x: x.pct_change(periods=e)
    )
    
    # RSI
    up_df, down_df = price_data[['Ticker', 'change_in_price']].copy(), price_data[['Ticker', 'change_in_price']].copy()
    up_df['change_in_price'] = up_df['change_in_price'].clip(lower=0)
    down_df['change_in_price'] = down_df['change_in_price'].clip(upper=0).abs()
    ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())
    ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())
    relative_strength = ewma_up / ewma_down
    price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))
    
    # %R
    low_14 = price_data[['Ticker', 'Low']].copy()
    high_14 = price_data[['Ticker', 'High']].copy()
    low_14 = low_14.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
    high_14 = high_14.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())
    r_percent = ((high_14 - price_data[smoothed_col]) / (high_14 - low_14)) * (-100)
    price_data[low_col] = low_14
    price_data[high_col] = high_14
    price_data[r_percent_col] = r_percent
    
    # MACD
    ema_26 = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26
    ema_9_macd = macd.ewm(span=9).mean()
    price_data[macd_col] = macd
    price_data[macd_ema_col] = ema_9_macd

# Drop linhas com valores ausentes
price_data = price_data.dropna()

# Visualização final
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.10,Prediction_0.10,OBV_0.10,Price_Rate_Of_Change_0.10,RSI,low_14_0.10,high_14_0.10,r_percent_0.10,MACD_0.10,MACD_EMA_0.10,Smoothed_Close_0.20,Prediction_0.20,OBV_0.20,Price_Rate_Of_Change_0.20,low_14_0.20,high_14_0.20,r_percent_0.20,MACD_0.20,MACD_EMA_0.20,Smoothed_Close_0.30,Prediction_0.30,OBV_0.30,Price_Rate_Of_Change_0.30,low_14_0.30,high_14_0.30,r_percent_0.30,MACD_0.30,MACD_EMA_0.30,Smoothed_Close_0.40,Prediction_0.40,OBV_0.40,Price_Rate_Of_Change_0.40,low_14_0.40,high_14_0.40,r_percent_0.40,MACD_0.40,MACD_EMA_0.40,Smoothed_Close_0.50,Prediction_0.50,OBV_0.50,Price_Rate_Of_Change_0.50,low_14_0.50,high_14_0.50,r_percent_0.50,MACD_0.50,MACD_EMA_0.50,Smoothed_Close_0.60,Prediction_0.60,OBV_0.60,Price_Rate_Of_Change_0.60,low_14_0.60,high_14_0.60,r_percent_0.60,MACD_0.60,MACD_EMA_0.60,Smoothed_Close_0.70,Prediction_0.70,OBV_0.70,Price_Rate_Of_Change_0.70,low_14_0.70,high_14_0.70,r_percent_0.70,MACD_0.70,MACD_EMA_0.70,Smoothed_Close_0.80,Prediction_0.80,OBV_0.80,Price_Rate_Of_Change_0.80,low_14_0.80,high_14_0.80,r_percent_0.80,MACD_0.80,MACD_EMA_0.80,Smoothed_Close_0.90,Prediction_0.90,OBV_0.90,Price_Rate_Of_Change_0.90,low_14_0.90,high_14_0.90,r_percent_0.90,MACD_0.90,MACD_EMA_0.90
11759,2024-12-27,54.73,55.080002,54.459999,54.740002,54.740002,16635300,VALE3.SA,-0.269997,56.328998,-1.0,2598477848,-0.036864,36.806222,53.330002,60.189999,-56.282832,-0.856589,-0.716997,55.332221,-1.0,2192712570,-0.049589,53.330002,60.189999,-70.813113,-1.03108,-0.849476,54.966447,-1.0,317648762,-0.054749,53.330002,60.189999,-76.145103,-1.117711,-0.909796,54.840631,-1.0,726501566,-0.054982,53.330002,60.189999,-77.979161,-1.163699,-0.951508,54.807772,-1.0,654398326,-0.052739,53.330002,60.189999,-78.458149,-1.185256,-0.979688,54.801924,-1.0,394799638,-0.049363,53.330002,60.189999,-78.543394,-1.194099,-0.998476,54.796952,-1.0,586730342,-0.045552,53.330002,60.189999,-78.615878,-1.197019,-1.011167,54.784814,-1.0,-1015310042,-0.041707,53.330002,60.189999,-78.792813,-1.197522,-1.019984,54.765098,-1.0,-849598738,-0.038062,53.330002,60.189999,-79.080221,-1.197284,-1.026331
11760,2024-12-30,54.900002,55.189999,54.549999,54.549999,54.549999,11250900,VALE3.SA,-0.190002,56.151098,-1.0,2587226948,-0.035577,35.242227,53.330002,60.189999,-58.876126,-0.890655,-0.751729,55.175777,-1.0,2181461670,-0.044532,53.330002,60.189999,-73.093645,-1.064313,-0.892443,54.841513,-1.0,306397862,-0.045622,53.330002,60.189999,-77.966304,-1.141571,-0.956151,54.724378,-1.0,715250666,-0.042657,53.330002,60.189999,-79.673806,-1.177118,-0.99663,54.678886,-1.0,643147426,-0.038353,53.330002,60.189999,-80.336961,-1.19125,-1.022,54.650769,-1.0,383548738,-0.034012,53.330002,60.189999,-80.746821,-1.195987,-1.037978,54.624085,-1.0,575479442,-0.03021,53.330002,60.189999,-81.135805,-1.197177,-1.048369,54.596962,-1.0,-1026560942,-0.027171,53.330002,60.189999,-81.531181,-1.1973,-1.055447,54.571509,-1.0,-860849638,-0.024938,53.330002,60.189999,-81.902218,-1.197249,-1.060515
11761,2025-01-02,54.709999,55.099998,54.23,54.25,54.25,17623900,VALE3.SA,-0.299999,55.960988,-1.0,2569603048,-0.034877,32.709994,53.330002,59.939999,-60.196855,-0.92236,-0.785855,54.990622,-1.0,2163837770,-0.041373,53.330002,59.939999,-74.877148,-1.092991,-0.932553,54.664059,-1.0,288773962,-0.04053,53.330002,59.939999,-79.817584,-1.161412,-0.997203,54.534627,-1.0,697626766,-0.036996,53.330002,59.939999,-81.775709,-1.189353,-1.035174,54.464443,-1.0,625523526,-0.033372,53.330002,59.939999,-82.837495,-1.199478,-1.057496,54.410308,-1.0,365924838,-0.030603,53.330002,59.939999,-83.656484,-1.203018,-1.070986,54.362226,-1.0,557855542,-0.028877,53.330002,59.939999,-84.3839,-1.204547,-1.079605,54.319392,-1.0,-1044184842,-0.028073,53.330002,59.939999,-85.031905,-1.205625,-1.085483,54.282151,-1.0,-878473538,-0.027963,53.330002,59.939999,-85.595317,-1.20666,-1.089744
11762,2025-01-03,53.900002,54.0,52.880001,53.240002,53.240002,23608700,VALE3.SA,-1.009998,55.68889,-1.0,2545994348,-0.036456,25.572295,52.880001,59.59,-58.138762,-0.958395,-0.820363,54.640498,-1.0,2140229070,-0.043294,52.880001,59.59,-73.763089,-1.130935,-0.972229,54.236842,-1.0,265165262,-0.043682,52.880001,59.59,-79.778825,-1.197801,-1.037322,54.016777,-1.0,674018066,-0.042626,52.880001,59.59,-83.058482,-1.226695,-1.073479,53.852222,-1.0,601914826,-0.042242,52.880001,59.59,-85.51086,-1.241093,-1.094215,53.708124,-1.0,342316138,-0.042929,52.880001,59.59,-87.658374,-1.250832,-1.106955,53.576669,1.0,534246842,-0.044479,52.880001,59.59,-89.617469,-1.25926,-1.115536,53.45588,1.0,-1067793542,-0.046549,52.880001,59.59,-91.417603,-1.267292,-1.121844,53.344217,1.0,-902082238,-0.048841,52.880001,59.59,-93.081735,-1.275103,-1.126816
11763,2025-01-06,53.48,53.790001,52.48,52.560001,52.560001,21689500,VALE3.SA,-0.68,55.376001,-1.0,2524304848,-0.036894,21.865682,52.48,57.68,-44.307674,-1.000665,-0.856423,54.224398,-1.0,2118539570,-0.04286,52.48,57.68,-66.453874,-1.180967,-1.013977,53.73379,-1.0,243475762,-0.042911,52.48,57.68,-75.888656,-1.25279,-1.080416,53.434067,1.0,652328566,-0.042007,52.48,57.68,-81.652559,-1.288456,-1.116474,53.206112,1.0,580225326,-0.041654,52.48,57.68,-86.036304,-1.311096,-1.137591,53.01925,1.0,320626638,-0.041813,52.48,57.68,-89.629792,-1.328992,-1.151363,52.865002,1.0,512557342,-0.042096,52.48,57.68,-92.596115,-1.344547,-1.161338,52.739177,1.0,-1089483042,-0.042171,52.48,57.68,-95.015818,-1.358337,-1.169143,52.638423,1.0,-923771738,-0.041846,52.48,57.68,-96.953397,-1.370499,-1.175552


#### Se quiser rodar para um Ticker.

In [None]:
price_data = price_data[price_data['Ticker'].isin(['ABEV3.SA'])]
price_data.tail()

In [None]:
price_data = price_data[price_data['Ticker'].isin(['ITUB4.SA'])]
price_data.tail()

In [None]:
price_data = price_data[price_data['Ticker'].isin(['MGLU3.SA'])]
price_data.tail()

In [102]:
price_data = price_data[price_data['Ticker'].isin(['PETR4.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90,Price_Rate_Of_Change_0.10,Price_Rate_Of_Change_0.20,Price_Rate_Of_Change_0.30,Price_Rate_Of_Change_0.40,Price_Rate_Of_Change_0.50,Price_Rate_Of_Change_0.60,Price_Rate_Of_Change_0.70,Price_Rate_Of_Change_0.80,Price_Rate_Of_Change_0.90,OBV_0.10,OBV_0.20,OBV_0.30,OBV_0.40,OBV_0.50,OBV_0.60,OBV_0.70,OBV_0.80,OBV_0.90
17835,2024-12-27,36.0,36.0,35.610001,35.66,35.66,24167200,PETR4.SA,-0.110001,37.649875,-1.0,37.001492,-1.0,36.518488,1.0,36.22008,1.0,36.02593,1.0,35.891076,1.0,35.794376,1.0,35.726123,1.0,35.681923,1.0,35.66,1.0,36.82,0.110001,0.0,20.526365,35.599998,40.759998,39.726295,35.599998,40.759998,27.16072,35.599998,40.759998,17.800177,35.599998,40.759998,12.017076,35.599998,40.759998,8.254485,35.599998,40.759998,5.64104,35.599998,40.759998,3.767008,35.599998,40.759998,2.444283,35.599998,40.759998,1.587692,-60.273705,-72.83928,-82.199823,-87.982924,-91.745515,-94.35896,-96.232992,-97.555717,-98.412308,0.199699,0.355567,0.004863,0.308245,-0.149991,0.221659,-0.247798,0.156459,-0.311362,0.110271,-0.355209,0.076877,-0.387169,0.051907,-0.411444,0.032617,-0.430356,0.017298,-0.03153,-0.060793,-0.074687,-0.080939,-0.083218,-0.083132,-0.081442,-0.078543,-0.074657,-279512648955,-150909929051,3100544117,-157200231747,52727347493,160447705765,161501311149,265285062981,183943810781
17836,2024-12-30,35.779999,36.369999,35.77,36.189999,36.189999,22355600,PETR4.SA,0.529999,37.503888,-1.0,36.839193,1.0,36.419941,1.0,36.208047,1.0,36.107964,1.0,36.07043,1.0,36.071312,1.0,36.097224,1.0,36.139191,1.0,36.189999,1.0,37.290001,0.0,0.529999,32.377384,35.599998,40.759998,36.897077,35.599998,40.759998,24.015398,35.599998,40.759998,15.890357,35.599998,40.759998,11.78389,35.599998,40.759998,9.844298,35.599998,40.759998,9.116883,35.599998,40.759998,9.133981,35.599998,40.759998,9.636146,35.599998,40.759998,10.44947,-63.102923,-75.984602,-84.109643,-88.21611,-90.155702,-90.883117,-90.866019,-90.363854,-89.55053,0.129634,0.310381,-0.0963,0.227336,-0.255279,0.126271,-0.349058,0.055356,-0.406156,0.006986,-0.442653,-0.027029,-0.466815,-0.051838,-0.483048,-0.070516,-0.49389,-0.08494,-0.033356,-0.058717,-0.067501,-0.068865,-0.066931,-0.063546,-0.059673,-0.055893,-0.05261,-279535004555,-150932284651,3078188517,-157222587347,52749703093,160470061365,161523666749,265307418581,183966166381
17837,2025-01-02,36.419998,37.09,36.189999,36.77,36.77,30046800,PETR4.SA,0.580002,37.430499,-1.0,36.825354,1.0,36.524959,1.0,36.432828,1.0,36.438982,1.0,36.490172,1.0,36.560394,1.0,36.635445,1.0,36.70692,1.0,36.77,1.0,37.049999,0.0,0.580002,43.092658,35.599998,40.759998,35.474815,35.599998,40.759998,23.74721,35.599998,40.759998,17.925587,35.599998,40.759998,16.140117,35.599998,40.759998,16.259378,35.599998,40.759998,17.251428,35.599998,40.759998,18.612315,35.599998,40.759998,20.066795,35.599998,40.759998,21.451959,-64.525185,-76.25279,-82.074413,-83.859883,-83.740622,-82.748572,-81.387685,-79.933205,-78.548041,0.067408,0.261786,-0.175564,0.146756,-0.326482,0.035721,-0.406485,-0.037012,-0.44939,-0.084289,-0.472635,-0.11615,-0.484881,-0.138446,-0.490709,-0.154555,-0.49275,-0.166502,-0.03311,-0.053278,-0.056724,-0.053878,-0.049063,-0.04404,-0.039555,-0.03588,-0.033026,-279565051355,-150962331451,3108235317,-157192540547,52779749893,160500108165,161553713549,265337465381,183996213181
17838,2025-01-03,36.880001,37.040001,36.32,36.380001,36.380001,23314200,PETR4.SA,-0.389999,37.325449,-1.0,36.736284,1.0,36.481471,1.0,36.411698,1.0,36.409492,1.0,36.424069,1.0,36.434119,1.0,36.43109,1.0,36.412693,1.0,36.380001,1.0,37.200001,0.389999,0.0,38.374848,35.599998,40.759998,33.438966,35.599998,40.759998,22.021034,35.599998,40.759998,17.08281,35.599998,40.759998,15.730602,35.599998,40.759998,15.687854,35.599998,40.759998,15.970369,35.599998,40.759998,16.165125,35.599998,40.759998,16.106423,35.599998,40.759998,15.749893,-66.561034,-77.978966,-82.91719,-84.269398,-84.312146,-84.029631,-83.834875,-83.893577,-84.250107,0.009508,0.21133,-0.242771,0.06885,-0.382017,-0.047827,-0.44853,-0.119316,-0.480494,-0.16353,-0.496013,-0.192123,-0.503583,-0.211474,-0.50742,-0.225128,-0.509713,-0.235144,-0.034795,-0.052657,-0.054761,-0.052384,-0.049597,-0.047699,-0.046972,-0.047298,-0.048434,-279588365555,-150985645651,3084921117,-157215854747,52756435693,160476793965,161530399349,265314151181,183972898981
17839,2025-01-06,36.599998,36.689999,36.060001,36.209999,36.209999,23760200,PETR4.SA,-0.170002,37.213904,-1.0,36.631027,1.0,36.40003,1.0,36.331018,1.0,36.309745,1.0,36.295627,1.0,36.277235,1.0,36.254217,1.0,36.230268,1.0,36.209999,1.0,37.259998,0.170002,0.0,36.372025,35.599998,38.889999,49.054868,35.599998,38.889999,31.33824,35.599998,38.889999,24.317053,35.599998,38.889999,22.219437,35.599998,38.889999,21.572849,35.599998,38.889999,21.143726,35.599998,38.889999,20.584692,35.599998,38.889999,19.885063,35.599998,38.889999,19.157137,-50.945132,-68.66176,-75.682947,-77.780563,-78.427151,-78.856274,-79.415308,-80.114937,-80.842863,-0.044863,0.160092,-0.301056,-0.005131,-0.42767,-0.123796,-0.482796,-0.192012,-0.507344,-0.232293,-0.518922,-0.257483,-0.525011,-0.274181,-0.52884,-0.28587,-0.531747,-0.294465,-0.034281,-0.048163,-0.047356,-0.043385,-0.039673,-0.036878,-0.034819,-0.033117,-0.031421,-279612125755,-151009405851,3061160917,-157239614947,52732675493,160453033765,161506639149,265290390981,183949138781


In [110]:
price_data = price_data[price_data['Ticker'].isin(['VALE3.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90,Price_Rate_Of_Change_0.10,Price_Rate_Of_Change_0.20,Price_Rate_Of_Change_0.30,Price_Rate_Of_Change_0.40,Price_Rate_Of_Change_0.50,Price_Rate_Of_Change_0.60,Price_Rate_Of_Change_0.70,Price_Rate_Of_Change_0.80,Price_Rate_Of_Change_0.90,OBV_0.10,OBV_0.20,OBV_0.30,OBV_0.40,OBV_0.50,OBV_0.60,OBV_0.70,OBV_0.80,OBV_0.90


In [None]:
price_data.to_excel('C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx', index=False)

#### Se quiser normal só pular etapa anterior.

In [109]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Para cada alpha, criar as variáveis X_Cols e Y_Cols e treinar o modelo
for alpha in alpha_list:
    # Definindo as colunas de X e Y dinamicamente
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha:.2f}',
                         f'r_percent_{alpha:.2f}',
                         f'Price_Rate_Of_Change_{alpha:.2f}',
                         f'MACD_{alpha:.2f}',
                         f'MACD_EMA_{alpha:.2f}',
                         f'OBV_{alpha:.2f}']]
    
    Y_Cols = price_data[f'Prediction_{alpha:.2f}']

    # Dividir os dados em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Criar o classificador Random Forest
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Ajustar o modelo
    rand_frst_clf.fit(X_train, y_train)

    # Fazer previsões
    y_pred = rand_frst_clf.predict(X_test)

    # Imprimir a acurácia
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.5f}')
    print('---')


Alpha: 0.1
Correct Prediction (%): 84.61030
---
Alpha: 0.2
Correct Prediction (%): 77.01453
---
Alpha: 0.3
Correct Prediction (%): 74.10832
---
Alpha: 0.4
Correct Prediction (%): 71.40026
---
Alpha: 0.5
Correct Prediction (%): 70.60766
---
Alpha: 0.6
Correct Prediction (%): 69.55086
---
Alpha: 0.7
Correct Prediction (%): 69.15456
---
Alpha: 0.8
Correct Prediction (%): 68.56011
---
Alpha: 0.9
Correct Prediction (%): 68.75826
---


In [107]:
import warnings
from datetime import timedelta

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Filtrar últimos 30 dias
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)]

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha:.2f}',
                           f'r_percent_{alpha:.2f}',
                           f'Price_Rate_Of_Change_{alpha:.2f}',
                           f'MACD_{alpha:.2f}',
                           f'MACD_EMA_{alpha:.2f}',
                           f'OBV_{alpha:.2f}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}' para os últimos 30 dias
    last_30_days[f'Prediction_{alpha:.2f}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha:.2f}'] == last_30_days['Verify']).astype(int)  # 1 para igual, 0 para diferente
    accuracy = last_30_days['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo nos últimos 30 dias: 41.18%

Precisão para Alpha: 0.2
Precisão do modelo nos últimos 30 dias: 70.59%

Precisão para Alpha: 0.3
Precisão do modelo nos últimos 30 dias: 76.47%

Precisão para Alpha: 0.4
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.5
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.6
Precisão do modelo nos últimos 30 dias: 52.94%

Precisão para Alpha: 0.7
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.8
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.9
Precisão do modelo nos últimos 30 dias: 88.24%


In [108]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Criar listas para armazenar os resultados gerais
all_predictions = []
all_verify_values = []
all_actual_values = []
all_accuracy_scores_ytest = []
all_accuracy_scores_verify = []

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X), o target (y) e a verificação (verify) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha:.2f}',
                    f'r_percent_{alpha:.2f}',
                    f'Price_Rate_Of_Change_{alpha:.2f}',
                    f'MACD_{alpha:.2f}',
                    f'MACD_EMA_{alpha:.2f}',
                    f'OBV_{alpha:.2f}']]
    y = price_data[f'Prediction_{alpha:.2f}']
    verify = price_data['Verify']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y, verify], axis=1).dropna()
    X = data.iloc[:, :-2]
    y = data.iloc[:, -2]
    verify = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0, oob_score=True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Armazenar as predições e valores reais para análise posterior
        all_predictions.extend(y_pred)
        all_verify_values.extend(y_verify.values)
        all_actual_values.extend(y_test.values)

        # Calcular acurácia para y_test e verify
        accuracy_ytest = accuracy_score(y_test, y_pred) * 100.0
        accuracy_verify = accuracy_score(y_verify, y_pred) * 100.0

        # Armazenar os resultados das acurácias
        all_accuracy_scores_ytest.append(accuracy_ytest)
        all_accuracy_scores_verify.append(accuracy_verify)

        # Exibir resultados por fold
        print(f'Fold {fold}:')
        print(f' - Correct Prediction (y_test): {accuracy_ytest:.2f}%')
        print(f' - Correct Prediction (Verify): {accuracy_verify:.2f}%')
        fold += 1

    print('---')

# Resultados gerais
print("Resumo geral:")
print(f'Média de acurácia (y_test): {np.mean(all_accuracy_scores_ytest):.2f}%')
print(f'Média de acurácia (Verify): {np.mean(all_accuracy_scores_verify):.2f}%')


Alpha: 0.1
Fold 1:
 - Correct Prediction (y_test): 67.66%
 - Correct Prediction (Verify): 53.97%
Fold 2:
 - Correct Prediction (y_test): 51.49%
 - Correct Prediction (Verify): 49.31%
Fold 3:
 - Correct Prediction (y_test): 69.35%
 - Correct Prediction (Verify): 50.10%
Fold 4:
 - Correct Prediction (y_test): 68.35%
 - Correct Prediction (Verify): 53.57%
Fold 5:
 - Correct Prediction (y_test): 66.67%
 - Correct Prediction (Verify): 41.37%
---
Alpha: 0.2
Fold 1:
 - Correct Prediction (y_test): 51.69%
 - Correct Prediction (Verify): 51.59%
Fold 2:
 - Correct Prediction (y_test): 49.01%
 - Correct Prediction (Verify): 45.73%
Fold 3:
 - Correct Prediction (y_test): 57.84%
 - Correct Prediction (Verify): 48.12%
Fold 4:
 - Correct Prediction (y_test): 56.45%
 - Correct Prediction (Verify): 49.40%
Fold 5:
 - Correct Prediction (y_test): 59.03%
 - Correct Prediction (Verify): 44.84%
---
Alpha: 0.3
Fold 1:
 - Correct Prediction (y_test): 48.31%
 - Correct Prediction (Verify): 49.01%
Fold 2:
 - Co

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X) e o target (Y) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha}',
                    f'r_percent_{alpha}',
                    f'Price_Rate_Of_Change_{alpha}',
                    f'MACD_{alpha}',
                    f'MACD_EMA_{alpha}',
                    f'OBV_{alpha}']]
    y = price_data[f'Prediction_{alpha}']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y], axis=1).dropna()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0,oob_score = True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Calcular e imprimir a acurácia para cada divisão
        accuracy = accuracy_score(y_test, y_pred) * 100.0
        print(f'Fold {fold}: Correct Prediction (%): {accuracy:.2f}')
        fold += 1

    print('---')


In [None]:
# Salvar o modelo treinado
joblib.dump(rand_frst_clf, 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl')

In [None]:
price_data.to_excel('C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx', index=False)