In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [2]:
price_data = pd.read_parquet(f'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet')
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.284432,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.270014,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.262814,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.264614,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.270014,6445,ABEV3.SA


In [3]:
import numpy as np
import pandas as pd

# Transformar e ordenar os dados
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by=['Ticker', 'Date'], inplace=True)

# Criar a coluna de mudança de preço
price_data['change_in_price'] = price_data['Close'].diff()

# Máscara para identificar a virada entre os tickers
mask = price_data['Ticker'] != price_data['Ticker'].shift(1)

# Função para suavização exponencial
def exponential_smoothing(data, alpha):
    smoothed = np.zeros(len(data))
    smoothed[0] = data[0]
    for t in range(1, len(data)):
        smoothed[t] = alpha * data[t] + (1 - alpha) * smoothed[t-1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    target = np.sign(data.shift(-d) - data)
    target[target == 0] = -1
    return target

# Função para calcular o OBV
def obv(group, smoothed_col):
    Volume = group['Volume']
    change = group[smoothed_col].diff()

    prev_obv = 0
    obv_values = []

    for i, j in zip(change, Volume):
        if i > 0:
            current_obv = prev_obv + j
        elif i < 0:
            current_obv = prev_obv - j
        else:
            current_obv = prev_obv

        prev_obv = current_obv
        obv_values.append(current_obv)

    return pd.Series(obv_values, index=group.index)

# Lista de valores alpha
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

# Período para calcular o target, RSI e estocástico
d = 30
n = 14
e = 9

# Loop para aplicar a suavização exponencial, calcular o target, o RSI e o estocástico
for alpha in alpha_list:
    col_name_smoothed = f"Smoothed_Close_{alpha}"
    col_name_prediction = f"Prediction_{alpha}"
    col_name_k_percent = f"k_percent_{alpha}"
    col_name_macd = f"MACD_{alpha}"
    col_name_macd_ema = f"MACD_EMA_{alpha}"
    col_name_price_rate_of_change = f"Price_Rate_Of_Change_{alpha}"
    col_name_obv = f"OBV_{alpha}"
    col_name_r_percent = f"r_percent_{alpha}"

    # Suavização exponencial
    price_data[col_name_smoothed] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Aplicando a máscara para evitar cálculos na virada dos tickers
    price_data[col_name_smoothed] = np.where(mask, np.nan, price_data[col_name_smoothed])

    # Cálculo do target
    price_data[col_name_prediction] = price_data.groupby('Ticker')[col_name_smoothed].transform(
        lambda x: calculate_target(x, d)
    )
    price_data[col_name_prediction] = np.where(mask, np.nan, price_data[col_name_prediction])

    # Calculando o OBV usando a suavização exponencial
    obv_groups = price_data.groupby('Ticker').apply(obv, smoothed_col=col_name_smoothed)
    price_data[col_name_obv] = obv_groups.reset_index(level=0, drop=True)

    # Calculando o Price Rate of Change (ROC)
    price_data[col_name_price_rate_of_change] = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.pct_change(periods=e))
    price_data[col_name_price_rate_of_change] = np.where(mask, np.nan, price_data[col_name_price_rate_of_change])

    # Calcular low_14 e high_14 para o estocástico
    low_14 = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
    high_14 = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())

    # Adicionar low_14 e high_14 ao DataFrame
    price_data['low_14'] = low_14
    price_data['high_14'] = high_14

    # Calcular o %K para o estocástico
    price_data[col_name_k_percent] = 100 * ((price_data[col_name_smoothed] - low_14) / (high_14 - low_14))

    # Aplicando a máscara para evitar cálculos na virada dos tickers
    price_data[col_name_k_percent] = np.where(mask, np.nan, price_data[col_name_k_percent])

    # Cálculo do MACD
    ema_26 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[col_name_smoothed].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26

    # Cálculo da EMA do MACD
    ema_9_macd = macd.ewm(span=9).mean()

    # Adicionar MACD e MACD_EMA ao DataFrame
    price_data[col_name_macd] = macd
    price_data[col_name_macd_ema] = ema_9_macd

    # Cálculo do r_percent
    r_percent = ((high_14 - price_data[col_name_smoothed]) / (high_14 - low_14)) * (-100)
    price_data[col_name_r_percent] = r_percent

# Coluna de verificação (sem suavização, alpha = 1)
alpha = 1
price_data['Smoothed_Close_1'] = price_data.groupby('Ticker')['Close'].transform(
    lambda x: exponential_smoothing(x.values, alpha)
)
price_data['Smoothed_Close_1'] = np.where(mask, np.nan, price_data['Smoothed_Close_1'])

price_data['Verify'] = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda x: calculate_target(x, d)
)
price_data['Verify'] = np.where(mask, np.nan, price_data['Verify'])

# Cálculo do RSI
up_df = price_data[['Ticker', 'change_in_price']].copy()
down_df = price_data[['Ticker', 'change_in_price']].copy()

up_df['change_in_price'] = up_df['change_in_price'].where(up_df['change_in_price'] > 0, 0)
down_df['change_in_price'] = down_df['change_in_price'].where(down_df['change_in_price'] < 0, 0).abs()

ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span=n).mean())

relative_strength = ewma_up / ewma_down
price_data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['up_days'] = up_df['change_in_price']
price_data['down_days'] = down_df['change_in_price']

# Remover as linhas com NaN
price_data = price_data.dropna()

# Visualizar o DataFrame atualizado
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
11737,2024-11-26,58.290001,58.490002,57.150002,57.43,56.921944,21406600,VALE3.SA,-0.739998,59.087433,-1.0,2910163448,-0.037813,56.84,63.990002,31.432615,-0.382124,-0.093501,-68.567385,58.15952,-1.0,2410435970,-0.050521,18.454818,-0.742903,-0.386876,-81.545182,57.850635,-1.0,491129762,-0.05036,14.134748,-0.889307,-0.537938,-85.865252,57.765248,-1.0,896772166,-0.045788,12.94052,-0.946317,-0.620855,-87.05948,57.72976,-1.0,856583926,-0.040354,12.444192,-0.966775,-0.668279,-87.555808,57.690334,-1.0,467146238,-0.035199,11.892783,-0.973699,-0.696744,-88.107217,57.637555,-1.0,584781742,-0.030593,11.154609,-0.976349,-0.71489,-88.845391,57.573934,-1.0,-1017258642,-0.026601,10.264812,-0.978129,-0.727222,-89.735188,57.503544,-1.0,-851547338,-0.023298,9.280333,-0.980155,-0.736125,-90.719667,57.43,-1.0,34.815,0.0,0.739998
11738,2024-11-27,57.869999,58.419998,57.599998,58.130001,57.615753,22121200,VALE3.SA,0.700001,58.991689,-1.0,2888042248,-0.032932,56.84,63.990002,30.09355,-0.43769,-0.162339,-69.90645,58.153616,-1.0,2388314770,-0.038264,18.372248,-0.781879,-0.465876,-81.627752,57.934445,-1.0,513250962,-0.031827,15.306914,-0.903849,-0.61112,-84.693086,57.911149,-1.0,918893366,-0.022601,14.981099,-0.942902,-0.685265,-85.018901,57.929881,-1.0,878705126,-0.013769,15.24308,-0.952278,-0.725079,-84.75692,57.954134,-1.0,489267438,-0.006104,15.582294,-0.952148,-0.747825,-84.417706,57.982267,-1.0,606902942,0.000368,15.97576,-0.949351,-0.761782,-84.02424,58.018788,-1.0,-995137442,0.005778,16.486536,-0.94585,-0.770947,-83.513464,58.067355,-1.0,-829426138,0.010297,17.165804,-0.942017,-0.777303,-82.834196,58.130001,-1.0,44.301789,0.700001,0.0
11739,2024-11-28,57.900002,58.799999,57.27,57.529999,57.021057,20467000,VALE3.SA,-0.600002,58.84552,-1.0,2867575248,-0.029216,56.84,63.990002,28.049228,-0.487896,-0.22745,-71.950772,58.028893,-1.0,2367847770,-0.029712,16.627864,-0.813454,-0.535392,-83.372136,57.813111,-1.0,492783962,-0.020702,13.609938,-0.914621,-0.67182,-86.390062,57.758689,-1.0,898426366,-0.011221,12.848791,-0.941643,-0.73654,-87.151209,57.72994,-1.0,858238126,-0.003786,12.446704,-0.946017,-0.769267,-87.553296,57.699653,-1.0,468800438,0.001382,12.023114,-0.944712,-0.787202,-87.976886,57.665679,-1.0,586435942,0.004624,11.547958,-0.942634,-0.797952,-88.452042,57.627757,-1.0,-1015604442,0.006333,11.01757,-0.940974,-0.804953,-88.98243,57.583734,-1.0,-849893138,0.006857,10.401876,-0.939982,-0.809839,-89.598124,57.529999,-1.0,38.727457,0.0,0.600002
11740,2024-11-29,58.25,59.209999,58.150002,58.779999,58.260002,31023600,VALE3.SA,1.25,58.838968,-1.0,2836551648,-0.023239,56.84,62.049999,38.367916,-0.522195,-0.286399,-61.632084,58.179114,-1.0,2398871370,-0.017455,25.702763,-0.816939,-0.591701,-74.297237,58.103177,-1.0,523807562,-0.004685,24.245247,-0.889498,-0.715356,-75.754753,58.167213,-1.0,929449966,0.006623,25.474336,-0.897338,-0.7687,-74.525664,58.254969,-1.0,889261726,0.014989,27.15872,-0.888448,-0.793103,-72.84128,58.34786,-1.0,499824038,0.020924,28.941662,-0.876412,-0.805044,-71.058338,58.445703,-1.0,617459542,0.025218,30.819637,-0.864405,-0.811243,-69.180363,58.54955,-1.0,-984580842,0.028538,32.812869,-0.852898,-0.814542,-67.187131,58.660372,-1.0,-818869538,0.031388,34.939971,-0.841789,-0.816229,-65.060029,58.779999,-1.0,52.956513,1.25,0.0
11741,2024-12-02,58.849998,59.380001,58.689999,58.919998,58.398762,18972100,VALE3.SA,0.139999,58.847071,-1.0,2855523748,-0.018724,56.84,59.869999,66.239996,-0.542469,-0.337613,-33.760004,58.327291,-1.0,2417843470,-0.00939,49.085518,-0.798539,-0.633069,-50.914482,58.348224,-1.0,542779662,0.003777,49.77637,-0.840131,-0.740311,-50.22363,58.468327,-1.0,948422066,0.013479,53.740181,-0.828378,-0.780636,-46.259819,58.587484,-1.0,908233826,0.019403,57.672748,-0.806694,-0.795821,-42.327252,58.691143,-1.0,518796138,0.022658,61.093851,-0.785529,-0.801141,-38.906149,58.77771,-1.0,636431642,0.024226,63.950833,-0.766779,-0.80235,-36.049167,58.845909,-1.0,-965608742,0.024744,66.201626,-0.750531,-0.80174,-33.798374,58.894036,-1.0,-799897438,0.024556,67.789976,-0.736625,-0.800308,-32.210024,58.919998,-1.0,54.327181,0.139999,0.0


#### Se quiser rodar para um Ticker.

In [4]:
price_data = price_data[price_data['Ticker'].isin(['ABEV3.SA'])]
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.1,OBV_0.1,Price_Rate_Of_Change_0.1,low_14,high_14,k_percent_0.1,MACD_0.1,MACD_EMA_0.1,r_percent_0.1,Smoothed_Close_0.2,Prediction_0.2,OBV_0.2,Price_Rate_Of_Change_0.2,k_percent_0.2,MACD_0.2,MACD_EMA_0.2,r_percent_0.2,Smoothed_Close_0.3,Prediction_0.3,OBV_0.3,Price_Rate_Of_Change_0.3,k_percent_0.3,MACD_0.3,MACD_EMA_0.3,r_percent_0.3,Smoothed_Close_0.4,Prediction_0.4,OBV_0.4,Price_Rate_Of_Change_0.4,k_percent_0.4,MACD_0.4,MACD_EMA_0.4,r_percent_0.4,Smoothed_Close_0.5,Prediction_0.5,OBV_0.5,Price_Rate_Of_Change_0.5,k_percent_0.5,MACD_0.5,MACD_EMA_0.5,r_percent_0.5,Smoothed_Close_0.6,Prediction_0.6,OBV_0.6,Price_Rate_Of_Change_0.6,k_percent_0.6,MACD_0.6,MACD_EMA_0.6,r_percent_0.6,Smoothed_Close_0.7,Prediction_0.7,OBV_0.7,Price_Rate_Of_Change_0.7,k_percent_0.7,MACD_0.7,MACD_EMA_0.7,r_percent_0.7,Smoothed_Close_0.8,Prediction_0.8,OBV_0.8,Price_Rate_Of_Change_0.8,k_percent_0.8,MACD_0.8,MACD_EMA_0.8,r_percent_0.8,Smoothed_Close_0.9,Prediction_0.9,OBV_0.9,Price_Rate_Of_Change_0.9,k_percent_0.9,MACD_0.9,MACD_EMA_0.9,r_percent_0.9,Smoothed_Close_1,Verify,RSI,up_days,down_days
13,2000-02-22,0.485255,0.485255,0.485255,0.485255,0.264977,75,ABEV3.SA,-0.029034,0.503659,1.0,15473,-0.012131,0.468107,0.527474,59.884943,-0.001126,-0.000787,-40.115057,0.499179,1.0,27757,-0.005664,52.339618,-0.001025,-0.000797,-47.660382,0.497965,1.0,27757,0.00234,50.294528,-0.000682,-0.000597,-49.705472,0.497261,1.0,40647,0.00736,49.107952,-0.000361,-0.000375,-50.892048,0.496289,1.0,40647,0.008701,47.470848,-0.000112,-0.000178,-52.529152,0.494839,1.0,28363,0.006764,45.029143,6.3e-05,-1.3e-05,-54.970857,0.492914,1.0,28363,0.002296,41.785331,0.000172,0.000122,-58.214669,0.490611,1.0,28363,-0.003869,37.90588,0.000223,0.000231,-62.09412,0.488041,1.0,29575,-0.011005,33.57772,0.000227,0.000318,-66.42228,0.485255,1.0,43.244009,0.0,0.029034
14,2000-02-23,0.494478,0.494478,0.494478,0.494478,0.270014,455,ABEV3.SA,0.009223,0.502741,1.0,15018,-0.01733,0.468107,0.527474,58.338476,-0.001247,-0.000883,-41.661524,0.498239,1.0,27302,-0.017499,50.755749,-0.001123,-0.000865,-49.244251,0.496919,1.0,27302,-0.017954,48.532252,-0.000785,-0.000637,-51.467748,0.496148,1.0,40192,-0.021726,47.232882,-0.000502,-0.000401,-52.767118,0.495384,1.0,40192,-0.028166,45.945562,-0.000307,-0.000205,-54.054438,0.494623,1.0,27908,-0.035993,44.663823,-0.000185,-4.9e-05,-55.336177,0.494009,1.0,28818,-0.04404,43.629792,-0.000115,7.3e-05,-56.370208,0.493705,1.0,28818,-0.051448,43.117397,-7.6e-05,0.000167,-56.882603,0.493834,1.0,30030,-0.057701,43.33602,-5.1e-05,0.000241,-56.66398,0.494478,1.0,47.299662,0.009223,0.0
15,2000-02-24,0.487885,0.487885,0.487885,0.487885,0.266414,5005,ABEV3.SA,-0.006593,0.501255,1.0,10013,-0.022635,0.468107,0.527474,55.836108,-0.001411,-0.000993,-44.163892,0.496168,1.0,22297,-0.028124,47.267558,-0.001306,-0.000956,-52.732442,0.494209,1.0,22297,-0.033722,43.967014,-0.001014,-0.000715,-56.032986,0.492843,1.0,35187,-0.041108,41.665646,-0.000798,-0.000483,-58.334354,0.491634,1.0,35187,-0.048989,39.630177,-0.000672,-0.000301,-60.369823,0.49058,1.0,22903,-0.05611,37.854405,-0.000609,-0.000165,-62.145595,0.489722,1.0,23813,-0.061754,36.409293,-0.000584,-6.4e-05,-63.590707,0.489049,1.0,23813,-0.0657,35.275314,-0.000577,1.3e-05,-64.724686,0.48848,1.0,25025,-0.068084,34.316915,-0.000578,7.1e-05,-65.683085,0.487885,1.0,44.667003,0.0,0.006593
16,2000-02-25,0.4747,0.4747,0.4747,0.4747,0.259214,3033,ABEV3.SA,-0.013185,0.4986,1.0,6980,-0.025578,0.468107,0.527474,51.363045,-0.001678,-0.001134,-48.636955,0.491875,1.0,19264,-0.032965,40.035143,-0.001689,-0.001107,-59.964857,0.488356,1.0,19264,-0.039332,34.108555,-0.001529,-0.000882,-65.891445,0.485586,1.0,32154,-0.045671,29.441581,-0.001452,-0.000683,-70.558419,0.483167,1.0,32154,-0.050811,25.36783,-0.001454,-0.000539,-74.63217,0.481052,1.0,19870,-0.054089,21.805052,-0.001502,-0.00044,-78.194948,0.479207,1.0,20780,-0.055488,18.696626,-0.00157,-0.000374,-81.303374,0.47757,1.0,20780,-0.055385,15.939449,-0.001646,-0.000329,-84.060551,0.476078,1.0,21992,-0.054294,13.426626,-0.001722,-0.000298,-86.573374,0.4747,1.0,39.583155,0.0,0.013185
17,2000-02-29,0.468107,0.468107,0.468107,0.468107,0.255614,11602,ABEV3.SA,-0.006593,0.49555,1.0,-4622,-0.023218,0.468107,0.527474,46.226741,-0.00205,-0.001321,-53.773259,0.487121,1.0,7662,-0.026799,32.028114,-0.002254,-0.001342,-67.971886,0.482281,1.0,7662,-0.028202,23.875988,-0.00228,-0.001168,-76.124012,0.478594,1.0,20552,-0.028309,17.664949,-0.002366,-0.001027,-82.335051,0.475637,1.0,20552,-0.02647,12.683915,-0.002499,-0.00094,-87.316085,0.473285,1.0,8268,-0.02272,8.722021,-0.002644,-0.000891,-91.277979,0.471437,1.0,9178,-0.0176,5.608988,-0.002784,-0.000867,-94.391012,0.47,1.0,9178,-0.011775,3.18789,-0.002909,-0.000857,-96.81211,0.468904,1.0,10390,-0.005795,1.342663,-0.003018,-0.000854,-98.657337,0.468107,1.0,37.143972,0.0,0.006593


In [None]:
price_data.to_excel('C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx', index=False)

#### Se quiser normal só pular etapa anterior.

In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Para cada alpha, criar as variáveis X_Cols e Y_Cols e treinar o modelo
for alpha in alpha_list:
    # Definindo as colunas de X e Y dinamicamente
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']]
    
    Y_Cols = price_data[f'Prediction_{alpha}']

    # Dividir os dados em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Criar o classificador Random Forest
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Ajustar o modelo
    rand_frst_clf.fit(X_train, y_train)

    # Fazer previsões
    y_pred = rand_frst_clf.predict(X_test)

    # Imprimir a acurácia
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.2f}')
    print('---')


Alpha: 0.1
Correct Prediction (%): 87.10
---
Alpha: 0.2
Correct Prediction (%): 82.89
---
Alpha: 0.3
Correct Prediction (%): 78.82
---
Alpha: 0.4
Correct Prediction (%): 78.33
---
Alpha: 0.5
Correct Prediction (%): 77.49
---
Alpha: 0.6
Correct Prediction (%): 75.25
---
Alpha: 0.7
Correct Prediction (%): 76.44
---
Alpha: 0.8
Correct Prediction (%): 76.58
---
Alpha: 0.9
Correct Prediction (%): 75.18
---


In [34]:
import warnings

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}'
    price_data[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    price_data['Match'] = (price_data[f'Prediction_{alpha}'] == price_data['Verify']).astype(int)  
    accuracy = price_data['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo: 54.94%

Precisão para Alpha: 0.2
Precisão do modelo: 56.81%

Precisão para Alpha: 0.3
Precisão do modelo: 59.68%

Precisão para Alpha: 0.4
Precisão do modelo: 62.14%

Precisão para Alpha: 0.5
Precisão do modelo: 63.79%

Precisão para Alpha: 0.6
Precisão do modelo: 66.03%

Precisão para Alpha: 0.7
Precisão do modelo: 69.83%

Precisão para Alpha: 0.8
Precisão do modelo: 73.57%

Precisão para Alpha: 0.9
Precisão do modelo: 92.30%


In [None]:
import warnings
from datetime import timedelta

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Filtrar últimos 30 dias
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)]

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha}',
                           f'r_percent_{alpha}',
                           f'Price_Rate_Of_Change_{alpha}',
                           f'MACD_{alpha}',
                           f'MACD_EMA_{alpha}',
                           f'OBV_{alpha}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}' para os últimos 30 dias
    last_30_days[f'Prediction_{alpha}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha}'] == last_30_days['Verify']).astype(int)  # 1 para igual, 0 para diferente
    accuracy = last_30_days['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')


In [6]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X) e o target (Y) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha}',
                    f'r_percent_{alpha}',
                    f'Price_Rate_Of_Change_{alpha}',
                    f'MACD_{alpha}',
                    f'MACD_EMA_{alpha}',
                    f'OBV_{alpha}']]
    y = price_data[f'Prediction_{alpha}']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y], axis=1).dropna()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0,oob_score = True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Calcular e imprimir a acurácia para cada divisão
        accuracy = accuracy_score(y_test, y_pred) * 100.0
        print(f'Fold {fold}: Correct Prediction (%): {accuracy:.2f}')
        fold += 1

    print('---')


Alpha: 0.1
Fold 1: Correct Prediction (%): 56.53
Fold 2: Correct Prediction (%): 39.79
Fold 3: Correct Prediction (%): 60.95
Fold 4: Correct Prediction (%): 60.42
Fold 5: Correct Prediction (%): 54.11
---
Alpha: 0.2
Fold 1: Correct Prediction (%): 59.79
Fold 2: Correct Prediction (%): 36.84
Fold 3: Correct Prediction (%): 61.79
Fold 4: Correct Prediction (%): 59.47
Fold 5: Correct Prediction (%): 49.26
---
Alpha: 0.3
Fold 1: Correct Prediction (%): 45.05
Fold 2: Correct Prediction (%): 36.32
Fold 3: Correct Prediction (%): 59.58
Fold 4: Correct Prediction (%): 53.26
Fold 5: Correct Prediction (%): 45.79
---
Alpha: 0.4
Fold 1: Correct Prediction (%): 37.26
Fold 2: Correct Prediction (%): 33.16
Fold 3: Correct Prediction (%): 62.21
Fold 4: Correct Prediction (%): 49.58
Fold 5: Correct Prediction (%): 44.11
---
Alpha: 0.5
Fold 1: Correct Prediction (%): 54.53
Fold 2: Correct Prediction (%): 33.79
Fold 3: Correct Prediction (%): 62.84
Fold 4: Correct Prediction (%): 51.16
Fold 5: Correct P

#### Salvar modelo 

In [8]:
# Salvar o modelo treinado
joblib.dump(rand_frst_clf, 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl')

['C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl']

#### Time Series. Não funcionou.

In [None]:
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier

# Número de divisões no Time Series Split
n_splits = 5

# Resultados gerais
overall_results = []

for alpha in alpha_list:
    print(f"\n=== Alpha: {alpha} ===")
    
    # Preparando as colunas de treino
    X_Cols = price_data[[f'RSI',
                         f'k_percent_{alpha}',
                         f'r_percent_{alpha}',
                         f'Price_Rate_Of_Change_{alpha}',
                         f'MACD_{alpha}',
                         f'MACD_EMA_{alpha}',
                         f'OBV_{alpha}']]
    Y_Cols = price_data[f'Prediction_{alpha}']
    verify = price_data['Verify']
    
    # Criando o modelo Random Forest
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)
    
    # Time Series Split
    tscv = TimeSeriesSplit(n_splits=n_splits)
    
    # Inicializando variáveis para armazenar resultados
    fold = 1
    accuracy_scores_ytest = []
    accuracy_scores_verify = []
    predictions = []
    verify_values = []
    actual_y_test_values = []
    report_dict = {}
    
    for train_index, test_index in tscv.split(X_Cols):
        print(f"  Fold {fold}:")
        
        # Dividindo os dados em treino e teste para a divisão atual
        X_train, X_test = X_Cols.iloc[train_index], X_Cols.iloc[test_index]
        y_train, y_test = Y_Cols.iloc[train_index], Y_Cols.iloc[test_index]
        y_verify = verify.iloc[test_index]
        
        # Treinando o modelo
        rand_frst_clf.fit(X_train, y_train)
        
        # Fazendo predições
        y_pred = rand_frst_clf.predict(X_test)
        
        # Armazenando predições e valores reais
        predictions.extend(y_pred)
        verify_values.extend(y_verify.values)
        actual_y_test_values.extend(y_test.values)
        
        # Calculando acurácias
        accuracy_ytest = accuracy_score(y_test, y_pred)  # Comparação com Y_Cols (Prediction)
        accuracy_verify = accuracy_score(y_verify, y_pred)  # Comparação com Verify
        accuracy_scores_ytest.append(accuracy_ytest)
        accuracy_scores_verify.append(accuracy_verify)
        
        # Exibindo acurácias para o fold atual
        print(f"    Accuracy with Y_test (Prediction): {accuracy_ytest:.2f}")
        print(f"    Accuracy with Verify: {accuracy_verify:.2f}")
        
        # Gerando o relatório de classificação
        target_names = ['Down Day', 'Up Day']  # Ajuste se necessário
        report = classification_report(y_true=y_test, y_pred=y_pred, target_names=target_names, output_dict=True)
        
        # Convertendo o relatório em DataFrame
        report_flat = pd.DataFrame(report).transpose()
        report_flat['accuracy_ytest'] = accuracy_ytest
        report_flat['accuracy_verify'] = accuracy_verify
        
        # Armazenando o relatório no dicionário
        report_dict[fold] = report_flat
        fold += 1
    
    # Concatenando os relatórios de todas as divisões
    final_report_df = pd.concat(report_dict.values(), keys=report_dict.keys())
    
    # Calculando as médias de acurácia
    overall_accuracy_ytest = sum(accuracy_scores_ytest) / len(accuracy_scores_ytest)
    overall_accuracy_verify = sum(accuracy_scores_verify) / len(accuracy_scores_verify)
    
    # Exibindo as médias de acurácia
    print("\n  Médias das Acurácias:")
    print(f"    Média da acurácia com Y_test (Prediction): {overall_accuracy_ytest:.2f}")
    print(f"    Média da acurácia com Verify: {overall_accuracy_verify:.2f}")
    
    # Armazenando resultados gerais
    overall_results.append({
        'alpha': alpha,
        'accuracy_ytest_mean': overall_accuracy_ytest,
        'accuracy_verify_mean': overall_accuracy_verify,
        'final_report': final_report_df
    })

# Exibindo resumo final
print("\n=== Resumo Final ===")
for result in overall_results:
    print(f"Alpha: {result['alpha']}")
    print(f"  Média da acurácia com Y_test (Prediction): {result['accuracy_ytest_mean']:.2f}")
    print(f"  Média da acurácia com Verify: {result['accuracy_verify_mean']:.2f}")
    print("  ---")
