In [3]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [4]:
# Location of the parquet file holding the raw price history.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
PRICE_DATA_PATH = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet'

price_data = pd.read_parquet(PRICE_DATA_PATH)
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.291125,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.276367,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.268998,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.27084,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.276367,6445,ABEV3.SA


In [5]:
import numpy as np
import pandas as pd
import warnings

# Initial setup: parse the dates and order the rows chronologically inside each ticker.
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data = price_data.sort_values(by=['Ticker', 'Date'])

# Row-over-row change in Close, computed per ticker so the first row of every ticker
# is NaN rather than a difference against the previous ticker's last price.
price_data['change_in_price'] = price_data.groupby('Ticker')['Close'].diff()


# Função de suavização exponencial
def exponential_smoothing(data, alpha):
    """Apply simple exponential smoothing to a 1-D sequence.

    The first output equals the first input; every later output is
    ``alpha * data[i] + (1 - alpha) * smoothed[i - 1]``.

    Parameters
    ----------
    data : array-like
        Values to smooth. Lists, NumPy arrays and pandas Series are accepted;
        values are read positionally, so a Series index is ignored.
    alpha : float
        Smoothing factor (weight given to the current observation).

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``data`` (empty for empty input).
    """
    # Normalise to a float array so indexing is always positional.
    values = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(values))
    if len(values) == 0:
        # Nothing to seed the recursion with; return the empty array.
        return smoothed

    smoothed[0] = values[0]
    for i in range(1, len(values)):
        smoothed[i] = alpha * values[i] + (1 - alpha) * smoothed[i - 1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    """Return the sign of the change between each value and the value ``d`` rows later.

    The result is +1 where the later value is higher, -1 where it is lower, 0 where
    it is equal, and NaN for the trailing rows that have no value ``d`` rows ahead.
    """
    future_values = data.shift(-d)
    direction = np.sign(future_values - data)
    return direction

# Função para calcular o target e ajustar valores zero para -1
def calculate_target_verify(data, d):
    """Return the sign of the ``d``-rows-ahead change, with "no change" counted as -1.

    Same as the plain target, except that a zero difference is relabelled -1.
    Rows without a value ``d`` rows ahead stay NaN.
    """
    horizon_change = data.shift(-d) - data
    labels = np.sign(horizon_change)
    # Keep every non-zero label (NaN included); only exact zeros become -1.
    return labels.where(labels != 0, -1)

# Função OBV ajustada
def obv(group, smoothed_col):
    """Compute a running on-balance volume for one group of rows.

    The row's Volume is added to the running total when ``group[smoothed_col]``
    rose versus the previous row, subtracted when it fell, and the total is left
    unchanged otherwise (including the first row, whose difference is NaN).

    Returns a Series of the running totals aligned to ``group.index``.
    """
    deltas = group[smoothed_col].diff()
    running_total = 0
    history = []
    for delta, volume in zip(deltas, group['Volume']):
        if delta > 0:
            running_total = running_total + volume
        elif delta < 0:
            running_total = running_total - volume
        # Flat or NaN change: carry the previous total forward untouched.
        history.append(running_total)
    return pd.Series(history, index=group.index)

# Smoothing factors to evaluate, plus the window lengths shared by the indicators.
alpha_list = [
    0.05, 0.08, 0.10, 0.20, 0.30, 0.40,
    0.50, 0.60, 0.70, 0.80, 0.90,
]
n = 14  # span of the exponential moving averages behind the RSI
d = 60  # number of rows the target looks ahead
t = 14  # rolling window for the %R / %K lows and highs
e = 14  # periods used by the price rate of change

# Rolling t-row low / high per ticker. These depend only on the raw Low / High
# columns, not on alpha, so they are computed once instead of on every iteration.
low_t = price_data.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=t).min())
high_t = price_data.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=t).max())

# Build the target and the technical indicators for every smoothing factor.
for alpha in alpha_list:
    smoothed_col = f'Smoothed_Close_{alpha:.2f}'
    target_col = f'Prediction_{alpha:.2f}'
    obv_col = f'OBV_{alpha:.2f}'
    price_rate_col = f'Price_Rate_Of_Change_{alpha:.2f}'
    macd_col = f'MACD_{alpha:.2f}'
    macd_ema_col = f'MACD_EMA_{alpha:.2f}'
    low_col = f'low_{t}_{alpha:.2f}'
    high_col = f'high_{t}_{alpha:.2f}'
    k_percent_col = f'k_percent_{alpha:.2f}'
    r_percent_col = f'r_percent_{alpha:.2f}'

    # Exponentially smoothed Close, per ticker.
    price_data[smoothed_col] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )

    # Target: direction of the smoothed price d rows ahead.
    price_data[target_col] = price_data.groupby('Ticker')[smoothed_col].transform(
        lambda x: calculate_target(x, d)
    )

    # On-balance volume driven by the smoothed price.
    obv_groups = price_data.groupby('Ticker').apply(lambda group: obv(group, smoothed_col))
    # Drop the Ticker level so the result aligns with price_data's own index.
    price_data[obv_col] = obv_groups.reset_index(level=0, drop=True)

    # Price rate of change of the smoothed price over e periods (computed once).
    price_data[price_rate_col] = price_data.groupby('Ticker')[smoothed_col].transform(
        lambda x: x.pct_change(periods=e)
    )

    # Williams %R. The low/high columns are stored per alpha to keep the existing
    # column layout, even though their values do not depend on alpha.
    r_percent = ((high_t - price_data[smoothed_col]) / (high_t - low_t)) * (-100)
    price_data[low_col] = low_t
    price_data[high_col] = high_t
    price_data[r_percent_col] = r_percent

    # Stochastic %K.
    k_percent = ((price_data[smoothed_col] - low_t) / (high_t - low_t)) * 100
    price_data[k_percent_col] = k_percent

    # MACD and its 9-span signal line. The signal line is grouped by ticker as well;
    # an ungrouped ewm would carry one ticker's MACD history into the next ticker.
    ema_26 = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26
    ema_9_macd = macd.groupby(price_data['Ticker']).transform(lambda x: x.ewm(span=9).mean())
    price_data[macd_col] = macd
    price_data[macd_ema_col] = ema_9_macd


# Up days and down days: split the per-ticker price change into gains and losses.
up_df = price_data[['Ticker', 'change_in_price']].copy()
down_df = price_data[['Ticker', 'change_in_price']].copy()

# Gains: negative changes become 0 (NaN stays NaN).
# clip is used instead of a chained `.loc[...] = .loc[...] = 0`, which also assigned
# to `.loc['change_in_price']` and thereby appended a spurious row to the frame.
up_df['change_in_price'] = up_df['change_in_price'].clip(lower=0)

# Losses: positive changes become 0, the rest is kept as an absolute magnitude.
down_df['change_in_price'] = down_df['change_in_price'].clip(upper=0).abs()

# Exponentially weighted average gain and loss, per ticker.
ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())

relative_strength = ewma_up / ewma_down

# Relative Strength Index.
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['down_days'] = down_df['change_in_price']
price_data['up_days'] = up_df['change_in_price']
price_data['RSI'] = relative_strength_index

# Reference series for verification: smoothing with alpha = 1.
alpha_verify = 1
close_by_ticker = price_data.groupby('Ticker')['Close']
price_data['Smoothed_Close_1'] = close_by_ticker.transform(
    lambda x: exponential_smoothing(x.values, alpha_verify)
)

# Verification label built from that reference series, d rows ahead.
reference_by_ticker = price_data.groupby('Ticker')['Smoothed_Close_1']
price_data['Verify'] = reference_by_ticker.transform(
    lambda x: calculate_target_verify(x, d)
)

# Close shifted d rows into the future within each ticker.
# (The column keeps the name 'Close_10', but the shift is d rows, not 10.)
price_data['Close_10'] = price_data.groupby('Ticker')['Close'].shift(-d)

# Drop every row that still has a missing value.
price_data = price_data.dropna()
# Silence all warnings from here on.
warnings.filterwarnings("ignore")
# Final preview.
price_data.tail()




  price_data[high_col] = high_t
  price_data[r_percent_col] = r_percent
  price_data[k_percent_col] = k_percent
  price_data[macd_col] = macd
  price_data[macd_ema_col] = ema_9_macd
  price_data[smoothed_col] = price_data.groupby('Ticker')['Close'].transform(
  price_data[target_col] = price_data.groupby('Ticker')[smoothed_col].transform(
  price_data[obv_col] = obv_groups.reset_index(level=0, drop=True)
  price_data[price_rate_col] = price_data.groupby('Ticker')[smoothed_col].transform(
  price_data[low_col] = low_t
  price_data[high_col] = high_t
  price_data[r_percent_col] = r_percent
  price_data[k_percent_col] = k_percent
  price_data[macd_col] = macd
  price_data[macd_ema_col] = ema_9_macd
  price_data['down_days'] = down_df['change_in_price']
  price_data['up_days'] = up_df['change_in_price']
  price_data['RSI'] = relative_strength_index
  price_data['Smoothed_Close_1'] = price_data.groupby('Ticker')['Close'].transform(
  price_data['Verify'] = price_data.groupby('Ticker')['Smoo

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.05,Prediction_0.05,OBV_0.05,Price_Rate_Of_Change_0.05,low_14_0.05,high_14_0.05,r_percent_0.05,k_percent_0.05,MACD_0.05,MACD_EMA_0.05,Smoothed_Close_0.08,Prediction_0.08,OBV_0.08,Price_Rate_Of_Change_0.08,low_14_0.08,high_14_0.08,r_percent_0.08,k_percent_0.08,MACD_0.08,MACD_EMA_0.08,Smoothed_Close_0.10,Prediction_0.10,OBV_0.10,Price_Rate_Of_Change_0.10,low_14_0.10,high_14_0.10,r_percent_0.10,k_percent_0.10,MACD_0.10,MACD_EMA_0.10,Smoothed_Close_0.20,Prediction_0.20,OBV_0.20,Price_Rate_Of_Change_0.20,low_14_0.20,high_14_0.20,r_percent_0.20,k_percent_0.20,MACD_0.20,MACD_EMA_0.20,Smoothed_Close_0.30,Prediction_0.30,OBV_0.30,Price_Rate_Of_Change_0.30,low_14_0.30,high_14_0.30,r_percent_0.30,k_percent_0.30,MACD_0.30,MACD_EMA_0.30,Smoothed_Close_0.40,Prediction_0.40,OBV_0.40,Price_Rate_Of_Change_0.40,low_14_0.40,high_14_0.40,r_percent_0.40,k_percent_0.40,MACD_0.40,MACD_EMA_0.40,Smoothed_Close_0.50,Prediction_0.50,OBV_0.50,Price_Rate_Of_Change_0.50,low_14_0.50,high_14_0.50,r_percent_0.50,k_percent_0.50,MACD_0.50,MACD_EMA_0.50,Smoothed_Close_0.60,Prediction_0.60,OBV_0.60,Price_Rate_Of_Change_0.60,low_14_0.60,high_14_0.60,r_percent_0.60,k_percent_0.60,MACD_0.60,MACD_EMA_0.60,Smoothed_Close_0.70,Prediction_0.70,OBV_0.70,Price_Rate_Of_Change_0.70,low_14_0.70,high_14_0.70,r_percent_0.70,k_percent_0.70,MACD_0.70,MACD_EMA_0.70,Smoothed_Close_0.80,Prediction_0.80,OBV_0.80,Price_Rate_Of_Change_0.80,low_14_0.80,high_14_0.80,r_percent_0.80,k_percent_0.80,MACD_0.80,MACD_EMA_0.80,Smoothed_Close_0.90,Prediction_0.90,OBV_0.90,Price_Rate_Of_Change_0.90,low_14_0.90,high_14_0.90,r_percent_0.90,k_percent_0.90,MACD_0.90,MACD_EMA_0.90,down_days,up_days,RSI,Smoothed_Close_1,Verify,Close_10
11719,2024-10-21,61.349998,61.380001,60.27,60.330002,59.796291,19044900,VALE3.SA,-0.219997,60.598574,-1.0,4210887480,0.01825,60.25,65.349998,-93.165214,6.834786,0.391453,0.312635,61.018168,-1.0,4463534164,0.019125,60.25,65.349998,-84.937872,15.062128,0.609507,0.568167,61.186569,-1.0,3025218948,0.016002,60.25,65.349998,-81.635901,18.364099,0.680267,0.674864,61.210399,-1.0,2525491470,-0.0087,60.25,65.349998,-81.168639,18.831361,0.68702,0.841824,60.967811,-1.0,578852862,-0.027548,60.25,65.349998,-85.925279,14.074721,0.587673,0.826106,60.779218,-1.0,1048074666,-0.038932,60.25,65.349998,-89.623175,10.376825,0.513272,0.791269,60.637876,-1.0,922625226,-0.045603,60.25,65.349998,-92.394595,7.605405,0.462292,0.761929,60.529972,-1.0,533187538,-0.049456,60.25,65.349998,-94.510345,5.489655,0.425407,0.739343,60.449879,-1.0,652686242,-0.051665,60.25,65.349998,-96.080803,3.919197,0.397302,0.721786,60.393414,-1.0,-948125142,-0.052983,60.25,65.349998,-97.187969,2.812031,0.375274,0.707803,60.355572,-1.0,-752530438,-0.05389,60.25,65.349998,-97.929955,2.070045,0.357783,0.696427,0.219997,0.0,39.561996,60.330002,-1.0,54.02
11720,2024-10-22,60.150002,60.66,59.900002,60.41,59.87558,14814000,VALE3.SA,0.079998,60.589145,-1.0,4196073480,0.014123,59.900002,63.799999,-82.329637,17.670363,0.384274,0.326963,60.969515,-1.0,4448720164,0.012499,59.900002,63.799999,-72.576566,27.423434,0.580576,0.570649,61.108912,-1.0,3010404948,0.008105,59.900002,63.799999,-69.00228,30.99772,0.637027,0.667297,61.050319,-1.0,2510677470,-0.01899,59.900002,63.799999,-70.504656,29.495344,0.599191,0.793297,60.800467,-1.0,564038862,-0.037014,59.900002,63.799999,-76.911119,23.088881,0.485733,0.758031,60.631531,-1.0,1033260666,-0.046866,59.900002,63.799999,-81.242831,18.757169,0.407112,0.714438,60.523938,-1.0,907811226,-0.052131,59.900002,63.799999,-84.001627,15.998373,0.355082,0.68056,60.457989,-1.0,518373538,-0.054956,59.900002,63.799999,-85.692625,14.307375,0.318886,0.655252,60.421964,-1.0,637872242,-0.056525,59.900002,63.799999,-86.616349,13.383651,0.292701,0.635969,60.406683,-1.0,-933311142,-0.057478,59.900002,63.799999,-87.00817,12.99183,0.273353,0.620913,60.404557,-1.0,-737716438,-0.058126,59.900002,63.799999,-87.06267,12.93733,0.258863,0.608914,0.0,0.079998,40.59142,60.41,-1.0,52.66
11721,2024-10-23,59.889999,59.900002,59.150002,59.349998,58.824959,18289900,VALE3.SA,-1.060001,60.527188,-1.0,4177783580,0.010385,59.150002,63.740002,-69.99594,30.00406,0.369328,0.335436,60.839954,-1.0,4430430264,0.006705,59.150002,63.740002,-63.181875,36.818125,0.540958,0.56471,60.933021,-1.0,2992115048,0.001367,59.150002,63.740002,-61.154271,38.845729,0.58186,0.650209,60.710255,-1.0,2492387570,-0.026669,59.150002,63.740002,-66.007548,33.992452,0.496422,0.733922,60.365327,-1.0,545748962,-0.043008,59.150002,63.740002,-73.522329,26.477671,0.365618,0.679549,60.118918,-1.0,1014970766,-0.050909,59.150002,63.740002,-78.890714,21.109286,0.278407,0.627232,59.936968,-1.0,889521326,-0.054567,59.150002,63.740002,-82.85476,17.14524,0.220216,0.588491,59.793195,-1.0,500083638,-0.056197,59.150002,63.740002,-85.987079,14.012921,0.178764,0.559954,59.671588,-1.0,619582342,-0.056883,59.150002,63.740002,-88.63646,11.36354,0.147555,0.538286,59.561335,-1.0,-951601042,-0.057128,59.150002,63.740002,-91.03848,8.96152,0.12295,0.52132,59.455454,-1.0,-756006338,-0.057149,59.150002,63.740002,-93.345255,6.654745,0.1027,0.507671,1.060001,0.0,32.204911,59.349998,-1.0,52.32
11722,2024-10-24,59.34,59.700001,59.209999,59.700001,59.171864,11331700,VALE3.SA,0.350002,60.485829,-1.0,4166451880,0.007529,59.150002,63.740002,-70.897015,29.102985,0.350109,0.33837,60.748757,-1.0,4419098564,0.002482,59.150002,63.740002,-65.168721,34.831279,0.496478,0.551064,60.809719,-1.0,2980783348,-0.003329,59.150002,63.740002,-63.840588,36.159412,0.52217,0.624601,60.508204,-1.0,2481055870,-0.030239,59.150002,63.740002,-70.409528,29.590472,0.394131,0.665964,60.165729,-1.0,534417262,-0.043451,59.150002,63.740002,-77.870864,22.129136,0.251422,0.593923,59.951351,-1.0,1003639066,-0.048364,59.150002,63.740002,-82.541407,17.458593,0.161029,0.533991,59.818484,-1.0,878189626,-0.049567,59.150002,63.740002,-85.436103,14.563897,0.10259,0.491311,59.737278,-1.0,488751938,-0.049214,59.150002,63.740002,-87.205299,12.794701,0.062484,0.46046,59.691477,-1.0,630914042,-0.048244,59.150002,63.740002,-88.20315,11.79685,0.033741,0.437377,59.672268,-1.0,-940269342,-0.047043,59.150002,63.740002,-88.621653,11.378347,0.012561,0.419568,59.675546,-1.0,-744674638,-0.04577,59.150002,63.740002,-88.550227,11.449773,-0.003263,0.405485,0.0,0.350002,37.152011,59.700001,-1.0,53.029999
11723,2024-10-25,60.57,62.470001,60.52,61.73,61.183903,38466000,VALE3.SA,2.029999,60.548037,-1.0,4204917880,0.006055,59.150002,62.52,-58.515251,41.484749,0.336025,0.337901,60.827257,-1.0,4457564564,0.000566,59.150002,62.52,-50.229802,49.770198,0.462234,0.533298,60.901747,-1.0,3019249348,-0.00511,59.150002,62.52,-48.019415,51.980585,0.476795,0.59504,60.752563,-1.0,2519521870,-0.0283,59.150002,62.52,-52.446223,47.553777,0.328989,0.598569,60.63501,-1.0,572883262,-0.036593,59.150002,62.52,-55.93445,44.06555,0.196522,0.514443,60.66281,-1.0,1042105066,-0.037265,59.150002,62.52,-55.109514,44.890486,0.123987,0.45199,60.774242,-1.0,916655626,-0.035086,59.150002,62.52,-51.802938,48.197062,0.085506,0.41015,60.932911,-1.0,527217938,-0.032039,59.150002,62.52,-47.094656,52.905344,0.066047,0.381577,61.118443,-1.0,669380042,-0.028882,59.150002,62.52,-41.589262,58.410738,0.058018,0.361505,61.318453,-1.0,-901803342,-0.025881,59.150002,62.52,-35.654234,64.345766,0.05725,0.347105,61.524554,-1.0,-706208638,-0.023122,59.150002,62.52,-29.538474,70.461526,0.061254,0.336638,0.0,2.029999,57.773192,61.73,-1.0,52.91


#### Se quiser rodar para um único ticker (execute apenas uma das células abaixo).

In [6]:
# Keep only the ABEV3.SA rows.
# NOTE(review): this rebinds price_data to the filtered rows, so selecting a different
# ticker afterwards requires rebuilding price_data first.
ticker_mask = price_data['Ticker'].isin(['ABEV3.SA'])
price_data = price_data[ticker_mask]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.05,Prediction_0.05,OBV_0.05,Price_Rate_Of_Change_0.05,low_14_0.05,high_14_0.05,r_percent_0.05,k_percent_0.05,MACD_0.05,MACD_EMA_0.05,Smoothed_Close_0.08,Prediction_0.08,OBV_0.08,Price_Rate_Of_Change_0.08,low_14_0.08,high_14_0.08,r_percent_0.08,k_percent_0.08,MACD_0.08,MACD_EMA_0.08,Smoothed_Close_0.10,Prediction_0.10,OBV_0.10,Price_Rate_Of_Change_0.10,low_14_0.10,high_14_0.10,r_percent_0.10,k_percent_0.10,MACD_0.10,MACD_EMA_0.10,Smoothed_Close_0.20,Prediction_0.20,OBV_0.20,Price_Rate_Of_Change_0.20,low_14_0.20,high_14_0.20,r_percent_0.20,k_percent_0.20,MACD_0.20,MACD_EMA_0.20,Smoothed_Close_0.30,Prediction_0.30,OBV_0.30,Price_Rate_Of_Change_0.30,low_14_0.30,high_14_0.30,r_percent_0.30,k_percent_0.30,MACD_0.30,MACD_EMA_0.30,Smoothed_Close_0.40,Prediction_0.40,OBV_0.40,Price_Rate_Of_Change_0.40,low_14_0.40,high_14_0.40,r_percent_0.40,k_percent_0.40,MACD_0.40,MACD_EMA_0.40,Smoothed_Close_0.50,Prediction_0.50,OBV_0.50,Price_Rate_Of_Change_0.50,low_14_0.50,high_14_0.50,r_percent_0.50,k_percent_0.50,MACD_0.50,MACD_EMA_0.50,Smoothed_Close_0.60,Prediction_0.60,OBV_0.60,Price_Rate_Of_Change_0.60,low_14_0.60,high_14_0.60,r_percent_0.60,k_percent_0.60,MACD_0.60,MACD_EMA_0.60,Smoothed_Close_0.70,Prediction_0.70,OBV_0.70,Price_Rate_Of_Change_0.70,low_14_0.70,high_14_0.70,r_percent_0.70,k_percent_0.70,MACD_0.70,MACD_EMA_0.70,Smoothed_Close_0.80,Prediction_0.80,OBV_0.80,Price_Rate_Of_Change_0.80,low_14_0.80,high_14_0.80,r_percent_0.80,k_percent_0.80,MACD_0.80,MACD_EMA_0.80,Smoothed_Close_0.90,Prediction_0.90,OBV_0.90,Price_Rate_Of_Change_0.90,low_14_0.90,high_14_0.90,r_percent_0.90,k_percent_0.90,MACD_0.90,MACD_EMA_0.90,down_days,up_days,RSI,Smoothed_Close_1,Verify,Close_10
5686,2024-10-17,12.78,12.96,12.75,12.86,12.613667,18266900,ABEV3.SA,-0.08,12.895616,-1.0,-2239069075,0.018963,12.72,13.85,-84.458751,15.541249,0.144057,0.154677,13.016697,-1.0,-2644211417,0.016107,12.72,13.85,-73.743682,26.256318,0.137883,0.155829,13.042728,-1.0,-2318440493,0.014899,12.72,13.85,-71.439993,28.560007,0.129701,0.150074,13.033806,-1.0,-1995026547,0.013629,12.72,13.85,-72.229588,27.770412,0.104235,0.131437,12.986868,-1.0,-2308176873,0.012777,12.72,13.85,-76.383351,23.616649,0.089484,0.124887,12.947368,-1.0,-1604317523,0.012166,12.72,13.85,-79.878986,20.121014,0.077601,0.120215,12.91981,-1.0,-1505337305,0.012174,12.72,13.85,-82.317765,17.682235,0.068414,0.116176,12.900865,-1.0,-1074592587,0.012606,12.72,13.85,-83.994256,16.005744,0.061542,0.112774,12.887189,-1.0,-1303409175,0.013195,12.72,13.85,-85.204558,14.795442,0.056379,0.109978,12.876679,-1.0,-1300078835,0.013738,12.72,13.85,-86.134603,13.865397,0.052412,0.10769,12.867987,-1.0,-1235127973,0.014103,12.72,13.85,-86.903852,13.096148,0.049282,0.105805,0.08,0.0,36.610573,12.86,-1.0,11.25
5687,2024-10-18,12.96,12.96,12.8,12.88,12.633285,19422900,ABEV3.SA,0.02,12.894836,-1.0,-2258491975,0.01816,12.75,13.85,-86.833133,13.166867,0.138341,0.151409,13.005761,-1.0,-2663634317,0.015065,12.75,13.85,-76.749011,23.250989,0.12923,0.150509,13.026456,-1.0,-2337863393,0.013722,12.75,13.85,-74.867686,25.132314,0.119703,0.144,13.003045,-1.0,-2014449447,0.011529,12.75,13.85,-76.995937,23.004063,0.089332,0.123016,12.954808,-1.0,-2327599773,0.009876,12.75,13.85,-81.381103,18.618897,0.071555,0.114221,12.920421,-1.0,-1623740423,0.008538,12.75,13.85,-84.507215,15.492785,0.05845,0.107862,12.899905,-1.0,-1524760205,0.007631,12.75,13.85,-86.372293,13.627707,0.049068,0.102754,12.888346,-1.0,-1094015487,0.006894,12.75,13.85,-87.423082,12.576918,0.04245,0.098709,12.882157,-1.0,-1322832075,0.006119,12.75,13.85,-87.985758,12.014242,0.037719,0.095526,12.879336,-1.0,-1280655935,0.005222,12.75,13.85,-88.242191,11.757809,0.03426,0.093004,12.878799,-1.0,-1215705073,0.004202,12.75,13.85,-88.291024,11.708976,0.031678,0.09098,0.0,0.02,38.324338,12.88,-1.0,11.02
5688,2024-10-21,12.88,12.94,12.81,12.83,12.584243,28877400,ABEV3.SA,-0.05,12.891594,-1.0,-2287369375,0.01688,12.75,13.85,-87.127841,12.872159,0.132026,0.147533,12.9917,-1.0,-2692511717,0.013289,12.75,13.85,-78.027273,21.972727,0.119856,0.144378,13.00681,-1.0,-2366740793,0.011643,12.75,13.85,-76.653646,23.346354,0.108939,0.136988,12.968436,-1.0,-2043326847,0.007815,12.75,13.85,-80.142206,19.857794,0.073878,0.113189,12.917366,-1.0,-2356477173,0.004798,12.75,13.85,-84.784957,15.215043,0.053706,0.102118,12.884252,-1.0,-1652617823,0.002305,12.75,13.85,-87.795242,12.204758,0.039893,0.094268,12.864952,-1.0,-1553637605,0.000299,12.75,13.85,-89.549788,10.450212,0.030563,0.088316,12.853338,-1.0,-1122892887,-0.001453,12.75,13.85,-90.605602,9.394398,0.024215,0.08381,12.845647,-1.0,-1351709475,-0.003065,12.75,13.85,-91.304825,8.695175,0.019757,0.080372,12.839867,-1.0,-1309533335,-0.004545,12.75,13.85,-91.830264,8.169736,0.016499,0.077703,12.83488,-1.0,-1244582473,-0.005857,12.75,13.85,-92.283656,7.716344,0.014021,0.075588,0.05,0.0,35.551811,12.83,-1.0,11.05
5689,2024-10-22,12.77,12.9,12.73,12.82,12.574433,34756100,ABEV3.SA,-0.01,12.888014,-1.0,-2322125475,0.014986,12.73,13.85,-85.891568,14.108432,0.125289,0.143084,12.977964,-1.0,-2727267817,0.010587,12.73,13.85,-77.860328,22.139672,0.110051,0.137513,12.988129,-1.0,-2401496893,0.008442,12.73,13.85,-76.952748,23.047252,0.097774,0.129145,12.938749,-1.0,-2078082947,0.002204,12.73,13.85,-81.361706,18.638294,0.058559,0.102263,12.888156,-1.0,-2391233273,-0.002695,12.73,13.85,-85.878918,14.121082,0.03678,0.08905,12.858551,-1.0,-1687373923,-0.006661,12.73,13.85,-88.522172,11.477828,0.02285,0.079985,12.842476,-1.0,-1588393705,-0.009874,12.73,13.85,-89.957463,10.042537,0.013924,0.073438,12.833335,-1.0,-1157648987,-0.012579,12.73,13.85,-90.77361,9.22639,0.008057,0.06866,12.827694,-1.0,-1386465575,-0.014887,12.73,13.85,-91.277298,8.722702,0.004027,0.065103,12.823973,-1.0,-1344289435,-0.016845,12.73,13.85,-91.609503,8.390497,0.001128,0.062388,12.821488,-1.0,-1279338573,-0.018492,12.73,13.85,-91.83142,8.16858,-0.001041,0.060262,0.01,0.0,34.968018,12.82,-1.0,10.87
5690,2024-10-24,12.72,12.89,12.68,12.83,12.584243,19295300,ABEV3.SA,0.01,12.885113,-1.0,-2341420775,0.011165,12.68,13.85,-82.468969,17.531031,0.118352,0.138138,12.966127,-1.0,-2746563117,0.00492,12.68,13.85,-75.544735,24.455265,0.10017,0.130044,12.972316,-1.0,-2420792193,0.00161,12.68,13.85,-75.015749,24.984251,0.08665,0.120646,12.916999,-1.0,-2097378247,-0.010058,12.68,13.85,-79.743715,20.256285,0.044155,0.090641,12.870709,-1.0,-2410528573,-0.019456,12.68,13.85,-83.700111,16.299889,0.021708,0.075582,12.847131,-1.0,-1706669223,-0.027239,12.68,13.85,-85.715347,14.284653,0.008325,0.065653,12.836238,-1.0,-1607689005,-0.0338,12.68,13.85,-86.646353,13.353647,0.000231,0.058796,12.831334,-1.0,-1176944287,-0.039479,12.68,13.85,-87.065495,12.934505,-0.004854,0.053957,12.829308,-1.0,-1367170275,-0.044476,12.68,13.85,-87.23865,12.76135,-0.008215,0.05044,12.828795,-1.0,-1324994135,-0.048926,12.68,13.85,-87.282542,12.717458,-0.010543,0.047802,12.829149,-1.0,-1260043273,-0.052941,12.68,13.85,-87.252275,12.747725,-0.012219,0.045766,0.0,0.01,36.177279,12.83,-1.0,10.91


In [2]:
# Keep only the ITUB4.SA rows.
# NOTE(review): this rebinds price_data to the filtered rows, so selecting a different
# ticker afterwards requires rebuilding price_data first.
ticker_mask = price_data['Ticker'].isin(['ITUB4.SA'])
price_data = price_data[ticker_mask]
price_data.tail()

NameError: name 'price_data' is not defined

In [None]:
# Keep only the MGLU3.SA rows.
# NOTE(review): this rebinds price_data to the filtered rows, so selecting a different
# ticker afterwards requires rebuilding price_data first.
ticker_mask = price_data['Ticker'].isin(['MGLU3.SA'])
price_data = price_data[ticker_mask]
price_data.tail()

In [None]:
# Keep only the PETR4.SA rows.
# NOTE(review): this rebinds price_data to the filtered rows, so selecting a different
# ticker afterwards requires rebuilding price_data first.
ticker_mask = price_data['Ticker'].isin(['PETR4.SA'])
price_data = price_data[ticker_mask]
price_data.tail()

In [None]:
# Keep only the VALE3.SA rows.
# NOTE(review): this rebinds price_data to the filtered rows, so selecting a different
# ticker afterwards requires rebuilding price_data first.
ticker_mask = price_data['Ticker'].isin(['VALE3.SA'])
price_data = price_data[ticker_mask]
price_data.tail()

#### Para rodar com todos os tickers, basta pular a etapa anterior.

In [8]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# For every alpha: pick that alpha's feature columns and target, fit a random forest
# on a random train/test split and report the test accuracy.
# NOTE(review): train_test_split shuffles rows by default while the target looks d rows
# ahead, so neighbouring rows can land on both sides of the split -- confirm this is
# the intended evaluation.
for alpha in alpha_list:
    feature_columns = [
        'RSI',
        f'k_percent_{alpha:.2f}',
        f'r_percent_{alpha:.2f}',
        f'Price_Rate_Of_Change_{alpha:.2f}',
        f'MACD_{alpha:.2f}',
        f'MACD_EMA_{alpha:.2f}',
        f'OBV_{alpha:.2f}',
    ]
    X_Cols = price_data[feature_columns]
    Y_Cols = price_data[f'Prediction_{alpha:.2f}']

    # Random split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Fit the forest and predict the held-out rows.
    rand_frst_clf = RandomForestClassifier(
        n_estimators=100,
        criterion="gini",
        oob_score=True,
        random_state=0,
    )
    y_pred = rand_frst_clf.fit(X_train, y_train).predict(X_test)

    # Share of correct predictions, as a percentage.
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(
        f'Alpha: {alpha}',
        f'Correct Prediction (%): {accuracy:.5f}',
        '---',
        sep='\n',
    )


Alpha: 0.1
Correct Prediction (%): 88.19396
---
Alpha: 0.2
Correct Prediction (%): 84.11806
---
Alpha: 0.3
Correct Prediction (%): 84.04779
---
Alpha: 0.4
Correct Prediction (%): 83.69642
---
Alpha: 0.5
Correct Prediction (%): 81.09628
---
Alpha: 0.6
Correct Prediction (%): 80.81518
---
Alpha: 0.7
Correct Prediction (%): 81.09628
---
Alpha: 0.8
Correct Prediction (%): 79.05833
---
Alpha: 0.9
Correct Prediction (%): 78.77723
---


#### Últimos 30 dias

In [26]:
import warnings
from datetime import timedelta

# Silence all warnings.
warnings.filterwarnings("ignore")

# Rows from the last 30 calendar days of the data. An explicit copy is taken because
# columns are assigned below; writing into a plain slice of price_data is ambiguous
# (SettingWithCopy) and may or may not touch the parent frame.
cutoff_date = price_data['Date'].max() - timedelta(days=30)
last_30_days = price_data[price_data['Date'] >= cutoff_date].copy()

# For every alpha: predict from that alpha's features and compare with 'Verify'.
# NOTE(review): rand_frst_clf is whatever model an earlier cell fitted last, so the
# same single model is applied to every alpha's feature set, and these rows may have
# been part of its training data -- confirm this is intended.
for alpha in alpha_list:
    # Feature matrix as a NumPy array.
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha:.2f}',
                           f'r_percent_{alpha:.2f}',
                           f'Price_Rate_Of_Change_{alpha:.2f}',
                           f'MACD_{alpha:.2f}',
                           f'MACD_EMA_{alpha:.2f}',
                           f'OBV_{alpha:.2f}']].values

    # Overwrite 'Prediction_{alpha}' for these rows with the model's predictions.
    last_30_days[f'Prediction_{alpha:.2f}'] = rand_frst_clf.predict(X_Cols)

    # 1 where the prediction equals 'Verify', 0 otherwise; the mean is the hit rate.
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha:.2f}'] == last_30_days['Verify']).astype(int)
    accuracy = last_30_days['Match'].mean()

    # Report the hit rate for this alpha.
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo nos últimos 30 dias: 68.42%

Precisão para Alpha: 0.2
Precisão do modelo nos últimos 30 dias: 89.47%

Precisão para Alpha: 0.3
Precisão do modelo nos últimos 30 dias: 100.00%

Precisão para Alpha: 0.4
Precisão do modelo nos últimos 30 dias: 100.00%

Precisão para Alpha: 0.5
Precisão do modelo nos últimos 30 dias: 100.00%

Precisão para Alpha: 0.6
Precisão do modelo nos últimos 30 dias: 100.00%

Precisão para Alpha: 0.7
Precisão do modelo nos últimos 30 dias: 100.00%

Precisão para Alpha: 0.8
Precisão do modelo nos últimos 30 dias: 100.00%

Precisão para Alpha: 0.9
Precisão do modelo nos últimos 30 dias: 100.00%


#### Time Series com Verify e plotagem

In [7]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score
import winsound


# Number of folds for the time-series cross-validation.
n_splits = 5

# Accumulators for predictions and reference values across all alphas and folds.
all_predictions, all_verify_values, all_actual_values = [], [], []
# NOTE(review): the two accuracy lists below are not appended to by the loop in this cell.
all_accuracy_scores_ytest, all_accuracy_scores_verify = [], []

# One (alpha, mean y_test accuracy, mean Verify accuracy) tuple per alpha.
mean_accuracy_by_alpha = []

# Time-series cross-validation for every alpha.
# NOTE(review): folds are cut by row position with no gap, while the target looks d rows
# ahead and price_data was sorted by Ticker then Date -- confirm the folds are
# chronological for the data being used (e.g. a single ticker).
for alpha in alpha_list:
    # Feature columns, target and verification label for this alpha.
    feature_columns = [
        'RSI',
        f'k_percent_{alpha:.2f}',
        f'r_percent_{alpha:.2f}',
        f'Price_Rate_Of_Change_{alpha:.2f}',
        f'MACD_{alpha:.2f}',
        f'MACD_EMA_{alpha:.2f}',
        f'OBV_{alpha:.2f}',
    ]
    target_column = f'Prediction_{alpha:.2f}'

    # Drop rows with a missing value in any of the selected columns.
    data = price_data[feature_columns + [target_column, 'Verify']].dropna()
    X = data[feature_columns]
    y = data[target_column]
    verify = data['Verify']

    tscv = TimeSeriesSplit(n_splits=n_splits)

    # Per-fold accuracies for this alpha.
    accuracy_scores_ytest_fold = []
    accuracy_scores_verify_fold = []

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Positional train / test selection for this fold.
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # Fit a fresh forest on the training rows and predict the test rows.
        rand_frst_clf = RandomForestClassifier(
            n_estimators=100,
            criterion="gini",
            random_state=0,
            oob_score=True,
        )
        y_pred = rand_frst_clf.fit(X_train, y_train).predict(X_test)

        # Keep predictions and reference values for later analysis.
        all_predictions.extend(y_pred)
        all_verify_values.extend(y_verify.values)
        all_actual_values.extend(y_test.values)

        # Accuracy against the smoothed target and against 'Verify', in percent.
        accuracy_ytest = accuracy_score(y_test, y_pred) * 100.0
        accuracy_verify = accuracy_score(y_verify, y_pred) * 100.0
        accuracy_scores_ytest_fold.append(accuracy_ytest)
        accuracy_scores_verify_fold.append(accuracy_verify)

        # Per-fold report.
        print(
            f'Fold {fold}:',
            f' - Correct Prediction (y_test): {accuracy_ytest:.2f}%',
            f' - Correct Prediction (Verify): {accuracy_verify:.2f}%',
            sep='\n',
        )
        fold += 1

    # Mean accuracy over the folds for this alpha.
    mean_accuracy_ytest = np.mean(accuracy_scores_ytest_fold)
    mean_accuracy_verify = np.mean(accuracy_scores_verify_fold)
    mean_accuracy_by_alpha.append((alpha, mean_accuracy_ytest, mean_accuracy_verify))

    print(f'Média de acurácia (y_test) para alpha {alpha}: {mean_accuracy_ytest:.2f}%')
    print(f'Média de acurácia (Verify) para alpha {alpha}: {mean_accuracy_verify:.2f}%')
    print('---')

# Overall summary: one line per alpha with both mean accuracies.
print("Resumo geral das médias:")
for summary_row in mean_accuracy_by_alpha:
    alpha, acc_ytest, acc_verify = summary_row
    print(f'Alpha: {alpha:.2f} - Média Acurácia (y_test): {acc_ytest:.2f}% - Média Acurácia (Verify): {acc_verify:.2f}%')

# Optional: plot the mean accuracies against alpha.
import matplotlib.pyplot as plt

# Transpose the list of (alpha, y_test mean, Verify mean) tuples into three sequences.
alphas, acc_ytest_values, acc_verify_values = zip(*mean_accuracy_by_alpha)

fig, ax = plt.subplots()
ax.plot(alphas, acc_ytest_values, label='Acurácia (y_test)', marker='o')
ax.plot(alphas, acc_verify_values, label='Acurácia (Verify)', marker='x')
ax.set_title("Média de Acurácia para Diferentes Alphas")
ax.set_xlabel('Alpha')
ax.set_ylabel('Acurácia (%)')
ax.legend()
ax.grid(True)
plt.show()
# Audible signal that the cell has finished.
winsound.Beep(440, 300)

Alpha: 0.05
Fold 1:
 - Correct Prediction (y_test): 50.53%
 - Correct Prediction (Verify): 48.73%
Fold 2:
 - Correct Prediction (y_test): 64.48%
 - Correct Prediction (Verify): 70.82%
Fold 3:
 - Correct Prediction (y_test): 56.55%
 - Correct Prediction (Verify): 55.29%
Fold 4:
 - Correct Prediction (y_test): 52.33%
 - Correct Prediction (Verify): 45.88%
Fold 5:
 - Correct Prediction (y_test): 50.85%
 - Correct Prediction (Verify): 47.36%
Média de acurácia (y_test) para alpha 0.05: 54.95%
Média de acurácia (Verify) para alpha 0.05: 53.62%
---
Alpha: 0.08
Fold 1:
 - Correct Prediction (y_test): 40.91%
 - Correct Prediction (Verify): 42.28%
Fold 2:
 - Correct Prediction (y_test): 53.38%
 - Correct Prediction (Verify): 58.46%
Fold 3:
 - Correct Prediction (y_test): 60.15%
 - Correct Prediction (Verify): 56.24%
Fold 4:
 - Correct Prediction (y_test): 55.18%
 - Correct Prediction (Verify): 49.68%
Fold 5:
 - Correct Prediction (y_test): 49.89%
 - Correct Prediction (Verify): 48.73%
Média de a

: 

#### Time Series com verify

In [12]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Number of folds used by the time-series cross-validation
n_splits = 5

# Accumulators for the results of every alpha and every fold
all_predictions = []
all_verify_values = []
all_actual_values = []
all_accuracy_scores_ytest = []
all_accuracy_scores_verify = []

# TimeSeriesSplit cuts the rows purely by position: each fold trains on the
# leading rows and tests on the rows that follow them. price_data was sorted by
# ['Ticker', 'Date'] when it was prepared, so splitting it as-is separates
# tickers rather than periods. Re-order by Date first (stable sort, so rows that
# share a date keep their relative order); after that no training row is dated
# later than any row of its test fold.
price_data_by_date = price_data.sort_values(by='Date', kind='mergesort')

# For each alpha, run the time-series cross-validation and score the predictions
# against both the alpha-specific target and the 'Verify' column
for alpha in alpha_list:
    # Column names for this alpha: features and target
    feature_columns = ['RSI',
                       f'k_percent_{alpha:.2f}',
                       f'r_percent_{alpha:.2f}',
                       f'Price_Rate_Of_Change_{alpha:.2f}',
                       f'MACD_{alpha:.2f}',
                       f'MACD_EMA_{alpha:.2f}',
                       f'OBV_{alpha:.2f}']
    target_column = f'Prediction_{alpha:.2f}'

    # Keep only the rows where features, target and Verify are all present;
    # columns are picked by name so the selection does not depend on their position
    data = price_data_by_date[feature_columns + [target_column, 'Verify']].dropna()
    X = data[feature_columns]
    y = data[target_column]
    verify = data['Verify']

    # Set up the positional time-series splitter
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    for fold, (train_index, test_index) in enumerate(tscv.split(X), start=1):
        # Slice train/test partitions using the positional indices of the fold
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # A fresh Random Forest per fold; random_state is fixed for repeatability
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0, oob_score=True)

        # Fit on the training partition
        rand_frst_clf.fit(X_train, y_train)

        # Predict the test partition
        y_pred = rand_frst_clf.predict(X_test)

        # Keep predictions and reference labels for later analysis
        all_predictions.extend(y_pred)
        all_verify_values.extend(y_verify.values)
        all_actual_values.extend(y_test.values)

        # Accuracy (in %) against the alpha-specific target and against Verify
        accuracy_ytest = accuracy_score(y_test, y_pred) * 100.0
        accuracy_verify = accuracy_score(y_verify, y_pred) * 100.0

        all_accuracy_scores_ytest.append(accuracy_ytest)
        all_accuracy_scores_verify.append(accuracy_verify)

        # Per-fold report
        print(f'Fold {fold}:')
        print(f' - Correct Prediction (y_test): {accuracy_ytest:.2f}%')
        print(f' - Correct Prediction (Verify): {accuracy_verify:.2f}%')

    print('---')

# Overall report: means taken over every fold of every alpha
print("Resumo geral:")
print(f'Média de acurácia (y_test): {np.mean(all_accuracy_scores_ytest):.2f}%')
print(f'Média de acurácia (Verify): {np.mean(all_accuracy_scores_verify):.2f}%')


Alpha: 0.1
Fold 1:
 - Correct Prediction (y_test): 60.90%
 - Correct Prediction (Verify): 49.16%
Fold 2:
 - Correct Prediction (y_test): 67.82%
 - Correct Prediction (Verify): 48.74%
Fold 3:
 - Correct Prediction (y_test): 61.64%
 - Correct Prediction (Verify): 40.67%
Fold 4:
 - Correct Prediction (y_test): 64.05%
 - Correct Prediction (Verify): 57.44%
Fold 5:
 - Correct Prediction (y_test): 70.65%
 - Correct Prediction (Verify): 51.57%
---
Alpha: 0.2
Fold 1:
 - Correct Prediction (y_test): 53.25%
 - Correct Prediction (Verify): 50.21%
Fold 2:
 - Correct Prediction (y_test): 59.22%
 - Correct Prediction (Verify): 55.24%
Fold 3:
 - Correct Prediction (y_test): 58.07%
 - Correct Prediction (Verify): 45.70%
Fold 4:
 - Correct Prediction (y_test): 58.60%
 - Correct Prediction (Verify): 55.45%
Fold 5:
 - Correct Prediction (y_test): 55.45%
 - Correct Prediction (Verify): 48.32%
---
Alpha: 0.3
Fold 1:
 - Correct Prediction (y_test): 49.37%
 - Correct Prediction (Verify): 48.22%
Fold 2:
 - Co

#### Time Series sem Verify

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Number of folds used by the time-series cross-validation
n_splits = 5

# TimeSeriesSplit cuts the rows purely by position: each fold trains on the
# leading rows and tests on the rows that follow them. price_data was sorted by
# ['Ticker', 'Date'] when it was prepared, so splitting it as-is separates
# tickers rather than periods. Re-order by Date first (stable sort, so rows that
# share a date keep their relative order); after that no training row is dated
# later than any row of its test fold.
price_data_by_date = price_data.sort_values(by='Date', kind='mergesort')

# For each alpha, run the time-series cross-validation on its own feature set
for alpha in alpha_list:
    # Column names for this alpha: features and target
    feature_columns = ['RSI',
                       f'k_percent_{alpha:.2f}',
                       f'r_percent_{alpha:.2f}',
                       f'Price_Rate_Of_Change_{alpha:.2f}',
                       f'MACD_{alpha:.2f}',
                       f'MACD_EMA_{alpha:.2f}',
                       f'OBV_{alpha:.2f}']
    target_column = f'Prediction_{alpha:.2f}'

    # Keep only the rows where every feature and the target are present;
    # columns are picked by name so the selection does not depend on their position
    data = price_data_by_date[feature_columns + [target_column]].dropna()
    X = data[feature_columns]
    y = data[target_column]

    # Set up the positional time-series splitter
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    for fold, (train_index, test_index) in enumerate(tscv.split(X), start=1):
        # Slice train/test partitions using the positional indices of the fold
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # A fresh Random Forest per fold; random_state is fixed for repeatability
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0,oob_score = True)

        # Fit on the training partition
        rand_frst_clf.fit(X_train, y_train)

        # Predict the test partition
        y_pred = rand_frst_clf.predict(X_test)

        # Accuracy (in %) of this fold
        accuracy = accuracy_score(y_test, y_pred) * 100.0
        print(f'Fold {fold}: Correct Prediction (%): {accuracy:.2f}')

    print('---')


#### Salvando

In [None]:
# Persist the estimator currently bound to `rand_frst_clf`. The cross-validation
# cells rebind that name on every fold, so this is the most recently fitted model.
model_output_path = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl'
joblib.dump(rand_frst_clf, model_output_path)

In [None]:
# Export the whole price_data frame to an Excel workbook, leaving the index out
excel_output_path = 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx'
price_data.to_excel(excel_output_path, index=False)