In [2]:
import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report
pd.set_option('display.max_columns',None)
import joblib

In [113]:
price_data = pd.read_parquet(f'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Dados_bolsa_interpolar.parquet')
price_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.290425,985,ABEV3.SA
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.275703,227,ABEV3.SA
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.268352,1137,ABEV3.SA
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.270189,606,ABEV3.SA
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.275703,6445,ABEV3.SA


In [112]:
price_data['Date'] = pd.to_datetime(price_data['Date'])
price_data.sort_values(by = ['Ticker','Date'], inplace = True)

price_data['change_in_price'] = price_data['Close'].diff()

mask = price_data['Ticker'] != price_data['Ticker'].shift(1)

price_data['change_in_price'] = np.where(mask == True, np.nan, price_data['change_in_price'])

price_data[price_data.isna().any(axis = 1)]


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.290425,985,ABEV3.SA,
17850,2000-12-21,2.815505,2.988535,2.815174,2.960552,1.685388,74224,ITUB4.SA,
23692,2011-05-02,4.705845,4.851989,4.705845,4.808146,4.428368,29936513,MGLU3.SA,
11774,2000-01-03,5.875,5.875,5.875,5.875,1.278907,35389440000,PETR4.SA,
5746,2000-01-03,3.5,3.5425,3.5,3.5,1.439834,585600,VALE3.SA,


In [93]:
import numpy as np
import pandas as pd

# Função para suavização exponencial
def exponential_smoothing(data, alpha):
    """
    Aplica a suavização exponencial nos dados.
    :param data: Série de dados (array-like ou pandas Series).
    :param alpha: Fator de suavização (0 < alpha < 1).
    :return: Série suavizada.
    """
    smoothed = np.zeros(len(data))
    smoothed[0] = data[0]  # Inicializa com o primeiro valor
    for t in range(1, len(data)):
        smoothed[t] = alpha * data[t] + (1 - alpha) * smoothed[t-1]
    return smoothed

# Função para calcular o target
def calculate_target(data, d):
    """
    Calcula o target usando a fórmula do sinal da diferença de preços.
    :param data: Série de preços de fechamento (array-like ou pandas Series).
    :param d: Período para calcular a diferença futura.
    :return: Série com os valores do target (-1, 0, 1).
    """
    return np.sign(data.shift(-d) - data)

# Função para calcular o target
def calculate_target_verify(data, d):
    """
    Calcula o target usando a fórmula do sinal da diferença de preços.
    :param data: Série de preços de fechamento (array-like ou pandas Series).
    :param d: Período para calcular a diferença futura.
    :return: Série com os valores do target (-1, 0, 1).
    """
    target = np.sign(data.shift(-d) - data)
    target[target == 0] = -1  # Substitui os 0 por -1
    return target


# Lista de alphas para suavização exponencial
alpha_list = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]
d = 10

# Loop para gerar as colunas de preço suavizado para cada alpha
for alpha in alpha_list:
    smoothed_column = f'Smoothed_Close_{alpha}'  # Nome da coluna para o alpha atual
    price_data[smoothed_column] = price_data.groupby('Ticker')['Close'].transform(
        lambda x: exponential_smoothing(x.values, alpha)
    )
    
    # Calculando o target para d = 10 dias (pode ser ajustado conforme necessário)
    target_column = f'Prediction_{alpha:.2f}'  # Nome da coluna de target
    price_data[target_column] = price_data.groupby('Ticker')[smoothed_column].transform(
        lambda x: calculate_target(x, d)
    )


# Aplicando a suavização exponencial
alpha_verify = 1
price_data['Smoothed_Close_1'] = price_data.groupby('Ticker')['Close'].transform(
    lambda x: exponential_smoothing(x.values, alpha_verify)
)

# Calculando o target para d = 10 dias
d = 10
price_data['Verify'] = price_data.groupby('Ticker')['Smoothed_Close_1'].transform(
    lambda x: calculate_target_verify(x, d)
)

price_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify
0,2000-01-05,0.520882,0.520882,0.520882,0.520882,0.290425,985,ABEV3.SA,,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0,0.520882,-1.0
1,2000-01-06,0.494478,0.494478,0.494478,0.494478,0.275703,227,ABEV3.SA,-0.026404,0.518242,-1.0,0.515601,-1.0,0.512961,-1.0,0.510320,-1.0,0.507680,-1.0,0.505040,-1.0,0.502399,1.0,0.499759,1.0,0.497118,1.0,0.494478,1.0
2,2000-01-12,0.481293,0.481293,0.481293,0.481293,0.268352,1137,ABEV3.SA,-0.013185,0.514547,-1.0,0.508740,-1.0,0.503460,-1.0,0.498709,1.0,0.494486,1.0,0.490792,1.0,0.487625,1.0,0.484986,1.0,0.482876,1.0,0.481293,1.0
3,2000-01-13,0.484589,0.484589,0.484589,0.484589,0.270189,606,ABEV3.SA,0.003296,0.511551,-1.0,0.503909,-1.0,0.497799,1.0,0.493061,1.0,0.489538,1.0,0.487070,1.0,0.485500,1.0,0.484668,1.0,0.484418,1.0,0.484589,1.0
4,2000-01-14,0.494478,0.494478,0.494478,0.494478,0.275703,6445,ABEV3.SA,0.009889,0.509844,-1.0,0.502023,-1.0,0.496803,1.0,0.493628,1.0,0.492008,1.0,0.491515,1.0,0.491785,1.0,0.492516,1.0,0.493472,1.0,0.494478,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11769,2025-01-14,51.950001,52.430000,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.340000,53.613460,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.745470,,51.780795,,51.815846,,51.849998,
11770,2025-01-15,52.139999,52.680000,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.750000,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.343640,,52.436158,,52.521583,,52.599998,
11771,2025-01-16,52.290001,53.270000,52.000000,52.669998,52.669998,220262800,VALE3.SA,0.070000,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.500290,,52.572091,,52.623230,,52.655157,,52.669998,
11772,2025-01-17,53.150002,54.490002,52.980000,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,


In [94]:
# Aplicando o deslocamento de 10 linhas dentro de cada grupo de 'Ticker'
price_data['Close_10'] = price_data.groupby('Ticker')['Close'].shift(-d)


In [95]:
# Período do indicador.
n = 14

# Dias de alta e dias de baixa
up_df, down_df = price_data[['Ticker','change_in_price']].copy(), price_data[['Ticker','change_in_price']].copy()

# Salvando a diferença para dias de alta.
up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0

# Salvando a diferença para dias de baixa
down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0

# Colocando as diferenças do dia de baixa em termos absolutos
down_df['change_in_price'] = down_df['change_in_price'].abs()


ewma_up = up_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df.groupby('Ticker')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())

relative_strength = ewma_up / ewma_down

# Calculando o indicador
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))

price_data['down_days'] = down_df['change_in_price']
price_data['up_days'] = up_df['change_in_price']
price_data['RSI'] = relative_strength_index

price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI
11769,2025-01-14,51.950001,52.43,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.34,53.61346,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.74547,,51.780795,,51.815846,,51.849998,,,0.0,0.34,30.128698
11770,2025-01-15,52.139999,52.68,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.75,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.34364,,52.436158,,52.521583,,52.599998,,,0.0,0.75,44.66501
11771,2025-01-16,52.290001,53.27,52.0,52.669998,52.669998,220262800,VALE3.SA,0.07,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.50029,,52.572091,,52.62323,,52.655157,,52.669998,,,0.0,0.07,45.877603
11772,2025-01-17,53.150002,54.490002,52.98,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,,,0.0,1.820004,67.345288
11773,2025-01-20,54.220001,54.490002,53.799999,54.009998,54.009998,3561500,VALE3.SA,-0.480003,53.581701,,53.070822,,53.256451,,53.515172,,53.729995,,53.883646,,53.981387,,54.031328,,54.03965,,54.009998,,,0.480003,0.0,60.091853


In [96]:


# Iterando sobre cada alpha
for alpha in alpha_list:
    # Nome da coluna Smoothed_Close correspondente ao alpha
    smoothed_col = f'Smoothed_Close_{alpha}'
    
    # Calculando low_14 e high_14 para o alpha atual
    low_14 = price_data[['Ticker', 'Low']].copy()
    high_14 = price_data[['Ticker', 'High']].copy()
    low_14 = low_14.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
    high_14 = high_14.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())
    
    # Calculando k_percent para o alpha atual
    k_percent = 100 * ((price_data[smoothed_col] - low_14) / (high_14 - low_14))
    
    # Adicionando as colunas ao DataFrame para o alpha atual
    price_data[f'low_14_{alpha:.2f}'] = low_14
    price_data[f'high_14_{alpha:.2f}'] = high_14
    price_data[f'k_percent_{alpha:.2f}'] = k_percent

# Visualizando o DataFrame resultante
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90
11769,2025-01-14,51.950001,52.43,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.34,53.61346,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.74547,,51.780795,,51.815846,,51.849998,,,0.0,0.34,30.128698,50.869999,55.400002,60.562017,50.869999,55.400002,31.585807,50.869999,55.400002,22.216176,50.869999,55.400002,19.026193,50.869999,55.400002,18.320942,50.869999,55.400002,18.63844,50.869999,55.400002,19.326055,50.869999,55.400002,20.105854,50.869999,55.400002,20.879616
11770,2025-01-15,52.139999,52.68,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.75,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.34364,,52.436158,,52.521583,,52.599998,,,0.0,0.75,44.66501,50.869999,55.400002,58.324797,50.869999,55.400002,32.906608,50.869999,55.400002,27.008267,50.869999,55.400002,26.691641,50.869999,55.400002,28.255378,50.869999,55.400002,30.369264,50.869999,55.400002,32.530686,50.869999,55.400002,34.573022,50.869999,55.400002,36.458794
11771,2025-01-16,52.290001,53.27,52.0,52.669998,52.669998,220262800,VALE3.SA,0.07,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.50029,,52.572091,,52.62323,,52.655157,,52.669998,,,0.0,0.07,45.877603,50.869999,55.400002,56.465823,50.869999,55.400002,34.272299,50.869999,55.400002,30.826305,50.869999,55.400002,31.909009,50.869999,55.400002,33.995219,50.869999,55.400002,35.988741,50.869999,55.400002,37.573748,50.869999,55.400002,38.702652,50.869999,55.400002,39.407433
11772,2025-01-17,53.150002,54.490002,52.98,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,,,0.0,1.820004,67.345288,50.869999,55.189999,61.669291,50.869999,55.189999,45.509942,50.869999,55.189999,47.766288,50.869999,55.189999,53.594644,50.869999,55.189999,59.722076,50.869999,55.189999,65.373107,50.869999,55.189999,70.477538,50.869999,55.189999,75.153904,50.869999,55.189999,79.549039
11773,2025-01-20,54.220001,54.490002,53.799999,54.009998,54.009998,3561500,VALE3.SA,-0.480003,53.581701,,53.070822,,53.256451,,53.515172,,53.729995,,53.883646,,53.981387,,54.031328,,54.03965,,54.009998,,,0.480003,0.0,60.091853,50.869999,55.189999,62.77088,50.869999,55.189999,50.944989,50.869999,55.189999,55.241955,50.869999,55.189999,61.230857,50.869999,55.189999,66.203626,50.869999,55.189999,69.760348,50.869999,55.189999,72.022885,50.869999,55.189999,73.178922,50.869999,55.189999,73.371562


In [97]:
# Iterando pelos alphas
for alpha in alpha_list:
    # Nome da coluna Smoothed_Close correspondente ao alpha
    smoothed_col = f'Smoothed_Close_{alpha}'
    
    # Calculando low_14 e high_14 para o grupo
    low_14 = price_data[['Ticker', 'Low']].copy()
    high_14 = price_data[['Ticker', 'High']].copy()
    low_14 = low_14.groupby('Ticker')['Low'].transform(lambda x: x.rolling(window=n).min())
    high_14 = high_14.groupby('Ticker')['High'].transform(lambda x: x.rolling(window=n).max())
    
    # Calculando o indicador %R para o alpha atual
    r_percent = ((high_14 - price_data[smoothed_col]) / (high_14 - low_14)) * (-100)
    
    # Adicionando a nova coluna ao DataFrame
    price_data[f'r_percent_{alpha:.2f}'] = r_percent

# Visualizando as últimas linhas do DataFrame com as novas colunas
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90
11769,2025-01-14,51.950001,52.43,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.34,53.61346,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.74547,,51.780795,,51.815846,,51.849998,,,0.0,0.34,30.128698,50.869999,55.400002,60.562017,50.869999,55.400002,31.585807,50.869999,55.400002,22.216176,50.869999,55.400002,19.026193,50.869999,55.400002,18.320942,50.869999,55.400002,18.63844,50.869999,55.400002,19.326055,50.869999,55.400002,20.105854,50.869999,55.400002,20.879616,-39.437983,-68.414193,-77.783824,-80.973807,-81.679058,-81.36156,-80.673945,-79.894146,-79.120384
11770,2025-01-15,52.139999,52.68,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.75,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.34364,,52.436158,,52.521583,,52.599998,,,0.0,0.75,44.66501,50.869999,55.400002,58.324797,50.869999,55.400002,32.906608,50.869999,55.400002,27.008267,50.869999,55.400002,26.691641,50.869999,55.400002,28.255378,50.869999,55.400002,30.369264,50.869999,55.400002,32.530686,50.869999,55.400002,34.573022,50.869999,55.400002,36.458794,-41.675203,-67.093392,-72.991733,-73.308359,-71.744622,-69.630736,-67.469314,-65.426978,-63.541206
11771,2025-01-16,52.290001,53.27,52.0,52.669998,52.669998,220262800,VALE3.SA,0.07,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.50029,,52.572091,,52.62323,,52.655157,,52.669998,,,0.0,0.07,45.877603,50.869999,55.400002,56.465823,50.869999,55.400002,34.272299,50.869999,55.400002,30.826305,50.869999,55.400002,31.909009,50.869999,55.400002,33.995219,50.869999,55.400002,35.988741,50.869999,55.400002,37.573748,50.869999,55.400002,38.702652,50.869999,55.400002,39.407433,-43.534177,-65.727701,-69.173695,-68.090991,-66.004781,-64.011259,-62.426252,-61.297348,-60.592567
11772,2025-01-17,53.150002,54.490002,52.98,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,,,0.0,1.820004,67.345288,50.869999,55.189999,61.669291,50.869999,55.189999,45.509942,50.869999,55.189999,47.766288,50.869999,55.189999,53.594644,50.869999,55.189999,59.722076,50.869999,55.189999,65.373107,50.869999,55.189999,70.477538,50.869999,55.189999,75.153904,50.869999,55.189999,79.549039,-38.330709,-54.490058,-52.233712,-46.405356,-40.277924,-34.626893,-29.522462,-24.846096,-20.450961
11773,2025-01-20,54.220001,54.490002,53.799999,54.009998,54.009998,3561500,VALE3.SA,-0.480003,53.581701,,53.070822,,53.256451,,53.515172,,53.729995,,53.883646,,53.981387,,54.031328,,54.03965,,54.009998,,,0.480003,0.0,60.091853,50.869999,55.189999,62.77088,50.869999,55.189999,50.944989,50.869999,55.189999,55.241955,50.869999,55.189999,61.230857,50.869999,55.189999,66.203626,50.869999,55.189999,69.760348,50.869999,55.189999,72.022885,50.869999,55.189999,73.178922,50.869999,55.189999,73.371562,-37.22912,-49.055011,-44.758045,-38.769143,-33.796374,-30.239652,-27.977115,-26.821078,-26.628438


In [98]:
# Iterando pelos alphas
for alpha in alpha_list:
    # Nome da coluna Smoothed_Close correspondente ao alpha
    smoothed_col = f'Smoothed_Close_{alpha}'
    
    # Calculando o MACD usando a coluna suavizada correspondente
    ema_26 = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.ewm(span=26).mean())
    ema_12 = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.ewm(span=12).mean())
    macd = ema_12 - ema_26
    
    # Calculando a EMA do MACD
    ema_9_macd = macd.ewm(span=9).mean()
    
    # Adicionando as novas colunas ao DataFrame
    price_data[f'MACD_{alpha:.2f}'] = macd
    price_data[f'MACD_EMA_{alpha:.2f}'] = ema_9_macd

# Visualizando as últimas linhas do DataFrame com as novas colunas
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90
11769,2025-01-14,51.950001,52.43,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.34,53.61346,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.74547,,51.780795,,51.815846,,51.849998,,,0.0,0.34,30.128698,50.869999,55.400002,60.562017,50.869999,55.400002,31.585807,50.869999,55.400002,22.216176,50.869999,55.400002,19.026193,50.869999,55.400002,18.320942,50.869999,55.400002,18.63844,50.869999,55.400002,19.326055,50.869999,55.400002,20.105854,50.869999,55.400002,20.879616,-39.437983,-68.414193,-77.783824,-80.973807,-81.679058,-81.36156,-80.673945,-79.894146,-79.120384,-1.302429,-1.120255,-1.530146,-1.327018,-1.614043,-1.41269,-1.650342,-1.458028,-1.665675,-1.484969,-1.670533,-1.502068,-1.670105,-1.513351,-1.667153,-1.521012,-1.663121,-1.526346
11770,2025-01-15,52.139999,52.68,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.75,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.34364,,52.436158,,52.521583,,52.599998,,,0.0,0.75,44.66501,50.869999,55.400002,58.324797,50.869999,55.400002,32.906608,50.869999,55.400002,27.008267,50.869999,55.400002,26.691641,50.869999,55.400002,28.255378,50.869999,55.400002,30.369264,50.869999,55.400002,32.530686,50.869999,55.400002,34.573022,50.869999,55.400002,36.458794,-41.675203,-67.093392,-72.991733,-73.308359,-71.744622,-69.630736,-67.469314,-65.426978,-63.541206,-1.326687,-1.161541,-1.533119,-1.368238,-1.593333,-1.448818,-1.608209,-1.488064,-1.605342,-1.509044,-1.595219,-1.520698,-1.582538,-1.527188,-1.569438,-1.530697,-1.556821,-1.532441
11771,2025-01-16,52.290001,53.27,52.0,52.669998,52.669998,220262800,VALE3.SA,0.07,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.50029,,52.572091,,52.62323,,52.655157,,52.669998,,,0.0,0.07,45.877603,50.869999,55.400002,56.465823,50.869999,55.400002,34.272299,50.869999,55.400002,30.826305,50.869999,55.400002,31.909009,50.869999,55.400002,33.995219,50.869999,55.400002,35.988741,50.869999,55.400002,37.573748,50.869999,55.400002,38.702652,50.869999,55.400002,39.407433,-43.534177,-65.727701,-69.173695,-68.090991,-66.004781,-64.011259,-62.426252,-61.297348,-60.592567,-1.337291,-1.196691,-1.513041,-1.397199,-1.545152,-1.468085,-1.538018,-1.498055,-1.519037,-1.511042,-1.497726,-1.516104,-1.477673,-1.517285,-1.460072,-1.516572,-1.44514,-1.514981
11772,2025-01-17,53.150002,54.490002,52.98,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,,,0.0,1.820004,67.345288,50.869999,55.189999,61.669291,50.869999,55.189999,45.509942,50.869999,55.189999,47.766288,50.869999,55.189999,53.594644,50.869999,55.189999,59.722076,50.869999,55.189999,65.373107,50.869999,55.189999,70.477538,50.869999,55.189999,75.153904,50.869999,55.189999,79.549039,-38.330709,-54.490058,-52.233712,-46.405356,-40.277924,-34.626893,-29.522462,-24.846096,-20.450961,-1.321887,-1.22173,-1.447083,-1.407175,-1.436582,-1.461784,-1.396111,-1.477666,-1.351144,-1.479063,-1.309041,-1.474691,-1.271577,-1.468144,-1.238615,-1.460981,-1.209439,-1.453873
11773,2025-01-20,54.220001,54.490002,53.799999,54.009998,54.009998,3561500,VALE3.SA,-0.480003,53.581701,,53.070822,,53.256451,,53.515172,,53.729995,,53.883646,,53.981387,,54.031328,,54.03965,,54.009998,,,0.480003,0.0,60.091853,50.869999,55.189999,62.77088,50.869999,55.189999,50.944989,50.869999,55.189999,55.241955,50.869999,55.189999,61.230857,50.869999,55.189999,66.203626,50.869999,55.189999,69.760348,50.869999,55.189999,72.022885,50.869999,55.189999,73.178922,50.869999,55.189999,73.371562,-37.22912,-49.055011,-44.758045,-38.769143,-33.796374,-30.239652,-27.977115,-26.821078,-26.628438,-1.290958,-1.235576,-1.360185,-1.397777,-1.309386,-1.431305,-1.242705,-1.430674,-1.181869,-1.419624,-1.131173,-1.405988,-1.09029,-1.392573,-1.057799,-1.380344,-1.03228,-1.369554


In [99]:
# Período do indicador
e = 9

# Iterando pelos alphas
for alpha in alpha_list:
    # Nome da coluna Smoothed_Close correspondente ao alpha
    smoothed_col = f'Smoothed_Close_{alpha}'
    
    # Calculando o Price Rate of Change (ROC) usando a coluna suavizada correspondente
    price_rate_col = f'Price_Rate_Of_Change_{alpha:.2f}'
    price_data[price_rate_col] = price_data.groupby('Ticker')[smoothed_col].transform(lambda x: x.pct_change(periods=e))

# Visualizando as últimas linhas do DataFrame com as novas colunas
price_data.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90,Price_Rate_Of_Change_0.10,Price_Rate_Of_Change_0.20,Price_Rate_Of_Change_0.30,Price_Rate_Of_Change_0.40,Price_Rate_Of_Change_0.50,Price_Rate_Of_Change_0.60,Price_Rate_Of_Change_0.70,Price_Rate_Of_Change_0.80,Price_Rate_Of_Change_0.90
11769,2025-01-14,51.950001,52.43,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.34,53.61346,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.74547,,51.780795,,51.815846,,51.849998,,,0.0,0.34,30.128698,50.869999,55.400002,60.562017,50.869999,55.400002,31.585807,50.869999,55.400002,22.216176,50.869999,55.400002,19.026193,50.869999,55.400002,18.320942,50.869999,55.400002,18.63844,50.869999,55.400002,19.326055,50.869999,55.400002,20.105854,50.869999,55.400002,20.879616,-39.437983,-68.414193,-77.783824,-80.973807,-81.679058,-81.36156,-80.673945,-79.894146,-79.120384,-1.302429,-1.120255,-1.530146,-1.327018,-1.614043,-1.41269,-1.650342,-1.458028,-1.665675,-1.484969,-1.670533,-1.502068,-1.670105,-1.513351,-1.667153,-1.521012,-1.663121,-1.526346,-0.045193,-0.052105,-0.054067,-0.054683,-0.054481,-0.053731,-0.052699,-0.051581,-0.050496
11770,2025-01-15,52.139999,52.68,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.75,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.34364,,52.436158,,52.521583,,52.599998,,,0.0,0.75,44.66501,50.869999,55.400002,58.324797,50.869999,55.400002,32.906608,50.869999,55.400002,27.008267,50.869999,55.400002,26.691641,50.869999,55.400002,28.255378,50.869999,55.400002,30.369264,50.869999,55.400002,32.530686,50.869999,55.400002,34.573022,50.869999,55.400002,36.458794,-41.675203,-67.093392,-72.991733,-73.308359,-71.744622,-69.630736,-67.469314,-65.426978,-63.541206,-1.326687,-1.161541,-1.533119,-1.368238,-1.593333,-1.448818,-1.608209,-1.488064,-1.605342,-1.509044,-1.595219,-1.520698,-1.582538,-1.527188,-1.569438,-1.530697,-1.556821,-1.532441,-0.04376,-0.047825,-0.047025,-0.045026,-0.042495,-0.039783,-0.037132,-0.03467,-0.032434
11771,2025-01-16,52.290001,53.27,52.0,52.669998,52.669998,220262800,VALE3.SA,0.07,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.50029,,52.572091,,52.62323,,52.655157,,52.669998,,,0.0,0.07,45.877603,50.869999,55.400002,56.465823,50.869999,55.400002,34.272299,50.869999,55.400002,30.826305,50.869999,55.400002,31.909009,50.869999,55.400002,33.995219,50.869999,55.400002,35.988741,50.869999,55.400002,37.573748,50.869999,55.400002,38.702652,50.869999,55.400002,39.407433,-43.534177,-65.727701,-69.173695,-68.090991,-66.004781,-64.011259,-62.426252,-61.297348,-60.592567,-1.337291,-1.196691,-1.513041,-1.397199,-1.545152,-1.468085,-1.538018,-1.498055,-1.519037,-1.511042,-1.497726,-1.516104,-1.477673,-1.517285,-1.460072,-1.516572,-1.44514,-1.514981,-0.0406,-0.040592,-0.03633,-0.031496,-0.026781,-0.022489,-0.01875,-0.015576,-0.012917
11772,2025-01-17,53.150002,54.490002,52.98,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,,,0.0,1.820004,67.345288,50.869999,55.189999,61.669291,50.869999,55.189999,45.509942,50.869999,55.189999,47.766288,50.869999,55.189999,53.594644,50.869999,55.189999,59.722076,50.869999,55.189999,65.373107,50.869999,55.189999,70.477538,50.869999,55.189999,75.153904,50.869999,55.189999,79.549039,-38.330709,-54.490058,-52.233712,-46.405356,-40.277924,-34.626893,-29.522462,-24.846096,-20.450961,-1.321887,-1.22173,-1.447083,-1.407175,-1.436582,-1.461784,-1.396111,-1.477666,-1.351144,-1.479063,-1.309041,-1.474691,-1.271577,-1.468144,-1.238615,-1.460981,-1.209439,-1.453873,-0.033261,-0.025604,-0.014894,-0.004656,0.004584,0.012729,0.019855,0.026119,0.03169
11773,2025-01-20,54.220001,54.490002,53.799999,54.009998,54.009998,3561500,VALE3.SA,-0.480003,53.581701,,53.070822,,53.256451,,53.515172,,53.729995,,53.883646,,53.981387,,54.031328,,54.03965,,54.009998,,,0.480003,0.0,60.091853,50.869999,55.189999,62.77088,50.869999,55.189999,50.944989,50.869999,55.189999,55.241955,50.869999,55.189999,61.230857,50.869999,55.189999,66.203626,50.869999,55.189999,69.760348,50.869999,55.189999,72.022885,50.869999,55.189999,73.178922,50.869999,55.189999,73.371562,-37.22912,-49.055011,-44.758045,-38.769143,-33.796374,-30.239652,-27.977115,-26.821078,-26.628438,-1.290958,-1.235576,-1.360185,-1.397777,-1.309386,-1.431305,-1.242705,-1.430674,-1.181869,-1.419624,-1.131173,-1.405988,-1.09029,-1.392573,-1.057799,-1.380344,-1.03228,-1.369554,-0.026555,-0.013361,0.000522,0.012003,0.020938,0.027575,0.032257,0.035324,0.037053


In [100]:

# Função OBV ajustada para receber uma coluna de preço suavizado específica
def obv(group, smoothed_col):
    Volume = group['Volume']
    change = group[smoothed_col].diff()

    prev_obv = 0
    obv_values = []

    # Calculando o indicador
    for i, j in zip(change, Volume):
        if i > 0:
            current_obv = prev_obv + j
        elif i < 0:
            current_obv = prev_obv - j
        else:
            current_obv = prev_obv

        prev_obv = current_obv
        obv_values.append(current_obv)
    
    # Retornando em pandas series
    return pd.Series(obv_values, index=group.index)

# Iterando sobre os alphas para calcular o OBV para cada preço suavizado
for alpha in alpha_list:
    smoothed_col = f'Smoothed_Close_{alpha}'  # Nome da coluna de preço suavizado
    obv_col = f'OBV_{alpha:.2f}'  # Nome da coluna OBV correspondente

    # Aplicando a função OBV por grupo (Ticker) para o preço suavizado específico
    obv_groups = price_data.groupby('Ticker').apply(lambda group: obv(group, smoothed_col))

    # Adicionando a coluna OBV ao DataFrame
    price_data[obv_col] = obv_groups.reset_index(level=0, drop=True)

# Exibindo as últimas linhas do DataFrame para verificar as colunas OBV criadas
price_data.tail()


  price_data[obv_col] = obv_groups.reset_index(level=0, drop=True)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90,Price_Rate_Of_Change_0.10,Price_Rate_Of_Change_0.20,Price_Rate_Of_Change_0.30,Price_Rate_Of_Change_0.40,Price_Rate_Of_Change_0.50,Price_Rate_Of_Change_0.60,Price_Rate_Of_Change_0.70,Price_Rate_Of_Change_0.80,Price_Rate_Of_Change_0.90,OBV_0.10,OBV_0.20,OBV_0.30,OBV_0.40,OBV_0.50,OBV_0.60,OBV_0.70,OBV_0.80,OBV_0.90
11769,2025-01-14,51.950001,52.43,51.630001,51.849998,51.849998,21379000,VALE3.SA,0.34,53.61346,,52.300837,,51.876392,,51.731886,,51.699938,,51.714321,,51.74547,,51.780795,,51.815846,,51.849998,,,0.0,0.34,30.128698,50.869999,55.400002,60.562017,50.869999,55.400002,31.585807,50.869999,55.400002,22.216176,50.869999,55.400002,19.026193,50.869999,55.400002,18.320942,50.869999,55.400002,18.63844,50.869999,55.400002,19.326055,50.869999,55.400002,20.105854,50.869999,55.400002,20.879616,-39.437983,-68.414193,-77.783824,-80.973807,-81.679058,-81.36156,-80.673945,-79.894146,-79.120384,-1.302429,-1.120255,-1.530146,-1.327018,-1.614043,-1.41269,-1.650342,-1.458028,-1.665675,-1.484969,-1.670533,-1.502068,-1.670105,-1.513351,-1.667153,-1.521012,-1.663121,-1.526346,-0.045193,-0.052105,-0.054067,-0.054683,-0.054481,-0.053731,-0.052699,-0.051581,-0.050496,2383887448,1978122170,103058362,554669166,482565926,274635038,528256342,-1073784042,-908072738
11770,2025-01-15,52.139999,52.68,51.630001,52.599998,52.599998,35137700,VALE3.SA,0.75,53.512114,,52.360669,,52.093474,,52.079131,,52.149968,,52.245727,,52.34364,,52.436158,,52.521583,,52.599998,,,0.0,0.75,44.66501,50.869999,55.400002,58.324797,50.869999,55.400002,32.906608,50.869999,55.400002,27.008267,50.869999,55.400002,26.691641,50.869999,55.400002,28.255378,50.869999,55.400002,30.369264,50.869999,55.400002,32.530686,50.869999,55.400002,34.573022,50.869999,55.400002,36.458794,-41.675203,-67.093392,-72.991733,-73.308359,-71.744622,-69.630736,-67.469314,-65.426978,-63.541206,-1.326687,-1.161541,-1.533119,-1.368238,-1.593333,-1.448818,-1.608209,-1.488064,-1.605342,-1.509044,-1.595219,-1.520698,-1.582538,-1.527188,-1.569438,-1.530697,-1.556821,-1.532441,-0.04376,-0.047825,-0.047025,-0.045026,-0.042495,-0.039783,-0.037132,-0.03467,-0.032434,2348749748,2013259870,138196062,589806866,517703626,309772738,563394042,-1038646342,-872935038
11771,2025-01-16,52.290001,53.27,52.0,52.669998,52.669998,220262800,VALE3.SA,0.07,53.427902,,52.422535,,52.266431,,52.315478,,52.409983,,52.50029,,52.572091,,52.62323,,52.655157,,52.669998,,,0.0,0.07,45.877603,50.869999,55.400002,56.465823,50.869999,55.400002,34.272299,50.869999,55.400002,30.826305,50.869999,55.400002,31.909009,50.869999,55.400002,33.995219,50.869999,55.400002,35.988741,50.869999,55.400002,37.573748,50.869999,55.400002,38.702652,50.869999,55.400002,39.407433,-43.534177,-65.727701,-69.173695,-68.090991,-66.004781,-64.011259,-62.426252,-61.297348,-60.592567,-1.337291,-1.196691,-1.513041,-1.397199,-1.545152,-1.468085,-1.538018,-1.498055,-1.519037,-1.511042,-1.497726,-1.516104,-1.477673,-1.517285,-1.460072,-1.516572,-1.44514,-1.514981,-0.0406,-0.040592,-0.03633,-0.031496,-0.026781,-0.022489,-0.01875,-0.015576,-0.012917,2128486948,2233522670,358458862,810069666,737966426,530035538,783656842,-818383542,-652672238
11772,2025-01-17,53.150002,54.490002,52.98,54.490002,54.490002,54658700,VALE3.SA,1.820004,53.534112,,52.836028,,52.933502,,53.185287,,53.449992,,53.694117,,53.914628,,54.116647,,54.306517,,54.490002,,,0.0,1.820004,67.345288,50.869999,55.189999,61.669291,50.869999,55.189999,45.509942,50.869999,55.189999,47.766288,50.869999,55.189999,53.594644,50.869999,55.189999,59.722076,50.869999,55.189999,65.373107,50.869999,55.189999,70.477538,50.869999,55.189999,75.153904,50.869999,55.189999,79.549039,-38.330709,-54.490058,-52.233712,-46.405356,-40.277924,-34.626893,-29.522462,-24.846096,-20.450961,-1.321887,-1.22173,-1.447083,-1.407175,-1.436582,-1.461784,-1.396111,-1.477666,-1.351144,-1.479063,-1.309041,-1.474691,-1.271577,-1.468144,-1.238615,-1.460981,-1.209439,-1.453873,-0.033261,-0.025604,-0.014894,-0.004656,0.004584,0.012729,0.019855,0.026119,0.03169,2183145648,2288181370,413117562,864728366,792625126,584694238,838315542,-763724842,-598013538
11773,2025-01-20,54.220001,54.490002,53.799999,54.009998,54.009998,3561500,VALE3.SA,-0.480003,53.581701,,53.070822,,53.256451,,53.515172,,53.729995,,53.883646,,53.981387,,54.031328,,54.03965,,54.009998,,,0.480003,0.0,60.091853,50.869999,55.189999,62.77088,50.869999,55.189999,50.944989,50.869999,55.189999,55.241955,50.869999,55.189999,61.230857,50.869999,55.189999,66.203626,50.869999,55.189999,69.760348,50.869999,55.189999,72.022885,50.869999,55.189999,73.178922,50.869999,55.189999,73.371562,-37.22912,-49.055011,-44.758045,-38.769143,-33.796374,-30.239652,-27.977115,-26.821078,-26.628438,-1.290958,-1.235576,-1.360185,-1.397777,-1.309386,-1.431305,-1.242705,-1.430674,-1.181869,-1.419624,-1.131173,-1.405988,-1.09029,-1.392573,-1.057799,-1.380344,-1.03228,-1.369554,-0.026555,-0.013361,0.000522,0.012003,0.020938,0.027575,0.032257,0.035324,0.037053,2186707148,2291742870,416679062,868289866,796186626,588255738,841877042,-767286342,-601575038


In [101]:
# Any row that has a `NaN` value will be dropped.
price_data = price_data.dropna()

#### Se quiser rodar para um Ticker.

In [None]:
price_data = price_data[price_data['Ticker'].isin(['ABEV3.SA'])]
price_data.tail()

In [None]:
price_data = price_data[price_data['Ticker'].isin(['ITUB4.SA'])]
price_data.tail()

In [None]:
price_data = price_data[price_data['Ticker'].isin(['MGLU3.SA'])]
price_data.tail()

In [102]:
price_data = price_data[price_data['Ticker'].isin(['PETR4.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90,Price_Rate_Of_Change_0.10,Price_Rate_Of_Change_0.20,Price_Rate_Of_Change_0.30,Price_Rate_Of_Change_0.40,Price_Rate_Of_Change_0.50,Price_Rate_Of_Change_0.60,Price_Rate_Of_Change_0.70,Price_Rate_Of_Change_0.80,Price_Rate_Of_Change_0.90,OBV_0.10,OBV_0.20,OBV_0.30,OBV_0.40,OBV_0.50,OBV_0.60,OBV_0.70,OBV_0.80,OBV_0.90
17835,2024-12-27,36.0,36.0,35.610001,35.66,35.66,24167200,PETR4.SA,-0.110001,37.649875,-1.0,37.001492,-1.0,36.518488,1.0,36.22008,1.0,36.02593,1.0,35.891076,1.0,35.794376,1.0,35.726123,1.0,35.681923,1.0,35.66,1.0,36.82,0.110001,0.0,20.526365,35.599998,40.759998,39.726295,35.599998,40.759998,27.16072,35.599998,40.759998,17.800177,35.599998,40.759998,12.017076,35.599998,40.759998,8.254485,35.599998,40.759998,5.64104,35.599998,40.759998,3.767008,35.599998,40.759998,2.444283,35.599998,40.759998,1.587692,-60.273705,-72.83928,-82.199823,-87.982924,-91.745515,-94.35896,-96.232992,-97.555717,-98.412308,0.199699,0.355567,0.004863,0.308245,-0.149991,0.221659,-0.247798,0.156459,-0.311362,0.110271,-0.355209,0.076877,-0.387169,0.051907,-0.411444,0.032617,-0.430356,0.017298,-0.03153,-0.060793,-0.074687,-0.080939,-0.083218,-0.083132,-0.081442,-0.078543,-0.074657,-279512648955,-150909929051,3100544117,-157200231747,52727347493,160447705765,161501311149,265285062981,183943810781
17836,2024-12-30,35.779999,36.369999,35.77,36.189999,36.189999,22355600,PETR4.SA,0.529999,37.503888,-1.0,36.839193,1.0,36.419941,1.0,36.208047,1.0,36.107964,1.0,36.07043,1.0,36.071312,1.0,36.097224,1.0,36.139191,1.0,36.189999,1.0,37.290001,0.0,0.529999,32.377384,35.599998,40.759998,36.897077,35.599998,40.759998,24.015398,35.599998,40.759998,15.890357,35.599998,40.759998,11.78389,35.599998,40.759998,9.844298,35.599998,40.759998,9.116883,35.599998,40.759998,9.133981,35.599998,40.759998,9.636146,35.599998,40.759998,10.44947,-63.102923,-75.984602,-84.109643,-88.21611,-90.155702,-90.883117,-90.866019,-90.363854,-89.55053,0.129634,0.310381,-0.0963,0.227336,-0.255279,0.126271,-0.349058,0.055356,-0.406156,0.006986,-0.442653,-0.027029,-0.466815,-0.051838,-0.483048,-0.070516,-0.49389,-0.08494,-0.033356,-0.058717,-0.067501,-0.068865,-0.066931,-0.063546,-0.059673,-0.055893,-0.05261,-279535004555,-150932284651,3078188517,-157222587347,52749703093,160470061365,161523666749,265307418581,183966166381
17837,2025-01-02,36.419998,37.09,36.189999,36.77,36.77,30046800,PETR4.SA,0.580002,37.430499,-1.0,36.825354,1.0,36.524959,1.0,36.432828,1.0,36.438982,1.0,36.490172,1.0,36.560394,1.0,36.635445,1.0,36.70692,1.0,36.77,1.0,37.049999,0.0,0.580002,43.092658,35.599998,40.759998,35.474815,35.599998,40.759998,23.74721,35.599998,40.759998,17.925587,35.599998,40.759998,16.140117,35.599998,40.759998,16.259378,35.599998,40.759998,17.251428,35.599998,40.759998,18.612315,35.599998,40.759998,20.066795,35.599998,40.759998,21.451959,-64.525185,-76.25279,-82.074413,-83.859883,-83.740622,-82.748572,-81.387685,-79.933205,-78.548041,0.067408,0.261786,-0.175564,0.146756,-0.326482,0.035721,-0.406485,-0.037012,-0.44939,-0.084289,-0.472635,-0.11615,-0.484881,-0.138446,-0.490709,-0.154555,-0.49275,-0.166502,-0.03311,-0.053278,-0.056724,-0.053878,-0.049063,-0.04404,-0.039555,-0.03588,-0.033026,-279565051355,-150962331451,3108235317,-157192540547,52779749893,160500108165,161553713549,265337465381,183996213181
17838,2025-01-03,36.880001,37.040001,36.32,36.380001,36.380001,23314200,PETR4.SA,-0.389999,37.325449,-1.0,36.736284,1.0,36.481471,1.0,36.411698,1.0,36.409492,1.0,36.424069,1.0,36.434119,1.0,36.43109,1.0,36.412693,1.0,36.380001,1.0,37.200001,0.389999,0.0,38.374848,35.599998,40.759998,33.438966,35.599998,40.759998,22.021034,35.599998,40.759998,17.08281,35.599998,40.759998,15.730602,35.599998,40.759998,15.687854,35.599998,40.759998,15.970369,35.599998,40.759998,16.165125,35.599998,40.759998,16.106423,35.599998,40.759998,15.749893,-66.561034,-77.978966,-82.91719,-84.269398,-84.312146,-84.029631,-83.834875,-83.893577,-84.250107,0.009508,0.21133,-0.242771,0.06885,-0.382017,-0.047827,-0.44853,-0.119316,-0.480494,-0.16353,-0.496013,-0.192123,-0.503583,-0.211474,-0.50742,-0.225128,-0.509713,-0.235144,-0.034795,-0.052657,-0.054761,-0.052384,-0.049597,-0.047699,-0.046972,-0.047298,-0.048434,-279588365555,-150985645651,3084921117,-157215854747,52756435693,160476793965,161530399349,265314151181,183972898981
17839,2025-01-06,36.599998,36.689999,36.060001,36.209999,36.209999,23760200,PETR4.SA,-0.170002,37.213904,-1.0,36.631027,1.0,36.40003,1.0,36.331018,1.0,36.309745,1.0,36.295627,1.0,36.277235,1.0,36.254217,1.0,36.230268,1.0,36.209999,1.0,37.259998,0.170002,0.0,36.372025,35.599998,38.889999,49.054868,35.599998,38.889999,31.33824,35.599998,38.889999,24.317053,35.599998,38.889999,22.219437,35.599998,38.889999,21.572849,35.599998,38.889999,21.143726,35.599998,38.889999,20.584692,35.599998,38.889999,19.885063,35.599998,38.889999,19.157137,-50.945132,-68.66176,-75.682947,-77.780563,-78.427151,-78.856274,-79.415308,-80.114937,-80.842863,-0.044863,0.160092,-0.301056,-0.005131,-0.42767,-0.123796,-0.482796,-0.192012,-0.507344,-0.232293,-0.518922,-0.257483,-0.525011,-0.274181,-0.52884,-0.28587,-0.531747,-0.294465,-0.034281,-0.048163,-0.047356,-0.043385,-0.039673,-0.036878,-0.034819,-0.033117,-0.031421,-279612125755,-151009405851,3061160917,-157239614947,52732675493,160453033765,161506639149,265290390981,183949138781


In [110]:
price_data = price_data[price_data['Ticker'].isin(['VALE3.SA'])]
price_data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,change_in_price,Smoothed_Close_0.1,Prediction_0.10,Smoothed_Close_0.2,Prediction_0.20,Smoothed_Close_0.3,Prediction_0.30,Smoothed_Close_0.4,Prediction_0.40,Smoothed_Close_0.5,Prediction_0.50,Smoothed_Close_0.6,Prediction_0.60,Smoothed_Close_0.7,Prediction_0.70,Smoothed_Close_0.8,Prediction_0.80,Smoothed_Close_0.9,Prediction_0.90,Smoothed_Close_1,Verify,Close_10,down_days,up_days,RSI,low_14_0.10,high_14_0.10,k_percent_0.10,low_14_0.20,high_14_0.20,k_percent_0.20,low_14_0.30,high_14_0.30,k_percent_0.30,low_14_0.40,high_14_0.40,k_percent_0.40,low_14_0.50,high_14_0.50,k_percent_0.50,low_14_0.60,high_14_0.60,k_percent_0.60,low_14_0.70,high_14_0.70,k_percent_0.70,low_14_0.80,high_14_0.80,k_percent_0.80,low_14_0.90,high_14_0.90,k_percent_0.90,r_percent_0.10,r_percent_0.20,r_percent_0.30,r_percent_0.40,r_percent_0.50,r_percent_0.60,r_percent_0.70,r_percent_0.80,r_percent_0.90,MACD_0.10,MACD_EMA_0.10,MACD_0.20,MACD_EMA_0.20,MACD_0.30,MACD_EMA_0.30,MACD_0.40,MACD_EMA_0.40,MACD_0.50,MACD_EMA_0.50,MACD_0.60,MACD_EMA_0.60,MACD_0.70,MACD_EMA_0.70,MACD_0.80,MACD_EMA_0.80,MACD_0.90,MACD_EMA_0.90,Price_Rate_Of_Change_0.10,Price_Rate_Of_Change_0.20,Price_Rate_Of_Change_0.30,Price_Rate_Of_Change_0.40,Price_Rate_Of_Change_0.50,Price_Rate_Of_Change_0.60,Price_Rate_Of_Change_0.70,Price_Rate_Of_Change_0.80,Price_Rate_Of_Change_0.90,OBV_0.10,OBV_0.20,OBV_0.30,OBV_0.40,OBV_0.50,OBV_0.60,OBV_0.70,OBV_0.80,OBV_0.90


In [None]:
price_data.to_excel('C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx', index=False)

#### Se quiser normal só pular etapa anterior.

In [109]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Para cada alpha, criar as variáveis X_Cols e Y_Cols e treinar o modelo
for alpha in alpha_list:
    # Definindo as colunas de X e Y dinamicamente
    X_Cols = price_data[['RSI',
                         f'k_percent_{alpha:.2f}',
                         f'r_percent_{alpha:.2f}',
                         f'Price_Rate_Of_Change_{alpha:.2f}',
                         f'MACD_{alpha:.2f}',
                         f'MACD_EMA_{alpha:.2f}',
                         f'OBV_{alpha:.2f}']]
    
    Y_Cols = price_data[f'Prediction_{alpha:.2f}']

    # Dividir os dados em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state=0)

    # Criar o classificador Random Forest
    rand_frst_clf = RandomForestClassifier(n_estimators=100, oob_score=True, criterion="gini", random_state=0)

    # Ajustar o modelo
    rand_frst_clf.fit(X_train, y_train)

    # Fazer previsões
    y_pred = rand_frst_clf.predict(X_test)

    # Imprimir a acurácia
    accuracy = accuracy_score(y_test, y_pred, normalize=True) * 100.0
    print(f'Alpha: {alpha}')
    print(f'Correct Prediction (%): {accuracy:.5f}')
    print('---')


Alpha: 0.1
Correct Prediction (%): 84.61030
---
Alpha: 0.2
Correct Prediction (%): 77.01453
---
Alpha: 0.3
Correct Prediction (%): 74.10832
---
Alpha: 0.4
Correct Prediction (%): 71.40026
---
Alpha: 0.5
Correct Prediction (%): 70.60766
---
Alpha: 0.6
Correct Prediction (%): 69.55086
---
Alpha: 0.7
Correct Prediction (%): 69.15456
---
Alpha: 0.8
Correct Prediction (%): 68.56011
---
Alpha: 0.9
Correct Prediction (%): 68.75826
---


In [107]:
import warnings
from datetime import timedelta

# Desativa todos os warnings
warnings.filterwarnings("ignore")

# Filtrar últimos 30 dias
last_30_days = price_data[price_data['Date'] >= price_data['Date'].max() - timedelta(days=30)]

# Para cada alpha, selecionar as colunas de X, gerar previsões e comparar com a coluna 'Verify'
for alpha in alpha_list:
    # Selecionar as colunas de X como um array NumPy
    X_Cols = last_30_days[['RSI',
                           f'k_percent_{alpha:.2f}',
                           f'r_percent_{alpha:.2f}',
                           f'Price_Rate_Of_Change_{alpha:.2f}',
                           f'MACD_{alpha:.2f}',
                           f'MACD_EMA_{alpha:.2f}',
                           f'OBV_{alpha:.2f}']].values

    # Gerar previsões e atualizar a coluna 'Prediction_{alpha}' para os últimos 30 dias
    last_30_days[f'Prediction_{alpha:.2f}'] = rand_frst_clf.predict(X_Cols)

    # Comparar as previsões de 'Prediction_{alpha}' com 'Verify' e calcular a precisão
    last_30_days['Match'] = (last_30_days[f'Prediction_{alpha:.2f}'] == last_30_days['Verify']).astype(int)  # 1 para igual, 0 para diferente
    accuracy = last_30_days['Match'].mean()  # Proporção de acertos (iguais / total)

    # Imprimir a precisão para cada alpha
    print(f'\nPrecisão para Alpha: {alpha}')
    print(f'Precisão do modelo nos últimos 30 dias: {accuracy:.2%}')



Precisão para Alpha: 0.1
Precisão do modelo nos últimos 30 dias: 41.18%

Precisão para Alpha: 0.2
Precisão do modelo nos últimos 30 dias: 70.59%

Precisão para Alpha: 0.3
Precisão do modelo nos últimos 30 dias: 76.47%

Precisão para Alpha: 0.4
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.5
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.6
Precisão do modelo nos últimos 30 dias: 52.94%

Precisão para Alpha: 0.7
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.8
Precisão do modelo nos últimos 30 dias: 58.82%

Precisão para Alpha: 0.9
Precisão do modelo nos últimos 30 dias: 88.24%


In [108]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Criar listas para armazenar os resultados gerais
all_predictions = []
all_verify_values = []
all_actual_values = []
all_accuracy_scores_ytest = []
all_accuracy_scores_verify = []

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X), o target (y) e a verificação (verify) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha:.2f}',
                    f'r_percent_{alpha:.2f}',
                    f'Price_Rate_Of_Change_{alpha:.2f}',
                    f'MACD_{alpha:.2f}',
                    f'MACD_EMA_{alpha:.2f}',
                    f'OBV_{alpha:.2f}']]
    y = price_data[f'Prediction_{alpha:.2f}']
    verify = price_data['Verify']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y, verify], axis=1).dropna()
    X = data.iloc[:, :-2]
    y = data.iloc[:, -2]
    verify = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        y_verify = verify.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0, oob_score=True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Armazenar as predições e valores reais para análise posterior
        all_predictions.extend(y_pred)
        all_verify_values.extend(y_verify.values)
        all_actual_values.extend(y_test.values)

        # Calcular acurácia para y_test e verify
        accuracy_ytest = accuracy_score(y_test, y_pred) * 100.0
        accuracy_verify = accuracy_score(y_verify, y_pred) * 100.0

        # Armazenar os resultados das acurácias
        all_accuracy_scores_ytest.append(accuracy_ytest)
        all_accuracy_scores_verify.append(accuracy_verify)

        # Exibir resultados por fold
        print(f'Fold {fold}:')
        print(f' - Correct Prediction (y_test): {accuracy_ytest:.2f}%')
        print(f' - Correct Prediction (Verify): {accuracy_verify:.2f}%')
        fold += 1

    print('---')

# Resultados gerais
print("Resumo geral:")
print(f'Média de acurácia (y_test): {np.mean(all_accuracy_scores_ytest):.2f}%')
print(f'Média de acurácia (Verify): {np.mean(all_accuracy_scores_verify):.2f}%')


Alpha: 0.1
Fold 1:
 - Correct Prediction (y_test): 67.66%
 - Correct Prediction (Verify): 53.97%
Fold 2:
 - Correct Prediction (y_test): 51.49%
 - Correct Prediction (Verify): 49.31%
Fold 3:
 - Correct Prediction (y_test): 69.35%
 - Correct Prediction (Verify): 50.10%
Fold 4:
 - Correct Prediction (y_test): 68.35%
 - Correct Prediction (Verify): 53.57%
Fold 5:
 - Correct Prediction (y_test): 66.67%
 - Correct Prediction (Verify): 41.37%
---
Alpha: 0.2
Fold 1:
 - Correct Prediction (y_test): 51.69%
 - Correct Prediction (Verify): 51.59%
Fold 2:
 - Correct Prediction (y_test): 49.01%
 - Correct Prediction (Verify): 45.73%
Fold 3:
 - Correct Prediction (y_test): 57.84%
 - Correct Prediction (Verify): 48.12%
Fold 4:
 - Correct Prediction (y_test): 56.45%
 - Correct Prediction (Verify): 49.40%
Fold 5:
 - Correct Prediction (y_test): 59.03%
 - Correct Prediction (Verify): 44.84%
---
Alpha: 0.3
Fold 1:
 - Correct Prediction (y_test): 48.31%
 - Correct Prediction (Verify): 49.01%
Fold 2:
 - Co

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Definir o número de divisões para a validação cruzada
n_splits = 5

# Loop para cada alpha e realização de validação cruzada em série temporal
for alpha in alpha_list:
    # Definir as features (X) e o target (Y) dinamicamente
    X = price_data[['RSI',
                    f'k_percent_{alpha}',
                    f'r_percent_{alpha}',
                    f'Price_Rate_Of_Change_{alpha}',
                    f'MACD_{alpha}',
                    f'MACD_EMA_{alpha}',
                    f'OBV_{alpha}']]
    y = price_data[f'Prediction_{alpha}']

    # Remover possíveis valores NaN nas colunas selecionadas
    data = pd.concat([X, y], axis=1).dropna()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Configurar o TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    print(f'Alpha: {alpha}')
    fold = 1
    for train_index, test_index in tscv.split(X):
        # Dividir os dados em treino e teste com base nos índices
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Criar o modelo de Random Forest
        rand_frst_clf = RandomForestClassifier(n_estimators=100, criterion="gini", random_state=0,oob_score = True)

        # Ajustar o modelo aos dados de treino
        rand_frst_clf.fit(X_train, y_train)

        # Fazer previsões nos dados de teste
        y_pred = rand_frst_clf.predict(X_test)

        # Calcular e imprimir a acurácia para cada divisão
        accuracy = accuracy_score(y_test, y_pred) * 100.0
        print(f'Fold {fold}: Correct Prediction (%): {accuracy:.2f}')
        fold += 1

    print('---')


In [None]:
# Salvar o modelo treinado
joblib.dump(rand_frst_clf, 'C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/random_forest_model.pkl')

In [None]:
price_data.to_excel('C:/Users/Samsung/Documents/GitHub-Datas/TCC_Machine_Learning/Datas referências/test_day.xlsx', index=False)