In [1]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd


# Definir o ticker da ação
ticker_symbol = 'PETR4.SA'

# Calcular as datas para o último ano até hoje
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

# Carregar os dados da ação para o último ano
stock_data = yf.download(ticker_symbol, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))

# Verificar os dados carregados
print(stock_data.tail())  # Exibe as últimas linhas para conferência


# Assumindo que 'stock_data' contém os dados históricos da ação
stock_data['Target'] = (stock_data['Close'].diff() > 0).astype(int)
stock_data.dropna(inplace=True)  # Remove linhas com NaN resultantes do cálculo da diferença

# Agora 'stock_data' tem uma coluna 'Target' para classificação


[*********************100%%**********************]  1 of 1 completed

                 Open       High        Low      Close  Adj Close     Volume
Date                                                                        
2024-03-05  40.099998  40.369999  39.610001  39.959999  39.959999   30293300
2024-03-06  40.200001  40.840000  40.110001  40.840000  40.840000   45481000
2024-03-07  40.639999  40.939999  40.279999  40.389999  40.389999   50199800
2024-03-08  35.900002  37.060001  35.099998  36.700001  36.700001  230369800
2024-03-11  35.669998  37.740002  35.299999  35.650002  35.650002  127528700





In [2]:
from pycaret.classification import *

# Configurando o ambiente do PyCaret para classificação sem o argumento 'silent'
clf1 = setup(data=stock_data, target='Target', session_id=123, verbose=False)


In [3]:
best_model = compare_models(sort='Accuracy')  # Você pode ajustar o 'sort' para qualquer métrica de interesse


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.8435,0.0,0.9122,0.8216,0.8587,0.6815,0.6987,0.007
lda,Linear Discriminant Analysis,0.8376,0.9349,0.9233,0.8075,0.8587,0.6683,0.6858,0.006
qda,Quadratic Discriminant Analysis,0.7797,0.8804,0.8378,0.7773,0.8015,0.551,0.5609,0.007
gbc,Gradient Boosting Classifier,0.6062,0.596,0.64,0.6664,0.6393,0.1941,0.2058,0.021
et,Extra Trees Classifier,0.5837,0.6411,0.64,0.632,0.6258,0.1509,0.158,0.026
lightgbm,Light Gradient Boosting Machine,0.5833,0.6032,0.6456,0.6402,0.6194,0.157,0.1828,0.029
ada,Ada Boost Classifier,0.5549,0.5956,0.5989,0.624,0.5938,0.0938,0.1091,0.017
lr,Logistic Regression,0.5497,0.4864,0.9689,0.5521,0.7022,0.0055,-0.0006,1.195
dt,Decision Tree Classifier,0.5497,0.5499,0.5356,0.651,0.558,0.0939,0.1122,0.009
dummy,Dummy Classifier,0.549,0.5,1.0,0.549,0.7086,0.0,0.0,0.006


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

In [4]:
from pycaret.regression import *
import pandas as pd

# Supondo que 'stock_data' é o seu DataFrame e que você deseja prever o preço de fechamento ('Close')
# Selecionando features para o modelo
features = ['Open', 'High', 'Low', 'Volume']  # Exemplo de features selecionadas
data = stock_data[features + ['Close']]  # Incluindo a variável-alvo

# Dividindo os dados (uma abordagem simples, pode-se usar métodos mais sofisticados para séries temporais)
train_data = data.iloc[:-60]  # Todos os dados exceto os últimos 60 dias para treinamento
test_data = data.iloc[-60:]  # Os últimos 60 dias para teste

# Configuração do PyCaret
exp_name = setup(data=train_data, target='Close', session_id=123)


Unnamed: 0,Description,Value
0,Session id,123
1,Target,Close
2,Target type,Regression
3,Original data shape,"(188, 5)"
4,Transformed data shape,"(188, 5)"
5,Transformed train set shape,"(131, 5)"
6,Transformed test set shape,"(57, 5)"
7,Numeric features,4
8,Preprocess,True
9,Imputation type,simple


In [5]:
# Compara modelos para encontrar o melhor com base numa métrica específica, como RMSE
best_model = compare_models()


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,0.1557,0.0433,0.2018,0.9965,0.0067,0.0053,0.006
ridge,Ridge Regression,0.1648,0.0459,0.2093,0.9965,0.0069,0.0056,0.006
br,Bayesian Ridge,0.1563,0.0434,0.2024,0.9965,0.0067,0.0053,0.005
et,Extra Trees Regressor,0.2336,0.0862,0.2887,0.9942,0.0095,0.0079,0.024
lar,Least Angle Regression,0.2143,0.0799,0.2726,0.9935,0.0091,0.0073,0.006
gbr,Gradient Boosting Regressor,0.2398,0.0989,0.3049,0.9934,0.01,0.0081,0.012
catboost,CatBoost Regressor,0.2528,0.1134,0.3285,0.9927,0.0107,0.0085,0.259
rf,Random Forest Regressor,0.2655,0.1143,0.3298,0.9924,0.0107,0.0089,0.028
ada,AdaBoost Regressor,0.282,0.1326,0.3584,0.991,0.0118,0.0096,0.013
en,Elastic Net,0.2978,0.1435,0.3714,0.9901,0.0124,0.0103,0.006


Processing:   0%|          | 0/81 [00:00<?, ?it/s]

In [6]:
# Ajuste fino do modelo
tuned_model = tune_model(best_model, optimize='RMSE')

# Avaliação do modelo
evaluate_model(tuned_model)

# Previsões no conjunto de teste
predictions = predict_model(tuned_model, data=test_data)


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.1146,0.0184,0.1358,0.9992,0.0044,0.0038
1,0.1297,0.0264,0.1623,0.9985,0.0059,0.0048
2,0.1353,0.0329,0.1813,0.9981,0.0051,0.0041
3,0.1758,0.0447,0.2115,0.9976,0.0068,0.0058
4,0.1668,0.043,0.2072,0.9976,0.007,0.0058
5,0.1347,0.0496,0.2227,0.9967,0.0079,0.0049
6,0.1622,0.0513,0.2265,0.9966,0.0075,0.0055
7,0.1539,0.0423,0.2056,0.9946,0.0063,0.0048
8,0.1045,0.0189,0.1373,0.999,0.005,0.0036
9,0.2609,0.1064,0.3263,0.9875,0.0113,0.0089


Fitting 10 folds for each of 2 candidates, totalling 20 fits


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Linear Regression,0.2094,0.0868,0.2946,0.984,0.0075,0.0054


In [7]:
# Finaliza o modelo
final_model = finalize_model(tuned_model)

# Salva o modelo
save_model(final_model, 'final_model_petr4')

# Para carregar o modelo
loaded_model = load_model('final_model_petr4')


Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


In [9]:
from ydata_profiling import ProfileReport
# ProfileReport(data)