In [1]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder

from src.data_ingestion import DataIngestion
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv

from src.featureenginner import ClassFeatureEngineering
from src.modelagem import Modeling
from src.preprocessing import ClassPreprocessing
from src.utils.save_mlflow import save_mlflow



In [2]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Connection keyword -> name of the environment variable that supplies its value.
# os.getenv yields None for any variable that is not set.
env_var_by_param = {
    "database": "DB_NAME",
    "user": "DB_USER",
    "password": "DB_PASSWORD",
    "host": "DB_HOST",
    "port": "DB_PORT",
}
db_params = {param: os.getenv(env_var) for param, env_var in env_var_by_param.items()}

# read_db() supplies the frame that every later cell starts from.
ingestion_data = DataIngestion(db_params=db_params)
df = ingestion_data.read_db()

  df = pd.read_sql('SELECT * FROM zara', conn)


In [3]:
# --- Preprocessing -------------------------------------------------------
# Columns handed to the preprocessor as `drop_columns`.
col_drop = ['product_id', 'url', 'terms', 'description']
preprocessor = ClassPreprocessing(df=df)
df_preprocessing = preprocessor(drop_columns=col_drop)

# --- Feature engineering -------------------------------------------------
# `scraped_at` is passed as a date column; `product_position` and `section`
# are passed as the columns to one-hot encode.
ffe = ClassFeatureEngineering(df_preprocessing)
df_final = ffe(
    date_cols=['scraped_at'],
    onehot_cols=['product_position', 'section'],
)


In [4]:
# Print a summary of the engineered frame: index range, column names,
# non-null counts and dtypes.
df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 19 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   promotion                        252 non-null    bool          
 1   seasonal                         252 non-null    bool          
 2   sales_volume                     252 non-null    float64       
 3   name                             252 non-null    object        
 4   price                            252 non-null    float64       
 5   scraped_at                       252 non-null    datetime64[ns]
 6   scraped_at_year                  252 non-null    float64       
 7   scraped_at_month                 252 non-null    float64       
 8   scraped_at_day                   252 non-null    float64       
 9   scraped_at_weekday               252 non-null    float64       
 10  scraped_at_is_weekend            252 non-null    bool         

In [5]:
# Remove the raw `scraped_at` timestamp column from the frame.
# `errors='ignore'` keeps this cell idempotent: because it reassigns
# `df_final` from itself, a second run would otherwise raise KeyError once
# the column is already gone. (`axis=1` was redundant next to `columns=`.)
df_final = df_final.drop(columns=['scraped_at'], errors='ignore')

In [6]:
# Preview the first five rows of `df_final` (rich display of the last expression).
df_final.head()

Unnamed: 0,promotion,seasonal,sales_volume,name,price,scraped_at_year,scraped_at_month,scraped_at_day,scraped_at_weekday,scraped_at_is_weekend,scraped_at_hour,product_position_Aisle,product_position_End-cap,product_position_Front of Store,section_MAN,section_WOMAN,receita,price_diff
0,False,False,1.435117,20,-1.274776,0.0,0.0,0.0,0.0,False,-0.73252,True,False,False,True,False,-0.920157,-1.274776
1,False,False,-1.679839,177,1.591917,0.0,0.0,0.0,0.0,False,-0.73252,True,False,False,True,False,-0.413717,1.591917
2,True,True,0.569133,142,0.822387,0.0,0.0,0.0,0.0,False,-0.73252,False,True,False,True,False,1.232661,0.822387
3,True,True,-0.367221,147,0.822387,0.0,0.0,0.0,0.0,False,-0.73252,True,False,False,True,False,0.445226,0.822387
4,False,True,1.606016,52,1.014769,0.0,0.0,0.0,0.0,False,-0.73252,False,True,False,True,False,2.380072,1.014769


In [7]:
# Wrap the engineered frame in the project's Modeling helper; the feature
# selection, tuning and evaluation cells below all go through this object.
modeling = Modeling(df_final)

In [8]:
# Select features using a random forest as the ranking estimator.
# The forest is seeded so the selected columns are reproducible across
# Restart & Run All; an unseeded forest can rank features differently per run.
selected_features = modeling.feature_selection_rfe(RandomForestRegressor(random_state=42))
print("Features selecionadas:", selected_features)

Features selecionadas: Index(['promotion', 'name', 'price', 'receita', 'price_diff'], dtype='object')


In [9]:
# Hyper-parameter search space for the gradient-boosting regressor.
params = {
    'n_estimators': [50, 100, 200, 250],
    'max_depth': [3, 5, 10, 15],
    'min_samples_split': [2, 5, 10, 15],
}

# Seed the estimator so that repeated runs of the search fit the same trees.
# NOTE(review): whether the search itself is seeded depends on
# Modeling.tunning_hiper, which is not visible here -- confirm.
best_model, best_params, best_score = modeling.tunning_hiper(
    GradientBoostingRegressor(random_state=42),
    params,
)
print("Melhores parâmetros:", best_params)
# NOTE(review): the label says "negative MSE" but the recorded value is
# positive (0.176...); confirm what tunning_hiper actually returns as score.
print("Melhor score CV (MSE negativo):", best_score)


Melhores parâmetros: {'n_estimators': 100, 'min_samples_split': 15, 'max_depth': 15}
Melhor score CV (MSE negativo): 0.17640582398231042


In [10]:
# Compute evaluation metrics for the tuned model; the recorded output is a
# dict with RMSE, MAE and R2.
# NOTE(review): this evaluates `modeling.best_model`, while the object that is
# later registered is the `best_model` returned by tunning_hiper -- presumably
# the same estimator; confirm.
# NOTE(review): the recorded R2 (~0.9999) and RMSE (~0.008) are far better than
# the CV score printed by the tuning cell (~0.176). Confirm that evaluate_model
# scores held-out data and that `receita` / `price_diff` do not leak the target.
results = modeling.evaluate_model(modeling.best_model)
print(results)


{'RMSE': 0.008437292485967758, 'MAE': 0.006631096363432791, 'R2': 0.9999209020412533}


In [11]:
# Hand the tuned model, its hyper-parameters and the evaluation metrics to the
# project's MLflow helper (the recorded output shows a model registry entry).
save_mlflow(best_model, best_params, results)

Successfully registered model 'elasticidade_model'.
2025/09/08 15:50:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: elasticidade_model, version 1
Created version '1' of model 'elasticidade_model'.


Modelo, params e results, registrados com sucesso!
