# Pipeline de Regressão (m³/h) – FLEXO

Notebook automatizado para treinar e persistir o modelo de throughput (m³/h) usando o `pipelines.run_pipeline`.

In [1]:
# === CONFIGURAÇÃO DO AMBIENTE ===
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from pprint import pprint

# Encontrar raiz do projeto
def find_project_root(marker="pyproject.toml"):
    """Locate the project directory by searching upward for a marker file.

    The search starts at the resolved current working directory and then
    visits each of its ancestors, nearest first.

    Args:
        marker: Name of the file (or directory) whose presence identifies
            the project root.

    Returns:
        The first visited directory that contains ``marker``. When no
        visited directory contains it, the resolved working directory.
    """
    start_dir = Path.cwd().resolve()
    candidates = [start_dir, *start_dir.parents]
    # The default of next() covers the "marker not found anywhere" case.
    return next(
        (folder for folder in candidates if (folder / marker).exists()),
        start_dir,
    )

PROJECT_ROOT = find_project_root()
DATA_DIR = PROJECT_ROOT / "data"
SRC_DIR = PROJECT_ROOT / "src"

# Put the source directory at the front of the import path, unless it is already listed
if not any(entry == str(SRC_DIR) for entry in sys.path):
    sys.path.insert(0, str(SRC_DIR))

# Report the resolved locations, one per line
print(
    f"✓ Projeto: {PROJECT_ROOT}",
    f"✓ Source: {SRC_DIR}",
    f"✓ Data: {DATA_DIR}",
    sep="\n",
)

✓ Projeto: /home/adami/Documentos/Projeto_IA_AMCOM/project_data_science
✓ Source: /home/adami/Documentos/Projeto_IA_AMCOM/project_data_science/src
✓ Data: /home/adami/Documentos/Projeto_IA_AMCOM/project_data_science/data


In [2]:
# Import the pipeline entry point and the model persistence helpers
try:
    # Preferred route: regular package imports resolved through sys.path
    from pipelines.DS.pipelines import run_pipeline
    from model import save_model_artifacts, load_model_artifacts
    print("✓ Imports realizados com sucesso")
except ImportError as e:
    print(f"❌ Erro no import: {e}")
    # Fallback: load the two source files directly from their paths under SRC_DIR
    import importlib.util
    import sys

    def _load_module_from_file(module_name, file_path):
        """Execute the Python file at ``file_path`` and return it as a module.

        The module is registered in ``sys.modules`` under ``module_name``
        before its code runs (the sequence recommended by the importlib
        documentation for importing a source file directly); the entry is
        removed again if execution fails, so no half-initialised module is
        left behind.

        Args:
            module_name: Name under which the module is registered.
            file_path: Path of the ``.py`` file to execute.

        Returns:
            The executed module object.

        Raises:
            ImportError: If no import spec or loader can be built for the file.
        """
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            raise ImportError(
                f"Não foi possível criar o spec de import para {module_name!r} em {file_path}"
            )
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Roll back the registration so a retry starts from a clean state
            sys.modules.pop(module_name, None)
            raise
        return module

    # Load pipelines.py directly
    pipelines_path = SRC_DIR / "pipelines" / "DS" / "pipelines.py"
    pipelines_module = _load_module_from_file("pipelines_module", pipelines_path)
    run_pipeline = pipelines_module.run_pipeline

    # Load the model persistence helpers
    model_path = SRC_DIR / "model" / "model_persistence.py"
    model_module = _load_module_from_file("model_module", model_path)
    save_model_artifacts = model_module.save_model_artifacts
    load_model_artifacts = model_module.load_model_artifacts

    print("✓ Imports realizados via fallback")

❌ Erro no import: No module named 'pipelines.DS.feature_pipeline'
✓ Imports realizados via fallback


In [5]:
from pathlib import Path
import sys

# Find the directory that holds the `pipelines` package by walking up from the
# working directory (checking each ancestor and its `src` child) instead of
# assuming the notebook sits exactly three levels below it. A fixed
# `Path.cwd().parents[2]` raises IndexError on shallow paths and picks the wrong
# directory whenever the notebook is started from a different location.
_cwd = Path.cwd().resolve()
SRC_DIR = next(
    (
        base
        for ancestor in (_cwd, *_cwd.parents)
        for base in (ancestor, ancestor / "src")
        if (base / "pipelines" / "DS" / "pipelines.py").is_file()
    ),
    _cwd,  # nothing found: keep SRC_DIR defined and let the import below report the problem
)
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

from pipelines.DS import pipelines


## Parâmetros principais

In [7]:
# Machine, task and estimator selection
MODEL_TYPE = "catboost"
MACHINE_TYPE = "flexo"
TASK_TYPE = "regression"  # kept for compatibility with the pipeline
MODEL_NAME = f"regressor_m3h_{MODEL_TYPE}"

# Execution options
RANDOM_STATE = 42
SAVE_MODEL = True
SHAP_SAMPLE_SIZE = 0
CLASSIFICATION_THRESHOLD = 0.7  # ignored in regression but kept in the dictionary


## Execução do pipeline

In [8]:
# Run the pipeline with the parameters defined in the configuration cell
results = pipelines.run_pipeline(
    model_name=MODEL_NAME,
    model_type=MODEL_TYPE,
    task_type=TASK_TYPE,
    machine_type=MACHINE_TYPE,
    save_model=SAVE_MODEL,
    random_state=RANDOM_STATE,
    shap_sample_size=SHAP_SAMPLE_SIZE,
    classification_threshold=CLASSIFICATION_THRESHOLD,
)
# List the keys of the returned mapping as the cell output
results_keys = [*results.keys()]
results_keys


Model artifacts saved to: /home/adami/Documentos/Projeto_IA_AMCOM/project_data_science/src/model/flexo_regressor_m3h_catboost_20251118_191409
Streamlit-compatible model saved to: /home/adami/Documentos/Projeto_IA_AMCOM/project_data_science/src/model/flexo_model_artifacts.pkl


['df',
 'clustering_artifacts',
 'gmm',
 'metrics',
 'feature_importance',
 'shap_values',
 'top_features',
 'cluster_k',
 'selected_features',
 'exclude_features',
 'model_type',
 'task_type',
 'target_column',
 'estimator',
 'regressor',
 'model_save_path']

## Métricas e artefatos

In [9]:
# Pull the metrics entry out of the pipeline results and display it
metrics = results["metrics"]
metrics


{'mae': 39.0181076053311, 'rmse': 62.551847932207295, 'r2': 0.5925219529274022}

In [10]:
# Saved-model location reported by the pipeline; None when the key is absent
model_path = results.get("model_save_path", None)
model_path


PosixPath('/home/adami/Documentos/Projeto_IA_AMCOM/project_data_science/src/model/flexo_regressor_m3h_catboost_20251118_191409')

## Pré-visualização dos dados enriquecidos

In [11]:
# Take a copy of the frame held in `results`, then show the first rows of four columns
df_preview = results["df"].copy()
df_preview[
    ["CD_OP", "QT_PRODUZIDA", "VL_DURACAO_PRODUCAO", "y_volume_por_hora"]
].head(n=5)


Unnamed: 0,CD_OP,QT_PRODUZIDA,VL_DURACAO_PRODUCAO,y_volume_por_hora
0,639837-3/795630,3360.0,63.0,65.24352
1,640041-1/779500,7200.0,63.0,121.882011
2,640477-2/841380,3640.0,43.0,93.805624
3,640579-2/792980,4800.0,38.0,182.064899
4,640836-1/854630,3093.0,46.0,71.708354


Este notebook gera automaticamente o pickle consumido pelo Streamlit (`<machine>_model_artifacts.pkl`).