In [None]:
# IPython shell escape: install papermill, which this notebook imports as `pm`
# to execute the pipeline notebooks.
!pip install papermill

In [25]:
import papermill as pm
from datetime import datetime
import os
from google.colab import userdata

# Timestamp used to tag every file produced by this run.
run_id = datetime.now().strftime("%Y%m%d_%H%M%S")

# Folder layout of the workflow; everything hangs off a single root.
root = "/content/drive/MyDrive/Unicaba/Programacion Avanzada/TP Final/workflow"
nb, data, models, results, runs = (
    f"{root}/{name}"
    for name in ("notebooks", "data", "models", "results", "runs")
)
# Artifacts get their own per-run subfolder.
art = f"{root}/artifacts/{run_id}"

# Create the output folders up front (existing ones are left as they are).
# The notebooks and data folders are not created here.
for folder in (art, models, results, runs):
    os.makedirs(folder, exist_ok=True)

def run_nb(input_path, output_path, params):
    """Execute a notebook with papermill in two passes.

    The first pass only injects ``params`` into a temporary "prepared" copy
    of the notebook; the second pass executes that copy and writes the
    result to ``output_path``. Per the original author's note, the split
    exists to avoid Colab bugs related to cell tags.

    The prepared copy is written next to ``output_path`` with a
    ``_prepared`` suffix before the extension, and it is not deleted
    afterwards.

    Args:
        input_path: Path of the source notebook.
        output_path: Path where the executed notebook is written.
        params: Mapping of parameter names to the values to inject.
    """
    # Split off only the final extension. A plain str.replace would rewrite
    # every ".ipynb" occurrence in the path (e.g. an ".ipynb_checkpoints"
    # folder) and would return the path unchanged when the extension is
    # missing, making the prepared copy collide with the final output.
    stem, ext = os.path.splitext(output_path)
    tmp_path = f"{stem}_prepared{ext or '.ipynb'}"

    # 1) Inject the parameters into a temporary notebook without running it.
    pm.execute_notebook(
        input_path,
        tmp_path,
        parameters=params,
        prepare_only=True,
    )

    # 2) Execute the already-prepared notebook.
    pm.execute_notebook(
        tmp_path,
        output_path,
    )


# ---------------------------
# Stage 1: EDA
# ---------------------------
# NOTE(review): the parameter is named sqlite_path but receives a CSV file and
# sqlite_query is None; presumably the EDA notebook handles this. Confirm.
run_nb(
    input_path=f"{nb}/1_EDA.ipynb",
    output_path=f"{runs}/1_EDA_{run_id}.ipynb",
    params={
        "sqlite_path": f"{data}/bsas_realstate_on_sale_properati_dataset_2020.csv",
        "sqlite_query": None,
        "clean_output_path": f"{data}/clean_{run_id}.csv",
    },
)


# ---------------------------
# Stage 2: Preprocessing
# ---------------------------
# Reads the per-run clean CSV path and points the notebook at this run's
# artifacts folder.
run_nb(
    input_path=f"{nb}/2_Preprocesamiento.ipynb",
    output_path=f"{runs}/2_Preprocesamiento_{run_id}.ipynb",
    params={
        "input_clean_path": f"{data}/clean_{run_id}.csv",
        "output_folder": art,
    },
)

# ---------------------------
# Stage 3: Models
# ---------------------------
# The train/test pickle paths all point into this run's artifacts folder.
run_nb(
    input_path=f"{nb}/3_Modelos.ipynb",
    output_path=f"{runs}/3_Modelos_{run_id}.ipynb",
    params={
        "X_train": f"{art}/X_train.pkl",
        "X_test": f"{art}/X_test.pkl",
        "y_train": f"{art}/y_train.pkl",
        "y_test": f"{art}/y_test.pkl",
        "model_output_path": f"{models}/model_{run_id}.pkl",
        "results_output_path": f"{results}/results_{run_id}.csv",
    },
)

# ---------------------------
# Stage 4: Evaluation + saving
# ---------------------------
# USER and PASS are passed as empty strings here.
# NOTE(review): papermill writes injected parameters into the output notebook,
# so real credentials supplied this way would presumably end up in plain text
# in the saved run copy. Confirm how the notebook is meant to obtain them.
run_nb(
    input_path=f"{nb}/4_Evaluacion_y_Guardado.ipynb",
    output_path=f"{runs}/4_Evaluacion_y_Guardado_{run_id}.ipynb",
    params={
        "USER": "",
        "PASS": "",
        "results_input_path": f"{results}/results_{run_id}.csv",
    },
)

# Final confirmation, reached only if every stage above finished without raising.
# The emoji was stored as mojibake (UTF-8 bytes decoded as cp1252); restored here.
print("🔥 Pipeline ejecutado CORRECTAMENTE 🔥")


Executing:   0%|          | 0/58 [00:00<?, ?cell/s]

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

KeyboardInterrupt: 