# In [None]:
# -------------------------------------------
# Notebook: ML_Pipeline_Trained.ipynb
# Purpose: Train and evaluate the ML pipeline
# -------------------------------------------

import sys
from pathlib import Path

from Tools.ML_Pipeline import PipelineConfig, MLPipeline
from sklearn.datasets import load_breast_cancer
import pandas as pd

# -----------------------------
# Load dataset
# -----------------------------
# Request the pandas flavour of the dataset so features and label arrive
# together in a single DataFrame.
data = load_breast_cancer(as_frame=True)
df = data.frame

# Separate the feature columns from the "target" label column.
X, y = df.drop(columns="target"), df["target"]

# -----------------------------
# Initialize pipeline
# -----------------------------
# Gather the run settings in one mapping, then hand them to PipelineConfig
# as keyword arguments.
_config_kwargs = {
    "test_size": 0.2,
    "random_state": 42,
    "cv_folds": 5,
    "log_level": "INFO",
}
config = PipelineConfig(**_config_kwargs)

# The pipeline object is driven entirely by the config built above.
pipeline = MLPipeline(config)

# -----------------------------
# Prepare data, train, evaluate
# -----------------------------
# Stage order matters: data preparation, then fitting, then the two
# evaluation passes.
pipeline.prepare_data(X, y)
pipeline.train()

metrics = pipeline.evaluate()
cv_results = pipeline.cross_validate()

# Report both result objects only after every stage has finished.
for _label, _value in (
    ("Evaluation Metrics:", metrics),
    ("Cross-Validation Results:", cv_results),
):
    print(_label, _value)

# -----------------------------
# Save pipeline and config
# -----------------------------
# `save_dir` was referenced below without ever being defined, so the script
# raised NameError after training had already completed. Define it here and
# make sure the directory exists before anything is written into it.
# NOTE(review): "saved_models" (relative to the working directory) is a
# placeholder location -- adjust to wherever artifacts should live.
save_dir = Path("saved_models")
save_dir.mkdir(parents=True, exist_ok=True)

# The save methods are given plain string paths, as in the original calls.
pipeline.save_model(str(save_dir / "ML_Pipeline_Model.joblib"))
pipeline.save_config(str(save_dir / "ML_Pipeline_Config.json"))

print("Pipeline and config saved successfully!")
