In [24]:
# -------------------------------------------
# Notebook: ML_Pipeline_Trained.ipynb
# Purpose: Train, evaluate, and save the ML pipeline (model + config)
# -------------------------------------------

import sys
from pathlib import Path

from Tools.ML_Pipeline import PipelineConfig, MLPipeline
from sklearn.datasets import load_breast_cancer
import pandas as pd

# -----------------------------
# Load dataset
# -----------------------------
# as_frame=True makes the loader expose a pandas DataFrame via `.frame`;
# the label lives in its "target" column alongside the feature columns.
data = load_breast_cancer(as_frame=True)
df = data.frame

# Label first, then every remaining column as the feature matrix.
y = df["target"]
X = df.drop(columns="target")

# -----------------------------
# Initialize pipeline
# -----------------------------
# Only these four settings are passed explicitly; everything else
# (scaler, model type, model params) is left to PipelineConfig's defaults.
# NOTE(review): the captured output repeats the "Initialized pipeline" line
# several times with one timestamp -- possibly logging handlers piling up
# each time MLPipeline is constructed in the same kernel; confirm in
# Tools.ML_Pipeline.
config = PipelineConfig(test_size=0.2, random_state=42, cv_folds=5, log_level="INFO")
pipeline = MLPipeline(config)

# -----------------------------
# Prepare data, train, evaluate
# -----------------------------
# The calls run in this fixed order: data preparation, training,
# evaluation, then cross-validation.
pipeline.prepare_data(X, y)
pipeline.train()

metrics = pipeline.evaluate()
cv_results = pipeline.cross_validate()

# Report both result dictionaries.
print(f"Evaluation Metrics: {metrics}")
print(f"Cross-Validation Results: {cv_results}")

# -----------------------------
# Save pipeline and config
# -----------------------------
# `save_dir` is not assigned anywhere in this cell, so on a fresh kernel the
# save calls below would raise NameError. Reuse the value if an earlier cell
# already defined it; otherwise fall back to the current working directory.
# NOTE(review): the fallback location is an assumption -- point it at the
# project's real artifact directory if there is one.
save_dir = Path(globals().get("save_dir", Path.cwd()))
save_dir.mkdir(parents=True, exist_ok=True)  # ensure the target directory exists

# Both save methods are handed plain string paths.
pipeline.save_model(str(save_dir / "ML_Pipeline_Model.joblib"))
pipeline.save_config(str(save_dir / "ML_Pipeline_Config.json"))

print("Pipeline and config saved successfully!")


2025-12-07 16:17:05,940 - MLPipeline - INFO - Initialized pipeline with config: {'test_size': 0.2, 'random_state': 42, 'scaler_type': 'standard', 'model_type': 'random_forest', 'model_params': {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}, 'cv_folds': 5, 'log_level': 'INFO'}
2025-12-07 16:17:05,940 - MLPipeline - INFO - Initialized pipeline with config: {'test_size': 0.2, 'random_state': 42, 'scaler_type': 'standard', 'model_type': 'random_forest', 'model_params': {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}, 'cv_folds': 5, 'log_level': 'INFO'}


2025-12-07 16:17:05,940 - MLPipeline - INFO - Initialized pipeline with config: {'test_size': 0.2, 'random_state': 42, 'scaler_type': 'standard', 'model_type': 'random_forest', 'model_params': {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}, 'cv_folds': 5, 'log_level': 'INFO'}
2025-12-07 16:17:05,940 - MLPipeline - INFO - Initialized pipeline with config: {'test_size': 0.2, 'random_state': 42, 'scaler_type': 'standard', 'model_type': 'random_forest', 'model_params': {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}, 'cv_folds': 5, 'log_level': 'INFO'}
2025-12-07 16:17:05,940 - MLPipeline - INFO - Initialized pipeline with config: {'test_size': 0.2, 'random_state': 42, 'scaler_type': 'standard', 'model_type': 'random_forest', 'model_params': {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}, 'cv_folds': 5, 'log_level': 'INFO'}
2025-12-07 16:17:05,940 - MLPipeline - INFO - Initialized pipeline with config: {'test_size': 0.2, 'random_state': 42, 'scaler_type':

Evaluation Metrics: {'accuracy': 0.9649122807017544, 'precision': 0.9652053622194477, 'recall': 0.9649122807017544, 'f1': 0.9647382344750765}
Cross-Validation Results: {'mean_cv_score': np.float64(0.9582417582417582), 'std_cv_score': np.float64(0.017582417582417565)}
Pipeline and config saved successfully!
