# Basic Decision Tree Demo

This notebook mirrors the behaviour of `basic/demo.py` while keeping the code interactive.
It reuses the same helper modules from the repository to load data, run the pipeline, and
summarise model performance.


In [None]:
from pathlib import Path
import importlib
import sys

# Resolve the directory the notebook is executed from and put its parent on
# sys.path so that the notebook's own directory can be imported by name.
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
if str(PARENT_DIR) not in sys.path:
    sys.path.insert(0, str(PARENT_DIR))

# Candidate package names in priority order, without duplicates. Empty names
# (cwd is the filesystem root) and names with a leading dot (hidden
# directories) are skipped: importlib treats a leading dot as a relative
# import and raises TypeError rather than ModuleNotFoundError.
PACKAGE_CANDIDATES = []
for name in (NOTEBOOK_DIR.name, 'basic', 'based'):
    if not name or name.startswith('.'):
        continue
    if name not in PACKAGE_CANDIDATES:
        PACKAGE_CANDIDATES.append(name)

# Pick the first candidate that imports successfully.
PACKAGE_NAME = None
for candidate in PACKAGE_CANDIDATES:
    try:
        importlib.import_module(candidate)
    except ModuleNotFoundError as exc:
        missing = exc.name or ''
        candidate_is_missing = missing == candidate or (
            missing != '' and candidate.startswith(missing + '.')
        )
        if not candidate_is_missing:
            # The candidate exists but one of ITS imports is missing (for
            # example an uninstalled dependency). Surface that error instead
            # of misreporting it as "package not found" below.
            raise
        continue
    else:
        PACKAGE_NAME = candidate
        break

if PACKAGE_NAME is None:
    available = ', '.join(PACKAGE_CANDIDATES)
    raise ModuleNotFoundError(
        'Unable to locate the project package. Ensure the notebook is executed '
        'from inside the project directory. Tried package names: ' + (available or 'none')
    )

# Import the helper modules from whichever package name was resolved.
data_module = importlib.import_module(f"{PACKAGE_NAME}.data")
evaluation_module = importlib.import_module(f"{PACKAGE_NAME}.evaluation")
pipeline_module = importlib.import_module(f"{PACKAGE_NAME}.pipeline")

# Bind the helpers used by the rest of the notebook to short module-level names.
load_iris_data = data_module.load_iris_data
evaluate_model = evaluation_module.evaluate_model
plot_accuracy_progression = evaluation_module.plot_accuracy_progression
create_dataset_splits = pipeline_module.create_dataset_splits
perform_hyperparameter_search = pipeline_module.perform_hyperparameter_search
train_final_model = pipeline_module.train_final_model

# Directory for generated artefacts; created next to the notebook if absent.
OUTPUT_DIR = NOTEBOOK_DIR / 'outputs'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


In [None]:
def run_demo() -> None:
    """Run the decision tree demo end to end and print the results.

    Loads the iris data, builds the dataset splits, runs the hyper-parameter
    search and prints the selected parameters, then prints per-split accuracy
    and the validation classification report for the tuned estimator. After
    that it calls ``train_final_model``, prints per-split accuracy for the
    returned estimator, and saves an accuracy chart under ``OUTPUT_DIR``.
    """

    def show_accuracy(heading: str, metrics) -> None:
        # Print a heading followed by one accuracy line per split.
        print(heading)
        for split, scores in metrics.items():
            print(f"  {split.capitalize()}: {scores['accuracy']:.3f}")

    # The third value returned by load_iris_data is not needed here.
    X, y, _ = load_iris_data()
    dataset_splits = create_dataset_splits(X, y)

    tuned_estimator, chosen_params = perform_hyperparameter_search(dataset_splits)
    print("Best hyper-parameters:")
    for param_name, param_value in chosen_params.items():
        print(f"  {param_name}: {param_value}")

    tuned_metrics = evaluate_model(tuned_estimator, dataset_splits)
    show_accuracy("\nAccuracy after hyper-parameter tuning:", tuned_metrics)

    validation_report = tuned_metrics["validation"]["classification_report"]
    print("\nValidation classification report:")
    print(validation_report)

    retrained_estimator = train_final_model(tuned_estimator, dataset_splits)
    retrained_metrics = evaluate_model(retrained_estimator, dataset_splits)
    show_accuracy(
        "\nFinal model accuracy (retrained on train + validation):",
        retrained_metrics,
    )

    chart_target = OUTPUT_DIR / "accuracy_progression.png"
    saved_chart = plot_accuracy_progression(tuned_metrics, chart_target)
    print(f"\nAccuracy chart saved to: {saved_chart}")


In [None]:
# Execute the demo: prints the metrics and saves the accuracy chart to OUTPUT_DIR.
run_demo()
