In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    accuracy_score,
    classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import optuna

import os

In [2]:
# Collect the prepared scenario files.
# os.listdir() returns entries in arbitrary, platform-dependent order, but the
# driver cells below pair this list positionally (zip) with scenarios_description,
# so the listing is sorted to make that pairing deterministic across machines.
# Hidden entries (e.g. ".ipynb_checkpoints", ".DS_Store") are skipped because they
# are not datasets and would both shift the pairing and break pd.read_csv.
# NOTE(review): confirm the sorted file names line up with the order of
# scenarios_description (Scenario 1_N, 1_S, 2_N, ...).
scenario_datasets = sorted(x for x in os.listdir("./Scenarios") if not x.startswith("."))
scenario_datasets = [f"./Scenarios/{x}" for x in scenario_datasets]
scenario_datasets

In [3]:
# Human-readable description of how each scenario dataset was pre-processed.
# The entries are only printed by the driver cells; they are matched to the files
# in scenario_datasets by position.
# NOTE(review): scenarios 3_N, 6_N and 8_N declare scaling "None" while every other
# "_N" scenario declares "MinMax" - confirm this reflects how those files were built.

_LABEL_ONLY = "Label encoding"


def _mixed_encoding():
    """Return a fresh per-column encoding map (label-encode all but ChestPainType)."""
    return {
        "Sex": "Label",
        "ExerciseAngina": "Label",
        "ST_Slope": "Label",
        "RestingECG": "Label",
        "ChestPainType": "OneHot",
    }


def _impute_and_abs():
    """Return the extra keys used by scenario 2 (zero imputation and |Oldpeak|)."""
    return {"impute_zeros": {"RestingBP": "mean", "Cholesterol": "mean"}, "oldpeak_abs": True}


def _scenario(name, encoding, remove_outliers, remove_errors, scaling, extras=None):
    """Assemble one description dict; optional extras sit just before "scaling"."""
    entry = {
        "name": name,
        "encoding": encoding,
        "remove_outliers": remove_outliers,
        "remove_errors": remove_errors,
    }
    if extras:
        entry.update(extras)
    entry["scaling"] = scaling
    return entry


scenarios_description = [
    # Scenarios 1-4: every categorical column label-encoded.
    _scenario("Scenario 1_N", _LABEL_ONLY, False, False, "MinMax"),
    _scenario("Scenario 1_S", _LABEL_ONLY, False, False, "Standard"),
    _scenario("Scenario 2_N", _LABEL_ONLY, False, False, "MinMax", _impute_and_abs()),
    _scenario("Scenario 2_S", _LABEL_ONLY, False, False, "Standard", _impute_and_abs()),
    _scenario("Scenario 3_N", _LABEL_ONLY, True, True, "None"),
    _scenario("Scenario 3_S", _LABEL_ONLY, True, True, "Standard"),
    _scenario("Scenario 4_N", _LABEL_ONLY, "replace_with_mean", True, "MinMax"),
    _scenario("Scenario 4_S", _LABEL_ONLY, "replace_with_mean", True, "Standard"),
    # Scenarios 5-8: ChestPainType one-hot encoded, remaining categoricals label-encoded.
    _scenario("Scenario 5_N", _mixed_encoding(), False, False, "MinMax"),
    _scenario("Scenario 5_S", _mixed_encoding(), False, False, "Standard"),
    _scenario("Scenario 6_N", _mixed_encoding(), False, True, "None"),
    _scenario("Scenario 6_S", _mixed_encoding(), False, True, "Standard"),
    _scenario("Scenario 7_N", _mixed_encoding(), True, True, "MinMax"),
    _scenario("Scenario 7_S", _mixed_encoding(), True, True, "Standard"),
    _scenario("Scenario 8_N", _mixed_encoding(), "replace_with_mean", True, "None"),
    _scenario("Scenario 8_S", _mixed_encoding(), "replace_with_mean", True, "Standard"),
]


In [None]:
def get_search_hyperparameters(model_type):
    """Look up the candidate hyperparameter values for one model family.

    Args:
        model_type: One of "NaiveBayes", "SVM", "KNN" or "DecisionTree".

    Returns:
        A dict mapping estimator parameter names to lists of candidate values.
        Any other model_type yields an empty dict.
    """
    # The table is rebuilt on every call so each caller receives fresh containers.
    search_spaces = {
        "NaiveBayes": {
            "var_smoothing": [1e-9, 1e-8, 1e-7, 1e-6, 1e-5],
            "priors": [None],
        },
        "SVM": {
            "C": [0.1, 1, 10, 100, 1000],
            "gamma": [1, 0.1, 0.01, 0.001, 0.0001],
            "kernel": ["rbf", "linear", "poly", "sigmoid"],
        },
        "KNN": {
            # neighbour count, vote weighting scheme, distance metric
            "n_neighbors": [3, 5, 7, 9, 11],
            "weights": ["uniform", "distance"],
            "metric": ["euclidean", "manhattan", "minkowski"],
        },
        "DecisionTree": {
            # split criterion, depth cap, min samples to split a node / to keep in a leaf
            "criterion": ["gini", "entropy", "log_loss"],
            "max_depth": [None, 10, 20, 30, 40, 50],
            "min_samples_split": [2, 5, 10],
            "min_samples_leaf": [1, 2, 4],
        },
    }
    return search_spaces.get(model_type, {})


In [None]:
class optuna_tuner:
    """Optuna-based hyperparameter search for the classifiers used in this notebook.

    The type of ``model`` selects the search space (GaussianNB, SVC,
    KNeighborsClassifier or DecisionTreeClassifier). Each trial builds a fresh
    estimator of that type from the suggested values and scores it with
    cross-validation on the training data; parameters already set on ``model``
    itself are not carried into the trial estimators.

    Args:
        model: Estimator instance whose type selects the search space.
        X_train: Training features passed to cross_val_score.
        y_train: Training labels passed to cross_val_score.
        direction: Optuna study direction ("maximize" or "minimize").
        n_trials: Number of trials to run.
        n_jobs: Parallelism of cross_val_score within a trial (not of the study).
        scoring: scikit-learn scoring name.
        cv: Cross-validation splitting strategy forwarded to cross_val_score.
        random_state: Optional seed for the TPE sampler. With the default
            ``None`` the study uses Optuna's default (unseeded) sampler.
    """

    def __init__(self, model, X_train, y_train, direction="maximize", n_trials=100, n_jobs=-1, scoring="accuracy", cv=5, random_state=None):
        self.model = model
        self.X_train = X_train
        self.y_train = y_train
        self.direction = direction
        self.n_trials = n_trials
        self.n_jobs = n_jobs
        self.scoring = scoring
        self.cv = cv
        self.random_state = random_state

    def objective(self, trial):
        """Build a candidate estimator from ``trial`` and return its mean CV score.

        Raises:
            ValueError: If ``self.model`` is not one of the supported estimator types.
        """
        # Scale-type parameters (var_smoothing, C, gamma) cover four orders of
        # magnitude, so they are sampled on a log scale; linear sampling would
        # almost never visit the lower decades of the range.
        if isinstance(self.model, GaussianNB):
            model = GaussianNB(
                var_smoothing=trial.suggest_float("var_smoothing", 1e-9, 1e-5, log=True)
            )

        elif isinstance(self.model, SVC):
            model = SVC(
                C=trial.suggest_float("C", 1e-3, 10, log=True),
                gamma=trial.suggest_float("gamma", 1e-3, 10, log=True),
                kernel=trial.suggest_categorical("kernel", ["rbf", "linear", "poly", "sigmoid"]),
            )

        elif isinstance(self.model, KNeighborsClassifier):
            model = KNeighborsClassifier(
                n_neighbors=trial.suggest_int("n_neighbors", 3, 15),
                weights=trial.suggest_categorical("weights", ["uniform", "distance"]),
                metric=trial.suggest_categorical("metric", ["euclidean", "manhattan", "minkowski"]),
            )

        elif isinstance(self.model, DecisionTreeClassifier):
            model = DecisionTreeClassifier(
                criterion=trial.suggest_categorical("criterion", ["gini", "entropy", "log_loss"]),
                max_depth=trial.suggest_int("max_depth", 5, 50),
                min_samples_split=trial.suggest_int("min_samples_split", 2, 10),
                min_samples_leaf=trial.suggest_int("min_samples_leaf", 1, 5),
            )

        else:
            raise ValueError("Unsupported model type for optimization.")

        # The study objective is the mean cross-validated score on the training data.
        scores = cross_val_score(model, self.X_train, self.y_train, cv=self.cv, scoring=self.scoring, n_jobs=self.n_jobs)
        return scores.mean()

    def optimize_study(self):
        """Run the study for ``n_trials`` trials and return the best parameter dict."""
        sampler = None
        if self.random_state is not None:
            # Seed the sampler only on request so the default behaviour is unchanged.
            sampler = optuna.samplers.TPESampler(seed=self.random_state)
        study = optuna.create_study(direction=self.direction, sampler=sampler)
        study.optimize(self.objective, n_trials=self.n_trials)
        return study.best_params


In [None]:
class ModelTrainer:
    """Split a dataset, fit or tune a classifier, and report validation/test metrics.

    Attributes:
        dataset: The full DataFrame passed to the constructor.
        target_column: Name of the label column.
        X: ``dataset`` without the target column.
        y: The target column.
        model: The estimator; replaced by the tuned estimator after
            ``tune_and_train`` with grid or random search.
        results: Filled by ``test_and_evaluate`` with "validation" and "test"
            metric dicts.
    """

    def __init__(self, dataset, target_column, model):
        self.dataset = dataset
        self.target_column = target_column
        self.X = dataset.drop(target_column, axis=1)
        self.y = dataset[target_column]
        self.model = model
        self.results = {}

    def split_data(self, test_size=0.2, val_size=0.1, random_state=42):
        """Split X/y into train, validation and test subsets.

        Args:
            test_size: Fraction of the whole dataset reserved for the test set.
            val_size: Fraction of the whole dataset reserved for the validation set.
            random_state: Seed forwarded to both train_test_split calls.
        """
        holdout_size = test_size + val_size
        X_train, X_temp, y_train, y_temp = train_test_split(
            self.X, self.y, test_size=holdout_size, random_state=random_state
        )
        # train_test_split gives its *second* output the `test_size` share. That
        # output is assigned to the test set below, so the share passed here must
        # be the test fraction of the hold-out (not the validation fraction).
        test_ratio = test_size / holdout_size
        self.X_val, self.X_test, self.y_val, self.y_test = train_test_split(
            X_temp, y_temp, test_size=test_ratio, random_state=random_state
        )
        self.X_train, self.y_train = X_train, y_train

    def train(self):
        """Fit the current model on the training split without any tuning."""
        self.model.fit(self.X_train, self.y_train)

    def tune_and_train(self, search_type, search_config, param_grid):
        """Tune hyperparameters on the training split and keep the fitted best model.

        Args:
            search_type: "grid", "random" or "optuna".
            search_config: Settings for the chosen search. Grid and random search
                read "cv" and "scoring"; random search also reads "n_iter" and an
                optional "random_state"; optuna reads "direction", "n_trials",
                "n_jobs", "scoring" and "cv".
            param_grid: Candidate values for grid/random search. It is not used
                for "optuna", whose search space is defined inside optuna_tuner.

        Raises:
            ValueError: If ``search_type`` is not one of the supported values.
        """
        if search_type == "grid":
            search = GridSearchCV(self.model, param_grid, cv=search_config["cv"], scoring=search_config["scoring"])
        elif search_type == "random":
            search = RandomizedSearchCV(
                self.model,
                param_grid,
                cv=search_config["cv"],
                scoring=search_config["scoring"],
                n_iter=search_config["n_iter"],
                # Optional; None keeps scikit-learn's default (unseeded) sampling.
                random_state=search_config.get("random_state"),
            )
        elif search_type == "optuna":
            search = optuna_tuner(self.model, self.X_train, self.y_train, search_config["direction"], search_config["n_trials"], search_config["n_jobs"], search_config["scoring"], search_config["cv"])
        else:
            # Fail with a clear message instead of an unbound-variable error further down.
            raise ValueError(f"Unsupported search type: {search_type!r}. Expected 'grid', 'random' or 'optuna'.")

        if search_type == "optuna":
            # Optuna only returns parameters, so apply them and fit explicitly.
            best_params = search.optimize_study()
            self.model = self.model.set_params(**best_params)
            self.model.fit(self.X_train, self.y_train)
        else:
            search.fit(self.X_train, self.y_train)
            best_params = search.best_params_
            self.model = search.best_estimator_

        print("Best hyperparameters: ", best_params)

        # TODO: Get scores for train, val, and test sets

    @staticmethod
    def _compute_metrics(y_true, y_pred):
        """Return accuracy, weighted precision/recall/F1 and the confusion matrix."""
        return {
            "accuracy": accuracy_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred, average="weighted"),
            "recall": recall_score(y_true, y_pred, average="weighted"),
            "f1_score": f1_score(y_true, y_pred, average="weighted"),
            "confusion_matrix": confusion_matrix(y_true, y_pred),
        }

    @staticmethod
    def _print_metrics(title, metrics):
        """Print the scalar metrics of one split under ``title``."""
        print(title)
        print(f"  Accuracy: {metrics['accuracy']:.4f}")
        print(f"  Precision: {metrics['precision']:.4f}")
        print(f"  Recall: {metrics['recall']:.4f}")
        print(f"  F1 Score: {metrics['f1_score']:.4f}")

    @staticmethod
    def _plot_confusion_matrix(cm, save_path=None):
        """Draw the test-set confusion matrix heatmap, optionally saving it to ``save_path``."""
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=True, yticklabels=True)
        plt.title("Confusion Matrix - Test Set")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        if save_path:
            plt.savefig(save_path)
        plt.show()

    def test_and_evaluate(self, save_dir=None):
        """Score the model on the validation and test splits and store the metrics.

        Args:
            save_dir: Optional directory (created if missing) in which the test
                confusion matrix is saved as "confusion_matrix.png".
        """
        y_val_pred = self.model.predict(self.X_val)
        y_test_pred = self.model.predict(self.X_test)

        metrics_val = self._compute_metrics(self.y_val, y_val_pred)
        metrics_test = self._compute_metrics(self.y_test, y_test_pred)

        self.results["validation"] = metrics_val
        self.results["test"] = metrics_test

        self._print_metrics("Validation Metrics:", metrics_val)
        self._print_metrics("\nTest Metrics:", metrics_test)

        print("\nConfusion Matrix (Test Set):")
        cm = metrics_test["confusion_matrix"]
        print(cm)

        save_path = None
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, "confusion_matrix.png")
        self._plot_confusion_matrix(cm, save_path)

    def plot_results(self, save_dir=None):
        """Plot accuracy comparison, test confusion matrix and per-class metrics.

        Args:
            save_dir: Optional directory (created if missing) where each figure
                is also written as a PNG file.
        """
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        # Accuracy comparison needs the metrics stored by test_and_evaluate.
        if "validation" in self.results and "test" in self.results:
            accuracies = {
                "Validation": self.results["validation"]["accuracy"],
                "Test": self.results["test"]["accuracy"],
            }
            plt.figure(figsize=(6, 4))
            plt.bar(list(accuracies.keys()), list(accuracies.values()), color=["skyblue", "lightgreen"])
            plt.title("Accuracy Comparison")
            plt.ylabel("Accuracy")
            plt.ylim(0, 1)
            for i, v in enumerate(accuracies.values()):
                plt.text(i, v + 0.02, f"{v:.2f}", ha="center")
            if save_dir:
                plt.savefig(os.path.join(save_dir, "accuracy_comparison.png"))
            plt.show()

        # Confusion matrix heatmap for the test set.
        y_test_pred = self.model.predict(self.X_test)
        cm = confusion_matrix(self.y_test, y_test_pred)
        heatmap_path = os.path.join(save_dir, "confusion_matrix_heatmap.png") if save_dir else None
        self._plot_confusion_matrix(cm, heatmap_path)

        # Class-wise precision, recall and F1-score.
        if hasattr(self.model, "classes_"):
            report = classification_report(self.y_test, y_test_pred, output_dict=True)
            metrics = ["precision", "recall", "f1-score"]
            # Keep only per-class rows: "accuracy" maps to a float, and the
            # aggregate rows are keyed "... avg". This avoids relying on the
            # number of trailing summary entries in the report.
            class_names = [
                key for key, row in report.items()
                if isinstance(row, dict) and not key.endswith(" avg")
            ]
            class_metrics = {metric: [report[class_name][metric] for class_name in class_names] for metric in metrics}

            for metric, values in class_metrics.items():
                plt.figure(figsize=(8, 4))
                plt.bar(class_names, values, color="lightcoral")
                plt.title(f"{metric.capitalize()} by Class")
                plt.ylabel(metric.capitalize())
                plt.ylim(0, 1)
                for i, v in enumerate(values):
                    plt.text(i, v + 0.02, f"{v:.2f}", ha="center")
                if save_dir:
                    plt.savefig(os.path.join(save_dir, f"{metric}_by_class.png"))
                plt.show()


In [7]:
# Settings consumed by ModelTrainer.tune_and_train for each search strategy.
# All three use 5-fold cross-validation scored by accuracy.
search_config_grid = dict(cv=5, scoring="accuracy")

# Random search additionally caps the number of sampled candidates.
search_config_random = dict(cv=5, scoring="accuracy", n_iter=50)

# Optuna: maximise the CV score over 100 trials; n_jobs is forwarded to cross_val_score.
search_config_optuna = dict(
    direction="maximize",
    n_trials=100,
    n_jobs=-1,
    scoring="accuracy",
    cv=5,
)

# Label column shared by every scenario dataset.
target_column = "HeartDisease"

In [None]:
def run_scenario(scenario, model_type, search_type, search_config, param_grid):
    # Load the dataset
    dataset = pd.read_csv(scenario)
    model = None

    # Initialize the model based on the model_type
    if model_type == "NaiveBayes":
        model = GaussianNB()
    elif model_type == "SVM":
        model = SVC()
    elif model_type == "KNN":
        model = KNeighborsClassifier()
    elif model_type == "DecisionTree":
        model = DecisionTreeClassifier()
    else:
        raise ValueError(f"Model type {model_type} is not supported.")

    # Create a directory for saving plots and results
    scenario_name = os.path.splitext(os.path.basename(scenario))[0]  # Extract scenario name from file
    save_dir = os.path.join("SCORES", model_type, search_type, scenario_name)  # SCORES as top-level directory
    os.makedirs(save_dir, exist_ok=True)

    print(f"Running {model_type} with {search_type} for {scenario_name}...")
    print(f"Results will be saved in: {save_dir}")

    # Initialize the trainer
    trainer = ModelTrainer(dataset, target_column, model)
    trainer.split_data()

    # Train and tune the model
    if search_type:
        trainer.tune_and_train(search_type, search_config, param_grid)
    else:
        trainer.train()  # Train without hyperparameter tuning

    # Evaluate the model
    trainer.test_and_evaluate(save_dir=save_dir)

    # Plot results for the current scenario
    trainer.plot_results(save_dir=save_dir)

    print(f"Scenario {scenario_name} with {search_type} completed and results saved.\n")


# Naive Bayes

## 1. Grid Search

In [9]:
# Naive Bayes + grid search on every scenario. Datasets and their descriptions
# are paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="NaiveBayes",
        search_type="grid",
        search_config=search_config_grid,
        param_grid=get_search_hyperparameters("NaiveBayes"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")
    

## 2. Random Search

In [10]:
# Naive Bayes + randomized search on every scenario. Datasets and their
# descriptions are paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="NaiveBayes",
        search_type="random",
        search_config=search_config_random,
        param_grid=get_search_hyperparameters("NaiveBayes"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 3. Bayesian Optimization (Optuna)

In [11]:
# Naive Bayes + Optuna on every scenario. Datasets and their descriptions are
# paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="NaiveBayes",
        search_type="optuna",
        search_config=search_config_optuna,
        param_grid=get_search_hyperparameters("NaiveBayes"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

# SVM

## 1. Grid Search

In [12]:
# SVM + grid search on every scenario. Datasets and their descriptions are
# paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="SVM",
        search_type="grid",
        search_config=search_config_grid,
        param_grid=get_search_hyperparameters("SVM"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 2. Random Search

In [13]:
# SVM + randomized search on every scenario. Datasets and their descriptions are
# paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="SVM",
        search_type="random",
        search_config=search_config_random,
        param_grid=get_search_hyperparameters("SVM"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 3. Bayesian Optimization (Optuna)

In [14]:
# SVM + Optuna on every scenario. Datasets and their descriptions are paired by
# position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    # The grid from get_search_hyperparameters("SVM") is passed for signature
    # parity only: with "optuna" the search space comes from optuna_tuner.
    run_scenario(
        scenario,
        model_type="SVM",
        search_type="optuna",
        search_config=search_config_optuna,
        param_grid=get_search_hyperparameters("SVM"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

# KNN

## 1. Grid Search

In [15]:
# KNN + grid search on every scenario. Datasets and their descriptions are
# paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="KNN",
        search_type="grid",
        search_config=search_config_grid,
        param_grid=get_search_hyperparameters("KNN"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 2. Random Search

In [None]:
# KNN + randomized search on every scenario. Datasets and their descriptions are
# paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="KNN",
        search_type="random",
        search_config=search_config_random,
        param_grid=get_search_hyperparameters("KNN"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 3. Bayesian Optimization (Optuna)

In [17]:
# KNN + Optuna on every scenario. Datasets and their descriptions are paired by
# position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="KNN",
        search_type="optuna",
        search_config=search_config_optuna,
        param_grid=get_search_hyperparameters("KNN"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

# Decision Tree

## 1. Grid Search

In [None]:
# Decision tree + grid search on every scenario. Datasets and their descriptions
# are paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="DecisionTree",
        search_type="grid",
        search_config=search_config_grid,
        param_grid=get_search_hyperparameters("DecisionTree"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 2. Random Search

In [20]:
# Decision tree + randomized search on every scenario. Datasets and their
# descriptions are paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="DecisionTree",
        search_type="random",
        search_config=search_config_random,
        param_grid=get_search_hyperparameters("DecisionTree"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")

## 3. Bayesian Optimization (Optuna)

In [21]:
# Decision tree + Optuna on every scenario. Datasets and their descriptions are
# paired by position.
for scenario, scenario_description in zip(scenario_datasets, scenarios_description):
    # Header describing how this scenario's dataset was prepared.
    print(
        f"Scenario Name: {scenario_description['name']}",
        f"  Encoding: {scenario_description['encoding']}",
        f"  Remove Outliers: {scenario_description['remove_outliers']}",
        f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}",
        f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}",
        f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}",
        f"  Scaling: {scenario_description['scaling']}",
        sep="\n",
    )

    run_scenario(
        scenario,
        model_type="DecisionTree",
        search_type="optuna",
        search_config=search_config_optuna,
        param_grid=get_search_hyperparameters("DecisionTree"),
    )

    # Separator line followed by blank lines between scenarios.
    print("-" * 40, "\n\n", sep="\n")