In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    accuracy_score,
)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

import optuna

import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
scenario_datasets = os.listdir("./Scenarios")
scenario_datasets = [f"./Scenarios/{x}" for x in scenario_datasets]
scenario_datasets

['./Scenarios/dataset_scenario_1_N.csv',
 './Scenarios/dataset_scenario_1_S.csv',
 './Scenarios/dataset_scenario_2_N.csv',
 './Scenarios/dataset_scenario_2_S.csv',
 './Scenarios/dataset_scenario_3_N.csv',
 './Scenarios/dataset_scenario_3_S.csv',
 './Scenarios/dataset_scenario_4_N.csv',
 './Scenarios/dataset_scenario_4_S.csv',
 './Scenarios/dataset_scenario_5_N.csv',
 './Scenarios/dataset_scenario_5_S.csv',
 './Scenarios/dataset_scenario_6_N.csv',
 './Scenarios/dataset_scenario_6_S.csv',
 './Scenarios/dataset_scenario_7_N.csv',
 './Scenarios/dataset_scenario_7_S.csv',
 './Scenarios/dataset_scenario_8_N.csv',
 './Scenarios/dataset_scenario_8_S.csv']

In [3]:
scenarios_description = [
    {"name": "Scenario 1_N", "encoding": "Label encoding", "remove_outliers": False, "remove_errors": False, "scaling": "MinMax"},
    {"name": "Scenario 1_S", "encoding": "Label encoding", "remove_outliers": False, "remove_errors": False, "scaling": "Standard"},
    {"name": "Scenario 2_N", "encoding": "Label encoding", "remove_outliers": False, "remove_errors": False,
     "impute_zeros": {"RestingBP": "mean", "Cholesterol": "mean"}, "oldpeak_abs": True, "scaling": "MinMax"},
    {"name": "Scenario 2_S", "encoding": "Label encoding", "remove_outliers": False, "remove_errors": False,
     "impute_zeros": {"RestingBP": "mean", "Cholesterol": "mean"}, "oldpeak_abs": True, "scaling": "Standard"},
    {"name": "Scenario 3_N", "encoding": "Label encoding", "remove_outliers": True, "remove_errors": True, "scaling": "None"},
    {"name": "Scenario 3_S", "encoding": "Label encoding", "remove_outliers": True, "remove_errors": True, "scaling": "Standard"},
    {"name": "Scenario 4_N", "encoding": "Label encoding", "remove_outliers": "replace_with_mean", "remove_errors": True, "scaling": "MinMax"},
    {"name": "Scenario 4_S", "encoding": "Label encoding", "remove_outliers": "replace_with_mean", "remove_errors": True, "scaling": "Standard"},
    {"name": "Scenario 5_N", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": False, "remove_errors": False, "scaling": "MinMax"},
    {"name": "Scenario 5_S", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": False, "remove_errors": False, "scaling": "Standard"},
    {"name": "Scenario 6_N", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": False, "remove_errors": True, "scaling": "None"},
    {"name": "Scenario 6_S", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": False, "remove_errors": True, "scaling": "Standard"},
    {"name": "Scenario 7_N", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": True, "remove_errors": True, "scaling": "MinMax"},
    {"name": "Scenario 7_S", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": True, "remove_errors": True, "scaling": "Standard"},
    {"name": "Scenario 8_N", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": "replace_with_mean", "remove_errors": True, "scaling": "None"},
    {"name": "Scenario 8_S", "encoding": {"Sex": "Label", "ExerciseAngina": "Label", "ST_Slope": "Label", "RestingECG": "Label", "ChestPainType": "OneHot"},
     "remove_outliers": "replace_with_mean", "remove_errors": True, "scaling": "Standard"},
]


In [4]:
def get_search_hyperparameters(model_type):
    if model_type == "NaiveBayes":
        return {}
    elif model_type == "SVM":
        return {
            "C": [0.1, 1, 10, 100, 1000],
            "gamma": [1, 0.1, 0.01, 0.001, 0.0001],
            "kernel": ["rbf", "linear", "poly", "sigmoid"],
        }
    else:
        return {}

In [38]:
class optuna_tuner:
    def __init__(self, model, X_train, y_train, direction="maximize", n_trials=100, n_jobs=-1, scoring="accuracy", cv=5):
        self.model = model
        self.X_train = X_train
        self.y_train = y_train
        self.direction = direction
        self.n_trials = n_trials
        self.n_jobs = n_jobs
        self.scoring = scoring
        self.cv = cv

    def objective(self, trial):
        if isinstance(self.model, GaussianNB):
            pass
        elif isinstance(self.model, SVC):
            model = SVC(
                C=trial.suggest_float("C", 1e-3, 10),
                gamma=trial.suggest_float("gamma", 1e-3, 10),
                kernel=trial.suggest_categorical("kernel", ["rbf", "linear", "poly", "sigmoid"]),
            )
            scores = cross_val_score(model, self.X_train, self.y_train, cv=self.cv, scoring=self.scoring, n_jobs=self.n_jobs)
            return scores.mean()
        
    
    def optimize_study(self):
        study = optuna.create_study(direction=self.direction)
        study.optimize(self.objective, n_trials=self.n_trials, n_jobs=self.n_jobs)
        return study.best_params

    

In [31]:
class ModelTrainer:
    def __init__(self, dataset, target_column, model):
        self.dataset = dataset
        self.target_column = target_column
        self.X = dataset.drop(target_column, axis=1)
        self.y = dataset[target_column]
        self.model = model
        self.results = {}

    def split_data(self, test_size=0.2, val_size=0.1, random_state=42):
        X_train, X_temp, y_train, y_temp = train_test_split(
            self.X, self.y, test_size=test_size + val_size, random_state=random_state
        )
        val_ratio = val_size / (test_size + val_size)
        self.X_val, self.X_test, self.y_val, self.y_test = train_test_split(
            X_temp, y_temp, test_size=val_ratio, random_state=random_state
        )
        self.X_train, self.y_train = X_train, y_train

    def tune_and_train(self,search_type , search_config, param_grid):

        clf = self.model
        if search_type == "grid":
            search = GridSearchCV(clf, param_grid, cv=search_config["cv"], scoring=search_config["scoring"])
        elif search_type == "random":
            search = RandomizedSearchCV(clf, param_grid, cv=search_config["cv"], scoring=search_config["scoring"], n_iter=search_config["n_iter"])
        elif search_type == "optuna":
            search = optuna_tuner(self.model, self.X_train, self.y_train, search_config["direction"], search_config["n_trials"], search_config["n_jobs"], search_config["scoring"], search_config["cv"])

        best_params = None
        if isinstance(search, optuna_tuner):
            best_params = search.optimize_study()
            self.model = self.model.set_params(**best_params)
        elif search_type == "grid" or search_type == "random":
            search.fit(self.X_train, self.y_train)
            best_params = search.best_params_
            self.model = search.best_estimator_

        print("Best hyperparameters: ", best_params)

        # TODO: Get scores for train, val, and test sets


    def test_and_evaluate(self):
        pass

    def plot_results(self):
        pass


In [32]:
search_config_grid = {
    "cv": 5,
    "scoring": "accuracy",
}
search_config_random = {
    "cv": 5,
    "scoring": "accuracy",
    "n_iter": 50,
}
search_config_optuna = {
    "direction": "maximize",
    "n_trials": 100,
    "n_jobs": -1,
    "scoring": "accuracy",
    "cv": 5,
}

target_column = "HeartDisease"

In [33]:
def run_scenario(scenario, model_type, search_type, search_config, param_grid):
    dataset = pd.read_csv(scenario)
    model = None
    if model_type == "NaiveBayes":
        pass
    elif model_type == "SVM":
        model = SVC()

    trainer = ModelTrainer(dataset, target_column, model)
    trainer.split_data()
    trainer.tune_and_train(search_type, search_config, param_grid)
    # trainer.test_and_evaluate()
    # trainer.plot_results()

# Naive Bayes

# SVM

## 1. Grid Search

In [14]:
for scenario,scenario_description in zip(scenario_datasets, scenarios_description):
    print(f"Scenario Name: {scenario_description['name']}")
    print(f"  Encoding: {scenario_description['encoding']}")
    print(f"  Remove Outliers: {scenario_description['remove_outliers']}")
    print(f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}")
    print(f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}")
    print(f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}")
    print(f"  Scaling: {scenario_description['scaling']}")
    run_scenario(scenario, "SVM", "grid", search_config_grid, get_search_hyperparameters("SVM"))

    print("-" * 40)
    print("\n\n")

Scenario Name: Scenario 1_N
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: MinMax
Best hyperparameters:  {'C': 0.1, 'gamma': 1, 'kernel': 'poly'}
----------------------------------------



Scenario Name: Scenario 1_S
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard
Best hyperparameters:  {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
----------------------------------------



Scenario Name: Scenario 2_N
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: {'RestingBP': 'mean', 'Cholesterol': 'mean'}
  Oldpeak Absolute: True
  Scaling: MinMax
Best hyperparameters:  {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
----------------------------------------



Scenario Name: Scenario 2_S
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute 

## 2. Random Search

In [15]:
for scenario,scenario_description in zip(scenario_datasets, scenarios_description):
    print(f"Scenario Name: {scenario_description['name']}")
    print(f"  Encoding: {scenario_description['encoding']}")
    print(f"  Remove Outliers: {scenario_description['remove_outliers']}")
    print(f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}")
    print(f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}")
    print(f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}")
    print(f"  Scaling: {scenario_description['scaling']}")
    run_scenario(scenario, "SVM", "random", search_config_random, get_search_hyperparameters("SVM"))

    print("-" * 40)
    print("\n\n")

Scenario Name: Scenario 1_N
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: MinMax
Best hyperparameters:  {'kernel': 'poly', 'gamma': 1, 'C': 0.1}
----------------------------------------



Scenario Name: Scenario 1_S
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard
Best hyperparameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1}
----------------------------------------



Scenario Name: Scenario 2_N
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: {'RestingBP': 'mean', 'Cholesterol': 'mean'}
  Oldpeak Absolute: True
  Scaling: MinMax
Best hyperparameters:  {'kernel': 'rbf', 'gamma': 0.001, 'C': 1000}
----------------------------------------



Scenario Name: Scenario 2_S
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute 

## 3. Bayesian Optimization (Optuna)

In [39]:
for scenario,scenario_description in zip(scenario_datasets, scenarios_description):
    print(f"Scenario Name: {scenario_description['name']}")
    print(f"  Encoding: {scenario_description['encoding']}")
    print(f"  Remove Outliers: {scenario_description['remove_outliers']}")
    print(f"  Remove Errors: {scenario_description.get('remove_errors', 'Not Specified')}")
    print(f"  Impute Zeros: {scenario_description.get('impute_zeros', 'None')}")
    print(f"  Oldpeak Absolute: {scenario_description.get('oldpeak_abs', 'Not Specified')}")
    print(f"  Scaling: {scenario_description['scaling']}")
    # here the get_search_hyperparameters("SVM") is not used as the optuna_tuner class is used to tune the hyperparameters
    run_scenario(scenario, "SVM", "optuna", search_config_optuna, get_search_hyperparameters("SVM"))

    print("-" * 40)
    print("\n\n")

[I 2024-11-30 12:48:38,640] A new study created in memory with name: no-name-01d637bc-5e07-422f-b5f8-53978850d05e
[I 2024-11-30 12:48:38,748] Trial 0 finished with value: 0.8473231589147285 and parameters: {'C': 7.26237994975051, 'gamma': 0.2937391886759249, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:38,753] Trial 2 finished with value: 0.8410610465116278 and parameters: {'C': 2.9209376292464078, 'gamma': 0.9646524570987972, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:38,788] Trial 1 finished with value: 0.8442223837209302 and parameters: {'C': 8.343660802130607, 'gamma': 8.606156154741576, 'kernel': 'linear'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:38,810] Trial 5 finished with value: 0.8442223837209302 and parameters: {'C': 8.980044584801023, 'gamma': 3.1166618536542146, 'kernel': 'linear'}. Best is trial 0 with value: 0.8473231589147285.


Scenario Name: Scenario 1_N
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: MinMax


[I 2024-11-30 12:48:38,846] Trial 3 finished with value: 0.8348594961240309 and parameters: {'C': 2.1330174327289186, 'gamma': 0.030188244511243783, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:38,884] Trial 6 finished with value: 0.8457848837209301 and parameters: {'C': 3.7677922170115536, 'gamma': 9.307317678732998, 'kernel': 'linear'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:38,935] Trial 11 finished with value: 0.8457848837209301 and parameters: {'C': 5.824631888460823, 'gamma': 2.170918268018874, 'kernel': 'linear'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:39,000] Trial 8 finished with value: 0.8052446705426357 and parameters: {'C': 9.284862209819115, 'gamma': 6.454072467050267, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8473231589147285.
[I 2024-11-30 12:48:39,094] Trial 9 finished with value: 0.8083212209302324 and parameters: {'C': 7.348239499174422, 'gamma': 9.033334848494034,

Best hyperparameters:  {'C': 7.26237994975051, 'gamma': 0.2937391886759249, 'kernel': 'rbf'}
----------------------------------------



Scenario Name: Scenario 1_S
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:48:43,459] Trial 1 finished with value: 0.7741157945736434 and parameters: {'C': 5.623265042952017, 'gamma': 3.9504735579550343, 'kernel': 'poly'}. Best is trial 2 with value: 0.8442223837209302.
[I 2024-11-30 12:48:43,463] Trial 7 finished with value: 0.7741157945736434 and parameters: {'C': 7.273910000216464, 'gamma': 2.8857063810496086, 'kernel': 'poly'}. Best is trial 2 with value: 0.8442223837209302.
[I 2024-11-30 12:48:43,487] Trial 8 finished with value: 0.6541182170542635 and parameters: {'C': 1.4147479172388433, 'gamma': 3.694055383501027, 'kernel': 'rbf'}. Best is trial 2 with value: 0.8442223837209302.
[I 2024-11-30 12:48:43,491] Trial 5 finished with value: 0.6012112403100776 and parameters: {'C': 2.1962841415794334, 'gamma': 7.508569191399667, 'kernel': 'rbf'}. Best is trial 2 with value: 0.8442223837209302.
[I 2024-11-30 12:48:43,542] Trial 4 finished with value: 0.7741157945736434 and parameters: {'C': 8.205324561328393, 'gamma': 9.87159462457618, 'kerne

Best hyperparameters:  {'C': 6.660955762280234, 'gamma': 6.103807105621752, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 2_N
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: {'RestingBP': 'mean', 'Cholesterol': 'mean'}
  Oldpeak Absolute: True
  Scaling: MinMax


[I 2024-11-30 12:48:46,003] Trial 6 finished with value: 0.8005692829457365 and parameters: {'C': 6.170490623107402, 'gamma': 6.400513101661967, 'kernel': 'rbf'}. Best is trial 6 with value: 0.8005692829457365.
[I 2024-11-30 12:48:46,775] Trial 7 finished with value: 0.5358284883720931 and parameters: {'C': 9.076245654909458, 'gamma': 5.834347406107416, 'kernel': 'sigmoid'}. Best is trial 6 with value: 0.8005692829457365.
[I 2024-11-30 12:48:46,950] Trial 8 finished with value: 0.5358284883720931 and parameters: {'C': 1.4005247342766276, 'gamma': 5.730908119201437, 'kernel': 'sigmoid'}. Best is trial 6 with value: 0.8005692829457365.
[I 2024-11-30 12:48:46,971] Trial 9 finished with value: 0.8426598837209301 and parameters: {'C': 9.603053426083708, 'gamma': 3.403276622405434, 'kernel': 'linear'}. Best is trial 9 with value: 0.8426598837209301.
[I 2024-11-30 12:48:46,990] Trial 5 finished with value: 0.5358284883720931 and parameters: {'C': 2.181899766353356, 'gamma': 9.950328268205311,

Best hyperparameters:  {'C': 3.0868261178421283, 'gamma': 0.37679425732484695, 'kernel': 'rbf'}
----------------------------------------



Scenario Name: Scenario 2_S
  Encoding: Label encoding
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: {'RestingBP': 'mean', 'Cholesterol': 'mean'}
  Oldpeak Absolute: True
  Scaling: Standard


[I 2024-11-30 12:48:50,986] Trial 10 finished with value: 0.7756782945736435 and parameters: {'C': 3.172953320143794, 'gamma': 4.497074670205807, 'kernel': 'poly'}. Best is trial 2 with value: 0.8442102713178293.
[I 2024-11-30 12:48:50,988] Trial 7 finished with value: 0.7586240310077519 and parameters: {'C': 8.921103880085, 'gamma': 7.351518083055463, 'kernel': 'sigmoid'}. Best is trial 2 with value: 0.8442102713178293.
[I 2024-11-30 12:48:50,988] Trial 11 finished with value: 0.699406492248062 and parameters: {'C': 1.36412886508364, 'gamma': 8.577130503862211, 'kernel': 'sigmoid'}. Best is trial 2 with value: 0.8442102713178293.
[I 2024-11-30 12:48:50,988] Trial 6 finished with value: 0.7756782945736435 and parameters: {'C': 1.8837231843334692, 'gamma': 3.347775965999471, 'kernel': 'poly'}. Best is trial 2 with value: 0.8442102713178293.
[I 2024-11-30 12:48:51,016] Trial 8 finished with value: 0.8442102713178293 and parameters: {'C': 2.8467022106152995, 'gamma': 4.635722785809739, 'k

Best hyperparameters:  {'C': 5.905740279281135, 'gamma': 3.2112131604972802, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 3_N
  Encoding: Label encoding
  Remove Outliers: True
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: None


[I 2024-11-30 12:48:53,401] Trial 6 finished with value: 0.5295403009688725 and parameters: {'C': 4.122587270267565, 'gamma': 6.178942256550991, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:53,420] Trial 3 finished with value: 0.7698206555349412 and parameters: {'C': 7.867842791772911, 'gamma': 3.061073205770741, 'kernel': 'poly'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:53,426] Trial 10 finished with value: 0.8146567717996289 and parameters: {'C': 8.445173379013635, 'gamma': 1.3928686692266414, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:53,462] Trial 2 finished with value: 0.8003710575139147 and parameters: {'C': 3.8422507643321104, 'gamma': 2.794207058927985, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:53,477] Trial 4 finished with value: 0.7657596371882086 and parameters: {'C': 7.838682042928022, 'gamma': 2.1932921952673032, 'k

Best hyperparameters:  {'C': 0.7195053739933218, 'gamma': 5.41783621986043, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 3_S
  Encoding: Label encoding
  Remove Outliers: True
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:48:55,912] Trial 11 finished with value: 0.7312100597814883 and parameters: {'C': 8.239803379586636, 'gamma': 4.788547099029158, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:55,927] Trial 6 finished with value: 0.8431869717584004 and parameters: {'C': 9.061502127887074, 'gamma': 9.98622973531379, 'kernel': 'linear'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:55,938] Trial 8 finished with value: 0.7556586270871983 and parameters: {'C': 4.026107195779373, 'gamma': 7.2574293275260775, 'kernel': 'poly'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:55,975] Trial 3 finished with value: 0.8431869717584004 and parameters: {'C': 7.083074556886412, 'gamma': 6.9919467433751, 'kernel': 'linear'}. Best is trial 0 with value: 0.8431869717584004.
[I 2024-11-30 12:48:55,980] Trial 13 finished with value: 0.7108843537414966 and parameters: {'C': 0.37395787946915693, 'gamma': 0.729720500386398

Best hyperparameters:  {'C': 0.0456632210364214, 'gamma': 9.229481878506098, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 4_N
  Encoding: Label encoding
  Remove Outliers: replace_with_mean
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: MinMax


[I 2024-11-30 12:48:57,963] Trial 1 finished with value: 0.5358284883720931 and parameters: {'C': 8.840300946352869, 'gamma': 6.2012351403277215, 'kernel': 'sigmoid'}. Best is trial 9 with value: 0.848861434108527.
[I 2024-11-30 12:48:57,963] Trial 6 finished with value: 0.6433624031007752 and parameters: {'C': 9.693327599241945, 'gamma': 1.2249399082851014, 'kernel': 'sigmoid'}. Best is trial 9 with value: 0.848861434108527.
[I 2024-11-30 12:48:57,964] Trial 3 finished with value: 0.8410973837209301 and parameters: {'C': 0.10333986750804419, 'gamma': 8.000769561044399, 'kernel': 'linear'}. Best is trial 9 with value: 0.848861434108527.
[I 2024-11-30 12:48:58,001] Trial 7 finished with value: 0.8021075581395349 and parameters: {'C': 6.835791892617046, 'gamma': 4.27029390888124, 'kernel': 'rbf'}. Best is trial 9 with value: 0.848861434108527.
[I 2024-11-30 12:48:58,011] Trial 5 finished with value: 0.8426598837209301 and parameters: {'C': 3.7081140366900667, 'gamma': 2.872161475031678, 

Best hyperparameters:  {'C': 0.9931855481267946, 'gamma': 1.3160139792060683, 'kernel': 'rbf'}
----------------------------------------



Scenario Name: Scenario 4_S
  Encoding: Label encoding
  Remove Outliers: replace_with_mean
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:49:00,530] Trial 4 finished with value: 0.5716812015503876 and parameters: {'C': 1.626957020070095, 'gamma': 6.809981949061281, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8457727713178294.
[I 2024-11-30 12:49:00,557] Trial 7 finished with value: 0.6790697674418604 and parameters: {'C': 9.195624086167953, 'gamma': 1.8908282358764732, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8457727713178294.
[I 2024-11-30 12:49:00,566] Trial 1 finished with value: 0.8457727713178294 and parameters: {'C': 7.799562031380874, 'gamma': 3.2195459538662607, 'kernel': 'linear'}. Best is trial 0 with value: 0.8457727713178294.
[I 2024-11-30 12:49:00,584] Trial 10 finished with value: 0.5607558139534884 and parameters: {'C': 1.768620627689666, 'gamma': 8.224177593467555, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8457727713178294.
[I 2024-11-30 12:49:00,623] Trial 11 finished with value: 0.5358284883720931 and parameters: {'C': 0.4047759779708816, 'gamma': 3.5767197984871117, 'k

Best hyperparameters:  {'C': 7.632509935961091, 'gamma': 4.1741501856504915, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 5_N
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: MinMax


[I 2024-11-30 12:49:03,389] Trial 0 finished with value: 0.8379844961240309 and parameters: {'C': 3.3416367859441705, 'gamma': 3.6611899048300893, 'kernel': 'linear'}. Best is trial 5 with value: 0.8395348837209301.
[I 2024-11-30 12:49:03,427] Trial 9 finished with value: 0.8410852713178294 and parameters: {'C': 0.46711887003823604, 'gamma': 8.019087642962186, 'kernel': 'linear'}. Best is trial 9 with value: 0.8410852713178294.
[I 2024-11-30 12:49:03,440] Trial 8 finished with value: 0.7943435077519381 and parameters: {'C': 2.0193553540127445, 'gamma': 5.8613633022024265, 'kernel': 'rbf'}. Best is trial 9 with value: 0.8410852713178294.
[I 2024-11-30 12:49:03,446] Trial 11 finished with value: 0.8379723837209301 and parameters: {'C': 1.6762349041547022, 'gamma': 0.44333692895197263, 'kernel': 'linear'}. Best is trial 9 with value: 0.8410852713178294.
[I 2024-11-30 12:49:03,476] Trial 12 finished with value: 0.5358284883720931 and parameters: {'C': 2.7782907702462483, 'gamma': 3.4742895

Best hyperparameters:  {'C': 2.8808583725876424, 'gamma': 4.776184317751132, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 5_S
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: False
  Remove Errors: False
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:49:08,788] Trial 0 finished with value: 0.8364341085271316 and parameters: {'C': 6.9655899598681765, 'gamma': 7.984076468255109, 'kernel': 'linear'}. Best is trial 0 with value: 0.8364341085271316.
[I 2024-11-30 12:49:08,830] Trial 2 finished with value: 0.7647286821705427 and parameters: {'C': 1.5569996349830126, 'gamma': 8.225284610490508, 'kernel': 'poly'}. Best is trial 0 with value: 0.8364341085271316.
[I 2024-11-30 12:49:08,842] Trial 3 finished with value: 0.7196220930232557 and parameters: {'C': 6.482503354551868, 'gamma': 5.657402182003737, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.8364341085271316.
[I 2024-11-30 12:49:08,845] Trial 8 finished with value: 0.7304748062015504 and parameters: {'C': 6.338485168836612, 'gamma': 6.5487971310597555, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.8364341085271316.
[I 2024-11-30 12:49:08,878] Trial 11 finished with value: 0.7242248062015504 and parameters: {'C': 9.799912166660425, 'gamma': 8.869260284553

Best hyperparameters:  {'C': 7.383212511197904, 'gamma': 0.023143940294657384, 'kernel': 'rbf'}
----------------------------------------



Scenario Name: Scenario 6_N
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: False
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: None


[I 2024-11-30 12:49:11,573] Trial 10 finished with value: 0.8270348837209301 and parameters: {'C': 5.192312786531853, 'gamma': 1.2854475348751386, 'kernel': 'rbf'}. Best is trial 10 with value: 0.8270348837209301.
[I 2024-11-30 12:49:11,599] Trial 7 finished with value: 0.8426235465116279 and parameters: {'C': 1.7304667621969148, 'gamma': 2.849630754436874, 'kernel': 'linear'}. Best is trial 7 with value: 0.8426235465116279.
[I 2024-11-30 12:49:12,127] Trial 1 finished with value: 0.7569767441860465 and parameters: {'C': 2.1980886196197997, 'gamma': 7.660703855860221, 'kernel': 'poly'}. Best is trial 7 with value: 0.8426235465116279.
[I 2024-11-30 12:49:12,196] Trial 4 finished with value: 0.5947795542635659 and parameters: {'C': 2.5909949010786915, 'gamma': 1.2461769965230862, 'kernel': 'sigmoid'}. Best is trial 7 with value: 0.8426235465116279.
[I 2024-11-30 12:49:12,278] Trial 11 finished with value: 0.809907945736434 and parameters: {'C': 2.934438182072689, 'gamma': 4.1416106635801

Best hyperparameters:  {'C': 9.497098015615158, 'gamma': 2.595888362820074, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 6_S
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: False
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:49:15,715] Trial 2 finished with value: 0.6416908914728683 and parameters: {'C': 4.111340441194772, 'gamma': 3.3524541157097176, 'kernel': 'rbf'}. Best is trial 6 with value: 0.8426356589147286.
[I 2024-11-30 12:49:15,727] Trial 5 finished with value: 0.7553657945736434 and parameters: {'C': 5.0249228063578535, 'gamma': 3.4390872082203763, 'kernel': 'poly'}. Best is trial 6 with value: 0.8426356589147286.
[I 2024-11-30 12:49:15,735] Trial 11 finished with value: 0.7087451550387597 and parameters: {'C': 6.926487216110812, 'gamma': 3.8293220356901765, 'kernel': 'sigmoid'}. Best is trial 6 with value: 0.8426356589147286.
[I 2024-11-30 12:49:15,742] Trial 10 finished with value: 0.8426356589147286 and parameters: {'C': 1.5092994215435867, 'gamma': 8.51000332403868, 'kernel': 'linear'}. Best is trial 6 with value: 0.8426356589147286.
[I 2024-11-30 12:49:15,760] Trial 9 finished with value: 0.5654433139534885 and parameters: {'C': 2.5345433160256845, 'gamma': 8.7240285855244

Best hyperparameters:  {'C': 9.205394109764523, 'gamma': 8.605873916792685, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 7_N
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: True
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: MinMax


[I 2024-11-30 12:49:18,309] Trial 4 finished with value: 0.7597402597402597 and parameters: {'C': 6.113976999939252, 'gamma': 8.438193577664128, 'kernel': 'poly'}. Best is trial 2 with value: 0.8492888064316635.
[I 2024-11-30 12:49:18,331] Trial 3 finished with value: 0.7576994434137291 and parameters: {'C': 7.888597184649927, 'gamma': 6.5189237937426805, 'kernel': 'poly'}. Best is trial 2 with value: 0.8492888064316635.
[I 2024-11-30 12:49:18,341] Trial 9 finished with value: 0.8493094207379922 and parameters: {'C': 5.299932268728591, 'gamma': 3.1292668397039596, 'kernel': 'linear'}. Best is trial 9 with value: 0.8493094207379922.
[I 2024-11-30 12:49:18,352] Trial 6 finished with value: 0.8492888064316635 and parameters: {'C': 9.627083464073348, 'gamma': 8.861841368221114, 'kernel': 'linear'}. Best is trial 9 with value: 0.8493094207379922.
[I 2024-11-30 12:49:18,388] Trial 8 finished with value: 0.8492888064316635 and parameters: {'C': 9.41615011937811, 'gamma': 5.154623565007242, 'k

Best hyperparameters:  {'C': 4.345398005049943, 'gamma': 2.587263955865018, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 7_S
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: True
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:49:20,410] Trial 8 finished with value: 0.7596784168212739 and parameters: {'C': 6.564475851872188, 'gamma': 8.81600047472743, 'kernel': 'poly'}. Best is trial 1 with value: 0.8452071737786022.
[I 2024-11-30 12:49:20,417] Trial 2 finished with value: 0.8452071737786022 and parameters: {'C': 9.84147315231586, 'gamma': 9.001997591317378, 'kernel': 'linear'}. Best is trial 1 with value: 0.8452071737786022.
[I 2024-11-30 12:49:20,427] Trial 7 finished with value: 0.7596784168212739 and parameters: {'C': 5.413008135637599, 'gamma': 9.53029637836531, 'kernel': 'poly'}. Best is trial 1 with value: 0.8452071737786022.
[I 2024-11-30 12:49:20,477] Trial 10 finished with value: 0.7698206555349413 and parameters: {'C': 0.6315965162369538, 'gamma': 7.456326517013579, 'kernel': 'sigmoid'}. Best is trial 1 with value: 0.8452071737786022.
[I 2024-11-30 12:49:20,480] Trial 11 finished with value: 0.8513502370645227 and parameters: {'C': 0.3442424892306778, 'gamma': 1.3545476663879383, 

Best hyperparameters:  {'C': 0.4732614657967059, 'gamma': 0.039675789414968216, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 8_N
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: replace_with_mean
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: None


[I 2024-11-30 12:49:22,547] Trial 7 finished with value: 0.6509932170542635 and parameters: {'C': 6.204415268338885, 'gamma': 1.000711122659414, 'kernel': 'sigmoid'}. Best is trial 5 with value: 0.8410852713178294.
[I 2024-11-30 12:49:22,589] Trial 11 finished with value: 0.8410852713178294 and parameters: {'C': 7.169253149988003, 'gamma': 3.67812384048594, 'kernel': 'linear'}. Best is trial 5 with value: 0.8410852713178294.
[I 2024-11-30 12:49:22,650] Trial 9 finished with value: 0.8395227713178294 and parameters: {'C': 4.331988315663916, 'gamma': 5.010618671210604, 'kernel': 'linear'}. Best is trial 5 with value: 0.8410852713178294.
[I 2024-11-30 12:49:22,866] Trial 10 finished with value: 0.6028706395348837 and parameters: {'C': 9.80973014579516, 'gamma': 2.5747553375732632, 'kernel': 'sigmoid'}. Best is trial 5 with value: 0.8410852713178294.
[I 2024-11-30 12:49:22,909] Trial 12 finished with value: 0.8410852713178294 and parameters: {'C': 8.782227282635997, 'gamma': 4.953738680490

Best hyperparameters:  {'C': 1.1758009960696496, 'gamma': 8.599120983741958, 'kernel': 'linear'}
----------------------------------------



Scenario Name: Scenario 8_S
  Encoding: {'Sex': 'Label', 'ExerciseAngina': 'Label', 'ST_Slope': 'Label', 'RestingECG': 'Label', 'ChestPainType': 'OneHot'}
  Remove Outliers: replace_with_mean
  Remove Errors: True
  Impute Zeros: None
  Oldpeak Absolute: Not Specified
  Scaling: Standard


[I 2024-11-30 12:49:26,901] Trial 9 finished with value: 0.7025193798449612 and parameters: {'C': 3.79926253353986, 'gamma': 4.4336824999266256, 'kernel': 'sigmoid'}. Best is trial 2 with value: 0.7819282945736434.
[I 2024-11-30 12:49:26,914] Trial 7 finished with value: 0.7446220930232558 and parameters: {'C': 6.527718345997648, 'gamma': 7.2511488955233, 'kernel': 'sigmoid'}. Best is trial 2 with value: 0.7819282945736434.
[I 2024-11-30 12:49:26,922] Trial 1 finished with value: 0.5623183139534884 and parameters: {'C': 9.506901737907056, 'gamma': 8.190319485803665, 'kernel': 'rbf'}. Best is trial 2 with value: 0.7819282945736434.
[I 2024-11-30 12:49:26,932] Trial 6 finished with value: 0.7506904069767442 and parameters: {'C': 0.13798686644552763, 'gamma': 6.904526119543761, 'kernel': 'poly'}. Best is trial 2 with value: 0.7819282945736434.
[I 2024-11-30 12:49:26,938] Trial 8 finished with value: 0.8410852713178294 and parameters: {'C': 6.0766852180914865, 'gamma': 8.295732736461876, '

Best hyperparameters:  {'C': 6.0766852180914865, 'gamma': 8.295732736461876, 'kernel': 'linear'}
----------------------------------------



