In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
heart_disease = pd.read_csv("Data/heart-disease.csv")
heart_disease.head()

x = heart_disease.drop("target", axis=1)
y = heart_disease["target"]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

In [3]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.78      0.84        36
           1       0.73      0.88      0.80        25

    accuracy                           0.82        61
   macro avg       0.82      0.83      0.82        61
weighted avg       0.83      0.82      0.82        61



In [4]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.819672131147541


In [5]:
# Let's Try Improving our model

In [6]:
# 3 Layers Rule
x_train_val, x_test, y_train_val, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.25, random_state=42
)

clf = RandomForestClassifier()
clf.fit(x_train, y_train)
y_val_pred = clf.predict(x_val)
y_test_pred = clf.predict(x_test)
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.85      0.88      0.86        32

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61



In [7]:
accuracy = accuracy_score(y_test, y_test_pred)
print(accuracy)

0.8524590163934426


In [8]:
# Optuna - Look for best combinations of parameters

In [10]:
import optuna

def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 1500),
        "max_depth": trial.suggest_int("max_depth", 5, 40),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4),
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
        "random_state": 42,
        "n_jobs": -1
    }
    
    clf_opt = RandomForestClassifier(**params)
    clf_opt.fit(x_train, y_train)
    return clf_opt.score(x_val, y_val)  # ocena na walidacji

# Uruchomienie Optuny
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

optuna.logging.disable_default_handler()
optuna.logging.set_verbosity(optuna.logging.CRITICAL)
print(study.best_params)
print(study.best_value)

{'n_estimators': 1371, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2'}
0.7868852459016393
