In [None]:
# Importer nødvendige biblioteker
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, recall_score


# Define the input features and the target variable.
# NOTE(review): `data` is not defined in this cell; presumably a pandas
# DataFrame with a "HeartDisease" column created in an earlier cell -- confirm.
X = data.drop(columns=["HeartDisease"])
y = data["HeartDisease"]

# Split into training (80%) and test (20%) sets. Stratifying on y keeps the
# class proportions the same in both splits; random_state makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Hyperparameter search space for the random forest.
param_distributions = {
    "n_estimators": [50, 100, 200, 300],
    "max_depth": [None, 10, 20, 30, 40],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "max_features": ["sqrt", "log2", None],
}

# Set up a randomized search over the space above to find the best parameters.
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_distributions,
    n_iter=50,  # number of sampled parameter combinations to evaluate
    scoring="recall",  # select the combination with the best recall
    cv=3,  # 3-fold cross-validation
    random_state=42,
    verbose=1,
    n_jobs=-1,  # use all available processors
)

# Run the search on the training data only; the test set stays untouched here.
random_search.fit(X_train, y_train)

# Retrieve the best model found by the search and report its parameters.
best_model = random_search.best_estimator_
print("Bedste parametre: ", random_search.best_params_)

# Evaluate the tuned model on the held-out test set.
y_pred_best = best_model.predict(X_test)
best_recall = recall_score(y_test, y_pred_best)
print("\nRecall for den tunede model: ", best_recall)

# Show the per-class classification report.
# NOTE(review): target_names assumes the two labels sort as
# (no disease, disease), e.g. 0/1 -- confirm against the data.
print("\nKlassifikationsrapport:")
print(classification_report(y_test, y_pred_best, target_names=["No Heart Disease", "Heart Disease"]))

# Extra: lower the decision threshold to trade precision for recall.
# Column 1 of predict_proba is the second entry of best_model.classes_;
# the cast to int below assumes that class is encoded as 1 -- TODO confirm.
# NOTE(review): the threshold is applied to and scored on the test set. If the
# value is tuned by watching this score, the reported recall is optimistically
# biased; consider choosing it on a validation split instead.
y_proba = best_model.predict_proba(X_test)[:, 1]
threshold = 0.4  # adjust this value to optimize recall
y_pred_adjusted = (y_proba >= threshold).astype(int)
adjusted_recall = recall_score(y_test, y_pred_adjusted)
# Interpolate the actual threshold so the message stays correct when the
# value above is changed (it was previously hard-coded as 0.4 in the text).
print(f"\nRecall med justeret tærskel (threshold={threshold}): ", adjusted_recall)
