In [2]:
import optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
df = pd.read_csv("heart.csv")
df = pd.get_dummies(df)
df.info()
X = df.drop("HeartDisease", axis=1).copy()
y = df["HeartDisease"].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=369)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                918 non-null    int64  
 1   RestingBP          918 non-null    int64  
 2   Cholesterol        918 non-null    int64  
 3   FastingBS          918 non-null    int64  
 4   MaxHR              918 non-null    int64  
 5   Oldpeak            918 non-null    float64
 6   HeartDisease       918 non-null    int64  
 7   Sex_F              918 non-null    bool   
 8   Sex_M              918 non-null    bool   
 9   ChestPainType_ASY  918 non-null    bool   
 10  ChestPainType_ATA  918 non-null    bool   
 11  ChestPainType_NAP  918 non-null    bool   
 12  ChestPainType_TA   918 non-null    bool   
 13  RestingECG_LVH     918 non-null    bool   
 14  RestingECG_Normal  918 non-null    bool   
 15  RestingECG_ST      918 non-null    bool   
 16  ExerciseAngina_N   918 non

In [10]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.8876811594202898

In [5]:
def objective(trial):
    solver = trial.suggest_categorical('solver', ['liblinear', 'lbfgs', 'newton-cg', 'sag', 'saga'])
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2', 'elasticnet', None])
    C = trial.suggest_float('C', 1e-4, 1e4, log=True)
    if solver == 'liblinear' and penalty not in ['l1', 'l2']:
        raise optuna.TrialPruned()
    if solver in ['lbfgs', 'newton-cg', 'sag'] and penalty not in ['l2', None]:
        raise optuna.TrialPruned()
    if penalty == 'elasticnet':
        l1_ratio = trial.suggest_float('l1_ratio', 0.0, 1.0)
    else:
        l1_ratio = None
    hiper = {
        'solver': solver,
        'penalty': penalty,
        'C': C,
        'l1_ratio': l1_ratio,
        'max_iter': trial.suggest_int('max_iter', 100, 2000),
        'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False]),
        'class_weight': trial.suggest_categorical('class_weight', [None, 'balanced']),
    }
    if penalty is None:
        hiper.pop('C', None)
        hiper.pop('l1_ratio', None)
    model = LogisticRegression(**hiper)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=1000, n_jobs=-1)

[I 2025-08-21 21:44:20,995] A new study created in memory with name: no-name-2166a12a-ab11-4398-ac72-e2e41ca5d027
[I 2025-08-21 21:44:21,006] Trial 3 pruned. 
[I 2025-08-21 21:44:21,008] Trial 4 pruned. 
[I 2025-08-21 21:44:21,009] Trial 5 pruned. 
[I 2025-08-21 21:44:21,020] Trial 8 pruned. 
[I 2025-08-21 21:44:21,021] Trial 9 pruned. 
[I 2025-08-21 21:44:21,029] Trial 12 pruned. 
[I 2025-08-21 21:44:21,041] Trial 16 pruned. 
[I 2025-08-21 21:44:21,045] Trial 17 pruned. 
[I 2025-08-21 21:44:21,047] Trial 18 pruned. 
[I 2025-08-21 21:44:21,059] Trial 10 finished with value: 0.8913043478260869 and parameters: {'solver': 'liblinear', 'penalty': 'l1', 'C': 0.586646732439566, 'max_iter': 334, 'fit_intercept': True, 'class_weight': 'balanced'}. Best is trial 10 with value: 0.8913043478260869.
[I 2025-08-21 21:44:21,096] Trial 14 finished with value: 0.7355072463768116 and parameters: {'solver': 'sag', 'penalty': 'l2', 'C': 0.00026895231919888367, 'max_iter': 1064, 'fit_intercept': True, 'cl

In [8]:
best_hip = study.best_params
print(best_hip)
model = LogisticRegression(**best_hip)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

{'solver': 'newton-cg', 'penalty': 'l2', 'C': 0.11095769681414665, 'max_iter': 112, 'fit_intercept': True, 'class_weight': None}


0.8985507246376812