In [1]:
import optuna
import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [2]:
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
           'DiabetesPedigreeFunction', 'Age', 'Outcome']
df = pd.read_csv(url, names=columns)

In [3]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [4]:
df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [5]:
df.duplicated().sum()

np.int64(0)

In [6]:
x = df.drop("Outcome", axis=1)
y = df["Outcome"]

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [8]:
def objective(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 200)
    max_depth = trial.suggest_int("max_depth", 3, 10)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42
    )

    score = cross_val_score(model, x_train, y_train, scoring="accuracy").mean()

    return score

In [9]:
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=50)

[32m[I 2026-02-03 17:25:09,628][0m A new study created in memory with name: no-name-d47b05f0-78ee-410c-b4db-3f38ca484304[0m


[32m[I 2026-02-03 17:25:10,895][0m Trial 0 finished with value: 0.7687458349993337 and parameters: {'n_estimators': 144, 'max_depth': 7}. Best is trial 0 with value: 0.7687458349993337.[0m
[32m[I 2026-02-03 17:25:11,697][0m Trial 1 finished with value: 0.760629081700653 and parameters: {'n_estimators': 96, 'max_depth': 4}. Best is trial 0 with value: 0.7687458349993337.[0m
[32m[I 2026-02-03 17:25:13,357][0m Trial 2 finished with value: 0.7736372117819539 and parameters: {'n_estimators': 196, 'max_depth': 7}. Best is trial 2 with value: 0.7736372117819539.[0m
[32m[I 2026-02-03 17:25:14,160][0m Trial 3 finished with value: 0.7622550979608156 and parameters: {'n_estimators': 92, 'max_depth': 9}. Best is trial 2 with value: 0.7736372117819539.[0m
[32m[I 2026-02-03 17:25:15,343][0m Trial 4 finished with value: 0.7606157536985206 and parameters: {'n_estimators': 160, 'max_depth': 3}. Best is trial 2 with value: 0.7736372117819539.[0m
[32m[I 2026-02-03 17:25:16,889][0m Trial 

In [10]:
study.best_params

{'n_estimators': 61, 'max_depth': 7}

In [11]:
study.best_trial.value

0.786658669865387