In [None]:
# !pip install optuna

In [None]:
import optuna
from optuna.visualization import plot_parallel_coordinate
from sklearn.datasets import load_diabetes
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [None]:
# Relative path to the Pima Indians Diabetes CSV (a local file name, not a web URL,
# despite the variable name).
url = 'pima-indians-diabetes.data.csv'

In [None]:
# Column labels applied to the CSV on load: eight feature columns, then the target.
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

In [None]:
# Read the file without consuming its first row as a header; the labels come
# from `columns` instead.
df = pd.read_csv(url, header=None, names=columns)
df.head(5)

In [None]:
# Fill any missing entries with 0. Rebinding (instead of inplace=True) avoids
# mutating a frame that an earlier cell already displayed.
df = df.fillna(0)

# Per-column count of remaining NaNs: a compact check instead of rendering the
# full row-by-row boolean frame.
df.isna().sum()

In [None]:
import numpy as np

In [None]:
# Zeros in these columns are treated as missing values, so they are converted
# to NaN; the remaining columns are left as they are.
colswithmissingvalues = [
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
]
df[colswithmissingvalues] = df[colswithmissingvalues].replace(to_replace=0, value=np.nan)

In [None]:
# Features: every column except the target. Target: the 'Outcome' column.
x = df.drop(columns='Outcome')
y = df['Outcome']

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
from sklearn.impute import SimpleImputer

# The features still contain NaN (zeros in several columns were converted to NaN
# earlier), which makes the estimators tuned afterwards fail. Fill them with the
# column mean learned from the training split only, then apply it to the test split.
imputer = SimpleImputer(strategy='mean')
xtrain = imputer.fit_transform(xtrain)
xtest = imputer.transform(xtest)

# Standardise using statistics fitted on the training split only.
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

In [None]:
def objective(trial):
    """Optuna objective: cross-validated accuracy of a random forest.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Supplies 'n_estimators' (integer in [50, 200]) and 'max_depth'
        (integer in [3, 20]).

    Returns
    -------
    float
        Mean accuracy over 3 cross-validation folds, computed on the
        module-level ``xtrain`` / ``ytrain``.
    """
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 3, 20)

    # Fixed seed so that two trials with the same hyperparameters get the same
    # score, and so the score matches the random_state=42 used when the best
    # configuration is refit.
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42,
    )

    return cross_val_score(model, xtrain, ytrain, cv=3, scoring='accuracy').mean()

In [None]:
# Seed the TPE sampler so the sequence of suggested hyperparameters (and hence
# the best trial) is reproducible across kernel restarts.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)

In [None]:
# Report the winning hyperparameters and, on the next line, their CV score.
print(f'best trial: {study.best_params}', study.best_value, sep='\n')

In [None]:
# Refit a forest with the best hyperparameters found by the study on the full
# training split, then measure accuracy on the held-out test split.
bestmodel = RandomForestClassifier(random_state=42, **study.best_params).fit(xtrain, ytrain)
ypred = bestmodel.predict(xtest)
acc = accuracy_score(y_true=ytest, y_pred=ypred)
print(acc)

In [None]:
# Grid to sweep exhaustively: 4 forest sizes x 4 depth limits = 16 combinations.
search_space = dict(
    n_estimators=list(range(50, 201, 50)),  # 50, 100, 150, 200
    max_depth=list(range(5, 21, 5)),        # 5, 10, 15, 20
)

In [None]:
# Second study: sweep the fixed grid instead of sampling adaptively.
# No n_trials is passed; per the Optuna docs, GridSampler stops the study once
# every combination in the grid has been evaluated.
study = optuna.create_study(
    sampler=optuna.samplers.GridSampler(search_space),
    direction='maximize',
)
study.optimize(objective)

In [None]:
# Refit a forest with the best grid point on the full training split and score
# it on the held-out test split.
best_grid_params = study.best_trial.params
bestmodel = RandomForestClassifier(random_state=42, **best_grid_params).fit(xtrain, ytrain)
ypred = bestmodel.predict(xtest)
acc = accuracy_score(y_true=ytest, y_pred=ypred)
print(acc)

In [None]:
from optuna.visualization import plot_parallel_coordinate,plot_optimization_history,plot_slice,plot_contour,plot_param_importances
import plotly
from plotly import __version__ as plotly_version
import matplotlib.pyplot as plt

In [None]:
# plot_optimization_history(study).show()

In [None]:
# plot_parallel_coordinate(study).show()

In [61]:
from sklearn.impute import SimpleImputer

# Start again from a fresh 70/30 split of the unscaled features.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=42, test_size=0.3)

# Impute before scaling: xtrain contains NaN values, which would otherwise raise
# an error. Both transformers are fitted on the training split only and then
# applied unchanged to the test split.
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
xtrain = scaler.fit_transform(imputer.fit_transform(xtrain))
xtest = scaler.transform(imputer.transform(xtest))


In [62]:
# Selecting the algorithm (and its hyperparameters) to train with, using Optuna

In [63]:
from sklearn.svm import SVC


def objective(trial):
    """Optuna objective that chooses a model family and its hyperparameters.

    The trial first picks 'classifier' (one of 'RandomForestClassifier', 'SVM'
    or 'GradientBoosting') and then samples only the hyperparameters of that
    family. The resulting estimator is scored on the module-level
    ``xtrain`` / ``ytrain``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Source of the sampled classifier choice and hyperparameters.

    Returns
    -------
    float
        Mean accuracy over 3 cross-validation folds.
    """
    family = trial.suggest_categorical(
        'classifier',
        ['RandomForestClassifier', 'SVM', 'GradientBoosting'],
    )

    if family == 'SVM':
        svc_kwargs = {
            'C': trial.suggest_float('C', 0.1, 100, log=True),
            # The trial parameter is registered under the spelling 'kernal';
            # that key is what appears in the study's recorded params.
            'kernel': trial.suggest_categorical('kernal', ['linear', 'rbf', 'poly', 'sigmoid']),
            'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
        }
        model = SVC(random_state=42, **svc_kwargs)

    elif family == 'RandomForestClassifier':
        forest_kwargs = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_int('max_depth', 3, 20),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
            'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        }
        model = RandomForestClassifier(random_state=42, **forest_kwargs)

    elif family == 'GradientBoosting':
        boosting_kwargs = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'max_depth': trial.suggest_int('max_depth', 3, 20),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        }
        model = GradientBoostingClassifier(random_state=42, **boosting_kwargs)

    fold_scores = cross_val_score(model, xtrain, ytrain, cv=3, scoring='accuracy')
    return fold_scores.mean()


In [64]:
# Use an explicitly seeded TPE sampler so the model-selection search suggests
# the same sequence of trials on every run.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

[I 2025-09-12 21:52:25,212] A new study created in memory with name: no-name-ae6c145c-97c5-417f-9c0e-f3dfe8d9c1ff
[I 2025-09-12 21:52:25,227] Trial 0 finished with value: 0.7839851024208566 and parameters: {'classifier': 'SVM', 'C': 0.36718134394705737, 'kernal': 'linear', 'gamma': 'scale'}. Best is trial 0 with value: 0.7839851024208566.
[I 2025-09-12 21:52:25,420] Trial 1 finished with value: 0.7690875232774674 and parameters: {'classifier': 'RandomForestClassifier', 'n_estimators': 114, 'max_depth': 15, 'min_samples_split': 5, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 0 with value: 0.7839851024208566.
[I 2025-09-12 21:52:25,624] Trial 2 finished with value: 0.7746741154562384 and parameters: {'classifier': 'RandomForestClassifier', 'n_estimators': 145, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 8, 'bootstrap': False}. Best is trial 0 with value: 0.7839851024208566.
[I 2025-09-12 21:52:25,635] Trial 3 finished with value: 0.7709497206703911 and paramet

In [65]:
# Keep a handle on the winning trial, then print its parameters followed by
# its cross-validated score on the next line.
besttrial = study.best_trial
print(f'best trial: {study.best_params}', study.best_value, sep='\n')

best trial: {'classifier': 'RandomForestClassifier', 'n_estimators': 51, 'max_depth': 13, 'min_samples_split': 8, 'min_samples_leaf': 10, 'bootstrap': False}
0.7895716945996276


In [66]:
# How many trials the sampler spent on each classifier family.
(
    study.trials_dataframe()
    .loc[:, 'params_classifier']
    .value_counts()
)

params_classifier
RandomForestClassifier    66
SVM                       25
GradientBoosting           9
Name: count, dtype: int64