<a href="https://colab.research.google.com/github/Swapn2/PyTorch/blob/main/hyperparameter_tunning_bayesian.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.4-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.4 colorlog-6.9.0 optuna-4.4.0


In [40]:
import optuna
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# For visualizations
from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_slice, plot_contour, plot_param_importances

In [41]:
import pandas as pd

In [42]:
# Pima Indians Diabetes dataset, read from a CSV hosted on GitHub.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# Column labels are supplied explicitly via `names=`; the last one is the target.
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

# Read the CSV into a DataFrame
df = pd.read_csv(url, names=columns)

# Preview the first rows
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [43]:
import numpy as np
# Columns in which a literal 0 is treated as a missing reading.
cols_with_missing_vals = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

# Step 1: turn those zeros into NaN so they are recognised as missing.
zero_as_nan = df[cols_with_missing_vals].replace(0, np.nan)
df[cols_with_missing_vals] = zero_as_nan

# Step 2: fill every NaN with the mean of its own column.
# NOTE(review): the means are computed over the whole frame, i.e. before the
# train/test split performed later in the notebook.
column_means = df.mean()
df.fillna(column_means, inplace=True)

# Sanity check: per-column count of remaining missing values.
print(df.isnull().sum())

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [44]:
# Features are every column except the last; the target is the last column.
feature_columns = df.columns[:-1]
target_column = df.columns[-1]

x = df[feature_columns]
y = df[target_column]

In [45]:
# Display the feature matrix (all columns of df except the last one).
x

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72.0,35.00000,155.548223,33.6,0.627,50
1,1,85.0,66.0,29.00000,155.548223,26.6,0.351,31
2,8,183.0,64.0,29.15342,155.548223,23.3,0.672,32
3,1,89.0,66.0,23.00000,94.000000,28.1,0.167,21
4,0,137.0,40.0,35.00000,168.000000,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101.0,76.0,48.00000,180.000000,32.9,0.171,63
764,2,122.0,70.0,27.00000,155.548223,36.8,0.340,27
765,5,121.0,72.0,23.00000,112.000000,26.2,0.245,30
766,1,126.0,60.0,29.15342,155.548223,30.1,0.349,47


In [46]:
# Display the target Series (the last column of df).
y

Unnamed: 0,Outcome
0,1
1,0
2,1
3,0
4,1
...,...
763,0
764,0
765,0
766,1


In [47]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [48]:
# Learn the scaling statistics from the training features only ...
scaler = StandardScaler().fit(x_train)

# ... then apply that same fitted scaler to both splits.
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [49]:
# (rows, columns) of the scaled training features.
x_train.shape

(537, 8)

In [50]:
# (rows, columns) of the scaled test features.
x_test.shape

(231, 8)

In [51]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [52]:
def objective(trial):
  """Score one hyperparameter configuration of a random forest.

  Asks `trial` for an integer `n_estimators` in [50, 200] and an integer
  `max_depth` in [3, 20], builds a `RandomForestClassifier` with those values
  (random_state fixed to 42) and evaluates it with 3-fold cross-validation on
  the module-level `x_train` / `y_train`.

  Returns:
    The mean cross-validated accuracy over the 3 folds.
  """
  # Sample the search space (n_estimators first, then max_depth).
  params = {
      'n_estimators': trial.suggest_int('n_estimators', 50, 200),
      'max_depth': trial.suggest_int('max_depth', 3, 20),
  }

  # Build the classifier from the sampled values.
  model = RandomForestClassifier(random_state=42, **params)

  # Average the per-fold accuracies into a single objective value.
  fold_scores = cross_val_score(model, x_train, y_train, cv=3, scoring='accuracy')
  return fold_scores.mean()



In [53]:
# Search for the hyperparameters that maximise mean CV accuracy using the TPE sampler.
# The sampler is seeded so that a fresh "Restart & Run All" proposes the same
# sequence of trials and therefore reports the same best hyperparameters.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

# Evaluate 50 configurations.
study.optimize(objective, n_trials=50)

[I 2025-07-19 05:45:34,888] A new study created in memory with name: no-name-d0500a42-ddad-4cd8-90d1-29de859e0059
[I 2025-07-19 05:45:35,506] Trial 0 finished with value: 0.7560521415270017 and parameters: {'n_estimators': 134, 'max_depth': 3}. Best is trial 0 with value: 0.7560521415270017.
[I 2025-07-19 05:45:36,154] Trial 1 finished with value: 0.7672253258845437 and parameters: {'n_estimators': 132, 'max_depth': 6}. Best is trial 1 with value: 0.7672253258845437.
[I 2025-07-19 05:45:36,544] Trial 2 finished with value: 0.7653631284916201 and parameters: {'n_estimators': 73, 'max_depth': 14}. Best is trial 1 with value: 0.7672253258845437.
[I 2025-07-19 05:45:37,088] Trial 3 finished with value: 0.7746741154562384 and parameters: {'n_estimators': 109, 'max_depth': 18}. Best is trial 3 with value: 0.7746741154562384.
[I 2025-07-19 05:45:37,775] Trial 4 finished with value: 0.7635009310986964 and parameters: {'n_estimators': 137, 'max_depth': 9}. Best is trial 3 with value: 0.77467411

In [54]:

# Report the highest objective value found and the parameters that produced it.
best_accuracy = study.best_value
best_hyperparameters = study.best_params
print(f'Best trial accuracy: {best_accuracy}')
print(f'Best hyperparameters: {best_hyperparameters}')

Best trial accuracy: 0.7802607076350093
Best hyperparameters: {'n_estimators': 115, 'max_depth': 16}


In [55]:
from sklearn.metrics import accuracy_score

# Rebuild the forest from the best trial's hyperparameters (same fixed random_state
# as in the objective) and fit it on the full training split.
best_params = study.best_trial.params
best_model = RandomForestClassifier(random_state=42, **best_params)
best_model.fit(x_train, y_train)

# Score the fitted model on the held-out test split.
y_pred = best_model.predict(x_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the test accuracy rounded to two decimals.
print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')


Test Accuracy with best hyperparameters: 0.74


In [56]:
# Render the study diagnostics in this order:
#   1. optimization history
#   2. parallel coordinates
#   3. slice plot
#   4. contour plot
#   5. hyperparameter importances
study_plots = (
    plot_optimization_history,
    plot_parallel_coordinate,
    plot_slice,
    plot_contour,
    plot_param_importances,
)
for make_plot in study_plots:
    make_plot(study).show()

In [57]:
# Same search, but with purely random sampling as a baseline; the goal is still to
# maximise accuracy. The sampler is seeded so that re-running the notebook draws
# the same 50 configurations and reports the same best trial.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.RandomSampler(seed=42),
)

# Run 50 trials to find the best hyperparameters.
study.optimize(objective, n_trials=50)

[I 2025-07-19 05:50:28,143] A new study created in memory with name: no-name-b3c1af42-ecec-44ac-a082-db009c851d56
[I 2025-07-19 05:50:28,689] Trial 0 finished with value: 0.7672253258845437 and parameters: {'n_estimators': 110, 'max_depth': 14}. Best is trial 0 with value: 0.7672253258845437.
[I 2025-07-19 05:50:29,735] Trial 1 finished with value: 0.7746741154562384 and parameters: {'n_estimators': 200, 'max_depth': 17}. Best is trial 1 with value: 0.7746741154562384.
[I 2025-07-19 05:50:30,203] Trial 2 finished with value: 0.7653631284916201 and parameters: {'n_estimators': 89, 'max_depth': 8}. Best is trial 1 with value: 0.7746741154562384.
[I 2025-07-19 05:50:31,272] Trial 3 finished with value: 0.7728119180633147 and parameters: {'n_estimators': 144, 'max_depth': 7}. Best is trial 1 with value: 0.7746741154562384.
[I 2025-07-19 05:50:32,651] Trial 4 finished with value: 0.7616387337057727 and parameters: {'n_estimators': 196, 'max_depth': 9}. Best is trial 1 with value: 0.77467411

In [58]:

# Report the highest objective value found and the parameters that produced it.
best_accuracy = study.best_value
best_hyperparameters = study.best_params
print(f'Best trial accuracy: {best_accuracy}')
print(f'Best hyperparameters: {best_hyperparameters}')

Best trial accuracy: 0.7802607076350093
Best hyperparameters: {'n_estimators': 131, 'max_depth': 12}


In [59]:
from sklearn.metrics import accuracy_score

# Rebuild the forest from the best trial's hyperparameters (same fixed random_state
# as in the objective) and fit it on the full training split.
best_params = study.best_trial.params
best_model = RandomForestClassifier(random_state=42, **best_params)
best_model.fit(x_train, y_train)

# Score the fitted model on the held-out test split.
y_pred = best_model.predict(x_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the test accuracy rounded to two decimals.
print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')


Test Accuracy with best hyperparameters: 0.75


In [60]:
# Render the study diagnostics in this order:
#   1. optimization history
#   2. parallel coordinates
#   3. slice plot
#   4. contour plot
#   5. hyperparameter importances
study_plots = (
    plot_optimization_history,
    plot_parallel_coordinate,
    plot_slice,
    plot_contour,
    plot_param_importances,
)
for make_plot in study_plots:
    make_plot(study).show()

In [61]:
# Discrete grid handed to the grid sampler: 4 values per parameter, 16 combinations.
search_space = dict(
    n_estimators=list(range(50, 201, 50)),  # 50, 100, 150, 200
    max_depth=list(range(5, 21, 5)),        # 5, 10, 15, 20
)

In [62]:
# Create a study that walks the explicit grid in `search_space`.
# GridSampler visits the grid points in a shuffled order; seeding it fixes that
# order so the trial numbering (and tie-breaking for the best trial) is repeatable.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.GridSampler(search_space, seed=42),
)

# No n_trials is passed: the original relies on the grid sampler to end the run.
study.optimize(objective)

[I 2025-07-19 05:51:14,543] A new study created in memory with name: no-name-1edd1fc9-f600-4d9d-bcdf-35d3fcf5451c
[I 2025-07-19 05:51:15,021] Trial 0 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 100, 'max_depth': 5}. Best is trial 0 with value: 0.7690875232774674.
[I 2025-07-19 05:51:15,852] Trial 1 finished with value: 0.7672253258845437 and parameters: {'n_estimators': 150, 'max_depth': 10}. Best is trial 0 with value: 0.7690875232774674.
[I 2025-07-19 05:51:16,110] Trial 2 finished with value: 0.7728119180633147 and parameters: {'n_estimators': 50, 'max_depth': 15}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-07-19 05:51:16,636] Trial 3 finished with value: 0.7653631284916201 and parameters: {'n_estimators': 100, 'max_depth': 15}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-07-19 05:51:17,142] Trial 4 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 100, 'max_depth': 20}. Best is trial 2 with value: 0.772811

In [63]:

# Report the highest objective value found and the parameters that produced it.
best_accuracy = study.best_value
best_hyperparameters = study.best_params
print(f'Best trial accuracy: {best_accuracy}')
print(f'Best hyperparameters: {best_hyperparameters}')

Best trial accuracy: 0.7746741154562384
Best hyperparameters: {'n_estimators': 50, 'max_depth': 5}


In [64]:
from sklearn.metrics import accuracy_score

# Rebuild the forest from the best trial's hyperparameters (same fixed random_state
# as in the objective) and fit it on the full training split.
best_params = study.best_trial.params
best_model = RandomForestClassifier(random_state=42, **best_params)
best_model.fit(x_train, y_train)

# Score the fitted model on the held-out test split.
y_pred = best_model.predict(x_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the test accuracy rounded to two decimals.
print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')


Test Accuracy with best hyperparameters: 0.74


In [65]:
# Render the study diagnostics in this order:
#   1. optimization history
#   2. parallel coordinates
#   3. slice plot
#   4. contour plot
#   5. hyperparameter importances
study_plots = (
    plot_optimization_history,
    plot_parallel_coordinate,
    plot_slice,
    plot_contour,
    plot_param_importances,
)
for make_plot in study_plots:
    make_plot(study).show()