In [None]:
# importing the required libraries
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV,cross_val_score
from sklearn.metrics import accuracy_score,auc,roc_curve,confusion_matrix, classification_report,roc_auc_score


In [None]:
# Mount Google Drive at /content/drive so files stored there (the dataset read
# in the next cell) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the heart-disease CSV from the mounted Drive folder into a DataFrame.
heart_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Datasets/heartdisease.csv')

In [None]:
# Preview the first rows to check that the columns loaded as expected.
heart_data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [None]:
# Separate the predictors from the label column.
X = heart_data.drop(columns=['target'])
y = heart_data['target']

# Hold out 30% of the rows for testing; stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=42,
)

# Display the shapes of the four pieces as the cell output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((212, 13), (91, 13), (212,), (91,))

# Random Forest

In [None]:
# Baseline random forest with hand-picked hyperparameters and a fixed seed.
model_rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=4,
    max_features=5,
    oob_score=True,
    verbose=0,
    random_state=50,
)
model_rf.fit(X_train, y_train)

# Accuracy on the rows the forest was fitted on.
train_accuracy = model_rf.score(X_train, y_train)
print(" \n\nTraining Accuracy Score : ", train_accuracy)

# Accuracy on the held-out rows.
y_pred_rf_test = model_rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred_rf_test)
print(" \n\nTest Score : ", test_accuracy)

 

Training Accuracy Score :  0.9339622641509434
 

Test Score :  0.7912087912087912


## Parameter Tuning using GridSearchCV

In [None]:
# Hyperparameter tuning: the grid searched by GridSearchCV.
# max_features takes the values 2..5 and max_depth the values 1..5.
parameters = dict(
    max_features=np.arange(2, 6),
    max_depth=np.arange(1, 6),
)

In [None]:
start = time.time()
print("Optimizing the parameters")

# Exhaustive grid search over `parameters`, starting from the baseline forest
# and scoring every candidate by 5-fold cross-validated accuracy.
tune_model = GridSearchCV(
    estimator=model_rf,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    verbose=2,
)
tune_model.fit(X_train, y_train)

# Report the elapsed wall-clock time in seconds and in minutes.
end = time.time()
duration = end - start
print ('\n Parameter Optimization took %0.2f seconds (%0.1f minutes)'%(duration, duration/60) )

# Best grid point and its mean cross-validation score.
best_grid_params = tune_model.best_params_
best_grid_score = tune_model.best_score_
print("\n\nBEST PARAMETRS : ", best_grid_params, "\n\nBEST SCORE : ", best_grid_score)

Optimizing the parameters
Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] max_depth=1, max_features=2 .....................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ...................... max_depth=1, max_features=2, total=   0.4s
[CV] max_depth=1, max_features=2 .....................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV] ...................... max_depth=1, max_features=2, total=   0.4s
[CV] max_depth=1, max_features=2 .....................................
[CV] ...................... max_depth=1, max_features=2, total=   0.4s
[CV] max_depth=1, max_features=2 .....................................
[CV] ...................... max_depth=1, max_features=2, total=   0.4s
[CV] max_depth=1, max_features=2 .....................................
[CV] ...................... max_depth=1, max_features=2, total=   0.4s
[CV] max_depth=1, max_features=3 .....................................
[CV] ...................... max_depth=1, max_features=3, total=   0.4s
[CV] max_depth=1, max_features=3 .....................................
[CV] ...................... max_depth=1, max_features=3, total=   0.4s
[CV] max_depth=1, max_features=3 .....................................
[CV] ...................... max_depth=1, max_features=3, total=   0.4s
[CV] max_depth=1, max_features=3 .....................................
[CV] .

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   40.8s finished



 Parameter Optimization took 41.24 seconds (0.7 minutes)


BEST PARAMETRS :  {'max_depth': 2, 'max_features': 4} 

BEST SCORE :  0.8498338870431894


## PARAMETER OPTIMIZATION USING OPTUNA

Optuna formulates the hyperparameter optimization
as a process of minimizing/maximizing an objective function
that takes a set of hyperparameters as an input and returns its
(validation) score.
Optuna refers to each process of optimization as a study, and to each evaluation of objective function as a trial. 

In [None]:
# Install Optuna into this runtime; -q keeps pip's output short.
!pip install optuna -q

[K     |████████████████████████████████| 302 kB 5.4 MB/s 
[K     |████████████████████████████████| 164 kB 49.1 MB/s 
[K     |████████████████████████████████| 80 kB 7.9 MB/s 
[K     |████████████████████████████████| 75 kB 4.0 MB/s 
[K     |████████████████████████████████| 49 kB 4.9 MB/s 
[K     |████████████████████████████████| 141 kB 44.8 MB/s 
[K     |████████████████████████████████| 111 kB 58.7 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [None]:
import optuna

Here we optimize three hyperparameters: max_depth, max_features and n_estimators.

First, we define the search space for these hyperparameters.

As the code below shows, n_estimators is an integer ranging from 10 to 100 in steps of 10,

max_depth is an integer sampled on a log-uniform scale from 1 to 7, and

max_features is an integer sampled on a log-uniform scale from 1 to 10.


The ranges and types of the values can be changed depending on the parameter; e.g. we can use suggest_categorical to specify the choice of classifier.



In [None]:
def objective(trial):
    """Objective for the Optuna study: mean 5-fold CV score of a random forest.

    Parameters
    ----------
    trial : optuna trial object
        Used to sample the hyperparameters ``rf_max_depth`` (integer 1-7,
        log scale), ``rf_max_features`` (integer 1-10, log scale) and
        ``rf_estimators`` (integer 10-100 in steps of 10).

    Returns
    -------
    float
        Mean of the 5-fold cross-validation scores on ``X_train``/``y_train``.
    """
    rf_max_depth = trial.suggest_int("rf_max_depth", 1, 7, log=True)
    rf_max_features = trial.suggest_int("rf_max_features", 1, 10, log=True)
    rf_estimators = trial.suggest_int('rf_estimators', 10, 100, step=10)

    # Fix the forest's seed (same value as the baseline model_rf). Without it
    # the same hyperparameters score differently on every call, so the sampler
    # ends up chasing noise and the reported best trial cannot be reproduced.
    classifier_obj = RandomForestClassifier(
        max_depth=rf_max_depth,
        n_estimators=rf_estimators,
        max_features=rf_max_features,
        random_state=50,
    )

    score = cross_val_score(classifier_obj, X_train, y_train, n_jobs=-1, cv=5)
    accuracy = score.mean()
    return accuracy

Second, we evaluate the objective function using a study object.

The direction can be ‘maximize’ or ‘minimize’, depending on the nature of the objective. Here we need to maximize the cross_val_score.

The other argument passed to the optimize function is the number of trials, set to 100 here.

The study uses the TPE (Tree-structured Parzen Estimator) sampler, a Bayesian optimization method, which is also the sampler Optuna falls back to by default when none is specified.
Once we call the optimize method, the optimization process starts.

In [None]:
start = time.time()
print("Optimizing the parameters")

# Seed the sampler so that re-running the notebook proposes the same sequence
# of trials. TPESampler is the sampler Optuna uses by default, so the search
# algorithm itself is unchanged; only the randomness is pinned.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=50),
)
# Evaluate the objective for 100 sampled hyperparameter sets.
study.optimize(objective, n_trials=100)


# Report the elapsed wall-clock time in seconds and in minutes.
end = time.time()
duration = end - start
print ('\n Parameter Optimization took %0.2f seconds (%0.1f minutes)'%(duration, duration/60) )

print("\n\nBEST TRIAL : \n",study.best_trial) #get best trial

[32m[I 2021-08-25 05:31:51,598][0m A new study created in memory with name: no-name-edb3dcca-7119-44c6-9bf9-d71091b8ccc8[0m


Optimizing the parameters


[32m[I 2021-08-25 05:31:53,123][0m Trial 0 finished with value: 0.7883720930232558 and parameters: {'rf_max_depth': 1, 'rf_max_features': 2, 'rf_estimators': 10}. Best is trial 0 with value: 0.7883720930232558.[0m
[32m[I 2021-08-25 05:31:53,239][0m Trial 1 finished with value: 0.7502768549280177 and parameters: {'rf_max_depth': 1, 'rf_max_features': 1, 'rf_estimators': 10}. Best is trial 0 with value: 0.7883720930232558.[0m
[32m[I 2021-08-25 05:31:53,724][0m Trial 2 finished with value: 0.8119601328903656 and parameters: {'rf_max_depth': 3, 'rf_max_features': 8, 'rf_estimators': 60}. Best is trial 2 with value: 0.8119601328903656.[0m
[32m[I 2021-08-25 05:31:53,841][0m Trial 3 finished with value: 0.8067552602436322 and parameters: {'rf_max_depth': 1, 'rf_max_features': 1, 'rf_estimators': 10}. Best is trial 2 with value: 0.8119601328903656.[0m
[32m[I 2021-08-25 05:31:53,955][0m Trial 4 finished with value: 0.8545957918050942 and parameters: {'rf_max_depth': 4, 'rf_max_fea


 Parameter Optimization took 57.14 seconds (1.0 minutes)


BEST TRIAL : 
 FrozenTrial(number=45, values=[0.8688815060908086], datetime_start=datetime.datetime(2021, 8, 25, 5, 32, 16, 916709), datetime_complete=datetime.datetime(2021, 8, 25, 5, 32, 17, 505335), params={'rf_max_depth': 2, 'rf_max_features': 3, 'rf_estimators': 70}, distributions={'rf_max_depth': IntLogUniformDistribution(high=7, low=1, step=1), 'rf_max_features': IntLogUniformDistribution(high=10, low=1, step=1), 'rf_estimators': IntUniformDistribution(high=100, low=10, step=10)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=45, state=TrialState.COMPLETE, value=None)


In [None]:
# Show only the hyperparameter values recorded for the study's best trial.
print("\n\nBEST PARAMETERS : \n",study.best_params) #get best parameters



BEST PARAMETERS : 
 {'rf_max_depth': 2, 'rf_max_features': 3, 'rf_estimators': 70}


## Visualization Plots available in Optuna

In [None]:
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
# Render Optuna's optimization-history figure for the finished study.
plot_optimization_history(study)

In [None]:
# Render Optuna's slice figure for the finished study.
plot_slice(study)

In [None]:
# Render Optuna's hyperparameter-importance figure for the finished study.
plot_param_importances(study)

Optuna is a broad software framework for hyperparameter optimization, with applications in both machine learning and non-machine-learning tasks.
Here we demonstrated a simple example of hyperparameter optimization with Optuna for a random forest classifier's hyperparameters.