# Optuna

In [1]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing

In [2]:
import optuna

from functools import partial
from skopt import space
from skopt import gp_minimize
from hyperopt.pyll.base import scope #for the format int

In [3]:
df = pd.read_csv('../input/mobile-price-classification/train.csv')
X  = df.drop('price_range', axis = 1).values
y  = df['price_range'].values

In [4]:
def optimize(trials, x, y):
    criterion     = trials.suggest_categorical("criterion", ["gini", "entropy"])
    n_estimators  = trials.suggest_int("n_estimators", 20, 2000)
    max_depth     = trials.suggest_int("max_depth", 3, 25)
    max_features  = trials.suggest_uniform("max_features", 0.1, 1.0)
    
    model  = ensemble.RandomForestClassifier(
        n_estimators = n_estimators,
        max_depth    = max_depth,
        max_features = max_features,
        criterion    = criterion
    )
    kf     = model_selection.StratifiedKFold(n_splits = 5)
    
    accuracies = []
    for idx in kf.split(X=x, y=y):
        train_idx, test_idx = idx[0], idx[1]
        
        xtrain = x[train_idx]
        ytrain = y[train_idx]
        xtest = x[test_idx]
        ytest = y[test_idx]
        
        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        
        accuracies.append(fold_acc)
    
    return -1*np.mean(accuracies)

In [5]:
optimization_func = partial(optimize, x = X, y = y)

In [6]:
# direction = "minimize" bcz we are multiplying np.mean(accuracies) with -1
# direction = "maximize" if  we are multiplying np.mean(accuracies) with 1

study = optuna.create_study(direction = "minimize")
study.optimize(optimization_func, n_trials = 15)

[I 2020-07-28 20:31:08,413] Finished trial#0 with value: -0.8574999999999999 with parameters: {'criterion': 'entropy', 'n_estimators': 1822, 'max_depth': 15, 'max_features': 0.1246602252581683}. Best is trial#0 with value: -0.8574999999999999.
[I 2020-07-28 20:31:28,752] Finished trial#1 with value: -0.9065 with parameters: {'criterion': 'entropy', 'n_estimators': 539, 'max_depth': 21, 'max_features': 0.42398280226882}. Best is trial#1 with value: -0.9065.
[I 2020-07-28 20:31:59,001] Finished trial#2 with value: -0.9019999999999999 with parameters: {'criterion': 'gini', 'n_estimators': 1039, 'max_depth': 23, 'max_features': 0.5187436671403274}. Best is trial#1 with value: -0.9065.
[I 2020-07-28 20:32:06,433] Finished trial#3 with value: -0.9040000000000001 with parameters: {'criterion': 'entropy', 'n_estimators': 125, 'max_depth': 9, 'max_features': 0.9763052585302266}. Best is trial#1 with value: -0.9065.
[I 2020-07-28 20:32:22,092] Finished trial#4 with value: -0.7190000000000001 wit

https://optuna.org/

In [7]:
 print(study.best_trial)

FrozenTrial(number=13, value=-0.9095000000000001, datetime_start=datetime.datetime(2020, 7, 28, 20, 38, 3, 451321), datetime_complete=datetime.datetime(2020, 7, 28, 20, 39, 13, 844092), params={'criterion': 'entropy', 'n_estimators': 1448, 'max_depth': 18, 'max_features': 0.7278596576449761}, distributions={'criterion': CategoricalDistribution(choices=('gini', 'entropy')), 'n_estimators': IntUniformDistribution(high=2000, low=20, step=1), 'max_depth': IntUniformDistribution(high=25, low=3, step=1), 'max_features': UniformDistribution(high=1.0, low=0.1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=13, state=TrialState.COMPLETE)


In [8]:
classifier = ensemble.RandomForestClassifier(criterion ='entropy', max_depth = 16, 
                                        max_features = 0.8356537680916444, n_estimators = 1383, n_jobs=-1)

In [9]:
from sklearn.model_selection import cross_val_score
score = cross_val_score(classifier,X,y, cv=10)
print('scores\n',score)
print('\ncv values', score.shape)
print('\nScore_Mean', score.mean())

scores
 [0.91  0.93  0.925 0.905 0.91  0.925 0.895 0.9   0.9   0.9  ]

cv values (10,)

Score_Mean 0.9099999999999999
