# Hyperopt

In [1]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import decomposition
from sklearn import pipeline

In [2]:
from functools import partial
from skopt import space
from skopt import gp_minimize

In [3]:
from hyperopt import fmin
from hyperopt import hp
from hyperopt import tpe
from hyperopt import Trials

In [4]:
from hyperopt.pyll.base import scope #for the format int

In [5]:
df = pd.read_csv('../input/mobile-price-classification/train.csv')
X  = df.drop('price_range', axis = 1).values
y  = df['price_range'].values

In [6]:
def optimize(params, x, y):
    
    model  = ensemble.RandomForestClassifier(**params) #**params to read the dict
    kf     = model_selection.StratifiedKFold(n_splits = 5)
    
    accuracies = []
    for idx in kf.split(X=x, y=y):
        train_idx, test_idx = idx[0], idx[1]
        
        xtrain = x[train_idx]
        ytrain = y[train_idx]
        xtest = x[test_idx]
        ytest = y[test_idx]
        
        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        
        accuracies.append(fold_acc)
    
    return -1*np.mean(accuracies)

https://www.youtube.com/watch?v=5nYqK-HaoKY&t=2153s

In [7]:
#dictionalry
param_space = {
    "max_depth":scope.int(hp.quniform("max_depth", 3,15, 1)), #hp.quniform(label, low, high, q)
    "n_estimators":scope.int(hp.quniform("n_estimators", 100, 600, 1)),
    "criterion":hp.choice("criterion", ["gini", "entropy"]),
    
    "max_features":hp.uniform("max_features", 0.1,1)
}

In [8]:
optimization_func = partial(optimize, x = X, y = y)

In [9]:
trials = Trials()

In [10]:
result = fmin(fn = optimization_func, space = param_space, algo = tpe.suggest, max_evals = 15, trials = trials, verbose = 10)

100%|██████████| 15/15 [03:43<00:00, 14.90s/trial, best loss: -0.907]


In [11]:
print(result)

{'criterion': 1, 'max_depth': 11.0, 'max_features': 0.9846554325509123, 'n_estimators': 288.0}


In [12]:
classifier = ensemble.RandomForestClassifier(criterion ='entropy', max_depth = 8, 
                                        max_features = 0.8917683974762745, n_estimators = 411, n_jobs=-1)

In [13]:
from sklearn.model_selection import cross_val_score
score = cross_val_score(classifier,X,y, cv=10)
print('scores\n',score)
print('\ncv values', score.shape)
print('\nScore_Mean', score.mean())

scores
 [0.92  0.925 0.93  0.905 0.91  0.92  0.885 0.895 0.895 0.89 ]

cv values (10,)

Score_Mean 0.9075000000000001
