In [1]:
import os
import sys

# Put the parent of the current working directory on the module search path.
# NOTE(review): presumably this is what makes the `ml_workflow` import below
# resolvable when the notebook is run from a sub-directory of the repo — confirm.
current_dir = os.getcwd()
path = os.path.dirname(current_dir)
# Guard the append so that re-running this cell does not stack duplicate
# entries onto sys.path.
if path not in sys.path:
    sys.path.append(path)

from ml_workflow.calibrated_pipeline_hyperopt_cv import CalibratedPipelineHyperOptCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import numpy as np
import pandas as pd 

In [2]:
# Generate a seeded synthetic classification problem (100k samples), then
# wrap the feature matrix in a DataFrame. `y` is left exactly as returned by
# make_classification.
X, y = make_classification(
    n_samples=100_000,
    class_sep=0.7,
    random_state=42,
)
X = pd.DataFrame(data=X)

In [3]:
# Hyperparameter search space: each key is a random-forest hyperparameter
# name and each value is the list of candidate settings to search over.
param_grid = dict(
    n_estimators=[100, 150, 300, 400, 500],
    max_depth=[6, 8, 10, 15, 20],
    max_features=[5, 6, 8, 10],
    min_samples_split=[4, 5, 8, 10, 15, 20, 25, 50],
    min_samples_leaf=[4, 5, 8, 10, 15, 20, 25, 50],
)

# Initialize the estimator that will be tuned: a random forest using the
# entropy split criterion, a fixed seed, and 12 parallel jobs.
estimator = RandomForestClassifier(
    criterion="entropy",
    random_state=30,
    n_jobs=12,
)

# Wrap the forest and its search space in the project's tuning pipeline.
# No imputer or scaler is configured here.
# NOTE(review): 'under' presumably selects under-sampling and 'atpe' the
# hyperopt search algorithm — confirm against CalibratedPipelineHyperOptCV.
clf = CalibratedPipelineHyperOptCV(
    base_estimator=estimator,
    param_grid=param_grid,
    imputer=None,
    scaler=None,
    resample="under",
    n_jobs=1,
    max_iter=10,
    hyperopt="atpe",
    # The mean of y is handed to the scorer as the known skew.
    # NOTE(review): presumably the positive-class rate for 0/1 labels — confirm.
    scorer_kwargs=dict(known_skew=np.mean(y)),
)

In [4]:
# Fit the tuning pipeline on the synthetic features and labels.
# The recorded output below shows a per-trial progress bar at roughly
# 66 s/trial, so expect this cell to run for several minutes.
clf.fit(X,y)

 10%|█         | 10/100 [11:01<1:39:15, 66.17s/trial, best loss: 0.6582808939832876]


In [5]:
# Load the results table from 'hyperopt_results.pkl' (path is relative to
# the working directory).
# NOTE(review): presumably this file is written by the clf.fit call above — confirm.
# Unpickling can execute arbitrary code, so only load pickle files from a
# trusted source.
df = pd.read_pickle('hyperopt_results.pkl')

In [6]:
# Display the loaded results table (loss, timing and the hyperparameter
# values recorded for each iteration).
df

Unnamed: 0,loss,loss_std,iteration,train_time,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators
0,0.658894,0.0111,1,42.856,8,10,8,4,100
1,0.658326,0.011073,2,115.314794,8,10,4,4,300
2,0.658967,0.011743,3,38.050945,10,5,4,25,150
3,0.658281,0.00984,4,37.988993,10,8,20,15,100
4,0.658667,0.01062,5,94.077768,8,8,10,10,300
5,0.658497,0.010766,6,47.456477,15,5,4,50,150
6,0.665826,0.010446,7,82.206999,6,6,25,8,400
7,0.659404,0.010285,8,38.723546,15,6,10,50,100
8,0.66004,0.011652,9,120.663692,8,6,20,20,500
