# Hyperparameter Optimization Example



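Hyperparameter search for SPORF (`rerfClassifier`) on the OpenML `car` dataset (task 146821), followed by the same setup for scikit-learn's `RandomForestClassifier`.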

In [1]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
import openml
from rerf.rerfClassifier import rerfClassifier
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier

In [2]:
def hyperparameter_optimization(X, y, *argv):
    """Run a randomized hyperparameter search for each supplied classifier.

    Parameters
    ----------
    X : array-like
        Training features, passed unchanged to ``RandomizedSearchCV.fit``.
    y : array-like
        Training targets, passed unchanged to ``RandomizedSearchCV.fit``.
    *argv : tuple
        Any number of ``(clf, params)`` pairs, where ``clf`` is an estimator
        and ``params`` is the ``param_distributions`` mapping to sample from
        for that estimator.

    Returns
    -------
    dict
        Maps each ``clf`` object to the ``best_params_`` found for it.
        Empty when no pairs are supplied.
    """
    # Number of parameter settings sampled per classifier (loop-invariant).
    n_iter_search = 10

    clf_best_params = {}
    for clf, params in argv:
        # Search over THIS classifier's distributions. The earlier version
        # read the notebook-level ``param_dist`` here, which ignored the
        # ``params`` argument and failed with NameError on a fresh kernel.
        # NOTE(review): ``iid`` was deprecated in scikit-learn 0.22 and
        # removed in 0.24 - drop it when upgrading scikit-learn.
        random_search = RandomizedSearchCV(
            clf,
            param_distributions=params,
            n_iter=n_iter_search,
            cv=10,
            iid=False,
        )
        random_search.fit(X, y)
        clf_best_params[clf] = random_search.best_params_
    return clf_best_params

def hyperparameter_optimization_grid(X, y, *argv):
    """Exhaustively grid-search every supplied classifier.

    Each positional argument after ``X`` and ``y`` is a ``(clf, params)``
    pair; ``params`` is handed to ``GridSearchCV`` as ``param_grid``.

    Returns a dict keyed by the classifier object whose values are the
    corresponding ``best_params_``.
    """

    def _best_for(estimator, grid):
        # Cross-validated (cv=10) grid search for a single estimator.
        search = GridSearchCV(estimator, param_grid=grid, cv=10, iid=False)
        search.fit(X, y)
        return search.best_params_

    return {estimator: _best_for(estimator, grid) for estimator, grid in argv}
        

In [3]:
import os
from scipy.stats import randint as sp_randint
import math
from math import log

# get some data
task_id = 146821  # car

# Take the OpenML API key from the environment instead of committing it to
# the notebook. The key that used to be hardcoded here should be treated as
# leaked and rotated.
# NOTE(review): downloading public tasks is believed not to require a key;
# confirm for the openml-python version in use.
openml_apikey = os.environ.get("OPENML_APIKEY")
if openml_apikey:
    openml.config.apikey = openml_apikey

# NOTE(review): benchmark_suite is not used by any cell visible here.
benchmark_suite = openml.study.get_suite('OpenML-CC18')
task = openml.tasks.get_task(task_id)
X, y = task.get_X_and_y()
n_features = np.shape(X)[1]
n_samples = np.shape(X)[0]


# build a classifier
clf = rerfClassifier(n_estimators=100)

# specify max_depth and min_sample_splits ranges:
# unique rounded integers on an evenly spaced grid up to ln(n_samples)
max_depth_array = (np.unique(np.round((np.arange(2, math.log(n_samples),
                    (math.log(n_samples) - 2) / 10))))).astype(int)
# None is appended as an extra max_depth candidate.
max_depth_range = np.append(max_depth_array, None)

# Same step as above, but starting from 1 instead of 2.
min_sample_splits_range = (np.unique(np.round((np.arange(1, math.log(n_samples),
                            (math.log(n_samples) - 2) / 10))))).astype(int)

# specify parameters and distributions to sample from
param_dist = {"n_estimators": np.arange(50, 550, 50),
              "max_depth": max_depth_range,
              "min_samples_split": min_sample_splits_range,
              "feature_combinations": [1, 2, 3, 4, 5],
              "max_features": ["auto", "sqrt", "log2", None, n_features**2]}

In [3]:
# Randomized search for the SPORF classifier over the distributions in
# param_dist; the result maps the classifier object to its best parameters.
clf_best_params = hyperparameter_optimization(
    X,
    y,
    (clf, param_dist),
)
print(clf_best_params)

{rerfClassifier(feature_combinations=1.5, image_height=None, image_width=None,
               max_depth=None, max_features='auto', min_samples_split=1,
               n_estimators=100, n_jobs=None, oob_score=False,
               patch_height_max=None, patch_height_min=1, patch_width_max=None,
               patch_width_min=1, projection_matrix='RerF', random_state=None): {'feature_combinations': 3, 'max_depth': None, 'max_features': 36, 'min_samples_split': 2, 'n_estimators': 302}}


In [16]:
# Display the stored search result as the cell's output.
# NOTE(review): this cell's execution count (16) is out of sequence, and its
# saved output lists keys ('n_jobs', 'oob_score') that are not in the
# param_dist defined in this notebook - the output looks stale; re-run on a
# fresh kernel to confirm.
clf_best_params

{rerfClassifier(feature_combinations=1.5, image_height=None, image_width=None,
                max_depth=None, max_features='auto', min_samples_split=1,
                n_estimators=100, n_jobs=None, oob_score=False,
                patch_height_max=None, patch_height_min=1, patch_width_max=None,
                patch_width_min=1, projection_matrix='RerF', random_state=None): {'feature_combinations': 2,
  'max_depth': 8,
  'max_features': None,
  'min_samples_split': 3,
  'n_estimators': 203,
  'n_jobs': None,
  'oob_score': True}}

In [None]:
# Exhaustive grid search for the SPORF classifier, reusing param_dist as the
# parameter grid; the bare name on the last line displays the result.
clf_best_params_grid = hyperparameter_optimization_grid(
    X,
    y,
    (clf, param_dist),
)
clf_best_params_grid

In [2]:
import os
from scipy.stats import randint as sp_randint
import math
from math import log

# get some data
task_id = 146821  # car

# Take the OpenML API key from the environment instead of committing it to
# the notebook. The key that used to be hardcoded here should be treated as
# leaked and rotated.
# NOTE(review): downloading public tasks is believed not to require a key;
# confirm for the openml-python version in use.
openml_apikey = os.environ.get("OPENML_APIKEY")
if openml_apikey:
    openml.config.apikey = openml_apikey

# NOTE(review): benchmark_suite is not used by any cell visible here.
benchmark_suite = openml.study.get_suite('OpenML-CC18')
task = openml.tasks.get_task(task_id)
X, y = task.get_X_and_y()
n_features = np.shape(X)[1]
n_samples = np.shape(X)[0]


# build a classifier
clf = RandomForestClassifier(n_estimators=100)

# specify max_depth and min_sample_splits ranges:
# unique rounded integers on an evenly spaced grid up to ln(n_samples)
log_n_samples = math.log(n_samples)
range_step = (log_n_samples - 2) / 10

max_depth_array = np.unique(
    np.round(np.arange(2, log_n_samples, range_step))
).astype(int)
# None is appended as an extra max_depth candidate.
max_depth_range = np.append(max_depth_array, None)

min_sample_splits_range = np.unique(
    np.round(np.arange(1, log_n_samples, range_step))
).astype(int)
# scikit-learn's RandomForestClassifier rejects an integer
# min_samples_split below 2, so drop the candidate value 1.
min_sample_splits_range = min_sample_splits_range[min_sample_splits_range >= 2]

# specify parameters and distributions to sample from
# max_features must not exceed n_features for RandomForestClassifier, so the
# former n_features**2 candidate is removed.
param_dist = {"n_estimators": np.arange(50, 550, 50),
              "max_depth": max_depth_range,
              "min_samples_split": min_sample_splits_range,
              "max_features": ["auto", "sqrt", "log2", None]}

In [None]:
# Search for the RandomForestClassifier, using param_dist as the grid.
# NOTE(review): despite the `_random` suffix, this calls the exhaustive
# grid-search helper - confirm whether the randomized search was intended.
clf_best_params_random = hyperparameter_optimization_grid(
    X,
    y,
    (clf, param_dist),
)
clf_best_params_random