

# Efficient Hyperparameter Search Service

## Fall 2021 - Team 2


Below is a hyperparameter search service, used through the `Tune` class we wrote. The class contains all the functionality for creating a study, suggesting hyperparameters, keeping track of results, and reporting the best performance found for a model.

In [None]:
# import relevant packages
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import optuna
import requests

In [None]:
# Tune package for efficient hyperparameter search

class Tune:
    """Drive an Optuna study over a regularization strength and a solver.

    Typical use::

        tune = Tune("my-study")
        tune.create_study([1e-7, 10.0], ["lbfgs", "saga"])
        tune.set_objective(objective)      # objective(C, solver) -> score
        tune.update_study()                # evaluate one suggestion
        tune.update_study_repeat(50)       # evaluate many, print the best

    Every evaluated configuration is also POSTed to a tracking service on
    localhost:5000, which is queried for the best result of the study.
    """

    # Class-level defaults so every attribute exists (as None) before the
    # corresponding setup method has been called.
    study_name = None            # name given to the Optuna study and sent to the service
    search_space_C = None        # [low, high] bounds that C is sampled from (log scale)
    search_space_solver = None   # candidate solver names
    study = None                 # study object created in create_study()
    trial = None                 # pending trial whose suggestion has not been scored yet
    objective = None             # callable objective(C, solver) -> score
    performance = None           # score returned by the most recent objective call
    stopping_criteria = None     # never set by this class; forwarded to the service as-is
    C = None                     # currently suggested value of C
    solver = None                # currently suggested solver

    # Seconds to wait for the tracking service before giving up, so a hung
    # server cannot block the search forever.
    request_timeout = 30

    def __init__(self, name):
        """Store *name* as the study name."""
        self.study_name = name

    # initialize study
    def create_study(self, C, solver):
        """Create the study and draw the first suggestion.

        Args:
            C: two-element sequence ``[low, high]`` bounding the log-scale
                search range for C.
            solver: iterable of candidate solver names.
        """
        self.search_space_C = C
        self.search_space_solver = solver
        self.study = self._new_study()
        self._ask_next_trial()

    def _new_study(self):
        """Return a fresh maximizing Optuna study named after this object."""
        return optuna.create_study(direction="maximize", study_name=self.study_name)

    def _ask_next_trial(self):
        """Ask the study for a new trial and store its suggested C and solver."""
        low, high = self.search_space_C[0], self.search_space_C[1]
        # De-duplicate while keeping the caller's order; a set would make the
        # order of the choices depend on string hashing.
        choices = list(dict.fromkeys(self.search_space_solver))

        self.trial = self.study.ask()
        # suggest_float(log=True) is the non-deprecated spelling of
        # suggest_loguniform.
        self.C = self.trial.suggest_float("C", low, high, log=True)
        self.solver = self.trial.suggest_categorical("solver", choices)

    def _post(self, url, payload):
        """POST *payload* as JSON to *url* and return the response.

        Raises:
            requests.HTTPError: if the service answers with an error status.
        """
        response = requests.post(url, json=payload, timeout=self.request_timeout)
        response.raise_for_status()
        return response

    # get single update from study
    def update_study(self, n=1, output=True):
        """Score the pending suggestion, record it, and draw the next one.

        The score is reported to the same trial that produced the evaluated
        hyperparameters, and the record sent to the service (and printed)
        pairs that score with those same hyperparameters. After the call,
        ``self.C`` / ``self.solver`` hold the next, not yet evaluated,
        suggestion and ``self.performance`` holds the score just obtained.

        Args:
            n: value stored under "num-updates" in the record; it does not
                control how many updates are run.
            output: when true, print the evaluated configuration and score.

        Raises:
            RuntimeError: if create_study() or set_objective() has not been
                called yet.
        """
        if self.study is None or self.trial is None:
            raise RuntimeError("Study has not been created; call create_study() first.")

        # Remember what is being evaluated: asking for the next trial below
        # overwrites self.C and self.solver.
        evaluated_C, evaluated_solver = self.C, self.solver
        performance = self.get_performance()
        self.study.tell(self.trial, performance)

        # Draw the next suggestion before talking to the service, so a failed
        # request never leaves an already-finished trial as the pending one.
        self._ask_next_trial()

        # push data to update SQL table using Flask
        # NOTE(review): the path contains three slashes; kept exactly as-is
        # because the server routes are not visible here - confirm and normalize.
        record = {"study-name" : self.study_name,
                  "search-space-hyperparameter" : str(self.search_space_C),
                  "search-space-solver" : str(self.search_space_solver),
                  "hyperparameter" : evaluated_C,
                  "solver" : evaluated_solver,
                  "stopping-criteria" : self.stopping_criteria,
                  "num-updates" : n,
                  "performance" : performance}
        self._post('http://localhost:5000///updateDB', record)

        if output:
            print("Hyperparameter:", evaluated_C)
            print("Solver:", evaluated_solver)
            print("Performance:", performance)

    # run study multiple times (defined by user), and get best results
    def update_study_repeat(self, n):
        """Run *n* silent updates, then print the best result of the study.

        The best result is fetched from the tracking service, not from the
        local study object.
        """
        for _ in range(n):
            self.update_study(n, False)

        # retrieve best study data from SQL table using Flask
        query = {"study-name" : self.study_name}
        res = self._post('http://localhost:5000///getBestStudy', query).json()

        print("Best Results Across Study - ", self.study_name)
        print("Best Hyperparameters:", res['hyperparameter'])
        print("Best Solver:", res["solver"])
        print("Best Model Performance:", res['performance'])

    # set objective function for study
    def set_objective(self, objective):
        """Register the callable ``objective(C, solver)`` used to score suggestions."""
        self.objective = objective
        print("Objective has been set.")

    # retrieve current performance data using current recommended hyperparameter configuration
    def get_performance(self):
        """Evaluate the objective on the current suggestion and return the score.

        The score is also stored in ``self.performance``.

        Raises:
            RuntimeError: if no objective has been set.
        """
        if self.objective is None:
            raise RuntimeError("Objective has not been set; call set_objective() first.")
        self.performance = self.objective(self.C, self.solver)
        return self.performance

In [None]:
# Sample dataset #1 for the demo: a generated classification problem with
# 10 features, split into train and test partitions with train_test_split's
# default settings. Neither call is given a random_state, so the data and the
# split differ from run to run.
X, y = make_classification(n_features=10)
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
# objective function supplied by the user; the study scores each suggestion with it
def objective(C, solver):
    """Fit a logistic regression with the given settings and return its accuracy.

    The model is trained on the module-level X_train / y_train and scored on
    X_test / y_test.
    """
    model = LogisticRegression(solver=solver, C=C)
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [None]:
# Initialize the Tune object for efficient hyperparameter search; the string
# is stored as the study name and is sent as "study-name" in service requests.
tune = Tune("dataX_test1")

In [None]:
# Create the study and draw the first suggestion. The first argument is the
# [low, high] range that C is sampled from on a log scale; the second is the
# list of candidate solvers.
tune.create_study([1e-7, 10.0], ["lbfgs", "saga", "newton-cg"])

In [None]:
# Register the user-defined objective with the study; it is called as
# objective(C, solver) and its return value is the score the study maximizes.
tune.set_objective(objective)

In [None]:
# Single update to the study with the defaults (n=1, output=True): evaluates
# the objective once, reports the score to the study, POSTs the record to the
# service on localhost:5000, and prints the hyperparameter, solver and score.
tune.update_study()

In [None]:
# Repeated update over 50 trials without per-trial printing; afterwards the
# best result for this study is fetched from the service and printed.
tune.update_study_repeat(50)