

# Hyperparameter Optimization Project

## Fall 2021 - Team 2


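This notebook optimizes the hyperparameters ``C`` and ``solver`` of a logistic-regression classifier using Optuna.
When you introduce Optuna naively, you define an ``objective`` function
that takes a ``trial`` and calls the ``suggest_*`` methods of ``trial`` to sample the hyperparameters.
Here, the ``Tune`` class below instead calls ``study.ask()`` and ``study.tell()`` itself, so the user-supplied ``objective`` takes ``C`` and ``solver`` directly: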



In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import optuna
import requests

In [2]:
# building a Python class for project

class Tune:
    """Drive an Optuna study over a ``C`` / ``solver`` search space.

    The class uses Optuna's ask-and-tell interface. ``create_study`` draws the
    first candidate; each ``update_study`` call scores the pending candidate
    with the user-supplied objective, reports that score to Optuna for the
    same trial, draws the next candidate, and pushes the scored result to the
    HTTP service at ``localhost:5000``.

    Typical use::

        tune = Tune("my-study")
        tune.create_study([1e-7, 10.0], ["lbfgs", "saga"])
        tune.set_objective(objective)   # objective(C, solver) -> score
        tune.update_study_repeat(10)
    """

    # Class-level defaults; instances overwrite them as the study progresses.
    study_name = None
    search_space_C = None          # [low, high] bounds for C
    search_space_solver = None     # candidate solver names, as passed in
    study = None
    trial = None                   # pending (asked but not yet told) trial
    objective = None               # callable(C, solver) -> score to maximize
    performance = None             # score of the most recently evaluated trial
    stopping_criteria = None       # never set in this class; posted as-is
    C = None                       # C value suggested for the pending trial
    solver = None                  # solver suggested for the pending trial
    request_timeout = 10           # seconds to wait on each HTTP request

    def __init__(self, name):
        """Remember ``name`` as the study name used for Optuna and the service."""
        self.study_name = name

    def create_study(self, C, solver):
        """Create the Optuna study and draw the first candidate.

        Args:
            C: two-element sequence ``[low, high]`` bounding the log-uniform
                search over ``C``.
            solver: iterable of solver names to choose from.
        """
        self.search_space_C = C
        self.search_space_solver = solver

        # TODO: the direction and the searched parameters are hard-coded;
        # generalize later.
        self.study = optuna.create_study(
            direction="maximize", study_name=self.study_name
        )
        self._ask_next_trial()

    def _ask_next_trial(self):
        """Ask Optuna for a new trial and store its suggested C and solver."""
        self.trial = self.study.ask()
        low, high = self.search_space_C[0], self.search_space_C[1]
        # suggest_float(log=True) is the non-deprecated spelling of
        # suggest_loguniform.
        self.C = self.trial.suggest_float("C", low, high, log=True)
        # Drop duplicates but keep the caller's order: Optuna expects an
        # ordered sequence of choices, which a set does not provide.
        choices = list(dict.fromkeys(self.search_space_solver))
        self.solver = self.trial.suggest_categorical("solver", choices)

    def update_study(self, n=1, output=True):
        """Score the pending candidate, record it, and draw the next one.

        Args:
            n: value reported to the service as ``"num-updates"``; it does not
                control how many steps are run.
            output: when true, print the evaluated parameters and their score.

        Raises:
            RuntimeError: if ``create_study`` or ``set_objective`` has not
                been called yet.
            requests.RequestException: if the service cannot be reached or
                answers with an error status.
        """
        if self.study is None or self.trial is None:
            raise RuntimeError(
                "create_study() must be called before update_study()."
            )

        # The score must be told to the trial whose parameters produced it.
        # Asking for a new trial first would attach it to the wrong trial.
        evaluated_C, evaluated_solver = self.C, self.solver
        performance = self.get_performance()
        self.study.tell(self.trial, performance)

        # Draw the next candidate before any network I/O, so a failed request
        # leaves the object with a fresh pending trial instead of a finished one.
        self._ask_next_trial()

        # push data using Flask for updating SQL table
        payload = {
            "study-name": self.study_name,
            "search-space-hyperparameter": str(self.search_space_C),
            "search-space-solver": str(self.search_space_solver),
            "hyperparameter": evaluated_C,
            "solver": evaluated_solver,
            "stopping-criteria": self.stopping_criteria,
            "num-updates": n,
            "performance": performance,
        }
        # NOTE(review): the triple slash in the URL is kept as in the original;
        # confirm against the server's route definition.
        response = requests.post(
            'http://localhost:5000///updateDB',
            json=payload,
            timeout=self.request_timeout,
        )
        response.raise_for_status()

        if output:
            print("Hyperparameter:", evaluated_C)
            print("Solver:", evaluated_solver)
            print("Performance:", performance)

    def update_study_repeat(self, n):
        """Run ``n`` silent update steps, then print the best stored result.

        The best result is fetched from the service, not from the in-memory
        Optuna study.

        Raises:
            RuntimeError: see ``update_study``.
            requests.RequestException: if the service cannot be reached or
                answers with an error status.
        """
        for _ in range(n):
            self.update_study(n, False)

        # retrieve best study data from SQL table using Flask
        response = requests.post(
            'http://localhost:5000///getBestStudy',
            json={"study-name": self.study_name},
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        res = response.json()

        print("Best Results Across Study - ", self.study_name)
        print("Best Hyperparameters:", res['hyperparameter'])
        print("Best Solver:", res["solver"])
        print("Best Model Performance:", res['performance'])

    def set_objective(self, objective):
        """Register the callable ``objective(C, solver)`` used to score trials."""
        self.objective = objective
        print("Objective has been set.")

    def get_performance(self):
        """Evaluate the objective on the pending ``C`` and ``solver``.

        Stores the score in ``self.performance`` and returns it.

        Raises:
            RuntimeError: if no objective has been registered.
        """
        if self.objective is None:
            raise RuntimeError(
                "set_objective() must be called before evaluating performance."
            )
        self.performance = self.objective(self.C, self.solver)
        return self.performance

In [3]:
# User-supplied data: a synthetic 10-feature classification set, split into
# train and test partitions with train_test_split's default proportions.

X, y = make_classification(n_features=10)
split = train_test_split(X, y)
X_train, X_test, y_train, y_test = split

In [4]:
# user defines objective function

def objective(C, solver):
    """Return the test-set accuracy of a LogisticRegression fit with C and solver.

    Trains on the module-level ``X_train`` / ``y_train`` and scores on
    ``X_test`` / ``y_test``.
    """
    model = LogisticRegression(C=C, solver=solver)
    # fit() returns the estimator itself, so scoring can be chained onto it.
    return model.fit(X_train, y_train).score(X_test, y_test)

In [5]:
# Create the tuner; the argument becomes the study name.
tune = Tune("winston_test2")

In [6]:
# Search C between 1e-7 and 10.0 and the solver among "lbfgs" and "saga";
# this also draws the first suggested (C, solver) pair.
tune.create_study([1e-7, 10.0], ["lbfgs", "saga"])

[32m[I 2021-12-02 19:38:48,906][0m A new study created in memory with name: winston_test2[0m


In [7]:
# Register the scoring function; Tune calls it as objective(C, solver).
tune.set_objective(objective)

Objective has been set.


In [8]:
# Run a single update step with the defaults (n=1, output=True), which prints
# the hyperparameter, solver and performance.
tune.update_study()

Hyperparameter: 1.027081406050604e-05
Solver: saga
Performance: 0.44


In [9]:
# Run 10 update steps without per-step output, then print the best result
# returned by the getBestStudy endpoint.
tune.update_study_repeat(10)

Best Results Across Study -  winston_test2
Best Hyperparameters: 0.000004482307998114341
Best Solver: lbfgs
Best Model Performance: 0.920000000
