In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
import optuna

In [2]:
def objective(trial):
    """Toy objective: squared distance between a sampled ``x`` and 2.

    Args:
        trial: Object exposing ``suggest_float``; it is asked for a float
            named ``'x'`` with bounds -10 and 10.

    Returns:
        ``(x - 2) ** 2`` for the value the trial suggested.
    """
    candidate = trial.suggest_float('x', -10, 10)
    offset    = candidate - 2
    return offset ** 2

# Minimise the toy objective over 100 trials.
# The sampler is seeded so that, for this deterministic objective, re-running
# the cell proposes the same sequence of x values and the same best_params.
study = optuna.create_study(
    direction = 'minimize',
    sampler   = optuna.samplers.TPESampler(seed = 42),
)
study.optimize(objective, n_trials = 100)
study.best_params

[32m[I 2023-04-27 01:07:58,723][0m A new study created in memory with name: no-name-1add3ccd-1bf5-43a6-b3aa-a3375a8b9c08[0m
[32m[I 2023-04-27 01:07:58,725][0m Trial 0 finished with value: 30.492007174016308 and parameters: {'x': 7.521956824714977}. Best is trial 0 with value: 30.492007174016308.[0m
[32m[I 2023-04-27 01:07:58,726][0m Trial 1 finished with value: 2.4566233434823967 and parameters: {'x': 3.567361905713673}. Best is trial 1 with value: 2.4566233434823967.[0m
[32m[I 2023-04-27 01:07:58,726][0m Trial 2 finished with value: 95.36403704212324 and parameters: {'x': -7.765451195010051}. Best is trial 1 with value: 2.4566233434823967.[0m
[32m[I 2023-04-27 01:07:58,727][0m Trial 3 finished with value: 18.584451416048974 and parameters: {'x': 6.310968732900875}. Best is trial 1 with value: 2.4566233434823967.[0m
[32m[I 2023-04-27 01:07:58,728][0m Trial 4 finished with value: 57.52379886235849 and parameters: {'x': -5.584444532222415}. Best is trial 1 with value: 2.

{'x': 2.0076095312937023}

In [3]:
def objective(trial):
    """Score one sampled classifier configuration on the iris dataset.

    The trial first chooses a classifier family and then the single
    hyperparameter tuned for that family:

    * ``'SVC'``          -> ``svc_c``        (float, log scale, 1e-10 .. 1e10)
    * ``'RandomForest'`` -> ``rm_max_depth`` (int,   log scale, 2 .. 32)

    Args:
        trial: Trial object used to draw the categorical choice and the
            hyperparameter value.

    Returns:
        Mean of the 3-fold ``cross_val_score`` results for the chosen model.
    """
    iris = load_iris()
    x, y = iris.data, iris.target

    ## Search over two classifier families: SVC and RandomForest.
    ## The search space for each family is declared in the branches below.
    classifier_name = trial.suggest_categorical(
                                                'classifier',
                                                ['SVC', 'RandomForest']
                                            )

    if classifier_name == 'SVC':
        ## C is a float, so it is drawn with suggest_float.
        svc_c          = trial.suggest_float('svc_c', 1e-10, 1e10, log = True)
        classifier_obj = SVC(C = svc_c, gamma = 'auto')

    else:
        ## max_depth is an int, so it is drawn with suggest_int.
        ## NOTE(review): the key 'rm_max_depth' looks like a typo for
        ## 'rf_max_depth'; it is kept as-is because it is the name that
        ## appears in the study's params.
        rf_max_depth   = trial.suggest_int('rm_max_depth', 2, 32, log = True)
        ## random_state is fixed: an unseeded forest scores differently on
        ## every call, so two trials with the same depth would not be
        ## comparable and the search would partly be ranking noise.
        classifier_obj = RandomForestClassifier(max_depth    = rf_max_depth,
                                                n_estimators = 10,
                                                random_state = 0)

    ## Run cross-validation and average the per-fold scores.
    score    = cross_val_score(classifier_obj, x, y, n_jobs = -1, cv = 3)
    accuracy = score.mean()

    return accuracy

In [4]:
## Search with the study direction set to maximise the returned accuracy.
## The sampler is seeded so its own random draws are repeatable between runs.
study = optuna.create_study(
    direction = 'maximize',
    sampler   = optuna.samplers.TPESampler(seed = 42),
)
study.optimize(objective, n_trials = 100)
study.best_trial

[32m[I 2023-04-27 01:07:59,077][0m A new study created in memory with name: no-name-addf188c-75a6-4ae5-bbb6-8b77c954c43f[0m
[32m[I 2023-04-27 01:08:01,836][0m Trial 0 finished with value: 0.96 and parameters: {'classifier': 'RandomForest', 'rm_max_depth': 19}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-04-27 01:08:02,072][0m Trial 1 finished with value: 0.96 and parameters: {'classifier': 'SVC', 'svc_c': 873570754.8126411}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-04-27 01:08:02,323][0m Trial 2 finished with value: 0.94 and parameters: {'classifier': 'RandomForest', 'rm_max_depth': 2}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-04-27 01:08:02,551][0m Trial 3 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 4.389634237017089e-10}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-04-27 01:08:02,785][0m Trial 4 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 0.00012885240442741885}. Best is trial 0 with 

FrozenTrial(number=27, state=TrialState.COMPLETE, values=[0.9866666666666667], datetime_start=datetime.datetime(2023, 4, 27, 1, 8, 8, 52977), datetime_complete=datetime.datetime(2023, 4, 27, 1, 8, 8, 280178), params={'classifier': 'SVC', 'svc_c': 4.291760089415581}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'classifier': CategoricalDistribution(choices=('SVC', 'RandomForest')), 'svc_c': FloatDistribution(high=10000000000.0, log=True, low=1e-10, step=None)}, trial_id=27, value=None)

In [5]:
from sklearn.model_selection import train_test_split
from optuna.trial import TrialState
from sklearn import linear_model

In [6]:
def objective(trial):
    """Incrementally train an SGD classifier on iris, reporting each step.

    The regularisation strength ``alpha`` is drawn on a log scale from
    [1e-5, 1e-1]. After every ``partial_fit`` pass the held-out score is
    reported to the trial so that the study can stop unpromising trials early.

    Args:
        trial: Trial object used to draw ``alpha``, receive intermediate
            scores and decide on pruning.

    Returns:
        Held-out score after the final training step.

    Raises:
        optuna.TrialPruned: If ``trial.should_prune()`` is true after a report.
    """
    seed    = 42    # shared by the split and the classifier
    n_steps = 100   # number of partial_fit passes per trial

    iris    = load_iris()
    classes = list(set(iris.target))

    ## The split is seeded so every trial is trained and scored on the same
    ## rows. With a fresh random split per trial, differences between trials
    ## (and the intermediate values used for pruning) would partly reflect the
    ## split rather than alpha.
    train_x, test_x, train_y, test_y = train_test_split(iris.data, iris.target,
                                                        test_size    = 0.25,
                                                        random_state = seed)
    alpha = trial.suggest_float('alpha', 1e-5, 1e-1, log = True)
    clf   = linear_model.SGDClassifier(alpha = alpha, random_state = seed)

    for step in range(n_steps):

        clf.partial_fit(train_x, train_y, classes = classes)
        intermediate_value = clf.score(test_x, test_y)

        trial.report(intermediate_value, step)

        ## Stop training early when the trial is judged not worth continuing.
        if trial.should_prune():
            raise optuna.TrialPruned()

    ## The model is unchanged since the last score, so reuse that value
    ## instead of scoring the same data a second time.
    return intermediate_value
        

In [7]:
# Maximise the held-out score; the objective may end a trial early by raising
# optuna.TrialPruned. The sampler is seeded so its own random draws are
# repeatable between runs.
study = optuna.create_study(
    direction = 'maximize',
    sampler   = optuna.samplers.TPESampler(seed = 42),
)
study.optimize(objective, n_trials = 100)

# Split the finished trials by final state for the summary printed later.
pruned_trials   = study.get_trials(deepcopy = False, states = [TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy = False, states = [TrialState.COMPLETE])

[32m[I 2023-04-27 01:08:22,220][0m A new study created in memory with name: no-name-aff0ee17-2ea2-4274-97c9-4b5570d9e3d6[0m
[32m[I 2023-04-27 01:08:22,461][0m Trial 0 finished with value: 0.8947368421052632 and parameters: {'alpha': 0.0012040317031911225}. Best is trial 0 with value: 0.8947368421052632.[0m
[32m[I 2023-04-27 01:08:22,615][0m Trial 1 finished with value: 0.7368421052631579 and parameters: {'alpha': 0.00012813322786151007}. Best is trial 0 with value: 0.8947368421052632.[0m
[32m[I 2023-04-27 01:08:22,739][0m Trial 2 finished with value: 0.868421052631579 and parameters: {'alpha': 0.00022052425038517615}. Best is trial 0 with value: 0.8947368421052632.[0m
[32m[I 2023-04-27 01:08:22,862][0m Trial 3 finished with value: 0.6578947368421053 and parameters: {'alpha': 3.3020047487051216e-05}. Best is trial 0 with value: 0.8947368421052632.[0m
[32m[I 2023-04-27 01:08:22,984][0m Trial 4 finished with value: 0.9210526315789473 and parameters: {'alpha': 1.3815062920

In [8]:
# Trial counts: everything the study ran, then the pruned / complete subsets.
count_lines = (
    f'# of finished trials : {len(study.trials)}',
    f'# of   pruned trials : {len(pruned_trials)}',
    f'# of complete trials : {len(complete_trials)}',
)
print(*count_lines, sep = '\n')

# Best trial: its objective value followed by one "name : value" line per
# parameter.
trial = study.best_trial
print(f'value : {trial.value}')
for k, v in trial.params.items():
    print(f'{k} : {v}')

# of finished trials : 100
# of   pruned trials : 83
# of complete trials : 17
value : 0.9736842105263158
alpha : 0.0002219294821633734
