In [10]:
from trainer import Trainer
from utils import Paramset
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import numpy as np
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from neuralnetwork import NNClassifier
import optuna


class Objective:
     
    '''
    # Usage
    obj = Objective(LGBMRegressor(), X, y)
    study = optuna.create_study(
        sampler=optuna.samplers.RandomSampler(seed=123))
    study.optimize(obj, n_trials=10, n_jobs=-1)
    '''

    def __init__(self, model, x, y):
        self.model = model
        self.model_type = type(self.model).__name__
        self.x = x
        self.y = y
        self.n_splits = 5
        self.random_state = 1214
        self.early_stopping_rounds = 20
        paramset = Paramset(self.model)
        paramset.swiching_lr('params_search')
        self.PARAMS = paramset.generate_params()
    
    def __call__(self, trial):
        if self.model_type == 'LGBMClassifier':
            SPACE = {
                'num_leaves': trial.suggest_int(
                'num_leaves', 32, 2*32),
                'subsample': trial.suggest_uniform('subsample', 0.60, 0.80),
                'colsample_bytree': trial.suggest_uniform(
                    'colsample_bytree', 0.60, 0.80),
                'bagging_freq': trial.suggest_int(
                    'bagging_freq', 1, 51, 5),
                'min_child_weight': trial.suggest_loguniform(
                    'min_child_weight', 1, 32),
                'min_child_samples': int(trial.suggest_discrete_uniform(
                    'min_child_samples', 128, 512, 16)),
                'min_split_gain': trial.suggest_loguniform(
                    'min_split_gain', 1e-5, 1e-1)
            }
            self.PARAMS.update(SPACE)
            # cross validation
            skf = StratifiedKFold(n_splits=self.n_splits,
            random_state=self.random_state, shuffle=True)
            LOGLOSS = []
            for tr_idx, va_idx in skf.split(self.x, self.y):
                clf = Trainer(LGBMClassifier(**self.PARAMS))
                clf.fit(
                    self.x[tr_idx],
                    self.y[tr_idx],
                    self.x[va_idx],
                    self.y[va_idx],
                    self.early_stopping_rounds
                )
                y_pred = clf.predict_proba(self.x[va_idx])  # best_iteration
                logloss = log_loss(self.y[va_idx], y_pred)
                LOGLOSS.append(logloss)
            return np.mean(LOGLOSS)
        elif self.model_type == 'XGBClassifier':
            SPACE = {
                'subsample': trial.suggest_uniform(
                    'subsample', 0.65, 0.85),
                'colsample_bytree': trial.suggest_uniform(
                    'colsample_bytree', 0.65, 0.80),
                'gamma': trial.suggest_loguniform(
                    'gamma', 1e-8, 1.0),
                'min_child_weight': trial.suggest_loguniform(
                    'min_child_weight', 1, 32)
            }
            self.PARAMS.update(SPACE)
            # cross validation
            skf = StratifiedKFold(n_splits=self.n_splits,
            random_state=self.random_state, shuffle=True)
            LOGLOSS = []
            for tr_idx, va_idx in skf.split(self.x, self.y):
                clf = Trainer(XGBClassifier(**self.PARAMS))
                clf.fit(
                    self.x[tr_idx],
                    self.y[tr_idx],
                    self.x[va_idx],
                    self.y[va_idx],
                    self.early_stopping_rounds
                )
                y_pred = clf.predict_proba(self.x[va_idx])  # best_iteration
                logloss = log_loss(self.y[va_idx], y_pred)
                LOGLOSS.append(logloss)
            return np.mean(LOGLOSS)
        elif self.model_type == 'NNClassifier':
            SPACE = {
                "input_dropout": trial.suggest_uniform(
                    "input_dropout", 0.0, 1.0),
                "hidden_layers": trial.suggest_int(
                    "hidden_layers", 1, 3),
                'hidden_units': int(trial.suggest_discrete_uniform(
                    'hidden_units', 64, 1024, 64)),
                'hidden_dropout': trial.suggest_uniform(
                    'hidden_dropout', 0.0, 1.0),
                'batch_norm': trial.suggest_categorical(
                'batch_norm', ['before_act', 'non']),
                'batch_size': int(trial.suggest_discrete_uniform(
                    'batch_size', 16, 96, 16))
            }
            self.PARAMS.update(SPACE)
            self.PARAMS['input_shape'] = self.x.shape[1]
            print(self.PARAMS)
            # cross validation
            skf = StratifiedKFold(n_splits=self.n_splits,
            random_state=self.random_state, shuffle=True)
            LOGLOSS = []
            for tr_idx, va_idx in skf.split(self.x, self.y):
                clf = Trainer(NNClassifier(**self.PARAMS))
                clf.fit(
                    self.x[tr_idx],
                    self.y[tr_idx],
                    self.x[va_idx],
                    self.y[va_idx],
                    self.early_stopping_rounds
                )
                y_pred = clf.predict_proba(self.x[va_idx])
                logloss = clf.get_model().history.history["val_loss"][-(self.early_stopping_rounds+1)]
                LOGLOSS.append(logloss)
            return np.mean(LOGLOSS)
            
def optuna_search(obj, n_trials, n_jobs, random_state):
    study = optuna.create_study(
        sampler=optuna.samplers.RandomSampler(seed=random_state))
    study.optimize(obj, n_trials=n_trials, n_jobs=n_jobs)
    return study.best_params


if __name__ == "__main__":
    pass

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
brest_c = load_breast_cancer()
X = brest_c['data']
y = brest_c['target']

In [14]:
n_trials = 3
n_jobs=-1
random_state=0
obj = Objective(LGBMClassifier(), X, y)
optuna_search(obj, n_trials, n_jobs, random_state)

model type is LGBMClassifiermodel type is LGBMClassifiermodel type is LGBMClassifier

None
None

None
model type is LGBMClassifier
Nonemodel type is LGBMClassifiermodel type is LGBMClassifier


NoneNone

model type is LGBMClassifier
None
model type is LGBMClassifiermodel type is LGBMClassifier

NoneNone

model type is LGBMClassifier
None
model type is LGBMClassifiermodel type is LGBMClassifier

NoneNone

model type is LGBMClassifier
None


[I 2020-09-30 16:15:18,561] Finished trial#0 with value: 0.6603343289651606 with parameters: {'num_leaves': 32, 'subsample': 0.7205526752143288, 'colsample_bytree': 0.7089766365993794, 'bagging_freq': 46, 'min_child_weight': 8.680742718279156, 'min_child_samples': 272.0, 'min_split_gain': 0.00015493103643906671}. Best is trial#0 with value: 0.6603343289651606.

model type is LGBMClassifiermodel type is LGBMClassifier






NoneNone



[I 2020-09-30 16:15:19,576] Finished trial#1 with value: 0.6603343289651606 with parameters: {'num_leaves': 56, 'subsample': 0.7927325521002059, 'colsample_bytree': 0.6766883037651555, 'bagging_freq': 31, 'min_child_weight': 16.689208658650642, 'min_child_samples': 304.0, 'min_split_gain': 0.00037251107093913994}. Best is trial#0 with value: 0.6603343289651606.
[I 2020-09-30 16:15:19,577] Finished trial#2 with value: 0.6603343289651606 with parameters: {'num_leaves': 57, 'subsample': 0.6142072116395774, 'colsample_bytree': 0.6174258599403081, 'bagging_freq': 16, 'min_child_weight': 3.583098488008466, 'min_child_samples': 496.0, 'min_split_gain': 3.642529868570474e-05}. Best is trial#0 with value: 0.6603343289651606.


{'num_leaves': 32,
 'subsample': 0.7205526752143288,
 'colsample_bytree': 0.7089766365993794,
 'bagging_freq': 46,
 'min_child_weight': 8.680742718279156,
 'min_child_samples': 272.0,
 'min_split_gain': 0.00015493103643906671}

In [1]:
from trainer import Trainer
from utils import Paramset
from neuralnetwork import NNClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import numpy as np
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
import optuna
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

def obj(trial):
    early_stopping_rounds = 10
    n_splits = 3
    random_state = 0
    paramset = Paramset(NNClassifier())
    paramset.swiching_lr('params_search')
    PARAMS = paramset.generate_params()
    
    SPACE = {
        "input_dropout": trial.suggest_uniform(
            "input_dropout", 0.0, 1.0),
        "hidden_layers": trial.suggest_int(
            "hidden_layers", 1, 3),
        'hidden_units': int(trial.suggest_discrete_uniform(
            'hidden_units', 64, 1024, 64)),
        'hidden_dropout': trial.suggest_uniform(
            'hidden_dropout', 0.0, 1.0),
        'batch_norm': trial.suggest_categorical(
        'batch_norm', ['before_act', 'non']),
        'batch_size': int(trial.suggest_discrete_uniform(
            'batch_size', 16, 96, 16))
    }
    PARAMS.update(SPACE)
    PARAMS['input_shape'] = x.shape[1]
    print(PARAMS)
    # cross validation
    skf = StratifiedKFold(n_splits=n_splits,
    random_state=random_state, shuffle=True)
    LOGLOSS = []
    for tr_idx, va_idx in skf.split(x, y):
        clf = Trainer(NNClassifier(**PARAMS))
        clf.fit(
            x[tr_idx],
            y[tr_idx],
            x[va_idx],
            y[va_idx],
            early_stopping_rounds
        )
        y_pred = clf.predict_proba(x[va_idx])
        logloss = clf.get_model().history.history["val_loss"][-(early_stopping_rounds+1)]
        LOGLOSS.append(logloss)
    return np.mean(LOGLOSS)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
from trainer import Trainer
from utils import Paramset
from neuralnetwork import NNClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import numpy as np
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
import optuna
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
brest_c = load_breast_cancer()
x = brest_c['data']
y = brest_c['target']


n_trials = 2
n_jobs=-1
random_state=0
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=random_state))
study.optimize(obj, n_trials=n_trials, n_jobs=n_jobs)

{'learning_rate': 0.05, 'input_shape': 30, 'input_dropout': 0.5488135039273248, 'hidden_layers': 2, 'hidden_units': 896, 'hidden_dropout': 0.8579456176227568, 'batch_norm': 'non', 'batch_size': 48, 'epochs': 10000}{'learning_rate': 0.05, 'input_shape': 30, 'input_dropout': 0.6458941130666561, 'hidden_layers': 1, 'hidden_units': 320, 'hidden_dropout': 0.05671297731744318, 'batch_norm': 'before_act', 'batch_size': 48, 'epochs': 10000}

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 379 samples, validate on 190 samples
Epoch 1/10000
Train on 379 samples, validate on 190 samples
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
 48/379 [==>...........................] - ETA: 0s - loss: 1.6680 - accuracy: 0.8958Epoch 2/10000
Epoch 3/10000
 - 0s 448us/step - loss: 1.8765 - accuracy: 0.8813 - val_loss: 0.8455 - val_accuracy: 0.9474
 48/379 [==>...........................] - ETA:

Epoch 16/10000
Epoch 17/10000
Epoch 20/10000
Epoch 21/10000
Epoch 18/10000
Epoch 19/10000
 48/379 [==>...........................] - ETA: 0s - loss: 2.3359 - accuracy: 0.8542model type is NNClassifier
None
model type is NNClassifier
None
Train on 379 samples, validate on 190 samples
Epoch 1/10000
Train on 379 samples, validate on 190 samples
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
 - 1s 3ms/step - loss: 0.3976 - accuracy: 0.8575 - val_loss: 0.2120 - val_accuracy: 0.9632
Epoch 6/10000
Epoch 3/10000
Epoch 7/10000
 48/379 [==>...........................] - ETA: 0s - loss: 1.3537 - accuracy: 0.8958Epoch 4/10000
Epoch 5/10000
Epoch 8/10000
 - 0s 451us/step - loss: 2.3565 - accuracy: 0.8522 - val_loss: 2.0986 - val_accuracy: 0.8684 - loss: 2.4683 - accuracy: 0.84 - ETA: 0s - loss: 0.2106 - accuracy: 0.91
Epoch 9/10000
Epoch 7/10000
Epoch 8/10000
Epoch 10/10000
 48/379 [==>...........................]
Epoch 10/100
Epoch 11/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epo

Epoch 16/10000
Epoch 17/10000
model type is NNClassifier
None
Train on 380 samples, validate on 189 samples
Epoch 1/10000
Train on 380 samples, validate on 189 samples
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
 48/380 [==>...........................] - ETA: 0s - loss: 1.6643 - accuracy: 0.8958Epoch 2/10000
Epoch 12/10000
Epoch 3/10000
Epoch 13/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
 48/380 [==>...........................]
None - ETA: 0s - loss: 0.1798 - accuracy: 0.8958
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000

[I 2020-09-30 16:04:46,752] Finished trial#0 with value: 0.8047021689104478 with parameters: {'input_dropout': 0.5488135039273248, 'hidden_layers': 2, 'hidden_units': 896.0, 'hidden_dropout': 0.8579456176227568, 'batch_norm': 'non', 'batch_size': 48.0}. Best is trial#0 with value: 0.8047021689104478.


Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
model type is NNClassifier
None


[I 2020-09-30 16:04:47,267] Finished trial#1 with value: 0.08951741930276812 with parameters: {'input_dropout': 0.6458941130666561, 'hidden_layers': 1, 'hidden_units': 320.0, 'hidden_dropout': 0.05671297731744318, 'batch_norm': 'before_act', 'batch_size': 48.0}. Best is trial#1 with value: 0.08951741930276812.
