In [1]:
import os

# os.cpu_count() reports the number of logical CPUs in the system
# (it may be None when the count cannot be determined).
available_cpus = os.cpu_count()
print('사용가능한 cpu 개수:', available_cpus)

사용가능한 cpu 개수: 40


In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.regularizers import l2
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Conv1D, MaxPooling1D, GlobalMaxPool1D, BatchNormalization, Dropout, Activation, GlobalAveragePooling1D, Flatten, SeparableConv1D
from keras.layers.advanced_activations import PReLU
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV
# sklearn의 grid search보다 5배 빠르다고 함
from tune_sklearn import TuneGridSearchCV


# Load the cached training / test arrays.
# Only the presence of x_train.npz is checked; the other three archives are
# assumed to sit next to it.
if os.path.exists('../x_train.npz'):
    print('loading train...', flush=True, end='')
    # np.load on an .npz archive returns an NpzFile that keeps the file open;
    # the context managers close each archive once its array has been read.
    # SECURITY: allow_pickle=True lets NumPy unpickle object arrays, which can
    # execute arbitrary code. Only load archives from a trusted source.
    with np.load('../x_train.npz', allow_pickle=True) as x_train_npz:
        x_train = x_train_npz['arr_0']
    with np.load('../y_train.npz') as y_train_npz:
        y_train = y_train_npz['arr_0']
    with np.load('../x_test.npz', allow_pickle=True) as x_test_npz:
        x_test = x_test_npz['arr_0']
    with np.load('../y_test.npz') as y_test_npz:
        y_test = y_test_npz['arr_0']
    print('done', flush=True)
else:
    # NOTE(review): x_train / y_train / x_test / y_test stay undefined on this
    # path, so any later cell that uses them will fail with NameError.
    print('no saved training, test set')



loading train...done


In [2]:
# Model factory intended to be passed to KerasClassifier as build_fn
def build_model(num_l1=256, kernel_l1=10, bool_flatten=False, dropout=0.2,
                num_l2=64, num_l3=64, kernel_l2=3, kernel_l3=3):
    """Build and compile a three-stage 1D convolutional binary classifier.

    Each stage is Conv1D (padding='valid') -> BatchNormalization ->
    MaxPooling1D(pool_size=2). The stages are followed by either Flatten or
    GlobalMaxPool1D, a Dropout layer, and a single sigmoid output unit.

    Args:
        num_l1: Number of filters in the first Conv1D layer.
        kernel_l1: Kernel size of the first Conv1D layer.
        bool_flatten: If True, use Flatten before the output head; otherwise
            use GlobalMaxPool1D.
        dropout: Rate passed to the Dropout layer before the output unit.
        num_l2: Number of filters in the second Conv1D layer.
        num_l3: Number of filters in the third Conv1D layer.
        kernel_l2: Kernel size of the second Conv1D layer.
        kernel_l3: Kernel size of the third Conv1D layer.

    Returns:
        A Sequential model compiled with binary cross-entropy loss, the Adam
        optimizer and the 'accuracy' metric. No input shape is declared here,
        so the model's weights are created when it first sees data.
    """
    # Multi-GPU option that was left disabled:
    #strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])
    #with strategy.scope():

    # NOTE(review): this session is never referenced again and never closed.
    # It is presumably created only so the allow_growth GPU option takes
    # effect - confirm, since a new session is opened on every call.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)

    model = Sequential()

    # (filters, kernel_size) for conv stages 1..3, in order.
    conv_stages = (
        (num_l1, kernel_l1),
        (num_l2, kernel_l2),
        (num_l3, kernel_l3),
    )
    # NOTE(review): the Conv1D layers have no activation, so max-pooling is the
    # only non-linearity between stages - confirm this is intended.
    for filters, kernel_size in conv_stages:
        model.add(Conv1D(filters=filters, kernel_size=kernel_size, padding='valid'))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))

    # Collapse the temporal axis before the fully connected head
    if bool_flatten:
        model.add(Flatten())
    else:
        model.add(GlobalMaxPool1D())

    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid'))

    # `Adam` is not imported in any visible cell, so `Adam(lr=0.001)` raised
    # NameError. The 'adam' alias builds the same optimizer with Keras' default
    # learning rate, which is 0.001, and avoids the deprecated `lr` keyword.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model


#hist = model.fit(x_train, y_train, validation_split=0.1, epochs=100, batch_size=BATCH_SIZE, #class_weight={0:1, 1:3}, 
#                            callbacks=[ModelCheckpoint(monitor='val_loss', filepath=weight_path, verbose=1, save_best_only=True),
#                                        EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto')])
    

In [3]:
from keras.wrappers.scikit_learn import KerasClassifier

### Inputs for RandomizedSearchCV ###
# Wrap the Keras model factory in a scikit-learn compatible estimator
model_keras = KerasClassifier(build_fn=build_model)

# Candidate values for the random search
dropout_rate_opts = [0, 0.1, 0.2, 0.5]
num_l1 = [256, 512]
num_l2 = [64, 128, 256]
num_l3 = [64, 128, 256]
kernel_l1 = [3, 15]
kernel_l2 = [3, 5, 10, 15]
kernel_l3 = [3, 5, 10, 15]

# param_distributions: only the first conv layer, the flatten switch and the
# dropout rate are searched. The num_l2 / num_l3 / kernel_l2 / kernel_l3 lists
# above (and a possible 'batch_size' entry) are currently left out.
keras_param_options = dict(
    num_l1=num_l1,
    kernel_l1=kernel_l1,
    bool_flatten=[True, False],
    dropout=dropout_rate_opts,
)


# Randomized search: 2 sampled candidates x 2 CV folds, scored by ROC AUC
random_search = RandomizedSearchCV(
    estimator=model_keras,
    param_distributions=keras_param_options,
    n_iter=2,
    cv=2,
    scoring='roc_auc',
    n_jobs=1,
    verbose=1,
)


In [None]:
# Keyword arguments forwarded to the Keras model's fit() on every CV fit.
callbacks = [EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='auto')]
keras_fit_params = {
    'callbacks': callbacks,
    # Keras' fit() keyword is 'epochs'; the previous 'epoch' key is not accepted.
    'epochs': 100,
    'batch_size': 512,
    # EarlyStopping monitors 'val_loss', which only exists when validation
    # data is supplied; hold out 10% of each training fold for it.
    'validation_split': 0.1,
    'verbose': 0,
}

# start finding hyperparameters
# RandomizedSearchCV.fit takes the estimator's fit parameters as **kwargs;
# passing `fit_params=<dict>` would forward one bogus keyword named
# 'fit_params' to the estimator instead of the individual settings.
random_search.fit(x_train, y_train, **keras_fit_params)


# results (the search object is `random_search`; `rs_keras` was never defined)
print(f'Best score obtained: {random_search.best_score_}')
print('Parameters:')
for param, value in random_search.best_params_.items():
    print(f'\t{param}: {value}')

Fitting 2 folds for each of 2 candidates, totalling 4 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
