In [1]:
import keras
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Input, Dense, BatchNormalization, Dropout, concatenate, regularizers, \
                          Conv2D, MaxPool2D, Flatten, Activation, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras.models import Model
import sys
# Add the current working directory to the module search path so that the
# `utils` module imported on the next line can be found there.
sys.path.append('.')
import utils
# Expose only CUDA device index 1 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def cnn_model(input_shape):
    """Build the (uncompiled) convolutional binary classifier.

    The network stacks five stages of Conv2D -> BatchNormalization -> ReLU ->
    Dropout. Every convolution uses a kernel of height 1 and 'same' padding,
    and no pooling is applied between stages. The final feature map is
    flattened and fed to a single sigmoid unit.

    Args:
        input_shape: shape tuple forwarded to ``Input(shape=...)``
            (batch dimension excluded).

    Returns:
        A ``Model`` mapping the input tensor to one sigmoid output. The model
        is not compiled here; the caller is expected to call ``compile``.
    """
    # One row per stage: (number of filters, kernel width, dropout rate).
    stage_specs = (
        (512, 7, 0.1),
        (256, 5, 0.1),
        (128, 3, 0.1),
        (64, 3, 0.2),
        (32, 3, 0.2),
    )

    inputs = Input(shape=input_shape)

    features = inputs
    for n_filters, kernel_width, drop_rate in stage_specs:
        features = Conv2D(n_filters, kernel_size=(1, kernel_width), padding='same')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = Dropout(drop_rate)(features)

    flattened = Flatten()(features)
    prediction = Dense(1, activation='sigmoid')(flattened)

    return Model(inputs=inputs, outputs=prediction)

In [3]:
# Load the dataset from data.csv (path is relative to the working directory).
data = pd.read_csv('data.csv')
# pop() removes the 'label' column from `data` in place and returns it, so
# after this line `data` holds only the remaining (feature) columns.
y = data.pop('label').values
# Remaining columns as a NumPy array; must come after the pop above.
X = data.values

In [6]:
# Experiment settings read by the cross-validation cell.
kfold = 10  # number of folds, passed to utils.kfold as num_fold
epochs = 30  # training epochs run for each fold
batch_size = 64  # mini-batch size passed to utils.data_iter
opt = Adam(lr=0.0001)  # optimizer settings used when compiling each fold's model
class_weight = {1:20, 0:1}  # passed to train_on_batch: label 1 weighted 20, label 0 weighted 1
# NOTE(review): the four settings below are not referenced by any cell visible
# in this notebook — confirm whether they are still needed (e.g. by utils.kfold).
train_size = 0.8
val_size = 0.1
test_size = 0.1
random_state = None

In [9]:
# Cross-validated training.
#
# For every (train, val, test) split yielded by utils.kfold, a fresh CNN is
# trained for `epochs` epochs. After each epoch AUC and KS are computed on all
# three splits; whenever the validation KS improves, the model is saved and the
# metrics of that epoch are remembered as the fold's "best" metrics. The
# per-fold best metrics are collected in the *_record lists and averaged at
# the end.
best_train_auc_record = []
best_train_ks_record = []
best_val_auc_record = []
best_val_ks_record = []
best_test_auc_record = []
best_test_ks_record = []

fold = 1
for X_train, X_val, X_test, y_train, y_val, y_test in utils.kfold(X, y, num_fold=kfold):
    print('\n%d fold start..............\n' % (fold))

    # Prepare data: the scaler is fitted on the training split only and then
    # applied to all three splits, so no val/test statistics leak into training.
    min_max_scaler = MinMaxScaler()
    min_max_scaler.fit(X_train)

    X_train = min_max_scaler.transform(X_train)
    X_val = min_max_scaler.transform(X_val)
    X_test = min_max_scaler.transform(X_test)

    X_train_conv = utils.process_data_for_conv2D(X_train)
    X_val_conv = utils.process_data_for_conv2D(X_val)
    X_test_conv = utils.process_data_for_conv2D(X_test)

    train_data_iter = utils.data_iter(X_train_conv, y_train, batch_size)

    # Build model.
    # Each fold gets its own optimizer instance built from the configured one.
    # Compiling the single shared `opt` into every model would carry optimizer
    # state (e.g. the iteration counter) over from one fold to the next.
    fold_opt = type(opt).from_config(opt.get_config())
    model = cnn_model(input_shape=(1, 285, 1))
    model.compile(
                loss='binary_crossentropy',
                metrics=['accuracy'],
                optimizer=fold_opt)

    # Train.
    best_train_auc = 0
    best_train_ks = 0
    best_val_auc = 0
    best_val_ks = 0
    best_test_auc = 0
    best_test_ks = 0

    # Number of batches that make up one pass over the training split
    # (may be fractional; the loop below stops at the first count >= this).
    steps_per_epoch = len(X_train) / batch_size

    for e in range(epochs):
        batchs = 0
        # The batch iterator is not restarted per epoch; we simply stop
        # pulling from it once one epoch's worth of batches has been consumed.
        for X_batch, y_batch in train_data_iter:
            model.train_on_batch(X_batch, y_batch, class_weight=class_weight)
            batchs += 1
            if batchs >= steps_per_epoch:
                break

        train_loss, train_acc = model.evaluate(X_train_conv, y_train, verbose=0)

        y_train_pre = model.predict(X_train_conv)
        y_val_pre = model.predict(X_val_conv)
        y_test_pre = model.predict(X_test_conv)

        train_auc = utils.cal_auc(y_train, y_train_pre)
        val_auc = utils.cal_auc(y_val, y_val_pre)
        test_auc = utils.cal_auc(y_test, y_test_pre)

        train_ks = utils.get_ks_score(y_train, y_train_pre)
        val_ks = utils.get_ks_score(y_val, y_val_pre)
        test_ks = utils.get_ks_score(y_test, y_test_pre)

        # NOTE(review): formatted with %.4f below, so this helper is assumed
        # to return a single number — confirm against utils.
        train_performance = utils.get_model_key_performance(y_train, y_train_pre)
        val_performance = utils.get_model_key_performance(y_val, y_val_pre)
        test_performance = utils.get_model_key_performance(y_test, y_test_pre)

        # Model selection is driven by validation KS only; the train/test
        # numbers stored here are the ones observed at that same epoch.
        if val_ks > best_val_ks:
            model.save('best_model_' + str(fold) + '.hdf5')
            best_train_auc = train_auc
            best_train_ks = train_ks
            best_val_auc = val_auc
            best_val_ks = val_ks
            best_test_auc = test_auc
            best_test_ks = test_ks

        print('Epoch %d train_loss %.4f train_acc %.4f, train_auc %.4f train_ks %.4f, val_auc %.4f val_ks %.4f, test_auc %.4f test_ks %.4f' % 
              (e+1, train_loss, train_acc, train_auc, train_ks, val_auc, val_ks, test_auc, test_ks))
        # The epoch number must be supplied for the leading %d; without it the
        # format string has more specifiers than arguments and raises TypeError.
        print('Epoch %d train_performance %.4f val_performance %.4f test_performance %.4f' % 
              (e+1, train_performance, val_performance, test_performance))
    fold += 1

    best_train_auc_record.append(best_train_auc)
    best_train_ks_record.append(best_train_ks)
    best_val_auc_record.append(best_val_auc)
    best_val_ks_record.append(best_val_ks)
    best_test_auc_record.append(best_test_auc)
    best_test_ks_record.append(best_test_ks)

# Average the per-fold best metrics over all folds.
best_train_auc_avg = np.array(best_train_auc_record).mean()
best_train_ks_avg = np.array(best_train_ks_record).mean()
best_val_auc_avg = np.array(best_val_auc_record).mean()
best_val_ks_avg = np.array(best_val_ks_record).mean()
best_test_auc_avg = np.array(best_test_auc_record).mean()
best_test_ks_avg = np.array(best_test_ks_record).mean()
print('\n\n%d fold train_auc_avg %.4f train_ks_avg %.4f, val_auc_avg %.4f val_ks_avg %.4f, test_auc_avg %.4f test_ks_avg %.4f' %
    (kfold, best_train_auc_avg, best_train_ks_avg, best_val_auc_avg, best_val_ks_avg, best_test_auc_avg, best_test_ks_avg))


1 fold start..............

Epoch 1 train_loss: 0.1463 train_acc: 0.9801, train_auc: 0.7787 train_ks: 0.4383, val_auc: 0.7646 val_ks: 0.4180, test_auc: 0.7909 test_ks: 0.4663
Epoch 2 train_loss: 0.1432 train_acc: 0.9758, train_auc: 0.7908 train_ks: 0.4575, val_auc: 0.7804 val_ks: 0.4364, test_auc: 0.8029 test_ks: 0.4826
Epoch 3 train_loss: 0.1335 train_acc: 0.9800, train_auc: 0.7986 train_ks: 0.4649, val_auc: 0.7800 val_ks: 0.4482, test_auc: 0.7974 test_ks: 0.4660
Epoch 4 train_loss: 0.1422 train_acc: 0.9756, train_auc: 0.8018 train_ks: 0.4739, val_auc: 0.7792 val_ks: 0.4430, test_auc: 0.7946 test_ks: 0.4515
Epoch 5 train_loss: 0.1384 train_acc: 0.9780, train_auc: 0.8041 train_ks: 0.4694, val_auc: 0.7808 val_ks: 0.4451, test_auc: 0.7906 test_ks: 0.4571
Epoch 6 train_loss: 0.1450 train_acc: 0.9732, train_auc: 0.8087 train_ks: 0.4793, val_auc: 0.7828 val_ks: 0.4525, test_auc: 0.7926 test_ks: 0.4567
Epoch 7 train_loss: 0.1468 train_acc: 0.9740, train_auc: 0.8088 train_ks: 0.4769, val_auc

Epoch 27 train_loss: 0.1636 train_acc: 0.9581, train_auc: 0.8338 train_ks: 0.5199, val_auc: 0.7688 val_ks: 0.4227, test_auc: 0.7889 test_ks: 0.4850
Epoch 28 train_loss: 0.1601 train_acc: 0.9599, train_auc: 0.8359 train_ks: 0.5253, val_auc: 0.7700 val_ks: 0.4237, test_auc: 0.7868 test_ks: 0.4818
Epoch 29 train_loss: 0.1583 train_acc: 0.9593, train_auc: 0.8339 train_ks: 0.5250, val_auc: 0.7710 val_ks: 0.4266, test_auc: 0.7887 test_ks: 0.4839
Epoch 30 train_loss: 0.1510 train_acc: 0.9630, train_auc: 0.8374 train_ks: 0.5316, val_auc: 0.7703 val_ks: 0.4386, test_auc: 0.7820 test_ks: 0.4821

3 fold start..............

Epoch 1 train_loss: 0.2974 train_acc: 0.9285, train_auc: 0.7751 train_ks: 0.4229, val_auc: 0.7922 val_ks: 0.4783, test_auc: 0.7620 test_ks: 0.4080
Epoch 2 train_loss: 0.2537 train_acc: 0.9400, train_auc: 0.7876 train_ks: 0.4435, val_auc: 0.7872 val_ks: 0.4427, test_auc: 0.7667 test_ks: 0.4294
Epoch 3 train_loss: 0.2820 train_acc: 0.9263, train_auc: 0.7990 train_ks: 0.4583, val

Epoch 23 train_loss: 0.2090 train_acc: 0.9485, train_auc: 0.8336 train_ks: 0.5056, val_auc: 0.7573 val_ks: 0.4367, test_auc: 0.7275 test_ks: 0.4013
Epoch 24 train_loss: 0.2006 train_acc: 0.9553, train_auc: 0.8359 train_ks: 0.5108, val_auc: 0.7536 val_ks: 0.4420, test_auc: 0.7241 test_ks: 0.3937
Epoch 25 train_loss: 0.2005 train_acc: 0.9524, train_auc: 0.8367 train_ks: 0.5137, val_auc: 0.7539 val_ks: 0.4220, test_auc: 0.7297 test_ks: 0.4163
Epoch 26 train_loss: 0.2164 train_acc: 0.9493, train_auc: 0.8388 train_ks: 0.5171, val_auc: 0.7581 val_ks: 0.4415, test_auc: 0.7323 test_ks: 0.4134
Epoch 27 train_loss: 0.1966 train_acc: 0.9551, train_auc: 0.8418 train_ks: 0.5183, val_auc: 0.7598 val_ks: 0.4463, test_auc: 0.7235 test_ks: 0.4084
Epoch 28 train_loss: 0.1998 train_acc: 0.9553, train_auc: 0.8413 train_ks: 0.5214, val_auc: 0.7566 val_ks: 0.4387, test_auc: 0.7257 test_ks: 0.4235
Epoch 29 train_loss: 0.1972 train_acc: 0.9543, train_auc: 0.8435 train_ks: 0.5253, val_auc: 0.7593 val_ks: 0.452

Epoch 19 train_loss: 0.1807 train_acc: 0.9590, train_auc: 0.8247 train_ks: 0.5053, val_auc: 0.8072 val_ks: 0.5297, test_auc: 0.7541 test_ks: 0.4307
Epoch 20 train_loss: 0.1816 train_acc: 0.9575, train_auc: 0.8243 train_ks: 0.4974, val_auc: 0.8033 val_ks: 0.5308, test_auc: 0.7493 test_ks: 0.4187
Epoch 21 train_loss: 0.1862 train_acc: 0.9552, train_auc: 0.8268 train_ks: 0.5025, val_auc: 0.8071 val_ks: 0.5333, test_auc: 0.7509 test_ks: 0.4072
Epoch 22 train_loss: 0.1779 train_acc: 0.9570, train_auc: 0.8274 train_ks: 0.5031, val_auc: 0.8065 val_ks: 0.5251, test_auc: 0.7488 test_ks: 0.4071
Epoch 23 train_loss: 0.1705 train_acc: 0.9625, train_auc: 0.8299 train_ks: 0.5078, val_auc: 0.7999 val_ks: 0.5292, test_auc: 0.7450 test_ks: 0.4020
Epoch 24 train_loss: 0.1784 train_acc: 0.9579, train_auc: 0.8294 train_ks: 0.5075, val_auc: 0.8000 val_ks: 0.5187, test_auc: 0.7457 test_ks: 0.3944
Epoch 25 train_loss: 0.1760 train_acc: 0.9598, train_auc: 0.8334 train_ks: 0.5158, val_auc: 0.7958 val_ks: 0.515

Epoch 15 train_loss: 0.1961 train_acc: 0.9545, train_auc: 0.8362 train_ks: 0.5333, val_auc: 0.7318 val_ks: 0.3522, test_auc: 0.7815 test_ks: 0.4566
Epoch 16 train_loss: 0.2116 train_acc: 0.9474, train_auc: 0.8382 train_ks: 0.5368, val_auc: 0.7288 val_ks: 0.3439, test_auc: 0.7769 test_ks: 0.4678
Epoch 17 train_loss: 0.2126 train_acc: 0.9474, train_auc: 0.8400 train_ks: 0.5410, val_auc: 0.7341 val_ks: 0.3548, test_auc: 0.7817 test_ks: 0.4518
Epoch 18 train_loss: 0.1970 train_acc: 0.9541, train_auc: 0.8413 train_ks: 0.5407, val_auc: 0.7337 val_ks: 0.3546, test_auc: 0.7820 test_ks: 0.4637
Epoch 19 train_loss: 0.1877 train_acc: 0.9560, train_auc: 0.8428 train_ks: 0.5479, val_auc: 0.7348 val_ks: 0.3598, test_auc: 0.7822 test_ks: 0.4785
Epoch 20 train_loss: 0.1937 train_acc: 0.9548, train_auc: 0.8446 train_ks: 0.5488, val_auc: 0.7372 val_ks: 0.3590, test_auc: 0.7813 test_ks: 0.4516
Epoch 21 train_loss: 0.1745 train_acc: 0.9615, train_auc: 0.8462 train_ks: 0.5482, val_auc: 0.7360 val_ks: 0.353

Epoch 11 train_loss: 0.1677 train_acc: 0.9689, train_auc: 0.8126 train_ks: 0.4825, val_auc: 0.7558 val_ks: 0.4298, test_auc: 0.8353 test_ks: 0.5383
Epoch 12 train_loss: 0.1541 train_acc: 0.9717, train_auc: 0.8151 train_ks: 0.4906, val_auc: 0.7637 val_ks: 0.4469, test_auc: 0.8307 test_ks: 0.5490
Epoch 13 train_loss: 0.1550 train_acc: 0.9721, train_auc: 0.8185 train_ks: 0.4965, val_auc: 0.7657 val_ks: 0.4485, test_auc: 0.8332 test_ks: 0.5421
Epoch 14 train_loss: 0.1435 train_acc: 0.9740, train_auc: 0.8183 train_ks: 0.4860, val_auc: 0.7560 val_ks: 0.4253, test_auc: 0.8328 test_ks: 0.5512
Epoch 15 train_loss: 0.1448 train_acc: 0.9740, train_auc: 0.8203 train_ks: 0.4960, val_auc: 0.7561 val_ks: 0.4442, test_auc: 0.8336 test_ks: 0.5554
Epoch 16 train_loss: 0.1419 train_acc: 0.9735, train_auc: 0.8218 train_ks: 0.4959, val_auc: 0.7576 val_ks: 0.4415, test_auc: 0.8344 test_ks: 0.5402
Epoch 17 train_loss: 0.1527 train_acc: 0.9717, train_auc: 0.8242 train_ks: 0.5040, val_auc: 0.7576 val_ks: 0.443

In [8]:
# Validation KS of each fold, taken at that fold's epoch with the highest validation KS.
best_val_ks_record

[0.48606601731601734,
 0.571900123685838,
 0.47901205936920227,
 0.3877203153988868,
 0.38022959183673466,
 0.563006338899196,
 0.4800711193568336,
 0.5027829313543599,
 0.4773732220160791,
 0.4956516697588126]

In [9]:
# Test KS of each fold, taken at the epoch selected by that fold's highest validation KS.
best_test_ks_record

[0.40354477611940304,
 0.4559255007016201,
 0.41153208317387424,
 0.4621299098911039,
 0.41234967354370333,
 0.41531851233343775,
 0.3917881455194888,
 0.45880156327917526,
 0.44102912013359774,
 0.43589743589743596]