In [1]:
import mxnet as mx
import numpy as np
import pandas as pd
from mxnet import ndarray as nd
from sklearn.model_selection import train_test_split, StratifiedKFold
from mxnet import gluon as gl
from mxnet import init
from mxnet.gluon import nn
from mxnet import autograd as ag
from sklearn import metrics
from matplotlib import pyplot as plt
from sklearn import feature_selection
import itertools
%matplotlib inline
np.random.seed(0)

  import OpenSSL.SSL


In [2]:
def CNN():
    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.Conv1D(channels = 16, kernel_size = 7, padding = 3),
            nn.BatchNorm(axis = 1),
            nn.Activation('relu'),
#             nn.MaxPool1D(pool_size = 2, strides = 2),
            nn.Dropout(0.1),
            nn.Conv1D(channels=16, kernel_size=7, padding=3),
            nn.BatchNorm(axis=1),
            nn.Activation('relu'),
#             nn.MaxPool1D(pool_size = 2, strides = 2),
            nn.Dropout(0.1),
            
            nn.Conv1D(channels=32, kernel_size=3, padding=1),
            nn.BatchNorm(axis=1),
            nn.Activation('relu'),
#             nn.MaxPool1D(pool_size = 4, strides = 4),
            nn.Dropout(0.1),
        
            nn.Conv1D(channels=64, kernel_size=3, padding=1),
            nn.BatchNorm(axis=1),
            nn.Activation('relu'),
#             nn.MaxPool1D(pool_size = 4, strides = 4),
            nn.Dropout(0.2),
        
            nn.Conv1D(channels=128, kernel_size=3, padding=1),
            nn.BatchNorm(),
            nn.Activation('relu'),
#             nn.MaxPool1D(pool_size=4, strides=4),
            nn.Dropout(0.2),
#             nn.GlobalMaxPool1D(),
            nn.Flatten(),
            nn.Dense(2)
        )
    return net

In [3]:
%%time
import random

def dataIter(X, y, batch_size, shuffle = True):
    num_examples = y.shape[0]
    index = list(range(num_examples))
    if shuffle: random.shuffle(index)
    for i in range(0, num_examples, batch_size):
        j = index[i: min(i + batch_size, num_examples)]
        yield nd.array(X[j]), nd.array(y[j]), len(j)
        
def getWeight(params, label_pred, label_true):
    weights = []
    for pred, label in zip(label_pred, label_true):
        tp_tn = (pred == label) * params['tp_tn_weight']
        fp = ((pred == 0) * (label == 1)) * params['fp_weight']  
        fn = ((pred == 1) * (label == 0)) * params['fn_weight']
        weights.append(tp_tn + fp + fn)
    return weights

def testDataIter(X, batch_size):
    num_examples = X.shape[0]
    index = list(range(num_examples))
    for i in range(0, num_examples, batch_size):
        j = index[i: min(i + batch_size, num_examples)]
        yield nd.array(X[j])
        
def accuracy(output, label):
    return nd.sum(output.argmax(axis = 1) == label).asscalar()

def testAccuracy(net, ctx, X, y):
    acc = 0.
    pred_labels = []
    pred_probs = []
    index = 0
    for data, label, bs in dataIter(X, y, 64, shuffle = False):
        data = nd.array(data).as_in_context(ctx)
        label = nd.array(label).as_in_context(ctx)
        output = net(data) # the size of output is (64,2)
        output = nd.softmax(output, axis = 1)   # the size of output is (64,2)
        pred_label = nd.argmax(output, axis = 1) # the size of pre_label is (64,)
        pred_prob = output[:, 0] 
        pred_labels.append(pred_label)
        pred_probs.append(pred_prob)
        index += bs
        
        predict = nd.sum(pred_label == label).asscalar()
        acc += predict
        
    pred_labels = nd.concatenate(pred_labels, axis = 0).asnumpy()
    pred_probs = nd.concatenate(pred_probs, axis = 0).asnumpy()
    return acc / y.shape[0] * 100, pred_labels, pred_probs

def testProb(net, ctx, X):
    pred_labels = []
    pred_probs = []
    for data in testDataIter(X, 64):
        data = nd.array(data).as_in_context(ctx)
        output = net(data)
        output = nd.softmax(output, axis = 1)
        pred_label = nd.argmax(output, axis = 1)
        pred_prob = output[:, 0]
        pred_labels.append(pred_label)
        pred_probs.append(pred_prob)
    
    pred_labels = nd.concatenate(pred_labels, axis = 0).asnumpy()
    pred_probs = nd.concatenate(pred_probs, axis = 0).asnumpy()
    print(pred_labels, pred_prob)
    return pred_labels, pred_probs

def calculateLoss(params, net, loss_fuc, ctx, X, y):
    total_loss = 0.
    for data, label, bs in dataIter(X, y, 64, shuffle = False):
        data = nd.array(data).as_in_context(ctx)
        label = nd.array(label).as_in_context(ctx)
        output = net(data)
        weight = getWeight(params, [nd.argmax(output, axis = 1)], [label])[0]
        loss = loss_fuc(output, label, weight)
        total_loss += nd.sum(loss).asscalar()
    return total_loss / y.shape[0]

def plotConfusionMatrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
    
def evaluate(test_y, test_pred_label, test_pred_prob, pos_label = 0):
    auc = calculateAUC(test_y, test_pred_prob, pos_label = pos_label)
    ks_their, ks_mine = calculateKS(test_y, test_pred_prob)
    cm = metrics.confusion_matrix(test_y, test_pred_label)
#     plotConfusionMatrix(cm, ['good', 'bad'])
    return auc, ks_their, ks_mine

def calculateAUC(label, prob, pos_label):
    fpr, tpr, thresholds = metrics.roc_curve(label, prob, pos_label = pos_label)  
    ks = max(tpr - fpr)
    roc_auc = metrics.auc(fpr, tpr)
    return roc_auc

def calculateKS(label, prob):
    df = pd.DataFrame(data = {'label': label, 'prob': prob})
    df['prob'] = df['prob'].map(lambda x: round(x, 3))
    total = pd.DataFrame({'total': df.groupby('prob')['label'].count()})  
    bad = pd.DataFrame({'bad': df.groupby('prob')['label'].sum()})
    all_data = total.merge(bad, how = 'left', left_index = True, right_index = True)
    all_data['good'] = all_data['total'] - all_data['bad']
    all_data.reset_index(inplace = True)
    all_data.sort_index(ascending = False, inplace = True)
    all_data['goodCumPer'] = all_data['good'].cumsum() / all_data['good'].sum()
    all_data['badCumPer'] = all_data['bad'].cumsum() / all_data['bad'].sum()
    KS_m = all_data.apply(lambda x: x.goodCumPer - x.badCumPer, axis = 1)
    KS_t = all_data.apply(lambda x: x.badCumPer - x.goodCumPer, axis = 1)
    return max(KS_t), max(KS_m)

Wall time: 0 ns


In [4]:
def train(fold, train_X, train_y, valid_X, valid_y, test_X, test_y, params):
    ctx = [mx.gpu(i) for i in params['ctx']]
    net = params['model']()
    net.initialize(ctx = ctx, init = init.Xavier())
    trainer = gl.Trainer(net.collect_params(), params['optimizer'], {'learning_rate': params['lr'], 
                                                                     'wd': params['wd']})
    loss = gl.loss.SoftmaxCrossEntropyLoss()
    
    keep_indicator = -1

    for epoch in range(1, params['epochs'] + 1):
        train_loss = 0
        train_acc = 0
        for data, label, bs in dataIter(train_X, train_y, params['bs']):
            data = gl.utils.split_and_load(data, ctx)
            label = gl.utils.split_and_load(label, ctx)
            with ag.record():
                outputs = [net(x) for x in data]
                label_preds = [nd.argmax(nd.softmax(output, axis = 1), axis = 1) for output in outputs]
                weights = getWeight(params, label_preds, label)
                losses = [loss(x, y, z) for x, y, z in zip(outputs, label, weights)]
            for l in losses: l.backward()
            
            trainer.step(params['bs'])
            train_loss += sum([l.sum().asscalar() for l in losses])
            train_acc += sum([accuracy(x, y) for x, y in zip(outputs, label)])
        trainer.set_learning_rate(params['lr'] - params['lr'] / params['epochs'] * epoch)
        
        _, train_pred_label, train_pred_prob = testAccuracy(net, ctx[0], train_X, train_y)
        train_auc, train_ks_their, train_ks_mine = \
        evaluate(train_y, train_pred_label, train_pred_prob, pos_label = 0)
        
        valid_loss = calculateLoss(params, net, loss, ctx[0], valid_X, valid_y)
        valid_acc, valid_pred_label, valid_pred_prob = testAccuracy(net, ctx[0], valid_X, valid_y)
        valid_auc, valid_ks_their, valid_ks_mine = \
        evaluate(valid_y, valid_pred_label, valid_pred_prob, pos_label = 0)
        
        test_loss = calculateLoss(params, net, loss, ctx[0], test_X, test_y)
        test_acc, test_pred_label, test_pred_prob = testAccuracy(net, ctx[0], test_X, test_y)
        test_auc, test_ks_their, test_ks_mine = evaluate(test_y, test_pred_label, test_pred_prob, pos_label = 0)
        
        f1 = valid_auc * valid_ks_mine / (valid_auc + valid_ks_mine) 
        if f1 > keep_indicator:
            keep_indicator = f1
            net.save_params(params['model_path'] + '%s-%d-fold.model' % (params['prefix'], fold))
        
        print("%3d. L: %.3f,Acc %.1f,AUC %.3f,KS %.3f|"
                   "L: %.3f,Acc %.1f,AUC %.3f,KS %.3f|"
                   "L: %.3f,Acc %.1f,AUC %.3f,KS %.3f" %
              (epoch, train_loss / train_X.shape[0], train_acc / train_X.shape[0] * 100, train_auc, train_ks_mine,
               valid_loss, valid_acc, valid_auc, valid_ks_mine, 
               test_loss, test_acc, test_auc, test_ks_mine))

In [5]:
def predict(fold, params, X, y):
    ctx = [mx.gpu(i) for i in params['ctx']][0]
    net = params['model']()
    net.load_params(params['model_path'] + '%s-%d-fold.model' % (params['prefix'], fold), ctx = ctx)

    acc, pred_label, pred_prob = testAccuracy(net, ctx, X, y)
    auc, ks_their, ks_mine = evaluate(y, pred_label, pred_prob, pos_label = 0)
    return auc, ks_mine, pred_prob

def blindPredict(params, X):
    ctx = [mx.gpu(i) for i in params['ctx']][0]
    net = params['model']()
    net.load_params(params['model_path'], ctx = ctx)

    pred_label, pred_prob = testProb(net, ctx, X)
    return pred_label, pred_prob

In [8]:
def loadTrain(filename):
    X = []; y = []
    with open(filename, 'r') as f:
        line_count = 0
        for line in f:
            line_count += 1
            if line_count == 1: continue
            line = line.strip().split(',')
            line = [float(x) for x in line]
            X.append(line[:-1])
            y.append(line[-1])
    return np.array(X).astype('float32'), np.array(y).astype('int8')

def loadTest(filename):
    X = []
    with open(filename, 'r') as f:
        line_count = 0
        for line in f:
            line_count += 1
            if line_count == 1: continue
            line = line.strip().split(',')
            line = [float(x) for x in line]
            X.append(line)
    return np.array(X).astype('float32')

def subsample(X, y, sample_rate):
    positive_index = np.argwhere(y == 0)
    negtive_index = np.argwhere(y == 1)
    random.shuffle(positive_index)
    sampled_index = positive_index[0: sample_rate * len(negtive_index)]
    index = np.vstack((sampled_index, negtive_index)).squeeze()
    return X[index], y[index]

def deleteMissLotFeature(X, y, rate = 0.7):
    empty_flag = (X == -2) + (X == -1)
    empty_rate = empty_flag.sum(axis = 0) * 1.0 / empty_flag.shape[0]
    feature_index = np.argwhere(empty_rate <= rate)
    feature_index = [x[0] for x in feature_index]
    new_X = X[:, feature_index]
    return new_X

def deleteMissLotSample(X, y, rate = 0.4):
    empty_flag = (X == -2) + (X == -1)
    empty_rate = empty_flag.sum(axis = 1) * 1.0 / empty_flag.shape[1]
    sample_index = np.argwhere(empty_rate <= rate)
    sample_index = [x[0] for x in sample_index]
    new_X = X[sample_index]
    new_y = y[sample_index]
    return new_X, new_y

def featureSelection(X, y):
    sel = feature_selection.VarianceThreshold(threshold = 0)
    X = sel.fit_transform(X)
    return X, y

def splitDataset(X, y):
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.2)
    valid_X, test_X, valid_y, test_y = train_test_split(test_X, test_y, test_size = 0.5)
    
    return train_X, train_y, valid_X, valid_y, test_X, test_y

def addFeature(func, X):
    feature = func(X)
    X = np.column_stack((X, feature))
    return X

def missingRate(X):
    empty_flag = (X == -2) + (X == -1)
    empty_rate = empty_flag.sum(axis = 1) * 1.0 / empty_flag.shape[1]
    return empty_rate

def param():
    params = {
        'lr': 0.002,
        'bs': 64,
        'ctx': [0],
        'optimizer': 'adam',
        'epochs': 50,
        'sample_rate': 100,
        'feature_delete_rate': 1,
        'sample_delete_rate': 1,
        'model_path': 'model/',
        'wd': 0.0000,
        'model': CNN,
#         'model': furuizeModel,
        'nfold': 10,
        'fp_weight': 10,
        'fn_weight': 5,
        'tp_tn_weight': 1,
        'prefix': 'best-model-cost-sensitive'
    }
    return params

def main():
    train_file = 'X_final.csv'
    train_X = pd.read_csv(train_file)
    X = train_X.as_matrix()
    
#     train_file = 'train_feature_selection.csv'
    params = param()
    label_data = pd.read_csv("y_final.csv")
    label = label_data.as_matrix()
    y = label[:,-1]
    #X, y = loadTrain(train_file)
    print(type(X))
    print(X.shape, y.shape)
    
#     X = addFeature(missingRate, X)
    
    X = deleteMissLotFeature(X, y, rate = params['feature_delete_rate'])
    print('After feature delete: ', X.shape)
    X, y = deleteMissLotSample(X, y, rate = params['sample_delete_rate'])
    print('After Sample delete:', X.shape)
    X, y = featureSelection(X, y)
    print('After Feature Selection: ', X.shape, y.shape)
    X = np.expand_dims(X, axis = 1)
    
    nfold = params['nfold']
    skf = StratifiedKFold(n_splits = nfold)
    
    fold = 1
    for train_index, test_index in skf.split(X, y):
        print('fold %d' % fold)
        X_train, y_train = X[train_index], y[train_index]
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 1.0 / (nfold - 1))
        X_test, y_test = X[test_index], y[test_index]

#         train_X, train_y = subsample(train_X, train_y, params['sample_rate'])
    
        mu = X_train.mean(axis = (0, 1), keepdims = True)
        std = X_train.std(axis = (0, 1), keepdims = True)
    
        X_train = (X_train - mu) / std
        X_valid = (X_valid - mu) / std
        X_test = (X_test - mu) / std
    
        train(fold, X_train, y_train, X_valid, y_valid, X_test, y_test, params)
    
        valid_auc, valid_ks, valid_pred_prob = predict(fold, params, X_valid, y_valid)
        test_auc, test_ks, test_pred_prob = predict(fold, params, X_test, y_test)
        print('Valid AUC: %.3f, Valid KS: %.3f' % (valid_auc, valid_ks))
        print('Test AUC: %.3f, Test KS: %.3f' % (test_auc, test_ks))
        fold += 1
#     save(valid_pred_prob)
#     save(test_pred_prob)

main()

<class 'numpy.ndarray'>
(79054, 122) (79054,)
After feature delete:  (79054, 122)
After Sample delete: (79054, 122)
After Feature Selection:  (79054, 122) (79054,)
fold 1
  1. L: 0.105,Acc 99.0,AUC 0.706,KS 0.304|L: 0.052,Acc 99.3,AUC 0.710,KS 0.319|L: 0.061,Acc 99.1,AUC 0.699,KS 0.315
  2. L: 0.062,Acc 99.1,AUC 0.764,KS 0.397|L: 0.048,Acc 99.3,AUC 0.728,KS 0.387|L: 0.057,Acc 99.2,AUC 0.720,KS 0.349
  3. L: 0.057,Acc 99.1,AUC 0.779,KS 0.423|L: 0.049,Acc 99.3,AUC 0.742,KS 0.369|L: 0.057,Acc 99.2,AUC 0.731,KS 0.368
  4. L: 0.055,Acc 99.1,AUC 0.791,KS 0.444|L: 0.046,Acc 99.3,AUC 0.761,KS 0.402|L: 0.057,Acc 99.2,AUC 0.725,KS 0.371
  5. L: 0.055,Acc 99.1,AUC 0.790,KS 0.435|L: 0.047,Acc 99.3,AUC 0.745,KS 0.376|L: 0.056,Acc 99.2,AUC 0.749,KS 0.391
  6. L: 0.054,Acc 99.1,AUC 0.794,KS 0.438|L: 0.048,Acc 99.3,AUC 0.748,KS 0.387|L: 0.057,Acc 99.1,AUC 0.747,KS 0.393
  7. L: 0.054,Acc 99.1,AUC 0.797,KS 0.447|L: 0.047,Acc 99.3,AUC 0.741,KS 0.401|L: 0.057,Acc 99.2,AUC 0.737,KS 0.361
  8. L: 0.053,Acc

 20. L: 0.050,Acc 99.1,AUC 0.846,KS 0.538|L: 0.052,Acc 99.2,AUC 0.757,KS 0.383|L: 0.263,Acc 99.1,AUC 0.726,KS 0.365
 21. L: 0.050,Acc 99.2,AUC 0.852,KS 0.541|L: 0.052,Acc 99.2,AUC 0.761,KS 0.392|L: 0.258,Acc 99.1,AUC 0.723,KS 0.355
 22. L: 0.050,Acc 99.2,AUC 0.838,KS 0.517|L: 0.052,Acc 99.2,AUC 0.771,KS 0.395|L: 0.270,Acc 99.2,AUC 0.725,KS 0.393
 23. L: 0.049,Acc 99.2,AUC 0.853,KS 0.547|L: 0.054,Acc 99.2,AUC 0.762,KS 0.373|L: 0.283,Acc 99.1,AUC 0.723,KS 0.361
 24. L: 0.049,Acc 99.2,AUC 0.859,KS 0.552|L: 0.052,Acc 99.2,AUC 0.747,KS 0.350|L: 0.248,Acc 99.1,AUC 0.728,KS 0.336
 25. L: 0.049,Acc 99.2,AUC 0.850,KS 0.537|L: 0.051,Acc 99.2,AUC 0.761,KS 0.382|L: 0.252,Acc 99.1,AUC 0.731,KS 0.369
 26. L: 0.049,Acc 99.2,AUC 0.861,KS 0.558|L: 0.053,Acc 99.2,AUC 0.753,KS 0.386|L: 0.263,Acc 99.1,AUC 0.729,KS 0.375
 27. L: 0.049,Acc 99.2,AUC 0.866,KS 0.562|L: 0.055,Acc 99.2,AUC 0.746,KS 0.391|L: 0.234,Acc 99.1,AUC 0.719,KS 0.362
 28. L: 0.049,Acc 99.2,AUC 0.868,KS 0.561|L: 0.054,Acc 99.2,AUC 0.733,KS

 40. L: 0.046,Acc 99.2,AUC 0.877,KS 0.583|L: 0.061,Acc 99.0,AUC 0.744,KS 0.372|L: 0.381,Acc 99.1,AUC 0.765,KS 0.440
 41. L: 0.046,Acc 99.2,AUC 0.873,KS 0.569|L: 0.061,Acc 99.1,AUC 0.742,KS 0.384|L: 0.377,Acc 99.1,AUC 0.770,KS 0.469
 42. L: 0.047,Acc 99.2,AUC 0.857,KS 0.546|L: 0.060,Acc 99.1,AUC 0.742,KS 0.371|L: 0.368,Acc 99.1,AUC 0.778,KS 0.477
 43. L: 0.047,Acc 99.2,AUC 0.872,KS 0.569|L: 0.060,Acc 99.1,AUC 0.744,KS 0.397|L: 0.374,Acc 99.1,AUC 0.766,KS 0.480
 44. L: 0.046,Acc 99.2,AUC 0.876,KS 0.577|L: 0.061,Acc 99.0,AUC 0.745,KS 0.397|L: 0.380,Acc 99.1,AUC 0.769,KS 0.483
 45. L: 0.047,Acc 99.2,AUC 0.877,KS 0.575|L: 0.061,Acc 99.1,AUC 0.743,KS 0.385|L: 0.387,Acc 99.1,AUC 0.774,KS 0.487
 46. L: 0.046,Acc 99.2,AUC 0.878,KS 0.584|L: 0.061,Acc 99.1,AUC 0.743,KS 0.401|L: 0.385,Acc 99.1,AUC 0.767,KS 0.480
 47. L: 0.046,Acc 99.2,AUC 0.876,KS 0.581|L: 0.061,Acc 99.1,AUC 0.743,KS 0.396|L: 0.387,Acc 99.1,AUC 0.763,KS 0.482
 48. L: 0.046,Acc 99.2,AUC 0.835,KS 0.495|L: 0.061,Acc 99.1,AUC 0.736,KS

 10. L: 0.056,Acc 99.1,AUC 0.803,KS 0.453|L: 0.042,Acc 99.3,AUC 0.802,KS 0.509|L: 0.415,Acc 99.2,AUC 0.732,KS 0.400
 11. L: 0.053,Acc 99.1,AUC 0.814,KS 0.466|L: 0.041,Acc 99.3,AUC 0.807,KS 0.488|L: 0.352,Acc 99.2,AUC 0.726,KS 0.434
 12. L: 0.053,Acc 99.1,AUC 0.815,KS 0.490|L: 0.041,Acc 99.3,AUC 0.809,KS 0.471|L: 0.371,Acc 99.2,AUC 0.736,KS 0.422
 13. L: 0.054,Acc 99.1,AUC 0.815,KS 0.465|L: 0.042,Acc 99.3,AUC 0.801,KS 0.517|L: 0.406,Acc 99.2,AUC 0.733,KS 0.405
 14. L: 0.052,Acc 99.1,AUC 0.815,KS 0.476|L: 0.042,Acc 99.3,AUC 0.811,KS 0.465|L: 0.409,Acc 99.2,AUC 0.762,KS 0.437
 15. L: 0.053,Acc 99.1,AUC 0.826,KS 0.495|L: 0.041,Acc 99.3,AUC 0.823,KS 0.510|L: 0.344,Acc 99.2,AUC 0.749,KS 0.437
 16. L: 0.053,Acc 99.1,AUC 0.832,KS 0.504|L: 0.041,Acc 99.3,AUC 0.804,KS 0.455|L: 0.357,Acc 99.2,AUC 0.728,KS 0.417
 17. L: 0.054,Acc 99.1,AUC 0.825,KS 0.489|L: 0.041,Acc 99.3,AUC 0.801,KS 0.445|L: 0.393,Acc 99.2,AUC 0.738,KS 0.400
 18. L: 0.052,Acc 99.1,AUC 0.815,KS 0.465|L: 0.042,Acc 99.3,AUC 0.777,KS

 30. L: 0.050,Acc 99.2,AUC 0.828,KS 0.506|L: 0.050,Acc 99.2,AUC 0.749,KS 0.385|L: 0.274,Acc 99.2,AUC 0.808,KS 0.483
 31. L: 0.050,Acc 99.2,AUC 0.837,KS 0.519|L: 0.050,Acc 99.2,AUC 0.757,KS 0.421|L: 0.267,Acc 99.2,AUC 0.823,KS 0.527
 32. L: 0.050,Acc 99.2,AUC 0.841,KS 0.535|L: 0.051,Acc 99.2,AUC 0.759,KS 0.410|L: 0.301,Acc 99.2,AUC 0.824,KS 0.499
 33. L: 0.050,Acc 99.2,AUC 0.810,KS 0.474|L: 0.050,Acc 99.2,AUC 0.756,KS 0.421|L: 0.280,Acc 99.2,AUC 0.801,KS 0.453
 34. L: 0.049,Acc 99.2,AUC 0.843,KS 0.535|L: 0.053,Acc 99.2,AUC 0.757,KS 0.419|L: 0.306,Acc 99.2,AUC 0.829,KS 0.492
 35. L: 0.049,Acc 99.2,AUC 0.849,KS 0.543|L: 0.051,Acc 99.2,AUC 0.757,KS 0.421|L: 0.266,Acc 99.2,AUC 0.829,KS 0.528
 36. L: 0.049,Acc 99.2,AUC 0.849,KS 0.538|L: 0.051,Acc 99.2,AUC 0.751,KS 0.402|L: 0.258,Acc 99.2,AUC 0.827,KS 0.520
 37. L: 0.049,Acc 99.2,AUC 0.847,KS 0.530|L: 0.050,Acc 99.2,AUC 0.752,KS 0.404|L: 0.267,Acc 99.2,AUC 0.817,KS 0.509
 38. L: 0.049,Acc 99.2,AUC 0.849,KS 0.545|L: 0.051,Acc 99.2,AUC 0.750,KS

 50. L: 0.046,Acc 99.2,AUC 0.870,KS 0.562|L: 0.062,Acc 99.0,AUC 0.746,KS 0.410|L: 0.369,Acc 99.2,AUC 0.789,KS 0.452
Valid AUC: 0.771, Valid KS: 0.466
Test AUC: 0.809, Test KS: 0.469
fold 8
  1. L: 0.104,Acc 99.0,AUC 0.754,KS 0.382|L: 0.066,Acc 99.1,AUC 0.709,KS 0.305|L: 0.189,Acc 99.2,AUC 0.704,KS 0.313
  2. L: 0.057,Acc 99.2,AUC 0.786,KS 0.440|L: 0.060,Acc 99.1,AUC 0.720,KS 0.328|L: 0.163,Acc 99.2,AUC 0.746,KS 0.352
  3. L: 0.055,Acc 99.2,AUC 0.793,KS 0.422|L: 0.062,Acc 99.1,AUC 0.741,KS 0.343|L: 0.172,Acc 99.2,AUC 0.768,KS 0.387
  4. L: 0.054,Acc 99.2,AUC 0.793,KS 0.429|L: 0.061,Acc 99.1,AUC 0.726,KS 0.296|L: 0.167,Acc 99.2,AUC 0.768,KS 0.433
  5. L: 0.053,Acc 99.2,AUC 0.796,KS 0.450|L: 0.057,Acc 99.1,AUC 0.743,KS 0.363|L: 0.148,Acc 99.2,AUC 0.757,KS 0.402
  6. L: 0.055,Acc 99.1,AUC 0.798,KS 0.466|L: 0.058,Acc 99.1,AUC 0.737,KS 0.360|L: 0.154,Acc 99.2,AUC 0.745,KS 0.414
  7. L: 0.053,Acc 99.2,AUC 0.752,KS 0.399|L: 0.063,Acc 99.1,AUC 0.685,KS 0.275|L: 0.173,Acc 99.2,AUC 0.715,KS 0.375

 20. L: 0.051,Acc 99.1,AUC 0.832,KS 0.502|L: 0.045,Acc 99.3,AUC 0.762,KS 0.405|L: 0.246,Acc 99.2,AUC 0.783,KS 0.439
 21. L: 0.051,Acc 99.1,AUC 0.841,KS 0.519|L: 0.045,Acc 99.3,AUC 0.758,KS 0.401|L: 0.233,Acc 99.2,AUC 0.786,KS 0.450
 22. L: 0.051,Acc 99.1,AUC 0.839,KS 0.513|L: 0.044,Acc 99.3,AUC 0.772,KS 0.421|L: 0.234,Acc 99.2,AUC 0.778,KS 0.460
 23. L: 0.051,Acc 99.1,AUC 0.839,KS 0.523|L: 0.045,Acc 99.3,AUC 0.757,KS 0.390|L: 0.246,Acc 99.2,AUC 0.774,KS 0.409
 24. L: 0.051,Acc 99.1,AUC 0.844,KS 0.526|L: 0.046,Acc 99.3,AUC 0.755,KS 0.416|L: 0.234,Acc 99.2,AUC 0.780,KS 0.431
 25. L: 0.051,Acc 99.1,AUC 0.839,KS 0.513|L: 0.045,Acc 99.3,AUC 0.766,KS 0.422|L: 0.234,Acc 99.2,AUC 0.791,KS 0.450
 26. L: 0.050,Acc 99.1,AUC 0.845,KS 0.515|L: 0.045,Acc 99.3,AUC 0.757,KS 0.411|L: 0.244,Acc 99.2,AUC 0.784,KS 0.430
 27. L: 0.050,Acc 99.1,AUC 0.853,KS 0.539|L: 0.046,Acc 99.3,AUC 0.758,KS 0.410|L: 0.268,Acc 99.2,AUC 0.765,KS 0.444
 28. L: 0.050,Acc 99.1,AUC 0.853,KS 0.539|L: 0.046,Acc 99.3,AUC 0.759,KS

 40. L: 0.047,Acc 99.2,AUC 0.883,KS 0.596|L: 0.051,Acc 99.2,AUC 0.774,KS 0.448|L: 0.097,Acc 99.2,AUC 0.715,KS 0.373
 41. L: 0.047,Acc 99.2,AUC 0.862,KS 0.565|L: 0.049,Acc 99.2,AUC 0.782,KS 0.426|L: 0.093,Acc 99.2,AUC 0.735,KS 0.412
 42. L: 0.047,Acc 99.2,AUC 0.886,KS 0.596|L: 0.051,Acc 99.2,AUC 0.767,KS 0.416|L: 0.102,Acc 99.2,AUC 0.713,KS 0.373
 43. L: 0.047,Acc 99.2,AUC 0.873,KS 0.583|L: 0.051,Acc 99.2,AUC 0.778,KS 0.438|L: 0.100,Acc 99.2,AUC 0.726,KS 0.390
 44. L: 0.047,Acc 99.2,AUC 0.881,KS 0.593|L: 0.050,Acc 99.2,AUC 0.773,KS 0.413|L: 0.095,Acc 99.2,AUC 0.724,KS 0.401
 45. L: 0.046,Acc 99.2,AUC 0.890,KS 0.616|L: 0.052,Acc 99.2,AUC 0.759,KS 0.407|L: 0.100,Acc 99.2,AUC 0.712,KS 0.378
 46. L: 0.047,Acc 99.2,AUC 0.890,KS 0.608|L: 0.051,Acc 99.2,AUC 0.761,KS 0.409|L: 0.098,Acc 99.2,AUC 0.715,KS 0.387
 47. L: 0.047,Acc 99.2,AUC 0.889,KS 0.606|L: 0.051,Acc 99.2,AUC 0.765,KS 0.431|L: 0.098,Acc 99.2,AUC 0.712,KS 0.389
 48. L: 0.047,Acc 99.2,AUC 0.890,KS 0.601|L: 0.051,Acc 99.2,AUC 0.766,KS

In [None]:
def blindEvaluate():
    test_file = 'test.csv'
    params = param()
    test_X = loadTest(test_file)
    test_X = np.expand_dims(test_X, axis = 1)
    test_pred_label, test_pred_prob = blindPredict(params, test_X)
    return test_pred_label, test_pred_prob

# pred_label, pred_prob = blindEvaluate()
# with open('predict-probability.csv', 'w') as f:
#     f.write('label,good-prob,bad-prob\n')
#     for label, prob in zip(pred_label, pred_prob):
#         f.write('%d,%d,%d\n' % (label, prob, 1 - prob))