In [1]:
import xgboost as xgb
from xgboost import XGBClassifier
import keras
import pickle
import numpy as np
import pandas as pd
from tflearn.data_utils import pad_sequences
from collections import defaultdict
from sklearn.metrics import f1_score
from keras.layers import *
from keras.models import Sequential
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF
from keras import initializers
from sklearn.model_selection import train_test_split
import os
import gc

config = tf.ConfigProto()  
config.gpu_options.allow_growth=True   #不全部占满显存, 按需分配
sess = tf.Session(config=config)

KTF.set_session(sess)

EMBEDDING_DIM = 300
VOCAB_LENGTH = 3000

from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())


pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2993060671581812978
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 6431991067815918392
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 13557213458687819206
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 7648542720
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3199392792380803727
physical_device_desc: "device: 0, name: TITAN X (Pascal), pci bus id: 0000:02:00.0, compute capability: 6.1"
]


In [2]:

def get_cnn_output(input_x1, params):
    # cnn0模块，kernel_size = 2
    conv0_1 = Convolution1D(256, 2, padding='same')(input_x1)
    bn0_1 = BatchNormalization()(conv0_1)
    relu0_1 = Activation('relu')(bn0_1)
    conv0_2 = Convolution1D(128, 2, padding='same')(relu0_1)
    bn0_2 = BatchNormalization()(conv0_2)
    relu0_2 = Activation('relu')(bn0_2)
    cnn0 = MaxPool1D(pool_size=4)(relu0_2)
    # cnn1模块，kernel_size = 3
    conv1_1 = Convolution1D(256, 3, padding='same')(input_x1)
    bn1_1 = BatchNormalization()(conv1_1)
    relu1_1 = Activation('relu')(bn1_1)
    conv1_2 = Convolution1D(128, 3, padding='same')(relu1_1)
    bn1_2 = BatchNormalization()(conv1_2)
    relu1_2 = Activation('relu')(bn1_2)
    cnn1 = MaxPool1D(pool_size=4)(relu1_2)
    # cnn2模块，kernel_size = 4
    conv2_1 = Convolution1D(256, 4, padding='same')(input_x1)
    bn2_1 = BatchNormalization()(conv2_1)
    relu2_1 = Activation('relu')(bn2_1)
    conv2_2 = Convolution1D(128, 4, padding='same')(relu2_1)
    bn2_2 = BatchNormalization()(conv2_2)
    relu2_2 = Activation('relu')(bn2_2)
    cnn2 = MaxPool1D(pool_size=4)(relu2_2)
    # cnn3模块，kernel_size = 5
    conv3_1 = Convolution1D(256, 5, padding='same')(input_x1)
    bn3_1 = BatchNormalization()(conv3_1)
    relu3_1 = Activation('relu')(bn3_1)
    conv3_2 = Convolution1D(128, 5, padding='same')(relu3_1)
    bn3_2 = BatchNormalization()(conv3_2)
    relu3_2 = Activation('relu')(bn3_2)
    cnn3 = MaxPool1D(pool_size=4)(relu3_2)
    # 拼接三个模块
    cnn = concatenate([cnn0,cnn1,cnn2,cnn3], axis=-1)
    return cnn

def create_model_dssm(params):
    # dssm是一个简单的双塔模型
    input1 = keras.Input(shape=(MAX_SEQUENCE_LENGTH_WORD, ), dtype='int32')
    input2 = keras.Input(shape=(MAX_SEQUENCE_LENGTH_WORD, ), dtype='int32')
    input1c = keras.Input(shape=(MAX_SEQUENCE_LENGTH_CHAR, ), dtype='int32')
    input2c = keras.Input(shape=(MAX_SEQUENCE_LENGTH_CHAR, ), dtype='int32')
    
    embedder = Embedding(5002, 300, input_length = MAX_SEQUENCE_LENGTH_WORD, 
                         weights = [embedding_matrix_word])# , mask_zero=True, trainable = False
    # CuDNN
    lstm0 = LSTM(params['num_lstm'],
        dropout=params['lstm_dropout_rate'],
        recurrent_dropout=params['lstm_dropout_rate'],
                 return_sequences = True)
    lstm1 = Bidirectional(LSTM(params['num_lstm'],
        dropout=params['lstm_dropout_rate'],
        recurrent_dropout=params['lstm_dropout_rate']))
    lstm2 = LSTM(params['num_lstm'])
    den = Dense(64,activation = 'tanh')

    att1 = Lambda(lambda x: K.max(x,axis = 1))
    v1 = embedder(input1)
    v2 = embedder(input2)
    v11 = lstm1(v1)
    v22 = lstm1(v2)
    v1ls = lstm2(lstm0(v1))
    v2ls = lstm2(lstm0(v2))
    v1 = Concatenate(axis=1)([att1(v1),v11])
    v2 = Concatenate(axis=1)([att1(v2),v22])
    
    embedder = Embedding(2087, 300, input_length = MAX_SEQUENCE_LENGTH_CHAR, 
                         weights = [embedding_matrix_char])# , mask_zero=True, trainable = False
    lstm1c = Bidirectional(LSTM(params['num_lstm']))
    att1c = Lambda(lambda x: K.max(x,axis = 1))
    v1c = embedder(input1c)
    v2c = embedder(input2c)
    v11c = lstm1c(v1c)
    v22c = lstm1c(v2c)
    v1c = Concatenate(axis=1)([att1c(v1c),v11c])
    v2c = Concatenate(axis=1)([att1c(v2c),v22c])


    mul = Multiply()([v1,v2])
    sub = Lambda(lambda x: K.abs(x))(Subtract()([v1,v2]))
    maximum = Maximum()([Multiply()([v1,v1]),Multiply()([v2,v2])])
    mulc = Multiply()([v1c,v2c])
    subc = Lambda(lambda x: K.abs(x))(Subtract()([v1c,v2c]))
    maximumc = Maximum()([Multiply()([v1c,v1c]),Multiply()([v2c,v2c])])
    sub2 = Lambda(lambda x: K.abs(x))(Subtract()([v1ls,v2ls]))
    matchlist = Concatenate(axis=1)([mul,sub,mulc,subc,maximum,maximumc,sub2])
    matchlist = Dropout(0.2)(matchlist)
    

    merged = Concatenate(axis=1)([Dense(32,activation = 'relu')(matchlist),
                                     Dense(48,activation = 'sigmoid')(matchlist)])
    
    merged = keras.layers.Dense(params['num_dense'], activation='relu')(merged)
    # merged = keras.layers.Dropout(params['dense_dropout_rate'])(merged)
    merged = keras.layers.BatchNormalization()(merged)
    res = Dense(1, activation = 'sigmoid')(merged)


    model = keras.Model(inputs=[input1c, input2c, input1, input2], outputs=res)
    model.compile(optimizer= keras.optimizers.Adam(lr = 0.001), loss="binary_crossentropy", 
        metrics=['accuracy'])
    return model

### CNN
def create_model_cnn(params):
    input1 = keras.Input(shape=(MAX_SEQUENCE_LENGTH_WORD, ), dtype='int32')
    input2 = keras.Input(shape=(MAX_SEQUENCE_LENGTH_WORD, ), dtype='int32')
    input1c = keras.Input(shape=(MAX_SEQUENCE_LENGTH_CHAR, ), dtype='int32')
    input2c = keras.Input(shape=(MAX_SEQUENCE_LENGTH_CHAR, ), dtype='int32')
    
    embedder = Embedding(5002, 300, input_length = MAX_SEQUENCE_LENGTH_WORD, 
                         weights = [embedding_matrix_word], trainable = False)#mask_zero=True, 
    v1 = embedder(input1)
    v2 = embedder(input2)

    att1 = Lambda(lambda x: K.max(x,axis = 1))
    
    v11 = get_cnn_output(v1, params)
    v22 = get_cnn_output(v2, params)
    
    v1 = Concatenate(axis=1)([att1(v1),att1(v11)])
    v2 = Concatenate(axis=1)([att1(v2),att1(v22)])
    
    embedder = Embedding(2087, 300, input_length = MAX_SEQUENCE_LENGTH_CHAR, 
                         weights = [embedding_matrix_char], trainable = False)# , mask_zero=True
    att1c = Lambda(lambda x: K.max(x,axis = 1))
    v1c = embedder(input1c)
    v2c = embedder(input2c)
    v11c = get_cnn_output(v1c, params)
    v22c = get_cnn_output(v2c, params)
    v1c = Concatenate(axis=1)([att1c(v1c),att1(v11c)])
    v2c = Concatenate(axis=1)([att1c(v2c),att1(v22c)])
    
    mul = Multiply()([v1,v2])
    sub = Lambda(lambda x: K.abs(x))(Subtract()([v1,v2]))
    maximum = Maximum()([Multiply()([v1,v1]),Multiply()([v2,v2])])
    mulc = Multiply()([v1c,v2c])
    subc = Lambda(lambda x: K.abs(x))(Subtract()([v1c,v2c]))
    maximumc = Maximum()([Multiply()([v1c,v1c]),Multiply()([v2c,v2c])])
    matchlist = Concatenate(axis=1)([mul,sub,mulc,subc,maximum,maximumc])
    matchlist = Dropout(0.2)(matchlist)

    merged = Concatenate(axis=1)([Dense(32,activation = 'relu')(matchlist),
                                     Dense(48,activation = 'sigmoid')(matchlist)])
    
    merged = keras.layers.Dense(params['num_dense'], activation='relu')(merged)
    merged = keras.layers.Dropout(params['dense_dropout_rate'])(merged)
    merged = keras.layers.BatchNormalization()(merged)
    res = Dense(1, activation = 'sigmoid')(merged)


    model = keras.Model(inputs=[input1c, input2c, input1, input2], outputs=res)
    model.compile(optimizer= keras.optimizers.Adam(lr = 0.001), loss="binary_crossentropy", 
        metrics=['accuracy'])
    return model


In [3]:
train_data_word = pd.read_csv('../data/aux/train_word_indexvec.csv')
train_data_char = pd.read_csv('../data/aux/train_char_indexvec.csv')
word_squence_ques1_char = list(train_data_char.iloc[:, 1])
word_squence_ques1_char = [[int(im) for im in item.split(' ')] for item in word_squence_ques1_char]
word_squence_ques2_char = list(train_data_char.iloc[:, 2])
word_squence_ques2_char = [[int(im) for im in item.split(' ')] for item in word_squence_ques2_char]

word_squence_ques1_word = list(train_data_word.iloc[:, 1])
word_squence_ques1_word = [[int(im) for im in item.split(' ')] for item in word_squence_ques1_word]
word_squence_ques2_word = list(train_data_word.iloc[:, 2])
word_squence_ques2_word = [[int(im) for im in item.split(' ')] for item in word_squence_ques2_word]

MAX_SEQUENCE_LENGTH_WORD = 50 # char 40 word 30
MAX_SEQUENCE_LENGTH_CHAR = 60 # char 40 word 30
word_squence_ques1_char_len = [len(item) for item in word_squence_ques1_char]
word_squence_ques2_char_len = [len(item) for item in word_squence_ques2_char]
word_squence_ques1_word_len = [len(item) for item in word_squence_ques1_word]
word_squence_ques2_word_len = [len(item) for item in word_squence_ques2_word]
word_squence_ques1_char = pad_sequences(word_squence_ques1_char, maxlen=MAX_SEQUENCE_LENGTH_CHAR)
word_squence_ques2_char = pad_sequences(word_squence_ques2_char, maxlen=MAX_SEQUENCE_LENGTH_CHAR)
word_squence_ques1_word = pad_sequences(word_squence_ques1_word, maxlen=MAX_SEQUENCE_LENGTH_WORD)
word_squence_ques2_word = pad_sequences(word_squence_ques2_word, maxlen=MAX_SEQUENCE_LENGTH_WORD)



embedding_matrix_char = np.load('../data/aux/vec_char.npy')

embedding_matrix_word = np.load('../data/aux/vec_word.npy')

In [4]:
lda_feas_char = pd.read_csv('../lda_features_char.csv')
lda_feas_char = lda_feas_char.values
print(lda_feas_char.shape)
ngram_feas_char = pd.read_csv('../ngram_features_char.csv')
ngram_feas_char = ngram_feas_char.values
print(ngram_feas_char.shape)
simsummary_feas_char = pd.read_csv('../simsummary_features_char.csv')
simsummary_feas_char = simsummary_feas_char.values
print(simsummary_feas_char.shape)
lda_feas_word = pd.read_csv('../lda_features_word.csv')
lda_feas_word = lda_feas_word.values
print(lda_feas_word.shape)
ngram_feas_word = pd.read_csv('../ngram_features_word.csv')
ngram_feas_word = ngram_feas_word.values
print(ngram_feas_word.shape)
simsummary_feas_word = pd.read_csv('../simsummary_features_word.csv')
simsummary_feas_word = simsummary_feas_word.values
print(simsummary_feas_word.shape)
tfidf_feas = pd.read_csv('../tfidf_features.csv')
tfidf_feas = tfidf_feas.values
print(tfidf_feas.shape)
all_feas_char = np.concatenate([ngram_feas_char,lda_feas_char,simsummary_feas_char], axis=1)
all_feas_word = np.concatenate([ngram_feas_word, lda_feas_word, simsummary_feas_word], axis=1)
# , tfidf_feas
all_feas = np.concatenate([all_feas_word, tfidf_feas], axis=1)
print all_feas.shape

(98976, 2)
(98976, 16)
(98976, 5)
(98976, 2)
(98976, 16)
(98976, 5)
(98976, 4)
(98976, 27)


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.cross_validation import StratifiedKFold
from keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau

class Metrics(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.val_f1s = []

    def on_epoch_end(self, epoch, logs={}):
        val_predict = (np.asarray(self.model.predict(self.validation_data[:-3])))#
        val_predict = [1 if item > 0.5 else 0 for item in val_predict]
        val_targ = self.validation_data[-3]
        _val_f1 = f1_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        print "epoch end, f1-score: ", _val_f1
metrics = Metrics()
class_weight = {0: 1.,
                1: 5.,}

y_label = train_data_char.iloc[:, 3]

def stacking_model(X_train, X_val, y_train, y_val):
    clf = LogisticRegression(class_weight={0: 1.,1: 5,})
    clf.fit(X_train, y_train)
    f1 = f1_score(y_val, clf.predict(X_val))
    print 'weights: ', f1
    return clf.predict_proba(X_val)[:, 1].reshape(-1,1)
            

NUM_FOLDS = 5
kfold = StratifiedKFold(
    y_label,
    n_folds=NUM_FOLDS,
    shuffle=True,
    random_state=30
)

stacking_res = []

for X in [ngram_feas_char,lda_feas_char,simsummary_feas_char, 
        ngram_feas_word, lda_feas_word, simsummary_feas_word,
        tfidf_feas]:
    train_oofp = np.zeros((X.shape[0], 1))
    for fold_num, (ix_train, ix_val) in enumerate(kfold):
        X_train = X[ix_train]
        X_val = X[ix_val]
        y_train = y_label[ix_train]
        y_val = y_label[ix_val]
        train_oofp[ix_val] = (stacking_model(X_train, X_val, y_train, y_val))
    stacking_res.append(train_oofp)



weights:  0.37521274949713757
weights:  0.3723700495049505
weights:  0.3766884200940963
weights:  0.37189891027127286
weights:  0.3794212218649518
weights:  0.3489984004874705
weights:  0.3496300816108611
weights:  0.3490580487057742
weights:  0.3436352509179927
weights:  0.35096774193548386
weights:  0.3974818571303663
weights:  0.40378051982147545
weights:  0.40455740578439964
weights:  0.4006353688669255
weights:  0.4038128249566724
weights:  0.3370007535795026
weights:  0.3399484926526284
weights:  0.33963396339633967
weights:  0.3385537654552267
weights:  0.33866705813270703
weights:  0.33136719781670926
weights:  0.332860184528034
weights:  0.33968502566779774
weights:  0.32959463039830433
weights:  0.33044695180617506
weights:  0.33928943892094837
weights:  0.3360961458765023
weights:  0.34210158108297184
weights:  0.3383160675797817
weights:  0.34126603113641035
weights:  0.3759075418580531
weights:  0.38115277673404974
weights:  0.3849384717412129
weights:  0.37946001618480096

In [6]:
X_train_q1 = word_squence_ques1_char
X_train_q2 = word_squence_ques2_char
X_train_q10 = word_squence_ques1_word
X_train_q20 = word_squence_ques2_word
y_train = y_label
def parse_train_data(ix_train, ix_val):
    
    X_fold_train_q1 = np.vstack([X_train_q1[ix_train], X_train_q2[ix_train]])
    X_fold_train_q2 = np.vstack([X_train_q2[ix_train], X_train_q1[ix_train]])
    X_fold_train_q10 = np.vstack([X_train_q10[ix_train], X_train_q20[ix_train]])
    X_fold_train_q20 = np.vstack([X_train_q20[ix_train], X_train_q10[ix_train]])
   
    X_fold_val_q1 = X_train_q1[ix_val]# np.vstack([X_train_q1[ix_val], X_train_q2[ix_val]])
    X_fold_val_q2 = X_train_q2[ix_val]# np.vstack([X_train_q2[ix_val], X_train_q1[ix_val]])
    X_fold_val_q10 = X_train_q10[ix_val]# np.vstack([X_train_q10[ix_val], X_train_q20[ix_val]])
    X_fold_val_q20 = X_train_q20[ix_val]# np.vstack([X_train_q20[ix_val], X_train_q10[ix_val]])
    
    # Ground truth should also be "mirrored".
    y_fold_train = np.concatenate([y_train[ix_train], y_train[ix_train]])
    y_fold_val = y_train[ix_val]# np.concatenate([y_train[ix_val], y_train[ix_val]])
    return [X_fold_train_q1, X_fold_train_q2, X_fold_train_q10, X_fold_train_q20], y_fold_train, [
        X_fold_val_q1, X_fold_val_q2, X_fold_val_q10, X_fold_val_q20], y_fold_val

model_params = {
    'dense_dropout_rate': 0.3,
    'lstm_dropout_rate': 0.3,
    'num_dense': 150,
    'num_lstm': 128,
    'num_filters':32
}
BATCH_SIZE = 256
MAX_EPOCHS = 100
for model_func in [create_model_dssm, create_model_cnn]:
    train_oofp = np.zeros((y_train.shape[0], 1))
    for fold_num, (ix_train, ix_val) in enumerate(kfold):
        train_x, train_y, val_x, val_y = parse_train_data(ix_train, ix_val)
        model = model_func(model_params)
        model.fit(train_x, train_y,
                    validation_data=(val_x, val_y),
                  batch_size=BATCH_SIZE,
                    epochs=MAX_EPOCHS,
                    verbose=1,
                    shuffle=True,
                    class_weight=class_weight,
                    callbacks=[
                        # Stop training when the validation loss stops improving.
                        EarlyStopping(
                            monitor='val_loss',
                            min_delta=0.001,
                            patience=20,
                            verbose=1,
                            mode='auto',
                        ),
                        ReduceLROnPlateau(
                            monitor='val_loss', 
                            factor=0.1, 
                            patience=5, 
                            min_delta=0.0001,
                            cooldown=1, 
                        ),
                        ModelCheckpoint(
                            os.path.join('../data/checkpoints/stack', str(fold_num) + 
                                         str(model_func.func_name) + 'weights.hdf5'),
                            monitor='val_loss',
                            save_best_only=True,
                            verbose=2,
                            mode='auto',
                        ),
                        metrics
                    ],
                )
        train_oofp[ix_val] = model.predict(val_x)
        
        K.clear_session()
        del train_x, val_x, train_y, val_y
        del model
        gc.collect()
    stacking_res.append(train_oofp)

Train on 158360 samples, validate on 19796 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.38865, saving model to ../data/checkpoints/stack/0create_model_cnnweights.hdf5
epoch end, f1-score:  0.5031277926720286
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.38865
epoch end, f1-score:  0.5009154861713405
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.38865
epoch end, f1-score:  0.49280418665503706
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.38865
epoch end, f1-score:  0.5039659508609016
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.38865
epoch end, f1-score:  0.5029393877964727
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.38865
epoch end, f1-score:  0.4927887855506133
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.38865
epoch end, f1-score:  0.49364227182820003
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.38865
epoch end, f1-score:  0.4893922018348623
Epoch 9/100

Epoch 00009: val_loss

epoch end, f1-score:  0.49458327314747935
Epoch 14/100

Epoch 00014: val_loss did not improve from 0.39651
epoch end, f1-score:  0.49134545454545453
Epoch 15/100

Epoch 00015: val_loss did not improve from 0.39651
epoch end, f1-score:  0.4908136482939633
Epoch 16/100

Epoch 00016: val_loss did not improve from 0.39651
epoch end, f1-score:  0.49263218722912455
Epoch 17/100

Epoch 00017: val_loss did not improve from 0.39651
epoch end, f1-score:  0.49252214316828813
Epoch 18/100

Epoch 00018: val_loss did not improve from 0.39651
epoch end, f1-score:  0.4899211218229623
Epoch 19/100

Epoch 00019: val_loss did not improve from 0.39651
epoch end, f1-score:  0.4895330112721417
Epoch 20/100

Epoch 00020: val_loss did not improve from 0.39651
epoch end, f1-score:  0.48967032967032964
Epoch 21/100

Epoch 00021: val_loss did not improve from 0.39651
epoch end, f1-score:  0.48929932571093526
Epoch 00021: early stopping
Train on 158362 samples, validate on 19795 samples
Epoch 1/100

Epoch 00001: 


Epoch 00004: val_loss did not improve from 0.44351
epoch end, f1-score:  0.4896947156361382
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.44351
epoch end, f1-score:  0.5254216721271691
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.44351
epoch end, f1-score:  0.5219479653102067
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.44351
epoch end, f1-score:  0.49594229035166815
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.44351
epoch end, f1-score:  0.5222709746044409
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.44351
epoch end, f1-score:  0.5240713423628806
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.44351
epoch end, f1-score:  0.5224522721161602
Epoch 11/100

Epoch 00011: val_loss did not improve from 0.44351
epoch end, f1-score:  0.48986434994665445
Epoch 12/100

Epoch 00012: val_loss did not improve from 0.44351
epoch end, f1-score:  0.5046323103647944
Epoch 13/100

Epoch 00013: val_loss did not improve from 0.443


Epoch 00017: val_loss did not improve from 0.42561
epoch end, f1-score:  0.4961149391584812
Epoch 18/100

Epoch 00018: val_loss did not improve from 0.42561
epoch end, f1-score:  0.494169741697417
Epoch 19/100

Epoch 00019: val_loss did not improve from 0.42561
epoch end, f1-score:  0.4954371504268473
Epoch 20/100

Epoch 00020: val_loss did not improve from 0.42561
epoch end, f1-score:  0.4953560371517028
Epoch 21/100

Epoch 00021: val_loss did not improve from 0.42561
epoch end, f1-score:  0.49655576725780454
Epoch 00021: early stopping
Train on 158360 samples, validate on 19796 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.68400, saving model to ../data/checkpoints/stack/0create_model_cnnweights.hdf5
epoch end, f1-score:  0.3621262458471761
Epoch 2/100

Epoch 00002: val_loss improved from 0.68400 to 0.62831, saving model to ../data/checkpoints/stack/0create_model_cnnweights.hdf5
epoch end, f1-score:  0.4216065115514265
Epoch 3/100

Epoch 00003: val_loss did not i


Epoch 00029: val_loss did not improve from 0.42848
epoch end, f1-score:  0.5083154506437769
Epoch 00029: early stopping
Train on 158362 samples, validate on 19795 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.68023, saving model to ../data/checkpoints/stack/1create_model_cnnweights.hdf5
epoch end, f1-score:  0.3730458221024259
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.68023
epoch end, f1-score:  0.36982397317686505
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.68023
epoch end, f1-score:  0.38968281631230395
Epoch 4/100

Epoch 00004: val_loss improved from 0.68023 to 0.46233, saving model to ../data/checkpoints/stack/1create_model_cnnweights.hdf5
epoch end, f1-score:  0.5036004114755972
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.46233
epoch end, f1-score:  0.49432813796919667
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.46233
epoch end, f1-score:  0.4992205410362219
Epoch 7/100

Epoch 00007: val_loss improved f


Epoch 00005: val_loss did not improve from 0.46021
epoch end, f1-score:  0.4796000645057249
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.46021
epoch end, f1-score:  0.4480983700314556
Epoch 7/100

Epoch 00007: val_loss improved from 0.46021 to 0.39718, saving model to ../data/checkpoints/stack/2create_model_cnnweights.hdf5
epoch end, f1-score:  0.5093877551020408
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.39718
epoch end, f1-score:  0.5224036123654047
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.39718
epoch end, f1-score:  0.5071689626089401
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.39718
epoch end, f1-score:  0.5240209931368591
Epoch 11/100

Epoch 00011: val_loss did not improve from 0.39718
epoch end, f1-score:  0.520174482006543
Epoch 12/100

Epoch 00012: val_loss did not improve from 0.39718
epoch end, f1-score:  0.524634726469589
Epoch 13/100

Epoch 00013: val_loss did not improve from 0.39718
epoch end, f1-score:  0.53490


Epoch 00011: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5354887057060271
Epoch 12/100

Epoch 00012: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5336174746936602
Epoch 13/100

Epoch 00013: val_loss did not improve from 0.45525
epoch end, f1-score:  0.532418818990462
Epoch 14/100

Epoch 00014: val_loss did not improve from 0.45525
epoch end, f1-score:  0.533811820118874
Epoch 15/100

Epoch 00015: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5333181455746668
Epoch 16/100

Epoch 00016: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5333979876348648
Epoch 17/100

Epoch 00017: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5322115384615385
Epoch 18/100

Epoch 00018: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5313998082454459
Epoch 19/100

Epoch 00019: val_loss did not improve from 0.45525
epoch end, f1-score:  0.5279766962009953
Epoch 20/100

Epoch 00020: val_loss did not improve from 0.45


Epoch 00019: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5164367667912854
Epoch 20/100

Epoch 00020: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5159841479524439
Epoch 21/100

Epoch 00021: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5166537567776918
Epoch 22/100

Epoch 00022: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5165354330708661
Epoch 23/100

Epoch 00023: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5141663402532968
Epoch 24/100

Epoch 00024: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5132743362831859
Epoch 25/100

Epoch 00025: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5165615141955836
Epoch 26/100

Epoch 00026: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5162478621234048
Epoch 27/100

Epoch 00027: val_loss did not improve from 0.41848
epoch end, f1-score:  0.5178925128746863
Epoch 28/100

Epoch 00028: val_loss did not improve from 0.

In [8]:
x_train_stack = np.concatenate(stacking_res, axis=1)

In [7]:
len(stacking_res)

9

In [9]:
# 设置boosting迭代计算次数  
Xtr, Xv, ytr, yv = train_test_split(x_train_stack, y_train, test_size=0.2, random_state=2000)
dtrain = xgb.DMatrix(Xtr, label=ytr)
dvalid = xgb.DMatrix(Xv, label=yv)
num_round = 2  
param = {'max_depth':2, 'eta':1, 'silent':0, 'objective':'binary:logistic' }  


THRESHOLD = 0.5

#自定义马修斯相关系数
def evalacc(preds, dtrain):
    labels = dtrain.get_label()
    _val_f1 = f1_score(labels, preds > THRESHOLD)
    return 'F1', _val_f1

xgb_params = {
    'seed': 0,
    'colsample_bytree': 0.5,
    'silent': 1,
    'subsample': 0.5,
    'learning_rate': 0.001,
    'objective': 'binary:logistic',
    'max_depth': 2,
    'min_child_weight': 1,
     'scale_pos_weight':5,
}
watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
model_xgb = xgb.train(xgb_params, dtrain, 500, watchlist, early_stopping_rounds=200,
                  maximize=True, verbose_eval=15,  feval=evalacc)

[0]	train-error:0.314372	valid-error:0.317741	train-F1:0.406627	valid-F1:0.40638
Multiple eval metrics have been passed: 'valid-F1' will be used for early stopping.

Will train until valid-F1 hasn't improved in 200 rounds.
[15]	train-error:0.255633	valid-error:0.255961	train-F1:0.527775	valid-F1:0.53518
[30]	train-error:0.253827	valid-error:0.254395	train-F1:0.529937	valid-F1:0.536365
[45]	train-error:0.254471	valid-error:0.254799	train-F1:0.530074	valid-F1:0.536823
[60]	train-error:0.254092	valid-error:0.254445	train-F1:0.530687	valid-F1:0.538102
[75]	train-error:0.255418	valid-error:0.256365	train-F1:0.528512	valid-F1:0.535639
[90]	train-error:0.255481	valid-error:0.256264	train-F1:0.529306	valid-F1:0.536924
[105]	train-error:0.255759	valid-error:0.255405	train-F1:0.528092	valid-F1:0.538012
[120]	train-error:0.255948	valid-error:0.2549	train-F1:0.527929	valid-F1:0.538166
[135]	train-error:0.256151	valid-error:0.255961	train-F1:0.527974	valid-F1:0.536964
[150]	train-error:0.25581	vali

In [None]:

xgb_params = {
    'seed': 0,
    'colsample_bytree': 0.5,
    'silent': 1,
    'subsample': 0.5,
    'learning_rate': 0.001,
    'objective': 'binary:logistic',
    'max_depth': 2,
    'min_child_weight': 1,
}

model = XGBClassifier(**xgb_params)

model.fit(Xtr, ytr)

In [None]:
dtest = xgb.DMatrix(Xv)
predicted_test_xgb = model_xgb.predict(dtest)

In [None]:
model_xgb.save_model('../xgb.model')

'create_model_dssm'