In [1]:
data_dir = './data/mlboot_dataset/'
model_name = 'nn_3br_fm_style_bin'
results_dir = './results/'

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import scipy.sparse as sp

In [2]:
df = pd.read_csv(data_dir + 'preprocessed_new.csv') 
q = pd.read_csv(data_dir + 'sessions.csv')
df = df.merge(q, on='uid', how='left')
del q
y = pd.read_table(data_dir + 'mlboot_train_answers.tsv')
y.columns = ['uid','target']
df = df.merge(y, on='uid', how='left')

df_train_index = df[~df.target.isnull()].index
df_test_index = df[df.target.isnull()].index

mat1 = sp.load_npz(data_dir+'dmat1.npz').tolil()
mat2 = sp.load_npz(data_dir+'dmat2.npz').tolil()
mat3 = sp.load_npz(data_dir+'dmat3.npz').tolil()
print(mat1.shape, mat2.shape, mat3.shape)

train_mat1 = mat1[df_train_index.tolist()]
test_mat1 = mat1[df_test_index.tolist()]
train_mat2 = mat2[df_train_index.tolist()]
test_mat2 = mat2[df_test_index.tolist()]
train_mat3 = mat3[df_train_index.tolist()]
test_mat3 = mat3[df_test_index.tolist()]

df['max_f1'] = mat1.tocsr().max(axis=1).todense()
df['max_f2'] = mat2.tocsr().max(axis=1).todense()
df['max_f3'] = mat3.tocsr().max(axis=1).todense()

limit = 9
mat1 = mat1.tocsc()[:, np.where((train_mat1.getnnz(axis=0) > limit) & (test_mat1.getnnz(axis=0) > limit))[0]].tocsr().astype(np.bool).astype(np.int8)
mat2 = mat2.tocsc()[:, np.where((train_mat2.getnnz(axis=0) > limit) & (test_mat2.getnnz(axis=0) > limit))[0]].tocsr().astype(np.bool).astype(np.int8)
mat3 = mat3.tocsc()[:, np.where((train_mat3.getnnz(axis=0) > limit) & (test_mat3.getnnz(axis=0) > limit))[0]].tocsr().astype(np.bool).astype(np.int8)
print(mat1.shape, mat2.shape, mat3.shape)

(609018, 2053602) (609018, 2812610) (609018, 1057788)
(609018, 101794) (609018, 20261) (609018, 9386)


In [3]:
from sklearn.preprocessing import MaxAbsScaler 

X = df.loc[~df.target.isnull(),:].reset_index(drop=True)
x_te = df.loc[df.target.isnull(),:].reset_index(drop=True)

mat_pca = np.load(data_dir + 'pca_cat100.npy')

scaler_mat = MaxAbsScaler()
mat_pca = scaler_mat.fit_transform(mat_pca)
train_mat_pcat = mat_pca[df_train_index.tolist()]
test_mat_pcat = mat_pca[df_test_index.tolist()]

In [4]:
%%time
scaler_mat = MaxAbsScaler()
mat1 = scaler_mat.fit_transform(mat1)
scaler_mat = MaxAbsScaler()
mat2 = scaler_mat.fit_transform(mat2)
scaler_mat = MaxAbsScaler()
mat3 = scaler_mat.fit_transform(mat3)

train_mat1 = mat1[df_train_index.tolist()]
test_mat1 = mat1[df_test_index.tolist()]
train_mat2 = mat2[df_train_index.tolist()]
test_mat2 = mat2[df_test_index.tolist()]
train_mat3 = mat3[df_train_index.tolist()]
test_mat3 = mat3[df_test_index.tolist()]
import gc
del mat1,mat2,mat3
gc.collect()

CPU times: user 54.8 s, sys: 6.05 s, total: 1min
Wall time: 52.3 s


In [5]:
import multiprocessing
cpu_cores = multiprocessing.cpu_count()

import tensorflow as tf
import keras
from keras.layers import *
from keras.models import *
from keras.callbacks import *
from keras import regularizers
from keras import optimizers
from keras import backend as K
from keras.utils import Sequence
cfg = K.tf.ConfigProto()
cfg.gpu_options.allow_growth = True
K.set_session(K.tf.Session(config=cfg))

Using TensorFlow backend.


# Data preprocessing

In [6]:
train_cols = ['num_times_cat_eq_0', 'num_times_cat_eq_1', 'num_times_cat_eq_2',
       'num_times_cat_eq_3', 'num_times_cat_eq_4', 'num_times_cat_eq_5',
       'records', 'max_days', 'min_days', 'sum_values_f1_max',
       'num_keys_f1_max', 'sum_values_f2_max', 'num_keys_f2_max',
       'sum_values_f3_max', 'num_keys_f3_max', 'sum_values_f1_mean',
       'num_keys_f1_mean', 'sum_values_f2_mean', 'num_keys_f2_mean',
       'sum_values_f3_mean', 'num_keys_f3_mean', 'max_day_cntr',
       'mean_day_cntr', 'diff_num_cats', 'unique_days',
        'sess_keys_mean', 'sess_keys_max', 'diff_key1_mean',
       'diff_key1_max', 'diff_key2_mean', 'diff_key2_max', 'diff_key3_mean',
       'diff_key3_max', 'quot_key1_mean', 'quot_key1_max', 'quot_key2_mean',
       'quot_key2_max', 'quot_key3_mean', 'quot_key3_max'
    ]

In [7]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1,1))
scaler.fit(X[train_cols].fillna(0).values)
X[train_cols] = scaler.transform(X[train_cols].fillna(0).values)
x_te[train_cols] = scaler.transform(x_te[train_cols].fillna(0).values)

In [8]:
import math
from sklearn.metrics import log_loss
class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with
    some constant frequency, as detailed in this paper (https://arxiv.org/abs/1506.01186).
    The amplitude of the cycle can be scaled on a per-iteration or 
    per-cycle basis.
    This class has three built-in policies, as put forth in the paper.
    "triangular":
        A basic triangular cycle w/ no amplitude scaling.
    "triangular2":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exp_range":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each 
        cycle iteration.
    For more detail, please see paper.
    
    # Example
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='triangular')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    
    Class also supports custom scaling functions:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```    
    # Arguments
        base_lr: initial learning rate which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore 
            max_lr may not actually be reached depending on
            scaling function.
        step_size: number of training iterations per
            half cycle. Authors suggest setting step_size
            2-8 x training iterations in epoch.
        mode: one of {triangular, triangular2, exp_range}.
            Default 'triangular'.
            Values correspond to policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in 'exp_range' scaling function:
            gamma**(cycle iterations)
        scale_fn: Custom scaling policy defined by a single
            argument lambda function, where 
            0 <= scale_fn(x) <= 1 for all x >= 0.
            mode paramater is ignored 
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on 
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn == None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr != None:
            self.base_lr = new_base_lr
        if new_max_lr != None:
            self.max_lr = new_max_lr
        if new_step_size != None:
            self.step_size = new_step_size
        self.clr_iterations = 0.
        
    def clr(self):
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(self.clr_iterations)
        
    def on_train_begin(self, logs={}):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())        
            
    def on_batch_end(self, epoch, logs=None):
        
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1
        K.set_value(self.model.optimizer.lr, self.clr())

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

            
class RocAucEvaluation(Callback):
    def __init__(self, X_seq, y, name, interval=1):
        super(Callback, self).__init__()
        self.X_seq, self.y = X_seq, y
        self.name = name
        self.interval = interval

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict_generator(self.X_seq, steps=len(self.X_seq), 
                                                          use_multiprocessing=False, workers=1, 
                                                          max_queue_size=4*cpu_cores).ravel()
            auc = roc_auc_score(self.y, y_pred)
            logloss = log_loss(self.y, y_pred)
            logs[self.name+"_auc"] = auc
            logs[self.name+"_logloss"] = logloss
            print((self.name+"_auc: {:.8f}; "+"_logloss: {:.8f}; ").format(auc,logloss))
            
class FeatureSequence(Sequence):
    
    def __init__(self, X, y, inx, batch_size, shuffle=False):
        
        self.X, self.y = X, y
        self.batch_size = batch_size
        
        self.inx = inx
        self.shuffle = shuffle
        if self.shuffle:
            np.random.shuffle(self.inx)

    def __len__(self):
        return math.ceil(self.inx.shape[0] / self.batch_size)

    def __getitem__(self, i):
        batch_inx = self.inx[i*self.batch_size:(i+1)*self.batch_size]
        
        batch = [x[batch_inx] for x in self.X[:2]] +  [x[batch_inx].todense() for x in self.X[-3:]]
        #batch = [x[batch_inx].todense() for x in self.X]
        return batch, self.y[batch_inx]
    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.inx)            

In [9]:
model_name = "all_in_focal_loss"

def split_inputs(X):
    return np.split(X, X.shape[-1], axis=-1)

def buildBaseModel(sparse_len1, sparse_len2, sparse_len3):   
    num_inp = Input((len(train_cols),), name='num_inp')
    num_x = BatchNormalization()(num_inp)
    num_x = Dense(64, activation="relu")(num_x)
    num_x = BatchNormalization()(num_x)
    num_x = Dropout(0.3)(num_x)
    
    dense_inp = Input((train_mat_pcat.shape[1],), name='dense_inp')
    dense_x = BatchNormalization()(dense_inp)
    dense_x = Dropout(0.3)(dense_x)    
    
    sparse_inp1 = Input((sparse_len1,), name='sparse_inp1')
    sparse1_x = sparse_inp1
    sparse1_x = BatchNormalization()(sparse_inp1)
    sparse1_x = Dense(64, activation="relu")(sparse1_x)
    sparse1_x = BatchNormalization()(sparse1_x)
    sparse1_x = Dropout(0.3)(sparse1_x)
    
    sparse_inp2 = Input((sparse_len2,), name='sparse_inp2')
    sparse2_x = sparse_inp2
    sparse2_x = Dense(64, activation="relu")(sparse2_x)
    sparse2_x = BatchNormalization()(sparse2_x)
    sparse2_x = Dropout(0.3)(sparse2_x)
    
    sparse_inp3 = Input((sparse_len3,), name='sparse_inp3')
    sparse3_x = sparse_inp3
    sparse3_x = BatchNormalization()(sparse_inp3)
    sparse3_x = Dense(64, activation="relu")(sparse3_x)
    sparse3_x = BatchNormalization()(sparse3_x)
    sparse3_x = Dropout(0.3)(sparse3_x)    
    
    x = concatenate([num_x, dense_x, sparse1_x, sparse3_x, sparse2_x, 
                     multiply([sparse1_x, sparse2_x]),
                     multiply([sparse1_x, sparse3_x]),
                     multiply([sparse2_x, sparse3_x]),
                     multiply([sparse1_x, sparse2_x, sparse3_x]),
                              ])
    
    x1 = Dense(1024, activation="relu")(x)
    x1 = BatchNormalization()(x1)
    x1 = Dropout(0.5)(x1)
    
    x2 = concatenate([x, x1])
    x2 = Dense(512, activation="relu")(x2)
    x2 = BatchNormalization()(x2)
    x2 = Dropout(0.5)(x2)
    
    x3 = concatenate([x, x1, x2])
    x3 = Dense(256, activation="relu")(x3)
    x3 = BatchNormalization()(x3)
    x3 = Dropout(0.5)(x3)
    
    x4 = x3
    x4 = Dense(128, activation="relu")(x4)
    x4 = BatchNormalization()(x4)
    x4 = Dropout(0.15)(x4)
    
    x_output = Dense(1, activation="sigmoid", name="output")(x4)
    return Model(inputs = [num_inp, dense_inp, sparse_inp1, sparse_inp2, sparse_inp3], outputs=x_output) #num_inp, ,  sparse_inp3

model = buildBaseModel(500, 500, 500)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sparse_inp1 (InputLayer)        (None, 500)          0                                            
__________________________________________________________________________________________________
sparse_inp3 (InputLayer)        (None, 500)          0                                            
__________________________________________________________________________________________________
num_inp (InputLayer)            (None, 39)           0                                            
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 500)          2000        sparse_inp1[0][0]                
__________________________________________________________________________________________________
batch_norm

In [10]:
y = X.target.values

In [13]:
from sklearn.model_selection import KFold
TRN_BATCH_SIZE = 512
INF_BATCH_SIZE = 512

n_folds = 5
kf = KFold(n_splits=n_folds, shuffle=True, random_state=6239)
epochs = 32
pred = np.zeros(y.shape)
test_pred = 0
ifold = 0

fold_auc = []

In [None]:
cpu_cores = 4
from sklearn.feature_selection import SelectPercentile
from sklearn.preprocessing import MaxAbsScaler 
from sklearn.preprocessing import minmax_scale

def focal_loss(y_true, y_pred, alpha, gamma=0.5):
    alpha = K.variable(alpha)
    pt = K.abs(1. - y_true - y_pred)
    pt = K.clip(pt, K.epsilon(), 1. - K.epsilon())
    return K.mean(-alpha * K.pow(1. - pt, gamma) * K.log(pt), axis=-1)


for trn_inx, val_inx in kf.split(y):
    print("Training fold {}".format(ifold))
    K.clear_session()

    
    model_file_name = model_name+"__f"+str(ifold)
    model_file = results_dir+model_file_name+'.h5'   
    
    sp_train_mat1 = train_mat1[trn_inx]
    sp_val_mat1 = train_mat1[val_inx]
    sp_test_mat1 = test_mat1
    
    sp_train_mat2 = train_mat2[trn_inx]
    sp_val_mat2 = train_mat2[val_inx]
    sp_test_mat2 = test_mat2  
    
    sp_train_mat3 = train_mat3[trn_inx]
    sp_val_mat3 = train_mat3[val_inx]
    sp_test_mat3 = test_mat3      
    
    yy = X.target.values[trn_inx]
    ssp = SelectPercentile(percentile=0.3)  
    ssp.fit(sp_train_mat1, yy)   
    sp_train_mat1 = ssp.transform(sp_train_mat1)
    sp_val_mat1 = ssp.transform(sp_val_mat1)
    sp_test_mat1 = ssp.transform(sp_test_mat1) 
    del ssp
    
    scaler = MaxAbsScaler()
    scaler.fit(sp_train_mat1)
    sp_train_mat1 = scaler.transform(sp_train_mat1)
    sp_val_mat1 = scaler.transform(sp_val_mat1)
    sp_test_mat1 = scaler.transform(sp_test_mat1)
    del scaler
    
    ssp = SelectPercentile(percentile=2)  
    ssp.fit(sp_train_mat2, yy)   
    sp_train_mat2 = ssp.transform(sp_train_mat2)
    sp_val_mat2 = ssp.transform(sp_val_mat2)
    sp_test_mat2 = ssp.transform(sp_test_mat2) 
    del ssp
    
    scaler = MaxAbsScaler()
    scaler.fit(sp_train_mat2)
    sp_train_mat2 = scaler.transform(sp_train_mat2)
    sp_val_mat2 = scaler.transform(sp_val_mat2)
    sp_test_mat2 = scaler.transform(sp_test_mat2)
    del scaler    
    
    ssp = SelectPercentile(percentile=3)  
    ssp.fit(sp_train_mat3, yy)   
    sp_train_mat3 = ssp.transform(sp_train_mat3)
    sp_val_mat3 = ssp.transform(sp_val_mat3)
    sp_test_mat3 = ssp.transform(sp_test_mat3) 
    del ssp    
    
    scaler = MaxAbsScaler()
    scaler.fit(sp_train_mat3)
    sp_train_mat3 = scaler.transform(sp_train_mat3)
    sp_val_mat3 = scaler.transform(sp_val_mat3)
    sp_test_mat3 = scaler.transform(sp_test_mat3)
    del scaler
    
    print(sp_train_mat1.shape[1],sp_train_mat2.shape[1],sp_train_mat3.shape[1])    
    
    model = buildBaseModel(sp_train_mat1.shape[1],sp_train_mat2.shape[1],sp_train_mat3.shape[1])    
    
    trn_seq = FeatureSequence([X.loc[trn_inx,train_cols].values, 
                               train_mat_pcat[trn_inx],
                               sp_train_mat1,
                               sp_train_mat2,
                               sp_train_mat3
                              ], 
                              y[trn_inx], np.array(list(range(len(trn_inx)))), TRN_BATCH_SIZE, shuffle=True)
    val_seq = FeatureSequence([X.loc[val_inx,train_cols].values, 
                               train_mat_pcat[val_inx],
                               sp_val_mat1,
                               sp_val_mat2,
                               sp_val_mat3,
                              ], 
                              y[val_inx], np.array(list(range(len(val_inx)))), INF_BATCH_SIZE, shuffle=False)
    te_seq = FeatureSequence([x_te[train_cols].values, 
                               test_mat_pcat,
                               sp_test_mat1,
                               sp_test_mat2,
                               sp_test_mat3
                             ], 
                              y, np.array(list(range(len(x_te)))), INF_BATCH_SIZE, shuffle=False)
    
    
    # Callbacks
    model_checkpoint = ModelCheckpoint(model_file, monitor='val_auc', verbose=1, mode='max',
                                       save_best_only=True, save_weights_only=False, period=1)
    clr = CyclicLR(base_lr=0.0001, max_lr=0.001, step_size=2*math.ceil(len(trn_seq)), mode='triangular2')
    early_stop = EarlyStopping(monitor='val_auc', min_delta=0, patience=3, verbose=1, mode='max')
    mse_eval = RocAucEvaluation(val_seq, y[val_inx], 'val')
    
    # Training
    opt=optimizers.Nadam()
    model.compile(optimizer=opt, loss='binary_crossentropy')

    model.fit_generator(
        generator=trn_seq, steps_per_epoch=len(trn_seq),
        initial_epoch=0, epochs=epochs, shuffle=False, verbose=1,
        callbacks=[mse_eval, model_checkpoint, early_stop, clr], #
        use_multiprocessing=False, workers=1, max_queue_size=4*cpu_cores)
    
     
    # Predicting
    print("\nPredicting fold {}".format(ifold))
    del model  
    model = load_model(model_file, compile=True, custom_objects={'<lambda>':lambda y_true, y_pred: focal_loss(y_true, y_pred, 1.6, 2)})
    pred[val_inx] = model.predict_generator(val_seq, steps=len(val_seq), 
                                                    use_multiprocessing=False, workers=1, 
                                                    max_queue_size=4*cpu_cores).ravel()
    
    auc = roc_auc_score(y[val_inx], pred[val_inx])
    logloss = log_loss(y[val_inx], pred[val_inx])
    fold_auc.append(auc)
    print("fold: {}, auc: {}".format(ifold, auc))
    print("fold: {}, logloss: {}".format(ifold, logloss))
    print()
    
    test_pred += minmax_scale(model.predict_generator(te_seq, steps=len(te_seq), 
                                                    use_multiprocessing=False, workers=1, 
                                                    max_queue_size=4*cpu_cores).ravel())/n_folds
    ifold += 1
    
    del sp_train_mat1,sp_val_mat1,sp_test_mat1
    del sp_train_mat2,sp_val_mat2,sp_test_mat2
    del sp_train_mat3,sp_val_mat3,sp_test_mat3    
    gc.collect()

Training fold 0
306 406 281
Epoch 1/32
val_auc: 0.60120503; _logloss: 0.20380703; 

Epoch 00001: val_auc improved from -inf to 0.60121, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 2/32
val_auc: 0.64159607; _logloss: 0.19348564; 

Epoch 00002: val_auc improved from 0.60121 to 0.64160, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 3/32


  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)


val_auc: 0.65891602; _logloss: nan; 

Epoch 00003: val_auc improved from 0.64160 to 0.65892, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 4/32
val_auc: 0.66420710; _logloss: 0.18957561; 

Epoch 00004: val_auc improved from 0.65892 to 0.66421, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 5/32
val_auc: 0.66640176; _logloss: nan; 

Epoch 00005: val_auc improved from 0.66421 to 0.66640, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 6/32
val_auc: 0.66685560; _logloss: 0.18971975; 

Epoch 00006: val_auc improved from 0.66640 to 0.66686, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 7/32
val_auc: 0.66926958; _logloss: 0.18928524; 

Epoch 00007: val_auc improved from 0.66686 to 0.66927, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 8/32
val_auc: 0.66976656; _logloss: 0.18948176; 

Epoch 00008: val_auc improved from 0.66927 to 0.66977, saving model to ./results/all_in_focal_loss__f0.h5
Epoch 9/32
val_auc: 0.66961460; _logloss: 0.18945022;

val_auc: 0.66689005; _logloss: 0.19010152; 

Epoch 00007: val_auc improved from 0.66471 to 0.66689, saving model to ./results/all_in_focal_loss__f3.h5
Epoch 8/32
val_auc: 0.66621930; _logloss: 0.19056328; 

Epoch 00008: val_auc did not improve
Epoch 9/32
val_auc: 0.66680594; _logloss: 0.19045245; 

Epoch 00009: val_auc did not improve
Epoch 10/32
val_auc: 0.66596701; _logloss: 0.19054705; 

Epoch 00010: val_auc did not improve
Epoch 00010: early stopping

Predicting fold 3
fold: 3, auc: 0.666890047997716
fold: 3, logloss: 0.1901015202828917

Training fold 4
306 406 281
Epoch 1/32
val_auc: 0.58366240; _logloss: 0.20648250; 

Epoch 00001: val_auc improved from -inf to 0.58366, saving model to ./results/all_in_focal_loss__f4.h5
Epoch 2/32
val_auc: 0.64111826; _logloss: inf; 

Epoch 00002: val_auc improved from 0.58366 to 0.64112, saving model to ./results/all_in_focal_loss__f4.h5
Epoch 3/32
val_auc: 0.65253208; _logloss: 0.19329167; 

Epoch 00003: val_auc improved from 0.64112 to 0.65253,

In [None]:
model_name = 'nn_3br_fm_style_bin'
print(fold_auc)
print(np.mean(fold_auc), np.std(fold_auc))
roc_auc_score(X.target.values, pred)

In [None]:
np.save(results_dir + 'train_' + model_name +'.npy', pred)
sample_sub = pd.read_table(data_dir+'mlboot_test.tsv')

In [None]:
sub = x_te[['uid','target']].copy()
sub['target'] = test_pred
sub.columns = ['cuid','target']
sample_sub = sample_sub.merge(sub, on='cuid', how='left')
np.save(results_dir + 'test_' + model_name +'.npy', sample_sub.target.values)
print('isnull?',sample_sub.target.isnull().any())
sample_sub.head()

In [None]:
print(fold_auc)
print(np.mean(fold_auc), np.std(fold_auc))

In [None]:
sample_sub[['target']].to_csv(results_dir + model_name + '.csv', header=False, index=False)

In [None]:
sample_sub.head()