In [1]:
import pickle
import glob
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import xgboost as xgb

# load feats
train_x,test_x = [],[]
for feat in sorted(glob.glob('../features/*.pkl')):
#     if 'tfidf' in feat or 'lr' in feat or 'mnb' in feat:
#         continue
    print('file path',feat)
    a,b = pickle.load(open(feat,'rb'))
    print(a.shape,b.shape)
    train_x.append(a)
    test_x.append(b)
train_x = np.nan_to_num(np.hstack(train_x))
test_x = np.nan_to_num(np.hstack(test_x))
print(train_x.shape)
    
# load y
train = pd.read_csv("../input/train.csv")
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
train_y = train[list_classes].values.astype('int')
print(train_x.shape)




file path ../features/cnn_glove_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_glove_2_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_gru_glove_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_gru_glove_1_trainable_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_gru_muse_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_muse_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_muse_adj_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_muse_adj_1_feat_de_fr.pkl
(95851, 6) (226998, 6)
file path ../features/cnn_muse_adj_2_feat.pkl
(95851, 6) (226998, 6)
file path ../features/gru_glove_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/gru_muse_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/lr_feat1.pkl
(95851, 6) (226998, 6)
file path ../features/lr_feat2.pkl
(95851, 6) (226998, 6)
file path ../features/lstm_1_feat.pkl
(95851, 6) (226998, 6)
file path ../features/lstm_2_feat.pkl
(95851, 6) (226998, 6)
fi

In [31]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

input_len = train_x.shape[1]
num_classes = 6

def get_nn_model():
    model = Sequential()
    model.add(Dense(256, activation='relu', input_shape=(input_len,)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='sigmoid'))
    return model

tmp_m = get_nn_model()
tmp_m.summary()
    

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_55 (Dense)             (None, 256)               61184     
_________________________________________________________________
dense_56 (Dense)             (None, 128)               32896     
_________________________________________________________________
dense_57 (Dense)             (None, 6)                 774       
Total params: 94,854
Trainable params: 94,854
Non-trainable params: 0
_________________________________________________________________


In [36]:
from sklearn.model_selection import KFold
from keras.callbacks import ModelCheckpoint,LearningRateScheduler,ReduceLROnPlateau,EarlyStopping
from keras.models import load_model
from keras.optimizers import Adam

def lr_s(e):
    if e < 5:
        return 0.001
    elif e < 10:
        return 0.0001
    else:
        return 0.00001
    

def nn_eval(y_true,y_pred):
    res = []
    for i in range(6):
        a = y_true[:,i]
        b = y_pred[:,i]
        res.append(log_loss(a,b))
    return res

def simple_ens(model_name,k=3,rnd=233):
    kf = KFold(n_splits=k, shuffle=True, random_state=rnd)
    test_pred = np.zeros((226998,6))
    all_train_loss_l,all_val_loss_l = 0,0
    
    for train_index, test_index in kf.split(train_x):
        # x,y
        curr_x,curr_y = train_x[train_index],train_y[train_index]
        hold_out_x,hold_out_y = train_x[test_index],train_y[test_index]
        val_loss_l,train_loss_l = 0,0

        model = get_nn_model()
        model.compile(loss='binary_crossentropy',
              optimizer=Adam(0.001),
              metrics=['accuracy'])
        
        file_path = 'nn_ens.h5'
        chk = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                                patience=3,
                                                verbose=1,
                                                factor=0.5,
                                                min_lr=0.000001)
        
        early = EarlyStopping(monitor='val_loss', patience=10)
        
        model.fit(curr_x,curr_y,
                  batch_size=128,
                  epochs=100,
                  validation_data=(hold_out_x,hold_out_y),
                  callbacks=[chk,learning_rate_reduction,early]
                 )
        model = load_model(file_path)
        
        curr_tr_pred = model.predict(curr_x)
        curr_val_pred = model.predict(hold_out_x)
        curr_test_pred = model.predict(test_x)
        test_pred += curr_test_pred
        
        tr_res = nn_eval(curr_y,curr_tr_pred)
        val_res = nn_eval(hold_out_y,curr_val_pred)
        train_loss_l = np.mean(tr_res)
        val_loss_l = np.mean(val_res)
        print(tr_res,train_loss_l)
        print(val_res,val_loss_l)
        all_train_loss_l += train_loss_l/k
        all_val_loss_l += val_loss_l/k

        print('========================')
    test_pred = test_pred/k
    print('all train avg',all_train_loss_l,'all val avg',all_val_loss_l)
    return test_pred

print('done')

done


In [37]:
xgb_res = simple_ens('xgb',k=5)
sample_submission = pd.read_csv("../input/sample_submission.csv")
sample_submission[list_classes] = xgb_res
sample_submission.to_csv("../results/nn_ens_new_csv_fold5.gz", index=False, compression='gzip')
print(sample_submission.head())
print('save done')




Train on 76680 samples, validate on 19171 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100

Epoch 00020: reducing learning rate to 0.0005000000237487257.
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100

Epoch 00027: reducing learning rate to 0.0002500000118743628.
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100

Epoch 00032: reducing learning rate to 0.0001250000059371814.
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100

Epoch 00037: reducing learning rate to 6.25000029685907e-05.
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100

Epoch 00042: reducing learning rate to 3.125000148429535e-05.
Epoch 43/100
Epoch 44/100
Epoch 45/100

Epoch 00045: reducing learning rate to 1.5625000742147677e-

  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)
  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)


[nan, 0.019479271746140725, nan, 0.0069934080193730947, 0.054630047234466232, 0.017970996869438958] nan
[nan, 0.024324997959981706, nan, 0.0091981501719756928, 0.055430236062574835, 0.0208025882635689] nan
Train on 76681 samples, validate on 19170 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100

Epoch 00019: reducing learning rate to 0.0005000000237487257.
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100

Epoch 00024: reducing learning rate to 0.0002500000118743628.
Epoch 25/100
Epoch 26/100
Epoch 27/100

Epoch 00027: reducing learning rate to 0.0001250000059371814.
Epoch 28/100
Epoch 29/100
Epoch 30/100

Epoch 00030: reducing learning rate to 6.25000029685907e-05.
[nan, 0.021745649571959215, nan, 0.0081255361518935629, 0.056864404160075391, 0.019767038236930479] nan
[nan, 0.