In [10]:
from keras.models import Model
from keras.layers import *
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold 
from sklearn.metrics import roc_auc_score, mean_squared_error, log_loss
import os
# Show which files are present in the input directory.
input_files = os.listdir("../input")
print(input_files)

['test.csv', 'train.csv', 'sample_submission.csv']


In [11]:
# Load the training data.
df = pd.read_csv('../input/train.csv')

# Column names of the 20 features selected by RFECV with lasso.
features = ['16', '33', '43', '45', '52', '63', '65', '73', '90', '91', '117', '133', '134', '149', '189', '199', '217', '237', '258', '295']

# Preview the first rows. This has to be the final expression of the cell:
# Jupyter only renders the value of a cell's last expression, so a bare
# `df.head()` placed before other statements is evaluated and then discarded.
df.head()

In [12]:
# Labels come from the second column (positional index 1) of the raw frame;
# the design matrix is restricted to the selected feature columns.
y = df.values[:, 1]
X = df.loc[:, features].values
print(X.shape, y.shape)

(250, 20) (250,)


In [13]:
def simple_model(input_shape):
    """
    Build and compile a small fully connected binary classifier.

    The network has one hidden layer of 3 sigmoid units feeding a single
    sigmoid output unit. It is compiled with binary cross-entropy loss,
    an Adam optimizer (lr=0.005) and accuracy as the tracked metric.

    Parameters
    ----------
    input_shape : sequence
        Shape of the training matrix; only ``input_shape[1]`` (the number
        of feature columns) is used.

    Returns
    -------
    Model
        The compiled Keras model.
    """
    n_features = input_shape[1]
    features_in = Input(shape=(n_features,))
    hidden = Dense(3, activation='sigmoid')(features_in)
    # Without the hidden layer above, this single sigmoid unit on its own
    # would make the model a logistic regression.
    prob = Dense(1, activation='sigmoid')(hidden)

    net = Model(inputs=features_in, outputs=prob)
    net.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=0.005),
        metrics=['accuracy'],
    )
    return net

In [14]:
N_SPLITS = 10
# Fixed seed so the shuffled fold assignment is identical on every run.
# (Network weight initialisation is not seeded here, so fold scores can
# still vary slightly between runs.)
SEED = 42
splits = list(
    StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED).split(X, y)
)
preds_val = []    # validation-fold predictions, one array per fold
best_models = []  # (model, validation AUC) pairs, one per fold

for idx, (train_idx, val_idx) in enumerate(splits):
    print("Beginning fold {}".format(idx+1))
    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]
    model = simple_model(X_train.shape)
    # Keep only the weights of the epoch with the best validation accuracy.
    # 'weights.h5' is overwritten by every fold, which is safe because the
    # weights are loaded back into this fold's model right after fitting.
    # NOTE(review): 'val_acc' is the key older Keras releases log for
    # metrics=['accuracy']; newer releases call it 'val_accuracy' -- confirm
    # against the installed version.
    cb = ModelCheckpoint('weights.h5', monitor='val_acc', mode='max', save_best_only=True, save_weights_only=True)
    model.fit(X_train, y_train, epochs=200, validation_data=(X_val, y_val), callbacks=[cb], verbose=0)
    model.load_weights('weights.h5')
    # NOTE: the checkpoint is selected on the same fold it is scored on, so
    # this AUC is an optimistic estimate of out-of-sample performance.
    fold_preds = model.predict(X_val)
    preds_val.append(fold_preds)
    score = roc_auc_score(y_val, fold_preds)
    print((model, score))
    best_models.append((model, score))

Beginning fold 1
(<keras.engine.training.Model object at 0x7fa2cf56ee10>, 0.9027777777777779)
Beginning fold 2
(<keras.engine.training.Model object at 0x7fa2cf1437b8>, 0.8819444444444445)
Beginning fold 3
(<keras.engine.training.Model object at 0x7fa2ced8fe48>, 0.9375)
Beginning fold 4
(<keras.engine.training.Model object at 0x7fa2ce9fbd68>, 0.9791666666666667)
Beginning fold 5
(<keras.engine.training.Model object at 0x7fa2ce677dd8>, 0.5625)
Beginning fold 6
(<keras.engine.training.Model object at 0x7fa2ce2457b8>, 0.9027777777777778)
Beginning fold 7
(<keras.engine.training.Model object at 0x7fa2cdeb46a0>, 0.8958333333333333)
Beginning fold 8
(<keras.engine.training.Model object at 0x7fa2cdafef60>, 0.9444444444444444)
Beginning fold 9
(<keras.engine.training.Model object at 0x7fa2cd6e8be0>, 0.9652777777777777)
Beginning fold 10
(<keras.engine.training.Model object at 0x7fa2cd2ce400>, 0.9652777777777777)


In [15]:
# Load the test set, report its row count, and preview the first rows.
df_test = pd.read_csv('../input/test.csv')
print(df_test.shape[0])
df_test.head()


19750


Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,...,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299
0,250,0.5,-1.033,-1.595,0.309,-0.714,0.502,0.535,-0.129,-0.687,1.291,0.507,-0.317,1.848,-0.232,-0.34,-0.051,0.804,0.764,1.86,0.262,1.112,-0.491,-1.039,-0.492,0.183,-0.671,-1.313,0.149,0.244,1.072,-1.003,0.832,-1.075,1.988,1.201,-2.065,-0.826,-0.016,0.49,...,0.824,0.928,1.372,1.505,0.645,0.641,-1.132,1.009,0.998,0.21,-1.634,1.046,0.114,-0.806,0.301,0.145,-0.684,0.794,-0.29,-1.688,0.313,1.14,0.447,-0.616,1.294,0.785,0.453,1.55,-0.866,1.007,-0.088,-2.628,-0.845,2.078,-0.277,2.132,0.609,-0.104,0.312,0.979
1,251,0.776,0.914,-0.494,1.347,-0.867,0.48,0.578,-0.313,0.203,1.356,-1.086,0.322,0.876,-0.563,-1.394,0.385,1.891,-2.107,-0.636,-0.055,-0.843,0.041,0.253,0.557,0.475,-0.839,-1.146,1.21,1.427,0.347,1.077,-0.194,0.323,0.543,0.894,1.19,0.342,-0.858,0.756,...,-1.791,0.122,-0.669,-1.558,-0.244,2.583,-0.829,0.133,-2.746,0.341,-1.145,0.492,0.437,-0.628,0.271,2.639,0.481,-0.687,1.017,1.648,-1.272,-0.797,-0.87,-1.582,-1.987,-0.052,-0.194,0.539,-1.788,-0.433,-0.683,-0.066,0.025,0.606,-0.353,-1.133,-3.138,0.281,-0.625,-0.761
2,252,1.75,0.509,-0.057,0.835,-0.476,1.428,-0.701,-2.009,-1.378,0.167,-0.132,0.459,-0.341,0.014,0.184,-0.46,-0.991,-1.039,0.992,1.036,1.552,-0.83,1.374,-0.914,0.427,0.027,0.327,1.117,0.871,-2.556,-0.036,-0.081,0.744,-1.191,-1.784,0.239,0.5,0.437,0.746,...,-1.167,1.009,-0.18,-0.683,-1.383,1.02,0.268,-1.558,0.62,-0.489,-2.09,-0.977,1.672,-0.655,-0.801,-1.846,0.761,-0.846,0.181,0.962,-0.611,1.45,0.021,0.32,-0.951,-2.662,0.761,-0.665,-0.619,-0.645,-0.094,0.351,-0.607,-0.737,-0.031,0.701,0.976,0.135,-1.327,2.463
3,253,-0.556,-1.855,-0.682,0.578,1.592,0.512,-1.419,0.722,0.511,0.567,0.356,-0.06,0.767,-0.196,0.359,0.08,-0.956,0.857,-0.655,-0.09,-0.008,-0.596,-0.413,-1.03,0.173,-0.969,0.998,0.079,0.79,-0.776,-0.374,-1.995,0.572,0.542,0.547,0.307,-0.074,1.703,-0.003,...,-1.029,-0.34,0.052,2.122,-0.136,-1.799,1.45,1.866,-0.273,-0.237,-0.207,-0.196,-1.106,-1.56,-0.934,2.167,0.323,0.583,1.48,-0.685,-0.473,-1.066,-0.271,0.506,-0.753,1.048,-0.45,-0.3,-1.221,0.235,-0.336,-0.787,0.255,-0.031,-0.836,0.916,2.411,1.053,-1.601,-1.529
4,254,0.754,-0.245,1.173,-1.623,0.009,0.37,0.781,-1.763,-1.432,-0.93,-0.098,0.896,0.293,-0.259,0.03,-0.661,0.921,0.006,-0.631,1.284,-1.167,-0.744,-2.184,2.146,1.13,0.017,1.421,-0.59,1.938,-0.194,0.794,0.579,0.521,0.635,-0.023,-0.892,-0.363,-0.36,0.405,...,-0.486,-0.068,-0.534,-1.322,0.5,0.263,-0.745,0.578,-0.064,0.738,-0.28,0.745,-0.588,-0.429,-0.588,0.154,-1.187,1.681,-0.832,-0.437,-0.038,-1.096,-0.156,3.565,-0.428,-0.384,1.243,-0.966,1.525,0.458,2.184,-1.09,0.216,1.186,-0.143,0.322,-0.068,-0.156,-1.153,0.825


In [16]:
# Restrict the test frame to the same feature columns used for training.
X_test = df_test.loc[:, features].values

In [17]:
# Predict the test set with every fold model and place the per-model
# outputs side by side, one column per model.
y_preds = np.concatenate(
    [mod.predict(X_test) for mod, _ in best_models],
    axis=1,
)
y_preds.shape

(19750, 10)

In [18]:
# Average the per-model predictions for each test row, store the result in
# the sample submission's 'target' column, and write the file without the
# index column.
mean_preds = np.mean(y_preds, axis=1)
subs = pd.read_csv('../input/sample_submission.csv')
subs['target'] = mean_preds
subs.to_csv('submission.csv', index=False)