In [39]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook
from scipy.optimize import minimize
from sklearn.metrics import log_loss
import matplotlib.pylab as plt

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import optimizers

# Apply the 'ggplot' style sheet to any matplotlib figures drawn after this point.
plt.style.use('ggplot')

In [43]:
# Load the two competition files; both are semicolon-separated.
train = pd.read_csv('train.csv', delimiter=';')
test  = pd.read_csv('test.csv', delimiter=';')

# The test file marks a missing answer in these flag columns with the literal
# string 'None' (presumably only in these three columns -- confirm against the
# data). Convert just those columns to float with NaN for the missing answers.
# Going through a numeric sentinel over the whole frame would also wipe out any
# genuine 999 in unrelated columns such as 'id' or the blood-pressure readings.
flag_cols = ['smoke', 'alco', 'active']
test[flag_cols] = test[flag_cols].replace('None', np.nan).astype(float)

# These two columns are cast to plain integers, matching the original cell.
test[['gluc', 'cholesterol']] = test[['gluc', 'cholesterol']].astype(int)

# Number of labelled rows; later cells use it to separate train rows from test rows.
N_train = train.shape[0]

# Stack train on top of test so the preprocessing below is applied to both.
# The index is rebuilt so that row labels equal row positions.
data = pd.concat([train, test], axis=0).reset_index(drop=True)

In [44]:
# Columns treated as categorical; each one is expanded into indicator columns
# named '<column>_<value>'.
cat_feat = ['alco', 'smoke', 'active', 'gluc', 'cholesterol', 'gender']

# One get_dummies call encodes every listed column, removes the originals and
# appends the indicator columns in cat_feat order -- the same layout the
# per-column concat + drop produced, without rebuilding the frame on each pass.
# With the default dummy_na=False a missing value yields a row of all zeros.
data = pd.get_dummies(data, columns=cat_feat)

In [45]:
# Every column except the target is z-scored: subtract the column mean and
# divide by the column standard deviation. The statistics are taken over the
# stacked train + test frame.
feat = data.drop('cardio', axis=1).columns

# The expression already operates on all of `feat` at once, so it is applied a
# single time. Repeating it once per column only redid the same work and let
# floating-point error accumulate.
# NOTE(review): a constant column has std == 0 and would become NaN here.
data[feat] = (data[feat] - data[feat].mean()) / data[feat].std()

In [46]:
# Model inputs are every column except the row identifier and the target.
features = data.drop(['id', 'cardio'], axis=1)
target = data['cardio']

##### for submit
# All labelled rows for fitting and the unlabelled rows for prediction. These
# use their own names so the CV arrays below no longer overwrite them.
x_train_full = features.iloc[:N_train].values
y_train_full = target.iloc[:N_train].values
x_submit = features.iloc[N_train:].values

##### for cv
# The first 80% of the labelled rows are used for fitting and the remaining 20%
# are held out. Positional half-open slices keep the two parts disjoint; the
# label-based .loc[:k] form is end-inclusive and put row k in both parts.
cv_split = int(0.8 * N_train)

x_train = features.iloc[:cv_split].values
y_train = target.iloc[:cv_split].values

x_test = features.iloc[cv_split:N_train].values
y_test = target.iloc[cv_split:N_train].values

In [48]:
# Network 1: four ReLU hidden layers of 32 units and a single sigmoid output,
# trained with Adam at learning rate 0.05.
opt = optimizers.adam(lr=0.05)
model = Sequential([
    Dense(32, activation='relu', input_dim=x_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
# NOTE(review): no random seed is set in the visible cells, so the printed
# value is expected to vary between runs.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y1 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y1))
del model  # only the predictions are kept for the final blend

0.55033045933


In [49]:
# Network 2: four sigmoid hidden layers of 32 units and a single sigmoid output,
# trained with Adam at learning rate 0.05 for 10 epochs.
opt = optimizers.adam(lr=0.05)
model = Sequential([
    Dense(32, activation='sigmoid', input_dim=x_train.shape[1]),
    Dense(32, activation='sigmoid'),
    Dense(32, activation='sigmoid'),
    Dense(32, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=10, batch_size=1024, verbose=0)
y2 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y2))
del model  # only the predictions are kept for the final blend

0.550536226197


In [50]:
# Network 3: ReLU hidden layers narrowing 64 -> 32 -> 16 -> 8 and a single
# sigmoid output, trained with Adam at learning rate 0.01.
opt = optimizers.adam(lr=0.01)
model = Sequential([
    Dense(64, activation='relu', input_dim=x_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y3 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y3))
del model  # only the predictions are kept for the final blend

0.551655084857


In [51]:
# Network 4: sigmoid hidden layers narrowing 64 -> 32 -> 16 -> 8 and a single
# sigmoid output, trained with Adam at learning rate 0.01.
opt = optimizers.adam(lr=0.01)
model = Sequential([
    Dense(64, activation='sigmoid', input_dim=x_train.shape[1]),
    Dense(32, activation='sigmoid'),
    Dense(16, activation='sigmoid'),
    Dense(8, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y4 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y4))
del model  # only the predictions are kept for the final blend

0.549145329597


In [52]:
# Network 5: ReLU hidden layers of 12 -> 32 -> 12 -> 6 units and a single
# sigmoid output, trained with Adam at learning rate 0.01.
opt = optimizers.adam(lr=0.01)
model = Sequential([
    Dense(12, activation='relu', input_dim=x_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(12, activation='relu'),
    Dense(6, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y5 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y5))
del model  # only the predictions are kept for the final blend

0.548997860692


In [53]:
# Network 6: sigmoid hidden layers of 12 -> 32 -> 12 -> 6 units and a single
# sigmoid output, trained with Adam at learning rate 0.03.
opt = optimizers.adam(lr=0.03)
model = Sequential([
    Dense(12, activation='sigmoid', input_dim=x_train.shape[1]),
    Dense(32, activation='sigmoid'),
    Dense(12, activation='sigmoid'),
    Dense(6, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y6 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y6))
del model  # only the predictions are kept for the final blend

0.551745479349


In [54]:
# Network 7: two sigmoid hidden layers of 128 units and a single sigmoid
# output, trained with Adam at learning rate 0.03.
opt = optimizers.adam(lr=0.03)
model = Sequential([
    Dense(128, activation='sigmoid', input_dim=x_train.shape[1]),
    Dense(128, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y7 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y7))
del model  # only the predictions are kept for the final blend

0.553103063048


In [55]:
# Network 8: two ReLU hidden layers of 128 units and a single sigmoid output,
# trained with Adam at learning rate 0.03.
opt = optimizers.adam(lr=0.03)
model = Sequential([
    Dense(128, activation='relu', input_dim=x_train.shape[1]),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y8 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y8))
del model  # only the predictions are kept for the final blend

0.550387466933


In [56]:
# Network 9: sigmoid hidden layers narrowing 128 -> 32 -> 4 and a single
# sigmoid output, trained with Adam at learning rate 0.03.
opt = optimizers.adam(lr=0.03)
model = Sequential([
    Dense(128, activation='sigmoid', input_dim=x_train.shape[1]),
    Dense(32, activation='sigmoid'),
    Dense(4, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y9 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y9))
del model  # only the predictions are kept for the final blend

0.549253208594


In [57]:
# Network 10: two ReLU hidden layers of 64 units and a single sigmoid output,
# trained with Adam at learning rate 0.03.
opt = optimizers.adam(lr=0.03)
model = Sequential([
    Dense(64, activation='relu', input_dim=x_train.shape[1]),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y10 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y10))
del model  # only the predictions are kept for the final blend

0.550358667218


In [58]:
# Network 11: two sigmoid hidden layers of 64 units and a single sigmoid
# output, trained with Adam at learning rate 0.03.
opt = optimizers.adam(lr=0.03)
model = Sequential([
    Dense(64, activation='sigmoid', input_dim=x_train.shape[1]),
    Dense(64, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
model.fit(x_train, y_train, epochs=20, batch_size=1024, verbose=0)
y11 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y11))
del model  # only the predictions are kept for the final blend

0.548657273827


In [59]:
# Network 12: ReLU hidden layers widening 20 -> 40 -> 60, a 10% dropout layer,
# then narrowing 60 -> 40 -> 20 into a single sigmoid output. Trained with Adam
# at learning rate 0.01.
opt = optimizers.adam(lr=0.01)
model = Sequential([
    Dense(20, activation='relu', input_dim=x_train.shape[1]),
    Dense(40, activation='relu'),
    Dense(60, activation='relu'),
    Dropout(0.1),
    Dense(60, activation='relu'),
    Dense(40, activation='relu'),
    Dense(20, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['binary_crossentropy'])

# Fit silently, predict probabilities for x_test and print their log loss.
# NOTE(review): batch_size is 1034 here while every other network uses 1024 --
# possibly a typo; left unchanged so the recorded score stays comparable.
model.fit(x_train, y_train, epochs=20, batch_size=1034, verbose=0)
y12 = model.predict_proba(x_test)
print('')
print(log_loss(y_test, y12))
del model  # only the predictions are kept for the final blend

0.549103642769


In [62]:
# cv
# Blend the networks with an unweighted mean of their predicted probabilities.
nn_preds = [y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12]

# sum() adds left to right starting from the first array and builds new arrays,
# so none of the individual prediction arrays is modified.
y_nn = (sum(nn_preds[1:], nn_preds[0]) / len(nn_preds)).T[0]

# Log loss of the blended prediction against y_test (shown as the cell output).
log_loss(y_test, y_nn)

0.54613444609274819