## Импорты

In [32]:
import pandas as pd
import numpy as np
from sklearn.utils import check_array
import keras.backend as K
import tensorflow as tf


import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split

from keras.layers import Dense,Dropout,BatchNormalization ,Input,Conv1D
from keras.models import Model
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.callbacks import LearningRateScheduler,TensorBoard,Callback
from keras.utils import np_utils



# Locations of the input data, the submission output folder and the sample submission file.
# NOTE(review): these are absolute paths on one machine; adjust them before re-running elsewhere.
PATH_TO_DATA = "/home/roman/finunichamp2018/2/"
PATH_TO_SUBMIT = "/home/roman/finunichamp2018/2/submits/"
PATH_TO_SAMPLE_SUBMIT = "/home/roman/finunichamp2018/2/insclass_sample.csv"

# Row offset used further down to slice the feature table into its train and test parts.
sep = 151405

In [4]:
def submit(y_pred,name):
    """Save predictions in the layout of the sample submission file.

    The sample submission at PATH_TO_SAMPLE_SUBMIT is read with ``id`` as its
    index, its ``target`` column is set to ``y_pred``, and the result is
    written to ``PATH_TO_SUBMIT + name + ".csv"``.

    Parameters
    ----------
    y_pred : values assigned to the ``target`` column.
    name : file name (without extension) of the CSV to create.
    """
    template = pd.read_csv(PATH_TO_SAMPLE_SUBMIT, index_col="id")
    filled = template.assign(target=y_pred)
    destination = PATH_TO_SUBMIT + name + ".csv"
    filled.to_csv(destination)

In [6]:
#-----------------------------------------------------------------------------------------------------------------------------------------------------
# AUC for a binary classifier
def auc(y_true, y_pred):
    """Approximate the area under the ROC curve as a backend tensor.

    binary_PTA and binary_PFA are evaluated at 1000 evenly spaced thresholds
    in [0, 1]; the area is the sum of each true-alert rate multiplied by the
    drop in false-alert rate between consecutive thresholds.

    NOTE(review): both helpers sum over every element of their inputs, so when
    ``y_true`` is one-hot with two columns (as the model in this notebook uses)
    the two columns are pooled together — confirm this is the intended metric.
    """
    thresholds = np.linspace(0, 1, 1000)
    true_alert_rates = tf.stack(
        [binary_PTA(y_true, y_pred, t) for t in thresholds], axis=0)
    false_alert_rates = tf.stack(
        [binary_PFA(y_true, y_pred, t) for t in thresholds], axis=0)

    # Prepend 1.0 so the first bin is measured from a false-alert rate of 1.
    padded = tf.concat([tf.ones((1,)), false_alert_rates], axis=0)
    # Negated forward difference: positive while the false-alert rate decreases.
    bin_widths = -(padded[1:] - padded[:-1])

    return K.sum(true_alert_rates * bin_widths, axis=0)

#-----------------------------------------------------------------------------------------------------------------------------------------------------
# PFA, prob false alert for binary classifier
def binary_PFA(y_true, y_pred, threshold=0.5):
    """Return the false-alert rate FP / N at ``threshold`` as a scalar tensor.

    Parameters
    ----------
    y_true : tensor of 0/1 labels.
    y_pred : tensor of scores; an element counts as an alert when it is
        ``>= threshold``.
    threshold : decision threshold. The default is a plain float, so no
        backend variable is created when the function is defined (a
        definition-time ``K.variable`` stays tied to the graph that was
        current at that moment).

    Returns
    -------
    FP / N, with all sums taken over every element of the inputs. When there
    are no negative labels (N == 0) the result is 0 instead of NaN.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negative labels
    N = K.sum(1 - y_true)
    # FP = total number of false alerts, alerts from the negative class labels
    FP = K.sum(y_pred - y_pred * y_true)
    # N is a count, so clamping it at epsilon only changes the N == 0 case,
    # where FP is 0 as well and the rate becomes 0 rather than 0/0.
    return FP / K.maximum(N, K.epsilon())
#-----------------------------------------------------------------------------------------------------------------------------------------------------
# P_TA prob true alerts for binary classifier
def binary_PTA(y_true, y_pred, threshold=0.5):
    """Return the true-alert rate TP / P at ``threshold`` as a scalar tensor.

    Parameters
    ----------
    y_true : tensor of 0/1 labels.
    y_pred : tensor of scores; an element counts as an alert when it is
        ``>= threshold``.
    threshold : decision threshold. The default is a plain float, so no
        backend variable is created when the function is defined (a
        definition-time ``K.variable`` stays tied to the graph that was
        current at that moment).

    Returns
    -------
    TP / P, with all sums taken over every element of the inputs. When there
    are no positive labels (P == 0) the result is 0 instead of NaN.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # P = total number of positive labels
    P = K.sum(y_true)
    # TP = total number of correct alerts, alerts from the positive class labels
    TP = K.sum(y_pred * y_true)
    # P is a count, so clamping it at epsilon only changes the P == 0 case,
    # where TP is 0 as well and the rate becomes 0 rather than 0/0.
    return TP / K.maximum(P, K.epsilon())

In [9]:
# Feature table; it is sliced into train and test rows in the next cell.
linear = pd.read_csv(PATH_TO_DATA + "cleared_linear.csv")
# Only the "target" column of target.csv is kept as the label vector.
y = pd.read_csv(PATH_TO_DATA + "target.csv")["target"]

In [10]:
# The first `sep` rows of the feature table are the training part, the rest is the test part.
X_train, X_test = linear[:sep], linear[sep:]

In [11]:
# Stratified hold-out split: 30% of the training rows go to validation.
# The random state is fixed so the split is repeatable.
X_train_part, X_val, y_train_part, y_val = train_test_split(
    X_train, y,
    test_size=0.3,
    random_state=17,
    stratify=y,
)

# One-hot encode both label vectors into two columns, matching the 2-unit softmax output.
y_train_part = np_utils.to_categorical(y_train_part, num_classes=2)
y_val = np_utils.to_categorical(y_val, num_classes=2)

In [12]:
# Training hyper-parameters: mini-batch size and number of epochs.
batch_size, epochs = 20, 10

In [13]:
# Alias of the training features; the model cell reads data.shape[1] to size the input layer.
data=X_train

##  Построение моделей

In [14]:
#try1
def _selu_block(tensor, units):
    """Apply Dense(selu, he_uniform) -> BatchNormalization -> Dropout(0.4) to `tensor`.

    Returns the three intermediate outputs so each can keep its own name.
    """
    hidden = Dense(units, activation='selu', kernel_initializer='he_uniform')(tensor)
    normed = BatchNormalization()(hidden)
    dropped = Dropout(0.4)(normed)
    return hidden, normed, dropped


# Input width follows the number of feature columns.
inp = Input(shape=(data.shape[1],))

# Four identical hidden blocks: three with 128 units, the last with 256.
dense1, batch_norm1, dropout1 = _selu_block(inp, 128)
dense2, batch_norm2, dropout2 = _selu_block(dropout1, 128)
dense3, batch_norm3, dropout3 = _selu_block(dropout2, 128)
dense4, batch_norm4, dropout4 = _selu_block(dropout3, 256)

# Two-way softmax head, matching the one-hot encoded targets.
out = Dense(2, activation='softmax', kernel_initializer='glorot_uniform')(dropout4)

model = Model(inputs=inp, outputs=out)
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=["accuracy", auc],
)

In [15]:
# Train with the `batch_size` and `epochs` defined in the hyper-parameter cell instead of
# repeating the literals here, so changing them there actually affects training.
# A further 10% of the training part is held out by Keras for per-epoch validation.
model.fit(X_train_part,
          y_train_part,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.1,
          verbose=2)

Train on 95384 samples, validate on 10599 samples
Epoch 1/10
 - 528s - loss: 0.3265 - acc: 0.9001 - auc: 0.9182 - val_loss: 0.2734 - val_acc: 0.9163 - val_auc: 0.9354
Epoch 2/10
 - 513s - loss: 0.2835 - acc: 0.9164 - auc: 0.9329 - val_loss: 0.2626 - val_acc: 0.9210 - val_auc: 0.9370
Epoch 3/10
 - 521s - loss: 0.2728 - acc: 0.9210 - auc: 0.9361 - val_loss: 0.2561 - val_acc: 0.9270 - val_auc: 0.9397
Epoch 4/10
 - 526s - loss: 0.2675 - acc: 0.9232 - auc: 0.9381 - val_loss: 0.2570 - val_acc: 0.9282 - val_auc: 0.9354
Epoch 5/10
 - 531s - loss: 0.2648 - acc: 0.9242 - auc: 0.9392 - val_loss: 0.2546 - val_acc: 0.9264 - val_auc: 0.9432
Epoch 6/10
 - 538s - loss: 0.2613 - acc: 0.9255 - auc: 0.9411 - val_loss: 0.2663 - val_acc: 0.9235 - val_auc: 0.9331
Epoch 7/10
 - 530s - loss: 0.2602 - acc: 0.9254 - auc: 0.9418 - val_loss: 0.2720 - val_acc: 0.9234 - val_auc: 0.9354
Epoch 8/10
 - 521s - loss: 0.2588 - acc: 0.9262 - auc: 0.9420 - val_loss: 0.2501 - val_acc: 0.9290 - val_auc: 0.9438
Epoch 9/10
 - 

<keras.callbacks.History at 0x7f002b1e9898>

In [16]:
# Score the hold-out split; the result lists the loss followed by the compiled metrics (accuracy, auc).
model.evaluate(X_val,y_val)



[0.24699621957515056, 0.929945841243311, 0.9468392047367261]

In [17]:
# Softmax outputs for the test rows, one column per class.
pred=model.predict(X_test)

In [33]:
# Submit column 1 of the softmax output (presumably the probability of target == 1 — confirm) as "nn.csv".
submit(pred[:,1],"nn")