In [1]:
import numpy as np 
import pandas as pd 

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.utils import shuffle
from sklearn.metrics import f1_score

from keras.layers import Dense, Activation, Dropout
from keras.models import Sequential
from keras.losses import binary_crossentropy
from keras.metrics import mean_absolute_error
from keras.optimizers import Adam, Adagrad, RMSprop
from keras.regularizers import l2

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Read the train / test feature tables from the shared data directory.
path = '../../project1/data/'
train, test = (pd.read_csv(path + name) for name in ('train_all2.csv', 'test_all2.csv'))

# 'link' is the target column; the remaining listed columns are dropped from
# the feature matrices of both tables.
y_train = train['link']
X_train = train.drop(['id1', 'id2', 'link', 'rno1', 'rno2', 'pa'], axis=1).values
X_test = test.drop(['id1', 'id2', 'rno1', 'rno2', 'pa'], axis=1).values

# Fit the standardisation on the training features only, then apply the same
# fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Shuffle features and labels together with a fixed seed.
X_train, y_train = shuffle(X_train, y_train, random_state=0)

In [3]:
# Fully connected binary classifier: ReLU hidden layers of width 50, 30, 15
# and 5 (each with an L2 kernel penalty of 0.01), dropout after the first
# three hidden layers, and a single sigmoid output unit.
dropout = 0.4
model = Sequential([
    Dense(input_dim=X_train.shape[1], units=50, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(dropout),
    Dense(units=30, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(dropout),
    Dense(units=15, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(dropout),
    Dense(units=5, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(units=1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Kept as the cell's final expression so the returned History object is shown.
model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f7610834ba8>

In [4]:
# Score every training row with the fitted network; the result is indexed as a
# 2-D array (`[:, 0]`) by the cells that follow.
y_train_pred = model.predict(X_train)

In [11]:
# Display the first (index 0) output column of the training predictions.
y_train_pred[:,0]

array([0.02634344, 0.9977798 , 0.02920996, ..., 0.99942493, 0.02290037,
       0.01602364], dtype=float32)

In [12]:
# Choose the decision threshold that maximises F1 on the training predictions.
#
# Rows are ranked by predicted score (highest first). Cutting the ranking after
# row k means "predict positive for the top k+1 rows"; precision, recall and F1
# are computed for every such cut with cumulative sums instead of a Python loop.
#
# NOTE(review): the threshold is tuned on the same rows the model was fitted on,
# so the printed scores are likely optimistic -- consider a held-out split.
reg_train = np.zeros((len(y_train), 2))
reg_train[:, 0] = np.array(y_train)      # column 0: true label
reg_train[:, 1] = y_train_pred[:, 0]     # column 1: predicted score
indice = np.argsort(reg_train[:, 1])[::-1]
reg_train = reg_train[indice]

p_best, r_best, f1_best, ts = 0, 0, 0, 0
num_ones = np.sum(reg_train[:, 0])

# With no rows or no positive labels every F1 is undefined; keep the zeros.
if len(reg_train) > 0 and num_ones > 0:
    scores = reg_train[:, 1]
    tp = np.cumsum(reg_train[:, 0] == 1)          # true positives within the top k+1 rows
    p = tp / np.arange(1, len(reg_train) + 1)     # precision at each cut
    r = tp / num_ones                             # recall at each cut

    # F1 is 0/0 while no positive has been seen yet; define it as 0 there
    # instead of producing NaN (or raising with plain Python numbers).
    pr_sum = p + r
    f1 = np.zeros_like(pr_sum)
    np.divide(2 * p * r, pr_sum, out=f1, where=pr_sum > 0)

    # A threshold cannot separate rows that share the same score, so only the
    # last row of each run of equal scores is a cut a threshold can realise.
    is_cut = np.append(scores[1:] != scores[:-1], True)
    f1[~is_cut] = 0.0

    # argmax returns the first maximum, i.e. the highest-scoring cut among ties.
    best = int(np.argmax(f1))
    if f1[best] > f1_best:
        p_best, r_best, f1_best = float(p[best]), float(r[best]), float(f1[best])
        # Score of the last row INCLUDED in the positive set: rows with
        # score >= ts form the prediction that achieves f1_best.
        ts = float(scores[best])
print (p_best, r_best, f1_best, ts)

0.9752246891411045 0.9658341539104228 0.9705067065647427 0.2721087336540222


In [None]:
# Score the test rows and binarise them with the threshold `ts` selected on the
# training predictions.
y_test_pred = model.predict(X_test)
# `ts` is the score of the last row that was counted as a predicted positive
# when the best F1 was measured, so the comparison must be inclusive; a strict
# `>` would drop rows scoring exactly `ts` and apply a different cut-off than
# the one that was selected.
y_test_pred = y_test_pred >= ts
y_test_pred = y_test_pred.astype(int)

# Write the 0/1 predictions as a single 'category' column, with the row index
# exported under the name 'id'.
df = pd.DataFrame(y_test_pred, columns=['category'])
df.index.name = 'id'
df.to_csv('../../project1/result/nn2_test_pred.csv', index=True, header=True)