In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import mltools as ml
from numpy import atleast_2d as twod
import sklearn

In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
# Set scikit-learn's global `working_memory` option to 4096
# (per the scikit-learn docs the unit is MiB and it bounds temporary arrays in chunked operations).
sklearn.set_config(working_memory=4096)

In [3]:
# Load the training features and labels from the course data directory.
X = np.genfromtxt('Data/X_train.txt', delimiter=None)
Y = np.genfromtxt('Data/Y_train.txt', delimiter=None)

# Seed before shuffling so the train/validation split -- and therefore the AUC
# figures reported further down -- can be reproduced after Restart & Run All.
# NOTE(review): assumes ml.shuffleData draws from NumPy's global RNG -- confirm.
np.random.seed(0)
X,Y = ml.shuffleData(X,Y)

In [4]:
# Partition the shuffled data into a training part (Xtr, Ytr) and a held-out part (Xte, Yte).
# NOTE(review): 0.5 is presumably the fraction assigned to training -- confirm against mltools.splitData.
Xtr,Xte,Ytr,Yte = ml.splitData(X,Y, 0.5)

In [5]:
# Learn the standardization statistics from the training split only, then apply
# that same fitted transform to both the training and the held-out features.
scaler = StandardScaler().fit(Xtr)
Xtr, Xte = scaler.transform(Xtr), scaler.transform(Xte)

In [6]:
# Six-hidden-layer tanh network trained with Adam.
# early_stopping=True makes fit() hold out part of the training data as an
# internal validation set and stop once its score stops improving.
reg = MLPClassifier(activation='tanh',
                    solver='adam',
                    alpha=1e-5,
                    hidden_layer_sizes=(250, 375, 750, 250, 50, 125),
                    random_state=1,          # fixes weight init / batching for repeatable fits
                    early_stopping=True,
                    max_iter=500,
                    learning_rate='adaptive')  # NOTE: scikit-learn only consults this for solver='sgd'; inert with adam
# The output activation is not a user setting: fit() assigns `out_activation_`
# itself from the label type (logistic for binary targets, softmax for
# multiclass), so the former pre-fit assignment `reg.out_activation_ = 'softmax'`
# was always overwritten and has been dropped.

In [7]:
# Train the network on the standardized training split; the cell displays the fitted estimator's repr.
reg.fit(Xtr, Ytr)

MLPClassifier(activation='tanh', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(250, 375, 750, 250, 50, 125),
              learning_rate='adaptive', learning_rate_init=0.001, max_fun=15000,
              max_iter=500, momentum=0.9, n_iter_no_change=10,
              nesterovs_momentum=True, power_t=0.5, random_state=1,
              shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
              verbose=False, warm_start=False)

In [8]:
def auc(X, Y, model=None):
    """Area under the ROC curve of a binary classifier's scores on (X, Y).

    The score for each row is the classifier's probability for its second
    class (column 1 of ``predict_proba``).  If that is unavailable, the hard
    output of ``predict`` is used instead.  The AUC is derived from the
    tie-averaged ranks of the scores via the Mann-Whitney U statistic.

    Parameters
    ----------
    X : array-like
        Feature rows, passed to the classifier unchanged.
    Y : array-like
        True labels, one per row of X.  Any shape is accepted and flattened;
        labels are compared against 0 (negative) and 1 (positive).
    model : object, optional
        Fitted classifier exposing ``predict_proba`` or ``predict``.  When
        omitted, the notebook-level ``reg`` is used, so existing two-argument
        calls behave as before.

    Returns
    -------
    float
        The AUC estimate.

    Raises
    ------
    ValueError
        If the number of scores differs from the number of labels, or if
        either class 0 or class 1 is absent from Y.
    """
    clf = reg if model is None else model

    try:                  # compute 'response' (soft binary classification score)
        soft = clf.predict_proba(X)[:,1]  # p(class = 2nd)
    except (AttributeError, IndexError):  # or use the 'hard' prediction if soft is unavailable
        soft = clf.predict(X)

    # Flatten scores and labels to 1-D so the boolean masks below line up with
    # the rank vector (a column-vector Y would otherwise fail to index it).
    soft = np.asarray(soft).T.ravel()
    Y = np.asarray(Y).ravel()
    if soft.shape[0] != Y.shape[0]:
        # Without this check a longer Y would be silently truncated by Y[indices].
        raise ValueError('Number of scores does not match number of labels')

    indices = np.argsort(soft)         # sort data by score value
    Y = Y[indices]
    sorted_soft = soft[indices]

    # compute rank (averaged for ties) of sorted data:
    #   dif marks the first element of every run of equal scores (plus an end sentinel),
    #   r1 holds the start offset of each run, r2 the mean 1-based rank within each run,
    #   and the cumulative sum maps every element to its run's mean rank.
    dif = np.hstack( ([True],np.diff(sorted_soft)!=0,[True]) )
    r1  = np.argwhere(dif).flatten()
    r2  = r1[0:-1] + 0.5*(r1[1:]-r1[0:-1]) + 0.5
    rnk = r2[np.cumsum(dif[:-1])-1]

    # number of true negatives and positives (vectorized count, no Python-level loop)
    n0,n1 = np.count_nonzero(Y == 0), np.count_nonzero(Y == 1)

    if n0 == 0 or n1 == 0:
        raise ValueError('Data of both class values not found')

    # compute AUC using Mann-Whitney U statistic
    result = (np.sum(rnk[Y == 1]) - n1 * (n1 + 1.0) / 2.0) / n1 / n0
    return result

In [9]:
# AUC on the training split (the data the model was fitted on).
auc(Xtr, Ytr)

0.8282433523903281

In [10]:
# AUC on the held-out split, which the model did not see during fitting.
auc(Xte, Yte)

0.732008026921417

In [11]:
# Score the unlabeled leaderboard features with the fitted model and write the submission file.
Xleader = np.genfromtxt('Data/X_test.txt', delimiter=None)
Xleader = scaler.transform(Xleader)   # reuse the scaler fitted on the training split
predict_test = reg.predict_proba(Xleader)

# Two columns: the row index as ID and the predicted probability of the second class.
# Stored under its own name so the held-out labels in `Yte` are not overwritten
# (rebinding `Yte` here would break re-running the validation AUC cell).
Y_submit = np.vstack((np.arange(Xleader.shape[0]), predict_test[:,1])).T
np.savetxt('Y_submit.txt',Y_submit,'%d, %.2f',header='ID,Prob1',comments='',delimiter=',')