In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier
import time

In [2]:
X = np.genfromtxt('data/X.csv', delimiter=',')
Y = np.genfromtxt('data/Y.csv', delimiter=',')

X_comp, X_test, Y_comp, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
Xtr, Xva, Ytr, Yva = train_test_split(X_comp, Y_comp, test_size=0.2)

In [4]:
# grid search results (hyper-values): 

lr = LogisticRegression(C=2, penalty='l1')

mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes=(100,), activation='identity', 
                            solver='sgd', alpha=0.01, learning_rate='adaptive')

gb = GradientBoostingClassifier(max_depth=3,loss="deviance",learning_rate=0.1,
                                 n_estimators=1500,min_samples_split=20,min_samples_leaf=9,
                                 max_features="sqrt", subsample=0.5)


In [5]:
combined = VotingClassifier(estimators=[('lr', lr), ('nn', mlp), ('gb', gb)],
                         voting='soft', flatten_transform=True) # weights=?

print("training started")
combined.fit(Xtr,Ytr)
print("training finished")

combined_roc = roc_auc_score(Yva, combined.predict_proba(Xva)[:,1])
print("validation roc:", combined_roc)

print("training error:", 1 - combined.score(Xtr, Ytr))
print("validation error:", 1 - combined.score(Xva, Yva))

training started




training finished
validation roc: 0.7553522171989928
training error: 0.2995640148141203
validation error: 0.31064441527595477


In [7]:
Y_test = np.vstack((np.arange(X_test.shape[0]), combined.predict_proba(X_test)[:,1])).T
np.savetxt('Y_test.txt',Y_test,'%d,%.2f',header='Id,Predicted',comments='',delimiter=',')

In [8]:
df_X_test = pd.DataFrame(X_test)
df_X_test.to_csv('X_test.csv',index=False)

In [None]:
df_X_test = pd.DataFrame(Y_test)
df_X_test.to_csv('X_test.csv',index=False)