In [2]:
import pandas as pd
import numpy as np


# Load the pre-split feature matrices and label files from the data/ directory.
X_train = pd.read_csv("data/X_train.csv")
X_test = pd.read_csv("data/X_test.csv")

# Keep only the second column (position 1) of each label file as a Series.
# NOTE(review): the first column is presumably a saved row index - confirm.
# `.iloc` replaces the former `.ix` indexer, which was deprecated in pandas 0.20
# and removed in pandas 1.0; selection here is purely positional.
y_train = pd.read_csv("data/y_train.csv").iloc[:, 1]
y_test = pd.read_csv("data/y_test.csv").iloc[:, 1]

In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Base estimator; the fixed random_state keeps the boosting run repeatable.
gbc = GradientBoostingClassifier(random_state=0)

# Hyper-parameter grid. Every entry holds a single value, so the search
# evaluates exactly one configuration under 5-fold cross-validation.
params = {
    "n_estimators": [500],
    "min_samples_split": [6],
    "min_samples_leaf": [2],
    "max_depth": [3],
}

# n_jobs=-1 lets scikit-learn use all available CPU cores for the folds.
cv = GridSearchCV(gbc, param_grid=params, cv=5, n_jobs=-1)
cv.fit(X_train, y_train)

# Report the selected parameters and their mean cross-validated score.
print(cv.best_params_)
print(cv.best_score_)

{'n_estimators': 500, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_depth': 3}
0.617704517705


In [4]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Predicted class labels and per-class probabilities for the held-out set,
# produced by the fitted grid-search object.
y_pred = cv.predict(X_test)
y_pred_proba = cv.predict_proba(X_test)

# Accuracy: share of test rows whose predicted label equals the true label.
accuracy = np.mean(y_pred == y_test)
print("Accuracy: %.4f" % accuracy)

# Confusion matrix wrapped in a labelled DataFrame for readable printing.
# NOTE(review): the labels assume class 0 = not granted, class 1 = granted - confirm.
conmat = np.array(confusion_matrix(y_test, y_pred))
confusion = pd.DataFrame(
    conmat,
    index=['not_granted', 'granted'],
    columns=['predicted_not_granted', 'predicted_granted'],
)
print(confusion)

# Per-class precision / recall / F1 summary.
cls_rep = classification_report(y_test, y_pred)
print(cls_rep)

# ROC AUC is computed from the second probability column (index 1).
auc = roc_auc_score(y_test, y_pred_proba[:, 1])
print("ROC AUC: %.5f" % auc)

Accuracy: 0.9015
             predicted_not_granted  predicted_granted
not_granted                    301                 28
granted                         23                166
             precision    recall  f1-score   support

          0       0.93      0.91      0.92       329
          1       0.86      0.88      0.87       189

avg / total       0.90      0.90      0.90       518

ROC AUC: 0.96341


In [5]:
# Save the second probability column (index 1) as a single-column CSV,
# written without a header row or an index column.
result = pd.DataFrame(y_pred_proba[:, 1])
result.to_csv(
    "result/result_gb.csv",
    index=False,
    header=False,
)