# Training Model Examples

## K-Nearest neighbors

In [None]:
import numpy as np
from sklearn import neighbors
from sklearn import metrics


X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)

Xtr = X[:10000,:]
Ytr = Y[:10000]

Xval = X[10000:20000]
Yval = Y[10000:20000]

knnN = [1, 2, 3, 5, 10, 15, 20]
for n in knnN:
    knnClassifier = neighbors.KNeighborsClassifier(n_neighbors=n, weights="distance", n_jobs=-1)
    knnClassifier.fit(Xtr, Ytr)

    Yhat = knnClassifier.predict_proba(Xval)[:,1]

    print("ROC :" metrics.roc_auc_score(Yval, Yhat), "Training error: ", 1 - knnClassifier.score(Xtr, Ytr), "Validation error: ", 1 - knnClassifier.score(Xval, Yval))

## Random Forest

In [20]:
from sklearn import ensemble

rfc = ensemble.RandomForestClassifier(
    n_estimators=500, min_samples_leaf=5, n_jobs=-1, oob_score=True)

rfc.fit(Xtr, Ytr)

rfcRoc = metrics.roc_auc_score(Yval, rfc.predict_proba(Xval)[:,1])

print("ROC :", rfcRoc)
print("Training Error: ", 1 - rfc.score(Xtr, Ytr))
print("Validation Error: ", 1 - rfc.score(Xval, Yval))

ROC : 0.695675438847
Training Error:  0.1296
Validation Error:  0.3045


## Neural network

In [18]:
from sklearn import neural_network

mlpc = neural_network.MLPClassifier(hidden_layer_sizes=(100,))
mlpc.fit(Xtr, Ytr)
mlpRoc = metrics.roc_auc_score(Yval, mlpc.predict_proba(Xval)[:,1])

print("ROC :", mlpRoc)
print("Training error: ", 1 - mlpc.score(Xtr, Ytr))
print("Validation error: ", 1 - mlpc.score(Xval, Yval))

ROC : 0.585523056124
Training error:  0.3206
Validation error:  0.3331


## Naive Bayes

In [19]:
from sklearn import naive_bayes

nbgc = naive_bayes.GaussianNB()
nbgc.fit(X, Y)
nbgRoc = metrics.roc_auc_score(Yval, nbgc.predict_proba(Xval)[:,1])

print("ROC :", rfcRoc)
print("Training Error: ", 1 - nbgc.score(Xtr, Ytr))
print("Validation Error: ", 1 - nbgc.score(Xval, Yval))

ROC : 0.697099758619
Training Error:  0.3885
Validation Error:  0.391
