# Training Model Examples

## K-Nearest neighbors

In [None]:
import numpy as np
from sklearn import neighbors
from sklearn import metrics


X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)

Xtr = X[:10000,:]
Ytr = Y[:10000]

Xval = X[10000:20000]
Yval = Y[10000:20000]

knnN = [1, 2, 3, 5, 10, 15, 20]
for n in knnN:
    knnClassifier = neighbors.KNeighborsClassifier(n_neighbors=n, weights="distance", n_jobs=-1)
    knnClassifier.fit(Xtr, Ytr)

    Yhat = knnClassifier.predict_proba(Xval)[:,1]

    print("ROC :" metrics.roc_auc_score(Yval, Yhat), "Training error: ", 1 - knnClassifier.score(Xtr, Ytr), "Validation error: ", 1 - knnClassifier.score(Xval, Yval))

## Random Forest

In [11]:
from sklearn import ensemble

rfc = ensemble.RandomForestClassifier(
    n_estimators=1000, min_samples_leaf=4, n_jobs=-1, oob_score=True)

rfc.fit(Xtr, Ytr)

rfcRoc = metrics.roc_auc_score(Yval, rfc.predict_proba(Xval)[:,1])
print(rfcRoc)

print("training error:", 1 - rfc.score(Xtr, Ytr))
print("validation error:", 1 - rfc.score(Xval, Yval))

training started
0.697231282355
training error: 0.0997
validation error: 0.3033


## Neural network

In [14]:
from sklearn import neural_network

mlpc = neural_network.MLPClassifier(hidden_layer_sizes=(100,))
mlpc.fit(Xtr, Ytr)
rfcRoc = metrics.roc_auc_score(Yval, mlpc.predict_proba(Xval)[:,1])
print(rfcRoc)

print("training error:", 1 - mlpc.score(Xtr, Ytr))
print("validation error:", 1 - mlpc.score(Xval, Yval))

0.568182252207
training error: 0.4626
validation error: 0.4713
