# Training Model Examples

## K-Nearest Neighbors

In [25]:
import numpy as np
from sklearn import neighbors
from sklearn import metrics
from sklearn.utils import shuffle


X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)

X,Y = shuffle(X, Y)

Xtr = X[:10000,:]
Ytr = Y[:10000]

Xval = X[10000:20000]
Yval = Y[10000:20000]

knnN = [1, 2, 3, 5, 10, 15, 20]
for n in knnN:
    knnClassifier = neighbors.KNeighborsClassifier(n_neighbors=n, weights="distance", n_jobs=-1)
    knnClassifier.fit(Xtr, Ytr)

    Yhat = knnClassifier.predict_proba(Xval)[:,1]

    print("ROC :", metrics.roc_auc_score(Yval, Yhat), "Training error: ", 1 - knnClassifier.score(Xtr, Ytr), "Validation error: ", 1 - knnClassifier.score(Xval, Yval))

ROC : 0.585716254583 Training error:  0.0062 Validation error:  0.373
ROC : 0.607340444885 Training error:  0.0062 Validation error:  0.3725
ROC : 0.614868193721 Training error:  0.0059 Validation error:  0.3593
ROC : 0.623432685576 Training error:  0.0059 Validation error:  0.3492
ROC : 0.630496518647 Training error:  0.0059 Validation error:  0.3411
ROC : 0.631805066292 Training error:  0.0059 Validation error:  0.336
ROC : 0.63483055211 Training error:  0.0059 Validation error:  0.3324


## Random Forest

In [23]:
from sklearn import ensemble

rfc = ensemble.RandomForestClassifier(
    n_estimators=500, min_samples_leaf=5, n_jobs=-1, oob_score=True)

rfc.fit(Xtr, Ytr)

rfcRoc = metrics.roc_auc_score(Yval, rfc.predict_proba(Xval)[:,1])

print("ROC :", rfcRoc)
print("Training Error: ", 1 - rfc.score(Xtr, Ytr))
print("Validation Error: ", 1 - rfc.score(Xval, Yval))

ROC : 0.696520420545
Training Error:  0.1295
Validation Error:  0.3047


## Neural network

In [4]:
from sklearn import neural_network

hiddenNodes = [100,200,300,400,500,600,700,800,900,1000]

print( "2 Layers ")
for i in hiddenNodes:
    twoLayerModel = neural_network.MLPClassifier(hidden_layer_sizes=(i,i))
    twoLayerModel.fit(Xtr, Ytr)
    twoLayerModelRoc = metrics.roc_auc_score(Yval, twoLayerModel.predict_proba(Xval)[:,1])
    print("Hidden Nodes: " + str(i) + ", ROC :", twoLayerModelRoc, "Training error: ", 1 - twoLayerModel.score(Xtr, Ytr),"Validation error: ", 1 - twoLayerModel.score(Xval, Yval))
    
print( "3 Layers ")
for i in hiddenNodes:
    threeLayerModel = neural_network.MLPClassifier(hidden_layer_sizes=(i,i,i))
    threeLayerModel.fit(Xtr, Ytr)
    threeLayerModelRoc = metrics.roc_auc_score(Yval, threeLayerModel.predict_proba(Xval)[:,1])
    print("Hidden Nodes: " + str(i) + ", ROC :", threeLayerModelRoc, "Training error: ", 1 - threeLayerModel.score(Xtr, Ytr),"Validation error: ", 1 - threeLayerModel.score(Xval, Yval))

2 Layers 
('Hidden Nodes: 100, ROC :', 0.53406107692307692, 'Training error: ', 0.33589999999999998, 'Validation error: ', 0.34140000000000004)
('Hidden Nodes: 200, ROC :', 0.58897450549450547, 'Training error: ', 0.32920000000000005, 'Validation error: ', 0.33530000000000004)
('Hidden Nodes: 300, ROC :', 0.61125767032967038, 'Training error: ', 0.35460000000000003, 'Validation error: ', 0.36009999999999998)
('Hidden Nodes: 400, ROC :', 0.49531958241758239, 'Training error: ', 0.34279999999999999, 'Validation error: ', 0.3468)
('Hidden Nodes: 500, ROC :', 0.57363252747252746, 'Training error: ', 0.33979999999999999, 'Validation error: ', 0.34179999999999999)
('Hidden Nodes: 600, ROC :', 0.57990276923076922, 'Training error: ', 0.62490000000000001, 'Validation error: ', 0.62359999999999993)
('Hidden Nodes: 700, ROC :', 0.56125274725274721, 'Training error: ', 0.52800000000000002, 'Validation error: ', 0.5363)
('Hidden Nodes: 800, ROC :', 0.59888718681318676, 'Training error: ', 0.483199

## Naive Bayes

In [22]:
from sklearn import naive_bayes

nbgc = naive_bayes.GaussianNB()
nbgc.fit(X, Y)
nbgRoc = metrics.roc_auc_score(Yval, nbgc.predict_proba(Xval)[:,1])

print("ROC :", rfcRoc)
print("Training Error: ", 1 - nbgc.score(Xtr, Ytr))
print("Validation Error: ", 1 - nbgc.score(Xval, Yval))

ROC : 0.695675438847
Training Error:  0.3885
Validation Error:  0.391
