In [83]:
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier

In [30]:
#!/usr/bin/python
import random


def makeTerrainData(n_points=1000):
###############################################################################
### make the toy dataset
    random.seed(42)
    grade = [random.random() for ii in range(0,n_points)]
    bumpy = [random.random() for ii in range(0,n_points)]
    error = [random.random() for ii in range(0,n_points)]
    y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)]
    for ii in range(0, len(y)):
        if grade[ii]>0.8 or bumpy[ii]>0.8:
            y[ii] = 1.0

### split into train/test sets
    X = [[gg, ss] for gg, ss in zip(grade, bumpy)]
    split = int(0.75*n_points)
    X_train = X[0:split]
    X_test  = X[split:]
    y_train = y[0:split]
    y_test  = y[split:]

    grade_sig = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==0]
    bumpy_sig = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==0]
    grade_bkg = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==1]
    bumpy_bkg = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==1]

#    training_data = {"fast":{"grade":grade_sig, "bumpiness":bumpy_sig}
#            , "slow":{"grade":grade_bkg, "bumpiness":bumpy_bkg}}


    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]
    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]
    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]
    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]

    test_data = {"fast":{"grade":grade_sig, "bumpiness":bumpy_sig}
            , "slow":{"grade":grade_bkg, "bumpiness":bumpy_bkg}}

    return X_train, y_train, X_test, y_test
#    return training_data, test_data

In [118]:
nb = GaussianNB()
sv = SVC()
nn = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
dt = DecisionTreeClassifier(min_samples_split = 5)
rf = RandomForestClassifier(n_estimators = 100, min_samples_split = 5, bootstrap = True)
kn = KNeighborsClassifier(15,weights='distance')
ad =AdaBoostClassifier(n_estimators = 100)

In [119]:
xtrain, ytrain,xtest,ytest = makeTerrainData()

In [120]:
nb.fit(xtrain,ytrain)
sv.fit(xtrain,ytrain)
nn.fit(xtrain,ytrain)
dt.fit(xtrain,ytrain)
rf.fit(xtrain,ytrain)
kn.fit(xtrain,ytrain)
ad.fit(xtrain,ytrain)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=100, random_state=None)

In [121]:
ypredictnb = nb.predict(xtest)
ypredictsv = sv.predict(xtest)
ypredictnn = nn.predict(xtest)
ypredictdt = dt.predict(xtest)
ypredictrf = rf.predict(xtest)
ypredictkn = kn.predict(xtest)
ypredictad = ad.predict(xtest)

In [123]:
from sklearn.metrics import accuracy_score
print 'naive bayes: ' + str(round(accuracy_score(ytest, ypredictnb),4))
print 'svm: ' + str(round(accuracy_score(ytest, ypredictsv),4))
print 'neural network: ' + str(round(accuracy_score(ytest, ypredictnn),4))
print 'decision trees: ' + str(round(accuracy_score(ytest, ypredictdt),4))
print 'random forest: ' + str(round(accuracy_score(ytest, ypredictrf),4))
print 'k nearest neighbor: ' + str(round(accuracy_score(ytest,ypredictkn),4))
print 'adaboost: ' + str(round(accuracy_score(ytest,ypredictad),4))

naive bayes: 0.884
svm: 0.92
neural network: 0.912
decision trees: 0.912
random forest: 0.916
k nearest neighbor: 0.94
adaboost: 0.924
