## Testing the Classical Models
### Imports
Durch Sklearn ist es sehr einfach verschiedene Module auf unser Datenset anzuwenden.

In [135]:
import numpy as np
from sklearn.neighbors import NearestCentroid
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import sys

In [136]:
def run(x_train, y_train, x_test, y_test, classifier):
    classifier.fit(x_train, y_train)
    return classifier.score(x_test, y_test)

In [137]:
def split(x, y, currentFold, totalFold):
    foldSize = int(y.shape[0]/totalFold)
    folds = []
    for i in range(totalFold):
        folds.append([x[(foldSize * i):(foldSize * i + foldSize)],
        y[(foldSize * i):(foldSize * i + foldSize)]])
    x_test, y_test = folds[currentFold]
    x_train = []
    y_train = []
    for i in range(totalFold):
        if (i==currentFold):
            continue
        else:
            a,b = folds[i]
            print(a,b)
            x_train.append(a)
            y_train.append(b)
    x_train = np.array(x_train).reshape(((totalFold-1)*foldSize, 30))
    y_train = np.array(y_train).reshape((totalFold-1)*foldSize)
    return [x_train, y_train, x_test, y_test]

In [138]:
def prettyPrint(splitData, fold, string):
    labelShape = splitData.shape[1]
    print("%-19s: \nmean:%0.4f +/- std/sqrt(n):%0.4f \n" %(string,splitData[fold].mean(),
        splitData[fold].std()/np.sqrt(labelShape)), end='')
    for i in range(labelShape):
        print("fold:%d score:%0.4f " %(i,splitData[fold,i]), end='\n')

In [139]:
x = np.load("iris_features.npy")
y = np.load("iris_labels.npy")
N = 120
x_train = x[:N]; x_test = x[N:]
y_train = y[:N]; y_test = y[N:]
xa_train = np.load("iris_train_features_augmented.npy")
ya_train = np.load("iris_train_labels_augmented.npy")
xa_test = np.load("iris_test_features_augmented.npy")
ya_test = np.load("iris_test_labels_augmented.npy")

featBC = np.load("bc_features_standard.npy")
labelBC = np.load("bc_labels.npy")
idx = np.argsort(np.random.random(labelBC.shape[0]))
featBC = featBC[idx]
labelBC = labelBC[idx]
numberFolds = int(10)
splitData = np.zeros((8,numberFolds))

In [140]:
print("Nearest Centroid:")
run(x_train, y_train, x_test, y_test, NearestCentroid())

Nearest Centroid:


0.9666666666666667

In [141]:
print("k-NN classifier (k=3):")
run(x_train, y_train, x_test, y_test,
    KNeighborsClassifier(n_neighbors = 3))

k-NN classifier (k=3):


0.9333333333333333

In [142]:
print("Naive Bayes classifier (Gaussian):")
run(x_train, y_train, x_test, y_test, GaussianNB())

Naive Bayes classifier (Gaussian):


0.9666666666666667

In [143]:
print("Naive Bayes classifier (Multinomial):")
run(x_train, y_train, x_test, y_test, MultinomialNB())

Naive Bayes classifier (Multinomial):


0.9666666666666667

In [144]:
print("Decision Tree classifier:")
run(x_train, y_train, x_test, y_test, DecisionTreeClassifier())

Decision Tree classifier:


0.9666666666666667

In [145]:
print("Random Forest classifier (estimators = 5):")
run(xa_train, ya_train, xa_test, ya_test,
    RandomForestClassifier(n_estimators=5))

Random Forest classifier (estimators = 5):


0.9666666666666667

In [146]:
print("SVM (linear, C=1.0):")
run(xa_train, ya_train, xa_test, ya_test, SVC(kernel="linear", C=1.0))

SVM (linear, C=1.0):


0.9333333333333333

In [147]:
print("SVM (RBF, C=1.0, gamma=0.25):")
run(xa_train, ya_train, xa_test, ya_test,
    SVC(kernel = "rbf", C=1.0, gamma = 0.25))

SVM (RBF, C=1.0, gamma=0.25):


0.9666666666666667

In [148]:
print("SVM (RBF, C = 1.0, gamma = 0.0001, augmented)")
run(xa_train, ya_train, xa_test, ya_test,
    SVC(kernel="rbf", C = 1.0, gamma = 0.001))

SVM (RBF, C = 1.0, gamma = 0.0001, augmented)


0.9333333333333333

In [149]:
print("SVM (RBF, C = 1.0, gamma = 0.0001, original)")
run(x_train, y_train, x_test, y_test,
    SVC(kernel="rbf", C = 1.0, gamma = 0.001))

SVM (RBF, C = 1.0, gamma = 0.0001, original)


0.6

### BreastCancer Fold Method

In [150]:
for currentFold in range(numberFolds):
    x_train, y_train, x_test, y_test = split(featBC,
        labelBC, currentFold, numberFolds)
    splitData[0, currentFold] = run(x_train, y_train,
        x_test, y_test, NearestCentroid())
    splitData[1, currentFold] = run(x_train, y_train,
        x_test, y_test, KNeighborsClassifier(n_neighbors = 3))
    splitData[2, currentFold] = run(x_train, y_train,
        x_test, y_test, KNeighborsClassifier(n_neighbors = 5))
    splitData[3, currentFold] = run(x_train, y_train,
        x_test, y_test, GaussianNB())
    splitData[4, currentFold] = run(x_train, y_train,
        x_test, y_test, DecisionTreeClassifier())
    splitData[5, currentFold] = run(x_train, y_train,
        x_test, y_test, RandomForestClassifier(n_estimators=5))
    splitData[6, currentFold] = run(x_train, y_train,
        x_test, y_test, RandomForestClassifier(n_estimators=50))
    splitData[7, currentFold] = run(x_train, y_train,
        x_test, y_test, SVC(kernel="linear", C=1.0))

[[-1.33608564  1.99903246 -1.34729164 ... -1.74506282 -0.68877934
  -0.78999777]
 [-0.98760022  1.3800326  -0.98687738 ... -0.8107588   0.82222776
  -0.13719944]
 [ 0.14561642 -0.94238039  0.15656258 ...  0.38209284  0.40484036
   0.04345613]
 ...
 [ 1.41231974  1.62902878  1.52943195 ...  1.05815376 -0.95409536
   0.4479916 ]
 [-0.26052388  2.04091966 -0.2919987  ... -0.5333302  -0.6920149
  -1.08148496]
 [ 0.20809955  0.91229211  0.3472732  ...  0.88457055  0.16055549
   0.16980419]] [0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 1 0 0
 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 1]
[[-0.47353452 -1.50320357 -0.54119942 ... -1.33699001 -1.00424655
  -0.75730243]
 [-0.15259848  0.59348391 -0.19808505 ... -0.80268871 -0.73569497
  -0.75951907]
 [ 0.81873007  0.22580731  0.73034207 ... -0.07622775  0.60544515
  -1.06707685]
 ...
 [ 1.42936059  1.70116786  1.40998037 ...  1.10535621  0.57794288
   0.73449137]
 [ 1.80141919  0.32121706  1.76009707 ...  0.71098734  0.51646722

In [151]:
prettyPrint(splitData, 0, "Nearest");
prettyPrint(splitData, 1, "3-KNeighbors");
prettyPrint(splitData, 2, "7-KNeighbors");
prettyPrint(splitData, 3, "Naive Bayes");
prettyPrint(splitData, 4, "Decision Tree");
prettyPrint(splitData, 5, "Random Forest(5)");
prettyPrint(splitData, 6, "Random Forest(5)");
prettyPrint(splitData, 7, "SVM (linear)");

Nearest            : 
mean:0.9339 +/- std/sqrt(n):0.0116 
fold:0 score:0.9464 
fold:1 score:0.8571 
fold:2 score:0.8929 
fold:3 score:0.9286 
fold:4 score:0.9286 
fold:5 score:0.9286 
fold:6 score:0.9286 
fold:7 score:0.9821 
fold:8 score:0.9821 
fold:9 score:0.9643 
3-KNeighbors       : 
mean:0.9750 +/- std/sqrt(n):0.0052 
fold:0 score:0.9821 
fold:1 score:0.9643 
fold:2 score:0.9464 
fold:3 score:0.9821 
fold:4 score:1.0000 
fold:5 score:0.9821 
fold:6 score:0.9821 
fold:7 score:0.9821 
fold:8 score:0.9821 
fold:9 score:0.9464 
7-KNeighbors       : 
mean:0.9732 +/- std/sqrt(n):0.0058 
fold:0 score:0.9821 
fold:1 score:0.9643 
fold:2 score:0.9286 
fold:3 score:0.9821 
fold:4 score:0.9821 
fold:5 score:1.0000 
fold:6 score:0.9643 
fold:7 score:0.9821 
fold:8 score:0.9821 
fold:9 score:0.9643 
Naive Bayes        : 
mean:0.9411 +/- std/sqrt(n):0.0121 
fold:0 score:0.9286 
fold:1 score:0.8571 
fold:2 score:0.9286 
fold:3 score:0.9643 
fold:4 score:0.8929 
fold:5 score:0.9464 
fold:6 score