In [1]:
import numpy as np
from numpy import load
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
from keras.models import load_model
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import LabelEncoder
#from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


In [2]:
# Load the train/test feature and label arrays from the compressed archive.
# numpy.load returns a lazy archive object for .npz input that keeps the
# underlying file open, so it is used as a context manager to guarantee the
# file descriptor is released once the four arrays have been read into memory.
DATA_PATH = '/d/project2/src/svm_processed_data.npz'
with load(DATA_PATH) as data:
    # arr_0..arr_3 are the default keys numpy assigns to arrays saved positionally.
    trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Dataset: train=%d, test=%d' % (trainX.shape[0], testX.shape[0]))

Dataset: train=1296, test=139


In [3]:
# Rescale every input vector to unit L2 norm.
# Normalizer keeps no fitted state, which is why transform() is called
# directly on both splits without a preceding fit().
in_encoder = Normalizer(norm='l2')
trainX, testX = (in_encoder.transform(features) for features in (trainX, testX))

In [4]:
# Reduce each row of the label arrays to the column index of its largest entry
# (presumably one-hot / score vectors -> integer class ids; confirm against the
# step that produced the archive).
# The ndim guard keeps this cell re-runnable: once the labels are already 1-D,
# calling argmax(axis=1) again would raise an axis error, so they are left as-is.
if trainy.ndim > 1:
    trainy = np.argmax(trainy, axis=1)
if testy.ndim > 1:
    testy = np.argmax(testy, axis=1)

In [5]:
# Encode the target labels as consecutive integers.
# The encoder learns its label set from the training targets only; the test
# targets are then mapped with that same fitted encoder.
out_encoder = LabelEncoder()
trainy = out_encoder.fit_transform(trainy)
testy = out_encoder.transform(testy)

In [7]:
# Compare several classifiers on the training split using 10-fold
# cross-validated accuracy, printing the mean and standard deviation per model.

# Fixed seed so the estimators that use randomness internally produce the same
# scores on every run of this cell.
SEED = 42

# (display name, unfitted estimator) pairs; cross_val_score clones each estimator per fold.
classification_models = [
    ('Logistic Regression', LogisticRegression(solver="liblinear", random_state=SEED)),
    ('K Nearest Neighbor', KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2)),
    ('Kernel SVM', SVC(kernel='rbf', gamma='scale')),
    ('Naive Bayes', GaussianNB()),
    ('Decision Tree', DecisionTreeClassifier(criterion="entropy", random_state=SEED)),
    ('Random Forest', RandomForestClassifier(n_estimators=100, criterion="entropy", random_state=SEED)),
]

# Without shuffling the splitter is deterministic, so a single instance is
# shared by all models instead of being rebuilt on every loop iteration.
# NOTE(review): unshuffled folds are contiguous slices of trainX; if the rows
# are grouped by class, consider StratifiedKFold or shuffle=True - confirm the
# ordering of the saved data before changing this.
kfold = KFold(n_splits=10)

for name, model in classification_models:
  result = cross_val_score(model, trainX, trainy, cv=kfold, scoring='accuracy')
  print("%s: Mean Accuracy = %.2f%% - SD Accuracy = %.2f%%" % (name, result.mean()*100, result.std()*100))


Logistic Regression: Mean Accuracy = 71.23% - SD Accuracy = 7.36%
K Nearest Neighbor: Mean Accuracy = 79.23% - SD Accuracy = 10.40%
Kernel SVM: Mean Accuracy = 92.44% - SD Accuracy = 4.99%
Naive Bayes: Mean Accuracy = 77.01% - SD Accuracy = 8.72%
Decision Tree: Mean Accuracy = 72.23% - SD Accuracy = 7.25%
Random Forest: Mean Accuracy = 83.89% - SD Accuracy = 10.00%
