# Shogun Vs Sklearn : Classification

In [1]:
%matplotlib inline
from modshogun import *
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
import time

# Datasets

In [34]:
generation1 = datasets.make_classification(n_samples=700, n_features=15,n_classes=2,n_informative=5)
generation2 = datasets.make_classification(n_samples=5000, n_features=20,n_classes=5,n_informative=5)
generation3 = datasets.make_classification(n_samples=20000, n_features=50,n_classes=10,n_informative=5)

feats1 = generation1[0]
labels1 = generation1[1]

feats2 = generation2[0]
labels2 = generation2[1]

feats3 = generation3[0]
labels3 = generation3[1]

feats1_train = feats1[:600]
feats1_test = feats1[600:]
labels1_train = labels1[:600]
labels1_test = labels1[600:]

feats2_train = feats2[:4500]
feats2_test = feats2[4500:]
labels2_train = labels2[:4500]
labels2_test = labels2[4500:]

feats3_train = feats3[:19000]
feats3_test = feats3[19000:]
labels3_train = labels3[:19000]
labels3_test = labels3[19000:]

feats1_train.reshape(len(feats1_train[0,:]),len(feats1_train[:,0]))
shogun_feats1_train = RealFeatures(feats1_train.reshape(len(feats1_train[0,:]),len(feats1_train[:,0])))
shogun_feats1_test = RealFeatures(feats1_test.reshape(len(feats1_test[0,:]),len(feats1_test[:,0])))
shogun_labels1_train = MulticlassLabels(labels1_train*1.0)
shogun_labels1_test = MulticlassLabels(labels1_test*1.0)

shogun_feats2_train = RealFeatures(feats2_train.reshape(len(feats2_train[0,:]),len(feats2_train[:,0])))
shogun_feats2_test = RealFeatures(feats2_test.reshape(len(feats2_test[0,:]),len(feats2_test[:,0])))
shogun_labels2_train = MulticlassLabels(labels2_train*1.0)
shogun_labels2_test = MulticlassLabels(labels2_test*1.0)

shogun_feats3_train = RealFeatures(feats3_train.reshape(len(feats3_train[0,:]),len(feats3_train[:,0])))
shogun_feats3_test = RealFeatures(feats3_test.reshape(len(feats3_test[0,:]),len(feats3_test[:,0])))
shogun_labels3_train = MulticlassLabels(labels3_train*1.0)
shogun_labels3_test = MulticlassLabels(labels3_test*1.0)

# KNN

### dataset 1 , size 600 , Dimensions 15 , classes 2

In [35]:
number_of_neighbors = 10

start = time.time()
distances1 = EuclideanDistance(shogun_feats1_train, shogun_feats1_train)
shogun_knn1 = KNN(number_of_neighbors,distances1,shogun_labels1_train)
shogun_knn1.train()
end = time.time()

print np.sum((labels1_test == shogun_knn1.apply(shogun_feats1_test).get_labels()))/100.0
print end-start

from sklearn.neighbors import KNeighborsClassifier

start = time.time()
sklearn_knn1 =KNeighborsClassifier(n_neighbors=number_of_neighbors)
sklearn_knn1.fit(feats1_train,labels1_train)
end = time.time()
print np.sum(sklearn_knn1.predict(feats1_test) == labels1_test)/100.0
print end-start

0.55
0.00019907951355
0.84
0.000859022140503


In [36]:
number_of_neighbors = 10

start = time.time()
distances2 = EuclideanDistance(shogun_feats2_train, shogun_feats2_train)
shogun_knn2 = KNN(number_of_neighbors,distances2,shogun_labels2_train)
shogun_knn2.train()
end = time.time()
print sum(labels2_test == shogun_knn2.apply(shogun_feats2_test).get_labels())/500.0
print end-start

from sklearn.neighbors import KNeighborsClassifier

start = time.time()
sklearn_knn2 =KNeighborsClassifier(n_neighbors=number_of_neighbors)
sklearn_knn2.fit(feats2_train,labels2_train)
end = time.time()
print sum(sklearn_knn2.predict(feats2_test) == labels2_test)/500.0
print end-start

0.198
0.000261068344116
0.672
0.00358915328979


In [37]:
number_of_neighbors = 10

distances3 = EuclideanDistance(shogun_feats3_train, shogun_feats3_train)
shogun_knn3 = KNN(number_of_neighbors,distances3,shogun_labels3_train)
shogun_knn3.train()
print sum(labels3_test == shogun_knn3.apply(shogun_feats3_test).get_labels())/1000.0

from sklearn.neighbors import KNeighborsClassifier

sklearn_knn3 =KNeighborsClassifier(n_neighbors=number_of_neighbors)
sklearn_knn3.fit(feats3_train,labels3_train)
print sum(sklearn_knn3.predict(feats3_test) == labels3_test)/1000.0

0.327
0.764


### comments on KNN

1- very low accuracy !!!

2-many options at sklearn

# Naive Bayes

In [38]:
shogun_naive1 = GaussianNaiveBayes()
shogun_naive1.set_features(shogun_feats1_train)
shogun_naive1.set_labels(shogun_labels1_train)
shogun_naive1.train()
print np.sum((labels1_test == shogun_naive1.apply(shogun_feats1_test).get_labels()))/100.0

from sklearn.naive_bayes import GaussianNB
sklearn_naive1 = GaussianNB()
sklearn_naive1.fit(feats1_train, labels1_train)

print np.sum(labels1_test == sklearn_naive1.predict(feats1_test))/100.0

0.39
0.64


In [39]:
shogun_naive2 = GaussianNaiveBayes()
shogun_naive2.set_features(shogun_feats2_train)
shogun_naive2.set_labels(shogun_labels2_train)
shogun_naive2.train()
print np.sum((labels2_test == shogun_naive2.apply(shogun_feats2_test).get_labels()))/500.0

from sklearn.naive_bayes import GaussianNB
sklearn_naive2 = GaussianNB()
sklearn_naive2.fit(feats2_train, labels2_train)

print np.sum(labels2_test == sklearn_naive2.predict(feats2_test))/500.0

0.196
0.552


In [40]:
shogun_naive3 = GaussianNaiveBayes()
shogun_naive3.set_features(shogun_feats3_train)
shogun_naive3.set_labels(shogun_labels3_train)
shogun_naive3.train()
print np.sum((labels3_test == shogun_naive3.apply(shogun_feats3_test).get_labels()))/1000.0

from sklearn.naive_bayes import GaussianNB
sklearn_naive3 = GaussianNB()
sklearn_naive3.fit(feats3_train, labels3_train)

print np.sum(labels3_test == sklearn_naive3.predict(feats3_test))/1000.0

0.348
0.649


### comments on Naive Bayes

lower accuracy

# QDA

In [41]:
shogun_qda1 = QDA(shogun_feats1_train, shogun_labels1_train)
shogun_qda1.train()
print np.sum((labels1_test == shogun_qda1.apply(shogun_feats1_test).get_labels()))/100.0

from sklearn import qda
sklearn_qda1 = qda.QDA()
sklearn_qda1.fit(feats1_train,labels1_train)
print np.sum(labels1_test == sklearn_qda1.predict(feats1_test))/100.0

0.51
0.81


In [42]:
shogun_qda2 = QDA(shogun_feats2_train, shogun_labels2_train)
shogun_qda2.train()
print np.sum((labels2_test == shogun_qda2.apply(shogun_feats2_test).get_labels()))/500.0

sklearn_qda2 = qda.QDA()
sklearn_qda2.fit(feats2_train,labels2_train)
print np.sum(labels2_test == sklearn_qda2.predict(feats2_test))/500.0

0.188
0.57


In [43]:
shogun_qda3 = QDA(shogun_feats3_train, shogun_labels3_train)
shogun_qda3.train()
print np.sum((labels3_test == shogun_qda3.apply(shogun_feats3_test).get_labels()))/1000.0

sklearn_qda3 = qda.QDA()
sklearn_qda3.fit(feats3_train,labels3_train)
print np.sum(labels3_test == sklearn_qda3.predict(feats3_test))/1000.0

0.332
0.723


# Multiclass Logestic Regression

In [52]:
shogun_log1 = MulticlassLogisticRegression(1,shogun_feats1_train,shogun_labels1_train)
shogun_log1.train()

print np.sum((labels1_test == shogun_log1.apply(shogun_feats1_test).get_labels()))/100.0

from sklearn import linear_model
sklearn_log1 = linear_model.LogisticRegression()
sklearn_log1.fit(feats1_train,labels1_train)
print np.sum(labels1_test == sklearn_qda1.predict(feats1_test))/100.0

0.47
0.81


In [57]:
shogun_log2 = MulticlassLogisticRegression(1,shogun_feats2_train,shogun_labels2_train)
shogun_log2.train()

print np.sum((labels2_test == shogun_log2.apply(shogun_feats2_test).get_labels()))/500.0

from sklearn import linear_model
sklearn_log2 = linear_model.LogisticRegression()
sklearn_log2.fit(feats2_train,labels2_train)
print np.sum(labels2_test == sklearn_qda2.predict(feats2_test))/500.0

0.208
0.57


In [58]:
shogun_log3 = MulticlassLogisticRegression(1,shogun_feats3_train,shogun_labels3_train)
shogun_log3.train()

print np.sum((labels3_test == shogun_log3.apply(shogun_feats3_test).get_labels()))/1000.0

from sklearn import linear_model
sklearn_log3 = linear_model.LogisticRegression()
sklearn_log3.fit(feats3_train,labels3_train)
print np.sum(labels3_test == sklearn_qda3.predict(feats3_test))/1000.0

0.344
0.723
