# Support Vector Machines
For this module, we need to do some pre-processing of the data so that we can separate it using a support vector machine.

In [1]:
# All imports for the notebook, grouped ahead of any executable statement.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# return_X_y=True makes load_iris hand back the (attributes, classes) pair
# directly, which is unpacked into the two arrays used below.
iris_attributes, iris_classes = load_iris(return_X_y=True)

Since a single support vector machine can only separate two groups, we need to condense the 3 iris classes into 2.  We will do this once for each class (that class versus the other two), then have each SVM vote on the classification of the unknown iris.

In [2]:
# Boolean mask over all samples: True wherever the label is anything but class 0.
classes_not_0 = np.not_equal(iris_classes, 0)
print(classes_not_0)

[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True]


In [3]:
# Boolean mask over all samples: True wherever the label is anything but class 1.
classes_not_1 = np.not_equal(iris_classes, 1)
print(classes_not_1)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True]


In [4]:
# Boolean mask over all samples: True wherever the label is anything but class 2.
classes_not_2 = np.not_equal(iris_classes, 2)
print(classes_not_2)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False]


In [5]:
# Master split on the original 3-class labels: 20% held out, fixed seed.
(attributes_train,
 attributes_test,
 classes_train,
 classes_test) = train_test_split(
    iris_attributes,
    iris_classes,
    test_size=0.2,
    random_state=50,
)

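If all of the random states are the same, then all of these should split the same way.  This is important to verify, because if the split is not done correctly, then the model will make garbage predictions. Note that the test portion of these splits is smaller (`test_size=0.075` instead of `0.2`) because it improves readability in the notebook; as the output of the verification cell below shows, the smaller test sets line up with the first entries of the master test set.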

In [9]:
# One split per "not class k" labelling. Every call shares the master split's
# random_state so the shuffles agree; only the held-out fraction differs.
(attributes_train_not_0,
 attributes_test_not_0,
 classes_train_not_0,
 classes_test_not_0) = train_test_split(
    iris_attributes, classes_not_0, test_size=0.075, random_state=50
)

(attributes_train_not_1,
 attributes_test_not_1,
 classes_train_not_1,
 classes_test_not_1) = train_test_split(
    iris_attributes, classes_not_1, test_size=0.075, random_state=50
)

(attributes_train_not_2,
 attributes_test_not_2,
 classes_train_not_2,
 classes_test_not_2) = train_test_split(
    iris_attributes, classes_not_2, test_size=0.075, random_state=50
)

In [10]:
# Verify that the binary splits line up with the master 3-class split.
# The binary splits hold out fewer samples, so each binary test array is
# compared against the leading entries of the master test labels.
print("master,", classes_test)
print('Not_0, ', classes_test_not_0)
print('Not_1, ', classes_test_not_1)
print('Not_2, ', classes_test_not_2)

# Fail loudly on a misaligned split instead of relying on a visual comparison
# of the printed arrays.
for excluded_class, binary_test_labels in enumerate(
    (classes_test_not_0, classes_test_not_1, classes_test_not_2)
):
    # Expected binary label: True where the master label is not the excluded class.
    expected_labels = classes_test[:len(binary_test_labels)] != excluded_class
    assert np.array_equal(binary_test_labels, expected_labels), (
        "not_%d test split is misaligned with the master split" % excluded_class
    )

master, [1 1 0 0 2 2 2 0 0 1 0 2 0 2 1 0 1 0 1 1 2 1 0 2 1 2 1 1 1 2]
Not_0,  [ True  True False False  True  True  True False False  True False  True]
Not_1,  [False False  True  True  True  True  True  True  True False  True  True]
Not_2,  [ True  True  True  True False False False  True  True  True  True False]


Now we train three SVMs to tell us what the sample is not.

In [11]:
# Three independent linear-kernel classifiers, one per "not class k" labelling.
SVMnot_0, SVMnot_1, SVMnot_2 = (SVC(kernel='linear') for _ in range(3))

In [12]:
# Fit each binary classifier on its own training split.
SVMnot_0.fit(X=attributes_train_not_0, y=classes_train_not_0)
SVMnot_1.fit(X=attributes_train_not_1, y=classes_train_not_1)
# Kept as the cell's last expression so the fitted estimator is displayed.
SVMnot_2.fit(X=attributes_train_not_2, y=classes_train_not_2)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [13]:
# Run each fitted SVM over its held-out attributes to test it.
outputnot_0, outputnot_1, outputnot_2 = (
    fitted_svm.predict(held_out_attributes)
    for fitted_svm, held_out_attributes in (
        (SVMnot_0, attributes_test_not_0),
        (SVMnot_1, attributes_test_not_1),
        (SVMnot_2, attributes_test_not_2),
    )
)

In [16]:
# For every binary problem, show the SVM's predictions followed by the true values.
linear_results = (
    ("not_0", outputnot_0, classes_test_not_0),
    ("not_1", outputnot_1, classes_test_not_1),
    ("not_2", outputnot_2, classes_test_not_2),
)
for problem_name, predicted, actual in linear_results:
    print(problem_name)
    print("predictions", predicted)
    print("true_values", actual)

not_0
predictions [ True  True False False  True  True  True False False  True False  True]
true_values [ True  True False False  True  True  True False False  True False  True]
not_1
predictions [ True False  True  True  True  True False  True  True False  True False]
true_values [False False  True  True  True  True  True  True  True False  True  True]
not_2
predictions [ True False  True  True False False False  True  True  True  True False]
true_values [ True  True  True  True False False False  True  True  True  True False]


In [12]:
# Number of misclassified test samples per binary problem: XOR is True exactly
# where the prediction and the true value disagree.
for predicted, actual in (
    (outputnot_0, classes_test_not_0),
    (outputnot_1, classes_test_not_1),
    (outputnot_2, classes_test_not_2),
):
    print(np.logical_xor(predicted, actual).sum())

0
3
1


In [13]:
# Show the 3-class labels of the master test split for reference.
print(classes_test)

[1 1 0 0 2 2 2 0 0 1 0 2 0 2 1 0 1 0 1 1 2 1 0 2 1 2 1 1 1 2]


In [14]:
#trying out different kernels
SVM_RBF_not_0 = SVC(kernel='precomputed', gamma='scale')
SVMnot_0.fit(attributes_train_not_0, classes_train_not_0)
output_RBF_not_0 = SVMnot_0.predict(attributes_test_not_0)
SVM_RBF_not_1 = SVC(kernel='precomputed', gamma='scale')
SVMnot_1.fit(attributes_train_not_1, classes_train_not_1)
output_RBF_not_1 = SVMnot_1.predict(attributes_test_not_1)
SVM_RBF_not_2 = SVC(kernel='precomputed', gamma='scale')
SVMnot_2.fit(attributes_train_not_2, classes_train_not_2)
output_RBF_not_2 = SVMnot_2.predict(attributes_test_not_2)

In [15]:
# For every binary problem, show the output_RBF_* predictions and then the true values.
rbf_results = (
    ("not_0", output_RBF_not_0, classes_test_not_0),
    ("not_1", output_RBF_not_1, classes_test_not_1),
    ("not_2", output_RBF_not_2, classes_test_not_2),
)
for problem_name, predicted, actual in rbf_results:
    print(problem_name)
    print(predicted)
    print(actual)

not_0
[ True  True False False  True  True  True False False  True False  True]
[ True  True False False  True  True  True False False  True False  True]
not_1
[ True False  True  True  True  True False  True  True False  True False]
[False False  True  True  True  True  True  True  True False  True  True]
not_2
[ True False  True  True False False False  True  True  True  True False]
[ True  True  True  True False False False  True  True  True  True False]


In [16]:
# Number of test samples where the output_RBF_* prediction disagrees with the
# true value (XOR is True exactly on a mismatch).
for predicted, actual in (
    (output_RBF_not_0, classes_test_not_0),
    (output_RBF_not_1, classes_test_not_1),
    (output_RBF_not_2, classes_test_not_2),
):
    print(np.logical_xor(predicted, actual).sum())

0
3
1
