# Nonlinear SVM Classification

In [1]:
# importing the required libraries
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import LinearSVC

# Model definition (fitted in a later cell).
# Pipeline stages, applied in order:
#   1. poly_features - expand the inputs with degree-2 polynomial terms so the
#      linear SVM can learn a curved decision boundary.
#   2. scaler        - standardise every expanded feature.
#   3. svm_svc       - linear SVM classifier with hinge loss.
svm_class_poly = Pipeline([
    ("poly_features", PolynomialFeatures(degree=2)),
    ("scaler", StandardScaler()),
    # C controls the balance between the width of the street and margin violations.
    # random_state pins the solver's internal data shuffling so that refitting
    # on the same data gives the same model.
    ("svm_svc", LinearSVC(C=10, loss="hinge", random_state=42)),
])

In [2]:
# Generate the two-moons toy dataset; make_moons returns a (features, labels) pair.
# random_state pins the sample generation/shuffle so a fresh-kernel run
# reproduces the same data (and therefore the same reported accuracy).
dataset = make_moons(random_state=42)
max_length = len(dataset[0])  # total number of samples

In [3]:
# training and validation dataset
# Sequential hold-out split: the first `dummy_value` fraction of the samples is
# used for training and the remainder for validation.
# NOTE: a sequential split is only sound when the samples are not ordered by
# class; make_moons shuffles its output by default, which is what this relies on.
dummy_value = 0.7                        # fraction of samples used for training
dummy_pos = int(dummy_value*max_length)  # index of the first validation sample
diff = max_length - dummy_pos            # number of validation samples

# training dataset
X_train = dataset[0][:dummy_pos]
y_train = dataset[1][:dummy_pos]

# validation dataset
X_test = dataset[0][dummy_pos:]
y_test = dataset[1][dummy_pos:]

# Sanity checks: the two parts must cover every sample exactly once.
assert len(X_train) + len(X_test) == max_length
assert len(X_test) == diff


In [7]:
# Fit the pipeline on the training split and keep the object returned by fit().
result1 = svm_class_poly.fit(X_train, y_train)

# Predict the held-out samples, then put predictions and true labels into the
# same column layout (one row per sample) so they can be compared row by row.
y_predict = np.array(result1.predict(X_test))
y_predict = y_predict.reshape(len(y_predict), -1)
y_test = np.array(y_test)
y_test = y_test.reshape(len(y_test), -1)

# Count agreements with a single vectorised comparison; everything that does
# not match is counted as wrong.
n_samples = len(y_test)
correct = int(np.count_nonzero(y_test == y_predict))
wrong = n_samples - correct

print('The number of matching ones is ' + str(correct) + '.')
print('The number of non-matching ones is ' + str(wrong) + '.')

# Accuracy as a percentage of the validation samples.
accuracy = (correct / n_samples) * 100
print('Average model accuracy is ' + str(round(accuracy, 1)) + '%' + '.')

The number of matching ones is 25.
The number of non-matching ones is 5.
Average model accuracy is 83.3%.
