In [None]:
from sklearn.svm import SVC
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Build a deliberately hard-to-separate toy dataset of clustered points.
blob_settings = dict(
    n_samples=100,
    n_features=2,
    centers=8,        # number of centers the blobs are generated around
    cluster_std=1.3,  # standard deviation of each cluster
    random_state=4,   # fixed seed so the same blobs are generated every run
)
X_D2, y_D2 = make_blobs(**blob_settings)

# Scatter the two features against each other, colouring each point by its
# label: a single straight line will not be able to classify this data
# accurately.
first_feature, second_feature = X_D2[:, 0], X_D2[:, 1]
plt.figure()
plt.title("random blob data")
plt.scatter(first_feature, second_feature, c=y_D2, marker='o', s=50)
plt.grid(True)
plt.show()

In [None]:
# Hold out part of the blob data for evaluation; the fixed random_state keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_D2, y_D2, random_state=0
)

# Kernelized support vector machines can separate clusters that no straight
# line can, by implicitly expanding the data into a higher-dimensional space.
# Each point here has two features; once lifted to a higher dimension, the
# points lie on a surface that a flat plane can cut, and the cut projects back
# onto 2D as a closed, circle-like boundary. Example: one class sits around
# the origin and a second class surrounds it. Lifting the points onto a
# paraboloid and slicing it with a plane parallel to the x-y plane yields an
# ellipse-like boundary in the original 2D space.
#
# The default kernel for SVC is the radial basis function (RBF). SVC also has
# a gamma parameter that controls how far the influence of each data point
# reaches: a high gamma gives very small classification regions, while a low
# gamma gives large ones.
kernel_runs = (
    ("results for radial basis function", SVC()),
    # polynomial kernel of degree 3
    ("\nresults for polynomial function", SVC(kernel='poly', degree=3)),
)

# Fit each model on the training split and report its mean accuracy on both
# splits. `kvm` is rebound on every pass, so it ends up holding the
# polynomial-kernel model.
for heading, estimator in kernel_runs:
    kvm = estimator.fit(X_train, y_train)

    print(heading)
    print("accuracy of kvm (train): {}".format(kvm.score(X_train, y_train)))
    print("accuracy of kvm (test): {}".format(kvm.score(X_test, y_test)))
