## Computer Exercise \#03: Support Vector Machines



### Instructions ###

Below is the basic code for this computer exercise, as given in the writeup for your experiments.
You will need to change and extend it to perform the experiments and to try out other ideas.  

### Common Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

### Plotting Function ###

In [None]:
#
# Plot the decision function for a classifier clf
#    Optional argument, SV, if set to 1 will plot the support vectors
#    For data sets with many support vectors, it is best to keep SV=0
#
def plot_classifier(X, y, sv=0, model=None):   # Default value sv=0: Do not show vectors
    """Scatter-plot a two-feature data set and overlay a classifier's decision
    boundary and margins.

    Parameters
    ----------
    X : array-like with two columns
        Feature matrix; the two columns are plotted as "x1" and "x2".
    y : array-like
        Class labels, used to colour the points with a red/blue palette.
    sv : int, default 0
        When set to 1, the classifier's support vectors are circled. For data
        sets with many support vectors it is best to keep ``sv=0``.
    model : fitted classifier, optional
        Classifier to draw. It must provide ``decision_function`` (and
        ``support_vectors_`` when ``sv=1``). When omitted, the notebook-level
        variable ``clf`` is used, which keeps existing calls working.
    """
    # Fall back to the notebook's global classifier when none is passed in.
    estimator = clf if model is None else model

    data = pd.DataFrame(X, columns=["x1", "x2"])
    sns.scatterplot(x="x1", y="x2", data=data, hue=y, palette=['red', 'blue'])

    # Reuse the axis limits chosen by the scatter plot so the evaluation grid
    # covers exactly the visible region.
    ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Create a 30 x 30 grid and evaluate the decision function on it.
    xx = np.linspace(xlim[0], xlim[1], 30)
    yy = np.linspace(ylim[0], ylim[1], 30)
    YY, XX = np.meshgrid(yy, xx)
    xy = np.vstack([XX.ravel(), YY.ravel()]).T
    Z = estimator.decision_function(xy).reshape(XX.shape)

    # Decision boundary (level 0, solid) and margins (levels -1 and +1, dashed).
    ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # Optionally circle the support vectors.
    if sv == 1:
        ax.scatter(estimator.support_vectors_[:, 0], estimator.support_vectors_[:, 1],
                   s=100, linewidth=1, facecolors='none', edgecolors='k')


### Make Dataset ###

In [None]:
from sklearn.datasets import make_classification
# Generate a synthetic two-feature, 200-sample classification data set.
# Input the last three digits of your GMU G-Number for the random_state
# (xxx below is a placeholder that must be replaced with that number).
X, y = make_classification(n_features=2, n_samples=200,n_redundant=0, n_informative=2,
                           n_clusters_per_class=2,class_sep=1,random_state=xxx)   

In [None]:
# Split the data into training and test sets.
# NOTE: no random_state is passed here, so the split itself is not seeded.
X_train,X_test,y_train,y_test=train_test_split(X, y)

In [None]:
# Show the training points in the feature plane, coloured by class label.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=y_train,
    marker="o",
    s=25,
    edgecolor="k",
)

### Soft Margin Linear SVM

In [None]:
# Build a soft-margin SVM with a linear kernel and fit it on the training split.
# plot_classifier reads this notebook-level variable `clf`.
clf = SVC(kernel='linear')    # Default value of C=1
clf.fit(X_train, y_train)

In [None]:
# Plot the full data set (X, y) together with the fitted classifier's decision
# boundary and margins; sv=1 also circles the support vectors.
plot_classifier(X,y,sv=1)

### Nonlinear SVMs

In [None]:
#clf = SVC(kernel='poly')    # polynomial
#clf = SVC(kernel='rbf')     # RBF

## Cancer Data Set ##

In [None]:
from sklearn.datasets import load_breast_cancer

# Load the breast cancer data set, then pull out its feature matrix and targets.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

In [None]:
# Display the feature names that come with the data set.
print("Feature names:\n{}".format(cancer.feature_names))

### Using only two features ###

Here is a line of code that creates a new data set containing only features 24 and 28 (zero-based column indices 23 and 27):

In [None]:
# Keep only columns 23 and 27 of X, side by side, as a two-column array.
X_2f = np.hstack([X[:, [23]], X[:, [27]]])