# Homework2 Support Vector Machine
------


This is the README section for:
* Matriculation Number: `A0148008J`
* Email: `e0012680@u.nus.edu`

## Optional Bonus - US Postal Service Zip Code dataset
------

In [44]:
#%matplotlib notebook
import numpy as np
import os
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from IPython.display import display, Image
from matplotlib import cm


In [13]:
# Directory holding the homework files, resolved against the current working directory
current_pwd = os.path.join(os.getcwd(), 'homework/homework2')

# Training and test data files inside that directory
train_data_path, test_data_path = (
    os.path.join(current_pwd, 'hw2-1-train.txt'),
    os.path.join(current_pwd, 'hw2-1-test.txt'),
)

In [14]:
# Read the training and test files into numeric arrays
setTrain = np.loadtxt(train_data_path)
setTest = np.loadtxt(test_data_path)

# Column 0 is used as the label; columns 1 and 2 are kept as the features
labelTrain = setTrain[:, 0]
dataTrain = setTrain[:, 1:3]
labelTest = setTest[:, 0]
dataTest = setTest[:, 1:3]

In [19]:
# Inspect the first training sample (the two feature columns sliced out of setTrain)
dataTrain[0]

array([ 0.2479082, -4.1050625])

##### Self-Defined Functions
-----

In [6]:
def svmTrain(dataTrain, labelTrain, cost, kernel, gamma, degree):
    """Fit a scikit-learn ``SVC`` on the given training set.

    Parameters
    ----------
    dataTrain: training samples passed to ``SVC.fit``
    labelTrain: labels matching ``dataTrain``
    cost: value forwarded as the ``C`` argument of ``SVC``
    kernel: kernel name forwarded to ``SVC``
    gamma: kernel coefficient forwarded to ``SVC``
    degree: polynomial degree forwarded to ``SVC``

    Returns
    -------
    (model, n_support): the fitted classifier and the length of its
    ``support_`` index array
    """
    classifier = SVC(C=cost, kernel=kernel, gamma=gamma, degree=degree)
    classifier.fit(dataTrain, labelTrain)
    n_support = classifier.support_.shape[0]
    return classifier, n_support

In [7]:
def svmPredict(data, label, svmModel):
    """Score a fitted model: fraction of samples whose prediction equals ``label``.

    Parameters
    ----------
    data: samples handed to ``svmModel.predict``
    label: expected labels, compared element-wise with the predictions
    svmModel: any object exposing ``predict(data)`` that returns an ndarray

    Returns
    -------
    accuracy: ``1 - (number of mismatches) / (number of predictions)``
    """
    predicted = svmModel.predict(data)
    total = predicted.shape[0]
    wrong = np.sum(predicted != label)
    # Multiply by 1.0 so the division is a true division under Python 2 as well
    return 1 - wrong / (total * 1.0)

In [8]:
def testAccuracy(dataTrain, labelTrain, dataTest, labelTest, cost, kernel, gamma, degree):
    """Train an SVM with ``svmTrain`` and score it on the test set with ``svmPredict``.

    Returns
    -------
    (accuracy, model, totalSV): the value ``svmPredict`` returns for the test
    data, the fitted model, and the support-vector count reported by ``svmTrain``
    """
    fitted = svmTrain(dataTrain, labelTrain, cost, kernel, gamma, degree)
    model, totalSV = fitted
    accuracy = svmPredict(dataTest, labelTest, model)
    return accuracy, model, totalSV

In [9]:
def printToVariousCostRBF(dataTrain, labelTrain, dataTest, labelTest, gamma, degree):
    """Fit an RBF-kernel SVM for several cost values and print one summary line each.

    For every cost the line reports the in-sample error, the out-of-sample
    error and the number of support vectors.

    Parameters
    ----------
    dataTrain, labelTrain: training samples and labels
    dataTest, labelTest: test samples and labels
    gamma: forwarded to the model through ``testAccuracy``
    degree: forwarded to the model through ``testAccuracy``
    """
    # Cost values swept: C in {0.01, 1, 10^2, 10^4, 10^6}
    cost_values = [0.01, 1, 100, 10000, 1000000]
    for cost in cost_values:
        test_acc, model, totalSV = testAccuracy(dataTrain, labelTrain,
                                                dataTest, labelTest,
                                                cost, 'rbf', gamma, degree)
        train_acc = svmPredict(dataTrain, labelTrain, model)
        # svmPredict returns an accuracy, so each error rate is its complement
        pieces = ["Cost: ", str(cost),
                  "  in_error: ", str(1-train_acc),
                  "  out_error: ", str(1-test_acc),
                  "  total SV: ", str(totalSV)]
        print("".join(pieces))

In [10]:
# Sweep the cost values with the RBF kernel and print in-sample / out-of-sample error.
# NOTE(review): per the scikit-learn SVC documentation `degree` only affects the
# 'poly' kernel, so the 2 passed here should not influence these RBF fits — confirm.
printToVariousCostRBF(dataTrain, labelTrain, dataTest, labelTest, 'auto', 2)

Cost: 0.01  in_error: 0.00384368994234  out_error: 0.0212264150943  total SV: 347
Cost: 1  in_error: 0.00448430493274  out_error: 0.0212264150943  total SV: 31
Cost: 100  in_error: 0.00320307495195  out_error: 0.0188679245283  total SV: 20
Cost: 10000  in_error: 0.00256245996156  out_error: 0.0188679245283  total SV: 17
Cost: 1000000  in_error: 0.00128122998078  out_error: 0.0212264150943  total SV: 18


In [None]:
def plotDecisionBoundaryRBFSVM(X, y, kernel, gamma = 'auto', degree = 2,
                               costs = (0.01, 1, 100, 10000, 1000000)):
    """Plot the SVM decision boundary on the training data, one figure per cost.

    Parameters
    ----------
    X: 2-D array of samples; column 0 is drawn on the x-axis, column 1 on the y-axis
    y: labels for X; samples with y > 0 are drawn as red '+', all others as blue 'o'
    kernel: kernel name forwarded to svmTrain
    gamma: kernel coefficient forwarded to svmTrain, optional
    degree: polynomial degree forwarded to svmTrain, optional
    costs: iterable of cost (C) values to fit and plot, optional

    Each figure shows the sign of the decision function as a coloured
    background, the contour lines at -0.5, 0 and 0.5, and the training points.
    """
    X = np.asarray(X)
    # Boolean masks let each class be drawn with one scatter call
    # instead of one call per sample.
    positive = np.asarray(y) > 0.0
    negative = ~positive

    # step size in the mesh
    h = 0.02

    # The mesh depends only on X, so build it once rather than once per cost
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    for cost in costs:
        model = svmTrain(X, y, cost=cost, kernel=kernel, degree = degree, gamma=gamma)[0]

        plt.figure()

        # Background first: the mesh is opaque and shares the scatter's default
        # z-order, so drawing it after the points would paint over them.
        Z = model.decision_function(grid_points).reshape(xx.shape)
        plt.pcolormesh(xx, yy, Z>0, cmap=plt.cm.Paired)
        plt.contour(xx, yy, Z, colors=['k', 'k', 'k'],
                    linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

        plt.scatter(X[positive, 0], X[positive, 1], c= 'red', marker='+')
        plt.scatter(X[negative, 0], X[negative, 1], c= 'blue', marker='o')

        plt.title("Cost = " + str(cost))

        plt.show()

In [48]:
def make_meshgrid(x, y, h=.02):
    """Build a 2-D coordinate grid covering the data plus a one-unit margin.

    Parameters
    ----------
    x: array whose min/max define the extent of the grid along the x-axis
    y: array whose min/max define the extent of the grid along the y-axis
    h: spacing between neighbouring grid points, optional

    Returns
    -------
    xx, yy : ndarray
        Coordinate matrices as produced by ``np.meshgrid``.
    """
    margin = 1
    x_axis = np.arange(x.min() - margin, x.max() + margin, h)
    y_axis = np.arange(y.min() - margin, y.max() + margin, h)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return grid_x, grid_y

In [49]:
def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's predictions over a mesh as filled contours.

    Parameters
    ----------
    ax: matplotlib axes object to draw on
    clf: a classifier exposing ``predict``
    xx: meshgrid ndarray of x coordinates
    yy: meshgrid ndarray of y coordinates
    params: extra keyword arguments handed to ``ax.contourf``, optional

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    # One row per mesh point: (x, y)
    mesh_points = np.column_stack((xx.ravel(), yy.ravel()))
    predicted = clf.predict(mesh_points).reshape(xx.shape)
    return ax.contourf(xx, yy, predicted, **params)

In [None]:
def plot(X, y, kernel, gamma = 'auto', degree = 2):
    """Plot filled-contour decision regions of the SVM, one figure per cost value.

    Parameters
    ----------
    X: 2-D array of samples; column 0 is drawn on the x-axis, column 1 on the y-axis
    y: labels for X, used to colour the scatter points
    kernel: kernel name forwarded to svmTrain
    gamma: kernel coefficient forwarded to svmTrain, optional
    degree: polynomial degree forwarded to svmTrain, optional
    """
    # The two feature columns and the mesh are the same for every cost value.
    X0, X1 = X[:, 0], X[:, 1]
    xx, yy = make_meshgrid(X0, X1)

    for cost in [0.01, 1, 100, 10000, 1000000]:
        model = svmTrain(X, y, cost=cost, kernel=kernel, degree = degree, gamma=gamma)[0]

        # A fresh figure per cost keeps the decision regions of different
        # models from being overlaid on a single axes.
        _fig, ax = plt.subplots()
        plot_contours(ax, model, xx, yy,
                      cmap=plt.cm.coolwarm, alpha=0.8)
        ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        # Generic labels: the former 'Sepal length' / 'Sepal width' text does
        # not describe the columns of this dataset.
        ax.set_xlabel('Feature 1')
        ax.set_ylabel('Feature 2')
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title("Cost = " + str(cost))

    plt.show()

In [51]:
# Draw the RBF-kernel decision boundary on the training set, one figure per cost value
plotDecisionBoundaryRBFSVM(dataTrain, labelTrain, kernel = "rbf", gamma = 'auto', degree = 2)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [46]:
# Set up parameters: training data / labels and the SVM kernel settings
X, y = dataTrain, labelTrain
kernel, degree, gamma = "rbf", 2, 'auto'


## Statement of Individual Work

Please initial (between the square brackets) one of the following statements.

[Dong Shaocong] I, <*A0148008J*>, certify that I have followed the CS 3244 Machine Learning class guidelines for homework assignments.  In particular, I expressly vow that I have followed the Facebook rule in discussing with others in doing the assignment and did not take notes (digital or printed) from the discussions.  
