In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Instruction: parts (b) and (d) must both be run with these hyperparameters.
eta, C = 0.5, 5  # eta: base step size of the decaying schedule; C: weight on the hinge-loss term
iterNums = [
    5, 50, 100,
    1000, 5000, 6000,
]  # iteration budgets that each trainer is run with

def svm_train_bgd(matrix, label, nIter, eta0=None, reg=None):
    """Train a linear soft-margin SVM with batch (sub)gradient descent.

    Every iteration ``j`` (counted from 0) looks at all samples at once:
    samples whose margin ``y_i * (w^T x_i + b)`` is below 1 contribute to the
    hinge-loss part of the gradient, and the parameters are updated as

        w <- w - alpha_j * (w - C * sum_i y_i x_i)
        b <- b - 0.01 * alpha_j * (-C * sum_i y_i)

    with the decaying step size ``alpha_j = eta0 / (1 + j * eta0)``.

    Args:
        matrix: data array of shape (N, D).
        label: labels in {-1, +1}, shape (N, 1) (a flat (N,) array also works).
        nIter: number of full passes (parameter updates) to perform.
        eta0: base step size; when None the notebook-level ``eta`` is used.
        reg: hinge-loss weight C; when None the notebook-level ``C`` is used.

    Returns:
        dict with ``'w'`` (array of shape (D, 1)) and ``'b'`` (bias term).

    Side effects:
        Prints the learned ``w`` and ``b`` once training has finished.
    """
    # Fall back to the notebook-wide hyperparameters unless explicitly overridden.
    eta0 = eta if eta0 is None else eta0
    reg = C if reg is None else reg

    state = {}
    N, D = matrix.shape
    label = np.asarray(label)
    y_col = label.reshape(-1, 1)  # column view so it broadcasts against (N, D) and (N, 1)

    w = np.zeros((D, 1))
    b = 0
    for j in range(nIter):
        step = eta0 / (1 + j * eta0)

        # Margins of all samples under the current parameters, shape (N, 1).
        margins = y_col * (matrix.dot(w) + b)
        violators = (margins < 1).ravel()

        # Only margin violators contribute to the hinge-loss subgradient.
        sum_w = (y_col[violators] * matrix[violators]).sum(axis=0).reshape(-1, 1)
        sum_b = label[violators].sum(axis=0)

        grad_w = w - reg * sum_w
        grad_b = -reg * sum_b

        w = w - step * grad_w
        # The bias deliberately uses a step 100x smaller than the weights.
        b = b - 0.01 * step * grad_b

    print('\nFor Iter %i' %nIter)
    print('parameter w is:')
    print(w)
    print('parameter b is:')
    print(b)
    state['w'] = w
    state['b'] = b

    return state

def svm_train_sgd(matrix, label, nIter, eta0=None, reg=None):
    """Train a linear soft-margin SVM with stochastic (sub)gradient descent.

    Each of the ``nIter`` epochs visits the samples in their stored order and
    updates the parameters after every single sample ``i``:

        w <- w - alpha_j * (w / N - C * [margin_i < 1] * y_i * x_i)
        b <- b - 0.01 * alpha_j * (-C * [margin_i < 1] * y_i)

    where ``margin_i = y_i * (w^T x_i + b)`` and the step size
    ``alpha_j = eta0 / (1 + j * eta0)`` depends only on the epoch index ``j``
    (counted from 0).

    Args:
        matrix: data array of shape (N, D).
        label: labels in {-1, +1}, indexed per sample as ``label[i]``.
        nIter: number of epochs (full passes over the data).
        eta0: base step size; when None the notebook-level ``eta`` is used.
        reg: hinge-loss weight C; when None the notebook-level ``C`` is used.

    Returns:
        dict with ``'w'`` (array of shape (D, 1)) and ``'b'`` (bias term).

    Side effects:
        Prints the learned ``w`` and ``b`` once training has finished.
    """
    # Fall back to the notebook-wide hyperparameters unless explicitly overridden.
    eta0 = eta if eta0 is None else eta0
    reg = C if reg is None else reg

    state = {}
    N, D = matrix.shape
    w = np.zeros((D, 1))
    b = 0
    for j in range(nIter):
        # The step size is constant within an epoch, so compute it once.
        step = eta0 / (1 + j * eta0)
        for i in range(N):
            x_i = matrix[i, :]
            y_i = label[i]
            if (y_i * (w.T.dot(x_i) + b)) < 1:
                # Margin violated: the hinge term contributes to both gradients.
                grad_w = w / N - reg * (y_i * x_i).reshape(-1, 1)
                grad_b = -reg * y_i
                # The bias deliberately uses a step 100x smaller than the weights.
                b = b - 0.01 * step * grad_b
            else:
                # Only the regulariser acts on w; the bias gradient is zero.
                grad_w = w / N
            w = w - step * grad_w

    print('\nFor Iter %i' %nIter)
    print('parameter w is:')
    print(w)
    print('parameter b is:')
    print(b)
    state['w'] = w
    state['b'] = b

    return state

def svm_test(matrix, state):
    """Classify each row of ``matrix`` as +1 or -1 with a trained linear SVM.

    The decision value ``w^T x + b`` is thresholded at zero, so samples that
    fall inside the margin (|score| < 1) still receive a proper class label
    instead of their raw score. A score of exactly 0 is assigned to class +1.

    Args:
        matrix: data array of shape (N, D).
        state: dict holding the weights ``'w'`` (D values, e.g. shape (D, 1))
            and the bias ``'b'`` as returned by the training functions.

    Returns:
        Float array of shape (N, 1) whose entries are +1.0 or -1.0.
    """
    w = np.asarray(state['w']).reshape(-1, 1)
    b = state['b']

    # (N, D) . (D, 1) -> (N, 1); b is a scalar or a length-1 array and broadcasts.
    scores = np.asarray(matrix).dot(w) + b

    return np.where(scores >= 0, 1.0, -1.0).reshape(-1, 1)

def evaluate(output, label, nIter):
    """Print the classification accuracy of ``output`` against ``label``.

    A prediction counts as correct when it has the same sign as its label
    (``label * output > 0``). Both inputs are flattened first, so a flat
    ``(N,)`` array and a column ``(N, 1)`` array can be mixed without NumPy
    broadcasting them into an N x N matrix.

    Args:
        output: predicted labels, N values in any array shape.
        label: true labels in {-1, +1}, N values in any array shape.
        nIter: iteration count shown in the printed line (must be an int).

    Raises:
        ValueError: if ``output`` and ``label`` hold different numbers of values.
    """
    predicted = np.asarray(output).ravel()
    expected = np.asarray(label).ravel()
    if predicted.shape != expected.shape:
        raise ValueError(
            'output and label must have the same number of elements, got '
            '{} and {}'.format(predicted.size, expected.size)
        )

    # Use the code below to obtain the accuracy of your algorithm
    accuracy = (expected * predicted > 0).sum() * 1. / len(predicted)
    print('[Iter {:4d}: accuracy = {:2.4f}%'.format(nIter, 100*accuracy))

In [2]:
# Note1: labels take values in {-1, +1}
# Note2: data matrix shape  = [Ndata, 4]
# Note3: label matrix shape = [Ndata, 1]

# Load data. `.item()` unwraps the single Python object stored in the .npy
# file (presumably a dict keyed by split name, given the lookups below).
# NOTE(review): allow_pickle=True unpickles the file contents, which can run
# arbitrary code -- only load this file from a trusted source.
q4_data = np.load('q4_data/q4_data.npy', allow_pickle=True).item()

# Training split, then the held-out test split.
train_x, train_y = q4_data['q4x_train'], q4_data['q4y_train']
test_x, test_y = q4_data['q4x_test'], q4_data['q4y_test']

(b) Implement SVM using **batch gradient descent**. Print out the following:

*   Parameter w
*   Parameter b
*   Test accuracy (%)

In [3]:
for nIter in iterNums:
    # Fit on the training split with the current iteration budget
    state = svm_train_bgd(train_x, train_y, nIter)

    # Predict on the test split and report the accuracy
    prediction = svm_test(test_x, state)
    evaluate(prediction, test_y, nIter)


For Iter 5
parameter w is:
[[112.  ]
 [-42.75]
 [272.5 ]
 [103.  ]]
parameter b is:
[-0.12416667]
[Iter    5: accuracy = 54.1667%

For Iter 50
parameter w is:
[[ -2.01960784]
 [-11.94117647]
 [ 25.85294118]
 [ 11.54901961]]
parameter b is:
[-0.37280358]
[Iter   50: accuracy = 95.8333%

For Iter 100
parameter w is:
[[-2.55940594]
 [-5.28217822]
 [11.37623762]
 [ 5.75742574]]
parameter b is:
[-0.38285]
[Iter  100: accuracy = 95.8333%

For Iter 1000
parameter w is:
[[-0.46353646]
 [-0.32617383]
 [ 1.05394605]
 [ 1.27872128]]
parameter b is:
[-0.40401205]
[Iter 1000: accuracy = 95.8333%

For Iter 5000
parameter w is:
[[-0.32083583]
 [-0.27904419]
 [ 0.89262148]
 [ 0.98660268]]
parameter b is:
[-0.4184513]
[Iter 5000: accuracy = 95.8333%

For Iter 6000
parameter w is:
[[-0.32919513]
 [-0.28186969]
 [ 0.886019  ]
 [ 0.97483753]]
parameter b is:
[-0.4199084]
[Iter 6000: accuracy = 95.8333%


(d) Implement SVM using **stochastic gradient descent**. Print out the following:

*   Parameter w
*   Parameter b
*   Test accuracy (%)

[Note: use the same hyperparameters as (b)]

[Note: if you implement it correctly, the running time will be ~15 sec]

In [4]:
for nIter in iterNums:
    # Fit on the training split with the current iteration budget
    state = svm_train_sgd(train_x, train_y, nIter)

    # Predict on the test split and report the accuracy
    prediction = svm_test(test_x, state)
    evaluate(prediction, test_y, nIter)


For Iter 5
parameter w is:
[[-1.78136842]
 [-3.12818738]
 [ 8.55400016]
 [ 5.20287663]]
parameter b is:
[-0.05416667]
[Iter    5: accuracy = 95.8333%

For Iter 50
parameter w is:
[[-1.37946899e+00]
 [ 9.07974830e-04]
 [ 2.58689377e+00]
 [ 2.85570760e+00]]
parameter b is:
[-0.08671111]
[Iter   50: accuracy = 95.8333%

For Iter 100
parameter w is:
[[-1.25745166]
 [ 0.11439094]
 [ 1.70851556]
 [ 2.31719145]]
parameter b is:
[-0.09433571]
[Iter  100: accuracy = 95.8333%

For Iter 1000
parameter w is:
[[-0.48895966]
 [-0.18986655]
 [ 0.95735748]
 [ 1.14001054]]
parameter b is:
[-0.12014856]
[Iter 1000: accuracy = 95.8333%

For Iter 5000
parameter w is:
[[-0.42761221]
 [-0.23477963]
 [ 0.88908395]
 [ 1.06544336]]
parameter b is:
[-0.13850557]
[Iter 5000: accuracy = 95.8333%

For Iter 6000
parameter w is:
[[-0.44211714]
 [-0.21435765]
 [ 0.90972215]
 [ 1.06365376]]
parameter b is:
[-0.14003648]
[Iter 6000: accuracy = 95.8333%
