<a href="https://colab.research.google.com/github/datle2403/datle2403/blob/master/pythonProject/SVM/Multi_SVM_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:
import numpy as np
from random import shuffle
np.random.seed(1)
# naive way to calculate loss and grad
def svm_loss_naive(W, X, y, reg):
    """Compute the multiclass SVM (hinge) loss and gradient with explicit loops.

    Reference implementation: slow, but easy to check by hand.

    Args:
        W: weight matrix of shape (d, C), one column per class.
        X: data matrix of shape (d, N), one column per sample.
        y: length-N sequence of integer class labels in [0, C).
        reg: L2 regularization strength.

    Returns:
        (loss, dW) where loss is the mean hinge loss over the N samples plus
        0.5 * reg * sum(W * W), and dW is its gradient w.r.t. W (same shape
        as W).
    """
    _, C = W.shape
    _, N = X.shape

    loss = 0.0
    dW = np.zeros_like(W)
    for n in range(N):
        xn = X[:, n]
        label = y[n]
        score = W.T.dot(xn)
        for j in range(C):
            if j == label:
                # The true class never contributes a margin term.
                continue
            margin = 1 - score[label] + score[j]
            if margin > 0:
                loss += margin
                # Each violated margin pushes class j's weights towards xn
                # and the true class's weights away from it.
                dW[:, j] += xn
                dW[:, label] -= xn

    # Average the data term, then add the L2 penalty and its gradient.
    loss /= N
    loss += 0.5*reg*np.sum(W * W)

    dW /= N
    dW += reg*W
    return loss, dW
    
# random, small data
# N samples, C classes, d features; these names are module-level globals
# that later cells read.
N, C, d = 10, 3, 5
# L2 regularization strength
reg = .1 
# Weights: one column per class, shape (d, C).
W = np.random.randn(d, C)
# Data: one column per sample, shape (d, N).
X = np.random.randn(d, N)
# Integer labels in [0, C), one per sample, drawn independently of X.
y = np.random.randint(C, size = N)

# sanity check
#print (svm_loss_naive(W, X, y, 0))
#print (svm_loss_naive(W, X, y, .1)[0])

In [84]:
def loss_function(W, X, y, reg):
  total=0
  dW=np.zeros_like(W)
  for x in range(X.shape[1]):
    loss=0
    W_t=W.T
    X_t=X.T[x].reshape(X.shape[0],1)
    score=np.dot(W_t,X_t)
    for z_score in range (W.shape[1]):
      if z_score==y[x]:
        continue
      margin=np.maximum(0,1-score[y[x]]+score[z_score])[0]
      if margin>0:
        loss+=margin
        dW[:,z_score]+=X.T[x]
        dW[:,y[x]]-=X.T[x]
    total+=loss
  total/=N
  total+=(reg/2)*np.sum(W * W)
  dW /= N 
  dW += reg*W
  return total,dW
# Evaluate once without regularization (the result is discarded here), then
# print only the loss for reg = 0.1.
loss_function(W, X, y, 0)
print (loss_function(W, X, y, .1)[0])

2.732456384363057


In [87]:
W.shape

(5, 3)

In [102]:
def loss_gradient(W, X, y, reg):
  d, C = W.shape #d: data features, C: num of class
  _, N = X.shape # N: num of data_points
  loss = 0 
  dW = np.zeros_like(W)
  Z=np.dot(W.T,X)
  y_v=np.zeros_like(Z.T)
  one=np.full(Z.shape, 1)
  for x in range(Z.shape[1]):
    k=Z[:,x]
    y_val=k[y[x]]
    m=np.full(k.shape, y_val)
    y_v[x]=m
  y_v=y_v.T
  margin=np.maximum(0,one-y_v+Z)
  margin[y, np.arange(margin.shape[1])] = 0
  loss=np.sum(margin,axis=(0,1))
  loss/=N
  loss+=(reg/2)*np.sum(W*W)
  F = (margin > 0).astype(int)
  F[y, np.arange(F.shape[1])] = np.sum(-F, axis = 0)
  dW = X.dot(F.T)/N + reg*W
  return loss, dW
# Evaluate with reg = 0.1; as the last expression of the cell, the
# (loss, dW) tuple is what the notebook displays.
loss_gradient(W, X, y, .1)
    

(2.732456384363057, array([[ 0.16473429, -0.42195177,  0.3056592 ],
        [-0.26799842,  0.07033599, -0.05324754],
        [ 0.20300352,  0.02970681, -0.10244593],
        [ 0.16419905,  0.40894064, -0.65788001],
        [ 0.1097203 , -0.20233699,  0.13534648]]))

In [104]:
def svm_loss_vectorized(W, X, y, reg):
    """Compute the multiclass SVM (hinge) loss and gradient, fully vectorized.

    Args:
        W: weight matrix of shape (d, C), one column per class.
        X: data matrix of shape (d, N), one column per sample.
        y: length-N sequence of integer class labels in [0, C).
        reg: L2 regularization strength.

    Returns:
        (loss, dW) where loss is the mean hinge loss over the N samples plus
        0.5 * reg * sum(W * W), and dW is its gradient w.r.t. W.
    """
    _, N = X.shape
    sample_idx = np.arange(N)

    # Z[c, n] is the score of class c on sample n.
    Z = W.T.dot(X)

    # Gather the true-class score of every sample with integer indexing.
    # np.choose is avoided on purpose: it treats each row of Z as a separate
    # choice array and fails once the number of classes exceeds its limit.
    correct_class_score = Z[y, sample_idx]
    margins = np.maximum(0, Z - correct_class_score + 1)
    # The true class never contributes a margin term.
    margins[y, sample_idx] = 0
    loss = np.sum(margins) / N
    loss += 0.5 * reg * np.sum(W * W)

    # F[c, n] is +1 where class c violates the margin on sample n, and the
    # true-class entry is minus the number of violations for that sample.
    F = (margins > 0).astype(int)
    F[y, sample_idx] = -np.sum(F, axis=0)
    dW = X.dot(F.T)/N + reg*W
    return loss, dW
# Evaluate with reg = 0.1; as the last expression of the cell, the
# (loss, dW) tuple is what the notebook displays.
svm_loss_vectorized(W, X, y, .1)


(2.732456384363057, array([[ 0.16473429, -0.42195177,  0.3056592 ],
        [-0.26799842,  0.07033599, -0.05324754],
        [ 0.20300352,  0.02970681, -0.10244593],
        [ 0.16419905,  0.40894064, -0.65788001],
        [ 0.1097203 , -0.20233699,  0.13534648]]))

In [106]:
def multiclass_svm_GD(X, y, Winit, reg, lr=.1,
                      batch_size=100, num_iters=1000, print_every=100):
    """Train multiclass SVM weights with mini-batch gradient descent.

    Args:
        X: data matrix of shape (d, N), one column per sample.
        y: NumPy array of N integer class labels.
        Winit: initial weight matrix of shape (d, C). It is copied, so the
            caller's array is left untouched.
        reg: L2 regularization strength passed to the loss.
        lr: learning rate (step size).
        batch_size: number of samples drawn per iteration.
        num_iters: number of gradient steps.
        print_every: log the loss every `print_every` iterations, starting
            at iteration 1; a value <= 0 disables logging.

    Returns:
        (W, loss_history): the trained weights and an array of length
        num_iters holding the mini-batch loss of every iteration.
    """
    # Work on a copy: the in-place update below would otherwise overwrite
    # the array the caller passed as Winit.
    W = np.array(Winit)
    loss_history = np.zeros(num_iters)
    num_samples = X.shape[1]
    for it in range(num_iters):
        # Randomly pick a batch of columns of X. np.random.choice samples
        # with replacement by default, so a batch may contain duplicates.
        idx = np.random.choice(num_samples, batch_size)
        X_batch = X[:, idx]
        y_batch = y[idx]

        loss_history[it], dW = svm_loss_vectorized(W, X_batch, y_batch, reg)

        W -= lr*dW
        # Logs at it = 1, 1 + print_every, ...; written as (it - 1) so that
        # print_every = 1 logs every iteration instead of never matching.
        if print_every > 0 and (it - 1) % print_every == 0:
            print('it %d/%d, loss = %f'
                  % (it, num_iters, loss_history[it]))

    return W, loss_history

# Larger synthetic problem: N samples, C classes, d features.
# NOTE(review): the sizes look like CIFAR-10 with a bias feature
# (49000 training images, 3072 pixels + 1) -- confirm.
N, C, d = 49000, 10, 3073
# L2 regularization strength
reg = .1 
# Random initial weights, shape (d, C).
W = np.random.randn(d, C)
# Random data, shape (d, N); the labels below are drawn independently of X.
X = np.random.randn(d, N)
y = np.random.randint(C, size = N)

# Run mini-batch gradient descent with the default lr, batch size and
# iteration count; W is rebound to the returned weights.
W, loss_history = multiclass_svm_GD(X, y, W, reg)

it 1/1000, loss = 1750.647495
it 101/1000, loss = 241.986769
it 201/1000, loss = 65.299006
it 301/1000, loss = 41.633488
it 401/1000, loss = 44.655925
it 501/1000, loss = 36.225107
it 601/1000, loss = 40.578468
it 701/1000, loss = 46.293795
it 801/1000, loss = 39.804542
it 901/1000, loss = 44.903964
