In [1]:
import pandas as pd
import numpy as np
import cvxpy as cp
from cvxpy.atoms.affine.wraps import psd_wrap
from read_data import *
from sklearn.model_selection import train_test_split
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%       MGT - 418         %%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%      Convex Optimization - Project 2          %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%             2021-2022 Fall                    %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%      Learning the Kernel Function             %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In [246]:
def kernel_learning(K1, K2, K3, y_tr, rho):
    """
    Kernel learning for soft margin SVM. 
    Implementation of problem (5)
    Use cvxpy.atoms.affine.psd_wrap for each G(\hat K^l) matrix when it appear in the constraints and in the objective
    """
        
    lambda_ = cp.Variable(len(y_tr))
    z = cp.Variable(1)
    
    c = np.trace(K1+K2+K3)
    
    cons = []
    K = [K1,K2,K3]
    for k_i in K : 
        cons.append(z * np.trace(k_i) >= 1/ (2 * rho) * cp.quad_form(lambda_, psd_wrap(np.diag(y_tr) @ k_i @ np.diag(y_tr))))
    cons.append(lambda_<= 1)
    cons.append(lambda_>=0)
    cons.append(lambda_.T @ y_tr == 0)
    
    obj = cp.Maximize(cp.vstack([lambda_.T]) @ np.ones(len(y_tr)) - c*z)
    
    
    prob = cp.Problem(obj, cons)
    prob.solve(solver=cp.MOSEK)

    
    mu_opt1 = cons[0].dual_value
    mu_opt2 = cons[1].dual_value
    mu_opt3 = cons[2].dual_value

    
    b_opt = cons[5].dual_value
    return mu_opt1, mu_opt2, mu_opt3, lambda_.value, b_opt

In [139]:
def svm_fit(kernel, y_tr, rho):
    """
    Dual of soft-margin SVM problem (2)
    Use cvxpy.atoms.affine.psd_wrap for each G(\hat K^l) matrix when it appear in the constraints and in the objective
    """
    n_tr = len(y_tr)
    G =  np.diag(y_tr) @ kernel @ np.diag(y_tr)
    lambda_ = cp.Variable(n_tr)
    dual_obj = cp.Maximize(cp.sum(cp.vstack([lambda_])) - (1/2*rho)* cp.quad_form(lambda_, psd_wrap(G)))
    cons = []
    cons.append(lambda_.T @ y_tr == 0)
    cons.append(lambda_<= 1)
    cons.append(lambda_>=0)
    
    prob = cp.Problem(dual_obj, cons)
    prob.solve(solver=cp.MOSEK)
    lambda_opt = lambda_.value
    b_opt =  cons[0].dual_value
    return lambda_opt, b_opt

In [259]:
def svm_predict(kernel, y_tr, y_te, lambda_opt, b_opt, rho):
    """
    Predict function for kernel SVM. 
    See lecture slide 183.
    """
    n_te = len(y_te)
    n_tr = len(y_tr)
    good = 0
    
    for i in range(n_te): 
        tot = 0
        for j in range(n_tr): 
            tot = tot + lambda_opt[j]*y_tr[j]*kernel[i,j] + b_opt
        #print("/")
        #tot = 1
        if int(np.sign((1/rho)*tot)) == y_te[i] : 
            
            good = good + 1
  
    acc = good/n_te
    return acc


In [189]:
#4.a
data, label = prepare_ionosphere_dataset()

msk = np.random.rand(a[0].shape[0]) <= 0.8
X_train = data[msk]
X_test = data[~msk]
y_train = label[msk]
y_test = label[~msk]

In [None]:
#4.b 

def k_1(x,y, arg): 
    k_1 = (1.0 + np.dot(x.T,y))**int(arg)
    return k_1

def k_2(x,y,arg) : 
    k_2 = np.exp(-np.dot((x-y).T,(x-y))/2*0.5)
    return k_2

def k_3(x,y,arg): 
    k_3 = np.dot(x.T,y)
    return k_3

def K_creator(X_train, k_func,arg): 
    K = np.zeros((X_train.shape[0],X_train.shape[0]))
    for i in range(X_train.shape[0]) : 
        for j in range(X_train.shape[0]): 
            K[i,j] = k_func(X_train[i,:],X_train[j,:],arg)
    return K

K_func = [k_1,k_2, k_3]
args = [2.0,0.5,None]
K = [None]*3
for i in range(3):
    K[i] = K_creator(X_train,K_func[i],args[i])

In [248]:
mu_opt1, mu_opt2, mu_opt3, lambda_opt, b_opt = kernel_learning(K[0],K[1],K[2],y_train,2)

print("mu opt1 : " + str(mu_opt1[0]))
print( "mu opt2 : " + str(mu_opt2[0]))
print( "mu opt3 : " + str(mu_opt3[0]))


mu opt1 : 1.0411825399208081e-07
mu opt2 : 268.1433539203483
mu opt3 : 2.098807295265703e-06


In [261]:
#4.c
kernel = mu_opt1*K[0] + mu_opt2*K[1]+ mu_opt3*K[2]

accuracy = svm_predict(kernel, y_train,y_test,lambda_opt,b_opt, 2)

print("The accuracy is : " + str(accuracy))

The accuracy is : 0.42105263157894735


In [263]:
#5 and 6
acc_opt_kernel = []    
acc_poly_kernel = []    
acc_gauss_kernel = []    
acc_linear_kernel = []    
rho = 0.01
data, labels = prepare_ionosphere_dataset()
for iters in range(100): 
    ## Please do not change the random seed.
    np.random.seed(iters)
    ### Training-test split
    msk = np.random.rand(data.shape[0]) <= 0.2
    x_tr = data[msk]
    x_te = data[~msk]
    y_tr = labels[msk]
    y_te = labels[~msk]
 
    n_tr = y_tr.shape[0]
    n_te = y_te.shape[0]
    n_tr = x_tr.shape[0]
    n_te = x_te.shape[0]
    
    x_all = np.vstack([x_tr, x_te])
    n_all = x_all.shape[0]

    ## Prepare the initial choice of kernels 
    # It is recommended to prepare the kernels for all the training and the test data
    # Then, the kernel size will be (n_tr + n_te)x(n_tr + n_te).
    # Use only the training block (like K1[0:n_tr, 0:n_tr] ) to learn the classifier 
    # (for the functions svm_fit and kernel_learning).
    # When predicting you may use the whole kernel as it is. 
    K1 = K_creator(x_all,k_1,2)
    K2 = K_creator(x_all,k_2,0.5)
    K3 = K_creator(x_all,k_3,None)

    mu_opt1, mu_opt2, mu_opt3, lambda_opt, b_opt = kernel_learning(K1[0:n_tr, 0:n_tr],
                                                                   K2[0:n_tr, 0:n_tr],
                                                                   K3[0:n_tr, 0:n_tr],
                                                                   y_tr,2)
    opt_kernel = mu_opt1*K1 + mu_opt2*K2 + mu_opt3*K3
    acc_opt_kernel.append(svm_predict(opt_kernel,y_tr,y_te,lambda_opt,b_opt,2))
    
    lambda_opt, b_opt = svm_fit(K1[0:n_tr, 0:n_tr],y_tr,2)
    acc_poly_kernel.append(svm_predict(K1,y_tr,y_te,lambda_opt,b_opt,2))
    
    lambda_opt, b_opt = svm_fit(K2[0:n_tr, 0:n_tr],y_tr,2)
    acc_gauss_kernel.append(svm_predict(K2,y_tr,y_te,lambda_opt,b_opt,2))
    
    lambda_opt, b_opt = svm_fit(K3[0:n_tr, 0:n_tr],y_tr,2)
    acc_linear_kernel.append(svm_predict(K3,y_tr,y_te,lambda_opt,b_opt,2))
    print('Iteration-->' + str(iters))
print('Average dual accuracy with optimal kernel is ' + str(np.mean(acc_opt_kernel)))
print('Average dual accuracy with polynomial kernel is ' + str(np.mean(acc_poly_kernel)))
print('Average dual accuracy with gaussian kernel is ' + str(np.mean(acc_gauss_kernel)))
print('Average dual accuracy with linear kernel is ' + str(np.mean(acc_linear_kernel)))

Iteration-->0
Iteration-->1
Iteration-->2
Iteration-->3
Iteration-->4
Iteration-->5
Iteration-->6
Iteration-->7
Iteration-->8
Iteration-->9
Iteration-->10
Iteration-->11
Iteration-->12
Iteration-->13
Iteration-->14
Iteration-->15
Iteration-->16
Iteration-->17
Iteration-->18
Iteration-->19
Iteration-->20
Iteration-->21
Iteration-->22
Iteration-->23
Iteration-->24
Iteration-->25
Iteration-->26
Iteration-->27
Iteration-->28
Iteration-->29
Iteration-->30
Iteration-->31
Iteration-->32
Iteration-->33
Iteration-->34
Iteration-->35
Iteration-->36
Iteration-->37
Iteration-->38
Iteration-->39
Iteration-->40
Iteration-->41
Iteration-->42
Iteration-->43
Iteration-->44
Iteration-->45
Iteration-->46
Iteration-->47
Iteration-->48
Iteration-->49
Iteration-->50
Iteration-->51
Iteration-->52
Iteration-->53
Iteration-->54
Iteration-->55
Iteration-->56
Iteration-->57
Iteration-->58
Iteration-->59
Iteration-->60
Iteration-->61
Iteration-->62
Iteration-->63
Iteration-->64
Iteration-->65
Iteration-->66
Itera