In [1]:
import numpy as np
from scipy.optimize import minimize
from cvxopt import matrix, solvers

In [2]:
#q2a

def objective_function(params, y, lamb, K):
    n = len(y)
    
    alpha = params[:n]
    alpha0 = params[n]
    
    linear_combination = K @ alpha + alpha0
    loss = np.log(1 + np.exp(-y * linear_combination)).sum()
    
    regularization = (lamb / 2) * np.dot(alpha.T, K @ alpha)
    return loss + regularization


def adjBinDev(X, y, lamb, kernel_func):
    n, d = X.shape
    K = kernel_func(X, X)
    initial_params = np.zeros(n + 1)
    
    result = minimize(objective_function, initial_params, args=(y, lamb, K))
    
    alpha = result.x[:-1]
    alpha0 = result.x[-1]

    return alpha, alpha0



#  usage
X = np.random.randn(100, 1) 
y = np.random.choice([-1, 1], size=100) 
lamb = 0.1  
kernel_func = lambda X1, X2: np.dot(X1, X2.T)

alpha, alpha_0 = adjBinDev(X, y, lamb,kernel_func)

print("Optimized weights:", alpha)
print("Optimized bias:", alpha_0) 

Optimized weights: [ 1.04581218e-04  1.49123405e-04  1.27003680e-03  1.25226563e-04
 -7.41857540e-04  8.45268027e-04 -2.47937875e-03  2.55484507e-03
 -3.71837010e-05 -5.05092650e-04 -1.94790866e-03  2.46312479e-03
 -1.35317342e-04  3.31283621e-04 -9.19264719e-04 -1.93753261e-03
  7.15133065e-04  4.92864877e-04 -2.46077531e-04 -5.22533540e-04
 -1.21697494e-03  1.55150189e-03 -3.67443973e-04  2.23744140e-03
 -1.14613299e-03  1.10942023e-03 -3.37402607e-04  4.70113445e-04
  6.64126797e-04  3.39925783e-04 -1.63947387e-03 -1.43933676e-03
 -2.52085364e-03 -1.40576855e-03  9.43907279e-04  8.74463215e-04
  1.41855310e-03 -1.01614974e-03  5.73755848e-04  1.35105193e-03
  1.97909016e-03 -9.96424241e-04  3.27680601e-04  6.11965454e-04
  7.75388445e-05 -4.89436372e-04 -1.68994911e-03  3.30983292e-05
  3.74508250e-03 -8.59930481e-04  4.53308893e-04 -1.16565023e-03
 -1.96621573e-05  9.62750458e-04  1.41964641e-03 -3.11630800e-04
  5.59757454e-04 -3.41554000e-04  8.81927825e-04 -1.54213672e-03
 -1.62

In [3]:
#q2b

def minHinge(X, y, lamb):
    n, d = X.shape 

    # P matrix: d weights, 1 bias, n slacks
    P = np.zeros((d + 1 + n, d + 1 + n))
    P[:d, :d] = lamb * np.eye(d) #only regularization terms are non zero
    
    stabilizer = 0.005
    P += stabilizer * np.eye(d + 1 + n) #to ensure stability with cvxopt

    # q vector: zero for weights and bias, 1 for each slack variable
    q = np.hstack([np.zeros(d + 1), np.ones(n)])

    # G matrix: For 2n constraints (hinge loss constraints and non-negativity of slack variables)
    G = np.zeros((2 * n, d + 1 + n))

    #G1  for non negativity of slack variables
    G[:n, d+1:] = -np.eye(n)    # G13 Identity matrix for slack variables in hinge constraints
    
    #G2 (Hinge loss constraints: y_i*(w.T*x_i + b) >= 1 - xi)
    # G21
    G[n:, :d] = np.diag(y) @ X  # y*X
    # G22
    G[n:, d] = y                # y (for bias)
    # G23
    G[n:, d+1:] = -np.eye(n)    # -I (for slacks)
     
    h = np.hstack([np.zeros(n), -np.ones(n)])

    #convert to matrices
    P = matrix(P)
    q = matrix(q)
    G = matrix(G)
    h = matrix(h)

    solvers.options['show_progress'] = False
    solution = solvers.qp(P, q, G, h)

    w = np.array(solution['x'][:d])
    w0 = np.array(solution['x'][d])

    return w, w0

# Example usage
X = np.random.randn(100, 2)  # 100 samples, 2 features each
y = np.random.choice([-1, 1], size=100)  # Binary labels
lamb = 1.0  # Regularization parameter


w, w0 = minHinge(X, y, lamb)

print("Optimized weights:", w.flatten())
print("Optimized bias:", w0)

Optimized weights: [1.01215868e-07 1.24890759e-07]
Optimized bias: 0.9999994348153625


In [4]:
#q2c

def adjClassify(Xtest, a, a0, X, kernel_func)

