# This is the project in Optimization 1

In [None]:
from functions import *

import numpy as np
import matplotlib.pyplot as plt
from numpy.random import default_rng
from sklearn.metrics.pairwise import pairwise_kernels


In [None]:
# Parameters (w, b) handed to TestLinear to generate the two point sets.
w = np.array([1., 1.])
b = 1.

# Split the n points randomly between the two classes. n_A is drawn from
# [1, n-1] (randint's upper bound is exclusive) so that both classes get at
# least one point and both labels are present in the training set.
n = 10
n_A = np.random.randint(1, n)
n_B = n - n_A
margin = 5.e-1
listA, listB = TestLinear(w, b, n_A, n_B, margin)

# Plot class A in red and class B in blue.
for points, colour in ((listA, "r"), (listB, "b")):
    for point in points:
        plt.scatter(point[0], point[1], color=colour)
plt.show()


In [None]:
# Sample matrix: the class-A points followed by the class-B points.
x = np.concatenate([np.asarray(listA), np.asarray(listB)], axis=0)

# Labels: +1 for the first n_A samples (class A), -1 for the last n_B (class B).
y = np.concatenate([np.full(n_A, 1.0), np.full(n_B, -1.0)])

# Gram matrix of the samples under the Gaussian kernel with sigma = 1.
G = pairwise_kernels(x, metric=kernal_gaussian, sigma=1)

print(G.shape)


### Projected gradient descent algorithm

\begin{align*}
    \alpha^{(k+1)} &= \alpha^{(k)}+ d^{(k)}\\
    \text{where}&\\
    d^{(k)} & = \pi _{\Omega} \left(\alpha^{(k)}-\tau_k \nabla f(\alpha^{(k)}) \right) - \alpha^{(k)}
\end{align*}

\begin{align*}
    f(\alpha) &:= \frac{1}{2} \langle\alpha, YGY\alpha\rangle- \langle1_M,\alpha\rangle\\
    \Omega &= \{\alpha \in \mathbb{R}^M : \langle y, \alpha \rangle=0 \quad \text{and} \quad 0\leq \alpha \leq C \}
\end{align*}

Here $Y = \operatorname{diag}(y)$, $G$ is the kernel Gram matrix, $\Omega$ is the feasible set, $\pi_{\Omega}$ denotes the projection onto $\Omega$, and $\tau_k > 0$ is a suitable step length.




In [None]:
def gradient_descent(alpha0, G, y, tau0, niter, C=100, taumax=1e5, taumin=1e-5):
    """Run projected gradient descent with Barzilai-Borwein step lengths.

    Every iteration performs ``alpha <- alpha + d`` with
    ``d = projection(alpha - tau * gradientf(alpha, A)) - alpha``,
    where ``A = Y G Y`` and ``Y = diag(y)``. After each update the step
    length ``tau`` for the next iteration is obtained from
    ``BB_step_length``.

    Parameters
    ----------
    alpha0 : array
        Starting point of the iteration.
    G : 2-D array
        The matrix G in ``A = Y G Y`` (the callers in this file pass a
        kernel Gram matrix).
    y : 1-D array
        Label vector; placed on the diagonal of ``Y`` and forwarded to
        ``projection``.
    tau0 : float
        Step length used in the first iteration.
    niter : int
        Number of iterations to perform.
    C : float, optional
        Forwarded unchanged to ``projection``.
    taumax, taumin : float, optional
        Bounds forwarded to ``BB_step_length``. The defaults are the values
        that were previously hard-coded in this function.

    Returns
    -------
    array
        The iterate after ``niter`` iterations (``alpha0`` itself when
        ``niter`` is 0).
    """
    Y = np.diag(y)
    A = np.dot(Y, np.dot(G, Y))
    alpha = alpha0
    tau = tau0

    for _ in range(niter):
        alpha_prev = alpha
        step = projection(alpha_prev - tau * gradientf(alpha_prev, A), y=y, Y=Y, C=C) - alpha_prev
        alpha = alpha_prev + step

        # Pass the stored previous iterate directly instead of rebuilding it
        # as ``alpha - step``; that avoids round-off and a temporary array.
        tau = BB_step_length(alpha_prev, alpha, gradientf, A, taumax=taumax, taumin=taumin)

    return alpha

def BB_step_length(ak, ak1, grad_f, A, taumax=1e5, taumin=1e-5):
    '''
    Determine the Barzilai-Borwein step length for the projected gradient descent
    algorithm.

    With
        s^k = a^{k+1} - a^k
        z^k = grad_f(a^{k+1}) - grad_f(a^k)
    the step length is tau = <s^k, s^k> / <s^k, z^k>, clamped to the
    interval [taumin, taumax].

    Parameters
    ----------
    ak, ak1 : 1-D arrays
        Previous iterate a^k and current iterate a^{k+1}.
    grad_f : callable
        Called as grad_f(a, A); returns the gradient at a.
    A : array
        Passed through unchanged as second argument of grad_f.
    taumax, taumin : float
        Upper and lower bound of the returned step length.

    Returns
    -------
    float
        The clamped step length. taumax is returned when <s^k, z^k> <= 0,
        because the quotient is then undefined or non-positive.
    '''
    s = ak1 - ak
    z = grad_f(ak1, A) - grad_f(ak, A)

    curvature = np.dot(s, z)
    if curvature <= 0:
        return taumax

    tau = np.dot(s, s) / curvature
    # Clamp into [taumin, taumax]. The previous min(max(tau, taumax), taumin)
    # had the bounds swapped and therefore always returned taumin.
    return max(taumin, min(tau, taumax))
    

# Solver settings: box bound C, iteration count and initial step length.
C = 0.8
niter = 1000
tau = 0.01

# Start from the all-ones vector, one entry per sample.
alpha0 = np.ones(n_A + n_B)

alpha = gradient_descent(alpha0, G, y, tau, niter, C=C)
print(alpha)



In [None]:
def w_b(alpha, y, x, C=0.8):
    """Compute the weight vector w and the offset b from the multipliers.

    ``w`` is the sum of ``alpha_i * y_i * x_i`` over the support vectors
    (the samples with ``alpha_i > 0``). ``b`` is ``y_i - <w, x_i>`` evaluated
    at the first support vector that satisfies ``alpha_i < C``.

    Parameters
    ----------
    alpha : 1-D array_like
        One multiplier per sample.
    y : 1-D array_like
        One label per sample.
    x : 2-D array_like
        Sample matrix with one row per sample.
    C : float, optional
        Upper bound on the multipliers; only samples strictly below it are
        used to compute ``b``.

    Returns
    -------
    w : 1-D array
        Weight vector with one entry per column of ``x``.
    b : float
        Offset.

    Raises
    ------
    ValueError
        If no sample satisfies ``0 < alpha_i < C``.
    """
    alpha = np.asarray(alpha)
    y = np.asarray(y)
    x = np.asarray(x)

    support = np.flatnonzero(alpha > 0)
    w = np.dot(alpha[support] * y[support], x[support])

    # Keep indices into the FULL arrays. Positions within the support subset
    # do not line up with the rows of x and y as soon as some alpha_i is zero.
    free = support[alpha[support] < C]
    if free.size == 0:
        raise ValueError("no support vector with 0 < alpha < C; cannot determine b")

    index = free[0]
    b = y[index] - np.dot(w, x[index])
    return w, b
    
def plot_solution(x, y, alpha, w, b):
    """Scatter the samples and draw the line ``w[0]*x0 + w[1]*x1 + b = 0``.

    Parameters
    ----------
    x : 2-D array
        Samples; column 0 is plotted on the horizontal axis and column 1 on
        the vertical axis.
    y : 1-D array
        Labels, used to colour the points.
    alpha : array
        Not used by the plot; kept so existing calls keep working.
    w : sequence of two floats
        Normal vector of the line.
    b : float
        Offset, using the convention of ``w_b`` (``b = y_i - <w, x_i>``), so
        the line drawn is ``<w, x> + b = 0``.
    """
    plt.scatter(x[:, 0], x[:, 1], c=y)

    x0_ends = (-3, 3)
    if w[1] != 0:
        # Solve w0*x0 + w1*x1 + b = 0 for x1 at both ends of the x0 range.
        plt.plot(list(x0_ends), [-(w[0] * x0 + b) / w[1] for x0 in x0_ends], 'k-')
    elif w[0] != 0:
        # Vertical line: x1 is unconstrained, x0 = -b / w0.
        plt.axvline(-b / w[0], color='k', linestyle='-')
    # If w is the zero vector there is no line to draw.

    plt.show()

    
# Recover (w, b) from the multipliers, using the same bound C as the solver,
# and draw the resulting line over the data.
w, b = w_b(alpha, y, x, C=C)
plot_solution(x, y, alpha, w, b)




In [None]:



# Repeat the experiment with sigma = 0.5 for three kernels, in this order:
# Gaussian, inverse multiquadratic, Laplacian. The multipliers are printed
# for the Gaussian run only.
for kernel, show_alpha in (
    (kernal_gaussian, True),
    (kernal_inv_multiquadratic, False),
    (kernal_laplacian, False),
):
    G = pairwise_kernels(x, metric=kernel, sigma=0.5)
    alpha = gradient_descent(alpha0, G, y, tau, niter, C=0.8)

    if show_alpha:
        print(alpha)

    w, b = w_b(alpha, y, x)
    plot_solution(x, y, alpha, w, b)


