# This is the project in Optimization 1

In [None]:
from functions import *

import numpy as np
import matplotlib.pyplot as plt
from numpy.random import default_rng
from sklearn.metrics.pairwise import pairwise_kernels


In [None]:
# Parameters (w, b) handed to TestLinear; the plots further down draw the
# line built from w = (1, 1), b = 1 as the exact solution.
w = np.array([1., 1.])
b = 1.

n = 100
# Draw the size of class A from 1..n-1 so that neither class is empty.
# randint(0, n) could return 0, which makes np.array(listA) one-dimensional
# and breaks the concatenation of both classes into a single sample array.
n_A = np.random.randint(1, n)
n_B = n - n_A
margin = 5.e-1
listA, listB = TestLinear(w, b, n_A, n_B, margin)

# One scatter call per class instead of one call (and one artist) per point.
points_A = np.asarray(listA)
points_B = np.asarray(listB)
plt.scatter(points_A[:, 0], points_A[:, 1], color="r")
plt.scatter(points_B[:, 0], points_B[:, 1], color="b")
plt.show()


In [None]:
# Stack both classes into one sample array; the rows of class A come first.
samples_A = np.array(listA)
samples_B = np.array(listB)
x = np.concatenate((samples_A, samples_B))

# Labels in the same order as the rows of x: +1 for class A, -1 for class B.
labels_A = np.ones(n_A)
labels_B = -np.ones(n_B)
y = np.concatenate((labels_A, labels_B))

# Pairwise kernel matrix of the samples, evaluated with the project's
# `kernal_gaussian` callable and sigma = 1.
G = pairwise_kernels(x, metric=kernal_gaussian, sigma=1)

print(G.shape)


### Projected gradient descent algorithm

\begin{align*}
    \alpha^{(k+1)} &= \alpha^{(k)}+ d^{(k)}\\
    \text{where}&\\
    d^{(k)} & = \pi _{\Omega} \left(\alpha^{(k)}-\tau_k \nabla f(\alpha^{(k)}) \right) - \alpha^{(k)}
\end{align*}

\begin{align*}
    f(\alpha) &:= \frac{1}{2} \langle\alpha, YGY\alpha\rangle- \langle1_M,\alpha\rangle\\
    \Omega &= \{\alpha \in \mathbb{R}^M : \langle y, \alpha \rangle=0 \quad \text{and} \quad 0\leq \alpha \leq C \}
\end{align*}

Here $\Omega$ is the feasible set, $\pi_{\Omega}$ denotes the projection onto $\Omega$, and $\tau_k > 0$ is a suitable step length.




In [None]:

# Shared settings for the solver runs below.
C = 0.8          # bound C of the box constraint 0 <= alpha <= C
niter = 1000     # iteration count passed to the solvers
tau = 0.01       # step length passed to the solvers
alpha0 = np.ones(n_A + n_B)   # starting point: one entry per sample, all ones



In [None]:
# Baseline run of the projected gradient solver. niter is given explicitly
# here (100) instead of using the global niter.
alpha = gradient_descent(
    alpha0, G, y, tau,
    niter=100,
    C=0.8,
    tol=1e-7,
)

In [None]:
def w_b(alpha, y, x, C=0.8):
    """Recover the linear separator (w, b) from the dual variables.

    Parameters
    ----------
    alpha : 1-D array of dual variables, one entry per sample.
    y : 1-D array of labels, indexed like ``alpha``.
    x : 2-D array of samples, one row per entry of ``alpha``.
    C : upper bound on ``alpha``; ``b`` is taken from a sample whose
        ``alpha`` lies strictly between 0 and ``C``.

    Returns
    -------
    (w, b) with ``w = sum_i alpha_i * y_i * x_i`` over all samples with
    ``alpha_i > 0`` and ``b = y_j - <w, x_j>`` for the first sample ``j``
    with ``0 < alpha_j < C``.

    Raises
    ------
    IndexError
        If no sample satisfies ``0 < alpha < C``.
    """
    # Positions (in the full arrays) of the samples with a positive multiplier.
    support = np.where(alpha > 0)[0]
    w = np.sum(alpha[support] * y[support] * x[support].T, axis=1)

    # Filter the support positions themselves so that the result still indexes
    # the full arrays; a position inside the filtered subset would point at the
    # wrong sample as soon as any earlier alpha is zero.
    on_margin = support[alpha[support] < C]
    if on_margin.size == 0:
        raise IndexError("no sample with 0 < alpha < C; cannot determine b")

    j = on_margin[0]
    b = y[j] - np.dot(w, x[j])
    return w, b
    


def plot_solution(x, y, alpha, w, b):
    """Scatter the samples and overlay the computed and the reference line.

    Columns 0 and 1 of ``x`` are plotted and coloured by ``y``. ``w`` and
    ``b`` define the solid black line; the dashed red line is hard-coded with
    w = (1, 1) and b = 1. ``alpha`` is accepted but not used.
    """
    x_ends = [-3, 3]

    plt.scatter(x[:, 0], x[:, 1], c=y)

    # Computed line, drawn as x2 = (b - w[0] * x1) / w[1] between both ends.
    # NOTE(review): w_b computes b = y - <w, x>, which suggests a decision
    # function <w, x> + b whose zero set is x2 = (-b - w[0] * x1) / w[1];
    # confirm the sign convention of b against TestLinear.
    computed = [(b - w[0] * x1) / w[1] for x1 in x_ends]
    plt.plot(x_ends, computed, 'k-')

    # Exact solution.
    exact = [1 - 1 * x1 for x1 in x_ends]
    plt.plot(x_ends, exact, 'r--')

    plt.show()

    
# Separator recovered from the baseline solution, then plotted.
# NOTE: this rebinds the globals w and b that were passed to TestLinear.
w, b = w_b(alpha=alpha, y=y, x=x, C=0.8)
plot_solution(x=x, y=y, alpha=alpha, w=w, b=b)




In [None]:

# Same experiment for each kernel callable, all with sigma = 0.5: build the
# kernel matrix, solve, recover (w, b) and plot. The globals G, alpha, w and b
# keep the values of the last kernel in the tuple.
for kernel in (kernal_gaussian, kernal_inv_multiquadratic, kernal_laplacian):
    G = pairwise_kernels(x, metric=kernel, sigma=0.5)
    alpha = gradient_descent(alpha0, G, y, tau, niter, C=0.8)

    # The multipliers are printed for the first kernel only.
    if kernel is kernal_gaussian:
        print(alpha)

    w, b = w_b(alpha, y, x)
    plot_solution(x, y, alpha, w, b)




In [None]:
def gradient_descent_linesearch(alpha0, G, y , tau0, niter, C=100, L = 10, tol = 1e-10):
    """Projected gradient iteration with a reference-value line search.

    Each iteration forms d_k = projection(alpha - tau * gradientf(alpha)) - alpha
    and moves to alpha + theta * d_k. The full step (theta = 1) is taken unless
    f(alpha + d_k) exceeds the reference value f_ref, in which case theta is
    computed in closed form. The step length tau is replaced after every
    iteration by the value returned from BB_step_length (presumably a
    Barzilai-Borwein rule -- confirm in the `functions` module).

    Parameters
    ----------
    alpha0 : starting point. It is never written in place, but it is the very
        object returned when the convergence test passes in the first iteration.
    G : matrix combined with the labels into A = Y G Y (callers in this
        notebook pass a pairwise kernel matrix).
    y : label vector; placed on the diagonal of Y and forwarded to projection.
    tau0 : step length for the first iteration only.
    niter : maximum number of iterations and length of the returned history.
    C : forwarded to projection (presumably the upper bound of the box
        constraint -- confirm in `functions.projection`).
    L : number of consecutive iterations without a new best objective value
        after which the reference value f_ref is refreshed.
    tol : the iteration stops once max(|d_k|) < tol.

    Returns
    -------
    (alpha, f_ks) : the final iterate and an array of length niter where
        f_ks[i] is f(alpha) at the start of iteration i.
        NOTE(review): on early convergence at iteration i the entries
        f_ks[i:] are never written and stay 0.
    """
    # Alias, not a copy: alpha is only ever rebound below.
    alpha = alpha0
    Y = np.diag(y)
    # A = Y G Y.
    A = np.dot(Y,np.dot(G,Y))
    tau = tau0

    # Infinite at first, so the line-search branch cannot be entered before
    # f_ref has been assigned a finite value further down.
    f_ref = np.inf
    # Smallest objective value seen so far.
    f_best = f(alpha, A)
    # Largest objective value since f_best last improved (or since the last
    # refresh of f_ref); becomes the next f_ref.
    f_c = f_best
    # Number of consecutive iterations without improving f_best.
    ell = 0
    f_ks = np.zeros(niter)
    for i in range(niter):


        # Step from alpha to the projection of the gradient step.
        d_k = projection(alpha - tau*gradientf(alpha, A), y=y, Y=Y, C=C) - alpha

        if np.max(np.abs(d_k)) < tol:
            print("Converged after", i, "iterations")
            return alpha, f_ks
        
        
        f_k = f(alpha, A)
        f_ks[i] = f_k
        if f_k < f_best:
            # New best value: restart the bookkeeping from here.
            f_best = f_k
            f_c = f_k
            ell = 0
        else:
            f_c = np.max([f_c, f_k])
            ell = ell + 1
        if ell == L:
            # L iterations in a row without improvement: refresh the reference.
            f_ref = f_c
            f_c = f_k
            ell = 0

        # Progress output: prints the non-improvement counter while it is nonzero.
        if ell!=0:
            print(ell, end=" ")

        if f(alpha + d_k, A) > f_ref:
            # theta solves d/dtheta [0.5*<a, A a> - sum(a)] = 0 at
            # a = alpha + theta*d_k; this assumes f has that quadratic form
            # -- TODO confirm against functions.f.
            # NOTE(review): theta is not clipped and dot1 may be zero.
            dot1 = np.dot(d_k, np.dot(A, d_k))
            dot2 = np.dot(d_k, np.dot(A, alpha))
            dot3 = np.dot(alpha, np.dot(A, d_k))
            dot4 = np.sum(d_k)
            theta = - (0.5*dot2 + 0.5 *dot3 - dot4)/dot1
            print("theta", theta, np.shape(alpha), np.shape(d_k))
            
        else:
            theta = 1

        alpha = alpha + theta * d_k
        
        # alpha - theta*d_k rebuilds the previous iterate (up to rounding), so
        # the helper receives the previous and the current point.
        tau = BB_step_length(alpha-theta*d_k, alpha, gradientf, A, taumax=1e5, taumin=1e-5)


    print("Did not converge after", niter, "iterations")
    
    return alpha, f_ks

In [None]:
# Constant starting point (every entry 0.5) shared by both solvers.
alpha0 = np.full(n_A + n_B, 0.5)

# The line-search solver returns the iterate together with its objective history.
alpha_test1, fks = gradient_descent_linesearch(
    alpha0, G, y, tau, niter=5000, C=0.8, L=10, tol=1e-5
)
alpha_test2 = gradient_descent(
    alpha0, G, y, tau, niter=5000, C=0.8, tol=1e-3
)


In [None]:
# Objective history of the line-search run without its first and last entry.
history = fks[1:-1]
plt.scatter(range(len(history)), history)
plt.show()

# Starting point and line-search result, each plotted against the entry index.
for values in (alpha0, alpha_test1):
    plt.scatter(range(len(values)), values)
plt.show()

In [None]:
# Seed NumPy's global generator before drawing the random starting point.
np.random.seed(0)
n_samples = n_A + n_B
alpha0 = np.random.rand(n_samples)
print("alpha0", alpha0)

In [None]:
niter = 5000

# Fresh random starting point; this rebinds alpha0 with a new draw.
alpha0 = np.random.rand(n_A + n_B)

# alpha_line holds the whole (alpha, f_ks) pair returned by the line-search
# solver; index 0 is the iterate and index 1 the objective history.
alpha_line = gradient_descent_linesearch(
    alpha0, G, y, tau, niter, C=100, L=10, tol=1e-5
)
alpha_grad = gradient_descent(
    alpha0, G, y, tau, niter, C=0.8, tol=1e-3
)


In [None]:
# Starting point first (unlabelled), then the two line-search results.
plt.scatter(range(len(alpha0)), alpha0)
for values, name in (
    (alpha_line[0], "line search"),
    (alpha_test1, "line search test"),
):
    plt.scatter(range(len(values)), values, label=name)
plt.legend()
plt.show()

In [None]:
# Separator from the line-search run that was solved with C=0.8.
w, b = w_b(alpha_test1, y, x, C=0.8)
print(w, b)
plot_solution(x, y, alpha_test1, w, b)

# alpha_line was solved with C=100, so w_b has to use the same bound when it
# looks for a sample with 0 < alpha < C. With the default C=0.8 it can raise
# an IndexError whenever every positive multiplier is 0.8 or larger.
w, b = w_b(alpha_line[0], y, x, C=100)
print(w, b)
plot_solution(x, y, alpha_line[0], w, b)


In [None]:
# Objective histories of both line-search runs; the first one is shown
# without its first and last entry.
trimmed_history = fks[1:-1]
full_history = alpha_line[1]

plt.scatter(range(len(trimmed_history)), trimmed_history, label="line search test")
plt.scatter(range(len(full_history)), full_history, label="line search")
plt.legend()
plt.show()