# Project in Optimization 1



In [None]:
from functions import *
import numpy as np
import matplotlib.pyplot as plt
# from numpy.random import default_rng
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.datasets import load_iris
from scipy.optimize import approx_fprime

### Creating classes 

Here we create random data points separated by a decision boundary at $w=[1,1], b=1$. We use this data to test our solution algorithms and functions. To test with a different number of points, simply set `npoints` to the number of points you wish. The plot illustrates the points. Since the number of points assigned to each class is random, keep the same seed to reproduce the results from the report.

In [None]:
# Fix the seed so the random class split below is reproducible.
np.random.seed(42)

# Parameters of the boundary used to generate the data.
w = np.array([1., 1.])
b = 1.

npoints = 100
margin = 5.e-1

# Class A gets a random share of the points, class B the remainder.
n_A = np.random.randint(0, npoints)
n_B = npoints - n_A
# NOTE(review): TestLinear comes from `functions`; presumably it returns the
# points of each class on its own side of the boundary - confirm there.
listA, listB = TestLinear(w, b, n_A, n_B, margin)

# Stack both classes into one sample array (class A rows first) ...
x = np.concatenate([np.asarray(listA), np.asarray(listB)])

# ... and label them +1 (class A) and -1 (class B) in the same order.
y = np.concatenate([np.full(n_A, 1.0), np.full(n_B, -1.0)])

# Scatter plot of the two features, coloured by label.
plt.scatter(
    x[:, 0],
    x[:, 1],
    c=y,
    cmap='coolwarm',
    edgecolors='k',
)
plt.title("Data points")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")

plt.show()


### Testing the linear classification

Here we test the linear classification, using gradient descent to find the solution. When C is chosen too small, the algorithm won't converge, so we haven't run that case in the final version.

In [None]:

# Settings shared by both runs: zero starting vector, step size and iteration cap.
alpha0 = np.zeros(n_A + n_B)
tau = 0.1
niter = 1000

# Solve and plot once for each value of C; C keeps the last value (45) afterwards.
for C in (3, 45):
    # Gram matrix of the linear kernel, rebuilt for each run.
    G = pairwise_kernels(x, metric=kernal_linear)

    alpha = gradient_descent(
        alpha0, G, y,
        tau0=tau, niter=niter, C=C, tol=1e-6, projection=projection,
    )

    # w and b now hold the fitted boundary, replacing the values used to generate the data.
    w, b = w_b(alpha, y, x, C=C)
    plot_solution(x, y, w, b)

Here we test gradient descent with a line search, which gives the same resulting boundary but with a shorter runtime, since the line search reaches the solution more efficiently.

In [None]:



# Problem setup used by both solvers.
alpha0 = np.zeros(n_A + n_B)
tau = 1
niter = 1000
C = 50
G = pairwise_kernels(x, metric=kernal_linear)

# Options common to both solvers; only the name of the projection keyword differs.
solver_opts = dict(tau0=tau, niter=niter, C=C, tol=1e-7)

alpha = gradient_descent(alpha0, G, y, projection=projection, **solver_opts)
alphagrad, fks = gradient_descent_linesearch(alpha0, G, y, project=projection, **solver_opts)

# Plot the boundary obtained from each solver under its own title.
# NOTE(review): the title is set before plot_solution is called; if plot_solution
# opens a new figure the title would end up on a different figure - confirm.
for plot_title, solution in (
    ("Gradient descent without linesearch", alpha),
    ("Gradient descent with linesearch", alphagrad),
):
    w, b = w_b(solution, y, x, C=C)
    plt.title(plot_title)
    plot_solution(x, y, w, b)



Here we use the plot_db function to visualize the decision boundary. This is a bit overkill and takes longer to compute, but gives the same result.

In [None]:
# Draw the decision boundary with plot_db for both solutions from the previous
# cell: fixed-step gradient descent first, then the line-search variant.
for coeffs in (alpha, alphagrad):
    plot_db(x, y, coeffs, ker=kernal_linear, C=C)


### Testing the different kernels

Here we plot the different kernels we implemented. It uses the test_kernel function, which computes the w function and plots the resulting decision boundary. 

In [None]:

# Solver settings for the kernel comparison.
niter = 1000
C = 5
tol = 1e-6

alpha0 = np.zeros(n_A + n_B)

# Run the same test with each non-linear kernel, in this order.
# `tau` is not set in this cell; it keeps the value assigned in an earlier cell.
for kernel_fn in (kernal_gaussian, kernal_inv_multiquadratic, kernal_laplacian):
    test_kernel(alpha0, x, y, kernel_fn, niter=niter, C=C, tau0=tau, tol=tol)




### Testing with different initial conditions

To confirm that the initial conditions have little to no effect on the end result, we tested how the different kernels are affected by alpha0. The plots were not affected, but we observed that the number of iterations until convergence was larger. So the solution stays the same, but the convergence time depends on the initial guess, which makes sense.

We didn't explore this any further, but it's worth noting so we can justify using $\text{alpha}_0 = (0,0, \dots, 0)$.

In [None]:
n_total = n_A + n_B

# Named starting vectors: uniform random in [0, 1), uniform random scaled by 20,
# all zeros and all ones. The two random draws are made in this order.
alpha0s = [
    ("alpha0_random", np.random.rand(n_total)),
    ("alpha0_random2", np.random.rand(n_total) * 20),
    ("alpha0_0", np.zeros(n_total)),
    ("alpha0_1", np.ones(n_total)),
]

kernels = [
    kernal_linear,
    kernal_gaussian,
    kernal_inv_multiquadratic,
    kernal_laplacian,
]

# Every kernel is run against every starting vector (kernel-major order), without plotting.
# niter, C, tau and tol keep the values set in earlier cells.
runs = ((k, start) for k in kernels for start in alpha0s)
for ker, (name, alpha0) in runs:
    print("Kernel: ", ker.__name__, "and Alpha0:", name)
    test_kernel(alpha0, x, y, ker, niter=niter, C=C, tau0=tau, tol=tol, plot=False)






### Using gradient descent on the iris dataset

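Since real data isn't always linearly separable, we also test the different kernels on two classes of the iris dataset, using only the first two features.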

In [None]:

iris = load_iris()

# Keep samples 50..149 (iris classes 1 and 2) and only the first two
# features, so the data can be drawn in 2-D.
X = iris.data[50:150, :2]

# Relabel for the SVM: class 2 -> +1, class 1 -> -1.
# np.where builds a new array, so the labels stored in `iris.target` are not
# modified (slicing alone returns a view, and assigning into it would write
# through to the loaded dataset).
Y = np.where(iris.target[50:150] == 2, 1, -1)


plt.figure(figsize=(10, 5))
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Set1, s=50, edgecolors='k')
plt.title('Iris Dataset (First Two Features)')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.show()

alpha0 = np.zeros(len(Y))
# Gaussian Gram matrix of the iris samples. It is only needed by the optional
# line-search run kept (commented out) at the end of this cell; the loop
# below rebinds `ker`.
ker = kernal_gaussian
G = pairwise_kernels(X, metric=ker)
C = 100

# Run every kernel on the iris data.
# niter, tau and tol keep the values set in earlier cells.
for ker in kernels:
    print("Kernel: ", ker.__name__)
    test_kernel(alpha0, X, Y, ker, niter=niter, C=C, tau0=tau, tol=tol)

# Optional: solve directly with the line-search variant and plot the boundary.
# alpha, f = gradient_descent_linesearch(alpha0, G, Y, tau0=tau, niter=niter, C=C, L=10, tol=1e-7, project=projection)
# plot_db(X, Y, alpha, ker=ker, C=C)

### Bound constrained lagrangian method

Here we have tested the bound-constrained Lagrangian method. Sadly, we have a numerical error that we were not able to fix.

In [None]:
# Problem sizes: M data points with d features each.
M = len(x)
d = len(x[0])
epsilon = 1e-7

C = 10

# Starting point ("startpunkt") with d + 1 + 2*M entries, all ones.
startpunkt = np.ones(d + 1 + 2 * M)
lambd_0 = np.ones(M)
mu_0 = 10
tol_1 = tol_2 = 1e-7
maxiter = 1000

# The augmented Lagrangian, its gradient and the line search each get their own
# list with the same contents (three separate list objects).
AL_par, gradAL_par, linesearch_par = (
    [lambd_0, mu_0, d, M, x, y, C] for _ in range(3)
)
constr_par = [x, y]

# Bounds for the projection: the first d + 1 entries are unbounded, the
# remaining 2*M entries are bounded below by 0; there is no upper bound.
lower_bound = np.concatenate((np.full(d + 1, -np.inf), np.zeros(2 * M)))
upper_bound = [np.inf] * len(startpunkt)
project_par = [lower_bound, upper_bound]

# Each callable is passed together with its own parameter list.
BCLM(
    startpunkt, lambd_0, mu_0, tol_1, tol_2, maxiter,
    AL, AL_par,
    constraints, constr_par,
    grad_AL, gradAL_par,
    projection_AL, project_par,
    linesearch_AL, linesearch_par,
)