## IMPORT

In [698]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import scipy
from scipy.optimize import fmin_l_bfgs_b 
from cvxopt import matrix, solvers
import pickle as pkl
from scipy import optimize
from scipy.linalg import cho_factor, cho_solve

In [788]:
# Load the feature matrices: the files have no header row and only the first
# 3072 columns are kept (one flattened image per row).
Xtr = np.array(pd.read_csv('data_image/Xtr.csv',header=None,sep=',',usecols=range(3072))) 
Xte = np.array(pd.read_csv('data_image/Xte.csv',header=None,sep=',',usecols=range(3072))) 
# The label file has a header row; only the column at position 1 is read and
# the result is squeezed to a 1-D array.
Ytr = np.array(pd.read_csv('data_image/Ytr.csv',sep=',',usecols=[1])).squeeze() 

In [791]:
Xte.shape

(2000, 3072)

## VISUALISATION

Let's visualize the data:

In [700]:
print(2)

2


In [None]:
def plot_images_grid(data, nrows, ncols):
    """
    Show a random nrows x ncols grid of images taken from ``data``.

    Each row of ``data`` is read as a flattened 32x32 three-channel image
    stored plane by plane: entries [0, 1024) are the red plane,
    [1024, 2048) the green plane and [2048, ...) the blue plane.
    Every plane is min-max rescaled to [0, 1] on its own before display, so
    colours are not comparable across images.

    Parameters :
    data : 2d array size (n, 3072)
        One flattened image per row.
    nrows, ncols : int
        Shape of the grid; nrows*ncols rows are drawn at random, with
        replacement (the same image may appear twice).
    """
    def _rescale(channel):
        # Min-max normalisation; a constant plane would divide by zero, so it
        # is rendered as all zeros instead of NaN.
        lo = channel.min()
        span = channel.max() - lo
        if span == 0:
            return np.zeros_like(channel, dtype=float)
        return (channel - lo) / span

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 or 1xN grid, so
    # `.flat` below always works.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
                             figsize=(ncols*2, nrows*2), squeeze=False)
    picked = np.random.choice(data.shape[0], size=nrows*ncols)

    plane_bounds = ((0, 1024), (1024, 2048), (2048, None))
    for ax, idx in zip(axes.flat, picked):
        flat_image = data[idx, :]
        planes = [_rescale(flat_image[start:stop].reshape((32, 32)))
                  for start, stop in plane_bounds]
        # Stack the planes on the last axis to get a (32, 32, 3) RGB image.
        ax.imshow(np.stack(planes, axis=-1))
        ax.axis('off')

    plt.tight_layout()
    plt.show()
plot_images_grid(Xtr,2,2)

## KERNELS

Let's define some kernels :

In [4]:
class RBF():
    """
    Gaussian (RBF) kernel :

    (K)_ij = K(X_i,Y_j) = exp( -||X_i-Y_j||^2 / (2*sigma^2) )

    Parameters :
    sigma : float
        Bandwidth of the kernel (it enters the formula squared, i.e. it plays
        the role of a standard deviation).

    `kernel(X, Y)` arguments :
    X : 2d array size (n,p)
        the Data matrix with n the number of data and p the size of data
    Y : 2d array size (q,p)
        the Data matrix with q the number of data and p the size of data

    Outputs :
    2d array size (n,q)
    Matrix of the Gaussian Kernel
    """
    def __init__(self, sigma=1.):
        self.sigma = sigma  ## bandwidth of the kernel
    def kernel(self,X,Y):
        ## Input matrices X and Y of shape Nxd and Mxd
        # ||x-y||^2 expanded as ||x||^2 + ||y||^2 - 2<x,y> to avoid an
        # (N, M, d) intermediate.
        sq_dists = np.sum(X**2, axis=1)[:, None] + np.sum(Y**2, axis=1)[None, :] - 2 * np.dot(X, Y.T)
        # Floating-point cancellation can make the expansion slightly
        # negative for (near-)identical points; clamp so K never exceeds 1.
        sq_dists = np.maximum(sq_dists, 0)
        return np.exp(-sq_dists/(2*self.sigma**2)) ## Matrix of shape NxM

In [5]:
class polynomial():
    """
    Homogeneous polynomial kernel (no intercept term) :

    (K)_ij = K(X_i,Y_j) = (<X_i,Y_j>)^d

    Parameters :
    d : Integer
        Degree of the polynomial kernel

    `kernel(X, Y)` arguments :
    X : 2d array size (n,p)
        n data points of dimension p
    Y : 2d array size (q,p)
        q data points of dimension p

    Outputs :
    2d array size (n,q)
    Matrix of the polynomial kernel
    """
    def __init__(self,d=2):
        self.d=d

    def kernel(self,X,Y):
        # Gram matrix of plain inner products, then raised entrywise to d.
        inner_products = np.dot(X, Y.T)
        return inner_products ** self.d

## MODELS

In [None]:
class Kernel_ridge_reg():
    """
    Kernel Ridge Regression with regularization parameter lambda (lmbda).

    ----> Closed-form solution computed by `train` :
            alpha = (K + lambda*n*I)^-1 y      with n = K.shape[0]

    `fit` does not train anything: it evaluates the trained model and returns
    alpha @ K, so K is expected with the training points along its first
    axis.
    """

    def __init__(self,lmbda):
        self.lmbda=lmbda

    def train(self,K,y):
        # Solve the regularized linear system rather than forming an inverse.
        n_samples = K.shape[0]
        ridge = self.lmbda*n_samples*np.identity(n_samples)
        self.alpha = scipy.linalg.solve(K+ridge, y)

    def fit(self,K):
        # Shape trace kept from the original implementation.
        print(self.alpha.shape,K.shape)
        predictions = self.alpha@K
        return predictions

In [None]:
class Kernel_logistic_reg():
    """
    Kernel logistic regression with regularization parameter lambda (lmbda),
    for labels y in {-1, 1}.

    ----> Smooth convex problem solved by `train` :
            alpha = argmin 1/n sum_i=1^n logistic(y_i[K alpha]_i) + lambda/2 * alpha^T K alpha
          with logistic(u) = log(1 + exp(-u)).

          It is solved with L-BFGS-B from scipy, started at `alpha0`.

          and then :
            pred = K @ alpha      (raw scores, see `fit`)
    """

    def __init__(self,lmbda,alpha0):
        self.lmbda=lmbda
        self.alpha0=alpha0

    def obj(self,K,y,alpha):
        """Value of the regularized logistic objective at `alpha`."""
        n=len(K)
        scores=K@alpha
        # logaddexp(0, -u) == log(1 + exp(-u)) without overflow for large |u|.
        data_term=np.sum(np.logaddexp(0, -y*scores))/n
        return data_term+self.lmbda*(alpha@scores)/2

    def derivative(self,K,y,alpha):
        """Gradient of `obj` with respect to `alpha` (K assumed symmetric)."""
        from scipy.special import expit  # numerically stable sigmoid
        n=len(K)
        scores=K@alpha
        # d/du log(1+exp(-u)) = -sigmoid(-u); applied elementwise instead of
        # through a dense n x n diagonal matrix.
        loss_slope=-expit(-y*scores)*y
        return K@loss_slope/n+self.lmbda*scores

    def train(self,K,y):
        """Fit `self.alpha` on the Gram matrix K (n x n) and labels y."""
        ob= lambda alpha : self.obj(K=K,y=y,alpha=alpha)
        der=lambda alpha : self.derivative(K=K,y=y,alpha=alpha)
        # NOTE: pgtol/factr are set far below machine precision, so in
        # practice the solver stops on its iteration / line-search limits.
        alpha,_,_=fmin_l_bfgs_b(ob, self.alpha0, der, args=(), pgtol=1e-50, factr =1e-50)
        self.alpha=alpha

    def fit(self,K):
        """Return the raw scores K @ alpha (K has one row per point to score)."""
        return K@self.alpha

In [None]:
class SVM():
     """
     Kernel SVM trained from a precomputed Gram matrix, for labels in {-1,1}.

     The dual quadratic program is solved with cvxopt (`train`), which stores
     the expansion coefficients in `self.alpha`. `fit` and `pred_prob` then
     score new points from a kernel matrix K whose columns match the training
     points.
     """
     def __init__(self,lmbda):
          # lmbda : regularization parameter of the SVM objective
          self.lmbda = lmbda

     def train(self,K,y):
          """ 
          Solve
               max  mu^T@1 - 1/(4*lambda) * mu^T@diag(y)@K@diag(y)@mu   for 0<=mu<=1/n
          and then set
               alpha = diag(y)@mu / (2*lambda)

          cvxopt solves
               min 1/2 x^T@P@x + q^T@x   for Gx<=h (and optionally Ax=b)
          so the problem is passed with P = diag(y)@K@diag(y)/(2*lambda) and
          q = -1; no equality constraint is given.
          """
          n=len(K)
          
          q = -np.ones(n)
          P = np.diag(y) @ K @np.diag(y) / (2*self.lmbda)
          # Box constraint written as G@mu <= h : rows [I; -I], bounds [1/n; 0]
          G = np.concatenate((np.identity(n),-np.identity(n)),axis=0)
          h = np.concatenate((np.ones(n)/n , np.zeros(n)),axis=0)[:,None]

          # 'x' is the optimizer's solution (a cvxopt matrix)
          mu=solvers.qp(P=matrix(P),q=matrix(q),G=matrix(G),h=matrix(h))['x']
          
          # NOTE(review): mu is still a cvxopt matrix here; the caller indexes
          # predictions with [:,0], so alpha is presumably (n,1) - confirm.
          self.alpha=np.diag(y)@mu / (2*self.lmbda)

     
     def fit(self,K):
          # Predicted labels: sign of the decision values (no training here)
          return np.sign(K@self.alpha)
     
     def pred_prob(self,K):
          # Raw decision values K@alpha; despite the name these are not
          # probabilities.
          return K@self.alpha
     


In [None]:
class KernelSVC:

    """
    Soft-margin kernel SVM for labels in {-1,1}, trained on its dual problem
    with SciPy's SLSQP solver.

    Parameters :
    C : float
        Upper bound of the box constraint 0 <= alpha_i <= C.
    kernel : callable
        kernel(A, B) returns the Gram matrix between the rows of A and B.
    epsilon : float
        Tolerance handed to the optimizer (`tol`).

    Attributes set by `fit` :
    alpha         : dual variables, one per training point
    beta          : y * alpha, the coefficients of the kernel expansion
    support       : training points with |alpha| > 1e-5
    alpha_support, beta_support : alpha / beta restricted to `support`
    margin_points : training points with 1e-5 < alpha < C - 1e-5
    b             : offset of the classifier
    norm_f        : beta^T K beta over the support vectors, i.e. the squared
                    RKHS norm of the separating function
    """
    
    def __init__(self, C, kernel, epsilon = 1e-1):
        self.type = 'non-linear'
        self.C = C
        self.kernel = kernel
        self.alpha = None
        self.support = None
        self.epsilon = epsilon
        self.norm_f = None

    def fit(self, X, y):
        """Train on X (N x d) with labels y (N,) in {-1,1}."""
        y = np.asarray(y)
        N = len(y)
        K = self.kernel(X, X)
        print('kernel computed')

        # Q = diag(y) K diag(y), built elementwise to avoid two O(N^3)
        # products with a dense diagonal matrix.
        Q = (y[:, None] * K) * y[None, :]

        # Negated Lagrange dual (minimized): 1/2 alpha^T Q alpha - sum(alpha)
        def loss(alpha):
            return 0.5 * alpha @ (Q @ alpha) - np.sum(alpha)

        def grad_loss(alpha):
            return Q @ alpha - np.ones_like(alpha)

        # Constraints: alpha^T y = 0 and 0 <= alpha <= C, the latter written
        # as the stacked inequalities [alpha; C - alpha] >= 0.
        box_jacobian = np.concatenate((np.identity(N), -np.identity(N)))
        constraints = ({'type': 'eq',
                        'fun': lambda alpha: alpha @ y,
                        'jac': lambda alpha: y},
                       {'type': 'ineq',
                        'fun': lambda alpha: np.concatenate((alpha, self.C - alpha)),
                        'jac': lambda alpha: box_jacobian})
        print('begin opti :')
        optRes = optimize.minimize(fun=loss,
                                   x0=np.ones(N),
                                   method='SLSQP',
                                   jac=grad_loss,
                                   constraints=constraints, tol=self.epsilon)
        self.alpha = optRes.x
        print('end opti')

        ## Attributes
        self.beta = y * self.alpha
        train_scores = K @ self.beta   # f(x_i) without offset, computed once

        indice_sv = np.where(np.abs(self.alpha) > 1e-5)[0]
        self.support = X[indice_sv]
        self.alpha_support = self.alpha[indice_sv]
        self.beta_support = self.beta[indice_sv]

        margin_indices = np.where((self.alpha > 1e-5) & (self.alpha < self.C - 1e-5))[0]
        self.margin_points = X[margin_indices]  # rows lying exactly on the margin

        # The identity y_i = f(x_i) + b only holds for points on the margin
        # (0 < alpha_i < C); vectors at the bound alpha_i = C may lie inside
        # the margin and would bias the average. Fall back to all support
        # vectors only when no margin point exists.
        offset_indices = margin_indices if margin_indices.size else indice_sv
        if offset_indices.size:
            self.b = np.mean(y[offset_indices] - train_scores[offset_indices])
        else:
            self.b = 0.0

        self.norm_f = self.beta[indice_sv] @ train_scores[indice_sv]

    def separating_function(self,x):
        # Input : matrix x of shape N data points times d dimension
        # Output: vector of size N with the decision values (offset excluded)
        return self.kernel(x,self.support)@self.beta_support

    def predict(self, X):
        """ Predict y values in {-1, 1} """
        d = self.separating_function(X)
        return 2 * (d+self.b> 0) - 1

In [194]:
class KernelSVC_cvxopt:
    """
    Kernel SVM for labels in {-1,1} whose dual quadratic program is solved
    with cvxopt.

    C is the upper bound of the box constraint on alpha and `kernel(A, B)`
    must return the Gram matrix between the rows of A and B. `epsilon` is
    stored but not used by `fit`.
    """

    
    def __init__(self, C, kernel, epsilon = 1e-3):
        self.type = 'non-linear'
        self.C = C
        self.kernel = kernel
        self.alpha = None       # dual variables, set by fit
        self.support = None     # support vectors, set by fit
        self.epsilon = epsilon
        self.norm_f = None      # set by fit
       
    
    def fit(self, X, y):
        """Solve the dual on (X, y) and store alpha, beta, support, b, norm_f."""
        N = len(y)
        K=self.kernel(X,X)
        diag=np.diag(y)

        # cvxopt minimizes 1/2 a^T P a + q^T a subject to G a <= h
        P = diag@K@diag
        q = -np.ones(N)

        # For alpha >= 0, written as -alpha <= 0
        G1 = -np.identity(N)
        h1 = np.zeros(N)

        # For alpha <= C
        G2 = np.identity(N)
        h2 = np.ones(N) * self.C

        # Stack both halves into the G and h expected by cvxopt
        G = np.vstack([G1, G2])
        h = np.hstack([h1, h2])
        
        
        # NOTE(review): no equality constraint (A, b) is passed, so
        # sum_i alpha_i y_i = 0 is not enforced although an offset b is
        # computed below and used in predict - confirm this is intended.
        self.alpha = solvers.qp(P=matrix(P),q=matrix(q),G=matrix(G),h=matrix(h))['x']
        # Flatten the cvxopt result into a 1-D numpy array
        self.alpha = np.array(self.alpha).reshape(-1)
        
        ## Assign the required attributes

        # Support vectors: points whose dual variable is not (numerically) zero
        indice_sv=np.where(np.abs(self.alpha)>1e-4)[0]
        
        self.support=X[indice_sv]
        self.alpha_support=self.alpha[indice_sv]

        # beta = y * alpha : coefficients of the kernel expansion
        self.beta=diag@self.alpha
        self.beta_support=self.beta[indice_sv]

        margin_indices = np.where((self.alpha > 1e-5) & (self.alpha < self.C-1e-5))[0]
        self.margin_points = X[margin_indices]  # a matrix with each row corresponding to a point that falls on the margin
        
        # Offset of the classifier.
        # NOTE(review): averaged over all support vectors, including those at
        # the bound alpha = C, not only over the margin points.
        self.b = np.mean(y[indice_sv]-(K@self.beta)[indice_sv])

        # beta^T K beta restricted to the support vectors (squared RKHS norm of f)
        self.norm_f = self.beta[indice_sv].T@(K@self.beta)[indice_sv]


    ### Implementation of the separating function f
    def separating_function(self,x):
        # Input : matrix x of shape N data points times d dimension
        # Output: vector of size N (decision values, offset b not included)
        return self.kernel(x,self.support)@self.beta_support
    
    
    def predict(self, X):
        """ Predict y values in {-1, 1} """
        d = self.separating_function(X)
        return 2 * (d+self.b> 0) - 1

## UTILS FUNCTION

In [None]:
def create_reduce_dataset(X,Y,size):
    """ 
    Draw a class-balanced random subset of (X, Y).

    For every distinct label found in Y, `size` rows are sampled uniformly
    with replacement (a row may therefore appear more than once), then the
    whole subset is shuffled. Labels are kept as they are (no one-vs-all
    relabelling).

    Parameters :
    X : 2d array size (n,p)
    Y : 1d array size (n,) of numeric class labels
    size : int, number of rows drawn per class

    Outputs :
    X_shuffled : 2d array size (n_classes*size, p)
    Y_shuffled : 1d float array size (n_classes*size,) with the matching labels
    """
    X = np.asarray(X)
    Y = np.asarray(Y)

    row_chunks = []
    label_chunks = []
    # Iterate over the labels actually present instead of assuming 0..k-1.
    for label in np.unique(Y):
        candidates = np.flatnonzero(Y == label)
        row_chunks.append(np.random.choice(candidates, size=size))
        label_chunks.append(np.full(size, label, dtype=float))

    if not row_chunks:
        # No class at all: return empty outputs with consistent shapes.
        return X[:0], np.array([])

    rows = np.concatenate(row_chunks)
    labels = np.concatenate(label_chunks)

    order = np.random.permutation(labels.shape[0])
    return X[rows][order], labels[order]


In [None]:
def create_reduce_dataset_onevsall(X,Y,k,size):
    """ 
    Draw a random one-vs-all subset of (X, Y) for class k.

    9*size rows are sampled from class k and `size` rows from every other
    class present in Y (all with replacement), then the subset is shuffled.
    Rows of class k get the label 1, all others the label -1.

    With 10 classes this gives 18*size rows, half of them from class k.
    NOTE: the factor 9 is hard-coded for that 10-class setting.

    Outputs :
    X_shuffled : 2d array with the sampled rows
    Y_shuffled : 1d float array of labels in {-1, 1}
    """
    X = np.asarray(X)
    Y = np.asarray(Y)

    row_chunks = []
    label_chunks = []
    # Iterate over the labels actually present instead of assuming 0..k-1.
    for label in np.unique(Y):
        is_target = label == k
        n_draw = size*9 if is_target else size
        candidates = np.flatnonzero(Y == label)
        row_chunks.append(np.random.choice(candidates, size=n_draw))
        label_chunks.append(np.full(n_draw, 1.0 if is_target else -1.0))

    if not row_chunks:
        # No class at all: return empty outputs with consistent shapes.
        return X[:0], np.array([])

    rows = np.concatenate(row_chunks)
    labels = np.concatenate(label_chunks)

    order = np.random.permutation(labels.shape[0])
    return X[rows][order], labels[order]

In [6]:
def accuracy(y_pred,y_test):
    """Share of entries of y_pred equal to the matching entry of y_test.

    The count of matches is divided by len(y_pred).
    """
    n_total = len(y_pred)
    n_matches = np.sum(y_pred == y_test)
    return n_matches / n_total

## RUN CODE

Let's try with class k=4 and a dataset split into 600 training points and 300 test points.

The goal is for the classifier to separate class k from all the other classes.

In [None]:
# One-vs-all subset for class 4; the sampler returns shuffled rows, so a
# plain slice gives a random split.
X,Y=create_reduce_dataset_onevsall(Xtr,Ytr,k=4,size=50)
# First 600 rows for training, next 300 for testing
X_train=X[:600]
X_test=X[600:900]
Y_train=Y[:600]
Y_test=Y[600:900]
# Training rows followed by test rows
Big_X=np.concatenate((X_train,X_test),axis=0)

Let's compute the Gram matrix (using the polynomial kernel). This is slow (about 1 min 40 s for a 900x900 kernel):

In [None]:
# Kernel between the training points (rows) and all points (columns): the
# first len(X_train) columns are the train/train block, the remaining ones
# the train/test block of shape (n_train, n_test) expected by the classifiers
# below. With the arguments the other way round, K[:, len(X_train):] is empty.
K = polynomial().kernel(X_train,Big_X)
K_train = K[:len(X_train),:len(X_train)]
K_test = K[:,len(X_train):]

In [None]:
K_train.shape,K_test.shape

Let's try different models:

In [None]:
# RIDGE REG not very appropriated for the problem (classification != regression )
classifier=Kernel_ridge_reg(lmbda=0.1)

classifier.train(K_train,Y_train) 

Y_pred = classifier.fit(K_test)

accuracy(np.sign(Y_pred),Y_test)

In [None]:
# Logistic REG 
classifier=Kernel_logistic_reg(lmbda=0.1,alpha0=np.random.rand(len(X_train)))

classifier.train(K_train,Y_train)

Y_pred = classifier.fit(K_test.T)

accuracy(np.sign(Y_pred),Y_test)

In [None]:
# Kernel SVM solved with cvxopt
classifier=SVM(lmbda=0.2)

classifier.train(K_train,Y_train)

# fit() returns sign(K @ alpha), hence the transpose (test points as rows)
Y_pred = classifier.fit(K_test.T)
print(classifier.alpha)

# Only column 0 is compared; presumably the predictions come back as a
# single-column 2-D array - verify.
accuracy(Y_pred[:,0],Y_test)

In [None]:
# NOTE: K is rebound here from the Gram matrix to the RBF kernel *function*
K=RBF().kernel
classifier=KernelSVC(C=1,kernel=K)
# KernelSVC computes its own Gram matrix from the raw points
classifier.fit(X_train,Y_train)
Y_pred=classifier.predict(X_test)
accuracy(Y_pred,Y_test)

Let's loop over the classes and train one SVM per class. Two strategies are possible:
- ONE VS REST
- ONE VS ONE

### NYSTROM APPROXIMATION

https://github.com/fredhallgren/nystrompca/blob/develop/nystrompca/algorithms/nystrom_KPCA.py

In [717]:
class NystromKPCA():
    """
    Kernel PCA fitted on a random subset of m data points (Nystrom-style
    approximation), keeping the top p principal directions (p<=m).

    Parameters :
    kernel : callable
        kernel(A, B) returns the Gram matrix between the rows of A and B.
    p : int
        Number of principal directions kept.
    m : int
        Number of data points sampled to build the approximation.

    Attributes set by `fit_PCA` :
    X_subset : the m sampled data points
    alphas   : 2d array size (p, m), the eigenvectors of the centered subset
               kernel, each scaled by 1/sqrt(its eigenvalue)

    NOTE(review): `approximated_repr` and `appro_kernel` evaluate the
    *uncentered* kernel against the subset although the eigenvectors come
    from the centered kernel - confirm whether this is intended.
    """
    def __init__(self,kernel,p,m) :
        self.kernel=kernel
        self.m=m
        self.p=p
        self.alphas=None
        self.big_approximated_kernel=None
        self.big_approximated_repr=None
        self.X_subset=None

    def fit_PCA(self, X):
        """Sample the subset from X and compute the scaled eigenvectors."""
        self.choose_subset(X)

        K=self.kernel(self.X_subset,self.X_subset)

        # Center the kernel in feature space: K_c = (I-U) K (I-U) where U is
        # the matrix with every entry equal to 1/m (not 1).
        n_sub=K.shape[0]
        U=np.ones(K.shape)/n_sub
        I=np.eye(n_sub)
        K=(I-U)@K@(I-U)

        # eigh returns the eigenvalues in ascending order
        valp,vectp=np.linalg.eigh(K)

        # Keep the p largest eigenvalues. The 1/sqrt scaling assumes they are
        # strictly positive (p must not exceed the rank of K_c).
        lmbda=valp[-self.p:]
        inverse=1/np.sqrt(lmbda)

        self.alphas=(inverse*vectp[:,-self.p:]).T

    def approximated_repr(self,x):
        """Return the (p, N) coordinates of the rows of x."""
        return self.alphas@self.kernel(self.X_subset,x)

    def appro_kernel(self,X,Y):
        """Approximate Gram matrix between X and Y from their p coordinates."""
        repr_X=self.alphas@self.kernel(self.X_subset,X)
        repr_Y=self.alphas@self.kernel(self.X_subset,Y)
        return repr_X.T@repr_Y

    def choose_subset(self,X):
        """Sample m row indices of X, store the rows, return the indices."""
        self.n=X.shape[0]
        # Draw distinct landmarks whenever possible: a duplicated point only
        # adds an exactly-zero eigenvalue to the subset kernel. Sampling with
        # replacement is kept only for m > n, where distinct draws cannot exist.
        subset_idx=np.random.choice(self.n,self.m,replace=self.m>self.n)
        self.X_subset=X[subset_idx]
        return subset_idx

In [713]:
Xtr[np.random.choice(Xtr.shape[0],500)].shape

(500, 3072)

In [718]:
kernel=RBF().kernel
K=NystromKPCA(kernel,p=200,m=500)
K.fit_PCA(Xtr)

In [723]:
kernel(Xtr,Xtr)

array([[1.        , 0.00866644, 0.02227502, ..., 0.0104907 , 0.02156173,
        0.01812238],
       [0.00866644, 1.        , 0.00345238, ..., 0.00275313, 0.00322442,
        0.00347302],
       [0.02227502, 0.00345238, 1.        , ..., 0.00580637, 0.0075947 ,
        0.01033267],
       ...,
       [0.0104907 , 0.00275313, 0.00580637, ..., 1.        , 0.0033863 ,
        0.00503564],
       [0.02156173, 0.00322442, 0.0075947 , ..., 0.0033863 , 1.        ,
        0.00542766],
       [0.01812238, 0.00347302, 0.01033267, ..., 0.00503564, 0.00542766,
        1.        ]])

In [57]:
def create_dataset_onevsall(Y,k):
    """  
    Build one-vs-all labels: entries of Y equal to k become 1, every other
    entry becomes -1. The result has the shape and dtype of Y.
    """
    relabelled = np.negative(np.ones_like(Y))
    is_class_k = (Y == k)
    relabelled[is_class_k] = 1
    return relabelled

In [253]:
Xtr.shape

(5000, 3072)

In [614]:
def create_dataset_onevone(X,Y,k,j) :
    """
    Build the binary problem "class k against class j".

    Returns (x, y): x stacks the rows of X labelled k followed by the rows
    labelled j (as floats); y holds 1 for the class-k rows and -1 for the
    class-j rows.
    """
    rows_k = X[Y==k]
    rows_j = X[Y==j]
    n_k = len(rows_k)
    n_j = len(rows_j)

    # Class-k block first, class-j block second
    x = np.empty((n_k+n_j,X.shape[1]))
    x[:n_k] = rows_k
    x[n_k:] = rows_j

    y = np.concatenate((np.ones(n_k), -np.ones(n_j)))

    return x,y

In [615]:
def create_test_set(X,Y,nbre):
    """
    Randomly hold out `nbre` distinct rows of (X, Y) as a test set.

    Returns (x_train, y_train, x_test, y_test); the training part keeps the
    remaining rows in their original order.
    """
    n_rows = len(X)
    held_out = np.random.choice(np.arange(n_rows),size=nbre,replace=False)

    # Everything that was not drawn stays in the training set
    x_train = np.delete(X, held_out, axis=0)
    y_train = np.delete(Y, held_out, axis=0)

    return x_train,y_train,X[held_out],Y[held_out]

In [None]:
Y_onevall=create_dataset_onevsall(Ytr,1)
Xtr[:100,:].shape,Y_onevall[:100].shape

In [None]:
kernel=RBF().kernel
classifier=KernelSVC(C=1,kernel=kernel)
classifier.fit(Xtr[:2000,:],Y_onevall[:2000])

In [None]:
np.abs(classifier.separating_function(Xtr[:2000,:]))

In [None]:
l=classifier.predict(Xtr[2000:3000,:])==Y_onevall[2000:3000]
np.sum(l)/len(l)

In [None]:
classifier.alpha

## ONE vs REST

In [None]:
from tqdm import tqdm
SVMModels = []

# Class-balanced subset: 150 rows per class
X_reduce,Y_reduce = create_reduce_dataset(Xtr,Ytr,150)

# Steps 4-8: train one binary classifier per class (one vs rest)
for j in tqdm(range(10)):

    # Binary labels: +1 for class j, -1 for every other class
    Y = create_dataset_onevsall(Y_reduce,k=j)

    kernel=RBF().kernel
    classifier=KernelSVC(C=1,kernel=kernel)

    # Train on the binary labels Y; the multi-class Y_reduce is not a valid
    # target for a {-1, 1} SVM.
    classifier.fit(X_reduce,Y)

    # Keep the trained SVM model
    SVMModels.append(classifier)


In [None]:
x_test,y_test = create_reduce_dataset(Xtr,Ytr,20)
# NOTE: the freshly drawn test set is overridden; the models are evaluated on
# the subset they were trained on.
x_test=X_reduce

# One row of scores per class, one column per observation
scores = np.zeros((10,x_test.shape[0]))

# Steps 9-12: compute the score of every class
for j, svm_model in enumerate(SVMModels):
    # Decision value of the "class j vs rest" model
    score = svm_model.separating_function(x_test)

    # Store the scores of the current class
    scores[j] = score

# Step 13: assign each observation to the class with the highest score.
# Model j was trained for label j and the labels start at 0, so the argmax
# index is the class itself (no +1 offset).
final_classes = np.argmax(scores,axis=0)
accuracy(final_classes,Y_reduce)

In [None]:
SVMModels[0].alpha[116]
SVMModels[0].beta[421]

## DIMENSIONALITY REDUCTION WITH KERNEL PCA

In [610]:
class KernelPCA:
    """
    Kernel PCA keeping the top r principal components.

    Attributes set by `compute_PCA` :
    support : the data points the kernel features are evaluated against
    lmbda   : vector of size r, the r largest eigenvalues of the centered
              Gram matrix, in ascending order
    alpha   : matrix of shape N times r, the matching eigenvectors, each
              scaled by 1/sqrt(its eigenvalue)

    NOTE(review): `transform` uses the *uncentered* kernel between the new
    points and the support although the eigenvectors come from the centered
    Gram matrix - confirm whether this is intended.
    """
    
    def __init__(self,kernel, r=2):
        self.kernel = kernel
        self.alpha = None
        self.lmbda = None
        self.support = None
        self.r =r ## Number of principal components

    def compute_PCA(self, X):
        """Fit the principal directions on the rows of X."""
        self.support = X
        K=self.kernel(X,X)

        # Center the kernel in feature space: K_c = (I-U) K (I-U) where U is
        # the matrix with every entry equal to 1/N (not 1).
        n_points=K.shape[0]
        U=np.ones(K.shape)/n_points
        I=np.eye(n_points)
        K=(I-U)@K@(I-U)

        # eigh returns the eigenvalues in ascending order
        valp,vectp=np.linalg.eigh(K)

        # Keep the r largest eigenvalues. The 1/sqrt scaling assumes they are
        # strictly positive (r must not exceed the rank of K_c).
        self.lmbda=valp[-self.r:]
        inverse=1/np.sqrt(self.lmbda)

        self.alpha = inverse*vectp[:,-self.r:]

    def transform(self,x):
        # Input : matrix x of shape N data points times d dimension
        # Output: matrix of shape N times r (coordinates on the r components)
        K=self.kernel(x,self.support)
        return K@self.alpha

In [612]:

kernel=RBF().kernel
# Keep the 500 leading kernel principal components
PCA=KernelPCA(kernel=kernel,r=500)
PCA.compute_PCA(Xtr) # fit on the whole training set
# X now holds the reduced representation, one row per training image
X=PCA.transform(Xtr)

print(X.shape)


#classifier.fit(Xtr[:1000,0:10],Y[:1000])

(5000, 500)


In [None]:
Y = create_dataset_onevsall(Ytr,k=3)

kernel=RBF().kernel
classifier=KernelSVC(C=1,kernel=kernel)

classifier.fit(Xtr,Y)

In [None]:
from tqdm import tqdm
SVMModels = []

# Steps 4-8: train one binary classifier per class (one vs rest)
for j in tqdm(range(10)):

    # Binary labels: +1 for class j, -1 for every other class
    Y = create_dataset_onevsall(Ytr,k=j)
    
    kernel=RBF().kernel
    classifier=KernelSVC(C=1,kernel=kernel)
    
    # X is whatever an earlier cell bound to that name
    classifier.fit(X,Y)
    
    # Keep the trained SVM model
    SVMModels.append(classifier)

## Kernel Logistic reg

In [None]:
class Kernel_logistic_reg_optimize():
    """
    Kernel logistic regression with regularization parameter lambda (lmbda),
    for labels y in {-1, 1}, solved with scipy.optimize.minimize.

    ----> Smooth convex problem solved by `train` :
            alpha = argmin 1/n sum_i=1^n logistic(y_i[K alpha]_i) + lambda/2 * alpha^T K alpha
          with logistic(u) = log(1 + exp(-u)).

          The solver (SLSQP) is started at `alpha0`; when `alpha0` is None a
          vector of ones is used.

          and then :
            pred = sigmoid(K @ alpha)      (see `fit`)
    """

    def __init__(self,lmbda,alpha0):
        self.lmbda=lmbda
        self.alpha0=alpha0

    def obj(self,K,y,alpha):
        """Value of the regularized logistic objective at `alpha`."""
        n=len(K)
        scores=K@alpha
        return np.sum(self.logistic(y*scores))/n+self.lmbda*(alpha@scores)/2

    def sigmoid(self,u):
        """1 / (1 + exp(-u)), evaluated without overflow."""
        from scipy.special import expit
        return expit(u)

    def logistic(self,u):
        """Logistic loss log(1 + exp(-u)), evaluated without overflow."""
        return np.logaddexp(0, -u)

    def logistic_prime(self,u):
        """First derivative of the logistic loss."""
        return -self.sigmoid(-u)

    def logistic_prime2(self,u):
        """Second derivative of the logistic loss."""
        return self.sigmoid(u) * self.sigmoid(-u)

    def derivative(self,K,y,alpha):
        """Gradient of `obj` with respect to `alpha` (K assumed symmetric)."""
        n=len(K)
        scores=K@alpha
        # Elementwise chain rule instead of a dense n x n diagonal matrix
        loss_slope=self.logistic_prime(y*scores)*y
        return K@loss_slope/n+self.lmbda*scores

    def second_derivative(self,K,y,alpha):
        """Hessian of `obj` : K diag(w) K / n + lambda K."""
        K=np.asarray(K)
        n=len(K)
        curvature=self.logistic_prime2(y*(K@alpha))
        # K @ diag(w) @ K computed by scaling the columns of K
        return (K*curvature[None,:])@K/n + self.lmbda*K

    def train(self,K,y):
        """Fit `self.alpha` on the Gram matrix K (n x n) and labels y."""
        ob= lambda alpha : self.obj(K=K,y=y,alpha=alpha)
        der=lambda alpha : self.derivative(K=K,y=y,alpha=alpha)

        # Honour the starting point given at construction time
        start=np.ones(len(K)) if self.alpha0 is None else self.alpha0

        # SLSQP does not use Hessian information, so `hess` is not passed
        # (scipy only emits a warning for it).
        optRes = optimize.minimize(fun=ob,
                                   x0=start,
                                   method='SLSQP',
                                   jac=der,
                                   tol=1e-5)
        self.alpha = optRes.x

    def fit(self,K):
        """Return sigmoid(K @ alpha) for a kernel matrix with one row per point."""
        return self.sigmoid(K@self.alpha)

In [654]:
class KernelLogisticRegression:
    """
    Kernel logistic regression for labels in {-1, 1}, fitted by iteratively
    reweighted least squares: every iteration solves a weighted kernel ridge
    regression on the current working response.

    Parameters :
    kernel : callable
        kernel(A, B) returns the Gram matrix between the rows of A and B.
    reg_param : float
        Regularization parameter lambda.
    epsilon : float
        The iterations stop once no coefficient moves by more than epsilon.
    max_iter : int
        Maximum number of iterations.
    """

    def __init__(self, kernel, reg_param=0, epsilon=1e-8, max_iter=50):
        self.alpha = None
        self.reg_param = reg_param
        self.beta = None
        self.kernel = kernel
        self.eps = epsilon
        self.max_iter = max_iter
        self.support=None

    def fit(self, X, y):
        """Fit on X (N x d), y (N,) and return sigmoid(K @ alpha) on X."""
        N = X.shape[0]
        self.support=X
        k = self.kernel(X,X)
        alpha = np.zeros(N)
        alpha_old = alpha + np.inf   # forces the first iteration
        ridge = N * self.reg_param * np.eye(N)
        i=0
        while i < self.max_iter and (np.abs(alpha - alpha_old) > self.eps).any():
            # Current scores, curvature weights and working response
            m = k @ alpha
            weights = logistic_prime2(y * m)
            z = m + y / sigmoid(y * m)

            # Solve the weighted kernel ridge regression
            #   alpha = W^1/2 (W^1/2 K W^1/2 + N*lambda*I)^-1 W^1/2 z
            # with W kept as a vector and a linear solve instead of an
            # explicit inverse.
            sqrt_w = np.sqrt(weights)
            system = sqrt_w[:, None] * k * sqrt_w[None, :] + ridge

            alpha_old = alpha
            alpha = sqrt_w * np.linalg.solve(system, sqrt_w * z)
            print(f'{i}ème iteration, epsilon :{np.max(np.abs(alpha - alpha_old))}')
            i+=1

        self.alpha = alpha

        return sigmoid(np.einsum('i, ij->j', self.alpha, k))

    def predict(self, X):
        """Return sigmoid(K(X, support) @ alpha), one value per row of X."""
        K_Xx = self.kernel(X, self.support)
        predictions = sigmoid(np.einsum('i, ij->j', self.alpha, K_Xx.T))
        return predictions


def logistic(u):
    """Logistic loss log(1 + exp(-u)), evaluated without overflow."""
    return np.logaddexp(0, -u)


def logistic_prime(u):
    """First derivative of the logistic loss."""
    return -sigmoid(-u)


def logistic_prime2(u):
    """Second derivative of the logistic loss."""
    return sigmoid(u) * sigmoid(-u)


def sigmoid(u):
    """1 / (1 + exp(-u)), evaluated without overflow (works on arrays)."""
    from scipy.special import expit
    return expit(u)

In [652]:
sigmoid(1000)

1.0

In [653]:
Y = create_dataset_onevsall(Ytr,k=1)
    
kernel=RBF().kernel
classifier=KernelLogisticRegression(kernel=kernel)
classifier.fit(Xtr,Y)
classifier.predict(Xtr[300:400])

0ème iteration, epsilon :6.634167304263428
1ème iteration, epsilon :3.7660021077124863


KeyboardInterrupt: 

In [None]:
Y = create_dataset_onevsall(Ytr,k=1)
    
kernel=RBF().kernel
classifier=KernelLogisticRegression(kernel=kernel)
classifier.fit(Xtr,Y)

In [None]:
# Predicted probability of the positive class, computed once
proba = classifier.predict(Xtr)
# Threshold at 0.5 to get labels in {-1, 1}
pred = np.where(proba > 0.5, 1, -1)
# Compare with the one-vs-all labels the model was trained on (Y), not with
# the multi-class labels Ytr.
accuracy(pred,Y)

In [None]:
# One-vs-all labels for class 1
Y = create_dataset_onevsall(Ytr,k=1)

kernel=RBF().kernel
classifier=Kernel_logistic_reg(0.01,np.zeros(len(Xtr)))
K=kernel(Xtr,Xtr)
# The logistic loss expects labels in {-1, 1}: train on Y, not on the
# multi-class labels Ytr.
classifier.train(K,Y)

## ONE VS ALL

In [269]:
from tqdm import tqdm
# NOTE: despite its name, this list holds the SVM models trained below
LogModels = []

# Steps 4-8: train one binary classifier per class (one vs all)
for j in tqdm(range(10)):

    # Binary labels: +1 for class j, -1 for every other class
    Y = create_dataset_onevsall(Ytr,k=j)
    
    kernel=RBF(sigma=1).kernel
    classifier=KernelSVC_cvxopt(C=0.08,kernel=kernel)
    
    classifier.fit(Xtr,Y)
    
    # Keep the trained SVM model
    LogModels.append(classifier)


  0%|          | 0/10 [00:00<?, ?it/s]

     pcost       dcost       gap    pres   dres
 0: -8.2544e+02 -1.2075e+03  6e+04  4e+01  1e-15
 1: -2.6560e+02 -1.0461e+03  4e+03  3e+00  1e-15
 2: -1.4634e+02 -7.1919e+02  7e+02  1e-01  2e-15
 3: -1.5292e+02 -2.1484e+02  6e+01  1e-02  2e-15
 4: -1.7631e+02 -1.8633e+02  1e+01  1e-03  1e-15
 5: -1.8122e+02 -1.8215e+02  9e-01  7e-05  1e-15
 6: -1.8170e+02 -1.8173e+02  3e-02  2e-06  1e-15
 7: -1.8172e+02 -1.8172e+02  1e-03  3e-08  1e-15
 8: -1.8172e+02 -1.8172e+02  3e-05  3e-10  1e-15
Optimal solution found.


 10%|█         | 1/10 [00:42<06:21, 42.40s/it]

     pcost       dcost       gap    pres   dres
 0: -6.1432e+02 -9.1467e+02  3e+04  3e+01  1e-15
 1: -2.0828e+02 -7.8552e+02  3e+03  2e+00  1e-15
 2: -1.1536e+02 -5.0380e+02  5e+02  1e-01  2e-15
 3: -1.2377e+02 -1.6947e+02  5e+01  1e-02  2e-15
 4: -1.4024e+02 -1.4791e+02  8e+00  1e-03  1e-15
 5: -1.4362e+02 -1.4462e+02  1e+00  1e-04  1e-15
 6: -1.4412e+02 -1.4415e+02  3e-02  2e-06  1e-15
 7: -1.4414e+02 -1.4414e+02  1e-03  4e-08  1e-15
 8: -1.4414e+02 -1.4414e+02  4e-05  5e-10  1e-15
Optimal solution found.


 20%|██        | 2/10 [01:22<05:26, 40.81s/it]

     pcost       dcost       gap    pres   dres
 0: -8.5098e+02 -1.0822e+03  4e+04  4e+01  1e-15
 1: -2.7338e+02 -9.1969e+02  3e+03  2e+00  1e-15
 2: -1.5256e+02 -6.3302e+02  6e+02  1e-01  3e-15
 3: -1.5984e+02 -2.1333e+02  6e+01  1e-02  2e-15
 4: -1.8121e+02 -1.8928e+02  8e+00  1e-03  1e-15
 5: -1.8517e+02 -1.8601e+02  9e-01  9e-05  1e-15
 6: -1.8561e+02 -1.8564e+02  3e-02  2e-06  1e-15
 7: -1.8563e+02 -1.8563e+02  9e-04  3e-08  1e-15
 8: -1.8563e+02 -1.8563e+02  4e-05  4e-10  1e-15
Optimal solution found.


 30%|███       | 3/10 [02:01<04:41, 40.23s/it]

     pcost       dcost       gap    pres   dres
 0: -8.0143e+02 -1.1289e+03  5e+04  4e+01  1e-15
 1: -2.5023e+02 -9.7274e+02  3e+03  2e+00  1e-15
 2: -1.3934e+02 -6.3682e+02  6e+02  8e-02  3e-15
 3: -1.5071e+02 -2.0557e+02  6e+01  8e-03  1e-15
 4: -1.7201e+02 -1.8050e+02  9e+00  9e-04  1e-15
 5: -1.7611e+02 -1.7688e+02  8e-01  5e-05  1e-15
 6: -1.7651e+02 -1.7653e+02  3e-02  1e-06  1e-15
 7: -1.7652e+02 -1.7652e+02  9e-04  2e-08  1e-15
 8: -1.7652e+02 -1.7652e+02  3e-05  2e-10  1e-15
Optimal solution found.


 40%|████      | 4/10 [02:40<03:58, 39.70s/it]

     pcost       dcost       gap    pres   dres
 0: -8.4905e+02 -1.0928e+03  4e+04  4e+01  1e-15
 1: -2.7011e+02 -9.3042e+02  3e+03  2e+00  1e-15
 2: -1.5087e+02 -6.2981e+02  6e+02  1e-01  2e-15
 3: -1.5950e+02 -2.1236e+02  5e+01  1e-02  2e-15
 4: -1.8055e+02 -1.8843e+02  8e+00  1e-03  1e-15
 5: -1.8449e+02 -1.8508e+02  6e-01  4e-05  1e-15
 6: -1.8480e+02 -1.8482e+02  2e-02  1e-06  1e-15
 7: -1.8481e+02 -1.8481e+02  7e-04  2e-08  1e-15
 8: -1.8481e+02 -1.8481e+02  3e-05  2e-10  1e-15
Optimal solution found.


 50%|█████     | 5/10 [03:19<03:17, 39.55s/it]

     pcost       dcost       gap    pres   dres
 0: -7.5160e+02 -1.0810e+03  4e+04  4e+01  1e-15
 1: -2.4360e+02 -9.3158e+02  3e+03  2e+00  1e-15
 2: -1.3622e+02 -6.1425e+02  6e+02  1e-01  2e-15
 3: -1.4557e+02 -1.9921e+02  6e+01  1e-02  2e-15
 4: -1.6604e+02 -1.7464e+02  9e+00  1e-03  1e-15
 5: -1.7010e+02 -1.7092e+02  8e-01  6e-05  1e-15
 6: -1.7052e+02 -1.7055e+02  3e-02  1e-06  1e-15
 7: -1.7054e+02 -1.7054e+02  1e-03  3e-08  1e-15
 8: -1.7054e+02 -1.7054e+02  3e-05  4e-10  1e-15
Optimal solution found.


 60%|██████    | 6/10 [03:58<02:37, 39.29s/it]

     pcost       dcost       gap    pres   dres
 0: -7.2925e+02 -1.0475e+03  4e+04  4e+01  1e-15
 1: -2.3732e+02 -9.0166e+02  3e+03  2e+00  1e-15
 2: -1.3224e+02 -5.9610e+02  6e+02  1e-01  2e-15
 3: -1.4086e+02 -1.9311e+02  5e+01  1e-02  2e-15
 4: -1.6067e+02 -1.6901e+02  9e+00  1e-03  1e-15
 5: -1.6467e+02 -1.6541e+02  8e-01  6e-05  1e-15
 6: -1.6505e+02 -1.6507e+02  2e-02  1e-06  1e-15
 7: -1.6506e+02 -1.6506e+02  9e-04  3e-08  1e-15
 8: -1.6506e+02 -1.6506e+02  3e-05  3e-10  1e-15
Optimal solution found.


 70%|███████   | 7/10 [04:39<01:59, 39.89s/it]

     pcost       dcost       gap    pres   dres
 0: -7.0279e+02 -9.5731e+02  3e+04  3e+01  1e-15
 1: -2.3174e+02 -8.1508e+02  3e+03  2e+00  1e-15
 2: -1.2918e+02 -5.2402e+02  4e+02  8e-02  2e-15
 3: -1.4013e+02 -1.8546e+02  5e+01  8e-03  2e-15
 4: -1.5761e+02 -1.6455e+02  7e+00  9e-04  1e-15
 5: -1.6080e+02 -1.6160e+02  8e-01  7e-05  1e-15
 6: -1.6121e+02 -1.6123e+02  3e-02  2e-06  1e-15
 7: -1.6122e+02 -1.6122e+02  7e-04  3e-08  1e-15
 8: -1.6122e+02 -1.6122e+02  3e-05  3e-10  1e-15
Optimal solution found.


 80%|████████  | 8/10 [05:19<01:20, 40.01s/it]

     pcost       dcost       gap    pres   dres
 0: -8.4891e+02 -1.1411e+03  5e+04  4e+01  1e-15
 1: -2.7029e+02 -9.7669e+02  4e+03  2e+00  1e-15
 2: -1.4962e+02 -6.5778e+02  6e+02  1e-01  3e-15
 3: -1.5889e+02 -2.1464e+02  6e+01  9e-03  2e-15
 4: -1.8071e+02 -1.8933e+02  9e+00  1e-03  1e-15
 5: -1.8489e+02 -1.8575e+02  9e-01  7e-05  1e-15
 6: -1.8533e+02 -1.8537e+02  4e-02  2e-06  1e-15
 7: -1.8535e+02 -1.8535e+02  1e-03  5e-08  1e-15
 8: -1.8535e+02 -1.8535e+02  5e-05  5e-10  1e-15
Optimal solution found.


 90%|█████████ | 9/10 [05:59<00:39, 39.88s/it]

     pcost       dcost       gap    pres   dres
 0: -6.3587e+02 -9.0597e+02  3e+04  3e+01  1e-15
 1: -2.1090e+02 -7.7496e+02  3e+03  2e+00  1e-15
 2: -1.1733e+02 -4.9079e+02  4e+02  9e-02  2e-15
 3: -1.2712e+02 -1.7079e+02  5e+01  9e-03  2e-15
 4: -1.4330e+02 -1.5023e+02  7e+00  1e-03  1e-15
 5: -1.4642e+02 -1.4723e+02  8e-01  9e-05  1e-15
 6: -1.4682e+02 -1.4685e+02  3e-02  2e-06  1e-15
 7: -1.4684e+02 -1.4684e+02  8e-04  3e-08  1e-15
 8: -1.4684e+02 -1.4684e+02  3e-05  4e-10  1e-15
Optimal solution found.


100%|██████████| 10/10 [06:39<00:00, 39.97s/it]


In [273]:
x_test = Xte

# One row of scores per class, one column per observation
scores = np.zeros((10,x_test.shape[0]))

# Steps 9-12: compute the score of every class
for j, log_model in enumerate(LogModels):
    # Decision value of the "class j vs all" model (offset b not included)
    score = log_model.separating_function(x_test)
    
    # Store the scores of the current class
    scores[j] = score

# Step 13: assign each observation to the class with the highest score
final_classes = np.argmax(scores,axis=0)
final_classes

array([4, 8, 2, ..., 4, 2, 4])

In [285]:
np.sum(final_classes==9)

31

In [271]:
accuracy(Ytr,final_classes)

0.9544

In [192]:
np.sum(LogModels[-1].alpha>1e-4),len(LogModels[-1].support)

(2739, 2776)

In [148]:
sup=LogModels[-1].support[1404]

LogModels[-1].separating_function(sup[None,:])

array([0.41211554])

In [125]:
s=np.zeros_like(score)
s[score>0]=1
s[score<0]=-1
np.where(LogModels[-1].predict(Xte)!=s)
score[np.where(LogModels[-1].predict(Xte)!=s)]

array([-0.04670798, -0.09419489, -0.06725612, -0.11625016, -0.11638212,
       -0.097503  ])

In [101]:
np.sum(final_classes==5)

108

In [274]:
import pandas as pd

# Build the submission table: 1-based sample ids next to the predicted class.
# The id range follows the number of predictions instead of a hard-coded
# 2000, so the cell keeps working if the number of samples changes.
df = pd.DataFrame({
    'Id': range(1, len(final_classes) + 1),
    'Prediction': final_classes,
})

In [275]:
df

Unnamed: 0,Id,Prediction
0,1,4
1,2,8
2,3,2
3,4,8
4,5,8
...,...,...
1995,1996,8
1996,1997,8
1997,1998,4
1998,1999,2


In [233]:
df.to_csv('submission5.csv', index=False)

## One vs one RBF

In [287]:
K=10
K*(K-1)/2

45.0

In [291]:
# Train one RBF-kernel classifier per unordered pair of classes (one-vs-one).
K = 10
nbre_modele = K * (K - 1) // 2  # number of class pairs
Models = {}

class_pairs = [(a, b) for a in range(K) for b in range(a + 1, K)]

for i, j in class_pairs:
    print(i, j)

    # Training data for the pair (i, j), as built by create_dataset_onevone.
    X, Y = create_dataset_onevone(Xtr, Ytr, i, j)

    kernel = RBF(sigma=1).kernel
    mod = KernelSVC_cvxopt(C=1, kernel=kernel)
    mod.fit(X, Y)

    # Models are keyed by their class pair so the voting step can recover it.
    Models[(i, j)] = mod


0 1
     pcost       dcost       gap    pres   dres
 0: -3.8985e+02 -1.8322e+03  5e+03  1e+00  2e-15
 1: -3.7807e+02 -1.0385e+03  7e+02  3e-16  2e-15
 2: -4.0680e+02 -5.0370e+02  1e+02  2e-16  2e-15
 3: -4.2554e+02 -4.3573e+02  1e+01  1e-16  2e-15
 4: -4.2893e+02 -4.2977e+02  8e-01  2e-16  3e-15
 5: -4.2930e+02 -4.2936e+02  6e-02  2e-16  2e-15
 6: -4.2933e+02 -4.2933e+02  3e-03  2e-16  3e-15
 7: -4.2933e+02 -4.2933e+02  2e-04  2e-16  3e-15
Optimal solution found.
0 2
     pcost       dcost       gap    pres   dres
 0: -4.5055e+02 -1.8202e+03  1e+03  0e+00  4e-15
 1: -4.7346e+02 -5.6120e+02  9e+01  2e-16  4e-15
 2: -5.0201e+02 -5.1483e+02  1e+01  2e-16  5e-15
 3: -5.0698e+02 -5.0837e+02  1e+00  2e-16  6e-15
 4: -5.0757e+02 -5.0768e+02  1e-01  2e-16  6e-15
 5: -5.0762e+02 -5.0763e+02  7e-03  2e-16  6e-15
 6: -5.0762e+02 -5.0762e+02  4e-04  2e-16  6e-15
Optimal solution found.
0 3
     pcost       dcost       gap    pres   dres
 0: -4.4125e+02 -1.7201e+03  1e+03  0e+00  3e-15
 1: -4.6345e

In [297]:
x_test = Xte

# votes[n, c] counts how many pairwise models chose class c for sample n.
votes = np.zeros((len(x_test), K))

for pair, model in Models.items():
    class1, class2 = pair
    predictions = model.predict(x_test)

    # A prediction equal to 1 is a vote for the first class of the pair;
    # any other value is a vote for the second class.
    for i, prediction in enumerate(predictions):
        winner = class1 if prediction == 1 else class2
        votes[i, winner] += 1

# Majority vote (argmax keeps the lowest class index on ties).
final_classes = votes.argmax(axis=1)

In [308]:
np.sum(final_classes==9)

100

In [295]:
# NOTE(review): the voting cell above uses x_test = Xte, so final_classes holds
# test-set predictions. Scoring them against the training labels Ytr only makes
# sense if the votes were computed with x_test = Xtr -- confirm which data
# produced the value shown below.
accuracy(final_classes,Ytr)

0.9998

In [309]:
# Submission table: 1-based sample ids next to the predicted class. The id
# range is derived from the number of predictions rather than a fixed 2000.
df = pd.DataFrame({
    'Id': range(1, len(final_classes) + 1),
    'Prediction': final_classes,
})

In [310]:
df

Unnamed: 0,Id,Prediction
0,1,3
1,2,5
2,3,2
3,4,7
4,5,7
...,...,...
1995,1996,8
1996,1997,8
1997,1998,4
1998,1999,2


In [None]:
# NOTE(review): 'submission5.csv' is also the file written by the export cell of
# the previous section, so running this cell overwrites that submission --
# confirm the file name is intended.
df.to_csv('submission5.csv', index=False)

## ONE V ONE POLY

In [740]:
# Train one polynomial-kernel classifier per unordered pair of classes.
K = 10
nbre_modele = K * (K - 1) // 2  # number of class pairs
Models = {}

class_pairs = [(a, b) for a in range(K) for b in range(a + 1, K)]

for i, j in class_pairs:
    print(i, j)

    # Training data for the pair (i, j), as built by create_dataset_onevone.
    X, Y = create_dataset_onevone(Xtr, Ytr, i, j)

    kernel = polynomial(d=4).kernel
    mod = KernelSVC_cvxopt(C=0.5, kernel=kernel)
    mod.fit(X, Y)

    # Models are keyed by their class pair so the voting step can recover it.
    Models[(i, j)] = mod

0 1
     pcost       dcost       gap    pres   dres
 0: -3.7590e+00 -5.4668e+02  3e+03  3e+00  8e-16
 1: -1.7648e+00 -2.7095e+02  3e+02  3e-02  6e-16
 2: -3.4628e+00 -2.5324e+01  2e+01  2e-03  4e-16
 3: -4.2688e+00 -5.1313e+00  9e-01  3e-05  6e-16
 4: -4.3088e+00 -4.3331e+00  2e-02  4e-07  3e-16
 5: -4.3090e+00 -4.3093e+00  3e-04  4e-09  2e-16
 6: -4.3090e+00 -4.3090e+00  3e-06  4e-11  2e-16
Optimal solution found.
0 2
     pcost       dcost       gap    pres   dres
 0: -6.7234e+00 -5.5708e+02  3e+03  3e+00  9e-16
 1: -3.5219e+00 -2.8636e+02  3e+02  3e-02  6e-16
 2: -6.6791e+00 -3.0137e+01  2e+01  2e-03  5e-16
 3: -7.6577e+00 -8.6174e+00  1e+00  3e-05  5e-16
 4: -7.6884e+00 -7.7072e+00  2e-02  3e-07  2e-16
 5: -7.6884e+00 -7.6886e+00  2e-04  3e-09  2e-16
 6: -7.6884e+00 -7.6884e+00  2e-06  3e-11  2e-16
Optimal solution found.
0 3
     pcost       dcost       gap    pres   dres
 0: -5.5266e+00 -5.5295e+02  3e+03  3e+00  9e-16
 1: -2.7167e+00 -2.8045e+02  3e+02  3e-02  6e-16
 2: -5.3714e

In [742]:
x_test = Xte

# votes[n, c] counts how many pairwise models chose class c for sample n.
votes = np.zeros((len(x_test), K))

for pair, model in Models.items():
    class1, class2 = pair
    predictions = model.predict(x_test)

    # A prediction equal to 1 is a vote for the first class of the pair;
    # any other value is a vote for the second class.
    for i, prediction in enumerate(predictions):
        winner = class1 if prediction == 1 else class2
        votes[i, winner] += 1

# Majority vote (argmax keeps the lowest class index on ties).
final_classes = votes.argmax(axis=1)

In [752]:
np.sum(final_classes==9)

3

In [753]:
# Submission table: 1-based sample ids next to the predicted class. The id
# range is derived from the number of predictions rather than a fixed 2000.
df = pd.DataFrame({
    'Id': range(1, len(final_classes) + 1),
    'Prediction': final_classes,
})

In [754]:
df.to_csv('submission6.csv', index=False)

## ONE v ONE logistic

In [677]:
# Train one kernel logistic regression per unordered pair of classes.
K = 10
nbre_modele = K * (K - 1) // 2  # number of class pairs
Models = {}

class_pairs = [(a, b) for a in range(K) for b in range(a + 1, K)]

for i, j in class_pairs:
    print(i, j)

    # Training data for the pair (i, j), as built by create_dataset_onevone.
    X, Y = create_dataset_onevone(Xtr, Ytr, i, j)

    kernel = polynomial(d=2).kernel
    mod = KernelLogisticRegression(kernel=kernel, reg_param=1, epsilon=1e-5)
    mod.fit(X, Y)

    # Models are keyed by their class pair so the voting step can recover it.
    Models[(i, j)] = mod

0 1
0ème iteration, epsilon :0.0005028024827979508
1ème iteration, epsilon :6.969016956559753e-09
0 2
0ème iteration, epsilon :0.0005000439534249149
1ème iteration, epsilon :3.224523049830466e-10
0 3
0ème iteration, epsilon :0.000501151632138375
1ème iteration, epsilon :4.136721309521297e-09
0 4
0ème iteration, epsilon :0.0005026646446318912
1ème iteration, epsilon :4.05683430170509e-09
0 5
0ème iteration, epsilon :0.0004999748521271975
1ème iteration, epsilon :2.240955786767581e-09
0 6
0ème iteration, epsilon :0.0005010550337907527
1ème iteration, epsilon :2.152465513065218e-09
0 7
0ème iteration, epsilon :0.0005030159159078194
1ème iteration, epsilon :2.2187399219318962e-09
0 8
0ème iteration, epsilon :0.0005002024324855494
1ème iteration, epsilon :7.273703359457831e-10
0 9
0ème iteration, epsilon :0.0005045603301699967
1ème iteration, epsilon :5.511869379697717e-09
1 2
0ème iteration, epsilon :0.000507014805401572
1ème iteration, epsilon :6.864181660625e-09
1 3
0ème iteration, epsil

In [664]:
p=mod.predict(Xte)
p[p<0.5].shape

(1778,)

In [678]:
x_test = Xte

# votes[n, c] counts how many pairwise models chose class c for sample n.
votes = np.zeros((len(x_test), K))

for pair, model in Models.items():
    class1, class2 = pair

    # Outputs above 0.5 are snapped to exactly 1 so the equality test below
    # treats them as a vote for the first class of the pair.
    predictions = model.predict(x_test)
    above_half = predictions > 0.5
    predictions[above_half] = 1

    for i, prediction in enumerate(predictions):
        winner = class1 if prediction == 1 else class2
        votes[i, winner] += 1

# Majority vote (argmax keeps the lowest class index on ties).
final_classes = votes.argmax(axis=1)

In [685]:
np.sum(final_classes==3)

7

## BEST PARAMETERS

### one v one SVM 

In [755]:
def onevone_test_param_SVM(X,Y,list_C,list_param,ker):
    """Grid-search ``C`` and one kernel parameter for one-vs-one kernel SVMs.

    The data is split once with ``create_test_set(X, Y, nbre=500)``. For every
    ``(C, param)`` combination, one ``KernelSVC_cvxopt`` model is trained per
    pair of classes ``i < j`` (10 classes are assumed), the ``x_test`` part of
    the split is labelled by majority vote over all pairwise models, and the
    resulting accuracy is printed and recorded.

    Args:
        X: Samples passed to ``create_test_set``.
        Y: Labels passed to ``create_test_set``.
        list_C: Values of the SVM parameter ``C`` to try.
        list_param: Kernel parameter values to try (``sigma`` for ``'RBF'``,
            ``d`` for ``'poly'``).
        ker: Kernel family, either ``'RBF'`` or ``'poly'``.

    Returns:
        List of accuracies, in grid order (``C`` outer loop, parameter inner).

    Raises:
        ValueError: If ``ker`` is not ``'RBF'`` or ``'poly'``.
    """
    # Map each supported kernel family to (label used in the log, factory).
    kernel_specs = {
        'RBF': ('sigma', lambda p: RBF(sigma=p).kernel),
        'poly': ('d', lambda p: polynomial(d=p).kernel),
    }
    # Fail early and explicitly: previously an unknown name left `kernel`
    # unbound and only surfaced as an UnboundLocalError inside the loop.
    if ker not in kernel_specs:
        raise ValueError(f"Unknown kernel {ker!r}: expected 'RBF' or 'poly'")
    param_name, make_kernel = kernel_specs[ker]

    K = 10
    accuracys = []

    x_train, y_train, x_test, y_test = create_test_set(X, Y, nbre=500)

    for C in list_C:
        for param in list_param:

            # TRAIN: one model per pair of classes.
            Models = {}
            for i in range(K):
                for j in range(i + 1, K):
                    print(i, j)
                    x, y = create_dataset_onevone(x_train, y_train, i, j)

                    mod = KernelSVC_cvxopt(C=C, kernel=make_kernel(param))
                    mod.fit(x, y)

                    Models[(i, j)] = mod

            # TEST: votes[n, c] counts the pairwise models choosing class c.
            votes = np.zeros((len(x_test), K))

            for (class1, class2), model in Models.items():
                # A prediction equal to 1 votes for the first class of the
                # pair, anything else for the second one.
                first_wins = np.asarray(model.predict(x_test)).ravel() == 1
                votes[first_wins, class1] += 1
                votes[~first_wins, class2] += 1

            final_classes = np.argmax(votes, axis=1)
            acc = accuracy(final_classes, y_test)
            print(f'Accuracy : {acc} | C = {C} | {param_name} = {param}')
            accuracys.append(acc)

    return accuracys
        

In [758]:
# The grid-search helper defined above is named onevone_test_param_SVM; the
# previous name (onevone_test_paramRBF) is not defined in this part of the
# notebook and would raise NameError on a fresh top-to-bottom run.
acc = onevone_test_param_SVM(Xtr,Ytr,[2],[2],'poly')

0 1
     pcost       dcost       gap    pres   dres
 0:  3.8817e+01 -2.2351e+03  2e+03  3e-17  2e-15
 1: -1.7815e+01 -1.3874e+02  1e+02  2e-16  2e-15
 2: -2.7027e+01 -3.3944e+01  7e+00  2e-16  7e-16
 3: -2.7271e+01 -2.7418e+01  1e-01  2e-16  4e-16
 4: -2.7271e+01 -2.7273e+01  2e-03  2e-16  4e-16
 5: -2.7271e+01 -2.7272e+01  2e-05  2e-16  4e-16
Optimal solution found.
0 2
     pcost       dcost       gap    pres   dres
 0:  4.0559e+01 -2.3416e+03  2e+03  3e-17  2e-15
 1: -2.8901e+01 -1.7420e+02  1e+02  2e-16  2e-15
 2: -4.0806e+01 -4.8799e+01  8e+00  2e-16  6e-16
 3: -4.1078e+01 -4.1243e+01  2e-01  2e-16  4e-16
 4: -4.1078e+01 -4.1079e+01  2e-03  2e-16  4e-16
 5: -4.1078e+01 -4.1078e+01  2e-05  2e-16  4e-16
Optimal solution found.
0 3
     pcost       dcost       gap    pres   dres
 0:  4.1496e+01 -2.3437e+03  2e+03  3e-17  2e-15
 1: -2.4190e+01 -1.6242e+02  1e+02  2e-16  2e-15
 2: -3.5293e+01 -4.3270e+01  8e+00  2e-16  7e-16
 3: -3.5572e+01 -3.5746e+01  2e-01  2e-16  4e-16
 4: -3.5572e

In [759]:
acc

[0.288]

In [624]:
def onevone_test_param_poly(X,Y,list_C,list_d):
    """Evaluate one-vs-one polynomial-kernel SVMs over a grid of (C, d).

    The data is split once with ``create_test_set(X, Y, nbre=500)``. For every
    ``(C, d)`` combination, one ``KernelSVC_cvxopt`` model is trained per pair
    of classes ``i < j`` (10 classes are assumed) and the ``x_test`` part of
    the split is labelled by majority vote over the pairwise models.

    Args:
        X: Samples passed to ``create_test_set``.
        Y: Labels passed to ``create_test_set``.
        list_C: Values of the SVM parameter ``C`` to try.
        list_d: Values of the polynomial kernel parameter ``d`` to try.

    Returns:
        List of accuracies, in grid order (``C`` outer loop, ``d`` inner).
    """
    K = 10
    class_pairs = [(a, b) for a in range(K) for b in range(a + 1, K)]

    x_train, y_train, x_test, y_test = create_test_set(X, Y, nbre=500)

    # Every grid point overwrites all pair keys, so one dict can be reused.
    Models = {}
    accuracys = []

    for C in list_C:
        for d in list_d:

            # TRAIN: one model per pair of classes.
            for i, j in class_pairs:
                print(i, j)
                x, y = create_dataset_onevone(x_train, y_train, i, j)

                mod = KernelSVC_cvxopt(C=C, kernel=polynomial(d=d).kernel)
                mod.fit(x, y)

                Models[(i, j)] = mod

            # TEST: votes[n, c] counts the pairwise models choosing class c.
            votes = np.zeros((len(x_test), K))

            for pair, model in Models.items():
                class1, class2 = pair
                # A prediction equal to 1 votes for the first class of the
                # pair, anything else for the second one.
                for idx, prediction in enumerate(model.predict(x_test)):
                    winner = class1 if prediction == 1 else class2
                    votes[idx, winner] += 1

            final_classes = votes.argmax(axis=1)
            acc = accuracy(final_classes, y_test)
            print(f'Accuracy : {acc} | C = {C} | d = {d}')
            accuracys.append(acc)

    return accuracys
        

In [738]:
acc = onevone_test_param_poly(Xtr,Ytr,[0.5,0.6,0.7,0.8,0.9,1,1.2],[4,5,6])

0 1
0ème iteration, epsilon :0.0011094689084107914
1ème iteration, epsilon :4.666087713849876e-05
2ème iteration, epsilon :4.111282977125566e-06
3ème iteration, epsilon :2.9546402432328867e-08
4ème iteration, epsilon :1.5120089967395056e-12
0 2
0ème iteration, epsilon :0.0011169231952361711
1ème iteration, epsilon :4.111120951755655e-05
2ème iteration, epsilon :1.5716221197821382e-06
3ème iteration, epsilon :2.2484861731840097e-09
0 3
0ème iteration, epsilon :0.0011045651997568552
1ème iteration, epsilon :4.5072704482082755e-05
2ème iteration, epsilon :2.615543125460433e-06
3ème iteration, epsilon :8.352965051572608e-09
0 4
0ème iteration, epsilon :0.0011045596914623216
1ème iteration, epsilon :4.7558996608476145e-05
2ème iteration, epsilon :3.4676225725498817e-06
3ème iteration, epsilon :1.4382780387143211e-08
4ème iteration, epsilon :2.221412615487084e-13
0 5
0ème iteration, epsilon :0.001110708003821087
1ème iteration, epsilon :4.0919555636076475e-05
2ème iteration, epsilon :2.17235

In [739]:
acc

[0.278,
 0.252,
 0.268,
 0.27,
 0.252,
 0.264,
 0.27,
 0.252,
 0.268,
 0.266,
 0.252,
 0.266,
 0.262,
 0.248,
 0.266,
 0.258,
 0.248,
 0.272,
 0.256,
 0.248,
 0.278]

### one v one logistic 

In [693]:
def onevone_test_param_RBF_log(X,Y,list_C,list_sigma):
    """Evaluate one-vs-one RBF kernel logistic regressions over a grid.

    The data is split once with ``create_test_set(X, Y, nbre=500)``. For every
    ``(C, sigma)`` combination, one ``KernelLogisticRegression`` model is
    trained per pair of classes ``i < j`` (10 classes are assumed) and the
    ``x_test`` part of the split is labelled by majority vote.

    Args:
        X: Samples passed to ``create_test_set``.
        Y: Labels passed to ``create_test_set``.
        list_C: Values passed as ``reg_param`` to the logistic model.
        list_sigma: Values of the RBF kernel parameter ``sigma`` to try.

    Returns:
        List of accuracies, in grid order (``C`` outer loop, ``sigma`` inner).
    """
    K = 10
    accuracys = []

    x_train, y_train, x_test, y_test = create_test_set(X, Y, nbre=500)

    for C in list_C:
        for sigma in list_sigma:

            # TRAIN: one model per pair of classes.
            Models = {}
            for i in range(K):
                for j in range(i + 1, K):
                    print(i, j)
                    x, y = create_dataset_onevone(x_train, y_train, i, j)

                    kernel = RBF(sigma=sigma).kernel
                    mod = KernelLogisticRegression(kernel, reg_param=C)
                    mod.fit(x, y)

                    Models[(i, j)] = mod

            # TEST: votes[n, c] counts the pairwise models choosing class c.
            votes = np.zeros((len(x_test), K))

            for (class1, class2), model in Models.items():
                # An output above 0.5 votes for the first class of the pair,
                # anything else for the second one.
                first_wins = np.asarray(model.predict(x_test)).ravel() > 0.5
                votes[first_wins, class1] += 1
                votes[~first_wins, class2] += 1

            final_classes = np.argmax(votes, axis=1)
            acc = accuracy(final_classes, y_test)
            # The swept kernel parameter is sigma (it was mislabelled "d").
            print(f'Accuracy : {acc} | C = {C} | sigma = {sigma}')
            accuracys.append(acc)

    return accuracys
        

In [696]:
acc = onevone_test_param_RBF_log(Xtr,Ytr,[1,2,3],[0.01,1,2])

0 1
0ème iteration, epsilon :0.0005529444290848811
1ème iteration, epsilon :3.89532156530592e-15
0 2
0ème iteration, epsilon :0.0005505092210294563
1ème iteration, epsilon :3.827125248656582e-15
0 3
0ème iteration, epsilon :0.0005499037668408069
1ème iteration, epsilon :3.810211694765808e-15
0 4
0ème iteration, epsilon :0.0005529444290848811
1ème iteration, epsilon :3.89532156530592e-15
0 5
0ème iteration, epsilon :0.0005505092210294563
1ème iteration, epsilon :3.827125248656582e-15
0 6
0ème iteration, epsilon :0.0005511160099200922
1ème iteration, epsilon :3.843821962112859e-15
0 7
0ème iteration, epsilon :0.000553556601162473
1ème iteration, epsilon :3.912343539413943e-15
0 8
0ème iteration, epsilon :0.0005480953685941394
1ème iteration, epsilon :3.760338394831475e-15
0 9
0ème iteration, epsilon :0.0005499037668408069
1ème iteration, epsilon :3.810211694765808e-15
1 2
0ème iteration, epsilon :0.0005578800557880097
1ème iteration, epsilon :4.036159427511787e-15
1 3
0ème iteration, eps

In [697]:
acc

[0.098, 0.1, 0.09, 0.098, 0.1, 0.09, 0.098, 0.1, 0.09]

In [688]:
def onevone_test_param_poly_log(X,Y,list_C,list_d):
    """Evaluate one-vs-one polynomial kernel logistic regressions over a grid.

    The data is split once with ``create_test_set(X, Y, nbre=500)``. For every
    ``(C, d)`` combination, one ``KernelLogisticRegression`` model is trained
    per pair of classes ``i < j`` (10 classes are assumed) and the ``x_test``
    part of the split is labelled by majority vote.

    Args:
        X: Samples passed to ``create_test_set``.
        Y: Labels passed to ``create_test_set``.
        list_C: Values passed as ``reg_param`` to the logistic model.
        list_d: Values of the polynomial kernel parameter ``d`` to try.

    Returns:
        List of accuracies, in grid order (``C`` outer loop, ``d`` inner).
    """
    K = 10
    class_pairs = [(a, b) for a in range(K) for b in range(a + 1, K)]

    x_train, y_train, x_test, y_test = create_test_set(X, Y, nbre=500)

    # Every grid point overwrites all pair keys, so one dict can be reused.
    Models = {}
    accuracys = []

    for C in list_C:
        for d in list_d:

            # TRAIN: one model per pair of classes.
            for i, j in class_pairs:
                print(i, j)
                x, y = create_dataset_onevone(x_train, y_train, i, j)

                kernel = polynomial(d=d).kernel
                mod = KernelLogisticRegression(kernel, reg_param=C)
                mod.fit(x, y)

                Models[(i, j)] = mod

            # TEST: votes[n, c] counts the pairwise models choosing class c.
            votes = np.zeros((len(x_test), K))

            for pair, model in Models.items():
                class1, class2 = pair

                # Outputs above 0.5 are snapped to exactly 1 so the equality
                # test below counts them as a vote for the first class.
                predictions = model.predict(x_test)
                above_half = predictions > 0.5
                predictions[above_half] = 1

                for idx, prediction in enumerate(predictions):
                    winner = class1 if prediction == 1 else class2
                    votes[idx, winner] += 1

            final_classes = votes.argmax(axis=1)
            acc = accuracy(final_classes, y_test)
            print(f'Accuracy : {acc} | C = {C} | d = {d}')
            accuracys.append(acc)

    return accuracys
        

In [689]:
acc = onevone_test_param_poly_log(Xtr,Ytr,[1,2,3],[2,3])

0 1
0ème iteration, epsilon :0.000558035783161323
1ème iteration, epsilon :9.382220434372632e-09
0 2
0ème iteration, epsilon :0.0005555264308228916
1ème iteration, epsilon :4.932592667650226e-10
0 3
0ème iteration, epsilon :0.0005582576353526729
1ème iteration, epsilon :6.434473276894712e-09
0 4
0ème iteration, epsilon :0.0005566584236161284
1ème iteration, epsilon :5.300883317255027e-09
0 5
0ème iteration, epsilon :0.0005556973477516189
1ème iteration, epsilon :3.808291272936436e-09
0 6
0ème iteration, epsilon :0.0005525068737247874
1ème iteration, epsilon :2.457143978519191e-09
0 7
0ème iteration, epsilon :0.0005597852879928695
1ème iteration, epsilon :3.429997354068362e-09
0 8
0ème iteration, epsilon :0.0005581405149047051
1ème iteration, epsilon :1.1002590493422795e-09
0 9
0ème iteration, epsilon :0.0005580409484923737
1ème iteration, epsilon :6.93119647213105e-09
1 2
0ème iteration, epsilon :0.0005625586406158372
1ème iteration, epsilon :1.0954198207801386e-08
2ème iteration, epsi

In [690]:
acc

[0.234, 0.208, 0.232, 0.206, 0.232, 0.208]

## PCA + SVM 

In [621]:
def onevone_test_param_PCA_SVM(X,Y,list_C,list_sigma):
    """Grid-search one-vs-one RBF SVMs trained on kernel-PCA features.

    The data is split once with ``create_test_set(X, Y, nbre=2000)``. A
    ``KernelPCA`` (default ``RBF`` kernel, ``r=500``) is computed on the
    training part and used to transform both parts of the split. For every
    ``(C, sigma)`` combination, one ``KernelSVC_cvxopt`` model is trained per
    pair of classes ``i < j`` (10 classes are assumed) and the test part is
    labelled by majority vote over the pairwise models.

    Args:
        X: Samples passed to ``create_test_set``.
        Y: Labels passed to ``create_test_set``.
        list_C: Values of the SVM parameter ``C`` to try.
        list_sigma: Values of the RBF kernel parameter ``sigma`` to try.

    Returns:
        List of accuracies, in grid order (``C`` outer loop, ``sigma`` inner).
    """
    K = 10
    accuracys = []

    x_train, y_train, x_test, y_test = create_test_set(X, Y, nbre=2000)

    # Fit the kernel PCA on the training part only, then map BOTH parts into
    # the reduced space. The test part used to be left in the original
    # feature space, so models trained on PCA features were evaluated on
    # inputs they were never fitted for.
    pca = KernelPCA(kernel=RBF().kernel, r=500)
    pca.compute_PCA(x_train)
    x_train = pca.transform(x_train)
    x_test = pca.transform(x_test)

    for C in list_C:
        for sigma in list_sigma:

            # TRAIN: one model per pair of classes.
            Models = {}
            for i in range(K):
                for j in range(i + 1, K):
                    print(i, j)
                    x, y = create_dataset_onevone(x_train, y_train, i, j)

                    kernel = RBF(sigma=sigma).kernel
                    mod = KernelSVC_cvxopt(C=C, kernel=kernel)
                    mod.fit(x, y)

                    Models[(i, j)] = mod

            # TEST: votes[n, c] counts the pairwise models choosing class c.
            votes = np.zeros((len(x_test), K))

            for (class1, class2), model in Models.items():
                # A prediction equal to 1 votes for the first class of the
                # pair, anything else for the second one.
                first_wins = np.asarray(model.predict(x_test)).ravel() == 1
                votes[first_wins, class1] += 1
                votes[~first_wins, class2] += 1

            final_classes = np.argmax(votes, axis=1)
            acc = accuracy(final_classes, y_test)
            print(f'Accuracy : {acc} | C = {C} | sigma = {sigma}')
            accuracys.append(acc)

    return accuracys
        

In [622]:
# The helper defined above is named onevone_test_param_PCA_SVM; the previous
# name (onevone_test_param_PCA) is not defined in this part of the notebook
# and would raise NameError on a fresh top-to-bottom run.
acc = onevone_test_param_PCA_SVM(Xtr,Ytr,[1],[1])

0 1
     pcost       dcost       gap    pres   dres
 0: -3.1967e+02 -1.4388e+03  6e+03  3e+00  3e-14
 1: -2.2881e+02 -9.5826e+02  9e+02  1e-01  3e-14
 2: -2.5086e+02 -3.5060e+02  1e+02  1e-02  3e-14
 3: -2.8338e+02 -3.0771e+02  3e+01  2e-03  3e-14
 4: -2.9253e+02 -2.9748e+02  5e+00  2e-04  4e-14
 5: -2.9463e+02 -2.9509e+02  5e-01  9e-06  4e-14
 6: -2.9484e+02 -2.9485e+02  1e-02  2e-07  4e-14
 7: -2.9485e+02 -2.9485e+02  4e-04  4e-09  4e-14
 8: -2.9485e+02 -2.9485e+02  1e-05  5e-11  3e-14
Optimal solution found.
0 2
     pcost       dcost       gap    pres   dres
 0: -4.7196e+02 -1.2541e+03  3e+03  2e+00  4e-14
 1: -3.9036e+02 -8.3245e+02  4e+02  3e-16  5e-14
 2: -4.6421e+02 -5.0703e+02  4e+01  2e-16  5e-14
 3: -4.7672e+02 -4.9567e+02  2e+01  2e-16  5e-14
 4: -4.8167e+02 -4.9256e+02  1e+01  2e-16  5e-14
 5: -4.8591e+02 -4.8818e+02  2e+00  2e-16  5e-14
 6: -4.8671e+02 -4.8737e+02  7e-01  2e-16  5e-14
 7: -4.8699e+02 -4.8708e+02  9e-02  2e-16  6e-14
 8: -4.8703e+02 -4.8704e+02  1e-03  2e-

## KRR 


In [760]:
class KernelRR:
    """Kernel ridge regression with an additive offset term.

    ``fit`` solves the linear system ``(K' + (N/2) * lmbda * I) a = y'`` where
    ``K'`` is the training Gram matrix bordered by a row and a column of ones,
    ``y'`` is the target vector with a trailing 0, and ``N`` is the number of
    training samples. The first ``N`` entries of the solution are stored in
    ``alpha`` and the last one in ``b``.
    """

    def __init__(self,kernel,lmbda):
        """Store the kernel callable and the regularisation weight."""
        self.kernel = kernel
        self.lmbda = lmbda
        self.type = 'ridge'
        # Set by fit().
        self.support = None  # training samples
        self.alpha = None    # one coefficient per training sample
        self.b = None        # offset added by predict()

    def fit(self, X, y):
        """Fit the coefficients ``alpha`` and the offset ``b`` on ``(X, y)``."""
        n_samples = len(y)
        self.support = X
        gram = self.kernel(X, X)

        # Gram matrix with an extra row/column of ones (corner included).
        bordered = np.ones((n_samples + 1, n_samples + 1))
        bordered[:n_samples, :n_samples] = gram

        # Ridge term on the whole diagonal, including the offset entry.
        system = bordered + (n_samples / 2) * self.lmbda * np.eye(n_samples + 1)

        # Targets followed by a 0 for the extra equation.
        targets = np.concatenate((np.ravel(y), [0]))

        solution = np.linalg.solve(system, targets)
        self.alpha, self.b = solution[:-1], solution[-1]

    def regression_function(self,x):
        """Return ``kernel(x, support) @ alpha`` (offset not included).

        ``x`` holds one data point per row; the result has one value per row.
        """
        return self.kernel(x, self.support) @ self.alpha

    def predict(self, X):
        """Return the real-valued regression output for ``X``, offset included."""
        return self.regression_function(X) + self.b

In [761]:
Xtr.shape

(5000, 3072)

In [785]:

# Hyper-parameters.
# NOTE(review): sigma is defined but not used by the polynomial kernel below.
lmbda = 0.01
sigma = 0.01

# Split the labelled data with create_test_set.
x_train, y_train, x_test, y_test = create_test_set(Xtr, Ytr, nbre=500)

# Kernel ridge regression with the polynomial(d=2) kernel, fitted on y_train.
kernel = polynomial(d=2).kernel
ridge = KernelRR(kernel, lmbda=lmbda)
ridge.fit(x_train, y_train)

In [786]:
np.round(ridge.predict
         (x_test))

array([ 6.,  8.,  7.,  9.,  8.,  5.,  6.,  5.,  9.,  5.,  6.,  4.,  4.,
        6.,  7.,  5.,  3.,  9.,  6.,  6.,  6.,  7.,  6.,  6.,  7.,  7.,
        8.,  4.,  4.,  9.,  5.,  8.,  8.,  8.,  8.,  4.,  8.,  7.,  8.,
        8.,  6.,  0.,  5.,  8.,  5.,  8.,  8.,  5.,  6.,  5.,  5.,  7.,
        7.,  7.,  7.,  8.,  6.,  8.,  8.,  8.,  6.,  7.,  6.,  6.,  7.,
        7.,  8.,  7.,  5.,  8.,  6.,  7.,  6.,  8.,  2.,  7.,  7.,  3.,
        9.,  6.,  7.,  6.,  5.,  9.,  7.,  6.,  5.,  7.,  7.,  8.,  7.,
        8.,  6.,  8.,  6.,  5.,  5.,  7.,  6.,  2.,  8.,  8.,  3.,  6.,
        8.,  8.,  8.,  7.,  6.,  6.,  3.,  8.,  9.,  8.,  8.,  6.,  5.,
        7.,  7.,  7.,  5.,  6.,  7.,  5.,  5.,  8.,  7.,  9.,  8.,  8.,
        7.,  8.,  6.,  8.,  7.,  7.,  6.,  6.,  7.,  6.,  5.,  2.,  9.,
        6.,  6.,  7.,  7.,  7.,  6.,  6.,  9.,  6.,  5.,  4.,  5.,  5.,
        7.,  4.,  5.,  7.,  8.,  7.,  7.,  7.,  5.,  8.,  9.,  6.,  8.,
        5.,  7.,  7.,  3.,  7.,  6.,  7.,  5.,  7.,  6.,  5.,  7

In [787]:
accuracy(np.round(ridge.predict(x_test)),y_test)

0.088

array([ 1., -0.])

In [765]:
y_test

array([6, 2, 0, 2, 2, 0, 1, 3, 7, 4, 3, 4, 6, 9, 1, 0, 7, 7, 5, 1, 6, 1,
       7, 0, 5, 7, 0, 1, 3, 7, 4, 2, 6, 5, 1, 7, 3, 1, 1, 7, 9, 8, 1, 7,
       2, 2, 0, 8, 7, 3, 3, 9, 5, 2, 2, 5, 6, 4, 2, 7, 7, 1, 2, 7, 0, 3,
       8, 3, 9, 9, 2, 3, 1, 9, 6, 9, 1, 7, 0, 2, 3, 2, 3, 4, 4, 7, 5, 9,
       5, 9, 1, 2, 5, 3, 4, 2, 9, 6, 5, 7, 7, 4, 0, 8, 1, 4, 0, 1, 1, 5,
       5, 1, 3, 6, 0, 3, 9, 0, 6, 5, 8, 8, 3, 9, 4, 4, 0, 9, 0, 4, 6, 6,
       2, 5, 5, 0, 5, 8, 6, 7, 7, 5, 2, 4, 3, 4, 6, 0, 0, 4, 3, 6, 4, 8,
       4, 2, 1, 2, 3, 7, 7, 7, 5, 5, 5, 4, 2, 9, 6, 4, 0, 1, 1, 2, 5, 1,
       7, 9, 4, 5, 6, 3, 8, 5, 5, 5, 5, 8, 6, 0, 0, 0, 0, 8, 7, 9, 3, 7,
       3, 8, 2, 3, 8, 1, 8, 2, 4, 1, 0, 1, 7, 9, 6, 9, 0, 1, 3, 4, 0, 6,
       0, 0, 9, 8, 7, 6, 2, 3, 8, 9, 4, 6, 9, 4, 7, 9, 1, 2, 6, 5, 0, 0,
       4, 9, 8, 9, 7, 0, 1, 3, 0, 5, 4, 6, 2, 5, 6, 9, 4, 6, 6, 9, 7, 7,
       4, 4, 5, 1, 0, 4, 1, 5, 5, 2, 5, 5, 9, 0, 8, 6, 1, 0, 1, 1, 2, 7,
       7, 0, 8, 7, 2, 7, 4, 8, 8, 9, 3, 6, 4, 9, 0,