In [33]:
#@title Installation
!pip install --quie ipdb # debug
!pip install --quie ipython-autotime  # timming
!pip install --quie optuna  # hyperparaeters

time: 53.4 s


# <font color='red'>You need to run the cell below twice before to proceed </font>

In [34]:
# @title Imports

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import cvxopt
np.random.seed(54321)

import ipdb

import optuna

from sklearn.model_selection import KFold 
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from sklearn.preprocessing import OneHotEncoder

from kernels.basic import RBF, Linear, Polynomial
from kernels.regular import rbf_kernel, Exponential, Laplacian, RationalQuadratic, \
                            InverseMultiquadratic, Cauchy, TStudent, ANOVA, Fourier, Tanimoto, Sorensen

%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 9.01 ms


In [35]:
X_train=pd.read_csv('./data/Xtr.csv', sep=',') #we use this dataset to train our model
Y_train=pd.read_csv('./data/Ytr.csv', sep=',') #we use this dataset to train our model
X_test=pd.read_csv('./data/Xte.csv', sep=',') #we will use this data set later to validate our model

# X_train_mat=pd.read_csv('./data/Xtr_mat100.csv', sep=',') #we use this dataset to train our model
# X_test_mat=pd.read_csv('./data/Xte_mat100.csv', sep=',') #we will use this data set later to validate our model

time: 28.1 ms


In [36]:
X_train.tail()

Unnamed: 0,Id,seq
1995,1995,TAACTTTTGACAGGTCAGAATACAAAACTGATTTATTTACAGTGTC...
1996,1996,ACGCCCATTCCGCCCTGCTAAGCCTCGCCCATTACATCCAGACTGC...
1997,1997,TGGCTACTAGCTAGAGATAGCATCTCTCTGTGGACAACTCTCCAGC...
1998,1998,CCCAGCTGTCAAAAAGCAGCCCAAAGGAAGCTCACGGTGTGCCGGC...
1999,1999,TGCTAGTTGATGAAACAATAACTGCTAAAAGGTATACAGCCATGTC...


time: 21 ms


In [37]:
print('The shape of the X_train dataset is:',X_train.shape)
print('The shape of the Y_train dataset is:',Y_train.shape)

The shape of the X_train dataset is: (2000, 2)
The shape of the Y_train dataset is: (2000, 2)
time: 1.44 ms


In [38]:
# X_train['len'] = X_train.seq.apply(lambda x : len(x))

time: 1.06 ms


## Models

In [39]:
class LogisticRegressionBinary():
    def __init__(self, lr=0.1, num_iter=100000, batch_size=1, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.batch_size = batch_size
        self.verbose = verbose
    
    def __add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)
    
    def __sigmoid_func(self, z):
        return 1 / (1 + np.exp(-z))
    
    def __loss(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
    
    def fit(self, X, y):
        y = self.trans_y(y)
        X = self.__add_intercept(X)
        self.theta = np.zeros(X.shape[1])
        
        for i in range(self.num_iter):
            z = np.dot(X, self.theta)
            h = self.__sigmoid_func(z)
                        
            rand = np.random.choice(y.size, self.batch_size).squeeze()
            gradient = np.dot(X[rand].T, (h[rand] - y[rand]))/y.size   
        
            self.theta -= self.lr * gradient
            #print('theta and grad',self.theta.shape ,  gradient.shape )
            if(self.verbose == True and i % 100 == 0):
                z = np.dot(X, self.theta)
                h = self.__sigmoid(z)
                print(f'loss: {self.__loss(h, y)} \t')
    
    def predict_probability(self, X):
        X = self.__add_intercept(X)
    
        return self.__sigmoid_func(np.dot(X, self.theta))
    
    def predict(self, X, threshold=.5):
          return np.where(self.predict_probability(X) >= 0.5, 1, 0)
        
          
    def Accuracy_check(self, X, y, threshold = 0.5):
        return np.mean(self.predict(X, threshold)==y)
    
    def trans_y(self, y):
        if isinstance(y, pd.Series):
            y = y.values
        if isinstance(y, list):
            y = np.array(y)
        return y

time: 154 ms


In [40]:
# Ridge Regression (RR)

class solveRR():
    def __init__(self, X, y, lam=0.1):
        self.beta = None
        self.X = X
        self.y = y
        self.lam = lam
            
    def fit(self):
        
        X = self.X
        y = self.y
        lam = self.lam 
        
        n, p = X.shape
        assert (len(y) == n)

        A = (X.T.dot(X)) + np.eye(p)*lam*n
        b = X.T.dot(y)
        
        self.beta = np.linalg.solve(A, b)
        
        return self.beta
    
        
    def predict(self, X, threshold=.5):
        return np.where(X.dot(self.beta) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)
    

# Weighted Ridge Regression (WRR)
class solveWRR():
    def __init__(self, X, y, w, lam=0.1):
        self.beta = None
        self.X = X
        self.y = y
        self.lam = lam
        self.w = w
    
    def fit(self):
        
        X = self.X
        y = self.y
        lam = self.lam 
        w = self.w
        
        n, p = X.shape
        assert (len(y) == len(w) == n)

        y1 = np.sqrt(w) * y
        X1 = (np.sqrt(w) * X.T).T
        
        # Hint:
        # Find y1 and X1 such that:
        
        self.beta = solveRR(X1, y1, lam).fit()
                
        return self.beta
    
        
    def predict(self, X, threshold):
        return np.where(X.dot(self.beta) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)
    

# Logistic Ridge Regression (LRR)
class solveLRR():
    def __init__(self, X, y, lam=0.1):
        self.beta = None
        self.X = X
        self.y = y
        self.lam = lam
    
    def fit(self):
        
        X = self.X
        y = self.y
        
        n, p = X.shape
        assert (len(y) == n)
    
        lam = self.lam 
        max_iter = 50
        eps = 1e-3
        sigmoid = lambda a: 1/(1 + np.exp(-a))
        
        
        
        # Initialize
        self.beta = np.zeros(p)

        # Hint: Use IRLS
        for i in range(max_iter):
            beta_old = self.beta
            f = X.dot(beta_old)
            w = sigmoid(f) * sigmoid(-f)
            z = f + y / sigmoid(y*f)
            self.beta = solveWRR(X, z, w, 2*lam).fit()
            # Break condition (achieved convergence)
            #if np.sum((beta-beta_old)**2) < eps:
            #    break                
        return self.beta
    
        
    def predict(self, X, threshold):
        return np.where(X.dot(self.beta) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)

time: 341 ms


# Kernel

In [41]:
import numpy as np

### Functions for you to fill in ###

def polynomial_kernel(X, Y, c, p):
    """
        Compute the polynomial kernel between two matrices X and Y::
            K(x, y) = (<x, y> + c)^p
        for each pair of rows x in X and y in Y.

        Args:
            X - (n, d) NumPy array (n datapoints each with d features)
            Y - (m, d) NumPy array (m datapoints each with d features)
            c - a coefficient to trade off high-order and low-order terms (scalar)
            p - the degree of the polynomial kernel

        Returns:
            kernel_matrix - (n, m) Numpy array containing the kernel matrix
    """
    # YOUR CODE HERE
    # raise NotImplementedError
    kernel_matrix = (X.dot(Y.T) + c)**p
    
    return kernel_matrix


def rbf_kernel_element_wise(x, y, sigma=1):
    '''
    returns the RBF (Gaussian) kernel k(x, y)
    
    Input:
    ------
    x and y are p-dimensional vectors 
    '''
    K = np.exp(- np.sum((x - y)**2) / (2 * sigma ** 2))
    return K

# def rbf_kernel(X1, X2, sigma=10):
#     '''
#     Returns the kernel matrix K(X1_i, X2_j): size (n1, n2)
    
#     Input:
#     ------
#     X1: an (n1, p) matrix
#     X2: an (n2, p) matrix
#     '''
#     # For loop with rbf_kernel_element works but is slow in python
#     # Use matrix operations!
#     X2_norm = np.sum(X2 ** 2, axis=-1)
#     X1_norm = np.sum(X1 ** 2, axis=-1)
#     gamma = 1 / (2 * sigma ** 2)
#     K = np.exp(- gamma * (X1_norm[:, None] + X2_norm[None, :] - 2 * np.dot(X1, X2.T)))
#     return K

# class rbf_kernel():
#     '''
#     Returns the kernel matrix K(X1_i, X2_j): size (n1, n2)
    
#     Input:
#     ------
#     X1: an (n1, p) matrix
#     X2: a
#     '''

#     def __init__(self, sigma=2.0):
#         self.sigma = sigma

#     def _compute(self, X1, X2):
#         # For loop with rbf_kernel_element works but is slow in python
#         # Use matrix operations!
#         X2_norm = np.sum(X2 ** 2, axis=-1)
#         X1_norm = np.sum(X1 ** 2, axis=-1)
#         gamma = 1 / (2 * self.sigma ** 2)
#         K = np.exp(- gamma * (X1_norm[:, None] + X2_norm[None, :] - 2 * np.dot(X1, X2.T)))
#         return K



def laplace(X1, X2, alpha=10):
    return np.exp(-alpha*np.abs(X1-X2))


def linear_kernel(X1, X2):
    '''
    Returns the kernel matrix K(X1_i, X2_j): size (n1, n2)
    where K is the linear kernel
    
    Input:
    ------
    X1: an (n1, p) matrix
    X2: an (n2, p) matrix
    '''
    return X1.dot(X2.T)

def quadratic_kernel(X1, X2, power=2):
    '''
    Returns the kernel matrix K(X1_i, X2_j): size (n1, n2)
    where K is the quadratic kernel
    
    Input:
    ------
    X1: an (n1, p) matrix
    X2: an (n2, p) matrix
    '''
    return (1 + linear_kernel(X1, X2))**power

def rbf_poly_kernel(X1, X2, sigma=10, d=2, rbf=1.0, poly=1.0):
    '''
    Returns the kernel matrix K(X1_i, X2_j): size (n1, n2)
    
    Input:
    ------
    X1: an (n1, p) matrix
    X2: an (n2, p) matrix
    '''
    # For loop with rbf_kernel_element works but is slow in python
    # Use matrix operations!
    X2_norm = np.sum(X2 ** 2, axis=-1)
    X1_norm = np.sum(X1 ** 2, axis=-1)
    gamma = 1 / (2 * sigma ** 2)
    K = np.exp(- gamma * (X1_norm[:, None] + X2_norm[None, :] - 2 * np.dot(X1, X2.T)))
    
    return rbf*K + poly*(X1.dot(X2.T) +1)**d

time: 108 ms


In [44]:
class ksolveRR_2():
    def __init__(self, X, y, lam= 0.00015, sample_weights = None, kernel = None):
        self.alpha = None
        self.X = X
        self.y = y
        self.lam = lam
        self.kernel = kernel
        self.sample_weights = sample_weights
            
    
    def fit(self):
        if self.sample_weights is not None:
            self.X *= self.sample_weights[:, None]
        
        X = self.X
        y = self.y
        lam = self.lam
        
        n, p = X.shape
        assert (len(y) == n)
        
        A = self.kernel(X, X)+n*self.lam*np.eye(n)
        self.alpha = np.linalg.solve(A, y)
        
        return self
    
        
    def predict(self, X, threshold=.5):
        K_x = self.kernel(X, self.X)
        return np.where(K_x.dot(self.alpha) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)

time: 50.6 ms


In [45]:
class ksolveRR():
    def __init__(self, X, y, lam= 0.0001, kernel=None):
        self.beta = None
        self.X = X
        self.y = y
        self.lam = lam
        self.kernel = kernel
            
    
    def fit(self):
        X = self.X
        y = self.y
        lam = self.lam 
        
        n, p = X.shape
        assert (len(y) == n)
        
#         if self.sigma is None:
#             self.sigma = sigma_from_median(X)
            
#         A = self.kernel(X, X, self.sigma)+n*self.lam*np.eye(n)
        A = self.kernel(X, X) + n*self.lam*np.eye(n)
        self.alpha = np.linalg.solve(A, y)
        
        return self.beta
    
        
    def predict(self, X, threshold=.5):
#         K_x = self.kernel(X, self.X, self.sigma)
        K_x = self.kernel(X, self.X)
        return np.where(K_x.dot(self.alpha) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)

time: 42.9 ms


In [46]:
# Logistic Ridge Regression (LRR)
class ksolveLRR():
    def __init__(self, X, y, lam = 0.1, sigma = 4, max_iter=100, tol=1e-5, kernel=None):
        self.alpha = None
        self.X = X
        self.y = y
        self.lam = lam
        
        self.kernel = kernel
        
        self.sigma = sigma
        self.max_iter = max_iter
        self.tol = tol
        
    
    def fit(self):
        
        X = self.X
        y = self.y
        
        n, p = X.shape
        assert (len(y) == n)

        sigmoid = lambda a: 1/(1 + np.exp(-a))
        
        K = self.kernel(X, X)

        # Initialize
        alpha = np.zeros(n)
        
        # Hint: Use IRLS
        for n_iter in range(self.max_iter):
            alpha_old = alpha
            f = K.dot(alpha_old)
            w = sigmoid(f) * sigmoid(-f)
            z = f + y / sigmoid(y*(f))
            
            alpha = ksolveRR_2(X, y, lam = 2*self.lam, \
                               sigma=self.sigma, sample_weights = w).fit().alpha
            
            # Break condition (achieved convergence)
            if np.sum((alpha-alpha_old)**2) < self.tol:
                break  
                
                
        self.n_iter = n_iter
        self.alpha = alpha
        
        return self
    
        
    def predict(self, X, threshold):
        K_x = self.kernel(X, self.X)
        return np.where(K_x.dot(self.alpha) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)

time: 95.5 ms


In [55]:
# You don't need to look at this, this is just to adapt our matrices
# to the solver being used
solver='cvxopt'

import cvxopt

def cvxopt_qp(P, q, G, h, A, b):
    P = .5 * (P + P.T)
    cvx_matrices = [
        cvxopt.matrix(M) if M is not None else None for M in [P, q, G, h, A, b] 
    ]
    cvxopt.solvers.options['show_progress'] = False
    solution = cvxopt.solvers.qp(*cvx_matrices, options={'show_progress': False})
    return np.array(solution['x']).flatten()

solve_qp = cvxopt_qp

# def quadprog_solve_qp(P, q, G=None, h=None, A=None, b=None):
#     qp_G = .5 * (P + P.T)   # make sure P is symmetric
#     qp_a = -q
#     if A is not None:
#         qp_C = -np.vstack([A, G]).T
#         qp_b = -np.hstack([b, h])
#         meq = A.shape[0]
#     else:  # no equality constraint
#         qp_C = - G.T
#         qp_b = - h
#         meq = 0
#     return quadprog.solve_qp(qp_G, qp_a, qp_C, qp_b, meq)[0]



# solve_qp = {'quadprog': quadprog_solve_qp, 'cvxopt': cvxopt_qp}[solver]

def svm_dual_soft_to_qp_kernel(K, y, C=1):
    n = K.shape[0]
    assert (len(y) == n)
        
    # Dual formulation, soft margin
    P = np.diag(y).dot(K).dot(np.diag(y))
    # As a regularization, we add epsilon * identity to P
    eps = 1e-12
    P += eps * np.eye(n)
    q = - np.ones(n)
    G = np.vstack([-np.eye(n), np.eye(n)])
    h = np.hstack([np.zeros(n), C * np.ones(n)])
    A = y[np.newaxis, :]
    A = A.astype('float')
    b = np.array([0.])
    return P, q, G, h, A, b

# SVM primal soft
class KernelSVM():
    def __init__(self, X, y, C=0.1, lam = 0.1, tol = 1e-3, kernel=None):
        self.alpha = None
        self.X = X
        self.y = y
        self.w = None
        self.b = None
        self.C = C
        self.kernel = kernel
        self.lam = lam        
        self.tol = tol
    
    def fit(self):
        
        X = self.X
        y = self.y
        C = self.C
        
        n, p = X.shape
        assert (len(y) == n)
        K = self.kernel(X, X)
        
        # Solve dual problem
        self.alpha = solve_qp(*svm_dual_soft_to_qp_kernel(K, y, C=self.C))
        
        
       # Compute support vectors and bias b
        sv = np.logical_and((self.alpha>self.tol), \
                            (self.C - self.alpha > self.tol))
        
        self.bias = y[sv] - K[sv].dot(self.alpha*y)
        self.bias =  self.bias.mean()

        self.support_vector_indices = np.nonzero(sv)[0]
        
        return self
        
        
        
    def predict(self, X, threshold):
        K_x = self.kernel(X, self.X)
        return np.where((K_x.dot(self.alpha * self.y) +  self.bias) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)

time: 254 ms


# Cross Validation

In [56]:
X_train_ = pd.read_csv('./data/Xtr.csv', sep=',') #we use this dataset to train our model
Y_train_ = pd.read_csv('./data/Ytr.csv', sep=',') #we use this dataset to train our model
X_test_ = pd.read_csv('./data/Xte.csv', sep=',')

kfold=KFold(n_splits=5)

def spectrum_kernal(X_train, y, X_test, n=2, encoder=8, one_hot = True, normalise = False):
    
    d = {'A': 1, 'C':2, 'G':3, 'T':4}
    
    for i in range(0, 101-n+1, 1):
        X_train['seq_'+str(i)] = X_train.seq.apply(lambda x :x[i:i+n])
        X_test['seq_'+str(i)] = X_test.seq.apply(lambda x :x[i:i+n])
        
        X_train['seq_'+str(i)] = X_train['seq_'+str(i)].apply(lambda x : sum([d[x[ii]]*encoder**(ii+1) for ii in range(n)]))
        X_test['seq_'+str(i)] = X_test['seq_'+str(i)].apply(lambda x : sum([d[x[ii]]*encoder**(ii+1) for ii in range(n)]))
        
        
        
    X = X_train.drop(['seq', 'Id'], axis=1)
    X_t = X_test.drop(['seq', 'Id'], axis=1)
    y = Y_train.Bound
    
#     print(f'Train: \n{X.tail()}\n -----------------------\n')
#     print(f'Test: \n {X_t.tail()}')

    if one_hot:
        onehot_encoder = OneHotEncoder(sparse=False, categories='auto', handle_unknown='ignore')

        X_cross = X.values
        X_t = X_t.values
        
        enc = onehot_encoder.fit(X)
        X_cross = enc.transform(X)
        X_t_enc = enc.transform(X_t)
        
    elif normalise:
        scaler = MinMaxScaler()#MinMaxScaler() # StandardScaler()
        scaler.fit(X)
        
        X_cross = scaler.transform(X)
        X_t_enc = scaler.transform(X_t)
        
    else :
        
        X_cross = X.values
        X_t_enc = X_t.values
    
    y_cross = y.values
    
    return X_cross, y_cross, X_t_enc

time: 185 ms


# Hyper Parameters search 

In [57]:
def objective_sgd(trial):
    
    q  = trial.suggest_loguniform('q', 1e-5, 3e+0)
    
    sigma  = trial.suggest_loguniform('sigma', 1e-1, 8e+0) # trial.suggest_float('sigma', 1e-5, 1e-3, log=True)
    
    kernel = trial.suggest_categorical('kernel', [0, 8, 4])#[Exponential, Laplacian, RationalQuadratic, InverseMultiquadratic, \
                                               #Cauchy, TStudent, ANOVA, Fourier, Tanimoto, Sorensen])
    
#     lam = trial.suggest_loguniform('lam', 1e-15, 1e-4)
    
#     d = trial.suggest_int('d', 2, 8)
    c = trial.suggest_loguniform('c', 1e-2, 5)
    
    degree = trial.suggest_int('degree', 2, 10)
    
    # trick to avoid warnning from opma that support only basic datatype
    trick_list = [rbf_kernel, Exponential, Laplacian, RationalQuadratic, InverseMultiquadratic, \
                        Cauchy, TStudent, ANOVA, Fourier, Tanimoto, Sorensen]
        
    n = trial.suggest_int('n', 1, 3)
    
    tol = 1e-3 # trial.suggest_loguniform('tol', 1e-7, 1e-0)

#     sigma=10, d=2, rbf=1.0, poly=1.0
    
#     lr  = trial.suggest_loguniform('lr', 2e-5, 1e-1)
    
#     normalise = trial.suggest_categorical('normalise', [False , True])
    
#     rbf = trial.suggest_loguniform('rbf', 1e-3, 3e+0)
    
#     poly = trial.suggest_loguniform('poly', 1e-3, 3e+0)
    
    
#     model = trial.suggest_categorical('models', [ksolveRR , ksolveRR_2, ksolveLRR, KernelSVM])
    
#     models = {ksolveRR : 'k Ridge Reg', ksolveRR_2: 'weigh Ridge Reg', \
#               ksolveLRR: 'k Logistic Ridge Reg', KernelSVM : 'Kernal SVM', LogisticRegressionBinary: 'log reg'} 
#     
    kernels = {rbf_kernel : 'rbf',
               Exponential : 'Exponential kernel (self, sigma=None)',
               Laplacian : 'Laplacian kernel (self, sigma=None)',
               Cauchy : 'Cauchy kernel (self, sigma=None)',
               
               RationalQuadratic : 'Rational quadratic kernel (self, c=1)',
               InverseMultiquadratic : 'Inverse multiquadratic kernel (self, c=1)',
               
               TStudent : 'T-Student kernel (self, degree=2)',
               
               ANOVA : 'ANOVA kernel (self, sigma=1., d=2)',
               
               Fourier : 'Fourier kernel (self, q=0.1)',
               
               Tanimoto : 'Tanimoto kernel',
               Sorensen : 'Sorensen kernel'
              }
    
    if kernels[trick_list[kernel]] == 'ANOVA kernel (self, sigma=sigma, d=d)':
        kernel = trick_list[kernel](sigma=sigma, d=d)
        
    elif kernels[trick_list[kernel]] == 'Exponential kernel (self, sigma=None)' or \
                        kernels[trick_list[kernel]] == 'Laplacian kernel (self, sigma=None)' or \
                        kernels[trick_list[kernel]] == 'Cauchy kernel (self, sigma=None)'or \
                        kernels[trick_list[kernel]] == 'rbf':
        
        kernel = trick_list[kernel](sigma=sigma)
        
    elif kernels[trick_list[kernel]] == 'Rational quadratic kernel (self, c=1)' or \
                        kernels[trick_list[kernel]] == 'Inverse multiquadratic kernel (self, c=1)':
        kernel = trick_list[kernel](c=c)
        
    elif kernels[trick_list[kernel]] == 'T-Student kernel (self, degree=2)':
        kernel = trick_list[kernel](degree=degree)
        
    elif kernels[trick_list[kernel]] == 'Fourier kernel (self, q=0.1)':
        kernel = trick_list[kernel](q=q)
        
    else:
        kernel = trick_list[kernel]()
        
    
#     n = 1
    
    models = {KernelSVM : 'Kernal SVM'}
    

    X_cross, y_cross, X_t_enc = spectrum_kernal(X_train_, Y_train_, X_test_, n=n, encoder=8, one_hot = True, normalise = False)
    
    for model in models:
        accuracy = []
        for i, (train_index, validate_index) in enumerate(kfold.split(X_cross)):
            X_train, y_train = X_cross[train_index], y_cross[train_index]
            X_valid, y_valid = X_cross[validate_index], y_cross[validate_index]

            if models[model] == 'weigh Ridge Reg':
                sample_weights = np.random.rand(len(y_train))
                model_curr = model(X_train, y_train, lam = lam, sigma = sigma, sample_weights = sample_weights, kernel = kernel)

            elif models[model] == 'k Logistic Ridge Reg':
                model_curr = model(X_train, y_train, lam = lam, sigma = sigma, max_iter=100, tol = tol, kernel = kernel)
            elif models[model] == 'k Ridge Reg':
                
                model_curr = model(X_train, y_train, lam= lam, kernel=kernel)
            elif models[model] == 'log reg':
                model_curr = model(lr=lr, num_iter=5000, batch_size=1)
            else:
                model_curr = model(X_train, y_train, C=c, lam = lam, tol = tol, kernel = kernel)
                
            if models[model] == 'log reg':
                model_curr.fit(X_train, y_train)
            else:
                model_curr.fit()
            

            accuracy.append(model_curr.Accuracy_check(X_valid, y_valid, threshold=0.5))

    return np.mean(accuracy)

sampler = optuna.samplers.TPESampler()

study = optuna.create_study(sampler=sampler, direction='maximize')
study.optimize(func=objective_sgd, n_trials=1500, show_progress_bar=True)


trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))


Progress bar is experimental (supported from v1.2.0). The interface can change in the future.



HBox(children=(FloatProgress(value=0.0, max=1500.0), HTML(value='')))


Mean of empty slice.


invalid value encountered in double_scalars


invalid value encountered in greater_equal



[32m[I 2020-05-31 21:13:24,381][0m Finished trial#0 with value: 0.501 with parameters: {'q': 0.0042002248492288225, 'sigma': 0.3283446594764692, 'kernel': 4, 'c': 1.1062885582554294, 'degree': 5, 'n': 1}. Best is trial#0 with value: 0.501.[0m



Mean of empty slice.


invalid value encountered in double_scalars


invalid value encountered in greater_equal






KeyboardInterrupt: 

time: 1min 6s


In [None]:
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

In [70]:
2+2

4

time: 9.9 ms


In [None]:
# Accuracy: 0.7025 (overfitting on accuracy 4)
# Best hyperparameters: {'sigma': 4.133127895830191, 'lam': 6.196055115305764e-14}

In [None]:
# Augmented data

# sigma  = trial.suggest_loguniform('sigma', 1e-20, 20)
# lam = trial.suggest_loguniform('lam', 1e-20, 1e-1)

# models = {ksolveRR: 'k Ridge Reg'} (Augmented data)
    # kenel = quadratic_kernel
    # Accuracy: 0.6555
    # Best hyperparameters: {'sigma': 4.065620491225982, 'lam': 0.013374107290659768}
    # time: 49min 21s
    #----
    # kenel = rbf_kernel
    # Accuracy: 0.6575 # 0.6575000000000001
    # Best hyperparameters: {'sigma': 4.119788517147901, 'lam': 1.2752298700618223e-14}, {'sigma': 4.063158315715049, 'lam': 8.30834772639223e-05}
    # time: 32min 57s
    
    # ------
    # kenel = rbf_kernel
    # Accuracy: 0.6595000000000001
    # Best hyperparameters: {'sigma': 4.1032895001889464, 'lam': 0.00014526367793975609}
    # time: --

    
# sigma  = trial.suggest_loguniform('sigma', 1e-3, 20)
# lam = trial.suggest_loguniform('lam', 1e-15, 1e-0)

# models = {ksolveRR: 'k Ridge Reg'} (Non-Augmented data)
    # kenel = quadratic_kernel
    # Accuracy: 0.6555
    # Best hyperparameters: {'sigma': 4.069738272304652, 'lam': 0.053817561640154984}
    # time: 48min 12s
    #----
    # kenel = rbf_kernel
    # Accuracy: 0.6575000000000001
    # Best hyperparameters: {'sigma': 4.001185698777986, 'lam': 1.9770379950513775e-10}
    # time: 29min 58s
    

# Accuracy: 0.6605000000000001
# Best hyperparameters: {'sigma': 4.538118805230398, 'lam': 8.583924705371449e-15}
# time: 1.32 ms

# Original data

# sigma  = trial.suggest_loguniform('sigma', 1e-3, 20)
# lam = trial.suggest_loguniform('lam', 1e-15, 1e-0)
  
# models = {ksolveRR: 'k Ridge Reg'} (Non-Augmented data standardize)
    # kenel = rbf_kernel
    # Accuracy: 0.612
    # Best hyperparameters: {'sigma': 2.8925324917718167, 'lam': 1.2575244169567934e-08}
    # time: 28min 33s 
    #----
    # kenel = quadratic_kernel
    # Accuracy: EROOR LinearAlg
    # Best hyperparameters: 
    # time: 

In [None]:
# {'sigma': 4.119788517147901, 'lam': 1.2752298700618223e-14}    # time: 32min 57s BEST(69.4)
# ksolveRR_65_cv_0.00000000001_4.csv Second (69.2)


In [44]:
trial.params

{'sigma': 4.001616944273439, 'lam': 4.873253388094652e-09}

time: 6.78 ms


In [67]:
class KernelMethodBase(object):
    '''
    Base class for kernel methods models
    
    Methods
    ----
    fit
    predict
    '''
    kernels_ = {
        'linear': linear_kernel,
        'quadratic': quadratic_kernel,
        'rbf': rbf_kernel,
        #if you want to add your own kernel
        #'customer_kernel':custom_kernel
    }
    def __init__(self, kernel='linear', **kwargs):
        self.kernel_name = kernel
        self.kernel_function_ = self.kernels_[kernel]
        self.kernel_parameters = self.get_kernel_parameters(**kwargs)
        
    def get_kernel_parameters(self, **kwargs):
        params = {}
        if self.kernel_name == 'rbf':
            params['sigma'] = kwargs.get('sigma', None)
            
#         if self.kernel_name == 'customer_kernel':
#             params['parameter_1'] = kwargs.get('parameter_1', None)
#             params['parameter_2'] = kwargs.get('parameter_2', None)
        return params

    def fit(self, X, y, **kwargs):
        return self
        
    def decision_function(self, X):
        pass

    def predict(self, X):
        pass

time: 22.2 ms


In [None]:
def rbf_kernel(X1, X2, sigma=10):
    '''
    Returns the kernel matrix K(X1_i, X2_j): size (n1, n2)
    where K is the RBF kernel with parameter sigma
    
    Input:
    ------
    X1: an (n1, p) matrix
    X2: an (n2, p) matrix
    sigma: float
    '''
    # For loop with rbf_kernel_element works but is slow in python
    # Use matrix operations!
    X2_norm = np.sum(X2 ** 2, axis = -1)
    X1_norm = np.sum(X1 ** 2, axis = -1)
    gamma = 1 / (2 * sigma ** 2)
    K = np.exp(- gamma * (X1_norm[:, None] + X2_norm[None, :] - 2 * np.dot(X1, X2.T)))
    return K

In [None]:
def svm_dual_soft_to_qp_kernel(K, y, C=1):
    n = K.shape[0]
    assert (len(y) == n)
        
    # Dual formulation, soft margin
    P = np.diag(y).dot(K).dot(np.diag(y))
    # As a regularization, we add epsilon * identity to P
    eps = 1e-12
    P += eps * np.eye(n)
    q = - np.ones(n)
    G = np.vstack([-np.eye(n), np.eye(n)])
    h = np.hstack([np.zeros(n), C * np.ones(n)])
    A = y[np.newaxis, :]
    b = np.array([0.])
    return P, q, G, h, A, b

K = linear_kernel(X_train, X_train)
alphas = solve_qp(*svm_dual_soft_to_qp_kernel(K, y_train, C=1.))

class KernelSVM(KernelMethodBase):
    '''
    Kernel SVM Classification
    
    Methods
    ----
    fit
    predict
    '''
    def __init__(self, C=0.1, **kwargs):
        self.C = C
        # Python 3: replace the following line by
        # super().__init__(**kwargs)
        super(KernelSVM, self).__init__(**kwargs)

    def fit(self, X, y, tol=1e-1):
        n, p = X.shape
        assert (n == len(y))
    
        self.X_train = X
        self.y_train = y
        
        # Kernel matrix
        K = self.kernel_function_(X, X, **self.kernel_parameters)
        
        # Solve dual problem
        self.alpha = solve_qp(*svm_dual_soft_to_qp_kernel(K, y, C=self.C))
        
        # Compute support vectors and bias b
        sv = np.logical_and((self.alpha>tol), (self.C - self.alpha > tol))
        self.bias = y[sv] - K[sv].dot(self.alpha*y)
        self.bias =  self.bias.mean()

        self.support_vector_indices = np.nonzero(sv)[0]

        return self
        
    def predict(self, X, threshold):
        K_x = self.kernel(X, self.X)
        return np.where((K_x.dot(self.alpha * self.y) +  self.bias) >= threshold, 1, 0)
        
          
    def Accuracy_check(self,X, y, threshold=.5):
        return np.mean(self.predict(X, threshold)==y)

In [None]:
X_cross, y_cross, X_t_enc = spectrum_kernal(X_train_, Y_train_, X_test_, n=1, encoder=8, one_hot = True, normalise = False)

In [None]:
kernel = 'rbf'
sigma = 1.
C = 1.
model = KernelSVM(C=C, kernel=kernel, sigma=sigma)
y_pred = model.fit(X_cross, y_cross).predict(X_t_enc)
plot_decision_function(model, X_test, y_test,
                       title='SVM {} Kernel'.format(kernel))
print('Test Accur: {:.2%}'.format(model.Accuracy_check(X_t_enc, y_cross)))

In [66]:
#     models = {ksolveRR : 'k Ridge Reg', ksolveRR_2: 'weigh Ridge Reg', \
#               ksolveLRR: 'k Logistic Ridge Reg', KernelSVM : 'Kernal SVM'

c  = 0.1

# trial.params
sigma  = 0.00119788517147901 #trial.params['sigma'] #4.133127895830191 #4#4.119788517147901 #4.538118805230398
kenel = 0
lam = 1.2752298700618223e-14 #trial.params['lam'] #6.196055115305764e-14#0.00000000001#1.2752298700618223e-14#8.583924705371449e-15 8.583924705371449e-15 #
# tol = 
    
#     ksolveRR (self, X, y, lam= 0.0001, sigma=0.5, kernel=rbf_kernel)
#     ksolveRR_2 (self, X, y, lam= 0.0001, sigma=0.5, sample_weights = None, kernel = rbf_kernel
#     ksolveLRR (self, X, y, lam = 0.1, sigma = 4, max_iter=100, tol=1e-5, kernel=rbf_kernel
#     KernelSVM (self, X, y, C=0.1, lam = 0.1, sigma = 4, tol = 1e-1, kernel=rbf_kernel

trick_list = [rbf_kernel, Exponential, Laplacian, RationalQuadratic, InverseMultiquadratic, \
                        Cauchy, TStudent, ANOVA, Fourier, Tanimoto, Sorensen]
    
tol = 1e-3 #trial.suggest_loguniform('tol', 1e-7, 1e-0)

#     sigma=10, d=2, rbf=1.0, poly=1.0
    
#     lr  = trial.suggest_loguniform('lr', 2e-5, 1e-1)
    
#     normalise = trial.suggest_categorical('normalise', [False , True])
    
#     rbf = trial.suggest_loguniform('rbf', 1e-3, 3e+0)
    
#     poly = trial.suggest_loguniform('poly', 1e-3, 3e+0)
    
    
#     model = trial.suggest_categorical('models', [ksolveRR , ksolveRR_2, ksolveLRR, KernelSVM])
    
    # ksolveRR (self, X, y, lam= 0.0001, sigma=0.5, kernel=rbf_kernel)
    # ksolveRR_2 (self, X, y, lam= 0.0001, sigma=0.5, sample_weights = None, kernel = rbf_kernel
    # ksolveLRR (self, X, y, lam = 0.1, sigma = 4, max_iter=100, tol=1e-5, kernel=rbf_kernel
    # KernelSVM (self, X, y, C=0.1, lam = 0.1, sigma = 4, tol = 1e-1, kernel=rbf_kernel
    
#     models = {ksolveRR : 'k Ridge Reg', ksolveRR_2: 'weigh Ridge Reg', \
#               ksolveLRR: 'k Logistic Ridge Reg', KernelSVM : 'Kernal SVM', LogisticRegressionBinary: 'log reg'} 
#     
kernels = {rbf_kernel : 'rbf',
               Exponential : 'Exponential kernel (self, sigma=None)',
               Laplacian : 'Laplacian kernel (self, sigma=None)',
               Cauchy : 'Cauchy kernel (self, sigma=None)',
               
               RationalQuadratic : 'Rational quadratic kernel (self, c=1)',
               InverseMultiquadratic : 'Inverse multiquadratic kernel (self, c=1)',
               
               TStudent : 'T-Student kernel (self, degree=2)',
               
               ANOVA : 'ANOVA kernel (self, sigma=1., d=2)',
               
               Fourier : 'Fourier kernel (self, q=0.1)',
               
               Tanimoto : 'Tanimoto kernel',
               Sorensen : 'Sorensen kernel'
              }

if kernels[trick_list[kenel]] == 'ANOVA kernel (self, sigma=sigma, d=d)':
        kenel = trick_list[kenel](sigma=sigma, d=d)
        
elif kernels[trick_list[kenel]] == 'Exponential kernel (self, sigma=None)' or \
                    kernels[trick_list[kenel]] == 'Laplacian kernel (self, sigma=None)' or \
                    kernels[trick_list[kenel]] == 'Cauchy kernel (self, sigma=None)'or \
                    kernels[trick_list[kenel]] == 'rbf':

    kenel = trick_list[kenel](sigma=sigma)

elif kernels[trick_list[kenel]] == 'Rational quadratic kernel (self, c=1)' or \
                    kernels[trick_list[kenel]] == 'Inverse multiquadratic kernel (self, c=1)':
    kenel = trick_list[kenel](c=c)

elif kernels[trick_list[kenel]] == 'T-Student kernel (self, degree=2)':
    kenel = trick_list[kenel](degree=degree)

elif kernels[trick_list[kenel]] == 'Fourier kernel (self, q=0.1)':
    kenel = trick_list[kenel](q=q)

else:
    kenel = trick_list[kenel]()


n = 1

models = {KernelSVM : 'Kernal SVM'} 

#     accuracy = []
#     for model in models:
#     ipdb.set_trace()


X_cross, y_cross, X_t_enc = spectrum_kernal(X_train_, Y_train_, X_test_, n=n, encoder=8, one_hot = False, normalise = False)

for model in models:
    accuracy = []
    for i, (train_index, validate_index) in enumerate(kfold.split(X_cross)):
        X_train, y_train = X_cross[train_index], y_cross[train_index]
        X_valid, y_valid = X_cross[validate_index], y_cross[validate_index]

#             ipdb.set_trace()
        if models[model] == 'weigh Ridge Reg':
            sample_weights = np.random.rand(len(y_train))
            model_curr = model(X_train, y_train, lam = lam, sigma = sigma, sample_weights = sample_weights, kernel = kenel)

        elif models[model] == 'k Logistic Ridge Reg':
            model_curr = model(X_train, y_train, lam = lam, sigma = sigma, max_iter=100, tol = tol, kernel = kenel)
        elif models[model] == 'k Ridge Reg':
#                 model_curr = model(X_train, y_train, lam= lam, sigma = sigma, d=d, rbf=rbf, poly=poly, kernel=rbf_kernel)

            model_curr = model(X_train, y_train, lam= lam, kernel=kenel)
        elif models[model] == 'log reg':
            model_curr = model(lr=lr, num_iter=5000, batch_size=1)
        else:
            model_curr = model(X_train, y_train, C=c, lam = lam,\
                               tol= tol, kernel = kenel)

        if models[model] == 'log reg':
            model_curr.fit(X_train, y_train)
        else:
            model_curr.fit()


        accuracy.append(model_curr.Accuracy_check(X_valid, y_valid, threshold=0.5))
        print(f'accurracy fold {i}: {accuracy[i]}')

print(f'\nAverage accuracy {models[model]} is : {np.mean(accuracy)}\n')


Mean of empty slice.


invalid value encountered in double_scalars


invalid value encountered in greater_equal



accurracy fold 0: 0.4875
accurracy fold 1: 0.4725
accurracy fold 2: 0.5475
accurracy fold 3: 0.495
accurracy fold 4: 0.5025

Average accuracy Kernal SVM is : 0.501

time: 38 s


In [None]:
# accurracy fold 0: 0.4875
# accurracy fold 1: 0.4725
# accurracy fold 2: 0.5475
# accurracy fold 3: 0.495

In [56]:
# 0.6535 = 0.68800
# 0.657 = 0.69200

time: 525 µs


In [74]:
# Cehckinf full model
model = ksolveRR(X_cross, y_cross, lam = lam, sigma = sigma, kernel = kenel)
# model = svm_primal_soft_to_qp(X_cross, y_cross, C=1)

model.fit()

model.Accuracy_check(X_cross, y_cross, threshold=0.5)

1.0

time: 544 ms


# Predictions

In [75]:
model = ksolveRR(X_cross, y_cross, lam = lam, sigma = sigma, kernel = kenel)
model.fit()
y_pred = model.predict(X_t_enc, 0.5)

time: 443 ms


In [76]:
X = np.arange(1000).reshape(-1, 1)
sample = pd.DataFrame(data=X, columns=['Id'])
sample.head()

Unnamed: 0,Id
0,0
1,1
2,2
3,3
4,4


time: 9.91 ms


In [77]:
sample['Bound'] = y_pred

time: 1.85 ms


In [78]:
sample.tail()

Unnamed: 0,Id,Bound
995,995,0
996,996,0
997,997,1
998,998,1
999,999,1


time: 9.87 ms


In [79]:
sample.to_csv('./ksolveRR_0.6625000000000001_cv_rbf_kernel_sigma_'+str(sigma)+'_lam_'+str(lam)+'.csv', index=False)

time: 3.99 ms
