In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [33]:
# Read the feature table from disk and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='data/significant-features-data.csv')
data.head(n=5)

Unnamed: 0,Previous qualification (grade),Admission grade,Age at enrollment,Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (approved),Curricular units 1st sem (grade),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target,y_labels
0,0.284211,0.34,0.056604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.372093,0.488889,0.766182,Dropout,-1
1,0.684211,0.5,0.037736,0.230769,0.133333,0.230769,0.741722,0.26087,0.181818,0.3,0.735897,0.0,0.732558,0.111111,0.640687,Graduate,1
2,0.284211,0.313684,0.037736,0.230769,0.0,0.0,0.0,0.26087,0.0,0.0,0.0,0.0,0.372093,0.488889,0.766182,Dropout,-1
3,0.284211,0.258947,0.056604,0.230769,0.177778,0.230769,0.711447,0.26087,0.30303,0.25,0.667692,0.0,0.209302,0.0,0.124174,Graduate,1
4,0.052632,0.489474,0.528302,0.230769,0.2,0.192308,0.653422,0.26087,0.181818,0.3,0.7,0.0,0.732558,0.111111,0.640687,Graduate,1


In [34]:
# 'Target' (string label) and 'y_labels' (numeric label) are the outputs, not inputs.
# 'Unnamed: 0' is the row index that was written into the CSV; it carries no
# signal and is unscaled, so it must not be fed to the kernels as a feature.
# errors='ignore' keeps this cell working if the CSV is later read with index_col=0.
feature_columns = (
    data.columns
    .drop(['Target', 'y_labels'])
    .drop(['Unnamed: 0'], errors='ignore')
)
X = data[feature_columns].to_numpy(dtype=float)
y = data['y_labels'].to_numpy(dtype=int)

In [35]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    shuffle=True,
)

In [36]:
# Append a constant column of 1s to both splits (acts as the intercept feature).
# NOTE: re-running this cell appends another column each time.
X_train, X_test = (
    np.hstack((part, np.ones((len(part), 1))))
    for part in (X_train, X_test)
)

In [37]:
# Polynomial Kernel
def polynomial_kernel(X, Y, degree=3, gamma=1.0, coef0=1.0):
    """
    Polynomial kernel matrix between the rows of X and the rows of Y.

    Entry (i, j) is (gamma * <X[i], Y[j]> + coef0) ** degree.
    """
    inner_products = X @ Y.T
    shifted = gamma * inner_products + coef0
    return shifted ** degree

In [54]:
# Gaussian Kernel
def gaussian_kernel(X, Y, gamma=1.0, degree=None):
    """
    Gaussian (RBF) kernel matrix between the rows of X and the rows of Y.

    K(x, x') = exp( - gamma * ||x - x'||^2 )

    Parameters
    ----------
    X : 2-D array, one sample per row.
    Y : 2-D array, one sample per row, same number of columns as X.
    gamma : scale applied to the squared Euclidean distance.
    degree : ignored by this kernel (presumably accepted so both kernels can be
        called with the same keyword set -- confirm against callers).

    Returns
    -------
    Array of shape (X.shape[0], Y.shape[0]) with entries in [0, 1] for gamma >= 0.
    """
    X_sq = np.sum(X**2, axis=1)[:, None]
    Y_sq = np.sum(Y**2, axis=1)[None, :]
    # ||x - y||^2 expanded as ||x||^2 + ||y||^2 - 2<x, y> (avoids an explicit pairwise loop)
    sq_dists = X_sq + Y_sq - 2 * X.dot(Y.T)
    # Floating-point cancellation can leave tiny negative values (typically on the
    # diagonal when X is Y); a squared distance is never negative, so clamp at 0
    # to keep kernel values from exceeding 1.
    sq_dists = np.maximum(sq_dists, 0)
    return np.exp(-gamma * sq_dists)

In [None]:
def train_kernel_svm_sgd(X, y, kernel, epochs=1000, C=1.0, random_state=None, **kernel_params):
    """
    Kernelized SGD for the hinge-loss SVM using a precomputed Gram matrix.

    Each iteration draws one training example uniformly at random; if that
    example violates the margin under the current coefficients, its counter
    beta_j is incremented. The coefficients at step t are
    alpha = beta / (2 * C * t), and the function returns their average over
    all iterations.

    The returned alphas are non-negative weights. Labels are applied by the
    decision function  f(x) = sum_i alpha_i * y_i * K(x_i, x),  so alpha itself
    must not carry the sign of y (otherwise y_i * y_i cancels the label).

    Parameters
    ----------
    X : 2-D array of training samples, one per row.
    y : 1-D array of labels, assumed to be -1 / +1.
    kernel : callable kernel(A, B, **kernel_params) returning the Gram matrix
        between the rows of A and the rows of B.
    epochs : number of SGD iterations (one sampled example per iteration).
    C : regularisation constant used in the step scaling 1 / (2 * C * t).
    random_state : optional int seed for reproducible sampling. When None the
        global numpy random stream is used.
    **kernel_params : forwarded unchanged to `kernel`.

    Returns
    -------
    1-D array of length X.shape[0] with the averaged alpha coefficients.

    Raises
    ------
    ValueError
        If epochs is smaller than 1 (the average would be undefined).
    """
    if epochs < 1:
        raise ValueError("epochs must be a positive integer")

    y = np.asarray(y)
    n = X.shape[0]
    # precompute Gram matrix once; column j holds K(x_i, x_j) for every i
    K = kernel(X, X, **kernel_params)

    # Both the np.random module and a RandomState instance expose randint.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # beta accumulates raw (unsigned) violation counts; alphas are derived from it each step
    beta = np.zeros(n)
    alpha_sum = np.zeros(n)

    for t in range(1, epochs + 1):
        # compute current alphas from beta
        alpha = beta / (2 * C * t)

        # uniformly pick one example at random
        j = rng.randint(n)

        # functional margin of example j: y_j * sum_i alpha_i * y_i * K(x_i, x_j)
        margin_j = y[j] * np.dot(alpha * y, K[:, j])

        # margin violated -> hinge loss is active -> count it.
        # The increment is +1 (not y_j): the label is already applied via alpha * y.
        if margin_j < 1:
            beta[j] += 1

        alpha_sum += alpha

    # return the averaged alpha over all iterations
    return alpha_sum / epochs

In [48]:
def predict_kernel_svm(alpha, X_train, y_train, X_eval, kernel_func, **kernel_params):
    """
    Predict -1 / +1 labels for X_eval from the sign of the decision function
      f(x) = sum_i alpha_i * y_i * K(x_i, x)

    Parameters
    ----------
    alpha : 1-D array of per-training-sample coefficients.
    X_train : 2-D array of the training samples the alphas belong to.
    y_train : 1-D array of training labels, aligned with alpha.
    X_eval : 2-D array of samples to classify.
    kernel_func : callable kernel_func(A, B, **kernel_params) returning the
        kernel matrix between the rows of A and the rows of B.
    **kernel_params : forwarded unchanged to `kernel_func`.

    Returns
    -------
    1-D float array with one entry per row of X_eval, each either -1.0 or +1.0.
    A decision value of exactly 0 is assigned to +1 so that the output never
    contains a 0, which is not a valid class label.
    """
    # rows index training samples, columns index evaluation samples
    K_eval = kernel_func(X_train, X_eval, **kernel_params)
    decision = np.dot((alpha * y_train), K_eval)
    # np.sign would emit 0 on ties; map ties to the positive class instead
    return np.where(decision >= 0, 1.0, -1.0)