In [2]:
import numpy as np
import pandas as pd

# Load data
train_data = pd.read_csv("train.csv", header=None)
test_data = pd.read_csv("test.csv", header=None)

In [3]:
# Convert labels to {1, -1}
train_data[4] = train_data[4].apply(lambda x: 1 if x == 1 else -1)
test_data[4] = test_data[4].apply(lambda x: 1 if x == 1 else -1)

# Extract features (X) and labels (y)
X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values
X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values

In [4]:
# Define the SVM with stochastic sub-gradient descent
def svm_sgd(X, y, C, gamma_schedule, max_epochs=100):
    """
    SVM using stochastic sub-gradient descent in the primal domain.
    
    Parameters:
    X: Features (numpy array)
    y: Labels (numpy array)
    C: Regularization parameter
    gamma_schedule: Function to calculate learning rate gamma_t
    max_epochs: Maximum number of epochs
    
    Returns:
    weights, biases: Model parameters
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    b = 0
    
    for epoch in range(max_epochs):
        # Shuffle the training data
        indices = np.random.permutation(n_samples)
        X, y = X[indices], y[indices]
        
        for t, (x_i, y_i) in enumerate(zip(X, y)):
            gamma_t = gamma_schedule(epoch * n_samples + t)
            condition = y_i * (np.dot(w, x_i) + b) < 1
            
            if condition:
                w = (1 - gamma_t) * w + gamma_t * C * y_i * x_i
                b = b + gamma_t * C * y_i
            else:
                w = (1 - gamma_t) * w
    
    return w, b

In [5]:
# Define the learning rate schedules
def gamma_schedule_a(t, gamma_0=0.1, a=0.01):
    return gamma_0 / (1 + gamma_0 * a * t)

def gamma_schedule_b(t, gamma_0=0.1):
    return gamma_0 / (1 + t)

# Training and testing functions
def compute_error(X, y, w, b):
    predictions = np.sign(np.dot(X, w) + b)
    return np.mean(predictions != y)

In [6]:
# Set hyperparameters
C_values = [100 / 873, 500 / 873, 700 / 873]
gamma_0 = 0.1
a = 0.01

# Training with both schedules
results = {}
for C in C_values:
    # Schedule (a)
    w_a, b_a = svm_sgd(X_train, y_train, C, lambda t: gamma_schedule_a(t, gamma_0, a))
    train_error_a = compute_error(X_train, y_train, w_a, b_a)
    test_error_a = compute_error(X_test, y_test, w_a, b_a)
    
    # Schedule (b)
    w_b, b_b = svm_sgd(X_train, y_train, C, lambda t: gamma_schedule_b(t, gamma_0))
    train_error_b = compute_error(X_train, y_train, w_b, b_b)
    test_error_b = compute_error(X_test, y_test, w_b, b_b)
    
    # Store results
    results[C] = {
        'Schedule_a': {'train_error': train_error_a, 'test_error': test_error_a},
        'Schedule_b': {'train_error': train_error_b, 'test_error': test_error_b},
        'weights_diff': np.linalg.norm(w_a - w_b),
        'bias_diff': np.abs(b_a - b_b)
    }

In [8]:
results_df = pd.DataFrame(results).T
print(results_df)

                                                 Schedule_a  \
0.114548  {'train_error': 0.05963302752293578, 'test_err...   
0.572738  {'train_error': 0.022935779816513763, 'test_er...   
0.801833  {'train_error': 0.020642201834862386, 'test_er...   

                                                 Schedule_b weights_diff  \
0.114548  {'train_error': 0.18577981651376146, 'test_err...     0.045637   
0.572738  {'train_error': 0.0481651376146789, 'test_erro...     0.131222   
0.801833  {'train_error': 0.027522935779816515, 'test_er...     0.080821   

         bias_diff  
0.114548  0.143152  
0.572738   0.50323  
0.801833  0.408985  


In [None]:
# Schedule a generally has lower training and testing errors than Schedule b, indicating it may converge more effectively for this task.
# Testing errors are slightly higher than training errors, which is expected due to generalization.
# These weights and bias differences seem to decrease as C increases, 
# indicating that higher C values might lead to more similar models between the schedules.

In [9]:
from scipy.optimize import minimize
import numpy as np

# Dual SVM implementation with optimization
def dual_svm_linear_optimized(X, y, C):
    """
    Optimized Dual SVM implementation for linear kernel.
    """
    n_samples = X.shape[0]
    
    # Compute the kernel matrix (linear kernel)
    K = np.dot(X, X.T)
    
    # Define the objective function
    P = np.outer(y, y) * K  # Precompute for vectorized form
    def objective(alpha):
        return 0.5 * alpha @ P @ alpha - np.sum(alpha)
    
    # Equality constraint: sum(alpha * y) = 0
    constraints = {'type': 'eq', 'fun': lambda alpha: np.dot(alpha, y)}
    
    # Bounds: 0 <= alpha_i <= C
    bounds = [(0, C) for _ in range(n_samples)]
    
    # Initial guess
    alpha_0 = np.zeros(n_samples)
    
    # Solve the optimization problem using SLSQP
    result = minimize(
        objective, alpha_0, method='SLSQP', bounds=bounds, constraints=constraints,
        options={'maxiter': 100, 'ftol': 1e-6, 'disp': False}
    )
    
    return result.x  # Lagrange multipliers (alpha)

# Function to recover weights and bias
def recover_weights_bias(alpha, X, y):
    """
    Recover weights and bias from dual SVM.
    """
    w = np.sum((alpha * y)[:, None] * X, axis=0)
    support_vectors = alpha > 1e-5
    b = np.mean(y[support_vectors] - np.dot(X[support_vectors], w))
    return w, b

# Test with one value of C for development
C = 100 / 873

# Train the dual SVM
alpha = dual_svm_linear_optimized(X_train, y_train, C)

# Recover weights and bias
w, b = recover_weights_bias(alpha, X_train, y_train)

# Compute training and test errors
train_error = compute_error(X_train, y_train, w, b)
test_error = compute_error(X_test, y_test, w, b)

# Display results
print(f"Train Error: {train_error:.4f}")
print(f"Test Error: {test_error:.4f}")
print(f"Weights: {w}")
print(f"Bias: {b}")


  fx = wrapped_fun(x)
  g = append(wrapped_grad(x), 0.0)
  a_eq = vstack([con['jac'](x, *con['args'])


Train Error: 0.0092
Test Error: 0.0100
Weights: [-0.94292626 -0.6514918  -0.73372188 -0.04102192]
Bias: 1.0745032328893676


In [11]:
import numpy as np
from scipy.optimize import minimize
import pandas as pd

# Gaussian kernel function
def gaussian_kernel(X, gamma):
    """
    Compute the Gaussian kernel matrix for a dataset X.
    """
    n_samples = X.shape[0]
    K = np.zeros((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            K[i, j] = np.exp(-np.linalg.norm(X[i] - X[j])**2 / gamma)
    return K

# Dual SVM with Gaussian kernel
def dual_svm_gaussian(X, y, C, gamma):
    """
    Dual SVM implementation for Gaussian kernel using scipy.optimize.minimize.
    """
    n_samples = X.shape[0]
    K = gaussian_kernel(X, gamma)
    
    # Define the objective function
    P = np.outer(y, y) * K
    def objective(alpha):
        return 0.5 * alpha @ P @ alpha - np.sum(alpha)
    
    # Equality constraint: sum(alpha * y) = 0
    constraints = {'type': 'eq', 'fun': lambda alpha: np.dot(alpha, y)}
    
    # Bounds: 0 <= alpha_i <= C
    bounds = [(0, C) for _ in range(n_samples)]
    
    # Initial guess
    alpha_0 = np.zeros(n_samples)
    
    result = minimize(
        objective, alpha_0, method='SLSQP', bounds=bounds, constraints=constraints,
        options={'maxiter': 100, 'ftol': 1e-6, 'disp': False}
    )
    
    return result.x

# Prediction function for Gaussian kernel
def predict_gaussian(alpha, X_train, y_train, X_test, gamma):
    """
    Predict labels for test data using the dual SVM with Gaussian kernel.
    """
    n_samples = X_test.shape[0]
    K = np.zeros((n_samples, X_train.shape[0]))
    for i in range(n_samples):
        for j in range(X_train.shape[0]):
            K[i, j] = np.exp(-np.linalg.norm(X_test[i] - X_train[j])**2 / gamma)
    
    # Ensure correct broadcasting
    weighted_sum = np.sum((alpha * y_train)[None, :] * K, axis=1)
    return np.sign(weighted_sum)

# Hyperparameters
C_values = [100 / 873, 500 / 873, 700 / 873]
gamma_values = [0.1, 0.5, 1, 5, 100]

# Store results
gaussian_results = {}

for C in C_values:
    for gamma in gamma_values:
        # Train the dual SVM
        alpha = dual_svm_gaussian(X_train, y_train, C, gamma)
        
        # Predict on training and test data
        train_predictions = predict_gaussian(alpha, X_train, y_train, X_train, gamma)
        test_predictions = predict_gaussian(alpha, X_train, y_train, X_test, gamma)
        
        # Compute errors
        train_error = np.mean(train_predictions != y_train)
        test_error = np.mean(test_predictions != y_test)
        
        # Count support vectors
        support_vectors = np.sum(alpha > 1e-5)
        
        # Store results
        gaussian_results[(C, gamma)] = {
            'train_error': train_error,
            'test_error': test_error,
            'support_vectors': support_vectors
        }

gaussian_results_df = pd.DataFrame(gaussian_results).T
gaussian_results_df.columns = ['Train Error', 'Test Error', 'Support Vectors']

print(gaussian_results_df)

                Train Error  Test Error  Support Vectors
0.114548 0.1       0.000000       0.002            869.0
         0.5       0.000000       0.002            825.0
         1.0       0.000000       0.002            805.0
         5.0       0.008028       0.006            442.0
         100.0     0.003440       0.004            290.0
0.572738 0.1       0.000000       0.002            869.0
         0.5       0.000000       0.002            731.0
         1.0       0.000000       0.002            556.0
         5.0       0.000000       0.002            208.0
         100.0     0.000000       0.000            116.0
0.801833 0.1       0.000000       0.002            868.0
         0.5       0.000000       0.002            694.0
         1.0       0.000000       0.002            528.0
         5.0       0.000000       0.002            194.0
         100.0     0.000000       0.000             99.0
