# Homework 9
12110418 
庄子鲲
## Problem 1
Using Python and NumPy, write a class named SVMClassifier that implements the SVM algorithm with slack variables and kernels such as polynomial, Gaussian, and sigmoid (use the cvxopt package to solve the quadratic programming problem for the Lagrange multipliers).

In [33]:
import numpy as np
from cvxopt import matrix, solvers

class SVMClassifier:
    """Soft-margin kernel SVM for binary classification.

    The dual problem is solved as a quadratic program with ``cvxopt``::

        minimise    1/2 * a^T (y y^T * K) a - 1^T a
        subject to  0 <= a_i <= C   and   y^T a = 0

    Labels must be encoded as -1 / +1.

    Args:
        kernel: One of ``'linear'``, ``'poly'``, ``'rbf'`` or ``'sigmoid'``.
        degree: Exponent used by the polynomial kernel.
        gamma: Kernel parameter. For ``'rbf'`` it is the Gaussian width,
            i.e. ``K = exp(-||x - z||^2 / (2 * gamma**2))`` (note that this
            is not scikit-learn's ``exp(-gamma * ||x - z||^2)`` convention).
            For ``'sigmoid'`` it scales the inner product. When left as
            ``None`` it is set to ``1 / n_features`` during ``fit``.
        coef0: Additive constant of the polynomial and sigmoid kernels.
        C: Upper bound on the Lagrange multipliers (slack penalty).
        verbose: When true (the default), ``fit`` prints the support-vector
            shape, the full multiplier vector and the bias. When false,
            those prints and the solver's progress output are suppressed.

    Attributes set by ``fit``:
        K: Gram matrix of the training data.
        alpha: Multipliers of the support vectors, shape ``(n_sv, 1)``.
        support_vectors: Training rows whose multiplier exceeds 1e-8.
        support_vector_labels: Their labels, shape ``(n_sv, 1)``.
        bias: Intercept of the decision function.
    """

    # Multipliers at or below this value are treated as zero.
    _SV_TOL = 1e-8
    # A multiplier counts as strictly below C when alpha < C * (1 - _MARGIN_RTOL).
    _MARGIN_RTOL = 1e-6
    # Kernel name -> name of the method that evaluates it.
    _KERNEL_METHODS = {
        'linear': 'linear_kernel',
        'poly': 'poly_kernel',
        'rbf': 'rbf_kernel',
        'sigmoid': 'sigmoid_kernel',
    }

    def __init__(self, kernel='linear', degree=3, gamma=None, coef0=0.0, C=1.0,
                 verbose=True):
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.C = C
        self.verbose = verbose
        self.K = None
        self.alpha = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.bias = None

    def linear_kernel(self, x1, x2):
        """Return the Gram matrix ``x1 @ x2.T``."""
        return np.dot(x1, x2.T)

    def poly_kernel(self, x1, x2):
        """Return ``(x1 @ x2.T + coef0) ** degree``."""
        return (np.dot(x1, x2.T) + self.coef0)**self.degree

    def rbf_kernel(self, x1, x2):
        """Return ``exp(-||a - b||^2 / (2 * gamma**2))`` for all row pairs.

        Both inputs must be 2-D with one sample per row.
        """
        x1 = np.asarray(x1, dtype=float)
        x2 = np.asarray(x2, dtype=float)
        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b avoids building the
        # (n, m, d) array of pairwise differences.
        sq_dists = (
            np.sum(x1**2, axis=1)[:, np.newaxis]
            + np.sum(x2**2, axis=1)[np.newaxis, :]
            - 2.0 * np.dot(x1, x2.T)
        )
        # Round-off can push tiny distances slightly below zero.
        np.maximum(sq_dists, 0.0, out=sq_dists)
        return np.exp(-sq_dists / (2 * self.gamma**2))

    def sigmoid_kernel(self, x1, x2):
        """Return ``tanh(gamma * x1 @ x2.T + coef0)``."""
        return np.tanh(self.gamma * np.dot(x1, x2.T) + self.coef0)

    def _kernel_matrix(self, x1, x2):
        """Evaluate the configured kernel between the rows of x1 and x2.

        Raises:
            ValueError: If ``self.kernel`` is not a supported kernel name.
        """
        try:
            method_name = self._KERNEL_METHODS[self.kernel]
        except KeyError:
            raise ValueError(
                f"Unknown kernel {self.kernel!r}; expected one of "
                f"{sorted(self._KERNEL_METHODS)}"
            ) from None
        return getattr(self, method_name)(x1, x2)

    def _compute_bias(self, sv_gram, alpha, labels):
        """Estimate the intercept from the support vectors.

        Only vectors with alpha strictly below C satisfy ``y_i * f(x_i) = 1``
        exactly, so the average is taken over those. If every support vector
        sits at the bound, all of them are used instead.

        Args:
            sv_gram: Kernel matrix between the support vectors, (n_sv, n_sv).
            alpha: Multipliers of the support vectors (any shape, n_sv items).
            labels: Labels of the support vectors (any shape, n_sv items).
        """
        alpha = np.asarray(alpha, dtype=float).reshape(-1)
        labels = np.asarray(labels, dtype=float).reshape(-1)
        if alpha.size == 0:
            return 0.0
        residual = labels - np.dot(sv_gram, alpha * labels)
        on_margin = alpha < self.C * (1.0 - self._MARGIN_RTOL)
        if on_margin.any():
            residual = residual[on_margin]
        return np.mean(residual)

    def fit(self, X, y):
        """Train the classifier.

        Args:
            X: Training samples, shape ``(n_samples, n_features)``.
            y: Labels in {-1, +1}; a 1-D array or an ``(n_samples, 1)`` column.

        Raises:
            ValueError: If the shapes are inconsistent, the labels are not
                exactly the two classes -1 and +1, or the kernel is unknown.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so that 1-D and column-vector labels behave the same.
        y = np.asarray(y, dtype=float).reshape(-1)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )
        if not np.array_equal(np.unique(y), [-1.0, 1.0]):
            raise ValueError("y must contain both classes, encoded as -1 and +1")

        # Both the RBF and the sigmoid kernel read gamma.
        if self.gamma is None and self.kernel in ('rbf', 'sigmoid'):
            self.gamma = 1.0 / n_features  # Default gamma
        self.K = self._kernel_matrix(X, X)

        P = matrix(np.outer(y, y) * self.K, tc='d')
        q = matrix(-np.ones(n_samples), tc='d')
        # Box constraint 0 <= alpha <= C written as G @ alpha <= h.
        G = matrix(np.vstack((np.eye(n_samples), -np.eye(n_samples))), tc='d')
        h = matrix(np.hstack((self.C * np.ones(n_samples), np.zeros(n_samples))), tc='d')
        A = matrix(y.reshape(1, -1), tc='d')
        b = matrix(0.0, tc='d')

        # Solve the quadratic programming problem
        if self.verbose:
            solution = solvers.qp(P, q, G, h, A, b)
        else:
            quiet = dict(solvers.options, show_progress=False)
            solution = solvers.qp(P, q, G, h, A, b, options=quiet)
        if solution['status'] != 'optimal':
            import warnings
            warnings.warn(
                f"cvxopt finished with status {solution['status']!r}; "
                "the fitted model may be inaccurate",
                RuntimeWarning,
            )

        # Extract Lagrange multipliers from the solution
        alpha_all = np.array(solution['x']).flatten()
        # Support vectors have non-zero Lagrange multipliers
        sv_indices = np.where(alpha_all > self._SV_TOL)[0]
        self.support_vectors = X[sv_indices]
        self.support_vector_labels = y[sv_indices].reshape(-1, 1)
        if self.verbose:
            print('shape of support vector: ', self.support_vectors.shape)
            print('alpha: ', alpha_all.reshape(-1, 1))
        self.alpha = alpha_all[sv_indices].reshape(-1, 1)

        # The support-vector Gram matrix is a sub-block of the training one.
        sv_gram = self.K[np.ix_(sv_indices, sv_indices)]
        self.bias = self._compute_bias(sv_gram, self.alpha, self.support_vector_labels)
        if self.verbose:
            print(self.bias)

    def decision_function(self, X):
        """Return the signed score ``sum_i alpha_i y_i K(x, sv_i) + bias``.

        Args:
            X: Samples to score, shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples, 1)``.

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``self.kernel`` is not a supported kernel name.
        """
        if self.alpha is None or self.support_vectors is None or self.bias is None:
            raise RuntimeError("SVMClassifier must be fitted before predicting")
        X = np.asarray(X, dtype=float)
        # Force both factors to columns so the product cannot broadcast
        # into an (n_sv, n_sv) matrix.
        dual_coef = (
            np.asarray(self.alpha, dtype=float).reshape(-1, 1)
            * np.asarray(self.support_vector_labels, dtype=float).reshape(-1, 1)
        )
        return np.dot(self._kernel_matrix(X, self.support_vectors), dual_coef) + self.bias

    def predict(self, X):
        """Predict labels as the sign of the decision function.

        Returns:
            Array of shape ``(n_samples, 1)`` holding -1.0 or +1.0
            (0.0 where the score is exactly zero).
        """
        # Predict using the sign of the decision function
        return np.sign(self.decision_function(X))
        
# Example usage: detect the letter "C" against every other letter, using the
# first 1000 rows of the letter-recognition file.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Each row is read as strings: column 0 is the letter, the rest are features.
data = np.loadtxt("letter-recognition.data", dtype=str, delimiter=',')

# Features are cast to integers; labels become a +1 / -1 column vector.
X = data[:1000, 1:].astype(int)
y = np.where(data[:1000, 0] == "C", 1, -1).reshape(-1, 1)

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# NOTE(review): the recorded output shows all 800 training rows becoming
# support vectors with identical multipliers. Presumably tanh saturates on the
# unscaled features; consider scaling X or lowering gamma, and confirm.
svm = SVMClassifier(kernel='sigmoid', gamma=0.1, C=1.0)
svm.fit(X_train, y_train)
predictions = svm.predict(X_test)
print('accuracy:', accuracy_score(y_test, predictions))

     pcost       dcost       gap    pres   dres
 0: -1.0436e+02 -1.6504e+03  9e+03  3e+00  1e-14
 1: -5.8536e+01 -8.5050e+02  1e+03  1e-01  2e-14
 2: -5.3928e+01 -1.0232e+02  5e+01  2e-04  7e-15
 3: -5.3984e+01 -5.4486e+01  5e-01  2e-06  5e-15
 4: -5.4000e+01 -5.4005e+01  5e-03  2e-08  4e-15
 5: -5.4000e+01 -5.4000e+01  5e-05  2e-10  4e-15
Optimal solution found.
shape of support vector:  (800, 16)
alpha:  [[0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [0.03492885]
 [