In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.cluster import KMeans
import random as rand

In [2]:
num_points = 100
num_iterations = 10000

In [3]:
def generate_training_points(num_points):
    xn = np.array([(1, rand.uniform(-1, 1), rand.uniform(-1, 1)) for _ in range(num_points)])
    yn = np.array([np.sign(x2 - x1 + 0.25 * np.sin(np.pi * x1)) for _, x1, x2 in xn])
    
    return xn, yn

In [4]:
def calc_error(yn, y_pred):
    # misclassified points are opposite sign so will be -1 when multiplied
    error = yn * y_pred
    return np.count_nonzero(error == -1) / len(yn)

In [5]:
def run_svm(xn, yn, C=1):
    model = SVC(C=C, kernel='rbf', gamma=1.5)
    model.fit(xn, yn)
    
    Ein = np.sum(model.predict(xn)*yn<0)/(1.*yn.size)
    
    return Ein, model

Q13

In [6]:
counter = 0

for i in range(num_iterations):
    xn, yn = generate_training_points(num_points)
    Ein, _ = run_svm(xn, yn)
    
    if (Ein == 0):
        counter += 1
        
print(f"Rate of data set not being separable by RBF: {100 * counter / num_iterations}%")

Rate of data set not being separable by RBF: 4.97%


Q14

In [7]:
def rbf1(x,mu0,gamma=1.5):
    return( np.exp(-gamma*np.sum((x-mu0)**2)) )

In [8]:
def rbf(xn, centers, gamma):
    z = np.zeros((xn.shape[0], 1+centers.shape[0]))  # N x (K+1), extra 1 for intercept
    z[:,0] = np.ones(xn.shape[0])
    
    for i in range(centers.shape[0]):
        z[:,i+1] = np.apply_along_axis(rbf1, 1, xn, centers[i],gamma)
    
    return z

In [9]:
def calculate_w_reg(Z, ZT, lambda_value, y):
    step_1 = np.matmul(ZT, Z)
    step_2 = step_1 + lambda_value * np.identity(len(step_1))
    step_3 = np.linalg.inv(step_2)
    step_4 = np.matmul(step_3, ZT)
    w_reg = np.matmul(step_4, y)
    
    return w_reg

In [38]:
def regular_rbf(xn, yn, n_clusters, gamma):
    kmeans = KMeans(n_clusters=n_clusters).fit(xn)
    
    centers = kmeans.cluster_centers_
    
    z = rbf(xn, centers, gamma=gamma)
    
    w = calculate_w_reg(z, z.T, 0, yn)
    
    y_pred = np.dot(z, w)
    
    Ein = np.sum(y_pred*yn<0)/(1.*yn.size)
    
    return Ein, w, centers

In [39]:
def run_experiment(num_clusters, gamma=1.5):
    xn, yn = generate_training_points(num_points)
    xn_test, yn_test = generate_training_points(num_points)
    
    # regular RBF
    Ein_regular, w, centers = regular_rbf(xn, yn, num_clusters, gamma=gamma)
    
    z_out = rbf(xn_test, centers, gamma=gamma)
    
    y_pred = np.dot(z_out, w)
    
    Eout_regular = np.sum(y_pred*yn_test<0)/(1.*yn_test.size)
    
    # kernel RBF
    _, model = run_svm(xn, yn, C=1e6)
    y_pred_regular = model.predict(xn_test)
    Ein_kernel = np.sum(y_pred_regular*yn<0)/(1.*yn.size)
    Eout_kernel = np.sum(y_pred_regular*yn_test<0)/(1.*yn_test.size)
    
    return Ein_regular, Eout_regular, Ein_kernel, Eout_kernel

In [40]:
eout_regular_arr = []
eout_kernel_arr = []

for i in range(100):
    _, Eout_regular, _, Eout_kernel = run_experiment(num_clusters=9)
    
    eout_regular_arr.append(Eout_regular)
    eout_kernel_arr.append(Eout_kernel)

In [41]:
counter = 0

for (Eout_regular, Eout_kernel) in zip(eout_regular_arr, eout_kernel_arr):
    if (Eout_kernel < Eout_regular):
        counter += 1
        
print(f"Kernel beating regular: {100 * counter / 100}%")

Kernel beating regular: 79.0%


Q15

In [15]:
eout_regular_arr = []
eout_kernel_arr = []

for i in range(100):
    _, Eout_regular, _, Eout_kernel = run_experiment(num_clusters=12)
    
    eout_regular_arr.append(Eout_regular)
    eout_kernel_arr.append(Eout_kernel)

In [16]:
counter = 0

for (Eout_regular, Eout_kernel) in zip(eout_regular_arr, eout_kernel_arr):
    if (Eout_kernel < Eout_regular):
        counter += 1
        
print(f"Kernel beating regular: {100 * counter / 100}%")

Kernel beating regular: 61.0%


Q16

In [17]:
Ein_9 = []
Ein_12 = []
Eout_9 = []
Eout_12 = []

In [18]:
for i in range(100):
    Ein, Eout, _, _ = run_experiment(num_clusters=9)
    Ein_9.append(Ein)
    Eout_9.append(Eout)
    
    Ein, Eout, _, _ = run_experiment(num_clusters=12)
    Ein_12.append(Ein)
    Eout_12.append(Eout)

In [19]:
Ein_down_Eout_up = 0
Ein_up_Eout_down = 0
Ein_up_Eout_up = 0
Ein_down_Eout_down = 0

for ein9, ein12, eout9, eout12 in zip(Ein_9, Ein_12, Eout_9, Eout_12):
    if (ein9 < ein12):
        if eout9 < eout12:
            Ein_up_Eout_up += 1
        else:
            Ein_up_Eout_down += 1
    
    else:
        if eout9 < eout12:
            Ein_down_Eout_up += 1
        else:
            Ein_down_Eout_down += 1
            
Ein_down_Eout_up, Ein_up_Eout_down, Ein_up_Eout_up, Ein_down_Eout_down

(28, 9, 12, 51)

Q17

In [29]:
Ein_15 = []
Ein_2 = []
Eout_15 = []
Eout_2 = []

In [30]:
for i in range(100):
    Ein, Eout, _, _ = run_experiment(num_clusters=9, gamma=1.5)
    Ein_15.append(Ein)
    Eout_15.append(Eout)
    
    Ein, Eout, _, _ = run_experiment(num_clusters=9, gamma=2)
    Ein_2.append(Ein)
    Eout_2.append(Eout)

In [31]:
Ein_down_Eout_up = 0
Ein_up_Eout_down = 0
Ein_up_Eout_up = 0
Ein_down_Eout_down = 0

for ein15, ein2, eout15, eout2 in zip(Ein_15, Ein_2, Eout_15, Eout_2):
    if (ein15 < ein2):
        if eout15 < eout2:
            Ein_up_Eout_up += 1
        else:
            Ein_up_Eout_down += 1
    
    else:
        if eout15 < eout2:
            Ein_down_Eout_up += 1
        else:
            Ein_down_Eout_down += 1
            
Ein_down_Eout_up, Ein_up_Eout_down, Ein_up_Eout_up, Ein_down_Eout_down

(17, 18, 36, 29)

Q18

In [23]:
counter = 0

for i in range(1000):
    Ein, _, _, _ = run_experiment(num_clusters=9, gamma=1.5)
    
    if (Ein == 0):
        counter += 1

print(f"Percentage of time regular RBF achieves Ein=0: {100 * counter / 1000}%")

Percentage of time regular RBF achieves Ein=0: 2.9%
