In [1]:
import numpy as np
from statistics import mean
import random as rand
import matplotlib.pyplot as plt

In [2]:
num_experiments = 1000
num_points = 1000
noise_rate = 0.1

In [3]:
Ein = []

In [4]:
def target_function(x1, x2):
    value = x1 * x1 + x2 * x2 - 0.6
    
    if value > 0:
        return 1
    return -1

In [5]:
def classify_point(m, b, x, y):
    expected_value = m * x + b
    
    if y >= expected_value:
        return 1
    else:
        return -1

In [6]:
def calculate_E(E, f, g):
    E_curr = 0

    for i in range(len(g)):
        if f[i] != g[i]:
            E_curr += 1

    E.append(E_curr / num_points)

In [7]:
def generate_noise(xn, yn):
    indices = rand.sample(range(num_points), int(noise_rate * num_points))
    xn_ret, yn_ret = [], []
    
    for i in range(num_experiments):
        xn_ret.append(xn[i])
        if i in indices:
            yn_ret.append(-1 * yn[i])
        else:
            yn_ret.append(yn[i])
    return xn_ret, yn_ret

In [8]:
def run_experiment():
    bias = [1] * num_points
    x1 = [rand.uniform(-1, 1) for _ in range(num_points)]
    x2 = [rand.uniform(-1, 1) for _ in range(num_points)]

    xn = np.column_stack((x1, x2))
    yn = [target_function(x, y) for (x, y) in xn]
    
    xn, yn = generate_noise(xn, yn)
    
    xn_bias = np.column_stack((bias, xn))
    
    matrix_X = np.array(xn)
    matrix_X_bias = np.array(xn_bias)
    matrix_y = np.array(yn)
    
    w = np.matmul(np.linalg.pinv(matrix_X_bias), matrix_y)

    m_g = -1 * w[1] / w[2]
    b_g = -1 * w[0] / w[2]
    
    gn = [classify_point(m_g, b_g, x, y) for (x, y) in matrix_X]

    calculate_E(Ein, yn, gn)

In [9]:
for i in range(num_experiments):
    run_experiment()

In [10]:
mean(Ein)

0.500112

Non-Linear Transformation

In [11]:
def transform_data(x1, x2):
    return (1, x1, x2, x1 * x2, x1**2, x2**2)

In [12]:
weights = []

In [13]:
def classify_point_nonlinear(w, z):
    signal = np.dot(w, z)
    
    if signal > 0:
        return 1
    else:
        return -1

In [14]:
g_a = [-1, -0.05, 0.08, 0.13, 1.5, 1.5]
g_b = [-1, -0.05, 0.08, 0.13, 1.5, 15 ]
g_c = [-1, -0.05, 0.08, 0.13, 15 , 1.5]
g_d = [-1, -1.50, 0.08, 0.13, .05, .05]
g_e = [-1, -0.05, 0.08, 1.50, .15, .15]

g_set = [g_a, g_b, g_c, g_d, g_e]
error = []

In [15]:
def compare_w_with_g(w, z_transform):
    agreement_ret = []
    for z in z_transform:
        w_val = np.sign(np.dot(w, z))

        agreement = []
        for g in g_set:
            counter = 0
            
            g_val = np.sign(np.dot(g, z))
            
            for _ in range(num_points):
                if w_val == g_val:
                    counter += 1
            
            agreement.append(counter / num_points)
        agreement_ret.append(agreement)

    return agreement_ret

In [16]:
def run_experiment():
    x1 = [rand.uniform(-1, 1) for _ in range(num_points)]
    x2 = [rand.uniform(-1, 1) for _ in range(num_points)]

    xn = np.column_stack((x1, x2))
    yn = [target_function(x, y) for (x, y) in xn]
    
    z_noise, yn = generate_noise(xn, yn)
    
    z_transform = [transform_data(x, y) for (x, y) in z_noise]

    matrix_y = np.array(yn)
    
    w = np.matmul(np.linalg.pinv(z_transform), matrix_y)
    
    error_curr = compare_w_with_g(w, z_transform)
    error.append(error_curr)

In [17]:
for i in range(num_experiments):
    run_experiment()

In [18]:
final = []

In [20]:
for i in range(5):
    temp = []
    for e_curr in error:
        for point in e_curr:
            temp.append(point[i])
    final.append(mean(temp))

In [21]:
final

[0.961583, 0.664859, 0.663679, 0.631732, 0.558792]

Out of Sample Error

In [22]:
Eout = []

In [23]:
def generate_out_of_sample():
    x1 = [rand.uniform(-1, 1) for _ in range(num_points)]
    x2 = [rand.uniform(-1, 1) for _ in range(num_points)]
    return x1, x2

In [24]:
for i in range(num_experiments):
    x1, x2 = generate_out_of_sample()
    xn = np.column_stack((x1, x2))
    yn = [target_function(x, y) for (x, y) in xn]
    
    z_noise, yn = generate_noise(xn, yn)
    z_transform = [transform_data(x, y) for (x, y) in z_noise]
    
    w = np.matmul(np.linalg.pinv(z_transform), yn)
    f_classfication = yn
    w_classfication = [classify_point_nonlinear(w, z) for z in z_transform]
    
    calculate_E(Eout, f_classfication, w_classfication)

mean(Eout)

0.123954