In [1]:
import numpy as np

In [2]:
# Two independent, initially empty row buffers: one for the training file,
# one for the testing file.
training_set, testing_set = [], []

In [3]:
def _read_rows(path):
    """Parse a whitespace-delimited numeric text file into a list of float rows.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one ``list[float]`` per non-blank line of the file.
    """
    with open(path, 'r') as f:
        # Blank lines (e.g. a trailing newline at end of file) are skipped so
        # they do not become empty rows that would make the later np.array()
        # conversion ragged.
        return [[float(x) for x in line.split()] for line in f if line.strip()]


# Both lists are rebuilt from scratch rather than appended to, so re-running
# this cell cannot duplicate rows.
training_set = _read_rows('data_in.txt')
testing_set = _read_rows('data_out.txt')

In [4]:
# NumPy copies of both datasets, in the same order as the source lists, so
# that individual columns can be sliced out later.
np_training_set, np_testing_set = map(np.array, (training_set, testing_set))

In [5]:
def transform_data(transform_index, x1, x2):
    """Build the nonlinear feature vector for one two-dimensional point.

    The full feature list is
    ``(1, x1, x2, x1**2, x2**2, x1*x2, |x1 - x2|, |x1 + x2|)``; the result is
    its first ``transform_index + 1`` entries.

    Args:
        transform_index: Index of the last feature to keep, 0 through 7.
        x1: First input coordinate.
        x2: Second input coordinate.

    Returns:
        A tuple of length ``transform_index + 1``. Index 0 yields the
        one-element tuple ``(1,)``.

    Raises:
        ValueError: If ``transform_index`` is not one of 0..7.
    """
    features = (
        1,
        x1,
        x2,
        x1**2,
        x2**2,
        x1 * x2,
        np.abs(x1 - x2),
        np.abs(x1 + x2),
    )

    # Membership in a range compares by equality, so integer-valued inputs
    # such as np.int64(3) are accepted while 3.5 or 8 are rejected.
    if transform_index not in range(len(features)):
        raise ValueError(
            f"transform_index must be an integer in 0..{len(features) - 1}, "
            f"got {transform_index!r}"
        )

    return features[: int(transform_index) + 1]

In [6]:
def calc_error(classification, w, z_transform):
    """Return the fraction of points the linear classifier ``w`` gets wrong.

    Each entry ``z`` of ``z_transform`` is classified as ``sign(w . z)`` and
    compared with the label at the same position in ``classification``.

    Args:
        classification: Sequence of target labels, indexable by position.
        w: Weight vector.
        z_transform: Sequence of feature vectors, one per labelled point.

    Returns:
        Number of mismatching predictions divided by ``len(classification)``.
    """
    misclassified = sum(
        1
        for idx, features in enumerate(z_transform)
        # np.sign gives 0 for a point exactly on the boundary, which only
        # matches a label that is itself 0.
        if np.sign(np.dot(w, features)) != classification[idx]
    )
    return misclassified / len(classification)

In [7]:
def get_x_values(dataset):
    """Extract the first two columns of every row of ``dataset``.

    Args:
        dataset: Two-dimensional array-like whose rows have at least two
            columns.

    Returns:
        A new array with one ``(column 0, column 1)`` pair per input row.
    """
    points = np.array(dataset)
    return np.column_stack([points[:, col] for col in (0, 1)])

In [8]:
def run_linear_regression_experiment(transform_index, train_num):
    """Fit linear regression on the first ``train_num`` training rows and score it.

    Reads the module-level ``training_set`` / ``np_training_set`` and
    ``testing_set`` / ``np_testing_set``. Column 2 of each array is used as
    the label; columns 0 and 1 are the inputs.

    Args:
        transform_index: Feature-transform index forwarded to
            ``transform_data``.
        train_num: Number of leading training rows used for fitting; the
            remaining rows form the validation set.

    Returns:
        Tuple ``(E_in, E_out, E_val)``: classification error on the fitting
        rows, on the testing set, and on the held-out validation rows.
    """
    def featurize(points):
        # Map every (x1, x2) pair through the selected nonlinear transform.
        return [transform_data(transform_index, x1, x2) for (x1, x2) in points]

    inputs = get_x_values(training_set)
    labels = np_training_set[:, 2]

    # Head of the training data is used for fitting, the tail for validation.
    z_train = featurize(inputs[:train_num])
    z_val = featurize(inputs[train_num:])
    z_test = featurize(get_x_values(testing_set))

    y_train = labels[:train_num]
    y_val = labels[train_num:]
    y_test = np_testing_set[:, 2]

    # One-shot least-squares solution via the pseudo-inverse.
    w = np.linalg.pinv(z_train) @ y_train

    return (
        calc_error(y_train, w, z_train),
        calc_error(y_test, w, z_test),
        calc_error(y_val, w, z_val),
    )

Question 1

In [9]:
# Number of leading training rows used for fitting in the loops below; the
# remaining rows are held out for validation.
num_training_points = 25

In [10]:
# Validation error for each transform index k = 3..7 when the first
# `num_training_points` rows are used for fitting.
for k in range(3, 8):
    E_in, E_out, E_val = run_linear_regression_experiment(
        transform_index=k, train_num=num_training_points
    )
    print(f"k = {k} --> E_val = {E_val}")

k = 3 --> E_val = 0.3
k = 4 --> E_val = 0.5
k = 5 --> E_val = 0.2
k = 6 --> E_val = 0.0
k = 7 --> E_val = 0.1


Question 2

In [11]:
# Out-of-sample (testing set) error for each transform index k = 3..7 when
# the first `num_training_points` rows are used for fitting.
for k in range(3, 8):
    E_in, E_out, E_val = run_linear_regression_experiment(
        transform_index=k, train_num=num_training_points
    )
    print(f"k = {k} --> E_out = {E_out}")

k = 3 --> E_out = 0.42
k = 4 --> E_out = 0.416
k = 5 --> E_out = 0.188
k = 6 --> E_out = 0.084
k = 7 --> E_out = 0.072


Question 3

In [32]:
def run_linear_regression_experiment_switch(transform_index, train_num=10, validate_num=25):
    """Fit on the last ``train_num`` training rows, validate on the first ``validate_num``.

    Reads the module-level ``training_set`` / ``np_training_set`` and
    ``testing_set`` / ``np_testing_set``. Column 2 of each array is used as
    the label; columns 0 and 1 are the inputs.

    Args:
        transform_index: Feature-transform index forwarded to
            ``transform_data``.
        train_num: Number of trailing training rows used for fitting
            (non-negative). Defaults to 10.
        validate_num: Number of leading training rows used for validation.
            Defaults to 25.

    Returns:
        Tuple ``(E_in, E_out, E_val)``: classification error on the fitting
        rows, on the testing set, and on the validation rows.

    Note:
        The two subsets are taken independently from opposite ends of the
        data, so they overlap whenever ``train_num + validate_num`` exceeds
        the number of training rows.
    """
    def featurize(points):
        # Map every (x1, x2) pair through the selected nonlinear transform.
        return [transform_data(transform_index, x1, x2) for (x1, x2) in points]

    inputs = get_x_values(training_set)
    labels = np_training_set[:, 2]

    # Clamp at 0 so that a train_num larger than the dataset selects every
    # row, exactly as a plain negative slice such as [-10:] would.
    train_start = max(len(inputs) - train_num, 0)

    z_train = featurize(inputs[train_start:])
    z_val = featurize(inputs[:validate_num])
    z_test = featurize(get_x_values(testing_set))

    y_train = labels[train_start:]
    y_val = labels[:validate_num]
    y_test = np_testing_set[:, 2]

    # One-shot least-squares solution via the pseudo-inverse.
    w = np.matmul(np.linalg.pinv(z_train), y_train)

    E_in = calc_error(y_train, w, z_train)
    E_out = calc_error(y_test, w, z_test)
    E_val = calc_error(y_val, w, z_val)

    return E_in, E_out, E_val

In [33]:
# Validation error for each transform index k = 3..7 with the swapped split
# (fit on the last 10 rows, validate on the first 25). This must call the
# `_switch` variant: run_linear_regression_experiment requires a second
# argument and would raise TypeError if called with k alone.
for k in range(3, 8):
    E_in, E_out, E_val = run_linear_regression_experiment_switch(k)
    print(f"k = {k} --> E_val = {E_val}")

k = 3 --> E_val = 0.28
k = 4 --> E_val = 0.36
k = 5 --> E_val = 0.2
k = 6 --> E_val = 0.08
k = 7 --> E_val = 0.12


Question 4

In [35]:
# Out-of-sample error for each transform index k = 3..7 with the swapped
# split (fit on the last 10 rows, validate on the first 25). This must call
# the `_switch` variant: run_linear_regression_experiment requires a second
# argument and would raise TypeError if called with k alone.
for k in range(3, 8):
    E_in, E_out, E_val = run_linear_regression_experiment_switch(k)
    print(f"k = {k} --> E_out = {E_out}")

k = 3 --> E_out = 0.396
k = 4 --> E_out = 0.388
k = 5 --> E_out = 0.284
k = 6 --> E_out = 0.192
k = 7 --> E_out = 0.196


Question 6

In [15]:
# Number of Monte Carlo trials passed to calc_expected_e below.
num_trials = 100000

In [16]:
def calc_expected_e(trial_num, seed=None):
    """Estimate E[e1], E[e2] and E[min(e1, e2)] for independent uniform draws.

    Each trial draws ``e1`` and ``e2`` independently from ``np.random``'s
    default uniform distribution on [0, 1); the function averages ``e1``,
    ``e2`` and ``min(e1, e2)`` over all trials.

    Args:
        trial_num: Number of trials; must be positive.
        seed: Optional seed for a private ``np.random.default_rng``
            generator, making the result reproducible. When ``None`` the
            global ``np.random`` state is used.

    Returns:
        Tuple ``(mean_e1, mean_e2, mean_min)`` of Python floats.

    Raises:
        ValueError: If ``trial_num`` is not positive.
    """
    if trial_num <= 0:
        raise ValueError(f"trial_num must be positive, got {trial_num!r}")

    # Both the legacy module and a Generator expose uniform(size=...).
    rng = np.random if seed is None else np.random.default_rng(seed)

    # One vectorised draw: column 0 holds e1, column 1 holds e2.
    draws = rng.uniform(size=(trial_num, 2))

    e1_mean, e2_mean = draws.mean(axis=0)
    e_min_mean = draws.min(axis=1).mean()

    return float(e1_mean), float(e2_mean), float(e_min_mean)

In [17]:
# Displays the averages of e1, e2 and min(e1, e2) over `num_trials` trials.
calc_expected_e(num_trials)

(0.5002927418203492, 0.5000939658012538, 0.3332756901006206)