In [1]:
import numpy as np

In [2]:
training_features = np.loadtxt("features.train")
testing_features = np.loadtxt("features.test")

In [3]:
def split_data(one_versus_all, dataset, choice1, choice2):
    to_return = dataset
    if not one_versus_all:
        to_return = []
        
        for digit, intensity, symmetry in dataset:
            if digit == choice1 or digit == choice2:
                to_return.append([digit, intensity, symmetry])

    to_return = np.array(to_return)
    digits = to_return[:, 0]
    intensity = to_return[:, 1]
    symmetry = to_return[:, 2]
    
    return digits, intensity, symmetry

In [4]:
def generate_one_versus_all_yn(digits, one_choice):
    yn = np.ones(len(digits))
    
    for i, digit in enumerate(digits):
        if digit != one_choice:
            yn[i] = -1
            
    return np.array(yn)

In [5]:
def parse_digits(digits, choice1, choice2):
    result = []
    
    for digit in digits:
        if digit == choice1 or digit == choice2:
            result.append(digit)
            
    return digits

In [6]:
def generate_one_versus_one_yn(digits, choice1, choice2):
    parsed_digits = parse_digits(digits, choice1, choice2)
    yn = np.ones(len(parsed_digits))
    
    for i, digit in enumerate(digits):
        if digit == choice2:
            yn[i] = -1
    
    return yn

In [7]:
def calc_error(classification, w, z_transform):
    yn = classification
    counter = 0
    for i in range(len(z_transform)):
        z = z_transform[i]
        w_val = np.sign(np.dot(w, z))
        
        if w_val != yn[i]:
            counter += 1
    
    return counter / len(classification)

In [8]:
def transform_data(x1, x2):
    return (1, x1, x2)

In [9]:
def calculate_w_reg(Z, ZT, lambda_value, y):
    step_1 = np.matmul(ZT, Z)
    step_2 = step_1 + lambda_value * np.identity(len(step_1))
    step_3 = np.linalg.inv(step_2)
    step_4 = np.matmul(step_3, ZT)
    w_reg = np.matmul(step_4, y)
    
    return w_reg

In [10]:
def run_regularized_linear_regression(lambda_value, one_versus_all, choice, choice1=-1):
    digits_train, intensities_train, symmetries_train = split_data(one_versus_all, training_features, choice, choice1)
    digits_test, intensities_test, symmetries_test = split_data(one_versus_all, testing_features, choice, choice1)
    
    if (one_versus_all):
        yn_train = generate_one_versus_all_yn(digits_train, choice)
        yn_test = generate_one_versus_all_yn(digits_test, choice)
    
    else:
        yn_train = generate_one_versus_one_yn(digits_train, choice, choice1)
        yn_test = generate_one_versus_one_yn(digits_test, choice, choice1)

    xn_train = np.column_stack((intensities_train, symmetries_train))
    xn_train_transform = np.array([transform_data(x1, x2) for (x1, x2) in xn_train])
    
    xn_test= np.column_stack((intensities_test, symmetries_test))
    xn_test_transform = np.array([transform_data(x1, x2) for (x1, x2) in xn_test])

    w_reg = calculate_w_reg(xn_train_transform, np.transpose(xn_train_transform), lambda_value, yn_train)
    
    Ein = calc_error(yn_train, w_reg, xn_train_transform)
    Eout = calc_error(yn_test, w_reg, xn_test_transform)
    
    return Ein, Eout

Q7

In [11]:
choices = [5, 6, 7, 8, 9]

for choice in choices:
    Ein, _ = run_regularized_linear_regression(lambda_value=1, one_versus_all=True, choice=choice)
    print(f"E_in for {choice} vs. all: {Ein}")

E_in for 5 vs. all: 0.07625840076807022
E_in for 6 vs. all: 0.09107118365107666
E_in for 7 vs. all: 0.08846523110684405
E_in for 8 vs. all: 0.07433822520916199
E_in for 9 vs. all: 0.08832807570977919


Q8

In [12]:
def transform_data(x1, x2):
    return (1, x1, x2, x1 * x2, x1 * x1, x2 * x2)

In [22]:
choices = [0, 1, 2, 3, 4]

for choice in choices:
    _, Eout = run_regularized_linear_regression(lambda_value=1, one_versus_all=True, choice=choice)
    print(f"E_out for {choice} vs. all: {Eout}")

E_out for 0 vs. all: 0.10662680617837568
E_out for 1 vs. all: 0.02192326856003986
E_out for 2 vs. all: 0.09865470852017937
E_out for 3 vs. all: 0.08271051320378675
E_out for 4 vs. all: 0.09965122072745392


Q9

In [14]:
Eout_no_transform = []
Eout_transform = []

In [15]:
def transform_data(x1, x2):
    return (1, x1, x2)

In [23]:
choices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

for choice in choices:
    _, Eout = run_regularized_linear_regression(lambda_value=1, one_versus_all=True, choice=choice)
    Eout_no_transform.append(Eout)
    print(f"E_out for {choice} vs. all: {Eout}")

E_out for 0 vs. all: 0.10662680617837568
E_out for 1 vs. all: 0.02192326856003986
E_out for 2 vs. all: 0.09865470852017937
E_out for 3 vs. all: 0.08271051320378675
E_out for 4 vs. all: 0.09965122072745392
E_out for 5 vs. all: 0.07922272047832586
E_out for 6 vs. all: 0.08470353761833582
E_out for 7 vs. all: 0.07324364723467862
E_out for 8 vs. all: 0.08271051320378675
E_out for 9 vs. all: 0.08819133034379671


In [17]:
def transform_data(x1, x2):
    return (1, x1, x2, x1 * x2, x1 * x1, x2 * x2)

In [24]:
choices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

for choice in choices:
    _, Eout = run_regularized_linear_regression(lambda_value=1, one_versus_all=True, choice=choice)
    Eout_transform.append(Eout)
    print(f"E_out for {choice} vs. all: {Eout}")

E_out for 0 vs. all: 0.10662680617837568
E_out for 1 vs. all: 0.02192326856003986
E_out for 2 vs. all: 0.09865470852017937
E_out for 3 vs. all: 0.08271051320378675
E_out for 4 vs. all: 0.09965122072745392
E_out for 5 vs. all: 0.07922272047832586
E_out for 6 vs. all: 0.08470353761833582
E_out for 7 vs. all: 0.07324364723467862
E_out for 8 vs. all: 0.08271051320378675
E_out for 9 vs. all: 0.08819133034379671


In [19]:
# show that b, c, and d are wrong
print("Classifier -->\tEout no transform | Eout with transform")
for i, (e1, e2) in enumerate(zip(Eout_no_transform, Eout_transform)):
    if (e1 > e2):
        print(f"{i} vs. all --> {e1} > {e2}")
    
    else:
        print(f"{i} vs. all --> {e1} ≤ {e2}")

Classifier -->	Eout no transform | Eout with transform
0 vs. all --> 0.11509715994020926 > 0.10662680617837568
1 vs. all --> 0.02242152466367713 > 0.02192326856003986
2 vs. all --> 0.09865470852017937 ≤ 0.09865470852017937
3 vs. all --> 0.08271051320378675 ≤ 0.08271051320378675
4 vs. all --> 0.09965122072745392 ≤ 0.09965122072745392
5 vs. all --> 0.07972097658196313 > 0.07922272047832586
6 vs. all --> 0.08470353761833582 ≤ 0.08470353761833582
7 vs. all --> 0.07324364723467862 ≤ 0.07324364723467862
8 vs. all --> 0.08271051320378675 ≤ 0.08271051320378675
9 vs. all --> 0.08819133034379671 ≤ 0.08819133034379671


By looking at the difference in the Eout between no transformation and the transformation, we see that the overfit measure is always less than or equal to 0, so there is no overfitting occuring and thus a is wrong.

In [20]:
# investigate e
print((1 - (0.07922272047832586 / 0.07972097658196313 )) * 100)


0.6249999999999978


Q10

In [21]:
Ein_0, Eout_0 = run_regularized_linear_regression(lambda_value=0.01, one_versus_all=False, choice=1, choice1=5)
Ein_1, Eout_1 = run_regularized_linear_regression(lambda_value=1, one_versus_all=False, choice=1, choice1=5)

print(f"lambda = 0.01 --> Ein = {Ein_0} | Eout = {Eout_0}")
print(f"lambda = 1.00 --> Ein = {Ein_1} | Eout = {Eout_1}")

lambda = 0.01 --> Ein = 0.004484304932735426 | Eout = 0.02830188679245283
lambda = 1.00 --> Ein = 0.005124919923126201 | Eout = 0.025943396226415096
