# Introduction to Machine Learning
## Home Assignment 1
## Imports

In [1]:
import requests
from zipfile import ZipFile
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random

## Data Preporcessing

In [2]:
# we get the data
_URL = 'http://www.di.ens.fr/appstat/spring-2020/project/data2.zip'
r = requests.get(_URL, allow_redirects=True)
with open('data2.zip', 'wb') as f:
    f.write(r.content)
with ZipFile('data2.zip', 'r') as zipObj:
    zipObj.extractall()


In [3]:
training_data_path = os.path.join('data2', 'train')
test_data_path = os.path.join('data2', 'test')

In [30]:

number_of_samples_per_letter = 300
pixel_height = 28
pixel_width = 28
number_of_pixels = pixel_height*pixel_width
batch_size = 50
shape = (pixel_height*pixel_width)
seed = 54
# We define a flow of images: 

def generate (dir_path): 
    training_data = []
    training_labels = []
    for item in os.listdir(dir_path):
        tmp = os.path.join(dir_path,item)
        if os.path.isdir(tmp):
            d, l = generate (tmp)
            training_data = training_data + d
            training_labels = training_labels + l
        elif tmp[-3:] == "png":
            im = Image.open(tmp)
            training_data.append(np.reshape(np.asarray(im), shape))
            if(dir_path[-1:] == 'A'):
                training_labels.append(1)
            else: 
                training_labels.append(-1)
    return training_data, training_labels





            
def make (dir_path): 
    d, l = generate(dir_path)
    data = np.array(d)/255
    label = np.reshape(np.array(l), (np.shape(l)[0],1))
    L = np.arange(np.shape(data)[0])
    random.shuffle(L)
    return data[L], label[L]


training_data, training_labels = make(training_data_path)
test_data, test_labels = make(test_data_path)


print(np.shape(training_data))
print(np.shape(training_labels))

(900, 784)
(900, 1)


## Utility functions 

In [86]:
def logistic_loss_lasso (theta, x, y, lambd): 
    return 1/np.shape(y)[0] * np.sum(np.log(1+np.exp(-y*(x@theta))), axis=0) + lambd*np.sum(np.abs(theta), axis=0)


def S_lambda (x, lambd):
    if (np.abs(x) <= lambd):
        return 0
    return x - lambd*np.sign(x)
    

def update_i_coordinate_lasso (theta, x, y, lambd, i):
    x_i_T = np.reshape(x[:,i], (1, np.shape(y)[0]))
    if (i > 0 and i < number_of_pixels):
        x__i = np.hstack([x[:,:i], x[:,i+1:]])
        theta_i = np.concatenate([theta[:i], theta[i+1:]])
    elif (i == 0):
        x__i  = x[:, 1:]
        theta_i = theta[1:]
    else: 
        x__i = x[:, :(number_of_pixels - 1)]
        theta_i = theta[:(number_pixels - 1)]
    a = S_lambda(x_i_T@(y-(x_i_T@x__i@theta_i)), lambd)/(x_i_T@np.reshape(x[:,i], (np.shape(y)[0], 1)))
    print(a)
    return a
    
    
def logistic_loss_ridge (theta, x, y, lambd): 
    return 1/np.shape(y)[0] * np.sum(np.log(1+np.exp(-y*(x@theta))), axis=0) + lambd*np.sum(theta**2, axis=0)

def logistic_gradient_ridge (theta, x, y, lambd):
    k = -y*x
    e = np.exp(k@theta)
    u_ = k*e
    return np.reshape(1/np.shape(y)[0] * np.sum(u_/(1+e), axis=0), np.shape(theta)) + lambd*2*theta

def sigmoid(t):
    return 1/(1 + np.exp(-t))       

def logistic_classify(theta, data): 
    return 2*(sigmoid(data@theta)>0.5) - 1

def test(theta, data_set, labels): 
    accuracy = 0
    for x, y in zip (data_set, labels):
        accuracy += (logistic_classify(theta, x) == y) 
    return accuracy/len(labels)

def plot(training_error, test_error, t, t_s, title):
    plt.plot(t, training_error, label="training_error")
    plt.plot(t, test_error, label="test_error")
    plt.legend()
    plt.ylabel("error_rate")
    plt.xlabel(t_s)
    plt.title(title)
    plt.show(block=False)
    



## Gradient Descent and Coordinate Desecent

In [87]:
def gradient_descent_ridge (starting_point, training_data, training_labels, eta, t, lambd):
    theta = starting_point
    for i in range (t):
        delta_theta = -logistic_gradient_ridge(theta, training_data, training_labels, lambd)
        theta += eta*delta_theta
    return theta

def coordinate_descent_lasso (starting_point, training_data, training_labels, eta, t, lambd):
    theta = starting_point
    for j in range(t):
        for i in range(number_of_pixels):
            theta[i, 0] = update_i_coordinate_lasso(theta, training_data, training_labels, lambd, i)[0, 0]
    return theta

In [88]:
def evaluate (descent, title):
    training_errors = []
    test_errors = []
    lambdas = np.arange(0, 10, 0.1)
    for lambd in lambdas:
        theta = descent(0.001*np.ones((pixel_height*pixel_width, 1)), training_data, training_labels, 0.01, 100, lambd)
        tmp = test(theta, test_data, test_labels)
        print(tmp)
        training_errors.append(test(theta, training_data, training_labels))
        test_errors.append(tmp)


    plot(1-np.array(training_errors), 1-np.array(test_errors), lambdas, "lambda", title)
    
    
evaluate(coordinate_descent_lasso, "LASSO")
evaluate(gradient_descent_ridge, "RIDGE")

[[-52.87019644]]
[[5105.78606473]]
[[-647003.7185553]]
[[89287630.77384183]]
[[-1.344871e+10]]
[[2.28544309e+12]]
[[-4.48251951e+14]]
[[1.04607164e+17]]
[[-2.73830603e+19]]
[[8.03354795e+21]]
[[-2.64754975e+24]]
[[9.85515389e+26]]
[[-4.18579112e+29]]
[[2.0016668e+32]]
[[-1.0611811e+35]]
[[5.74564518e+37]]
[[-3.07014547e+40]]
[[1.60027492e+43]]
[[-7.90638204e+45]]
[[3.58438547e+48]]
[[-1.42054068e+51]]
[[4.64821644e+53]]
[[-1.23427649e+56]]
[[2.51486293e+58]]
[[-3.84009021e+60]]
[[4.51038609e+62]]
[[-3.98320629e+64]]
[[2.53174919e+66]]
[[-3.06454119e+67]]
[[3.001821e+69]]
[[-3.92927702e+71]]
[[6.06799497e+73]]
[[-1.08295227e+76]]
[[2.25067756e+78]]
[[-5.55852201e+80]]
[[1.62023987e+83]]
[[-5.46462552e+85]]
[[2.17882643e+88]]
[[-9.94343628e+90]]
[[5.00550658e+93]]
[[-2.7341034e+96]]
[[1.61008262e+99]]
[[-9.88964885e+101]]
[[6.0038983e+104]]
[[-3.52149466e+107]]
[[2.00689623e+110]]
[[-1.13470991e+113]]
[[6.32248801e+115]]
[[-3.40142051e+118]]
[[1.73153172e+121]]
[[-7.93772421e+123]]
[[3.0



[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]
[[nan]]


KeyboardInterrupt: 

In [None]:
def K_fold(descent, K, lambd):
    n = np.shape(training_labels)[0]
    assert (n % K == 0)
    test_error = []
    for i in range(0, n, K):
            theta = descent(0.001*np.ones((pixel_height*pixel_width, 1)), 
                            np.concatenate([training_data[:i], training_data[i+K:n]]), 
                            np.concatenate([training_labels[:i], training_labels[i+K:n]]),
                            0.01, 100, lambd)
            test_error.append(test(theta, training_data[i:i+K], training_labels[i:i+K]))
    a = sum(test_error)/len(test_error)
    print(a)
    return a
                              
def choose_lambda (descent, K=90):
    l = np.array([K_fold(descent, K, i/10) for i in range(50)])
    plt.plot(np.arange(0, 5, 0.1), 1 - l, label="crossvalidation_error")
    plt.ylabel("error rate")
    plt.xlabel("lambda")
    plt.title("cross validation")
    plt.show(block=False)
    return np.argmax(l)/10
    

choose_lambda(gradient_descent_ridge)