In [1]:
from losses import *
import numpy as np
from scipy.special import expit

# Load the autoreload extension and use mode 2 so edited project modules are
# re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
# Seed NumPy's global RNG so that checks relying on random draws are repeatable.
np.random.seed(0)

# Testing losses.py

$$f_{true}(x) = x_0 \cdot (-3) + x_1 \cdot 2 + x_2 \cdot 0$$

The true values are:
$$y_0 = -1, y_1=-2, y_2=25$$
The predictions should be:
$$pred_0 = 6$$
$$pred_1 = 0$$
$$pred_2 = 5$$
$$error_0 = -7,  error_1 = -2, error_2 = 20$$
$$loss_0 = 49,  loss_1 = 4, loss_2 = 400$$
so the total loss should be:
$$453/(2 \cdot 3) = 75.5$$

the MSE gradient should be:
$$-1/3*[-7*[1,1,1]-2*[0, -1, 2]+20*[-5, 5, 10]] = [35.6666, -31.66666, -63.0]$$

## MSE

In [2]:
# Absolute tolerance shared by every numeric comparison in this notebook.
eps = 1e-8

# Three-sample fixture: the targets come from y = -3*x_0 + 2*x_1, while the
# weights under test are deliberately different from the generating ones.
tx = np.array([[1, 1, 1], [0, -1, 2], [-5, 5, 10]])
y = -3 * tx[:, 0] + 2 * tx[:, 1]
w = np.array([3, 2, 1])

# Hand-computed value from the markdown derivation above: 453 / (2 * 3).
expected_loss = 75.5
loss_mse = compute_mse_loss(y, tx, w)
assert type(loss_mse) is float, "type of loss is incorrect"
assert np.abs(expected_loss - loss_mse) < eps, "Expected loss is not equal to mse_loss."

# Hand-computed MSE gradient for the fixture (see the markdown derivation above).
expected_gradient = np.array([35.6666666666, -31.66666666666, -63.0])
sg = compute_mse_gradient(y, tx, w)
assert sg.shape == expected_gradient.shape, "Shape of gradient is not correct"
# Compare component by component so a failure stops at the first bad entry.
for expected_component, computed_component in zip(expected_gradient, sg):
    assert np.abs(expected_component - computed_component) < eps, "Expected gradient is not equal to sg"


## Logistic loss

In [3]:

# Probe the sigmoid at large-magnitude and near-zero inputs and compare it
# against SciPy's expit as the reference.
test = np.array([-100, -20, -1, 0, 1, 100, 5000])
sig = sigmoid(test)
expected_sigmoid = expit(test)
assert expected_sigmoid.shape == sig.shape, "Sigmoid shape doesn't match."
for position, (reference, computed) in enumerate(zip(expected_sigmoid, sig)):
    assert np.abs(reference - computed) < eps, "Sigmoid has a wrong value at {}".format(position)

# --- Plain logistic loss -----------------------------------------------------
# Reference value for the (y, tx, w) fixture defined in the MSE cell.
expected_log_loss = -107.29766178581356
log_loss = compute_logistic_loss(y, tx, w)
assert type(log_loss) is float, "Log loss has wrong type"
assert np.abs(expected_log_loss - log_loss) < eps, "Wrong log-loss value"

# --- Regularized logistic loss -----------------------------------------------
lambda_ = 0.2
# Presumably (lambda_ / 2) * ||w||^2 = 0.1 * 14 for w = [3, 2, 1];
# confirm against the penalty actually implemented in losses.py.
expected_reg_term = 1.4
expected_reg_log_loss = expected_reg_term + expected_log_loss
reg_log_loss = compute_regularized_logistic_loss(y, tx, w, lambda_)
assert type(reg_log_loss) is float, "Reg log loss is not a float"
assert np.abs(expected_reg_log_loss - reg_log_loss) < eps, "Reg log loss has a wrong value"


# Reference gradient of the unregularized logistic loss for the fixture.
expected_log_grad = np.array([122.03099163, -120.53593688, -233.06940113])
log_grad = compute_logistic_gradient(y, tx, w)
assert log_grad.shape == expected_log_grad.shape, "Log gradient has an incorrect shape"
for position, (reference, computed) in enumerate(zip(expected_log_grad, log_grad)):
    assert np.abs(reference - computed) < eps, "Log gradient has an incorrect value at {}".format(position)

# Expected contribution of the penalty to the gradient. The values equal
# lambda_ * w for lambda_ = 0.2 and w = [3, 2, 1]; confirm against losses.py.
expected_reg_term_grad = np.array([0.6, 0.4, 0.2])
expected_reg_log_grad = expected_log_grad + expected_reg_term_grad
reg_log_grad = compute_regularized_logistic_gradient(y, tx, w, lambda_)
assert expected_reg_log_grad.shape == reg_log_grad.shape, "Reg log grad has an incorrect shape"
for i in range(len(expected_reg_log_grad)):
    # The message names the regularized gradient so a failure here is not
    # mistaken for a failure of the unregularized gradient check.
    assert np.abs(expected_reg_log_grad[i] - reg_log_grad[i]) < eps, "Reg log gradient has an incorrect value at {}".format(i)


# Testing helpers.py

## prepare dimensions

In [4]:
from helpers import prepare_dimensions
# Both inputs are 1-D on purpose so that prepare_dimensions has something to
# reshape.
y_to_reshape = np.array([1,1,1])
tx_ = np.array([1,1,1])
# Pass the local 1-D tx_ rather than the 3x3 `tx` from the MSE cell, and check
# the returned arrays instead of the inputs; otherwise the assertions below
# would hold no matter what the function does.
# NOTE(review): assumes prepare_dimensions is meant to return a 2-D tx with one
# row per sample for 1-D input; confirm against helpers.py.
y_out, tx_out_ = prepare_dimensions(y_to_reshape, tx_)
assert y_out.shape == (3,1), "y has incorrect shape"
assert len(y_out) == len(tx_out_), "y and tx have different lengths"
assert len(tx_out_.shape) == 2, "tx is not 2-dimensional"

## build poly

In [89]:
from helpers import build_poly
# Shape-only check of the degree-3 expansion. It reuses the `tx` matrix bound
# in the MSE cell, so that cell must have run first.
# NOTE(review): confirm the expected 13 columns against build_poly's definition.
expected_shape = (3, 13)
out = build_poly(tx, 3)
assert expected_shape == out.shape, "Shape is incorrect"

# Testing implementations.py

## Least squares GD and SGD

In [5]:
from implementations import least_squares_GD, least_squares_SGD
lr = 0.001
initial_w = np.array([3, 2, 1])
# The expected weights are exactly one gradient step away from initial_w,
# using the hand-computed MSE gradient from the losses section.
expected_gradient = np.array([35.6666666666, -31.66666666666, -63.0])
expected_update = expected_gradient * lr
expected_weights = (initial_w - expected_update).reshape(-1, 1)

w, l = least_squares_GD(y, tx, initial_w, 1, lr)
assert expected_weights.shape == w.shape, "new GD weights have incorrect shape"
for position, (reference, computed) in enumerate(zip(expected_weights, w)):
    assert np.abs(reference - computed) < eps, "New GD weight has an incorrect value at {}".format(position)

# Reseed so the stochastic step is repeatable; the expectation below relies on
# this seed making SGD use the sample [-5, 5, 10].
np.random.seed(0)
w, l = least_squares_SGD(y, tx, initial_w, 1, lr)
# Gradient for that single sample: -error * x = -20 * [-5, 5, 10].
expected_gradient = np.array([100, -100, -200])
expected_update = expected_gradient * lr
expected_weights = (initial_w - expected_update).reshape(-1, 1)
assert expected_weights.shape == w.shape, "new weights have incorrect shape"
for position, (reference, computed) in enumerate(zip(expected_weights, w)):
    assert np.abs(reference - computed) < eps, "New weight has an incorrect value at {}".format(position)

## Logistic regression and regularized version

In [43]:
from implementations import logistic_regression, reg_logistic_regression
# Fix the RNG state before the call so the expected values are repeatable.
np.random.seed(0)
lr = 0.001
initial_w = np.array([3, 2, 1])
w, l = logistic_regression(y, tx, initial_w, 1, lr)

# The expected weights are one step of size lr along this reference gradient.
expected_gradient = np.array([120.03346425, -120.03346425, -240.06692851])
expected_update = expected_gradient * lr
expected_weights = (initial_w - expected_update).reshape(-1, 1)
assert expected_weights.shape == w.shape, "new log-loss weights have incorrect shape"
for position, (reference, computed) in enumerate(zip(expected_weights, w)):
    assert np.abs(reference - computed) < eps, "New log-loss weight has an incorrect value at {}".format(position)

lambda_ = 0.1
# NOTE(review): there is no reseed before this call, so if the routine draws
# random samples the expectation depends on the RNG state left by the code
# above it; confirm this is intended.
w, l = reg_logistic_regression(y, tx, lambda_, initial_w, 1, lr)

# Reference gradient; it differs from the unregularized one by
# [0.3, 0.2, 0.1], i.e. lambda_ * initial_w.
expected_gradient = np.array([120.33346425, -119.83346425, -239.96692851])
expected_update = expected_gradient * lr
expected_weights = (initial_w - expected_update).reshape(-1, 1)
assert expected_weights.shape == w.shape, "new reg log-loss weights have incorrect shape"
for position, (reference, computed) in enumerate(zip(expected_weights, w)):
    assert np.abs(reference - computed) < eps, "New reg log-loss weight has an incorrect value at {}".format(position)