In [52]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load the data

In [53]:
import datetime
from helpers import *
y, x = load_data(sub_sample=False, add_outlier=False, train=True)
y_te, x_te = load_data(sub_sample=False, add_outlier=False, train=False)

In [54]:
x, mean_x, std_x = standardize(x)
y, tx = build_model_data(x, y)
x_te, mean_xte, std_xte = standardize(x_te)
y_te, tx_te = build_model_data(x_te, y_te)
#tx,y

# Computing the Cost Function
Fill in the `compute_cost` function below:

In [55]:
def calculate_mse(e):
    """Calculate the mse for vector e."""
    return 1/2*np.mean(e**2)


def calculate_mae(e):
    """Calculate the mae for vector e."""
    return np.mean(np.abs(e))


def compute_loss(y, tx, w):
    """Calculate the loss.

    You can calculate the loss using mse or mae.
    """
    e = y - tx.dot(w)
    return calculate_mse(e)

# Gradient Descent

In [56]:
def compute_gradient(y, tx, w):
    """Compute the gradient."""
    err = y - tx.dot(w)
    grad = -tx.T.dot(err) / len(err)
    return grad, err

In [57]:
def gradient_descent(y, tx, initial_w, max_iters, gamma):
    """Gradient descent algorithm."""
    # Define parameters to store w and loss
    ws = [initial_w]
    #losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # compute loss, gradient
        grad, err = compute_gradient(y, tx, w)
        loss = calculate_mse(err)
        # gradient w by descent update
        w = w - gamma * grad
        # store w and loss
        #losses.append(loss)
        #print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              #bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return loss, w

In [58]:
# Define the parameters of the algorithm.
max_iters = 150
gamma = 0.1

# Initialization
w_initial = 0.1*np.ones(tx.shape[1])

# Start gradient descent.
#start_time = datetime.datetime.now()
gradient_loss, gradient_w = gradient_descent(y, tx, w_initial, max_iters, gamma)
gradient_loss_te = compute_loss(y_te, tx_te, gradient_w)
#end_time = datetime.datetime.now()

In [59]:
gradient_loss,gradient_loss_te

(0.08530420147924032, 0.24137040482184374)

In [60]:
gradient_w

array([ 0.34266797,  0.01726452, -0.12133663, -0.10846244,  0.00162979,
       -0.00858062,  0.1621876 , -0.01210013,  0.11996346, -0.00532339,
       -0.01345443, -0.06396064,  0.05820906, -0.00949754,  0.10634043,
       -0.00042367, -0.000675  ,  0.10992295, -0.0004416 ,  0.00124521,
        0.05075837,  0.00052764, -0.03722702, -0.07092928,  0.01653835,
        0.02079013,  0.02104429, -0.01036036, -0.00984098, -0.00982033,
       -0.0376149 ])

In [61]:
def compute_stoch_gradient(y, tx, w):
    """Compute a stochastic gradient from just few examples n and their corresponding y_n labels."""
    err = y - tx.dot(w)
    grad = -tx.T.dot(err) / len(err)
    return grad, err

def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent."""
    # Define parameters to store w and loss
    #ws = [initial_w]
    #losses = []
    w = initial_w
    
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # compute a stochastic gradient and loss
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate loss
            loss = compute_loss(y, tx, w)
            # store w and loss
            #ws.append(w)
            #losses.append(loss)

        #print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              #bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return loss, w
# from stochastic_gradient_descent import *

# Define the parameters of the algorithm.
max_iters = 50
gamma = 0.7
batch_size = 1

# Initialization
w_initial = 0.1*np.ones(tx.shape[1])

# Start SGD.
#start_time = datetime.datetime.now()
sgd_loss, sgd_w = stochastic_gradient_descent(
    y, tx, w_initial, batch_size, max_iters, gamma)
#end_time = datetime.datetime.now()
sgd_loss_te = compute_loss(y_te, tx_te, sgd_w)
# Print result
#exection_time = (end_time - start_time).total_seconds()
#print("SGD: execution time={t:.3f} seconds".format(t=exection_time))

In [62]:
sgd_loss,sgd_loss_te

(1.1671159417604567e+65, 1.166952206493414e+65)

In [63]:
#print(tx),print(y)

In [64]:
def least_squares(y, tx):
    """calculate the least squares solution."""
    a = tx.T.dot(tx)
    b = tx.T.dot(y)
    w = np.linalg.solve(a, b)
    loss = compute_loss(y, tx, w)
    return loss, w

In [65]:
ls_loss, ls_w = least_squares(y, tx)
ls_loss_te = compute_loss(y_te, tx_te, ls_w)

In [66]:
ls_loss,ls_loss_te

(0.08486139962233243, 0.2655687176380621)

In [67]:
def ridge_regression(y, tx, lambda_):
    """implement ridge regression."""
    aI = 2 * tx.shape[0] * lambda_ * np.identity(tx.shape[1])
    a = tx.T.dot(tx) + aI
    b = tx.T.dot(y)
    w = np.linalg.solve(a, b)
    loss = compute_loss(y, tx, w)
    return loss, w
# define parameter
lambdas = np.logspace(-5, 0, 15)
rdrg_losses = []
rdrg_losses_te = []
rdrg_ws = []
for ind, lambda_ in enumerate(lambdas):
    # ridge regression
    rdrg_loss, rdrg_w = ridge_regression(y, tx, lambda_)
    rdrg_loss_te = compute_loss(y_te, tx_te, rdrg_w)
    rdrg_losses.append(rdrg_loss)
    rdrg_losses_te.append(rdrg_loss_te)
    rdrg_ws.append(rdrg_w)
    #rmse_tr.append(np.sqrt(2 * compute_mse(y_tr, tx_tr, weight)))
    #rmse_te.append(np.sqrt(2 * compute_mse(y_te, tx_te, weight)))
    #print("proportion={p}, degree={d}, lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
           #p=ratio, d=degree, l=lambda_, tr=rmse_tr[ind], te=rmse_te[ind]))
#plot_train_test(rmse_tr, rmse_te, lambdas, degree)
    
    


In [68]:
rdrg_losses,rdrg_losses_te

([0.08495553210784819,
  0.08497782126202305,
  0.0849911773447907,
  0.08499834465694674,
  0.0850022497169442,
  0.085005086622608,
  0.08501031826692962,
  0.08503084462381698,
  0.08512011733444838,
  0.08546299477484444,
  0.08651924717608842,
  0.08910479140635215,
  0.0947294801425132,
  0.10587091734858638,
  0.12276418631822966],
 [0.24361002759146122,
  0.24360699679019235,
  0.24360928736143997,
  0.24361775147430037,
  0.24363870660481282,
  0.2436884952881576,
  0.2438082163834528,
  0.24410839297670336,
  0.2449102141909508,
  0.2471583133215684,
  0.2533032898883575,
  0.2681770961664956,
  0.2979910117427005,
  0.3448074328625563,
  0.39882084106483123])