In [16]:
# from implementations import *
# import numpy as np
from proj1_helpers import *

In [29]:
########################
####### IMPORTS ########
########################
import numpy as np


########################
######## HELPERS #######
########################
def MSE(y, tx, w):
    return np.sum(np.power(y - np.dot(tx, w), 2)/(2*len(y)))  # MSE


def MAE(y, tx, w):
    return np.sum(np.abs(y - np.dot(tx, w)))/len(y)  # MAE


def RMSE(y, tx, w):
    return np.sqrt(2*MSE(y, tx, w))


def compute_gradient(y, tx, w):
    """Compute the gradient."""
    e = y - tx.dot(w)
    grad = -tx.T.dot(e)/len(y)
    return grad


def calculate_gradient_log(y, tx, w):
    """compute the gradient of loss."""
    pred = sigmoid(tx.dot(w))
    grad = tx.T.dot(pred - y)
    return grad


def sigmoid(t):
    """apply sigmoid function on t."""
    return 1/(1+np.exp(-t))


def NLL(y, tx, w):
    """compute the cost by negative log likelihood."""
    pred = sigmoid(tx.dot(w))
    loss = y.T.dot(np.log(pred)) + (1 - y).T.dot(np.log(1 - pred))
    return np.squeeze(- loss)
########################
###### ASSIGNMENT ######
########################


def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    w = initial_w
    for n_iter in range(max_iters):
        grad = compute_gradient(y, tx, w)
        loss = MSE(y, tx, w)
        w = w - gamma*grad
        # print("Step {}, loss is   {}".format(n_iter, loss))
    return (w, loss)


def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size):
            grad = compute_gradient(y, tx, w)
            w = w - gamma*grad
    loss = MSE(y, tx, w)
    return (w, loss)


def least_squares(y, tx):
    a = tx.T.dot(tx)
    b = tx.T.dot(y)
    w = np.linalg.solve(a, b)
    loss = MSE(y, tx, w)
    return (w, loss)


def ridge_regression(y, tx, lambda_):
    aI = 2 * tx.shape[0] * lambda_ * np.identity(tx.shape[1])
    a = tx.T.dot(tx) + aI
    b = tx.T.dot(y)
    w = np.linalg.solve(a, b)
    loss = MSE(y, tx, w)
    return (w, loss)

def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    pass    

In [44]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    def sigmoid(t):
        """apply sigmoid function on t."""
        return 1/(1+np.exp(-t))

    def calculate_gradient(y, tx, w):
        """compute the gradient of loss."""
        pred = sigmoid(tx.dot(w))
        print("pred: {}".format(pred))
        grad = tx.T.dot(pred - y)
        return grad

    def calculate_loss(y, tx, w):
        """compute the cost by negative log likelihood."""
        pred = sigmoid(tx.dot(w))
        loss = y.T.dot(np.log(pred)) + (1 - y).T.dot(np.log(1 - pred))
        return np.squeeze(- loss)

    def learning_by_gradient_descent(y, tx, w, gamma):
        """
        Do one step of gradient descen using logistic regression.
        Return the loss and the updated w.
        """
        loss = calculate_loss(y, tx, w)
        grad = calculate_gradient(y, tx, w)
        w2 = w - gamma * grad
#         print("W went from {}\nto {}\nwith grad {}".format(w[:5], w2[:5], grad[:5]))
        return loss, w2

    losses = []
    w = initial_w
    for i in range(max_iters):
        # get loss and update w.
        loss, w = learning_by_gradient_descent(y, tx, w, gamma)
        # converge criterion
        print("Step {}, loss is {}".format(i, loss))
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < 1e-8:
            break
    return (w, loss)


In [45]:
(y_train, tX_train, ids_train) = (np.array(x) for x in load_csv_data("data/train.csv"))
(y_test, tX_test, ids_test) = (np.array(x) for x in load_csv_data("data/test.csv"))

In [46]:
# w1, loss1_train = least_squares(y_train, tX_train)
# loss1_test = MSE(y_test, tX_test, w1)
# print(loss1_test)

In [59]:
initial_w = np.zeros(tX_train.shape[1]+1)
# initial_w = np.random.random(tX_train.shape[1])

In [60]:
# w2, loss2 = least_squares_GD(y_train, tX_train, initial_w, 100, 0.0000001)
# loss2

In [61]:
# loss2_test = MSE(y_test, tX_test, w2)
# loss2_test

In [62]:
tx = np.c_[np.ones((y_train.shape[0], 1)), tX_train]
w3, loss3 = logistic_regression(y_train, tx, initial_w, 10, 0.0000000001)
w3

pred: [0.5 0.5 0.5 ... 0.5 0.5 0.5]
Step 0, loss is 173286.79514001933
pred: [8.47422860e-01 5.57259408e-49 5.19427589e-51 ... 4.26764955e-49
 1.67874614e-61 3.94470630e-63]
Step 1, loss is nan
pred: [8.14164358e-01 8.33614345e-70 3.41573559e-73 ... 5.33837518e-70
 3.12554287e-88 6.60774366e-91]
Step 2, loss is nan
pred: [7.94303155e-001 1.22215546e-090 2.29766740e-095 ... 6.56608363e-091
 5.23094403e-115 1.00794578e-118]
Step 3, loss is nan
pred: [7.86092190e-001 1.77078494e-111 1.52581621e-117 ... 7.98787612e-112
 8.15818464e-142 1.40580244e-146]
Step 4, loss is nan


  app.launch_new_instance()


pred: [7.85830519e-001 2.54465356e-132 9.99719428e-140 ... 9.64065868e-133
 1.21680760e-168 1.84531368e-174]
Step 5, loss is nan
pred: [7.90699048e-001 3.63537175e-153 6.48511660e-162 ... 1.15692677e-153
 1.76292546e-195 2.32720359e-202]
Step 6, loss is nan
pred: [7.98804855e-001 5.17165893e-174 4.17721241e-184 ... 1.38264139e-174
 2.50435285e-222 2.85651214e-230]
Step 7, loss is nan
pred: [8.08891652e-001 7.33419298e-195 2.67709629e-206 ... 1.64735377e-195
 3.50860951e-249 3.44035928e-258]
Step 8, loss is nan
pred: [8.20111993e-001 1.03765273e-215 1.70939790e-228 ... 1.95824232e-216
 4.86624127e-276 4.08713816e-286]
Step 9, loss is nan


  after removing the cwd from sys.path.


array([-1.37769787e-04,  2.25399091e-02, -9.04829482e-03, -1.11619145e-02,
       -7.36415674e-03,  7.97215659e-02,  6.26808175e-02,  7.98589415e-02,
       -3.09205415e-04, -3.01542030e-03, -2.33238970e-02, -2.37442595e-04,
        5.86808088e-05,  7.98239204e-02, -4.34229928e-03,  8.17527081e-07,
       -8.69944304e-07, -6.76834482e-03,  2.38006185e-06, -3.89072270e-06,
       -6.17592324e-03,  4.07089878e-06, -3.03420056e-02, -1.63956889e-04,
        4.65535120e-02,  5.38708224e-02,  5.38713025e-02,  7.63490218e-02,
        7.98463520e-02,  7.98447280e-02, -1.22132527e-02])

In [41]:
loss3_test = MSE(y_test, tX_test, w3)
loss3_test

145855.06257790796