## Load and clean the boson data

In [None]:
from proj1_helpers import *
from helpers import *

# Load the boson training set from CSV through the project helper.
# NOTE(review): sub_sample=False presumably loads every row — confirm in load_csv_data.
data_path = "../dataset/train.csv"
y_boson, x_boson, col_labels = load_csv_data(data_path, sub_sample=False)
# Reshape the labels into an (n, 1) column so they line up with the (d, 1)
# weight vectors created by the logistic-regression demos further down.
y_boson = y_boson.reshape((-1, 1))

# Last expression of the cell: the notebook displays both shapes.
y_boson.shape, x_boson.shape

In [None]:
# NOTE(review): equalize_predictions is a project helper; presumably it balances
# the two label classes — confirm against its definition.
y_boson,x_boson = equalize_predictions(y_boson,x_boson)

# Cleaning pipeline: flag the values 0 and -999 through fill_with_nan_list,
# standardize, then fill the NaNs with the per-column nanmean of the
# standardized data.
# NOTE(review): listing 0 in nan_values presumably treats genuine zero
# measurements as missing too — confirm this is intended.
x_ = fill_with_nan_list(x_boson, nan_values=[0, -999])
# mean_x and std_x are kept as globals; the gradient-descent demo passes them to visualization.
x_, mean_x, std_x = standardize(x_)
x_ = sustitute_nans(x_, substitutions=np.nanmean(x_, axis=0)) 
# Last expression of the cell: shape of the cleaned feature matrix.
x_.shape

In [None]:
# Choose the polynomial degree for the feature expansion.
degree = 4
# NOTE(review): build_poly is a project helper; presumably it expands every
# feature up to `degree` — confirm whether it also prepends a constant column.
tx_ = build_poly(x_, degree)
# Last expression of the cell: shape of the expanded feature matrix.
tx_.shape

In [None]:
# Take the first n samples as a small training subset.
n = 5000
x_sub = tx_[:n, :]
# Copy the label slice: basic slicing returns a view, so relabelling it in
# place would also overwrite the first n entries of y_boson.
y_sub = y_boson[:n].copy()
# The negative log-likelihood below uses y and (1 - y), i.e. labels in {0, 1},
# so map the -1 labels to 0.
y_sub[y_sub == -1] = 0

# Last expression of the cell: shapes of the subset.
y_sub.shape, x_sub.shape

## Logistic Regression

Compute the cost as the negative log-likelihood.

In [None]:
def sigmoid(t):
    """Apply the logistic sigmoid 1 / (1 + exp(-t)) element-wise to t.

    Evaluated as exp(-log(1 + exp(-t))) through np.logaddexp. This is the
    same function, but it does not overflow for large negative t, where the
    naive form computes exp(-t) = inf and emits a RuntimeWarning.

    Args:
        t: numpy scalar or array.

    Returns:
        The sigmoid of t, with the same shape as t.
    """
    # logaddexp(0, -t) == log(exp(0) + exp(-t)) == log(1 + exp(-t)), computed stably.
    return np.exp(-np.logaddexp(0, -t))

In [None]:
def calculate_loss(y, tx, w):
    """Compute the logistic-regression cost (negative log-likelihood).

    With z = tx @ w and labels y in {0, 1} the cost is

        sum_i log(1 + exp(z_i)) - y_i * z_i

    which equals -[y^T log(sigmoid(z)) + (1 - y)^T log(1 - sigmoid(z))].
    It is evaluated in this form because the sigmoid form produces
    log(0) = -inf (and 0 * -inf = nan) as soon as a prediction saturates
    to exactly 0 or 1.

    Args:
        y: labels in {0, 1}, shape (n, 1).
        tx: feature matrix, shape (n, d).
        w: weights, shape (d, 1).

    Returns:
        The cost as a 0-d numpy array.
    """
    z = tx @ w
    # np.logaddexp(0, z) is log(1 + exp(z)) without overflow for large |z|.
    loss = np.sum(np.logaddexp(0, z)) - y.T @ z
    return np.squeeze(loss)

In [None]:
def calculate_gradient(y, tx, w):
    """Return the gradient of the logistic loss with respect to w.

    The gradient is tx^T (sigmoid(tx w) - y).
    """
    # Difference between the predicted probabilities and the labels.
    residual = sigmoid(np.matmul(tx, w)) - y
    return np.matmul(tx.T, residual)

### Using Gradient Descent
Implement your function to calculate the gradient for logistic regression.

In [None]:
def learning_by_gradient_descent(y, tx, w, gamma):
    """
    Do one step of gradient descent using logistic regression.

    The loss is evaluated at the incoming w, i.e. before the update.
    The input w is left untouched; a new array is returned.

    Return the loss and the updated w.
    """
    loss = calculate_loss(y, tx, w)
    grad = calculate_gradient(y, tx, w)
    # Build a new array instead of `w -= ...`: the in-place form overwrites the
    # caller's array, so every reference the caller kept to earlier weights
    # (e.g. a "best so far" snapshot) would silently change as well.
    w = w - gamma * grad
    return loss, w

Demo!

In [None]:
def logistic_regression_gradient_descent_demo(y, x):
    """Fit logistic regression with fixed-step gradient descent.

    Starts from w = 0, runs max_iter gradient steps, prints the loss every
    100 iterations, then calls visualization on the final weights and prints
    their loss.

    Args:
        y: labels; expected as an (n, 1) column with values in {0, 1}.
        x: feature matrix, used directly as tx (no constant column is added).

    Returns:
        The weights with the lowest loss seen. The -1 placeholder is returned
        only if no loss ever compared below infinity (e.g. all NaN).
    """
    # init parameters
    max_iter = 5000
    threshold = 1e-8  # only used by the disabled convergence check below
    gamma = 1e-2
    losses = []

    # build tx (x is used as-is; prepending a constant column is disabled)
    # tx = np.c_[np.ones((y.shape[0], 1)), x]
    tx = x
    w = np.zeros((x.shape[1], 1))

    lowest_loss = float('Inf')
    best_w = -1

    # start the logistic regression
    for iteration in range(max_iter):
        # The step returns the loss of the weights it was GIVEN, not of the
        # weights it returns, and it may update w in place. Snapshot w first
        # so best_w is paired with its own loss and never aliases a live array.
        w_before = w.copy()
        loss, w = learning_by_gradient_descent(y, tx, w, gamma)

        if loss < lowest_loss:
            lowest_loss = loss
            best_w = w_before

        # log info
        if iteration % 100 == 0:
            print("Current iteration={i}, loss={l}".format(i=iteration, l=loss))
        losses.append(loss)
        # converge criterion (disabled)
        # if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
        #     break
    # visualization
    visualization(y, x[:, 1:], mean_x, std_x, w, "classification_by_logistic_regression_gradient_descent")
    final_loss = calculate_loss(y, tx, w)
    print("loss={l}".format(l=final_loss))
    # The weights produced by the last step were never scored inside the loop.
    if final_loss < lowest_loss:
        best_w = w

    return best_w

# Train on the subset; w receives the demo's return value (its best_w).
w = logistic_regression_gradient_descent_demo(y_sub, x_sub)

In [None]:
# Map the 0 labels back to -1 on a copy, so y_sub itself stays in {0, 1}.
# NOTE(review): presumably SUCCESS_RATIO compares predictions against -1/1
# labels — confirm in compute_loss.
y_sub_ = y_sub.copy()
y_sub_[y_sub_== 0] = -1
# Last expression of the cell: the notebook displays the returned score.
compute_loss(y_sub_, x_sub, w, costfunc=CostFunction.SUCCESS_RATIO)

### Using penalized logistic regression
Fill in the function below.

In [None]:
def penalized_logistic_regression(y, tx, w, lambda_):
    """Return the L2-penalized logistic loss and its gradient.

    loss     = calculate_loss(y, tx, w) + lambda_ * w^T w
    gradient = calculate_gradient(y, tx, w) + 2 * lambda_ * w

    Args:
        y: labels, as expected by calculate_loss.
        tx: feature matrix.
        w: weights as a (d, 1) column.
        lambda_: weight of the squared-norm penalty.

    Returns:
        (loss, gradient)
    """
    # w.T.dot(w) is a (1, 1) array for a column vector; squeeze it to a scalar.
    penalty = lambda_ * np.squeeze(w.T.dot(w))
    loss = calculate_loss(y, tx, w) + penalty
    gradient = calculate_gradient(y, tx, w) + 2 * lambda_ * w
    return loss, gradient

In [None]:
def learning_by_penalized_gradient(y, tx, w, gamma, lambda_):
    """
    Do one step of gradient descent, using the penalized logistic regression.

    The loss is evaluated at the incoming w, i.e. before the update.
    The input w is left untouched; a new array is returned.

    Return the loss and updated w.
    """
    loss, gradient = penalized_logistic_regression(y, tx, w, lambda_)
    # Build a new array instead of `w -= ...`: the in-place form overwrites the
    # caller's array, so every reference the caller kept to earlier weights
    # would silently change as well.
    w = w - gamma * gradient
    return loss, w

In [None]:
def logistic_regression_penalized_gradient_descent_demo(y, x):
    """Fit L2-penalized logistic regression with a decaying step size.

    Starts from w = 0 and runs at most max_iter steps, dividing gamma by 1.2
    after each one. Stops early once two consecutive penalized losses differ
    by less than threshold. Prints the loss every 100 iterations and finally
    the unpenalized loss of the last weights.

    Args:
        y: labels; expected as an (n, 1) column with values in {0, 1}.
        x: feature matrix, used directly as tx (no constant column is added).

    Returns:
        The weights with the lowest penalized loss seen. The -1 placeholder is
        returned only if no loss ever compared below infinity (e.g. all NaN).
    """
    # init parameters
    # NOTE(review): gamma starts at 1e-13 and shrinks every step, so the
    # weights barely move and the threshold check likely ends the loop almost
    # immediately — confirm these values are intended.
    max_iter = 500
    gamma = 1e-13
    lambda_ = 0.1
    threshold = 1e-6
    losses = []

    # build tx (x is used as-is; prepending a constant column is disabled)
    # tx = np.c_[np.ones((y.shape[0], 1)), x]
    tx = x
    w = np.zeros((tx.shape[1], 1))

    lowest_loss = float('Inf')
    best_w = -1
    # start the logistic regression
    for iteration in range(max_iter):
        # The step returns the loss of the weights it was GIVEN, not of the
        # weights it returns, and it may update w in place. Snapshot w first
        # so best_w is paired with its own loss and never aliases a live array.
        w_before = w.copy()
        loss, w = learning_by_penalized_gradient(y, tx, w, gamma, lambda_)
        # decay the step size
        gamma /= 1.2

        if loss < lowest_loss:
            lowest_loss = loss
            best_w = w_before

        # log info
        if iteration % 100 == 0:
            print("Current iteration={i}, loss={l}".format(i=iteration, l=loss))
        # converge criterion
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    # visualization (disabled)
    # visualization(y, x, mean_x, std_x, w, "classification_by_logistic_regression_penalized_gradient_descent")
    print("loss={l}".format(l=calculate_loss(y, tx, w)))

    # The weights produced by the last step were never scored inside the loop;
    # score them with the same penalized loss the loop compares.
    final_loss, _ = penalized_logistic_regression(y, tx, w, lambda_)
    if final_loss < lowest_loss:
        best_w = w

    return best_w
    
# Retrain with the penalized variant; this overwrites the w from the unpenalized run.
w = logistic_regression_penalized_gradient_descent_demo(y_sub, x_sub)

In [None]:
# Map the 0 labels back to -1 on a copy, so y_sub itself stays in {0, 1}.
# NOTE(review): presumably SUCCESS_RATIO compares predictions against -1/1
# labels — confirm in compute_loss.
y_sub_ = y_sub.copy()
y_sub_[y_sub_==0] = -1
# Last expression of the cell: the notebook displays the returned score.
compute_loss(y_sub_, x_sub, w, costfunc=CostFunction.SUCCESS_RATIO)

In [None]:
# Scratch value; lam is not referenced by any other cell visible in this notebook.
lam = 1e-8

In [None]:
# Rescale lam to lam / (1 + lam); re-running this cell applies the map again.
lam = lam/(1+lam)