In [11]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from helpers import *
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [12]:
from proj1_helpers import *
# Location of the training CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = '../data/train.csv'
# load_csv_data is presumably provided by one of the helper star-imports (TODO confirm which).
# Per the section heading, the three results are the class labels (y), the feature
# matrix (tX) and the event ids (ids).
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

## Do your crazy machine learning thing here :) ...

In [3]:
def compute_loss(y, tx, w):
    """Return the half mean squared error of the linear model ``tx @ w``.

    The squared residuals ``y - tx @ w`` are summed and divided by
    ``2 * len(y)``.
    """
    residual = y - tx @ w
    squared_error = np.power(residual, 2)
    return np.sum(squared_error) / (2 * len(y))

In [4]:
def compute_gradient(y, tx, w):
    """Return the gradient of the half-MSE loss with respect to ``w``.

    Computed as ``-(tx.T @ (y - tx @ w)) / len(y)``.
    """
    residual = y - tx @ w
    projected = tx.T @ residual
    return -projected / len(y)

In [5]:
def build_poly(x, degree):
    """Expand ``x`` into the polynomial basis 1, x, x**2, ..., x**degree.

    Returns a float matrix with ``len(x)`` rows and ``degree + 1`` columns,
    where column ``j`` holds ``np.power(x, j)``.
    """
    n_terms = degree + 1
    basis = np.zeros((len(x), n_terms))
    exponent = 0
    while exponent < n_terms:
        basis[:, exponent] = np.power(x, exponent)
        exponent += 1
    return basis

In [6]:
def sigmoid(t):
    """Apply the logistic sigmoid ``1 / (1 + exp(-t))`` element-wise to ``t``.

    ``t`` may be a scalar or an array; the result is a NumPy array of the
    same shape (0-d for a scalar) with values in [0, 1].

    The evaluation only ever exponentiates a non-positive number, so it
    cannot overflow however large ``|t|`` is, and it relies on NumPy alone
    (the notebook never imports ``math``).
    """
    t = np.asarray(t, dtype=float)
    # exp(-|t|) lies in (0, 1]; pick the algebraically equivalent form per sign.
    decay = np.exp(-np.abs(t))
    return np.where(t >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

In [7]:
def calculate_loss(y, tx, w):
    """Compute the logistic-regression cost as a negative log-likelihood.

    With scores ``z = tx @ w`` the cost is ``sum(log(1 + exp(z)) - y * z)``.

    ``log(1 + exp(z))`` is evaluated as ``np.logaddexp(0, z)`` so that large
    scores give a finite value instead of overflowing to ``inf``.
    """
    scores = tx @ w
    return np.sum(np.logaddexp(0, scores) - y * scores)

In [8]:
def calculate_gradient(y, tx, w):
    """Return the gradient of the logistic loss: ``tx.T @ (sigmoid(tx @ w) - y)``."""
    predicted = sigmoid(tx @ w)
    error = predicted - y
    return tx.T @ error

In [9]:
def penalized_logistic_regression(y, tx, w, lambda_):
    """Return the L2-penalised logistic loss, its gradient, and the Hessian.

    The loss from ``calculate_loss`` is increased by ``lambda_ * sum(w**2)``
    and the gradient from ``calculate_gradient`` by ``2 * lambda_ * w``.
    The Hessian is returned exactly as ``calculate_hessian`` produces it;
    no penalty term is added to it here.
    """
    l2_penalty = lambda_ * np.sum(np.power(w, 2))
    penalized_loss = calculate_loss(y, tx, w) + l2_penalty
    penalized_gradient = calculate_gradient(y, tx, w) + lambda_ * 2 * w
    hessian = calculate_hessian(y, tx, w)
    return penalized_loss, penalized_gradient, hessian

In [10]:
def learning_by_penalized_gradient(y, tx, w, gamma, lambda_):
    """
    Do one damped Newton-style step of penalized logistic regression.

    The penalized loss, gradient and Hessian are evaluated at ``w``, and the
    update is ``w - gamma * H^{-1} @ gradient``.

    Return the loss (evaluated at the incoming ``w``) and the updated w.
    """
    loss, gradient, H = penalized_logistic_regression(y, tx, w, lambda_)
    # Solve H @ step = gradient rather than forming inv(H) explicitly: this is
    # cheaper and numerically better behaved for the same mathematical update.
    step = np.linalg.solve(H, gradient)
    return loss, w - gamma * step

## methods

In [12]:
def least_squares(y, tx):
    """Solve the normal equations ``(tx.T @ tx) w = tx.T @ y``.

    Returns ``(mse, weight)``, where ``mse`` is ``compute_loss`` evaluated at
    the solution.
    """
    gram = tx.T @ tx
    moment = tx.T @ y
    weight = np.linalg.solve(gram, moment)
    return compute_loss(y, tx, weight), weight

In [13]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Run ``max_iters`` steps of gradient descent on the least-squares loss.

    Returns ``(losses, ws)``: ``losses[i]`` is the loss at ``ws[i]``, and
    ``ws`` holds one extra trailing entry, the weights after the last step.
    """
    w = initial_w
    ws, losses = [initial_w], []
    for _ in range(max_iters):
        # Loss and gradient are both evaluated at the current w, before the update.
        gradient = compute_gradient(y, tx, w)
        losses.append(compute_loss(y, tx, w))
        w = w - gamma * gradient
        ws.append(w)
    return losses, ws
    

In [14]:
def least_squares_SGD(y,tx,initial_w,batch_size,max_iters,gamma):
    """Stochastic gradient descent on the least-squares loss.

    For each of the ``max_iters`` passes, mini-batches are drawn with
    ``batch_iter(y, tx, batch_size, shuffle=True)`` and one gradient step is
    taken per batch.

    Returns ``(losses, ws)``: the per-batch losses (evaluated before each
    update) and the weight history, starting with ``initial_w``.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # Build a fresh batch iterator for every pass. batch_iter is presumably
        # a single-use iterator (TODO confirm against helpers); creating it once
        # outside this loop would leave every pass after the first with nothing
        # to iterate over.
        for y_batch, tx_batch in batch_iter(y, tx, batch_size, shuffle=True):
            gradient = compute_gradient(y_batch, tx_batch, w)
            loss = compute_loss(y_batch, tx_batch, w)
            w = w - gamma * gradient
            # store w and loss
            ws.append(np.copy(w))
            losses.append(loss)

    return losses, ws

In [15]:
def ridge_regression(y, tx, lamb):
    """Closed-form ridge regression.

    Solves ``(tx.T @ tx + d_lamb * I) w = tx.T @ y`` with
    ``d_lamb = 2 * lamb * m``, where ``m`` is the number of columns of ``tx``.

    Returns ``(mse, weight)``, where ``mse`` is the unpenalised
    ``compute_loss`` of the solution on the ``(y, tx)`` passed in.
    """
    m = np.shape(tx)[1]
    identity = np.eye(m)
    # NOTE(review): the penalty is scaled by the number of feature columns. The
    # closed form matching a 1/(2N)-normalised loss would scale by the number of
    # samples instead. Confirm the intent before changing it, since lambdas
    # tuned with this function depend on the current scaling.
    d_lamb = lamb * 2 * m
    weight = np.linalg.solve(tx.T @ tx + (d_lamb * identity), tx.T @ y)

    # Evaluate on the matrix that was passed in, not on a notebook-level global.
    mse = compute_loss(y, tx, weight)

    return mse, weight

In [16]:
def reg_logistic_regression(y, tx, lambda_ , gamma, max_iters):
    """Fit L2-penalised logistic regression by repeated penalised steps.

    Starting from zero weights, ``learning_by_penalized_gradient`` is applied
    up to ``max_iters`` times. Iteration stops early once two consecutive
    losses differ by less than 1e-8. Progress is printed every 500 iterations
    and the final unpenalised loss is printed at the end.

    Returns ``(loss, w)``: the final ``calculate_loss`` value and the fitted
    weights, matching the ``(loss, weight)`` pair the notebook unpacks.

    NOTE(review): the negative log-likelihood in ``calculate_loss`` presumably
    expects labels in {0, 1} -- confirm the label encoding of ``y``.
    """
    # init parameters
    threshold = 1e-8
    losses = []
    # Give w the same number of dimensions as y: (D,) for a 1-D y and (D, 1)
    # for a column-vector y. Otherwise `sigmoid(tx @ w) - y` would broadcast
    # to an N x N matrix.
    w = np.zeros((tx.shape[1],) + np.shape(y)[1:])

    # start the logistic regression
    for n_iter in range(max_iters):
        # get loss and update w.
        loss, w = learning_by_penalized_gradient(y, tx, w, gamma, lambda_)
        # log info
        if n_iter % 500 == 0:
            print("Current iteration={i}, the loss={l}".format(i=n_iter, l=loss))
        # converge criteria
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break

    # The earlier plotting call referenced names (x, mean_x, std_x) that do not
    # exist in this notebook, so it is dropped; the fit is reported and returned.
    final_loss = calculate_loss(y, tx, w)
    print("The loss={l}".format(l=final_loss))
    return final_loss, w
    

In [17]:
def logistic_regression(y, tx, gamma, max_iters):
    """Unpenalised logistic regression: ``reg_logistic_regression`` with ``lambda_ = 0``."""
    return reg_logistic_regression(y, tx, 0, gamma, max_iters)

## Cleaning of data and visualisation

In [18]:
#Clean data

# Work on a copy: tX keeps the raw values and re-running this cell always
# starts from the same input instead of rescaling already-scaled data.
tX_clean = tX.copy()

# Replace every -999 placeholder by the mean of the other entries of its column.
# A column made up only of -999 has no such mean; it is left untouched here and
# becomes all zeros in the centring step below.
for column in tX_clean.T:
    missing = column == -999
    if missing.any() and not missing.all():
        column[missing] = np.mean(column[~missing])

# Centre each column and rescale it (constant columns are only centred).
# NOTE(review): the scale factor is the variance, not the standard deviation,
# so columns do not end up with unit variance -- confirm this is intended.
for column in tX_clean.T:
    mean = np.mean(column)
    var = np.var(column)
    if var == 0:
        var = 1
    column[:] = (column - mean) / var


## Test of the different methods

In [18]:
#*********************** least squares *******************#
# Closed-form least-squares fit on the cleaned features; show the loss, then the weights.
mse_clean, weight_clean = least_squares(y=y, tx=tX_clean)
print(mse_clean)
print(weight_clean)

0.38991616861
[  5.08222097e-01  -9.00300387e+00  -1.07584597e+01  -7.01363484e-02
   2.04988435e-02   1.92921914e+01   9.33521771e-03   2.20787357e-01
  -6.27003319e-01  -3.81089031e+04  -1.58930593e-01   1.40920349e-01
   1.64542195e-02   1.43396556e+03  -9.46324146e-04  -1.50910375e-03
   1.39223926e+03  -1.08936158e-03   4.57406193e-03   3.40983436e+00
   1.69222382e-03  -5.94570589e+00   4.08148906e-02  -2.23628996e+00
   8.99758622e-04   2.65210390e-04  -6.30616007e-01   1.70555214e-03
  -1.70612895e-03   2.73475607e+04]


In [22]:
#******************** gradient descent *******************#
# Start from the all-zero weight vector, one entry per feature column.
w_initial = np.zeros(tX.shape[1])
max_iters = 50
gamma = 0.08
# least_squares_GD returns full histories: a list of losses and a list of weight vectors.
mse, weight = least_squares_GD(y, tX_clean, initial_w=w_initial, max_iters=max_iters, gamma=gamma)
# Show the loss of the last iteration together with the weights it was evaluated at.
print(mse[max_iters - 1])
print(weight[max_iters - 1])

0.433581228877
[ -5.48557503e-04  -2.17980257e-02  -1.34270410e-03   6.43124287e-03
   7.83168238e-02   8.73135654e-04  -3.98162218e-02   6.89524048e-02
  -6.10240715e-03   2.62227133e-03  -1.48961241e-01   2.46030594e-01
   1.85793269e-02   2.20884226e-02  -2.84550363e-03  -3.80033446e-03
   1.07377766e-02  -4.21963511e-04   3.29371979e-03   7.65914700e-04
   6.17622665e-03   1.78256656e-03   7.62812533e-02   6.22203185e-03
   8.44205888e-04  -1.31532870e-04  -5.31550784e-04   1.75260216e-03
  -1.57908303e-03   1.95521398e-03]


In [23]:
#************** stochastic gradient descent ****************#
#w_initial = np.zeros([tX.shape[1]])
#batch_size = 1
#max_iters = 1
#gamma = 0.08
#mse_SGD,weight_SGD = least_squares_SGD(y,tX_clean,w_initial,batch_size,max_iters,gamma)
#print(mse_SGD)


In [27]:
#************** ridge regression ****************#

#degree = 7
#x_train_poly = build_poly(x_train,degree);
#x_test_poly = build_poly(x_test,degree);

"""
lambdas = np.logspace(-5, 0, 15)
rmse_trs = []
for lamb in lambdas:
    w = ridge_regression(y, tX, lamb)

    loss_train = compute_loss(y, tX, w)
    rmse_train = np.sqrt(2*loss_train)

    print("lambda={l:.3f}, proportion={p}, Training RMSE={tr:.3f}".format(
        l=lamb, p=ratio, tr=rmse_train))
    rmse_trs.append(rmse_train)
    
p=plt.semilogx(lambdas,rmse_trs, marker='o')
plt.title("Ridge regression") 
plt.legend(p, ["Training error"])
plt.xlabel('Lambdas')
plt.ylabel('rmse')
plt.show()
"""

# lambda=0.016 (proportion=0.5, training RMSE=0.883) is the last value
# before the RMSE starts going up.
lambda_ = 0.016
mse, weight = ridge_regression(y, tX_clean, lamb=lambda_)
print(mse)
print(weight)

0.390120020505
[  3.02159224e-01  -8.87564082e+00  -1.03720589e+01   9.87786297e-02
   4.54173023e-02   1.09295682e+01  -9.11540573e-03   2.17908542e-01
  -6.76441049e-01   2.06360638e-01  -1.58080828e-01   1.42150471e-01
   1.57512972e-02   4.04320466e+00  -1.03959072e-03  -1.53968489e-03
   6.21374322e+00  -9.44788851e-04   4.57984978e-03   3.32115875e+00
   1.90672921e-03  -4.42669972e+00   4.02143045e-02  -1.86363400e+00
   8.28920681e-04   2.76459156e-04  -4.68179488e-01   1.69145464e-03
  -1.66220167e-03  -2.38696390e-01]


In [1]:
#************** logistic regression ****************#
# A single penalised step with a very small step size.
max_iters, gamma, lambda_ = 1, 0.000001, 0.016
mse, weight = reg_logistic_regression(y, tX_clean, lambda_=lambda_, gamma=gamma, max_iters=max_iters)
print(mse)
print(weight)

NameError: name 'reg_logistic_regression' is not defined

## Generate predictions and save output in csv format for submission:

In [10]:
DATA_TEST_PATH = '' # TODO: download the test data and supply its path here
# Same loader as for the training set; the first returned value is discarded
# and only the feature matrix and the event ids are kept.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [31]:
OUTPUT_PATH = '' # TODO: fill in desired name of output file for submission
# The fits above store their weights in `weight`; no `weights` variable is ever
# defined in this notebook, so the prediction uses `weight`.
# NOTE(review): `weight` must be a single weight vector here (least_squares_GD
# returns a list of them), and tX_test presumably needs the same cleaning and
# scaling as the training features before predicting -- confirm both.
y_pred = predict_labels(weight, tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)