# Advanced Machine Learning

In [66]:
# all the libraries needed for the calculation
import numpy as np

## Ridge Regression

### Ridge cost function

In [10]:
def predict(X, W):
    """ Linear hypothesis h(x): weighted sum of the features of every sample
      INPUT :
        X shape(n, m)
        W shape(n, 1)
      OUTPUT :
        h shape(1, m)"""

    # (1, n) row of weights times the (n, m) feature matrix -> (1, m)
    weights_as_row = W.T
    return np.dot(weights_as_row, X)

In [15]:
def ComputeCostRidge(X, W, Y, lamda):
    """ Compute cost for ridge regression (squared error + L2 penalty)

        J = 1/(2*m) * (sum((h - Y)^2) + lamda * sum(W^2)),  h = predict(X, W)

      Every entry of W is penalised.

      INPUT :
        X      shape(n, m)
        W      shape(n, 1)
        Y      shape(1, m)
        lamda  float
      OUTPUT :
        J float (a NumPy scalar, not a (1, 1) array)"""

    _, m = X.shape

    # Evaluate the hypothesis once and reuse the residual.
    residual = predict(X, W) - Y

    # Summing the squares gives a true scalar; a (1, 1) array would have to
    # be implicitly converted when a caller stores the cost in a 1-D array.
    squared_error = np.sum(np.square(residual))
    penalty = lamda * np.sum(np.square(W))

    return 1/(2*m) * (squared_error + penalty)

### Ridge gradient descent

In [18]:
def gradientDescentRidge(X, W, Y, alpha, num_iters, lamda):
    """ Learn W for ridge regression with batch gradient descent.
      The cost of the current W is recorded before every update, so
      J_history[i] is the cost at the start of iteration i.
      INPUT :
        X           shape(n, m)
        W           shape(n, 1)
        Y           shape(1, m)
        alpha       float    (learning rate)
        num_iters   integer  (number of updates)
        lamda       float    (regularisation strength)
      OUTPUT :
        W           shape(n, 1)
        J_history   shape(num_iters,)"""

    n_features, n_samples = X.shape
    J_history = np.zeros(num_iters)

    for step in range(num_iters):
        J_history[step] = ComputeCostRidge(X, W, Y, lamda)

        # Scaled update: data term X (h - Y)^T plus the penalty term lamda * W
        error = predict(X, W) - Y
        update = (alpha/n_samples) * (np.dot(X, error.T) + lamda * W)
        W = W - update

    return W, J_history

### Some tests

In [50]:
# Load the regression data: one sample per row, the first two columns are
# the features and the third column is the target.
multivariate_regression = np.genfromtxt('multivariate_regression.txt', delimiter=',')

m, _ = multivariate_regression.shape

Y = multivariate_regression[:, 2].reshape((1, m))
X = multivariate_regression[:, :2]

# Standardise every feature column (zero mean, unit standard deviation)
X = (X - X.mean(axis=0)) / X.std(axis=0)

# Design matrix of shape (3, m): a row of ones on top of the transposed features
X_1 = np.vstack((np.ones(m), X.T))

In [61]:
# Ridge cost for an all-ones weight vector.
# With lamda=0 the penalty vanishes and the unregularised cost is obtained.
W = np.ones((3, 1), dtype=int)

print("With W = {!s}\nCost Function = {!s}".format(
    np.squeeze(W),
    np.squeeze(ComputeCostRidge(X_1, W, Y, lamda=100)),
))

With W = [1 1 1]
Cost Function = 65591047226.09408


In [65]:
# Settings for the ridge gradient-descent run.
# With lamda = 0 the penalty term drops out of every update.
alpha, iterations, lamda = 0.01, 200, 0.5

# Start from all-zero weights
W = np.zeros((3, 1), dtype=int)

W, J_history = gradientDescentRidge(X_1, W, Y, alpha, iterations, lamda)
print("Values of the parameters W after {!s} iterations of gradient descent :\n{!s}".format(iterations, W))

Values of the parameters W after 200 iterations of gradient descent :
[[292660.65750329]
 [ 82623.98092499]
 [ 15240.47187713]]


## Lasso Regression

### Lasso cost function

In [21]:
def sigmoid(z):
    """ Logistic function g(z) = 1 / (1 + e^(-z)), applied element-wise
      INPUT :
        z shape(1, m)
      OUTPUT :
        g(z) shape(1, m)"""

    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [23]:
def predictLasso(X, W):
    """ Logistic hypothesis: the linear combination of W and X is passed
     through the sigmoid so the output is bounded between 0 and 1.
     INPUT : 
        X shape(n, m)
        W shape(n, 1)
     OUTPUT :  
        h shape(1, m)"""

    linear_score = np.dot(W.T, X)
    return sigmoid(linear_score)

In [31]:
def computeCostLasso(X, W, Y, lamda) :
    """ Compute the regularised logistic cost for the W parameters

        J = -(1/m) * sum(Y*log(h) + (1-Y)*log(1-h)) + lamda/(2*m) * sum(W^2)

      where h = sigmoid(W^T X) is the logistic hypothesis.

      NOTE: despite the "Lasso" name, the penalty computed here is the
      squared L2 norm of W, and every entry of W is penalised.

      INPUT : 
        X       shape(n, m)
        W       shape(n, 1)
        Y       shape(1, m)
        lamda   float
      OUTPUT : 
        J       float"""

    # Take the sample count from X itself rather than from a module-level
    # variable that may be missing or belong to another dataset.
    _, m = X.shape

    # Work on the raw scores z = W^T X:
    #   -[y*log(h) + (1-y)*log(1-h)] == log(1 + e^z) - y*z
    # np.logaddexp(0, z) evaluates log(1 + e^z) without overflow, so saturated
    # predictions do not end up as log(0) / nan.
    z = np.dot(W.T, X)
    J = np.sum(np.logaddexp(0, z) - Y * z) / m

    Jlasso = (lamda / (2*m)) * np.sum(np.square(W))

    return J + Jlasso

### Lasso gradient descent

In [33]:
def gradientDescentLasso(X, W, Y, alpha, num_iters, lamda):
    """ Run num_iters batch gradient-descent updates on W with learning
      rate alpha. The cost of the current W is stored before each update.
      INPUT :
        X         shape(n, m)
        W         shape(n, 1)
        Y         shape(1, m)
        alpha     float
        lamda     float
        num_iters integer
      OUTPUT :
        W         shape(n, 1)
        J_history shape(num_iters,)"""

    n_rows, n_samples = X.shape
    J_history = np.zeros(num_iters)

    for step in range(num_iters):
        J_history[step] = computeCostLasso(X, W, Y, lamda)

        # Scaled update: data term X (h - Y)^T plus the penalty term lamda * W
        error = predictLasso(X, W) - Y
        W = W - (alpha/n_samples) * (np.dot(X, error.T) + lamda * W)

    return W, J_history

### Some tests

In [34]:
# Read the comma-separated data file used by the tests of this section
logistic_regression = np.genfromtxt('logistic_regression.txt', delimiter=',')

In [35]:
# The first two columns are the features, the third column is the label
X = logistic_regression[:, :2]
m, n = X.shape
Y = logistic_regression[:, 2].reshape(1, m)

# Design matrix of shape (3, m): a row of ones on top of the transposed features
X_1 = np.vstack((np.ones(m), X.T))

In [36]:
# Predictions for the first five samples with hand-picked weights
W = np.array([[-24.0], [0.2], [0.2]])

h_5 = predictLasso(X=X_1[:, :5], W=W)
print(h_5)

[[1.86892535e-01 1.04767902e-04 9.53426817e-02 9.95024445e-01
  9.98968205e-01]]


In [43]:
# Regularised cost for hand-picked weights.
# With lamda=0 the penalty vanishes and the unregularised cost is obtained.
W = np.array([[-24.0], [0.2], [0.2]])

J = computeCostLasso(X_1, W, Y, lamda=0.5)

print("With W = {!s}\nCost Function = {!s}".format(np.squeeze(W), np.squeeze(J)))

With W = [-24.    0.2   0.2]
Cost Function = 1.6585301938265977


In [49]:
# Settings for the regularised gradient-descent run.
# With lamda = 0 the penalty term drops out of every update.
alpha, iterations, lamda = 0.001, 20, 0.5
W = np.array([[-24.0], [0.2], [0.2]])

W, J_history = gradientDescentLasso(X_1, W, Y, alpha, iterations, lamda)

print("Values of the parameters W after {!s} iterations of gradient descent :\n{!s}".format(iterations, W))
# Last recorded cost (computed before the final update)
print("Values of the cost function : {!s}".format(J_history[iterations-1]))

Values of the parameters W after 20 iterations of gradient descent :
[[-23.99769644]
 [  0.19570881]
 [  0.19331878]]
Values of the cost function : 1.6436929607022337
