In [51]:
from sklearn.datasets import load_boston
data = load_boston()



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

In [52]:
data.data.shape

(506, 13)

In [53]:
Y = data.data[:, [3]]
X = data.data[:, [0,5,6,12]]

In [54]:
len([i[0] for i in Y if i[0]==1])

35

In [55]:
X

array([[6.3200e-03, 6.5750e+00, 6.5200e+01, 4.9800e+00],
       [2.7310e-02, 6.4210e+00, 7.8900e+01, 9.1400e+00],
       [2.7290e-02, 7.1850e+00, 6.1100e+01, 4.0300e+00],
       ...,
       [6.0760e-02, 6.9760e+00, 9.1000e+01, 5.6400e+00],
       [1.0959e-01, 6.7940e+00, 8.9300e+01, 6.4800e+00],
       [4.7410e-02, 6.0300e+00, 8.0800e+01, 7.8800e+00]])

In [56]:
import numpy as np

In [57]:
def initialize(d, seed=1):
    '''
    Function to initialize the parameters for the logisitic regression model
    
    Inputs:
        d: number of features for every data point
        seed: random generator seed for reproducing the results
        
    Outputs:
        w: weight vector of dimensions (d, 1)
        b: scalar bias value
    '''
    np.random.seed(seed)
    
    # NOTE: initialize w to be a (d,1) column vector instead of (d,) vector 
    # Hint: initialize w to a random vector with small values. For example, 0.01*np.random.randn(.) can be used.
    #       and initialize b to scalar 0
    # your code here
    w = 0.01*np.random.randn(d, 1)
    b = 0
    
    return w,b

In [58]:
def sigmoid(z):
    # your code here
    A = 1/(1+ np.exp(-z))
    
    return A

In [87]:
def logistic_loss(A,Y):
    '''
    Function to calculate the logistic loss given the predictions and the targets.
    
    Inputs:
        A: Estimated prediction values, A is of dimension (1, m)
        Y: groundtruth labels, Y is of dimension (1, m)
        
    Outputs:
        loss: logistic loss
    '''
    m = A.shape[1]
    # your code here
    loss = (-1/m) * np.sum(Y*A+ (1-Y)*np.log(1-A))
#     print((-1/m))
#     print(np.dot((Y),np.log(A).T)+ np.dot((1-Y),np.log(1-A).T) )
#     print(np.dot((Y),np.log(A).T), Y, np.log(A), A)
#     print(np.dot((1-Y),np.log(1-A).T) )

    return loss

In [88]:
def grad_fn(X,dZ):
    '''
    Function to calculate the gradients of weights (dw) and biases (db) w.r.t the objective function L.
    
    Inputs:
        X: training data of dimensions (d, m)
        dZ: gradient dL/dZ where L is the logistic loss and Z = w^T*X+b is the input to the sigmoid activation function
            dZ is of dimensions (1, m)
        
    outputs:
        dw: gradient dL/dw - gradient of the weight w.r.t. the logistic loss. It is of dimensions (d,1)
        db: gradient dL/db - gradient of the bias w.r.t. the logistic loss. It is a scalar
    '''
    m = X.shape[1]
    # your code here
    dw = (1/m)* np.dot(X, dZ.T)
    db = (1/m)*np.sum(dZ)
    
    return dw,db

In [89]:
def model_fit(w,b,X,Y,alpha,n_epochs,log=False):
    '''
    Function to fit a logistic model with the parameters w,b to the training data with labels X and Y.
    
    Inputs:
        w: weight vector of dimensions (d, 1)
        b: scalar bias value
        X: training data of dimensions (d, m)
        Y: training data labels of dimensions (1, m)
        alpha: learning rate
        n_epochs: number of epochs to train the model
        
    Outputs:
        params: a dictionary to hold parameters w and b
        losses: a list train loss at every epoch
    '''
    losses=[]
    for epoch in range(n_epochs):
        
        # Implement the steps in the logistic regression using the functions defined earlier.
        # For each iteration of the for loop
            # Step 1: Calculate output Z = w.T*X + b
            # Step 2: Apply sigmoid activation: A = sigmoid(Z)
            # Step 3: Calculate loss = logistic_loss(.) between predicted values A and groundtruth labels Y
            # Step 4: Estimate gradient dZ = A-Y
            # Step 5: Estimate gradients dw and db using grad_fn(.).
            # Step 6: Update parameters w and b using gradients dw, db and learning rate
            #         w = w - alpha * dw
            #         b = b - alpha * db

        # your code here
        Z = np.dot(w.T,X) + b
        A = sigmoid(Z)
        loss = logistic_loss(A,Y)
        dZ = A - Y
        dw,db = grad_fn(X,dZ)
        w = w - alpha * dw
        b = b - alpha * db
        
        if epoch%100 == 0:
            losses.append(loss)
            if log == True:
                print("After %i iterations, Loss = %f"%(epoch,loss))
    params ={"w":w,"b":b}
    
    return params,losses    

In [90]:
def model_predict(params,X,Y=np. array([]),pred_threshold=0.5):
    '''
    Function to calculate category predictions on given data and returns the accuracy of the predictions.
    Inputs:
        params: a dictionary to hold parameters w and b
        X: training data of dimensions (d, m)
        Y: training data labels of dimensions (1, m). If not provided, the function merely makes predictions on X
        
    outputs:
        Y_Pred: Predicted class labels for X. Has dimensions (1, m)
        acc: accuracy of prediction over X if Y is provided else, 0 
        loss: loss of prediction over X if Y is provided else, Inf  
    '''
    w = params['w']
    b = params['b']
    m = X.shape[1]
    
    # Calculate Z using X, w and b
    # Calculate A using the sigmoid - A is the set of (1,m) probabilities
    # Calculate the prediction labels Y_Pred of size (1,m) using A and pred_threshold 
    # When A>pred_threshold Y_Pred is 1 else 0
    # your code here
    Z = np.dot(w.T,X) + b
    A = sigmoid(Z)
    Y_Pred = np.zeros(A.shape)
    Y_Pred[A>=pred_threshold] = 1
  
    acc = 0
    loss = float('inf')
    if Y.size!=0:
        loss = logistic_loss(A,Y)
        acc = np.mean(Y_Pred==Y)
    return Y_Pred, acc, loss

In [91]:
alpha = 0.001
n_epochs = 2000

# Write code to initialize parameters w and b with initialize(.) (use train_X to get feature dimensions d)
# Use model_fit(.) to estimate the updated 'params' of the logistic regression model and calculate how the 'losses' varies 
# Use variables 'params' and 'losses' to store the outputs of model_fit(.) 
# your code here
X0 = X.T
Y0 = Y.T
d = X0.shape[0]
w,b = initialize(d, seed=100)
params,losses = model_fit(w,b,X0,Y0,alpha,n_epochs,log=False)


In [92]:
losses

[0.9921305644548102,
 0.1129128026104857,
 0.10500179836980615,
 0.09892968419038174,
 0.09416163153916518,
 0.09034552020878654,
 0.08724047299020152,
 0.08467688370991742,
 0.0825325619379671,
 0.08071776483512903,
 0.07916546155418605,
 0.07782482651743701,
 0.07665679376860804,
 0.07563096161723638,
 0.07472340098836268,
 0.07391507984052702,
 0.07319071454341419,
 0.07253792164232313,
 0.07194658392122749,
 0.07140837134243674]

In [65]:
params

{'w': array([[-0.02540775],
        [-0.28045047],
        [ 0.01317362],
        [-0.12897213]]),
 'b': -0.04513705226906909}

In [93]:
X

array([[6.3200e-03, 6.5750e+00, 6.5200e+01, 4.9800e+00],
       [2.7310e-02, 6.4210e+00, 7.8900e+01, 9.1400e+00],
       [2.7290e-02, 7.1850e+00, 6.1100e+01, 4.0300e+00],
       ...,
       [6.0760e-02, 6.9760e+00, 9.1000e+01, 5.6400e+00],
       [1.0959e-01, 6.7940e+00, 8.9300e+01, 6.4800e+00],
       [4.7410e-02, 6.0300e+00, 8.0800e+01, 7.8800e+00]])