In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
# pd.options.display.max_rows = 999
import numpy as np
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
from pprint import pprint

In this exercise, we will <b>build a logistic regression model from scratch</b> and use it to <b>classify whether a patient has diabetes or not</b>.

# Load iris dataset
- Load the iris dataset from sklearn, use data for the classes 0 and 1 only for this example
- Split chosen data into train set and test set

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
## testing purpose
# X = np.append(iris.data[:3], iris.data[50:53], axis=0)
# y = np.append(iris.target[:3], iris.target[50:53], axis=0)[:, np.newaxis]
X = iris.data[:100]
y = iris.target[:100][:, np.newaxis]
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, stratify=y, random_state=42)

# Construct logistic regression model

## Compute prediction

### Write sigmoid function
- Recall sigmoid function is defined by:<br> 
$$S(x) = \frac{1}{1 + e^{-x}}$$

In [3]:
# sigmoid function
def sigmoid(x):
    """
    Args:
        x (numpy array): the linear combination of features
    
    Returns:
        return a probability (0-1)
    """
    return 1 / (1 + np.exp(-x))

In [4]:
# test sigmoid
sigmoid(0)

0.5

### Write predict function
- The predicted probabiliy of logistic regression model is given by:<br>
$$h_{\beta}(x) = S(\beta \cdot x)$$

In [5]:
# y_hat function
def y_hat(beta, X):
    """
    Args:
        beta (numpy array): weight vector in linear regression, dimension 1 x n
        X (numpy array): training data, dimension m x n
        
    Returns:
        return predicted probabilities, dimension 1 x n
    """
    return sigmoid(np.dot(X, beta))

In [6]:
# choose the first two rows of training data for trial
beta = np.zeros((X.shape[1], 1))
Xtry = Xtrain[0:2]
ytry = ytrain[0:2]

In [7]:
# test yhat with trial data
yhat = y_hat(beta, Xtry)
yhat

array([[0.5],
       [0.5]])

## Write cost function
- Suppose we have $m$ rows of training data and $n$ features
- Recall the cost function of logistic regression is given by:<br> 
$$J(\beta) = -\frac{1}{m}\sum_{i=1}^{m}[y_i log(\hat{y_i})+(1-y_i)log(1-\hat{y_i})]$$
where $y_i$ and $\hat{y_i}$ are the true values and predicted values of i-th row of training data respectively.

In [8]:
# cost function per row of data
def cost_per_row(y, yhat):
    if y == 1:
        return np.multiply(y, np.log(yhat)) 
    else:
        return np.multiply(1-y, np.log(1 - yhat))

In [9]:
# test cost per row function
cost_per_row(0, 0.5)

-0.6931471805599453

In [10]:
# cost function of logistic regression
def cost(Y, Yhat):
    """
    Args:
        y (numpy array): labels of training data, dimension 1 x m
        yhat (numpy array): predictied values of training data, dimension 1 x m
        
    Returns:
        return the cost of beta
    """
    m = len(y)
    df1 = pd.DataFrame(data={'y': Y.ravel(), 'yhat': Yhat.ravel()})
    diff = df1.apply(lambda row: cost_per_row(row['y'], row['yhat']), axis=1).to_numpy().reshape(-1, 1)
    return -np.mean(diff)

In [11]:
# test cost functon
cost(ytry, yhat)

0.6931471805599453

## Calculate the gradient
- We're going to use gradient descent for optimizing the cost function above.
- Write a function to compute gradient for updating the weight $\beta$ when feeding each row of training data
- Recall in sec 2.1 the weight updating formula is given by:<br>
$$\beta_{k+1} = \beta_{k} - \alpha \frac{\partial J}{\partial \beta}(\beta_k)$$
where $\alpha$ is a fixed learning rate, $k$ is the number of iterations/epochs.
- The gradient is<br><br>
$$\frac{\partial J}{\partial \beta} = (\frac{\partial J}{\partial \beta_0}, \frac{\partial J}{\partial \beta_1},..., \frac{\partial J}{\partial \beta_n})$$
- We only need to compute $$\frac{\partial J}{\partial \beta_j} = \frac{1}{m}\sum_{i=1}^{m}(\hat{y_i}-y_i)x_{ij}$$ for each $j = 0, 1, 2,... ,n$.
- Derivation of the gradient formula of logistic regression at p.18 of:<br> 
https://see.stanford.edu/materials/aimlcs229/cs229-notes1.pdf

In [12]:
# function to compute gradient
def gradient(beta, X, y, yhat):
    # initialize the gradient as a zero vector
    grad = np.zeros(beta.shape)
    
    # compute delta J/delta beta for each j
    for j in range(X.shape[1]):
        first = np.multiply(yhat - y, X[:, j][: ,np.newaxis])
        grad[j] = np.mean(first)
    
    return grad

In [13]:
# test gradient function
gradient(beta, Xtry, ytry, yhat)

array([[-3.  ],
       [-1.45],
       [-2.05],
       [-0.7 ]])

## Write function for gradient descent
- Initialize beta as a zero vector, for each epoch, feed the whole train set to calculate: 
    1. yhat (prediction of training data using existing beta)
    2. gradient vector
    3. update beta using formula in 2.3
    4. cost/loss of existing prediction
- Remember to add a constant column (e.g. a column with all 1) to Xtrain with represents the <b>intercept</b> of linear regression

In [14]:
def train_grad_desecnt(X, y, epochs=1000, alpha=0.1):
    """
    Args:
        epochs (int): no. of iterations allowed for optimization
        alpha (floats): learning rate
    
    Returns:
        beta (numpy array): the final weight
        loss (float): the final cost
    """
    m, n = X.shape
    beta = np.zeros((n+1, 1))    # +1 accounts for constant coefficient beta_0
    X = np.hstack((np.ones((m, 1)), X))    # add a constant column for constant coefficient
    losses = []
    
    # training loop
    for epoch in range(epochs):
        yhat = y_hat(beta, X)
        grad = gradient(beta, X, y, yhat)
                        
        # update beta and loss
        beta -= alpha*grad
        loss = cost(y, yhat)
        losses.append(loss)
        
        # log result of each epoch
        print("*"*50)
        print(f'epoch = {epoch}')
        print(f'beta = {beta}')
        print(f'grad = {grad}')
        print(f'yhat = {yhat[:50]}')
        print(f'y = {y[:50]}')
        print(f'loss = {loss}')
        
    return beta, losses

In [15]:
%%time
# fit training set into train_gard_descent function
beta_final, losses = train_grad_desecnt(Xtrain, ytrain)
print('beta_final = ', beta_final)
print('loss = ', losses[-1])

**************************************************
epoch = 0
beta = [[ 0.00066667]
 [ 0.0286    ]
 [-0.0156    ]
 [ 0.0714    ]
 [ 0.02706667]]
grad = [[-0.00666667]
 [-0.286     ]
 [ 0.156     ]
 [-0.714     ]
 [-0.27066667]]
yhat = [[0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]]
loss = 0.6931471805599454
**************************************************
epoch = 1
beta = [[-0.00697957]
 [ 0.00963837]
 [-0.05585879]
 [ 0.11402806]
 [ 0.0

**************************************************
epoch = 49
beta = [[-0.13542967]
 [-0.17453374]
 [-0.80992934]
 [ 1.17896457]
 [ 0.49999363]]
grad = [[ 0.01378923]
 [ 0.01894411]
 [ 0.07985542]
 [-0.11406315]
 [-0.04953872]]
yhat = [[0.88971665]
 [0.86774791]
 [0.0631332 ]
 [0.88630603]
 [0.88765443]
 [0.10426998]
 [0.91797999]
 [0.06191132]
 [0.16580374]
 [0.89576314]
 [0.8941642 ]
 [0.10285605]
 [0.88532655]
 [0.94139309]
 [0.87793651]
 [0.09333849]
 [0.88589803]
 [0.13663095]
 [0.1617152 ]
 [0.18517921]
 [0.10548499]
 [0.90300859]
 [0.1032107 ]
 [0.92207226]
 [0.17002396]
 [0.90250368]
 [0.12472649]
 [0.92842097]
 [0.79872504]
 [0.9378861 ]
 [0.90730946]
 [0.87871939]
 [0.12044391]
 [0.15650508]
 [0.11584799]
 [0.11805084]
 [0.12897272]
 [0.87802576]
 [0.9395676 ]
 [0.14668738]
 [0.17309173]
 [0.80538441]
 [0.92080415]
 [0.85897143]
 [0.19216901]
 [0.87883261]
 [0.93160942]
 [0.10960359]
 [0.13334432]
 [0.90651726]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]


**************************************************
epoch = 97
beta = [[-0.18294678]
 [-0.2390532 ]
 [-1.08459352]
 [ 1.57216781]
 [ 0.67235137]]
grad = [[ 0.00741023]
 [ 0.00995944]
 [ 0.04281456]
 [-0.0613915 ]
 [-0.02716291]]
yhat = [[0.9401441 ]
 [0.92239999]
 [0.02470804]
 [0.93713622]
 [0.93861484]
 [0.05043836]
 [0.960379  ]
 [0.02406907]
 [0.09843961]
 [0.94446973]
 [0.94339475]
 [0.04935536]
 [0.93678591]
 [0.97536248]
 [0.93106967]
 [0.0429899 ]
 [0.93750027]
 [0.07420104]
 [0.09464662]
 [0.11565096]
 [0.05132655]
 [0.95009846]
 [0.0496633 ]
 [0.96324132]
 [0.10224859]
 [0.94990018]
 [0.06554705]
 [0.96750883]
 [0.85848845]
 [0.97325679]
 [0.95327736]
 [0.93198695]
 [0.06190643]
 [0.09059322]
 [0.0586043 ]
 [0.06003983]
 [0.06842396]
 [0.93091396]
 [0.97413244]
 [0.08257222]
 [0.10496212]
 [0.86529664]
 [0.96226241]
 [0.91526396]
 [0.12197538]
 [0.93153889]
 [0.96934226]
 [0.05406889]
 [0.07195766]
 [0.95222135]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]


 [0.96845913]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]]
loss = 0.0475784491002116
**************************************************
epoch = 144
beta = [[-0.21146691]
 [-0.27717242]
 [-1.24956161]
 [ 1.80884304]
 [ 0.77773894]]
grad = [[ 0.00506638]
 [ 0.00673726]
 [ 0.02935218]
 [-0.04212621]
 [-0.01887607]]
yhat = [[0.95934165]
 [0.94474584]
 [0.01399622]
 [0.95683175]
 [0.95811479]
 [0.03230622]
 [0.97488461]
 [0.01357468]
 [0.0712113 ]
 [0.9627031 ]
 [0.96187627]
 [0.03146816]
 [0.95663055]
 [0.98561743]
 [0.95200282]
 [0.02676044]
 [0.95728724]
 [0.05091713]
 [0.06791636]
 [0.08626498]
 [0.03299425]
 [0.96713223]
 [0.03171972]
 [0.97702286]
 [0.07455103]
 [0.96703727]
 [0.04406158]
 [0.9801298 ]
 [0.88733726]
 [0.98415357]
 [0.96961088]
 [0.9528153 ]
 [0.04109798]
 [0.06

**************************************************
epoch = 190
beta = [[-0.23168918]
 [-0.30397043]
 [-1.36693625]
 [ 1.97731386]
 [ 0.85359793]]
grad = [[ 0.00385974]
 [ 0.00509829]
 [ 0.02244835]
 [-0.03222117]
 [-0.01457775]]
yhat = [[0.96933834]
 [0.9569175 ]
 [0.00934359]
 [0.9671997 ]
 [0.96830517]
 [0.02349164]
 [0.98195817]
 [0.00903222]
 [0.05639975]
 [0.97209866]
 [0.97141891]
 [0.022811  ]
 [0.96705331]
 [0.99025067]
 [0.96314834]
 [0.01907703]
 [0.96764162]
 [0.03886773]
 [0.05349696]
 [0.06981837]
 [0.02405496]
 [0.97573413]
 [0.02302231]
 [0.98365465]
 [0.05937798]
 [0.9756859 ]
 [0.03313262]
 [0.98607488]
 [0.90484314]
 [0.98914152]
 [0.97776536]
 [0.96385744]
 [0.03064526]
 [0.05054395]
 [0.02857264]
 [0.02944126]
 [0.03496113]
 [0.96298272]
 [0.98957084]
 [0.04478436]
 [0.06144496]
 [0.9110258 ]
 [0.98306693]
 [0.95169381]
 [0.07490293]
 [0.96338075]
 [0.98705606]
 [0.02568557]
 [0.03740594]
 [0.97699315]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

**************************************************
epoch = 234
beta = [[-0.24694524]
 [-0.32407311]
 [-1.4558406 ]
 [ 2.104912  ]
 [ 0.91155205]]
grad = [[ 0.00313999]
 [ 0.00412813]
 [ 0.01833542]
 [-0.02631249]
 [-0.011996  ]]
yhat = [[0.9753221 ]
 [0.96444261]
 [0.00688457]
 [0.97345349]
 [0.97442157]
 [0.01844953]
 [0.98599918]
 [0.00663771]
 [0.04722313]
 [0.97768042]
 [0.97709704]
 [0.01787413]
 [0.97333215]
 [0.99275759]
 [0.96992938]
 [0.01476309]
 [0.97386264]
 [0.03166216]
 [0.04461471]
 [0.05941793]
 [0.01893017]
 [0.98077378]
 [0.01805702]
 [0.98740779]
 [0.04992955]
 [0.98074989]
 [0.02667873]
 [0.98938985]
 [0.91655404]
 [0.99186641]
 [0.98250326]
 [0.97055627]
 [0.0245258 ]
 [0.0419672 ]
 [0.02276367]
 [0.02349927]
 [0.02825309]
 [0.96977768]
 [0.99220695]
 [0.03686157]
 [0.05178902]
 [0.92243635]
 [0.98692215]
 [0.95978277]
 [0.06408981]
 [0.970114  ]
 [0.99018886]
 [0.02029641]
 [0.0303896 ]
 [0.98183875]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

beta = [[-0.25932055]
 [-0.3403127 ]
 [-1.52824547]
 [ 2.20880154]
 [ 0.95907172]]
grad = [[ 0.00265413]
 [ 0.00347683]
 [ 0.01555919]
 [-0.02232116]
 [-0.01024219]]
yhat = [[0.97936156]
 [0.96965083]
 [0.00537174]
 [0.97769994]
 [0.97855971]
 [0.01515497]
 [0.9886313 ]
 [0.00516767]
 [0.04084933]
 [0.98142664]
 [0.98091293]
 [0.01465576]
 [0.97759233]
 [0.9943237 ]
 [0.97456583]
 [0.011984  ]
 [0.97807486]
 [0.02679088]
 [0.03847188]
 [0.05207925]
 [0.01557579]
 [0.98412118]
 [0.01481734]
 [0.98983488]
 [0.04334115]
 [0.98411164]
 [0.02235837]
 [0.99150972]
 [0.92517231]
 [0.99358159]
 [0.98562992]
 [0.97512691]
 [0.02045576]
 [0.03605621]
 [0.01891776]
 [0.01955897]
 [0.0237497 ]
 [0.97442729]
 [0.99386293]
 [0.03144717]
 [0.04503786]
 [0.93078771]
 [0.98942247]
 [0.96542497]
 [0.05642155]
 [0.97471831]
 [0.99218397]
 [0.01675476]
 [0.02565499]
 [0.98504919]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [

epoch = 329
beta = [[-0.27194787]
 [-0.35682446]
 [-1.60244072]
 [ 2.31521707]
 [ 1.00806999]]
grad = [[ 0.00223438]
 [ 0.00291677]
 [ 0.01315918]
 [-0.01886905]
 [-0.00871728]]
yhat = [[0.98284466]
 [0.97424301]
 [0.00416844]
 [0.98138047]
 [0.98213527]
 [0.01239076]
 [0.99082971]
 [0.0040007 ]
 [0.03520298]
 [0.98464009]
 [0.98419018]
 [0.01196095]
 [0.98128283]
 [0.99558391]
 [0.97861018]
 [0.00968138]
 [0.98171703]
 [0.02257781]
 [0.03305051]
 [0.04548587]
 [0.01275699]
 [0.98696663]
 [0.01210257]
 [0.99184914]
 [0.03748407]
 [0.98696777]
 [0.01865506]
 [0.99325182]
 [0.93320322]
 [0.99497125]
 [0.98827224]
 [0.97910678]
 [0.01698662]
 [0.03085623]
 [0.0156528 ]
 [0.01620899]
 [0.01987927]
 [0.97848571]
 [0.99520209]
 [0.02671965]
 [0.03902195]
 [0.93853062]
 [0.99150266]
 [0.97043536]
 [0.04950097]
 [0.97873504]
 [0.99381718]
 [0.01376795]
 [0.02156721]
 [0.98777143]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]


epoch = 371
beta = [[-0.28076537]
 [-0.36832102]
 [-1.65446505]
 [ 2.38979953]
 [ 1.04261222]]
grad = [[ 0.00198055]
 [ 0.00257938]
 [ 0.01170632]
 [-0.01677872]
 [-0.00778952]]
yhat = [[0.98494444]
 [0.9770651 ]
 [0.00349087]
 [0.98360905]
 [0.98429476]
 [0.01076102]
 [0.99211913]
 [0.00334475]
 [0.0317149 ]
 [0.98656856]
 [0.98615916]
 [0.01037485]
 [0.98351672]
 [0.99629958]
 [0.98107301]
 [0.00833832]
 [0.98391814]
 [0.02002799]
 [0.02971196]
 [0.04136317]
 [0.01109279]
 [0.98866126]
 [0.01050363]
 [0.99302408]
 [0.03385481]
 [0.98866791]
 [0.01643122]
 [0.99425952]
 [0.9383799 ]
 [0.99576521]
 [0.98983793]
 [0.98152679]
 [0.01491329]
 [0.02766319]
 [0.01370814]
 [0.01421114]
 [0.01754962]
 [0.98095834]
 [0.9959659 ]
 [0.02383532]
 [0.03528683]
 [0.94349953]
 [0.99271856]
 [0.97353376]
 [0.04515687]
 [0.98118126]
 [0.99475858]
 [0.01199916]
 [0.01909708]
 [0.98938891]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]


y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]]
loss = 0.017377852161521358
**************************************************
epoch = 420
beta = [[-0.28986581]
 [-0.38015891]
 [-1.70836385]
 [ 2.46703339]
 [ 1.07856215]]
grad = [[ 0.0017482 ]
 [ 0.0022715 ]
 [ 0.01037475]
 [-0.01486263]
 [-0.00693562]]
yhat = [[0.98685956]
 [0.97968017]
 [0.00290599]
 [0.98564896]
 [0.98626742]
 [0.00930011]
 [0.99326887]
 [0.00277937]
 [0.02846578]
 [0.98832087]
 [0.98795   ]
 [0.00895505]
 [0.98556113]
 [0.99692097]
 [0.98333822]
 [0.00714499]
 [0.98592989]
 [0.01769249]
 [0.02661004]
 [0.03748438]
 [0.00959929]
 [0.99019158]
 [0.00907153]
 [0.99406703]
 [0.03046565]
 [0.99020254]
 [0.01440759]
 [0.99514793]
 [0.94337071]
 [0.99645803]
 [0.99124587]
 [0.98375003]
 [0.01303387]
 [0.02470367]
 [0.0

**************************************************
epoch = 471
beta = [[-0.29826733]
 [-0.39106371]
 [-1.7583238 ]
 [ 2.5385858 ]
 [ 1.11203454]]
grad = [[ 0.0015575 ]
 [ 0.00201953]
 [ 0.00928017]
 [-0.01328749]
 [-0.00623077]]
yhat = [[0.98842441]
 [0.98185033]
 [0.00245269]
 [0.98732158]
 [0.98788183]
 [0.00812539]
 [0.99418782]
 [0.00234185]
 [0.02575351]
 [0.98974742]
 [0.98940936]
 [0.00781492]
 [0.98723729]
 [0.99740485]
 [0.98520453]
 [0.00619364]
 [0.98757715]
 [0.0157745 ]
 [0.02402698]
 [0.03421486]
 [0.008397  ]
 [0.99143005]
 [0.00792087]
 [0.99489696]
 [0.02762952]
 [0.99144394]
 [0.01275644]
 [0.99585023]
 [0.94767605]
 [0.99700023]
 [0.9923806 ]
 [0.98557979]
 [0.01150604]
 [0.02224516]
 [0.010525  ]
 [0.01093591]
 [0.01368893]
 [0.98510852]
 [0.99715156]
 [0.01897827]
 [0.02886503]
 [0.95237382]
 [0.99466126]
 [0.97882769]
 [0.03758958]
 [0.98528534]
 [0.99623816]
 [0.00912368]
 [0.01498454]
 [0.99202371]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

 [0.9928066 ]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]]
loss = 0.014389793090801947
**************************************************
epoch = 514
beta = [[-0.30466718]
 [-0.39935554]
 [-1.79651912]
 [ 2.59326208]
 [ 1.13772269]]
grad = [[ 0.00142604]
 [ 0.00184625]
 [ 0.00852442]
 [-0.01219994]
 [-0.00574238]]
yhat = [[0.98949836]
 [0.98335937]
 [0.00215499]
 [0.98847286]
 [0.9889913 ]
 [0.00732951]
 [0.99480681]
 [0.00205489]
 [0.02385689]
 [0.9907234 ]
 [0.99040865]
 [0.00704334]
 [0.98839099]
 [0.99772365]
 [0.98649447]
 [0.00555375]
 [0.98870972]
 [0.01445162]
 [0.02222438]
 [0.03190972]
 [0.00758165]
 [0.99227317]
 [0.00714181]
 [0.99545392]
 [0.02564214]
 [0.99228872]
 [0.01162392]
 [0.99631894]
 [0.95076975]
 [0.99735897]
 [0.99315037]
 [0.98684334]
 [0.01046132]
 [0.

 [0.99346315]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]]
loss = 0.013373712556139023
**************************************************
epoch = 556
beta = [[-0.31041666]
 [-0.40679408]
 [-1.83094082]
 [ 2.64251482]
 [ 1.16094606]]
grad = [[ 0.00131724]
 [ 0.0017031 ]
 [ 0.00789799]
 [-0.01129855]
 [-0.00533636]]
yhat = [[0.99038355]
 [0.9846166 ]
 [0.00191817]
 [0.98942403]
 [0.98990682]
 [0.00668006]
 [0.99530928]
 [0.00182686]
 [0.02226877]
 [0.99152578]
 [0.9912308 ]
 [0.0064143 ]
 [0.98934421]
 [0.99797779]
 [0.98756391]
 [0.00503466]
 [0.98964465]
 [0.01335625]
 [0.02071746]
 [0.02996655]
 [0.00691579]
 [0.99296357]
 [0.00650642]
 [0.99590467]
 [0.02397517]
 [0.99298025]
 [0.01069046]
 [0.99669655]
 [0.95341745]
 [0.99764597]
 [0.9937789 ]
 [0.98789017]
 [0.00960236]
 [0.

 [ 1.18358275]]
grad = [[ 0.00121958]
 [ 0.00157484]
 [ 0.00733488]
 [-0.01048832]
 [-0.00497038]]
yhat = [[0.9911747 ]
 [0.98575152]
 [0.00171329]
 [0.99027602]
 [0.99072596]
 [0.00610477]
 [0.99575203]
 [0.00162979]
 [0.02082785]
 [0.9922412 ]
 [0.99196434]
 [0.00585757]
 [0.99019809]
 [0.99819799]
 [0.98852496]
 [0.00457737]
 [0.99048145]
 [0.01237268]
 [0.01935227]
 [0.0281925 ]
 [0.00632552]
 [0.99357689]
 [0.00594385]
 [0.99630075]
 [0.0224603 ]
 [0.99359438]
 [0.00985587]
 [0.99702699]
 [0.95586816]
 [0.99789544]
 [0.99433575]
 [0.98883031]
 [0.00883613]
 [0.01781207]
 [0.00804392]
 [0.00837759]
 [0.01062921]
 [0.98844609]
 [0.99800841]
 [0.01504481]
 [0.02351626]
 [0.96013522]
 [0.99612177]
 [0.98318953]
 [0.03117394]
 [0.98858421]
 [0.99732579]
 [0.0069034 ]
 [0.01170475]
 [0.99405751]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]

**************************************************
epoch = 641
beta = [[-0.32081826]
 [-0.42022664]
 [-1.89348929]
 [ 2.73195632]
 [ 1.20332454]]
grad = [[ 0.00114064]
 [ 0.00147133]
 [ 0.00687905]
 [-0.00983254]
 [-0.00467336]]
yhat = [[0.99181144]
 [0.98667335]
 [0.0015532 ]
 [0.99096309]
 [0.99138589]
 [0.0056454 ]
 [0.99610375]
 [0.00147595]
 [0.01965164]
 [0.99281571]
 [0.9925538 ]
 [0.00541335]
 [0.99088676]
 [0.99837021]
 [0.98930235]
 [0.00421404]
 [0.99115583]
 [0.0115774 ]
 [0.0182394 ]
 [0.02673607]
 [0.00585385]
 [0.99406777]
 [0.00549483]
 [0.99661457]
 [0.02122192]
 [0.99408577]
 [0.00918374]
 [0.99728782]
 [0.95790509]
 [0.99809116]
 [0.99478033]
 [0.98959036]
 [0.00822032]
 [0.01676027]
 [0.00747363]
 [0.0077887 ]
 [0.00991824]
 [0.98922778]
 [0.99819535]
 [0.0141178 ]
 [0.02223236]
 [0.96205537]
 [0.99644889]
 [0.98422756]
 [0.02961599]
 [0.98935663]
 [0.99756579]
 [0.00639625]
 [0.0109395 ]
 [0.99452105]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

**************************************************
epoch = 693
beta = [[-0.32651405]
 [-0.42756908]
 [-1.92789836]
 [ 2.78112628]
 [ 1.22673668]]
grad = [[ 0.00105397]
 [ 0.00135785]
 [ 0.00637774]
 [-0.00911141]
 [-0.00434587]]
yhat = [[0.99250733]
 [0.98769018]
 [0.00138332]
 [0.9917155 ]
 [0.99210788]
 [0.00514715]
 [0.99648312]
 [0.00131287]
 [0.01834728]
 [0.99344219]
 [0.99319701]
 [0.0049319 ]
 [0.99164101]
 [0.99855309]
 [0.99015631]
 [0.00382196]
 [0.99189388]
 [0.01070384]
 [0.01700696]
 [0.02511164]
 [0.0053419 ]
 [0.99460128]
 [0.00500801]
 [0.99695219]
 [0.01984656]
 [0.99461967]
 [0.00844846]
 [0.99756735]
 [0.96020475]
 [0.99829962]
 [0.99526231]
 [0.99042481]
 [0.00754804]
 [0.01559728]
 [0.006852  ]
 [0.00714637]
 [0.00913945]
 [0.99008661]
 [0.99839427]
 [0.01309589]
 [0.02080522]
 [0.96421814]
 [0.9968011 ]
 [0.98537631]
 [0.02787505]
 [0.99020518]
 [0.99782248]
 [0.00584501]
 [0.01009972]
 [0.99502409]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

**************************************************
epoch = 748
beta = [[-0.33208548]
 [-0.43474256]
 [-1.96167002]
 [ 2.82936078]
 [ 1.24978381]]
grad = [[ 0.00097543]
 [ 0.00125519]
 [ 0.00592259]
 [-0.00845681]
 [-0.00404769]]
yhat = [[0.99313459]
 [0.98861585]
 [0.00123491]
 [0.99239514]
 [0.99275938]
 [0.00470152]
 [0.99682028]
 [0.00117055]
 [0.0171526 ]
 [0.99400552]
 [0.99377581]
 [0.00450166]
 [0.99232244]
 [0.9987129 ]
 [0.99093031]
 [0.00347317]
 [0.99256012]
 [0.00991188]
 [0.0158798 ]
 [0.02361463]
 [0.00488367]
 [0.99507933]
 [0.00457281]
 [0.99725141]
 [0.01858486]
 [0.9950979 ]
 [0.00778478]
 [0.9978141 ]
 [0.96235107]
 [0.9984824 ]
 [0.99569302]
 [0.99118069]
 [0.00694254]
 [0.01453543]
 [0.00629304]
 [0.00656839]
 [0.00843552]
 [0.99086513]
 [0.9985685 ]
 [0.0121659 ]
 [0.01949482]
 [0.96623161]
 [0.99711352]
 [0.98642579]
 [0.02626744]
 [0.99097432]
 [0.99804854]
 [0.00535087]
 [0.00933916]
 [0.99547407]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

**************************************************
epoch = 793
beta = [[-0.33634333]
 [-0.44021912]
 [-1.98755798]
 [ 2.86631817]
 [ 1.26749705]]
grad = [[ 0.00091929]
 [ 0.00118191]
 [ 0.00559668]
 [-0.00798815]
 [-0.00383365]]
yhat = [[0.99358072]
 [0.98927996]
 [0.00113216]
 [0.99287943]
 [0.99322323]
 [0.00438662]
 [0.99705713]
 [0.00107211]
 [0.01629065]
 [0.99440534]
 [0.99418689]
 [0.00419784]
 [0.9928081 ]
 [0.99882353]
 [0.99148349]
 [0.00322786]
 [0.99303462]
 [0.00934554]
 [0.01506758]
 [0.02252874]
 [0.00455964]
 [0.99541759]
 [0.00426539]
 [0.99746111]
 [0.01767329]
 [0.99543619]
 [0.00731201]
 [0.99798641]
 [0.96392485]
 [0.99860929]
 [0.99599707]
 [0.99172066]
 [0.00651204]
 [0.01377143]
 [0.00589619]
 [0.00615778]
 [0.00793346]
 [0.99142163]
 [0.99868935]
 [0.01149865]
 [0.01854732]
 [0.96770471]
 [0.99733262]
 [0.9871811 ]
 [0.02509926]
 [0.99152405]
 [0.99820608]
 [0.00500099]
 [0.00879578]
 [0.99579199]]
y = [[1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]

**************************************************
epoch = 841
beta = [[-0.34062296]
 [-0.44571891]
 [-2.01364912]
 [ 2.90354998]
 [ 1.28539007]]
grad = [[ 0.00086604]
 [ 0.00111249]
 [ 0.00528707]
 [-0.00754301]
 [-0.00362989]]
yhat = [[9.94001920e-01]
 [9.89911683e-01]
 [1.03737336e-03]
 [9.93337383e-01]
 [9.93661537e-01]
 [4.09090909e-03]
 [9.97278371e-01]
 [9.81384033e-04]
 [1.54665503e-02]
 [9.94782111e-01]
 [9.94574493e-01]
 [3.91271441e-03]
 [9.93267419e-01]
 [9.98925539e-01]
 [9.92007966e-01]
 [2.99844018e-03]
 [9.93483110e-01]
 [8.80821912e-03]
 [1.42918573e-02]
 [2.14856982e-02]
 [4.25518158e-03]
 [9.95735525e-01]
 [3.97680689e-03]
 [9.97656571e-01]
 [1.68006838e-02]
 [9.95754066e-01]
 [6.86498637e-03]
 [9.98146533e-01]
 [9.65450404e-01]
 [9.98726605e-01]
 [9.96282253e-01]
 [9.92232403e-01]
 [6.10563577e-03]
 [1.30427063e-02]
 [5.52200701e-03]
 [5.77041683e-03]
 [7.45821769e-03]
 [9.91949301e-01]
 [9.98800981e-01]
 [1.08637822e-02]
 [1.76397030e-02]
 [9.69129919e-01]
 [9.9753

**************************************************
epoch = 879
beta = [[-0.34383873]
 [-0.44984842]
 [-2.03330217]
 [ 2.93158405]
 [ 1.29889501]]
grad = [[ 0.00082803]
 [ 0.00106298]
 [ 0.00506569]
 [-0.00722477]
 [-0.0034839 ]]
yhat = [[9.94301386e-01]
 [9.90363808e-01]
 [9.71326926e-04]
 [9.93663435e-01]
 [9.93973412e-01]
 [3.88161359e-03]
 [9.97434192e-01]
 [9.18212027e-04]
 [1.48739682e-02]
 [9.95049557e-01]
 [9.94849774e-01]
 [3.71101769e-03]
 [9.93594508e-01]
 [9.98996582e-01]
 [9.92382257e-01]
 [2.83664432e-03]
 [9.93802314e-01]
 [8.42444609e-03]
 [1.37345851e-02]
 [2.07326156e-02]
 [4.03957873e-03]
 [9.95960690e-01]
 [3.77261293e-03]
 [9.97793984e-01]
 [1.61725570e-02]
 [9.95979139e-01]
 [6.54666416e-03]
 [9.98258805e-01]
 [9.66560548e-01]
 [9.98808488e-01]
 [9.96483861e-01]
 [9.92597477e-01]
 [5.81664580e-03]
 [1.25198125e-02]
 [5.25621789e-03]
 [5.49512916e-03]
 [7.11947403e-03]
 [9.92325909e-01]
 [9.98878841e-01]
 [1.04092145e-02]
 [1.69859920e-02]
 [9.70165268e-01]
 [9.9768

**************************************************
epoch = 926
beta = [[-0.34762626]
 [-0.4547088 ]
 [-2.05650337]
 [ 2.96466728]
 [ 1.31486815]]
grad = [[ 0.00078534]
 [ 0.00100743]
 [ 0.00481673]
 [-0.00686696]
 [-0.00331944]]
yhat = [[9.94636296e-01]
 [9.90872570e-01]
 [8.98824311e-04]
 [9.94028551e-01]
 [9.94322458e-01]
 [3.64849250e-03]
 [9.97606934e-01]
 [8.48914260e-04]
 [1.42041329e-02]
 [9.95348204e-01]
 [9.95157322e-01]
 [3.48647132e-03]
 [9.93960852e-01]
 [9.99074516e-01]
 [9.92802314e-01]
 [2.65702945e-03]
 [9.94159648e-01]
 [7.99335187e-03]
 [1.31052090e-02]
 [1.98781167e-02]
 [3.79931359e-03]
 [9.96211596e-01]
 [3.54523424e-03]
 [9.97946060e-01]
 [1.54618371e-02]
 [9.96229887e-01]
 [6.19009701e-03]
 [9.98382750e-01]
 [9.67829256e-01]
 [9.98898503e-01]
 [9.96708136e-01]
 [9.93007061e-01]
 [5.49335594e-03]
 [1.19299089e-02]
 [4.95917820e-03]
 [5.18733503e-03]
 [6.73968537e-03]
 [9.92748602e-01]
 [9.98964375e-01]
 [9.89742286e-03]
 [1.62459237e-02]
 [9.71346605e-01]
 [9.9783

beta = [[-0.35137244]
 [-0.45951263]
 [-2.07950989]
 [ 2.99745969]
 [ 1.33073944]]
grad = [[ 0.00074523]
 [ 0.00095529]
 [ 0.00458245]
 [-0.0065303 ]
 [-0.00316438]]
yhat = [[9.94949520e-01]
 [9.91351555e-01]
 [8.32351148e-04]
 [9.94370498e-01]
 [9.94649164e-01]
 [3.43138018e-03]
 [9.97766969e-01]
 [7.85428829e-04]
 [1.35703166e-02]
 [9.95627054e-01]
 [9.95444639e-01]
 [3.27745556e-03]
 [9.94304023e-01]
 [9.99145901e-01]
 [9.93196658e-01]
 [2.49034769e-03]
 [9.94494198e-01]
 [7.58817780e-03]
 [1.25102286e-02]
 [1.90662557e-02]
 [3.57542771e-03]
 [9.96445342e-01]
 [3.33352957e-03]
 [9.98086690e-01]
 [1.47886131e-02]
 [9.96463431e-01]
 [5.85599136e-03]
 [9.98497064e-01]
 [9.69043803e-01]
 [9.98981143e-01]
 [9.96916694e-01]
 [9.93391444e-01]
 [5.19085219e-03]
 [1.13729062e-02]
 [4.68153342e-03]
 [4.89949711e-03]
 [6.38346727e-03]
 [9.93145455e-01]
 [9.99042843e-01]
 [9.41521992e-03]
 [1.55444956e-02]
 [9.72475555e-01]
 [9.97987119e-01]
 [9.89551483e-01]
 [2.13616874e-02]
 [9.93226722e-01]

## Write predict function for new data
- The function should use beta calculated in gradient descent and Xtest to return both prediciton and predicted probability.

In [16]:
# use trained beta to predict new data
def predict(beta, X):
    """
    Args:
        X (numpy array): feature to predict
        
    Returns:
        y (numpy array): predict results (0 or 1)
    """
    m = X.shape[0]
    X = np.hstack((np.ones((m, 1)), X))    # add constant column to X for intercept
    pred_proba = sigmoid(np.dot(X, beta)).ravel()
    print(pred_proba.shape)
    pred = np.array([round(x) for x in pred_proba])
    return pred, pred_proba

In [17]:
# find accuracy of model on test data
from sklearn.metrics import accuracy_score
ypred, ypred_proba = predict(beta_final, Xtest)
accuracy_score(ytest, ypred)

(25,)


1.0

In [18]:
# see if the predicted probabilities look normal 
ypred_proba

array([9.95799570e-01, 9.98890025e-01, 9.97852823e-01, 4.20178672e-03,
       9.97741322e-01, 9.98418422e-01, 5.11400991e-02, 9.98265663e-01,
       9.93678890e-01, 9.96196031e-01, 7.46767781e-03, 9.74623091e-01,
       5.47415895e-04, 6.81204464e-03, 9.95258850e-01, 6.50663366e-03,
       3.03637496e-03, 9.99510463e-01, 1.12998599e-02, 1.20260491e-03,
       4.47302413e-03, 2.15560638e-02, 1.03092611e-02, 4.60555138e-03,
       9.78354426e-01])

The accuracy is 1.0 and the prediced probabilities look normal, great! Note that this implementation is only a simple version, it may not work well in more complex dataset. More details like <b>regularization</b> and <b>weighted cost function</b> (for imbalanced data) can be added to advance the model. Also, data preprocessing like <b>feature scaling</b> may be required.