In [1]:
import numpy as np

###### X is an N x M matrix (N samples, M features) and Y is an N x 1 matrix of labels

In [32]:
## Feature matrix: one row per training sample (N = 6), one column per feature (M = 3).
X = np.array([
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
    [0.5, 0.5, 0.5],
    [0.8, 0.2, 0.3],
])

## Binary labels, built from a flat list and reshaped into an N x 1 column vector.
Y = np.array([0, 0, 0, 1, 0, 1]).reshape(-1, 1)

## Show both arrays, separated by a blank line.
print(X, "", Y, sep="\n")

[[0.  0.  0. ]
 [0.  1.  1. ]
 [1.  0.  1. ]
 [1.  1.  1. ]
 [0.5 0.5 0.5]
 [0.8 0.2 0.3]]

[[0]
 [0]
 [0]
 [1]
 [0]
 [1]]


#### Implementation of Logistic Regression by maximizing the log-likelihood

###### We add a column X0, where every X0 is 1, to X so that the bias term is handled by the same matrix multiplication in the next steps. This makes X an N x (M+1) matrix

In [33]:
## Prepend the X0 bias column (all ones) as the first column of X.
## Note: this rebinds X, so running this cell again prepends yet another column.
bias_column = np.ones((len(X), 1))
X = np.hstack((bias_column, X))
X ## N x (M+1) matrix

array([[1. , 0. , 0. , 0. ],
       [1. , 0. , 1. , 1. ],
       [1. , 1. , 0. , 1. ],
       [1. , 1. , 1. , 1. ],
       [1. , 0.5, 0.5, 0.5],
       [1. , 0.8, 0.2, 0.3]])

###### We set our initial learning rate. We can play around with this rate to control how strongly each gradient step changes the params. A larger rate makes progress faster but can overshoot the optimum and make the objective oscillate, whereas a smaller rate makes each step have very little effect, so convergence is slow.
###### We set the number of iterations as well. We can play around with it until the objective stops improving.
###### Updated m = M+1 and n = N

In [51]:
learning_rate = 1e-3 ##Step size applied to every gradient update
n_iterations = 1000 ##No. of full-batch update steps
n, m = X.shape ##n = rows of X (training samples), m = columns of X (features incl. the X0 column)

###### Setting the initial params values. Can set all to 0 as well.

In [52]:
##Initialize random weights
weights = np.random.rand(m, 1)
weights.shape ## Mx1 matrix

(4, 1)

###### Setting up our logistic function

In [53]:
def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), evaluated element-wise.

  Uses a numerically stable formulation: the exponential is only ever taken
  of a non-positive number, so large-magnitude inputs (e.g. z = -1000) no
  longer overflow inside np.exp and trigger a RuntimeWarning.

  Parameters:
    z: scalar or array-like of real numbers.

  Returns:
    Values in [0, 1] with the same shape as z (a NumPy scalar for scalar input).
  """
  z = np.asarray(z, dtype=float)
  # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
  decay = np.exp(-np.abs(z))
  # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- algebraically equal.
  result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
  # Indexing with () turns a 0-d result back into a scalar and leaves arrays as arrays.
  return result[()]

#### Batch Gradient ascent

In [54]:
## Batch gradient ascent on the log-likelihood:
##   theta = theta + alpha * X^T (y - prediction)
for _ in range(n_iterations):
  predicted_prob = sigmoid(np.matmul(X, weights)) ## current probability for every sample
  log_likelihood_grad = np.matmul(X.T, Y - predicted_prob) ## gradient summed over the whole batch
  weights = weights + learning_rate * log_likelihood_grad
weights

array([[-0.27836175],
       [ 0.36026302],
       [-0.0531296 ],
       [-0.38798215]])

###### The raw linear scores (log-odds) obtained by multiplying X by the params. These are not yet probabilities; the sigmoid maps them into the range (0, 1).

In [55]:
## Linear score for every sample: one row of X times the weight vector.
## No sigmoid is applied here, so these values are log-odds, not probabilities.
predictions = np.matmul(X, weights)
predictions

array([[-0.27836175],
       [-0.71947351],
       [-0.30608088],
       [-0.35921049],
       [-0.31878612],
       [-0.1171719 ]])

###### Creating the decision boundary at 0.5

In [56]:
## X @ weights is a log-odds score, not a probability, so it is mapped through
## the sigmoid before the 0.5 probability threshold is applied (equivalent to
## thresholding the raw score at 0). Recomputing from X and weights, rather than
## from the previous value of `predictions`, keeps this cell safe to re-run.
predictions = (sigmoid(X @ weights) >= 0.5).astype(int)
predictions

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0]])