# Building Logistic regression in NN style using python and numpy

In [1]:
import numpy as np

$$z^{(i)} = w^T x^{(i)} + b \tag{1}$$
$$\hat{y}^{(i)} = a^{(i)} = sigmoid(z^{(i)})\tag{2}$$ 
$$ \mathcal{L}(a^{(i)}, y^{(i)}) =  - y^{(i)}  \log(a^{(i)}) - (1-y^{(i)} )  \log(1-a^{(i)})\tag{3}$$



<img src="images/image1.png" style="width:650px;height:400px;">

<img src="images/gradient.png" style="width:350px;height:200px;">


The cost is then computed by averaging the loss over all training examples:
$$ J = \frac{1}{m} \sum_{i=1}^m \mathcal{L}(a^{(i)}, y^{(i)})\tag{6}$$

In [139]:
"""
. Train, Test split
. Initialize the weights with some value (initialize)
. Learn the weights using some algorithm (optimize)
. Predict for test
. Calculate the accuracy
"""
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5):
    # Get the number of features! Why?
    num_of_features = None
    
    # Initialize the weights with some value! What value it should be?
    w, b = initialize(None)
    
    # Optimize/learn the weights
    parameters = learn(None)
    
    # Get the parameters
    w = None
    b = None
    
    Y_prediction_test = predict(None)
    Y_prediction_train = predict(None)

    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    return


In [None]:
"""
Create a weight matrix of size = dim,1
Hint: use np.zeros to initilze it.
Question: Is b a matrix, vector or a scaler?
"""
def initialize(dim):
    w = None
    b = None
    return w, b

### Learning
The objective of the function is to learn the weights.


Optimizer: Gradient Descent.



<img src="images/gradient.png" style="width:350px;height:200px;">



Formula:

$$ w = w - \frac{\partial J}{\partial w} * learningrate $$
$$ b = b - \frac{\partial J}{\partial b} * learningrate $$


In [None]:
def learn(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """
    Learn w and b by running batch gradient descent.

    Arguments:
    w -- initial weights, shape (number of features, 1)
    b -- initial bias, a scalar
    X -- inputs, shape (number of features, number of examples)
    Y -- 0/1 labels, shape (1, number of examples)
    num_iterations -- number of update steps to run
    learning_rate -- step size of each update
    print_cost -- when True, print the cost every 100 iterations

    Returns:
    dict with the learned "w" and "b", plus "costs": the cost recorded every
    100 iterations (for checking that gradient descent is converging).
    """
    costs = []

    # Iterate
    for i in range(num_iterations):
        # Calculate dw and db (propagate also returns the cost, used only for logging)
        dw, db, cost = propagate(w, b, X, Y)

        # Update w and b. Rebinding instead of `-=` leaves the caller's array untouched.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Keep a sparse history of the cost rather than one entry per step.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration {}: {}".format(i, cost))

    return {"w": w, "b": b, "costs": costs}

We will calculate three things:
1. cost : Value of the cost function; this is only for logging, to see whether gradient descent is actually reducing it.
    $$ J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})\right]$$
2. dw : First derivative of cost function with respect to w.
    $$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T$$
3. db : First derivative of cost function with respect to b.
    $$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})$$
    
    
But we need these:
$$z^{(i)} = w^T x^{(i)} + b $$
$$\hat{y}^{(i)} = a^{(i)} = sigmoid(z^{(i)})$$ 
$$ \mathcal{L}(a^{(i)}, y^{(i)}) =  - y^{(i)}  \log(a^{(i)}) - (1-y^{(i)} )  \log(1-a^{(i)})$$

In [None]:
def propagate(w, b, X, Y):
    """
    Run one forward pass and compute the cost and its gradients.

    Arguments:
    w -- weights, shape (number of features, 1)
    b -- bias, a scalar
    X -- inputs, shape (number of features, number of examples)
    Y -- 0/1 labels, shape (1, number of examples)

    Returns:
    dw -- gradient of the cost with respect to w, same shape as w
    db -- gradient of the cost with respect to b, a scalar
    cost -- average cross-entropy loss over all examples (for logging only)
    """
    # Each example (a row of the original dataset) is a column of X here.
    number_of_rows = X.shape[1]

    # Forward pass: z = w^T x + b, a = sigmoid(z), for all examples at once.
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)
    cost = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / number_of_rows

    # Backward pass: dJ/dw = X (A - Y)^T / m, dJ/db = sum(A - Y) / m.
    dw = np.dot(X, (A - Y).T) / number_of_rows
    db = np.sum(A - Y) / number_of_rows

    return dw, db, cost

We need to compute $sigmoid( w^T x + b) = \frac{1}{1 + e^{-(w^T x + b)}}$ to make predictions.

Hint: Use np.exp().

In [None]:
def sigmoid(z):
    """
    Compute the sigmoid 1 / (1 + e^(-z)) of z.

    Arguments:
    z -- a scalar or a numpy array of any shape

    Returns:
    s -- sigmoid(z), element-wise, with the same shape as z
    """
    s = 1 / (1 + np.exp(-z))
    return s

In [None]:
def predict(w, b, X):
    """
    Predict a 0/1 label for every example (column) of X.

    Arguments:
    w -- weights; reshaped internally to (X.shape[0], 1)
    b -- bias added to every activation
    X -- inputs, one example per column

    Returns:
    Y_prediction -- array of shape (1, X.shape[1]) holding 1.0 where the
    sigmoid activation is strictly greater than 0.5 and 0.0 elsewhere
    """
    weights = w.reshape(X.shape[0], 1)
    activations = sigmoid(np.dot(weights.T, X) + b)

    # Start from all zeros and switch on the entries whose probability exceeds 0.5.
    Y_prediction = np.zeros((1, X.shape[1]))
    Y_prediction[0, activations[0] > 0.5] = 1

    return Y_prediction

In [149]:
# Train and evaluate the classifier. X_train / Y_train / X_test / Y_test are created in the
# "Dataset" section further down, so those cells have to be run before this one.
d = model(X_train, Y_train, X_test, Y_test, num_iterations = 20000, learning_rate = 0.01, print_cost = True)

@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0.]] 0.0
@#@#@#@ 0.0 [[-0.]
 [-0.]]
[[-0.]
 [-0.]] 0.0 [[0.]
 [0

### Dataset

In [None]:
import matplotlib.pyplot as plt
import sklearn.datasets
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
# Generate a toy two-class dataset. random_state is fixed so every run produces the same
# points, matching the fixed seed already used for the train/test split below.
X,Y = sklearn.datasets.make_circles(n_samples=1000, factor=.5, noise=.3, random_state=42)
#X,Y = sklearn.datasets.make_moons(n_samples=1000, noise=.2, random_state=42)
#X, Y = sklearn.datasets.make_blobs(n_samples=1000, random_state=2, n_features=2, centers=2)
print(X.shape, Y.shape)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
# sklearn returns X as (examples, features) and Y as (examples,). The model code expects
# one example per column, so transpose X and turn Y into a (1, examples) row vector.
X, Y = X.T, Y.reshape(1, Y.shape[0])
X_train, Y_train = X_train.T, Y_train.reshape(1, Y_train.shape[0])
X_test, Y_test = X_test.T, Y_test.reshape(1, Y_test.shape[0])
plt.scatter(X[0, :], X[1, :], c=Y[0], s=10, cmap=plt.cm.Spectral);