<a href="https://colab.research.google.com/github/PaulToronto/Stanford-Andrew-Ng-Machine-Learning-Specialization/blob/main/Logistic_Regression_with_Sympy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression with `sympy`

## Imports

In [1]:
import sympy as sym
import pandas as pd
import numpy as np

## Functions

In [2]:
def sigmoid(z, clip=False):
    """
    Compute the sigmoid of z

    Parameters
    ----------
    z : number or sympy expression
        Value the sigmoid is evaluated at. `sym.exp(-z)` is applied to it
        directly.
        NOTE(review): array inputs are not exercised anywhere in this
        notebook - confirm before relying on them.
    clip : bool, optional
        When True, z is limited to the interval [-500, 500] before the
        exponential is taken. Defaults to False.

    Returns
    -------
    g : sympy expression
        1.0 / (1.0 + exp(-z))
    """
    if clip:
        if getattr(z, 'free_symbols', None):
            # np.clip has to order z against the bounds, which is undecidable
            # for an expression with free symbols and raises; state the same
            # bound symbolically instead
            z = sym.Max(-500, sym.Min(500, z))
        else:
            z = np.clip( z, -500, 500 )           # protect against overflow
    g = 1.0 / (1.0 + sym.exp(-z))

    return g

## Symbols

In [3]:
# training set data: x_{ij} stands for feature j of training example i
(x11, x12,
 x21, x22,
 x31, x32,
 x41, x42,
 x51, x52,
 x61, x62) = sym.symbols(
    'x_{11} x_{12} '
    'x_{21} x_{22} '
    'x_{31} x_{32} '
    'x_{41} x_{42} '
    'x_{51} x_{52} '
    'x_{61} x_{62}'
)

# weights and bias for the two-feature model; w stacks the weights in a column
w1, w2, b = sym.symbols('w_1 w_2 b')
w = sym.Matrix([w1, w2])

# single weight used by the one-feature (simple) logistic regression
m = sym.Symbol('m')

## Toy Datasets

In [4]:
# One symbolic feature per example; first three labelled 0, last three labelled 1
data1 = pd.DataFrame(dict(
    feature1=[x11, x21, x31, x41, x51, x61],
    target=[0, 0, 0, 1, 1, 1],
))

# Separate the feature column(s) from the label column
x_train1 = sym.Matrix(data1.drop(columns='target'))
y_train1 = sym.Matrix(data1['target'])

In [5]:
# Two symbolic features per example; first three labelled 0, last three labelled 1
data2 = pd.DataFrame(dict(
    feature1=[x11, x21, x31, x41, x51, x61],
    feature2=[x12, x22, x32, x42, x52, x62],
    target=[0, 0, 0, 1, 1, 1],
))

# Separate the feature columns from the label column
x_train2 = sym.Matrix(data2.drop(columns='target'))
y_train2 = sym.Matrix(data2['target'])

In [14]:
# Numeric one-feature dataset
data3 = pd.DataFrame(dict(
    x=[0, 1, 2, 3, 4, 5],
    y=[0, 0, 0, 1, 1, 1],
))

x_train3 = sym.Matrix(data3.drop(columns='y').to_numpy())
y_train3 = sym.Matrix(data3['y'].to_numpy())

# Boolean masks selecting the examples labelled 1 and labelled 0
pos3 = np.array(y_train3) == 1
neg3 = np.array(y_train3) == 0

In [20]:
# Numeric two-feature dataset
data4 = pd.DataFrame(dict(
    x1=[0.5, 1, 1.5, 3, 2, 1],
    x2=[1.5, 1, 0.5, 0.5, 2, 2.5],
    y=[0, 0, 0, 1, 1, 1],
))

x_train4 = sym.Matrix(data4.drop(columns='y').to_numpy())
y_train4 = sym.Matrix(data4['y'].to_numpy())

# Boolean masks selecting the examples labelled 1 and labelled 0
pos4 = np.array(y_train4) == 1
neg4 = np.array(y_train4) == 0

## Prediction

$$
f_{\vec{w},b}\left(\vec{x}\right) = \frac{1}{1 + e^{-(\vec{w}\cdot\vec{x} + b)}}
$$

In [6]:
def f_wb(X, w, b):
    """
    Evaluate the logistic model on every row of X

    Parameters
    ----------
    X : sym.Matrix
        Feature matrix with one example per row.
    w : symbol, number, or sym.Matrix
        Weight(s); wrapped with `sym.Matrix([w])` before use.
    b : symbol or number
        Bias added to every row.

    Returns
    -------
    sym.Matrix
        Column holding sigmoid(X @ w + b) for each row of X.
    """
    n_rows, _ = X.shape
    weights = sym.Matrix([w])                 # lets a lone weight be passed directly
    linear = X @ weights + b * sym.ones(n_rows, 1)
    return linear.applyfunc(sigmoid)

In [7]:
# One-feature symbolic model: weight m and bias b applied to every row of x_train1
f_wb(x_train1, m, b)

Matrix([
[1.0/(exp(-b - m*x_{11}) + 1.0)],
[1.0/(exp(-b - m*x_{21}) + 1.0)],
[1.0/(exp(-b - m*x_{31}) + 1.0)],
[1.0/(exp(-b - m*x_{41}) + 1.0)],
[1.0/(exp(-b - m*x_{51}) + 1.0)],
[1.0/(exp(-b - m*x_{61}) + 1.0)]])

In [8]:
# A single example sliced as a row is still a Matrix, so f_wb accepts it unchanged
f_wb(x_train1[0,:], m, b)

Matrix([[1.0/(exp(-b - m*x_{11}) + 1.0)]])

In [9]:
# Two-feature symbolic model using the weight column w = [w_1, w_2]
f_wb(x_train2, w, b)

Matrix([
[1.0/(exp(-b - w_1*x_{11} - w_2*x_{12}) + 1.0)],
[1.0/(exp(-b - w_1*x_{21} - w_2*x_{22}) + 1.0)],
[1.0/(exp(-b - w_1*x_{31} - w_2*x_{32}) + 1.0)],
[1.0/(exp(-b - w_1*x_{41} - w_2*x_{42}) + 1.0)],
[1.0/(exp(-b - w_1*x_{51} - w_2*x_{52}) + 1.0)],
[1.0/(exp(-b - w_1*x_{61} - w_2*x_{62}) + 1.0)]])

In [21]:
# Inspect the numeric one-feature inputs
x_train3

Matrix([
[0],
[1],
[2],
[3],
[4],
[5]])

In [25]:
# Numeric inputs with symbolic parameters: predictions as functions of m and b
f_wb(x_train3, m, b)

Matrix([
[      1.0/(1.0 + exp(-b))],
[  1.0/(exp(-b - m) + 1.0)],
[1.0/(exp(-b - 2*m) + 1.0)],
[1.0/(exp(-b - 3*m) + 1.0)],
[1.0/(exp(-b - 4*m) + 1.0)],
[1.0/(exp(-b - 5*m) + 1.0)]])

In [26]:
# Two numeric features with symbolic weights w and bias b
f_wb(x_train4, w, b)

Matrix([
[1.0/(exp(-b - 0.5*w_1 - 1.5*w_2) + 1.0)],
[1.0/(exp(-b - 1.0*w_1 - 1.0*w_2) + 1.0)],
[1.0/(exp(-b - 1.5*w_1 - 0.5*w_2) + 1.0)],
[1.0/(exp(-b - 3.0*w_1 - 0.5*w_2) + 1.0)],
[1.0/(exp(-b - 2.0*w_1 - 2.0*w_2) + 1.0)],
[1.0/(exp(-b - 1.0*w_1 - 2.5*w_2) + 1.0)]])

## Cost Function

$$
J\left(\vec{w},b\right) = -\frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)}\log{\left(f_{\vec{w},b}\left(\vec{x}^{(i)}\right)\right)} + \left(1 - y^{(i)}\right)\log{\left(1 - f_{\vec{w},b}\left(\vec{x}^{(i)}\right)\right)}\right)
$$

In [209]:
def compute_cost_loop(X, y, w, b):
    """
    Logistic cost accumulated one training example at a time

    Parameters
    ----------
    X : sym.Matrix
        Feature matrix with one example per row.
    y : sym.Matrix
        Target values, indexed as y[i].
    w : symbol, number, or sym.Matrix
        Weight(s), forwarded to f_wb.
    b : symbol or number
        Bias, forwarded to f_wb.

    Returns
    -------
    cost : sympy expression
        Sum of the per-example losses divided by the number of rows of X.
    """
    n_examples = X.shape[0]

    def example_loss(i):
        # f_wb returns a 1x1 Matrix for a single row; take its only entry
        p_i = f_wb(X[i, :], w, b)[0]
        return -y[i] * sym.log(p_i) - (1 - y[i]) * sym.log(1 - p_i)

    total = 0.0
    for i in range(n_examples):
        total = total + example_loss(i)

    return total / n_examples

In [381]:
def compute_cost(X, y, w, b):
    """
    Logistic cost computed with matrix products instead of a loop

    Parameters
    ----------
    X : sym.Matrix
        Feature matrix with one example per row.
    y : sym.Matrix
        Column of target values.
    w : symbol, number, or sym.Matrix
        Weight(s), forwarded to f_wb.
    b : symbol or number
        Bias, forwarded to f_wb.

    Returns
    -------
    cost : sympy expression
        The single entry of the resulting 1x1 matrix.
    """
    n_examples = X.shape[0]
    ones_col = sym.ones(n_examples, 1)

    preds = f_wb(X, w, b)
    # y^T log(f) picks up the examples labelled 1 ...
    positive_term = -y.T @ preds.applyfunc(sym.log)
    # ... and (1 - y)^T log(1 - f) the examples labelled 0
    negative_term = (ones_col - y).T @ (ones_col - preds).applyfunc(sym.log)

    scaled = sym.Rational(1, n_examples) * (positive_term - negative_term)
    return scaled[0, 0]

In [386]:
# Symbolic cost for the one-feature symbolic dataset, loop version
compute_cost_loop(x_train1, y_train1, m, b)

-log(1 - 1.0/(exp(-b - m*x_{11}) + 1.0))/6 - log(1 - 1.0/(exp(-b - m*x_{21}) + 1.0))/6 - log(1 - 1.0/(exp(-b - m*x_{31}) + 1.0))/6 - log(1.0/(exp(-b - m*x_{41}) + 1.0))/6 - log(1.0/(exp(-b - m*x_{51}) + 1.0))/6 - log(1.0/(exp(-b - m*x_{61}) + 1.0))/6

In [387]:
# Same inputs through the vectorized version, for comparison with the loop result
compute_cost(x_train1, y_train1, m, b)

-log(1 - 1.0/(exp(-b - m*x_{11}) + 1.0))/6 - log(1 - 1.0/(exp(-b - m*x_{21}) + 1.0))/6 - log(1 - 1.0/(exp(-b - m*x_{31}) + 1.0))/6 - log(1.0/(exp(-b - m*x_{41}) + 1.0))/6 - log(1.0/(exp(-b - m*x_{51}) + 1.0))/6 - log(1.0/(exp(-b - m*x_{61}) + 1.0))/6

In [259]:
# Symbolic cost for the two-feature symbolic dataset, loop version
compute_cost_loop(x_train2, y_train2, w, b)

-log(1 - 1.0/(exp(-b - w_1*x_{11} - w_2*x_{12}) + 1.0))/6 - log(1 - 1.0/(exp(-b - w_1*x_{21} - w_2*x_{22}) + 1.0))/6 - log(1 - 1.0/(exp(-b - w_1*x_{31} - w_2*x_{32}) + 1.0))/6 - log(1.0/(exp(-b - w_1*x_{41} - w_2*x_{42}) + 1.0))/6 - log(1.0/(exp(-b - w_1*x_{51} - w_2*x_{52}) + 1.0))/6 - log(1.0/(exp(-b - w_1*x_{61} - w_2*x_{62}) + 1.0))/6

In [388]:
# Same inputs through the vectorized version, for comparison with the loop result
compute_cost(x_train2, y_train2, w, b)

-log(1 - 1.0/(exp(-b - w_1*x_{11} - w_2*x_{12}) + 1.0))/6 - log(1 - 1.0/(exp(-b - w_1*x_{21} - w_2*x_{22}) + 1.0))/6 - log(1 - 1.0/(exp(-b - w_1*x_{31} - w_2*x_{32}) + 1.0))/6 - log(1.0/(exp(-b - w_1*x_{41} - w_2*x_{42}) + 1.0))/6 - log(1.0/(exp(-b - w_1*x_{51} - w_2*x_{52}) + 1.0))/6 - log(1.0/(exp(-b - w_1*x_{61} - w_2*x_{62}) + 1.0))/6

In [277]:
# Numeric one-feature data with symbolic m and b, loop version
compute_cost_loop(x_train3, y_train3, m, b)

-log(1 - 1.0/(1.0 + exp(-b)))/6 - log(1 - 1.0/(exp(-b - 2*m) + 1.0))/6 - log(1 - 1.0/(exp(-b - m) + 1.0))/6 - log(1.0/(exp(-b - 5*m) + 1.0))/6 - log(1.0/(exp(-b - 4*m) + 1.0))/6 - log(1.0/(exp(-b - 3*m) + 1.0))/6

In [389]:
# Same inputs through the vectorized version, for comparison with the loop result
compute_cost(x_train3, y_train3, m, b)

-log(1 - 1.0/(1.0 + exp(-b)))/6 - log(1 - 1.0/(exp(-b - 2*m) + 1.0))/6 - log(1 - 1.0/(exp(-b - m) + 1.0))/6 - log(1.0/(exp(-b - 5*m) + 1.0))/6 - log(1.0/(exp(-b - 4*m) + 1.0))/6 - log(1.0/(exp(-b - 3*m) + 1.0))/6

In [263]:
# Fully numeric evaluation with w = [1, 1] and b = -3, loop version
compute_cost_loop(x_train4, y_train4, sym.Matrix([1, 1]), -3)

0.366866786405517

In [392]:
# Same numeric inputs (w = [1, 1], b = -3) through the vectorized version
compute_cost(x_train4, y_train4, sym.Matrix([1, 1]), -3)

0.366866786405517

In [264]:
# Fully numeric evaluation with w = [1, 1] and b = -4, loop version
compute_cost_loop(x_train4, y_train4, sym.Matrix([1, 1]), -4)

0.503680863674846

In [393]:
# Same numeric inputs (w = [1, 1], b = -4) through the vectorized version
compute_cost(x_train4, y_train4, sym.Matrix([1, 1]), -4)

0.503680863674846

## The Gradient

$$
\begin{align}
\frac{\partial J\left(\vec{w},b\right)}{\partial w_j} &= \frac{1}{m}\sum_{i=1}^{m}\left(f_{\vec{w},b}\left(\vec{x}^{(i)}\right) - y^{(i)}\right)x_j^{(i)} \\
\frac{\partial J\left(\vec{w},b\right)}{\partial b} &= \frac{1}{m}\sum_{i=1}^{m}\left(f_{\vec{w},b}\left(\vec{x}^{(i)}\right) - y^{(i)}\right)
\end{align}
$$

In [394]:
def compute_gradient_loop(X, y, w, b):
    """
    Gradient of the logistic cost, accumulated one training example at a time

    Implements
        dJ/dw_j = (1/m) * sum_i (f_wb(x_i) - y_i) * x_ij
        dJ/db   = (1/m) * sum_i (f_wb(x_i) - y_i)

    Parameters
    ----------
    X : sym.Matrix
        Feature matrix with one example per row.
    y : sym.Matrix
        Target values, indexed as y[i].
    w : symbol, number, or sym.Matrix
        Weight(s), forwarded to f_wb.
    b : symbol or number
        Bias, forwarded to f_wb.

    Returns
    -------
    dj_dw : sym.Matrix
        Column with one partial derivative per column of X.
    dj_db : sympy expression
        Partial derivative with respect to the bias.
    """
    n_examples, n_features = X.shape

    dj_dw = sym.zeros(n_features, 1)
    dj_db = sym.Integer(0)
    for i in range(n_examples):
        # prediction error for example i; f_wb returns a 1x1 Matrix for one row
        err_i = f_wb(X[i, :], w, b)[0] - y[i]
        for j in range(n_features):
            dj_dw[j] += err_i * X[i, j]
        dj_db += err_i

    # average over the training examples
    return dj_dw / n_examples, dj_db / n_examples

https://colab.research.google.com/drive/1WfW2UehNdCORcvo9r2OYPGcOlDOQ38MQ