In [1]:
import numpy as np
from numpy import linalg as LA
import time, random
import matplotlib.pyplot as plt

# <center>Cost Function and Gradient as Matrix Multiplication</center>

We defined our cost function as $J(\theta)=\frac{1}{m}\sum_{i=1}^{m}-y_i\log{(g(x_{i,*}\theta))}-(1-y_i)\log{(1-g(x_{i,*}\theta))}$, where $g(z)=\frac{1}{1+e^{-z}}$. Rewriting the term inside the summation gives

$$
-y_i\log{(g(x_{i,*}\theta))}-(1-y_i)\log{(1-g(x_{i,*}\theta))}=
y_i\log{(1+e^{-x_{i,*}\theta})}-(1-y_i)\log{(e^{-x_{i,*}\theta})}+(1-y_i)\log{(1+e^{-x_{i,*}\theta})}
$$

$$
=(1-y_i)(x_{i,*}\theta)+\log{(1+e^{-x_{i,*}\theta})}
$$

$$
=\log{(1+e^{-x_{i,*}\theta})}+(x_{i,*}\theta)-(y_i)(x_{i,*}\theta)
$$

$$
=\log{(1+e^{-x_{i,*}\theta})}-\log{(e^{-x_{i,*}\theta})}-(y_i)(x_{i,*}\theta)
$$

$$
=\log{(1+e^{x_{i,*}\theta})}-(y_i)(x_{i,*}\theta)
$$

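Hence, a more compact way of expressing the cost function is:

$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m} \Bigl[\log{(1+e^{x_{i,*}\theta})}-(y_i)(x_{i,*}\theta)\Bigr]$$

or, in matrix form, where $\tilde{X}$ is the matrix whose $i$-th row is $x_{i,*}$ and $Y$ is the column vector of the labels $y_i$:

$$J(\theta)=-\frac{1}{m}(\tilde{X}\theta)^tY+\frac{1}{m}\sum_{i=1}^{m}\log(1+e^{x_{i,*}\theta})$$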


We can now easily compute partial derivatives

$$\frac{\partial J}{\partial\theta_k} = \frac{1}{m}\sum_{i=1}^{m} \biggl(\frac{x_{ik}e^{x_{i,*}\theta}}{1+e^{x_{i,*}\theta}} - x_{ik}y_i\biggr)$$

$$ = \frac{1}{m}\sum_{i=1}^{m} x_{ik} \biggl(\frac{1}{1+e^{-x_{i,*}\theta}} - y_i\biggr)$$

$$ = \frac{1}{m}\sum_{i=1}^{m} x_{ik} (g(x_{i,*}\theta) - y_i)$$

$$ = \frac{1}{m}(x_{*,k})^t[g(\tilde{X}\theta)-Y]$$

It follows that

$$\nabla J = \frac{1}{m}(\tilde{X})^t[g(\tilde{X}\theta)-Y]$$

In [6]:
def calculateCost(X, Y, theta):
    """Return the logistic-regression cost J(theta) averaged over the samples.

    Evaluates

        J = (1/m) * ( sum_i log(1 + exp(x_i . theta)) - (X @ theta) @ Y )

    with m = len(X).

    Parameters
    ----------
    X : array supporting ``X @ theta``; ``len(X)`` is used as the sample count m.
    Y : labels, combined with the scores through ``(X @ theta) @ Y``.
        Assumes Y and theta are 1-D so that this product is a scalar
        (TODO confirm against callers).
    theta : parameter vector.

    Returns
    -------
    The cost value for the given parameters.

    Raises
    ------
    ZeroDivisionError
        If ``len(X)`` is 0.
    """
    # Evaluated first so an empty X fails with ZeroDivisionError right away.
    inv_m = 1 / len(X)
    scores = X @ theta
    # log(1 + exp(s)) equals logaddexp(0, s). The naive form overflows to inf
    # once s exceeds roughly 709, while logaddexp stays finite.
    softplus_total = np.logaddexp(0, scores).sum()
    return inv_m * (softplus_total - scores @ Y)

In [7]:
def sigmoid(z):
    """Logistic function g(z) = 1 / (1 + e^(-z)), evaluated with NumPy.

    ``z`` may be anything that supports unary minus and ``np.exp``; the
    result follows NumPy's element-wise rules for that input.
    """
    denominator = np.exp(-z) + 1
    return 1 / denominator

def getNewTheta(X, Y, theta, alpha=1):
    """Perform one gradient-descent update and return the new parameters.

    Computes ``theta - ((alpha * X.T) @ (sigmoid(X @ theta) - Y)) / len(X)``.

    Parameters
    ----------
    X : array supporting ``X @ theta`` and ``X.T``; ``len(X)`` is the divisor.
    Y : labels subtracted from ``sigmoid(X @ theta)``.
    theta : current parameter values (not modified; a new value is returned).
    alpha : step size multiplying ``X.T`` (default 1).
    """
    residual = sigmoid(X @ theta) - Y
    # Same evaluation order as the one-line form: scale X.T first, then
    # multiply by the residual, then divide by the number of rows.
    scaled_features = alpha * X.T
    return theta - scaled_features @ residual / len(X)

In [8]:
def runLinearRegression(X, Y, theta, steps, alpha=1, log=False):
    """Run ``steps`` gradient-descent updates and plot the cost after each one.

    Despite the name, each update is delegated to getNewTheta (which applies
    the sigmoid) and each cost to calculateCost, so this drives the
    logistic-regression routines defined in this notebook.

    Parameters
    ----------
    X, Y : forwarded unchanged to calculateCost and getNewTheta.
    theta : starting parameter values.
    steps : number of updates to perform.
    alpha : step size forwarded to getNewTheta (default 1).
    log : when True, both plot axes are switched to a logarithmic scale.

    Returns
    -------
    The parameter values after the last update (``theta`` itself when the
    loop body never runs).
    """
    current_theta = theta
    # The cost at the starting point is recorded before any update.
    cost_history = [calculateCost(X, Y, current_theta)]

    for _ in range(steps):
        current_theta = getNewTheta(X, Y, current_theta, alpha)
        cost_history.append(calculateCost(X, Y, current_theta))

    # One x-coordinate per recorded cost: 0 for the start, then 1, 2, ...
    step_numbers = list(range(len(cost_history)))

    plt.scatter(step_numbers, cost_history)
    plt.title('Cost vs Steps')
    plt.xlabel('Steps')
    plt.ylabel('Cost')

    if log:
        # NOTE(review): step 0 has no position on a logarithmic x-axis;
        # confirm how the starting point should be shown.
        plt.xscale('log')
        plt.yscale('log')

    plt.show()

    return current_theta

In [None]:
# TODO: this code has not been tested yet, because I have not found sample data suitable for logistic regression.