In [1]:
# Logistic regression generally has no closed-form solution, so we normally use gradient descent to find the weights.

# What is closed form solution anyway?

# In our example, closed form will be possible as we have two gaussian distributions with same covariance and different mean.

# What is multivariate gaussian distribution?

# P(Y|X) = (P(X|Y) * P(Y)) / P(X)

# P(Y|X) - posterior,   P(X|Y) - likelihood, P(Y) - prior,   

# Now, if we manipulate Bayes' rule a little bit for P(Y = 1 | X), we get
# p(y=1|x) = 1 / (1 + (p(x|y=0)*p(y=0)) / (p(x|y=1)*p(y=1)))

# looks a lot like logistic regression's sigmoid function, if we compare them we will find

# -(w.T.x + b) = ln( (p(x|y=0)*p(y=0)) / (p(x|y=1)*p(y=1)) )

# we can solve it 

# Covariance and its inverse is symmetric. 

# If the covariance is a diagonal matrix then it's a case of (Gaussian) Naive Bayes

# Above method is called LDA since we have same covariances and if we have different covariances then it would be QDA

In [2]:
# Logistic Regression Error - Squared Error
# J = (t - y)^2
# Squared error can't be used for logistic regression because it assumes the errors are Gaussian distributed.
# Mean squared error is not an arbitrary choice: when the errors are normally distributed, the cost function
# that comes out of maximum likelihood is the mean squared error (this can be proven).


# But logistic regression error can't be gaussian distributed as target is 0 or 1 and y is a number between 0 and 1.

# So for logistic regression we get something called cross entropy error or log loss. 
# This error function is 0 when there is no error, and when there is an error the cost grows
# the more wrong the prediction is.

# J = -( t*log(y) + (1-t)*log(1-y) ), where t is the target (0 or 1) and y is the output of the logistic function.

# For any single sample only one of the two terms is active, because t is either 0 or 1:
# t = 1 leaves -log(y) and t = 0 leaves -log(1-y). We can check this for different values of t and y.

In [3]:
import numpy as np
import pandas as pd

In [4]:
# N: number of samples in the synthetic data set; D: number of input features per sample.
N, D = 100, 2

In [13]:
# Creating data
# NOTE(review): no random seed is set in the visible cells, so X, w and the
# printed error differ from run to run -- confirm whether that is intended.

# Size of the first class; derived from N so the split stays valid if N changes.
half = N // 2

X = np.random.randn(N, D)

# center the first half of the points at (-2, ..., -2); the scalar broadcasts over the slice
X[:half, :] = X[:half, :] - 2

# center the remaining points at (2, ..., 2)
X[half:, :] = X[half:, :] + 2

# Creating target values: class 0 for the first half, class 1 for the rest
T = np.array([0]*half + [1]*(N - half))


# Now add the bias term here in X only: a leading column of ones, so w[0] acts as the bias
ones = np.ones((N, 1))
Xb = np.concatenate((ones, X), axis=1)

# randomly initialize weights (one per feature plus the bias)
w = np.random.randn(D+1)

# calculate the model output (linear scores, before the sigmoid)
z = Xb.dot(w)


def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    ``z`` may be a scalar or a NumPy array; the result has the same shape.
    """
    exp_neg = np.exp(-z)
    return 1 / (1 + exp_neg)



# Squash the linear scores z through the sigmoid to get the model's predicted P(y=1|x) per sample
y = sigmoid(z)


# calculate the cross entropy error
def cross_entropy(T, y):
    """Return the total (summed, not averaged) binary cross-entropy error.

    For every sample the error is ``-log(y)`` when the target equals 1 and
    ``-log(1 - y)`` otherwise, i.e. ``-(t*log(y) + (1-t)*log(1-y))`` for
    targets in {0, 1}.

    Parameters
    ----------
    T : array-like
        Targets. Entries equal to 1 are the positive class; any other value
        is treated as class 0.
    y : array-like
        Predicted probabilities, same shape as ``T``.

    Returns
    -------
    numpy.float64
        Sum of the per-sample errors (0.0 for empty input).

    Raises
    ------
    ValueError
        If ``T`` and ``y`` do not have the same shape.
    """
    T = np.asarray(T)
    y = np.asarray(y, dtype=float)
    if T.shape != y.shape:
        raise ValueError(
            "T and y must have the same shape, got %s and %s" % (T.shape, y.shape)
        )

    # Pick the probability the model assigned to the true class. Selecting with
    # np.where (instead of t*log(y) + (1-t)*log(1-y)) keeps a perfect prediction
    # at exactly 0 error rather than 0 * log(0) = nan.
    p_true = np.where(T == 1, y, 1 - y)
    return np.sum(-np.log(p_true))

# Print the total cross entropy error of the predictions made with the randomly initialised weights
print(cross_entropy(T, y))

279.32003320442317


In [14]:
# Now evaluate the closed form solution instead of random weights.
# The data above has unit covariance, equally sized classes and means (-2, -2) / (2, 2),
# which gives bias 0 and a weight of 4 for each feature.
w = np.array([0, 4, 4])

# Model output for the closed form weights
z = np.dot(Xb, w)
y = sigmoid(z)

# The cross entropy error printed here is very small because we used the closed form solution weights
print(cross_entropy(T, y))

0.08313117987668509
