# Logistic Regression

Logistic Regression Cost Function

$$ J(\theta) = \frac{1}{m} \sum_{i=1}^{m} [-y^{(i)}\log (h_\theta(x^{(i)})) - (1-y^{(i)})\log (1-h_\theta(x^{(i)})]
$$

Gradient decent


Want $ \min_\theta J(\theta): $
    
Repeat   {
    $$ \theta_j :=  \theta_j  - \alpha \sum_{i=1}^{m} h_\theta(x^{(i)} - y^{(i)}) x_j^{(i)} $$
    (simultaneously update all $\theta_j$)
    
}
    

$$ h_\theta(x) = \frac{1}{1+ e^{-\theta^T{x}} } $$

Vectorized implementation: 
    $$h = g(H\theta)$$

$$J(\theta) = \frac{1}{m} (-y^T \log(h) - (1-y)^T \log(1-h))$$

$$ \theta := \theta - \frac{\alpha}{m} X^T (g(X\theta) - \overrightarrow{y}) $$

In [1]:
import numpy as np
import pandas as pd
import math
from  scipy.optimize import minimize as opt
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = pd.read_csv('ex2data1.txt', header = None)
data = np.array(data)
X = data[:, [0,1]] 
y = data[:, [2]]

# Sigmoid function

$$ g(z) = \frac{1}{1+ e^{-z}} $$


 $$h_\theta(x)= g(\theta^T x) $$

In [3]:
def g(z):
    sigm = 1.0/(1.0+np.exp(-z))
    return sigm

In [4]:
X = np.insert(X, 0, values=1, axis=1)
theta = np.zeros((X.shape[1],1))

# Cost function and gradient

Logistic Regression Cost Function


$$ J(\theta) = \frac{1}{m} \sum_{i=1}^{m} [-y^{(i)}\log (h_\theta(x^{(i)})) - (1-y^{(i)})\log (1-h_\theta(x^{(i)}))]
$$

Vectorized implementation: 
    $$h = g(H\theta)$$

$$J(\theta) = \frac{1}{m} (-y^T \log(h) - (1-y)^T \log(1-h))$$

In [5]:
in_theta = np.zeros((X.shape[1],1))

In [6]:
def costFunction(theta, x, y):   
    m = len(y)
    h_theta = g(x.dot(theta))
    J = (1.0/m)* (((-y).transpose()).dot(np.log(h_theta)) - (1.0 -y.transpose()).dot(np.log(1.0-h_theta)))
    grad = grad = (1.0/m)* x.transpose().dot(h_theta - y)    
    #return J, grad
    print 'Cost at theta:', str(J[0,0])
    print 'Gradient at theta:','\n', str(grad[0,0]),'\n', str(grad[1,0]),'\n', str(grad[2,0])
 

In [7]:
costFunction(in_theta, X, y)

Cost at theta: 0.69314718056
Gradient at theta: 
-0.1 
-12.0092165893 
-11.2628422055


In [8]:
test_theta= np.array([[-24], [0.2], [0.2]])

In [9]:
costFunction(test_theta, X, y)

Cost at theta: 0.218330193827
Gradient at theta: 
0.0429029948995 
2.56623411551 
2.64679737108


# Optimizing

In [None]:
def CostFunction(theta, x, y):
    m = len(y)
    h_theta = g(x.dot(theta))
    J = (1.0/m)* (((-y).transpose()).dot(np.log(h_theta)) - (1.0 -y.transpose()).dot(np.log(1.0-h_theta)))
    J = np.float64(J)
    return J

In [11]:
 def Gradient(theta, x, y):
    m = len(y)
    n = x.shape[1]
    theta = theta.reshape((n,1))
    h_theta = g(x.dot(theta))
    grad = (1.0/m)* (x.transpose().dot(h_theta - y)) 
    return grad.flatten()   

In [13]:
Result = opt(fun = CostFunction, x0 = in_theta, args = (X, y), method = 'TNC', jac = Gradient, options ={'maxiter':400})
theta = Result.x
print'Cost at theta:',Result.fun, '\n', 'Theta:', Result.x

Cost at theta: 0.203497701589 
Theta: [-25.16131866   0.20623159   0.20147149]


# Evaluating logistic regression

In [15]:
prob = g(np.array([1, 45, 85]).dot(theta))
print 'For a student with score 45 on exam 1\n and score 85 on exam 2 we predict an admission probability of ', prob

For a student with score 45 on exam 1
 and score 85 on exam 2 we predict an admission probability of  0.776290624348


In [21]:
def predict(theta, x):    
    m = X.shape[0]
    p = np.zeros((m,1))
    n = X.shape[1]
    theta = theta.reshape((n,1))
    h_theta = g(X.dot(theta))    
    for i in range(0, h_theta.shape[0]):
        if h_theta[i] > 0.5:
            p[i, 0] = 1
        else:
            p[i, 0] = 0
    return p

In [17]:
p = predict(theta, X)
print 'Train Accuracy:', (y[p == y].size / float(y.size)) * 100.0

Train Accuracy: 89.0


# Regularized logistic regression

In [1]:
import numpy as np
import pandas as pd
import math
from  scipy.optimize import minimize as opt

In [2]:
data = pd.read_csv('ex2data2.txt', header = None)
data = np.array(data)
X = data[:, [0,1]] 
y = data[:, [2]]

In [3]:
x_size = X[:,0].shape[0]

X1 = X[:,0]
X1 = X1.reshape(x_size, 1)

X2 = X[:,1]
X2 = X2.reshape(x_size, 1)

In [4]:
def mapFeature(X1, X2):
    degree = 6
    out = np.ones((X1.shape[0], 1))
    for i in range(1, degree+1):
        for j in range (0, i+1):
            out1 = np.power(X1, (i-j))
            out2 = np.power(X2, j)
            out = np.concatenate((out,out1*out2),axis=1)
       
    return out

In [5]:
X = mapFeature(X1, X2)

# Regularized Cost function and Gradient

Regularized Logistic Regression Cost Function


$$ J(\theta) = \frac{1}{m} \sum_{i=1}^{m} [-y^{(i)}\log (h_\theta(x^{(i)})) - (1-y^{(i)})\log (1-h_\theta(x^{(i)}))] +  \frac{\lambda}{2m}\sum_{j=1}^{n} \theta_j^2
$$

In [6]:
in_theta = np.zeros((X.shape[1],1))

In [24]:
lamb = 10

In [8]:
def g(z):
    sigm = 1.0/(1.0+np.exp(-z))
    return sigm

def CostFunction(theta, x, y):
    m = len(y)
    h_theta = g(x.dot(theta))
    J = (1.0/m)* (((-y).transpose()).dot(np.log(h_theta)) - (1.0 -y.transpose()).dot(np.log(1.0-h_theta)))
    J = np.float64(J)
    return J

def Gradient(theta, x, y):
    m = len(y)
    n = x.shape[1]
    theta = theta.reshape((n,1))
    h_theta = g(x.dot(theta))
    grad = (1.0/m)* (x.transpose().dot(h_theta - y))
    return grad

In [25]:
def costFunctionReg(theta, x, y, lamb):
    m = len(y)
    J = CostFunction(theta, x, y)
    sum_theta = (theta[1:]**2).sum()
    J = J +lamb/(2*m)*sum_theta
    return J

def GradReg(theta, x, y, lamb):
    m = len(y)
    n = x.shape[1]
    theta = theta.reshape((n,1))
    grad = Gradient(theta, X, y)
    grad[1:,:] =grad[1:,:]+(lamb/m)*theta[1:,:]
    return grad

In [26]:
cost = costFunctionReg(in_theta, X, y, lamb)
grad = GradReg(in_theta, X, y, lamb)
print 'Cost at initial theta (zeros):', cost, '\n', 'Gradient at initial theta (zeros) - first five values only:', grad[:5]

Cost at initial theta (zeros): 0.69314718056 
Gradient at initial theta (zeros) - first five values only: [[  8.47457627e-03]
 [  1.87880932e-02]
 [  7.77711864e-05]
 [  5.03446395e-02]
 [  1.15013308e-02]]


In [27]:
test_theta = np.ones((X.shape[1],1))

In [28]:
cost = CostFunction(test_theta, X, y)
grad = Gradient(test_theta, X, y)
print 'Cost at test theta:', cost, '\n', 'Gradient at test theta - first five values only:', grad[:5]

Cost at test theta: 2.020441535 
Gradient at test theta - first five values only: [[ 0.34604507]
 [ 0.07660616]
 [ 0.11004999]
 [ 0.14211702]
 [ 0.00743991]]


In [29]:
Result = opt(fun = costFunctionReg, x0 = in_theta, args = (X, y, lamb), method = 'TNC', jac = GradReg, options ={'maxiter':400})
theta = Result.x
print'Cost at theta:',Result.fun, '\n', 'Theta:', theta

Cost at theta: 0.250236672827 
Theta: [   13.47701351    19.97645483    12.24521409  -136.71859538   -69.17157502
   -47.0636926   -151.0565139   -165.98761607   -86.99502279   -11.52689396
   480.93353632   445.19456552   535.50346536   223.30098962    48.73230863
   262.45927299   460.44954642   524.55822431   304.87257986    83.01116309
    -4.45569691  -558.43698419  -806.53951448 -1189.44213475  -880.87932779
  -670.60722854  -232.15365936   -18.16217856]


In [30]:
def predict(theta, x): 
    m = X.shape[0]
    p = np.zeros((m,1))
    n = X.shape[1]
    theta = theta.reshape((n,1))
    h_theta = g(X.dot(theta))
    for i in range(0, h_theta.shape[0]):
        if h_theta[i] > 0.5:
            p[i, 0] = 1
        else:
            p[i, 0] = 0
    return p

In [31]:
p = predict(theta, X)
print 'Train Accuracy:', (y[p == y].size / float(y.size)) * 100.0

Train Accuracy: 88.1355932203
