# Exercise 2: Logistic Regression 

In [1]:
from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pdb

## 1. Data preparation

In [2]:
cancer = datasets.load_breast_cancer()

In [3]:
X = cancer.data
y = cancer.target
m, n = X.shape
print(m, n)

569 30


In [4]:
# feature scaling
def feature_normalize(X):
    X_norm = X.copy()
    mu = np.zeros(X.shape[1])
    sigma = np.zeros(X.shape[1])
    
    mu = np.mean(X, axis = 0)
    sigma = np.std(X, axis = 0)
    X_norm = (X - mu) / sigma
    
    return X_norm, mu, sigma

In [5]:
X, mu, sigma = feature_normalize(X)
#print('mean:', mu)
#print('standard deviation:', sigma)

In [6]:
# add intercept term to X
X = np.concatenate([np.ones((m, 1)), X], axis=1)

## 2. Sigmoid Function: $g(z) = \frac{1}{1 + e^{-z}}$

In [7]:
def sigmoid(z):
    z = np.array(z)
    g = 1 / (1 + np.exp(-z))
    return g

In [8]:
sigmoid(36)

0.9999999999999998

In [9]:
sigmoid(-709)

1.216780750623423e-308

In [10]:
np.log(0)

  """Entry point for launching an IPython kernel.


-inf

## 3. Cost Function: $J(\theta)=\frac{1}{m} \sum_{i=1}^{m}\left[-y^{(i)} \log \left(h_{\theta}\left(x^{(i)}\right)\right)-\left(1-y^{(i)}\right) \log \left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]$

In [11]:
def cost_func(X, y, theta):
    m = y.size
    z = X.dot(theta) 
    h = sigmoid(z)
    # add 1e-4 to deal with log(0)
    loss = (-y.dot(np.log(h + 1e-4)) - (1 - y).dot(np.log(1 - h + 1e-4))) / m   
    #loss = (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m 
    return loss

In [13]:
# initializing parameters
theta = np.zeros(31)
iters = 1000; 
lr = 1;  
# lr = 50, iters = 500000 to get better result

In [14]:
cost_func(X, y ,theta)

0.6929472005572793

## 3. Gradient Descent: $\theta_{j}=\theta_{j}-\alpha \frac{1}{m} \sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{j}^{(i)}$

In [15]:
# vectorization implementation of gradient descent
def grad_descent(X, y, theta, lr, iters):
    m = len(y)
    X_T = X.T   
    for i in range(iters):
        h = sigmoid(X.dot(theta))
        delta = X_T.dot(h - y)
        theta -= lr*delta / m
        print(cost_func(X, y, theta))
    return theta

In [None]:
learned_theta = grad_descent(X, y, theta, lr, iters)

In [None]:
learned_theta

## 4. Train Accuracy

In [18]:
def predict(X, theta):
    m = X.shape[0]
    p = np.round(sigmoid(X.dot(theta)))
    return p

In [19]:
p = predict(X, learned_theta)
print("Training Accuracy : {:.2f}%".format(np.mean(p == y) * 100))

Training Accuracy : 97.72%


In [None]:
initial_theta = np.zeros(31)
p = predict(X, initial_theta)
print("Training Accuracy : {:.2f}%".format(np.mean(p == y) * 100))