# EX2

## Set-up

In [1]:
import pandas as pd
import numpy as np
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
from scipy.optimize import minimize

# Init plotly for notebook usage
init_notebook_mode(connected=True)

# Set np print options
np.set_printoptions(suppress=True, formatter={'float_kind':'{:0.4f}'.format})

## Visualise test data

In [2]:
# Read sample CSV
df = pd.read_csv('machine-learning-ex2/ex2/ex2data1.txt', names=['ex1', 'ex2', 'adm'])

admitted = df[(df.adm == 1)]
not_admitted = df[(df.adm == 0)]

def plot_data(admitted, not_admitted):
    # Build data for graph
    admitted_data = np.concatenate((admitted.iloc[:, [0]].values, admitted.iloc[:, [1]].values), axis=1).T
    not_admitted_data = np.concatenate((not_admitted.iloc[:, [0]].values, not_admitted.iloc[:, [1]].values), axis=1).T
    
    # Build scatter plots
    trace0 = Scatter(
        x=admitted_data[0],
        y=admitted_data[1],
        mode='markers',
        name='Admitted',
        marker=dict(
            color='rgb(145,191,219)'
        )
    )
    trace1 = Scatter(
        x=not_admitted_data[0],
        y=not_admitted_data[1],
        mode='markers',
        name='Not Admitted',
        marker=dict(
            color='rgba(255,182,193)'
        )
    )
    data = [trace0, trace1]
    
    #Configure plot apperance
    layout = Layout(
        xaxis=dict(
            title='Exam 1 score'
        ),
        yaxis=dict(
            title='Exam 2 score'
        )
    )
    
    # Plot the graph
    iplot(dict(data=data, layout=layout))

plot_data(admitted, not_admitted)

## Warmup: sigmoid

In [3]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

print(sigmoid(0))
print(sigmoid(np.array([[0]])))
print(sigmoid(np.array([[0, 0], [0, 0]])))

0.5
[[0.5000]]
[[0.5000 0.5000]
 [0.5000 0.5000]]


## Cost & gradient functions

In [4]:
def gradient_function(theta, x, y, alpha=1):
    m = y.size
    h = sigmoid(x@theta.reshape(-1,1))
    grad = (alpha / m) * X.T@(h-y)
    return grad.flatten()

def cost_function(theta, x, y):
    m = y.size
    h = sigmoid(x@theta)
    return 1.0 / m * (-y.T@np.log(h) - (1 - y).T@np.log(1 - h))

orig_X = df.iloc[:, [0, 1]].values
orig_Y = df.iloc[:, [2]].values
x_shape = orig_X.shape

# Add intercept term
X = np.append(np.ones((x_shape[0], 1)), orig_X, axis=1)
# Init fitting param 
initial_theta = np.zeros((x_shape[1] + 1, 1))
cost = cost_function(initial_theta, X, orig_Y)
grad = gradient_function(initial_theta, X, orig_Y)
print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros): {}'.format(grad))
print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628')

# Second test
test_theta = np.array([[-24], [0.2], [0.2]])
cost = cost_function(test_theta, X, orig_Y)
grad = gradient_function(test_theta, X, orig_Y)
print('Cost at test theta: {}'.format(cost))
print('Expected cost (approx): 0.218')
print('Gradient at test theta: {}'.format(grad))
print('Expected gradients (approx):\n 0.043\n 2.566\n 2.647')

Cost at initial theta (zeros): [[0.6931]]
Expected cost (approx): 0.693
Gradient at initial theta (zeros): [-0.1000 -12.0092 -11.2628]
Expected gradients (approx):
 -0.1000
 -12.0092
 -11.2628
Cost at test theta: [[0.2183]]
Expected cost (approx): 0.218
Gradient at test theta: [0.0429 2.5662 2.6468]
Expected gradients (approx):
 0.043
 2.566
 2.647


### Cost function:

$$
\begin{align*}
& h = g(X\theta)\newline
& J(\theta)  = \frac{1}{m} \cdot \left(-y^{T}\log(h)-(1-y)^{T}\log(1-h)\right)
\end{align*}
$$

### Gradient function
$$
\begin{align*}
& Repeat \; \lbrace \newline
& \; \theta_j := \theta_j - \frac{\alpha}{m} \sum_{i=1}^m (h_\theta(x^{(i)}) - y^{(i)}) x_j^{(i)} \newline & \rbrace
\end{align*}
$$

## Optimize using minimize

In [5]:
xopt = minimize(cost_function, initial_theta, args=(X, orig_Y), 
                method=None, jac=gradient_function, options={'maxiter':400})

cost = xopt.fun
theta = xopt.x

print('Cost at theta found by fminunc: {}'.format(cost))
print('Expected cost (approx): 0.203')
print('Theta: {}'.format(theta))
print('Expected theta (approx):')
print('-25.161\n 0.206\n 0.201')

Cost at theta found by fminunc: 0.2034977015894438
Expected cost (approx): 0.203
Theta: [-25.1613 0.2062 0.2015]
Expected theta (approx):
-25.161
 0.206
 0.201



divide by zero encountered in log



## Predict & accuracy

In [6]:
def predict(theta, x, threshold=0.5):
    return sigmoid(x@theta.reshape(-1,1)) >= threshold

prob = sigmoid(np.array([[1, 45, 85]])@theta)
print('For a student with scores 45 and 85, we predict an admission probability of {}'.format(prob))
print('Expected value: 0.775 +/- 0.002')

# Compute accuracy on our training set
p = predict(theta, X)
acc = (len(np.where(p == orig_Y)[0]) / X.shape[0]) * 100
print('Train Accuracy: {}'.format(acc))
print('Expected accuracy (approx): 89.0')

For a student with scores 45 and 85, we predict an admission probability of [0.7763]
Expected value: 0.775 +/- 0.002
Train Accuracy: 89.0
Expected accuracy (approx): 89.0


## Visualize reg data

In [7]:
# Read sample CSV
df_reg = pd.read_csv('machine-learning-ex2/ex2/ex2data2.txt', names=['micro1', 'micro2', 'y'])

data_a = df_reg[(df_reg.y == 1)]
data_b = df_reg[(df_reg.y == 0)]

def plot_data(a, b):
    # Build data for graph
    a_data = np.concatenate((a.iloc[:, [0]].values, a.iloc[:, [1]].values), axis=1).T
    b_data = np.concatenate((b.iloc[:, [0]].values, b.iloc[:, [1]].values), axis=1).T
    
    # Build scatter plots
    trace0 = Scatter(
        x=a_data[0],
        y=a_data[1],
        mode='markers',
        name='y = 1',
        marker=dict(
            color='rgb(145,191,219)'
        )
    )
    trace1 = Scatter(
        x=b_data[0],
        y=b_data[1],
        mode='markers',
        name='y = 0',
        marker=dict(
            color='rgba(255,182,193)'
        )
    )
    data = [trace0, trace1]
    
    #Configure plot apperance
    layout = Layout(
        xaxis=dict(
            title='Microchip Test 1',
            range=[-1, 1.5]
        ),
        yaxis=dict(
            title='Microchip Test 2',
            range=[-0.8, 1.2]
        )
    )
    
    # Plot the graph
    iplot(dict(data=data, layout=layout))

plot_data(data_a, data_b)

## Feature mapping

In [8]:
from sklearn.preprocessing import PolynomialFeatures

def gradient_function_reg(theta, x, y, lambda_param=1, alpha=1):
    m = y.size
    h = sigmoid(x@theta.reshape(-1,1))
    t = np.r_[[[0]],theta[1:].reshape(-1,1)] # Cheat: looked this one up!
    grad = (alpha / m) * X.T@(h-y) + (lambda_param / m * t)
    return grad.flatten()

def cost_function_reg(theta, x, y, lambda_param=1):
    m = y.size
    h = sigmoid(x@theta)
    t = theta[1:]
    return 1.0 / m * (-y.T@np.log(h) - (1 - y).T@np.log(1 - h)) + (lambda_param / (2 * m) * np.sum(np.power(t, 2)))

# Read data
orig_X = df_reg.iloc[:, [0, 1]].values
orig_Y = df_reg.iloc[:, [2]].values

# Add Polynomial Features
poly = PolynomialFeatures(6)
X = poly.fit_transform(orig_X)

# Init fitting param 
initial_theta = np.zeros((X.shape[1], 1))

# compute and display initial cost and gradient for regularized logistic regression
cost = cost_function_reg(initial_theta, X, orig_Y)
grad = gradient_function_reg(initial_theta, X, orig_Y)

print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros) - first five values only:')
print('{}'.format(grad[:5]))
print('Expected gradients (approx) - first five values only')
print(' 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115')

test_theta = np.ones((X.shape[1], 1))
# compute and display initial cost and gradient for regularized logistic regression
cost = cost_function_reg(test_theta, X, orig_Y, lambda_param=10)
grad = gradient_function_reg(test_theta, X, orig_Y, lambda_param=10)

print('Cost at test theta (with lambda = 10): {}'.format(cost))
print('Expected cost (approx): 3.16')
print('Gradient at test theta - first five values only:')
print('{}'.format(grad[:5]))
print('Expected gradients (approx) - first five values only')
print(' 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922')


Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working



Cost at initial theta (zeros): [[0.6931]]
Expected cost (approx): 0.693
Gradient at initial theta (zeros) - first five values only:
[0.0085 0.0188 0.0001 0.0503 0.0115]
Expected gradients (approx) - first five values only
 0.0085
 0.0188
 0.0001
 0.0503
 0.0115
Cost at test theta (with lambda = 10): [[3.1645]]
Expected cost (approx): 3.16
Gradient at test theta - first five values only:
[0.3460 0.1614 0.1948 0.2269 0.0922]
Expected gradients (approx) - first five values only
 0.3460
 0.1614
 0.1948
 0.2269
 0.0922


## Optimize

In [9]:
# Init fitting param 
initial_theta = np.zeros((X.shape[1], 1))

xopt = minimize(cost_function_reg, initial_theta, args=(X, orig_Y, 1), 
                method=None, jac=gradient_function_reg, options={'maxiter':400})

cost = xopt.fun
theta = xopt.x

p = predict(theta, X)
acc = (len(np.where(p == orig_Y)[0]) / X.shape[0]) * 100
print('Train Accuracy: {:.1f}'.format(acc))
print('Expected accuracy (with lambda = 1 ): 83.1')

Train Accuracy: 83.1
Expected accuracy (with lambda = 1 ): 83.1
