## $$\mbox{Logistic Regression with Gradient Descent.}$$


In [None]:
# %load ../../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import random

from scipy.optimize import minimize

from sklearn.preprocessing import PolynomialFeatures

from IPython.display import display, Math, Latex

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_seq_items', None)
 
#%config InlineBackend.figure_formats = {'pdf',}
%matplotlib inline

import seaborn as sns
sns.set_context('notebook')
sns.set_style('white')

In [None]:
def loaddata(file, delimeter):
    """Read a delimited numeric text file and print a short preview of it.

    Parameters
    ----------
    file : forwarded unchanged to ``np.loadtxt``.
    delimeter : field separator, forwarded to ``np.loadtxt`` as ``delimiter``
        (the parameter keeps its original spelling so existing calls still work).

    Returns
    -------
    The array produced by ``np.loadtxt``.
    """
    table = np.loadtxt(file, delimiter=delimeter)
    print('Dimensions: ', table.shape)
    # preview: the rows with index 1 through 5 (row 0 is not shown)
    preview = table[1:6, :]
    print(preview)
    return table

In [None]:
def plotData(data, label_x, label_y, label_pos, label_neg, axes=None):
    """Scatter plot of two exam grades, marked by the 0/1 outcome.

    Each row of ``data`` is one sample: the first two columns are the grades
    for the two exams, the last column is 0 (rejected) or 1 (accepted).
    Samples with outcome 1 are drawn as black plus signs, samples with
    outcome 0 as yellow dots.

    Parameters
    ----------
    data : 2-D array, one sample per row (see above).
    label_x, label_y : axis labels.
    label_pos, label_neg : legend entries for the outcome-1 / outcome-0 points.
    axes : object to draw on; when omitted, the current axes (``plt.gca()``).

    Returns
    -------
    None. The plot is drawn as a side effect.
    """
    # Identity test instead of `== None`: an equality test is evaluated
    # element-wise for array-like arguments and then cannot be used in `if`.
    if axes is None:
        axes = plt.gca()

    outcome = data[:, -1]
    accepted = outcome == 1
    rejected = outcome == 0

    axes.scatter(data[accepted, 0], data[accepted, 1],
                 marker='+', c='k', s=60, linewidth=2, label=label_pos)
    axes.scatter(data[rejected, 0], data[rejected, 1],
                 c='y', s=60, label=label_neg)
    axes.set_xlabel(label_x)
    axes.set_ylabel(label_y)
    axes.legend(frameon=True, fancybox=True)

### $$\mbox{Logistic regression.}$$

In [None]:
## read the comma-separated data file; loaddata also prints its dimensions and a few rows
data = loaddata('data/ex2data1.txt', ',')
## last expression of the cell: the array shape is shown as the cell output
data.shape

In [None]:
## X: of the form [1 x1 x2]  (a column of ones, then the two exam scores)
## y: 0's and 1's            (the last column of data, kept as a column vector)
X = np.c_[np.ones((data.shape[0], 1)), data[:, 0:2]]
y = np.c_[data[:, 2]]

In [None]:
## draw the loaded data with plotData; the arguments are the x/y axis labels
## followed by the legend labels for the 1 and the 0 outcomes
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Admitted', 'Not admitted')

#### $$ \mbox{Logistic regression hypothesis:} \hspace{1cm} h_{\theta}(x) = \sigma(\theta^{T}x), ~~~ \sigma(z)=\frac{1}{1+e^{-z}} $$

In [None]:
def sigmoid(z):
    """Logistic function sigma(z) = 1 / (1 + exp(-z)), applied element-wise.

    Parameters
    ----------
    z : scalar or array-like of numbers.

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``z``.
    """
    # For very negative z, exp(-z) overflows to inf and the quotient becomes 0.0,
    # which is the correct limit; only the overflow warning is silenced.
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-np.asarray(z, dtype=float)))
    
## create a picture of this function on [-6, 6]
## xxx = ...
## yyy = ...
## plt...

#### $$ {\mbox{Cost function:}} \hspace{1cm}     J(\theta) = -\frac{1}{m}\big(\,\ln\,(\sigma(X\theta))^Ty+\ln\,(1-\sigma(X\theta))^T(1-y)\big)$$

In [None]:
def costFunction(theta, X, y):
    """Logistic-regression cost J(theta).

    J(theta) = -(1/m) * ( ln(h)^T y + ln(1 - h)^T (1 - y) ),  h = sigma(X theta)

    Parameters
    ----------
    theta : parameter vector with one entry per column of X (flat or column).
    X : (m, n) array, one sample per row.
    y : m labels, 0 or 1 (flat or column).

    Returns
    -------
    The cost as a Python float, or ``np.inf`` when the formula evaluates to
    NaN (this happens when h is exactly 0 or 1, giving 0 * ln(0)).
    """
    theta = np.reshape(theta, (-1, 1))
    y = np.reshape(y, (-1, 1))
    m = y.shape[0]
    # ln(0) and 0 * inf are expected for saturated h and are handled by the
    # NaN guard below, so the corresponding floating-point warnings are muted.
    with np.errstate(over='ignore', divide='ignore', invalid='ignore'):
        h = 1.0 / (1.0 + np.exp(-X.dot(theta)))
        J = -(np.log(h).T.dot(y) + np.log(1 - h).T.dot(1 - y)) / m
    # the matrix products give a 1x1 array; hand back a plain scalar
    J = J.item()
    if np.isnan(J):
        return(np.inf)
    return J
    

#### $$ \mbox{Partial derivatives:}\hspace{1cm}\frac{\partial J(\theta)}{\partial\theta} = \frac{1}{m} X^T(\sigma(X\theta)-y)$$

For this problem, both the vector theta and the gradient should have shape (3,1).

In [None]:
def gradient(theta, X, y):
    """Gradient of the logistic-regression cost with respect to theta.

    grad = (1/m) * X^T (sigma(X theta) - y)

    Parameters
    ----------
    theta : parameter vector with one entry per column of X (flat or column).
    X : (m, n) array, one sample per row.
    y : m labels, 0 or 1 (flat or column).

    Returns
    -------
    (n, 1) array holding the partial derivatives.
    """
    # work with column vectors so that h - y never broadcasts to (m, m)
    theta = np.reshape(theta, (-1, 1))
    y = np.reshape(y, (-1, 1))
    m = y.shape[0]
    # exp overflow for very negative X.theta yields h = 0, the correct limit
    with np.errstate(over='ignore'):
        h = 1.0 / (1.0 + np.exp(-X.dot(theta)))
    grad = X.T.dot(h - y) / m
    return grad

In [None]:
## checking the shapes and values
initial_theta = np.c_[np.zeros(X.shape[1])] 
print('Shape theta: \n', initial_theta.shape)
cost = costFunction(initial_theta, X, y)
grad = gradient(initial_theta, X, y)
print('Initial cost: \n', cost) 
print('Grad: \n', grad.T)
print('Shape grad: \n', grad.shape)

#### $$\mbox{Optimize cost function.}$$

In gradientDescent (below) you have to experiment with the learning rate alpha and the number of iterations: you cannot tell in advance what good values are.

Store the value of the cost function at intermediate steps so that you can inspect the convergence.

If numiters has to be chosen very large (did the process converge?), you should write an extra loop within the loop.


In [None]:
def gradientDescent(X, y, theta, alpha = 0.001, numiters = 5500):
    """Minimise the cost by batch gradient descent.

    Every iteration performs ``theta <- theta - alpha * gradient(theta, X, y)``
    and stores the cost of the updated theta.

    Parameters
    ----------
    X, y : passed unchanged to ``gradient`` and ``costFunction``.
    theta : starting parameter vector; it is not modified in place.
    alpha : learning rate (step size).
    numiters : number of update steps.

    Returns
    -------
    (theta, Jh) : the final parameter vector and an array of length
    ``numiters`` where ``Jh[i]`` is the cost after update ``i + 1``.
    """
    Jh = np.zeros(numiters)
    for step in range(numiters):
        # new array each step, so the caller's starting theta is left untouched
        theta = theta - alpha * gradient(theta, X, y)
        Jh[step] = costFunction(theta, X, y)
    return(theta, Jh)

In [None]:
## run gradient descent from the initial theta and compare the cost before and after
theta = initial_theta
print('Initial cost: ',costFunction(theta, X, y))
theta , Jh = gradientDescent(X, y, theta)
print('Final cost: ',costFunction(theta, X, y))
## theta has one entry per column of X, so show all of them (not only the first two)
print('Values of theta: ', theta.ravel())
## plot the history of the cost function
## put sensible text on axis
## plt ...

What fraction of the whole data set is classified correctly when using the optimized theta values from above?

In [None]:
def predict(theta, X, threshold = 0.5):
    """Classify samples with the logistic-regression hypothesis.

    A sample is assigned class 1 when sigma(X theta) >= threshold, else 0.

    Parameters
    ----------
    theta : parameter vector with one entry per column of X.
    X : one sample per row (or a single sample as a flat vector).
    threshold : minimum hypothesis value that is classified as 1.

    Returns
    -------
    Integer array of 0/1 predictions, shaped like ``np.dot(X, theta)``.
    """
    # exp overflow for very negative X.theta yields h = 0, the correct limit
    with np.errstate(over='ignore'):
        h = 1.0 / (1.0 + np.exp(-np.dot(X, theta)))
    return (h >= threshold).astype(int)

## apply predict to every row of X with the current theta (default threshold)
p = predict(theta, X) 
    ## print ...

In [None]:
# Student with Exam 1 score 45 and Exam 2 score 85: will he pass?
## print(...)

In [None]:
## mark the example student first; plotData then draws the data on the current axes
plt.scatter(45, 85, s=60, c='r', marker='v', label='(45, 85)')
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Admitted', 'Not admitted')

## grid spanning the observed range of both features
x1_min = X[:,1].min()
x1_max = X[:,1].max()
x2_min = X[:,2].min()
x2_max = X[:,2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))

## evaluate the hypothesis on every grid point (rows of the form [1 x1 x2])
n_points = xx1.size
grid_X = np.c_[np.ones((n_points, 1)), xx1.ravel(), xx2.ravel()]
h = sigmoid(grid_X.dot(theta)).reshape(xx1.shape)

## draw the contour where the hypothesis equals 0.5
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b');