In [1]:
# Logistic Regression.
# As a binary classifier, it outputs the probability that an instance belongs to the positive class.

# The "logit" is the log-odds.
# Logit(p) = log(p/(1-p)).
# The "logistic" is the sigmoid function, which maps any real t to a probability in (0, 1).
# Sigmoid(t) = 1 / (1 + e^(-t)).
# Prob = sigmoid(thetaT*x)   (equivalently xT*theta, for feature vector x)
# Prediction = {0 if prob<0.5, else 1}
# Note: Logit ( Sigmoid ( t ) ) == t.

# For a single training instance:
# Cost (theta) = {-log(p) if y=1, -log(1-p) if y=0}
# Examples (natural log): y=1, p=0.9, cost = 0.11 ; y=0, p=0.1, cost = 0.11
#                         y=1, p=0.1, cost = 2.30 ; y=0, p=0.9, cost = 2.30
# (With base-10 logs the same cases give 0.05 and 1.00.)

# Summed over the training set:
# Cost (theta) = "log loss" = -(1/m)sum_over_m(yi*log(pi) + (1-yi)*log(1-pi)),
#    where pi = sigmoid(thetaT*xi); see Equation 4-17 on page 144.
# No closed form solution.
# The cost function is convex, so gradient descent finds the global optimum.

# Partial derivative (cost(theta)) w.r.t. weight j
#    = (1/m)sum_over_m(sigmoid(thetaT*xi)-yi)*xji

In [2]:
# Load the iris flower dataset that ships with scikit-learn.
# Only a minimal exploration follows: a single feature feeds a linear binary classifier.
# There is more in this dataset than is explored here.
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
# The returned Bunch is dict-like, so iterating it yields its keys.
list(iris)

['data',
 'target',
 'frame',
 'target_names',
 'DESCR',
 'feature_names',
 'filename']

In [11]:
iris['data'][:5] # first five flowers (rows); each row holds four feature values

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [9]:
# Keep only the last column as the single input feature. Slicing with 3:
# (rather than indexing with a bare 3) keeps X two-dimensional: one column, one row per flower.
X = iris['data'][:, 3:]
# Binary target: 1 where the label is species 2 (out of 3), 0 otherwise.
# The builtin int is used instead of np.int, a deprecated alias of the same type
# that recent NumPy releases no longer provide.
y = (iris['target'] == 2).astype(int)
# Show the first five rows of each as a quick sanity check.
X[:5], y[:5]

(array([[0.2],
        [0.2],
        [0.2],
        [0.2],
        [0.2]]),
 array([0, 0, 0, 0, 0]))

In [13]:
from sklearn.linear_model import LogisticRegression
# Build and train in one step; fit() returns the estimator itself.
# With default settings the model is regularized with an L2 penalty.
reg = LogisticRegression().fit(X, y)
# The book runs predictions in a loop and finds the decision boundary at 1.6;
# here we simply probe one feature value on each side of it.
low_value = 1.5
high_value = 1.7
reg.predict([[high_value], [low_value]])

array([1, 0])