# Logistic Regression by Example

In [1]:
#import relevant packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [5]:
#Build a tiny toy dataset
#x: the single feature, values 0..9. It must be 2D (one row per observation,
#one column per feature), so np.newaxis adds a second axis -> shape (10, 1)
x = np.arange(10)[:, np.newaxis]
#y: one binary label per row of x (four 0s followed by six 1s)
y = np.array([0] * 4 + [1] * 6)

In [15]:
#Step 1: create the (untrained) classifier
#liblinear: "Library for Large Linear Classification", the solver that does the optimization
model = LogisticRegression(
    solver='liblinear',
    random_state=0,
)
#Step 2: train it on the data. fit() returns the fitted estimator itself;
#assigning the result back keeps the cell from echoing the estimator
model = model.fit(x, y)

In [16]:
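#training the model means fitting it to the data to find the coefficients
#b0, b1, b2, etc. of the linear function b0 + b1*x1 + b2*x2 + ... that feeds the sigmoid
#(already done by .fit(x,y) in the cell above)
#model.fit(x,y)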

In [17]:
#class labels the fitted model learned from y
model.classes_

array([0, 1])

In [18]:
#b0 is the intercept of the fitted linear function b0 + b1*x
model.intercept_

array([-1.04608067])

In [19]:
#b1, b2, etc. are the coefficients, one per feature (only b1 here, since x has one column)
model.coef_

array([[0.51491375]])

In [20]:
#Inspect the model's predicted probabilities. Returns a matrix of
#probabilities that the predicted output is either 0 or 1
model.predict_proba(x)
#in this matrix, each row is one observation
#first column is the probability that output is 0
#second column is the probability that output is 1

array([[0.74002157, 0.25997843],
       [0.62975524, 0.37024476],
       [0.5040632 , 0.4959368 ],
       [0.37785549, 0.62214451],
       [0.26628093, 0.73371907],
       [0.17821501, 0.82178499],
       [0.11472079, 0.88527921],
       [0.07186982, 0.92813018],
       [0.04422513, 0.95577487],
       [0.02690569, 0.97309431]])

In [21]:
#Find the actual class predictions: for each observation, the class
#with the higher probability in the probability matrix
model.predict(x)

array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

In [22]:
model.score(x,y)
#accuracy: the ratio of # of correct predictions to total # of
#observations. Measured on the training data itself here

0.9

In [23]:
#confusion matrix: rows are the actual classes (y), columns are the
#predicted classes; off-diagonal entries count misclassifications
confusion_matrix(y, model.predict(x))

array([[3, 1],
       [0, 6]], dtype=int64)

# Let's improve the Regression

In [25]:
#let's set up regularization. C is the INVERSE of regularization strength (default 1.0).
#Smaller C means stronger regularization: don't trust the training data too much.
#Larger C means weaker regularization: fit the training data more closely, but watch for overfitting
model = LogisticRegression(solver='liblinear', C=10.0, random_state=0)
model.fit(x,y)

In [28]:
#Now collect the model parameters: intercept, coefficients, etc.
model.intercept_

array([-3.51335372])

In [29]:
#coefficient b1 of the re-trained model
model.coef_

array([[1.12066084]])

In [30]:
#probability matrix of the re-trained model (columns: P(output=0), P(output=1))
model.predict_proba(x)

array([[0.97106534, 0.02893466],
       [0.9162684 , 0.0837316 ],
       [0.7810904 , 0.2189096 ],
       [0.53777071, 0.46222929],
       [0.27502212, 0.72497788],
       [0.11007743, 0.88992257],
       [0.03876835, 0.96123165],
       [0.01298011, 0.98701989],
       [0.0042697 , 0.9957303 ],
       [0.00139621, 0.99860379]])

In [31]:
#class predictions of the re-trained model
model.predict(x)

array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])

In [32]:
#accuracy on the training data (the same x, y the model was fitted on),
#so it says nothing yet about performance on unseen data
model.score(x,y)

1.0

In [34]:
#confusion matrix of the re-trained model (rows: actual, columns: predicted)
confusion_matrix(y, model.predict(x))

array([[4, 0],
       [0, 6]], dtype=int64)