### Machine Learning for Engineers: [Logistic Regression](https://www.apmonitor.com/pds/index.php/Main/LogisticRegression)
- [Logistic Regression](https://www.apmonitor.com/pds/index.php/Main/LogisticRegression)
 - Source Blocks: 7
 - Description: Introduction to Logistic Regression
- [Course Overview](https://apmonitor.com/pds)
- [Course Schedule](https://apmonitor.com/pds/index.php/Main/CourseSchedule)


<img align=left width=400px src='http://apmonitor.com/pds/uploads/Main/logistic_regression.png'>

Logistic Regression with Scikit-Learn: fit a classifier to data features `XA` and data labels `yA`, then predict labels `yP` for new data features `XB`.

```python
from sklearn.linear_model import LogisticRegression
# Logistic regression classifier using the 'lbfgs' solver
lr = LogisticRegression(solver='lbfgs')
# Fit the classifier to features XA and labels yA
lr.fit(XA,yA)
# Predict labels for the features XB
yP = lr.predict(XB)
```

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

from sklearn.linear_model import LogisticRegression
# Logistic regression classifier with the lbfgs solver. max_iter is set far
# above scikit-learn's default, presumably so the solver has room to converge
# on the unscaled pixel values. The multi_class argument is intentionally
# omitted: 'auto' was already the default behaviour, and passing it explicitly
# raises a FutureWarning since scikit-learn 1.5.
classifier = LogisticRegression(max_iter=5000, solver='lbfgs')

# The digits dataset: flatten every image into a single feature row
digits = datasets.load_digits()
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

# Split into train and test subsets (50% each). shuffle=False keeps the
# original order, so the first half is used for training and the second
# half is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    data, digits.target, test_size=0.5, shuffle=False)

# Learn the digits on the first half of the digits
classifier.fit(X_train, y_train)

# Test on second half of data: draw one random index from the held-out half
n = np.random.randint(n_samples // 2, n_samples)
# Predict from the same flattened matrix the classifier was trained on
predicted = classifier.predict(data[n:n+1])[0]
print('Predicted: ' + str(predicted))

# Show number
plt.imshow(digits.images[n], cmap=plt.cm.gray_r, interpolation='nearest')
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import seaborn as sns

# Generate blobs dataset
# Generate blobs dataset: 800 samples, two features, two centers.
# random_state is fixed so the same points are produced on every run.
features, label = make_blobs(
    n_samples=800, n_features=2, centers=2, random_state=12)

# Gather both feature columns and the label column in one frame
data = pd.DataFrame({
    'x1': features[:, 0],
    'x2': features[:, 1],
    'y': label,
})

# Pair plot of the columns, colored by the label column 'y'
sns.pairplot(data, hue='y')

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the split works on the plain
# array form of the frame, so each row is laid out as [x1, x2, y].
train, test = train_test_split(data.to_numpy(), test_size=0.2)

In [None]:
from math import exp
def predict(row, beta):
    """Return the logistic-model probability for one data row.

    Args:
        row: Indexable sample whose first two entries are the features
            x1 and x2; any further entries (e.g. a trailing label) are
            ignored.
        beta: Three coefficients ``[intercept, weight_x1, weight_x2]``.

    Returns:
        The sigmoid of ``beta[0] + beta[1]*x1 + beta[2]*x2`` as a value
        in the closed interval [0, 1].
    """
    x = row[0:2]
    t = beta[0] + beta[1]*x[0] + beta[2]*x[1]
    # Only ever exponentiate a non-positive number: exp(-t) raises
    # OverflowError for a large negative t, whereas exp() of a very
    # negative argument simply underflows to 0.0.
    if t >= 0:
        return 1.0 / (1.0 + exp(-t))
    z = exp(t)
    return z / (1.0 + z)

In [None]:
# Stochastic gradient descent settings
l_rate = 0.3    # step size applied to every per-sample update
n_epoch = 100   # number of full passes over the training rows

beta = [0.0, 0.0, 0.0]      # [intercept, x1 weight, x2 weight]
loss = np.zeros(n_epoch)    # summed squared error recorded per epoch

for epoch in range(n_epoch):
    sq_err_total = 0
    for row in train:
        x, y = row[:-1], row[-1]    # input features, output label
        p = predict(row, beta)
        error = y - p
        sq_err_total += error**2
        # Factor shared by all three coefficient updates: the learning
        # rate times the squared-error gradient through the sigmoid.
        step = l_rate * error * p * (1.0 - p)
        beta[0] += step
        beta[1] += step * x[0]
        beta[2] += step * x[1]
    loss[epoch] = sq_err_total

print('Coefficients:',beta)

# Loss curve: summed squared error over the training rows for each epoch
plt.plot(loss)
plt.xlabel('Epoch')
plt.ylabel('Loss')

In [None]:
# Classify every held-out row by rounding its predicted probability
yhat = [round(predict(row, beta)) for row in test]

from sklearn.metrics import confusion_matrix
# Compare the true labels (last column of each test row) with the predictions
cmat = confusion_matrix(test[:,-1],yhat)
sns.heatmap(cmat,annot=True)