# Logistic Regression

Logistic regression is commonly used for binary classification.
First, let us prepare a dataset that has only 2 classes as its labels.

In [None]:
import numpy as np
from sklearn import datasets
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection

import matplotlib.pyplot as plt
%matplotlib inline

We can load the readily available Iris dataset from scikit-learn, which has 3 classes.
We will then remove 1 of the 3 classes so that only a binary problem remains.

In [None]:
# Load scikit-learn's bundled Iris dataset; the cells below read its
# `data`, `target`, `target_names` and `feature_names` attributes.
iris = datasets.load_iris()

In [None]:
# List the attribute names available on the loaded dataset object.
dir(iris)

In [None]:
# Dimensions of the feature array.
iris.data.shape

In [None]:
# Display the names stored for the target classes.
iris.target_names

In [None]:
# Display the feature names; the first two are used as axis labels in the
# scatter plot further down.
iris.feature_names

In [None]:
# Distinct label values present in the target array.
np.unique(iris.target)

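Remove the data entries with label 2. We first build a boolean mask that is `True` for every entry whose label is not 2, and apply it to the data and labels below.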

In [None]:
# Boolean mask: True for every sample whose label differs from 2.
idx = np.not_equal(iris.target, 2)

In [None]:
# Print the full, unfiltered label array for reference.
print(iris.target)

In [None]:
# Print the boolean mask; entries are True where the label is not 2.
print(idx)

In [None]:
# Keep only the masked rows and convert both features and labels to float32.
data = np.asarray(iris.data[idx], dtype=np.float32)
target = np.asarray(iris.target[idx], dtype=np.float32)

The target now has only 2 classes: 0 and 1.

In [None]:
# Print the filtered labels to confirm which classes remain.
print(target)

If you plot the remaining data points, which now belong to only 2 classes, using the first two features, you can see that the two classes are well separated linearly.

In [None]:
# Scatter the first two feature columns of the two-class subset, coloured by
# label. The trailing semicolon suppresses the text repr of the last call.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(data[:, 0], data[:, 1], c=target, cmap=plt.cm.coolwarm, s=100)
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1]);

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
iris_split = model_selection.train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=123,
)
X_train, X_test, y_train, y_test = iris_split

In [None]:
# Shapes of the training features and training labels.
X_train.shape, y_train.shape

In [None]:
# Shapes of the test features and test labels.
X_test.shape, y_test.shape

In [None]:
# Logistic-regression classifier for the two-class Iris subset.
# `linear_model` is imported in the notebook's top import cell, so this cell
# no longer carries its own mid-notebook import. The liblinear solver is
# requested explicitly rather than relying on the library default.
model = linear_model.LogisticRegression(solver="liblinear")

In [None]:
# Train the classifier on the training split; the fitted estimator is the
# cell's displayed value.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the held-out test samples.
predictions = model.predict(X=X_test)

In [None]:
# Text summary of per-class classification metrics on the Iris test split.
# Uses the `metrics` module already imported at the top of the notebook
# instead of a cell-local re-import. Arguments are (true labels, predictions).
print(metrics.classification_report(y_test, predictions))

In [None]:
# Confusion matrix for the Iris test split, computed from
# (true labels, predictions). Uses the `metrics` module already imported at
# the top of the notebook instead of a cell-local re-import.
print(metrics.confusion_matrix(y_test, predictions))

# Binary Classification of the Breast Cancer Dataset

In [None]:
# Load scikit-learn's bundled breast cancer dataset; the cells below read its
# `data`, `target` and `target_names` attributes.
bcancer = datasets.load_breast_cancer()

In [None]:
# List the attribute names available on the loaded dataset object.
dir(bcancer)

In [None]:
# Display the names stored for the target classes.
bcancer.target_names

In [None]:
# Dimensions of the feature array and of the label array, shown as a pair.
(bcancer.data.shape, bcancer.target.shape)

In [None]:
# Convert the breast cancer features and labels to float32. This rebinds the
# `data` / `target` names used for the Iris subset earlier in the notebook.
data = np.asarray(bcancer.data, dtype=np.float32)
target = np.asarray(bcancer.target, dtype=np.float32)

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
bcancer_split = model_selection.train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=123,
)
X_train, X_test, y_train, y_test = bcancer_split

In [None]:
# Build a fresh liblinear logistic-regression model, fit it on the breast
# cancer training split, and predict labels for the test split.
# `linear_model` is imported in the notebook's top import cell, so the
# duplicate mid-notebook import of LogisticRegression is dropped.
model = linear_model.LogisticRegression(solver="liblinear")

model.fit(X_train, y_train)
predictions = model.predict(X_test)

In [None]:
# Confusion matrix for the breast cancer test split, computed from
# (true labels, predictions). Uses the `metrics` module already imported at
# the top of the notebook instead of repeating the import in this cell.
print(metrics.confusion_matrix(y_test, predictions))

In [None]:
# Text summary of per-class classification metrics on the breast cancer test
# split. Uses the `metrics` module already imported at the top of the
# notebook instead of repeating the import in this cell.
print(metrics.classification_report(y_test, predictions))