# Logistic Regression

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets

In [3]:
class LogisticRegression:

    def __init__(self, lr=0.001, n_iters=1000):
        # initialize logistic regression with attributes
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        # set number of samples and features using shape of features
        n_samples, n_features = X.shape
        
        # initialize weights to zeros for each feature
        self.weights = np.zeros(n_features)
        # initialize bias to 0
        self.bias = 0

        # perform gradient descent for dw (weight) and db (bias)
        for _ in range(self.n_iters):
            # calculate predicted values using: dot product of X and weights in addition to bias
            linear_model = np.dot(X, self.weights) + self.bias
            # finalize calculated predicted values by applying sigmoid funciton
            y_predicted = self._sigmoid(linear_model)

            # calculate gradients of weight and bias
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            # update weight and bias for each iteration; the `-=` indicates a loss minimization mechanism
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        # apply sigmoid function to the linear model
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return y_predicted_cls

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
      

In [9]:
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
print(X[:5])
print()
print(y[:5])

[[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
  1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
  6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01
  1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01
  4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 1.326e+03 8.474e-02 7.864e-02 8.690e-02
  7.017e-02 1.812e-01 5.667e-02 5.435e-01 7.339e-01 3.398e+00 7.408e+01
  5.225e-03 1.308e-02 1.860e-02 1.340e-02 1.389e-02 3.532e-03 2.499e+01
  2.341e+01 1.588e+02 1.956e+03 1.238e-01 1.866e-01 2.416e-01 1.860e-01
  2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 1.203e+03 1.096e-01 1.599e-01 1.974e-01
  1.279e-01 2.069e-01 5.999e-02 7.456e-01 7.869e-01 4.585e+00 9.403e+01
  6.150e-03 4.006e-02 3.832e-02 2.058e-02 2.250e-02 4.571e-03 2.357e+01
  2.553e+01 1.525e+02 1.709e+03 1.444e-01 4.245e-01 4.504e-01 2.430e-01
  3.613e-01 8.758e-02]
 [1.142e+01 2.038e+01 7.758e+01 3.861e+02 1.425e-01 2.839e-01 2.414

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')

X_train shape: (455, 30)
y_train shape: (455,)


In [11]:
def accuracy(y_true, y_pred):
    return np.sum(y_true==y_pred) / len(y_true)

In [15]:
regressor = LogisticRegression()
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)

print(f'Regressor accuracy: {accuracy(y_test, predictions)}')

Regressor accuracy: 0.9473684210526315


  return 1 / (1 + np.exp(-x))
