In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.base import BaseEstimator
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

In [2]:
cancer = load_breast_cancer()

In [3]:
X_train, y_train = StandardScaler().fit_transform(cancer.data), cancer.target

In [4]:
X_train.shape, y_train.shape

((569, 30), (569,))

In [5]:
def performance(model):
    score = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
    return score

## MyLogisticRegression

In [6]:
class MyLogisticRegression(BaseEstimator):
    def __init__(self, max_iter = 100, learning_rate=0.1):
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.w_ = None
    def fit(self, _X, y):
        n = _X.shape[0]
        m = _X.shape[1]
        X = _X.copy()
        X = np.c_[np.ones((n, 1)), X]
        w = np.zeros((m+1, 1))
        for _ in range(self.max_iter):
            for j in range(m+1):
                wj_gradient = 0
                for i in range(n):
                    wji_gradient = 1 / n * X[i, j] * (1 / (1 + np.exp(- w.T.dot(X[i]))) - y[i])
                    wj_gradient += wji_gradient
                w[j, 0] -= self.learning_rate * wj_gradient 
        self.w_ = w
        return self
    def predict(self, _X):
        n = _X.shape[0]
        m = _X.shape[1]
        X = _X.copy()
        X = np.c_[np.ones((n, 1)), X]
        y_score = X.dot(self.w_)
        y_pred = (y_score >= 0).astype(int)
        return y_pred

In [7]:
performance(MyLogisticRegression())

0.9719112596305579

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
performance(LogisticRegression())

0.975392184164114