In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn import linear_model
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
class LG_scratch(ClassifierMixin, BaseEstimator):
    """Binary logistic regression trained with per-sample gradient updates.

    Targets are expected to be encoded as 0/1: ``cost`` uses ``y`` and
    ``1 - y`` directly and ``predict`` thresholds the sigmoid output at 0.5.

    Parameters
    ----------
    alpha : float, default=0.1
        Step size; every per-sample update is scaled by ``alpha / n_samples``.
    loop : int, default=500
        Maximum number of passes over the training data.
    weights : ndarray of shape (n_features, 1) or None, default=None
        Model coefficients. ``fit`` always re-initialises them to ones, so a
        value passed here is not used as a starting point.
    l2 : float, default=0.001
        L2 penalty strength used by ``cost``.
    epsilon : float, default=1e-4
        Training stops once ``cost`` on the training data falls below this.

    Notes
    -----
    No intercept column is added to ``X``, although ``cost`` leaves
    ``weights[0]`` out of the penalty. The L2 term only enters the value
    returned by ``cost`` (and therefore the stopping test); the weight update
    in ``fit`` contains no penalty term.
    """

    def __init__(self, alpha=0.1, loop=500, weights=None, l2=0.001, epsilon=1e-4):
        self.alpha = alpha      # step size
        self.loop = loop        # maximum number of passes over the data
        self.weights = weights  # coefficients, overwritten by fit()
        self.l2 = l2            # L2 penalty strength (used by cost())
        self.epsilon = epsilon  # cost threshold for early stopping

    def sigmoid(self, X):
        """Return the element-wise logistic function ``1 / (1 + exp(-X))``.

        ``exp`` is only evaluated on non-positive values, so inputs of large
        magnitude cannot overflow.
        """
        X = np.asarray(X, dtype=float)
        decay = np.exp(-np.abs(X))  # never exceeds 1
        # For X >= 0 use 1 / (1 + e^-X); for X < 0 use the algebraically
        # equal e^X / (1 + e^X).
        return np.where(X >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def cost(self, X, y):
        """Return the L2-penalised mean cross-entropy for the current weights.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Targets encoded as 0/1.

        Returns
        -------
        ndarray
            One-element array holding the cost when ``y`` is 1-D.
        """
        n_samples = len(X)
        h = self.sigmoid(np.asarray(X, dtype=float).dot(self.weights))
        # Keep probabilities strictly inside (0, 1): log(0) is -inf and
        # 0 * -inf is NaN, which would make every `cost < epsilon` test False.
        tiny = np.finfo(float).eps
        h = np.clip(h, tiny, 1.0 - tiny)
        y = np.asarray(y)
        data_term = (-1.0 / n_samples) * (np.log(h).T.dot(y) + np.log(1 - h).T.dot(1 - y))
        penalty = (self.l2 / (2.0 * n_samples)) * np.sum(np.square(self.weights[1:]))
        return data_term + penalty

    def fit(self, X, y, stop=False):
        """Fit the weights with per-sample gradient updates.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Targets encoded as 0/1.
        stop : bool, default=False
            If True, no training pass is run and the weights stay at their
            initial value of ones.

        Returns
        -------
        self
        """
        X = np.asarray(X, dtype=float)
        y_flat = np.asarray(y).reshape(-1)
        Y = y_flat.reshape(-1, 1)
        n_samples = len(X)
        self.weights = np.ones((X.shape[1], 1))
        if stop or n_samples == 0:
            return self

        step = self.alpha / n_samples
        for _ in range(self.loop):
            for x_i, y_i in zip(X, Y):
                diff = y_i - self.sigmoid(x_i.dot(self.weights))
                self.weights += step * x_i.reshape(-1, 1) * diff
            # Convergence is tested once per pass: evaluating the
            # full-dataset cost after every single-sample update would make
            # each pass quadratic in n_samples.
            if np.all(self.cost(X, y_flat) < self.epsilon):
                break
        return self

    def predict(self, x):
        """Return the predicted 0/1 label for every row of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            1 where the sigmoid output exceeds 0.5, otherwise 0.
        """
        x = np.asarray(x, dtype=float)
        proba = self.sigmoid(x.dot(self.weights)).reshape(-1)
        return (proba > 0.5).astype(int)

    def score(self, X, y, **kwargs):
        """Return the accuracy of ``predict(X)`` against ``y``.

        Extra keyword arguments are accepted and ignored.
        """
        # accuracy_score expects (y_true, y_pred) in that order.
        return accuracy_score(y, self.predict(X))

In [5]:
# Load the breast-cancer dataset and hold out 10% of the rows for evaluation.
data = load_breast_cancer()
X, y = data.data, data.target
X_train, x_test, Y_train, y_test = train_test_split(
    X, y, train_size=0.9, random_state=111
)

# Baseline: scikit-learn's own LogisticRegression with default settings.
logistic = linear_model.LogisticRegression()
logistic.fit(X_train, Y_train)
print('LogisticRegression score: %f' % logistic.score(x_test, y_test))

# Same split, evaluated with the from-scratch estimator defined above.
scratch = LG_scratch()
scratch.fit(X_train, Y_train)
print('From my scratch score: %f' % scratch.score(x_test, y_test))

  
  # Remove the CWD from sys.path while we load stuff.


LogisticRegression score: 0.929825
From my scratch score: 0.929825
