In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn import linear_model
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
class LG_scratch(ClassifierMixin, BaseEstimator):
    """Binary logistic regression trained with full-batch gradient ascent.

    The model has no intercept term and no regularisation: it learns a single
    ``(n_features, 1)`` coefficient column and predicts the positive class when
    ``sigmoid(X @ weights) > 0.5``. Targets are assumed to be encoded as 0/1
    (the update rule subtracts a probability from ``y`` directly).

    Note: scikit-learn requires mixins to be listed *before* ``BaseEstimator``
    so that the mixin's estimator tags are resolved correctly.

    Parameters
    ----------
    alpha : float, default=0.0005
        Step size applied to every gradient update.
    loop : int, default=800
        Number of full-batch update iterations run by ``fit``.
    weights : array-like or None, default=None
        Slot for the coefficient column. ``fit`` always overwrites it with a
        fresh array initialised to ones, so a value passed here is not used as
        a starting point.
    """

    def __init__(self, alpha=0.0005, loop=800, weights=None):
        self.alpha = alpha      # step size
        self.loop = loop        # number of iterations
        self.weights = weights  # coefficient column, set by fit()

    def sigmoid(self, X):
        """Return the element-wise logistic function ``1 / (1 + exp(-X))``."""
        # exp(-X) overflows to inf for very negative X; 1 / (1 + inf) is then
        # exactly 0.0, which is the correct limit. Silence only that warning so
        # the computed values stay identical.
        with np.errstate(over="ignore"):
            return 1.0 / (1 + np.exp(-X))

    def fit(self, X, y):
        """Learn the coefficient column from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of length n_samples
            Training targets (assumed 0/1).

        Returns
        -------
        self : LG_scratch
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X = np.asarray(X, dtype=float)          # accept lists / DataFrames too
        Y = np.reshape(y, (len(y), 1))          # column vector, aligns with X @ weights
        if X.shape[0] != Y.shape[0]:
            # Without this check a length-1 side would broadcast silently.
            raise ValueError(
                "X and y have inconsistent numbers of samples: %d != %d"
                % (X.shape[0], Y.shape[0])
            )

        self.weights = np.ones((X.shape[1], 1))
        for _ in range(self.loop):
            output = self.sigmoid(X.dot(self.weights))
            diff = Y - output
            self.weights += self.alpha * X.T.dot(diff)   # gradient-ascent step
        return self

    def predict(self, x):
        """Predict 0/1 labels for each row of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Integer labels: 1 where the predicted probability exceeds 0.5,
            otherwise 0.
        """
        x = np.asarray(x, dtype=float)
        probabilities = self.sigmoid(x.dot(self.weights))
        # Flatten the (n_samples, 1) column into the 1-D label array that
        # scikit-learn metrics and model-selection utilities expect.
        return (probabilities > 0.5).astype(int).ravel()

    def score(self, X, y, **kwargs):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``.

        An optional ``sample_weight`` keyword is forwarded to
        ``accuracy_score``; any other keyword arguments are ignored.
        """
        return accuracy_score(
            y, self.predict(X), sample_weight=kwargs.get("sample_weight")
        )

In [5]:
# Load scikit-learn's bundled breast-cancer dataset and keep 90% of the
# samples for training; the fixed random_state makes the split repeatable.
data = load_breast_cancer()
X = data.data
y = data.target
X_train, x_test, Y_train, y_test = train_test_split(
    X, y, train_size=0.9, random_state=20
)

# Reference model: scikit-learn's LogisticRegression with default settings.
logistic = linear_model.LogisticRegression()
logistic.fit(X_train, Y_train)
print('LogisticRegression score: %f' % logistic.score(x_test, y_test))

# Hand-written gradient-ascent model, scored on the same held-out samples.
scratch = LG_scratch()
scratch.fit(X_train, Y_train)
print('From my scratch score: %f' % scratch.score(x_test, y_test))

LogisticRegression score: 0.929825
From my scratch score: 0.947368


  
