In [1]:
import math
import random
import numpy as np
import pandas as pd
from time import time
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

In [2]:
class LogisticRegression(object):
    def __init__(self, learning_rate=0.0001, max_iter=500000):
        self.learning_rate = learning_rate
        self.max_iter = max_iter

    def train(self, X, y):
        self.w = np.zeros(len(X[0]) + 1)
        iter_nb = 0
        
        while iter_nb <= self.max_iter:
            index = random.randint(0, len(y) - 1)  # TODO: use batch size > 1
            xi = np.append(X[index], [1.0], axis=0)
            yi = y[index]
            w_xi = np.dot(self.w, xi)
            exp_w_xi = math.exp(w_xi)

            for j in range(len(self.w)):
                g_wj = yi * xi[j] - exp_w_xi * xi[j] / (1 + exp_w_xi)
                self.w[j] += self.learning_rate * g_wj
            iter_nb += 1

    def predict_proba(self, X):
        bias = np.zeros(len(X)) + 1.0
        X = np.column_stack((X, bias))
        proba = np.dot(X, self.w)
        return proba

    def predict(self, X):
        proba = self.predict_proba(X)
        y = (proba > 0.5).astype(float)
        return y

    def evaluate(self, X, y):
        proba_predicted = self.predict_proba(X)
        y_predicted = self.predict(X)
        metrics = {
            'auc_roc': roc_auc_score(y, proba_predicted),
            'accuracy': accuracy_score(y, y_predicted)
        }
        return metrics

In [3]:
X_all, y_all = load_breast_cancer(return_X_y=True)
X_all = MinMaxScaler().fit_transform(X_all)
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=23323)

print('Start training...')
lr = LogisticRegression()
time_start = time()
lr.train(X_train, y_train)
print('Training took {} seconds'.format(time() - time_start))

test_metrics = lr.evaluate(X_test, y_test)
print('Test metrics:', test_metrics)

Start training...
Training took 48.961267948150635 seconds
Test metrics: {'auc_roc': 0.9978428351309707, 'accuracy': 0.9736842105263158}
