In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

def accuracy(y_tst, y_predicted):
    """Return the fraction of predictions that equal the true labels.

    Both arguments are converted to NumPy arrays first so the comparison
    is always element-wise. Without the conversion, two plain Python
    lists compared with ``==`` yield a single bool and the score
    degenerates to ``0`` or ``1 / len(y_tst)``.

    Args:
        y_tst: True labels (array-like).
        y_predicted: Predicted labels (array-like) aligned with ``y_tst``.

    Returns:
        The number of matching positions divided by ``len(y_tst)``.
    """
    y_true = np.asarray(y_tst)
    y_pred = np.asarray(y_predicted)
    return np.sum(y_true == y_pred) / len(y_true)

def log_function(sample_size, y_trn, y_predicted, eps=1e-15):
    """Return the mean binary cross-entropy (log-loss) of the predictions.

    Predicted probabilities are clipped to ``[eps, 1 - eps]`` before the
    logarithm is taken. A prediction of exactly 0.0 or 1.0 would otherwise
    produce ``log(0) = -inf`` and, multiplied by a zero label term,
    ``nan`` - which makes the reported cost useless.

    Args:
        sample_size: Number of samples the summed loss is divided by.
        y_trn: True binary labels (array-like of 0/1).
        y_predicted: Predicted probabilities (array-like).
        eps: Clipping margin that keeps both logarithms finite.

    Returns:
        ``-1 / sample_size`` times the summed log-likelihood.
    """
    y_true = np.asarray(y_trn)
    # float64 is required for 1 - eps to stay strictly below 1.0.
    probs = np.clip(np.asarray(y_predicted, dtype=float), eps, 1 - eps)
    log_summation = np.sum(y_true * np.log(probs)
                           + (1 - y_true) * np.log(1 - probs))
    return -1 / sample_size * log_summation

def sigmoid(linear_function):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of the input.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp``, which evaluates ``log(1 + exp(-x))`` without ever
    forming ``exp(-x)`` directly. This avoids the floating-point overflow
    (and its RuntimeWarning) that the naive formula triggers for large
    negative inputs, while giving the same result.

    Args:
        linear_function: Scalar or array-like of raw linear scores.

    Returns:
        Sigmoid of each score, in the range [0, 1].
    """
    scores = np.asarray(linear_function)
    return np.exp(-np.logaddexp(0, -scores))

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Attributes are documented inline in ``__init__``. ``fit`` must be
    called before ``predict``; until then ``weights`` and ``bias`` are
    ``None``.
    """

    def __init__(self, alpha=0.1, n_iter=1000):
        """Store the hyper-parameters; no training happens here.

        Args:
            alpha: Learning rate applied to every gradient step.
            n_iter: Number of gradient-descent iterations run by ``fit``.
        """
        self.alpha = alpha
        self.n_iter = n_iter
        # Learned parameters, populated by fit().
        self.weights = None
        self.bias = None
        # One dict per training iteration: Iteration, W0..Wk, Bias, Cost.
        self.info = []

    def __repr__(self):
        """Return the per-iteration training log rendered as a table.

        Before ``fit`` has recorded anything, a short hyper-parameter
        summary is returned instead (an empty log has no 'Iteration'
        column to index on).
        """
        if not self.info:
            return (f'{type(self).__name__}(alpha={self.alpha}, '
                    f'n_iter={self.n_iter}) - not fitted yet')

        df = pd.DataFrame(self.info).set_index('Iteration')
        # Show every column, but only while formatting this table, so the
        # caller's global pandas display options are left untouched.
        with pd.option_context('display.max_columns', None):
            return f'\n ---------- \n Training Model Coefficients - ' \
                   + f'verify the minimum cost: \n ----------\n {df}'

    def fit(self, x_trn, y_trn):
        """Learn ``weights`` and ``bias`` from the training data.

        Every call starts from zero-initialised parameters and an empty
        training log, so refitting does not accumulate stale rows.

        Note that the cost is evaluated before the gradient step, so each
        logged row pairs the cost at the pre-update parameters with the
        post-update weights and bias.

        Args:
            x_trn: Training features, array-like of shape
                (n_samples, n_features).
            y_trn: Binary training labels, array-like of length n_samples.
        """
        x_trn = np.asarray(x_trn, dtype=float)
        y_trn = np.asarray(y_trn, dtype=float)
        n_samples, n_features = x_trn.shape

        self.weights = np.zeros(n_features)
        self.bias = 0
        self.info = []

        for iteration in range(self.n_iter):
            y_predicted = sigmoid(self.linear_function(x_trn))
            cost = log_function(n_samples, y_trn, y_predicted)
            self.gradient_descent(n_samples, x_trn, y_predicted - y_trn)

            # Key order defines the column order of the printed table.
            record = {'Iteration': iteration}
            for i, weight in enumerate(self.weights):
                record['W' + str(i)] = weight
            record['Bias'] = self.bias
            record['Cost'] = cost
            self.info.append(record)

    def linear_function(self, x):
        """Return the raw linear scores ``x . weights + bias``."""
        return np.dot(x, self.weights) + self.bias

    def gradient_descent(self, n_samples, x_trn, residuals):
        """Apply one gradient step to ``weights`` and ``bias`` in place.

        Args:
            n_samples: Number of samples the gradients are averaged over.
            x_trn: Feature matrix the residuals were computed from.
            residuals: Predicted probabilities minus true labels.
        """
        weight_derivative = (1 / n_samples) * np.dot(x_trn.T, residuals)
        bias_derivative = (1 / n_samples) * np.sum(residuals)
        self.weights -= self.alpha * weight_derivative
        self.bias -= self.alpha * bias_derivative

    def predict(self, x_tst):
        """Return predicted class labels for ``x_tst``.

        Args:
            x_tst: Features to classify, array-like of shape
                (n_samples, n_features).

        Returns:
            A list of ints: 1 where the predicted probability is strictly
            greater than 0.5, otherwise 0.
        """
        probabilities = sigmoid(self.linear_function(np.asarray(x_tst)))
        return (probabilities > 0.5).astype(int).tolist()


if __name__ == '__main__':
    # Demo: train the classifier on scikit-learn's bundled breast-cancer
    # data set and report its accuracy on a held-out 20% split.
    bc = datasets.load_breast_cancer()
    X = bc.data
    y = bc.target

    # A fixed random_state keeps the train/test partition reproducible.
    split = train_test_split(X, y, test_size=0.2, random_state=1234)
    X_train, X_test, y_train, y_test = split

    logistic_classifier = LogisticRegression(alpha=1e-5, n_iter=1000)
    logistic_classifier.fit(X_train, y_train)
    predictions = logistic_classifier.predict(X_test)

    # print() falls back to the class's __repr__, which renders the
    # per-iteration training log.
    print(logistic_classifier)
    test_accuracy = accuracy(y_test, predictions)
    print("----------\nLR classification accuracy:", test_accuracy)


 ---------- 
 Training Model Coefficients - verify the minimum cost: 
 ----------
                  W0        W1        W2        W3            W4            W5  \
Iteration                                                                       
0          0.000007  0.000017  0.000036 -0.000317  1.016919e-07 -2.021637e-08   
1          0.000045  0.000075  0.000283  0.001017  4.027614e-07  1.976795e-07   
2          0.000011  0.000040  0.000054 -0.001343  2.476329e-07 -1.227666e-07   
3          0.000081  0.000142  0.000499  0.001327  7.648824e-07  3.190927e-07   
4          0.000049  0.000110  0.000288 -0.000897  6.278010e-07  1.996203e-08   
...             ...       ...       ...       ...           ...           ...   
995        0.005041  0.008652  0.029695  0.015600  5.112721e-05 -8.071805e-06   
996        0.005044  0.008655  0.029709  0.015603  5.115044e-05 -8.095912e-06   
997        0.005046  0.008659  0.029723  0.015605  5.117365e-05 -8.120030e-06   
998        0.005049  0.00