In [1]:
from sklearn.utils import check_random_state
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

class MyLogisticRegression():
    """Binary logistic regression trained with batch gradient descent.

    The class bundles three utilities:

    * ``my_generate_dataset`` -- draw a synthetic, optionally noisy, binary
      classification dataset from a random logistic model.
    * ``my_weights_finder`` -- fit intercept + feature weights by minimising
      the mean binary cross-entropy with batch gradient descent.
    * ``my_fit_visualizer`` -- plot the data and the learnt decision
      boundary for 2 or 3 features.

    All randomness (dataset generation AND weight initialisation) is drawn
    from ``self.generator`` so that ``random_state`` makes runs reproducible.
    """

    # Marker colours keyed by class label (0 -> green, 1 -> blue).
    _CLASS_COLORS = {0: 'rgb(0,255,0)', 1: 'rgb(0,0,255)'}
    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated sigmoid cannot produce log(0) (inf / nan cost).
    _EPS = 1e-15

    def __init__(self, random_state=None):
        """Store the seed and build the random generator used by the instance.

        Parameters
        ----------
        random_state : None, int or RandomState instance
            Forwarded to ``sklearn.utils.check_random_state``.
        """
        self.random_state = random_state
        self.generator = check_random_state(self.random_state)

    @staticmethod
    def _sigmoid(z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1 / (1 + np.exp(-z))

    def my_generate_dataset(self, n, m, noise=0):
        """Generate a synthetic binary classification dataset.

        Features are uniform in [0, 1); true weights (intercept first) are
        uniform in [0, 1). A sample is labelled 1 when its sigmoid output is
        at least the mean sigmoid output rounded to two decimals. Each label
        is then flipped independently with probability ``noise``.

        Parameters
        ----------
        n : int
            Number of samples.
        m : int
            Number of features.
        noise : float, default 0
            Probability of flipping each label.

        Returns
        -------
        X : ndarray of shape (n, m)
            Feature matrix (without the intercept column).
        Y : ndarray of shape (n, 1)
            Integer labels in {0, 1}.
        beta : ndarray of shape (m + 1, 1)
            Generating weights, intercept first.
        """
        X = self.generator.rand(n, m)
        X = np.concatenate((np.ones((n, 1)), X), axis=1)
        beta = self.generator.rand(m+1, 1)
        sigmoid = self._sigmoid(np.dot(X, beta))
        # Thresholding at the (rounded) mean keeps the two classes roughly balanced.
        Y = sigmoid >= round(sigmoid.mean(), 2)
        Y = Y.astype(int)
        binomial_noise = self.generator.binomial(n=1, p=noise, size=(n, 1))
        # Adding a 0/1 draw modulo 2 flips exactly the labels where the draw is 1.
        Y += binomial_noise
        Y %= 2
        return X[:, 1:], Y, beta

    def my_weights_finder(self, X, Y, epochs=100, threshold=10**(-10), lr=0.01):
        """Fit logistic-regression weights with batch gradient descent.

        Parameters
        ----------
        X : ndarray of shape (n, m)
            Feature matrix without intercept column.
        Y : array-like of n labels in {0, 1}
            Accepted as shape (n, 1) or (n,); it is reshaped to a column so
            that ``Y - sigmoid`` never broadcasts to an (n, n) matrix.
        epochs : int, default 100
            Maximum number of gradient steps.
        threshold : float, default 1e-10
            Training stops early once the absolute change in cost between
            two consecutive epochs is at most this value.
        lr : float, default 0.01
            Learning rate.

        Returns
        -------
        learnt_beta : ndarray of shape (m + 1, 1)
            Learnt weights, intercept first.
        cost_history : list of float
            ``np.inf`` sentinel followed by the cost at each epoch run.
        """
        n, m = X.shape
        Y = np.asarray(Y).reshape(-1, 1)
        X = np.concatenate((np.ones((n, 1)), X), axis=1)
        # Use the instance generator (not the global numpy RNG) so that
        # ``random_state`` makes the fit reproducible.
        beta = self.generator.rand(m+1, 1)
        cost_history = [np.inf]
        for i in range(epochs):
            sigmoid = self._sigmoid(np.matmul(X, beta))
            # Clip only for the log terms; the gradient uses the raw sigmoid.
            clipped = np.clip(sigmoid, self._EPS, 1 - self._EPS)
            previous_cost = cost_history[-1]
            current_cost = (-1/n)*(np.sum(Y*np.log(clipped) + (1-Y)*np.log(1-clipped)))
            cost_history.append(current_cost)
            batch_gradient = (-1/n)*(np.matmul(X.T, (Y - sigmoid)))
            beta = beta - lr*batch_gradient
            if abs(previous_cost - current_cost) <= threshold:
                print("Threshold reached at epoch: ", i)
                break
        learnt_beta = beta
        return learnt_beta, cost_history

    def _plot_2d(self, X, labels, w):
        """Scatter two features and draw the line w0 + w1*x1 + w2*x2 = 0."""
        scatter_trace = go.Scatter(
            x=X[:, 0],
            y=X[:, 1],
            mode='markers',
            marker=dict(color=[self._CLASS_COLORS[label] for label in labels]),
            name='Data Points'
            )
        line_trace = go.Scatter(
            x=X[:, 0],
            # Solve w0 + w1*x1 + w2*x2 = 0 for x2.
            y=-w[0] / w[2] - w[1] * X[:, 0] / w[2],
            mode='lines',
            line=dict(color='red'),
            name='Fitted Line',
            )
        layout = go.Layout(
            xaxis=dict(title='X1'),
            yaxis=dict(title='X2'),
            width=800,
            height=600,
            showlegend=False
            )
        figure = go.Figure(data=[scatter_trace, line_trace], layout=layout)
        figure.show()

    def _plot_3d(self, X, labels, w):
        """Scatter three features and draw the plane w0 + w1*x1 + w2*x2 + w3*x3 = 0."""
        scatter_trace = go.Scatter3d(
            x=X[:, 0],
            y=X[:, 1],
            z=X[:, 2],
            mode='markers',
            marker=dict(color=[self._CLASS_COLORS[label] for label in labels]),
            name='Data Points'
            )
        step = 0.02
        x_axis = np.arange(X[:, 0].min(), X[:, 0].max(), step)
        y_axis = np.arange(X[:, 1].min(), X[:, 1].max(), step)
        # Default 'xy' indexing: xx / yy have shape (len(y_axis), len(x_axis)),
        # which is the row = y, column = x layout the surface trace expects.
        xx, yy = np.meshgrid(x_axis, y_axis)
        # Solve w0 + w1*x1 + w2*x2 + w3*x3 = 0 for x3.
        Z = -(w[0] + w[1] * xx + w[2] * yy) / w[3]
        decision_boundary_trace = go.Surface(
            x=x_axis,
            # The y axis must span the second feature's range, not the first's.
            y=y_axis,
            z=Z,
            name='Decision Surface',
            showscale=False
            )
        layout = go.Layout(
            width=800,
            height=600,
            showlegend=False
            )
        figure = go.Figure(data=[scatter_trace, decision_boundary_trace], layout=layout)
        figure.show()

    def my_fit_visualizer(self, X, Y, learnt_weights):
        """Plot the data points together with the learnt decision boundary.

        Parameters
        ----------
        X : ndarray of shape (n, 2) or (n, 3)
            Feature matrix without intercept column.
        Y : array-like of n labels
            Must contain exactly two distinct values; markers are coloured
            through a lookup keyed by 0 and 1.
        learnt_weights : array-like of m + 1 weights
            Intercept first, shape (m + 1, 1) or (m + 1,).

        Notes
        -----
        Nothing is returned; the figure is shown. A message is printed when
        the labels are not binary or the number of features is not 2 or 3.
        The boundary is obtained by dividing by the last weight, so that
        weight must be non-zero.
        """
        labels = np.asarray(Y).reshape(-1)
        n_classes = np.unique(labels).size
        if n_classes != 2:
            print('Please provide dataset with Binary Labels, received %s classes'%(n_classes))
            return

        w = np.asarray(learnt_weights).reshape(-1)
        n_features = X.shape[1]
        if n_features == 2:
            self._plot_2d(X, labels, w)
        elif n_features == 3:
            self._plot_3d(X, labels, w)
        else:
            print('Visualization is only supported for 2 or 3 features, received %s'%(n_features))


In [2]:
# Seeded model, then a synthetic sample: 500 rows, 3 features,
# each label flipped with probability 0.1.
logreg = MyLogisticRegression(random_state=100)
X, Y, weights = logreg.my_generate_dataset(
    n=500,
    m=3,
    noise=0.1,
)

In [3]:
# Shapes of the features, the labels and the generating weights.
print(*(arr.shape for arr in (X, Y, weights)))

(500, 3) (500, 1) (4, 1)


In [4]:
# Batch gradient descent: at most 10000 epochs with learning rate 2;
# the default threshold decides early stopping.
learnt_weights, cost_list = logreg.my_weights_finder(
    X=X,
    Y=Y,
    epochs=10000,
    lr=2,
)

Threshold reached at epoch:  2011


In [5]:
# Show the last five entries of the cost history returned by the fit.
cost_list[-5:]

[0.36024406350486443,
 0.36024406340325643,
 0.3602440633023592,
 0.36024406320216795,
 0.3602440631026775]

In [6]:
# Plot the samples together with the decision boundary of the learnt weights.
logreg.my_fit_visualizer(X, Y, learnt_weights)