In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [2]:
class Logistic_Regression:
    """Regularized logistic regression on two features, fitted by batch gradient descent.

    The two input columns are z-score normalized and then expanded into all
    polynomial terms ``x1**(i-j) * x2**j`` up to ``degree`` (with a leading
    column of ones for the bias).  ``lambda_`` is the L2 regularization
    strength; the bias weight ``theta[0]`` is never regularized.

    Attributes set by ``__init__``:
        m:       number of training samples.
        n:       number of columns in the expanded design matrix.
        X_data:  (m, n) design matrix (normalized, polynomial-mapped).
        y_data:  (m, 1) column of labels.
        theta:   (n, 1) weights, initialized uniformly in [-0.5, 0.5).
    """

    def __init__(self, X, y, degree=1, lambda_=0):
        """Prepare the design matrix and randomly initialize the weights.

        Args:
            X: 2-D array whose first two columns are the input features.
            y: array of labels; reshaped to a column vector.
            degree: highest total polynomial degree of the mapped features.
            lambda_: L2 regularization strength (0 disables regularization).
        """
        self.m = X.shape[0]
        self.degree = degree
        self.lambda_ = lambda_
        self.X_data = self.map_feature(self.data_normalize(X))
        self.y_data = y.reshape(-1, 1)
        self.n = self.X_data.shape[1]
        self.theta = np.random.rand(self.n).reshape(-1, 1) - 0.5

    def data_normalize(self, X):
        """Return a float copy of ``X`` with every non-constant column z-scored.

        Columns whose standard deviation is zero are copied through unchanged
        to avoid dividing by zero.
        """
        # Work in float: allocating with the input's dtype would silently
        # truncate the normalized values when X holds integers.
        X = np.asarray(X, dtype=float)
        avg = X.mean(axis=0)
        std = X.std(axis=0)
        varying = std != 0
        X_norm = X.copy()
        X_norm[:, varying] = (X[:, varying] - avg[varying]) / std[varying]
        return X_norm

    def map_feature(self, X):
        """Expand the first two columns of ``X`` into polynomial features.

        Returns an array with one row per sample whose columns are a leading
        column of ones followed by ``x1**(i-j) * x2**j`` for ``i = 1..degree``
        and ``j = 0..i``.
        """
        x1, x2 = X[:, 0], X[:, 1]
        columns = [np.ones_like(x1)]
        for i in range(1, self.degree + 1):
            for j in range(i + 1):
                columns.append(np.power(x1, i - j) * np.power(x2, j))
        return np.column_stack(columns)

    def compute_hypothesis(self):
        """Store the predicted probabilities for ``X_data`` in ``self.hypothesis``."""
        self.hypothesis = self.sigmoid(np.matmul(self.X_data, self.theta)).reshape(-1, 1)

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))

    def loss(self):
        """Return the per-sample log-likelihood as an (m, 1) array.

        Requires ``compute_hypothesis`` to have been called.
        """
        # Keep probabilities strictly inside (0, 1) so a saturated sigmoid
        # cannot produce log(0) = -inf or 0 * -inf = nan.
        eps = np.finfo(float).eps
        h = np.clip(self.hypothesis, eps, 1 - eps)
        return self.y_data * np.log(h) + (1 - self.y_data) * np.log(1 - h)

    def cost_function(self):
        """Return the regularized cross-entropy cost as a Python float.

        The penalty is ``lambda_ / (2 * m) * sum(theta[1:] ** 2)``: the bias is
        excluded, which matches the update performed by ``compute_gradient``.
        """
        penalty = self.lambda_ / (2 * self.m) * np.sum(np.square(self.theta[1:]))
        return float(-np.mean(self.loss()) + penalty)

    def compute_gradient(self, learning_rate):
        """Apply one gradient-descent step to ``self.theta``.

        Uses the hypothesis currently stored in ``self.hypothesis``.
        """
        err = self.hypothesis - self.y_data
        delta = np.matmul(self.X_data.T, err)
        # L2 regularization shrinks every weight except the bias term.
        decay = np.full_like(self.theta, 1 - learning_rate * self.lambda_ / self.m)
        decay[0, 0] = 1.0
        self.theta = decay * self.theta - (learning_rate / self.m) * delta

    def fit_data(self, iterations=100, learning_rate=0.1):
        """Run gradient descent, then plot the cost history and the fitted model.

        Training stops after ``iterations`` steps, or earlier as soon as a step
        lowers the cost by less than 1e-8 (this includes steps that raise it).
        """
        self.compute_hypothesis()
        history = [self.cost_function()]
        for _ in range(iterations):
            self.compute_gradient(learning_rate)
            self.compute_hypothesis()
            history.append(self.cost_function())
            if history[-2] - history[-1] < 1e-8:
                break
        cost = np.array(history)
        x = np.arange(cost.shape[0])
        px.line(x=x, y=cost, template='plotly_dark').show()
        self.plot_model_and_data()

    def plot_model_and_data(self):
        """Scatter both classes (in normalized feature space) with the fitted model.

        With a linear model (at most three design-matrix columns) the decision
        line ``theta0 + theta1 * x1 + theta2 * x2 = 0`` is drawn; otherwise
        contour lines of the linear score ``features @ theta`` are drawn over a
        grid spanning [-2, 3] on both axes.
        """
        pos = self.X_data[self.y_data[:, 0] == 1]
        neg = self.X_data[self.y_data[:, 0] == 0]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=pos[:, 1], y=pos[:, 2], mode='markers'))
        fig.add_trace(go.Scatter(x=neg[:, 1], y=neg[:, 2], mode='markers'))
        if self.X_data.shape[1] <= 3:
            # Span the line over the range of the first feature, i.e. column 1
            # of the design matrix (not row 1, which is a single sample).
            first_feature = self.X_data[:, 1]
            plot_x = np.array([np.min(first_feature), np.max(first_feature)])
            plot_y = (-self.theta[1, 0] * plot_x - self.theta[0, 0]) / self.theta[2, 0]
            fig.add_trace(go.Scatter(x=plot_x, y=plot_y))
        else:
            u = np.linspace(-2, 3, num=100)
            v = np.linspace(-2, 3, num=100)
            # meshgrid's default 'xy' indexing yields arrays of shape
            # (len(v), len(u)), so z[row, col] pairs with (y=v[row], x=u[col]),
            # which is the orientation the contour trace expects.
            grid_u, grid_v = np.meshgrid(u, v)
            points = np.column_stack((grid_u.ravel(), grid_v.ravel()))
            z = np.matmul(self.map_feature(points), self.theta).reshape(grid_u.shape)
            fig.add_trace(go.Contour(x=u, y=v, z=z, contours_coloring='lines',))
        fig.update_layout(
            template='plotly_dark'
        )
        fig.show()
        

In [3]:
# First model: estimate an applicant's chance of admission to a university
# department from their results on two exams.

data = np.loadtxt('ex2data1.txt')
# Every column except the last is a feature; the last column is the label.
X = data[:, :-1]
Y = data[:, -1]
model = Logistic_Regression(X, Y)

In [4]:
# Train for at most 200 gradient-descent steps with learning rate 1; fit_data
# then shows the cost-history plot and the model-and-data plot.
model.fit_data(iterations=200, learning_rate=1)

In [5]:
# Second model: decide whether a microchip should be accepted or rejected
# based on the results of two tests.

data = np.loadtxt('ex2data2.txt')
# Every column except the last is a feature; the last column is the label.
X = data[:, :-1]
Y = data[:, -1]
# Cubic feature mapping with L2 regularization strength 3.
model = Logistic_Regression(X, Y, degree=3, lambda_=3)

In [6]:
# Train for at most 200 gradient-descent steps with learning rate 1; fit_data
# then shows the cost-history plot and the model-and-data plot.
model.fit_data(iterations=200, learning_rate=1)