In [11]:
import numpy as np

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate (gradient-descent step size).
    num_iter : int
        Number of gradient-descent iterations performed by ``fit``.
    fit_intercept : bool
        If True, a column of ones is prepended to ``X`` so a bias term is
        learned as ``theta[0]``.
    verbose : bool
        If True, ``fit`` prints the training loss every 10000 iterations.

    Attributes
    ----------
    theta : numpy.ndarray
        Learned weights, set by ``fit``.
    """

    def __init__(self, lr=0.01, num_iter=10000, fit_intercept=True, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        # Must be stored: fit() reads self.verbose on every iteration.
        self.verbose = verbose

    @property
    def num_inter(self):
        """Misspelled legacy alias of ``num_iter``, kept for backward compatibility."""
        return self.num_iter

    @num_inter.setter
    def num_inter(self, value):
        self.num_iter = value

    def __add_intercept(self, X):
        """Return ``X`` with a leading column of ones (the bias feature)."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), computed without overflow.

        ``logaddexp(0, -z)`` equals ``log(1 + exp(-z))`` but never evaluates
        ``exp`` on a large positive argument.
        """
        return np.exp(-np.logaddexp(0, -z))

    def __loss(self, h, y):
        """Mean binary cross-entropy between probabilities ``h`` and labels ``y``."""
        # Keep probabilities strictly inside (0, 1) so log() never sees 0.
        eps = 1e-15
        h = np.clip(h, eps, 1 - eps)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Fit the weights to ``X`` (m samples x n features) and 0/1 labels ``y``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not have one label
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-vector y cannot broadcast (h - y) to (m, m).
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if y.size != X.shape[0]:
            raise ValueError("y must contain exactly one label per row of X")

        if self.fit_intercept:
            X = self.__add_intercept(X)

        self.theta = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient

            if self.verbose and i % 10000 == 0:
                # Report the loss at the freshly updated weights.
                z = np.dot(X, self.theta)
                h = self.__sigmoid(z)
                print(f'loss: {self.__loss(h, y)} \t')

    def predict_prob(self, X):
        """Return the predicted probability of the positive class for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self.__add_intercept(X)

        return self.__sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: True where the probability is >= ``threshold``."""
        return self.predict_prob(X) >= threshold

In [12]:
import numpy as np

class SoftmaxRegression:
    """Multinomial (softmax) regression trained with batch gradient descent.

    Parameters
    ----------
    K : int
        Number of classes; labels must be integers in ``[0, K)``.
    lr : float
        Learning rate (gradient-descent step size).
    num_iter : int
        Number of gradient-descent iterations performed by ``fit``.
    fit_intercept : bool
        If True, a column of ones is prepended to ``X`` so a per-class bias
        is learned in ``theta[0]``.
    verbose : bool
        If True, ``fit`` prints loss and accuracy every 200 iterations.

    Attributes
    ----------
    theta : numpy.ndarray
        Learned weights of shape (n_features [+ 1], K), set by ``fit``.
    """

    def __init__(self, K, lr=0.01, num_iter=10000, fit_intercept=True, verbose=True):
        self.lr = lr
        self.num_iter = num_iter
        self.K = K
        self.fit_intercept = fit_intercept
        self.verbose = verbose

    @property
    def num_inter(self):
        """Misspelled legacy alias of ``num_iter``, kept for backward compatibility."""
        return self.num_iter

    @num_inter.setter
    def num_inter(self, value):
        self.num_iter = value

    def __add_intercept(self, X):
        """Return ``X`` with a leading column of ones (the bias feature)."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __softmax(self, z):
        """Softmax over the last axis of ``z``; accepts 1-D or 2-D logits."""
        z = np.asarray(z, dtype=float)
        # Subtract each row's own max (on a copy) for numerical stability.
        shifted = z - np.max(z, axis=-1, keepdims=True)
        exp_z = np.exp(shifted)
        # keepdims makes the row sums broadcast against (m, K) correctly.
        return exp_z / np.sum(exp_z, axis=-1, keepdims=True)

    def __h(self, X, theta):
        """Class probabilities for ``X`` under the weights ``theta``."""
        return self.__softmax(X @ theta)

    def __J(self, preds, y, m):
        """Mean cross-entropy of ``preds`` (m x K) against integer labels ``y``."""
        # Averaged over m so the loss matches the 1/m-scaled gradient.
        true_class_prob = np.clip(preds[np.arange(m), y], 1e-15, None)
        return -np.mean(np.log(true_class_prob))

    def __T(self, y, K):
        """One-hot encode integer labels ``y`` into a (len(y), K) array."""
        one_hot = np.zeros((len(y), K))
        one_hot[np.arange(len(y)), y] = 1
        return one_hot

    def __compute_gradient(self, theta, X, y, m):
        """Gradient of the mean cross-entropy with respect to ``theta``."""
        preds = self.__h(X, theta)
        return X.T @ (preds - self.__T(y, self.K)) / m

    def fit(self, X, y):
        """Fit the weights to ``X`` (m samples x n features) and labels ``y``.

        Returns
        -------
        dict
            ``{'loss': [...], 'acc': [...]}`` with one entry per iteration,
            both measured on the training data after that iteration's update.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, ``y`` does not have one label per
            row, or a label falls outside ``[0, K)``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if y.size != X.shape[0]:
            raise ValueError("y must contain exactly one label per row of X")
        # Labels are used as array indices, so they must be integers.
        y = y.astype(int)
        if y.min() < 0 or y.max() >= self.K:
            raise ValueError("labels in y must be integers in [0, K)")

        if self.fit_intercept:
            X = self.__add_intercept(X)

        hist = {'loss': [], 'acc': []}
        m, n = X.shape
        self.theta = np.zeros((n, self.K))

        for i in range(self.num_iter):
            gradient = self.__compute_gradient(self.theta, X, y, m)
            self.theta -= self.lr * gradient

            # Loss and accuracy at the updated weights, once per iteration.
            preds = self.__h(X, self.theta)
            loss = float(self.__J(preds, y, m))
            acc = float(np.mean(np.argmax(preds, axis=1) == y))
            hist['loss'].append(loss)
            hist['acc'].append(acc)

            # print stats
            if self.verbose and i % 200 == 0:
                print('{:.2f} {:.2f}%'.format(loss, acc * 100))

        return hist

    def predict_prob(self, X):
        """Return the (m x K) matrix of class probabilities for the rows of ``X``."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self.__add_intercept(X)
        return self.__h(X, self.theta)

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        return np.argmax(self.predict_prob(X), axis=-1)

In [14]:
# Construct a model with K=3; as the cell's last expression, its default object repr is displayed.
SoftmaxRegression(3)

<__main__.SoftmaxRegression at 0x1c3fe3c4340>

In [15]:
# Construct a model with all default arguments; the cell displays its default object repr.
LogisticRegression()

<__main__.LogisticRegression at 0x1c3fe3c42e0>