In [52]:
%%time
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split


np.random.seed(0)


class MultiLogisticRegression():
    """
    Logistic Regression for Multi-Class case based on Stanford lections
    http://deeplearning.stanford.edu/tutorial/supervised/SoftmaxRegression/

    ...

    Attributes
    ----------
    theta : matrix(k, n)
        all the parameters of our model
        k - number of classes
        n - number of features
    learning_rate : float
        learning_rate for SGD
    lambda : float
        lambda for weights regularization
    n_iterations : int
        number of iterations for SGD

    Methods
    -------
    fit(x=inputs, y=targets)
        Train model based on X, y
    predict(x=inputs)
        Train model based on X, y
    """

    def __init__(self, theta,
                 learning_rate=0.008,
                 regularization_lambda=0.0005,
                 n_iterations=100000):
        self.theta = theta
        self.learning_rate = learning_rate
        self.regularization_lambda = regularization_lambda
        self.n_iterations = n_iterations

    def _standartize(self, x):
        """
        Z-score Normalization for columns
        #TODO Remove nested loop
        """

        means, stds = np.mean(x, axis=0), np.std(x, axis=0)
        stds[stds == 0] = 0.02
        for i in range(x.shape[1]):
            for j in range(x.shape[0]):
                x[j, i] = (x[j, i] - means[i]) / stds[i]
        return x

    def _softmax(self, theta, x):
        """Softmax cost function"""

        summed_softmaxes = 0
        k = int(x.shape[0])
        for i in range(len(self.theta)):
            summed_softmaxes += np.exp(np.dot(self.theta[i], x.reshape((k, 1))))
        z = np.exp(np.dot(theta, x.reshape((k, 1)))) / float(summed_softmaxes)
        return z

    def _indicator_function(self, y):
        """Indicator function for true label"""
        indicator = [[1 if y[i] == k else 0 for k in range(len(np.unique(y)))] for i in range(len(y))]
        return indicator

    def _gradient(self, x, indicator):
        """Summed gradients for all objects"""
        m = len(x)
        actual_gradient = 0
        summed_gradients_for_all_classes = []
        for j in range(len(self.theta)):
            random_ind = np.random.randint(x.shape[0])
            actual_gradient += x[random_ind] * (indicator[random_ind][j] - self._softmax(self.theta[j], x[random_ind]))
            summed_gradients_for_all_classes.append(self.learning_rate * ((-1 / m) * actual_gradient
                                                                          + self.regularization_lambda * self.theta[j]))
        return summed_gradients_for_all_classes

    def fit(self, x, y):
        """Training process"""
        x = self._standartize(x)
        indicator = self._indicator_function(y)
        for k in range(self.n_iterations):
            self.theta = self.theta - self._gradient(x, indicator)

    def predict(self, x_test):
        """
        Predicted class for inputted Test matrix
        #TODO Remove nested loop
        """
        x_test = self._standartize(x_test)
        prob = np.ones((len(x_test), len(self.theta)))
        for i in range(len(x_test)):
            for j in range(len(self.theta)):
                prob[i, j] = self._softmax(self.theta[j], x_test[i])
        y_pred = []
        for i in range(len(prob)):
            y_pred.append(list(prob[i]).index(max(prob[i])))
        return y_pred


def main():
    # downloading digits dataset
    data = load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

    # adding constant feature
    z = np.ones((X_train.shape[0], 1))
    X_train = np.hstack((X_train, z))
    z = np.ones((X_test.shape[0], 1))
    X_test = np.hstack((X_test, z))

    # Creating parameters matrix
    num_classes = len(np.unique(y))
    theta = np.ones((num_classes, X_train.shape[1]))
    
    # Training
    clf = MultiLogisticRegression(theta)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    
    print("Parameters for logistic regression:")
    print(f"learning rate for gradient descent = {clf.learning_rate}")
    print(f"lambda for regularization = {clf.lamda}")
    print(f"number of iterations = {clf.n_iterations}\n")
    
    print("Accuracy score for digits dataset =", sum(y_pred == y_test) / float(len(y_pred)))
    
if __name__ == "__main__":
    main()

Parameters for logistic regression:
learning rate for gradient descent = 0.008000
lambda for regularization = 0.000500
number of iterations = 100000

Accuracy score for digits dataset = 0.962222222222
Wall time: 47.2 s
