In [2]:
import numpy as np
import pandas as pd
import time

In [3]:
class LogisticRegression:
    """One-vs-rest logistic regression trained with batch gradient descent.

    One binary classifier is fitted for every class label in
    ``0 .. num_classes - 1``; ``predict`` returns, for each sample, the label
    whose classifier produces the highest score.

    Attributes:
        num_classes: number of class labels.
        theta: array of shape ``(num_classes, n_features + 1)`` created by
            ``fit``; column 0 holds each classifier's intercept.
    """

    def __init__(self, num_classes = 2):
        self.num_classes = num_classes

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so
        that large negative ``z`` does not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def cost(self, theta, x, y, reg_type, lamb):
        """Return the regularised cross-entropy cost and its gradient.

        Args:
            theta: parameter vector; ``theta[0]`` is treated as the intercept
                and is excluded from regularisation.
            x: design matrix, one row per sample, columns matching ``theta``.
            y: binary targets (0 or 1), one per sample.
            reg_type: ``'L2'``, ``'L1'`` or ``'elastic'`` (L1 + L2 penalty).
            lamb: regularisation strength; 0 disables the penalty.

        Returns:
            Tuple ``(J, grad)``: the cost and its gradient with respect to
            ``theta``.

        Raises:
            ValueError: if ``reg_type`` is not a supported name or ``y`` is
                empty.
        """
        if reg_type not in ('L2', 'L1', 'elastic'):
            raise ValueError(
                f"reg_type must be 'L2', 'L1' or 'elastic', got {reg_type!r}"
            )
        theta = np.asarray(theta)
        y = np.asarray(y)
        m = len(y)
        if m == 0:
            raise ValueError('cost is undefined for an empty set of targets')

        z = np.dot(x, theta)
        # -log(sigmoid(z)) == logaddexp(0, -z) and
        # -log(1 - sigmoid(z)) == logaddexp(0, z); using these forms avoids
        # log(0) when the sigmoid saturates at exactly 0 or 1.
        losses = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
        J = np.sum(losses, axis=0) / m
        grad = 1 / m * np.dot(x.T, self.sigmoid(z) - y)

        # Each penalty term contributes both to the cost and to the gradient
        # so that the two always describe the same objective.
        weights = theta[1:]
        if reg_type in ('L1', 'elastic'):
            J = J + lamb / (2 * m) * np.sum(np.abs(weights), axis=0)
            grad[1:] = grad[1:] + lamb / (2 * m) * np.sign(weights)
        if reg_type in ('L2', 'elastic'):
            J = J + lamb / (2 * m) * np.sum(weights ** 2, axis=0)
            grad[1:] = grad[1:] + lamb / m * weights
        return J, grad

    def gradient_descent(self, theta, x, y, reg_type, lamb, alpha, max_iters):
        """Run ``max_iters`` fixed-step gradient-descent updates.

        Args:
            theta: starting parameter vector (not modified in place).
            x, y, reg_type, lamb: forwarded to ``cost``.
            alpha: learning rate.
            max_iters: number of update steps.

        Returns:
            The parameter vector after the final update.
        """
        it = 0
        while it < max_iters:
            _, grad = self.cost(theta, x, y, reg_type, lamb)
            theta = theta - alpha * grad
            it += 1
        return theta

    def fit(self, X_train, Y_train, reg_type = 'L2', lamb = 0, alpha = 0.1, max_iters = 100):
        """Train one binary classifier per class and store them in ``self.theta``.

        Args:
            X_train: 2-D feature matrix, one row per sample.
            Y_train: class labels, one per sample, compared against
                ``0 .. num_classes - 1``.
            reg_type: regularisation kind passed to ``cost``.
            lamb: regularisation strength.
            alpha: learning rate.
            max_iters: gradient-descent steps per classifier.

        Raises:
            ValueError: if the number of labels differs from the number of
                rows in ``X_train``.
        """
        X_train = np.asarray(X_train)
        labels = np.asarray(Y_train).ravel()
        m, n = X_train.shape
        if labels.shape[0] != m:
            raise ValueError(
                f'X_train has {m} rows but Y_train has {labels.shape[0]} labels'
            )

        # Zero-filled so the matrix never exposes uninitialised memory.
        self.theta = np.zeros((self.num_classes, n + 1), dtype=float)
        initial_theta = np.zeros(n + 1)

        # Prepend a column of ones so theta[0] acts as the intercept.
        X_train_fix = np.concatenate((np.ones((m, 1)), X_train), axis=1)

        start = time.time()
        for c in range(self.num_classes):
            print(f'Trainings done: {c}/{self.num_classes}')
            # One-vs-rest target: 1 where the label is class c, else 0.
            y_c = (labels == c).astype(int)
            self.theta[c, :] = self.gradient_descent(
                initial_theta, X_train_fix, y_c, reg_type, lamb, alpha, max_iters
            )
            end = time.time()
            if (end - start)/60 < 1:
                print(f'Time elapsed: {end - start:.2f} s')
            else:
                print(f'Time elapsed: {(end - start)/60:.2f} m')

    def predict(self, X_test):
        """Return the predicted class index for every row of ``X_test``.

        Each sample is scored by every per-class linear model and assigned to
        the class with the highest score; ties resolve to the lowest index.

        Returns:
            A list of Python ints, one per row of ``X_test``.
        """
        X_test = np.asarray(X_test)
        m2 = X_test.shape[0]
        X_test_fix = np.concatenate((np.ones((m2, 1)), X_test), axis=1)
        scores = np.dot(X_test_fix, self.theta.T)
        return np.argmax(scores, axis=1).tolist()

    def accuracy(self, y_test, y_pred):
        """Return the fraction of entries of ``y_pred`` that equal ``y_test``.

        Raises:
            ValueError: if ``y_pred`` is empty or the two inputs differ in
                length.
        """
        truth = np.asarray(y_test).ravel()
        guess = np.asarray(y_pred).ravel()
        if guess.size == 0:
            raise ValueError('accuracy is undefined for empty predictions')
        if truth.size != guess.size:
            raise ValueError(
                f'y_test has {truth.size} entries but y_pred has {guess.size}'
            )
        return int(np.count_nonzero(truth == guess)) / guess.size
                    

        


In [11]:
from tensorflow.keras.datasets import mnist

# One binary classifier per class label 0-9.
lr = LogisticRegression(num_classes = 10)

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Flatten every image into a 784-long float32 row and divide by 255
# (presumably to bring pixel values into [0, 1] -- confirm the raw range).
X_train = X_train.reshape(60000, 784).astype('float32') / 255
X_test = X_test.reshape(10000, 784).astype('float32') / 255

# Train with the default hyper-parameters, then score on the held-out split.
lr.fit(X_train, Y_train)
y_pred = lr.predict(X_test)
accuracy = lr.accuracy(Y_test, y_pred)

Trainings done: 0/10
Time elapsed: 7.00 s
Trainings done: 1/10
Time elapsed: 14.13 s
Trainings done: 2/10
Time elapsed: 20.98 s
Trainings done: 3/10
Time elapsed: 28.05 s
Trainings done: 4/10
Time elapsed: 34.88 s
Trainings done: 5/10
Time elapsed: 41.84 s
Trainings done: 6/10
Time elapsed: 49.01 s
Trainings done: 7/10
Time elapsed: 56.05 s
Trainings done: 8/10
Time elapsed: 1.05 m
Trainings done: 9/10
Time elapsed: 1.17 m


In [12]:
# Check the shape of the flattened training matrix.
X_train.shape

(60000, 784)

In [13]:
# Check the shape of the training label array.
Y_train.shape

(60000,)

In [14]:
# Show the test-set accuracy returned by lr.accuracy.
print(accuracy)

0.8575
