In [20]:
import numpy as np
from tqdm import tqdm
from keras.datasets import mnist

In [21]:
def standardize(data, ax):
    mean = np.sum(data, axis=ax) / len(data)
    variance = np.sum((data - mean) ** 2) / len(data)
    std_deviation = np.sqrt(variance)

    standardized_data = (data - mean) / std_deviation
    return standardized_data

In [22]:
def K_fold(data, K=5):
    fold_size = data.shape[0] // K
    folds = np.zeros([K, fold_size, 785])
    ready_data = np.array(data, dtype=object)
    for i in range(K):
        indices = np.random.choice(ready_data.shape[0], size=fold_size, replace=False)
        work_fold = ready_data[indices]
        folds[i] = np.array(work_fold)
        ready_data = np.delete(ready_data, indices, axis=0)
    folds = np.array(folds)
    return folds

In [23]:
def rand_weights(size):
    return np.random.uniform(-1, 1, size=size)


def sigmoid(x):
    X = [(1 / (1 + np.exp(-z))) for z in x]
    return np.array(X)


class LogisticRegression():
    def __init__(self, learning_rate=0.05, maxIter=1000, error_ratio=0.01, L1=0, batch_size=16, beta_1=0.9, beta_2=0.9,
                 epsilon=0.5):
        self.__epsilon = epsilon
        self.__beta_2 = beta_2
        self.__beta_1 = beta_1
        self.__learning_rate = learning_rate
        self.__maxIter = maxIter
        self.__weigths = None
        self.__bias = 0
        self.__error_ratio = error_ratio
        self.__L1 = L1
        self.__batch_size = batch_size

    def fit(self, X, Y, optimizer="None"):
        sample_size = np.array(X).shape[0]
        n_features = np.array(X).shape[1]
        # self.__weigths = np.zeros(n_features)
        self.__weigths = rand_weights(n_features)

        Error = 1
        if self.__batch_size > 1:
            batches = np.random.choice(len(X), size=[sample_size // self.__batch_size, self.__batch_size],
                                       replace=False)
            batched_data = np.array(X[batches])
            batched_labels = np.array(Y[batches])
        else:
            batched_data = np.array(X).reshape(1, sample_size, n_features)
            batched_labels = np.array(Y).reshape(1, -1)

        m_dw = np.zeros_like(self.__weigths)
        v_dw = np.zeros_like(self.__weigths)
        m_db = 0
        v_db = 0
        t = 0

        for i in range(self.__maxIter):
            epoch_bar = tqdm(total=sample_size // self.__batch_size, desc=f"Epochs {i + 1}/{self.__maxIter}")
            for I in range(len(batched_data)):
                epoch_bar.update(1)
                epoch_bar.set_postfix({'accuracy': f'{1 - Error:.3f}'})

                linear = np.dot(batched_data[I], self.__weigths) + self.__bias
                prediction = sigmoid(linear)
                dw = (1 / self.__batch_size) * np.dot(batched_data[I].T, (prediction - batched_labels[I])) + (
                        self.__L1 / sample_size) * np.sum(np.abs(self.__weigths))
                db = (1 / self.__batch_size) * np.sum(prediction - batched_labels[I])

                t += 1
                if optimizer == "adam":
                    if len(dw.shape)>1:
                        dw=np.sum(dw,axis=1)
                    m_dw = self.__beta_1 * m_dw + (1 - self.__beta_1) * dw
                    m_db = self.__beta_1 * m_db + (1 - self.__beta_1) * db
                    v_dw = self.__beta_2 * v_dw + (1 - self.__beta_2) * (dw ** 2)
                    v_db = self.__beta_2 * v_db + (1 - self.__beta_2) * (db ** 2)

                    # Bias correction
                    v_dw_hat = v_dw / (1 - self.__beta_2 ** t)
                    v_db_hat = v_db / (1 - self.__beta_2 ** t)
                    m_dw_hat = m_dw / (1 - self.__beta_1 ** t)
                    m_db_hat = m_db / (1 - self.__beta_1 ** t)

                    self.__weigths = self.__weigths - self.__learning_rate * m_dw_hat / (
                                np.sqrt(v_dw_hat) + self.__epsilon)
                    self.__bias = self.__bias - self.__learning_rate * m_db_hat / (np.sqrt(v_db_hat) + self.__epsilon)

                elif optimizer == "rms":
                    if len(dw.shape)>1:
                        dw=np.sum(dw,axis=1)
                    v_dw = self.__beta_2 * v_dw + (1 - self.__beta_2) * (dw ** 2)
                    v_db = self.__beta_2 * v_db + (1 - self.__beta_2) * (db ** 2)

                    # Bias correction
                    v_dw_hat = v_dw / (1 - self.__beta_2 ** t)
                    v_db_hat = v_db / (1 - self.__beta_2 ** t)

                    self.__weigths = self.__weigths - self.__learning_rate * dw / (np.sqrt(v_dw_hat) + self.__epsilon)
                    self.__bias = self.__bias - self.__learning_rate * db / (np.sqrt(v_db_hat) + self.__epsilon)

                elif optimizer == "None":
                    if len(dw.shape)>1:
                        dw=np.sum(dw,axis=1)
                    self.__weigths = self.__weigths - self.__learning_rate * dw
                    self.__bias = self.__bias - self.__learning_rate * db

                Error = abs(self._error(prediction, batched_labels[I], self.__L1))
                if self.__error_ratio > Error:
                    break

    def predict(self, X_test):
        linear = np.dot(X_test, self.__weigths) + self.__bias
        Y_predicted = sigmoid(linear)
        class_f = [1 if y > 0.5 else 0 for y in Y_predicted]
        return class_f

    def evaluate(self, X_test, Y_test):
        res = self.predict(X_test)
        acc = 0
        for i in range(len(res)):
            if res[i] == Y_test[i]:
                acc += 1
        return acc / len(Y_test)

    def _error(self, H, Y, L1=0):
        er = np.mean(((Y * np.log(H)) + ((1 - Y) * np.log(1 - H))) + (L1 / len(H)) * np.sum(np.abs(self.__weigths)))
        return er

    def get_weights(self):
        return self.__weigths

    def set_weights(self, weigths):
        self.__weigths = weigths

In [24]:
def model(x_train, y_train, x_test, y_test, l_r, L1):
    model = LogisticRegression(learning_rate=l_r, maxIter=10, L1=L1, error_ratio=0.0001)
    model.fit(x_train, y_train, optimizer="rsm")
    acc = model.evaluate(x_test, y_test)
    return acc, model

In [25]:
(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [26]:
train_X = np.array(train_X)
train_y = np.array(train_y)
test_X = np.array(test_X)
test_y = np.array(test_y)
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [27]:
train_X = np.append(train_X[np.where(train_y == 0)], train_X[np.where(train_y == 1)], axis=0)
train_y = np.append(train_y[np.where(train_y == 0)], train_y[np.where(train_y == 1)], axis=0).reshape(-1, 1)
test_X = np.append(test_X[np.where(test_y == 0)], test_X[np.where(test_y == 1)], axis=0)
test_y = np.append(test_y[np.where(test_y == 0)], test_y[np.where(test_y == 1)], axis=0).reshape(-1, 1)
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(12665, 28, 28) (12665, 1) (2115, 28, 28) (2115, 1)


In [28]:
x_train = train_X.reshape(-1, 28 * 28)
x_train = np.array(standardize(x_train, 0))
x_test = test_X.reshape(-1, 28 * 28)
x_test = np.array(standardize(x_test, 0))
print(x_train.shape, x_test.shape)

(12665, 784) (2115, 784)


In [29]:
# train_data = np.append(x_train, train_y, axis=1)
# print(train_data.shape)

In [30]:
# learning_rates = [0.1, 0.01, 0.001, 0.0001]
# validation_accuarcy = []
# test_accuarcy = []
# models = []

In [31]:
# for i in learning_rates:
#     training = K_fold(train_data, 10)
#     for j in range(10):
#         val = training[j]
#         x_val = np.array(val[:, 0:784])
#         y_val = np.array(val[:, 784])
#
#         train = np.delete(training, j, axis=0).reshape(-1, 785)
#         x_train = np.array(train[:, 0:784])
#         y_train = np.array(train[:, 784])
#
#         Acc, Model = model(x_train, y_train, x_val, y_val, i, 0)
#
#         validation_accuarcy.append(Acc)
#         models.append(Model.get_weights())

In [32]:
# Max = max(validation_accuarcy)
# idx = validation_accuarcy.index(Max)
# weights = models[idx]
# learning_rate = learning_rates[(idx // 10)]
# print(Max, idx, learning_rate)

In [33]:
# cls = LogisticRegression(learning_rate=learning_rate)
# cls.set_weights(weights)
# predictions = cls.predict(x_test)
# print(f"acc on test = {accuracy(predictions, test_y)}")

In [None]:
batch_model_1 = LogisticRegression(maxIter=10, batch_size=64)
batch_model_1.fit(x_train, train_y)
print(f"Accuracy = {batch_model_1.evaluate(x_test, test_y)}")


Epochs 1/10:   0%|          | 0/197 [00:00<?, ?it/s][A
Epochs 1/10:   1%|          | 1/197 [00:00<00:00, 1003.42it/s, accuracy=0.000][A
Epochs 1/10:   1%|          | 2/197 [00:00<00:00, 399.86it/s, accuracy=0.276] [A
Epochs 1/10:   2%|▏         | 3/197 [00:00<00:00, 374.80it/s, accuracy=0.185][A
Epochs 1/10:   2%|▏         | 4/197 [00:00<00:00, 306.04it/s, accuracy=0.108][A
Epochs 1/10:   3%|▎         | 5/197 [00:00<00:00, 276.99it/s, accuracy=-0.050][A
Epochs 1/10:   3%|▎         | 6/197 [00:00<00:00, 272.45it/s, accuracy=-0.224][A
Epochs 1/10:   4%|▎         | 7/197 [00:00<00:00, 273.43it/s, accuracy=-0.344][A
Epochs 1/10:   4%|▍         | 8/197 [00:00<00:00, 253.41it/s, accuracy=-0.447][A
Epochs 1/10:   5%|▍         | 9/197 [00:00<00:00, 253.00it/s, accuracy=-0.818][A
Epochs 1/10:   5%|▌         | 10/197 [00:00<00:00, 259.23it/s, accuracy=-0.926][A
Epochs 1/10:   6%|▌         | 11/197 [00:00<00:00, 258.38it/s, accuracy=-1.014][A
Epochs 1/10:   6%|▌         | 12/197 [00:

In [None]:
reg_model_1 = LogisticRegression(maxIter=10, batch_size=1,L1=2)
reg_model_1.fit(x_train, train_y)
print(f"Accuracy = {reg_model_1.evaluate(x_test, test_y)}")

In [None]:
reg_model_2 = LogisticRegression(maxIter=10, batch_size=1,L1=0.9)
reg_model_2.fit(x_train, train_y)
print(f"Accuracy = {reg_model_2.evaluate(x_test, test_y)}")

In [None]:
batch_model_2 = LogisticRegression(maxIter=10, batch_size=32)
batch_model_2.fit(x_train, train_y)
print(f"Accuracy = {batch_model_2.evaluate(x_test, test_y)}")

In [None]:
ADAM_model_2 = LogisticRegression(maxIter=10, batch_size=32)
ADAM_model_2.fit(x_train, train_y,"adam")
print(f"Accuracy = {ADAM_model_2.evaluate(x_test, test_y)}")

In [None]:
RSM_model_2 = LogisticRegression(maxIter=10, batch_size=32)
RSM_model_2.fit(x_train, train_y,"rsm")
print(f"Accuracy = {RSM_model_2.evaluate(x_test, test_y)}")

In [None]:
# the GD model with regylarization accuracy =0.9962174940898345
# the mini batch GD with regularization model accuracy =
# the RMS model accuracy =
# ADMAS model accuracy=
# it is clear that