In [2]:
import numpy as np
import h5py
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy

In [150]:
def load_data():
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    return x_train, y_train, x_test, y_test

In [137]:
x_train.shape

(60000, 28, 28)

In [138]:
def network_initialization(num_input, num_classes, x_train):
    #number of classes is 10 --> 0-9
    #number of input features are 28 * 28
    #xTw + b ==> x = [1, number of features][number of features, number of class]
    # ==> [1, number of classes] <== b
    w = np.random.rand(num_input, num_classes)
    b = np.random.rand(1, num_classes)
    param = {"weight": w, "bias": b}
    return param

In [127]:
def logloss(y_hat, y):
    eps = np.finfo(float).eps
    predictions = np.clip(y_hat, eps, 1 - eps)
    predictions /= np.sum(predictions)[np.newaxis]
    rows = y.shape[0]
    vsota = np.sum(y * np.log(predictions))
    value = (-1.0 / rows * vsota)
    return value

In [140]:
def oneHot(y):
    m = y.shape[0]
    OH = scipy.sparse.csr_matrix((np.ones(m), (y, np.array(range(m)))))
    OH = np.array(OH.todense()).T
    return OH

In [187]:
def test_preparation(x_train, y_train, y_test):
    hyperparam = {}
    hyperparam["num_epochs"] = 100
    hyperparam["batch_size"] = 64
    hyperparam["num_samples"] = x_train.shape[0]
    hyperparam["mini_BGD"] = False
    hyperparam["num_batch"] = int(num_samples / batch_size)
    hyperparam["learning_rate"] = 0.001
    last_batch = num_samples % batch_size

    y_train = oneHot(y_train)
    y_test = oneHot(y_test)

    loss_epoch_report = 0
    loss_batch_report = 0
    x_train = x_train.reshape((x_train.shape[0], x_train.shape[1] * x_train.shape[2]))
    x_test = x_test.reshape((x_test.shape[0], x_test.shape[1] * x_test.shape[2]))
    return x_train, y_train, y_test, hyperparam

In [144]:

# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import log_loss
# from scipy.special import softmax


param = network_initialization(784, 10, x_train)
def fit_SGD(x_train, y_train, y_test, hyperparam, param):
    loss_epoch_report = 0
    accuracy = 0
    learning_rate = hyperparam["learning_rate"]
    for epoch in range(hyperparam["num_epochs"]):
        if epoch % 20 == 0:
            learning_rate /= 2
        shuffling = np.arange(hyperparam["num_samples"])
        np.random.seed(0)
        np.random.RandomState(seed=42).shuffle(shuffling)
        x_epoch = x_train[shuffling]
        y_epoch = y_train[shuffling]

        #calculating xTw + b
        mat_mul = np.dot(x_epoch, param["weight"]) + param["bias"]

        #calculating softmax of xTw+b
        mat_mul = mat_mul / np.sum(mat_mul, axis = 1)[:, np.newaxis]
        sm = np.exp(mat_mul) / np.sum(np.exp(mat_mul), axis = 1)[:, np.newaxis]

        #softmax result with probabilities to 0s and 1s
        softmax_out = np.zeros_like(sm)
        softmax_out[np.arange(len(sm)), sm.argmax(1)] = 1

        #softmax class output
        y_hat = np.argmax(sm, axis = 1)[:, np.newaxis]

        #class output of true labels
        y_label = np.argmax(y_epoch, axis = 1)[:, np.newaxis]

        #calculating accuracy of the model
        accuracy = (y_hat == y_label).all(axis = 1).mean()

        #calling logloss function
        loss_epoch_report = logloss(y_epoch, softmax_out)

        #calculating gradient of weight and bias
        weight_grad = np.dot(x_epoch.T, softmax_out - y_epoch)
        bias_grad = -(softmax_out - y_epoch)

        #updating parameters
        param["weight"] =  param["weight"] - learning_rate * weight_grad
        param["bias"] = param["bias"] - (learning_rate * bias_grad)

        #report so far
        print("accuracy is: ", accuracy)
        print("loss for epoch ", epoch, " is: ", loss_epoch_report)
        loss_epoch_report = 0
        accuracy = 0
    return param

In [190]:
#reshape x_train
def fit_mini_BGD(param, hyperparam, x_train, y_train):
    batch_size = hyperparam["batch_size"]
    learning_rate = hyperparam["learning_rate"]
    num_samples = hyperparam["num_samples"]
    num_batch = hyperparam["num_batch"]
    last_batch = num_samples % batch_size
    loss_epoch_report = 0
    loss_batch_report = 0
    batch_gradient_descent = hyperparam["mini_BGD"]
    
    for epoch in range(hyperparam["num_epochs"]):
        shuffling = np.arange(num_samples)
        np.random.shuffle(shuffling)
        
        for batch in range(num_batch):
            x_batch = x_train[shuffling][
                batch * batch_size: (batch + 1) * batch_size]
            y_batch = y_train[shuffling][
                batch * batch_size: (batch + 1) * batch_size]
            
            #mini batch calculations
            dw, db, batch_loss, acc = mini_BGD(
                param, x_batch, y_batch, batch_size)
            loss_batch_report += (batch_loss * (batch + 1)/(batch + 2))
            loss_epoch_report += (batch_loss * (batch + 1)/(batch + 2))
            param["weight"] = param["weight"] - learning_rate * dw
            param["bias"] = param["bias"] - learning_rate * db
            if batch % 50 == 0:
                print("loss for epoch ", epoch, " and batch ", batch, " is: ",
                     loss_batch_report/((batch+1) * batch_size))
                print("and accuracy is: ", acc)
                loss_batch_report = 0

        print("loss for epoch ", epoch, " is: ", loss_epoch_report)
        loss_epoch_report = 0
    
    return param

In [180]:
def mini_BGD(param, x_batch, y_batch, batch_size):
    loss = 0
    
    #calculating xTw + b
    mat_mul = np.dot(x_batch, param["weight"])

    #calculating softmax of xTw+b
    mat_mul = mat_mul / np.sum(mat_mul, axis = 1)[:, np.newaxis]
    sm = np.exp(mat_mul) / np.sum(np.exp(mat_mul), axis = 1)[:, np.newaxis]

    #softmax result with probabilities to 0s and 1s
    softmax_out = np.zeros_like(sm)
    softmax_out[np.arange(len(sm)), sm.argmax(1)] = 1

    #softmax class output
    y_hat = np.argmax(sm, axis = 1)[:, np.newaxis]

    #class output of true labels
    y_label = np.argmax(y_batch, axis = 1)[:, np.newaxis]

    #calculating accuracy of the model
    accuracy = (y_hat == y_label).all(axis = 1).mean()

    #calling logloss function
    loss = logloss(y_batch, softmax_out)

    #calculating gradient of weight and bias
    weight_grad = np.dot(x_batch.T, softmax_out - y_batch)
    bias_grad = -(softmax_out - y_epoch)
    
    return weight_grad, bias_grad, loss, accuracy
    

In [167]:
def testing(x_test, y_test, param):
    #calculating xTw + b
    mat_mul = np.dot(x_test, param["weight"])

    #calculating softmax of xTw+b
    mat_mul = mat_mul / np.sum(mat_mul, axis = 1)[:, np.newaxis]
    sm = np.exp(mat_mul) / np.sum(np.exp(mat_mul), axis = 1)[:, np.newaxis]    
    
    #softmax class output
    y_hat = np.argmax(sm, axis = 1)[:, np.newaxis]

    #class output of true labels
    y_label = np.argmax(y_test, axis = 1)[:, np.newaxis]

    #calculating accuracy of the model
    accuracy = (y_hat == y_label).all(axis = 1).mean()
    print(accuracy)

In [153]:
x_train, y_train, x_test, y_test = load_data()
param = network_initialization(784, 10, x_train)
x_train, y_train, y_test, hyperparam = test_preparation(
    x_train, y_train, y_test)
if hyperparam["mini_BGD"] = True:
    param = fit_mini_BGD(param, hyperparam, x_train, y_train)
else:
    param = fit_SGD(x_train, y_train, y_test, hyperparam, param)

testing(x_test, y_test, param)


accuracy is:  0.116
loss for epoch  0  is:  42.86468943718382
accuracy is:  0.37803333333333333
loss for epoch  1  is:  33.42005079412214
accuracy is:  0.21013333333333334
loss for epoch  2  is:  39.47178019815491
accuracy is:  0.4733
loss for epoch  3  is:  29.986292081252262
accuracy is:  0.3568
loss for epoch  4  is:  34.185377701084384
accuracy is:  0.47153333333333336
loss for epoch  5  is:  30.049969202239687
accuracy is:  0.39935
loss for epoch  6  is:  32.651720249377455
accuracy is:  0.39895
loss for epoch  7  is:  32.66613771073311
accuracy is:  0.5555333333333333
loss for epoch  8  is:  27.022302317553837
accuracy is:  0.5950166666666666
loss for epoch  9  is:  25.59917873624021
accuracy is:  0.691
loss for epoch  10  is:  22.139588738441436
accuracy is:  0.6867666666666666
loss for epoch  11  is:  22.2921735377887
accuracy is:  0.6906
loss for epoch  12  is:  22.154006199797095
accuracy is:  0.7585833333333334
loss for epoch  13  is:  19.703638496893596
accuracy is:  0.7714

(10000, 784) (784, 10)
0.8979


In [126]:
def softmax(z):
#     z -= np.max(z)
    sm = (np.exp(z) / np.sum(np.exp(z),axis=1))
    result = sm.reshape((len(z),1))
    return result

In [139]:
# a = x_train[0].reshape((x_train.shape[1] * x_train.shape[2], 1))
# print(a.shape)
# print(np.dot(a.T, param["weight"]))
# print(y_train[0])
# print(oneHot(y_train)[0])
# print(y_train.shape[0])


In [234]:
print("a")

a


In [24]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
digits = load_digits()
x_train, x_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.25, random_state=0)

In [26]:
y_train.shape

(1347,)

In [51]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
print(x_train.shape, y_train.shape)
y_label = np.argmax(y_train, axis = 1)[:, np.newaxis]
clf = LogisticRegression(random_state=0).fit(x_train, y_label)
clf.predict(x_train[:2, :])

(60000, 784) (60000, 10)


  y = column_or_1d(y, warn=True)


array([5, 0])

In [52]:
clf.predict(x_train)

array([5, 0, 4, ..., 5, 6, 8])

In [54]:
clf.score(x_train, y_label)

0.9324333333333333