In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train:', x_train.shape)
K = len(np.unique(y_train)) # Return the unique elements of a tratining output set and take it length as Classes
Ntr = x_train.shape[0] # number of training examples
Nte = x_test.shape[0] # number of testing examples
Din = 3072 # By CIFAR10 data set with 32 x 32 x 3 color images

x_train = x_train[range(Ntr), :]
x_test = x_test[range(Nte), :]
y_train = y_train[range(Ntr)]
y_test = y_test[range(Nte)]


y_train = tf.keras.utils.to_categorical(y_train, num_classes=K) # This function returns a matrix of binary values (either ‘1’ or ‘0’). It has number of rows equal to the length of the input vector and number of columns equal to the number of classes.
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)
x_train = np.reshape(x_train,(Ntr,Din)).astype('float32') # reshape the data set
x_test = np.reshape(x_test,(Nte,Din)).astype('float32')

#x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

H = 200 # No of hidden nodes
std=1e-6 # standard deviation (sigma)
w1 = std*np.random.randn(Din, H) #Return a sample (or samples) from the “standard normal” distribution.
w2 = std*np.random.randn(H, K)
b1 = np.zeros(H) # creating array of zeros
b2 = np.zeros(K)
print("w1:", w1.shape)
print("w2:", w2.shape)
print("b1:", b1.shape)
print("b2:", b2.shape)

x_train: (50000, 32, 32, 3)
w1: (3072, 200)
w2: (200, 10)
b1: (200,)
b2: (10,)


In [11]:
batch_size = 49990 # for gradient descent optimization batch size is equal to number of training set

iterations = 3 # epochs
lr = 1.4e-2 # the learning rate alpha
lr_decay = 0.999
reg = 5e-6 # the regularization constant
loss_history = []
train_acc_history = []
val_acc_history = []
lr_history = []

for t in range(iterations):
    
    for start in range(0, Ntr, batch_size):
        batch_indices = np.random.choice(Ntr, batch_size)
        x = x_train[batch_indices]
        y = y_train[batch_indices]
        
        #forward
        #----------------------------------------------for train set---------------------------------------------------------------------
        h = 1.0/(1.0 + np.exp(-(x.dot(w1) + b1 ))) # create a activation function (sigmoid function)
        y_pred = h.dot(w2) + b2 # create predictable output
        #----------------------------------------------for test set----------------------------------------------------------------------
        h_t = 1.0/(1.0 + np.exp(-(x_test.dot(w1) + b1 ))) # create a activation function for test data (sigmoid function)
        y_pred_test = h_t.dot(w2) + b2 # create predictable output
        #--------------------------------------------------------------------------------------------------------------------------------
        loss = (1./(y_pred.shape[0]))*np.square(y_pred - y).sum() + reg*(np.sum(w2*w2) + np.sum(w1*w1)) # loss function with regularization term 
        
    
        # compute the accuracy as percentage
        training_acc = 100*(1 - (1/((y_pred.shape[0])*K))*(np.abs(np.argmax(y,axis=1) - np.argmax(y_pred,axis=1))).sum())  
        testing_acc = 100*(1 - (1/((y_pred_test.shape[0])*K))*(np.abs(np.argmax(y_test,axis=1) - np.argmax(y_pred_test,axis=1))).sum())
        
        
        #backward
        dy_pred = 1./batch_size*2.0*(y_pred - y) # partial derivatives w.r.t y_predicted
        dw2 = h.T.dot(dy_pred) + reg*w2
        db2 = dy_pred.sum(axis=0)
        dh = dy_pred.dot(w2.T)
        dw1 = x.T.dot(dh*h*(1-h)) + reg*w1
        db1 = (dh*h*(1-h)).sum(axis=0)

        # updating learning parameters
        w1 -= lr*dw1
        w2 -= lr*dw2
        b1 -= lr*db1
        b2 -= lr*db2
        lr *= lr_decay
        
    train_acc_history.append(training_acc)
    val_acc_history.append(testing_acc)
    loss_history.append(loss)
    lr_history.append(lr)
    
    if t % 1 == 0:
            print("iteration %d / %d| loss %f| training accuracy %f| testing accuracy %f" % (t, iterations, loss, training_acc, testing_acc))
        
        
        

iteration 0 / 3| loss 0.779884| training accuracy 77.601320| testing accuracy 77.255000
iteration 1 / 3| loss 0.777098| training accuracy 77.984597| testing accuracy 77.535000
iteration 2 / 3| loss 0.784402| training accuracy 77.596719| testing accuracy 77.137000
