In [4]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

def regloss(y_pred, y, w1, w2=None, reg_const=None):
    """Return the squared-error loss plus an L2 weight penalty.

    The data term is the sum of squared differences between ``y`` and
    ``y_pred`` divided by the number of rows of ``y_pred``.  The penalty is
    the regularization constant times the sum of squared entries of ``w1``
    (and of ``w2`` when it is supplied).

    Args:
        y_pred: Predicted scores; its first dimension is used as the batch size.
        y: Targets, broadcast-compatible with ``y_pred``.
        w1: Weight array of the first layer.
        w2: Optional weight array of a second layer; skipped when ``None``.
        reg_const: Regularization constant.  When ``None`` the module-level
            ``reg`` is used, which is what existing callers rely on.

    Returns:
        The scalar regularized loss.
    """
    lam = reg if reg_const is None else reg_const
    batch_size = y_pred.shape[0]
    data_loss = (1 / batch_size) * np.square(y - y_pred).sum()
    # Each weight matrix is penalised exactly once; the previous version
    # added the w1 term twice and never looked at w2.
    penalty = np.sum(w1 * w1)
    if w2 is not None:
        penalty = penalty + np.sum(w2 * w2)
    return data_loss + lam * penalty

# Load the CIFAR-10 train/test splits through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
K = len(np.unique(y_train))  # number of classes = count of distinct training labels
Ntr = x_train.shape[0]  # number of training examples
Nte = x_test.shape[0]  # number of testing examples
Din = 3072  # flattened input size: 32 x 32 pixels x 3 colour channels

# Image preprocessing: divide pixel values by 255, then centre both splits
# on the per-pixel mean of the training set (axis=0 averages over examples).
x_train = x_train / 255.0
x_test = x_test / 255.0
mean_image = x_train.mean(axis=0)
x_train -= mean_image
x_test -= mean_image

# One-hot encode the labels: one row per example, one column per class,
# with a 1 in the column of the example's class and 0 elsewhere.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

# Flatten every image into a row vector of length Din and store as float32.
x_train = x_train.reshape(Ntr, Din).astype('float32')
x_test = x_test.reshape(Nte, Din).astype('float32')
print('x_train:', x_train.shape)
print('x_test:', x_test.shape)
print('y_train:', y_train.shape)
print('y_test:', y_test.shape)

# Optimisation hyper-parameters.
batch_size = Ntr  # batch as large as the training set (full-batch gradient descent)
iterations = 10  # number of training iterations
lr = 1.4e-2  # learning rate (alpha)
lr_decay = 0.999  # factor the learning rate is multiplied by after every iteration
reg = 5e-6  # L2 regularization constant (lambda)

# Metric histories; the training loop appends to loss_history.
loss_history = []
train_acc_history = []
val_acc_history = []

# Parameters of the linear model: small Gaussian weights and zero biases.
std = 1e-5
w1 = np.random.randn(Din, K) * std
b1 = np.zeros(K)

# Gradient-descent training loop for the linear model y_pred = x.w1 + b1.
for t in range(iterations):
    # Draw the batch WITHOUT replacement: when batch_size == Ntr this is a
    # permutation of the whole training set, so each step really is a
    # full-batch step.  Sampling with replacement (the NumPy default) would
    # silently drop roughly a third of the examples and duplicate others.
    # Replacement is only allowed when the batch is larger than the data set,
    # where drawing without it is impossible.
    batch_indices = np.random.choice(Ntr, batch_size, replace=batch_size > Ntr)
    x = x_train[batch_indices]
    y = y_train[batch_indices]

    # Forward pass: linear scores for the batch and for the test set.
    y_pred = x.dot(w1) + b1
    y_pred_test = x_test.dot(w1) + b1
    train_loss = regloss(y_pred, y, w1)
    test_loss = regloss(y_pred_test, y_test, w1)
    # Report exactly the value that is recorded, instead of recomputing the
    # loss inline with a separate formula.
    loss = train_loss
    loss_history.append(train_loss)

    print('iteration %d / %d: loss %f' % (t, iterations, loss))

    # Backward pass: gradients of (1/batch_size)*sum((y_pred-y)^2) + reg*sum(w1^2).
    dy_pred = (1. / batch_size) * 2.0 * (y_pred - y)
    # d/dw1 of reg*sum(w1^2) is 2*reg*w1 (the factor 2 was missing before).
    dw1 = x.T.dot(dy_pred) + 2.0 * reg * w1
    db1 = dy_pred.sum(axis=0)

    # Parameter update, followed by learning-rate decay.
    w1 -= lr * dw1
    b1 -= lr * db1
    lr *= lr_decay
   

x_train: (50000, 3072)
x_test: (10000, 3072)
y_train: (50000, 10)
y_test: (10000, 10)
iteration 0 / 10: loss 0.999996
iteration 1 / 10: loss 0.957596
iteration 2 / 10: loss 0.937864
iteration 3 / 10: loss 0.924301
iteration 4 / 10: loss 0.914502
iteration 5 / 10: loss 0.906159
iteration 6 / 10: loss 0.898421
iteration 7 / 10: loss 0.893817
iteration 8 / 10: loss 0.886756
iteration 9 / 10: loss 0.881152
