In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

K = len(np.unique(y_train)) # Classes
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
Din = 3072 # CIFAR10
# Din = 784 # MINIST

# Normalize pixel values
#x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

x_train = np.reshape(x_train,(Ntr,Din))
x_test = np.reshape(x_test,(Nte,Din))
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

std=1e-5
H= 200
w1 = std*np.random.randn(Din,H)
w2 = std*np.random.randn(H,K)
b1 = np.zeros(H)
b2 = np.zeros(K)
batch_size = 500

iterations = 300
lr = 1.4e-2
lr_decay=0.999
reg = 5e-6
loss_history = []
train_acc_history = []
val_acc_history = []
seed = 0
rng = np.random.default_rng(seed=seed)


In [3]:

for t in range(iterations):
    indices = np.arange(Ntr)
    rng.shuffle(indices)

    x = x_train[indices]
    y = y_train[indices]

    n = int(Ntr/batch_size)
    x_batches = np.array_split(x,n)
    y_batches = np.array_split(y,n)

    for i in range(n):
        h = 1.0/(1.0+np.exp(-(x_batches[i].dot(w1)+b1)))
        y_pred = h.dot(w2)+b2
        loss = 1./batch_size*np.square(y_pred-y_batches[i]).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))
        loss_history.append(loss)
        dy_pred = 1./batch_size*2.0*(y_pred-y_batches[i]) #partial derivative of L w.r.t. y_hat
        dw2 = h.T.dot(dy_pred)+reg*w2
        db2 = dy_pred.sum(axis=0)
        dh = dy_pred.dot(w2.T)
        dw1 = x_batches[i].T.dot(dh*h*(1-h))+reg*w1
        db1 = (dh*h*(1-h)).sum(axis=0)
        w1 -= lr*dw1
        w2 -= lr*dw2
        b1 -= lr*db1
        b2 -= lr*db2
        lr *= lr_decay
    print('iteration %d / %d: loss %f' %(t,iterations,loss))
    



    

iteration 0 / 300: loss 0.797861
iteration 1 / 300: loss 0.766751
iteration 2 / 300: loss 0.762217
iteration 3 / 300: loss 0.770548
iteration 4 / 300: loss 0.745263
iteration 5 / 300: loss 0.739556
iteration 6 / 300: loss 0.713454
iteration 7 / 300: loss 0.705209
iteration 8 / 300: loss 0.709619
iteration 9 / 300: loss 0.681471
iteration 10 / 300: loss 0.691875
iteration 11 / 300: loss 0.676296
iteration 12 / 300: loss 0.675843
iteration 13 / 300: loss 0.674922
iteration 14 / 300: loss 0.641563
iteration 15 / 300: loss 0.652052
iteration 16 / 300: loss 0.669520
iteration 17 / 300: loss 0.628202
iteration 18 / 300: loss 0.635810
iteration 19 / 300: loss 0.659641
iteration 20 / 300: loss 0.614135
iteration 21 / 300: loss 0.609187
iteration 22 / 300: loss 0.604186
iteration 23 / 300: loss 0.616057
iteration 24 / 300: loss 0.653306
iteration 25 / 300: loss 0.597200
iteration 26 / 300: loss 0.604123
iteration 27 / 300: loss 0.590445
iteration 28 / 300: loss 0.591348
iteration 29 / 300: loss

In [5]:
indices1 = np.arange(Ntr)
rng.shuffle(indices1)

indices2 = np.arange(Nte)
rng.shuffle(indices2)

x_tr = x_train[indices1]
y_tr = y_train[indices1]

x_te = x_test[indices2]
y_te = y_test[indices2]

n = int(Ntr/batch_size)
x_batches1 = np.array_split(x_tr,n)
y_batches1 = np.array_split(y_tr,n)

x_batches2 = np.array_split(x_te,n)
y_batches2 = np.array_split(y_te,n)


for i in range(n):
    #train accuracy,train loss
    h = 1.0/(1.0+np.exp(-(x_batches1[i].dot(w1)+b1)))
    y_pred = h.dot(w2)+b2
    train_acc = 1.0 -1/(9*Ntr)*(np.abs(np.argmax(y_batches1[i],axis=1) - np.argmax(y_pred, axis=1))).sum()
    #train_acc = 1/Ntr*(np.sum(np.argmax(y_pred,axis=1)==np.argmax(y_batches1[i],axis=1)))
    train_loss = 1./batch_size*np.square(y_pred-y_batches1[i]).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))

    #test accuracy, test loss
    h = 1.0/(1.0+np.exp(-(x_batches2[i].dot(w1)+b1)))
    y_pred = h.dot(w2)+b2
    test_acc = 1.0 - 1/(9*Nte)*(np.abs(np.argmax(y_batches2[i],axis=1) - np.argmax(y_pred, axis=1))).sum()
    #test_acc = 1/Nte*(np.sum(np.argmax(y_pred,axis=1)==np.argmax(y_batches2[i],axis=1)))
    test_loss = 1./batch_size*np.square(y_pred-y_batches2[i]).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))

print("train_acc = ", train_acc)
print("train_loss = ", train_loss)
print("test_acc = ", test_acc)
print("test_loss = ", test_loss)




train_acc =  0.9985933333333333
train_loss =  0.5843544134257277
test_acc =  0.9981444444444444
test_loss =  0.13853067579905712
