In [8]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [9]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

K = len(np.unique(y_train)) # Classes
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
Din = 3072 # CIFAR10
# Din = 784 # MINIST

# Normalize pixel values
#x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

x_train = np.reshape(x_train,(Ntr,Din))
x_test = np.reshape(x_test,(Nte,Din))
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

std=1e-5
H= 200
w1 = std*np.random.randn(Din,H)
w2 = std*np.random.randn(H,K)
b1 = np.zeros(H)
b2 = np.zeros(K)
batch_size = 500

iterations = 300
lr = 1.4e-2
lr_decay=0.999
reg = 5e-6
loss_history = []
train_acc_history = []
val_acc_history = []
seed = 0
rng = np.random.default_rng(seed=seed)


In [10]:

for t in range(iterations):
    indices = np.arange(Ntr)
    rng.shuffle(indices)
    #batch_indices = np.random.choice(Ntr,batch_size)
    x = x_train[indices]
    y = y_train[indices]

    n = int(Ntr/batch_size)
    x_batches = np.array_split(x,n)
    y_batches = np.array_split(y,n)

    for i in range(n):
        h = 1.0/(1.0+np.exp(-(x_batches[i].dot(w1)+b1)))
        y_pred = h.dot(w2)+b2
        loss = 1./batch_size*np.square(y_pred-y_batches[i]).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))
        loss_history.append(loss)
        dy_pred = 1./batch_size*2.0*(y_pred-y_batches[i]) #partial derivative of L w.r.t. y_hat
        dw2 = h.T.dot(dy_pred)+reg*w2
        db2 = dy_pred.sum(axis=0)
        dh = dy_pred.dot(w2.T)
        dw1 = x_batches[i].T.dot(dh*h*(1-h))+reg*w1
        db1 = (dh*h*(1-h)).sum(axis=0)
        w1 -= lr*dw1
        w2 -= lr*dw2
        b1 -= lr*db1
        b2 -= lr*db2
        lr *= lr_decay
    print('iteration %d / %d: loss %f' %(t,iterations,loss))
    



    

iteration 0 / 300: loss 0.793045
iteration 1 / 300: loss 0.769099
iteration 2 / 300: loss 0.761735
iteration 3 / 300: loss 0.769830
iteration 4 / 300: loss 0.747253
iteration 5 / 300: loss 0.740631
iteration 6 / 300: loss 0.712433
iteration 7 / 300: loss 0.703670
iteration 8 / 300: loss 0.704660
iteration 9 / 300: loss 0.680686
iteration 10 / 300: loss 0.690268
iteration 11 / 300: loss 0.674384
iteration 12 / 300: loss 0.674291
iteration 13 / 300: loss 0.681925
iteration 14 / 300: loss 0.640128
iteration 15 / 300: loss 0.648779
iteration 16 / 300: loss 0.659229
iteration 17 / 300: loss 0.638731
iteration 18 / 300: loss 0.632524
iteration 19 / 300: loss 0.665128
iteration 20 / 300: loss 0.618925
iteration 21 / 300: loss 0.612948
iteration 22 / 300: loss 0.608820
iteration 23 / 300: loss 0.621480
iteration 24 / 300: loss 0.642806
iteration 25 / 300: loss 0.602547
iteration 26 / 300: loss 0.610290
iteration 27 / 300: loss 0.589837
iteration 28 / 300: loss 0.590453
iteration 29 / 300: loss

In [20]:
indices1 = np.arange(Ntr)
rng.shuffle(indices1)

indices2 = np.arange(Nte)
rng.shuffle(indices2)

x_tr = x_train[indices1]
y_tr = y_train[indices1]

x_te = x_test[indices2]
y_te = y_test[indices2]

n1 = int(Ntr/batch_size)
x_batches1 = np.array_split(x_tr,n1)
y_batches1 = np.array_split(y_tr,n1)

n2 = int(Nte/batch_size)
x_batches2 = np.array_split(x_te,n2)
y_batches2 = np.array_split(y_te,n2)


for i in range(n1):
    #train accuracy,train loss
    h = 1.0/(1.0+np.exp(-(x_batches1[i].dot(w1)+b1)))
    y_pred = h.dot(w2)+b2
    train_acc = 1.0 -1/(9*batch_size)*(np.abs(np.argmax(y_batches1[i],axis=1) - np.argmax(y_pred, axis=1))).sum()
    train_loss = 1/(batch_size)*np.square( y_batches1[i]- y_pred).sum()
    #train_loss = 1.0 -1/(81*Ntr)*np.square(np.argmax(y_batches1[i],axis=1) - np.argmax(y_pred, axis=1)).sum() + reg*(np.sum(w2*w2)+np.sum(w1*w1))
    #train_acc = 1/Ntr*(np.sum(np.argmax(y_pred,axis=1)==np.argmax(y_batches1[i],axis=1)))
    #train_loss = 1./batch_size*np.square(y_pred-y_batches1[i]).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))

for i in range(n2):
    #test accuracy, test loss
    h = 1.0/(1.0+np.exp(-(x_batches2[i].dot(w1)+b1)))
    y_pred = h.dot(w2)+b2
    test_acc = 1.0 - 1/(9*batch_size)*(np.abs(np.argmax(y_batches2[i],axis=1) - np.argmax(y_pred, axis=1))).sum()
    test_loss = 1/(batch_size)*np.square(y_batches2[i] - y_pred).sum()

    #test_loss = 1.0 -1/(81*Nte)*np.square(np.argmax(y_batches2[i],axis=1) - np.argmax(y_pred, axis=1)).sum() + reg*(np.sum(w2*w2)+np.sum(w1*w1))
    #test_acc = 1/Nte*(np.sum(np.argmax(y_pred,axis=1)==np.argmax(y_batches2[i],axis=1)))
    #test_loss = 1./batch_size*np.square(y_pred-y_batches2[i]).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))



print("train_acc = ", train_acc)
print("train_loss = ", train_loss)
print("test_acc = ", test_acc)
print("test_loss = ", test_loss)



train_acc =  0.868
train_loss =  0.5888579116872218
test_acc =  0.7775555555555556
test_loss =  0.7327051792320263
