# 1. Linear Classifier

In [104]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train:', x_train.shape)

K = len(np.unique(y_train)) # Classes

Ntr = x_train.shape[0]
Nte = x_test.shape[0]
Din = 3072 # CIFAR10
# Din = 784 # MINIST

# Normalize pixel values
x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

x_train = np.reshape(x_train,(Ntr,Din))
x_test = np.reshape(x_test,(Nte,Din))
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

std=1e-5
w1 = std*np.random.randn(Din, K)
b1 = np.zeros((1,K))
print("w1:", w1.shape)
print("b1:", b1.shape)
batch_size = Ntr

iterations = 300
lr = 0.014
lr_decay = 0.999
reg = 5e-6
loss_history = []
train_acc_history = []
val_acc_history = []
seed = 0
rng = np.random.default_rng(seed=seed)

for t in range(iterations):
    indices = np.arange(Ntr)
    rng.shuffle(indices)
    
    # Forward pass
    X = x_train[indices]
    Y = y_train[indices]
    
    y_p = np.matmul(X,w1)+b1
    dy = y_p - Y
    loss = (1/batch_size)*np.sum(np.square(dy)) + np.sum(w1**2)*reg
    loss_history.append(loss)
    if not(t%10):print('t :',t,'---------- loss',loss)
    
    # Backward pass
    dw = np.matmul(X.T,2*dy)/batch_size  
    db = np.sum(2*dy,axis=0)/batch_size  

    # Perform Gradient Descent
    w1 -= dw*lr
    b1 -= db*lr
    lr *= lr_decay
# Printing accuracies and displaying w as images

x_train: (50000, 32, 32, 3)
w1: (3072, 10)
b1: (1, 10)
t : 0 ---------- loss 1.0000412910238619
t : 10 ---------- loss 0.876636905454685
t : 20 ---------- loss 0.8425399261678642
t : 30 ---------- loss 0.8236159816614267
t : 40 ---------- loss 0.8123886621188027
t : 50 ---------- loss 0.8054259349351952
t : 60 ---------- loss 0.8009055721657667
t : 70 ---------- loss 0.7978212855921176
t : 80 ---------- loss 0.7956050261531737
t : 90 ---------- loss 0.7939300752120351
t : 100 ---------- loss 0.7926049121525146
t : 110 ---------- loss 0.7915148984484983
t : 120 ---------- loss 0.7905897606411252
t : 130 ---------- loss 0.7897852235860183
t : 140 ---------- loss 0.7890725083544458
t : 150 ---------- loss 0.7884322524252738
t : 160 ---------- loss 0.7878509436404073
t : 170 ---------- loss 0.7873187982945276
t : 180 ---------- loss 0.7868284778916006
t : 190 ---------- loss 0.78637429855885
t : 200 ---------- loss 0.7859517335043814
t : 210 ---------- loss 0.7855570922393196
t : 220 -----

In [105]:
# Train Accuracy

y_p_in = np.argmax(y_p,axis=1)
y_class = np.argmax(Y,axis=1)
print('Train accuracy :',np.sum(y_p_in==y_class)*100/y_class.size)

Train accuracy : 41.952


In [106]:
# Prediction

y_pred = np.matmul(x_test,w1)+b1

print("Learning rate:", lr)
print("Training set loss:", loss_history[-1])

loss_test = (1/Nte)*np.sum(np.square(y_pred - y_test)) + np.sum(w1**2)*reg
print("Test set loss:", loss_test)

y_pred_in = np.argmax(y_pred,axis=1)
y_test_class = np.argmax(y_test,axis=1)

print("Test Accuracy:", np.sum(y_pred_in==y_test_class)*100/y_test_class.size)

Learning rate: 0.010369898450185398
Training set loss: 0.7829008427980855
Test set loss: 0.7876065250735699
Test Accuracy: 40.57


# 2. 2-Layer Neural Network

In [152]:
def sigmoid(z):
  return 1/(1+np.exp(-z))

In [161]:
H = 200
w_1 = std*np.random.randn(Din, H)
b_1 = np.zeros((1,H))
w_2 = std*np.random.randn(H, K)
b_2 = np.zeros((1,K))

print("w1:", w_1.shape)
print("b1:", b_1.shape)
print("w2:", w_2.shape)
print("b2:", b_2.shape)

iterations = 300
lr = 0.014
lr_decay = 0.999
reg = 5e-6
loss_history_nn = []

w1: (3072, 200)
b1: (1, 200)
w2: (200, 10)
b2: (1, 10)


In [155]:
for t in range(iterations):
    indices = np.arange(Ntr)
    rng.shuffle(indices)

    # Forward pass

    Xi = x_train[indices]
    Yi = y_train[indices]
    
    ac = sigmoid(np.matmul(Xi,w_1)+b_1)
    y_nn = np.matmul(ac,w_2)+b_2
    dy = y_nn - Yi
    loss = (1/batch_size)*np.sum(np.square(dy)) + np.sum(w_1**2)*reg
    loss_history_nn.append(loss)
    if not(t%10):print('t :',t,'---------- loss',loss)
    
    # Backward pass
    dac = np.matmul(2*dy,w_2.T)/batch_size
    dw1 = np.matmul(Xi.T,dac*ac*(1-ac))/batch_size + 2*w_1*reg
    db1 = np.sum(dac*ac*(1-ac),axis=0)/batch_size  

    dw2 = np.matmul(ac.T,2*dy)/batch_size + 2*w_2*reg
    db2 = np.sum(2*dy,axis=0)/batch_size  

    # Perform Gradient Descent
    w_1 -= dw1*lr
    b_1 -= db1*lr
    w_2 -= dw2*lr
    b_2 -= db2*lr
    lr *= lr_decay    

t : 0 ---------- loss 0.9999533676809134
t : 10 ---------- loss 0.899999996033859
t : 20 ---------- loss 0.8999999955213841
t : 30 ---------- loss 0.8999999950141696
t : 40 ---------- loss 0.8999999945120051
t : 50 ---------- loss 0.8999999940148401
t : 60 ---------- loss 0.8999999935226263
t : 70 ---------- loss 0.8999999930353132
t : 80 ---------- loss 0.8999999925528522
t : 90 ---------- loss 0.8999999920751947
t : 100 ---------- loss 0.8999999916022934
t : 110 ---------- loss 0.8999999911341006
t : 120 ---------- loss 0.8999999906705691
t : 130 ---------- loss 0.899999990211653
t : 140 ---------- loss 0.8999999897573061
t : 150 ---------- loss 0.8999999893074826
t : 160 ---------- loss 0.8999999888621376
t : 170 ---------- loss 0.8999999884212269
t : 180 ---------- loss 0.8999999879847057
t : 190 ---------- loss 0.8999999875525305
t : 200 ---------- loss 0.8999999871246576
t : 210 ---------- loss 0.8999999867010456
t : 220 ---------- loss 0.8999999862816503
t : 230 ---------- loss 

In [156]:
# Train Accuracy

y_p_in = np.argmax(y_nn,axis=1)
y_class = np.argmax(Yi,axis=1)
print('Train accuracy :',np.sum(y_p_in==y_class)*100/y_class.size)

Train accuracy : 10.054


In [157]:
# Prediction
ac_t = sigmoid(np.matmul(x_test,w_1)+b_1)
y_predic = np.matmul(ac_t,w_2)+b_2

print("Learning rate:", lr)
print("Training set loss:", loss_history[-1])

loss_test = (1/Nte)*np.sum(np.square(y_predic - y_test))
print("Test set loss:", loss_test)

y_predic_in = np.argmax(y_predic,axis=1)
y_test_class = np.argmax(y_test,axis=1)

print("Test Accuracy:", np.sum(y_predic_in==y_test_class)*100/y_test_class.size)

Learning rate: 0.005433502828270419
Training set loss: 0.8999999823415467
Test set loss: 0.8999999823487368
Test Accuracy: 10.05


# 3. Stochastic Gradient Descent

In [None]:
lr = 0.014
lr_decay = 0.999
reg = 5e-6
loss_history_st = []

batch_size = 500
per_iter = Ntr/batch_size

In [None]:
for t in range(iterations):
    indices = np.arange(Ntr)
    rng.shuffle(indices)

    # Forward pass

    Xi = x_train[indices]
    Yi = y_train[indices]