In [None]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import preprocessing
from keras.utils.np_utils import to_categorical

import warnings
warnings.filterwarnings("ignore")

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# =========== Loading Datasets ===============

def _load_npz(path):
    """Read every array stored in the ``.npz`` archive at ``path``.

    The archive is opened in a ``with`` block so its file handle is released
    as soon as the arrays are in memory. The result is a plain dict that
    supports the same ``dataset['x']`` / ``dataset['y']`` lookups.
    """
    with np.load(path) as archive:
        return {key: archive[key] for key in archive.files}


def _prepare_split(dataset):
    """Return ``(x, y)`` for one split.

    ``x`` is flattened to 784 features per row and scaled by 1/255;
    ``y`` is cast to float32. The row count is inferred (``-1``) instead of
    hard-coded, so splits of any size load without editing this cell.

    Raises:
        ValueError: if the number of rows in ``x`` and ``y`` differ.
    """
    x = dataset['x'].reshape(-1, 784).astype("float32") / 255
    y = dataset['y'].astype("float32")
    if x.shape[0] != y.shape[0]:
        raise ValueError(
            f"feature/label count mismatch: {x.shape[0]} rows vs {y.shape[0]} labels"
        )
    return x, y


train_dataset = _load_npz('train_dataset_5k_60.npz')
val_dataset = _load_npz('validation_dataset_5k_40.npz')
test_dataset = _load_npz('test_dataset.npz')

x_train, y_train = _prepare_split(train_dataset)
x_val, y_val = _prepare_split(val_dataset)
x_test, y_test = _prepare_split(test_dataset)

# Last expression of the cell: display all shapes as a sanity check.
x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

((3000, 784), (3000,), (2000, 784), (2000,), (10010, 784), (10010,))

In [None]:
# Small MLP: 784 inputs -> 100 ReLU units -> 10 outputs.
# The layer order matters: later cells address the two Dense layers as
# model.layers[1] and model.layers[2].
model = keras.Sequential(
    [
        layers.Flatten(input_shape=(784,)),
        layers.Dense(100, activation='relu'),
        # No softmax here: the loss below is built with from_logits=True,
        # so it expects the raw outputs of this layer.
        layers.Dense(10),
    ]
)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
@tf.function
def loss(w1,w2,lamda1, lamda2,loss_fn,y_train,logits): # Lambda
    """Data loss plus a per-layer weighted L2 penalty.

    Args:
        w1, w2: weight tensors to penalise.
        lamda1, lamda2: log-strengths; each penalty is scaled by ``exp(lamda)``.
        loss_fn: callable invoked as ``loss_fn(y_train, logits)``.
        y_train: labels; its leading dimension is used as the batch size.
        logits: model outputs for the same batch.

    Returns:
        ``loss_fn(y_train, logits) + (exp(lamda1)*l2(w1) + exp(lamda2)*l2(w2)) / (2 * batch_size)``
        where ``l2`` is ``tf.nn.l2_loss``.
    """
    data_term = loss_fn(y_train, logits)
    batch_size = y_train.shape[0]
    penalty = tf.math.exp(lamda1) * tf.nn.l2_loss(w1) + tf.math.exp(lamda2) * tf.nn.l2_loss(w2)
    return data_term + penalty / (2 * batch_size)

In [None]:
# Snapshot of the two Dense layers' current weights; every training run is
# reset to these so that runs are comparable.
wt_layer1_init = model.layers[1].get_weights()
wt_layer2_init = model.layers[2].get_weights()

# Mini-batches of 64. The shuffle buffer covers the whole training set:
# a buffer smaller than the dataset (previously 1024) only shuffles within a
# sliding window instead of producing a uniform permutation.
train_df = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_df = train_df.shuffle(buffer_size=len(x_train)).batch(64)

def fmin_loss(lamda1, lamda2, l_rate, momentum, epochs= 50, nesterov = True):      # lamda, not exp(lamda), Works with both tf.Variable and tf.constant type lambda input, (or just scalar)
    """Train the global ``model`` on ``train_df`` with SGD and report the result.

    The model is trained in place for ``epochs`` passes using the regularised
    objective ``loss``. Before returning, the two Dense layers are reset to
    ``wt_layer1_init`` / ``wt_layer2_init``; the trained weights are handed
    back to the caller instead.

    Args:
        lamda1, lamda2: log regularisation strengths for the first and second
            Dense kernel (``loss`` applies ``exp`` to them).
        l_rate: SGD learning rate.
        momentum: SGD momentum.
        epochs: number of passes over ``train_df`` (must be >= 1).
        nesterov: whether to use Nesterov momentum.

    Returns:
        ``[final_loss, wt_layer1, wt_layer2]`` where ``final_loss`` is the
        sample-weighted mean of the regularised mini-batch losses over the
        last epoch, and ``wt_layer1`` / ``wt_layer2`` are the trained weights
        as returned by ``get_weights()``.

    Raises:
        ValueError: if ``epochs`` < 1 or ``train_df`` yields no batches.
    """
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    tf.keras.backend.clear_session()
    optimizer = keras.optimizers.SGD(learning_rate=l_rate, momentum=momentum, nesterov=nesterov)

    # `loss` is a tf.function: Python/NumPy scalars are treated as constants and
    # trigger a new trace for every distinct value. Casting once to float32
    # tensors lets all lamda values share one trace per batch shape.
    lamda1 = tf.cast(lamda1, tf.float32)
    lamda2 = tf.cast(lamda2, tf.float32)

    # Loop-invariant lookups: the variable objects themselves never change,
    # only their values do.
    vars_list = model.trainable_weights
    w1 = model.layers[1].weights[0]
    w2 = model.layers[2].weights[0]

    for _ in range(epochs):
        # Restarted each epoch so that, after the loop, these describe the last epoch only.
        loss_sum = 0.0
        n_seen = 0
        for x_batch, y_batch in train_df:
            with tf.GradientTape() as tape:
                logits = model(x_batch, training=True)
                total_loss1 = loss(w1, w2, lamda1, lamda2, loss_fn, y_batch, logits)

            grads = tape.gradient(total_loss1, vars_list)      # for ref  - https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough
            optimizer.apply_gradients(zip(grads, vars_list))

            batch_n = int(y_batch.shape[0])
            loss_sum = loss_sum + total_loss1 * batch_n
            n_seen += batch_n

    if n_seen == 0:
        raise ValueError("train_df produced no batches")

    # Mean over the whole last epoch rather than the loss of whichever
    # (shuffled, possibly partial) mini-batch happened to come last.
    final_loss = loss_sum / n_seen

    wt_layer1 = model.layers[1].get_weights()
    wt_layer2 = model.layers[2].get_weights()
    # Leave the model as it was found so the next run starts from the same point.
    model.layers[1].set_weights(wt_layer1_init)
    model.layers[2].set_weights(wt_layer2_init)

    return [final_loss, wt_layer1, wt_layer2]

In [None]:
# Grid search over SGD learning rate and momentum with the regularisation
# strengths fixed at lamda1 = -5, lamda2 = -4. The pair giving the lowest
# loss reported by fmin_loss is kept in glbl_l_rate / glbl_momentum.
l_rates = np.linspace(0.1, 0.9, 9)
# linspace instead of arange: with a fractional step, arange's length depends on
# floating-point rounding and its values accumulate error
# (e.g. 0.0599999949 instead of 0.06).
momentas = np.linspace(0.01, 0.09, 9, dtype='float32')

mn_loss = np.inf
glbl_l_rate = None
glbl_momentum = None
for l_rate in l_rates:
    for momentum in momentas:
        # Setting Initial Weights For Every Run
        model.layers[1].set_weights(wt_layer1_init)
        model.layers[2].set_weights(wt_layer2_init)
        # Only the loss is needed here; indexing avoids rebinding `_`,
        # which IPython uses for the previous cell output.
        loss_ = fmin_loss(-5.0, -4.0, l_rate, momentum)[0]
        print(f'loss = {loss_} , l_rate = {l_rate}, momentum = {momentum}')
        # A NaN loss (diverged run) compares False and is therefore skipped.
        if loss_ < mn_loss:
            glbl_l_rate = l_rate
            glbl_momentum = momentum
            mn_loss = loss_

if glbl_l_rate is None:
    # Fail here with a clear message rather than later, when None would be
    # passed to the optimizer.
    raise RuntimeError('no (l_rate, momentum) pair produced a finite loss')

print('###############################################################')
print('--------------------Result-------------------------------------')
print(f'final_loss = {mn_loss} , l_rate = {glbl_l_rate} , glbl_momentum = {glbl_momentum}')

loss = 0.04412248358130455 , l_rate = 0.1, momentum = 0.009999999776482582
loss = 0.027074240148067474 , l_rate = 0.1, momentum = 0.019999999552965164
loss = 0.02286617085337639 , l_rate = 0.1, momentum = 0.029999999329447746
loss = 0.051924463361501694 , l_rate = 0.1, momentum = 0.03999999910593033
loss = 0.044988904148340225 , l_rate = 0.1, momentum = 0.04999999701976776
loss = 0.04640210047364235 , l_rate = 0.1, momentum = 0.059999994933605194
loss = 0.02805834449827671 , l_rate = 0.1, momentum = 0.07000000029802322
loss = 0.047282151877880096 , l_rate = 0.1, momentum = 0.07999999821186066
loss = 0.03773213177919388 , l_rate = 0.1, momentum = 0.08999999612569809
loss = 0.02652968466281891 , l_rate = 0.2, momentum = 0.009999999776482582
loss = 0.022068822756409645 , l_rate = 0.2, momentum = 0.019999999552965164
loss = 0.020238641649484634 , l_rate = 0.2, momentum = 0.029999999329447746
loss = 0.024280274286866188 , l_rate = 0.2, momentum = 0.03999999910593033
loss = 0.020510910078883

In [None]:
# 30 evenly spaced log-strengths in [-10, 0] and every ordered pair of them
# (30 x 30 = 900 candidates), first component varying slowest.
lamdas = np.linspace(-10, 0, num=30, dtype='float32')
lamda_grid = [
    [lamdas[row], lamdas[col]]
    for row in range(len(lamdas))
    for col in range(len(lamdas))
]

In [None]:
# Search over lamda_grid: train once per (lamda1, lamda2) pair and keep the
# pair with the lowest validation loss. Train and test losses are recorded for
# the winner but play no part in the selection.
# NOTE(review): the random-search cell further down repeats this loop verbatim
# with ran_grid; a shared helper taking the candidate list would remove the copy.
loss_from_val, loss_from_train, loss_from_test = 1000000000, None, None
corres_lamda = None

for lamda in lamda_grid:
    # Same starting point for every candidate.
    model.layers[1].set_weights(wt_layer1_init)
    model.layers[2].set_weights(wt_layer2_init)

    min_loss, final_wt1, final_wt2 = fmin_loss(lamda[0], lamda[1], glbl_l_rate, glbl_momentum)

    # fmin_loss resets the model before returning, so load the trained
    # weights back in for evaluation.
    model.layers[1].set_weights(final_wt1)
    model.layers[2].set_weights(final_wt2)

    # Unregularised losses on each split.
    val_loss = loss_fn(y_val, model(x_val, training=False))
    training_loss = loss_fn(y_train, model(x_train, training=False))
    test_loss = loss_fn(y_test, model(x_test, training=False))

    if not (val_loss < loss_from_val):
        continue

    # New best candidate on validation.
    loss_from_val, loss_from_train, loss_from_test = val_loss, training_loss, test_loss
    corres_lamda = lamda
    print(f'min  loss at lamda = {lamda} is train_loss = {training_loss}, val_loss = {val_loss} , test_loss = {test_loss}')

print('###############################################################')
print('--------------------Result-------------------------------------')
print(f'min  loss at lamda = {corres_lamda} is train_loss = {loss_from_train}, val_loss = {loss_from_val} , test_loss = {loss_from_test}')


min  loss at lamda = [-10.0, -10.0] is train_loss = 0.0023097896482795477, val_loss = 0.4026065766811371 , test_loss = 0.34856900572776794
min  loss at lamda = [-10.0, -6.8965516] is train_loss = 0.0023713144473731518, val_loss = 0.3958318531513214 , test_loss = 0.3441854417324066
min  loss at lamda = [-10.0, -4.137931] is train_loss = 0.00296982005238533, val_loss = 0.39061978459358215 , test_loss = 0.3302921652793884
min  loss at lamda = [-10.0, -3.7931035] is train_loss = 0.0032979710958898067, val_loss = 0.37839946150779724 , test_loss = 0.3237793743610382
min  loss at lamda = [-10.0, -3.1034484] is train_loss = 0.004341995343565941, val_loss = 0.37025246024131775 , test_loss = 0.31663623452186584
min  loss at lamda = [-10.0, -2.7586207] is train_loss = 0.005299258977174759, val_loss = 0.35594385862350464 , test_loss = 0.31400543451309204
min  loss at lamda = [-10.0, -2.413793] is train_loss = 0.006753614638000727, val_loss = 0.34627896547317505 , test_loss = 0.2993249297142029
min

In [None]:
# Fixed seed so the random search draws the same candidates on every
# Restart & Run All; a local Generator leaves NumPy's global RNG untouched.
RANDOM_LAMDA_SEED = 0
random_lamda = np.random.default_rng(RANDOM_LAMDA_SEED).random(30)     #value will be only between [0,1)
# Map [0, 1) onto [-10, 0) (same range as the regular grid) and sort ascending.
ran_lambda = np.sort(10*random_lamda.astype('float32') - 10)
# Every ordered pair of the 30 sampled values (900 candidates).
ran_grid = [[i,j] for i in ran_lambda for j in ran_lambda]

In [None]:
# Search over ran_grid: train once per (lamda1, lamda2) pair and keep the pair
# with the lowest validation loss. Train and test losses are recorded for the
# winner but play no part in the selection.
# NOTE(review): this loop is a verbatim copy of the lamda_grid search cell
# above; a shared helper taking the candidate list would remove the copy.
loss_from_val, loss_from_train, loss_from_test = 1000000000, None, None
corres_lamda = None

for lamda in ran_grid:
    # Same starting point for every candidate.
    model.layers[1].set_weights(wt_layer1_init)
    model.layers[2].set_weights(wt_layer2_init)

    min_loss, final_wt1, final_wt2 = fmin_loss(lamda[0], lamda[1], glbl_l_rate, glbl_momentum)

    # fmin_loss resets the model before returning, so load the trained
    # weights back in for evaluation.
    model.layers[1].set_weights(final_wt1)
    model.layers[2].set_weights(final_wt2)

    # Unregularised losses on each split.
    val_loss = loss_fn(y_val, model(x_val, training=False))
    training_loss = loss_fn(y_train, model(x_train, training=False))
    test_loss = loss_fn(y_test, model(x_test, training=False))

    if not (val_loss < loss_from_val):
        continue

    # New best candidate on validation.
    loss_from_val, loss_from_train, loss_from_test = val_loss, training_loss, test_loss
    corres_lamda = lamda
    print(f'min  loss at lamda = {lamda} is train_loss = {training_loss}, val_loss = {val_loss} , test_loss = {test_loss}')

print('###############################################################')
print('--------------------Result-------------------------------------')
print(f'min  loss at lamda = {corres_lamda} is train_loss = {loss_from_train}, val_loss = {loss_from_val} , test_loss = {loss_from_test}')


min  loss at lamda = [-9.789014, -9.789014] is train_loss = 0.002315936842933297, val_loss = 0.40320315957069397 , test_loss = 0.34667351841926575
min  loss at lamda = [-9.789014, -8.005441] is train_loss = 0.0023584943264722824, val_loss = 0.40310436487197876 , test_loss = 0.35113292932510376
min  loss at lamda = [-9.789014, -5.246722] is train_loss = 0.00250978278927505, val_loss = 0.3965491056442261 , test_loss = 0.33882254362106323
min  loss at lamda = [-9.789014, -4.312007] is train_loss = 0.0028718190733343363, val_loss = 0.39222416281700134 , test_loss = 0.3372695744037628
min  loss at lamda = [-9.789014, -3.77451] is train_loss = 0.003336251014843583, val_loss = 0.3854410648345947 , test_loss = 0.3276645839214325
min  loss at lamda = [-9.789014, -2.809784] is train_loss = 0.005121784284710884, val_loss = 0.37016963958740234 , test_loss = 0.31290870904922485
min  loss at lamda = [-9.789014, -2.7225738] is train_loss = 0.005414261016994715, val_loss = 0.3629927337169647 , test_lo