In [None]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import preprocessing
from keras.utils.np_utils import to_categorical

import warnings
warnings.filterwarnings("ignore")

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# =========== Loading Datasets ===============
# Archive handles are kept at notebook level under their original names.
train_dataset = np.load('train_dataset1_60.npz')
val_dataset = np.load('validation_dataset1_40.npz')
test_dataset = np.load('test_dataset.npz')


def _prepare_split(archive, n_features=784):
    """Return ``(x, y)`` float32 arrays built from one dataset archive.

    ``archive['x']`` is flattened to ``(n_samples, n_features)`` and divided
    by 255 (presumably 8-bit pixel intensities -- confirm against the data
    source). ``archive['y']`` is cast to float32.

    The sample count is inferred with ``-1`` instead of being hard-coded, so
    the cell keeps working when a split of a different size is loaded.

    Raises:
        ValueError: if ``x`` and ``y`` do not hold the same number of samples.
    """
    x = archive['x'].reshape(-1, n_features).astype("float32") / 255
    y = archive['y'].astype("float32")
    if x.shape[0] != y.shape[0]:
        raise ValueError(
            f"x has {x.shape[0]} samples but y has {y.shape[0]} labels"
        )
    return x, y


x_train, y_train = _prepare_split(train_dataset)
x_val, y_val = _prepare_split(val_dataset)
x_test, y_test = _prepare_split(test_dataset)

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

((600, 784), (600,), (400, 784), (400,), (10010, 784), (10010,))

In [None]:
# Small fully-connected classifier assembled layer by layer:
# 784 inputs -> 100 ReLU units -> 10 raw (un-activated) outputs.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(784,)))
model.add(tf.keras.layers.Dense(units=100, activation='relu'))
model.add(tf.keras.layers.Dense(units=10))

# The last layer has no activation, so the loss is told it receives logits.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
@tf.function
def loss(w1,w2,lamda1, lamda2,loss_fn,y_train,logits):
    """Data loss plus an exponentially-weighted L2 penalty on two weight tensors.

    Args:
        w1, w2: weight tensors to penalise.
        lamda1, lamda2: log-scale penalty strengths; ``exp(lamda)`` multiplies
            the matching ``tf.nn.l2_loss`` term.
        loss_fn: callable invoked as ``loss_fn(y_train, logits)``.
        y_train: labels of the current batch; its leading dimension is used
            as the batch size.
        logits: model outputs for the same batch.

    Returns:
        ``loss_fn(y_train, logits) + (penalty_1 + penalty_2) / (2 * batch_size)``.
    """
    data_term = loss_fn(y_train, logits)
    penalty_1 = tf.math.exp(lamda1) * tf.nn.l2_loss(w1)
    penalty_2 = tf.math.exp(lamda2) * tf.nn.l2_loss(w2)
    batch_size = y_train.shape[0]
    return data_term + (penalty_1 + penalty_2) / (2 * batch_size)

In [None]:
# Snapshot the freshly initialised weights of both Dense layers so that every
# training run can start from the same point.
wt_layer1_init, wt_layer2_init = (
    layer.get_weights() for layer in model.layers[1:3]
)

# Filled by fmin_loss with one loss value per epoch of the latest run.
losses = list()

# Mini-batch pipeline over the training arrays: shuffle, then batches of 64.
train_df = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=1024)
    .batch(64)
)

def fmin_loss(lamda1, lamda2, l_rate, momentum, epochs= 50, nesterov = True):
    """Train the notebook-level ``model`` with SGD on the L2-penalised loss.

    The model is trained in place on ``train_df``, its two Dense layers'
    weights are captured, and the layers are then reset to
    ``wt_layer1_init`` / ``wt_layer2_init`` before returning. The caller is
    responsible for the starting weights.

    Args:
        lamda1, lamda2: log-scale penalty strengths (``loss`` applies
            ``exp``). Pass lamda, not exp(lamda). Accepts Python/NumPy
            scalars, ``tf.constant`` or ``tf.Variable``.
        l_rate: SGD learning rate.
        momentum: SGD momentum.
        epochs: number of passes over ``train_df``.
        nesterov: whether to use Nesterov momentum.

    Returns:
        ``[last_batch_loss, wt_layer1, wt_layer2]`` where ``last_batch_loss``
        is the penalised loss of the final mini-batch processed (``None`` if
        no batch was processed, e.g. ``epochs == 0``) and ``wt_layer*`` are
        the ``get_weights()`` lists of the trained Dense layers.

    Side effects:
        Clears the module-level ``losses`` list and appends the last
        mini-batch loss of each epoch to it.
    """
    tf.keras.backend.clear_session()
    optimizer = keras.optimizers.SGD(learning_rate=l_rate, momentum=momentum, nesterov=nesterov)

    # Python numbers passed to a tf.function are treated as trace-time
    # constants; handing over tensors keeps one trace per input signature.
    lamda1 = tf.convert_to_tensor(lamda1, dtype=tf.float32)
    lamda2 = tf.convert_to_tensor(lamda2, dtype=tf.float32)

    # Loop-invariant lookups: weights[0] of each Dense layer (its kernel) is
    # what gets penalised; all trainable weights are what gets updated.
    w1 = model.layers[1].weights[0]
    w2 = model.layers[2].weights[0]
    vars_list = model.trainable_weights

    last_batch_loss = None
    losses.clear()
    for _ in range(epochs):
        for x_batch, y_batch in train_df:
            with tf.GradientTape() as tape:
                logits = model(x_batch, training=True)
                last_batch_loss = loss(w1, w2, lamda1, lamda2, loss_fn, y_batch, logits)

            # for ref  - https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough
            grads = tape.gradient(last_batch_loss, vars_list)
            optimizer.apply_gradients(zip(grads, vars_list))

        if last_batch_loss is not None:
            losses.append(last_batch_loss)

    wt_layer1 = model.layers[1].get_weights()
    wt_layer2 = model.layers[2].get_weights()
    # Leave the shared model at its initial weights for the next run.
    model.layers[1].set_weights(wt_layer1_init)
    model.layers[2].set_weights(wt_layer2_init)

    return [last_batch_loss, wt_layer1, wt_layer2]

In [None]:
# Candidate SGD learning rates: nine evenly spaced values from 0.1 to 0.9.
l_rates = np.linspace(start=0.1, stop=0.9, num=9)
# Candidate momentum values: from 0.01 in steps of 0.01, stopping before 1.
momentas = np.arange(0.01, 1, 0.01, dtype=np.float32)

In [None]:
# fmin_loss(-1.3793104, -0.3448276,0.5,0.0799)
# mn_loss = 1e12
# glbl_l_rate = None
# glbl_momentum = None
# for l_rate in l_rates:
#   for momentum in momentas:
#     model.layers[1].set_weights(wt_layer1_init)
#     model.layers[2].set_weights(wt_layer2_init)
#     loss_ , _ , _ = fmin_loss(-5.0,l_rate,momentum)  # NOTE: stale call - fmin_loss now takes (lamda1, lamda2, l_rate, momentum)
#     print(f'loss = {loss_} , l_rate = {l_rate}, momentum = {momentum}')
#     if loss_ < mn_loss:
#       glbl_l_rate = l_rate
#       glbl_momentum = momentum
#       mn_loss = loss_

# print('###############################################################')
# print('--------------------Result-------------------------------------')
# print(f'final_loss = {mn_loss} , l_rate = {glbl_l_rate} , glbl_momentum = {glbl_momentum}')
    

In [None]:
# y = losses
# x = [i+1 for i in range(200)]
# import matplotlib.pyplot as plt

# plt.plot(x,y)
# plt.show()

In [None]:
# y = losses
# x = [i+1 for i in range(100)]
# import matplotlib.pyplot as plt

# plt.plot(x,y)
# plt.show()

In [None]:
# y = losses
# x = [i+1 for i in range(50)]
# import matplotlib.pyplot as plt

# plt.plot(x,y)
# plt.show()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# 30 evenly spaced log-scale penalty values in [-10, 0].
lamdas = np.linspace(start=-10, stop=0, num=30, dtype=np.float32)

# Every ordered (lamda1, lamda2) pair; the second entry varies fastest.
lamda_grid = [
    [lamda1, lamda2]
    for lamda1, lamda2 in zip(np.repeat(lamdas, lamdas.size), np.tile(lamdas, lamdas.size))
]

In [None]:
# Grid search over (lamda1, lamda2): train from the same initial weights for
# every pair and keep the pair with the lowest unpenalised validation loss.
glbl_l_rate = 0.5
glbl_momentum = 0.079
loss_from_val = float('inf')    # best validation loss seen so far
loss_from_train = None
loss_from_test = None
corres_lamda = None

for lamda in lamda_grid:
    model.layers[1].set_weights(wt_layer1_init)
    model.layers[2].set_weights(wt_layer2_init)

    # min_loss is the penalised loss of the last mini-batch; it is not used
    # for model selection.
    min_loss, final_wt1, final_wt2 = fmin_loss(lamda[0], lamda[1], glbl_l_rate, glbl_momentum)
    # fmin_loss resets the model before returning, so load the trained weights.
    model.layers[1].set_weights(final_wt1)
    model.layers[2].set_weights(final_wt2)

    val_logits = model(x_val, training=False)
    val_loss = loss_fn(y_val, val_logits)

    if val_loss < loss_from_val:
        # Train/test losses are only reported for a new best candidate, so
        # the forward passes over those sets are skipped otherwise.
        train_logits = model(x_train, training=False)
        training_loss = loss_fn(y_train, train_logits)
        test_logits = model(x_test, training=False)
        test_loss = loss_fn(y_test, test_logits)

        loss_from_val = val_loss
        loss_from_train = training_loss
        loss_from_test = test_loss
        corres_lamda = lamda
        print(f'min  loss at lamda = {lamda} is train_loss = {training_loss}, val_loss = {val_loss} , test_loss = {test_loss}')

print('###############################################################')
print('--------------------Result-------------------------------------')
print(f'min  loss at lamda = {corres_lamda} is train_loss = {loss_from_train}, val_loss = {loss_from_val} , test_loss = {loss_from_test}')


min  loss at lamda = [-10.0, -10.0] is train_loss = 0.0039035603404045105, val_loss = 0.5539500713348389 , test_loss = 0.5245660543441772
min  loss at lamda = [-10.0, -8.620689] is train_loss = 0.003770330687984824, val_loss = 0.5439983606338501 , test_loss = 0.5111501216888428
min  loss at lamda = [-10.0, -7.586207] is train_loss = 0.003893473884090781, val_loss = 0.5307661294937134 , test_loss = 0.5100322365760803
min  loss at lamda = [-10.0, -2.7586207] is train_loss = 0.005561762023717165, val_loss = 0.5217079520225525 , test_loss = 0.49437159299850464
min  loss at lamda = [-10.0, -2.413793] is train_loss = 0.006065226625651121, val_loss = 0.5133768320083618 , test_loss = 0.485772043466568
min  loss at lamda = [-10.0, -2.0689654] is train_loss = 0.007150200195610523, val_loss = 0.5125202536582947 , test_loss = 0.4823510944843292
min  loss at lamda = [-10.0, -1.7241379] is train_loss = 0.008833025582134724, val_loss = 0.5044297575950623 , test_loss = 0.467305064201355
min  loss at l

In [None]:
# Seeded generator so the random candidates (and therefore the search result)
# are reproducible across kernel restarts.
RANDOM_LAMDA_SEED = 0
rng = np.random.default_rng(RANDOM_LAMDA_SEED)

random_lamda = rng.random(30)     # uniform samples in [0, 1)
# Map the samples onto the [-10, 0] range used for lamda and sort them.
ran_lambda = np.sort(10*random_lamda.astype('float32') - 10)
# Every ordered pair of the sampled values (lamda1, lamda2).
ran_grid = [[i,j] for i in ran_lambda for j in ran_lambda]

In [None]:
# Search over the randomly sampled (lamda1, lamda2) pairs: train from the same
# initial weights for every pair and keep the pair with the lowest unpenalised
# validation loss.
loss_from_val = float('inf')    # best validation loss seen so far
loss_from_train = None
loss_from_test = None
corres_lamda = None

for lamda in ran_grid:
    model.layers[1].set_weights(wt_layer1_init)
    model.layers[2].set_weights(wt_layer2_init)

    # min_loss is the penalised loss of the last mini-batch; it is not used
    # for model selection.
    min_loss, final_wt1, final_wt2 = fmin_loss(lamda[0], lamda[1], glbl_l_rate, glbl_momentum)

    # fmin_loss resets the model before returning, so load the trained weights.
    model.layers[1].set_weights(final_wt1)
    model.layers[2].set_weights(final_wt2)

    val_logits = model(x_val, training=False)
    val_loss = loss_fn(y_val, val_logits)

    if val_loss < loss_from_val:
        # Train/test losses are only reported for a new best candidate, so
        # the forward passes over those sets are skipped otherwise.
        train_logits = model(x_train, training=False)
        training_loss = loss_fn(y_train, train_logits)
        test_logits = model(x_test, training=False)
        test_loss = loss_fn(y_test, test_logits)

        loss_from_val = val_loss
        loss_from_train = training_loss
        loss_from_test = test_loss
        corres_lamda = lamda
        print(f'min  loss at lamda = {lamda} is train_loss = {training_loss}, val_loss = {val_loss} , test_loss = {test_loss}')

print('###############################################################')
print('--------------------Result-------------------------------------')
print(f'min  loss at lamda = {corres_lamda} is train_loss = {loss_from_train}, val_loss = {loss_from_val} , test_loss = {loss_from_test}')


min  loss at lamda = [-9.947112, -9.947112] is train_loss = 0.003944482654333115, val_loss = 0.5591757297515869 , test_loss = 0.5257105827331543
min  loss at lamda = [-9.947112, -9.633759] is train_loss = 0.003900391748175025, val_loss = 0.5552642941474915 , test_loss = 0.5231519341468811
min  loss at lamda = [-9.947112, -8.802883] is train_loss = 0.0038315034471452236, val_loss = 0.5403742790222168 , test_loss = 0.5034715533256531
min  loss at lamda = [-9.947112, -7.4025574] is train_loss = 0.0038341025356203318, val_loss = 0.5305639505386353 , test_loss = 0.5105301737785339
min  loss at lamda = [-9.947112, -3.2799363] is train_loss = 0.004727335646748543, val_loss = 0.5179455280303955 , test_loss = 0.4954743981361389
min  loss at lamda = [-9.947112, -2.0614963] is train_loss = 0.0072151185013353825, val_loss = 0.51263028383255 , test_loss = 0.48609867691993713
min  loss at lamda = [-9.947112, -1.5578079] is train_loss = 0.009785624220967293, val_loss = 0.5083457231521606 , test_loss 