In [None]:
!pip install optuna

import tensorflow as tf
import optuna

import os
import numpy as np
import pandas as pd
import pickle as pkl
import itertools
from datetime import datetime
import math

import warnings
warnings.filterwarnings("ignore")

In [2]:
# GOLBAL VARIABLES NEEDED TO RUN THE EXPERIMENT

# 1] ENTER DATA DIRECTORIES
base_dir  = "/content/drive/MyDrive/CIFAR_Dataset/CIFAR_10/Batch3_1+10K"
train_dir = base_dir+'/cifar10_train.npz'
val_dir   = base_dir+'/cifar10_val.npz'
test_dir  = base_dir+'/cifar10_test.npz'
# 2] ENTER DIRECTORY TO SAVE OPTUNA's TRAINED MODEL
OPTUNA_MODEL_DIRECTORY = "/content/drive/MyDrive/CIFAR_Dataset/CIFAR_10/resnet_optuna.pickle"

TYPE_OF_SAMPLER = 'grid' #  'grid', 'random', 'qmc', 'tpe' 
NUM_OPTUNA_TRIALS = 10

#**OPTUNA STUDY**

##**Loading Data**

In [3]:
# Enter training, validation and testing dataset directories for CIFAR-10 datasets.
def preprocess_image_input(input_images): # Only used when resent50 is selected as model_type below
  input_images = input_images.astype('float32')
  output_ims = tf.keras.applications.resnet50.preprocess_input(input_images)
  return output_ims

def data_loader( train_dir, val_dir, test_dir, model_type ): # model_type : 'cnn' OR 'resnet50' ( Please specify only one of these in sttring format )

    train_dataset = np.load(train_dir)
    val_dataset   = np.load(val_dir)
    test_dataset  = np.load(test_dir)

    y_train = train_dataset['y_train'].astype("float32")
    y_val   = val_dataset['y_val'].astype("float32") 
    y_test  = test_dataset['y_test'].astype("float32") 

    x_train = train_dataset['x_train'].astype("float32")
    x_val   = val_dataset['x_val'].astype("float32")
    x_test  = test_dataset['x_test'].astype("float32") 

    if model_type == 'cnn':
        x_train, x_val, x_test = x_train/255, x_val/255, x_test/255
    elif model_type == 'resnet50':
        x_train = preprocess_image_input(x_train)
        x_val = preprocess_image_input(x_val)
        x_test = preprocess_image_input(x_test)
    elif model_type not in ['cnn', 'resnet50']:
        raise ValueError('Error: Please enter correct \'model_type\' variable value. Correct values are \'cnn\' or \'resnet50\' (strings).')
    return x_train, x_val, x_test, y_train, y_val, y_test

## ============================================  LOADING DATA  =====================================================
x_train, x_val, x_test, y_train, y_val, y_test = data_loader( train_dir, val_dir, test_dir, model_type='resnet50' )

input_shape  = x_train.shape[1:]
output_shape = 10

##**Custom Model Defining**

In [4]:
class CNN:
    def __init__(self, input_shape, num_classes):
        self.input_shape = input_shape
        self.num_classes = num_classes
    def generate_model(self, layer_info = None ):
        UpSampling = 224/self.input_shape[0]

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Input(shape=self.input_shape))
        model.add( tf.keras.layers.UpSampling2D(size=(UpSampling,UpSampling)))
        model.add(tf.keras.applications.resnet50.ResNet50(include_top = False, weights = 'imagenet', input_shape = (224, 224, 3),  pooling = 'avg'))
        if layer_info is not None:
            for layer in layer_info[:-1]:
                layer_params = layer['params']
                if layer['type'] == 'dense':
                    model.add(tf.keras.layers.Dense(**layer_params))
                    model.add(tf.keras.layers.BatchNormalization())
            for layer in layer_info[-1:]:
                layer_params = layer['params']
                if layer['type'] == 'dense':
                    model.add(tf.keras.layers.Dense(**layer_params))
        model.layers[1].trainable = False
        # Calculate the number of trainable parameters in the model
        trainable_count = sum(tf.keras.backend.count_params(weights) for weights in model.trainable_weights)
        print(f"Trainable parameters: {trainable_count:,}")
        return model

In [5]:
@tf.function
def loss_function_optuna( y_dataset, logits, loss ): # logits = model(x_dataset)
    total_loss = loss(y_dataset, logits)
    total_loss = tf.cast( total_loss, dtype=tf.float32 )
    return total_loss

In [6]:
def fmin_loss( model, loss_function, optimizer, batch_size , epochs, record = True ):      # lamda, not exp(lamda), Works with both tf.Variable and tf.constant type lambda input, (or just scalar)
    # total_loss0 = 1e20
    train_df = tf.data.Dataset.from_tensor_slices((x_train,y_train))
    train_df = train_df.shuffle(buffer_size = 1024).batch(batch_size)

    All_Epoch_Gradients, All_Epoch_Weights = [], []

    for epoch in range(epochs):

        weights0 = [var.numpy() for var in model.trainable_weights] # Getting only trainable weights at which the gradient is being calculated.
        # Note : model.get_weights() retrieves all the weights (including non-trainable)

        Step_Gradient, Num_batch = [], 0
        
        for step,(x_train_,y_train_) in enumerate(train_df):
            # print("Step == ", step)
            with tf.GradientTape(persistent = True) as tape:

                logits = model(x_train_, training=True)
                total_loss1 = loss_function_optuna( y_train_, logits, loss = loss_function ) 

            vars_list = model.trainable_weights
            grads = tape.gradient(total_loss1, vars_list)      # for ref  - https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough 
            optimizer.apply_gradients(zip(grads,vars_list))

            if record : 
                if step == 0 : 
                    Step_Gradient = grads
                    # print( Step_Gradient )
                else:
                    for idx in range(len(Step_Gradient)):
                        Step_Gradient[idx] =  tf.add(Step_Gradient[idx], grads[idx])
            Num_batch = step
        
    # total_loss0   = total_loss1
        if record : 
            Step_Gradient = [ i/Num_batch for i in Step_Gradient ] 
            All_Epoch_Gradients.append( Step_Gradient )
            All_Epoch_Weights.append(weights0)
            
    if record :
        return All_Epoch_Gradients, All_Epoch_Weights
    else: 
        return 0, 0

####Note: Training and Local Tuning is done based on Loss and not the Accuracy.

In [7]:
NUMBER_OF_DENSE_LAYERS = 2

def optuna_optimizer(trial):

    tf.keras.backend.clear_session()

    alphas = [ trial.suggest_float(f'regularization{i}', 1e-6, 1e-1, log=True) for i in range(NUMBER_OF_DENSE_LAYERS) ]

    layer_info_ = [ {'type': 'dense', 'params': {'units': 16, 'activation': 'relu', 'kernel_regularizer':tf.keras.regularizers.l2(i)}} for i in alphas[:-1]]
    layer_info_ += [ {'type': 'dense', 'params': {'units': output_shape, 'activation': 'softmax', 'kernel_regularizer':tf.keras.regularizers.l2(alphas[-1])}} ]

    model = CNN( input_shape, output_shape).generate_model( layer_info_ )
    
    # Optimizing
    optimizer         = tf.keras.optimizers.Adam(0.01)
    loss_function     = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    All_Epoch_Gradients, All_Epoch_Weights = fmin_loss( model, loss_function, optimizer,  128,  10, True)

    # Getting Scores
    cce = tf.keras.losses.SparseCategoricalCrossentropy()
    sca = tf.keras.metrics.SparseCategoricalAccuracy()

    y_pred_train = model.predict(x_train)
    train_loss_unregularized = cce(y_train, y_pred_train).numpy()
    train_acc = sca(y_train, y_pred_train).numpy()

    y_pred_val = model.predict(x_val)
    val_loss_unregularized = cce(y_val, y_pred_val).numpy()
    val_acc = sca(y_val, y_pred_val).numpy()

    y_pred_test = model.predict(x_test)
    test_loss_unregularized = cce(y_test, y_pred_test).numpy()
    test_acc = sca(y_test, y_pred_test).numpy()

    print("\nTraining:  Loss ", train_loss_unregularized, "  Accuracy", train_acc*100 )
    print("Validation: Loss ", val_loss_unregularized, "  Accuracy", val_acc*100 )
    print("Test:       Loss", test_loss_unregularized, "  Accuracy", test_acc*100, "\n\n")

    with open("{}.pickle".format(trial.number), "wb") as fout:
        pkl.dump(model, fout)

    with open("training_info_{}.pickle".format(trial.number), "wb") as fout:
        Dict_ = { "Gradients" : All_Epoch_Gradients, "Weights" : All_Epoch_Weights }
        pkl.dump(Dict_, fout)
    
    score = val_loss_unregularized
    return(score)


def optuna_training( num_trials ):

    time1 = datetime.now()

    if TYPE_OF_SAMPLER == 'grid': 
        search_space = { f"regularization{i}" : list(np.linspace(1e-6,1e-1,10)) for i in range(NUMBER_OF_DENSE_LAYERS) }
        study = optuna.create_study(sampler=optuna.samplers.GridSampler(search_space), direction = "minimize")
    elif TYPE_OF_SAMPLER == 'random': 
        study = optuna.create_study(sampler=optuna.samplers.RandomSampler(), direction = "minimize")
    elif TYPE_OF_SAMPLER == 'qmc': 
        study = optuna.create_study(sampler=optuna.samplers.QMCSampler(), direction = "minimize")
    elif TYPE_OF_SAMPLER == 'tpe': 
        study = optuna.create_study(sampler=optuna.samplers.TPESampler(), direction = "minimize")
    else:
        print("...Mention the correct sampler name...")

    study.optimize(optuna_optimizer, n_trials = num_trials)

    print('\n\n')
    trial = study.best_trial
    print("Best Score: ", trial.value)
    print("Best Params: ")
    for key, value in trial.params.items():
        print("  {}: {}".format(key, value))

    print( "\n\n ", "Trial Number: ", trial.number, "\n" )

    time2 = datetime.now()
    delta = time2 - time1
    print(f"Time difference is {delta.total_seconds()} seconds")

    # Loading Best OPTUNA model to get initial feasible weights for trainable layers
    with open("{}.pickle".format(trial.number), "rb") as fin:
        best_clf = pkl.load(fin)

    with open("training_info_{}.pickle".format(trial.number), "rb") as fin_:
        wt_grad = pkl.load(fin_)

    # Getting Optimal Model's Weights and Gradients values for Hessian Approximations
    weight_sets = wt_grad['Weights']
    grad_sets   = wt_grad['Gradients']

    # Getting Optimal Model's Weights and Hyperparameters
    full_weights_list          = best_clf.get_weights()
    trainable_weights_list     = best_clf.trainable_weights
    init_hyperparameters = [ tf.Variable(value) for key, value in trial.params.items() ]

    return trainable_weights_list, full_weights_list, init_hyperparameters, weight_sets, grad_sets, delta

In [8]:
trainable_weights_list, full_weights_list, init_hyperparameters, weight_sets, grad_sets, optuna_time = optuna_training(NUM_OPTUNA_TRIALS)

model__ = [ trainable_weights_list, full_weights_list, init_hyperparameters, weight_sets, grad_sets, optuna_time ]
# LOADING OPTUNA TRAINED MODELS
with open(OPTUNA_MODEL_DIRECTORY, "wb") as fout:
        pkl.dump(model__, fout)

[32m[I 2023-05-23 13:59:43,493][0m A new study created in memory with name: no-name-fba4b8da-4f2b-4fe4-b73c-4cbb98bfeadc[0m


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Trainable parameters: 32,986





Training:  Loss  0.8891305   Accuracy 71.16666436195374
Validation: Loss  1.4421617   Accuracy 64.0999972820282
Test:       Loss 1.4398409   Accuracy 54.37272787094116 




[32m[I 2023-05-23 14:00:28,815][0m Trial 0 finished with value: 1.4421616792678833 and parameters: {'regularization0': 0.1, 'regularization1': 0.011112}. Best is trial 0 with value: 1.4421616792678833.[0m


Trainable parameters: 32,986

Training:  Loss  0.9514213   Accuracy 71.66666388511658
Validation: Loss  1.4544638   Accuracy 63.70000243186951
Test:       Loss 1.4903299   Accuracy 53.93636226654053 




[32m[I 2023-05-23 14:00:48,381][0m Trial 1 finished with value: 1.4544638395309448 and parameters: {'regularization0': 0.05555600000000001, 'regularization1': 0.1}. Best is trial 0 with value: 1.4421616792678833.[0m





Best Score:  1.4421616792678833
Best Params: 
  regularization0: 0.1
  regularization1: 0.011112


  Trial Number:  0 

Time difference is 64.893777 seconds
