# Getting Started

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from mlp import generate_mlp
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
# Bayesian Optimization Libraries for hyperparameter tuning.
import optuna
from numba import cuda
import gc

from tensorflow.keras import layers, models

2023-11-11 01:02:52.404706: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-11 01:02:52.404763: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-11 01:02:52.404786: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-11 01:02:52.414771: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
height, width = 256, 256
classes = 3
epochs = 1
hyp_calls = 30

# Preprocessing stuff
image_tup = (height, width)
batch_size = 10

# Training & Tuning

Models to Cover 
1. v2l
2. v2m
3. cxm
4. r50
5. vgg16

## Universal Variables

In [3]:
def train_test_split(dataset, train_size_ratio=0.8, shuffle_buffer_size=1000):

    dataset_size = sum(1 for _ in dataset)

    if shuffle_buffer_size:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    train_size = int(train_size_ratio * dataset_size)

    train_dataset = dataset.take(train_size)
    val_dataset = dataset.skip(train_size)

    return train_dataset, val_dataset


In [4]:
def parse_examples(serialized_examples):
    feature_description={'image':tf.io.FixedLenFeature([],tf.string),
                         'label':tf.io.FixedLenFeature([],tf.int64)}
    examples=tf.io.parse_example(serialized_examples, feature_description)
    labels=examples.pop('label')
    labels = tf.one_hot(labels, depth=classes) 
    images=tf.image.resize_with_pad(tf.cast(tf.io.decode_jpeg(examples['image'],channels=3),tf.float32),299,299)
    return images, labels

In [5]:
 def preprocess(image, label):
        print(type(image))
        print(image)
    
        image = tf.image.resize(image, image_tup)
        tf.keras.applications.efficientnet_v2.preprocess_input(image)
    
        return image, label

In [6]:
def load_data():
    raw_dataset_train= tf.data.TFRecordDataset(['birds-vs-squirrels-train.tfrecords'])
    raw_dataset_valid = tf.data.TFRecordDataset(['birds-vs-squirrels-validation.tfrecords'])
    
    
    
    train_base, test_base = train_test_split(raw_dataset_train, 0.8, shuffle_buffer_size=10000)
    
    train_base = train_base.map(parse_examples, num_parallel_calls=16)
    test_base = test_base.map(parse_examples, num_parallel_calls=16)
    val_base = raw_dataset_valid.map(parse_examples, num_parallel_calls=16)

    val_X = []
    val_y = []
    
    for image, label in val_base:
        val_X.append(image.numpy())  # Convert to numpy array
        val_y.append(label.numpy())
    
    val_x = np.array(val_X)
    val_y = np.array(val_y)

    early_stopping = EarlyStopping(
        monitor='val_loss',  
        patience=5, 
        min_delta=0.01,  
        restore_best_weights=True  
    )

    
    
   
    
    train = train_base.map(preprocess, num_parallel_calls=-1)
    test = test_base.map(preprocess, num_parallel_calls=-1)
    valid = val_base.map(preprocess, num_parallel_calls=-1)
    
    train = train.batch(batch_size)
    test = test.batch(batch_size)
    valid = valid.batch(batch_size)
    
    input_shape = (height, width, 3)
    inputs = Input(shape=input_shape)

    v2l = tf.keras.applications.EfficientNetV2L(
        include_top=False,
        weights="imagenet",
        input_tensor=None,
        input_shape=(height, width, classes),
        pooling=None,
        classes=classes,
        classifier_activation="softmax",
        include_preprocessing=True,
    )

    r50 = tf.keras.applications.ResNet50(
        include_top=False,
        weights="imagenet",
        input_tensor=None,
        input_shape=(height, width, classes),
        pooling=None,
        classes=3,
        )

    vgg16 = tf.keras.applications.VGG16(
        include_top=False,
        weights="imagenet",
        input_tensor=None,
        input_shape=(height, width, classes),
        pooling=None,
        classes=classes,
        classifier_activation="softmax",
        )
    cxl = tf.keras.applications.ConvNeXtXLarge(
        model_name="convnext_xlarge",
        include_top=False,
        include_preprocessing=True,
        weights="imagenet",
        input_tensor=None,
        input_shape=(height, width, classes),
        pooling=None,
        classes=classes,
        classifier_activation="softmax",
    )

    v2m = tf.keras.applications.EfficientNetV2L(
            include_top=False,
            weights="imagenet",
            input_tensor=None,
            input_shape=(height, width, classes),
            pooling=None,
            classes=classes,
            classifier_activation="softmax",
            include_preprocessing=True,
        )

    return train, test, valid, val_x, val_y, v2l, vgg16, v2m, r50, cxl, inputs, early_stopping

# Bayesian Optimization Tuning

In [7]:
def get_model_from_str(base_model_str, v2l, vgg16, v2m, r50, cxl):
    if base_model_str == 'v2l':
        return v2l
    elif base_model_str == 'vgg16':
        return vgg16
    elif base_model_str == 'v2m':
        return v2m
    elif base_model_str == 'r50':
        return r50
    elif base_model_str == 'cxl':
        return cxl
    else:
        raise ValueError(f"Unknown model string: {base_model_str}")


In [8]:
def create_model(optimizer:str, base_model:str, activation, init_neurons, num_layers, scaling_factor, dropout_rate, learning_rate, v2l, vgg16, v2m, r50, cxl, inputs):
    
    base_model = get_model_from_str(base_model, v2l, vgg16, v2m, r50, cxl)
    base_model.trainable = False
    x = base_model(inputs, training=False) # Inputs is defined in section above. 
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    
    # Define your custom top layer
    top_layer = generate_mlp(num_layers=num_layers, 
                             initial_neurons=init_neurons, 
                             output_classes=classes, 
                             dropout_rate=dropout_rate, 
                             activation_function=activation,
                             scaling_factor=scaling_factor)
    
    x = top_layer(x)
    
    model = Model(inputs, x)

    if optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model
    

In [9]:
def objective(trial):
    # Optuna suggests the parameters
    optimizer = trial.suggest_categorical('optimizer', ['adam', 'sgd'])
    base_model = trial.suggest_categorical('base_model', ['r50', 'v2l', 'vgg16', 'v2m',  'cxl'])
    activation = trial.suggest_categorical('activation', ['relu', 'tanh', 'sigmoid'])
    init_neurons = trial.suggest_int('init_neurons', 32, 1028)
    num_layers = trial.suggest_int('num_layers', 1, 10)
    scaling_factor = trial.suggest_float('scaling_factor', 0.01, 0.99)
    dropout_rate = trial.suggest_float('dropout_rate', 0.01, 0.99)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)

    train, test, valid, val_x, val_y, v2l, vgg16, v2m, r50, cxl, inputs, early_stopping = load_data() 

    model = create_model(optimizer, base_model, activation, init_neurons, num_layers, scaling_factor, dropout_rate, learning_rate, v2l, vgg16, v2m, r50, cxl, inputs)
    
    # Assume you have training data (X_train, y_train)
    model.fit(train, epochs=epochs, batch_size = batch_size, validation_data=test, verbose=1, callbacks=[early_stopping])
    
    acc = model.evaluate(val_x, val_y, verbose=0) 
    tf.keras.backend.clear_session()
    del model, v2l, vgg16, v2m, r50, cxl, train, test, valid
    gc.collect()

    cuda.select_device(0)
    cuda.close()
    return -acc[1]

In [10]:
study = optuna.create_study(direction='minimize')

[I 2023-11-11 01:02:55,385] A new study created in memory with name: no-name-bafbc748-04ea-4dce-8ecc-e3226d7c1fcb


In [None]:
study.optimize(objective, n_trials=hyp_calls, gc_after_trial=True)

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
2023-11-11 01:02:55.523411: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-11 01:02:55.530005: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-11 01:02:55.530063: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-11 01:02:55.531932: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been buil

<class 'tensorflow.python.framework.ops.SymbolicTensor'>
Tensor("args_0:0", shape=(299, 299, 3), dtype=float32)
<class 'tensorflow.python.framework.ops.SymbolicTensor'>
Tensor("args_0:0", shape=(299, 299, 3), dtype=float32)
<class 'tensorflow.python.framework.ops.SymbolicTensor'>
Tensor("args_0:0", shape=(299, 299, 3), dtype=float32)


2023-11-11 01:03:35.466128: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-11 01:03:38.622045: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2023-11-11 01:03:39.560030: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-11 01:03:40.069372: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fd01c6f84b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-11 01:03:40.069406: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3080 Ti, Compute Capability 8.6
2023-11-11 01:03:40.441884: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




2023-11-11 01:06:26.896976: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2718505608 exceeds 10% of free system memory.
2023-11-11 01:06:29.529517: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2718505608 exceeds 10% of free system memory.
[I 2023-11-11 01:07:38,445] Trial 0 finished with value: -0.9617205858230591 and parameters: {'optimizer': 'sgd', 'base_model': 'cxl', 'activation': 'tanh', 'init_neurons': 945, 'num_layers': 5, 'scaling_factor': 0.7453602465605936, 'dropout_rate': 0.0964543852992191, 'learning_rate': 0.00010753396664346617}. Best is trial 0 with value: -0.9617205858230591.


# Results

In [None]:
# Get the best parameters
best_params = study.best_params
print("Best Parameters:", best_params)

# Best objective value achieved
best_value = study.best_value
print("Best Objective Value:", best_value)


In [None]:
# Print all trials
for trial in study.trials:
    print("Trial Number:", trial.number)
    print("Params:", trial.params)
    print("Value:", trial.value)


In [None]:
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_contour
plot_optimization_history(study)


In [None]:
plot_param_importances(study)

In [None]:
plot_slice(study)

In [None]:
plot_contour(study)