#### Packages

In [25]:
import keras
from keras import layers
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import os
from PIL import Image
%load_ext tensorboard

#### Dataset

Here we use the same dataset as in the previous assignments, retrieved from https://www.kaggle.com/datasets/samuelcortinhas/cats-and-dogs-image-classification?select=train. The task is to classify each image as a cat or not a cat (i.e. a dog). We removed the picture "dog_505.png" because it caused problems during preprocessing. We first process only 100 images per class (100 cats and 100 dogs for training, and another 100 cats and 100 dogs for validation), reduce their resolution to the one used in the other assignments (64 x 64 pixels), and then represent them as arrays.

In [2]:
def preprocess_dataset(dataset):
    """Flatten every sample into a column vector and scale the values by 1/255.

    ``dataset`` is reshaped to ``(n_samples, -1)``, transposed so that each
    column holds one sample, and divided by 255.
    """
    n_samples = dataset.shape[0]
    one_row_per_sample = np.reshape(dataset, (n_samples, -1))
    return one_row_per_sample.T / 255

def process_images(directory, target_size=(64, 64), image_range=range(0,100)):
    """Load a positional slice of the ``.jpg`` images in ``directory`` as one array.

    The ``.jpg`` files in ``directory`` are sorted by name, and the files whose
    positions lie between ``min(image_range)`` and ``max(image_range)``
    (both inclusive) are loaded, converted to RGB and resized.

    Parameters
    ----------
    directory : str
        Folder containing the image files.
    target_size : tuple of int
        Size passed to ``Image.resize``.
    image_range : iterable of int
        Only its smallest and largest values are used, as inclusive bounds.
        A ``range`` or a ``(first, last)`` tuple both work.

    Returns
    -------
    numpy.ndarray
        The loaded images stacked along the first axis, or an empty array when
        no file is selected.
    """
    positions = list(image_range)
    if not positions:
        # min()/max() would raise on an empty range; nothing was requested.
        return np.array([])

    # Filter by extension BEFORE slicing so that stray non-JPEG entries in the
    # folder do not silently reduce the number of images that get loaded.
    jpg_names = [name for name in sorted(os.listdir(directory)) if name.endswith(".jpg")]
    selected_names = jpg_names[min(positions):max(positions) + 1]

    image_list = []
    for filename in selected_names:
        file_path = os.path.join(directory, filename)

        # The context manager closes the underlying file once the pixels are read.
        with Image.open(file_path) as img:
            # Force 3 channels: a single grayscale/palette/RGBA file would
            # otherwise give an array of a different shape and break stacking.
            img_rgb = img.convert("RGB")
            image_list.append(np.array(img_rgb.resize(target_size)))

    return np.array(image_list)

def join_cats_and_dogs(cat_images,dog_images):
    """Build one shuffled, labelled dataset from cat and dog image arrays.

    Both inputs go through ``preprocess_dataset``; cats are labelled 1 and dogs
    0. Samples are stored as columns, and the columns of the features and the
    labels are reordered with the same permutation, drawn after reseeding
    NumPy's global generator with 1.

    Returns
    -------
    tuple
        ``(features, labels)``, where ``labels`` has a single row.
    """
    cats_X = preprocess_dataset(cat_images)
    dogs_X = preprocess_dataset(dog_images)

    # One label per column (sample): 1 for cats, 0 for dogs.
    cats_Y = np.ones((1, cats_X.shape[1]))
    dogs_Y = np.zeros((1, dogs_X.shape[1]))

    features = np.concatenate((cats_X, dogs_X), axis=1)
    labels = np.concatenate((cats_Y, dogs_Y), axis=1)

    # Fixed seed so the shuffle is the same on every call.
    np.random.seed(1)
    order = np.random.permutation(features.shape[1])

    return features[:, order], labels[:, order]


In [15]:
# Training split: the default image_range (positions 0-99 in sorted file order).
train_cat_images = process_images("datasets/train/cats")
train_dog_images = process_images("datasets/train/dogs")

# Held-out split: positions 101-200 (inclusive) of the same folders.
# Position 100 is used by neither split.
test_cat_images = process_images("datasets/train/cats", image_range=(101, 200))
test_dog_images = process_images("datasets/train/dogs", image_range=(101, 200))

# join_cats_and_dogs stores samples as columns and labels as a single row;
# transpose / flatten so that the first axis indexes samples.
x_train, y_train = join_cats_and_dogs(train_cat_images, train_dog_images)
x_train, y_train = x_train.T, y_train.flatten()

x_test, y_test = join_cats_and_dogs(test_cat_images, test_dog_images)
x_test, y_test = x_test.T, y_test.flatten()


## Tuning Parameters

### Grid Search
We will tune four hyperparameters with a grid search over every combination of their candidate values: the number of hidden units, the dropout rate, the batch size and the optimizer.

In [20]:
# Search space: every hyperparameter is a discrete set of candidate values.
HP_NUM_UNITS = hp.HParam('num_units', domain=hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', domain=hp.Discrete([0.05, 0.10, 0.15, 0.20, 0.25]))
HP_BATCH_SIZE = hp.HParam('batch_size', domain=hp.Discrete([8, 16, 32]))
HP_OPTIMIZER = hp.HParam('optimizer', domain=hp.Discrete(['adam', 'sgd', 'lion']))

# Tag under which each trial's accuracy is written as a summary scalar.
METRIC_ACCURACY = 'accuracy'

In [22]:
def train_test_model(hparams, log_dir='logs/hparam_tuning'):
  """Train a one-hidden-layer classifier for one trial and return its test accuracy.

  The model is Flatten -> Dense(num_units, relu) -> Dropout -> Dense(2, softmax).
  It is fitted for one epoch on the notebook-level ``x_train`` / ``y_train``
  and evaluated on ``x_test`` / ``y_test``.

  Args:
    hparams: mapping keyed by HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER and
      HP_BATCH_SIZE that gives the value of each hyperparameter for this trial.
    log_dir: directory the Keras TensorBoard callback writes to. The default
      keeps the previous shared location; ``run`` passes the trial's own
      directory so that trials do not write over each other's training logs.

  Returns:
    The accuracy reported by ``model.evaluate`` on the test set.
  """
  model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
    tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
  ])
  model.compile(
      optimizer=hparams[HP_OPTIMIZER],
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'],
  )

  # Built from tf.keras, like the model, so both come from the same Keras.
  callbacks = [
    tf.keras.callbacks.TensorBoard(
      log_dir=log_dir,
      histogram_freq=1,
      embeddings_freq=1,
    ),
  ]

  model.fit(
      x_train,
      y_train,
      epochs=1,
      callbacks=callbacks,
      batch_size=hparams[HP_BATCH_SIZE],
  )
  _, accuracy = model.evaluate(x_test, y_test)
  return accuracy

def run(run_dir, hparams):
  """Run one trial and log its hyperparameters and accuracy under ``run_dir``.

  Args:
    run_dir: directory that receives this trial's summaries, including the
      Keras training logs.
    hparams: hyperparameter values for the trial (see ``train_test_model``).
  """
  with tf.summary.create_file_writer(run_dir).as_default():
    hp.hparams(hparams)  # record the values used in this trial
    # Keep the Keras training logs next to this trial's hparams summary
    # instead of in one directory shared by every trial.
    accuracy = train_test_model(hparams, log_dir=run_dir)
    tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

In [23]:
# Exhaustive grid search: one trial per combination of the four hyperparameters.
session_num = 0

for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in HP_DROPOUT.domain.values:
        for optimizer in HP_OPTIMIZER.domain.values:
            for batch_size in HP_BATCH_SIZE.domain.values:
                hparams = dict([
                    (HP_NUM_UNITS, num_units),
                    (HP_DROPOUT, dropout_rate),
                    (HP_OPTIMIZER, optimizer),
                    (HP_BATCH_SIZE, batch_size),
                ])
                run_name = f"run-{session_num}"
                print(f'--- Starting trial: {run_name}')
                print({param.name: value for param, value in hparams.items()})
                # Each trial logs into its own sub-directory.
                run(f'logs/hparam_tuning/{run_name}', hparams)
                session_num += 1


--- Starting trial: run-0
{'num_units': 16, 'dropout': 0.05, 'optimizer': 'adam', 'batch_size': 8}
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.4457 - loss: 0.9047
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5200 - loss: 0.7181  
--- Starting trial: run-1
{'num_units': 16, 'dropout': 0.05, 'optimizer': 'adam', 'batch_size': 16}
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.5260 - loss: 1.2504
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5187 - loss: 0.6930  
--- Starting trial: run-2
{'num_units': 16, 'dropout': 0.05, 'optimizer': 'adam', 'batch_size': 32}
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.5089 - loss: 1.5606
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4800 - loss: 2.4291  
--- Starting trial: run-3
{'num_units': 16, 'dropout': 0.05

In [30]:
%tensorboard --logdir logs/hparam_tuning

Reusing TensorBoard on port 6007 (pid 26432), started 2:26:11 ago. (Use '!kill 26432' to kill it.)