<a href="https://colab.research.google.com/github/SonOf1998/ProblemSet5/blob/main/ProblemSet5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

As hyperas needs the notebook's relative path as a parameter, we first need to download the notebook directly from the GitHub repository.

In [1]:
import os
from urllib.request import urlretrieve
import requests
import zipfile

# Fetch the repository snapshot next to the running notebook so that the
# notebook file itself is available on disk under the working directory.
working_dir = os.getcwd()
local_zip = os.path.join(working_dir, 'main.zip')

url = 'https://github.com/SonOf1998/ProblemSet5/archive/main.zip'
urlretrieve(url, local_zip)

try:
  # Extract zip file; the context manager closes the archive even if
  # extraction fails part-way
  with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall(working_dir)
finally:
  # Discard zip file, whether or not the extraction succeeded
  if os.path.exists(local_zip):
    os.remove(local_zip)

The dataset needs to be created in a function to support hyperas.  

The pictures need to be min-max scaled first. Also, we'd like our labels to be one-hot encoded.

In [2]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

def dataset():
  """Load CIFAR-10 with pixels scaled to [0, 1] and one-hot encoded labels."""
  (x_train, y_train), (x_test, y_test) = cifar10.load_data()

  # one-hot encode the 10 class labels
  y_train = to_categorical(y_train, 10)
  y_test = to_categorical(y_test, 10)

  # min-max normalization; casting first keeps the images in float32
  # instead of the float64 that plain integer division would produce,
  # which halves the memory needed to hold the dataset
  x_train = x_train.astype('float32') / 255
  x_test = x_test.astype('float32') / 255

  return x_train, y_train, x_test, y_test

In [3]:
!pip install hyperas
!pip install hyperopt

Collecting hyperas
  Downloading https://files.pythonhosted.org/packages/04/34/87ad6ffb42df9c1fa9c4c906f65813d42ad70d68c66af4ffff048c228cd4/hyperas-0.4.1-py3-none-any.whl
Installing collected packages: hyperas
Successfully installed hyperas-0.4.1


In [4]:
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, ReLU, LeakyReLU, Flatten
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend

import numpy as np

import hyperas
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

Model creation is also something that hyperas forces us to do in a function.  

The model structure is more or less copied from here: https://gist.github.com/JulieProst/8000610500a67fda4b76e07efe585552#file-keras_model-py

I used the exact same method for hyperparameter optimization and logging that we experimented with during the course, which can be found here: 
https://github.com/BME-SmartLab-Education/vitmav45/blob/master/12/hyperas_fashionmnist_pub.ipynb

In [5]:
def create_model(x_train, x_test, y_train, y_test):
  """Build, train and score one CNN configuration for the hyperas search.

  The double-brace expressions below are hyperas templates describing the
  search space. The network is trained with early stopping on validation
  accuracy, the sampled configuration together with its best validation
  accuracy is appended to 'hyperas-log.csv', and the negated accuracy is
  reported as the loss for the optimizer to minimise.
  """

  act = {{choice(['relu', 'leakyrelu'])}}
  opti = {{choice(['rmsprop', 'adam', 'sgd'])}}
  kernel_size = {{choice([3, 4])}}
  dropout_1 = {{uniform(0, 0.5)}}
  dropout_2 = {{uniform(0, 0.5)}}
  dropout_3 = {{uniform(0, 0.5)}}
  unit_size = {{choice([64, 128, 256])}}
  batch_size = {{choice([64, 128, 256])}}
  flattening_layer = {{choice(['flatten', 'globalavgpooling'])}}

  # Only the layer class is selected here; a separate instance is created
  # for every position in the network so that each layer gets its own name
  # instead of one object being added to the model several times.
  activation_cls = None
  if act == 'relu':
    activation_cls = ReLU
  elif act == 'leakyrelu':
    activation_cls = LeakyReLU

  flat_layer = None
  if flattening_layer == 'flatten':
    flat_layer = Flatten()
  elif flattening_layer == 'globalavgpooling':
    flat_layer = GlobalAveragePooling2D()

  early_stop = EarlyStopping(monitor='val_accuracy', patience=3, verbose=0)

  model = Sequential()
  # first convolutional stage
  model.add(Conv2D(16, kernel_size, input_shape=(32, 32, 3)))
  model.add(activation_cls())
  model.add(Conv2D(16, kernel_size))
  model.add(activation_cls())
  model.add(MaxPooling2D(2))
  model.add(Dropout(dropout_1))
  # second convolutional stage
  model.add(Conv2D(32, kernel_size))
  model.add(activation_cls())
  model.add(Conv2D(64, kernel_size))
  model.add(activation_cls())
  model.add(MaxPooling2D(2))
  model.add(Dropout(dropout_2))
  # classifier head
  model.add(flat_layer)
  model.add(Dense(unit_size))
  model.add(activation_cls())
  model.add(Dropout(dropout_3))
  model.add(Dense(10, activation='softmax'))

  model.compile(optimizer=opti, loss='categorical_crossentropy', metrics=['accuracy'])
  results = model.fit(x_train, y_train, batch_size=batch_size, epochs=100, verbose=0,
                      validation_data=(x_test, y_test), callbacks=[early_stop], shuffle=True)

  # model needs to be freed to avoid RAM issues in Colab
  backend.clear_session()

  best_val_acc = np.amax(results.history['val_accuracy'])

  # One semicolon-separated line per trial; the field order has to match the
  # header line that is written to the log before the search starts.
  log_row = [kernel_size, dropout_1, dropout_2, dropout_3, act, unit_size,
             opti, flattening_layer, batch_size, best_val_acc]
  with open('hyperas-log.csv', 'a') as csv_file:
    csv_file.write(';'.join(str(value) for value in log_row) + '\n')

  # the accuracy is negated because the search minimises the reported loss
  return {'loss': -best_val_acc, 'status': STATUS_OK, 'model': model}

In [6]:
# Start a fresh log file: opening in 'w' mode truncates any previous log,
# then the semicolon-separated column names are written as the first line.
with open('hyperas-log.csv', 'w') as csv_file:
  csv_file.write(';'.join((
    "kernel_size",
    "dropout_1",
    "dropout_2",
    "dropout_3",
    "activation",
    "unit_size",
    "opti",
    "flattening_layer",
    "batch_size",
    "best_val_acc",
  )) + '\n')

We now have both the model-creating and the dataset-creating functions, so we can start the optimization process.

In [None]:
# Run the hyperparameter search: up to 100 evaluations of create_model,
# with candidates proposed by hyperopt's tpe.suggest algorithm.
best_run, best_model = optim.minimize(
    model=create_model,
    data=dataset,
    algo=tpe.suggest,
    max_evals=100,
    trials=Trials(),
    # notebook path without extension; presumably relative to the working
    # directory where the downloaded archive was extracted
    notebook_name="ProblemSet5-main/ProblemSet5",
    verbose=True,
)

>>> Imports:
#coding=utf-8

try:
    import os
except:
    pass

try:
    from urllib.request import urlretrieve
except:
    pass

try:
    import requests
except:
    pass

try:
    import zipfile
except:
    pass

try:
    from tensorflow.keras.datasets import cifar10
except:
    pass

try:
    from tensorflow.keras.utils import to_categorical
except:
    pass

try:
    from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, ReLU, LeakyReLU
except:
    pass

try:
    from tensorflow.keras import Sequential
except:
    pass

try:
    from tensorflow.keras.callbacks import EarlyStopping
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    import hyperas
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    import pandas
except:
    pass

>>> Hyperas sear

Let's print the log in descending order with respect to 'best validation accuracy' and examine the results.

In [1]:
import pandas
# Load the semicolon-separated search log and display it with the
# highest validation accuracy first
df = pandas.read_csv('hyperas-log.csv', sep=';')
df.sort_values(by='best_val_acc', ascending=False)

Unnamed: 0,kernel_size,dropout_1,dropout_2,dropout_3,activation,unit_size,opti,batch_size,best_val_acc
29,3,0.03196,0.086911,0.049231,<tensorflow.python.keras.layers.advanced_activ...,64,adam,64,0.7505
46,4,0.127256,0.071844,0.41654,<tensorflow.python.keras.layers.advanced_activ...,64,adam,64,0.7404
34,3,0.135699,0.241943,0.107268,<tensorflow.python.keras.layers.advanced_activ...,64,adam,64,0.7396
20,3,0.017025,0.141627,0.17207,<tensorflow.python.keras.layers.advanced_activ...,64,adam,64,0.7367
33,3,0.044448,0.314108,0.16416,<tensorflow.python.keras.layers.advanced_activ...,128,adam,64,0.7342
25,3,0.096378,0.14415,0.038713,<tensorflow.python.keras.layers.advanced_activ...,64,adam,128,0.734
36,4,0.243746,0.238245,0.100664,<tensorflow.python.keras.layers.advanced_activ...,128,adam,64,0.7339
9,3,0.184321,0.192033,0.207181,<tensorflow.python.keras.layers.advanced_activ...,64,adam,128,0.7303
39,3,0.353507,0.293696,0.065324,<tensorflow.python.keras.layers.advanced_activ...,64,adam,64,0.728
27,3,0.003262,0.039924,0.032111,<tensorflow.python.keras.layers.advanced_activ...,64,adam,128,0.7264


# Observations:

> The best models mostly used adam as the optimizer, and the worst models were using sgd

> The number of neurons in the last hidden layer was mostly 64 in the best-performing models

> 