In [1]:
import os
import gc
import numpy as np
from numba import cuda
import cv2
import tensorflow as tf
from tensorflow import keras
# import tensorflow_addons as tfa
import kerastuner as kt
from sklearn.model_selection import train_test_split
import random
from tqdm.notebook import tqdm
from tqdm.keras import TqdmCallback
# import tqdm

# # quietly deep-reload tqdm
# import sys
# from IPython.lib import deepreload
# stdout = sys.stdout
# sys.stdout = open('junk','w')
# deepreload.reload(tqdm)
# sys.stdout = stdout

# Enumerate the GPUs visible to TensorFlow and switch on memory growth for each
# of them before any op has had a chance to initialise the devices.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print('Number of GPUs available: ', len(physical_devices))
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, enable=True)

# Seed every global RNG the notebook can draw from, not only the stdlib one:
# `random` drives the image subsampling, while NumPy/TensorFlow state feeds
# weight initialisation and batch shuffling.
SEED = 19980603
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

Number of GPUs available:  2


In [2]:
# Display the TensorFlow version this notebook was executed with.
tf.__version__

'2.2.0'

In [3]:
def createImageData(data_dir, subdir, img_size=96, subsample_divisor=7):
    """Load a random subsample of grayscale JPEG images from labelled folders.

    Every entry of ``subdir`` names a folder inside ``data_dir``. The index of
    that entry in ``subdir`` becomes the integer class label of each image read
    from the folder.

    Parameters
    ----------
    data_dir : str
        Directory containing the class folders.
    subdir : sequence of str
        Class folder names; ``subdir.index(name)`` is the label.
    img_size : int, optional
        Side length of the square images returned. Images whose shape is not
        already ``(img_size, img_size)`` are resized to it.
    subsample_divisor : int or float, optional
        ``int(n_files / subsample_divisor)`` files are drawn at random (using
        the module-level ``random`` state) from each folder. The default of 7
        keeps one seventh of every folder.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Features, Labels)`` where ``Features`` has shape
        ``(n, img_size, img_size, 1)`` with pixel values divided by 255.0 and
        ``Labels`` has shape ``(n, 1)``.

    Raises
    ------
    ValueError
        If ``subsample_divisor`` is smaller than 1.

    Warns
    -----
    UserWarning
        If some files could not be decoded; those files are skipped.
    """
    import warnings

    if subsample_divisor < 1:
        raise ValueError("subsample_divisor must be >= 1, got %r" % (subsample_divisor,))

    Features = []
    Labels = []
    unreadable = []
    target_shape = (img_size, img_size)

    with tqdm(total=len(subdir), desc="Directory progress", position=0) as pbar:
        for directory in subdir:
            path = os.path.join(data_dir, directory)
            class_num = subdir.index(directory)
            # os.listdir() returns names in a filesystem-dependent order; sort
            # first so the seeded shuffle selects the same files everywhere.
            images = sorted(name for name in os.listdir(path) if name.endswith('jpeg'))
            random.shuffle(images)
            images = images[:int(len(images) / subsample_divisor)]
            with tqdm(total=len(images), desc="Reading images", position=1) as pbar2:
                for image in images:
                    image_path = os.path.join(path, image)
                    pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                    pbar2.update()
                    if pixels is None:
                        # imread signals an unreadable/corrupt file with None
                        # rather than raising; skip it and report afterwards.
                        unreadable.append(image_path)
                        continue
                    if pixels.shape != target_shape:
                        # Without this the final reshape would fail (or silently
                        # scramble pixels) for images of any other size.
                        pixels = cv2.resize(pixels, target_shape, interpolation=cv2.INTER_AREA)
                    Features.append(pixels / 255.0)
                    Labels.append(class_num)
            pbar.update()

    if unreadable:
        warnings.warn(
            "Skipped %d unreadable image(s), e.g. %s"
            % (len(unreadable), ', '.join(unreadable[:5])),
            stacklevel=2,
        )

    Features = np.array(Features).reshape((-1, img_size, img_size, 1))
    Labels = np.array(Labels).reshape((-1, 1))
    return (Features, Labels)


def build_model(hp):
    """Build and compile the binary-classification CNN for one trial.

    Three hyperparameters are requested from ``hp``, in this order:
    ``'units'`` (32 to 64 in steps of 16, used as the filter count of both
    convolution layers), ``'dropout'`` (rate of the single dropout layer) and
    ``'optimizer'`` (``'adam'`` or ``'rmsprop'``).

    The network takes 96x96 single-channel inputs and ends in one sigmoid
    unit; it is compiled with binary cross-entropy and an accuracy metric.
    """
    n_filters = hp.Int('units', min_value=32, max_value=64, step=16)
    drop_rate = hp.Choice('dropout', [0.2, 0.3, 0.4, 0.5])
    optimizer_name = hp.Choice('optimizer', ['adam', 'rmsprop'])

    layers = keras.layers
    # Layers are instantiated top to bottom, in the order they are stacked.
    stack = [
        layers.Conv2D(n_filters, kernel_size=(3, 3), activation='relu', input_shape=(96, 96, 1)),
        layers.Conv2D(n_filters, kernel_size=(3, 3), activation='relu'),
        layers.MaxPool2D(pool_size=(2, 2), strides=2),
        layers.Dropout(drop_rate),
        layers.Flatten(),
        layers.Dense(units=128, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ]
    model = keras.models.Sequential(stack)

    model.compile(optimizer=optimizer_name, loss='binary_crossentropy', metrics=['accuracy'])
    return model


class MyTuner(kt.tuners.BayesianOptimization):
    """Bayesian-optimisation tuner that also searches over epochs and batch size."""

    def run_trial(self, trial, *args, **kwargs):
        """Run one trial with trial-specific ``epochs`` and ``batch_size``.

        Both values are drawn from the trial's hyperparameters and written into
        ``kwargs``, overwriting any ``epochs``/``batch_size`` the caller passed,
        before the call is delegated to the parent implementation.

        Returns whatever the parent ``run_trial`` returns.
        """
        hp = trial.hyperparameters
        # min 30, max 50, step 20: the searchable values are 30 and 50.
        kwargs['epochs'] = hp.Int('epochs', min_value=30, max_value=50, step=20)
        kwargs['batch_size'] = hp.Choice('batch_size', [64, 128])
        # Hand the parent's result back instead of dropping it.
        # NOTE(review): later KerasTuner releases are understood to read trial
        # metrics from this return value; where the parent returns None this is
        # a no-op. Confirm against the installed version.
        return super().run_trial(trial, *args, **kwargs)


class FreeMemoryCallback(keras.callbacks.Callback):
    """Callback that cleans up once a training run has finished."""

    def on_train_end(self, logs=None):
        """Clear the Keras backend session, then force a garbage-collection pass.

        ``logs`` is accepted to match the callback interface and is not used.
        """
        backend = tf.keras.backend
        backend.clear_session()
        gc.collect()

In [4]:
# Root directory that holds the datasets. The default is the path the notebook
# was originally run with; set RUDOLPH_DATA_ROOT to run it on another machine
# without editing this cell.
data_root = os.environ.get('RUDOLPH_DATA_ROOT', '/home/campbell/Documents/Rudolph/data/')
# Every relative path used afterwards (the dataset folder below, the tuner's
# 'logs' directory) is resolved against this working directory.
os.chdir(data_root)
dataset_directory = 'MetazoaZoonosisData/train'
# Order matters: the list index of each folder is used as its class label.
data_subdirectories = ['human-false', 'human-true']

In [5]:
# Load the image subsample and its labels from the two class folders.
X, y = createImageData(data_dir=dataset_directory, subdir=data_subdirectories)

# Hold out 25% of the samples as a test set; the fixed random_state keeps the
# split repeatable.
# NOTE(review): the split is not stratified, and the recorded progress output
# shows very unequal folder sizes (2857 vs 27875 images) - consider passing
# stratify=y; confirm with the author.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=19970912, test_size=0.25)

# Drop the references to the unsplit arrays; only the four splits are kept.
del X, y
print(X_train.shape)
print(X_test.shape)

Directory progress:   0%|          | 0/2 [00:00<?, ?it/s]

Reading images:   0%|          | 0/2857 [00:00<?, ?it/s]

Reading images:   0%|          | 0/27875 [00:00<?, ?it/s]

Appending features and labels:   0%|          | 0/30732 [00:00<?, ?it/s]

(23049, 96, 96, 1)
(7683, 96, 96, 1)


In [6]:
# Sanity check: each label array should be a column vector with one row per sample.
print(y_test.shape)
print(y_train.shape)

(7683, 1)
(23049, 1)


In [7]:
# Peek at the training labels (NumPy elides the middle of a long array).
y_train

array([[1],
       [1],
       [1],
       ...,
       [1],
       [1],
       [1]])

In [8]:
# Convert both label arrays to float32 column vectors of shape (n, 1).
y_train, y_test = (
    np.asarray(labels).astype('float32').reshape((-1, 1))
    for labels in (y_train, y_test)
)

In [9]:
# Callbacks passed to tuner.search():
#   * FreeMemoryCallback - clears the Keras session and runs gc when training ends.
#   * EarlyStopping      - monitors val_accuracy with a patience of 5 epochs.

callbacks = [FreeMemoryCallback(), keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5)]

In [10]:
# Uses same arguments as the BayesianOptimization Tuner.
# The objective is val_accuracy and at most 5 trials are requested; state is
# kept under logs/BayOptHparams relative to the working directory.
# NOTE(review): with overwrite left disabled, the tuner reloads earlier state
# from that directory (see the "Reloading Oracle" log line in the recorded
# output), so a re-run may resume or skip the search instead of starting
# fresh - confirm this is intended.
tuner = MyTuner(build_model,
#                 overwrite=True,
                seed=710219,
                objective='val_accuracy',
                directory='logs',
                project_name='BayOptHparams',
                max_trials=5
                )

INFO:tensorflow:Reloading Oracle from existing project logs/BayOptHparams/oracle.json
INFO:tensorflow:Reloading Tuner from logs/BayOptHparams/tuner0.json


In [11]:
# Print the tuner's hyperparameter search space.
tuner.search_space_summary()

In [12]:
# Run the search on the training split, holding out 20% of it for validation;
# epochs and batch_size are filled in per trial by MyTuner.run_trial.
tuner.search(X_train, y_train, validation_split=0.2, verbose=2, callbacks=callbacks)

Epoch 1/30
145/145 - 30s - loss: 0.4162 - accuracy: 0.8824 - val_loss: 0.2726 - val_accuracy: 0.9082
Epoch 2/30
145/145 - 22s - loss: 0.1809 - accuracy: 0.9313 - val_loss: 0.1077 - val_accuracy: 0.9536
Epoch 3/30
145/145 - 22s - loss: 0.1173 - accuracy: 0.9534 - val_loss: 0.0814 - val_accuracy: 0.9659
Epoch 4/30
145/145 - 22s - loss: 0.0818 - accuracy: 0.9714 - val_loss: 0.5642 - val_accuracy: 0.7538
Epoch 5/30
145/145 - 23s - loss: 0.0639 - accuracy: 0.9759 - val_loss: 0.0629 - val_accuracy: 0.9761
Epoch 6/30
145/145 - 23s - loss: 0.0471 - accuracy: 0.9813 - val_loss: 0.0642 - val_accuracy: 0.9764
Epoch 7/30
145/145 - 23s - loss: 0.0387 - accuracy: 0.9863 - val_loss: 0.0645 - val_accuracy: 0.9809
Epoch 8/30
145/145 - 23s - loss: 0.0324 - accuracy: 0.9880 - val_loss: 0.0482 - val_accuracy: 0.9809
Epoch 9/30
145/145 - 23s - loss: 0.0231 - accuracy: 0.9922 - val_loss: 0.0554 - val_accuracy: 0.9857
Epoch 10/30
145/145 - 23s - loss: 0.0200 - accuracy: 0.9939 - val_loss: 0.0729 - val_accura

INFO:tensorflow:Oracle triggered exit


In [13]:
# Print a summary of the trials the tuner has recorded.
tuner.results_summary()

In [14]:
# Keep the first model returned by get_best_models()
# (presumably the one ranked highest on the val_accuracy objective - confirm).
best_model = tuner.get_best_models()[0]

In [15]:
# Inspect the layer-by-layer configuration of the selected model.
best_model.get_config()

{'name': 'sequential',
 'layers': [{'class_name': 'Conv2D',
   'config': {'name': 'conv2d',
    'trainable': True,
    'batch_input_shape': (None, 96, 96, 1),
    'dtype': 'float32',
    'filters': 48,
    'kernel_size': (3, 3),
    'strides': (1, 1),
    'padding': 'valid',
    'data_format': 'channels_last',
    'dilation_rate': (1, 1),
    'activation': 'relu',
    'use_bias': True,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'kernel_regularizer': None,
    'bias_regularizer': None,
    'activity_regularizer': None,
    'kernel_constraint': None,
    'bias_constraint': None}},
  {'class_name': 'Conv2D',
   'config': {'name': 'conv2d_1',
    'trainable': True,
    'dtype': 'float32',
    'filters': 48,
    'kernel_size': (3, 3),
    'strides': (1, 1),
    'padding': 'valid',
    'data_format': 'channels_last',
    'dilation_rate': (1, 1),
    'activation': 'relu',
    'use

In [16]:
# Print each layer's output shape and parameter count for the selected model.
best_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 94, 94, 48)        480       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 92, 92, 48)        20784     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 46, 46, 48)        0         
_________________________________________________________________
dropout (Dropout)            (None, 46, 46, 48)        0         
_________________________________________________________________
flatten (Flatten)            (None, 101568)            0         
_________________________________________________________________
dense (Dense)                (None, 128)               13000832  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 1