# Jupyter Notebook to train a model

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import datetime

# Seed NumPy's and TensorFlow's global random generators with the same fixed
# value so that repeated runs start from the same random state.
# NOTE(review): this alone may not make GPU training bit-for-bit reproducible — confirm if that is required.
np.random.seed(42)
tf.random.set_seed(42)

2021-12-06 12:35:46.433961: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


List the devices visible to TensorFlow (CPU, GPU); used to check whether the GPU is recognized.

In [2]:
# Display the devices TensorFlow can currently see; a GPU entry in the
# returned list means the GPU was detected.
tf.config.get_visible_devices()

2021-12-06 12:35:49.508380: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-12-06 12:35:49.515645: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-12-06 12:35:49.575862: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-06 12:35:49.576076: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8225GHz coreCount: 20 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 298.32GiB/s
2021-12-06 12:35:49.576101: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2021-12-06 12:35:49.606929: I tensorflow/stream_executor/platform/d

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

atform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2021-12-06 12:35:49.623277: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2021-12-06 12:35:49.627922: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2021-12-06 12:35:49.658841: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10
2021-12-06 12:35:49.663387: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.10
2021-12-06 12:35:49.722320: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2021-12-06 12:35:49.722470: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, 

## Methods

### Saves a trained model's architecture as a JSON file and its weights as an HDF5 (.h5) file

In [3]:
def save_model(model, model_name):
    """Persist a model's architecture as JSON and its weights as HDF5.

    Two files are written below ``./saved_models`` (created if missing):
    ``{model_name}.json`` with the output of ``model.to_json()`` and
    ``{model_name}_weights.h5`` written by ``model.save_weights``.

    Args:
        model: Object exposing ``to_json()`` and ``save_weights(path)``.
        model_name: Base name (without extension) used for both files.

    Raises:
        AttributeError: If ``model`` has no ``to_json``/``save_weights``
            (for example when a training history is passed by mistake).
    """
    import os  # local import so the function does not rely on another cell

    # Serialize first: if the wrong kind of object was passed, this fails
    # before anything is created on disk.
    my_model = model.to_json()

    # A fresh checkout has no output folder yet; open() would otherwise
    # raise FileNotFoundError.
    os.makedirs('./saved_models', exist_ok=True)

    with open(f'./saved_models/{model_name}.json', "w") as file:
        file.write(my_model)
    # serialize weights to HDF5
    model.save_weights(f'./saved_models/{model_name}_weights.h5')

### used to build the base model using predefined architectures
currently: vgg16, xception, resnet

In [23]:
def build_base_model(architecture, weights):
    """Build a pre-defined convolutional base model without its top layers.

    The model is created on a fresh ``(224, 224, 3)`` Keras input tensor
    with ``include_top=False``.

    Args:
        architecture: One of ``'vgg16'``, ``'xception'`` or ``'resnet'``
            (``'resnet'`` selects ``ResNet50``).
        weights: Passed through unchanged as the ``weights`` argument of the
            Keras application constructor (this notebook uses ``'imagenet'``).

    Returns:
        The Keras application model for the requested architecture.

    Raises:
        ValueError: If ``architecture`` is not one of the supported names.
            Previously the function fell through and returned ``None``.
    """
    supported = ('vgg16', 'xception', 'resnet')
    # Validate before touching TensorFlow so a typo fails fast and loudly.
    if architecture not in supported:
        raise ValueError(
            f"Unknown architecture {architecture!r}; expected one of {supported}"
        )

    # Named input_tensor so the builtin input() is no longer shadowed.
    input_tensor = tf.keras.Input(shape=(224, 224, 3))
    if architecture == 'vgg16':
        return tf.keras.applications.vgg16.VGG16(weights=weights, include_top=False, input_tensor=input_tensor)
    if architecture == 'xception':
        return tf.keras.applications.xception.Xception(weights=weights, include_top=False, input_tensor=input_tensor)
    return tf.keras.applications.resnet.ResNet50(weights=weights, include_top=False, input_tensor=input_tensor)

### gets base model as input and builds a new top layer and returns the model with custom top layers

In [25]:
def build_model(base_model):
    """Stack a custom single-output head on top of ``base_model``.

    The head is applied to ``base_model.output`` in this order: Flatten,
    Dense(1000), Dropout(0.25), BatchNormalization, Dense(1, sigmoid).

    Args:
        base_model: Model providing ``.input`` and ``.output`` tensors.

    Returns:
        A ``tf.keras.Model`` mapping ``base_model.input`` to the sigmoid output.
    """
    head = base_model.output
    head = keras.layers.Flatten(name='flatten')(head)
    # NOTE(review): no activation is passed to the 1000-unit layer — confirm
    # that this is intended.
    head = keras.layers.Dense(1000)(head)
    head = keras.layers.Dropout(0.25)(head)
    head = keras.layers.BatchNormalization()(head)
    prediction = keras.layers.Dense(1, activation='sigmoid')(head)
    return tf.keras.Model(base_model.input, prediction)

### gets a model as input and returns a model compiled with the adam optimizer

In [26]:
def compile_model(model, alpha, beta1, beta2, metrics):
    """Compile ``model`` with Adam and binary cross-entropy loss.

    Args:
        model: Model to compile (compiled in place).
        alpha: Value passed to Adam as ``learning_rate``.
        beta1: Value passed to Adam as ``beta_1``.
        beta2: Value passed to Adam as ``beta_2``.
        metrics: Metrics forwarded to ``model.compile``.

    Returns:
        The same ``model`` object, now compiled.
    """
    adam = keras.optimizers.Adam(
        learning_rate=alpha,
        beta_1=beta1,
        beta_2=beta2,
    )
    model.compile(
        optimizer=adam,
        loss="binary_crossentropy",
        metrics=metrics,
    )
    return model

### gets a model as input and trains it on the data-set with the defined callbacks and epochs

In [27]:
def train_model(model, train_set, test_set, epochs, callback):
    """Fit ``model`` on ``train_set`` and return whatever ``model.fit`` returns.

    Args:
        model: Object exposing ``fit``.
        train_set: Training data, passed as the first positional argument.
        test_set: Data passed to ``fit`` as ``validation_data``.
        epochs: Number of epochs passed to ``fit``.
        callback: Passed to ``fit`` as ``callbacks``.

    Returns:
        The return value of ``model.fit``.
    """
    fit_options = {
        'validation_data': test_set,
        'epochs': epochs,
        'callbacks': callback,
    }
    return model.fit(train_set, **fit_options)

### gets a model as input and sets the `trainable` attribute of all its layers

In [28]:
def set_layers_trainable(trainable, input_model):
    """Assign ``trainable`` to the ``trainable`` attribute of every layer.

    Args:
        trainable: Value to assign to each layer's ``trainable`` attribute.
        input_model: Object whose ``layers`` attribute is iterated.

    Returns:
        None. The layers are modified in place.
    """
    every_layer = input_model.layers
    for current in every_layer:
        setattr(current, 'trainable', trainable)

## Data preprocessing

Preprocessing (augmentation and rescaling) applied to the images when the data set is loaded from disk with Keras' `flow_from_directory`.


In [4]:
# Generator configuration used when images are loaded from disk. The notes on
# each option follow the Keras ImageDataGenerator documentation.
image_gen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1 / 255,  # multiply every pixel value by 1/255
    rotation_range=20,  # degree range for random rotations
    width_shift_range=0.2,  # random horizontal shift (fraction of the width)
    height_shift_range=0.2,  # random vertical shift (fraction of the height)
    shear_range=0.15,  # shear intensity: a shear transform, not a crop
    zoom_range=0.15,  # random zoom factor in [0.85, 1.15]
    horizontal_flip=True,  # randomly flip images horizontally
    fill_mode='nearest',  # points outside the input are filled with the nearest pixel value
)

path to the data set

In [29]:
# Directories of the three data splits, given relative to the working
# directory of the local notebook.
train_data_path, test_data_path, validation_data_path = (
    './data/data_heavily_reduced/data_balanced/train',
    './data/data_heavily_reduced/data_balanced/test',
    './data/data_heavily_reduced/data_balanced/val',
)

generate training set by loading the images from their directories with flow_from_directory
important: the folder structure has to match! i.e {train} -> {ok,def}
at the "same time" the data augmentation is applied on the images through the ImageDataGenerator

In [30]:
# Training batches are read from the training directory through image_gen
# (augmentation plus rescaling), resized to 224x224, with binary labels.
train_image_gen = image_gen.flow_from_directory(
    train_data_path,
    class_mode='binary',
    target_size=(224, 224),
)

Found 7999 images belonging to 2 classes.


In [31]:
# Validation images must not be randomly augmented: metrics computed on
# rotated/shifted/flipped copies are noisy and differ from run to run.
# Only the 1/255 rescaling used for the training images is applied here,
# so the pixel range matches what the model sees during training.
valid_image_gen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1 / 255
).flow_from_directory(validation_data_path,
                      target_size=(224, 224),
                      class_mode='binary')

Found 999 images belonging to 2 classes.


In [32]:
# Test images must not be randomly augmented: the final evaluation should be
# computed on the unmodified images. Only the 1/255 rescaling used for the
# training images is applied here, so the pixel range matches training.
test_image_gen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1 / 255
).flow_from_directory(test_data_path,
                      target_size=(224, 224),
                      class_mode='binary')

Found 1001 images belonging to 2 classes.


# Training of the model

* build base model with vgg16 architecture and pretrained with imagenet weights
* add custom layers on base model
* compile model with adam optimizer
* freeze layers of pretrained vgg16 base model to not destroy imagenet weights
* train model

In [33]:
vgg16 = build_base_model('vgg16', 'imagenet')
model = build_model(vgg16)

# Freeze the pretrained base BEFORE compiling. Per the Keras transfer-learning
# guide, compile() fixes the trainable state of the model, so changing
# `trainable` afterwards is only taken into account once compile() is called again.
set_layers_trainable(False, vgg16)
model = compile_model(model, 0.0001, 0.9, 0.999, ['accuracy', 'Recall', 'Precision', 'AUC'])

# Monitor training on the validation split; the test split is kept untouched
# for the final evaluation so that the reported metrics stay unbiased.
history = train_model(model, train_image_gen, valid_image_gen, 5, [])

Epoch 1/5

2021-12-06 13:08:19.587442: W tensorflow/core/common_runtime/bfc_allocator.cc:248] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.35GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2021-12-06 13:08:19.587496: W tensorflow/core/common_runtime/bfc_allocator.cc:248] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.01GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2021-12-06 13:08:19.733622: W tensorflow/core/common_runtime/bfc_allocator.cc:248] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.74GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2021-12-06 13:08:20.357028: W tensorflow/core/common_runtime/bfc_allocator.cc:248] Alloc

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


* unfreeze layers of pretrained vgg16 base model
* define callbacks
    * tensorboard callback to generate reports which can be viewed in tensorboard
    * early stopping callback to stop training once the monitored metric has not improved for the number of epochs given by `patience`
* compile model with adam optimizer
* train model

In [None]:
# Unfreeze the pretrained base so that its weights are fine-tuned in this
# phase (the call previously passed False, which kept the base frozen).
set_layers_trainable(True, vgg16)

# TensorBoard reports are written to ./logs; early stopping watches val_loss
# with a patience of 20 epochs.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")
custom_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Re-compile after changing `trainable` so the new state is taken into account.
model = compile_model(model, 0.0001, 0.9, 0.999, ['accuracy', 'Recall', 'Precision', 'AUC'])

# val_loss (and therefore early stopping) is computed on the validation split;
# the test split stays reserved for the final evaluation.
history = train_model(model, train_image_gen, valid_image_gen, 100, [custom_early_stopping, tensorboard_callback])

2021-12-06 13:20:29.322337: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-12-06 13:20:29.322361: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-12-06 13:20:29.322631: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1365] Profiler found 1 GPUs
2021-12-06 13:20:29.331522: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcupti.so.10.1
2021-12-06 13:20:29.432615: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1415] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
2021-12-06 13:20:29.432775: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


Epoch 1/100
  1/250 [..............................] - ETA: 5:23 - loss: 0.0875 - accuracy: 0.9688 - recall: 0.9500 - precision: 1.0000 - auc: 0.9958

2021-12-06 13:20:31.076867: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-12-06 13:20:31.076888: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-12-06 13:20:31.077294: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1415] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI_ERROR_INSUFFICIENT_PRIVILEGES


  2/250 [..............................] - ETA: 1:09 - loss: 0.0695 - accuracy: 0.9766 - recall: 0.9607 - precision: 1.0000 - auc: 0.9969

2021-12-06 13:20:31.333021: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-12-06 13:20:31.337220: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:228]  GpuTracer has collected 0 callback api events and 0 activity events. 
2021-12-06 13:20:31.338865: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-12-06 13:20:31.342841: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./logs/train/plugins/profile/2021_12_06_13_20_31
2021-12-06 13:20:31.343501: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to ./logs/train/plugins/profile/2021_12_06_13_20_31/pop-os.trace.json.gz
2021-12-06 13:20:31.353036: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./logs/train/plugins/profile/2021_12_06_13_20_31
2021-12-06 13:20:31.355347: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped



# Evaluation of the model on the test set

In [None]:
# Evaluate on the test generator and unpack the five returned values.
# NOTE(review): the unpacking order assumes evaluate() returns the loss first,
# followed by the metrics in the order passed to compile_model
# (accuracy, Recall, Precision, AUC) — confirm against the Keras docs.
loss, accuracy, recall, precision, auc = model.evaluate(test_image_gen)

In [None]:
# F1 score
# F1 score: harmonic mean of precision and recall.
# When both are 0 (for example, no positive predictions at all) the formula
# would divide by zero; F1 is reported as 0.0 in that case.
precision_recall_sum = precision + recall
if precision_recall_sum > 0:
    f1 = 2 * ((precision * recall) / precision_recall_sum)
else:
    f1 = 0.0

print(f"loss: {loss}, \n"
      f"accuracy: {accuracy}, \n"
      f"recall: {recall}, \n"
      f"precision: {precision}, \n"
      f"auc: {auc}, \n"
      f"F1: {f1}")

# Saving the model to use it for predictions, heatmaps, etc.

In [34]:
# Write the model as ./saved_models/vgg16.json and ./saved_models/vgg16_weights.h5.
# NOTE(review): the AttributeError recorded in this cell's output means `model`
# was bound to a History object when the cell last ran; presumably stale kernel
# state — re-run after Restart & Run All to confirm.
save_model(model, 'vgg16')

AttributeError: 'History' object has no attribute 'to_json'