In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
import pydot
import graphviz
from clr_callback import CyclicLR

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.python.client import device_lib 
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet101V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input
from tensorflow.keras.models import save_model, load_model

# Seed TensorFlow's global random generator so runs are more repeatable.
tf.random.set_seed(42)

# Root folders of the train / validation / test image splits, given relative to the
# notebook's working directory. They are passed to create_tensorflow_datasets below.
train_directory = "./data/organized/train/"
val_directory = "./data/organized/val/"
test_directory = "./data/organized/test/"

In [2]:
# Allow TensorFlow to allocate GPU memory as needed, rather than pre-allocating the entire GPU memory at the
# start of program execution. This option allows for better monitoring of system resource utilization.
physical_devices = tf.config.list_physical_devices('GPU')

# Apply the setting to every visible GPU instead of indexing device 0: this avoids an IndexError on a
# CPU-only machine and keeps the memory-growth setting identical across all GPUs.
for gpu_device in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu_device, enable=True)
    except RuntimeError as error:
        # Raised when the GPU was already initialized, e.g. when this cell is re-run in a live kernel.
        print(f"Could not enable memory growth for {gpu_device.name}: {error}")

if not physical_devices:
    print("No GPU detected; TensorFlow will run on the CPU.")

In [4]:
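# Disabled call that plots the ResNet architecture. It needs a `res101_base` model in scope; in this notebook that name only exists inside build_resnet_classifier.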
# tf.keras.utils.plot_model(res101_base, show_shapes=True, expand_nested=True, show_dtype=True)

### Creating tf datasets

In [5]:
# ===============================================================================================================
# Builds the training, validation and test datasets from the directory layout produced by the
# 01_create_train_val_test_directories notebook.
# ===============================================================================================================
def create_tensorflow_datasets(image_size, train_directory, val_directory, test_directory, batch_size=32):
    """Load the three image splits with identical loading options.

    Args:
        image_size: Target image size forwarded to `image_dataset_from_directory`.
        train_directory: Root folder of the training images.
        val_directory: Root folder of the validation images.
        test_directory: Root folder of the test images.
        batch_size: Batch size used for all three datasets (default 32).

    Returns:
        A `(train_dataset, val_dataset, test_dataset)` tuple, in that order.
    """
    # Options shared by every split: inferred integer labels and smart resizing.
    shared_options = dict(labels='inferred',
                          label_mode='int',
                          image_size=image_size,
                          batch_size=batch_size,
                          smart_resize=True)

    # Load the splits in train -> val -> test order, exactly one call per directory.
    split_directories = (train_directory, val_directory, test_directory)
    train_split, val_split, test_split = [
        image_dataset_from_directory(directory=split_directory, **shared_options)
        for split_directory in split_directories
    ]

    return train_split, val_split, test_split

In [6]:
# Build all three splits at 520x520, the same spatial size that is later passed to
# build_and_train_resnet as input_shape=(520, 520, 3).
train_dataset, val_dataset, test_dataset = create_tensorflow_datasets(
    image_size=(520, 520),
    train_directory=train_directory,
    val_directory=val_directory,
    test_directory=test_directory,
    batch_size=32,
)

Found 10520 files belonging to 196 classes.
Found 3234 files belonging to 196 classes.
Found 2431 files belonging to 196 classes.


### Resnet Modeling Functions

In [7]:
# ===============================================================================================================
# This function is used to instantiate either the Adam or RMSprop optimizers with the desired learning rate.
# ===============================================================================================================
def get_optimizer(optimizer_name, lr):
    """Create a Keras optimizer from its short name.

    Args:
        optimizer_name: Either 'rmsprop' or 'adam'.
        lr: Learning rate passed to the optimizer as `learning_rate`.

    Returns:
        The instantiated `tf.keras.optimizers` optimizer.

    Raises:
        ValueError: If `optimizer_name` is not one of the supported names.
    """
    if optimizer_name == 'rmsprop':
        # The Keras class is spelled `RMSprop`; the previous `RMSProp` spelling raised AttributeError.
        return tf.keras.optimizers.RMSprop(learning_rate=lr)

    if optimizer_name == 'adam':
        return tf.keras.optimizers.Adam(learning_rate=lr)

    # Fail with a clear message instead of an UnboundLocalError on the return statement.
    raise ValueError(f"Unsupported optimizer_name {optimizer_name!r}; expected 'rmsprop' or 'adam'.")

In [8]:
# ===============================================================================================================
# Generates a time-stamped file path used to save the "best model" found during training.
# ===============================================================================================================
def get_model_save_path(optimizer, lr, epochs, batch_size, model_name):
    """Build the checkpoint path for a training run.

    The file lives under `<current working directory>/trained_models/convnet/` and its name
    combines a timestamp, the model name, the epoch count, a fixed optimizer tag and the
    batch size.

    Args:
        optimizer: Accepted for interface compatibility; not used in the file name.
        lr: Accepted for interface compatibility; not used in the file name.
        epochs: Number of training epochs, written after `_E`.
        batch_size: Batch size, written after `_B`.
        model_name: Free-form model label placed after the timestamp.

    Returns:
        The full path of the `.keras` file as a string.
    """
    # The optimizer tag is a fixed label; it does not depend on the `optimizer` or `lr` arguments.
    optimizer_tag = "Adam_Cyclic_Decay_4e-4_4e-6"
    run_stamp = time.strftime("%Y_%m_%d-%H_%M_%S")

    file_name = f"{run_stamp}_{model_name}_E{epochs}_O{optimizer_tag}_B{batch_size}.keras"
    relative_path = "trained_models/convnet/" + file_name

    return os.path.join(os.getcwd(), relative_path)

In [9]:
# ===============================================================================================================
# This function instantiates and compiles a model that contains the following:
#
# 1. A pretrained ResNet101V2 base model with all weights frozen.
# 2. A set of keras preprocessing layers to perform random data augmentations.
# 3. A "model top" (output dense classifier) that needs to be trained.
# ===============================================================================================================
def build_resnet_classifier(input_shape, optimizer, learning_rate, metrics,
                            num_classes=196, dense_units=480, dropout_rate=0.6):
    """Build and compile a frozen-backbone ResNet101V2 classifier.

    Args:
        input_shape: Shape of one input image, e.g. (520, 520, 3).
        optimizer: Optimizer name understood by `get_optimizer` ('adam' or 'rmsprop').
        learning_rate: Learning rate handed to `get_optimizer`.
        metrics: Metrics list forwarded to `model.compile`.
        num_classes: Size of the softmax output layer. Defaults to 196, the value that was
            previously hard-coded.
        dense_units: Width of the hidden dense layer in the classifier head (default 480).
        dropout_rate: Dropout rate applied after the hidden dense layer (default 0.6).

    Returns:
        The compiled `keras.Model`, using sparse categorical cross-entropy as its loss.
    """
    compiled_optimizer = get_optimizer(optimizer, learning_rate)

    res101_base = keras.applications.ResNet101V2(weights='imagenet',
                                                 input_shape=input_shape,
                                                 include_top=False)

    # Freeze the resnet backbone so only the classifier head is trained.
    res101_base.trainable = False

    # Random augmentations bundled into a single layer.
    data_augmentation = keras.Sequential([layers.experimental.preprocessing.RandomFlip("horizontal"),
                                          layers.experimental.preprocessing.RandomRotation(0.2),
                                          layers.experimental.preprocessing.RandomZoom(0.2),
                                          layers.experimental.preprocessing.RandomTranslation(height_factor=0.2,
                                                                                              width_factor=0.2)])

    inputs = keras.Input(shape=input_shape)

    # Augment first, then apply the preprocessing the ResNet V2 family expects.
    x = data_augmentation(inputs)
    x = preprocess_input(x)

    # training=False tells the resnet to run its forward pass in inference mode
    # rather than training mode.
    x = res101_base(x, training=False)

    # Classifier head: pooled features -> hidden dense layer -> dropout -> softmax.
    x = layers.GlobalAvgPool2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(dense_units, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = keras.Model(inputs, outputs)

    # Integer labels (label_mode='int' in the dataset loader) pair with the sparse loss.
    model.compile(loss=SparseCategoricalCrossentropy(),
                  optimizer=compiled_optimizer,
                  metrics=metrics)

    return model

In [10]:
# ===============================================================================================================
# This function is used for the following:
#
# 1. Setup callbacks and fit the model instantiated by the function above.
# 2. Save the model history attribute after training is completed.
# ===============================================================================================================
def train_convnet_classifier(model, train_ds, val_ds, epochs=20, model_save_path=None):
    """Fit `model` with a cyclic learning-rate callback and persist the run artifacts.

    When `model_save_path` is given, the best model (lowest `val_loss`) is checkpointed there
    during training. Afterwards three sibling files are written next to it on a best-effort
    basis: `<stem>_HISTORY.csv`, `<stem>_LR_HISTORY.csv` and `<stem>_FINAL_SAVE.keras`. A
    failure to write any of them is reported and does not abort the function.

    Args:
        model: Compiled Keras model to train.
        train_ds: Training dataset passed to `model.fit`.
        val_ds: Validation dataset passed to `model.fit`.
        epochs: Number of epochs to train for (default 20).
        model_save_path: Checkpoint path. If None, nothing is checkpointed or saved.

    Returns:
        A `(history, clr)` tuple: the object returned by `model.fit` and the CyclicLR callback.
    """
    # step_size=1645 looks like 5 epochs x 329 batches (10520 training images at batch size 32)
    # -- TODO confirm and adjust if the data or batch size changes.
    clr = CyclicLR(base_lr=6e-6,
                   max_lr=1.2e-3,
                   step_size=1645,
                   mode='exp_range',
                   gamma=0.99997)

    callbacks = [clr]
    if model_save_path is not None:
        # Keep the checkpoint callback ahead of the LR schedule, as before.
        callbacks.insert(0, keras.callbacks.ModelCheckpoint(filepath=model_save_path,
                                                            save_best_only=True,
                                                            monitor="val_loss",
                                                            verbose=1))

    history = model.fit(train_ds,
                        epochs=epochs,
                        validation_data=val_ds,
                        callbacks=callbacks)

    if model_save_path is None:
        print("No model_save_path given; skipping history and model saves.")
        return history, clr

    # Strip only the final extension. Splitting on the first "." would truncate the path
    # whenever a directory or the model name contains a dot.
    path_stem = os.path.splitext(model_save_path)[0]

    try:
        pd.DataFrame(history.history).to_csv(path_stem + "_HISTORY.csv", index=False)
    except Exception as error:
        print(f"Couldn't save history! ({error!r})")

    try:
        pd.DataFrame(clr.history).to_csv(path_stem + "_LR_HISTORY.csv", index=False)
    except Exception as error:
        print(f"Couldn't save LR history!!! ({error!r})")

    try:
        # NOTE(review): save_format='tf' is combined with a '.keras' file name -- confirm the
        # resulting on-disk format is the intended one.
        save_model(model=model,
                   filepath=path_stem + "_FINAL_SAVE.keras",
                   overwrite=True,
                   include_optimizer=True,
                   save_format='tf')
    except Exception as error:
        print(f"Couldn't save model! ({error!r})")

    return history, clr

In [11]:
# ===============================================================================================================
# This function takes as input a tensorflow dataset containing test data, and either a trained model
# or the path to where a trained model is located. 
#
# The function then evaluates the model using the test data and returns the associated test metrics.
# ===============================================================================================================
def test_convnet_classifier(test_ds, model=None, model_path=None):
    
    if model is not None:
        
        test_loss, test_acc = model.evaluate(test_ds)
        print("\n========================== Model Test Results ===============================")
        print(f"Test Accuracy: {test_acc}")
        print(f"Test Loss: {test_loss}")
        print("=============================================================================\n")
        
    elif model_path is not None:
        
        model = keras.models.load_model(model_path)
        test_loss, test_acc = model.evaluate(test_dataset)
        print("\n========================== Model Test Results ===============================")
        print(f"Test Accuracy: {test_acc}")
        print(f"Test Loss: {test_loss}")
        print("=============================================================================\n")
        
    else:
        print("\n========================== Error ===============================")
        print("Must pass either a trained model or a path to a trained model file.")
        print("Cannot have both model and model_path = None")
        print("=============================================================================\n")
        return -1
    
    return test_loss, test_acc, model

In [12]:
# ===============================================================================================================
# This function uses all the other functions defined above to drive the entire model training process.
# The full process implemented by this function is as follows:
#
# 1. Instantiate and compile the model using the build_resnet_classifier function.
# 2. Generate a unique filepath to save the best model found during training.
# 3. Train the model and save the history attribute after training.
# 4. Evaluate the best model on the test data.
# ===============================================================================================================
def build_and_train_resnet(train_ds, val_ds, test_ds = None, input_shape=(520, 520, 3), optimizer='adam', metrics=['accuracy'],
                           epochs=20, batch_size=32, lr = 0.001, model_name = 'resnet101_ARCH2_LR_DECAY_CYCLIC'):
    """Build, train and optionally test the ResNet classifier.

    Args:
        train_ds: Training dataset.
        val_ds: Validation dataset.
        test_ds: Optional test dataset. When given, the checkpointed model is reloaded from
            disk and evaluated on it.
        input_shape: Shape of one input image.
        optimizer: Optimizer name forwarded to `build_resnet_classifier`.
        metrics: Metrics forwarded to `build_resnet_classifier`.
        epochs: Number of training epochs.
        batch_size: Only used to build the checkpoint file name; the datasets are already batched.
        lr: Learning rate used to create the optimizer. NOTE(review): training also attaches a
            CyclicLR callback, which presumably overrides this value -- confirm.
        model_name: Label embedded in the checkpoint file name.

    Returns:
        `(training_history, learning_rate_history, best_model)`. `best_model` is the model
        reloaded from the checkpoint when `test_ds` is given; otherwise it is the in-memory
        model as it stands after the last epoch.
    """
    # Build and compile the model
    model = build_resnet_classifier(input_shape=input_shape, optimizer=optimizer, learning_rate=lr, metrics=metrics)

    # summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()

    model_save_path = get_model_save_path(optimizer=optimizer, lr=lr, epochs=epochs, batch_size=batch_size, model_name=model_name)

    # Train the model
    training_history, learning_rate_history = train_convnet_classifier(model, train_ds, val_ds, epochs=epochs, model_save_path=model_save_path)

    # Default to the trained in-memory model so the return value is defined without test data.
    best_model = model

    if test_ds is not None:
        _, _, best_model = test_convnet_classifier(test_ds, model=None, model_path=model_save_path)

    return training_history, learning_rate_history, best_model

In [13]:
# Full training run: 300 epochs at batch size 32 on the 520x520 datasets built above.
# lr=6e-6 is the same value as the CyclicLR base_lr set inside train_convnet_classifier.
history, lr_history, model = build_and_train_resnet(
    train_ds=train_dataset,
    val_ds=val_dataset,
    test_ds=test_dataset,
    input_shape=(520, 520, 3),
    optimizer='adam',
    metrics=['accuracy'],
    lr=6e-6,
    epochs=300,
    batch_size=32,
    model_name='LR_DECAY_1_2e-3_6e-6',
)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 520, 520, 3)]     0         
_________________________________________________________________
sequential (Sequential)      (None, 520, 520, 3)       0         
_________________________________________________________________
tf.math.truediv (TFOpLambda) (None, 520, 520, 3)       0         
_________________________________________________________________
tf.math.subtract (TFOpLambda (None, 520, 520, 3)       0         
_________________________________________________________________
resnet101v2 (Functional)     (None, 17, 17, 2048)      42626560  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0     



Epoch 2/300

Epoch 00002: val_loss improved from 5.17362 to 4.73771, saving model to C:\Users\Braden\Desktop\Data_Science\04_General_Assembly\05_Projects\03_car\trained_models/convnet\2021_07_24-09_17_59_LR_DECAY_1_2e-3_6e-6_E300_OAdam_Cyclic_Decay_4e-4_4e-6_B32.keras
Epoch 3/300

Epoch 00003: val_loss improved from 4.73771 to 4.19969, saving model to C:\Users\Braden\Desktop\Data_Science\04_General_Assembly\05_Projects\03_car\trained_models/convnet\2021_07_24-09_17_59_LR_DECAY_1_2e-3_6e-6_E300_OAdam_Cyclic_Decay_4e-4_4e-6_B32.keras
Epoch 4/300

Epoch 00004: val_loss improved from 4.19969 to 3.79812, saving model to C:\Users\Braden\Desktop\Data_Science\04_General_Assembly\05_Projects\03_car\trained_models/convnet\2021_07_24-09_17_59_LR_DECAY_1_2e-3_6e-6_E300_OAdam_Cyclic_Decay_4e-4_4e-6_B32.keras
Epoch 5/300

Epoch 00005: val_loss improved from 3.79812 to 3.53814, saving model to C:\Users\Braden\Desktop\Data_Science\04_General_Assembly\05_Projects\03_car\trained_models/convnet\2021_07_2