In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
import keras_tuner as kt
import tensorboard
from functools import partial

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.python.client import device_lib 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet101V2, Xception, InceptionResNetV2
from tensorflow.keras.applications import resnet_v2, xception, inception_resnet_v2
from tensorflow.keras.models import save_model, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.losses import SparseCategoricalCrossentropy

tf.random.set_seed(42)

global PROJECT_DIRECTORY
PROJECT_DIRECTORY = os.getcwd()

train_directory = "./data/organized/train/"
val_directory = "./data/organized/val/"
test_directory = "./data/organized/test/"

In [2]:
# Allow Tensorflow to allocate GPU memory as needed, rather than pre-allocating the entire GPU memory at the start of program execution.
# This option allows for better monitoring of system resource utilization.
physical_devices = tf.config.list_physical_devices('GPU')

tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

## Creating a tensorflow dataset from TFRecord Files

In [4]:
# ===============================================================================================================
# This function is used to return the file paths to the tensorflow record shard files that contain the precalculated
# base model (e.g. resnet) outputs. These files containing the precalculated outputs are created in the 
# 03_save_pretrained_model_outputs_tfr notebook.
#
# Additional logic has been implemented to give unique names to these files that would be useful if it was 
# ever desired to create multiple datasets worth of precalculated outputs (using randomly augmented image
# variations) and implement a system where the datasets are swapped out at the end of each epoch. The idea behind
# using such a system would be to better simulate the enviornment where the base model (resnet) is in the loop
# and therefore data augmentation can be utilized as a preprocessing layer in the network. 
#
# Using data augmentation as a preprocessing layer is highly desireable because it randomly augments the training
# images at each training iteration, which results in the model being unlikely to ever see the same image twice. 
#
# Unfortunately, using data augmentation as a preprocessing layer is not possible when precalculated resnet outputs
# are utilized (and the resnet is removed from the loop) because the resnet is no longer there to continually provide
# outputs for the randomly augmented images.
#
# The challenge with leaving the base model in the loop is that each training iteration becomes signiciantly more
# expensive, because the most time consuming part of the network is processing each image through the large base model.
# This increased overhead that occurs when leaving the base model in the loop makes it difficult to quickly
# try out different architectures and hyperparameter options.
# ===============================================================================================================
def get_all_tf_shard_paths(base_model_type, input_shape, output_pooling, epoch_num, dataset_type):
    
    global PROJECT_DIRECTORY
    
    valid_dataset_types = ['train', 'val', 'test']
    
    if dataset_type not in valid_dataset_types:
        print("/n===========================================================")
        print("Invalid input for parameter dataset_type")
        print(f"Valid inputs are: {valid_dataset_types}")
        print("===========================================================\n")
        return -1
    
    
    base_outputs_directory = os.path.join(os.path.join(PROJECT_DIRECTORY, "pretrained_model_output_features"),
                                          f"{base_model_type}_pool{str(output_pooling)}_inShape_{str(input_shape)}")
    
    
    all_dirs = os.listdir(base_outputs_directory)
    
    correct_dir = [directory for directory in all_dirs if (dataset_type in directory) and (f'epoch_{epoch_num}' in directory)]
        
    if len(correct_dir) != 1:
        print("/n============================ Error ===============================")
        print("Invalid directory filtering!")
        print(f"Filtering returned: {correct_dir}")
        return -1
        print("=========================================================================\n")
            
    correct_dir = correct_dir[0]
        
    filepath = os.path.join(base_outputs_directory, correct_dir)
    
    files = os.listdir(filepath)
    
    full_file_paths = [os.path.join(filepath, file) for file in files]
    
    return full_file_paths

In [5]:
# ===============================================================================================================
# This function is used to parse the tfrecord files containing the precalculated resnet outputs and reshape the
# tensors into the format that would be provided if the base model were left in the network.
#
# ===============================================================================================================
def parse_tfrecords(example):
    
    feature_description = {
        "dim_1": tf.io.FixedLenFeature([], tf.int64),
        "dim_2": tf.io.FixedLenFeature([], tf.int64),
        "dim_3": tf.io.FixedLenFeature([], tf.int64),
        "raw_image": tf.io.FixedLenFeature([], tf.string),
        "label": tf.io.FixedLenFeature([], tf.int64),
    }
    
    example = tf.io.parse_single_example(example, feature_description)
    
    dim_1 = example['dim_1']
    dim_2 = example['dim_2']
    dim_3 = example['dim_3']
    raw_image = example['raw_image']
    label = example['label']
    
    feature = tf.io.parse_tensor(raw_image, out_type=tf.float32)
    feature = tf.reshape(feature, shape=[dim_1, dim_2, dim_3])
    
    
    return (feature, label)

In [6]:
# ===============================================================================================================
# This function is not utilized because the SparseCategoricalCrossentropy loss function utilized by the 
# build model function expects the target labels to be integers rather than one-hot encoder. 
#
# If however, there was a desire to instead utilized one-hot encoded target labels and along with the similar
# categorical_crossentropy loss function, this one_hot_encode_label helper function could be utilize to efficiently
# perform the encoding as the data is read in.
# 
# ===============================================================================================================
def one_hot_encode_label(features, label):
    label = tf.one_hot(tf.cast(label, tf.int32), 196)
    return features, label

In [7]:
# ===============================================================================================================
# This function uses the helper functions defined above to drive the entire process of obtaining the filepaths
# for a desired set of tfrecord files, parsing the files and storing the information in a tensorflow dataset.
#
# ===============================================================================================================
def get_dataset_from_multiple_tfrecords(base_model_type, dataset_type, batch_size, input_shape, output_pooling, epoch_num, compression = "ZLIB"):
    
    filepaths = get_all_tf_shard_paths(base_model_type = base_model_type,
                                     input_shape = input_shape,
                                     output_pooling = output_pooling,
                                     epoch_num = epoch_num,
                                     dataset_type = dataset_type)
    
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    #create the dataset
    dataset = tf.data.TFRecordDataset(filepaths, compression_type = compression,num_parallel_reads=AUTOTUNE)

    #pass every single feature through our mapping function
    dataset = dataset.map(parse_tfrecords).shuffle(batch_size * 10).batch(32).prefetch(AUTOTUNE)
    
    # Uncomment if one-hot encoded targets are desired
    #dataset = dataset.map(lambda x, y : (x, one_hot_encode_label(y)))
    
    return dataset

### Custom file swapping sequence

In [10]:
# ===============================================================================================================
# The code in this cell is not currently utilized, but is left here as a starting point if there is a desire to
# implement the idea of swapping out unique copies of precalculated output data at each epoch (or some multiple 
# of epochs).
# 
# ===============================================================================================================
'''
class customSequence(Sequence):

    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.epoch = 0
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]

        return np.array([
            resize(imread(file_name), (200, 200))
               for file_name in batch_x]), np.array(batch_y)

    def on_epoch_end(self):
        if self.epoch % N == 0:
            pass
        
        # modify data
        self.epoch += 1''';

### Tuning

In [11]:
#===================================================================================
# Model building function to pass to keras tuner.
#===================================================================================
def tune_output_classifier(hp):
    
    # Number of Dense layers before the final Dense classifier. 
    num_layers = hp.Choice('num_layers', values=[0, 1, 2, 3])
    
    # These are always the first three layers no matter what. 
    inputs = keras.Input(shape=(17, 17, 2048))
    
    x = layers.GlobalAvgPool2D()(inputs)
    
    x = layers.Flatten()(x)
    
    #===================================================================================
    # Layer 3
    # If there are three Dense layers before the final dense classifier.
    #===================================================================================   
    if hp.get('num_layers') >= 3:
        with hp.conditional_scope("num_layers", [3]):
                 
            # L2 regularization on the layers weights and biases.
            layer3_kreg = hp.Choice('layer3_kreg', values = [0.0, 0.01, 0.001, 0.0001])
            layer3_breg = hp.Choice('layer3_breg', values = [0.0, 0.01, 0.001, 0.0001])
            
            # Number of hidden units in the dense layer 3.
            layer3_hidden_units = hp.Int('layer3_hidden_units',
                                         min_value=768,
                                         max_value=1024,
                                         step=32,
                                         default=1024)
            
            x = layers.Dense(layer3_hidden_units,
                             activation="relu",
                             kernel_regularizer=l2(layer3_kreg),
                             bias_regularizer=l2(layer3_breg))(x)
            
            # Dropout applied to the layers output.
            #layer3_dropout = hp.Choice('layer3_dropout', values=[0.35, 0.4, 0.5], default = 0.5)
            layer3_dropout = hp.Fixed('layer3_dropout', value=0.5)
            x = layers.Dropout(layer3_dropout)(x)
    
    #===================================================================================
    # Layer 2
    # If there are two or more dense layers before the final dense classifier.
    #===================================================================================   
    if hp.get('num_layers') >= 2:
        with hp.conditional_scope("num_layers", [2, 3]):
                
            # L2 regularization on the layers weights and biases.
            layer2_kreg = hp.Choice('layer2_kreg', values = [0.0, 0.01, 0.001, 0.0001])
            layer2_breg = hp.Choice('layer2_breg', values = [0.0, 0.01, 0.001, 0.0001])
            
            # Options for the number of hidden units in Dense layer 2
            layer2_hidden_units = hp.Int('layer2_hidden_units',
                                         min_value=512,
                                         max_value=768,
                                         step=32,
                                         default=512)
            
            # Adding the dense layer
            x = layers.Dense(layer2_hidden_units,
                             activation="relu",
                             kernel_regularizer=l2(layer2_kreg),
                             bias_regularizer=l2(layer2_breg))(x)
            
            # Dropout applied to the output of layer 2.
            #layer2_dropout = hp.Choice('layer2_dropout', values=[0.35, 0.4, 0.5], default = 0.5)
            layer2_dropout = hp.Fixed('layer2_dropout', value=0.5)
            x = layers.Dropout(layer2_dropout)(x)
    
    #===================================================================================
    # Layer 1
    # If there are one or more dense layers before the final dense classifier.
    #===================================================================================        
    if hp.get('num_layers') >= 1:
        with hp.conditional_scope("num_layers", [1, 2, 3]):
            
            # L2 regularization on the layers weights and biases.
            layer1_kreg = hp.Choice('layer1_kreg', values = [0.0, 0.01, 0.001, 0.0001])
            layer1_breg = hp.Choice('layer1_breg', values = [0.0, 0.01, 0.001, 0.0001])
            
            # Options for the number of hidden units in layer 2
            layer1_hidden_units = hp.Int('layer1_hidden_units',
                                         min_value=224,
                                         max_value=512,
                                         step=32,
                                         default=256)

            x = layers.Dense(layer1_hidden_units,
                             activation="relu",
                             kernel_regularizer=l2(layer1_kreg),
                             bias_regularizer=l2(layer1_breg))(x)
    
    # Dropout applied to the output of layer 2.
    #layer1_dropout = hp.Choice('layer1_dropout', values=[0.35, 0.4, 0.5], default = 0.5)
    layer1_dropout = hp.Fixed('layer1_dropout', value=0.5)
    x = layers.Dropout(layer1_dropout)(x)
    
    
    # Final Dense Classifier.
    outputs = layers.Dense(196, activation = 'softmax')(x)
    
    
    # Instantiate the model.
    model = keras.Model(inputs, outputs)
    
    learning_rate = hp.Choice(name='learning_rate',
                              values = [0.001, 0.002, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009, 0.00009])
    
    model.compile(loss = 'sparse_categorical_crossentropy',
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=  ['accuracy'])
    
    return model

In [20]:
#===================================================================================
# Model building function to pass to keras tuner.
#===================================================================================
def tune_output_classifier_experiment_two(hp):
    
    # These are always the first three layers no matter what. 
    inputs = keras.Input(shape=(17, 17, 2048))
    
    pool_type = hp.Choice('pooling_strategy', values=['avg', 'max'])
    
    if pool_type == 'avg':
        x = layers.GlobalAvgPool2D()(inputs)
    elif pool_type == 'max':
        x = layers.GlobalMaxPooling2D()(inputs)
    
    x = layers.Flatten()(x)
    
    # Options for the number of hidden units in layer 1
    layer1_hidden_units = hp.Int('layer1_hidden_units',
                                 min_value=256,
                                 max_value=512,
                                 step=32)

    x = layers.Dense(layer1_hidden_units,
                             activation="relu")(x)
    
    # Dropout applied to the output of layer 2.
    x = layers.Dropout(0.5)(x)
    
    # Final Dense Classifier.
    outputs = layers.Dense(196, activation = 'softmax')(x)
    
    # Instantiate the model.
    model = keras.Model(inputs, outputs)
    
    learning_rate = hp.Float(name='learning_rate',
                             min_value = 0.0001,
                             max_value = 0.002,
                             sampling = 'log')
    
    model.compile(loss = 'sparse_categorical_crossentropy',
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=  ['accuracy'])
    
    return model

In [21]:
# ===============================================================================================================
# This function is used to instantiate and run a keras tuner based on a provided model building function and 
# a path that specifies the location of tf datasets containing precalculated base model (e.g. resnet) outputs.
# ===============================================================================================================
def tune_driver(base_model_type, input_shape, max_epochs, tuner_iterations, tuner_reduction_factor, base_directory, tb_directory,
                verbose=True, patience=5, model_builder_func=tune_output_classifier, output_pooling=None, file_compression = 'ZLIB',
                batch_size = 32, expierment_name = ''):
    
    
    
    if verbose:
        start_time = time.time()
        print("\n=======================================================================================")
        print(f"Getting training dataset containing output features for the {base_model_type} model.")
        print("=======================================================================================\n")
    
    train_ds = get_dataset_from_multiple_tfrecords(base_model_type = base_model_type,
                                                   dataset_type = 'train',
                                                   batch_size = batch_size,
                                                   input_shape = input_shape,
                                                   output_pooling = output_pooling,
                                                   epoch_num = 1,
                                                   compression = file_compression)
    
    if verbose:
        print("\n=======================================================================================")
        print(f"Getting validation dataset containing output features for the {base_model_type} model.")
        print("=======================================================================================\n")
        
        
    val_ds = get_dataset_from_multiple_tfrecords(base_model_type = base_model_type,
                                                   dataset_type = 'val',
                                                   batch_size = batch_size,
                                                   input_shape = input_shape,
                                                   output_pooling = output_pooling,
                                                   epoch_num = 1,
                                                   compression = file_compression)
    
    
    if verbose:
        print("\n=======================================================================================")
        print("Finished loading training and validation sets.")
        print(f"Total time to load datasets: {time.time() - start_time}\n")
        print(f"Insantiating Keras Tuner.")
        print("=======================================================================================\n")

    
    if False:
        print("\n=======================================================================================")
        inspect_tf_dataset(train_ds)
        pause_for_input = input("Press any key to start the search process, or press 'q' to quit: ")
        if pause_for_input == 'q':
            return
        else:
            print("Starting the search!")
        print("=======================================================================================\n")
        
    tuner = kt.Hyperband(hypermodel=model_builder_func,
                         max_epochs=max_epochs,
                         objective=kt.Objective('accuracy', direction="max"), 
                         factor=tuner_reduction_factor,
                         directory=os.path.join(base_directory, f"keras_tuning_{expierment_name}{base_model_type}_{output_pooling}/"),
                         hyperband_iterations = tuner_iterations,
                         project_name=f"kt_hyperband_tuning_{base_model_type}")
    
    if verbose:
        print("\n=======================================================================================")
        print(f"Tuner Search Space Summary:\n")
        print(tuner.search_space_summary())
        pause_for_input = input("Press any key to start the search process, or press 'q' to quit: ")
        if pause_for_input == 'q':
            return
        else:
            print("Starting the search!")
        print("=======================================================================================\n")
    
    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
                                               patience=10),
                 keras.callbacks.TensorBoard(tb_directory)]
    
    tuner.search(train_ds,
                 epochs=max_epochs,
                 validation_data=val_ds,
                 callbacks=callbacks)
    
    return tuner

### Tuning the output classifier for the Resnet models

In [18]:
'''
time_stamp = time.strftime("%Y_%m_%d-%H_%M_%S")
tb_callback_directory = os.path.join(PROJECT_DIRECTORY, f"tb_logs_kt_resnet_{time_stamp}")
#os.makedirs(tb_callback_directory, exist_ok=True)

#%load_ext tensorboard
#%tensorboard --logdir tb_callback_directory

resnet_tuner = tune_driver(base_model_type = 'resnet101',
                           input_shape = (520, 520, 3),
                           max_epochs = 10,
                           tuner_iterations = 1,
                           tuner_reduction_factor = 3,
                           base_directory = PROJECT_DIRECTORY,
                           tb_directory=tb_callback_directory)
                           ''';

In [28]:
#%load_ext tensorboard
#%tensorboard --logdir tb_callback_directory

In [22]:
time_stamp = time.strftime("%Y_%m_%d-%H_%M_%S")
tb_callback_directory = os.path.join(PROJECT_DIRECTORY, f"tb_logs_kt_resnet_{time_stamp}")
os.makedirs(tb_callback_directory, exist_ok=True)

#%load_ext tensorboard
#%tensorboard --logdir tb_callback_directory

resnet_tuner = tune_driver(base_model_type = 'resnet101',
                           input_shape = (520, 520, 3),
                           max_epochs = 10,
                           tuner_iterations = 1,
                           tuner_reduction_factor = 3,
                           base_directory = PROJECT_DIRECTORY,
                           tb_directory=tb_callback_directory,
                           expierment_name = 'CONFIG2',
                           model_builder_func=tune_output_classifier_experiment_two)

Trial 30 Complete [00h 12m 03s]
accuracy: 0.008460075594484806

Best accuracy So Far: 0.4495247006416321
Total elapsed time: 02h 25m 03s
INFO:tensorflow:Oracle triggered exit


In [29]:
resnet_tuner.results_summary(num_trials=1)

Results summary
Results in C:\Users\Braden\Desktop\Data_Science\04_General_Assembly\05_Projects\03_car\keras_tuning_CONFIG2resnet101_None/kt_hyperband_tuning_resnet101
Showing 1 best trials
Objective(name='accuracy', direction='max')
Trial summary
Hyperparameters:
pooling_strategy: avg
layer1_hidden_units: 480
learning_rate: 0.0003419781419918064
tuner/epochs: 10
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.4495247006416321


In [24]:
def get_test_data_performance(tuner, base_model_type, input_shape=(520, 520, 3), verbose=True, output_pooling=None, epoch_num=1,
                              file_compression = 'ZLIB', batch_size=32):
    
    if verbose:
        print("\n=======================================================================================")
        print(f"Getting test data for the {base_model_type} model.")
        print("=======================================================================================\n")
        
    train_ds = get_dataset_from_multiple_tfrecords(base_model_type = base_model_type,
                                                   dataset_type = 'test',
                                                   batch_size = batch_size,
                                                   input_shape = input_shape,
                                                   output_pooling = output_pooling,
                                                   epoch_num = epoch_num,
                                                   compression = file_compression)
    
    
    best_model = tuner.get_best_models(num_models=1)[0]
    
    if verbose:
        print("\n=======================================================================================")
        print(f"Best Hyperparams:\n {tuner.get_best_hyperparameters(num_trials=1)[0]}\n\n")
        print("Performing evaluation...")
        print("=======================================================================================\n")
    
    evaluation_results = best_model.evaluate(train_ds)
    
    if verbose:
        print("\n=======================================================================================")
        print(f"Evaluation Results:\n {evaluation_results}")
        print("=======================================================================================\n")
    
    return best_model, evaluation_results

In [25]:
best_resnet_classifier, resnet_test_eval = get_test_data_performance(tuner = resnet_tuner,
                                                                     base_model_type = 'resnet101',
                                                                     output_pooling=None,
                                                                     epoch_num = 1,
                                                                     input_shape=(520, 520, 3),
                                                                     verbose=True)


Getting test data for the resnet101 model.


Best Hyperparams:
 <keras_tuner.engine.hyperparameters.HyperParameters object at 0x0000018F5D931F40>


Performing evaluation...


Evaluation Results:
 [2.548501491546631, 0.3957219123840332]



In [None]:
# ===============================================================================================================
# This function is not currently utilized in this notebook, but would facilitate loading in precalculated 
# base model outputs that were stored in a numpy array using the process shown in the
# 03_save_pretrained_model_outputs_numpy notebook.
#
# The method of storing precalculated outputs in a numpy array is generally less desireable than storing
# the outputs as tfrecord files because tfrecords can be easily read in as a tensorflow dataset, and are 
# therefore able to take advantage of the tf dataset feature where data is only loaded into memory batch-wise.
#
# Reading an entire dataset into memory all at once in the form of a numpy array is not feasible for anything
# other than very small datasets.
# ===============================================================================================================
def load_base_model_outputs_from_numpy(base_model_type, input_shape, output_pooling, epoch_num, dataset_type = 'all'):
    
    global PROJECT_DIRECTORY
    
    valid_dataset_types = ['all', 'train', 'val', 'test']
    
    if dataset_type not in valid_dataset_types:
        print("/n===========================================================")
        print("Invalid input for parameter dataset_type")
        print(f"Valid inputs are: {valid_dataset_types}")
        print("===========================================================\n")
        return -1
    
    
    base_dir_name = f"pretrained_model_output_features/{base_model_type}_pool{str(output_pooling)}_inShape_{str(input_shape)}"
    base_outputs_directory = os.path.join(PROJECT_DIRECTORY, base_dir_name)
    
    all_dirs = os.listdir(base_outputs_directory)
    
    if dataset_type != 'all':
    
        correct_dir = [directory for directory in all_dirs if (dataset_type in directory) and (f'epoch_{epoch_num}' in directory)]
        
        if len(correct_dir) != 1:
            print("/n============================ Error ===============================")
            print("Invalid directory filtering!")
            print(f"Filtering returned: {correct_dir}")
            return -1
            print("=========================================================================\n")
            
        correct_dir = correct_dir[0]
        
        filepath = os.path.join(base_outputs_directory, correct_dir + f"/{dataset_type}.npz")
        
        arrays = np.load(filepath)
        features = arrays[f"{dataset_type}_features"]
        labels = arrays[f"{dataset_type}_labels"]
        
        return features, labels
    
    elif dataset_type == 'all':
        
        correct_dirs = [directory for directory in dirs if (f'epoch_{epoch_num}' in directory)]
        train_dir = [directory for directory in correct_dirs if ('train' in directory)]
        val_dir = [directory for directory in correct_dirs if ('val' in directory)]
        test_dir = [directory for directory in correct_dirs if ('test' in directory)]
    
    if len(train_dir) != 1 or len(val_dir) != 1 or len(test_dir) != 1:
        print("/n============================ Error ===============================")
        print("Invalid directory filtering!")
        print(f"Train Directory: {train_dir}")
        print(f"Val Directory: {val_dir}")
        print(f"Test Directory: {test_dir}")
        return -1
        print("=========================================================================\n")
            
    train_filepath = os.path.join(base_outputs_directory, train_dir)
    val_filepath = os.path.join(base_outputs_directory, val_dir)
    test_filepath = os.path.join(base_outputs_directory, test_dir)
    
    filepaths = {'train' : train_filepath, 'val' : val_filepath, 'test' : test_filepath}
    
    output_dict = {}
    
    for dataset_type, filepath in filepeths.items():
        
        arrays = np.load(filepath)
        feats = arrays[f"{dataset_type}_features"]
        labels = arrays[f"{dataset_type}_labels"]
        temp_dict = {'features' : feats, 'labels' : labels}
        output_dict[dataset_type] = temp_dict
    
    return output_dict