# TFMA Model Evaluation Visualisations

This Notebook will guide the user as to how to obtain embedded HTML visualisations of TFMA model evaluation metrics used and created during training. This Notebook must be run after the completion of the training pipeline. 

The following steps are required:
1. Install necessary packages 
2. Define path to predictions file in GCS and desired metrics to evaluate
3. Run TFMA evaluation
4. Obtain HTML files to visualise

<span style="color:red">*Disclaimer:*</span> This Notebook is meant to be run in a Vertex AI Workbench instance within the GCP environment. If you wish to run this Notebook locally, you will need to:
1. Download the `predictions` file you wish to evaluate from GCS into your local machine
2. Replace the `csv_file` variable to point to the local path instead
3. Download the `<custom_metric_name>.py` custom metric you wish to use from GCS into your local machine. Save these files in the same folder as this Notebook.
4. Comment out the `Custom Metrics` section of the Notebook.
5. Run the rest of the Notebook as normal

# Install Packages

In [12]:
!pip install tensorflow_model_analysis==0.37.0 pandas==1.3.5 google_cloud_storage==1.43.0

## Import Packages

In [2]:
# Standard library
import os
import re

# TFMA Evaluation
import pandas as pd
from google.cloud import storage
from google.protobuf import text_format
import tensorflow_model_analysis as tfma

# Visualisation-specific imports
from ipywidgets.embed import embed_minimal_html
from tensorflow_model_analysis.view import render_slicing_metrics

2022-03-14 15:18:47.904516: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-14 15:18:47.904578: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## User Inputs

In [3]:
# Link to the predictions generated during the training pipeline, which are stored in GCS.
# These are the output of the "Predict Test Data" component and are saved in a Dataset
# Artefact called "predictions", which then acts as the input to the
# "Evaluate test metrics for <challenger>/<champion> model" component.
csv_file = 'gs://alvaro-sandbox/pipeline_root/805011877165/tensorflow-train-pipeline-20220223132851/predict-tensorflow-model_-2494514806493544448/predictions'

In [4]:
# Label column name (this is the ground truth)
label_column_name = "total_fare"
# Model prediction column name
pred_column_name = "predictions"

# Metrics used to evaluate the model. More than one may be given,
# e.g. ["MeanSquaredError", "<metric_name>"]
metrics_names = ["MeanSquaredError"]

# Custom metrics used to evaluate the model, as {"<MetricName>": "<module_name>"}.
# If none are used, leave it as custom_metrics = {}. If more than one is used, then
# custom_metrics = {"SquaredPearson": "squared_pearson", "<MetricName>": "<module_name>"}
custom_metrics = {"SquaredPearson": "squared_pearson"}

# Slicing types used during evaluation. If no slicing is used, leave it as slicing_specs = []
slicing_specs = [
    'feature_keys: ["payment_type"]',
    'feature_keys: ["payment_type", "company"]',
    'feature_values: [{key: "payment_type", value: "Cash"}]',
    'feature_keys: ["company", "dayofweek"] feature_values: [{key: "payment_type", value:  "Cash"}]',
]

In [5]:
# GCP project and GCS location of the pipeline assets.
# Both are only used when custom metrics have to be downloaded.
VERTEX_PROJECT_ID = 'datatonic-vertex-pipeline-dev'
PIPELINE_FILES_GCS_PATH = 'gs://alvaro-sandbox/pipelines'

## Custom Metrics

In [6]:
# The custom metric modules must be downloaded from GCS, where they are stored, into the
# Notebook's working directory. If no custom metrics are used, this cell only prints a message.

if custom_metrics:

    custom_metrics_path = f"{PIPELINE_FILES_GCS_PATH}/training/assets/tfma_custom_metrics"

    storage_client = storage.Client(project=VERTEX_PROJECT_ID)
    for custom_metric in custom_metrics.values():
        local_module = f"{custom_metric}.py"

        try:
            with open(local_module, "wb") as fp:
                storage_client.download_blob_to_file(f"{custom_metrics_path}/{local_module}", fp)
        except Exception:
            # open() has already created the local file, so a failed download would leave an
            # empty stub behind. Remove it before re-raising the original error.
            if os.path.exists(local_module):
                os.remove(local_module)
            raise

        # Checking only that the file exists is meaningless (open() always creates it), so
        # verify that content was actually written.
        if os.path.getsize(local_module) == 0:
            os.remove(local_module)
            raise FileNotFoundError(
                f"Custom Metric module {custom_metric}.py could not be found at {custom_metrics_path}"
            )

        print(f"Downloaded custom metric module {custom_metric}.py to Notebook storage")

else:

    print("No custom metrics were specified by the user")

Downloaded custom metric module squared_pearson.py to Notebook storage


## Define TFMA model evaluation specs

In [7]:
# Load the predictions file into a dataframe
df = pd.read_csv(csv_file)

# One `metrics { ... }` entry per built-in metric, followed by one per custom metric
# (custom metrics additionally name the module that defines them)
metric_entries = [f'metrics {{ class_name: "{name}" }}\n' for name in metrics_names]
metric_entries += [
    f'metrics  {{ class_name: "{cls}" module: "{mod}" }}\n'
    for cls, mod in (custom_metrics or {}).items()
]
metrics_specs = "".join(metric_entries)

# The leading empty slicing spec is always present and is followed by one entry per user slice
slicing_spec_proto = "slicing_specs {}\n" + "".join(
    f"slicing_specs {{ {user_slice} }}\n" for user_slice in (slicing_specs or [])
)

# Text-format template of the evaluation configuration
protobuf = """
            ## Model information
            model_specs {{
                label_key: "{0}"
                prediction_key: "{1}"
            }}
            ## Post export metric information
            metrics_specs {{
                {2}
            }}
            ## Slicing information inc. overall
            {3}
            """

# Fill in the template and parse it into a TFMA EvalConfig message
eval_config_text = protobuf.format(
    label_column_name, pred_column_name, metrics_specs, slicing_spec_proto
)
eval_config = text_format.Parse(eval_config_text, tfma.EvalConfig())

print(eval_config)
    

model_specs {
  label_key: "total_fare"
  prediction_key: "predictions"
}
slicing_specs {
}
slicing_specs {
  feature_keys: "payment_type"
}
slicing_specs {
  feature_keys: "payment_type"
  feature_keys: "company"
}
slicing_specs {
  feature_values {
    key: "payment_type"
    value: "Cash"
  }
}
slicing_specs {
  feature_keys: "company"
  feature_keys: "dayofweek"
  feature_values {
    key: "payment_type"
    value: "Cash"
  }
}
metrics_specs {
  metrics {
    class_name: "MeanSquaredError"
  }
  metrics {
    class_name: "SquaredPearson"
    module: "squared_pearson"
  }
}



## Run Evaluation

This will save the results of the TFMA evaluation in a folder called `eval_outputs`, which is created by TFMA itself.

In [8]:
# Evaluate the predictions dataframe against the configuration; results are written to `eval_outputs/`
eval_result = tfma.analyze_raw_data(
    df,
    eval_config=eval_config,
    output_path="eval_outputs/",
)

# Metrics for every slice, kept for inspection
evaluation = eval_result.get_metrics_for_all_slices()



2022-03-14 15:18:51.981807: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-03-14 15:18:51.981860: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-03-14 15:18:51.981893: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (tfma-visualisations-final): /proc/driver/nvidia/version does not exist
2022-03-14 15:18:51.982214: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


## Save Evaluation in HTML Visualisations

This will save the HTML plots in a folder called `html_outputs`. It will create one file for every slice specified in `slicing_specs`, as well as a plot with the overall metrics, without any slice applied. For example, if 

```
slicing_specs=[
        'feature_keys: ["payment_type"]',
        'feature_keys: ["payment_type", "company"]',
        'feature_values: [{key: "payment_type", value: "Cash"}]',
        'feature_keys: ["company", "dayofweek"] feature_values: [{key: "payment_type", value: "Cash"}]',
    ]
```
four HTML files will be created, as follows:
1. `feature_keys: ["payment_type"]` will show the metrics for all of the different `payment_type` values available
2. `feature_keys: ["payment_type", "company"]` will show the metrics for every unique combination of `payment_type` and `company` values available
3. `feature_values: [{key: "payment_type", value: "Cash"}]` will show the metrics only for the cases where the `payment_type` is `Cash`
4. `feature_keys: ["company", "dayofweek"] feature_values: [{key: "payment_type", value: "Cash"}]` will show the metrics for every unique combination of `company` and `dayofweek` wherever the `payment_type` is `Cash`.

Additionally, a fifth plot would be created, which contains the metrics with no slice applied.

Once the plots are created, to view and interact with them, double click on the file you wish to open. This will open a new tab with the name of the plot. Then click on `Trust HTML` and wait for a few seconds to see the plot. 

In [9]:
def get_key_value_pair(key_value_string):
    """Extract the key-value pair from a slicing specification string.

    Only the first `key: "<key>"` and `value: "<value>"` entries are read, i.e. a single
    key-value pair per slicing specification is supported. If more than one pair has to be
    handled, this function must change.

    The text between the quotes is returned verbatim, so values containing commas, braces
    or apostrophes (e.g. "O'Hare Cabs, Inc.") are preserved.

    Args:
        key_value_string (str): String containing the key-value pair. This string has the following naming convention:
            'feature_keys: ["<feature_key>"] feature_values: [{key: "<key>", value: "<value>"}]'.
            The <key> and <value> names may be wrapped in double or single quotes.

    Returns:
        key (str): Key name given in slicing spec.
        value (str): Value name given in slicing spec.

    Raises:
        ValueError: If no quoted `key:` / `value:` pair can be found in the string.
    """

    # A quoted string after "<field>:"; the back-reference makes the closing quote match the
    # opening one, so the other quote character may appear inside the string.
    quoted = r'\s*:\s*(["\'])(.*?)\1'

    # `\b` prevents matching inside "feature_keys" / "feature_values"
    key_match = re.search(r'\bkey' + quoted, key_value_string)
    value_match = re.search(r'\bvalue' + quoted, key_value_string)

    if key_match is None or value_match is None:
        raise ValueError(
            f"Could not find a quoted key/value pair in slicing spec: {key_value_string!r}"
        )

    return key_match.group(2), value_match.group(2)

In [10]:
def get_feature_keys(keys_string):
    """Extract all feature keys of a single slicing specification as a list.

    The text between the quotes of every entry is returned verbatim, so feature names
    containing commas or apostrophes are preserved, and an empty list
    ('feature_keys: []') yields an empty result.

    Args:
        keys_string (str): String containing the feature keys. This string has the following naming convention:
            'feature_keys: ["<feature_one>", "<feature_two>"]'. The names may be wrapped in
            double or single quotes, and other fields (e.g. feature_values) may follow.

    Returns:
        feature_keys (list): List containing all feature keys in the given slice

    Raises:
        ValueError: If the string contains no 'feature_keys: [...]' list.
    """

    # Isolate the text between the brackets that follow "feature_keys:"
    keys_match = re.search(r'\bfeature_keys\s*:\s*\[(.*?)\]', keys_string, flags=re.DOTALL)
    if keys_match is None:
        raise ValueError(
            f"Could not find a 'feature_keys: [...]' list in slicing spec: {keys_string!r}"
        )

    # Every quoted string inside the brackets is one feature key; the back-reference makes
    # the closing quote match the opening one.
    quoted_names = re.findall(r'(["\'])(.*?)\1', keys_match.group(1))
    feature_keys = [name for _quote, name in quoted_names]

    return feature_keys

In [11]:
os.makedirs("html_outputs/", exist_ok=True) # Save files in this local folder

# Create an output file for every slice type
for onespec in slicing_specs:

    # Detect which parts the spec contains. Both checks deliberately omit the trailing
    # space so that 'feature_values:[...]' is recognised as well.
    has_keys = "feature_keys:" in onespec
    has_values = "feature_values:" in onespec

    if not (has_keys or has_values):
        # Nothing to slice on; report it instead of skipping silently
        print(f"Skipping slicing spec without feature keys or feature values: {onespec!r}")
        continue

    spec_kwargs = {}  # Arguments for tfma.SlicingSpec
    name_parts = []   # Pieces of the output file name

    if has_keys:
        spec_keys = get_feature_keys(onespec) # Get all keys as list of strings
        spec_kwargs["feature_keys"] = spec_keys
        name_parts.append("_&_".join(spec_keys))

    if has_values:
        keyname, valname = get_key_value_pair(onespec) # Get key-value pair names
        spec_kwargs["feature_values"] = {keyname: valname}
        name_parts.append(f"{keyname}_-->_{valname}")

    # File names: plots_<keys>.html, plots_<key>_-->_<value>.html
    # or plots_<keys>_<>_<key>_-->_<value>.html
    html_path = "html_outputs/plots_" + "_<>_".join(name_parts) + ".html"

    specs = tfma.SlicingSpec(**spec_kwargs) # Create slicing spec
    plots_tfma = render_slicing_metrics(eval_result, slicing_spec=specs) # Plot metrics
    embed_minimal_html(html_path, views=[plots_tfma], title='Slicing Metrics')

# Create a final plot without any slice, just for the overall metric
plots_tfma = render_slicing_metrics(eval_result)
embed_minimal_html('html_outputs/plots_overall.html', views=[plots_tfma], title='Slicing Metrics')