In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Feedback or issues?

Let us know if you have any [feedback or questions](https://forms.gle/hXDnv1T4WanMwTi79). If you provide an email address, we will follow up with you.

# Tracking training parameters and metrics for locally trained models

To use this Jupyter notebook, copy the notebook to an AI Platform (Unified) Notebooks instance with TensorFlow installed and open it. You can run each step, or cell, and see its results. To run a cell, press Shift+Enter. The notebook automatically displays the return value of the last line in each cell. For more information about running notebooks in AI Platform (Unified) Notebooks, see the [AI Platform (Unified) Notebooks guide](https://cloud.google.com/ai-platform-unified/docs/general/notebooks).


This notebook demonstrates how to track metrics and parameters for ML training jobs, and how to analyze this metadata.



Note: You may incur charges for training, prediction, storage, or usage of other GCP products in connection with testing this SDK.

## Concepts

To better understand how parameters and metrics are stored and organized, we'd like to introduce the following concepts:


### Experiment
Experiments describe a context that groups your runs and the artifacts you create into a logical session. For example, in this notebook you create an Experiment and log data to that experiment.

### Run
A run represents a single path/avenue that you executed while performing an experiment. A run includes artifacts that you used as inputs or outputs, and parameters that you used in this execution. An Experiment can contain multiple runs. 

## Install the Model Builder SDK

Run the following commands to install the Model Builder SDK.

After the SDK is installed, the kernel is restarted.

In [None]:
%%capture
# Remove any already-installed google-cloud-aiplatform package first.
!pip3 uninstall -y google-cloud-aiplatform
# Install the SDK from the dev-test branch of the python-aiplatform GitHub repository.
!pip3 install --user git+https://github.com/googleapis/python-aiplatform.git@dev-test
import IPython
# Restart the kernel after installation (the True argument requests a restart);
# re-run the notebook from the next cell once the kernel is back.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

## Enter Your Project ID

Enter your project ID in the cell below. Then run the cell to make sure the Cloud SDK uses the right project for all the commands in this notebook.

In [None]:
# Replace the placeholder with the ID of the Google Cloud project to use;
# later cells read MY_PROJECT when initializing the SDK and building the UI link.
MY_PROJECT = "YOUR PROJECT ID"

In [None]:
import sys

# When the google.colab module is already loaded (i.e. the notebook is running
# inside Colab), authenticate the user; otherwise this cell does nothing.
if 'google.colab' in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

## Getting started tracking parameters and metrics

You can use the Model Builder SDK to track metrics and parameters for models trained locally. 

In the following example, you train a simple deep neural network (DNN) model to predict an automobile's miles per gallon (MPG) based on automobile information in the [auto-mpg dataset](https://www.kaggle.com/devanshbesain/exploration-and-analysis-auto-mpg).

### Load and process the training dataset

Import required libraries.

In [None]:
import pathlib
import numpy as np
import pandas as pd
from tensorflow.python.keras import layers, Sequential
from tensorflow.python.keras.utils import data_utils


Download and process the dataset.

In [None]:
def read_data(uri):
    """Fetch the auto-mpg data file and return it as a cleaned DataFrame.

    Args:
        uri: Location of the raw ``auto-mpg.data`` file. It is handed to
            ``data_utils.get_file``, whose return value is used as the path
            of the file to parse.

    Returns:
        A pandas DataFrame with the columns ``MPG``, ``Cylinders``,
        ``Displacement``, ``Horsepower``, ``Weight``, ``Acceleration`` and
        ``Model Year``, followed by one 0/1 indicator column for each origin
        present in the data (``USA``, ``Europe``, ``Japan``). Rows containing
        missing values (``?`` in the raw file) are dropped.
    """
    column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                    'Acceleration', 'Model Year', 'Origin']
    origin_names = {1: 'USA', 2: 'Europe', 3: 'Japan'}

    dataset_path = data_utils.get_file("auto-mpg.data", uri)
    # The file is space separated; everything after a tab on a line is
    # treated as a comment and ignored.
    raw_dataset = pd.read_csv(dataset_path, names=column_names, na_values="?",
                              comment='\t', sep=" ", skipinitialspace=True)

    # Take an explicit copy so the column assignment below works on an
    # independent frame instead of triggering SettingWithCopyWarning.
    dataset = raw_dataset.dropna().copy()
    dataset['Origin'] = dataset['Origin'].map(origin_names)

    # Pin the indicator dtype: pandas >= 2.0 defaults to bool, and bool columns
    # are skipped by DataFrame.describe(), which would turn these features
    # into NaN when the data is normalized later. uint8 matches the old default.
    return pd.get_dummies(dataset, prefix='', prefix_sep='', dtype='uint8')


# Download the auto-mpg data from the UCI machine-learning repository and clean it.
dataset = read_data(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
)

Split dataset for training and testing.

In [None]:
def train_test_split(dataset, split_frac=0.8, random_state=0):
    """Randomly partition ``dataset`` and separate the ``MPG`` label column.

    Args:
        dataset: DataFrame that contains an ``MPG`` column.
        split_frac: Fraction of the rows sampled into the training set.
        random_state: Seed passed to ``DataFrame.sample`` for a repeatable split.

    Returns:
        A tuple ``(train_features, test_features, train_labels, test_labels)``.
        The feature frames no longer contain ``MPG``; the label series hold it.
        ``dataset`` itself is not modified.
    """
    sampled_rows = dataset.sample(frac=split_frac, random_state=random_state)
    # Every row that was not sampled for training becomes a test row.
    remaining_rows = dataset.drop(sampled_rows.index)

    train_labels, test_labels = sampled_rows['MPG'], remaining_rows['MPG']
    train_features = sampled_rows.drop(columns='MPG')
    test_features = remaining_rows.drop(columns='MPG')
    return train_features, test_features, train_labels, test_labels

# Split with the function's defaults (80% of the rows for training, seed 0).
(train_dataset,
 test_dataset,
 train_labels,
 test_labels) = train_test_split(dataset)

Normalize the features in the dataset for better model performance.

In [None]:
def normalize_dataset(train_dataset, test_dataset):
    """Standardize both datasets with the training set's per-column statistics.

    Args:
        train_dataset: DataFrame of training features; its ``describe()``
            output supplies the mean and standard deviation of each column.
        test_dataset: DataFrame of test features, scaled with the same
            training statistics.

    Returns:
        A tuple ``(normed_train_data, normed_test_data)`` in which every value
        is ``(value - train mean) / train std`` for its column.
    """
    summary = train_dataset.describe().T
    feature_means, feature_stds = summary['mean'], summary['std']

    def norm(frame):
        # Series are aligned to the frame's columns by label.
        centered = frame - feature_means
        return centered / feature_stds

    return norm(train_dataset), norm(test_dataset)

# Scale the training and test features using the training set's statistics.
normed_train_data, normed_test_data = normalize_dataset(train_dataset, test_dataset)

### Define ML model and training function

In [None]:
def train(train_data, train_labels, num_units=64, activation='relu',
          dropout_rate=0.0, validation_split=0.2, epochs=1000):
    """Build, compile and fit a small fully connected regression model.

    The network is Dense -> Dropout -> Dense -> Dense(1), compiled with the
    ``mse`` loss, the ``adam`` optimizer and the ``mae``/``mse`` metrics.

    Args:
        train_data: Two-dimensional feature table (anything exposing
            ``.shape``, e.g. a DataFrame); one column per input feature.
        train_labels: Target values passed to ``model.fit`` with ``train_data``.
        num_units: Width of each of the two hidden Dense layers.
        activation: Activation used by the hidden Dense layers.
        dropout_rate: Rate of the Dropout layer after the first hidden layer.
        validation_split: Passed through to ``model.fit``.
        epochs: Passed through to ``model.fit``.

    Returns:
        A tuple ``(model, history)``: the fitted model and the value returned
        by ``model.fit``.
    """
    # Size the input layer from the data that was actually passed in, not from
    # a notebook-level variable, so the function works for any feature table.
    num_features = train_data.shape[1]

    model = Sequential([
        layers.Dense(num_units, activation=activation,
                     input_shape=[num_features]),
        layers.Dropout(rate=dropout_rate),
        layers.Dense(num_units, activation=activation),
        layers.Dense(1)
    ])

    model.compile(loss='mse',
                  optimizer='adam',
                  metrics=['mae', 'mse'])
    # summary() prints the table itself; wrapping it in print() would also
    # emit a stray "None".
    model.summary()

    history = model.fit(
        train_data, train_labels,
        epochs=epochs, validation_split=validation_split)

    return model, history

### Initialize the Model Builder SDK and create an Experiment

Initialize the *client* for AI Platform (Unified) and create an experiment.

In [None]:
from google.cloud import aiplatform

# Point the SDK at the chosen project and name the experiment that the runs
# logged below will belong to.
aiplatform.init(
    project=MY_PROJECT,
    experiment="auto-mpg-experiment",
)

### Start several model training runs

Training parameters and metrics are logged for each run.

In [None]:
# Hyperparameter combinations to compare; each entry becomes one tracked run.
parameters = [
    {'num_units': 16, 'epochs': 3, 'dropout_rate': 0.1},
    {'num_units': 16, 'epochs': 10, 'dropout_rate': 0.1},
    {'num_units': 16, 'epochs': 10, 'dropout_rate': 0.2},
    {'num_units': 32, 'epochs': 10, 'dropout_rate': 0.1},
    {'num_units': 32, 'epochs': 10, 'dropout_rate': 0.2},
]

for i, params in enumerate(parameters):
    # Start a run named after the loop index, then record its hyperparameters.
    aiplatform.start_run(run=f'auto-mpg-local-run-{i}')
    aiplatform.log_params(params)

    model, history = train(
        normed_train_data,
        train_labels,
        num_units=params['num_units'],
        activation='relu',
        epochs=params['epochs'],
        dropout_rate=params['dropout_rate'],
    )

    # history.history maps each metric name to a sequence of values;
    # only the last value of each is logged.
    aiplatform.log_metrics(
        {metric: values[-1] for metric, values in history.history.items()}
    )

    # Score the model on the held-out test data and log those results too.
    loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)
    aiplatform.log_metrics({'eval_loss': loss, 'eval_mae': mae, 'eval_mse': mse})

### Extract parameters and metrics into a dataframe for analysis

In [None]:
# Pull the experiment's logged data into a DataFrame; the plotting cell below
# reads its run_name, param.* and metric.* columns.
experiment_df = aiplatform.get_experiment_df()
# Last expression in the cell, so the notebook renders the table.
experiment_df

### Visualizing an experiment's parameters and metrics

In [None]:
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [15, 5]

# Parallel-coordinates view: one line per run, one vertical axis per
# parameter/metric column listed in `cols`.
ax = pd.plotting.parallel_coordinates(
    experiment_df.reset_index(level=0),
    'run_name', cols=['param.num_units', 'param.dropout_rate',
                             'param.epochs', 'metric.loss',
                             'metric.val_loss', 'metric.eval_loss'],
    # One distinct colour for each of the five runs started above; with fewer
    # colours than runs pandas cycles the list and two runs look identical.
    color=['blue', 'green', 'pink', 'red', 'orange'],
    )
# symlog keeps small values (dropout rate) and large values (losses) readable
# on the same axis.
ax.set_yscale('symlog')
# Trailing semicolon suppresses the Legend object's repr in the cell output.
ax.legend(bbox_to_anchor=(1.0, 0.5));

## Visualizing experiments in Cloud Console

Run the following to get the URL of AI Platform (Unified) Experiments for your project.


In [None]:
# Link to the Experiments page for the configured project. Uses the public
# Cloud Console host; the previous pantheon.corp.google.com host is only
# reachable from inside Google's corporate network.
print("Prod UI:")
print(f'https://console.cloud.google.com/ai/platform/experiments/experiments?folder=&organizationId=&project={MY_PROJECT}')