In [1]:
# Copyright 2020 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

<img src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png" style="width: 90px; float: right;">

## Overview

In the previous notebook, we explained and showed how to preprocess data with multi-hot columns using NVTabular, and how to train a TF MLP model using the NVTabular `KerasSequenceLoader`. We learned how to save a workflow, a trained TF model, and the ensemble model. In this notebook, we will show example request scripts sent to Triton Inference Server:

- to transform new/streaming data with the NVTabular library
- to deploy the end-to-end pipeline to generate prediction results for new data from the trained TF model

## Getting Started

In [2]:
# External dependencies
import os
from os import path                
import time
import gc

import nvtabular
import cudf 
from tritonclient.utils import *
import tritonclient.grpc as grpcclient
import nvtabular.inference.triton as nvt_triton

We define our base directory, containing the data.

In [3]:
# Base directory that holds the raw and preprocessed data
BASE_DIR = '/model/data/'

## Verify Triton Is Running Correctly

In [4]:
# Uncomment the next line if `curl` is not already installed in this environment.
#!apt-get install curl -y

In [5]:
# Ask Triton's HTTP endpoint whether the server is ready; `-i` also prints the response headers.
!curl -i localhost:8000/v2/health/ready

curl: /opt/conda/lib/libcurl.so.4: no version information available (required by curl)
HTTP/1.1 200 OK
[1mContent-Length[0m: 0
[1mContent-Type[0m: text/plain



## Send request to Triton IS to transform raw dataset

Let's read the raw validation set, and send 3 rows of `userId` and `movieId` as input to the saved NVTabular model.

In [6]:
# Load the raw validation set. os.path.join keeps the path valid whether or not
# BASE_DIR ends with a path separator.
df_valid = cudf.read_parquet(os.path.join(BASE_DIR, 'valid.parquet'))
# Preview the first rows.
df_valid.head()

Unnamed: 0,userId,movieId,rating
15347762,99476,104374,3.5
16647840,107979,2634,4.0
23915192,155372,1614,3.0
10052313,65225,7153,4.0
12214125,79161,500,5.0


In [8]:
# Columns sent to the NVTabular model, and columns requested back from it.
input_cols = ["userId", "movieId"]
output_cols = ["userId", "movieId", "genres__nnzs", "genres__values"]

# Three (userId, movieId) pairs packed into a cuDF DataFrame.
df = cudf.DataFrame(
    {
        "userId": [99476, 107979, 155372],
        "movieId": [104374, 2634, 1614],
    }
)
inputs = nvt_triton.convert_df_to_triton_input(input_cols, df, grpcclient.InferInput)

# One requested-output handle per column we want returned.
outputs = list(map(grpcclient.InferRequestedOutput, output_cols))

# Send the request over gRPC to the server at localhost:8001.
with grpcclient.InferenceServerClient("localhost:8001") as client:
    response = client.infer("movielens_mh_nvt", inputs, request_id="1", outputs=outputs)

# Show every returned column together with its shape.
for col in output_cols:
    values = response.as_numpy(col)
    print(col, values, values.shape)

userId [[ 99476]
 [107979]
 [155372]] (3, 1)
movieId [[19997]
 [ 2543]
 [ 1557]] (3, 1)
genres__nnzs [[3]
 [1]
 [1]] (3, 1)
genres__values [[ 9]
 [10]
 [16]
 [12]
 [ 6]] (5, 1)


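You might notice that we don't need to send the `genres` column as an input. The reason is that the NVTabular model looks up the genres for each movie as part of the `JoinExternal` op it applies. Also notice that when creating the request for the `movielens_mh_nvt` model, we request 2 output columns (values and nnzs) for the `genres` column rather than 1: in the output above, `genres__nnzs` holds the number of genres for each row, and `genres__values` holds the genre ids of all rows concatenated (3 + 1 + 1 = 5 values).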

## END-2-END INFERENCE PIPELINE

We will do the same, but this time we directly read in the first 3 rows of the raw `valid.parquet` file with cuDF.

In [10]:
# Read the first 3 rows of the raw validation set, keeping only the two ID
# columns that are sent to the server. The path is built from BASE_DIR so it
# stays in sync with the configuration cell.
batch = cudf.read_parquet(
    os.path.join(BASE_DIR, "valid.parquet"), num_rows=3, columns=['userId', 'movieId']
)
print(batch, "\n")

# Convert the batch to a list of Triton inputs.
inputs = nvt_triton.convert_df_to_triton_input(["userId", "movieId"], batch, grpcclient.InferInput)

# Request only the "dense_3" output.
outputs = [grpcclient.InferRequestedOutput("dense_3")]

# Build a client to connect to our server.
# This InferenceServerClient object is what we use to talk to Triton; the
# `with` block closes the connection once the request has returned.
# "movielens_mh" is presumably the ensemble model saved in the previous
# notebook -- confirm against the model repository.
with grpcclient.InferenceServerClient("localhost:8001") as client:
    response = client.infer("movielens_mh", inputs, request_id="1", outputs=outputs)

# NOTE(review): the recorded output has a single value per row, which looks like
# a sigmoid score rather than a softmax. The label is left unchanged so that it
# matches the saved cell output -- confirm and rename if appropriate.
print("predicted softmax result:\n", response.as_numpy('dense_3'))


          userId  movieId
15347762   99476   104374
16647840  107979     2634
23915192  155372     1614 

predicted softmax result:
 [[0.62854725]
 [0.6393233 ]
 [0.59708184]]


In [11]:
# Drop the reference to the validation DataFrame, which is not used after this point.
del df_valid
# Run a garbage-collection pass; as the cell's last expression, the value
# returned by gc.collect() is displayed as the cell output.
gc.collect()

90