In this notebook, we pick up a model trained using `train_and_save_models_for_benchmarking.ipynb` and stored on google drive to perform inference and benchmark performance.

In [1]:
%%bash
set -e

#### Install requirements
cd examples/t4rec_paper_experiments
pip install -r requirements.txt

### Get data
cd t4r_paper_repro

FEATURE_SCHEMA_PATH=../datasets_configs/ecom_rees46/rees46_schema.pbtxt
pip install gdown
gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV
apt-get update -y
apt-get install unzip -y
DATA_PATH=/transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/
unzip -d $DATA_PATH "rees46_ecom_dataset_small_for_ci.zip"
# gdown https://drive.google.com/uc?id=18EllaKaodqaesrNJ3YGEmv0YUD3NX0vK
# mkdir -p /transformers4rec/TF4Rec/models/
# MODEL_PATH=/transformers4rec/TF4Rec/models/
# unzip -d $MODEL_PATH "model.zip"
exit 0



Downloading...
From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV
To: /workspace/examples/t4rec_paper_experiments/t4r_paper_repro/rees46_ecom_dataset_small_for_ci.zip
100%|██████████| 43.4M/43.4M [00:06<00:00, 6.42MB/s]


Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease
Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease
Hit:3 http://security.ubuntu.com/ubuntu focal-security InRelease
Hit:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease
Hit:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease
Reading package lists...
Reading package lists...
Building dependency tree...
Reading state information...
unzip is already the newest version (6.0-25ubuntu1.1).
0 upgraded, 0 newly installed, 0 to remove and 74 not upgraded.
Archive:  rees46_ecom_dataset_small_for_ci.zip


replace /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/0001/valid.parquet? [y]es, [n]o, [A]ll, [N]one, [r]ename: error:  invalid response [# gdown h]
replace /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/0001/valid.parquet? [y]es, [n]o, [A]ll, [N]one, [r]ename: error:  invalid response [ttps://dr]
replace /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/0001/valid.parquet? [y]es, [n]o, [A]ll, [N]one, [r]ename: error:  invalid response [ive.googl]
replace /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/0001/valid.parquet? [y]es, [n]o, [A]ll, [N]one, [r]ename: error:  invalid response [e.com/uc?]
replace /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/0001/valid.parquet? [y]es, [n]o, [A]ll, [N]one, [r]ename: error:  invalid response [id=18Ella]
replace /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/0001/valid.parquet? [y]es, [n]o, [A]ll, [N]one, [r]ename: error:  invalid r

In [1]:
import glob
import logging
import os
from functools import partial
import pandas as pd
import cudf
import numpy as np
import nvtabular.inference.triton as nvt_triton
import tritonclient.grpc as grpcclient
import subprocess
import time

In [2]:
!mkdir -p /workspace/examples/t4rec_paper_experiments/t4r_paper_repro

In [3]:
os.chdir('/workspace/examples/t4rec_paper_experiments/t4r_paper_repro')

In [4]:
eval_path = os.path.join(
    '/transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/',
    str(2,).zfill(4),
    "valid.parquet",
)


In [7]:
# load model trained locally using train_and_save_models_for_benchmarking.ipynb

my_env = os.environ.copy()

# # run on the CPU
# my_env["CUDA_VISIBLE_DEVICES"] = ''
# my_env["HAS_GPU"] = '0'

# run on the GPU
my_env["HAS_GPU"] = '1'

subprocess.Popen(['tritonserver', '--model-repository=/workspace/models_for_benchmarking/'], env=my_env)

<subprocess.Popen at 0x7f7b8b8666a0>

I0220 02:43:34.847979 18298 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f03d6000000' with size 268435456
I0220 02:43:34.848302 18298 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864
I0220 02:43:34.850169 18298 model_lifecycle.cc:459] loading: t4r_pytorch_pt:1
I0220 02:43:38.522804 18298 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: t4r_pytorch_pt (GPU device 0)


In [8]:
# # load model downloaded from google drive
# subprocess.Popen(['tritonserver',  '--model-repository=/transformers4rec/TF4Rec/models/'])

In [9]:
time.sleep(15)

I0220 02:43:42.878213 18298 model_lifecycle.cc:694] successfully loaded 't4r_pytorch_pt' version 1
I0220 02:43:42.878340 18298 server.cc:563] 
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+

I0220 02:43:42.878405 18298 server.cc:590] 
+---------+-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path                                                  | Config                                                                                                                                                        |
+---------+-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
| python  | /opt

In [10]:
import tritonhttpclient
try:
    triton_client = tritonhttpclient.InferenceServerClient(url="localhost:8000", verbose=True)
    print("client created.")
except Exception as e:
    print("channel creation failed: " + str(e))
triton_client.is_server_live()

client created.
GET /v2/health/live, headers None
<HTTPSocketPoolResponse status=200 headers={'content-length': '0', 'content-type': 'text/plain'}>




True

In [11]:
prediction_data = cudf.read_parquet(eval_path)

In [12]:
col_names = ['sess_pid_seq']
inputs = nvt_triton.convert_df_to_triton_input(col_names, prediction_data.loc[6, col_names], grpcclient.InferInput)

In [12]:
import time
MODEL_NAME_PT = "t4r_pytorch_pt"

N_TRIALS = 1000

# WarmUp
for _ in range(N_TRIALS):
    payload = cudf.DataFrame(data={'sess_pid_seq': np.random.randint(0, 390001, 20), 'id': 0}).groupby('id').agg({'sess_pid_seq': list})
    with grpcclient.InferenceServerClient("localhost:8001") as client:
        col_names = ['sess_pid_seq']
        inputs = nvt_triton.convert_df_to_triton_input(col_names, payload, grpcclient.InferInput)
        response = client.infer(MODEL_NAME_PT, inputs)
    end_time = time.time()


# Collecting

out = []
for _ in range(N_TRIALS):
    payload = cudf.DataFrame(data={'sess_pid_seq': np.random.randint(0, 390001, 20), 'id': 0}).groupby('id').agg({'sess_pid_seq': list})
    start_time = time.time()
    with grpcclient.InferenceServerClient("localhost:8001") as client:
        col_names = ['sess_pid_seq']
        inputs = nvt_triton.convert_df_to_triton_input(col_names, payload, grpcclient.InferInput)
        response = client.infer(MODEL_NAME_PT, inputs)
    end_time = time.time()
    out.append(end_time-start_time)
    
# P95
np.sort(out)[int(0.95 * N_TRIALS)]

0.058879852294921875

In [13]:
import time
MODEL_NAME_PT = "t4r_pytorch_pt"

N_TRIALS = 1000

# WarmUp
for _ in range(N_TRIALS):
    payload = cudf.DataFrame(data={'sess_pid_seq': np.random.randint(0, 390001, 20), 'id': 0}).groupby('id').agg({'sess_pid_seq': list})
    with grpcclient.InferenceServerClient("localhost:8001") as client:
        col_names = ['sess_pid_seq']
        inputs = nvt_triton.convert_df_to_triton_input(col_names, payload, grpcclient.InferInput)
        response = client.infer(MODEL_NAME_PT, inputs)
    end_time = time.time()


# Collecting

out = []
for _ in range(N_TRIALS):
    payload = cudf.DataFrame(data={'sess_pid_seq': np.random.randint(0, 390001, 20), 'id': 0}).groupby('id').agg({'sess_pid_seq': list})
    start_time = time.time()
    with grpcclient.InferenceServerClient("localhost:8001") as client:
        col_names = ['sess_pid_seq']
        inputs = nvt_triton.convert_df_to_triton_input(col_names, payload, grpcclient.InferInput)
        response = client.infer(MODEL_NAME_PT, inputs)
    end_time = time.time()
    out.append(end_time-start_time)
    
# P95
np.sort(out)[int(0.95 * N_TRIALS)]

0.008340835571289062

In [None]:
# %%timeit

# output_names = ["output"]

# outputs = []
# for col in output_names:
#     outputs.append(grpcclient.InferRequestedOutput(col))
    
# MODEL_NAME_PT = "t4r_pytorch_pt"
# payload = cudf.DataFrame(data={'sess_pid_seq': np.random.randint(0, 390001, 20), 'id': 0}).groupby('id').agg({'sess_pid_seq': list})

# with grpcclient.InferenceServerClient("localhost:8001") as client:
#     col_names = ['sess_pid_seq']
#     inputs = nvt_triton.convert_df_to_triton_input(col_names, payload, grpcclient.InferInput)
#     response = client.infer(MODEL_NAME_PT, inputs)