# Profiling CUDA Inference Implementations

## Setup

Add the project's source directories to `sys.path` so that scripts located in other directories can be imported from this notebook.

In [1]:
import sys
import os
import json
import warnings

from tqdm.notebook import (tqdm, trange)

# Resolve the parent of the current working directory; the source
# directories listed below are looked up relative to it.
module_path = os.path.abspath(os.pardir)
code_dirs = ["src", "src/profile_utils"]

# Register each source directory on the import path exactly once, so that
# re-running this cell does not pile up duplicate entries.
for subdir in code_dirs:
    candidate = os.path.join(module_path, subdir)
    if candidate in sys.path:
        continue
    sys.path.append(candidate)

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
%load_ext rpy2.ipython

In [5]:
import numpy as np
import pandas as pd
import os
from tqdm.notebook import trange, tqdm

import xgboost as xgb
import catboost
import sklearn.ensemble
import cuml

import pickle

from sklearn.metrics import (mean_squared_error,
                             classification_report,
                             accuracy_score)

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.datasets import fetch_covtype

from profile_utils import (train_model_xgb,
                           train_model_rf,
                           train_model_cb,
                           profile_inference,
                           clean_profile)

from mlflow_loader import (_get_best_run_info,
                           load_model,
                           load_dataset,
                           load_dataset_from_str,
                           load_model_from_str)

from cosmetics import *

import mlflow
import ray
import yaml

import typing as t
import pandas as pd
import numpy as np

from mlflow.utils.file_utils import local_file_uri_to_path

from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.trial import Trial

## Profiling Inference on GPU

### Loading MLFlow Selected Models

See `parse_mlflow_notebook.ipynb` to generate the `csv` file that guided the choices below:

In [6]:
               # Churn modelling (numerical)
mlflow_runs = ["mlflow:///c8ac001b4cb14ce895193fbf4c2b66c3", # CatBoost (Round 5)
               # 
               # Eye movements (numerical)
               "mlflow:///0dbcbda035614d3c84ea3c6a123a9488", # XGBoost (Round 5)
               # 
               # Forest cover (numerical)
               "mlflow:///165346fb809b4ccfabe39ce9617393f1", # XGBoost (Round 6)
               #                            
               # Gas concentration (numerical)
               "mlflow:///56bc305886f240588ce1a32f8deadd09", # RF (Round 6)
               # 
               # Gesture phase (numerical)
               "mlflow:///8871a63a748a44a3b93bc6108de8ea79", # XGBoost (Round 5)
               # 
               # Telco (numerical)
               "mlflow:///f1630f43b74447d58a317aa4b58868a5", # XGBoost (Round 6)
               #
               # Rossman (numerical)
               "mlflow:///a4a41de730bc49339c694c0a3576a5d5", # XGBoost (Round 6)
              ]

model_info_list = [_get_best_run_info(run) for run in mlflow_runs]



### Profile Selected Models

In [7]:
# Batch sizes are spaced evenly on a log2 scale (6 points each):
#   - CatBoost models:  1 ... 2**11 (2048) samples
#   - all other models: 1 ... 2**19 (524288) samples
# For a quick smoke test, shrink both ranges, e.g.
# np.logspace(0, 4, num = 2, base = 2.0, dtype = int).
catboost_batches = np.logspace(0, 11, num = 6, base = 2.0, dtype = int)
others_batches = np.logspace(0, 19, num = 6, base = 2.0, dtype = int)

results_path = "../data/profiling_results/gpu_profiling_results_P100.csv"
# Create the output directory up front so that the first checkpoint write
# cannot fail after an expensive profiling call has already completed.
os.makedirs(os.path.dirname(results_path), exist_ok = True)

# Per-(model, batch size) measurement frames, combined after every step.
partial_dfs = []
results_df = pd.DataFrame()

for model_info in tqdm(model_info_list):
    if model_info["model"] == "catboost":
        batches = catboost_batches
    else:
        batches = others_batches

    print(f"Current batches: {batches}")

    for batch_size in batches:
        # Silence Python warnings raised while profiling to keep the cell
        # output readable.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            partial_df = profile_inference(model_info, runs = 3, batch_size = batch_size)

        partial_dfs.append(partial_df)
        results_df = pd.concat(partial_dfs, ignore_index = True)

        # Checkpoint after every batch size so partial results survive an
        # interrupted or crashed run.
        results_df.to_csv(results_path, index = False)

  0%|          | 0/7 [00:00<?, ?it/s]

Current batches: [   1    4   21   97  445 2048]
Current batches: [     1     13    194   2702  37640 524288]
Current batches: [     1     13    194   2702  37640 524288]
Current batches: [     1     13    194   2702  37640 524288]




[W] [02:55:31.816676] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:55:42.637135] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:55:53.077354] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:56:03.575239] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:56:13.990968] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:56:24.388572] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:56:34.490108] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:56:44.824747] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:56:54.994621] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:57:05.984413] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:57:16.469211] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:57:26.822091] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:57:37.310990] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:57:47.862316] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:57:58.258018] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:58:08.640815] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:58:21.178781] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


[W] [02:58:33.553947] Treelite currently does not support float64 model parameters. Accuracy may degrade slightly relative to native sklearn invocation.


Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'
Exception ignored in: 'cuml.fil.fil.ForestInference_impl.__dealloc__'
Traceback (most recent call last):
  File "cuml/fil/fil.pyx", line 273, in cuml.fil.fil.ForestInference_impl.get_dtype
AttributeError: 'NoneType' object has no attribute 'float32'


Current batches: [     1     13    194   2702  37640 524288]
Current batches: [     1     13    194   2702  37640 524288]
Current batches: [     1     13    194   2702  37640 524288]


In [8]:
# Inspect the collected profiling measurements (the rendered table is truncated).
results_df

Unnamed: 0,time_ms,kernel,run_id,batch_size,time_proportion,model,dataset
0,0.244350,"EvalObliviousTrees(uchar4 const *, unsigned in...",0,1,0.854619,catboost,churn_modelling:numerical
1,0.261182,"EvalObliviousTrees(uchar4 const *, unsigned in...",1,1,0.864343,catboost,churn_modelling:numerical
2,0.242590,"EvalObliviousTrees(uchar4 const *, unsigned in...",2,1,0.852276,catboost,churn_modelling:numerical
3,0.259357,"EvalObliviousTrees(uchar4 const *, unsigned in...",0,4,0.861866,catboost,churn_modelling:numerical
4,0.262686,"EvalObliviousTrees(uchar4 const *, unsigned in...",1,4,0.863562,catboost,churn_modelling:numerical
...,...,...,...,...,...,...,...
121,36.877372,void xgboost::predictor::PredictKernel<xgboost...,1,37640,0.670699,xgboost,rossmann_store_sales:numerical
122,37.644087,void xgboost::predictor::PredictKernel<xgboost...,2,37640,0.701865,xgboost,rossmann_store_sales:numerical
123,357.568274,void xgboost::predictor::PredictKernel<xgboost...,0,524288,0.856416,xgboost,rossmann_store_sales:numerical
124,343.221714,void xgboost::predictor::PredictKernel<xgboost...,1,524288,0.844479,xgboost,rossmann_store_sales:numerical


### Plot Profiling Results: Saturation Point for All Datasets

In [9]:
%%R -i results_df

library(stringr)
library(dplyr)
library(ggplot2)

# Derive the plotting columns:
#   dataset_label   - dataset name without the ":<feature set>" suffix
#   group           - "<model>: <dataset_label>", used for colour and facets
#   throughput      - batch_size / time_ms, i.e. samples per ms (= ksamples/s)
#   mean_throughput - throughput averaged over the runs of each
#                     (dataset, batch size) pair
results_df = results_df %>%
    mutate(dataset_label = str_split_fixed(dataset, fixed(":"), 2)[, 1],
           time_ms = as.numeric(time_ms),
           group = paste(model, ": ", dataset_label, sep = ""),
           throughput = batch_size / time_ms) %>%
    group_by(dataset_label, batch_size) %>%
    mutate(mean_throughput = mean(throughput)) %>%
    ungroup()

# One panel per model/dataset pair: the line follows the mean throughput,
# the points show every individual run.
saturation_plot = ggplot(results_df,
                         aes(x = batch_size,
                             color = group)) +
    geom_line(aes(y = mean_throughput),
              size = 1.8,
              alpha = 0.6) +
    geom_point(aes(y = throughput),
               size = 3) +
    ylab("GPU Throughput (ksamples/s)") +
    xlab("Samples") +
    #scale_x_continuous(trans = "log2") +
    scale_color_brewer(palette = "Dark2") +
    facet_wrap(group ~ ., scales = "free") +
    theme_bw(base_size = 18) +
    theme(legend.position = c(0.55, 0.2),
          legend.text = element_text(size = 12),
          legend.title = element_blank(),
          legend.background = element_rect(fill = "transparent"))

# Pass the plot explicitly instead of relying on ggplot2's implicit last_plot().
ggsave(filename = "../data/profiling_results/throughput_saturation_P100.pdf",
       plot = saturation_plot,
       width = 20,
       height = 10,
       dpi = 300)

R[write to console]: 
Attaching package: ‘dplyr’


R[write to console]: The following objects are masked from ‘package:stats’:

    filter, lag


R[write to console]: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




### Latency and Throughput

In [10]:
%%R -i results_df

library(stringr)
library(dplyr)
library(ggplot2)
library(tidyr)
library(ggforce)
library(patchwork)

# Derive per-run throughput (batch_size / time_ms = samples per ms = ksamples/s),
# then summarise each dataset by two metrics:
#   max_throughput - the highest throughput of any single run
#   latency        - the smallest measured time, converted from ms to ns
# Finally reshape to long format so each metric gets its own facet.
results_df = results_df %>%
    mutate(dataset_label = str_split_fixed(dataset, fixed(":"), 2)[, 1],
           time_ms = as.numeric(time_ms),
           group = paste(model, ": ", dataset_label, sep = ""),
           throughput = batch_size / time_ms) %>%
    group_by(dataset_label, batch_size) %>%
    mutate(mean_throughput = mean(throughput)) %>%
    group_by(dataset_label) %>%
    mutate(max_throughput = max(throughput),
           latency = min(time_ms) * 1e6) %>%
    ungroup() %>%
    pivot_longer(cols = c("max_throughput", "latency"),
                 names_to = "performance_metric",
                 values_to = "metric_value") %>%
    mutate(performance_metric = recode_factor(performance_metric,
                                              max_throughput = "Throughput (ksamples/s)",
                                              latency = "Latency (ns)"))

# stat = "unique" collapses the repeated per-run rows, leaving one point per
# model/dataset pair in each metric facet.
latency_throughput_plot = ggplot(results_df,
                                 aes(x = group,
                                     y = metric_value,
                                     color = group)) +
    geom_point(size = 3,
               stat = "unique") +
    facet_col(vars(performance_metric), scales = "free") +
    ylab("GPU Performance Metric (log10 scale)") +
    xlab("Model and Dataset") +
    scale_color_brewer(palette = "Dark2") +
    scale_y_continuous(trans = "log10") +
    theme_minimal(base_size = 18) +
    theme(legend.position = c(0.1, 0.9),
          legend.text = element_text(size = 12),
          legend.title = element_blank(),
          legend.background = element_rect(fill = "transparent"),
          axis.text.x = element_text(size = 10))

# Pass the plot explicitly instead of relying on ggplot2's implicit last_plot().
ggsave(filename = "../data/profiling_results/latency_throughput_P100.pdf",
       plot = latency_throughput_plot,
       width = 20,
       height = 10,
       dpi = 300)