In [1]:
import numpy as np
import pandas as pd

# Notebook display configuration: show every column of wide frames,
# but cap the number of rendered rows at 10.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10

In [4]:
import warnings
import logging
import os
import onnxruntime
from azureml.automl.runtime.onnx_convert import OnnxInferenceHelper
# from azureml.automl.core.onnx_convert import OnnxInferenceHelper
import json
import time

In [6]:
from typing import Any, Tuple
from numpy import ndarray


class OnnxModelWrapper:
    """
        Adapter that routes prediction calls to an OnnxInferenceHelper instance.
    """
    def __init__(self, onnx_model_bytes: bytes, onnx_input_map: dict):
        """
        Keep the raw inputs and build the underlying inference helper from them.

        :param onnx_model_bytes: the onnx model in bytes
        :param onnx_input_map: the onnx_resource dictionary
        """
        self.onnx_model_bytes, self.onnx_input_map = onnx_model_bytes, onnx_input_map
        self.wrapper_model = OnnxInferenceHelper(
            self.onnx_model_bytes,
            self.onnx_input_map,
        )

    def predict(self, X) -> Tuple[Any, Any]:
        """
        Forward X to the helper's ``predict`` using the helper's default arguments.

        :param X: features to predict
        :returns the helper's result, unchanged (annotated as a <label, prob> tuple)
        """
        helper = self.wrapper_model
        return helper.predict(X)

    def predict_proba(self, X) -> ndarray:
        """
        Ask the helper for predictions with probabilities and keep only the latter.

        :param X: features to predict
        :returns the second element of the helper's two-element result
        """
        helper_output = self.wrapper_model.predict(X, with_prob=True)
        # Two-element unpacking: the first item is dropped, the second is returned.
        _, probabilities = helper_output
        return probabilities


In [7]:
# Read the serialized ONNX model (binary) and its resource description (text).
# The context managers close each handle as soon as its contents are in
# memory, so no file descriptor stays open for the lifetime of the kernel.
with open('onnx.model', 'rb') as onnx_model_file:
    onnx_model_data = onnx_model_file.read()
with open('onnx.res', 'r') as onnx_res_file:
    onnx_res_data = onnx_res_file.read()

In [8]:
# Build the prediction wrapper from the model bytes and the parsed JSON resource text.
onnxrt_wrapper = OnnxModelWrapper(
    onnx_model_bytes=onnx_model_data,
    onnx_input_map=json.loads(onnx_res_data),
)

In [9]:
# Load the invoice data; the relative path is resolved against the working directory.
filepath = 'invoice.csv'
data_df = pd.read_csv(filepath_or_buffer=filepath)

In [10]:
# Preview the first five rows (the default length of head()).
data_df.head(n=5)

Unnamed: 0,INVOICECUSTTRANSRECID,WASDISPUTED_VALUE,ROW_UNIQUEKEY,ISCLOSED_VALUE,TRANSTYPE,PAYMENTSCHEDULEID,CASHDISCOUNTCODE,RECID,ACCOUNTINGCURRENCY,WASCOLLECTIONLETTERSENT,WASDISPUTED,CASHDISCOUTPERCENT,WASCOLLECTIONLETTERSENT_VALUE,ISCLOSED,CUSTOMERRECID,INVOICEAMOUNTACCOUNTING,TRANSTYPE_VALUE,NUM_CHARACTERS(DUEDATE),NUM_CHARACTERS(DATEFULLYPAID),NUM_CHARACTERS(CUSTOMERID),NUM_CHARACTERS(ORDERACCOUNT),NUM_CHARACTERS(INVOICEDATE),InvoiceAccount.PARTYNUMBER,InvoiceAccount.CUSTOMERRECID,InvoiceAccount.RECID,InvoiceAccount.COMPANY,InvoiceAccount.CUSTOMERGROUPID,InvoiceAccount.CUSTOMERCOUNTRYREGIONID,InvoiceAccount.AVERAGEDAYSTOPAY,InvoiceAccount.MIN(Invoice.WASDISPUTED_VALUE),InvoiceAccount.MIN(Invoice.ROW_UNIQUEKEY),InvoiceAccount.MIN(Invoice.ISCLOSED_VALUE),InvoiceAccount.MIN(Invoice.RECID),InvoiceAccount.MIN(Invoice.CASHDISCOUTPERCENT),InvoiceAccount.MIN(Invoice.WASCOLLECTIONLETTERSENT_VALUE),InvoiceAccount.MIN(Invoice.CUSTOMERRECID),InvoiceAccount.MIN(Invoice.INVOICEAMOUNTACCOUNTING),InvoiceAccount.MIN(Invoice.TRANSTYPE_VALUE),InvoiceAccount.MAX(Invoice.WASDISPUTED_VALUE),InvoiceAccount.MAX(Invoice.ROW_UNIQUEKEY),InvoiceAccount.MAX(Invoice.ISCLOSED_VALUE),InvoiceAccount.MAX(Invoice.RECID),InvoiceAccount.MAX(Invoice.CASHDISCOUTPERCENT),InvoiceAccount.MAX(Invoice.WASCOLLECTIONLETTERSENT_VALUE),InvoiceAccount.MAX(Invoice.CUSTOMERRECID),InvoiceAccount.MAX(Invoice.INVOICEAMOUNTACCOUNTING),InvoiceAccount.MAX(Invoice.TRANSTYPE_VALUE),InvoiceAccount.SUM(Invoice.WASDISPUTED_VALUE),InvoiceAccount.SUM(Invoice.ROW_UNIQUEKEY),InvoiceAccount.SUM(Invoice.ISCLOSED_VALUE),InvoiceAccount.SUM(Invoice.RECID),InvoiceAccount.SUM(Invoice.CASHDISCOUTPERCENT),InvoiceAccount.SUM(Invoice.WASCOLLECTIONLETTERSENT_VALUE),InvoiceAccount.SUM(Invoice.CUSTOMERRECID),InvoiceAccount.SUM(Invoice.INVOICEAMOUNTACCOUNTING),InvoiceAccount.SUM(Invoice.TRANSTYPE_VALUE),InvoiceAccount.MEAN(Invoice.WASDISPUTED_VALUE),InvoiceAccount.MEAN(Invoice.ROW_UNIQUEKEY),InvoiceAccount.MEAN(Invoice.ISCLOSED_VALUE),Invo
iceAccount.MEAN(Invoice.RECID),InvoiceAccount.MEAN(Invoice.CASHDISCOUTPERCENT),InvoiceAccount.MEAN(Invoice.WASCOLLECTIONLETTERSENT_VALUE),InvoiceAccount.MEAN(Invoice.CUSTOMERRECID),InvoiceAccount.MEAN(Invoice.INVOICEAMOUNTACCOUNTING),InvoiceAccount.MEAN(Invoice.TRANSTYPE_VALUE),InvoiceAccount.COUNT(Invoice),InvoiceAccount.NUM_CHARACTERS(CUSTOMERID)
0,5637144576,0,720,1,5,1,5,5637144576,11,0,0,0.0,0,1,22565421558,328130.0,2,19,19,6,6,19,838,22565421558,22565421558,14,6,2,0,0,720,1,5637144576,0.0,0,22565421558,13645.5,2,0,14877,1,5637146076,0.0,0,22565421558,382761.5,2,0,71671,24,135291000000.0,0.0,0,541570000000.0,7656368.5,48,0.0,2986.29,1.0,5637145550,0.0,0.0,22565421558,319015.35,2.0,24,6
1,5637144577,0,740,1,5,1,5,5637144577,11,0,0,0.0,0,1,22565421559,306348.9,2,19,19,6,6,19,839,22565421559,22565421559,14,2,15,0,0,740,0,5637144577,0.0,0,22565421559,20.0,2,0,14880,1,68719532061,0.0,1,22565421559,4250000.0,13,0,99275,26,445807000000.0,0.0,4,654397000000.0,11455852.48,93,0.0,3423.28,0.9,15372649960,0.0,0.14,22565421559,395029.4,3.21,29,6
2,5637144578,0,763,1,5,1,5,5637144578,11,0,0,0.0,0,1,22565421560,309673.65,2,19,19,6,6,19,840,22565421560,22565421560,14,2,15,0,0,763,1,5637144578,0.0,0,22565421560,17286.55,2,1,14881,1,5637146078,0.0,0,22565421560,360219.5,2,1,79383,24,135291000000.0,0.0,0,541570000000.0,7226285.06,48,0.04,3307.62,1.0,5637145552,0.0,0.0,22565421560,301095.21,2.0,24,6
3,5637144579,0,782,1,5,1,5,5637144579,11,0,0,0.0,0,1,22565421562,213427.5,2,19,19,6,6,19,842,22565421562,22565421562,14,0,15,0,0,782,0,5637144579,0.0,0,22565421562,20.0,2,0,14884,1,68719530562,0.01,1,22565421562,248659.13,13,0,122794,26,347087000000.0,0.01,2,631832000000.0,5123577.83,79,0.0,4385.5,0.93,12395974712,0.0,0.07,22565421562,182984.92,2.82,28,6
4,5637144580,0,802,1,5,1,5,5637144580,11,0,0,0.0,0,1,22565421563,88784.8,2,19,19,6,6,19,843,22565421563,22565421563,14,0,15,0,0,802,0,5637144580,0.0,0,22565421563,1040.0,2,0,14887,1,68719524561,0.01,0,22565421563,104665.6,8,0,122407,26,278368000000.0,0.02,0,609266000000.0,2180079.2,66,0.0,4533.59,0.96,10309914194,0.0,0.0,22565421563,80743.67,2.44,27,6


In [11]:
# Display the (rows, columns) tuple of the frame loaded from the CSV.
data_df.shape

(14917, 67)

In [12]:
# Time one predict_proba pass over the whole frame.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# difference cannot be skewed by system clock adjustments as with time.time().
start = time.perf_counter()
predictions = onnxrt_wrapper.predict_proba(data_df)
end = time.perf_counter()
print(end - start)

31.1851806640625


In [13]:
from azureml.explain.model.mimic.mimic_explainer import MimicExplainer
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
# Time the construction of the mimic explainer around the ONNX wrapper.
# perf_counter() is used because it is monotonic and intended for measuring
# elapsed time; time.time() follows the adjustable wall clock.
start = time.perf_counter()
explainer = MimicExplainer(onnxrt_wrapper, data_df, LGBMExplainableModel, augment_data=False)
end = time.perf_counter()
print(end - start)

32.04781436920166


In [14]:
def explain(train_data):
    """
    Run a global explanation on train_data using the module-level ``explainer``.

    :param train_data: data forwarded to ``explainer.explain_global``
    :returns the object produced by ``explainer.explain_global``; it was
        previously bound to an unused local and dropped, so callers that
        ignore the return value are unaffected
    """
    # include_local=False is passed through as-is.
    # NOTE(review): presumably this skips per-row explanations - confirm in the library docs.
    return explainer.explain_global(train_data, include_local=False)

In [15]:
# Time a global explanation on a 1000-row random sample.
# random_state pins the sample so a re-run explains the same rows;
# perf_counter() is the monotonic clock intended for elapsed-time measurement.
sample = data_df.sample(1000, random_state=0)
start = time.perf_counter()
explain(sample)
end = time.perf_counter()
print(end - start)

47.51412320137024


In [16]:
# Time a global explanation on a 2000-row random sample.
# random_state pins the sample so a re-run explains the same rows;
# perf_counter() is the monotonic clock intended for elapsed-time measurement.
sample = data_df.sample(2000, random_state=0)
start = time.perf_counter()
explain(sample)
end = time.perf_counter()
print(end - start)

91.24858832359314


In [17]:
# Time a global explanation on a 5000-row random sample.
# random_state pins the sample so a re-run explains the same rows;
# perf_counter() is the monotonic clock intended for elapsed-time measurement.
sample = data_df.sample(5000, random_state=0)
start = time.perf_counter()
explain(sample)
end = time.perf_counter()
print(end - start)

218.98222756385803


In [18]:
# Time a global explanation over the entire frame (no sampling).
# perf_counter() is monotonic, so the difference is a reliable elapsed time.
start = time.perf_counter()
explain(data_df)
end = time.perf_counter()
print(end - start)

639.5890746116638


In [19]:
# Display the frame's (rows, columns) tuple again, after the explanation runs above.
data_df.shape

(14917, 67)