In [1]:
import snowflake.snowpark
from snowflake.snowpark import functions as F
from snowflake.snowpark.session import Session
from snowflake.snowpark.types import IntegerType, StringType, StructType, FloatType, StructField, DateType, Variant
from snowflake.snowpark.functions import udf, sum, col,array_construct,month,year,call_udf,lit,count
from snowflake.snowpark.version import VERSION
# Misc
import json
import pandas as pd
import numpy as np
import logging 
logger = logging.getLogger("snowflake.snowpark.session")
logger.setLevel(logging.ERROR)

In [2]:
# Create Snowflake Session object
connection_parameters = json.load(open('connection.json'))
session = Session.builder.configs(connection_parameters).create()
session.sql_simplifier_enabled = True

snowflake_environment = session.sql('select current_user(), current_role(), current_database(), current_schema(), current_version(), current_warehouse()').collect()
snowpark_version = VERSION

# Current Environment Details
print('User                        : {}'.format(snowflake_environment[0][0]))
print('Role                        : {}'.format(snowflake_environment[0][1]))
print('Database                    : {}'.format(snowflake_environment[0][2]))
print('Schema                      : {}'.format(snowflake_environment[0][3]))
print('Warehouse                   : {}'.format(snowflake_environment[0][5]))
print('Snowflake version           : {}'.format(snowflake_environment[0][4]))
print('Snowpark for Python version : {}.{}.{}'.format(snowpark_version[0],snowpark_version[1],snowpark_version[2]))

User                        : GREENTOMATO
Role                        : ACCOUNTADMIN
Database                    : None
Schema                      : None
Warehouse                   : COMPUTE_WH
Snowflake version           : 7.9.0
Snowpark for Python version : 1.1.0


## Environment Setup

In [5]:
session.sql('''create database if not exists snowflake_sample_data from share sfc_samples.sample_data''').collect()

[Row(status='SNOWFLAKE_SAMPLE_DATA already exists, statement succeeded.')]

In [6]:
session.sql('CREATE DATABASE IF NOT EXISTS tpcds_xgboost').collect()
session.sql('CREATE SCHEMA IF NOT EXISTS tpcds_xgboost.demo').collect()
session.sql("create or replace warehouse FE_AND_INFERENCE_WH with warehouse_size='3X-LARGE'").collect()
session.sql("create or replace warehouse snowpark_opt_wh with warehouse_size = 'MEDIUM' warehouse_type = 'SNOWPARK-OPTIMIZED'").collect()
session.sql("alter warehouse snowpark_opt_wh set max_concurrency_level = 1").collect()
session.use_warehouse('FE_AND_INFERENCE_WH')

Select either 100 or 10 for the TPC-DS Dataset size to use below. See (https://docs.snowflake.com/en/user-guide/sample-data-tpcds.html)[here] for more information If you choose 100, I recommend >= 3XL warehouse. 

In [7]:
TPCDS_SIZE_PARAM = 10
SNOWFLAKE_SAMPLE_DB = 'SNOWFLAKE_SAMPLE_DATA' # Name of Snowflake Sample Database might be different...

if TPCDS_SIZE_PARAM == 100: 
    TPCDS_SCHEMA = 'TPCDS_SF100TCL'
elif TPCDS_SIZE_PARAM == 10:
    TPCDS_SCHEMA = 'TPCDS_SF10TCL'
else:
    raise ValueError("Invalid TPCDS_SIZE_PARAM selection")
    
store_sales = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.store_sales')
catalog_sales = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.catalog_sales') 
web_sales = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.web_sales') 
date = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.date_dim')
dim_stores = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.store')
customer = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.customer')
address = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.customer_address')
demo = session.table(f'{SNOWFLAKE_SAMPLE_DB}.{TPCDS_SCHEMA}.customer_demographics')

In [8]:
#print((store_sales.count(), len(store_sales.columns)))


## Feature Engineering
We will aggregate sales by customer across all channels(web, store, catalogue) and join that to customer demographic data. 

In [9]:
store_sales_agged = store_sales.group_by('ss_customer_sk').agg(F.sum('ss_sales_price').as_('total_sales'))
web_sales_agged = web_sales.group_by('ws_bill_customer_sk').agg(F.sum('ws_sales_price').as_('total_sales'))
catalog_sales_agged = catalog_sales.group_by('cs_bill_customer_sk').agg(F.sum('cs_sales_price').as_('total_sales'))
store_sales_agged = store_sales_agged.rename('ss_customer_sk', 'customer_sk')
web_sales_agged = web_sales_agged.rename('ws_bill_customer_sk', 'customer_sk')
catalog_sales_agged = catalog_sales_agged.rename('cs_bill_customer_sk', 'customer_sk')

In [10]:
total_sales = store_sales_agged.union_all(web_sales_agged)
total_sales = total_sales.union_all(catalog_sales_agged)

In [11]:
total_sales = total_sales.group_by('customer_sk').agg(F.sum('total_sales').as_('total_sales'))

In [12]:
customer = customer.select('c_customer_sk','c_current_hdemo_sk', 'c_current_addr_sk', 'c_customer_id', 'c_birth_year')

In [13]:
customer = customer.join(address.select('ca_address_sk', 'ca_zip'), customer['c_current_addr_sk'] == address['ca_address_sk'] )
customer = customer.join(demo.select('cd_demo_sk', 'cd_gender', 'cd_marital_status', 'cd_credit_rating', 'cd_education_status', 'cd_dep_count'),
                                customer['c_current_hdemo_sk'] == demo['cd_demo_sk'] )
customer = customer.rename('c_customer_sk', 'customer_sk')
#customer.show()

In [14]:
final_df = total_sales.join(customer, on='customer_sk')

In [15]:
session.use_database('tpcds_xgboost')
session.use_schema('demo')
final_df.write.mode('overwrite').save_as_table('feature_store')

In [16]:
session.add_packages('snowflake-snowpark-python', 'scikit-learn', 'pandas', 'numpy', 'joblib', 'cachetools', 'xgboost', 'joblib')

In [17]:
session.sql('CREATE OR REPLACE STAGE ml_models_10T ').collect()

[Row(status='Stage area ML_MODELS_10T successfully created.')]

In [18]:
#Create stage for LR
session.sql('CREATE OR REPLACE STAGE ml_models_LR_10T ').collect()

[Row(status='Stage area ML_MODELS_LR_10T successfully created.')]

## Training Model
In this part, we will training a XGBoost model and upload to snowflake stage. This will create `stored procedure` on snowflake, and we can call this model to predict.

In [31]:
train_metric, test_metric = {}, {}

### XGBoost

In [69]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
import joblib
import os

def train_model(session: snowflake.snowpark.Session) -> list:
    snowdf = session.table("feature_store")
    
    snowdf = snowdf.drop(['CUSTOMER_SK', 'C_CURRENT_HDEMO_SK', 'C_CURRENT_ADDR_SK', 'C_CUSTOMER_ID', 'CA_ADDRESS_SK', 'CD_DEMO_SK'])
    snowdf_train, snowdf_test = snowdf.random_split([0.8, 0.2], seed=82) 

    # save the train and test sets as time stamped tables in Snowflake 
    snowdf_train.write.mode("overwrite").save_as_table("tpcds_xgboost.demo.tpc_TRAIN")
    snowdf_test.write.mode("overwrite").save_as_table("tpcds_xgboost.demo.tpc_TEST")
    train_x = snowdf_train.drop("TOTAL_SALES").to_pandas() # drop labels for training set
    train_y = snowdf_train.select("TOTAL_SALES").to_pandas()
    test_x = snowdf_test.drop("TOTAL_SALES").to_pandas()
    test_y = snowdf_test.select("TOTAL_SALES").to_pandas()
    cat_cols = ['CA_ZIP', 'CD_GENDER', 'CD_MARITAL_STATUS', 'CD_CREDIT_RATING', 'CD_EDUCATION_STATUS']
    num_cols = ['C_BIRTH_YEAR', 'CD_DEP_COUNT']

    num_pipeline = Pipeline([
            ('imputer', SimpleImputer(strategy="median")),
            ('std_scaler', StandardScaler()),
        ])

    preprocessor = ColumnTransformer(
    transformers=[('num', num_pipeline, num_cols),
                  ('encoder', OneHotEncoder(handle_unknown="ignore"), cat_cols) ])

    pipe = Pipeline([('preprocessor', preprocessor), 
                        ('xgboost', XGBRegressor())])
    pipe.fit(train_x, train_y)

    test_preds = pipe.predict(test_x)
    train_preds = pipe.predict(train_x)
    #rmse = mean_squared_error(test_y, test_preds)
    train_metric, test_metric = {}, {}
    train_metric['MAPE'] = mean_absolute_percentage_error(train_y, train_preds)
    train_metric['RMSE'] = mean_absolute_error(train_y, train_preds)
    train_metric['R2'] = r2_score(train_y, train_preds)
    
    test_metric['MAPE'] =  mean_absolute_percentage_error(test_y, test_preds)
    test_metric['RMSE'] =  mean_absolute_error(test_y, test_preds)
    test_metric['R2'] =  r2_score(test_y, test_preds)
    
    model_file = os.path.join('/tmp', 'model.joblib')
    joblib.dump(pipe, model_file)
    session.file.put(model_file, "@ml_models_10T",overwrite=True)
    print('successes')
    return ['Training',train_metric,'Test', test_metric]

In [70]:
session.use_warehouse('snowpark_opt_wh')
train_model_sp = F.sproc(train_model, session=session, replace=True, is_permanent=True, name="xgboost_sproc", stage_location="@ml_models_10T")
# Switch to Snowpark Optimized Warehouse for training and to run the stored proc
metrics = train_model_sp(session=session)

In [71]:
print(metrics)

[
  "Training",
  {
    "MAPE": 0.10485963767796148,
    "R2": 0.0002156159931520074,
    "RMSE": 3648.279975985239
  },
  "Test",
  {
    "MAPE": 0.10488693273468783,
    "R2": -4.803458124280624e-05,
    "RMSE": 3649.372248018043
  }
]


### Linear Regression

In [66]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
import joblib
import os

def train_model_LR(session: snowflake.snowpark.Session) -> list:
    
#---------Preprocess Start ---------
    snowdf = session.table("feature_store")
    snowdf = snowdf.drop(['CUSTOMER_SK', 'C_CURRENT_HDEMO_SK', 'C_CURRENT_ADDR_SK', 'C_CUSTOMER_ID', 'CA_ADDRESS_SK', 'CD_DEMO_SK'])
    snowdf_train, snowdf_test = snowdf.random_split([0.8, 0.2], seed=82) 

    # save the train and test sets as time stamped tables in Snowflake 
    snowdf_train.write.mode("overwrite").save_as_table("tpcds_xgboost.demo.tpc_TRAIN")
    snowdf_test.write.mode("overwrite").save_as_table("tpcds_xgboost.demo.tpc_TEST")
    train_x = snowdf_train.drop("TOTAL_SALES").to_pandas() # drop labels for training set
    train_y = snowdf_train.select("TOTAL_SALES").to_pandas()
    test_x = snowdf_test.drop("TOTAL_SALES").to_pandas()
    test_y = snowdf_test.select("TOTAL_SALES").to_pandas()
    cat_cols = ['CA_ZIP', 'CD_GENDER', 'CD_MARITAL_STATUS', 'CD_CREDIT_RATING', 'CD_EDUCATION_STATUS']
    num_cols = ['C_BIRTH_YEAR', 'CD_DEP_COUNT']

    num_pipeline = Pipeline([
            ('imputer', SimpleImputer(strategy="median")),
            ('std_scaler', StandardScaler()),
        ])

    preprocessor = ColumnTransformer(
    transformers=[('num', num_pipeline, num_cols),
                  ('encoder', OneHotEncoder(handle_unknown="ignore"), cat_cols) ])
#---------Preprocess End---------

    pipe = Pipeline([('preprocessor', preprocessor), 
                        ('LinearRegression', LinearRegression())])
    pipe.fit(train_x, train_y)

    test_preds = pipe.predict(test_x)
    train_preds = pipe.predict(train_x)
    #rmse = mean_squared_error(test_y, test_preds)
    train_metric, test_metric = {}, {}
    train_metric['MAPE'] = mean_absolute_percentage_error(train_y, train_preds)
    train_metric['RMSE'] = mean_absolute_error(train_y, train_preds)
    train_metric['R2'] = r2_score(train_y, train_preds)
    
    test_metric['MAPE'] =  mean_absolute_percentage_error(test_y, test_preds)
    test_metric['RMSE'] =  mean_absolute_error(test_y, test_preds)
    test_metric['R2'] =  r2_score(test_y, test_preds)
    model_file = os.path.join('/tmp', 'model_LR.joblib')
    joblib.dump(pipe, model_file)
    session.file.put(model_file, "@ml_models_LR_10T",overwrite=True)
    print('successes')
    return ['Training',train_metric,'Test', test_metric]

In [67]:
#----USE DATABASE AND SESSION
session.use_database('tpcds_xgboost')
session.use_schema('demo')
session.add_packages('snowflake-snowpark-python', 'scikit-learn', 'pandas', 'numpy', 'joblib', 'cachetools', 'xgboost', 'joblib')
#Create stage for LR
session.sql('CREATE OR REPLACE STAGE ml_models_LR_10T ').collect()

session.use_warehouse('snowpark_opt_wh')
train_model_sp = F.sproc(train_model_LR, session=session, replace=True, is_permanent=True, name="LinearRegresson_sproc", stage_location="@ml_models_LR_10T")
# Switch to Snowpark Optimized Warehouse for training and to run the stored proc
LR_metric = train_model_sp(session=session)

In [68]:
print(LR_metric)

[
  "Training",
  {
    "MAPE": 0.10487981806292808,
    "R2": 0.00018172684338746414,
    "RMSE": 3648.985233540816
  },
  "Test",
  {
    "MAPE": 0.10488251067703308,
    "R2": -0.00020184804454337346,
    "RMSE": 3648.8711500677236
  }
]


## Inference
Call `stored procedure` or say our `model` is billed by predict times, \$0.52 for 10T model, \\$1.3 for 100T model.
- XGBoost model in stage `@ml_models_10T` is trained on 10T model and called `model.joblib`.
- Linear regression in stage `ml_models_LR_10T` is trained on 10T model and called `model_LR.joblib`.

In [3]:
# Switch back to feature engineering/inference warehouse
session.use_warehouse('FE_AND_INFERENCE_WH')
session.use_database('tpcds_xgboost')
session.use_schema('demo')
session.add_packages('snowflake-snowpark-python', 'scikit-learn', 'pandas', 'numpy', 'joblib', 'cachetools', 'xgboost', 'joblib')
features = [ 'C_BIRTH_YEAR', 'CA_ZIP', 'CD_GENDER', 'CD_MARITAL_STATUS', 'CD_CREDIT_RATING', 'CD_EDUCATION_STATUS', 'CD_DEP_COUNT']
session.add_import("@ml_models_10T/model.joblib")  
session.add_import("@ml_models_LR_10T/model_LR.joblib")
features = [ 'C_BIRTH_YEAR', 'CA_ZIP', 'CD_GENDER', 'CD_MARITAL_STATUS', 'CD_CREDIT_RATING', 'CD_EDUCATION_STATUS', 'CD_DEP_COUNT']


In [4]:
import sys
import pandas as pd
import cachetools
import joblib
from snowflake.snowpark import types as T
# choose model here
stage_name = 'ml_models_10T'
model_name = 'model.joblib'

# stage_name = 'ml_models_LR_10T'
# model_name = 'model_LR.joblib' 

@cachetools.cached(cache={})
def read_file(filename):
    import os, joblib
    import_dir = sys._xoptions.get("snowflake_import_directory")
    if import_dir:
        with open(os.path.join(import_dir, filename), 'rb') as file:
            m = joblib.load(file)
            return m

@F.pandas_udf(session=session, max_batch_size=10000, is_permanent=True, stage_location=f'@{stage_name}', replace=True, name="clv_xgboost_udf")
def predict(df:  T.PandasDataFrame[int, str, str, str, str, str, int]) -> T.PandasSeries[float]:
    m = read_file(model_name)        
    df.columns = features
    return m.predict(df)

#### Inference on data from feature_store.

In [23]:
# this will take all training dataset as input, let use smaller dataset in the following
# inference_df = session.table('feature_store').limit(100)
# inference_df = inference_df.drop(['CUSTOMER_SK', 'C_CURRENT_HDEMO_SK', 'C_CURRENT_ADDR_SK', 'C_CUSTOMER_ID', 'CA_ADDRESS_SK', 'CD_DEMO_SK'])
# inputs = inference_df.drop("TOTAL_SALES")
# snowdf_results = inference_df.select(*inputs,
#                     predict(*inputs).alias('PREDICTION'), 
#                     (F.col('TOTAL_SALES')).alias('ACTUAL_SALES')
#                     )
# snowdf_results.write.mode('overwrite').save_as_table('predictions')

#### Calculate MAPE metrics

In [None]:
snowdf_train.write.mode("overwrite").save_as_table("tpcds_xgboost.demo.tpc_TRAIN")
snowdf_test.write.mode("overwrite").save_as_table("tpcds_xgboost.demo.tpc_TEST")
train_x = snowdf_train.drop("TOTAL_SALES").to_pandas() # drop labels for training set
train_y = snowdf_train.select("TOTAL_SALES").to_pandas()
test_x = snowdf_test.drop("TOTAL_SALES").to_pandas()
test_y = snowdf_test.select("TOTAL_SALES").to_pandas()

In [14]:
session.sql('select count(*) from tpcds_xgboost.demo.tpc_TRAIN').show()

--------------
|"COUNT(*)"  |
--------------
|50178278    |
--------------



In [15]:
session.sql('select count(*) from tpcds_xgboost.demo.tpc_TEST').show()

--------------
|"COUNT(*)"  |
--------------
|12548711    |
--------------



#### Inference on typed data

In [5]:
# assume this is from strealit UI
typed_input = [[1969, '66060','M','U','Low Risk','2 yr Degree', 1]]

In [6]:
input_df = session.create_dataframe(typed_input, schema=features)

In [7]:
typed_output = input_df.select(*input_df,
                    predict(*input_df).alias('PREDICTION'))


In [8]:
temp = pd.DataFrame(typed_output.collect())

In [9]:
temp1 = temp.T
temp1.columns = ['']
temp1

Unnamed: 0,Unnamed: 1
C_BIRTH_YEAR,1969
CA_ZIP,66060
CD_GENDER,M
CD_MARITAL_STATUS,U
CD_CREDIT_RATING,Low Risk
CD_EDUCATION_STATUS,2 yr Degree
CD_DEP_COUNT,1
PREDICTION,32337.777344


In [14]:
typed_output.collect()

Failed to execute query [queryID: 01ab04bd-0004-5eaf-0052-d38700020256]  SELECT "C_BIRTH_YEAR", "CA_ZIP", "CD_GENDER", "CD_MARITAL_STATUS", "CD_CREDIT_RATING", "CD_EDUCATION_STATUS", "CD_DEP_COUNT", clv_xgboost_udf("C_BIRTH_YEAR", "CA_ZIP", "CD_GENDER", "CD_MARITAL_STATUS", "CD_CREDIT_RATING", "CD_EDUCATION_STATUS", "CD_DEP_COUNT") AS "PREDICTION" FROM ( SELECT $1 AS "C_BIRTH_YEAR", $2 AS "CA_ZIP", $3 AS "CD_GENDER", $4 AS "CD_MARITAL_STATUS", $5 AS "CD_CREDIT_RATING", $6 AS "CD_EDUCATION_STATUS", $7 AS "CD_DEP_COUNT" FROM  VALUES (1969 :: INT, '66060' :: STRING, 'M' :: STRING, 'U' :: STRING, 'Low Risk' :: STRING, '2 yr Degree' :: STRING, 1 :: INT))
100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "/home/udf/1389594464/udf_py_1835141308.zip/udf_py_1835141308.py", line 49, in compute
    return lock_function_once(func, invoked)(df)
  File "/home/udf/1389594464/udf_py_1835141308.zip/udf_py_1835141308.py", line 38, in wrapper
    result = f(*args, **kwar

SnowparkSQLException: (1304): 01ab04bd-0004-5eaf-0052-d38700020256: 100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "/home/udf/1389594464/udf_py_1835141308.zip/udf_py_1835141308.py", line 49, in compute
    return lock_function_once(func, invoked)(df)
  File "/home/udf/1389594464/udf_py_1835141308.zip/udf_py_1835141308.py", line 38, in wrapper
    result = f(*args, **kwargs)
  File "C:\Users\zwdua\AppData\Local\Temp\ipykernel_14476\3989733615.py", line 24, in predict
  File "C:\Users\zwdua\anaconda3\envs\snowpark\lib\site-packages\cachetools\decorators.py", line 26, in wrapper
  File "C:\Users\zwdua\AppData\Local\Temp\ipykernel_14476\3989733615.py", line 19, in read_file
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/site-packages/joblib/numpy_pickle.py", line 577, in load
    obj = _unpickle(fobj)
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/site-packages/joblib/numpy_pickle.py", line 506, in _unpickle
    obj = unpickler.load()
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/pickle.py", line 1212, in load
    dispatch[key[0]](self)
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/site-packages/joblib/numpy_pickle.py", line 331, in load_build
    Unpickler.load_build(self)
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/pickle.py", line 1705, in load_build
    setstate(state)
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/site-packages/xgboost/core.py", line 1451, in __setstate__
    _check_call(
  File "/usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/python3.8/site-packages/xgboost/core.py", line 218, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [19:05:50] /tmp/abs_3fs5_ko2hu/croots/recipe/xgboost-split_1659548945693/work/src/common/json.cc:458: Expecting: """, got: "76 ", around character position: 1
    {L\0\0\0\0\0\0\0
    ^~~~~~~~~
Stack trace:
  [bt] (0) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(+0x9dbf0) [0xffff7c420bf0]
  [bt] (1) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(xgboost::JsonReader::Error(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) const+0xadc) [0xffff7c45043c]
  [bt] (2) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(+0xd4b10) [0xffff7c457b10]
  [bt] (3) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(xgboost::JsonReader::ParseObject()+0x304) [0xffff7c4553c4]
  [bt] (4) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(xgboost::JsonReader::Parse()+0x178) [0xffff7c450dd8]
  [bt] (5) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(xgboost::JsonReader::Load()+0x30) [0xffff7c450f00]
  [bt] (6) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(xgboost::Json::Load(xgboost::StringView)+0x4c) [0xffff7c450f8c]
  [bt] (7) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(+0x1d2038) [0xffff7c555038]
  [bt] (8) /usr/lib/python_udf/ecd2f7835d1dd3652fa432f40f924b16e72b73e47f2a181f4b0c6bcc9157db9e/lib/libxgboost.so(XGBoosterUnserializeFromBuffer+0x60) [0xffff7c40ca40]


 in function CLV_XGBOOST_UDF with handler udf_py_1835141308.compute