In [9]:
import warnings

import numpy
import onnx
from onnxmltools.convert import convert_lightgbm
import onnxruntime as rt
import pandas as pd
from skl2onnx.common.data_types import FloatTensorType

from settings import settings
import ml_tune_helpers.ts_splitter as ts_splitter
from ml_tune_helpers.lgbm_optuna.optuna_lgb_search import OptunaLgbSearch

warnings.filterwarnings('ignore')

In [3]:
# Read the train and validation CSVs for experiment 6001. Both use the same
# reader options: the configured date column becomes the index and is parsed
# as dates.
read_csv_kwargs = dict(parse_dates=True, index_col=settings.DATE_COLUMN_NAME)
df_train = pd.read_csv("experiments_results/lgbm/6001/train.csv", **read_csv_kwargs)
df_val = pd.read_csv("experiments_results/lgbm/6001/val.csv", **read_csv_kwargs)

# extract_labels returns a pair for each frame, taken with respect to the
# 'AQI_PM25' column (presumably features and target — confirm in ts_splitter).
x_train, y_train = ts_splitter.extract_labels(df_train, 'AQI_PM25')
x_val, y_val = ts_splitter.extract_labels(df_val, 'AQI_PM25')


# LightGBM parameters handed to the tuner as its default configuration.
best_pm25_params = {
    'n_jobs': -1,
    'verbosity': -1,
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'extra_trees': False,
    'n_estimators': 2100,
    'num_leaves': 97,
    'learning_rate': 0.11890042003021366,
    'subsample': 0.4318782580934736,
    'subsample_freq': 0,
    'subsample_for_bin': 193295,
    'min_child_samples': 14,
    'reg_alpha': 0.394194026403052,
    'reg_lambda': 0.708405421324422,
    'max_depth': 3,
    'max_bin': 206,
}

# Default categorical columns and default number of top features for the tuner.
best_pm25_categories = ['month', 'weekday']
best_pm25_features_count = 73

# Alternative categorical-column sets offered to the tuner for optimization.
categories = [
    ['month', 'weekday'],
    ['season', 'month'],
    ['season', 'month', 'weekday'],
]

# Configure the search helper with the train/validation split and the
# previously chosen defaults (parameters, categorical columns, feature count).
optuna_tuner = OptunaLgbSearch(
    study_name='lgbm_6001',
    metric='rmse',
    objective='regression',
    x_train=x_train,
    y_train=y_train,
    x_val=x_val,
    y_val=y_val,
    default_params=best_pm25_params,
    default_category=best_pm25_categories,
    categories_for_optimization=categories,
    default_top_features_count=best_pm25_features_count,
)

# Run the tuner's evaluation with best_features_only and set_as_best_model
# enabled; the call yields the train score, the validation score and the model.
score_train, score_val, model_best = optuna_tuner.run_model_and_eval(
    best_features_only=True,
    set_as_best_model=True,
)

# Display both scores as the cell result.
score_train, score_val

(7.286649673817787, 27.724438877025666)

In [5]:
# Convert the model to ONNX.
onnxfile = 'experiments_results/lgbm/6001/model.onnx'

# Declare a single float tensor input named 'float_input': the row count is
# left unspecified (None) and the column count is taken from x_train.
initial_type = [
    ('float_input', FloatTensorType([None, x_train.shape[1]])),
]

# NOTE(review): the opset is pinned to 8 — confirm this is still what the
# serving runtime requires.
onnx_model = convert_lightgbm(
    model_best,
    initial_types=initial_type,
    target_opset=8,
)

In [6]:
# Run the ONNX checker on the converted model before writing it out.
onnx.checker.check_model(onnx_model)

# Persist the serialized model bytes at the path stored in `onnxfile`.
with open(onnxfile, "wb") as model_file:
    model_bytes = onnx_model.SerializeToString()
    model_file.write(model_bytes)

In [7]:
# Round-trip sanity check: load the model file back from `onnxfile` and run
# the ONNX checker on the loaded copy.
onnx_model_pred_test = onnx.load(onnxfile)
onnx.checker.check_model(onnx_model_pred_test)

In [29]:
# Score the validation features with the exported model through ONNX Runtime.
# (`numpy` is imported in the notebook's import cell rather than here, so the
# notebook's dependencies are all declared in one place.)
sess = rt.InferenceSession(onnxfile)

# Use the first declared input and the first declared output of the graph.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# The input was declared as a float tensor at conversion time, so the frame is
# cast to float32 and passed as a plain numpy array. sess.run returns one
# result per requested output; [0] selects the only one requested.
pred_onx = sess.run([label_name], {input_name: x_val.astype(numpy.float32).to_numpy()})[0]
print(pred_onx)

[[55.831226]
 [55.250202]
 [66.52815 ]]


In [25]:
# NOTE(review): this repeats the print from the inference cell. The RuntimeError
# saved as this cell's output cannot be raised by print(); it looks like stale
# output from an earlier revision of the cell (execution count 25 predates the
# In [29] inference cell). Re-run or delete this cell so that the saved output
# matches the code.
print(pred_onx)

RuntimeError: Input must be a list of dictionaries or a single numpy array for input 'float_input'.