In [13]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# specify substep parameters for interactive run
# this cell will be replaced during job run with the parameters from json within params subfolder
substep_params={
    "r2_threshold": 1,
    "mae_threshold":1.5 ,
    "mape_threshold":0.8}

In [15]:
# load pipeline and step parameters - do not edit
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)

**Pipeline params:**


{'X': 'something',
 'env_name': 'user',
 'pipeline_name': 'pipeline',
 'zone_name': 'zone'}




**Step params:**


{'Y': 'something_else'}




In [16]:
#3 define substep interface
from sinara.substep import NotebookSubstep, ENV_NAME, PIPELINE_NAME, ZONE_NAME, STEP_NAME, RUN_ID, ENTITY_NAME, ENTITY_PATH, SUBSTEP_NAME

substep = NotebookSubstep(pipeline_params, step_params, substep_params)

substep.interface(
   
    inputs =
    [
        { STEP_NAME: "model_train", ENTITY_NAME: "california_bento" }, #, ZONE_NAME: "zone" },
        
    ],
    outputs = 
    [
       
    ]
)

substep.print_interface_info()

substep.exit_in_visualize_mode()

**STEP NAME:**


'model_eval'




**INPUTS:**


[{'user.pipeline.zone.model_train.california_bento': '/data/home/jovyan/pipeline/zone/model_train/run-25-01-15-100600/california_bento'}]




In [17]:
#4 run spark
from sinara.spark import SinaraSpark

spark = SinaraSpark.run_session(0)
SinaraSpark.ui_url()

Session is run


In [18]:

substep.add_metric('experiment_name', 'evaluation_metrics_example')

In [19]:
# read inputs 
bento_step_inputs = substep.inputs(step_name="model_train")

In [26]:

# load a bentoservice
from sinara.bentoml import load_bentoservice
from sklearn.metrics import mean_absolute_percentage_error,mean_absolute_error,r2_score
bento_serv = load_bentoservice(bento_step_inputs.california_bento)



In [21]:
bento_service_version = bento_serv.service_version().split('.')[-1]
substep.add_metric("version", 
                   {'bento_service_version': bento_service_version})
print(bento_service_version)

run-25-01-15-100600


In [27]:
import pandas as pd
test_data = bento_serv.test_data()
predictions = bento_serv.predict(pd.DataFrame(test_data['X']))
##Добавил pacчет R2
r2 = bento_serv.score(pd.DataFrame(test_data['X']),pd.DataFrame(predictions))
r21 = r2_score(pd.DataFrame(test_data['Y']).values, predictions)
mae = mean_absolute_error(pd.DataFrame(test_data['Y']).values, predictions)
mape = mean_absolute_percentage_error(pd.DataFrame(test_data['Y']).values, predictions)

print("The mean_absolute_error (MAE) on test set: {:.4f}".format(mae))
print("The mean_absolute_percentage_errorr (MAPE) on test set: {:.4f}".format(mape))

The mean_absolute_error (MAE) on test set: 0.9240
The mean_absolute_percentage_errorr (MAPE) on test set: 0.6168


In [23]:
# add evaluation results to the step metrics
substep.add_metric('eval_result', {'r2':r2,
                              'mae': mae,
                              'mape': mape})

In [24]:
# check eval result
r2_threshold = substep_params["r2_threshold"]
mae_threshold = substep_params["mae_threshold"]
mape_threshold = substep_params["mape_threshold"]
if r2 >= r2_threshold:
    raise Exception(f'R2 is {r2}, more than acceptable value of {r2_threshold}')
if mae > mae_threshold:
    raise Exception(f'MAE is {mae}, more than acceptable value of {mae_threshold}')
if mape > mape_threshold:
    raise Exception(f'MAPE is {mape}, more than acceptable value of {mape_threshold}')

Exception: R2 is 1.0, more than acceptable value of 1

In [25]:
# stop spark
SinaraSpark.stop_session()