In [1]:
# Prepend the directory two levels above the working directory to the import
# path; this is needed when the notebook is used inside a use case project.
import sys
from pathlib import Path

sys.path.insert(0, str(Path("..", "..").resolve()))

In [2]:
import recommend
# Report which version of the package produced the outputs in this notebook.
print(f'Using {recommend.__version__} version of recommend package')

Using 0.39.0 version of recommend package


# Export tutorial

This notebook explains the exporting functionality of the `recommend` package. 

Exporting is only relevant if you are using the control room advisor (CRA) from the OptimusAI offering. If not, feel free to skip this tutorial. This tutorial does not document the CRA itself. See the OAI documentation for more on this.

<div class="alert alert-warning">
<b>Warning</b>
 
This notebook is purely for documentation purposes. You should <i>not</i> deploy results to any live environment using a notebook workflow. 
    
</div>

## Setup

This notebook assumes you already have some results from the `bulk_optimize` function described in [this tutorial](./recommend.ipynb). 

In [3]:
from recommend import datasets

# Load the sample solutions exposed by `recommend.datasets`.
solutions = datasets.get_sample_solutions()
# Preview the first rows in dataframe form (initial vs. optimized columns).
solutions.to_frame().head()

Unnamed: 0_level_0,timestamp,air_flow01,air_flow02,air_flow03,air_flow04,air_flow05,air_flow06,air_flow07,amina_flow,amina_flow,...,silica_conc_cluster,objective,objective,starch_and_amina_flow_penalty,starch_and_amina_flow_penalty,starch_and_amina_flow_slack,starch_and_amina_flow_slack,run_id,is_successful_optimization,uplift
type,initial,initial,initial,initial,initial,initial,initial,initial,initial,optimized,...,initial,initial,optimized,initial,optimized,initial,optimized,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
48,2017-09-05 23:00:00,299.931919,299.949778,299.903115,298.778222,299.975283,299.496293,299.793731,630.368125,544.972644,...,"(12.191, 15.166]",12.808896,10.57103,0.0,0.0,195.054585,10.930091,881e98b3-41e6-4b82-815a-d99435ea632b,True,-2.237866
49,2017-09-06 02:00:00,299.930813,299.807355,299.954561,295.9415,300.0,299.695972,299.812304,526.85064,506.006817,...,"(12.191, 15.166]",14.658809,10.625636,0.0,0.0,46.703399,5.294093,836baf92-3093-430a-b4c4-480599dfde9a,True,-4.033173
50,2017-09-06 05:00:00,299.675317,299.443365,299.887433,296.489529,300.0,299.86097,299.507028,571.589206,503.683974,...,"(9.198, 12.191]",12.856431,11.071956,0.311756,0.0,,4.673848,0a60ecf8-1071-4384-b31c-680961f20dd0,True,-1.784475
51,2017-09-06 08:00:00,299.085586,299.978819,299.920452,297.037559,300.0,300.0,296.227219,537.674545,502.476467,...,"(9.198, 12.191]",13.058126,11.918354,7.15046,0.0,,3.356127,93407d58-a52b-403d-8e9e-04b3eb0e7a6c,True,-1.139772
52,2017-09-06 11:00:00,299.959681,299.943106,299.901202,297.585588,300.0,300.0,,475.780439,500.112334,...,"(12.191, 15.166]",13.771592,12.008764,14.637061,0.0,,3.392504,e7207272-8376-41c8-a22f-447c6c76f428,True,-1.762828


<div class="alert alert-info">
<b>Note</b>
    
Each row of our solution export has a unique `run_id` value. It acts as the unique ID of each optimization run loaded into the CRA backend database.
</div>

We'll also need information regarding optimization controlled parameters.

In [4]:
# Sample configuration of the parameters controlled by the optimization.
controlled_parameters = datasets.get_sample_controlled_parameters_config()
controlled_parameters

ControlledParametersConfig(
    keys={
        'amina_flow', 'ore_pulp_density', 'ore_pulp_flow', 'ore_pulp_ph',
        'starch_flow', 'total_air_flow', 'total_column_level',
    },
    values=(...),
)

Tag meta is sent directly to the CRA and also helps build the contents of other endpoints. In particular, each tag is associated with a unique ID that represents it.

In [5]:
# Sample tag metadata; reused below as the `tag_meta` argument of the export functions.
tags_meta = datasets.get_sample_tags_meta()
tags_meta

MetaDataConfig(...)

Target meta has a similar structure and use to the tag meta.

In [6]:
# Sample target metadata; reused below as the `target_meta` argument of the export functions.
target_meta = datasets.get_sample_targets_meta()
target_meta

MetaDataConfig(...)

Plant status meta contains information regarding the tags whose value will be used to assist with the implementation of the recommendations.

In [7]:
# Sample plant info metadata. Note the naming: this `plant_status` variable is
# passed below both as `plant_status=` (prepare_tags) and as `plant_info=`
# (prepare_plant_info).
plant_status = datasets.get_sample_plant_info()
plant_status

MetaDataConfig(...)

We will also use the actual values of the tags (including the target).

In [8]:
# Sample actual tag values (including the target column `silica_conc`).
actual_values = datasets.get_sample_actual_values_after_recs()
actual_values.head()

Unnamed: 0,timestamp,air_flow01,air_flow02,air_flow03,air_flow04,air_flow05,air_flow06,air_flow07,amina_flow,column_level01,...,ore_pulp_ph,silica_conc,silica_feed,starch_flow,iron_minus_silica,feed_diff_divide_silica,total_column_level,total_air_flow,silica_conc_lagged,silica_conc_cluster
0,2017-09-05 23:00:00,299.931919,299.949778,299.903115,298.778222,299.975283,299.496293,,630.368125,,...,,11.713022,12.46,3564.68646,43.633333,3.501873,2638.478359,2097.828341,3.573442,"(12.191, 15.166]"
1,2017-09-06 02:00:00,299.930813,,299.954561,295.9415,300.0,299.695972,299.812304,526.85064,400.034609,...,9.236773,10.09747,12.82,3519.852759,43.06,3.358814,2625.791469,2095.142504,3.397922,"(12.191, 15.166]"
2,2017-09-06 05:00:00,299.675317,299.443365,299.887433,296.489529,300.0,299.86097,,571.589206,399.210107,...,9.57158,11.359659,18.546667,3403.470278,33.706667,1.817398,2605.075156,2094.863642,3.33,"(9.198, 12.191]"
3,2017-09-06 08:00:00,299.085586,299.978819,299.920452,,300.0,300.0,296.227219,537.674545,422.467241,...,9.518191,11.825831,30.0,2890.288616,15.0,0.5,2926.71055,2092.249634,3.166667,"(9.198, 12.191]"
4,2017-09-06 11:00:00,299.959681,299.943106,299.901202,297.585588,300.0,300.0,300.0,475.780439,400.564206,...,9.514627,11.802973,30.0,2353.254688,15.0,0.5,2799.414237,2097.389577,1.763333,"(12.191, 15.166]"


We need the baseline predictions, which are the predictions of the target value if no recommendations had been made.

In [9]:
import pandas as pd

# Baseline model: predicts the target value as if no recommendations had been made.
baseline_model = datasets.get_baseline_trained_model()
# Pair each timestamp with the baseline prediction computed from the same rows.
baseline_predictions = pd.DataFrame({
    "timestamp": actual_values["timestamp"],
    "baseline": baseline_model.predict(actual_values)
})
baseline_predictions.head()

Unnamed: 0,timestamp,baseline
0,2017-09-05 23:00:00,12.070672
1,2017-09-06 02:00:00,12.152153
2,2017-09-06 05:00:00,12.964687
3,2017-09-06 08:00:00,13.528027
4,2017-09-06 11:00:00,13.528027


We also provide the target estimation after the optimization process. 

<div class="alert alert-warning">
<b>Warning</b>
 
This notebook is purely for documentation purposes. You should <i>not</i> calculate these values as shown here; instead, follow the instructions in the [uplift calculation tutorial notebook](uplift_calculation.ipynb).
    
</div>

In [10]:
# Convert the solutions to a dataframe once and reuse it for both columns,
# instead of rebuilding the frame for every column lookup.
solutions_frame = solutions.to_frame()
# Columns are addressed as (name, type) pairs: keep the initial timestamp and
# the optimized objective value of each row.
optimized_values = pd.DataFrame({
    "timestamp": solutions_frame[("timestamp", "initial")],
    "optimized": solutions_frame[("objective", "optimized")],
})
optimized_values.head()

Unnamed: 0,timestamp,optimized
48,2017-09-05 23:00:00,10.57103
49,2017-09-06 02:00:00,10.625636
50,2017-09-06 05:00:00,11.071956
51,2017-09-06 08:00:00,11.918354
52,2017-09-06 11:00:00,12.008764


In addition to the model predictions, the model performance is also recorded.

<div class="alert alert-info">
<b>Note</b>
 
This notebook creates a dummy dataset to showcase the export functionality with the expected structure. The real dataframe can be obtained using the OAI `modeling` package.
    
</div>

In [11]:
model = datasets.get_trained_model()
# Predict once and reuse the result for the prediction and both bounds.
model_predictions = model.predict(actual_values)
model_prediction_bounds = pd.DataFrame({
    "timestamp": actual_values["timestamp"],
    "actuals": actual_values["silica_conc"],
    "predictions": model_predictions,
    # Dummy bounds: a constant +/- 0.2 band around the prediction, used only to
    # produce the expected columns (not a real uncertainty estimate).
    "upper_bound": model_predictions + 0.2,
    "lower_bound": model_predictions - 0.2,
})
model_prediction_bounds.head()

Unnamed: 0,timestamp,actuals,predictions,upper_bound,lower_bound
0,2017-09-05 23:00:00,11.713022,12.808896,13.008896,12.608896
1,2017-09-06 02:00:00,10.09747,14.696146,14.896146,14.496146
2,2017-09-06 05:00:00,11.359659,12.856431,13.056431,12.656431
3,2017-09-06 08:00:00,11.825831,13.058126,13.258126,12.858126
4,2017-09-06 11:00:00,11.802973,13.842484,14.042484,13.642484


Finally, the implementation status is also used.

<div class="alert alert-info">
<b>Note</b>
 
This notebook creates a dummy dataset to showcase the export functionality. You should follow the instructions in the [implementation tracking tutorial notebook](implementation_tracking.ipynb) to obtain the implementation status.
    
</div>

In [12]:
# Dummy implementation status, written column by column. Both rows refer to
# the same run and carry the implementation percentage of one recommendation.
implementation_status = pd.DataFrame(
    {
        "id": [
            "71a2729e-9275-4b09-a2ec-e3158d59c1b5",
            "ac78f56f-56b8-49a0-bbec-5eefe824d1d4",
        ],
        "tag_id": ["starch_flow", "amina_flow"],
        "run_id": [
            "3324b355-4f73-449a-b261-35c84a6ea414",
            "3324b355-4f73-449a-b261-35c84a6ea414",
        ],
        "implementation_perc": [0.75, 0.25],
    }
)
implementation_status.head()

Unnamed: 0,id,tag_id,run_id,implementation_perc
0,71a2729e-9275-4b09-a2ec-e3158d59c1b5,starch_flow,3324b355-4f73-449a-b261-35c84a6ea414,0.75
1,ac78f56f-56b8-49a0-bbec-5eefe824d1d4,amina_flow,3324b355-4f73-449a-b261-35c84a6ea414,0.25


In addition to the datasets, we define the name of the timestamp column and the timestamp format. These are used by all the export functions that include a timestamp.

In [13]:
# Name of the column that holds the timestamp in the datasets above.
timestamp_column = "timestamp"
# strftime pattern applied to exported timestamps, e.g. 2017-09-05T23:00:00Z.
iso_format = "%Y-%m-%dT%H:%M:%SZ"

## Prepare results

The CRA needs several pieces of information to display results. Each function below provides the body for a `POST` request to a CRA endpoint.

In [14]:
from recommend import cra_export

### General information

#### ``prepare_runs``

We format the run ids to be JSON compatible and append the timestamp of the recommendation. If the optimization has not been successful, an error message is sent and no further outputs are required by the CRA.

Error message logic can be configured based on the use case and client requirements.

In [15]:
# Build the request body for the runs endpoint; `[0]` displays only the first entry.
cra_export.prepare_runs(
    solutions=solutions,
    iso_format=iso_format,
    timestamp_column=timestamp_column,
)[0]

{'id': '881e98b3-41e6-4b82-815a-d99435ea632b',
 'timestamp': '2017-09-05T23:00:00Z'}

#### `prepare_tags`

Next we compile information about the tags that will be sent to the CRA. It includes things like units and a formatted name for the UI. We'll use the preloaded metadata config. This information only needs to be sent once at the beginning, and again whenever the tags change.

In [16]:
# Build the request body for the tags endpoint; `[0]` displays only the first entry.
cra_export.prepare_tags(
    tag_meta=tags_meta,
    plant_status=plant_status,
)[0]

{'id': 'ec60d156-eb79-41b0-a907-ccedf677da9a',
 'clear_name': 'Starch Flow',
 'unit': 'cc/min',
 'area': None,
 'precision': 2,
 'priority': 0}

#### `prepare_targets`

We send similar information for the target values. Again, this information only needs to be sent once at the beginning, and again whenever the targets change.

In [17]:
# Build the request body for the targets endpoint; `[0]` displays only the first entry.
cra_export.prepare_targets(target_meta=target_meta)[0]

{'id': '7d583622-e55e-49df-9978-6d2b4bcaa5c3',
 'name': 'Silica concentration',
 'unit': '%',
 'aggregation': 'avg',
 'objective': 'min',
 'precision': 2}

#### `prepare_plant_info`

Here we provide information about the current status of the plant.

In [18]:
# Build the request body for the plant info endpoint; `[0]` displays only the first entry.
# The metadata loaded above as `plant_status` is passed here as `plant_info`.
cra_export.prepare_plant_info(
    plant_info=plant_status,
    solutions=solutions, 
    actual_data=actual_values,
    iso_format=iso_format,
    timestamp_column=timestamp_column,
)[0]

{'id': '71b6a8c5-3cde-40c5-a8ea-c8074a0e31a5',
 'run_id': '881e98b3-41e6-4b82-815a-d99435ea632b',
 'tag_id': 'cf1f3202-3db8-46c5-babf-99981853911c',
 'value': 63.73835844567815,
 'column_name': '',
 'section': 'states'}

### Target value

The functions in this section are prepared to run for only one target variable. If the use case has more than one target, each function should be run once for each target and all the results should be sent to the CRA.

#### ``prepare_actuals``

We provide the actual value of the target. 

If this function is used, the target will be provided after the cleaning and aggregation steps that the Optimus pipelines provide. This eases the comparison with other values, such as the baseline or the optimized value. However, as it is cleaned and aggregated it will not represent the value that the sensors provide. If the latter is desired, data for this endpoint should be sent to the CRA without processing and using a smaller granularity. 

In [19]:
# Build the request body with the actual target values; `[0]` displays only the first entry.
cra_export.prepare_actuals(
    actual_values_data=actual_values,
    actual_values_col="silica_conc",  # column of `actual_values` holding the target
    target_meta=target_meta,
    iso_format=iso_format,
    timestamp_column=timestamp_column,
)[0]

{'id': '0383142e-7de3-468a-8d19-6fca9a076f6d',
 'target_id': '7d583622-e55e-49df-9978-6d2b4bcaa5c3',
 'value': 11.713022335543368,
 'timestamp': '2017-09-05T23:00:00Z'}

#### ``prepare_predictions``

This endpoint provides information about the different values that the target can take apart from the actual value (which is already sent using the previous function):
* Baseline: Target estimation if no recommendations had been provided.
* Optimized: Target estimation after the optimization process.
* Predicted: Model prediction of the target value.

In [20]:
# Build the request body with the baseline, optimized and predicted target
# values; `[0]` displays only the first entry.
cra_export.prepare_predictions(
    baseline_values=baseline_predictions,
    optimized_values=optimized_values,
    model_prediction_bounds=model_prediction_bounds,
    solutions=solutions,
    target_meta=target_meta,
    target_name="silica_conc",
    # NOTE(review): presumably relates the exported field names to the value
    # columns of the baseline/optimized frames built above; confirm against
    # `prepare_predictions`.
    cols_export={
        "baseline": "baseline",
        "optimized": "optimized",
    },
    iso_format=iso_format,
    timestamp_column=timestamp_column,
)[0]

{'id': 'b9f7e23a-9d52-4730-ab56-37dc4a114a4b',
 'run_id': '881e98b3-41e6-4b82-815a-d99435ea632b',
 'target_id': '7d583622-e55e-49df-9978-6d2b4bcaa5c3',
 'baseline': 12.07067230477338,
 'optimized': 10.57103023798864,
 'predicted': 12.808895931727255,
 'upper_bound': 13.008895931727254,
 'lower_bound': 12.608895931727256}

### Set points

#### `prepare_recommendations`

Here we send the recommended value for the set points. To prepare each recommendation, there are several parameters that need to be considered: 

* A default status to display. `"Pending"` below refers to the fact that a recommendation is waiting to be approved.
* A default setting on whether a recommendation should be flagged. Here it defaults to `False` for all recommendations, but a custom logic could be implemented based on each CST's needs.
* The keyword `active_controls_only=True` controls whether all controls are displayed or only those that are currently "on". Use whichever option makes sense for your use case.

In [21]:
# Build the request body with the recommended set point values; `[0]` displays
# only the first entry.
cra_export.prepare_recommendations(
    solutions=solutions,
    controlled_parameters_config=controlled_parameters,
    tag_meta=tags_meta,
    target_meta=target_meta,
    target_name="silica_conc",
    default_status="Pending",  # status shown while a recommendation awaits approval
    default_flagged=False,  # no recommendation is flagged by default
    # NOTE(review): presumably restricts the export to controls that are
    # currently "on"; confirm against `prepare_recommendations`.
    active_controls_only=True,
)[0]

{'id': 'eca31177-cada-4768-8b41-3c23b077fa47',
 'value': 3465.9574468925084,
 'tolerance': 400.0,
 'run_id': '881e98b3-41e6-4b82-815a-d99435ea632b',
 'tag_id': 'ec60d156-eb79-41b0-a907-ccedf677da9a',
 'target_id': '7d583622-e55e-49df-9978-6d2b4bcaa5c3',
 'is_flagged': False,
 'status': 'Pending'}

#### `prepare_implementation_status`

Next, we can update a previously sent recommendation with its implementation status.

In [22]:
# Build the request body that updates previously sent recommendations with
# their implementation status; `[0]` displays only the first entry.
cra_export.prepare_implementation_status(implementation_status=implementation_status)[0]

{'id': '71a2729e-9275-4b09-a2ec-e3158d59c1b5', 'implementation_status': 0.75}

#### `prepare_states`

Finally, we send the values before optimization of the set points that are used in the recommendations.

In [23]:
# Build the request body with the pre-optimization set point values; `[0]`
# displays only the first entry.
cra_export.prepare_states(
    solutions=solutions,
    tag_meta=tags_meta,
)[0]

{'id': '376376c5-b270-4c20-90dc-6fd1d6ca53d2',
 'value': 3564.6864600000004,
 'run_id': '881e98b3-41e6-4b82-815a-d99435ea632b',
 'tag_id': 'ec60d156-eb79-41b0-a907-ccedf677da9a'}

### UI update

#### `prepare_sse`

Once all information regarding a new optimization run has been sent to the CRA, the CRA UI needs to be updated. To signal this, the following is sent:

In [24]:
# Build the event body that signals a UI update; takes no arguments.
cra_export.prepare_sse()

{'event': 'ui_update'}

## Post results to API 

The final step is to post your results to the OptimusAI CRA. Use the `requests` library to send a `POST` request to the corresponding CRA endpoints.

Something like the following will work:

```python
requests.request(
    method="POST",
    url=<cra-endpoint-url>,
    auth=<auth-details>,
    json=json_compatible_data,
)
```

The pipeline version of the export code handles this step; only the endpoint URL is required.