# CO2e Emission WML Train Recipe¶

This notebook demonstrates the use of AI Model Factory recipes to deploy rule-based anomaly pipeline to WML


# Import Libraries & Functions

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

In [3]:
# Print functions
def print_job_status(job_id, endpoint_url):
    # Extract the job ID and construct the URL
    url = endpoint_url + "/summary/" + job_id
    
    # Send a GET request to fetch the job status
    get_response = requests.get(url, headers={})
    status_data = get_response.json()
    
    # Print the job status
    if "status" in status_data:
        print("The status of job {} is {}.".format(job_id, status_data['status']))
    else:
        print(status_data)
    if status_data['status']:
        return status_data['status']
    else:
        return "initializing"
    
def print_all_status(job_id, endpoint_url):
    # Extract the job ID and construct the URL
    url = endpoint_url + "/summary/" + job_id
    
    # Send a GET request to fetch the job status
    get_response = requests.get(url, headers={})
    status_data = get_response.json()
    
    # Print the job status
    print(status_data)


In [4]:
# More print functions using display 
from IPython.display import display, HTML
import requests

def print_job_details(job_id, endpoint_url):
    # Extract the job ID and construct the URL
    url = endpoint_url + "/summary/" + job_id
    
    # Send a GET request to fetch the job status
    get_response = requests.get(url, headers={})
    summary_data = get_response.json()
    
    # Display the job status
    display(HTML(print_keys_and_values(summary_data)))
    return summary_data

    
def print_keys_and_values(json_data):
    # Start the HTML code
    html_code = "<div style='font-family: Arial; font-size: 1.2em;'>"
    
    # Add the job details to the HTML code
    html_code += f"<p>job_id: {json_data['job_id']}</p>"
    if "status" in json_data:
        html_code += f"<p>status: {json_data['status']}</p>"
    html_code += "<br>"
    if "detailed_summary" in json_data:
        for summary in json_data['detailed_summary']:
            html_code += f"<p>run_id: {summary['run_id']}</p>"
            html_code += f"<p>experiment_id: {summary['experiment_id']}</p>"
            if "status" in summary:
                print('summary',summary)
                html_code += f"<p>status: {summary['status']}</p>"
            html_code += f"<p>artifact_uri: {summary['artifact_uri']}</p>"
            html_code += f"<p>artifact_name: {summary.get('tags.artifact_name', 'No artifact_name found')}</p>"
            html_code += "<br>"

    # Close the HTML code
    html_code += "</div>"
    
    return html_code


# CO2 Deployment

## Defining Paths

In [5]:
# Define the file paths
# endpoint_url = "https://tenant1.predict.masinst1.ibmmam.com/ibm/modelfactory/service"
endpoint_url = "http://localhost:8000/ibm/modelfactory/service"
        
data_file_path =  "../data/training.csv"
co2_conversion_file_path = "../data/co2e_conversion.csv"
config_file_path =  "../config/parameter_config.yaml"


## POST Response

In [6]:
import requests

files = {
    "data_file": ("data/training.csv", open(data_file_path, 'rb')),
    "co2_conversion_file": ("data/co2e_conversion.csv", open(co2_conversion_file_path, 'rb')), 
    "config_file": ("config/parameter_config.yaml", open(config_file_path, 'rb')),
   
}

#/ibm/modelfactory/service/recipe/electrical-transformer/kpi/emission/train
#/ibm/modelfactory/service/recipe/electrical-transformer/kpi/emission
url = endpoint_url + "/recipe/electrical-transformer/kpi/emission"
post_response = requests.request("POST", url, headers={}, data={}, files=files)

In [7]:
print(url)

http://localhost:8000/ibm/modelfactory/service/recipe/electrical-transformer/kpi/emission


In [29]:
post_r_json = post_response.json()

anomaly_service_jobId = None

if 'jobId' in post_r_json:
    anomaly_service_jobId = post_r_json['jobId']
    print ('submitted successfully job : ', post_r_json['jobId'])
else:
    print (post_r_json)


{'job_id': '9089a279-2fc6-4da9-b84d-7fec3dc0d3a9', 'message': 'Job 9089a279-2fc6-4da9-b84d-7fec3dc0d3a9 was submitted.', 'status': 'INITIALIZING'}


## GET Response  - Keep Refresh it for a while until seeing the STATUS changed to DONE

The status of the job may be running, flagged by INITALIZING or EXECUTING

After a while the model recipe training is complete, and the STATUS changes to DONE

In [30]:
log_url = endpoint_url + "/log/"
job_id = post_r_json['job_id']

In [31]:
print(log_url + job_id)

http://localhost:8000/ibm/modelfactory/service/log/9089a279-2fc6-4da9-b84d-7fec3dc0d3a9


In [32]:
json_data = print_job_details(job_id,endpoint_url)

summary {'run_id': '904dc38e208342fdaba2ebedc6ecca0c', 'experiment_id': '8', 'status': 'FINISHED', 'artifact_uri': 's3://testdataupload/8/904dc38e208342fdaba2ebedc6ecca0c/artifacts', 'start_time': '2023-11-08T21:40:27.421000+00:00', 'end_time': '2023-11-08T21:40:57.698000+00:00', 'params.output_artifact_name': 'energy_loss_to_CO2_epa', 'params.steps': 'energy_loss_to_CO2_epa', 'params.config_path': '', 'params.data_path': '', 'params.co2_conversion': '', 'params.storage_path': '', 'tags.mlflow.log-model.history': '[{"run_id": "904dc38e208342fdaba2ebedc6ecca0c", "artifact_path": "energy_loss_to_CO2_epa", "utc_time_created": "2023-11-08 21:40:38.601008", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.9.2", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.1.1", "serialization_format": "cloudpickle", "code": null}}, "m

In [33]:
json_data

{'job_id': '9089a279-2fc6-4da9-b84d-7fec3dc0d3a9',
 'status': 'DONE',
 'summary': {'Deployment Instruction': 'For deployment, pass the parent_run_id as an argument in the deployment recipe',
  'model_uri': 'runs:/44ecd5273e0c4c61836dcc8b3e100a8d/energy_loss_to_CO2_epa',
  'parent_run_id': '4edcc4b348c948ccb6dea52b3bd66624'},
 'detailed_summary': [{'run_id': '904dc38e208342fdaba2ebedc6ecca0c',
   'experiment_id': '8',
   'status': 'FINISHED',
   'artifact_uri': 's3://testdataupload/8/904dc38e208342fdaba2ebedc6ecca0c/artifacts',
   'start_time': '2023-11-08T21:40:27.421000+00:00',
   'end_time': '2023-11-08T21:40:57.698000+00:00',
   'params.output_artifact_name': 'energy_loss_to_CO2_epa',
   'params.steps': 'energy_loss_to_CO2_epa',
   'params.config_path': '',
   'params.data_path': '',
   'params.co2_conversion': '',
   'params.storage_path': '',
   'tags.mlflow.log-model.history': '[{"run_id": "904dc38e208342fdaba2ebedc6ecca0c", "artifact_path": "energy_loss_to_CO2_epa", "utc_time_cr

In [37]:
# Check the log
log_url = endpoint_url + "/log/"
job_id = post_r_json['job_id']
get_response = requests.get(log_url+job_id, headers={})
log_data = get_response.json()
print(log_data)



### Saving details for deployment

In [39]:
json_data["detailed_summary"]

[{'run_id': '904dc38e208342fdaba2ebedc6ecca0c',
  'experiment_id': '8',
  'status': 'FINISHED',
  'artifact_uri': 's3://testdataupload/8/904dc38e208342fdaba2ebedc6ecca0c/artifacts',
  'start_time': '2023-11-08T21:40:27.421000+00:00',
  'end_time': '2023-11-08T21:40:57.698000+00:00',
  'params.output_artifact_name': 'energy_loss_to_CO2_epa',
  'params.steps': 'energy_loss_to_CO2_epa',
  'params.config_path': '',
  'params.data_path': '',
  'params.co2_conversion': '',
  'params.storage_path': '',
  'tags.mlflow.log-model.history': '[{"run_id": "904dc38e208342fdaba2ebedc6ecca0c", "artifact_path": "energy_loss_to_CO2_epa", "utc_time_created": "2023-11-08 21:40:38.601008", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.9.2", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.1.1", "serialization_format": "cloudpickle", "

In [None]:
import yaml
if 'status' in json_data:
    if json_data['status'] == 'DONE':
        for summary in json_data["detailed_summary"]:
            if summary["params.output_artifact_name"] != "":
                s3_uri = summary["artifact_uri"] + "/" + summary["params.output_artifact_name"]
            if summary['tags.artifact_name']!="":
                model_uri = "runs:/" + summary['run_id'] + "/" + summary['tags.artifact_name']
        output_data = {
            "model_uri" : model_uri,
            "train_job_id" : job_id,
            "s3_uri": s3_uri,
        }
        with open("../config/model_info.yml","w") as file:
            yaml.dump(output_data, file)
        output_data

In [43]:
output_data

{'model_uri': 'runs:/44ecd5273e0c4c61836dcc8b3e100a8d/energy_loss_to_CO2_epa',
 'train_job_id': '9089a279-2fc6-4da9-b84d-7fec3dc0d3a9',
 's3_uri': 's3://testdataupload/8/904dc38e208342fdaba2ebedc6ecca0c/artifacts/energy_loss_to_CO2_epa'}