# Anomaly Model Train Noteook

This notebook demonstrates the use of AI Model Factory recipes to train energy loss model

Last Updated November 29, 2023

# Import Libraries & Functions

In [2]:
#Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

In [3]:
# Print functions
def print_job_status(job_id, endpoint_url):
    # Extract the job ID and construct the URL
    url = endpoint_url + "/summary/" + job_id
    
    # Send a GET request to fetch the job status
    get_response = requests.get(url, headers={})
    status_data = get_response.json()
    
    # Print the job status
    if "status" in status_data:
        print("The status of job {} is {}.".format(job_id, status_data['status']))
    else:
        print(status_data)
    if status_data['status']:
        return status_data['status']
    else:
        return "initializing"
    
def print_all_status(job_id, endpoint_url):
    # Extract the job ID and construct the URL
    url = endpoint_url + "/summary/" + job_id
    
    # Send a GET request to fetch the job status
    get_response = requests.get(url, headers={})
    status_data = get_response.json()
    
    # Print the job status
    print(status_data)

In [4]:
# More print functions using display 
from IPython.display import display, HTML
import requests

def print_job_details(job_id, endpoint_url):
    # Extract the job ID and construct the URL
    url = endpoint_url + "/summary/" + job_id
    
    # Send a GET request to fetch the job status
    get_response = requests.get(url, headers={})
    summary_data = get_response.json()
    
    # Display the job status
    display(HTML(print_keys_and_values(summary_data)))
    return summary_data

    
def print_keys_and_values(json_data):
    # Start the HTML code
    html_code = "<div style='font-family: Arial; font-size: 1.2em;'>"
    
    # Add the job details to the HTML code
    html_code += f"<p>job_id: {json_data['job_id']}</p>"
    if "status" in json_data:
        html_code += f"<p>status: {json_data['status']}</p>"
    html_code += "<br>"
    if "detailed_summary" in json_data:
        for summary in json_data['detailed_summary']:
            html_code += f"<p>run_id: {summary['run_id']}</p>"
            html_code += f"<p>experiment_id: {summary['experiment_id']}</p>"
            if "status" in summary:
                print('summary',summary)
                html_code += f"<p>status: {summary['status']}</p>"
            html_code += f"<p>artifact_uri: {summary['artifact_uri']}</p>"
            html_code += f"<p>artifact_name: {summary.get('tags.artifact_name', 'No artifact_name found')}</p>"
            html_code += "<br>"

    # Close the HTML code
    html_code += "</div>"
    
    return html_code

# Loading  Dataset

Loading the power quality dataset from the local project space

In [5]:
dict_variable_meaning = {
    'KW':'the amount of energy transferred per unit of time',
    'KVA':'kilovolt-ampere, apparent power',
    'KVAR':'kilovolt-ampere reactive',
    'OTI':'Oil Temperature Indicator Alarm',
    'tag_WTI':'Winding Temperature Indicator',
    'ATI':'Ambient Temperature Indicator',
    'OLI':'Oil Level Indicator',
    'tag_OTI_A':'Oil Temperature Indicator Alarm',
    'tag_OTI_T':'Oil Temperature Indicator Trip',
    'tag_MOG_A':'Magnetic oil gauge indicator',
}

In [6]:
df = pd.read_csv("../data/training.csv")
df

Unnamed: 0,datetime,load_factor,loss_factor,energy_loss,tag_WTI,tag_OTI_A,tag_OTI_T,tag_MOG_A
0,2019-07-04 17:00:00,0.000392,3.063004e-07,5.375451e-07,0,0,0,0
1,2019-07-04 18:00:00,0.000519,5.247114e-07,9.439461e-07,0,0,0,0
2,2019-07-04 19:00:00,0.000517,5.200765e-07,9.352261e-07,0,0,0,0
3,2019-07-04 20:00:00,0.000536,5.577041e-07,1.006119e-06,0,0,0,0
4,2019-07-04 21:00:00,0.000548,5.818571e-07,1.051742e-06,0,0,0,0
...,...,...,...,...,...,...,...,...
4446,2020-04-13 20:00:00,0.517957,2.842694e-01,9.389795e-01,1,0,0,0
4447,2020-04-13 21:00:00,0.517996,2.843098e-01,9.391190e-01,1,0,0,0
4448,2020-04-13 22:00:00,0.521028,2.875000e-01,9.501444e-01,1,0,0,0
4449,2020-04-13 23:00:00,0.498605,2.643085e-01,8.701254e-01,1,0,0,0


# Energy Loss Anomaly Deployment

## Defining Paths

In [6]:
# Define the file paths
endpoint_url = "http://localhost:8000/ibm/modelfactory/service"
data_file_path = "../data/training.csv"

## Preparation of Configuration File

A requirement of the recipe is preparation of the configuration file for training. 


Information including column specification, loss factor calculation method, the option to use an a priori peak power are all options that can be configured.


Most important the "output" can be changed to "energy_loss", "load_factor" or "loss_factor"

For more detail for preparing the configuration file see the [documentation](https://github.ibm.com/srom/ModelFactory/blob/development/model_factory/recipes/electrical_transformer/docs/energyloss.md)

## POST Response

In [7]:
import requests

data = {
    "feature_columns": "load_factor,loss_factor,energy_loss",
    "result": "score"
}
files = {
    "data_file": ("training.csv", open(data_file_path, 'rb')),
}

url = endpoint_url + "/recipe/electrical-transformer/anomaly/energyloss"
post_response = requests.request("POST", url, headers={}, data=data, files=files)

In [8]:
post_r_json = post_response.json()

anomaly_service_jobId = None

if 'jobId' in post_r_json:
    anomaly_service_jobId = post_r_json['jobId']
    print ('submitted successfully job : ', post_r_json['jobId'])
else:
    print (post_r_json)
time.sleep(5)

{'job_id': '1eecb3db-4b03-49b7-a781-7c1b2e12a707', 'message': 'Job 1eecb3db-4b03-49b7-a781-7c1b2e12a707 was submitted.', 'status': 'INITIALIZING'}


## GET Response  - Keep Refresh it for a while until seeing the STATUS changed to DONE

The status of the job may be running, flagged by INITALIZING or EXECUTING

After a while the model recipe training is complete, and the STATUS changes to DONE

In [9]:
log_url = endpoint_url + "/log/"
job_id = post_r_json['job_id']

In [10]:
print(log_url + job_id)

http://localhost:8000/ibm/modelfactory/service/log/1eecb3db-4b03-49b7-a781-7c1b2e12a707


## GET Response

In [11]:
job_id = post_r_json["job_id"]
# Get Status
print_job_status(job_id,endpoint_url)

The status of job 1eecb3db-4b03-49b7-a781-7c1b2e12a707 is DONE.


'DONE'

The status of the job may be running, flagged by INITALIZING or EXECUTING

After a while the model recipe training is complete, and the STATUS changes to DONE

In [12]:
# Get Status
print_job_status(job_id,endpoint_url)

The status of job 1eecb3db-4b03-49b7-a781-7c1b2e12a707 is DONE.


'DONE'

In [24]:
# Get selected details
json_data = print_job_details(job_id,endpoint_url)

# Get all details

print_all_status(job_id, endpoint_url)

summary {'run_id': 'ec9217cf80f841f5a3bebca8ef8632be', 'experiment_id': '16', 'status': 'FINISHED', 'artifact_uri': 's3://testdataupload/16/ec9217cf80f841f5a3bebca8ef8632be/artifacts', 'start_time': '2023-11-17T21:09:00.092000+00:00', 'end_time': '2023-11-17T21:09:39.665000+00:00', 'params.data_path': 's3://testdataupload/1eecb3db-4b03-49b7-a781-7c1b2e12a707', 'params.result': 'label', 'params.feature_columns': 'load_factor,loss_factor,energy_loss', 'tags.mlflow.user': 'root', 'tags.mlflow.source.name': '/tmp/ray/session_2023-11-17_16-39-41_914185_1/runtime_resources/working_dir_files/_ray_pkg_9ef5300bceae2146', 'tags.mlflow.project.entryPoint': 'anomaly_unsupervised', 'tags.step': 'anomaly_unsupervised', 'tags.mlflow.project.backend': 'local', 'tags.mlflow.runName': 'gregarious-kit-47', 'tags.mlflow.source.type': 'PROJECT', 'tags.mlflow.log-model.history': '[{"run_id": "ec9217cf80f841f5a3bebca8ef8632be", "artifact_path": "energyloss_AD", "utc_time_created": "2023-11-17 21:09:35.287806

{'job_id': '1eecb3db-4b03-49b7-a781-7c1b2e12a707', 'status': 'DONE', 'summary': {'model_uri': 'runs:/ec9217cf80f841f5a3bebca8ef8632be/energyloss_AD', 'dag_info': {'total_stage': 1, 'total_nodes': 4, 'total_paths': 4, 'total_pipelines': 4}, 'best_score': 19.711957218209626}, 'detailed_summary': [{'run_id': 'ec9217cf80f841f5a3bebca8ef8632be', 'experiment_id': '16', 'status': 'FINISHED', 'artifact_uri': 's3://testdataupload/16/ec9217cf80f841f5a3bebca8ef8632be/artifacts', 'start_time': '2023-11-17T21:09:00.092000+00:00', 'end_time': '2023-11-17T21:09:39.665000+00:00', 'params.data_path': 's3://testdataupload/1eecb3db-4b03-49b7-a781-7c1b2e12a707', 'params.result': 'label', 'params.feature_columns': 'load_factor,loss_factor,energy_loss', 'tags.mlflow.user': 'root', 'tags.mlflow.source.name': '/tmp/ray/session_2023-11-17_16-39-41_914185_1/runtime_resources/working_dir_files/_ray_pkg_9ef5300bceae2146', 'tags.mlflow.project.entryPoint': 'anomaly_unsupervised', 'tags.step': 'anomaly_unsupervised

In [25]:
json_data

{'job_id': '1eecb3db-4b03-49b7-a781-7c1b2e12a707',
 'status': 'DONE',
 'summary': {'model_uri': 'runs:/ec9217cf80f841f5a3bebca8ef8632be/energyloss_AD',
  'dag_info': {'total_stage': 1,
   'total_nodes': 4,
   'total_paths': 4,
   'total_pipelines': 4},
  'best_score': 19.711957218209626},
 'detailed_summary': [{'run_id': 'ec9217cf80f841f5a3bebca8ef8632be',
   'experiment_id': '16',
   'status': 'FINISHED',
   'artifact_uri': 's3://testdataupload/16/ec9217cf80f841f5a3bebca8ef8632be/artifacts',
   'start_time': '2023-11-17T21:09:00.092000+00:00',
   'end_time': '2023-11-17T21:09:39.665000+00:00',
   'params.data_path': 's3://testdataupload/1eecb3db-4b03-49b7-a781-7c1b2e12a707',
   'params.result': 'label',
   'params.feature_columns': 'load_factor,loss_factor,energy_loss',
   'tags.mlflow.user': 'root',
   'tags.mlflow.source.name': '/tmp/ray/session_2023-11-17_16-39-41_914185_1/runtime_resources/working_dir_files/_ray_pkg_9ef5300bceae2146',
   'tags.mlflow.project.entryPoint': 'anomaly_

In [28]:
s3_folder_name = 'energyloss_AD'

In [29]:
import yaml
if 'status' in json_data:
    if json_data['status'] == 'DONE':
        if "summary" in json_data:
            if "model_uri" in json_data['summary']:
                model_uri = json_data['summary']['model_uri']                 
        for summary in json_data["detailed_summary"]:
            if summary['tags.step']!="":
                s3_uri = summary["artifact_uri"] + "/" + s3_folder_name                
        output_data = {
            "model_uri" : model_uri,
            "train_job_id" : job_id,
            "s3_uri": s3_uri,
        }
        with open("../config/model_info.yml","w") as file:
            yaml.dump(output_data, file)
        output_data

In [30]:
output_data

{'model_uri': 'runs:/ec9217cf80f841f5a3bebca8ef8632be/energyloss_AD',
 'train_job_id': '1eecb3db-4b03-49b7-a781-7c1b2e12a707',
 's3_uri': 's3://testdataupload/16/ec9217cf80f841f5a3bebca8ef8632be/artifacts/energyloss_AD'}