# Health DGA Score Notebook
Last Updated August 29 2023

In [1]:
import os
import requests
import json
import ast
import pandas as pd
import yaml

In [2]:
def load_credentials(credentials_file):
    from ibm_watson_studio_lib import access_project_or_space
    wslib = access_project_or_space()
    
    for file in [credentials_file]:
        data = wslib.load_data(file)
        with open(f"./{file}", "wb") as f:
            f.write(data.read())

### Create the Bearer Token

In [3]:
credentials_file = "../../config/credentials/wml-credentials.json"
with open(credentials_file, "r") as f:
    credentials = json.load(f)

In [4]:
# Assuming you have these variables set somewhere in your script
USERNAME = credentials["username"]
API_KEY = credentials["apikey"]
DATAPLATFORM_URL = credentials["url"]

In [5]:
headers = {
    'Content-type': 'application/json'
}
data = {
    'username': USERNAME,
    'api_key': API_KEY
}

In [6]:
response = requests.post(
    f"{DATAPLATFORM_URL}/icp4d-api/v1/authorize", 
    headers=headers, 
    data=json.dumps(data),
    verify=False  # This corresponds to the '-k' or '--insecure' flag in curl
)
token = response.text
dict_token = ast.literal_eval(token)
bearer_token = dict_token['token']



### Load the Data

In [7]:
data_path = '../data/training.csv'

In [8]:
df = pd.read_csv(data_path)

In [9]:
df.head(5)

Unnamed: 0,AssetID,Timestamps,DGAR-H2,DGAR-O2,DGAR-N2,DGAR-CH4,DGAR-CO,DGAR-CO2,DGAR-C2H4,DGAR-C2H6,DGAR-C2H2,DBDS,POWER_FACT,INTER_V,DI_RIG,H2O,Health index
0,9010,53:22.1,2845,5860,27842,7406,32,1344,16684,5467,7,19.0,1.0,45,55,0,95.2
1,9010,53:22.1,12886,61,25041,877,83,864,4,305,0,45.0,1.0,45,55,0,85.5
2,9010,53:22.1,2820,16400,56300,144,257,1080,206,11,2190,1.0,1.0,39,52,11,85.3
3,9010,53:22.1,1099,70,37520,545,184,1402,6,230,0,87.0,4.58,33,49,5,85.3
4,9010,53:22.1,3210,3570,47900,160,360,2130,4,43,4,1.0,0.77,44,55,3,85.2


In [10]:
df.shape

(470, 17)

In [11]:
df.columns

Index(['AssetID', 'Timestamps', 'DGAR-H2', 'DGAR-O2', 'DGAR-N2', 'DGAR-CH4',
       'DGAR-CO', 'DGAR-CO2', 'DGAR-C2H4', 'DGAR-C2H6', 'DGAR-C2H2', 'DBDS',
       'POWER_FACT', 'INTER_V', 'DI_RIG', 'H2O', 'Health index'],
      dtype='object')

### Choose all three electrical lines

In [13]:
feature_columns = ['DGAR-H2','DGAR-O2','DGAR-N2','DGAR-CH4','DGAR-CO','DGAR-CO2','DGAR-C2H4','DGAR-C2H6','DGAR-C2H2','DBDS','POWER_FACT','INTER_V','DI_RIG','H2O']

In [15]:
df_score_all = df[feature_columns]

In [16]:
df_score_all.head(100)

Unnamed: 0,DGAR-H2,DGAR-O2,DGAR-N2,DGAR-CH4,DGAR-CO,DGAR-CO2,DGAR-C2H4,DGAR-C2H6,DGAR-C2H2,DBDS,POWER_FACT,INTER_V,DI_RIG,H2O
0,2845,5860,27842,7406,32,1344,16684,5467,7,19.0,1.00,45,55,0
1,12886,61,25041,877,83,864,4,305,0,45.0,1.00,45,55,0
2,2820,16400,56300,144,257,1080,206,11,2190,1.0,1.00,39,52,11
3,1099,70,37520,545,184,1402,6,230,0,87.0,4.58,33,49,5
4,3210,3570,47900,160,360,2130,4,43,4,1.0,0.77,44,55,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0,21800,52500,3,78,2180,9,8,0,0.0,0.41,42,56,6
96,0,2220,58800,13,131,1520,4,22,0,173.0,0.55,42,52,4
97,0,944,58700,6,266,3530,8,11,0,153.0,0.35,40,56,4
98,0,27000,56600,0,47,480,0,0,0,11.0,1.00,43,51,4


In [17]:
df.shape

(470, 17)

## Send the Data for Scoring

In [21]:
model_factory_config_file_name = "../config/deployment_info.yml"

with open(model_factory_config_file_name, 'r') as file:
    model_factory_config = yaml.safe_load(file)

print(model_factory_config)

{'cp4d_url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/44f8fbef-5343-49ee-a31b-fab8b07c284e/predictions?version=2023-11-17', 'online_url': {'url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/44f8fbef-5343-49ee-a31b-fab8b07c284e/predictions'}, 'serving_urls': ['https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/44f8fbef-5343-49ee-a31b-fab8b07c284e/predictions'], 'state': 'ready'}


In [22]:
array_of_input_fields = list(df_score_all.columns) 
array_of_values_to_be_scored = df_score_all[array_of_input_fields].values.tolist()

print("Input Data")
print(array_of_input_fields)
df_score_all[array_of_input_fields].head(10)

Input Data
['DGAR-H2', 'DGAR-O2', 'DGAR-N2', 'DGAR-CH4', 'DGAR-CO', 'DGAR-CO2', 'DGAR-C2H4', 'DGAR-C2H6', 'DGAR-C2H2', 'DBDS', 'POWER_FACT', 'INTER_V', 'DI_RIG', 'H2O']


Unnamed: 0,DGAR-H2,DGAR-O2,DGAR-N2,DGAR-CH4,DGAR-CO,DGAR-CO2,DGAR-C2H4,DGAR-C2H6,DGAR-C2H2,DBDS,POWER_FACT,INTER_V,DI_RIG,H2O
0,2845,5860,27842,7406,32,1344,16684,5467,7,19.0,1.0,45,55,0
1,12886,61,25041,877,83,864,4,305,0,45.0,1.0,45,55,0
2,2820,16400,56300,144,257,1080,206,11,2190,1.0,1.0,39,52,11
3,1099,70,37520,545,184,1402,6,230,0,87.0,4.58,33,49,5
4,3210,3570,47900,160,360,2130,4,43,4,1.0,0.77,44,55,3
5,13500,343,36500,3150,113,984,5,1230,1,1.0,4.93,37,52,6
6,10200,11900,33700,573,87,611,0,162,0,1.0,3.53,45,55,5
7,3,15459,41347,5,68,902,12,2,13,5.0,0.58,41,71,6
8,16,2470,59600,8,520,2660,5,8,2,164.0,0.29,44,56,4
9,488,11861,48353,13,85,1957,29,23,0,164.0,0.27,37,72,10


In [23]:
# retrieve the CP4D endpoint at the IBM Watson Studio
scoring_url = model_factory_config['cp4d_url']
scoring_url

'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/44f8fbef-5343-49ee-a31b-fab8b07c284e/predictions?version=2023-11-17'

In [24]:
array_of_input_fields

['DGAR-H2',
 'DGAR-O2',
 'DGAR-N2',
 'DGAR-CH4',
 'DGAR-CO',
 'DGAR-CO2',
 'DGAR-C2H4',
 'DGAR-C2H6',
 'DGAR-C2H2',
 'DBDS',
 'POWER_FACT',
 'INTER_V',
 'DI_RIG',
 'H2O']

In [25]:
array_of_values_to_be_scored

[[2845.0,
  5860.0,
  27842.0,
  7406.0,
  32.0,
  1344.0,
  16684.0,
  5467.0,
  7.0,
  19.0,
  1.0,
  45.0,
  55.0,
  0.0],
 [12886.0,
  61.0,
  25041.0,
  877.0,
  83.0,
  864.0,
  4.0,
  305.0,
  0.0,
  45.0,
  1.0,
  45.0,
  55.0,
  0.0],
 [2820.0,
  16400.0,
  56300.0,
  144.0,
  257.0,
  1080.0,
  206.0,
  11.0,
  2190.0,
  1.0,
  1.0,
  39.0,
  52.0,
  11.0],
 [1099.0,
  70.0,
  37520.0,
  545.0,
  184.0,
  1402.0,
  6.0,
  230.0,
  0.0,
  87.0,
  4.58,
  33.0,
  49.0,
  5.0],
 [3210.0,
  3570.0,
  47900.0,
  160.0,
  360.0,
  2130.0,
  4.0,
  43.0,
  4.0,
  1.0,
  0.77,
  44.0,
  55.0,
  3.0],
 [13500.0,
  343.0,
  36500.0,
  3150.0,
  113.0,
  984.0,
  5.0,
  1230.0,
  1.0,
  1.0,
  4.93,
  37.0,
  52.0,
  6.0],
 [10200.0,
  11900.0,
  33700.0,
  573.0,
  87.0,
  611.0,
  0.0,
  162.0,
  0.0,
  1.0,
  3.53,
  45.0,
  55.0,
  5.0],
 [3.0,
  15459.0,
  41347.0,
  5.0,
  68.0,
  902.0,
  12.0,
  2.0,
  13.0,
  5.0,
  0.58,
  41.0,
  71.0,
  6.0],
 [16.0,
  2470.0,
  59600.0,
  8

In [29]:
import urllib3, requests, json

# scoring_url = 'https://cpd-ibm-cpd.mas-demo-cp4d-822c5cdfc486f5db3c3145c89ca6409d-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/14b272a1-111b-4196-9b07-eec757faaaca/predictions?version=2023-08-29'

header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + bearer_token}

# NOTE: manually define and pass the array(s) of values to be scored in the next line
payload_scoring = {"input_data": [{"fields": array_of_input_fields, "values": array_of_values_to_be_scored}]}
print("Input payload")
print(json.dumps(payload_scoring, indent=2))

response_scoring = requests.post(scoring_url, json=payload_scoring, headers=header, verify=False)
print("Scoring response")
predictions = json.loads(response_scoring.text)
print(json.dumps(predictions, indent=2))

Input payload
{
  "input_data": [
    {
      "fields": [
        "DGAR-H2",
        "DGAR-O2",
        "DGAR-N2",
        "DGAR-CH4",
        "DGAR-CO",
        "DGAR-CO2",
        "DGAR-C2H4",
        "DGAR-C2H6",
        "DGAR-C2H2",
        "DBDS",
        "POWER_FACT",
        "INTER_V",
        "DI_RIG",
        "H2O"
      ],
      "values": [
        [
          2845.0,
          5860.0,
          27842.0,
          7406.0,
          32.0,
          1344.0,
          16684.0,
          5467.0,
          7.0,
          19.0,
          1.0,
          45.0,
          55.0,
          0.0
        ],
        [
          12886.0,
          61.0,
          25041.0,
          877.0,
          83.0,
          864.0,
          4.0,
          305.0,
          0.0,
          45.0,
          1.0,
          45.0,
          55.0,
          0.0
        ],
        [
          2820.0,
          16400.0,
          56300.0,
          144.0,
          257.0,
          1080.0,
          206.0,
      



Scoring response
{
  "predictions": [
    {
      "fields": [
        "prediction"
      ],
      "values": [
        [
          -3522038435916856.0
        ],
        [
          3.63838972778789e+17
        ],
        [
          1.481311404095217e+18
        ],
        [
          9.644689572952566e+17
        ],
        [
          1.2135859820817016e+18
        ],
        [
          5.492644797198467e+17
        ],
        [
          6.406937864078565e+17
        ],
        [
          1.0826495386928968e+18
        ],
        [
          1.607089336635625e+18
        ],
        [
          1.2780995982831905e+18
        ],
        [
          1.5810553841673457e+18
        ],
        [
          1.1980404084932508e+18
        ],
        [
          1.6241835441869622e+18
        ],
        [
          -2517776784940853.0
        ],
        [
          8.938780637747836e+17
        ],
        [
          2.0333793597289744e+17
        ],
        [
          9.30613227391871e+17

In [27]:
output = predictions["predictions"][0]["values"]

In [28]:
output

[[-3522038435916856.0],
 [3.63838972778789e+17],
 [1.481311404095217e+18],
 [9.644689572952566e+17],
 [1.2135859820817016e+18],
 [5.492644797198467e+17],
 [6.406937864078565e+17],
 [1.0826495386928968e+18],
 [1.607089336635625e+18],
 [1.2780995982831905e+18],
 [1.5810553841673457e+18],
 [1.1980404084932508e+18],
 [1.6241835441869622e+18],
 [-2517776784940853.0],
 [8.938780637747836e+17],
 [2.0333793597289744e+17],
 [9.30613227391871e+17],
 [1.2304371717969441e+18],
 [1.33145601088696e+18],
 [1.6523843430388316e+18],
 [1.6024366764991616e+18],
 [1.273768590408992e+18],
 [1.9609343133765071e+18],
 [2.0352594214110323e+18],
 [2.1727583065071585e+18],
 [2.060936013209634e+18],
 [2.0458969109058975e+18],
 [1.0960799348193614e+18],
 [1.9553638591020365e+18],
 [1.168547064103244e+18],
 [8.391720747362831e+17],
 [1.7458965615291886e+18],
 [9.955496233734547e+17],
 [8.735447380454578e+17],
 [1.1289165133099895e+18],
 [6.79219127622458e+17],
 [5.997992781621892e+17],
 [1.1217471169165969e+18],
 

## Combine the Results

In [30]:
output = predictions["predictions"][0]["values"]

In [31]:
reshaped_data = [row[0] for row in output]

In [32]:
column_names = ['health_score']

In [33]:
predictions_df = pd.DataFrame(reshaped_data, columns=column_names)

In [34]:
predictions_df = predictions_df.astype(int)

In [35]:
display(predictions_df)

Unnamed: 0,health_score
0,-3522038435916856
1,363838972778788992
2,1481311404095216896
3,964468957295256576
4,1213585982081701632
...,...
465,1415929779458547456
466,1258734805982444800
467,949381627199599744
468,1350843190858483200


In [36]:
df_merged = pd.concat([df, predictions_df], axis=1)

In [37]:
display(df_merged.tail(10))

Unnamed: 0,AssetID,Timestamps,DGAR-H2,DGAR-O2,DGAR-N2,DGAR-CH4,DGAR-CO,DGAR-CO2,DGAR-C2H4,DGAR-C2H6,DGAR-C2H2,DBDS,POWER_FACT,INTER_V,DI_RIG,H2O,Health index,health_score
460,9010,53:22.1,12,151,48200,2,34,569,4,48,0,0.0,1.0,31,58,20,13.4,1288049602844288768
461,9010,53:22.1,12,1840,61900,2,36,1560,3,72,0,0.0,4.5,31,55,36,13.4,1661246526984518912
462,9010,53:22.1,12,285,52500,2,35,895,0,51,0,0.0,3.22,33,54,37,13.4,1406088145301809920
463,9010,53:22.1,12,465,52700,2,52,918,0,84,0,0.0,3.83,33,55,24,13.4,1411234494735091968
464,9010,53:22.1,12,2150,11100,2,170,1440,0,0,0,1.0,0.73,45,54,6,13.4,305722399167593280
465,9010,53:22.1,15,227,52900,3,60,853,3,84,0,0.0,1.0,32,56,28,13.4,1415929779458547456
466,9010,53:22.1,15,334,47100,3,64,622,3,108,0,0.0,1.0,32,55,12,13.4,1258734805982444800
467,9010,53:22.1,15,1280,35000,2,675,2530,0,0,0,5.0,0.3,45,58,8,13.4,949381627199599744
468,9010,53:22.1,15,169,50600,5,77,532,0,72,0,0.0,1.21,33,54,11,13.4,1350843190858483200
469,9010,53:22.1,15,308,39700,3,64,581,5,27,0,0.0,1.0,32,60,18,13.4,1061159543847744256
