# Health DGA Score Notebook
Last updated: August 29, 2023

In [None]:
import os
import requests
import json
import ast
import pandas as pd
import yaml

In [None]:
def load_credentials(credentials_file):
    """Copy a Watson Studio project asset to the local filesystem.

    The asset named ``credentials_file`` is loaded through
    ``ibm_watson_studio_lib`` and its contents are written, in binary mode,
    to ``./<credentials_file>``.

    Args:
        credentials_file: Name of the asset to load; also used as the local
            path (relative to the current directory) it is written to.

    Returns:
        None. The only effect is the file written to disk.
    """
    # Imported lazily so the notebook still runs where the library is absent
    # and this helper is never called.
    from ibm_watson_studio_lib import access_project_or_space

    project = access_project_or_space()
    asset_stream = project.load_data(credentials_file)
    local_path = f"./{credentials_file}"
    with open(local_path, "wb") as local_copy:
        local_copy.write(asset_stream.read())

### Create the Bearer Token

In [None]:
# Load the platform credentials from a local JSON file.
credentials_file = "../../config/credentials/wml-credentials.json"
# JSON is UTF-8 by specification; naming the encoding keeps the read from
# depending on the platform's locale default.
with open(credentials_file, "r", encoding="utf-8") as f:
    credentials = json.load(f)

In [None]:
# Pull the connection settings out of the `credentials` dict loaded from the
# credentials JSON file: login name, API key and platform base URL.
USERNAME = credentials["username"]
API_KEY = credentials["apikey"]
DATAPLATFORM_URL = credentials["url"]

In [None]:
# Request headers and credential payload for the authorize call.
# `data` is serialised to JSON by the cell that sends the request.
headers = {"Content-type": "application/json"}
data = dict(username=USERNAME, api_key=API_KEY)

In [None]:
# Request a bearer token from the platform's authorize endpoint.
# SECURITY NOTE(review): verify=False disables TLS certificate verification
# (the equivalent of curl's -k / --insecure flag). It is kept to preserve the
# existing behaviour; pass a CA bundle path to `verify` once one is available.
response = requests.post(
    f"{DATAPLATFORM_URL}/icp4d-api/v1/authorize",
    headers=headers,
    data=json.dumps(data),
    verify=False,
)
# Fail here, with the HTTP status, rather than with a parse error or a
# KeyError further down when the login is rejected.
response.raise_for_status()
token = response.text  # raw response body, kept for inspection
# The body is JSON, so decode it as JSON: a Python-literal parser would reject
# JSON's true / false / null.
dict_token = response.json()
bearer_token = dict_token['token']

### Load the Data

In [None]:
# Relative path of the CSV file that is read into `df` below.
data_path = '../data/training.csv'

In [None]:
# Read the CSV with pandas defaults (first row as header, default integer index).
df = pd.read_csv(data_path)

In [None]:
# Preview the first five rows.
df.head(5)

In [None]:
# (rows, columns) of the loaded data.
df.shape

In [None]:
# List the column names available in the loaded data.
df.columns

### Select the feature columns for scoring

In [None]:
# Columns of `df` that are sent to the model, in payload order.
feature_columns = [
    'DGAR-H2',
    'DGAR-O2',
    'DGAR-N2',
    'DGAR-CH4',
    'DGAR-CO',
    'DGAR-CO2',
    'DGAR-C2H4',
    'DGAR-C2H6',
    'DGAR-C2H2',
    'DBDS',
    'POWER_FACT',
    'INTER_V',
    'DI_RIG',
    'H2O',
]

In [None]:
# Keep only the feature columns, in the order listed in `feature_columns`.
df_score_all = df[feature_columns]

In [None]:
# Preview up to the first 100 rows of the feature subset.
df_score_all.head(100)

In [None]:
# NOTE(review): this shows the shape of the full frame again, which an earlier
# cell already displayed; `df_score_all.shape` may have been intended — confirm.
df.shape

## Send the Data for Scoring

In [None]:
# Load the deployment settings (including the scoring endpoint under
# `cp4d_url`) from the YAML config file.
model_factory_config_file_name = "../config/deployment_info.yml"

# Name the encoding explicitly so the read does not depend on the platform's
# locale default. safe_load only builds plain Python objects.
with open(model_factory_config_file_name, 'r', encoding="utf-8") as file:
    model_factory_config = yaml.safe_load(file)

# NOTE(review): this echoes the whole config into the notebook output —
# confirm the file holds nothing sensitive before sharing the notebook.
print(model_factory_config)

In [None]:
# Build the two pieces of the scoring payload: the column names and the row
# values (one list per row, in the same column order).
array_of_input_fields = list(df_score_all.columns)
scoring_frame = df_score_all[array_of_input_fields]
array_of_values_to_be_scored = scoring_frame.values.tolist()

print("Input Data")
print(array_of_input_fields)
# Last expression of the cell: rendered as a preview of the first ten rows.
scoring_frame.head(10)

In [None]:
# Read the scoring endpoint from the `cp4d_url` entry of the deployment config;
# the bare name on the last line echoes it as the cell output.
scoring_url = model_factory_config['cp4d_url']
scoring_url

In [None]:
# Echo the field names that will be sent in the payload.
array_of_input_fields

In [None]:
# Echo the row values that will be sent; note this renders every row, not a preview.
array_of_values_to_be_scored

In [None]:
import urllib3
import requests
import json

# `scoring_url` comes from the deployment config.
# NOTE(review): presumably the deployment's full predictions endpoint, i.e.
# <host>/ml/v4/deployments/<deployment-id>/predictions?version=<date> —
# verify the configured value.

header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + bearer_token}

# A single input_data entry: column names under "fields" and one list of
# values per row under "values".
payload_scoring = {"input_data": [{"fields": array_of_input_fields, "values": array_of_values_to_be_scored}]}
print("Input payload")
print(json.dumps(payload_scoring, indent=2))

# SECURITY NOTE(review): verify=False disables TLS certificate verification.
# It is kept to preserve the existing behaviour; pass a CA bundle path to
# `verify` once one is available.
response_scoring = requests.post(scoring_url, json=payload_scoring, headers=header, verify=False)
if not response_scoring.ok:
    # Show the service's error body, then stop here instead of failing later
    # with a KeyError on the missing "predictions" key.
    print(response_scoring.text)
    response_scoring.raise_for_status()
print("Scoring response")
predictions = response_scoring.json()
print(json.dumps(predictions, indent=2))

In [None]:
# Take the "values" list from the first entry of the response's "predictions" array.
output = predictions["predictions"][0]["values"]

In [None]:
# Echo the raw prediction rows.
output

## Combine the Results

In [None]:
# NOTE(review): same assignment as in the scoring section above; presumably
# repeated so this section can be re-run on its own — confirm or remove.
output = predictions["predictions"][0]["values"]

In [None]:
# Keep only the first element of each prediction row (presumably the predicted
# score — verify against the response's "fields"); any further elements are dropped.
reshaped_data = [row[0] for row in output]

In [None]:
# Name of the single column that will hold the predictions.
column_names = ['health_score']

In [None]:
# One-column frame of predictions with a default 0..n-1 index.
predictions_df = pd.DataFrame(reshaped_data, columns=column_names)

In [None]:
# Cast the scores to integers.
# NOTE(review): astype(int) truncates any fractional part and raises on
# missing values — confirm the model always returns whole-number scores.
predictions_df = predictions_df.astype(int)

In [None]:
# `display` is not imported in the visible cells; it is expected to come from
# the IPython/Jupyter runtime.
display(predictions_df)

In [None]:
# Append the score column to the original data. concat(axis=1) aligns rows by
# index label, so this relies on `df` and `predictions_df` both carrying the
# default 0..n-1 index and the same number of rows.
df_merged = pd.concat([df, predictions_df], axis=1)

In [None]:
# Show the last ten rows of the merged frame, including the new health_score column.
display(df_merged.tail(10))