# Rule-Based Scoring Notebook
Last Updated August 29 2023

In [1]:
import os
import requests
import json
import ast
import pandas as pd
import yaml

In [2]:
def load_credentials(credentials_file):
    from ibm_watson_studio_lib import access_project_or_space
    wslib = access_project_or_space()
    
    for file in [credentials_file]:
        data = wslib.load_data(file)
        with open(f"./{file}", "wb") as f:
            f.write(data.read())

### Create the Bearer Token

In [3]:
credentials_file = "../../config/credentials/wml-credentials.json"
with open(credentials_file, "r") as f:
    credentials = json.load(f)

In [4]:
# Assuming you have these variables set somewhere in your script
USERNAME = credentials["username"]
API_KEY = credentials["apikey"]
DATAPLATFORM_URL = credentials["url"]

In [5]:
headers = {
    'Content-type': 'application/json'
}
data = {
    'username': USERNAME,
    'api_key': API_KEY
}

In [6]:
response = requests.post(
    f"{DATAPLATFORM_URL}/icp4d-api/v1/authorize", 
    headers=headers, 
    data=json.dumps(data),
    verify=False  # This corresponds to the '-k' or '--insecure' flag in curl
)
token = response.text
dict_token = ast.literal_eval(token)
bearer_token = dict_token['token']



### Load the Data

In [7]:
data_path = '../data/training.csv'

In [8]:
df = pd.read_csv(data_path)

In [9]:
df.head(5)

Unnamed: 0,datetime,OTI,THDVL1,THDVL2,THDVL3
0,2019-07-16 13:30:00,146.0,2.0,1.8,1.7
1,2019-07-16 13:45:00,246.0,2.0,1.8,1.8
2,2019-07-16 14:00:00,246.0,2.0,1.9,1.9
3,2019-07-16 14:15:00,49.0,2.1,1.8,1.8
4,2019-07-16 14:30:00,44.0,2.0,1.8,2.5


In [10]:
df.shape

(4711, 5)

In [13]:
df.columns

Index(['datetime', 'OTI', 'THDVL1', 'THDVL2', 'THDVL3'], dtype='object')

### Choose all three electrical lines

In [14]:
selected_columns = ['datetime','THDVL1', 'THDVL2', 'THDVL3']

In [15]:
df_score_all = df[selected_columns]

In [16]:
df_score_all = df_score_all.dropna()
df_score_all = df_score_all.reset_index(drop=True)

In [17]:
df_score_all.head(100)

Unnamed: 0,datetime,THDVL1,THDVL2,THDVL3
0,2019-07-16 13:30:00,2.0,1.8,1.7
1,2019-07-16 13:45:00,2.0,1.8,1.8
2,2019-07-16 14:00:00,2.0,1.9,1.9
3,2019-07-16 14:15:00,2.1,1.8,1.8
4,2019-07-16 14:30:00,2.0,1.8,2.5
...,...,...,...,...
95,2019-07-17 13:15:00,1.8,1.7,1.6
96,2019-07-17 13:30:00,1.8,1.7,1.8
97,2019-07-17 13:45:00,1.9,1.7,1.6
98,2019-07-17 14:00:00,1.9,1.7,1.8


In [18]:
df.shape

(4711, 5)

## Send the Data for Scoring

In [19]:
config_file = 'deployment_info_harmonic.yml'

In [20]:
model_factory_config_file_name = "../config/" + config_file

with open(model_factory_config_file_name, 'r') as file:
    model_factory_config = yaml.safe_load(file)

print(model_factory_config)

{'cp4d_url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/ccfa8aa6-3fe4-4f78-b98d-3f2b7b4e7d7d/predictions?version=2023-11-20', 'online_url': {'url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/ccfa8aa6-3fe4-4f78-b98d-3f2b7b4e7d7d/predictions'}, 'serving_urls': ['https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/ccfa8aa6-3fe4-4f78-b98d-3f2b7b4e7d7d/predictions'], 'state': 'ready'}


In [21]:
array_of_input_fields = list(df_score_all.columns) 
array_of_values_to_be_scored = df_score_all[array_of_input_fields].values.tolist()

print("Input Data")
print(array_of_input_fields)
df_score_all[array_of_input_fields].head(10)

Input Data
['datetime', 'THDVL1', 'THDVL2', 'THDVL3']


Unnamed: 0,datetime,THDVL1,THDVL2,THDVL3
0,2019-07-16 13:30:00,2.0,1.8,1.7
1,2019-07-16 13:45:00,2.0,1.8,1.8
2,2019-07-16 14:00:00,2.0,1.9,1.9
3,2019-07-16 14:15:00,2.1,1.8,1.8
4,2019-07-16 14:30:00,2.0,1.8,2.5
5,2019-07-16 14:45:00,2.0,1.9,1.7
6,2019-07-16 15:00:00,2.2,1.9,1.7
7,2019-07-16 15:15:00,2.1,1.9,1.8
8,2019-07-16 15:30:00,2.0,1.8,1.8
9,2019-07-16 15:45:00,2.1,2.0,1.8


In [22]:
# retrieve the CP4D endpoint at the IBM Watson Studio
scoring_url = model_factory_config['cp4d_url']
scoring_url

'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/ccfa8aa6-3fe4-4f78-b98d-3f2b7b4e7d7d/predictions?version=2023-11-20'

In [23]:
array_of_input_fields

['datetime', 'THDVL1', 'THDVL2', 'THDVL3']

In [24]:
array_of_values_to_be_scored

[['2019-07-16 13:30:00', 2.0, 1.8, 1.7],
 ['2019-07-16 13:45:00', 2.0, 1.8, 1.8],
 ['2019-07-16 14:00:00', 2.0, 1.9, 1.9],
 ['2019-07-16 14:15:00', 2.1, 1.8, 1.8],
 ['2019-07-16 14:30:00', 2.0, 1.8, 2.5],
 ['2019-07-16 14:45:00', 2.0, 1.9, 1.7],
 ['2019-07-16 15:00:00', 2.2, 1.9, 1.7],
 ['2019-07-16 15:15:00', 2.1, 1.9, 1.8],
 ['2019-07-16 15:30:00', 2.0, 1.8, 1.8],
 ['2019-07-16 15:45:00', 2.1, 2.0, 1.8],
 ['2019-07-16 16:00:00', 2.0, 2.0, 1.8],
 ['2019-07-16 16:15:00', 2.1, 2.1, 1.9],
 ['2019-07-16 16:30:00', 2.1, 2.0, 1.6],
 ['2019-07-16 16:45:00', 2.2, 2.1, 1.9],
 ['2019-07-16 17:00:00', 2.3, 2.2, 2.0],
 ['2019-07-16 17:15:00', 2.2, 2.2, 2.0],
 ['2019-07-16 17:30:00', 2.4, 2.3, 2.1],
 ['2019-07-16 17:45:00', 2.5, 2.4, 2.2],
 ['2019-07-16 18:00:00', 2.5, 2.4, 2.4],
 ['2019-07-16 18:15:00', 3.8, 3.8, 3.7],
 ['2019-07-16 18:30:00', 0.0, 0.0, 0.0],
 ['2019-07-16 18:45:00', 0.0, 0.0, 0.0],
 ['2019-07-16 19:00:00', 0.0, 0.0, 0.0],
 ['2019-07-16 19:15:00', 0.0, 0.0, 0.0],
 ['2019-07-16 19

In [25]:
import urllib3, requests, json

# scoring_url = 'https://cpd-ibm-cpd.mas-demo-cp4d-822c5cdfc486f5db3c3145c89ca6409d-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/14b272a1-111b-4196-9b07-eec757faaaca/predictions?version=2023-08-29'

header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + bearer_token}

# NOTE: manually define and pass the array(s) of values to be scored in the next line
payload_scoring = {"input_data": [{"fields": array_of_input_fields, "values": array_of_values_to_be_scored}]}
print("Input payload")
print(json.dumps(payload_scoring, indent=2))

response_scoring = requests.post(scoring_url, json=payload_scoring, headers=header, verify=False)
print("Scoring response")
predictions = json.loads(response_scoring.text)
print(json.dumps(predictions, indent=2))

Input payload
{
  "input_data": [
    {
      "fields": [
        "datetime",
        "THDVL1",
        "THDVL2",
        "THDVL3"
      ],
      "values": [
        [
          "2019-07-16 13:30:00",
          2.0,
          1.8,
          1.7
        ],
        [
          "2019-07-16 13:45:00",
          2.0,
          1.8,
          1.8
        ],
        [
          "2019-07-16 14:00:00",
          2.0,
          1.9,
          1.9
        ],
        [
          "2019-07-16 14:15:00",
          2.1,
          1.8,
          1.8
        ],
        [
          "2019-07-16 14:30:00",
          2.0,
          1.8,
          2.5
        ],
        [
          "2019-07-16 14:45:00",
          2.0,
          1.9,
          1.7
        ],
        [
          "2019-07-16 15:00:00",
          2.2,
          1.9,
          1.7
        ],
        [
          "2019-07-16 15:15:00",
          2.1,
          1.9,
          1.8
        ],
        [
          "2019-07-16 15:30:00",
          2.0,




Scoring response
{
  "predictions": [
    {
      "fields": [
        "prediction"
      ],
      "values": [
        [
          [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
          ]
        ],
        [
          [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
          ]
        ],
        [
          [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
          ]
        ],
        [
          [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
          ]
        ],
        [
          [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
          ]
      

In [26]:
output = predictions["predictions"][0]["values"]

In [27]:
output

[[[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0,

## Combine the Results

In [28]:
output = predictions["predictions"][0]["values"]

In [29]:
reshaped_data = [row[0] for row in output]

In [32]:
column_names = ['THDVL1_anomaly','THDVL1_anomaly_sum','THDVL1_anomaly_sum_flag',
                'THDVL2_anomaly','THDVL2_anomaly_sum','THDVL2_anomaly_sum_flag',
                'THDVL3_anomaly','THDVL3_anomaly_sum','THDVL3_anomaly_sum_flag'
               ]

In [33]:
predictions_df = pd.DataFrame(reshaped_data, columns=column_names)

In [34]:
predictions_df = predictions_df.astype(int)

In [35]:
display(predictions_df)

Unnamed: 0,THDVL1_anomaly,THDVL1_anomaly_sum,THDVL1_anomaly_sum_flag,THDVL2_anomaly,THDVL2_anomaly_sum,THDVL2_anomaly_sum_flag,THDVL3_anomaly,THDVL3_anomaly_sum,THDVL3_anomaly_sum_flag
0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
4706,0,0,0,0,0,0,0,0,0
4707,0,0,0,0,0,0,0,0,0
4708,0,0,0,0,0,0,0,0,0
4709,0,0,0,0,0,0,0,0,0


In [36]:
df_merged = pd.concat([df, predictions_df], axis=1)

In [38]:
display(df_merged.head(10))

Unnamed: 0,datetime,OTI,THDVL1,THDVL2,THDVL3,THDVL1_anomaly,THDVL1_anomaly_sum,THDVL1_anomaly_sum_flag,THDVL2_anomaly,THDVL2_anomaly_sum,THDVL2_anomaly_sum_flag,THDVL3_anomaly,THDVL3_anomaly_sum,THDVL3_anomaly_sum_flag
0,2019-07-16 13:30:00,146.0,2.0,1.8,1.7,0,0,0,0,0,0,0,0,0
1,2019-07-16 13:45:00,246.0,2.0,1.8,1.8,0,0,0,0,0,0,0,0,0
2,2019-07-16 14:00:00,246.0,2.0,1.9,1.9,0,0,0,0,0,0,0,0,0
3,2019-07-16 14:15:00,49.0,2.1,1.8,1.8,0,0,0,0,0,0,0,0,0
4,2019-07-16 14:30:00,44.0,2.0,1.8,2.5,0,0,0,0,0,0,0,0,0
5,2019-07-16 14:45:00,40.0,2.0,1.9,1.7,0,0,0,0,0,0,0,0,0
6,2019-07-16 15:00:00,38.0,2.2,1.9,1.7,0,0,0,0,0,0,0,0,0
7,2019-07-16 15:15:00,36.0,2.1,1.9,1.8,0,0,0,0,0,0,0,0,0
8,2019-07-16 15:30:00,31.0,2.0,1.8,1.8,0,0,0,0,0,0,0,0,0
9,2019-07-16 15:45:00,35.0,2.1,2.0,1.8,0,0,0,0,0,0,0,0,0


In [39]:
df_merged[df_merged['THDVL1']>= 4.0]

Unnamed: 0,datetime,OTI,THDVL1,THDVL2,THDVL3,THDVL1_anomaly,THDVL1_anomaly_sum,THDVL1_anomaly_sum_flag,THDVL2_anomaly,THDVL2_anomaly_sum,THDVL2_anomaly_sum_flag,THDVL3_anomaly,THDVL3_anomaly_sum,THDVL3_anomaly_sum_flag
210,2019-07-18 18:00:00,36.0,4.5,4.6,4.6,0,0,0,0,0,0,0,0,0
212,2019-07-18 18:30:00,35.0,4.7,4.8,2.0,0,0,0,0,0,0,0,0,0
244,2019-07-19 02:30:00,29.0,5.1,5.2,5.2,1,1,0,1,1,0,1,1,0
303,2019-07-19 17:15:00,40.0,4.9,5.1,5.3,0,1,0,1,2,0,1,2,0
474,2019-07-21 12:00:00,47.0,4.7,4.8,4.6,0,0,0,0,0,0,0,0,0
491,2019-07-21 16:15:00,47.0,5.0,5.2,5.0,1,1,0,1,1,0,1,1,0
567,2019-07-22 11:15:00,28.0,4.6,4.6,4.6,0,1,0,0,1,0,0,1,0
628,2019-07-23 02:30:00,30.0,5.0,5.0,5.0,1,1,0,1,1,0,1,1,0
649,2019-07-23 07:45:00,33.0,4.9,5.1,1.4,0,1,0,1,2,0,0,1,0
664,2019-07-23 11:30:00,40.0,4.1,4.0,4.2,0,1,0,0,2,0,0,1,0
