# Anomaly Scoring Notebook

Last Updated November 29, 2023

In [1]:
import os
import requests
import json
import ast
import pandas as pd
import yaml

In [2]:
def load_credentials(credentials_file):
    """Copy a project asset into the current working directory.

    The asset named ``credentials_file`` is fetched through
    ``ibm_watson_studio_lib`` and its bytes are written to
    ``./<credentials_file>``.

    Args:
        credentials_file: Name of the asset to load; the same string is used
            as the local file name.

    Returns:
        None. The local file is produced as a side effect.
    """
    # Imported inside the function, so the library is only required when this
    # helper is actually called.
    from ibm_watson_studio_lib import access_project_or_space

    project = access_project_or_space()
    asset_buffer = project.load_data(credentials_file)

    local_path = f"./{credentials_file}"
    with open(local_path, "wb") as out_file:
        out_file.write(asset_buffer.read())

### Create the Bearer Token

In [3]:
# Location of the credentials JSON, relative to the notebook's working directory.
credentials_file = "../../config/credentials/wml-credentials.json"

# Read the whole file and parse it into the `credentials` dict.
with open(credentials_file) as f:
    credentials = json.loads(f.read())

In [4]:
# Connection settings taken from the credentials file loaded above.
USERNAME = credentials["username"]
API_KEY = credentials["apikey"]
# Strip any trailing slash so that f"{DATAPLATFORM_URL}/icp4d-api/..." never
# ends up with a double slash in the request path.
DATAPLATFORM_URL = credentials["url"].rstrip("/")

In [5]:
# The authorization request body is sent as JSON.
headers = {"Content-type": "application/json"}

# Body of the authorization request: user name plus API key.
data = dict(username=USERNAME, api_key=API_KEY)

In [6]:
# Exchange the user name / API key for a bearer token.
response = requests.post(
    f"{DATAPLATFORM_URL}/icp4d-api/v1/authorize",
    headers=headers,
    data=json.dumps(data),
    # SECURITY: verify=False turns off TLS certificate validation (the
    # equivalent of curl's '-k' / '--insecure'). Prefer passing the path of
    # the cluster's CA bundle here once it is available.
    verify=False,
    # Without a timeout a stalled connection would hang the cell indefinitely.
    timeout=30,
)
# Stop here with the HTTP status if authorization failed, rather than hitting
# a parse error or KeyError on the lines below.
response.raise_for_status()

token = response.text  # raw response body, kept for inspection
# The body is JSON; ast.literal_eval only understands Python literals and
# fails on JSON values such as true / false / null.
dict_token = response.json()
bearer_token = dict_token['token']



### Load the Data

In [7]:
data_path = '../data/training.csv'

In [8]:
df = pd.read_csv(data_path)

In [9]:
df.head(5)

Unnamed: 0,datetime,load_factor,loss_factor,energy_loss,tag_WTI,tag_OTI_A,tag_OTI_T,tag_MOG_A
0,2019-07-04 17:00:00,0.000392,3.063004e-07,5.375451e-07,0,0,0,0
1,2019-07-04 18:00:00,0.000519,5.247114e-07,9.439461e-07,0,0,0,0
2,2019-07-04 19:00:00,0.000517,5.200765e-07,9.352261e-07,0,0,0,0
3,2019-07-04 20:00:00,0.000536,5.577041e-07,1.006119e-06,0,0,0,0
4,2019-07-04 21:00:00,0.000548,5.818571e-07,1.051742e-06,0,0,0,0


In [10]:
df.shape

(4451, 8)

In [11]:
df.columns

Index(['datetime', 'load_factor', 'loss_factor', 'energy_loss', 'tag_WTI',
       'tag_OTI_A', 'tag_OTI_T', 'tag_MOG_A'],
      dtype='object')

### Select the three feature columns used for scoring

In [12]:
feature_columns = ['load_factor','loss_factor','energy_loss']

In [13]:
df_score_all = df[feature_columns]

In [14]:
# Discard rows with a missing feature value, then renumber the remaining
# rows from 0 (the old index is not kept as a column).
df_score_all = df_score_all.dropna().reset_index(drop=True)

In [15]:
df_score_all.head(100)

Unnamed: 0,load_factor,loss_factor,energy_loss
0,0.000392,3.063004e-07,5.375451e-07
1,0.000519,5.247114e-07,9.439461e-07
2,0.000517,5.200765e-07,9.352261e-07
3,0.000536,5.577041e-07,1.006119e-06
4,0.000548,5.818571e-07,1.051742e-06
...,...,...,...
95,0.371416,1.505130e-01,4.828251e-01
96,0.564848,3.355007e-01,1.116687e+00
97,0.689818,4.916550e-01,1.665471e+00
98,0.834027,7.067997e-01,2.434604e+00


In [16]:
df.shape

(4451, 8)

## Send the Data for Scoring

In [17]:
# YAML file holding the deployment details (printed below after loading).
model_factory_config_file_name = "../config/deployment_info.yml"

# Read the file as text and parse it with the safe loader.
with open(model_factory_config_file_name) as file:
    model_factory_config = yaml.safe_load(file.read())

print(model_factory_config)

{'cp4d_url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/654f68d2-d8e6-43b4-a13b-0ec2f265bb93/predictions?version=2023-11-17', 'online_url': {'url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/654f68d2-d8e6-43b4-a13b-0ec2f265bb93/predictions'}, 'serving_urls': ['https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/654f68d2-d8e6-43b4-a13b-0ec2f265bb93/predictions'], 'state': 'ready'}


In [18]:
# Field names for the scoring payload: the feature frame's column labels.
array_of_input_fields = df_score_all.columns.tolist()
# Values for the scoring payload: one inner list per row, in column order.
array_of_values_to_be_scored = df_score_all[array_of_input_fields].to_numpy().tolist()

print("Input Data")
print(array_of_input_fields)
# Last expression of the cell, so the first ten rows are rendered as a table.
df_score_all[array_of_input_fields].head(10)

Input Data
['load_factor', 'loss_factor', 'energy_loss']


Unnamed: 0,load_factor,loss_factor,energy_loss
0,0.000392,3.063004e-07,5.375451e-07
1,0.000519,5.247114e-07,9.439461e-07
2,0.000517,5.200765e-07,9.352261e-07
3,0.000536,5.577041e-07,1.006119e-06
4,0.000548,5.818571e-07,1.051742e-06
5,0.000548,5.818571e-07,1.051742e-06
6,0.000558,6.015309e-07,1.088969e-06
7,0.000558,6.015309e-07,1.088969e-06
8,0.000567,6.215165e-07,1.126844e-06
9,0.000567,6.215165e-07,1.126844e-06


In [19]:
# Retrieve the CP4D scoring endpoint URL from the deployment configuration
scoring_url = model_factory_config['cp4d_url']
scoring_url

'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/654f68d2-d8e6-43b4-a13b-0ec2f265bb93/predictions?version=2023-11-17'

In [20]:
array_of_input_fields

['load_factor', 'loss_factor', 'energy_loss']

In [21]:
array_of_values_to_be_scored

[[0.0003918983505312, 3.0630043163682066e-07, 5.375451100219294e-07],
 [0.0005193254215628, 5.247113776609341e-07, 9.439461271851828e-07],
 [0.0005169211372038, 5.200765170230908e-07, 9.352261173082796e-07],
 [0.0005361554120765, 5.577040615920426e-07, 1.0061191906462613e-06],
 [0.0005481768338719, 5.818570980886364e-07, 1.0517424441785531e-06],
 [0.0005481768338719, 5.818570980886364e-07, 1.0517424441785531e-06],
 [0.0005577939713082, 6.015308928123915e-07, 1.0889694004975845e-06],
 [0.0005577939713082, 6.015308928123915e-07, 1.0889694004975845e-06],
 [0.0005674111087446, 6.215164944247056e-07, 1.1268437821439034e-06],
 [0.0005674111087446, 6.215164944247056e-07, 1.1268437821439034e-06],
 [0.0005674111087446, 6.215164944247056e-07, 1.1268437821439034e-06],
 [0.0005601982556673, 6.064980797785535e-07, 1.0983772997847312e-06],
 [0.0005577939713082, 6.015308928123915e-07, 1.0889694004975845e-06],
 [0.0005337511277174, 5.529320911850179e-07, 9.971159321886695e-07],
 [0.0005289425589992, 5

In [22]:
import urllib3, requests, json

# scoring_url = 'https://cpd-ibm-cpd.mas-demo-cp4d-822c5cdfc486f5db3c3145c89ca6409d-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/14b272a1-111b-4196-9b07-eec757faaaca/predictions?version=2023-08-29'

# Authenticate the scoring call with the bearer token obtained earlier.
header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + bearer_token}

# Scoring payload: the field names plus one list of values per row, both
# built from the feature DataFrame in the earlier cell.
payload_scoring = {"input_data": [{"fields": array_of_input_fields, "values": array_of_values_to_be_scored}]}
print("Input payload")
print(json.dumps(payload_scoring, indent=2))

# SECURITY: verify=False turns off TLS certificate validation; prefer passing
# the path of the cluster's CA bundle once it is available.
response_scoring = requests.post(scoring_url, json=payload_scoring, headers=header, verify=False)
# Surface HTTP errors (expired token, wrong deployment, rejected payload)
# here instead of failing later with KeyError: 'predictions'.
response_scoring.raise_for_status()
print("Scoring response")
predictions = response_scoring.json()
print(json.dumps(predictions, indent=2))

Input payload
{
  "input_data": [
    {
      "fields": [
        "load_factor",
        "loss_factor",
        "energy_loss"
      ],
      "values": [
        [
          0.0003918983505312,
          3.0630043163682066e-07,
          5.375451100219294e-07
        ],
        [
          0.0005193254215628,
          5.247113776609341e-07,
          9.439461271851828e-07
        ],
        [
          0.0005169211372038,
          5.200765170230908e-07,
          9.352261173082796e-07
        ],
        [
          0.0005361554120765,
          5.577040615920426e-07,
          1.0061191906462613e-06
        ],
        [
          0.0005481768338719,
          5.818570980886364e-07,
          1.0517424441785531e-06
        ],
        [
          0.0005481768338719,
          5.818570980886364e-07,
          1.0517424441785531e-06
        ],
        [
          0.0005577939713082,
          6.015308928123915e-07,
          1.0889694004975845e-06
        ],
        [
          0.00055779



Scoring response
{
  "predictions": [
    {
      "fields": [
        "prediction"
      ],
      "values": [
        [
          [
            1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
            -1.0
          ]
        ],
        [
          [
 

In [23]:
output = predictions["predictions"][0]["values"]

In [24]:
output

[[[1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[1.0]],
 [[1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[-1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 [[1.0]],
 

### Combine the Results

In [25]:
output = predictions["predictions"][0]["values"]

In [26]:
# Name of the single column that will hold the model output.
column_names = ['Anomaly Output']
# Every entry of `output` is wrapped in one extra list (e.g. [[1.0]]);
# take the first element of each entry to drop that outer layer.
reshaped_data = [wrapped[0] for wrapped in output]

In [27]:
predictions_df = pd.DataFrame(reshaped_data, columns=column_names)

In [28]:
predictions_df['Anomaly Output'] = predictions_df['Anomaly Output'].astype(int)

In [29]:
display(predictions_df)

Unnamed: 0,Anomaly Output
0,1
1,-1
2,-1
3,-1
4,-1
...,...
4446,1
4447,1
4448,1
4449,1


In [30]:
# The scored rows are df[feature_columns] with NaN rows dropped and the index
# reset, so predictions_df is numbered 0..n-1. Concatenating it directly with
# df would shift every prediction after the first dropped row onto the wrong
# record. Re-attach the original row labels before merging; rows that were
# not scored end up with NaN in 'Anomaly Output'.
scored_index = df[feature_columns].dropna().index
predictions_aligned = predictions_df.copy()
# Raises ValueError if the number of predictions differs from the number of
# scored rows, which would indicate a mismatch worth investigating.
predictions_aligned.index = scored_index
df_merged = pd.concat([df, predictions_aligned], axis=1)

In [31]:
df_merged.tail(10)

Unnamed: 0,datetime,load_factor,loss_factor,energy_loss,tag_WTI,tag_OTI_A,tag_OTI_T,tag_MOG_A,Anomaly Output
4441,2020-04-13 15:00:00,0.443362,0.211155,0.687995,1,0,0,0,1
4442,2020-04-13 16:00:00,0.414162,0.185366,0.600356,1,0,0,0,1
4443,2020-04-13 17:00:00,0.408793,0.180799,0.584892,1,0,0,0,1
4444,2020-04-13 18:00:00,0.500329,0.266058,0.876152,1,0,0,0,1
4445,2020-04-13 19:00:00,0.495787,0.26146,0.860318,1,0,0,0,1
4446,2020-04-13 20:00:00,0.517957,0.284269,0.938979,1,0,0,0,1
4447,2020-04-13 21:00:00,0.517996,0.28431,0.939119,1,0,0,0,1
4448,2020-04-13 22:00:00,0.521028,0.2875,0.950144,1,0,0,0,1
4449,2020-04-13 23:00:00,0.498605,0.264309,0.870125,1,0,0,0,1
4450,2020-04-14 00:00:00,0.44302,0.210844,0.686933,1,0,0,0,1


In [32]:
display(df_merged.tail(10))

Unnamed: 0,datetime,load_factor,loss_factor,energy_loss,tag_WTI,tag_OTI_A,tag_OTI_T,tag_MOG_A,Anomaly Output
4441,2020-04-13 15:00:00,0.443362,0.211155,0.687995,1,0,0,0,1
4442,2020-04-13 16:00:00,0.414162,0.185366,0.600356,1,0,0,0,1
4443,2020-04-13 17:00:00,0.408793,0.180799,0.584892,1,0,0,0,1
4444,2020-04-13 18:00:00,0.500329,0.266058,0.876152,1,0,0,0,1
4445,2020-04-13 19:00:00,0.495787,0.26146,0.860318,1,0,0,0,1
4446,2020-04-13 20:00:00,0.517957,0.284269,0.938979,1,0,0,0,1
4447,2020-04-13 21:00:00,0.517996,0.28431,0.939119,1,0,0,0,1
4448,2020-04-13 22:00:00,0.521028,0.2875,0.950144,1,0,0,0,1
4449,2020-04-13 23:00:00,0.498605,0.264309,0.870125,1,0,0,0,1
4450,2020-04-14 00:00:00,0.44302,0.210844,0.686933,1,0,0,0,1
