# CO2e Scoring Notebook (Using EPA Mapping)

Last Updated November 29, 2023

In [1]:
import os
import requests
import json
import ast
import pandas as pd
import yaml

### Retrieve the Credentials and Authenticate

In [2]:
# Location of the credentials JSON, relative to the notebook's working
# directory. The "username", "apikey" and "url" entries are read from it.
credentials_file = '../../config/credentials/wml-credentials.json'

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default locale encoding.
with open(credentials_file, "r", encoding="utf-8") as f:
    credentials = json.load(f)

In [3]:
# Assuming you have these variables set somewhere in your script
USERNAME = credentials["username"]
API_KEY = credentials["apikey"]
DATAPLATFORM_URL = credentials["url"]

In [4]:
# Header and body for the authorization request: the body carries the
# username / API key pair and is serialized to JSON when the request is sent.
headers = {'Content-type': 'application/json'}
data = dict(username=USERNAME, api_key=API_KEY)

In [5]:
# Exchange the username / API key pair for a bearer token.
response = requests.post(
    f"{DATAPLATFORM_URL}/icp4d-api/v1/authorize",
    headers=headers,
    data=json.dumps(data),
    verify=False,  # same as curl's -k / --insecure: the TLS certificate is NOT validated
)
# Fail here with the HTTP status (bad credentials, wrong URL, ...) instead of
# with a confusing parse error or KeyError further down.
response.raise_for_status()

token = response.text  # raw response body, kept for inspection
# The body is JSON, so parse it as JSON. ast.literal_eval only understands
# Python literals and raises on JSON's true / false / null.
dict_token = response.json()
bearer_token = dict_token['token']



### Load the Data

In [6]:
# Load the scoring input. Per the preview below it holds asset_id, timestamp,
# energy_loss and location_zip columns.
df = pd.read_csv('../data/co2e_scoring.csv')

In [7]:
# Preview the last ten rows of the raw input.
df.tail(n=10)

Unnamed: 0,asset_id,timestamp,energy_loss,location_zip
4441,9010,2023-10-29 23:00:00,0.687995,99557
4442,9010,2023-10-30 00:00:00,0.600356,99557
4443,9010,2023-10-30 01:00:00,0.584892,99557
4444,9010,2023-10-30 02:00:00,0.876152,99557
4445,9010,2023-10-30 03:00:00,0.860318,99557
4446,9010,2023-10-30 04:00:00,0.938979,99557
4447,9010,2023-10-30 05:00:00,0.939119,99557
4448,9010,2023-10-30 06:00:00,0.950144,99557
4449,9010,2023-10-30 07:00:00,0.870125,99557
4450,9010,2023-10-30 08:00:00,0.686933,99557


In [8]:
# Keep only the two columns that are later sent to the model as input fields.
df_loss = df[['energy_loss', 'location_zip']]

In [9]:
# Renumber the rows 0..n-1 and discard the previous index.
# NOTE: dropna() is left disabled here. Dropping rows would break the
# row-for-row alignment that the later pd.concat([df, predictions_df], axis=1)
# relies on.
df_loss = df_loss.reset_index(drop=True)

In [10]:
# Sanity check: only the two selected columns should remain.
df_loss.columns

Index(['energy_loss', 'location_zip'], dtype='object')

In [11]:
# df_loss['location_zip_list'] = df_loss['location_zip']

In [12]:
# Replace the ZIP column with one constant string value for every row.
# NOTE(review): this discards whatever per-row ZIP values the CSV held and
# hard-codes a single location. Presumably the deployed model expects the ZIP
# as a string; confirm before scoring data from other locations.
location_zip = '99557'
df_loss = df_loss.assign(location_zip=location_zip)

In [13]:
# (rows, columns) of the frame that will be sent for scoring.
df_loss.shape

(4451, 2)

### Send the Data for Scoring

In [14]:
# YAML file describing the deployment; its 'cp4d_url' entry is used as the
# scoring endpoint.
model_factory_config_file_name = "../config/deployment_info.yml"

# Decode explicitly as UTF-8 rather than with the platform's default locale
# encoding. safe_load only constructs plain Python objects (dicts, lists,
# strings, ...).
with open(model_factory_config_file_name, 'r', encoding='utf-8') as file:
    model_factory_config = yaml.safe_load(file)

print(model_factory_config)

{'cp4d_url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/339692d8-b809-4e30-b154-c3613ecf9d60/predictions?version=2023-11-21', 'online_url': {'url': 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/339692d8-b809-4e30-b154-c3613ecf9d60/predictions'}, 'serving_urls': ['https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/339692d8-b809-4e30-b154-c3613ecf9d60/predictions'], 'state': 'ready'}


In [15]:
# retrieve the CP4D endpoint at the IBM Watson Studio
scoring_url = model_factory_config['cp4d_url']
scoring_url

'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/339692d8-b809-4e30-b154-c3613ecf9d60/predictions?version=2023-11-21'

In [16]:
# Input fields sent to the model, in the order the values are serialized.
# (Listed explicitly rather than derived from df.columns, which also holds
# asset_id and timestamp.)
array_of_input_fields = ['energy_loss', 'location_zip']

# One inner list per row, as plain Python values, ready for JSON encoding.
array_of_values_to_be_scored = df_loss[array_of_input_fields].to_numpy().tolist()

In [17]:
%%time
import urllib3, requests, json

#scoring_url = 'https://cpd-ibm-cpd.modelfactory-9ca4d14d48413d18ce61b80811ba4308-0000.us-south.containers.appdomain.cloud/ml/v4/deployments/f1ec7cd7-6db7-4e52-a958-ce1beed57e49/predictions?version=2023-11-08'

# NOTE: you must construct mltoken based on provided documentation
header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + bearer_token}

# NOTE: manually define and pass the array(s) of values to be scored in the next line
payload_scoring = {"input_data": [{"fields": array_of_input_fields, "values": array_of_values_to_be_scored}]}
# print("Input payload")
# print(json.dumps(payload_scoring, indent=2))

response_scoring = requests.post(scoring_url, json=payload_scoring, headers=header, verify=False)
print("Scoring response")
predictions = json.loads(response_scoring.text)
print(json.dumps(predictions, indent=2))



Scoring response
{
  "predictions": [
    {
      "fields": [
        "prediction"
      ],
      "values": [
        [
          0.0001188622249463495
        ],
        [
          0.00020856122741515597
        ],
        [
          0.00020657282588259628
        ],
        [
          0.0002231428386539275
        ],
        [
          0.00023198017879863747
        ],
        [
          0.00023198017879863747
        ],
        [
          0.0002408175189433475
        ],
        [
          0.0002408175189433475
        ],
        [
          0.0002496548590880575
        ],
        [
          0.0002496548590880575
        ],
        [
          0.0002496548590880575
        ],
        [
          0.00024302685397952498
        ],
        [
          0.0002408175189433475
        ],
        [
          0.00022027070310689674
        ],
        [
          0.00021629390004177724
        ],
        [
          0.0002231428386539275
        ],
        [
          0.0002162939000

In [19]:
# Echo the parsed scoring response dict.
# NOTE(review): this renders every returned value; consider showing only a
# slice (e.g. the first few entries of the "values" list) to keep the output small.
predictions

{'predictions': [{'fields': ['prediction'],
   'values': [[0.0001188622249463495],
    [0.00020856122741515597],
    [0.00020657282588259628],
    [0.0002231428386539275],
    [0.00023198017879863747],
    [0.00023198017879863747],
    [0.0002408175189433475],
    [0.0002408175189433475],
    [0.0002496548590880575],
    [0.0002496548590880575],
    [0.0002496548590880575],
    [0.00024302685397952498],
    [0.0002408175189433475],
    [0.00022027070310689674],
    [0.00021629390004177724],
    [0.0002231428386539275],
    [0.00021629390004177724],
    [0.00020657282588259628],
    [0.00019950295376682824],
    [0.00018978187960764722],
    [1.2085062647890924e-05],
    [0.00047721636781434005],
    [0.0019817735274512173],
    [0.0020259602281747675],
    [0.0019596801770894426],
    [0.002001657542776815],
    [0.0019884015325597497],
    [0.001877934780750875],
    [0.001029550126858715],
    [0.0018536320953529222],
    [0.0018845627858594073],
    [0.0018558414303890998],
    [0.0

### Combine the Results

In [20]:
# The response holds a list of prediction sets; take the "values" of the first
# one. Each entry is a one-element list, matching the single "prediction" field.
output = predictions["predictions"][0]["values"]

In [21]:
# Each entry of `output` is a one-element list ([value]); unwrap it so the
# predictions become a flat list, then wrap them in a single-column frame.
column_names = ['CO2 (g)']
reshaped_data = [prediction_row[0] for prediction_row in output]

predictions_df = pd.DataFrame(data=reshaped_data, columns=column_names)
display(predictions_df)

Unnamed: 0,CO2 (g)
0,0.000119
1,0.000209
2,0.000207
3,0.000223
4,0.000232
...,...
4446,207.452026
4447,207.482842
4448,209.918722
4449,192.239843


In [22]:
# (rows, columns) of the raw input; the row count should match the number of predictions.
df.shape

(4451, 4)

In [23]:
# pd.concat(axis=1) aligns on index labels and pads with NaN, so a short or
# incomplete prediction set would slip through silently. Check the row counts
# before attaching the predictions to their input rows.
if len(predictions_df) != len(df):
    raise ValueError(
        f"Row count mismatch: {len(df)} input rows vs {len(predictions_df)} predictions"
    )
df_merged = pd.concat([df, predictions_df], axis=1)

In [24]:
# Preview the last ten input rows together with their predicted values.
df_merged.tail(n=10)

Unnamed: 0,asset_id,timestamp,energy_loss,location_zip,CO2 (g)
4441,9010,2023-10-29 23:00:00,0.687995,99557,152.001079
4442,9010,2023-10-30 00:00:00,0.600356,99557,132.638674
4443,9010,2023-10-30 01:00:00,0.584892,99557,129.222182
4444,9010,2023-10-30 02:00:00,0.876152,99557,193.571436
4445,9010,2023-10-30 03:00:00,0.860318,99557,190.073132
4446,9010,2023-10-30 04:00:00,0.938979,99557,207.452026
4447,9010,2023-10-30 05:00:00,0.939119,99557,207.482842
4448,9010,2023-10-30 06:00:00,0.950144,99557,209.918722
4449,9010,2023-10-30 07:00:00,0.870125,99557,192.239843
4450,9010,2023-10-30 08:00:00,0.686933,99557,151.766525
