In [1]:
import gurobipy as gp
from gurobipy import GRB
from eventlet.timeout import Timeout

# import auxillary packages
import requests  # for loading the example source code
import openai
import json
import time
import random

# import flaml and autogen
from flaml import autogen
from flaml.autogen.agentchat import Agent, UserProxyAgent
from flaml.autogen.code_utils import extract_code
from EnergySaverLLM.Agent import ChargingAgent, reset_params_file, clear_param_backups

In [2]:
benchmark_dataset_path = "benchmark/EV_combined.benchmark.json"

In [3]:
# global params_filepath
params_filepath = "EnergySaverLLM/Model/params/EVCharging.json"
params_filepath_backup = "EnergySaverLLM/Model/params/EVCharging_original.json"

In [4]:
with open(benchmark_dataset_path, 'r') as f:
    benchmark_dataset = json.loads(f.read())

In [5]:
log_history = {}
autogen.oai.ChatCompletion.start_logging(log_history)

In [6]:
code_path = "EnergySaverLLM/Model/EVCharging.py"

In [7]:
with open(code_path) as f:
    code = f.read()

In [8]:
example_qa = """
----------
Instruction: Charge the car till 9 AM.
Answer Code:
```JSON
"end_charge_time": 9
```

----------
Question: Charge the car to full charge by 9 AM
Answer Code:
```JSON
"end_charge": 1.00,
"end_charge_time": 9
```
"""

In [22]:
config_list = autogen.config_list_from_json(
    env_or_file = "OAI_CONFIG_LIST",
    filter_dict={
        "model": ["gpt-4"],
    },
)

In [23]:
agent = ChargingAgent(
    name="Tesla Charging Example",
    source_code=code,
    example_qa=example_qa,
    json_filepath=params_filepath,
    llm_config={
        "request_timeout": 600,
        "seed": 42,
        "config_list": config_list,
    },
    evaluate=True
)

user = UserProxyAgent(
    "user", max_consecutive_auto_reply=0,
    human_input_mode="NEVER", code_execution_config=False
)

{'0': 0.3, '1': 0.3, '2': 0.3, '3': 0.3, '4': 0.3, '5': 0.3, '6': 0.3, '7': 0.35, '8': 0.35, '9': 0.35, '10': 0.35, '11': 0.3, '12': 0.3, '13': 0.3, '14': 0.3, '15': 0.3, '16': 0.3, '17': 0.3, '18': 0.3, '19': 0.35, '20': 0.35, '21': 0.35, '22': 0.3, '23': 0.3}
Gurobi Optimizer version 10.0.3 build v10.0.3rc0 (mac64[arm])

CPU model: Apple M1
Thread count: 8 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 2 rows, 48 columns and 96 nonzeros
Model fingerprint: 0x7c396a84
Variable types: 0 continuous, 48 integer (0 binary)
Coefficient statistics:
  Matrix range     [3e-01, 1e+00]
  Objective range  [8e+00, 8e+00]
  Bounds range     [1e+01, 1e+01]
  RHS range        [2e+01, 4e+01]
Presolve removed 2 rows and 48 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 1 (of 8 available processors)

Solution count 1: 327.6 

Optimal solution found (toleran

In [24]:
len(benchmark_dataset)

853

In [25]:
def parse_json(json_str):
    
    
    try:    
        return json.loads(json_str[7:-4])
    except:
        pass

    try:
        return json.loads('{' + json_str[7:-4] + '}')
    except:
        raise TypeError("Invalid String, unable to convert to json") 


In [76]:
benchmark_result = []
sampled_benchmark = random.sample(benchmark_dataset, 100)
count_correct = 0
for benchmark_instance in sampled_benchmark:
    
    log_history = {}
    autogen.oai.ChatCompletion.start_logging(log_history)

    user.initiate_chat(agent, message=benchmark_instance['prompt'], silent = True, clear_history=True)

    try:
        predicted_param = parse_json(eval(list(log_history.keys())[-1])[-1]['content'])
    except:
        benchmark_result.append((0, 'Invalid Response: ' + eval(list(log_history.keys())[-1])[-1]['content']))

    bench_json_str = benchmark_instance['json_str']
    
    truth_param = json.loads('{'+extract_code(bench_json_str)[0][1]+'}')
    result_instance_str = "; bench: " + extract_code(bench_json_str)[0][1] + " pred: " + json.dumps(predicted_param)
    result_instance = {'result' : truth_param == predicted_param, 
                       'index' : benchmark_instance['index'], 
                       'bench' : '{' + extract_code(bench_json_str)[0][1] + '}',
                       'pred' : json.dumps(predicted_param)}

    count_correct = count_correct + int(truth_param == predicted_param)
    benchmark_result.append(result_instance)

    time.sleep(0.5)


[33mTesla Charging Example[0m (to writer):


Answer JSON:


--------------------------------------------------------------------------------
[33mwriter[0m (to Tesla Charging Example):

```JSON
"end_charge": 0.55,
"battery_capacity": 25
```

--------------------------------------------------------------------------------
[33mTesla Charging Example[0m (to writer):


Answer JSON:


--------------------------------------------------------------------------------
[33mwriter[0m (to Tesla Charging Example):

```JSON
{
    "end_charge": 0.65,
    "max_power": 14.3
}
```

--------------------------------------------------------------------------------
[33mTesla Charging Example[0m (to writer):


Answer JSON:


--------------------------------------------------------------------------------
[33mwriter[0m (to Tesla Charging Example):

```JSON
"carbon_cost_weight": 1.15,
"max_energy_cost": 12
```

--------------------------------------------------------------------------------
[33mTes

In [77]:
count_correct

91

In [78]:
[benchmark_dataset[result['index']] for result in benchmark_result if not result['result']]

[{'json_str': '```JSON\n"carbon_cost_weight": 0.0,\n"battery_capacity": 40\n```',
  'prompt': 'I want to not consider carbon emissions while charging and change the battery capacity to 90 KWH.',
  'index': 297},
 {'json_str': '```JSON\n"end_charge": 0.35,\n"end_charge_time": 0\n```',
  'prompt': 'Start charging the car immediately and stop when it reaches 35% battery level.',
  'index': 704},
 {'json_str': '```JSON\n"end_charge": 0.7,\n"max_power": 17.8\n```',
  'prompt': 'Change the value of "end_charge" to 0.7.',
  'index': 572},
 {'json_str': '```JSON\n"carbon_cost_weight": 0.0,\n"end_charge": 0.95\n```',
  'prompt': 'Change the value of the "end_charge" parameter to 0.95.',
  'index': 174},
 {'json_str': '```JSON\n"max_power": 10.7,\n"battery_capacity": 15\n```',
  'prompt': 'The change is to update the parameters "max_power" to 11.7 and "battery_capacity" to 70.',
  'index': 751},
 {'json_str': '```JSON\n"carbon_cost_weight": 0.15,\n"end_charge": 0.5\n```',
  'prompt': 'Update the

In [85]:
[result['pred'] + ";" + benchmark_dataset[result['index']]['prompt'] for result in benchmark_result if not result['result']]

['{"carbon_cost_weight": 0, "battery_capacity": 90};I want to not consider carbon emissions while charging and change the battery capacity to 90 KWH.',
 '{"start_charge": 0.0, "end_charge": 0.35};Start charging the car immediately and stop when it reaches 35% battery level.',
 '{"end_charge": 0.7};Change the value of "end_charge" to 0.7.',
 '{"end_charge": 0.95};Change the value of the "end_charge" parameter to 0.95.',
 '{"max_power": 11.7, "battery_capacity": 70};The change is to update the parameters "max_power" to 11.7 and "battery_capacity" to 70.',
 '{"carbon_cost_weight": 0.15, "end_charge": 1.0};Update the weight of carbon cost to 15% and charge my car to full battery.',
 '{"carbon_cost_weight": 0.85, "max_power": 11.3};Increase the importance of carbon cost in total cost to 85% and reduce the maximum charge power to 11.3 KW.',
 '{"max_power": 18.8};Increase the max charge power to 18.8 KW while leaving the battery capacity unchanged at 70 KWH.',
 '{"max_power": 17.3, "end_charg

In [100]:
config_list = autogen.config_list_from_json(
    env_or_file = "OAI_CONFIG_LIST",
    filter_dict={
        "model": ["gpt-3.5-turbo-16k"],
    },
)

In [104]:
agent_gpt3 = ChargingAgent(
    name="Tesla Charging Example",
    source_code=code,
    example_qa=example_qa,
    json_filepath=params_filepath,
    llm_config={
        "request_timeout": 600,
        "seed": 42,
        "config_list": config_list,
    },
    evaluate=True
)

{'0': 0.3, '1': 0.3, '2': 0.3, '3': 0.3, '4': 0.3, '5': 0.3, '6': 0.3, '7': 0.35, '8': 0.35, '9': 0.35, '10': 0.35, '11': 0.3, '12': 0.3, '13': 0.3, '14': 0.3, '15': 0.3, '16': 0.3, '17': 0.3, '18': 0.3, '19': 0.35, '20': 0.35, '21': 0.35, '22': 0.3, '23': 0.3}
Gurobi Optimizer version 10.0.3 build v10.0.3rc0 (mac64[arm])

CPU model: Apple M1
Thread count: 8 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 2 rows, 48 columns and 96 nonzeros
Model fingerprint: 0x7c396a84
Variable types: 0 continuous, 48 integer (0 binary)
Coefficient statistics:
  Matrix range     [3e-01, 1e+00]
  Objective range  [8e+00, 8e+00]
  Bounds range     [1e+01, 1e+01]
  RHS range        [2e+01, 4e+01]
Presolve removed 2 rows and 48 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 1 (of 8 available processors)

Solution count 1: 327.6 

Optimal solution found (toleran

In [102]:
log_history = {}
autogen.oai.ChatCompletion.start_logging(log_history)

In [105]:
benchmark_result_gpt3 = []
count_correct_gpt3 = 0
for benchmark_instance in sampled_benchmark:
    
    log_history = {}
    autogen.oai.ChatCompletion.start_logging(log_history)

    user.initiate_chat(agent_gpt3, message=benchmark_instance['prompt'], silent = True, clear_history=True)

    try:
        predicted_param = parse_json(eval(list(log_history.keys())[-1])[-1]['content'])
    except:
        benchmark_result.append((0, 'Invalid Response: ' + eval(list(log_history.keys())[-1])[-1]['content']))

    bench_json_str = benchmark_instance['json_str']
    
    truth_param = json.loads('{'+extract_code(bench_json_str)[0][1]+'}')
    result_instance_str = "; bench: " + extract_code(bench_json_str)[0][1] + " pred: " + json.dumps(predicted_param)
    result_instance = {'result' : truth_param == predicted_param, 
                       'index' : benchmark_instance['index'], 
                       'bench' : '{' + extract_code(bench_json_str)[0][1] + '}',
                       'pred' : json.dumps(predicted_param)}

    count_correct_gpt3 = count_correct_gpt3 + int(truth_param == predicted_param)
    benchmark_result_gpt3.append(result_instance)

    time.sleep(0.5)


[33mTesla Charging Example[0m (to writer):


Answer JSON:


--------------------------------------------------------------------------------
[33mwriter[0m (to Tesla Charging Example):

"end_charge": 0.55,
"battery_capacity": 25

--------------------------------------------------------------------------------
[33mTesla Charging Example[0m (to writer):


Answer JSON:


--------------------------------------------------------------------------------
[33mwriter[0m (to Tesla Charging Example):

{
    "end_charge": 0.65,
    "max_power": 14.3
}

--------------------------------------------------------------------------------
[33mTesla Charging Example[0m (to writer):


Answer JSON:


--------------------------------------------------------------------------------
[33mwriter[0m (to Tesla Charging Example):

```JSON
{
    "carbon_cost_weight": 1.15,
    "max_energy_cost": 12
}
```

--------------------------------------------------------------------------------
[33mTesla Charging 

In [107]:
count_correct_gpt3

24

In [90]:
benchmark_result_gpt3

[{'result': True,
  'index': 641,
  'bench': '{"end_charge": 0.55,\n"battery_capacity": 25}',
  'pred': '{"end_charge": 0.55, "battery_capacity": 25}'},
 {'result': True,
  'index': 590,
  'bench': '{"end_charge": 0.65,\n"max_power": 14.3}',
  'pred': '{"end_charge": 0.65, "max_power": 14.3}'},
 {'result': True,
  'index': 155,
  'bench': '{"carbon_cost_weight": 1.15,\n"max_energy_cost": 12}',
  'pred': '{"carbon_cost_weight": 1.15, "max_energy_cost": 12}'},
 {'result': True,
  'index': 87,
  'bench': '{"battery_capacity": 55}',
  'pred': '{"battery_capacity": 55}'},
 {'result': True,
  'index': 543,
  'bench': '{"max_energy_cost": 52,\n"end_charge_time": 3}',
  'pred': '{"end_charge_time": 3, "max_energy_cost": 52}'},
 {'result': True,
  'index': 659,
  'bench': '{"end_charge": 0.7,\n"end_charge_time": 20}',
  'pred': '{"end_charge": 0.7, "end_charge_time": 20}'},
 {'result': True,
  'index': 122,
  'bench': '{"carbon_cost_weight": 0.45,\n"max_energy_cost": 97}',
  'pred': '{"carbon_c

In [108]:
[result['pred'] + ";" + benchmark_dataset[result['index']]['prompt'] for result in benchmark_result_gpt3 if not result['result']][:10]

['{"carbon_cost_weight": 0.35, "battery_capacity": 90};Charge the electric car to 55% and set the battery capacity to 25 KWH.',
 '{"carbon_cost_weight": 0.35, "battery_capacity": 90};Charge the car until it reaches 65% battery level and increase the maximum charge power to 14.3 KW.',
 '{"carbon_cost_weight": 1.15, "max_energy_cost": 12};Battery capacity is updated to 55 kWh.',
 '{"carbon_cost_weight": 1.15, "max_energy_cost": 12};Charge the car until 3 AM with a maximum energy cost of 52 USD.',
 '{"carbon_cost_weight": 1.15, "max_energy_cost": 12};Charge the car till 70% and complete it by 8 PM.',
 '{"carbon_cost_weight": 1.15, "max_energy_cost": 12};Increase the weight of carbon cost in total cost to 0.45 and raise the maximum electricity cost allowed per charge cycle to $97.',
 '{"carbon_cost_weight": 1.15, "max_energy_cost": 12};Charge the car until 3 PM.',
 '{"carbon_cost_weight": 0.7, "end_charge": 0.95};Increase the weightage of carbon cost to 0.35 and upgrade the car\'s battery 

In [109]:
with open('benchmark/results/gpt4_20231203.json', 'w') as f:
    f.write(json.dumps(benchmark_result))

In [110]:
with open('benchmark/results/gpt35_20231203.json', 'w') as f:
    f.write(json.dumps(benchmark_result_gpt3))