In [6]:
import json

# Load the function schemas (a JSON list of {name, description, parameters}
# entries) from disk, then display them as the cell output.
with open("clickup_space_schema.json") as schema_file:
    schema = json.load(schema_file)

schema

[{'name': 'get_spaces',
  'description': 'View the Spaces available in a Workspace.',
  'parameters': {'type': 'object',
   'properties': {'team_id': {'type': 'string',
     'description': 'The ID of the team'},
    'archived': {'type': 'boolean',
     'description': 'A flag to decide whether to include archived spaces or not'}},
   'required': ['team_id', 'archived']}},
 {'name': 'create_space',
  'description': 'Add a new Space to a Workspace.',
  'parameters': {'type': 'object',
   'properties': {'team_id': {'type': 'string',
     'description': 'The ID of the team'},
    'name': {'type': 'string', 'description': 'The name of the new space'},
    'multiple_assignees': {'type': 'boolean',
     'description': 'Enable or disable multiple assignees for tasks within the space'},
    'features': {'type': 'object',
     'description': 'Enabled features within the space',
     'properties': {'due_dates': {'type': 'object',
       'description': 'Due dates feature settings',
       'properti

In [7]:
def get_key_trees(schema_props):
    """Render a JSON-schema ``properties`` mapping as a compact signature string.

    Each property becomes ``name:type``. A property whose type is ``"object"``
    is expanded recursively from its own ``"properties"`` into ``name:(...)``.

    Args:
        schema_props: Mapping of property name to its schema dict. Every
            schema dict must carry a ``"type"`` key.

    Returns:
        A string such as ``"(team_id:string, tag:(name:string))"``. An empty
        mapping yields ``"()"``.

    Raises:
        KeyError: If a property schema has no ``"type"`` key.
    """
    key_list = []
    for name, prop in schema_props.items():
        if prop["type"] != "object":
            key_list.append(f'{name}:{prop["type"]}')
        else:
            # A free-form object may omit "properties"; render it as an empty
            # group instead of failing with KeyError.
            branch = get_key_trees(prop.get("properties", {}))
            key_list.append(f'{name}:{branch}')
    return f'({", ".join(key_list)})'
            
    
# Print one compact "name(arg:type, ...)" signature per function schema.
for func_schema in schema:
    signature = get_key_trees(func_schema["parameters"]["properties"])
    print(f'{func_schema["name"]}{signature}')


get_spaces(team_id:string, archived:boolean)
create_space(team_id:string, name:string, multiple_assignees:boolean, features:(due_dates:(enabled:boolean, start_date:boolean, remap_due_dates:boolean, remap_closed_due_date:boolean), time_tracking:(enabled:boolean)))
get_space(space_id:string)
update_space(space_id:string, name:string, color:string, private:boolean, admin_can_manage:boolean, multiple_assignees:boolean, features:(due_dates:(enabled:boolean, start_date:boolean, remap_due_dates:boolean, remap_closed_due_date:boolean), time_tracking:(enabled:boolean)))
delete_space(space_id:string)
delete_space(space_id:string)
create_space_tag(space_id:string, tag:(name:string, tag_fg:string, tag_bg:string))
delete_space_tag(space_id:string, tag_name:string, tag:(name:string, tag_fg:string, tag_bg:string))


In [8]:
import os
import json
import dotenv
import openai
from textwrap import dedent
from datetime import datetime

# Load variables from a .env file, then hand the API key to the openai module.
dotenv.load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Wrap every function schema in a {"type": "function", "function": ...}
# envelope; the resulting list is passed as `tools=` to the chat calls.
tools = []
for func_schema in schema:
    tools.append({"type": "function", "function": func_schema})
tools

[{'type': 'function',
  'function': {'name': 'get_spaces',
   'description': 'View the Spaces available in a Workspace.',
   'parameters': {'type': 'object',
    'properties': {'team_id': {'type': 'string',
      'description': 'The ID of the team'},
     'archived': {'type': 'boolean',
      'description': 'A flag to decide whether to include archived spaces or not'}},
    'required': ['team_id', 'archived']}}},
 {'type': 'function',
  'function': {'name': 'create_space',
   'description': 'Add a new Space to a Workspace.',
   'parameters': {'type': 'object',
    'properties': {'team_id': {'type': 'string',
      'description': 'The ID of the team'},
     'name': {'type': 'string', 'description': 'The name of the new space'},
     'multiple_assignees': {'type': 'boolean',
      'description': 'Enable or disable multiple assignees for tasks within the space'},
     'features': {'type': 'object',
      'description': 'Enabled features within the space',
      'properties': {'due_dates':

In [9]:
from openai import OpenAI
# Client is built with no explicit arguments.
# NOTE(review): presumably it picks up OPENAI_API_KEY from the environment — confirm.
client = OpenAI()

# Exploratory single call: send one hand-written scenario together with the
# ClickUp tool schemas and let the model decide whether to call a tool
# (tool_choice="auto").
# NOTE(review): the system prompt below reads like instructions for *writing*
# benchmark examples, not for answering them — confirm it is the intended
# prompt for a tool-calling request.
response = client.chat.completions.create(
  model="gpt-4-turbo-preview",
  messages=[
    {
      "role": "system",
      "content": """\
You are a technically example writer as well as function usage question setter. 
You can follow some given case and constrains to you and craft examples according 
to that.

You will be given the arguments and the type what kind of values are 
used in which functionality. When you make an example targeting a specific 
functionality, be sure to to subtly mention all those parameters inside the 
example description so that nothing relevant has to be assumed and so that 
the example can have a deterministic fixed answer, as it will be used in a test.

No parameter that is to be used in the target functionality of an example should 
be left to mention.\n\n\nMake multiple similar example scenarios long as well as 
not directly mentioning which endpoint to execute, so that the skill of the agent 
to utilize the given functionality might be tested.

To increase the complexity 
level, you might even put some false or trap type information in the context, make 
the contexts bigger so that the determination and parameter extraction takes more 
skill.

The samples should never have two possible solution, only one fixed solution, along 
with all its parameters fixed.

Every example must solved in only one function call, 
not more that one."""
    },
    {
      "role": "user",
      "content": """\
The software development team at FastTech recently concluded a project, and it’s been 
decided to archive the specific ClickUp space identified by \"sd456\" to ensure it doesn't 
clutter the current workspace. Before doing so, the team leader wants to retrieve the list 
of custom tags created in this space for reporting purposes."""
    },
  ],
  temperature=1,
  max_tokens=4096,
  top_p=1,
  tools=tools,
  tool_choice="auto"
)

# Display the raw ChatCompletion object as the cell output.
response

ChatCompletion(id='chatcmpl-91atUwUHLHrLZf5dDbhDn8N2YJNLZ', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_4nMvml0MiXgEn18kuJghNumW', function=Function(arguments='{"space_id":"sd456"}', name='get_space'), type='function')]))], created=1710167176, model='gpt-4-0125-preview', object='chat.completion', system_fingerprint='fp_31c0f205d1', usage=CompletionUsage(completion_tokens=16, prompt_tokens=962, total_tokens=978))

In [5]:
import json

# Load the benchmark (a JSON list of {"prompt", "solution"} items) and show
# the second item as a sample.
with open("clickup_space_benchmark.json") as bench_file:
    bench_data = json.load(bench_file)

bench_data[1]

{'prompt': 'Ella, the project coordinator, is setting up a new project space in ClickUp for the "Creative Minds" team with team ID "cm789". This space, named "Innovative Campaigns 2023", should allow multiple assignees for tasks, but keep due dates and time tracking disabled, as the initial planning phase doesn\'t require strict deadlines or time monitoring.',
 'solution': 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'}

In [44]:
# Show the expected-call string of the second benchmark item.
bench_data[1]["solution"]

'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'

In [42]:
# List the expected call of every benchmark item, one per line.
for bench_item in bench_data:
    expected_call = bench_item["solution"]
    print(expected_call)

get_spaces(team_id="team123", archived=False)
create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))
get_space_tags(space_id="sd456")
create_space_tag(space_id="prj1122", tag=(name="Critical Bug", tag_fg="#FFFFFF", tag_bg="#8B0000"))
delete_space_tag(space_id="qa789", tag_name="MinorIssue", tag=(name="MinorIssue", tag_fg="#000000", tag_bg="#FFFFE0"))
update_space(space_id="bkend345", name="Backend_Development_Space", color="#000000", private=False, admin_can_manage=True, multiple_assignees=False, features=(due_dates=(enabled=True, start_date=True, remap_due_dates=True, remap_closed_due_date=False), time_tracking=(enabled=True)))
delete_space(space_id="gt678")
create_space(team_id="ds101", name="Design Studio 2023", multiple_assignees=True, features=(due_dates=(enabled=True, start_date=True, remap_due_dates=False, re

In [44]:
def fcalling_llm(fprompt):
    """Send one prompt to the chat model with the ClickUp tool schemas attached.

    Uses the module-level ``client`` and ``tools``. Sampling is pinned with
    ``temperature=0`` and the model is free to answer in text or to call a
    tool (``tool_choice="auto"``).

    Args:
        fprompt: The user message text (a benchmark scenario).

    Returns:
        The object returned by ``client.chat.completions.create``.
    """
    # Defined with `def` rather than a lambda bound to a name so the helper
    # has a real name in tracebacks and can carry a docstring.
    return client.chat.completions.create(
      model="gpt-4-turbo-preview",
      messages=[
        {
          "role": "system",
          "content": """\
You are a technically example writer as well as function usage question setter. 
You can follow some given case and constrains to you and craft examples according 
to that.

You will be given the arguments and the type what kind of values are 
used in which functionality. When you make an example targeting a specific 
functionality, be sure to to subtly mention all those parameters inside the 
example description so that nothing relevant has to be assumed and so that 
the example can have a deterministic fixed answer, as it will be used in a test.

No parameter that is to be used in the target functionality of an example should 
be left to mention.\n\n\nMake multiple similar example scenarios long as well as 
not directly mentioning which endpoint to execute, so that the skill of the agent 
to utilize the given functionality might be tested.

To increase the complexity 
level, you might even put some false or trap type information in the context, make 
the contexts bigger so that the determination and parameter extraction takes more 
skill.

The samples should never have two possible solution, only one fixed solution, along 
with all its parameters fixed.

Every example must solved in only one function call, 
not more that one."""
        },
        {
          "role": "user",
          "content": fprompt
        },
      ],
      temperature=0,
      max_tokens=4096,
      top_p=1,
      tools=tools,
      tool_choice="auto"
    )

# Smoke-test the helper on the second benchmark item and display the function
# (name + JSON arguments) of the first tool call the model made.
response = fcalling_llm(bench_data[1]["prompt"])
first_tool_call = response.choices[0].message.tool_calls[0]
first_tool_call.function

Function(arguments='{"team_id":"cm789","name":"Innovative Campaigns 2023","multiple_assignees":true,"features":{"due_dates":{"enabled":false},"time_tracking":{"enabled":false}}}', name='create_space')

In [45]:
import ast
import re
from pprint import pprint



class FuncStringParser():
    """Turn benchmark solution strings and LLM tool calls into comparable dicts.

    A solution string looks like a Python call whose nested objects are
    written as parenthesised keyword groups, e.g.
    ``create_space_tag(space_id="s1", tag=(name="Bug", tag_fg="#FFFFFF"))``.
    Both converters return ``{"name": ..., "arguments": ...}`` so the expected
    and the actual call can be compared with ``==``.

    Parsing is done by a quote- and bracket-aware scanner, so commas,
    parentheses and ``=`` inside quoted values are treated as plain text.
    """

    def __init__(self):
        # Scratch mapping filled by `replace_with_dict`. The parser no longer
        # needs it; it is kept so existing callers of the attribute still work.
        self.record_keeper = {}

    def replace_with_dict(self, match):
        """Regex-substitution callback, kept for backward compatibility.

        Stores the matched text in ``record_keeper`` under a fresh
        ``dict<N>`` placeholder and returns that placeholder.
        """
        replacement = f"dict{len(self.record_keeper)}"
        self.record_keeper[replacement] = match.group()
        return replacement

    @staticmethod
    def _split_top_level(text):
        """Split ``text`` on commas that sit outside quotes and brackets.

        Raises:
            ValueError: On an unterminated quote or unbalanced brackets.
        """
        parts = []
        current = []
        depth = 0
        quote = None      # the quote character we are inside of, if any
        escaped = False   # True right after a backslash inside a quoted value
        for ch in text:
            if quote is not None:
                current.append(ch)
                if escaped:
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == quote:
                    quote = None
                continue
            if ch == "," and depth == 0:
                parts.append("".join(current))
                current = []
                continue
            if ch in "\"'":
                quote = ch
            elif ch in "([{":
                depth += 1
            elif ch in ")]}":
                depth -= 1
                if depth < 0:
                    raise ValueError(f"unbalanced closing bracket in {text!r}")
            current.append(ch)
        if quote is not None or depth != 0:
            raise ValueError(f"unterminated quote or bracket in {text!r}")
        parts.append("".join(current))
        return parts

    def arg_tuple_str_to_args_dict(self, func_args):
        """Parse ``"(k=v, k2=(...))"`` into a dict of *unevaluated* values.

        Leaf values stay as the stripped source text (``'"abc"'``, ``'True'``);
        parenthesised values are parsed recursively into nested dicts.

        Args:
            func_args: Argument list including its surrounding parentheses.

        Returns:
            Dict mapping argument names to raw value strings or nested dicts.
            ``"()"`` yields ``{}``.

        Raises:
            ValueError: If the text is not parenthesised, is unbalanced, or
                contains an argument that is not ``key=value``.
        """
        body = func_args.strip()
        if not (body.startswith("(") and body.endswith(")")):
            raise ValueError(
                f"expected a parenthesised argument list, got {func_args!r}"
            )

        args_dict = {}
        for part in self._split_top_level(body[1:-1]):
            if not part.strip():
                # Empty list or trailing comma: nothing to record.
                continue
            # Split at the first "=" only, so "=" inside the value is kept.
            k, sep, v = part.partition("=")
            if not sep:
                raise ValueError(
                    f"argument {part.strip()!r} is not of the form key=value"
                )
            k, v = k.strip(), v.strip()
            if v.startswith("(") and v.endswith(")"):
                v = self.arg_tuple_str_to_args_dict(v)
            args_dict[k] = v
        return args_dict

    def replace_true_false_dfs(self, nested_dict):
        """Replace every leaf value string by its Python literal, in place.

        Nested dicts are processed recursively; nothing is returned.

        Raises:
            ValueError, SyntaxError: If a leaf is not a valid Python literal.
        """
        for key, value in nested_dict.items():
            if isinstance(value, dict):
                self.replace_true_false_dfs(value)
            else:
                # NOTE: this used to be `eval(value)`. The text comes from a
                # data file, so only literals (strings, numbers, True/False/
                # None, containers of those) are accepted; arbitrary
                # expressions are rejected instead of executed.
                nested_dict[key] = ast.literal_eval(value)

    def func_to_args_dict(self, func_args):
        """Parse a parenthesised argument list into a dict of Python values.

        Args:
            func_args: e.g. ``'(team_id="t1", tag=(name="Bug"))'``.

        Returns:
            Nested dict with literal values evaluated, e.g.
            ``{"team_id": "t1", "tag": {"name": "Bug"}}``.
        """
        args_dict = self.arg_tuple_str_to_args_dict(func_args)
        self.replace_true_false_dfs(args_dict)
        # Preserved from the original implementation: every parse leaves the
        # scratch mapping empty.
        self.record_keeper = {}
        return args_dict

    def func_string_to_func_dict(self, func_string):
        """Convert ``'name(k=v, ...)'`` into ``{"name", "arguments"}``.

        Raises:
            ValueError: If ``func_string`` contains no ``(`` or its argument
                list is malformed.
        """
        paren_at = func_string.find("(")
        if paren_at == -1:
            raise ValueError(f"no argument list found in {func_string!r}")
        func_name = func_string[:paren_at].strip()
        func_args = func_string[paren_at:]
        func_args_dict = self.func_to_args_dict(func_args)
        return {"name": func_name, "arguments": func_args_dict}

    def llm_response_to_func_dict(self, llm_response):
        """Convert a tool-call function object into ``{"name", "arguments"}``.

        Args:
            llm_response: Object with ``name`` and JSON-string ``arguments``
                attributes, or ``None`` when no tool call was made.

        Returns:
            Dict with the function name and decoded arguments; both are
            ``None`` when ``llm_response`` is ``None``.
        """
        if llm_response is not None:
            func_name = llm_response.name
            func_args_dict = json.loads(llm_response.arguments)
        else:
            func_name = None
            func_args_dict = None
        return {"name": func_name, "arguments": func_args_dict}
        

                    
        
    
# Sample string
# Sample solution string used to try the parser out.
func = 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'

# Split at the first "(": the name before it, the argument list from it onward.
paren_at = func.find("(")
func_name, func_args = func[:paren_at], func[paren_at:]

fsp = FuncStringParser()
fsp.func_string_to_func_dict(func)


{'name': 'create_space',
 'arguments': {'team_id': 'cm789',
  'name': 'Innovative Campaigns 2023',
  'multiple_assignees': True,
  'features': {'due_dates': {'enabled': False,
    'start_date': False,
    'remap_due_dates': False,
    'remap_closed_due_date': False},
   'time_tracking': {'enabled': False}}}}

In [43]:
# Display the function (name + JSON arguments) of the first tool call in the
# current `response` object.
response.choices[0].message.tool_calls[0].function


Function(arguments='{"space_id":"sd456"}', name='get_space')

In [33]:
# Sanity check: two dicts with the same keys and values compare equal.
{"space_id":"sd456"} == {"space_id":"sd456"}

True

In [47]:
from tqdm.auto import tqdm

# Run every benchmark prompt through the model and keep the function of the
# first tool call, or None when the model made no tool call.
llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    # `tool_calls` is None when the model answers in plain text. Check it
    # explicitly instead of catching the TypeError from indexing None: that
    # would also hide unrelated TypeErrors and miss an empty list.
    tool_calls = response.choices[0].message.tool_calls
    if tool_calls:
        llm_output.append(tool_calls[0].function)
    else:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

  0%|          | 0/30 [00:00<?, ?it/s]

The operations team at a software company intends to introduce a new naming convention for all their ClickUp spaces to better reflect their project categorization strategy. The first space to update is for the front-end development team, with ID "fe987", to be renamed as "FE Development 2023", with an additional requirement to make the space private for focused team collaboration.
ChatCompletion(id='chatcmpl-91bi9Hd3TlNjy7SsnAkq0xyNMsTAe', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='To address the operations team\'s requirement at the software company, the following action needs to be taken:\n\n- The space with ID "fe987" needs to be updated to reflect the new naming convention. The new name for this space will be "FE Development 2023".\n- Additionally, the space must be set to private to ensure focused team collaboration.\n\nGiven these requirements, the appropriate function to use is `functions.update_space`. The parameters for

[Function(arguments='{"team_id":"team123","archived":false}', name='get_spaces'),
 Function(arguments='{"team_id":"cm789","name":"Innovative Campaigns 2023","multiple_assignees":true,"features":{"due_dates":{"enabled":false},"time_tracking":{"enabled":false}}}', name='create_space'),
 Function(arguments='{"space_id":"sd456"}', name='get_space'),
 Function(arguments='{"space_id":"prj1122","tag":{"name":"Critical Bug","tag_fg":"#FFFFFF","tag_bg":"#8B0000"}}', name='create_space_tag'),
 Function(arguments='{"space_id":"qa789","tag_name":"MinorIssue","tag":{"name":"MinorIssue","tag_fg":"#000000","tag_bg":"#FFFFE0"}}', name='delete_space_tag'),
 Function(arguments='{"space_id":"bkend345","name":"Backend Development","color":"#000000","private":false,"admin_can_manage":true,"multiple_assignees":false,"features":{"due_dates":{"enabled":true,"start_date":true,"remap_due_dates":false,"remap_closed_due_date":false},"time_tracking":{"enabled":true}}}', name='update_space'),
 Function(arguments='{

In [48]:
from pprint import pprint

# Score each benchmark item: parse the expected call and the model's call into
# the same dict shape, print both, and record whether they are equal.
fsp = FuncStringParser()
result_list = []

for bench_item, tool_call in zip(bench_data, llm_output):
    expected = fsp.func_string_to_func_dict(bench_item["solution"])
    actual = fsp.llm_response_to_func_dict(tool_call)
    is_match = actual == expected

    pprint(expected)
    pprint(actual)
    print(is_match)
    result_list.append(is_match)
    print("-----")

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features': {'due_dates': {'enabled': False,
                                          'remap_closed_due_date': False,
                                          'remap_due_dates': False,
                                          'start_date': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features': {'due_dates': {'enabled': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
False
-----
{'arguments': {'space_id': 'sd456'}, 'name': 'ge

In [49]:
# Fraction of benchmark items whose recorded comparison was True.
sum(result_list)/len(result_list)

0.4

In [50]:
from pprint import pprint
from tqdm.auto import tqdm

# Run every benchmark prompt through the model and keep the function of the
# first tool call, or None when the model made no tool call.
llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    # `tool_calls` is None when the model answers in plain text. Check it
    # explicitly instead of catching the TypeError from indexing None: that
    # would also hide unrelated TypeErrors and miss an empty list.
    tool_calls = response.choices[0].message.tool_calls
    if tool_calls:
        llm_output.append(tool_calls[0].function)
    else:
        print(bdata["prompt"])
        print(response)
        print("-------")
        llm_output.append(None)

  0%|          | 0/30 [00:00<?, ?it/s]

In preparing for an upcoming presentation, Derek, a team leader, realizes that a specific tag used in their ClickUp space "prj1122" is incorrectly labeled. The tag intended for critical bugs, currently named "CriticalIssue", should be more appropriately labeled as "Critical Bug" with a foreground color of white "#FFFFFF" and a background color of dark red "#8B0000" for better visibility.
ChatCompletion(id='chatcmpl-91bnIT6OpnLJ9SseqGsyxQWjNqEDx', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n  "space_id": "prj1122",\n  "tag_name": "CriticalIssue",\n  "tag": {\n    "name": "Critical Bug",\n    "tag_fg": "#FFFFFF",\n    "tag_bg": "#8B0000"\n  }\n}\n```', role='assistant', function_call=None, tool_calls=None))], created=1710170636, model='gpt-4-0125-preview', object='chat.completion', system_fingerprint='fp_31c0f205d1', usage=CompletionUsage(completion_tokens=61, prompt_tokens=977, total_tokens=1038))
-------
A project man

In [51]:
from pprint import pprint

# Score each benchmark item: parse the expected call and the model's call into
# the same dict shape, print both, and record whether they are equal.
fsp = FuncStringParser()
result_list = []

for bench_item, tool_call in zip(bench_data, llm_output):
    expected = fsp.func_string_to_func_dict(bench_item["solution"])
    actual = fsp.llm_response_to_func_dict(tool_call)
    is_match = actual == expected

    pprint(expected)
    pprint(actual)
    print(is_match)
    result_list.append(is_match)
    print("-----")

# Fraction of items where the two dicts were equal.
sum(result_list)/len(result_list)

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features': {'due_dates': {'enabled': False,
                                          'remap_closed_due_date': False,
                                          'remap_due_dates': False,
                                          'start_date': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features': {'due_dates': {'enabled': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
False
-----
{'arguments': {'space_id': 'sd456'}, 'name': 'ge

0.3333333333333333

In [52]:
from pprint import pprint
from tqdm.auto import tqdm

# Run every benchmark prompt through the model and keep the function of the
# first tool call, or None when the model made no tool call.
llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    # `tool_calls` is None when the model answers in plain text. Check it
    # explicitly instead of catching the TypeError from indexing None: that
    # would also hide unrelated TypeErrors and miss an empty list.
    tool_calls = response.choices[0].message.tool_calls
    if tool_calls:
        llm_output.append(tool_calls[0].function)
    else:
        print(bdata["prompt"])
        print(response)
        print("-------")
        llm_output.append(None)

  0%|          | 0/30 [00:00<?, ?it/s]

The operations team at a software company intends to introduce a new naming convention for all their ClickUp spaces to better reflect their project categorization strategy. The first space to update is for the front-end development team, with ID "fe987", to be renamed as "FE Development 2023", with an additional requirement to make the space private for focused team collaboration.
ChatCompletion(id='chatcmpl-91cKe6EDZt5n56sjze8yHht4FLuTL', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='To update the space for the front-end development team according to the new naming convention and privacy requirement, the following parameters need to be set:\n\n- **space_id**: "fe987" (The ID of the space to be updated)\n- **name**: "FE Development 2023" (The new name for the space)\n- **private**: true (To make the space private for focused team collaboration)\n- **color**: This parameter is required but not specified in the context, so a default 

In [53]:
from pprint import pprint

# Score each benchmark item: parse the expected call and the model's call into
# the same dict shape, print both, and record whether they are equal.
fsp = FuncStringParser()
result_list = []

for bench_item, tool_call in zip(bench_data, llm_output):
    expected = fsp.func_string_to_func_dict(bench_item["solution"])
    actual = fsp.llm_response_to_func_dict(tool_call)
    is_match = actual == expected

    pprint(expected)
    pprint(actual)
    print(is_match)
    result_list.append(is_match)
    print("-----")

# Fraction of items where the two dicts were equal.
sum(result_list)/len(result_list)

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features': {'due_dates': {'enabled': False,
                                          'remap_closed_due_date': False,
                                          'remap_due_dates': False,
                                          'start_date': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features': {'due_dates': {'enabled': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
False
-----
{'arguments': {'space_id': 'sd456'}, 'name': 'ge

0.4