In [61]:
import json

# Tool definitions for the ClickUp "space" endpoints: a list with one dict per
# function (name, description, JSON-schema parameters).
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding.
with open("clickup_space_schema.json", encoding="utf-8") as f:
    schema = json.load(f)

schema

[{'name': 'get_spaces',
  'description': 'View the Spaces available in a Workspace.',
  'parameters': {'type': 'object',
   'properties': {'team_id': {'type': 'string',
     'description': 'The ID of the team'},
    'archived': {'type': 'boolean',
     'description': 'A flag to decide whether to include archived spaces or not'}},
   'required': ['team_id', 'archived']}},
 {'name': 'create_space',
  'description': 'Add a new Space to a Workspace.',
  'parameters': {'type': 'object',
   'properties': {'team_id': {'type': 'string',
     'description': 'The ID of the team'},
    'name': {'type': 'string', 'description': 'The name of the new space'},
    'multiple_assignees': {'type': 'boolean',
     'description': 'Enable or disable multiple assignees for tasks within the space'},
    'features': {'type': 'object',
     'description': 'Enabled features within the space',
     'properties': {'due_dates': {'type': 'object',
       'description': 'Due dates feature settings',
       'properti

In [62]:
def get_key_trees(schema_props):
    """Render a JSON-schema ``properties`` mapping as a compact signature string.

    Each property becomes ``name:type``. A property of type ``object`` that
    declares nested ``properties`` becomes ``name:(...)`` with its children
    rendered recursively.

    Args:
        schema_props: Mapping of property name to its JSON-schema definition.

    Returns:
        The comma-separated entries wrapped in parentheses, e.g.
        ``(team_id:string, features:(enabled:boolean))``.
    """
    key_list = []
    for k, v in schema_props.items():
        # A schema without "type" places no constraint on the value.
        prop_type = v.get("type", "any")
        if prop_type == "object" and "properties" in v:
            key_list.append(f'{k}:{get_key_trees(v["properties"])}')
        else:
            # Scalars, arrays and free-form objects (no declared properties)
            # are shown by their type name only.
            key_list.append(f'{k}:{prop_type}')
    return f'({", ".join(key_list)})'
            
    
# One line per tool: its name immediately followed by the typed parameter tree.
for sc in schema:
    print(sc["name"], get_key_trees(sc["parameters"]["properties"]), sep="")


get_spaces(team_id:string, archived:boolean)
create_space(team_id:string, name:string, multiple_assignees:boolean, features:(due_dates:(enabled:boolean, start_date:boolean, remap_due_dates:boolean, remap_closed_due_date:boolean), time_tracking:(enabled:boolean)))
get_space(space_id:string)
update_space(space_id:string, name:string, color:string, private:boolean, admin_can_manage:boolean, multiple_assignees:boolean, features:(due_dates:(enabled:boolean, start_date:boolean, remap_due_dates:boolean, remap_closed_due_date:boolean), time_tracking:(enabled:boolean)))
delete_space(space_id:string)
get_space_tags(space_id:string)
create_space_tag(space_id:string, tag:(name:string, tag_fg:string, tag_bg:string))
delete_space_tag(space_id:string, tag_name:string, tag:(name:string, tag_fg:string, tag_bg:string))


In [63]:
import os
import json
import dotenv
from textwrap import dedent
from datetime import datetime

dotenv.load_dotenv()

# Re-key every schema entry into the shape passed as `tools=` to the messages
# API: the JSON-schema "parameters" block is exposed as "input_schema".
tools = [
    dict(
        name=sc["name"],
        description=sc["description"],
        input_schema=sc["parameters"],
    )
    for sc in schema
]
tools

[{'name': 'get_spaces',
  'description': 'View the Spaces available in a Workspace.',
  'input_schema': {'type': 'object',
   'properties': {'team_id': {'type': 'string',
     'description': 'The ID of the team'},
    'archived': {'type': 'boolean',
     'description': 'A flag to decide whether to include archived spaces or not'}},
   'required': ['team_id', 'archived']}},
 {'name': 'create_space',
  'description': 'Add a new Space to a Workspace.',
  'input_schema': {'type': 'object',
   'properties': {'team_id': {'type': 'string',
     'description': 'The ID of the team'},
    'name': {'type': 'string', 'description': 'The name of the new space'},
    'multiple_assignees': {'type': 'boolean',
     'description': 'Enable or disable multiple assignees for tasks within the space'},
    'features': {'type': 'object',
     'description': 'Enabled features within the space',
     'properties': {'due_dates': {'type': 'object',
       'description': 'Due dates feature settings',
       'prop

In [64]:
import anthropic
# Client constructed with defaults; no API key is passed explicitly in this cell
# (presumably it comes from the environment loaded by dotenv — confirm).
client = anthropic.Anthropic()

# One-off request through the beta tool-use endpoint: a single user message plus
# the ClickUp tool definitions held in `tools`.
response = client.beta.tools.messages.create(
  model="claude-3-sonnet-20240229",
  messages=[

    {
      "role": "user",
      "content": """\
The software development team at FastTech recently concluded a project, and it’s been 
decided to archive the specific ClickUp space identified by \"sd456\" to ensure it doesn't 
clutter the current workspace. Before doing so, the team leader wants to retrieve the list 
of custom tags created in this space for reporting purposes."""
    },
  ],

  temperature=1,
  max_tokens=4096,
  top_p=1,
  tools=tools,
)

# Display the raw message object, including any tool_use block it contains.
response

ToolsBetaMessage(id='msg_019JTovUyeNrdDTjBS7HeNiL', content=[TextBlock(text='To get the list of custom tags for the ClickUp space with ID "sd456", we can use the `get_space_tags` tool:', type='text'), ToolUseBlock(id='toolu_013E8uZcJHFykk7nZfb6sdeM', input={'space_id': 'sd456'}, name='get_space_tags', type='tool_use')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(input_tokens=1551, output_tokens=91))

In [65]:
import json

# Benchmark examples: a list of dicts, each with a natural-language "prompt"
# and the expected call written as a "solution" string.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding.
with open("clickup_space_benchmark.json", encoding="utf-8") as f:
    bench_data = json.load(f)

bench_data[1]

{'prompt': 'Ella, the project coordinator, is setting up a new project space in ClickUp for the "Creative Minds" team with team ID "cm789". This space, named "Innovative Campaigns 2023", should allow multiple assignees for tasks, but keep due dates and time tracking disabled, as the initial planning phase doesn\'t require strict deadlines or time monitoring.',
 'solution': 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'}

In [66]:
# Ground-truth call string of the second benchmark example.
bench_data[1]["solution"]

'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'

In [67]:
# Print every ground-truth call string, one per line, to inspect the notation
# that the solution parser has to handle.
for b in bench_data:
    print(b["solution"])

get_spaces(team_id="team123", archived=False)
create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))
get_space_tags(space_id="sd456")
create_space_tag(space_id="prj1122", tag=(name="Critical Bug", tag_fg="#FFFFFF", tag_bg="#8B0000"))
delete_space_tag(space_id="qa789", tag_name="MinorIssue", tag=(name="MinorIssue", tag_fg="#000000", tag_bg="#FFFFE0"))
update_space(space_id="bkend345", name="Backend_Development_Space", color="#000000", private=False, admin_can_manage=True, multiple_assignees=False, features=(due_dates=(enabled=True, start_date=True, remap_due_dates=True, remap_closed_due_date=False), time_tracking=(enabled=True)))
delete_space(space_id="gt678")
create_space(team_id="ds101", name="Design Studio 2023", multiple_assignees=True, features=(due_dates=(enabled=True, start_date=True, remap_due_dates=False, re

In [68]:
import anthropic
client = anthropic.Anthropic()


def fcalling_llm(fprompt):
    """Send one prompt to the model together with the ClickUp tool definitions.

    Args:
        fprompt: Text sent as the single user message.

    Returns:
        The message object returned by ``client.beta.tools.messages.create``.
    """
    # `client` and `tools` are looked up at call time, so re-running the cells
    # that define them affects later calls.
    return client.beta.tools.messages.create(
        model="claude-3-sonnet-20240229",
        messages=[
            {
                "role": "user",
                "content": fprompt,
            },
        ],
        temperature=0,
        max_tokens=4096,
        top_p=1,
        tools=tools,
    )


response = fcalling_llm(bench_data[1]["prompt"])
response

ToolsBetaMessage(id='msg_01A67Qo5ojQmHBKbK3q9NqgV', content=[TextBlock(text='Okay, let\'s set up the new "Innovative Campaigns 2023" space for the "Creative Minds" team with the specified requirements:', type='text'), ToolUseBlock(id='toolu_011YkqrWgX8t2DZ7kTzqe9ZD', input={'team_id': 'cm789', 'name': 'Innovative Campaigns 2023', 'multiple_assignees': True, 'features': {'due_dates': {'enabled': False, 'remap_closed_due_date': False, 'remap_due_dates': False, 'start_date': False}, 'time_tracking': {'enabled': False}}}, name='create_space', type='tool_use')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(input_tokens=1557, output_tokens=223))

In [69]:
import re 
from pprint import pprint

class FuncStringParser():
    """Normalise call strings and LLM tool-use responses to comparable dicts.

    Benchmark solutions are keyword-only call strings in which a nested object
    is written as a parenthesised group of ``key=value`` pairs, e.g.
    ``create_space(team_id="cm789", features=(time_tracking=(enabled=False)))``.
    Both the call strings and the LLM responses are converted to
    ``{"name": ..., "arguments": {...}}`` so they can be compared with ``==``.
    """

    def __init__(self):
        # Placeholder name -> matched text; only filled by `replace_with_dict`.
        self.record_keeper = {}

    def replace_with_dict(self, match):
        """``re.sub`` callback that stores the match under a ``dictN`` placeholder.

        Retained for backward compatibility; the parser itself no longer
        relies on placeholder substitution.

        Args:
            match: A regular-expression match object.

        Returns:
            The placeholder name the matched text was stored under.
        """
        replacement = f"dict{len(self.record_keeper)}"
        self.record_keeper[replacement] = match.group()
        return replacement

    @staticmethod
    def _split_top_level(text):
        """Split ``text`` on commas that are outside quotes and brackets."""
        parts, current = [], []
        depth = 0
        quote = None
        escaped = False
        for ch in text:
            if quote is not None:
                # Inside a string literal nothing is structural.
                current.append(ch)
                if escaped:
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == quote:
                    quote = None
                continue
            if ch in "\"'":
                quote = ch
            elif ch in "([{":
                depth += 1
            elif ch in ")]}":
                depth -= 1
            elif ch == "," and depth == 0:
                parts.append("".join(current))
                current = []
                continue
            current.append(ch)
        parts.append("".join(current))
        return parts

    def arg_tuple_str_to_args_dict(self, func_args):
        """Parse ``(k1=v1, k2=(k3=v3))`` into a dict with raw string leaves.

        Nested parenthesised groups become nested dicts. Leaf values are left
        as the (stripped) source text; `replace_true_false_dfs` converts them.

        Args:
            func_args: Argument list text including its outer parentheses.

        Returns:
            Dict mapping argument names to raw value strings or nested dicts.

        Raises:
            ValueError: If the text is not parenthesised or an argument is not
                written as ``key=value``.
        """
        text = func_args.strip()
        if not (text.startswith("(") and text.endswith(")")):
            raise ValueError(f"expected a parenthesised argument list, got {func_args!r}")

        args_dict = {}
        for arg in self._split_top_level(text[1:-1]):
            if not arg.strip():
                # Empty argument list or a trailing comma.
                continue
            # The first "=" always ends the key; later ones belong to the value.
            k, sep, v = arg.partition("=")
            if not sep:
                raise ValueError(f"expected key=value, got {arg.strip()!r}")
            k, v = k.strip(), v.strip()

            if v.startswith("(") and v.endswith(")"):
                v = self.arg_tuple_str_to_args_dict(v)

            args_dict[k] = v

        return args_dict

    def replace_true_false_dfs(self, nested_dict):
        """Convert every string leaf of ``nested_dict`` to its Python literal, in place.

        Despite the name this handles any literal (``True``, ``"text"``, ``3``).
        ``ast.literal_eval`` is used instead of ``eval`` so that text read from
        a data file can never execute code. Leaves that are not strings are
        left untouched, which makes repeated calls harmless.

        Args:
            nested_dict: Dict whose leaves are literal source strings.

        Raises:
            ValueError, SyntaxError: If a leaf is not a valid Python literal.
        """
        import ast

        for key, value in nested_dict.items():
            # If the value is a nested dictionary, recursively process it
            if isinstance(value, dict):
                self.replace_true_false_dfs(value)
            elif isinstance(value, str):
                nested_dict[key] = ast.literal_eval(value)

    def func_to_args_dict(self, func_args):
        """Parse the argument part of a call string into a dict of Python values.

        Args:
            func_args: Text starting at the call's opening parenthesis, e.g.
                ``(team_id="cm789", archived=False)``. Text after the last
                closing parenthesis is ignored.

        Returns:
            Dict of argument names to values; nested groups become nested dicts
            to any depth. An empty argument list yields ``{}``.

        Raises:
            ValueError: If no parenthesised argument list is present.
        """
        start = func_args.find("(")
        end = func_args.rfind(")")
        if start == -1 or end < start:
            raise ValueError(f"no parenthesised argument list in {func_args!r}")

        args_dict = self.arg_tuple_str_to_args_dict(func_args[start:end + 1])
        self.replace_true_false_dfs(args_dict)
        self.record_keeper = {}
        return args_dict

    def func_string_to_func_dict(self, func_string):
        """Convert ``name(k=v, ...)`` into ``{"name": ..., "arguments": {...}}``.

        Args:
            func_string: A complete call string.

        Returns:
            Dict with the function name and its parsed arguments.

        Raises:
            ValueError: If ``func_string`` contains no opening parenthesis.
        """
        paren_at = func_string.find("(")
        if paren_at == -1:
            raise ValueError(f"not a call string: {func_string!r}")
        func_name = func_string[:paren_at].strip()
        func_args_dict = self.func_to_args_dict(func_string[paren_at:])
        return {"name": func_name, "arguments": func_args_dict}

    def llm_response_to_func_dict(self, llm_response):
        """Extract the first tool call of an LLM response.

        Args:
            llm_response: Response object whose ``content`` is a sequence of
                blocks with a ``type`` attribute, or ``None`` for a request
                that produced no response.

        Returns:
            ``{"name": ..., "arguments": ...}`` taken from the first block of
            type ``"tool_use"``; both values are ``None`` when there is none.
        """
        for message in getattr(llm_response, "content", None) or []:
            if message.type == "tool_use":
                return {
                    "name": message.name,
                    "arguments": message.input,
                }
        return {"name": None, "arguments": None}
        

                    
        
    
# Sample call string (identical to the solution of bench_data[1]) used to
# smoke-test the parser on two levels of nested groups.
func = 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'

# Manual split into name and argument text; only the commented-out call below
# would consume `func_args`.
func_name = func[:func.find("(")]
func_args = func[func.find("("):]

fsp = FuncStringParser()
# fsp.func_to_args_dict(func_args)

# Full conversion: {"name": ..., "arguments": {...}}.
fsp.func_string_to_func_dict(func)


{'name': 'create_space',
 'arguments': {'team_id': 'cm789',
  'name': 'Innovative Campaigns 2023',
  'multiple_assignees': True,
  'features': {'due_dates': {'enabled': False,
    'start_date': False,
    'remap_due_dates': False,
    'remap_closed_due_date': False},
   'time_tracking': {'enabled': False}}}}

In [70]:
# Sanity check: dicts with the same keys and values compare equal with `==`,
# which is how the scoring cell matches responses against solutions.
{"space_id":"sd456"} == {"space_id":"sd456"}

True

In [71]:
from tqdm.auto import tqdm

# One entry per benchmark example, in the same order as `bench_data`, so the
# two lists can be zipped when scoring.
llm_output = []

for bdata in tqdm(bench_data):
    try:
        # The request is the statement that can fail, so it has to be inside
        # the try; appending to a list never raises.
        response = fcalling_llm(bdata["prompt"])
    except (TypeError, anthropic.APIError) as err:
        # Record the failure and keep going; None keeps the list aligned with
        # `bench_data` instead of aborting the whole run.
        print(bdata["prompt"])
        print(repr(err))
        llm_output.append(None)
        print("------")
    else:
        llm_output.append(response)
llm_output

100%|██████████| 50/50 [02:50<00:00,  3.41s/it]


[ToolsBetaMessage(id='msg_01EVP6KCYbfzHsmfGcXArMdh', content=[TextBlock(text='To get the list of spaces under the "Innovative Solutions" team with the team ID "team123", we can use the `get_spaces` tool:', type='text'), ToolUseBlock(id='toolu_01GDQZVErH9AVcA6jHCMk3iR', input={'team_id': 'team123', 'archived': False}, name='get_spaces', type='tool_use')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(input_tokens=1551, output_tokens=107)),
 ToolsBetaMessage(id='msg_01WPw3U4dCPxCdeRtFvKF5zM', content=[TextBlock(text='Okay, let\'s set up the new "Innovative Campaigns 2023" space for the "Creative Minds" team with the specified requirements:', type='text'), ToolUseBlock(id='toolu_018TLUteJAXATEWs9fGqrsSu', input={'team_id': 'cm789', 'name': 'Innovative Campaigns 2023', 'multiple_assignees': True, 'features': {'due_dates': {'enabled': False, 'remap_closed_due_date': False, 'remap_due_dates': False, 'start_date': F

In [72]:
from pprint import pprint

# Fresh parser, plus the list that receives one True/False match flag per
# benchmark example from the scoring loop in this cell.
fsp = FuncStringParser()
result_list = []


def convert_str_to_bool(nested_dict):
    """Turn the strings ``"True"`` / ``"False"`` into booleans, in place.

    Walks ``nested_dict`` depth-first through nested dicts; every other value
    is left as it is.

    Args:
        nested_dict: Dict to normalise (it is mutated).

    Returns:
        The same ``nested_dict`` object, after conversion.
    """
    for key, current in nested_dict.items():
        if isinstance(current, dict):
            # Descend; the nested dict is updated in place.
            convert_str_to_bool(current)
            continue
        for literal, flag in (("True", True), ("False", False)):
            if current == literal:
                nested_dict[key] = flag
                break
    return nested_dict


# fsp.func_to_args_dict(func_args)
# Score each example: parse the expected call, normalise the model's tool call,
# show both, and record whether they are exactly equal.
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    llm_response_dict = convert_str_to_bool(
        fsp.llm_response_to_func_dict(llm_response)
    )

    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    # Store the verdict first, then echo the stored value.
    result_list.append(llm_response_dict == bdata_solution_dict)
    print(result_list[-1])
    print("-----")

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features': {'due_dates': {'enabled': False,
                                          'remap_closed_due_date': False,
                                          'remap_due_dates': False,
                                          'start_date': False},
                            'time_tracking': {'enabled': False}},
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features': {'due_dates': {'enabled': False,
                                          'remap_closed_due_date': False,
                                          'remap_due_dates': False,
                                          'start_date': False},
                            'time_tracking': {'enabled': False}},
           

In [73]:
# Accuracy of this run: the fraction of True flags in `result_list`, i.e. of
# examples whose tool call equalled the expected call exactly.
run_acc_1 = sum(result_list)/len(result_list)
run_acc_1

0.6

In [74]:
# from pprint import pprint
# from tqdm.auto import tqdm

# llm_output = []

# for bdata in tqdm(bench_data):
#     response = fcalling_llm(bdata["prompt"])
#     try:
#         llm_output.append(response)
#     except TypeError:
#         print(bdata["prompt"])
#         print(response)
#         print("-------")
#         llm_output.append(None)

In [75]:
# from pprint import pprint

# fsp = FuncStringParser()
# result_list = []

# # fsp.func_to_args_dict(func_args)
# for bdata, llm_response in zip(bench_data, llm_output):
#     bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
#     llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
#     pprint(bdata_solution_dict)
#     pprint(llm_response_dict)
#     print(llm_response_dict == bdata_solution_dict)
#     result_list.append(llm_response_dict == bdata_solution_dict)
#     print("-----")
    
# run_acc_2 = sum(result_list)/len(result_list)
# run_acc_2

In [76]:
# from pprint import pprint
# from tqdm.auto import tqdm

# llm_output = []

# for bdata in tqdm(bench_data):
#     response = fcalling_llm(bdata["prompt"])
#     try:
#         llm_output.append(response)
#     except TypeError:
#         print(bdata["prompt"])
#         print(response)
#         print("-------")
#         llm_output.append(None)

In [77]:
# from pprint import pprint

# fsp = FuncStringParser()
# result_list = []

# # fsp.func_to_args_dict(func_args)
# for bdata, llm_response in zip(bench_data, llm_output):
#     bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
#     llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
#     pprint(bdata_solution_dict)
#     pprint(llm_response_dict)
#     print(llm_response_dict == bdata_solution_dict)
#     result_list.append(llm_response_dict == bdata_solution_dict)
#     print("-----")
    
# run_acc_3 = sum(result_list)/len(result_list)
# run_acc_3

In [78]:
# Summary of the runs that were actually executed (only run 1 is active).
print(
    "ClickUp Benchmark - No System Prompt : Accuracy",
    "=========================================",
    f"Run 1: {run_acc_1}",
    sep="\n",
)


ClickUp Benchmark - No System Prompt : Accuracy
Run 1: 0.6
