In [19]:
import json

with open("clickup_space_flattened_schema.json") as f:
    flattened_schema = json.load(f)
    
flattened_schema

[{'description': 'View the Spaces available in a Workspace.',
  'name': 'get_spaces',
  'parameters': {'properties': {'archived': {'description': 'A flag to decide whether to include archived spaces or not__',
     'type': 'boolean'},
    'team_id': {'description': 'The ID of the team__', 'type': 'string'}},
   'required': ['team_id', 'archived'],
   'type': 'object'}},
 {'description': 'Add a new Space to a Workspace.',
  'name': 'create_space',
  'parameters': {'properties': {'features__due_dates__enabled': {'description': 'enabled__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_closed_due_date': {'description': 'remap_closed_due_date__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_due_dates': {'description': 'remap_due_dates__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due

In [20]:
for sc in flattened_schema:
    print(sc["name"], "-", sc["description"])

get_spaces - View the Spaces available in a Workspace.
create_space - Add a new Space to a Workspace.
get_space - View the details of a specific Space in a Workspace.
update_space - Rename, set the Space color, and enable ClickApps for a Space.
delete_space - Delete a Space from your Workspace.
get_space_tags - View the task Tags available in a Space.
create_space_tag - Add a new task Tag to a Space.
delete_space_tag - Delete a task Tag from a Space.


In [21]:
schema_func_decription_dict = {
    "get_spaces": "Retrives information of all the spaces available in user's Clickup Workspace.",
    "create_space": "Creates a new ClickUp space",
    "get_space": "Retrives information of a specific Clickup space",
    "update_space": "Modifies name, settings the Space color, and assignee management Space.",
    "delete_space": "Delete an existing space from user's ClickUp Workspace",
    "get_space_tags": "Retrives all the Tags assigned on all the tasks in a Space.",
    "create_space_tag": "Assigns a customized Tag in a ClickUp Space.",
    "delete_space_tag": "Deletes a specific tag previously assigned in a space.",
}

optimized_schema = []
for sc in flattened_schema:
    temp_dict = sc.copy()
    temp_dict["description"] = schema_func_decription_dict[temp_dict["name"]]
    optimized_schema.append(temp_dict)

with open('clickup_space_flattened_optimized2_schema.json', 'w') as f:
    json.dump(optimized_schema, f, indent=4, sort_keys=True)
    
optimized_schema

[{'description': "Retrives information of all the spaces available in user's Clickup Workspace.",
  'name': 'get_spaces',
  'parameters': {'properties': {'archived': {'description': 'A flag to decide whether to include archived spaces or not__',
     'type': 'boolean'},
    'team_id': {'description': 'The ID of the team__', 'type': 'string'}},
   'required': ['team_id', 'archived'],
   'type': 'object'}},
 {'description': 'Creates a new ClickUp space',
  'name': 'create_space',
  'parameters': {'properties': {'features__due_dates__enabled': {'description': 'enabled__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_closed_due_date': {'description': 'remap_closed_due_date__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_due_dates': {'description': 'remap_due_dates__Due dates feature settings__Enabled features within the space__',
     'type'

In [22]:
from pprint import pprint

def flatten_dict(d, parent_key='', sep='__'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

# Your original dictionary
original_dict = {
    'features': {
        'due_dates': {
            'enabled': False,
            'remap_closed_due_date': False,
            'remap_due_dates': False,
            'start_date': False
        },
        'time_tracking': {
            'enabled': False
        }
    },
    'multiple_assignees': True,
    'name': 'Innovative Campaigns 2023',
    'team_id': 'cm789'
}

# Flatten the dictionary
flattened_dict = flatten_dict(original_dict)

pprint(flattened_dict)

{'features__due_dates__enabled': False,
 'features__due_dates__remap_closed_due_date': False,
 'features__due_dates__remap_due_dates': False,
 'features__due_dates__start_date': False,
 'features__time_tracking__enabled': False,
 'multiple_assignees': True,
 'name': 'Innovative Campaigns 2023',
 'team_id': 'cm789'}


In [23]:
import json

with open("clickup_space_benchmark.json") as f:
    bench_data = json.load(f)
    
bench_data[1]

{'prompt': 'Ella, the project coordinator, is setting up a new project space in ClickUp for the "Creative Minds" team with team ID "cm789". This space, named "Innovative Campaigns 2023", should allow multiple assignees for tasks, but keep due dates and time tracking disabled, as the initial planning phase doesn\'t require strict deadlines or time monitoring.',
 'solution': 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'}

In [24]:
import os
import json
import dotenv
from textwrap import dedent
from datetime import datetime

dotenv.load_dotenv()

tools = [{"name":sc["name"],  "description":sc["description"], "input_schema": sc["parameters"]} for sc in optimized_schema]
tools

[{'name': 'get_spaces',
  'description': 'View the Spaces available in a Workspace.',
  'input_schema': {'properties': {'archived': {'description': 'A flag to decide whether to include archived spaces or not__',
     'type': 'boolean'},
    'team_id': {'description': 'The ID of the team__', 'type': 'string'}},
   'required': ['team_id', 'archived'],
   'type': 'object'}},
 {'name': 'create_space',
  'description': 'Add a new Space to a Workspace.',
  'input_schema': {'properties': {'features__due_dates__enabled': {'description': 'enabled__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_closed_due_date': {'description': 'remap_closed_due_date__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_due_dates': {'description': 'remap_due_dates__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features_

In [25]:
import anthropic
client = anthropic.Anthropic()

fcalling_llm = lambda fprompt : client.beta.tools.messages.create(
  model="claude-3-opus-20240229",
  system = f"""
You are an agent who is responsible for managing various employee management platform, 
one of which is CliuckUp. 

You are given a number of tools as functions, you must use one of those tools and fillup 
all the parameters of those tools ,whose answers you will get from the given situation.

When you are presented with a technical situation, that a person of a team is facing, 
you must give the soulution utilizing your functionalities. 

First analyze the given situation to fully anderstand what is the intention of the user,
what they need and exactly which tool will fill up that necessity.

Then look into the parameters and extract all the relevant informations to fillup the 
parameter with right values.
""",
  messages=[
    {
      "role": "user",
      "content": fprompt
    },
  ],
  temperature=0,
  max_tokens=4096,
  top_p=1,
  tools=tools,
)

response = fcalling_llm(bench_data[1]["prompt"])
# response
response

RateLimitError: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your per-minute rate limit (https://docs.anthropic.com/claude/reference/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}

In [None]:
import re 
from pprint import pprint



class FuncStringParser():
    def __init__(self):
        self.record_keeper = {}
        
        
    def replace_with_dict(self, match):
        replacement = f"dict{len(self.record_keeper)}"
        self.record_keeper[replacement] = match.group()
        # dict_counter += 1
        return replacement

    def arg_tuple_str_to_args_dict(self, func_args):
        # print(func_args)
        args_dict = {}
        func_args = func_args[1:-1].split(",")
        for arg in func_args:
            k, v = arg[:arg.find("=")], arg[arg.find("=")+1:]
            k, v = k.strip(), v.strip()
            
            if v.startswith("(") and v.endswith(")"):
                v = self.func_to_args_dict(v)
                # print(v)
            
            args_dict[k] = v

        return args_dict

    def replace_true_false_dfs(self, nested_dict):
        for key, value in nested_dict.items():
            # If the value is a nested dictionary, recursively process it
            if isinstance(value, dict):
                self.replace_true_false_dfs(value)
            else:
                nested_dict[key] = eval(value)
                
                    
    def func_to_args_dict(self, func_args):
        pattern_uncompiled = r'\(([^()]+)\)'
        pattern = re.compile(pattern_uncompiled)
        # print(re.findall(pattern_uncompiled, func_args))
        while len(re.findall(pattern_uncompiled, func_args)) > 1:
            func_args = re.sub(pattern, self.replace_with_dict, func_args)
            
        # print(re.findall(pattern_uncompiled, func_args))
        if sum([m.start(0) > 2 for m in re.finditer(r'\(([^()]+)\)', func_args)]):
            func_args = re.sub(pattern, self.replace_with_dict, func_args)
            # print(func_args)
        func_args = re.sub(pattern, self.replace_with_dict, func_args)
        # pprint(func_args)
            
        dict_record_keeper = {k:self.arg_tuple_str_to_args_dict(v) for k, v in self.record_keeper.items()}
        # print(dict_record_keeper)
        
        # inv_dict_record_keeper = {v:k for k,v in dict_record_keeper.items()}
        for i1, (k1, v1) in enumerate(dict_record_keeper.items()):
            for i2, (k2, v2) in enumerate(dict_record_keeper.items()):
                if i2 == i1:
                    break
                if k2 in v1.values():
                    # print(k2, v1)
                    dict_record_keeper[k1][list(v1.keys())[list(v1.values()).index(k2)]] = v2
        args_dict = list(dict_record_keeper.values())[-1]
        self.replace_true_false_dfs(args_dict)
        self.record_keeper = {}
        return args_dict
    
    def func_string_to_func_dict(self, func_string):
        func_name = func_string[:func_string.find("(")]
        func_args = func_string[func_string.find("("):]
        func_args_dict = self.func_to_args_dict(func_args)
        return {"name": func_name, "arguments": func_args_dict}
    
    def llm_response_to_func_dict(self, llm_response):
        for message in llm_response.content:
            if message.type == "tool_use":
                return {
                    "name": message.name,
                    "arguments": message.input,
                }
        return {"name": None, "arguments": None}
        

                    
        
    
# Sample string
func = 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'

func_name = func[:func.find("(")]
func_args = func[func.find("("):]

fsp = FuncStringParser()
# fsp.func_to_args_dict(func_args)

fsp.func_string_to_func_dict(func)


{'name': 'create_space',
 'arguments': {'team_id': 'cm789',
  'name': 'Innovative Campaigns 2023',
  'multiple_assignees': True,
  'features': {'due_dates': {'enabled': False,
    'start_date': False,
    'remap_due_dates': False,
    'remap_closed_due_date': False},
   'time_tracking': {'enabled': False}}}}

In [None]:
from tqdm import tqdm
# from tqdm.auto import tqdm


llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    try:
        llm_output.append(response)
    except TypeError:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

  4%|▍         | 2/50 [00:33<13:16, 16.59s/it]


RateLimitError: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your per-minute rate limit (https://docs.anthropic.com/claude/reference/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}

In [None]:
from pprint import pprint

fsp = FuncStringParser()
result_list = []

# fsp.func_to_args_dict(func_args)
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    # bdata_solution_dict["name"] = schema_func_name_dict[bdata_solution_dict["name"]]
    bdata_solution_dict["arguments"] = flatten_dict(bdata_solution_dict["arguments"])
    
    llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    print(llm_response_dict == bdata_solution_dict)
    result_list.append(llm_response_dict == bdata_solution_dict)
    print("-----")
    
run_acc_1 = sum(result_list)/len(result_list)
run_acc_1

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative

0.66

In [None]:
from tqdm import tqdm
# from tqdm.auto import tqdm


llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    try:
        llm_output.append(response)
    except TypeError:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

100%|██████████| 50/50 [03:55<00:00,  4.71s/it]


[Function(arguments='{"archived":false,"team_id":"team123"}', name='get_spaces'),
 Function(arguments='{"features__due_dates__enabled":false,"features__due_dates__remap_closed_due_date":false,"features__due_dates__remap_due_dates":false,"features__due_dates__start_date":false,"features__time_tracking__enabled":false,"multiple_assignees":true,"name":"Innovative Campaigns 2023","team_id":"cm789"}', name='create_space'),
 Function(arguments='{"space_id":"sd456"}', name='get_space_tags'),
 Function(arguments='{"space_id":"prj1122"}', name='get_space_tags'),
 Function(arguments='{"space_id":"qa789","tag__name":"MinorIssue","tag__tag_bg":"#FFFFE0","tag__tag_fg":"#000000","tag_name":"MinorIssue"}', name='delete_space_tag'),
 Function(arguments='{"admin_can_manage":true,"color":"#000000","features__due_dates__enabled":true,"features__due_dates__remap_closed_due_date":true,"features__due_dates__remap_due_dates":true,"features__due_dates__start_date":true,"features__time_tracking__enabled":true,

In [None]:
from pprint import pprint

fsp = FuncStringParser()
result_list = []

# fsp.func_to_args_dict(func_args)
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    # bdata_solution_dict["name"] = schema_func_name_dict[bdata_solution_dict["name"]]
    bdata_solution_dict["arguments"] = flatten_dict(bdata_solution_dict["arguments"])
    
    llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    print(llm_response_dict == bdata_solution_dict)
    result_list.append(llm_response_dict == bdata_solution_dict)
    print("-----")
    
run_acc_2 = sum(result_list)/len(result_list)
run_acc_2

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative

0.68

In [None]:
from tqdm import tqdm
# from tqdm.auto import tqdm


llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    try:
        llm_output.append(response)
    except TypeError:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

100%|██████████| 50/50 [03:41<00:00,  4.43s/it]


[Function(arguments='{"archived":false,"team_id":"team123"}', name='get_spaces'),
 Function(arguments='{"features__due_dates__enabled":false,"features__due_dates__remap_closed_due_date":false,"features__due_dates__remap_due_dates":false,"features__due_dates__start_date":false,"features__time_tracking__enabled":false,"multiple_assignees":true,"name":"Innovative Campaigns 2023","team_id":"cm789"}', name='create_space'),
 Function(arguments='{"space_id":"sd456"}', name='get_space_tags'),
 Function(arguments='{"space_id":"prj1122"}', name='get_space_tags'),
 Function(arguments='{"space_id":"qa789","tag__name":"MinorIssue","tag__tag_bg":"#FFFFE0","tag__tag_fg":"#000000","tag_name":"MinorIssue"}', name='delete_space_tag'),
 Function(arguments='{"admin_can_manage":true,"color":"#000000","features__due_dates__enabled":true,"features__due_dates__remap_closed_due_date":false,"features__due_dates__remap_due_dates":true,"features__due_dates__start_date":true,"features__time_tracking__enabled":true

In [None]:
from pprint import pprint

fsp = FuncStringParser()
result_list = []

# fsp.func_to_args_dict(func_args)
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    bdata_solution_dict["arguments"] = flatten_dict(bdata_solution_dict["arguments"])
    
    llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    print(llm_response_dict == bdata_solution_dict)
    result_list.append(llm_response_dict == bdata_solution_dict)
    print("-----")
    
run_acc_3 = sum(result_list)/len(result_list)
run_acc_3

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative

0.66

In [None]:
print("ClickUp Benchmark - Flattened Schema + Focused System Prompt + Function Description Optimized : Accuracy")
print("=========================================")
print(f"Run 1: {run_acc_1}")
print(f"Run 2: {run_acc_2}")
print(f"Run 3: {run_acc_3}")

ClickUp Benchmark - Flattened Schema + Focused System Prompt + Function Description Optimized : Accuracy
Run 1: 0.66
Run 2: 0.68
Run 3: 0.66
