In [16]:
import json

with open("clickup_space_flattened_schema.json") as f:
    flattened_schema = json.load(f)
    
flattened_schema

[{'description': 'View the Spaces available in a Workspace.',
  'name': 'get_spaces',
  'parameters': {'properties': {'archived': {'description': 'A flag to decide whether to include archived spaces or not__',
     'type': 'boolean'},
    'team_id': {'description': 'The ID of the team__', 'type': 'string'}},
   'required': ['team_id', 'archived'],
   'type': 'object'}},
 {'description': 'Add a new Space to a Workspace.',
  'name': 'create_space',
  'parameters': {'properties': {'features__due_dates__enabled': {'description': 'enabled__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_closed_due_date': {'description': 'remap_closed_due_date__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_due_dates': {'description': 'remap_due_dates__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due

In [17]:
for sc in flattened_schema:
    print(sc["name"], "-", sc["description"])

get_spaces - View the Spaces available in a Workspace.
create_space - Add a new Space to a Workspace.
get_space - View the details of a specific Space in a Workspace.
update_space - Rename, set the Space color, and enable ClickApps for a Space.
delete_space - Delete a Space from your Workspace.
get_space_tags - View the task Tags available in a Space.
create_space_tag - Add a new task Tag to a Space.
delete_space_tag - Delete a task Tag from a Space.


In [18]:
schema_func_decription_dict = {
    "get_spaces": "Retrives information of all the spaces available in user's Clickup Workspace.",
    "create_space": "Creates a new ClickUp space",
    "get_space": "Retrives information of a specific Clickup space",
    "update_space": "Modifies name, settings the Space color, and assignee management Space.",
    "delete_space": "Delete an existing space from user's ClickUp Workspace",
    "get_space_tags": "Retrives all the Tags assigned on all the tasks in a Space.",
    "create_space_tag": "Assigns a customized Tag in a ClickUp Space.",
    "delete_space_tag": "Deletes a specific tag previously assigned in a space.",
}

optimized_schema = []
for sc in flattened_schema:
    temp_dict = sc.copy()
    temp_dict["description"] = schema_func_decription_dict[temp_dict["name"]]
    optimized_schema.append(temp_dict)

with open('clickup_space_flattened_optimized2_schema.json', 'w') as f:
    json.dump(optimized_schema, f, indent=4, sort_keys=True)
    
optimized_schema

[{'description': "Retrives information of all the spaces available in user's Clickup Workspace.",
  'name': 'get_spaces',
  'parameters': {'properties': {'archived': {'description': 'A flag to decide whether to include archived spaces or not__',
     'type': 'boolean'},
    'team_id': {'description': 'The ID of the team__', 'type': 'string'}},
   'required': ['team_id', 'archived'],
   'type': 'object'}},
 {'description': 'Creates a new ClickUp space',
  'name': 'create_space',
  'parameters': {'properties': {'features__due_dates__enabled': {'description': 'enabled__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_closed_due_date': {'description': 'remap_closed_due_date__Due dates feature settings__Enabled features within the space__',
     'type': 'boolean'},
    'features__due_dates__remap_due_dates': {'description': 'remap_due_dates__Due dates feature settings__Enabled features within the space__',
     'type'

In [19]:
from pprint import pprint

def flatten_dict(d, parent_key='', sep='__'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

# Your original dictionary
original_dict = {
    'features': {
        'due_dates': {
            'enabled': False,
            'remap_closed_due_date': False,
            'remap_due_dates': False,
            'start_date': False
        },
        'time_tracking': {
            'enabled': False
        }
    },
    'multiple_assignees': True,
    'name': 'Innovative Campaigns 2023',
    'team_id': 'cm789'
}

# Flatten the dictionary
flattened_dict = flatten_dict(original_dict)

pprint(flattened_dict)

{'features__due_dates__enabled': False,
 'features__due_dates__remap_closed_due_date': False,
 'features__due_dates__remap_due_dates': False,
 'features__due_dates__start_date': False,
 'features__time_tracking__enabled': False,
 'multiple_assignees': True,
 'name': 'Innovative Campaigns 2023',
 'team_id': 'cm789'}


In [20]:
import json

with open("clickup_space_benchmark.json") as f:
    bench_data = json.load(f)
    
bench_data[1]

{'prompt': 'Ella, the project coordinator, is setting up a new project space in ClickUp for the "Creative Minds" team with team ID "cm789". This space, named "Innovative Campaigns 2023", should allow multiple assignees for tasks, but keep due dates and time tracking disabled, as the initial planning phase doesn\'t require strict deadlines or time monitoring.',
 'solution': 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'}

In [21]:
import os
import json
import dotenv
import openai
from textwrap import dedent
from datetime import datetime

dotenv.load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

tools = [{"type":"function", "function":sc} for sc in optimized_schema]
tools

[{'type': 'function',
  'function': {'description': "Retrives information of all the spaces available in user's Clickup Workspace.",
   'name': 'get_spaces',
   'parameters': {'properties': {'archived': {'description': 'A flag to decide whether to include archived spaces or not__',
      'type': 'boolean'},
     'team_id': {'description': 'The ID of the team__', 'type': 'string'}},
    'required': ['team_id', 'archived'],
    'type': 'object'}}},
 {'type': 'function',
  'function': {'description': 'Creates a new ClickUp space',
   'name': 'create_space',
   'parameters': {'properties': {'features__due_dates__enabled': {'description': 'enabled__Due dates feature settings__Enabled features within the space__',
      'type': 'boolean'},
     'features__due_dates__remap_closed_due_date': {'description': 'remap_closed_due_date__Due dates feature settings__Enabled features within the space__',
      'type': 'boolean'},
     'features__due_dates__remap_due_dates': {'description': 'remap_due_d

In [22]:
from openai import OpenAI
client = OpenAI(api_key=os.environ.get("TOGETHER_API_KEY"), base_url="https://api.together.xyz/v1")

fcalling_llm = lambda fprompt : client.chat.completions.create(
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
  messages=[
    {
      "role": "system",
      "content": f"""
You are an agent who is responsible for managing various employee management platform, 
one of which is CliuckUp. 

You are given a number of tools as functions, you must use one of those tools and fillup 
all the parameters of those tools ,whose answers you will get from the given situation.

When you are presented with a technical situation, that a person of a team is facing, 
you must give the soulution utilizing your functionalities. 

First analyze the given situation to fully anderstand what is the intention of the user,
what they need and exactly which tool will fill up that necessity.

Then look into the parameters and extract all the relevant informations to fillup the 
parameter with right values.
"""
    },
    {
      "role": "user",
      "content": fprompt
    },
  ],
  temperature=0,
  max_tokens=4096,
  top_p=1,
  tools=tools,
  tool_choice="auto"
)

response = fcalling_llm(bench_data[1]["prompt"])
# response
response.choices[0].message.tool_calls[0].function

Function(arguments='{"team_id":"cm789","name":"Innovative Campaigns 2023","multiple_assignees":true,"features__due_dates__enabled":false,"features__due_dates__start_date":false,"features__due_dates__remap_due_dates":false,"features__due_dates__remap_closed_due_date":false,"features__time_tracking__enabled":false}', name='create_space')

In [23]:
import re 
from pprint import pprint



class FuncStringParser():
    def __init__(self):
        self.record_keeper = {}
        
        
    def replace_with_dict(self, match):
        replacement = f"dict{len(self.record_keeper)}"
        self.record_keeper[replacement] = match.group()
        # dict_counter += 1
        return replacement

    def arg_tuple_str_to_args_dict(self, func_args):
        # print(func_args)
        args_dict = {}
        func_args = func_args[1:-1].split(",")
        for arg in func_args:
            k, v = arg[:arg.find("=")], arg[arg.find("=")+1:]
            k, v = k.strip(), v.strip()
            
            if v.startswith("(") and v.endswith(")"):
                v = self.func_to_args_dict(v)
                # print(v)
            
            args_dict[k] = v

        return args_dict

    def replace_true_false_dfs(self, nested_dict):
        for key, value in nested_dict.items():
            # If the value is a nested dictionary, recursively process it
            if isinstance(value, dict):
                self.replace_true_false_dfs(value)
            else:
                nested_dict[key] = eval(value)
                
                    
    def func_to_args_dict(self, func_args):
        pattern_uncompiled = r'\(([^()]+)\)'
        pattern = re.compile(pattern_uncompiled)
        # print(re.findall(pattern_uncompiled, func_args))
        while len(re.findall(pattern_uncompiled, func_args)) > 1:
            func_args = re.sub(pattern, self.replace_with_dict, func_args)
            
        # print(re.findall(pattern_uncompiled, func_args))
        if sum([m.start(0) > 2 for m in re.finditer(r'\(([^()]+)\)', func_args)]):
            func_args = re.sub(pattern, self.replace_with_dict, func_args)
            # print(func_args)
        func_args = re.sub(pattern, self.replace_with_dict, func_args)
        # pprint(func_args)
            
        dict_record_keeper = {k:self.arg_tuple_str_to_args_dict(v) for k, v in self.record_keeper.items()}
        # print(dict_record_keeper)
        
        # inv_dict_record_keeper = {v:k for k,v in dict_record_keeper.items()}
        for i1, (k1, v1) in enumerate(dict_record_keeper.items()):
            for i2, (k2, v2) in enumerate(dict_record_keeper.items()):
                if i2 == i1:
                    break
                if k2 in v1.values():
                    # print(k2, v1)
                    dict_record_keeper[k1][list(v1.keys())[list(v1.values()).index(k2)]] = v2
        args_dict = list(dict_record_keeper.values())[-1]
        self.replace_true_false_dfs(args_dict)
        self.record_keeper = {}
        return args_dict
    
    def func_string_to_func_dict(self, func_string):
        func_name = func_string[:func_string.find("(")]
        func_args = func_string[func_string.find("("):]
        func_args_dict = self.func_to_args_dict(func_args)
        return {"name": func_name, "arguments": func_args_dict}
    
    def llm_response_to_func_dict(self, llm_response):
        if llm_response is not None:
            func_name = llm_response.name
            func_args_dict = json.loads(llm_response.arguments)
        else:
            func_name = None
            func_args_dict = None 
        return {"name": func_name, "arguments": func_args_dict}
        

                    
        
    
# Sample string
func = 'create_space(team_id="cm789", name="Innovative Campaigns 2023", multiple_assignees=True, features=(due_dates=(enabled=False, start_date=False, remap_due_dates=False, remap_closed_due_date=False), time_tracking=(enabled=False)))'

func_name = func[:func.find("(")]
func_args = func[func.find("("):]

fsp = FuncStringParser()
# fsp.func_to_args_dict(func_args)

fsp.func_string_to_func_dict(func)


{'name': 'create_space',
 'arguments': {'team_id': 'cm789',
  'name': 'Innovative Campaigns 2023',
  'multiple_assignees': True,
  'features': {'due_dates': {'enabled': False,
    'start_date': False,
    'remap_due_dates': False,
    'remap_closed_due_date': False},
   'time_tracking': {'enabled': False}}}}

In [24]:
from tqdm import tqdm
# from tqdm.auto import tqdm


llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    try:
        llm_output.append(response.choices[0].message.tool_calls[0].function)
    except TypeError:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

 22%|██▏       | 11/50 [04:51<17:12, 26.47s/it]


InternalServerError: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>api.together.xyz | 524: A timeout occurred</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrapper">
    <div id="cf-error-details" class="p-0">
        <header class="mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8">
            <h1 class="inline-block sm:block sm:mb-2 font-light text-60 lg:text-4xl text-black-dark leading-tight mr-2">
              <span class="inline-block">A timeout occurred</span>
              <span class="code-label">Error code 524</span>
            </h1>
            <div>
               Visit <a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_524&utm_campaign=api.together.xyz" target="_blank" rel="noopener noreferrer">cloudflare.com</a> for more information.
            </div>
            <div class="mt-3">2024-05-08 14:13:10 UTC</div>
        </header>
        <div class="my-8 bg-gradient-gray">
            <div class="w-240 lg:w-full mx-auto">
                <div class="clearfix md:px-8">
                  
<div id="cf-browser-status" class=" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center">
  <div class="relative mb-10 md:m-0">
    
    <span class="cf-icon-browser block md:hidden h-20 bg-center bg-no-repeat"></span>
    <span class="cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4"></span>
    
  </div>
  <span class="md:block w-full truncate">You</span>
  <h3 class="md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3">
    
    Browser
    
  </h3>
  <span class="leading-1.3 text-2xl text-green-success">Working</span>
</div>

<div id="cf-cloudflare-status" class=" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center">
  <div class="relative mb-10 md:m-0">
    <a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_524&utm_campaign=api.together.xyz" target="_blank" rel="noopener noreferrer">
    <span class="cf-icon-cloud block md:hidden h-20 bg-center bg-no-repeat"></span>
    <span class="cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4"></span>
    </a>
  </div>
  <span class="md:block w-full truncate">Bangalore</span>
  <h3 class="md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3">
    <a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_524&utm_campaign=api.together.xyz" target="_blank" rel="noopener noreferrer">
    Cloudflare
    </a>
  </h3>
  <span class="leading-1.3 text-2xl text-green-success">Working</span>
</div>

<div id="cf-host-status" class="cf-error-source relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center">
  <div class="relative mb-10 md:m-0">
    
    <span class="cf-icon-server block md:hidden h-20 bg-center bg-no-repeat"></span>
    <span class="cf-icon-error w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4"></span>
    
  </div>
  <span class="md:block w-full truncate">api.together.xyz</span>
  <h3 class="md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3">
    
    Host
    
  </h3>
  <span class="leading-1.3 text-2xl text-red-error">Error</span>
</div>

                </div>
            </div>
        </div>

        <div class="w-240 lg:w-full mx-auto mb-8 lg:px-8">
            <div class="clearfix">
                <div class="w-1/2 md:w-full float-left pr-6 md:pb-10 md:pr-0 leading-relaxed">
                    <h2 class="text-3xl font-normal leading-1.3 mb-4">What happened?</h2>
                    <p>The origin web server timed out responding to this request.</p>
                </div>
                <div class="w-1/2 md:w-full float-left leading-relaxed">
                    <h2 class="text-3xl font-normal leading-1.3 mb-4">What can I do?</h2>
                          <h3 class="text-15 font-semibold mb-2">If you're a visitor of this website:</h3>
      <p class="mb-6">Please try again in a few minutes.</p>

      <h3 class="text-15 font-semibold mb-2">If you're the owner of this website:</h3>
      <p><span>The connection to the origin web server was made, but the origin web server timed out before responding. The likely cause is an overloaded background task, database or application, stressing the resources on your web server. To resolve, please work with your hosting provider or web development team to free up resources for your database or overloaded application.</span> <a rel="noopener noreferrer" href="https://support.cloudflare.com/hc/en-us/articles/200171926-Error-524">Additional troubleshooting information here.</a></p>
                </div>
            </div>
        </div>

        <div class="cf-error-footer cf-wrapper w-240 lg:w-full py-10 sm:py-4 sm:px-8 mx-auto text-center sm:text-left border-solid border-0 border-t border-gray-300">
  <p class="text-13">
    <span class="cf-footer-item sm:block sm:mb-1">Cloudflare Ray ID: <strong class="font-semibold">880a0a11d99c1d1c</strong></span>
    <span class="cf-footer-separator sm:hidden">&bull;</span>
    <span id="cf-footer-item-ip" class="cf-footer-item hidden sm:block sm:mb-1">
      Your IP:
      <button type="button" id="cf-footer-ip-reveal" class="cf-footer-ip-reveal-btn">Click to reveal</button>
      <span class="hidden" id="cf-footer-ip">122.166.56.188</span>
      <span class="cf-footer-separator sm:hidden">&bull;</span>
    </span>
    <span class="cf-footer-item sm:block sm:mb-1"><span>Performance &amp; security by</span> <a rel="noopener noreferrer" href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_524&utm_campaign=api.together.xyz" id="brand_link" target="_blank">Cloudflare</a></span>
    
  </p>
  <script>(function(){function d(){var b=a.getElementById("cf-footer-item-ip"),c=a.getElementById("cf-footer-ip-reveal");b&&"classList"in b&&(b.classList.remove("hidden"),c.addEventListener("click",function(){c.classList.add("hidden");a.getElementById("cf-footer-ip").classList.remove("hidden")}))}var a=document;document.addEventListener&&a.addEventListener("DOMContentLoaded",d)})();</script>
</div><!-- /.error-footer -->


    </div>
</div>
</body>
</html>

In [None]:
from pprint import pprint

fsp = FuncStringParser()
result_list = []

# fsp.func_to_args_dict(func_args)
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    # bdata_solution_dict["name"] = schema_func_name_dict[bdata_solution_dict["name"]]
    bdata_solution_dict["arguments"] = flatten_dict(bdata_solution_dict["arguments"])
    
    llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    print(llm_response_dict == bdata_solution_dict)
    result_list.append(llm_response_dict == bdata_solution_dict)
    print("-----")
    
run_acc_1 = sum(result_list)/len(result_list)
run_acc_1

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative

0.66

In [None]:
from tqdm import tqdm
# from tqdm.auto import tqdm


llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    try:
        llm_output.append(response.choices[0].message.tool_calls[0].function)
    except TypeError:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

100%|██████████| 50/50 [03:55<00:00,  4.71s/it]


[Function(arguments='{"archived":false,"team_id":"team123"}', name='get_spaces'),
 Function(arguments='{"features__due_dates__enabled":false,"features__due_dates__remap_closed_due_date":false,"features__due_dates__remap_due_dates":false,"features__due_dates__start_date":false,"features__time_tracking__enabled":false,"multiple_assignees":true,"name":"Innovative Campaigns 2023","team_id":"cm789"}', name='create_space'),
 Function(arguments='{"space_id":"sd456"}', name='get_space_tags'),
 Function(arguments='{"space_id":"prj1122"}', name='get_space_tags'),
 Function(arguments='{"space_id":"qa789","tag__name":"MinorIssue","tag__tag_bg":"#FFFFE0","tag__tag_fg":"#000000","tag_name":"MinorIssue"}', name='delete_space_tag'),
 Function(arguments='{"admin_can_manage":true,"color":"#000000","features__due_dates__enabled":true,"features__due_dates__remap_closed_due_date":true,"features__due_dates__remap_due_dates":true,"features__due_dates__start_date":true,"features__time_tracking__enabled":true,

In [None]:
from pprint import pprint

fsp = FuncStringParser()
result_list = []

# fsp.func_to_args_dict(func_args)
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    # bdata_solution_dict["name"] = schema_func_name_dict[bdata_solution_dict["name"]]
    bdata_solution_dict["arguments"] = flatten_dict(bdata_solution_dict["arguments"])
    
    llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    print(llm_response_dict == bdata_solution_dict)
    result_list.append(llm_response_dict == bdata_solution_dict)
    print("-----")
    
run_acc_2 = sum(result_list)/len(result_list)
run_acc_2

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative

0.68

In [None]:
from tqdm import tqdm
# from tqdm.auto import tqdm


llm_output = []

for bdata in tqdm(bench_data):
    response = fcalling_llm(bdata["prompt"])
    try:
        llm_output.append(response.choices[0].message.tool_calls[0].function)
    except TypeError:
        print(bdata["prompt"])
        print(response)
        llm_output.append(None)
        print("------")
llm_output

100%|██████████| 50/50 [03:41<00:00,  4.43s/it]


[Function(arguments='{"archived":false,"team_id":"team123"}', name='get_spaces'),
 Function(arguments='{"features__due_dates__enabled":false,"features__due_dates__remap_closed_due_date":false,"features__due_dates__remap_due_dates":false,"features__due_dates__start_date":false,"features__time_tracking__enabled":false,"multiple_assignees":true,"name":"Innovative Campaigns 2023","team_id":"cm789"}', name='create_space'),
 Function(arguments='{"space_id":"sd456"}', name='get_space_tags'),
 Function(arguments='{"space_id":"prj1122"}', name='get_space_tags'),
 Function(arguments='{"space_id":"qa789","tag__name":"MinorIssue","tag__tag_bg":"#FFFFE0","tag__tag_fg":"#000000","tag_name":"MinorIssue"}', name='delete_space_tag'),
 Function(arguments='{"admin_can_manage":true,"color":"#000000","features__due_dates__enabled":true,"features__due_dates__remap_closed_due_date":false,"features__due_dates__remap_due_dates":true,"features__due_dates__start_date":true,"features__time_tracking__enabled":true

In [None]:
from pprint import pprint

fsp = FuncStringParser()
result_list = []

# fsp.func_to_args_dict(func_args)
for bdata, llm_response in zip(bench_data, llm_output):
    bdata_solution_dict = fsp.func_string_to_func_dict(bdata["solution"])
    bdata_solution_dict["arguments"] = flatten_dict(bdata_solution_dict["arguments"])
    
    llm_response_dict = fsp.llm_response_to_func_dict(llm_response)
    
    pprint(bdata_solution_dict)
    pprint(llm_response_dict)
    print(llm_response_dict == bdata_solution_dict)
    result_list.append(llm_response_dict == bdata_solution_dict)
    print("-----")
    
run_acc_3 = sum(result_list)/len(result_list)
run_acc_3

{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
{'arguments': {'archived': False, 'team_id': 'team123'}, 'name': 'get_spaces'}
True
-----
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative Campaigns 2023',
               'team_id': 'cm789'},
 'name': 'create_space'}
{'arguments': {'features__due_dates__enabled': False,
               'features__due_dates__remap_closed_due_date': False,
               'features__due_dates__remap_due_dates': False,
               'features__due_dates__start_date': False,
               'features__time_tracking__enabled': False,
               'multiple_assignees': True,
               'name': 'Innovative

0.66

In [None]:
print("ClickUp Benchmark - Flattened Schema + Focused System Prompt + Function Description Optimized : Accuracy")
print("=========================================")
print(f"Run 1: {run_acc_1}")
print(f"Run 2: {run_acc_2}")
print(f"Run 3: {run_acc_3}")

ClickUp Benchmark - Flattened Schema + Focused System Prompt + Function Description Optimized : Accuracy
Run 1: 0.66
Run 2: 0.68
Run 3: 0.66
