In [1]:
import json
from crewai import Crew, Process, Agent, Task
from langchain_openai import ChatOpenAI
from utils.config import (
    parse_json_des,
    expected_format,
    correct_format,
    search_op,
)
from utils.utilities import getCompanies
from tools.test_tool import getLocUUID, getCatUUID, getComp
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv(), override=True)

manager_llm = ChatOpenAI(model_name="gpt-4o")
agent_llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0,response_format={ "type": "json_object" })

JsonParseAgent = Agent(
    role="Given a text, identify and extract the relevant information for these keys and output it in a JSON format",
    goal="convert text to json json",
    backstory="json parser",
    verbose=False,
    allow_delegation=False,
    LLM=agent_llm,
)


JsonParseAgentAgent2 = Agent(
    role="take the json data from the previous agent and re parse it and output it in a JSON format",
    goal=f"get the json data in a json format {expected_format}",
    backstory="re parsing the json data in a json format",
    verbose=False,
    allow_delegation=False,
    LLM=agent_llm,
)

JsonParseAgentAgent3 = Agent(
    role="correctly format the json string and find companies",
    goal="correctly format the json string and find companies",
    # backstory="intelligent json string formatter and company finder",
    backstory="intelligent json string formatter",
    verbose=True,
    allow_delegation=False,
    LLM=agent_llm,
    max_iter=2,
)

ParseJsonTask = Task(
    agent=JsonParseAgent,
    description=parse_json_des,
    expected_output="json",
    human_input=True,
)

ParseJsonTaskTask2 = Task(
    agent=JsonParseAgentAgent2,
    description=f"""restructure the json data, by taking the non-null parameters and then output it in a json format,
                    Please do not hallucination any values, use the provided data in the json only.
                    when restructuring the json data, pay attention to search operators {search_op} and use the appropriate one
                    find the location's uuid using the tools you have if a location is provided, and do same for category if provided
                    for the field_ids leave it as it in the {expected_format} just focus on query section,
                    """,
    expected_output=f"""result should be in json string, in the example format {expected_format} also when parse the json, Please parse the final json string correctly
                """,
    context=[ParseJsonTask],
    tools=[getLocUUID, getCatUUID],
)

ParseJsonTask3 = Task(
    agent=JsonParseAgentAgent3,
    context=[ParseJsonTaskTask2],
    # description=f"{correct_format} after use the tools you have to find companies using the json string",
    description=f"{correct_format}",
    # tools=[getComp],
    expected_output="json",
)

agent_crew = Crew(
    agents=[JsonParseAgent, JsonParseAgentAgent2, JsonParseAgentAgent3],
    tasks=[ParseJsonTask, ParseJsonTaskTask2, ParseJsonTask3],
    process=Process.sequential,
    verbose=2,
    manager_llm=manager_llm,
)


results = agent_crew.kickoff()

query = json.loads(results)

import requests

url = "https://api.crunchbase.com/api/v4/searches/organizations"
headers = {
    "accept": "application/json",
    "X-cb-user-key": "843940256c0a8bdcddc952aaac2eebc8",
    "Content-Type": "application/json",
}
data = query

response = requests.post(url, headers=headers, data=json.dumps(data))
response_data = response.json()

print(json.dumps(response_data, indent=4))



                response_format was transferred to model_kwargs.
                Please confirm that response_format is what you intended.


[1m[95m [2024-07-20 13:33:09][DEBUG]: == Working Agent: Given a text, identify and extract the relevant information for these keys and output it in a JSON format[00m
[1m[95m [2024-07-20 13:33:09][INFO]: == Starting Task: 
Extract the defined parameters from the given text and convert them into a JSON object. Use the descriptions to determine which key has its value in the text. Convert numbers represented in words (e.g., "25 million") to figures (e.g., 25000000). 

Below are the keys and their descriptions:

acquirer_identifier: Name of the organization that made the acquisition
aliases: Alternate or previous names for the organization
categories: Category the organization falls under, eg AI, biotech, Saas etc
closed_on: Date the organization was closed
company_type: Whether an Organization is for profit or non-profit
created_on: Date the organization was created
delisted_on: The date when the Organization removed its stock from the stock exchange.
demo_days: Whether an accelerato

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [2]:
import json

In [4]:
results

'{\n  "field_ids": [\n    "identifier",\n    "categories",\n    "location_identifiers",\n    "short_description",\n    "rank_org"\n  ],\n  "order": [\n    {\n      "field_id": "rank_org",\n      "sort": "asc"\n    }\n  ],\n  "query": [\n    {\n      "type": "predicate",\n      "field_id": "categories",\n      "operator_id": "includes",\n      "values": [\n        "d82f9f46-8a29-6c2b-791c-8006474b7e42"\n      ]\n    },\n    {\n      "type": "predicate",\n      "field_id": "location_identifiers",\n      "operator_id": "includes",\n      "values": [\n        "b41404d2-8ecc-caf5-1268-8fdd3afe73ee"\n      ]\n    }\n  ],\n  "limit": 10\n}'

In [5]:
query =  json.loads(results)

In [6]:
query

{'field_ids': ['identifier',
  'categories',
  'location_identifiers',
  'short_description',
  'rank_org'],
 'order': [{'field_id': 'rank_org', 'sort': 'asc'}],
 'query': [{'type': 'predicate',
   'field_id': 'categories',
   'operator_id': 'includes',
   'values': ['d82f9f46-8a29-6c2b-791c-8006474b7e42']},
  {'type': 'predicate',
   'field_id': 'location_identifiers',
   'operator_id': 'includes',
   'values': ['b41404d2-8ecc-caf5-1268-8fdd3afe73ee']}],
 'limit': 10}

In [7]:
import requests

url = "https://api.crunchbase.com/api/v4/searches/organizations"
headers = {
    "accept": "application/json",
    "X-cb-user-key": "843940256c0a8bdcddc952aaac2eebc8",
    "Content-Type": "application/json",
}
data = query

response = requests.post(url, headers=headers, data=json.dumps(data))
response_data = response.json()

print(json.dumps(response_data, indent=4))


{
    "count": 1082,
    "entities": [
        {
            "uuid": "19f88e40-7f0c-4467-aeb0-22396f0950ac",
            "properties": {
                "identifier": {
                    "permalink": "fetcherr",
                    "image_id": "os9r2pxw6rt8c1eqt0rn",
                    "uuid": "19f88e40-7f0c-4467-aeb0-22396f0950ac",
                    "entity_def_id": "organization",
                    "value": "Fetcherr"
                },
                "short_description": "Fetcherr uses AI to revolutionize airline revenue management with real-time dynamic pricing and market simulation.",
                "rank_org": 309,
                "categories": [
                    {
                        "entity_def_id": "category",
                        "permalink": "air-transportation",
                        "uuid": "d82f9f46-8a29-6c2b-791c-8006474b7e42",
                        "value": "Air Transportation"
                    },
                    {
                        "

In [6]:
{"field_ids": ["identifier", "categories", "location_identifiers", "short_description", "rank_org"], "order": [{"field_id": "rank_org", "sort": "asc"}], "query": [{"type": "predicate", "field_id": "num_employees_enum", "operator_id": "includes", "values": ["c_00051_00100"]}], "limit": 10}

{'field_ids': ['identifier',
  'categories',
  'location_identifiers',
  'short_description',
  'rank_org'],
 'order': [{'field_id': 'rank_org', 'sort': 'asc'}],
 'query': [{'type': 'predicate',
   'field_id': 'num_employees_enum',
   'operator_id': 'includes',
   'values': ['c_00051_00100']}],
 'limit': 10}