In [107]:
from dotenv import load_dotenv
load_dotenv()
import logging
from bs4 import BeautifulSoup
import html2text
import httpx
import yaml
import json
from pydantic import Field, BaseModel
from langgraph.graph import MessagesState, StateGraph, START, END
from typing import List
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage
from langchain_openai import ChatOpenAI
from langgraph.types import Send
import operator
from typing import Annotated
from typing import NamedTuple
import composio_langchain

# Configure root logging once for the notebook: records at INFO and above are
# written to the console. Use logging.DEBUG for more detail, or add
# logging.FileHandler("scraper.log") to `handlers` to also log to a file.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger named after the current module.
logger = logging.getLogger(__name__)

In [108]:
def fetch_documents_with_links_html(url: str) -> tuple[str, list[tuple[str, str]]]:
    """Fetch a page and return its main content as markdown plus the links found in it.

    The main content is the first ``div`` with class ``theme-doc-markdown markdown``
    or, failing that, the first ``article`` element. Links are collected from that
    element only, before it is converted to markdown.

    Args:
        url (str): The URL of the document to fetch.

    Returns:
        tuple[str, list[tuple[str, str]]]: ``(markdown_text, links)`` where ``links``
        is a list of ``(href, title)`` tuples; ``href`` is returned exactly as written
        in the page (it is not resolved against ``url``).
        If neither content container is found, ``("", [])`` is returned.
        On an HTTP or transport error, the first item is an error message and the
        list is empty; no exception is raised for those errors.
    """
    links: list[tuple[str, str]] = []

    try:
        # The context manager closes the client (and its connection pool) on every
        # path, including errors. The client-level timeout already covers the request.
        with httpx.Client(follow_redirects=True, timeout=10) as httpx_client:
            response = httpx_client.get(url)
            response.raise_for_status()
            html_content = response.text
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        return f"Encountered an HTTP error: {str(e)}", []  # Error message and empty links

    soup = BeautifulSoup(html_content, 'html.parser')

    target_div = soup.find('div', class_="theme-doc-markdown markdown")  # langchain
    if not target_div:
        target_div = soup.find('article')  # langgraph
    if not target_div:
        # No recognised content container: nothing to convert and no links collected.
        return "", links

    # Extract links *before* converting to markdown; anchors without an href are skipped.
    for a_tag in target_div.find_all('a'):
        link = a_tag.get('href')
        if link:
            links.append((link, a_tag.get_text(strip=True)))

    markdown_converter = html2text.HTML2Text()
    markdown_converter.body_width = 0  # Disable line wrapping for links to stay on one line
    markdown_text = markdown_converter.handle(str(target_div))

    return markdown_text, links

In [109]:
import re

# Your input text
text = """
[ADS4GPTs](/docs/integrations/tools/ads4gpts)| Integrate AI native advertising into your Agentic application.  
[AgentQL](/docs/integrations/tools/agentql)| AgentQL tools provides web interaction and structured data extraction...  
[AINetwork Toolkit](/docs/integrations/tools/ainetwork)| AI Network is a layer 1 blockchain designed to accommodate large-scal...  
[Alpha Vantage](/docs/integrations/tools/alpha_vantage)| Alpha Vantage Alpha Vantage provides realtime and historical financia...  
[Amadeus Toolkit](/docs/integrations/tools/amadeus)| This notebook walks you through connecting LangChain to the Amadeus t...  
[Apify Actor](/docs/integrations/tools/apify_actors)| Apify Actors are cloud programs designed for a wide range of web scra...  
[ArXiv](/docs/integrations/tools/arxiv)| This notebook goes over how to use the arxiv tool with an agent.  
[AskNews](/docs/integrations/tools/asknews)| AskNews infuses any LLM with the latest global news (or historical ne...  
[AWS Lambda](/docs/integrations/tools/awslambda)| Amazon AWS Lambda is a serverless computing service provided by Amazo...  
[Azure AI Services Toolkit](/docs/integrations/tools/azure_ai_services)| This toolkit is used to interact with the Azure AI Services API to ac...  
[Azure Cognitive Services Toolkit](/docs/integrations/tools/azure_cognitive_services)| This toolkit is used to interact with the Azure Cognitive Services AP...  
[Azure Container Apps dynamic sessions](/docs/integrations/tools/azure_dynamic_sessions)| Azure Container Apps dynamic sessions provides a secure and scalable ...  
[Shell (bash)](/docs/integrations/tools/bash)| Giving agents access to the shell is powerful (though risky outside a...  
[Bearly Code Interpreter](/docs/integrations/tools/bearly)| Bearly Code Interpreter allows for remote execution of code. This mak...  
[Bing Search](/docs/integrations/tools/bing_search)| Bing Search is an Azure service and enables safe, ad-free, location-a...  
[Brave Search](/docs/integrations/tools/brave_search)| This notebook goes over how to use the Brave Search tool.  
[Cassandra Database Toolkit](/docs/integrations/tools/cassandra_database)| Apache Cassandra® is a widely used database for storing transactional...  
[CDP](/docs/integrations/tools/cdp_agentkit)| The CDP Agentkit toolkit contains tools that enable an LLM agent to i...  
[ChatGPT Plugins](/docs/integrations/tools/chatgpt_plugins)| OpenAI has deprecated plugins.  
[ClickUp Toolkit](/docs/integrations/tools/clickup)| ClickUp is an all-in-one productivity platform that provides small an...  
[Cogniswitch Toolkit](/docs/integrations/tools/cogniswitch)| CogniSwitch is used to build production ready applications that can c...  
[Connery Toolkit and Tools](/docs/integrations/tools/connery)| Using the Connery toolkit and tools, you can integrate Connery Action...  
[Dall-E Image Generator](/docs/integrations/tools/dalle_image_generator)| OpenAI Dall-E are text-to-image models developed by OpenAI using deep...  
[Dappier](/docs/integrations/tools/dappier)| Dappier connects any LLM or your Agentic AI to real-time, rights-clea...  
[Databricks Unity Catalog (UC)](/docs/integrations/tools/databricks)| This notebook shows how to use UC functions as LangChain tools, with ...  
[DataForSEO](/docs/integrations/tools/dataforseo)| DataForSeo provides comprehensive SEO and digital marketing data solu...  
[Dataherald](/docs/integrations/tools/dataherald)| This notebook goes over how to use the dataherald component.  
[DuckDuckGo Search](/docs/integrations/tools/ddg)| This guide shows over how to use the DuckDuckGo search component.  
[Discord](/docs/integrations/tools/discord)| This notebook provides a quick overview for getting started with Disc...  
[E2B Data Analysis](/docs/integrations/tools/e2b_data_analysis)| E2B's cloud environments are great runtime sandboxes for LLMs.  
[Eden AI](/docs/integrations/tools/edenai_tools)| This Jupyter Notebook demonstrates how to use Eden AI tools with an A...  
[ElevenLabs Text2Speech](/docs/integrations/tools/eleven_labs_tts)| This notebook shows how to interact with the ElevenLabs API to achiev...  
[Exa Search](/docs/integrations/tools/exa_search)| Exa is a search engine fully designed for use by LLMs. Search for doc...  
[File System](/docs/integrations/tools/filesystem)| LangChain provides tools for interacting with a local file system out...  
[FinancialDatasets Toolkit](/docs/integrations/tools/financial_datasets)| The financial datasets stock market API provides REST endpoints that ...  
[FMP Data](/docs/integrations/tools/fmp-data)| Access financial market data through natural language queries.  
[Github Toolkit](/docs/integrations/tools/github)| The Github toolkit contains tools that enable an LLM agent to interac...  
[Gitlab Toolkit](/docs/integrations/tools/gitlab)| The Gitlab toolkit contains tools that enable an LLM agent to interac...  
[Gmail Toolkit](/docs/integrations/tools/gmail)| This will help you getting started with the GMail toolkit. This toolk...  
[GOAT](/docs/integrations/tools/goat)| GOAT is the finance toolkit for AI agents.  
[Golden Query](/docs/integrations/tools/golden_query)| Golden provides a set of natural language APIs for querying and enric...  
[Google Books](/docs/integrations/tools/google_books)| Overview  
[Google Calendar Toolkit](/docs/integrations/tools/google_calendar)| Google Calendar is a product of Google Workspace that allows users to...  
[Google Cloud Text-to-Speech](/docs/integrations/tools/google_cloud_texttospeech)| Google Cloud Text-to-Speech enables developers to synthesize natural-...  
[Google Drive](/docs/integrations/tools/google_drive)| This notebook walks through connecting a LangChain to the Google Driv...  
[Google Finance](/docs/integrations/tools/google_finance)| This notebook goes over how to use the Google Finance Tool to get inf...  
[Google Imagen](/docs/integrations/tools/google_imagen)| Imagen on Vertex AI brings Google's state of the art image generative...  
[Google Jobs](/docs/integrations/tools/google_jobs)| This notebook goes over how to use the Google Jobs Tool to fetch curr...  
[Google Lens](/docs/integrations/tools/google_lens)| This notebook goes over how to use the Google Lens Tool to fetch info...  
[Google Places](/docs/integrations/tools/google_places)| This notebook goes through how to use Google Places API  
[Google Scholar](/docs/integrations/tools/google_scholar)| This notebook goes through how to use Google Scholar Tool  
[Google Search](/docs/integrations/tools/google_search)| This notebook goes over how to use the google search component.  
[Google Serper](/docs/integrations/tools/google_serper)| This notebook goes over how to use the Google Serper component to sea...  
[Google Trends](/docs/integrations/tools/google_trends)| This notebook goes over how to use the Google Trends Tool to fetch tr...  
[Gradio](/docs/integrations/tools/gradio_tools)| There are many 1000s of Gradio apps on Hugging Face Spaces. This libr...  
[GraphQL](/docs/integrations/tools/graphql)| GraphQL is a query language for APIs and a runtime for executing thos...  
[HuggingFace Hub Tools](/docs/integrations/tools/huggingface_tools)| Huggingface Tools that supporting text I/O can be  
[Human as a tool](/docs/integrations/tools/human_tools)| Human are AGI so they can certainly be used as a tool to help out AI ...  
[Hyperbrowser Browser Agent Tools](/docs/integrations/tools/hyperbrowser_browser_agent_tools)| Hyperbrowser is a platform for running, running browser agents, and s...  
[Hyperbrowser Web Scraping Tools](/docs/integrations/tools/hyperbrowser_web_scraping_tools)| Hyperbrowser is a platform for running and scaling headless browsers....  
[IBM watsonx.ai](/docs/integrations/tools/ibm_watsonx)| WatsonxToolkit is a wrapper for IBM watsonx.ai Toolkit.  
[IFTTT WebHooks](/docs/integrations/tools/ifttt)| This notebook shows how to use IFTTT Webhooks.  
[Infobip](/docs/integrations/tools/infobip)| This notebook that shows how to use Infobip API wrapper to send SMS m...  
[Ionic Shopping Tool](/docs/integrations/tools/ionic_shopping)| Ionic is a plug and play ecommerce marketplace for AI Assistants. By ...  
[Jenkins](/docs/integrations/tools/jenkins)| Tools for interacting with Jenkins.  
[Jina Search](/docs/integrations/tools/jina_search)| This notebook provides a quick overview for getting started with Jina...  
[Jira Toolkit](/docs/integrations/tools/jira)| This notebook goes over how to use the Jira toolkit.  
[JSON Toolkit](/docs/integrations/tools/json)| This notebook showcases an agent interacting with large JSON/dict obj...  
[Lemon Agent](/docs/integrations/tools/lemonai)| Lemon Agent helps you build powerful AI assistants in minutes and aut...  
[LinkupSearchTool](/docs/integrations/tools/linkup_search)| Linkup provides an API to connect LLMs to the web and the Linkup Prem...  
[Memgraph](/docs/integrations/tools/memgraph)| Overview  
[Memorize](/docs/integrations/tools/memorize)| Fine-tuning LLM itself to memorize information using unsupervised lea...  
[Mojeek Search](/docs/integrations/tools/mojeek_search)| The following notebook will explain how to get results using Mojeek S...  
[MultiOn Toolkit](/docs/integrations/tools/multion)| MultiON has built an AI Agent that can interact with a broad array of...  
[NASA Toolkit](/docs/integrations/tools/nasa)| This notebook shows how to use agents to interact with the NASA toolk...  
[Naver Search](/docs/integrations/tools/naver_search)| Overview  
[Nuclia Understanding](/docs/integrations/tools/nuclia)| Nuclia automatically indexes your unstructured data from any internal...  
[NVIDIA Riva: ASR and TTS](/docs/integrations/tools/nvidia_riva)| NVIDIA Riva  
[Office365 Toolkit](/docs/integrations/tools/office365)| Microsoft 365 is a product family of productivity software, collabora...  
[OpenAPI Toolkit](/docs/integrations/tools/openapi)| We can construct agents to consume arbitrary APIs, here APIs conforma...  
[Natural Language API Toolkits](/docs/integrations/tools/openapi_nla)| Natural Language API Toolkits (NLAToolkits) permit LangChain Agents t...  
[OpenGradient](/docs/integrations/tools/opengradient_toolkit)| This notebook shows how to build tools using the OpenGradient toolkit...  
[OpenWeatherMap](/docs/integrations/tools/openweathermap)| This notebook goes over how to use the OpenWeatherMap component to fe...  
[Oracle AI Vector Search: Generate Summary](/docs/integrations/tools/oracleai)| Oracle AI Vector Search is designed for Artificial Intelligence (AI) ...  
[Oxylabs](/docs/integrations/tools/oxylabs)| Oxylabs is a market-leading web intelligence collection platform, dri...  
[Pandas Dataframe](/docs/integrations/tools/pandas)| This notebook shows how to use agents to interact with a Pandas DataF...  
[Passio NutritionAI](/docs/integrations/tools/passio_nutrition_ai)| To best understand how NutritionAI can give your agents super food-nu...  
[PaymanAI](/docs/integrations/tools/payman-tool)| PaymanAI provides functionality to send and receive payments (fiat an...  
[Permit](/docs/integrations/tools/permit)| Permit is an access control platform that provides fine-grained, real...  
[PlayWright Browser Toolkit](/docs/integrations/tools/playwright)| Playwright is an open-source automation tool developed by Microsoft t...  
[Polygon IO Toolkit and Tools](/docs/integrations/tools/polygon)| This notebook shows how to use agents to interact with the Polygon IO...  
[PowerBI Toolkit](/docs/integrations/tools/powerbi)| This notebook showcases an agent interacting with a Power BI Dataset....  
[Prolog](/docs/integrations/tools/prolog_tool)| LangChain tools that use Prolog rules to generate answers.  
[PubMed](/docs/integrations/tools/pubmed)| PubMed® comprises more than 35 million citations for biomedical liter...  
[Python REPL](/docs/integrations/tools/python)| Sometimes, for complex calculations, rather than have an LLM generate...  
[Reddit Search](/docs/integrations/tools/reddit_search)| In this notebook, we learn how the Reddit search tool works.  
[Requests Toolkit](/docs/integrations/tools/requests)| We can use the Requests toolkit to construct agents that generate HTT...  
[Riza Code Interpreter](/docs/integrations/tools/riza)| The Riza Code Interpreter is a WASM-based isolated environment for ru...  
[Robocorp Toolkit](/docs/integrations/tools/robocorp)| This notebook covers how to get started with Robocorp Action Server a...  
[Salesforce](/docs/integrations/tools/salesforce)| Tools for interacting with Salesforce.  
[SceneXplain](/docs/integrations/tools/sceneXplain)| SceneXplain is an ImageCaptioning service accessible through the Scen...  
[ScrapeGraph](/docs/integrations/tools/scrapegraph)| This notebook provides a quick overview for getting started with Scra...  
[SearchApi](/docs/integrations/tools/searchapi)| This notebook shows examples of how to use SearchApi to search the we...  
[SearxNG Search](/docs/integrations/tools/searx_search)| This notebook goes over how to use a self hosted SearxNG search API t...  
[Semantic Scholar API Tool](/docs/integrations/tools/semanticscholar)| This notebook demos how to use the semantic scholar tool with an agen...  
[SerpAPI](/docs/integrations/tools/serpapi)| This notebook goes over how to use the SerpAPI component to search th...  
[Slack Toolkit](/docs/integrations/tools/slack)| This will help you getting started with the Slack toolkit. For detail...  
[Spark SQL Toolkit](/docs/integrations/tools/spark_sql)| This notebook shows how to use agents to interact with Spark SQL. Sim...  
[SQLDatabase Toolkit](/docs/integrations/tools/sql_database)| This will help you getting started with the SQL Database toolkit. For...  
[StackExchange](/docs/integrations/tools/stackexchange)| Stack Exchange is a network of question-and-answer (Q&A) websites on ...  
[Steam Toolkit](/docs/integrations/tools/steam)| Steam (Wikipedia)) is a video game digital distribution service and s...  
[Stripe](/docs/integrations/tools/stripe)| This notebook provides a quick overview for getting started with Stri...  
[Tableau](/docs/integrations/tools/tableau)| This notebook provides a quick overview for getting started with Tabl...  
[Taiga](/docs/integrations/tools/taiga)| This notebook provides a quick overview for getting started with Taig...  
[Tavily Extract](/docs/integrations/tools/tavily_extract)| Tavily is a search engine built specifically for AI agents (LLMs), de...  
[Tavily Search](/docs/integrations/tools/tavily_search)| Tavily's Search API is a search engine built specifically for AI agen...  
[Tilores](/docs/integrations/tools/tilores)| This notebook covers how to get started with the Tilores tools.  
[Twilio](/docs/integrations/tools/twilio)| This notebook goes over how to use the Twilio API wrapper to send a m...  
[Upstage](/docs/integrations/tools/upstage_groundedness_check)| This notebook covers how to get started with Upstage groundedness che...  
[Valthera](/docs/integrations/tools/valthera)| Enable AI agents to engage users when they're most likely to respond.  
[ValyuContext](/docs/integrations/tools/valyu_context)| Valyu allows AI applications and agents to search the internet and pr...  
[Wikidata](/docs/integrations/tools/wikidata)| Wikidata is a free and open knowledge base that can be read and edite...  
[Wikipedia](/docs/integrations/tools/wikipedia)| Wikipedia is a multilingual free online encyclopedia written and main...  
[Wolfram Alpha](/docs/integrations/tools/wolfram_alpha)| This notebook goes over how to use the wolfram alpha component.  
[Writer Tools](/docs/integrations/tools/writer)| This notebook provides a quick overview for getting started with Writ...  
[Yahoo Finance News](/docs/integrations/tools/yahoo_finance_news)| This notebook goes over how to use the yahoofinancenews tool with an ...  
[You.com Search](/docs/integrations/tools/you)| The you.com API is a suite of tools designed to help developers groun...  
[YouTube](/docs/integrations/tools/youtube)| YouTube Search package searches YouTube videos avoiding using their h...  
[Zapier Natural Language Actions](/docs/integrations/tools/zapier)| Deprecated This API will be sunset on 2023-11-17//nla.zapier.com/star...  
[ZenGuard AI](/docs/integrations/tools/zenguard)| This tool lets you quickly set up ZenGuard AI in your Langchain-power...
"""

# Prefix that is put in front of every extracted slug to build a full URL.
base_url = "https://python.langchain.com/docs/integrations/tools/"

# Capture whatever follows "/docs/integrations/tools/" in each markdown link;
# the capture ends at the closing ")" or at the first whitespace character.
pattern = r"/docs/integrations/tools/([^)\s]+)"
matches = re.findall(pattern, text)

# Slugs double as the tool names; the full links are built in the same order.
name_tools = list(matches)
links_tools = [f"{base_url}{slug}" for slug in matches]

# Show the extracted tool names (slugs).
print(name_tools)

['ads4gpts', 'agentql', 'ainetwork', 'alpha_vantage', 'amadeus', 'apify_actors', 'arxiv', 'asknews', 'awslambda', 'azure_ai_services', 'azure_cognitive_services', 'azure_dynamic_sessions', 'bash', 'bearly', 'bing_search', 'brave_search', 'cassandra_database', 'cdp_agentkit', 'chatgpt_plugins', 'clickup', 'cogniswitch', 'connery', 'dalle_image_generator', 'dappier', 'databricks', 'dataforseo', 'dataherald', 'ddg', 'discord', 'e2b_data_analysis', 'edenai_tools', 'eleven_labs_tts', 'exa_search', 'filesystem', 'financial_datasets', 'fmp-data', 'github', 'gitlab', 'gmail', 'goat', 'golden_query', 'google_books', 'google_calendar', 'google_cloud_texttospeech', 'google_drive', 'google_finance', 'google_imagen', 'google_jobs', 'google_lens', 'google_places', 'google_scholar', 'google_search', 'google_serper', 'google_trends', 'gradio_tools', 'graphql', 'huggingface_tools', 'human_tools', 'hyperbrowser_browser_agent_tools', 'hyperbrowser_web_scraping_tools', 'ibm_watsonx', 'ifttt', 'infobip', '

In [110]:
from bs4 import BeautifulSoup
import html2text
import httpx

def fetch_documents(url: str) -> str:
    """Fetch a document from a URL and return the markdownified text.

    All ``img`` elements are removed first. The element that is converted is the
    first match of, in order: the ``div`` with class ``theme-doc-markdown markdown``,
    the first ``article``, the ``html`` root, and finally the whole parsed document.

    Args:
        url (str): The URL of the document to fetch.

    Returns:
        str: The markdownified text of the document. On an HTTP or transport error
        a string starting with ``"Encountered an HTTP error: "`` is returned instead
        of raising, so callers that need to tell the two apart must check for it.
    """
    try:
        # The context manager closes the client (and its connection pool) on every
        # path, including errors. The client-level timeout already covers the request.
        with httpx.Client(follow_redirects=True, timeout=10) as httpx_client:
            response = httpx_client.get(url)
            response.raise_for_status()
            # Hand BeautifulSoup the decoded text explicitly (as the sibling
            # fetch_documents_with_links_html does) rather than the Response object.
            html_content = response.text
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        return f"Encountered an HTTP error: {str(e)}"

    soup = BeautifulSoup(html_content, 'html.parser')

    # Images carry no useful text for the prompt; drop them before conversion.
    for img_tag in soup.find_all('img'):
        img_tag.decompose()

    target = (
        soup.find('div', class_="theme-doc-markdown markdown")  # langchain
        or soup.find('article')  # langgraph
        or soup.find('html')  # any other page
        or soup  # fragment without an <html> root
    )
    return html2text.html2text(str(target))


def extract_text_between_headings(text):
    """
    Return the text located between a first "#" heading and the following "##" heading.

    Parameters:
        text (str): The document to search.

    Returns:
        str: The text between the two headings with surrounding whitespace
        removed, or None when no such pair of headings is present.
    """
    # A "#" heading line, a lazily captured body, then a "##" heading line.
    # DOTALL lets the captured body run across several lines.
    heading_pair = re.compile(r"#\s*[^\n]+\s*(.*?)\s*##\s*[^\n]+", re.DOTALL)
    found = heading_pair.search(text)
    if found is None:
        return None
    return found.group(1).strip()
# Empty lookup tables, keyed by tool name: one for documentation links, one for
# documentation text. A later cell rebinds both names from
# "tools_link_json.json" and "tools_doc_json.json", so these empty dicts are
# only placeholders.
dict_tool_link = {}
dict_tool_doc = {}
    # Leftover from a scraping loop that is no longer in this cell (presumably it
    # iterated over name_tools / links_tools by index — TODO confirm or delete):
    # dict_tool_doc[name_tools[i]] = extract_text_between_headings(fetch_documents(links_tools[i]))


In [111]:
import json

# Serialize data into file:
# json.dump( dict_tool_doc, open( "tools_doc_json.json", 'w' ) )
# json.dump( dict_tool_link, open( "tools_link_json.json", 'w' ) )
# Read data from file:
# data = json.load( open( "tools_json.json" ) )

In [112]:
# Load the cached lookup tables from disk. `with` closes each file handle as
# soon as it has been parsed; the encoding is pinned to UTF-8 so the result
# does not depend on the platform's default text encoding.
with open("tools_link_json.json", encoding="utf-8") as link_file:
    dict_tool_link = json.load(link_file)
with open("tools_doc_json.json", encoding="utf-8") as doc_file:
    dict_tool_doc = json.load(doc_file)
def lowercase_keys(input_dict):
    """
    Build a copy of ``input_dict`` whose keys are lower-cased.

    The input is left untouched. When two keys differ only by case, the value
    of the one that comes later in iteration order is kept.
    """
    lowered = {}
    for key, value in input_dict.items():
        lowered[key.lower()] = value
    return lowered

# Normalise the keys of both lookup tables to lower case. sdk_production_node
# lower-cases the SDK name it returns and code_production_node uses that name
# to index dict_tool_link, so the keys have to be lower case as well.
dict_tool_link = lowercase_keys(dict_tool_link)
dict_tool_doc = lowercase_keys(dict_tool_doc)

In [113]:
# System prompt for functional_analysis_node. It is a str.format template with
# a single placeholder, {desc}, which receives the free-text description of the
# function to build.
# NOTE(review): the prompt text contains several typos ("You job", "the the",
# "arguements", "Please all mentioned", "WHat"); it is sent to the model as-is.
Initial_prompt = """You are an expert python developer. You will be given a description of a python function. 

You job is to estimate and extract the following information:

- What exactly does this python do. What is the detailed objective of the function. Please write 1-5 lines
- Suggest or extract the name of the the function
- What would be the inputs/arguements required into this function to make it work. Please all mentioned the type of each input
- WHat would be output produced by this input. Please mention the output type 

Here is the description of the function you need to create:
<description>
{desc}
</description>
"""



In [114]:
# Chat model shared by the graph nodes in this notebook (they all call `llm`).
# temperature=0 is requested for every call and streaming is enabled.
# NOTE(review): credentials are presumably picked up from the environment
# populated by load_dotenv() in the first cell — confirm.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini", streaming=True)

In [115]:


class FunctionInstructions(BaseModel):
    """Instructions for defining a python function"""
    # This model plays two roles in the notebook: it is the state schema passed
    # to StateGraph(...) and the schema handed to llm.with_structured_output(...)
    # in functional_analysis_node.
    # NOTE(review): every field is required (no defaults), including the ones
    # that later nodes fill in (name_toolkit, code) — confirm that the graph is
    # invoked with values for all of them.

    # Description of what the function does; functional_analysis_node reads it
    # as the raw description and then overwrites it with the model's version.
    objective: str = Field(description= "what does this pythion function do")
    # Function name; written by functional_analysis_node.
    name: str = Field(description="name of the python function")
    # Input arguments with their types, one string per argument.
    input : List[str] = Field(description= "what would be the input arguements to this function along with the types")
    # Return attributes with their types, one string per attribute.
    output: List[str] = Field(description="what would be the output/return attributes for the function along with the types")
    # SDK/toolkit name; written by sdk_production_node and used by
    # code_production_node as the key into dict_tool_link.
    name_toolkit: str = Field(description="what would be the toolkit/ code SDK that will be used")
    # Generated source code; written by code_production_node.
    code: str = Field(description="the final python code")
# Annotated[str, operator.add]

class CodebuilderState(BaseModel):
    """Instructions for defining a python function"""
    # Single-field model holding generated code.
    # NOTE(review): not referenced anywhere in the visible part of the notebook,
    # and the docstring above is the same as FunctionInstructions' — confirm
    # whether this class is still needed.

    # The generated source code for the function.
    code: str = Field(description= "tailored code for the python function")


In [116]:

def functional_analysis_node(state: FunctionInstructions):
    """Graph node: turn the description in ``state.objective`` into a structured spec.

    The description is inserted into ``Initial_prompt`` and sent to the shared
    ``llm`` as a single system message; the reply is parsed into a
    ``FunctionInstructions`` instance via structured output.

    Args:
        state: Current graph state; only ``objective`` is read.

    Returns:
        dict: State update with the keys ``messages``, ``objective``, ``name``,
        ``input`` and ``output``. The last four are copied from the model's reply.
    """
    print("functional_analysis_node")
    structured_llm = llm.with_structured_output(FunctionInstructions)
    analysis_prompt = Initial_prompt.format(desc=state.objective)
    report: FunctionInstructions = structured_llm.invoke(
        [SystemMessage(content=analysis_prompt)]
    )

    # NOTE(review): FunctionInstructions declares no `messages` field, so this
    # entry may not end up in the graph state — confirm it is still wanted.
    state_update = {"messages": [AIMessage(content="Generated JSON code!")]}
    for field_name in ("objective", "name", "input", "output"):
        state_update[field_name] = getattr(report, field_name)
    return state_update

In [117]:
# First version of the code-generation prompt (Composio-oriented).
# NOTE(review): the next cell assigns a different template to the same name
# `write_code_prompt`, so when the notebook is run top to bottom this version is
# replaced before any node uses it. Delete it or give it a distinct name.
# str.format placeholders: {objective}, {inputs}, {output}, {name}.
# `{{...}}` is an escaped pair of braces and is rendered as a literal `{...}`.
write_code_prompt = """You are an expert Python developer tasked with creating Python functions (tools) based on user requests.

            Your process is as follows:
            1. Understand the user's request for a tool (e.g., "tool to send a discord message").
            2. Find relevant Python SDKs for the core task.
            3. See if Composio offers an integration for the relevant service (e.g., 'discord').
            4. Analyze the results:
                - If Composio has an integration, prioritize generating code that utilizes Composio (assume this involves calling a hypothetical 'composio.run_action()' function). Include a comment explaining this choice.
                - If Composio does not have a clear integration, choose the most promising Python SDK found
                - If no suitable SDK is found, state that you cannot create the function.
            5. Generate *only* the complete, runnable Python function code based on your decision.
                - The function should have clear arguments based on the user's likely intent (e.g., for discord, `channel_id` and `message_text`).
                - Include a comprehensive docstring explaining the function, its arguments, and what it returns.
                - Use type hints for all arguments and the return type.
                - If using a standard SDK, add a comment indicating which SDK is intended (e.g., `# Uses discord.py`).
                - If using Composio, structure the function to call `composio.run_action('service_name', 'action_name', params={{...}})` (you'll need to infer 'service_name' and 'action_name' and necessary params). Add comments explaining this structure.
            6. Do not include any explanatory text before or after the code block. Output only the Python code for the function.

    Here are some details about the python function you will be creating:
    <objective>
    {objective}
    </objective>

    <input schema>
    {inputs}
    </input schema>

    <output schema>
    {output}
    </output schema>

    <name of function>
    {name}
    </name of function>

"""

In [118]:
# Code-generation prompt used by code_production_node. This assignment replaces
# the template bound to `write_code_prompt` in the previous cell.
# str.format placeholders: {objective}, {inputs}, {output}, {name} (the function
# spec) and {docs} (the SDK documentation text fetched for the chosen toolkit).
# NOTE(review): the prompt lists "SDK Name" as one of the inputs, but the
# template has no placeholder for it — confirm whether it should be added.
write_code_prompt = """
You are a skilled code generation assistant. Your task is to create executable code using the following information:
- SDK Documentation: The provided documentation outlines the functionalities and usage details of the SDK. Use this as the reference for constructing your code.
- Objective: A clear description of what the code is intended to achieve.
- Input: The expected input for the code (e.g., variables, parameters, data types).
- Output: The desired result or outcome of the code (e.g., format, type, or structure).
- SDK Name: The name of the SDK that must be used in the code.

Your goal is to generate executable code that:
- Adheres to the requirements outlined above.
- Follows standard coding practices and is optimized for readability and efficiency.
- Utilizes the specified SDK appropriately based on the documentation provided.
- Only return a self contained function
- Your output should only contain a code block containing the required function and nothing else. Please do no include any explainantions
- Write your code in python
- Please also provide which API keys will be required and define the API keys as part of the function
- Please also write the doc string for the python function

Here are some details about the python function you will be creating:
<objective>
{objective}
</objective>

<input schema>
{inputs}
</input schema>

<output schema>
{output}
</output schema>

<name of function>
{name}
</name of function>

Documentation for SDK that might be helpful:
<documentation>
{docs}
</documentation>

"""


In [119]:
# SDK-selection prompt used by sdk_production_node.
# str.format placeholders: {dictionary} (the SDK-name -> description mapping),
# {objective}, {inputs}, {output} and {name} (the function spec).
# The `{{` / `}}` around the example dictionary are escaped braces, so they are
# rendered literally and are not treated as placeholders.
# The model is asked to answer with the SDK name only; sdk_production_node uses
# the reply as the toolkit name.
Best_sdk_prompt = """
You are a highly specialized language model designed to assist in selecting the most suitable SDK for a given use case. You are provided with the following:
- A dictionary containing pairs of SDK names and their respective descriptions.
- Requirements for a piece of code, including the objective, input, and output.

Your task is to:
- Identify the SDK from the provided dictionary whose description best matches the given use case described in the code requirements.
- Also give preferences to SDKs that are generally more well known or are used more frequently in the industry (Use google tools for anything search related)
- Return only the name of the matching SDK without any additional text or formatting.

Input Example:
Dictionary:
{{
"SDK_A": "Provides tools for web scraping and data extraction.",
"SDK_B": "Enables natural language processing for unstructured text.",
"SDK_C": "Facilitates the integration of payment gateways in applications."
}}
Code Requirements:
Objective: Extract data from multiple web pages.
Input: URLs of the web pages.
Output: Structured data in JSON format.

Expected Output:
SDK_A


Input :
<dictionary>
{dictionary}
</dictionary>

<objective>
{objective}
</objective>

<input schema>
{inputs}
</input schema>

<output schema>
{output}
</output schema>

<name of function>
{name}
</name of function>


"""

In [120]:

def sdk_production_node(state: FunctionInstructions):
    """Graph node: ask the LLM which documented SDK best fits the function spec.

    ``Best_sdk_prompt`` is filled with the spec fields from ``state`` and the whole
    ``dict_tool_doc`` mapping, then sent to the shared ``llm`` as a single system
    message.

    Args:
        state: Current graph state; ``objective``, ``name``, ``input`` and
            ``output`` are read.

    Returns:
        dict: ``{"name_toolkit": <reply>}``. The reply is lower-cased and stripped
        of surrounding whitespace, quotes and backticks, because it is later used
        as a dictionary key and a stray newline or quote would make that lookup fail.
    """
    selection_prompt = Best_sdk_prompt.format(
        objective=state.objective,
        inputs=state.input,
        output=state.output,
        name=state.name,
        dictionary=dict_tool_doc,
    )
    response = llm.invoke([SystemMessage(content=selection_prompt)])

    # Models often wrap a bare name in quotes/backticks or append a newline.
    toolkit_name = response.content.strip().strip("`'\"").strip().lower()
    return {"name_toolkit": toolkit_name}

In [121]:


def code_production_node(state: FunctionInstructions):
    """Graph node: generate the function's source code with the chosen toolkit's docs.

    The toolkit name in ``state.name_toolkit`` is looked up in ``dict_tool_link``,
    the linked page is fetched with ``fetch_documents`` and inserted, together with
    the function spec, into ``write_code_prompt``. The model's reply is printed and
    returned.

    Args:
        state: Current graph state; ``objective``, ``name``, ``input``, ``output``
            and ``name_toolkit`` are read.

    Returns:
        dict: ``{"code": <model reply text>}``.

    Raises:
        KeyError: If no documentation link is registered for the toolkit name.
    """
    # Normalise the same way the lookup-table keys were normalised (lower case),
    # and tolerate stray whitespace around the name.
    toolkit = state.name_toolkit.strip().lower()
    if toolkit not in dict_tool_link:
        raise KeyError(
            f"Unknown toolkit {state.name_toolkit!r}: no documentation link is registered for it"
        )

    # fetch_documents reports HTTP failures as a plain string, so `docs` may hold
    # an error message instead of documentation.
    docs = fetch_documents(dict_tool_link[toolkit])

    generation_prompt = write_code_prompt.format(
        objective=state.objective,
        inputs=state.input,
        output=state.output,
        name=state.name,
        docs=docs,
    )
    response = llm.invoke([SystemMessage(content=generation_prompt)])
    print(response.content)
    return {"code": response.content}

In [134]:
from langgraph.checkpoint.memory import InMemorySaver
# Linear three-step pipeline over a FunctionInstructions state:
# START -> func_analysis -> sdk_write -> code_write -> END
workflow = StateGraph(FunctionInstructions)

# Nodes, in the order they run.
workflow.add_node("func_analysis", functional_analysis_node)
workflow.add_node("sdk_write", sdk_production_node)
workflow.add_node("code_write", code_production_node)

# Edges, listed in flow order.
workflow.add_edge(START, "func_analysis")
workflow.add_edge("func_analysis", "sdk_write")
workflow.add_edge("sdk_write", "code_write")
workflow.add_edge("code_write", END)

# Compile with an in-memory checkpointer.
checkpointer = InMemorySaver()
tool_infograph = workflow.compile(checkpointer=checkpointer)


In [None]:
class toolcollector(MessagesState):
    """State schema of the outer graphs (``workflow1`` / ``workflow2``), extending ``MessagesState``."""
    # Tool specifications, one FunctionInstructions per tool.
    # NOTE(review): assigns a pydantic Field(...) as the default; presumably
    # MessagesState is a TypedDict, where the description would not be used —
    # confirm. Not read or written by the node functions in this part of the notebook.
    tool : List[FunctionInstructions] = Field(description= "what tools are there")
    # Generated code strings, one per discovered tool (written by graph_map_step).
    total_code: List[str]

# System prompt asking the model to list every @tool-decorated function of a
# Python snippet as a JSON object mapping function name -> description.
# The snippet to analyse is hard-coded in the <code> section at the end.
# The callers in this notebook pass the string to SystemMessage without calling
# .format(), so the doubled braces in the greet example reach the model as written.
tool_desc_prompt = """
You are an AI assistant designed to analyze Python code. Your task is to identify all function definitions in the provided Python snippet that are decorated with @tool. You must return a dictionary where:
- The keys are the names of the identified functions.
- The values are descriptions of what each function is supposed to do. If a function contains a docstring, extract it as the description. If a docstring is missing, infer the function's purpose from its structure and comments.
Example Input:
@tool
def calculate_area(length, width):
    "Calculates the area of a rectangle."
    return length * width

@tool
def greet(name):
    return f"Hello, {{name}}!"


Expected Output:
{
    "calculate_area": "Calculates the area of a rectangle.",
    "greet": "Greets a user by name."
}


Instructions:
- Identify functions that have the @tool decorator.
- Extract function names and descriptions (either from docstrings or inferred).
- Return the output as a structured JSON.
- Please only return a json object that can be converted into a json directly. DO NOT RETURN ANYTHING OTHER THAN A JSON

Python code:
<code>
from typing import Literal
from langgraph.graph import StateGraph, MessagesState, START, END
from langchain_core.messages import AIMessage
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode

# Define the tools needed by the LLM

@tool
def get_weather(location: str):
    \"\"\"Call to get the current weather.\"\"\"
    if location.lower() in ["sf", "san francisco"]:
        return "It's 60 degrees and foggy."
    else:
        return "It's 90 degrees and sunny."


@tool
def get_coolest_cities():
    \"\"\"Get a list of coolest cities\"\"\"
    return "nyc, sf"

tools = [get_weather, get_coolest_cities]

# Bind the model(llm) with tools
model_with_tools = ChatAnthropic(
    model="claude-3-haiku-20240307", temperature=0
).bind_tools(tools)

# Generate a tool node.
tool_node = ToolNode(tools)

# conditional edge
def should_continue(state: MessagesState):
    messages = state["messages"]
    last_message = messages[-1]
    if last_message.tool_calls:
        return "tools"
    return END
</code>

"""
import uuid

def graph_map_step(state: toolcollector):
    """Discover the @tool functions via the LLM and run ``tool_infograph`` once per tool.

    ``tool_desc_prompt`` is sent as a system message and the reply is parsed as
    a JSON object mapping function name -> description. For each entry the
    inner graph is streamed to completion and the ``code`` value of its final
    state is collected.

    Args:
        state: Current outer-graph state. It is not read here; the code to
            analyse is embedded in ``tool_desc_prompt``.

    Returns:
        dict: ``{"total_code": [...]}`` with one generated code string per
        tool, in the order the tools appear in the parsed reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON even after a
            surrounding Markdown code fence has been removed.
    """
    response_1 = llm.invoke([SystemMessage(content=tool_desc_prompt)])

    # Tolerate a reply wrapped in a ```json ... ``` fence, which json.loads
    # would otherwise reject.
    raw_reply = response_1.content.strip()
    if raw_reply.startswith("```"):
        raw_reply = raw_reply.strip("`").strip()
        if raw_reply[:4].lower() == "json":
            raw_reply = raw_reply[4:]
    json_objects = json.loads(raw_reply)
    print(json_objects)

    send = []
    for key, description in json_objects.items():
        # f-string instead of "+" so a non-string description cannot raise TypeError.
        objective = f"{key} {description}"
        print(objective)

        # tool_infograph is compiled with a checkpointer, so each tool gets its
        # own thread id; sharing one would carry checkpointed state from one
        # tool's run into the next.
        config = {"configurable": {"thread_id": str(uuid.uuid4())}}
        tool_input = {
            "objective": objective,
            "name": key,
            "input": [],
            "output": [],
            "name_toolkit": "",
            "code": "",
        }
        for output in tool_infograph.stream(tool_input, config, stream_mode="updates"):
            print(output)
        send.append(tool_infograph.get_state(config).values["code"])

    return {"total_code": send}

# "objective":json_objects[key], "name": key, "input":[], "output": [], "name_toolkit": "", "code":""

In [148]:
# Outer graph: START -> graph_map_step -> END, with toolcollector as its state schema.
workflow1 = StateGraph(toolcollector)
# workflow1.add_node("tool_infograph", tool_infograph)
# graph_map_step is the only node registered here.
workflow1.add_node("graph_map_step", graph_map_step)

workflow1.add_edge(START, "graph_map_step")
workflow1.add_edge("graph_map_step", END)
# The outer graph is compiled without a checkpointer.
infograph = workflow1.compile()


In [149]:
import pprint
from langgraph.graph import END, StateGraph, START, MessagesState
from langgraph.prebuilt import ToolNode

# Single user request used to start the outer graph.
inputs = {
    "messages": [
        ("user", "Create a python function that gets all my emails from gmail and filter them based on senders"),
    ]
}

# Stream the graph and pretty-print each node's update as it arrives.
# ``code_snip`` ends up holding the update of the last node that reported.
code_snip = {}
for output in infograph.stream(inputs):
    for key in output:
        value = output[key]
        pprint.pprint(f"Output from node '{key}':")
        pprint.pprint("---")
        pprint.pprint(value, indent=2)
        code_snip = value
    pprint.pprint("\n---\n")

{'get_weather': 'Call to get the current weather.', 'get_coolest_cities': 'Get a list of coolest cities'}
get_weather Call to get the current weather.
functional_analysis_node
{'func_analysis': {'objective': 'The function retrieves the current weather information for a specified location, typically using an external weather API to fetch real-time data.', 'name': 'get_weather', 'input': ['location: str', 'api_key: str'], 'output': ['weather_data: dict']}}
{'sdk_write': {'name_toolkit': 'openweathermap'}}
```python
import os
from langchain_community.utilities import OpenWeatherMapAPIWrapper

def get_weather(location: str, api_key: str) -> dict:
    """
    Retrieves the current weather information for a specified location using the OpenWeatherMap API.

    Parameters:
    location (str): The location for which to retrieve the weather information.
    api_key (str): The API key for authenticating with the OpenWeatherMap API.

    Returns:
    dict: A dictionary containing the current weat

In [152]:
# Show the generated code for the second discovered tool (index 1 of ``total_code``).
print(code_snip['total_code'][1])

```python
from langchain_hyperbrowser import HyperbrowserBrowserUseTool

def get_coolest_cities(criteria: str, limit: int) -> list:
    """
    Retrieves a list of cities that are considered the coolest based on certain criteria,
    such as climate, culture, or popularity.

    Parameters:
    criteria (str): The criteria to filter the coolest cities.
    limit (int): The maximum number of cities to retrieve.

    Returns:
    list: A list of the coolest cities based on the specified criteria.
    """
    # Define API key
    HYPERBROWSER_API_KEY = "<your-api-key>"
    
    # Initialize the browser tool
    tool = HyperbrowserBrowserUseTool()
    
    # Create the task to retrieve coolest cities
    task = f"Find the top {limit} coolest cities based on {criteria}."
    
    # Run the tool with the task
    result = tool.run({"task": task})
    
    # Extract and return the list of coolest cities
    return result.get('data', [])
```


In [None]:
def graph_map_step2(state: toolcollector):
    """Emit one ``Send`` to ``"tool_infograph"`` per tool discovered by the LLM.

    ``tool_desc_prompt`` is sent as a system message and the reply is parsed
    with ``json.loads`` into a mapping of function name -> description.

    Args:
        state: Current outer-graph state; not read by this function.

    Returns:
        list: One ``Send("tool_infograph", payload)`` per key of the parsed
        reply. Each payload carries the objective (name and description joined
        by a space), the name, and empty ``input`` / ``output`` /
        ``name_toolkit`` / ``code`` values.
    """
    reply = llm.invoke([SystemMessage(content=tool_desc_prompt)])
    tool_descriptions = json.loads(reply.content)
    print(tool_descriptions)
    return [
        Send(
            "tool_infograph",
            {
                "objective": tool_name + " " + tool_descriptions[tool_name],
                "name": tool_name,
                "input": [],
                "output": [],
                "name_toolkit": "",
                "code": "",
            },
        )
        for tool_name in tool_descriptions
    ]
# "objective":json_objects[key], "name": key, "input":[], "output": [], "name_toolkit": "", "code":""


In [83]:
# Send-based variant of the outer graph: each Send emitted by graph_map_step2
# runs tool_infograph once.
workflow2 = StateGraph(toolcollector)
workflow2.add_node("tool_infograph", tool_infograph)

# graph_map_step2 returns a list of Send objects, so it may only be used as the
# routing function of a conditional edge. Registering it as a node as well makes
# LangGraph reject its return value with
# "InvalidUpdateError: Expected dict, got [Send(...)]".
workflow2.add_conditional_edges(START, graph_map_step2, ["tool_infograph"])
workflow2.add_edge("tool_infograph", END)
# NOTE(review): tool_infograph was compiled with its own checkpointer and uses a
# different state schema (FunctionInstructions) than toolcollector — confirm it
# behaves as intended when embedded as a node here.
infograph2 = workflow2.compile()

In [84]:
import pprint
from langgraph.graph import END, StateGraph, START, MessagesState
from langgraph.prebuilt import ToolNode

# Single user request used to start the graph.
inputs = {
    "messages": [
        ("user", "Create a python function that gets all my emails from gmail and filter them based on senders"),
    ]
}

# NOTE(review): this streams ``infograph`` although the preceding cell builds
# ``infograph2`` — confirm which graph is meant to run here.
# ``code_snip`` ends up holding the update of the last node that reported.
code_snip = {}
for output in infograph.stream(inputs):
    for key in output:
        value = output[key]
        pprint.pprint(f"Output from node '{key}':")
        pprint.pprint("---")
        pprint.pprint(value, indent=2)
        code_snip = value
    pprint.pprint("\n---\n")

{'get_weather': 'Call to get the current weather.', 'get_coolest_cities': 'Get a list of coolest cities'}


InvalidUpdateError: Expected dict, got [Send(node='tool_infograph', arg={'objective': 'get_weather Call to get the current weather.', 'name': 'get_weather', 'input': [], 'output': [], 'name_toolkit': '', 'code': ''}), Send(node='tool_infograph', arg={'objective': 'get_coolest_cities Get a list of coolest cities', 'name': 'get_coolest_cities', 'input': [], 'output': [], 'name_toolkit': '', 'code': ''})]
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_GRAPH_NODE_RETURN_VALUE

In [None]:
# System prompt asking the model to list every @tool-decorated function as a
# JSON object mapping function name -> description.
# The reply is parsed with json.loads, so the instructions must request strict
# JSON; a Python dict literal (single quotes) would fail to parse.
tool_desc_prompt = """
You are an AI assistant designed to analyze Python code. Your task is to identify all function definitions in the provided Python snippet that are decorated with @tool. You must return a dictionary where:
- The keys are the names of the identified functions.
- The values are descriptions of what each function is supposed to do. If a function contains a docstring, extract it as the description. If a docstring is missing, infer the function's purpose from its structure and comments.
Example Input:
@tool
def calculate_area(length, width):
    "Calculates the area of a rectangle."
    return length * width

@tool
def greet(name):
    return f"Hello, {{name}}!"


Expected Output:
{
    "calculate_area": "Calculates the area of a rectangle.",
    "greet": "Greets a user by name."
}


Instructions:
- Identify functions that have the @tool decorator.
- Extract function names and descriptions (either from docstrings or inferred).
- Return the output as a structured JSON.
- Please only return a json object that can be converted into a json directly. DO NOT RETURN ANYTHING OTHER THAN A JSON

"""

def graph_map_step(state: toolcollector):
    """Emit one ``Send`` to ``"tool_create"`` per tool discovered by the LLM.

    ``tool_desc_prompt`` is sent as a system message and the reply is parsed
    with ``json.loads`` into a mapping of function name -> description.

    NOTE(review): this redefines ``graph_map_step`` from an earlier cell, and it
    returns ``Send`` objects rather than a state dict — presumably it is meant
    to be used as a conditional-edge function, not as a node; confirm.

    Args:
        state: Current outer-graph state; not read by this function.

    Returns:
        list: One ``Send("tool_create", {"objective": ...})`` per key of the
        parsed reply, where the objective is the name and description joined
        by a space.
    """
    reply = llm.invoke([SystemMessage(content=tool_desc_prompt)])
    tool_descriptions = json.loads(reply.content)
    return [
        Send("tool_create", {"objective": tool_name + " " + tool_descriptions[tool_name]})
        for tool_name in tool_descriptions
    ]
