In [2]:
import logging
# Set up the logger
logging.basicConfig(
    level=logging.INFO,  # Set to DEBUG for detailed logs
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # logging.FileHandler("scraper.log"),  # Log to a file
        logging.StreamHandler()  # Log to console
    ]
)

logger = logging.getLogger(__name__)

In [1]:
from bs4 import BeautifulSoup
import html2text
import httpx


def fetch_documents(url: str) -> str:
    """Fetch a document from a URL and return the markdownified text.

    Args:
        url (str): The URL of the document to fetch.

    Returns:
        str: The markdownified text of the document.
    """
    httpx_client = httpx.Client(follow_redirects=True, timeout=10)

    try:
        response = httpx_client.get(url, timeout=10)
        response.raise_for_status()
        html_content = response
        soup = BeautifulSoup(html_content, 'html.parser')
        a_tags = soup.find_all('a')
        for a_tag in a_tags:
            a_tag.decompose()
        
        img_tags = soup.find_all('img')
        for img_tag in img_tags:
            img_tag.decompose()

        target_div = soup.find('div', class_= "theme-doc-markdown markdown") #langchain
        
        if not target_div:
            target_div = soup.find('article') #langraph
        
        if not target_div:
            return
        
        return html2text.html2text(str(target_div))
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        return f"Encountered an HTTP error: {str(e)}"

In [6]:
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

PROMPT = PromptTemplate.from_template(
    """You are a helpful assistant. You will be provided with a document which explains the langgraph framework in detail.
<Document>
{document}
<Document>

    Understand the document and suggest a yaml format to represent a stategraph.
    """
)

output = llm.invoke([SystemMessage(content = PROMPT.format(document = fetch_documents("https://langchain-ai.github.io/langgraph/concepts/low_level/")))])


2025-04-10 20:56:38,386 - INFO - HTTP Request: GET https://langchain-ai.github.io/langgraph/concepts/low_level/ "HTTP/1.1 200 OK"
2025-04-10 20:56:50,403 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [8]:
from pprint import pprint
pprint(output.content)

('Based on the provided document about the LangGraph framework, here is a '
 'suggested YAML format to represent a `StateGraph`. This format captures the '
 'essential components of a `StateGraph`, including the state schema, nodes, '
 'edges, and configuration.\n'
 '\n'
 '```yaml\n'
 'StateGraph:\n'
 '  state:\n'
 '    type: OverallState\n'
 '    input_schema:\n'
 '      user_input: string\n'
 '    output_schema:\n'
 '      graph_output: string\n'
 '    private_schema:\n'
 '      bar: string\n'
 '  nodes:\n'
 '    - name: node_1\n'
 '      function: node_1_function\n'
 '      input_schema: InputState\n'
 '      output_schema: OverallState\n'
 '    - name: node_2\n'
 '      function: node_2_function\n'
 '      input_schema: OverallState\n'
 '      output_schema: PrivateState\n'
 '    - name: node_3\n'
 '      function: node_3_function\n'
 '      input_schema: PrivateState\n'
 '      output_schema: OutputState\n'
 '  edges:\n'
 '    - from: START\n'
 '      to: node_1\n'
 '    - from: n

In [14]:

PROMPT_2 = PromptTemplate.from_template(
    """You are a helpful assistant. You will be provided with a document which explains the langgraph framework in detail.
<Document>
{document}
<Document>

Your job is to output two things:
    1. Understand the document and suggest a yaml format to represent a stategraph.
    2. Also provide the code to generate a stategraph using the yaml format you suggested.
    
    """
)

output = llm.invoke([SystemMessage(content = PROMPT_2.format(document = fetch_documents("https://langchain-ai.github.io/langgraph/concepts/low_level/")))])


2025-04-10 21:08:28,235 - INFO - HTTP Request: GET https://langchain-ai.github.io/langgraph/concepts/low_level/ "HTTP/1.1 200 OK"
2025-04-10 21:08:43,276 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [None]:
pprint(output.content)

('To represent a `StateGraph` in YAML format, we can create a structured '
 'format that captures the essential components of the graph, including the '
 'state schemas, nodes, edges, and any configuration. Below is a suggested '
 'YAML format for representing a `StateGraph`.\n'
 '\n'
 '### Suggested YAML Format\n'
 '\n'
 '```yaml\n'
 'state_graph:\n'
 '  state:\n'
 '    type: OverallState\n'
 '    input_schema:\n'
 '      user_input: str\n'
 '    output_schema:\n'
 '      graph_output: str\n'
 '    private_schema:\n'
 '      bar: str\n'
 '  nodes:\n'
 '    - name: node_1\n'
 '      function: node_1\n'
 '      input_schema: InputState\n'
 '    - name: node_2\n'
 '      function: node_2\n'
 '      input_schema: OverallState\n'
 '    - name: node_3\n'
 '      function: node_3\n'
 '      input_schema: PrivateState\n'
 '  edges:\n'
 '    - from: START\n'
 '      to: node_1\n'
 '    - from: node_1\n'
 '      to: node_2\n'
 '    - from: node_2\n'
 '      to: node_3\n'
 '    - from: node_3\n'

In [19]:
import yaml
from langgraph.graph import StateGraph, START, END
from typing import TypedDict

# Define the state schemas\n'
class InputState(TypedDict):
    user_input: str

class OutputState(TypedDict):
    graph_output: str

class OverallState(TypedDict):
    foo: str
    user_input: str
    graph_output: str

class PrivateState(TypedDict):
    bar: str

# Define the node functions\n'
def node_1(state: InputState) -> OverallState:
    return {"foo": state["user_input"] + " name"}

def node_2(state: OverallState) -> PrivateState:
    return {"bar": state["foo"] + " is"}

def node_3(state: PrivateState) -> OutputState:
    return {"graph_output": state["bar"] + " Lance"}

# Load the YAML configuration\n'
yaml_content = """
state_graph:
  state:
    type: OverallState
    input_schema:
      user_input: str
    output_schema:
      graph_output: str
    private_schema:
      bar: str
  nodes:
    - name: node_1
      function: node_1
      input_schema: InputState
    - name: node_2
      function: node_2
      input_schema: OverallState
    - name: node_3
      function: node_3
      input_schema: PrivateState
  edges:
    - from: __start__
      to: node_1
    - from: node_1
      to: node_2
    - from: node_2
      to: node_3
    - from: node_3
      to: __end__
  config:
    config_schema:
      llm: str
"""

# Parse the YAML
config = yaml.safe_load(yaml_content)

# Create the StateGraph
builder = StateGraph(OverallState, input=InputState, output=OutputState)

# Add nodes
for node in config['state_graph']['nodes']:
    builder.add_node(node['name'], globals()[node['function']])

# Add edges
for edge in config['state_graph']['edges']:
    builder.add_edge(edge['from'], edge['to'])


# Compile the graph
graph = builder.compile()

# Example invocation
result = graph.invoke({"user_input": "My"})
print(result)  # Output: {'graph_output': 'My name is Lance'}

{'graph_output': 'My name is Lance'}


In [None]:
pprint(output.content)

('To represent a `StateGraph` in YAML format, we can create a structured '
 'format that captures the essential components of the graph, including the '
 'state schemas, nodes, edges, and any configuration. Below is a suggested '
 'YAML format for representing a `StateGraph`.\n'
 '\n'
 '### Suggested YAML Format\n'
 '\n'
 '```yaml\n'
 'state_graph:\n'
 '  state:\n'
 '    type: OverallState\n'
 '    input_schema:\n'
 '      user_input: str\n'
 '    output_schema:\n'
 '      graph_output: str\n'
 '    private_schema:\n'
 '      bar: str\n'
 '  nodes:\n'
 '    - name: node_1\n'
 '      function: node_1\n'
 '      input_schema: InputState\n'
 '    - name: node_2\n'
 '      function: node_2\n'
 '      input_schema: OverallState\n'
 '    - name: node_3\n'
 '      function: node_3\n'
 '      input_schema: PrivateState\n'
 '  edges:\n'
 '    - from: START\n'
 '      to: node_1\n'
 '    - from: node_1\n'
 '      to: node_2\n'
 '    - from: node_2\n'
 '      to: node_3\n'
 '    - from: node_3\n'

In [22]:

PROMPT_3 = PromptTemplate.from_template(
    """You are a helpful assistant. You will be provided with a document which explains the langgraph framework in detail.
<Document>
{document}
<Document>

Your job is to output two things:
    1. Understand the document and suggest a yaml format to represent a stategraph.
    2. Also provide the code to generate a stategraph using the yaml format you suggested.
    
    NOTE: 
Make sure to include the following concepts in your yaml format, you will find them in the document: 
1. node
2. edge
3. conditional_edge
4. state
5. config
6. input_schema, output_schema, private_schema, config_schema
7. messages
8. reducer 
9. SEND
10. COMMAND
11. recursion_limit 
12. interrupt
13. subgraphs
    """
)

output = llm.invoke([SystemMessage(content = PROMPT_3.format(document = fetch_documents("https://langchain-ai.github.io/langgraph/concepts/low_level/")))])


2025-04-10 21:36:46,542 - INFO - HTTP Request: GET https://langchain-ai.github.io/langgraph/concepts/low_level/ "HTTP/1.1 200 OK"
2025-04-10 21:37:15,245 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [23]:
pprint(output.content)

('Based on the provided document, here is a suggested YAML format to represent '
 'a `StateGraph` in LangGraph, along with the code to generate a `StateGraph` '
 'using this YAML format.\n'
 '\n'
 '### Suggested YAML Format\n'
 '\n'
 '```yaml\n'
 'stategraph:\n'
 '  state:\n'
 '    overall_state:\n'
 '      type: TypedDict\n'
 '      fields:\n'
 '        foo: str\n'
 '        user_input: str\n'
 '        graph_output: str\n'
 '    input_schema:\n'
 '      type: TypedDict\n'
 '      fields:\n'
 '        user_input: str\n'
 '    output_schema:\n'
 '      type: TypedDict\n'
 '      fields:\n'
 '        graph_output: str\n'
 '    private_schema:\n'
 '      type: TypedDict\n'
 '      fields:\n'
 '        bar: str\n'
 '    config_schema:\n'
 '      type: TypedDict\n'
 '      fields:\n'
 '        llm: str\n'
 '    messages:\n'
 '      type: list\n'
 '      item_type: AnyMessage\n'
 '      reducer: add_messages\n'
 '\n'
 '  nodes:\n'
 '    - name: node_1\n'
 '      function: node_1_function\n'