In [1]:
!pip install langchain==0.3.1 langchain-openai==0.2.0 chromadb==0.5.5 pandas==2.2.3 python-dotenv==1.0.1



In [2]:
!pip install langgraph
!pip install langchain_community

Collecting langgraph
  Using cached langgraph-0.4.1-py3-none-any.whl.metadata (7.9 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.0.10 (from langgraph)
  Using cached langgraph_checkpoint-2.0.25-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-prebuilt>=0.1.8 (from langgraph)
  Using cached langgraph_prebuilt-0.1.8-py3-none-any.whl.metadata (5.0 kB)
Collecting langgraph-sdk>=0.1.42 (from langgraph)
  Using cached langgraph_sdk-0.1.66-py3-none-any.whl.metadata (1.8 kB)
Collecting xxhash<4.0.0,>=3.5.0 (from langgraph)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting ormsgpack<2.0.0,>=1.8.0 (from langgraph-checkpoint<3.0.0,>=2.0.10->langgraph)
  Downloading ormsgpack-1.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Downloading langgraph-0.4.1-py3-none-any.whl (151 kB)


In [3]:
# === Step 0: Import Libraries ===
import os
import re
import pandas as pd
from typing import Dict, Any
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
from langchain_community.embeddings import SentenceTransformerEmbeddings

In [5]:
# ===  Load CSV Dataset ===
def load_dataset(filepath: str = "/content/square_payments_api_aparna.csv") -> pd.DataFrame:
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"{filepath} not found. Please include the dataset with proper columns.")
    df = pd.read_csv(filepath)
    if df.empty:
        raise ValueError("Dataset is empty.")
    print(f"✔ Rows: {len(df)}, Columns: {list(df.columns)}")
    return df

df = load_dataset()

✔ Rows: 7, Columns: ['API Name', 'Endpoint', 'Description', 'Parameters', 'Node.js', 'Method']


In [6]:
# === Convert Rows to LangChain Documents ===
def build_documents(df: pd.DataFrame) -> list:
    documents = []
    for _, row in df.iterrows():
        text = (
            f"""API: {row['API Name']}
                Endpoint: {row['Endpoint']}
                Description: {row['Description']}
                Parameters: {row['Parameters']}
                Method: {row['Node.js']}
                HTTP: {row['Method']}"""
        )
        documents.append(Document(page_content=text, metadata={"api": row["API Name"]}))
    print(f"✔ {len(documents)} documents created.")
    return documents

docs = build_documents(df)

for api in docs:
    print(api)


✔ 7 documents created.
page_content='API: Register Domain
                Endpoint: https://connect.squareup.com/v2/apple-pay/domains
                Description: Activates a domain for use with Apple Pay on the Web and Square
                Parameters: domain_name:string:Required
                Method: import { SquareClient, SquareEnvironment } from 'square'; const client = new SquareClient({  environment: SquareEnvironment.Sandbox,  accessToken: 'YOUR_ACCESS_TOKEN', }); async function registerDomain() {  const response = await client.applePayApi.registerDomain({   domainName: 'example.com',  });  console.log(response.result); } registerDomain();
                HTTP: POST' metadata={'api': 'Register Domain'}
page_content='API: CreatePayment
                Endpoint: https://connect.squareup.com/v2/payments
                Description: Creates a payment using a card or other supported source.
                Parameters: idempotency_key:string:required, amount_money.object:required, 

In [7]:
# Setup SentenceTransformer Embeddings ===
def setup_embeddings(model_name: str = "all-MiniLM-L6-v2"):
    embeddings = SentenceTransformerEmbeddings(model_name=model_name)
    test_embed = embeddings.embed_query("Sample text for embedding")
    return embeddings

embeddings = setup_embeddings()

  embeddings = SentenceTransformerEmbeddings(model_name=model_name)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
from langchain.vectorstores import Chroma

# Assuming you already have embeddings and docs
persist_dir = "chroma_db_a12"

def setup_vector_store(docs, embeddings, persist_dir):
    if os.path.exists(persist_dir):
        store = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
    else:
        store = Chroma.from_documents(docs, embeddings, persist_directory=persist_dir)
        store.persist()
    return store

# === Fix: assign to global variable so search_apis can access it ===
vector_store = setup_vector_store(docs, embeddings, persist_dir)


  store.persist()


In [9]:
# === Set OpenAI API Key ===
import os
os.environ["OPENAI_API_KEY"] = ""


In [10]:
# === Setup LLM ===
from langchain_openai import ChatOpenAI

def setup_llm(model_name: str = "gpt-4o") -> ChatOpenAI:
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable not set.")
    return ChatOpenAI(model=model_name, temperature=0)

llm = setup_llm()


In [11]:
import json
import re
from typing import Dict, Any

from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

# ----------------------------
# Helper Functions
# ----------------------------

def search_apis(query: str, k: int = 1) -> str:
    """Search the Chroma vector store for relevant APIs."""
    results = vector_store.similarity_search(query, k=k)
    top_docs = " ".join(res.page_content.replace('\n', ' ').strip() for res in results)
    print(f"Search API Tool Output:\n{top_docs}")
    return top_docs

def extract(field: str, text: str, default: str = "Unknown") -> str:
    """Extracts the value for a given field using regex."""
    match = re.search(fr"{field}:\s*(.*?)(?:\n|$)", text, flags=re.DOTALL | re.IGNORECASE)
    return match.group(1).strip() if match else default

def extract_section(code: str, label: str) -> str:
    """Extract a labeled code section from the generated output."""
    pattern = rf"{label}\n([\s\S]*?)(?:\n[A-Z][a-z]+ Code|$)"
    match = re.search(pattern, code)
    return match.group(1).strip() if match else f"No {label.lower()} generated."

In [12]:
from typing import Dict, Any
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END

class AgentState(Dict[str, Any]):
    """
    A simple dictionary-based state object passed between workflow nodes.
    Stores the user query, retrieved API documentation, and generated code.
    """
    query: str
    api_docs: str
    generated_code: str

In [13]:
def retrieve_api(state: AgentState) -> AgentState:
    """
    Retrieves the most relevant Square API documentation for the given query.
    Updates the 'api_docs' field in the state.
    """
    query = state.get("query", "")
    try:
        docs = search_apis(query)
        if not docs:
            raise ValueError("No relevant API documentation found.")
        state["api_docs"] = docs
    except Exception as e:
        print(f" Failed to retrieve API for '{query}': {e}")
        state["generated_code"] = f"Error: {e}"
    return state

In [14]:
def generate_code(state: AgentState) -> AgentState:
    """
    Uses the provided query and API documentation to generate backend and frontend code.
    Updates the 'generated_code' field in the state.
    """
    try:
        prompt = PromptTemplate(
        input_variables=["query", "api_docs"],
        template=(
              "You are an advanced coding assistant trained to generate complete and production-ready code based on user requirements and official Square API documentation.\n\n"
              "User Request:\n{query}\n\n"
              "Relevant Square API Documentation:\n{api_docs}\n\n"
              "Using the above, generate the following:\n\n"
              "1. Backend Code:\n"
              "   - Use Node.js with Express and the official `square` SDK\n"
              "   - Include all required parameters and return fields\n"
              "   - Accept JSON input and respond with JSON\n"
              "   - Use the `SQUARE_ACCESS_TOKEN` environment variable\n"
              "   - Implement CORS and proper error handling\n"
              "   - Clearly label this section as: `Backend Code`\n\n"
              "2. Frontend Code:\n"
              "   - Use React with Tailwind CSS and Axios\n"
              "   - Create inputs for all required parameters\n"
              "   - Display success or error messages\n"
              "   - Use `useState` and `useEffect` hooks for state management\n"
              "   - Clearly label this section as: `Frontend Code`\n\n"
              "Only return the code for each section, with no explanations, comments, or file paths. Separate each section with its corresponding label."
          )
      )
        chain = prompt | llm
        output = chain.invoke({
            "query": state["query"],
            "api_docs": state["api_docs"]
        })
        state["generated_code"] = output.content
    except Exception as e:
        print(f"Code generation failed for '{state['query']}': {e}")
        state["generated_code"] = f"Error: {e}"
    return state


def generate_payment_code(query: str, workflow, llm_model: ChatOpenAI) -> str:
    """
    Executes the LangGraph workflow for a given query and prints the generated code.
    """
    print(f"\n Running code generation for query: '{query}'")
    if not query:
        return "Error: Query cannot be empty."

    try:
        global llm
        llm = llm_model

        state = AgentState(query=query, api_docs="", generated_code="")
        result = workflow.invoke(state)
        output = result.get("generated_code", "")

        frontend = extract_section(output, "Frontend Code")
        backend = extract_section(output, "Backend Code")

        print("\n Frontend Code:\n")
        print(frontend)
        print("\n Backend Code:\n")
        print(backend)

        return output
    except Exception as e:
        print(f"Workflow failed: {e}")
        return f"Error: {e}"


In [15]:

def build_workflow():
    """
    Builds a LangGraph workflow to first retrieve API docs, then generate code.
    """
    try:
        graph = StateGraph(AgentState)
        graph.add_node("retrieve_api", retrieve_api)
        graph.add_node("generate_code", generate_code)
        graph.add_edge("retrieve_api", "generate_code")
        graph.add_edge("generate_code", END)
        graph.set_entry_point("retrieve_api")
        return graph.compile()
    except Exception as e:
        print(f"Workflow build error: {e}")
        raise


In [16]:
import langchain
langchain.debug = True

query = "create a payment"
workflow = build_workflow()
generated_code = generate_payment_code(query, workflow, llm)


 Running code generation for query: 'create a payment'
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph] Entering Chain run with input:
[0m{
  "query": "create a payment",
  "api_docs": "",
  "generated_code": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:retrieve_api] Entering Chain run with input:
[0m{
  "query": "create a payment",
  "api_docs": "",
  "generated_code": ""
}
Search API Tool Output:
API: CreatePayment                 Endpoint: https://connect.squareup.com/v2/payments                 Description: Creates a payment using a card or other supported source.                 Parameters: idempotency_key:string:required, amount_money.object:required, source_id:string:required, autocomplete:boolean, customer_id:string, note:string                 Method: import { SquareClient, SquareEnvironment } from "square"; async function main() {   const client = new SquareClient({     environment: SquareEnvironment.Sandbox,     token: "YOUR_ACCESS_TOKEN",   });   

In [17]:
import langchain
langchain.debug = True

query = "List payments"
workflow = build_workflow()
generated_code = generate_payment_code(query, workflow, llm)


 Running code generation for query: 'List payments'
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph] Entering Chain run with input:
[0m{
  "query": "List payments",
  "api_docs": "",
  "generated_code": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:retrieve_api] Entering Chain run with input:
[0m{
  "query": "List payments",
  "api_docs": "",
  "generated_code": ""
}
Search API Tool Output:
API: ListPayments                 Endpoint: https://connect.squareup.com/v2/payments                 Description: Retrieves a list of payments taken by the account.                 Parameters: begin_time:string, card_brand:string, cursor:string, end_time:string, is_offline_payment:boolean, last_4:string, limit:integer, location_id:string, offline_begin_time:string, offline_end_time:string, sort_field:string, sort_order:string, total:integer, updated_at_begin_time:string, updated_at_end_time:string                 Method: import { SquareClient, SquareEnvironment } from "squa

In [18]:
import langchain
langchain.debug = True

query = "Get a payment"
workflow = build_workflow()
generated_code = generate_payment_code(query, workflow, llm)


 Running code generation for query: 'Get a payment'
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph] Entering Chain run with input:
[0m{
  "query": "Get a payment",
  "api_docs": "",
  "generated_code": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:retrieve_api] Entering Chain run with input:
[0m{
  "query": "Get a payment",
  "api_docs": "",
  "generated_code": ""
}
Search API Tool Output:
API: GetPayment                 Endpoint: https://connect.squareup.com/v2/payments/{payment_id}                 Description: Retrieves payment details by payment ID.                 Parameters: payment_id:string:required                 Method: import { SquareClient, SquareEnvironment } from "square"; async function main() {   const client = new SquareClient({     environment: SquareEnvironment.Sandbox,     token: "YOUR_ACCESS_TOKEN",   });   await client.payments.get({}); } main();                 HTTP: GET
[36;1m[1;3m[chain/end][0m [1m[chain:LangGraph > chain:retrieve

# Single-Agent Code Generation Workflow
In the single-agent version of your project, you implemented a LangChain-based system that takes a user query and automatically generates both frontend and backend code using relevant Square API documentation. The workflow consists of three core components:

 1. Vector Search-Based API Retrieval
You used a Chroma vector store to embed and store Square API documentation. When the user enters a query, the system performs semantic search using SentenceTransformerEmbeddings to find the most relevant API entry.

Function: search_apis(query)

Purpose: Fetch top-matching API documentation based on query

 2. Prompt-Based Code Generation
You built a PromptTemplate to guide the LLM in generating production-ready code. The template instructed the LLM to:

Create a Node.js + Express backend

Create a React + Tailwind + Axios frontend

Follow best practices: use environment variables, error handling, CORS, etc.

Function: generate_code(state)

Model: OpenAI ChatOpenAI (e.g., gpt-4o)

Result: Structured code labeled as Backend Code and Frontend Code

 3. LangGraph Workflow Execution
You used the LangGraph library to define a simple directed workflow graph:

Node 1: Retrieve API docs (retrieve_api)

Node 2: Generate code (generate_code)

Edge: Connect retrieve → generate → END

Entry point: "retrieve_api"

Result: Final state with both retrieved docs and generated code

# MultiAgents

In [19]:
from langgraph.graph import StateGraph, END

def search_agent(state: AgentState) -> AgentState:
    query = state["query"]
    try:
        docs = search_apis(query)
        state["api_docs"] = docs
    except Exception as e:
        state["api_docs"] = ""
        state["generated_code"] = f"Error: {e}"
    return state

def codegen_agent(state: Dict[str, Any]) -> Dict[str, Any]:
    try:
        prompt = PromptTemplate(
            input_variables=["query", "api_docs"],
            template=(
                "You are an advanced coding assistant trained to generate complete and production-ready code based on user requirements and official Square API documentation.\n\n"
                "User Request:\n{query}\n\n"
                "Relevant Square API Documentation:\n{api_docs}\n\n"
                "Using the above, generate the following:\n\n"
                "1. Backend Code:\n"
                "   - Use Node.js with Express and the official `square` SDK\n"
                "   - Include all required parameters and return fields\n"
                "   - Accept JSON input and respond with JSON\n"
                "   - Use the `SQUARE_ACCESS_TOKEN` environment variable\n"
                "   - Implement CORS and proper error handling\n"
                "   - Clearly label this section as: `Backend Code`\n\n"
                "2. Frontend Code:\n"
                "   - Use React with Tailwind CSS and Axios\n"
                "   - Create inputs for all required parameters\n"
                "   - Display success or error messages\n"
                "   - Use `useState` and `useEffect` hooks for state management\n"
                "   - Clearly label this section as: `Frontend Code`\n\n"
                "Only return the code for each section, with no explanations, comments, or file paths. "
                "Separate each section with its corresponding label."
            )
        )

        chain = prompt | llm
        output = chain.invoke({
            "query": state["query"],
            "api_docs": state["api_docs"]
        })
        state["generated_code"] = output.content

    except Exception as e:
        print(f" Codegen agent failed: {e}")
        state["generated_code"] = f"Error: {e}"
    return state


def doc_agent(state: AgentState) -> AgentState:
    try:
        docs = state.get("api_docs", "")
        query = state.get("query", "")
        state["doc_summary"] = (
            f"### API Documentation Summary for `{query}`\n\n"
            f"```\n{docs}\n```"
        )
    except Exception as e:
        state["doc_summary"] = f"Error generating doc summary: {e}"
    return state


In [20]:
graph = StateGraph(AgentState)
graph.add_node("search_agent", search_agent)
graph.add_node("codegen_agent", codegen_agent)
graph.add_node("doc_agent", doc_agent)

graph.add_edge("search_agent", "codegen_agent")
graph.add_edge("codegen_agent", "doc_agent")
graph.add_edge("doc_agent", END)

graph.set_entry_point("search_agent")
workflow = graph.compile()

In [21]:
query = "Create a payment"
state = AgentState(query=query, api_docs="", generated_code="")

# Run the workflow
result = workflow.invoke(state)

# Print full state dict for debugging
print("\n Final State from LangGraph:")
for k, v in result.items():
    print(f"{k}: {v[:200] if isinstance(v, str) else v}")  # truncate long strings


[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph] Entering Chain run with input:
[0m{
  "query": "Create a payment",
  "api_docs": "",
  "generated_code": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:LangGraph > chain:search_agent] Entering Chain run with input:
[0m{
  "query": "Create a payment",
  "api_docs": "",
  "generated_code": ""
}
Search API Tool Output:
API: CreatePayment                 Endpoint: https://connect.squareup.com/v2/payments                 Description: Creates a payment using a card or other supported source.                 Parameters: idempotency_key:string:required, amount_money.object:required, source_id:string:required, autocomplete:boolean, customer_id:string, note:string                 Method: import { SquareClient, SquareEnvironment } from "square"; async function main() {   const client = new SquareClient({     environment: SquareEnvironment.Sandbox,     token: "YOUR_ACCESS_TOKEN",   });   await client.payments.create({     idempotencyKey: "1615

# Multi-Agent Code Generation Workflow with LangGraph
In the multi-agent version, you extended your initial system by orchestrating three specialized agents using LangGraph, where each agent is responsible for a distinct subtask in the pipeline. This modular agent setup increases clarity, separation of concerns, and traceability in your workflow.

1. Agent 1: search_agent – API Retrieval
This agent takes the user's query and performs a similarity search against a Chroma vector store of embedded Square API documentation. It identifies the most relevant API based on semantic similarity and stores the result in state["api_docs"].

Tool used: SentenceTransformerEmbeddings + Chroma

Output: Injected into api_docs field of AgentState

2. Agent 2: codegen_agent – Code Generation
This agent uses a refined PromptTemplate to guide the LLM in generating both backend and frontend code based on the user’s query and retrieved documentation.

LLM: OpenAI GPT model

Format: Well-labeled sections for Backend Code and Frontend Code

Prompt: Instructs the LLM to use Square SDK, environment variables, JSON input/output, CORS, Tailwind, React hooks, etc.

Output: Stored in state["generated_code"]

3. Agent 3: doc_agent – API Documentation Summary
This final agent formats the retrieved API documentation into a clean, human-readable Markdown block for inclusion in reports or developer documentation.

Output: state["doc_summary"] in Markdown format

Includes: API name, endpoint, description, parameters, and example

Workflow Graph (LangGraph)
Used StateGraph to chain the agents sequentially:


search_agent → codegen_agent → doc_agent → END
Entry: "search_agent"

Exit: Final state contains all 3 fields:

query

api_docs

generated_code

doc_summary