In [None]:
%%capture
!pip install requirements.txt

In [106]:
import json

from PyPDFForm import PdfWrapper

pdf = PdfWrapper("i-140.pdf")

with open('c.json', 'r') as file:
    company_info = json.load(file)

customer_info = {
    "prt2PetitionType[1]": True,
    "Pt3Line1a_FamilyName[0]": "CUSTOMER_FIRST_NAME",
    "Pt3Line1b_GivenName[0]": "CUSTOMER_MIDDLE_NAME",
    "Pt3Line1c_MiddleName[0]": "CUSTOMER_LAST_NAME",
    "Line2a_InCareofName[0]": "CUSTOMER_NAME",
    "Line2b_StreetNumberName[0]": "CUSTOMER_STREET_ADDR",
    "Line2c_Unit[0]": False,
    "Line2c_Unit[1]": False,
    "Line2c_Unit[2]": False,
    "Line2c_AptSteFlrNumber[0]": "C_APT",
    "Line2d_CityOrTown[0]": "CUSTOMER_CITY_OR_TOWN",
    "Line2h_Province[0]": "CUSTOMER_PROVINCE",
    "Line2i_Country[0]": "CUSTOMER_COUNTRY",
    "Line2e_State[0]": 0,
    "Line2f_ZipCode[0]": "C_ZIP",
    "Line2g_PostalCode[0]": "CUSTOMER_POSTAL_CODE",
}

schema = pdf.schema["properties"]

print(json.dumps(dict(list(schema.items())[50:]), indent=4))
print(type(schema))

info = company_info | customer_info

pdf = pdf.fill(info)

with open("output.pdf", "wb+") as output_file:
    output_file.write(pdf.read())

{
    "Line5_DateOfBirth[0]": {
        "type": "string",
        "description": "Part 3. Information About the Person for Whom You Are Filing. Other Information. 3. Enter Date of Birth. Enter as 2-digit Month, 2-digit Day, and 4-digit Year."
    },
    "Line6_CityTownOfBirth[0]": {
        "type": "string",
        "description": "Part 3. Information About the Person for Whom You Are Filing. Other Information. 4. Enter City, Town, or Village of Birth."
    },
    "Line8_Country[0]": {
        "type": "string",
        "description": "Part 3. Information About the Person for Whom You Are Filing. Other Information. 6. Enter Country of Birth."
    },
    "Line9_Country[0]": {
        "type": "string",
        "description": "Part 3. Information About the Person for Whom You Are Filing. Other Information. 7. Enter Country of Citizenship or Nationality."
    },
    "Pt3Line8_AlienNumber[0]": {
        "type": "string",
        "maxLength": 9,
        "description": "Part 3. Information Abo

In [53]:
from typing import Annotated, Optional
from typing_extensions import TypedDict

from langchain_tavily import TavilySearch
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from typing_extensions import TypedDict

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

from langchain_openai import ChatOpenAI

from pydantic import BaseModel, Field

# Keys
OPENROUTER_API_KEY = "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" # Your OpenRouter API Key
TAVILLY_API_KEY = "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"

class State(TypedDict):
    messages: Annotated[list[BaseMessage], add_messages] # Be explicit about list[BaseMessage]

class ResponseFormatter(BaseModel):
    """Always use this tool to structure your response to the user."""
    name: str = Field(description="The name of a person")
    age: int = Field(description="The age of a person")
    legal_first_name: str = Field(description="The legal first name of a person. This does NOT include the middle name.")
    legal_middle_name: Optional[str] = Field(description="The legal middle name of a person")
    legal_last_name: str = Field(description="The legal last name of a person")

graph_builder = StateGraph(State)

tavily = TavilySearch(api_key=TAVILLY_API_KEY, max_results=5)
tools = [tavily]

llm = ChatOpenAI(
    model_name="deepseek/deepseek-chat-v3-0324:free",
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=OPENROUTER_API_KEY
)

llm_with_tools = llm.bind_tools(tools).with_structured_output(ResponseFormatter)

def chatbot(state: State):
    # Invoke the LLM to get the structured output
    structured_output = llm_with_tools.invoke(state["messages"])

    # Decide how to represent this in the chat history
    # You could create an AIMessage from it, or just store it separately.
    # For simplicity, let's create an AIMessage with a string representation
    ai_message = AIMessage(content=f"Structured Response: {structured_output.model_dump_json(indent=2)}")

    return {
        "messages": [ai_message], # Add the AI message to history
        "structured_response": structured_output # Store the structured object separately
    }

# In your stream_graph_updates, you might then check for 'structured_response'
def stream_graph_updates(user_input: str):
    for event in graph.stream({"messages": [HumanMessage(content=user_input)]}): # Use HumanMessage directly
        for value in event.values():
            if "messages" in value and value["messages"]:
                print("Assistant:", value["messages"][-1].content)
            if "structured_response" in value and value["structured_response"]:
                print("Structured Output:", value["structured_response"].model_dump_json(indent=2))

graph_builder.add_node("chatbot", chatbot)

tool_node = ToolNode(tools=[tool])
graph_builder.add_node("tools", tool_node)

graph_builder.add_conditional_edges(
    "chatbot",
    tools_condition
)
# Any time a tool is called, we return to the chatbot to decide the next step
graph_builder.add_edge("tools", "chatbot")
graph_builder.add_edge(START, "chatbot")
graph = graph_builder.compile()

while True:
    try:
        user_input = input("User: ")
        if user_input.lower() in ["quit", "exit", "q"]:
            print("Goodbye!")
            break

        stream_graph_updates(user_input)
    except:
        break

User:  Soheil Behnezhad from Northeastern University


Assistant: Structured Response: {
  "name": "Soheil Behnezhad",
  "age": 32,
  "legal_first_name": "Soheil",
  "legal_middle_name": null,
  "legal_last_name": "Behnezhad"
}


User:  q


Goodbye!
