In [1]:
import os
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment.
# override=True: values from .env replace variables that are already set.
load_dotenv(override=True)

True

In [11]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Embedding model handed to the vector store so it can embed both the indexed
# chunks and incoming queries.
embeddings = OpenAIEmbeddings()
# No persist_directory is passed — presumably the collection only lives for
# this kernel session; verify before relying on it across restarts.
vector_store = Chroma(embedding_function=embeddings)
# Chat model used by the answer-generation step in a later cell.
llm = ChatOpenAI(temperature=0)

# Source document: the USCIS page on Optional Practical Training (OPT) for
# F-1 students. Loading it requires network access.
loader = WebBaseLoader(
    web_paths=['https://www.uscis.gov/working-in-the-united-states/students-and-exchange-visitors/optional-practical-training-opt-for-f-1-students'],
)
docs = loader.load()
# Split the page into overlapping chunks (chunk_size=1000, chunk_overlap=200;
# sizes are presumably measured in characters — confirm against the splitter's
# length function), then index every chunk in the vector store.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)
vector_store.add_documents(documents=all_splits)

# Bare last expression: shows the store's repr as the cell output.
vector_store

<langchain_chroma.vectorstores.Chroma at 0x123acf610>

In [40]:
# Agent tool: Search web for relevant pages, documents, etc. related to the query, and return the URLs of those pages
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.tools import tool
from typing import Tuple, Any

# Agent tool returning (content, artifact): a text listing for the model plus
# the raw result list. The docstring is left exactly as written because the
# `@tool` decorator uses it as the tool description shown to the model.
@tool(response_format='content_and_artifact')
def search_urls_gov(query: str) -> Tuple[str, Any]:
    """
    Retrieve URLs of relevant .gov and .edu websites for the given search query, using DuckDuckGo.
    """
    # Only 'link' and 'title' are requested, so those are the only keys
    # available on each result dict below.
    search = DuckDuckGoSearchResults(output_format='list', num_results=10, keys_to_include=['link', 'title'])
    results = search.invoke(f'{query} site:(.gov | .edu)')
    # One "URL ... Title ..." line per hit. The title comes from the 'title'
    # key: 'snippet' is not in keys_to_include, so reading it raised KeyError.
    # Double quotes around the f-string keep it valid on Python < 3.12, where
    # reusing the enclosing quote inside a replacement field is a SyntaxError.
    results_serialized = '\n'.join(
        f"URL: {result['link']}\tTitle: {result['title']}" for result in results
    )
    return results_serialized, results

In [34]:
from langchain import hub
from langchain_core.documents import Document
from typing_extensions import List, TypedDict

# Define state for application
class State(TypedDict):
    """State passed between the steps of the RAG pipeline (`retrieve`, `generate`)."""
    # The user's question; read by both `retrieve` and `generate`.
    question: str
    # Documents returned by the vector-store similarity search in `retrieve`.
    context: List[Document]
    # Text content of the model's response, written by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Fetch the documents most similar to the question from the vector store.

    Reads ``state["question"]`` and returns a partial state update whose
    ``"context"`` entry is the result of ``vector_store.similarity_search``.
    """
    question = state["question"]
    return {"context": vector_store.similarity_search(question)}


def generate(state: State):
    """Answer the question from the retrieved context using the chat model.

    Joins the page content of every document in ``state["context"]`` with
    blank lines, fills the "rlm/rag-prompt" template with that text and
    ``state["question"]``, and returns a partial state update whose
    ``"answer"`` entry is the content of the model's response.
    """
    # The prompt template is pulled from the hub on every call.
    rag_prompt = hub.pull("rlm/rag-prompt")
    context_text = "\n\n".join([document.page_content for document in state["context"]])
    prompt_value = rag_prompt.invoke(
        {"question": state["question"], "context": context_text}
    )
    return {"answer": llm.invoke(prompt_value).content}

In [35]:
from langgraph.graph import START, StateGraph

# Two-step RAG pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
# add_sequence registers both functions as nodes (named after the functions)
# and connects them in list order.
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [36]:
# Run the compiled retrieve -> generate graph on a sample question and show
# the generated answer as the cell output.
response = graph.invoke({'question': 'What is OPT?'})
response['answer']



"Optional Practical Training (OPT) is temporary employment related to an F-1 student's major area of study. Eligible students can apply for up to 12 months of OPT before or after completing their academic studies. All periods of pre-completion OPT will be deducted from the available post-completion OPT."

In [None]:
# TODO: unfinished cell. The dangling `response = ` that was here had no
# right-hand side, which is a SyntaxError and stops Restart & Run All.
# Assign the intended value to `response` here, or delete this cell.

LangGraph agent

In [39]:
from langgraph.graph import StateGraph, MessagesState

class MessagesStateRAG(MessagesState):
    """Message-based graph state for the RAG agent.

    Currently declares no keys beyond those inherited from ``MessagesState``.
    The docstring doubles as the class body: the original definition had an
    empty body, which is a syntax error.

    TODO: declare any RAG-specific state keys here.
    """

# NOTE(review): this rebinds `graph_builder`, which an earlier cell used for
# the retrieve/generate pipeline, and the schema passed is MessagesState
# rather than the MessagesStateRAG subclass — confirm which was intended.
graph_builder = StateGraph(MessagesState)