##LangGraph Solution

In [None]:
# !pip install langgraph

In [None]:
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph,START,END 
from langchain.memory import ConversationBufferMemory 
from langchain.chains import ConversationalRetrievalChain 
from typing import Dict, List, Literal
from pydantic import BaseModel, Field
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import JsonOutputParser


ModuleNotFoundError: No module named 'langchain.memory'

#LLM

In [None]:
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
load_dotenv()

llm = AzureChatOpenAI(
    openai_api_key= os.getenv("AZURE_LLMsOpenAI_API_KEY"),
    openai_api_version=os.getenv("AZURE_LLMsOpenAI_API_VERSION"),
    azure_deployment= os.getenv("AZURE_LLMsOpenAI_GPT4O_DEPLOYMENT_NAME"),
    azure_endpoint= os.getenv("AZURE_LLMsOpenAI_ENDPOINT"),
    temperature=0.2,
    top_p=0.7,
    max_tokens=4096)
llm.invoke("Hello").content



TypeError: str expected, not NoneType

In [None]:
def extract_text(file_path):
    loader = PyPDFLoader(file_path)
    return loader.load_and_split()

#Graph State



In [4]:
class GraphState(BaseModel): 
    query: str = Field(default="", description="User query input") 
    flow_type: str = Field(default=None, description="Determines whether to perform comparison or Q&A") 
    document_texts: List[str] = Field(default=None, description="List of document texts to process") 
    comparison_results: Dict[str, List[str]] = Field(default=None, description="Stores commonalities and differences between documents") 
    rag_answer: str = Field(default=None, description="Stores the RAG-based answer for Q&A flow")

def graph_state() -> GraphState: 
    return GraphState()


##Create DB

In [5]:
# Create Sample Documents

document1 = "This is a sample contract. The terms include payment within 30 days and a penalty clause." 
document2 = "This is a sample agreement. It states that payment should be made within 30 days and includes a penalty section."

document_texts = [document1, document2]

# Load VectorDB for RAG
def get_embeddings():
    embeddings=AzureOpenAIEmbeddings(
    model=os.getenv("AZURE_LLMsOpenAI_EMBEDDINGS_DEPLOYMENT_NAME"),
    azure_endpoint=os.getenv("AZURE_LLMsOpenAI_ENDPOINT"),
    api_key=os.getenv("AZURE_LLMsOpenAI_API_KEY"),
    openai_api_version=os.getenv("AZURE_LLMsOpenAI_API_VERSION"))
    return embeddings

def create_vectordb(docs: List[str]): 
    embeddings = get_embeddings()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200) 
    split_docs = text_splitter.create_documents(docs) 
    return FAISS.from_documents(split_docs, embeddings)

In [6]:
class router_class(BaseModel):
    classifier: Literal["compare","rag","end"]

    
def router(query: str):
    llm = AzureChatOpenAI(
        openai_api_key= os.getenv("AZURE_LLMsOpenAI_API_KEY"),
        openai_api_version=os.getenv("AZURE_LLMsOpenAI_API_VERSION"),
        azure_deployment= os.getenv("AZURE_LLMsOpenAI_GPT4O_DEPLOYMENT_NAME"),
        azure_endpoint= os.getenv("AZURE_LLMsOpenAI_ENDPOINT"),
        temperature=0.2,
        top_p=0.7,
        max_tokens=4096,
        )

    system_prompt = """
            You are an AI assistant that classifies user queries into three categories:
            - "compare" if the query involves comparing two documents.
            - "rag" if the query requires question-answering with a retrieval-based solution.
            - "end" if the user wants to quit/exit.

            Return the response in **strict JSON format**, with **only one key**: classifier.
            Do not include any explanations or extra text.
            """
    router_prompt = ChatPromptTemplate([
    ("system",system_prompt),
    ("user", "{query}")
    ])

    # structured_llm= llm.with_structured_output(router_class)
    parser = JsonOutputParser()
    chain=router_prompt | llm |parser
    response=chain.invoke({"query":query})
    return response

res=router("end")
print(res['classifier'])


end


##Nodes


In [None]:
# Decision Node:Prompts user for Input Determines which flow to follow
def decision_node(state: GraphState) -> GraphState: 
    query = input("Enter your query: ")
    state=state.copy(update={"query":query})
    print({state.query})
    res = router(state.query)
    flow_type=""
    print(res)
    if "compare" in res['classifier']:
        flow_type = "comparison"
    elif "end" in res['classifier']:
        flow_type = "end"
    else:
        flow_type = "rag"
    state=state.copy(update={"flow_type":flow_type})
    return state


# Comparison Node: Compares full document texts

def comparison_node(state: GraphState) -> GraphState: 
    doc1, doc2 = state.document_texts 
    common_text = set(doc1.split()) & set(doc2.split()) 
    differences = set(doc1.split()) ^ set(doc2.split()) 
    return state.copy(update={"comparison_results": {"common": list(common_text), "differences": list(differences)}})

# Q&A Node: Uses RAG with VectorDB

def qa_node(state: GraphState) -> GraphState: 
    vectordb = create_vectordb(state.document_texts) 
    memory = ConversationBufferMemory(memory_key="chat_history") 
    chain = ConversationalRetrievalChain.from_llm(llm, vectordb.as_retriever(), memory=memory) 
    response = chain.run(state.query) 
    return state.copy(update={"rag_answer": response})

def end_node(state: GraphState) -> GraphState:
    print("workflow has ended")
    return state




##Build the LangGraph Workflow

In [None]:
graph = StateGraph(GraphState)

graph.add_node("decision_node", decision_node) 
graph.add_node("comparison_node", comparison_node) 
graph.add_node("qa_node", qa_node) 
graph.add_node("end_node", end_node) 

graph.add_edge("comparison_node","decision_node") # Return to decision node after comparison
graph.add_edge("qa_node","decision_node") # Return to decision node after Q&A

# Define Conditional Edges
def route_fn(state:GraphState)->str:
    return state.flow_type
    
graph.add_conditional_edges(
    "decision_node",
    route_fn,
    {
       "comparison":"comparison_node",
       "rag":"qa_node",
       "end":"end_node"
    }
)







<langgraph.graph.state.StateGraph at 0x21b99b49ff0>

In [9]:
graph.set_entry_point("decision_node") 
workflow = graph.compile()

In [10]:
from IPython.display import Image, display
try:
    display(Image(workflow.get_graph().draw_mermaid_png()))
except Exception as e:
    print(e)

HTTPSConnectionPool(host='mermaid.ink', port=443): Max retries exceeded with url: /img/JSV7aW5pdDogeydmbG93Y2hhcnQnOiB7J2N1cnZlJzogJ2xpbmVhcid9fX0lJQpncmFwaCBURDsKCV9fc3RhcnRfXyhbPHA+X19zdGFydF9fPC9wPl0pOjo6Zmlyc3QKCWRlY2lzaW9uX25vZGUoZGVjaXNpb25fbm9kZSkKCWNvbXBhcmlzb25fbm9kZShjb21wYXJpc29uX25vZGUpCglxYV9ub2RlKHFhX25vZGUpCgllbmRfbm9kZShbZW5kX25vZGVdKTo6Omxhc3QKCV9fc3RhcnRfXyAtLT4gZGVjaXNpb25fbm9kZTsKCWNvbXBhcmlzb25fbm9kZSAtLT4gZGVjaXNpb25fbm9kZTsKCXFhX25vZGUgLS0+IGRlY2lzaW9uX25vZGU7CglkZWNpc2lvbl9ub2RlIC0uICZuYnNwO2NvbXBhcmlzb24mbmJzcDsgLi0+IGNvbXBhcmlzb25fbm9kZTsKCWRlY2lzaW9uX25vZGUgLS4gJm5ic3A7cmFnJm5ic3A7IC4tPiBxYV9ub2RlOwoJZGVjaXNpb25fbm9kZSAtLiAmbmJzcDtlbmQmbmJzcDsgLi0+IGVuZF9ub2RlOwoJY2xhc3NEZWYgZGVmYXVsdCBmaWxsOiNmMmYwZmYsbGluZS1oZWlnaHQ6MS4yCgljbGFzc0RlZiBmaXJzdCBmaWxsLW9wYWNpdHk6MAoJY2xhc3NEZWYgbGFzdCBmaWxsOiNiZmI2ZmMK?type=png&bgColor=!white (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer 

In [11]:
initial_state = GraphState(document_texts=document_texts) 
# workflow.invoke(initial_state)

for event in workflow.stream(initial_state):
    for key,value in event.items():
        print(key,value)
        

C:\Users\2781419\AppData\Local\Temp\ipykernel_23104\2012341382.py:4: PydanticDeprecatedSince20: The `copy` method is deprecated; use `model_copy` instead. See the docstring of `BaseModel.copy` for details about how to handle `include` and `exclude`. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  state=state.copy(update={"query":query})


{'hi'}
{'classifier': 'rag'}
decision_node {'query': 'hi', 'flow_type': 'rag', 'document_texts': ['This is a sample contract. The terms include payment within 30 days and a penalty clause.', 'This is a sample agreement. It states that payment should be made within 30 days and includes a penalty section.']}


C:\Users\2781419\AppData\Local\Temp\ipykernel_23104\2012341382.py:15: PydanticDeprecatedSince20: The `copy` method is deprecated; use `model_copy` instead. See the docstring of `BaseModel.copy` for details about how to handle `include` and `exclude`. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  state=state.copy(update={"flow_type":flow_type})
  memory = ConversationBufferMemory(memory_key="chat_history")
  response = chain.run(state.query)
C:\Users\2781419\AppData\Local\Temp\ipykernel_23104\2012341382.py:34: PydanticDeprecatedSince20: The `copy` method is deprecated; use `model_copy` instead. See the docstring of `BaseModel.copy` for details about how to handle `include` and `exclude`. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  return state.copy(update={"rag_answer": response})


qa_node {'query': 'hi', 'flow_type': 'rag', 'document_texts': ['This is a sample contract. The terms include payment within 30 days and a penalty clause.', 'This is a sample agreement. It states that payment should be made within 30 days and includes a penalty section.'], 'rag_answer': 'Hello! How can I assist you today?'}


C:\Users\2781419\AppData\Local\Temp\ipykernel_23104\2012341382.py:4: PydanticDeprecatedSince20: The `copy` method is deprecated; use `model_copy` instead. See the docstring of `BaseModel.copy` for details about how to handle `include` and `exclude`. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  state=state.copy(update={"query":query})


{'end'}
{'classifier': 'end'}
decision_node {'query': 'end', 'flow_type': 'end', 'document_texts': ['This is a sample contract. The terms include payment within 30 days and a penalty clause.', 'This is a sample agreement. It states that payment should be made within 30 days and includes a penalty section.'], 'rag_answer': 'Hello! How can I assist you today?'}
workflow has ended
end_node {'query': 'end', 'flow_type': 'end', 'document_texts': ['This is a sample contract. The terms include payment within 30 days and a penalty clause.', 'This is a sample agreement. It states that payment should be made within 30 days and includes a penalty section.'], 'rag_answer': 'Hello! How can I assist you today?'}


C:\Users\2781419\AppData\Local\Temp\ipykernel_23104\2012341382.py:15: PydanticDeprecatedSince20: The `copy` method is deprecated; use `model_copy` instead. See the docstring of `BaseModel.copy` for details about how to handle `include` and `exclude`. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  state=state.copy(update={"flow_type":flow_type})
