In [92]:
import os
# API keys are taken from the environment, falling back to an interactive
# prompt, so that no credential is ever stored in the notebook itself.
import getpass

for _secret_name in ("COHERE_API_KEY", "LANGCHAIN_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f"{_secret_name}: ")

# Tracing configuration (not secret).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

In [93]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read every PDF found under data/ into LangChain documents.
loader = PyPDFDirectoryLoader("data/")
docs = loader.load()

# Token-based splitter: chunk_size=512 (tiktoken tokens), no overlap between chunks.
split_text = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=512, chunk_overlap=0)

# Chunked documents; these are what gets embedded further down.
split_doc = split_text.split_documents(docs)

In [94]:
from langchain_cohere import CohereEmbeddings
from langchain_community.vectorstores import Chroma

# Cohere embedding model used to vectorise the chunks.
cembed = CohereEmbeddings(model="embed-english-v3.0")

# Embed every chunk in split_doc and index it in a Chroma vector store.
vec_store = Chroma.from_documents(documents=split_doc, embedding=cembed)


In [95]:
# Retriever view over the Chroma store, created with the default search settings.
retriever = vec_store.as_retriever()

In [96]:
from pydantic import BaseModel, Field


# Router

In [97]:
from langchain_cohere import ChatCohere
from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.pydantic_v1 import BaseModel,Field

# Routing tool schema for the web-search path. The class name doubles as the
# tool name that QuestionRouter compares against ("search_web"), so it must not
# be renamed. NOTE(review): the docstring and field description are presumably
# sent to the model as tool metadata by bind_tools — treat them as prompt text.
class search_web(BaseModel):
    """Use web_search for all general queries not covered in the documents."""
    query: str = Field(description="The query to search on the internet.")

# Routing tool schema for the vector-store (RAG) path. The class name doubles
# as the tool name that QuestionRouter compares against ("vector_store"), so it
# must not be renamed. NOTE(review): the docstring and field description are
# presumably sent to the model as tool metadata by bind_tools.
class vector_store(BaseModel):
    """Use vectorstore for questions on Agentic AI."""
    query: str = Field(description="The query to retrieve from the RAG vectorstore.")



    

In [98]:
# System preamble for the router model: lists the topics that should go to the
# vector store and sends everything else to web search.
preamble = """You are a router. Direct the user query to the appropriate tool based on topic relevance.

Use the **vectorstore** if the user’s question relates to any of the following topics discussed in the documents:

- Agentic AI systems and architectures
- The role and future of Small Language Models (SLMs) in Agentic AI
- Economic, legal, and operational implications of deploying Agentic AI
- Prompt engineering, tool use, and modular AI agent design
- Adversarial attacks or robustness concerns in LLM-based agents

Use **web_search** if the question is unrelated to these domains or requires real-time, general, or external web information.

"""


In [61]:
# Command R chat model at temperature 0, used here for routing.
llm = ChatCohere(model="command-r", temperature=0)

# Offer both routing schemas as tools and attach the router preamble.
llm_router = llm.bind_tools(tools=[search_web, vector_store], preamble=preamble)

# The following cells are for testing purposes only.

In [62]:
# Minimal router prompt: the user's question is the only (human) message.
prompt= ChatPromptTemplate.from_messages([("human", "{question}")])

In [63]:
# Router chain: question prompt piped into the tool-bound model. It must be
# built while `prompt` is still the template above, because later cells rebind
# the name `prompt` to functions.
ques_chain = prompt | llm_router

In [64]:
# Smoke test: route one sample question and show which tool the model called.
router_probe = {"question": "Who owns the content made by agentic AI?"}
response = ques_chain.invoke(router_probe)
print(response.response_metadata["tool_calls"])



[{'id': 'vector_store_baa4m0hq11qc', 'type': 'function', 'function': {'name': 'vector_store', 'arguments': '{"query":"content ownership"}'}}]


In [99]:
from typing import Literal


In [100]:
### Retrieval Grader


# Data model
# Structured-output schema for the retrieval grader: one field restricted to
# "yes"/"no". DocumentGrader reads `relevance_label` to decide whether to keep
# a retrieved document.
class DocGrade(BaseModel):
    relevance_label: Literal["yes", "no"] = Field(
        description="Only respond with 'yes' or 'no' — is the document relevant to the question?"
    )



In [101]:
# System preamble for the retrieval grader (binary relevance of a document to a question).
preamble = """
You are a grader assessing the relevance of a retrieved document to a user question.

Relevance means that the document either:
- Contains **keywords or phrases** from the question, OR
- Has **semantic similarity** in meaning or intent to the question.

You must assign a **binary relevance score**:
- Respond `"yes"` if the document directly addresses or answers the question.
- Respond `"no"` if it does not clearly relate to the question.

Respond with only `"yes"` or `"no"` — no explanations or other values are allowed.
"""


In [102]:

# Grader model: Command R whose output is parsed into a DocGrade object.
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(DocGrade, preamble=preamble)

# A single human turn carrying the candidate document and the question.
grade_prompt = ChatPromptTemplate.from_messages(
    [("human", "Retrieved document: \n\n {document} \n\n User question: {question}")]
)
retrieval_grader = grade_prompt | structured_llm_grader

# Smoke test: grade the second retrieved chunk against a sample question.
question = "According to U.S. Copyright Office policy in 2023, can works generated solely by AI be copyrighted"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
grader_inputs = {"question": question, "document": doc_txt}
response = retrieval_grader.invoke(grader_inputs)
print(response)


relevance_label='yes'


In [103]:
# Show the chunk that was just graded.
print(doc_txt)

thereby creating significant ambiguity regarding the ownership of outputs from fully autonomous 
agentic AI systems.
A case in point is DABUS, an AI system that generated novel inventions (a food container 
and a flashing beacon). Patent applications naming DABUS as the inventor triggered legal battles 
around the world. Thus far, patent offices and courts in major jurisdictions (US, UK, EU) have 
rejected AI inventorship, insisting that inventors must be natural persons. For instance, the US 
Federal Circuit in Thaler v. Vidal (2022) affirmed that under current statutes, only humans can be 
inventors.2 The European Patent Office and UK Patent Office reached similar conclusions.3 
However, notable outliers exist: South Africa granted a patent with DABUS as inventor (albeit via a 
formality with no substantive examination).4 In Australia, the Federal Court (2021) initially ruled 
that AI could be an inventor under its law,5 but this was later overturned on appeal by the Full 
Federal Co

In [104]:
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser

# System preamble for the RAG answer generator.
preamble = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."""

# Build the chat model first, then attach the preamble to every call via bind().
rag_chat_model = ChatCohere(model="command-r", temperature=0)
llm = rag_chat_model.bind(preamble=preamble)

In [105]:
def prompt(x):
    """Build the RAG prompt for one request.

    Args:
        x: mapping with a "question" entry and a "documents" entry.

    Returns:
        A ChatPromptTemplate holding one HumanMessage; the question is in the
        message text and the documents travel in ``additional_kwargs``.
    """
    question_message = HumanMessage(
        f"Question: {x['question']} \nAnswer: ",
        additional_kwargs={"documents": x["documents"]},
    )
    return ChatPromptTemplate.from_messages([question_message])

In [106]:
# RAG generation chain: prompt builder -> preamble-bound model -> plain string.
gen_chain = prompt | llm | StrOutputParser()

# Smoke test using the question and documents retrieved earlier.
rag_inputs = {"documents": docs, "question": question}
generation = gen_chain.invoke(rag_inputs)
print(generation)

According to U.S. Copyright Office policy, works generated solely by AI have, historically, been tied to the notion of a human creator and are therefore not afforded the same copyright protections as human-created works. The involvement of AI in the creative process has led to significant ambiguity over ownership rights, with courts in major jurisdictions rejecting AI inventorship. However, the law on this matter remains unclear.


In [107]:
# System preamble for the fallback path: answer from general knowledge, with no retrieved context.
preamble = """
You are an assistant for question-answering tasks.

Answer the user's question based on your general knowledge only — do not assume access to any external documents or context.

Keep your answer concise and limited to a maximum of three sentences.
If you are unsure or lack enough information, say "I don't know."

"""

def prompt(x):
    """Build the fallback prompt for one request.

    Args:
        x: mapping with a "question" entry.

    Returns:
        A ChatPromptTemplate holding a single HumanMessage with the question.
    """
    human_turn = HumanMessage(f"Question: {x['question']} \nAnswer: ")
    return ChatPromptTemplate.from_messages([human_turn])

# Dedicated model for the fallback path, bound to the general-knowledge
# preamble defined at the top of this cell. The shared `llm` name still refers
# to the model bound to the RAG preamble at this point, so piping into it would
# send the "use the retrieved context" instructions on the no-retrieval path.
llm_fall_back = ChatCohere(model="command-r", temperature=0).bind(preamble=preamble)
llm_chain_fall_back = prompt | llm_fall_back | StrOutputParser()

# Run
question_fall_back = "Hi how are you?"
generation_fall_back = llm_chain_fall_back.invoke({"question": question_fall_back})
print(generation_fall_back)


Hello! I'm doing well, thank you for asking. How can I assist you today?


In [108]:

# Structured-output schema for the hallucination grader. Per the field
# description, "yes" means the answer IS grounded in the facts;
# GradeHallucination_answer relies on that polarity when reading `binary_label`.
class GradeHallucinations(BaseModel):
    """Binary score for hallucination presence in the generated answer."""
    binary_label: Literal["yes", "no"] = Field(
        description="Respond only with 'yes' if the answer is grounded in the facts, 'no' if it is not"
    )


# System preamble for the hallucination grader ('yes' = fully supported by the facts).
preamble = """
You are a grader assessing whether a language model's answer is grounded in the given set of facts.

Respond:
- 'yes' → if the answer is fully supported by the facts.
- 'no' → if any part of the answer is not supported by the facts.

You MUST respond only with 'yes' or 'no'. Do not add any explanation or alternative labels.
"""


# Grader model: Command R whose output is parsed into GradeHallucinations.
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeHallucinations, preamble=preamble)

# A single human turn holding the facts and the candidate generation.
hallucination_prompt = ChatPromptTemplate.from_messages(
    [("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}")]
)
hallucination_grader = hallucination_prompt | structured_llm_grader

# Flatten the retrieved chunks into one text block, separated by blank lines.
documents = "\n\n".join(doc.page_content for doc in docs)

# Smoke test on the generation produced earlier; the result is the cell output.
hallucination_grader.invoke({"documents": documents, "generation": generation})

GradeHallucinations(binary_label='yes')

In [109]:
# Show the joined facts that were passed to the hallucination grader.
print(documents)

thereby creating significant ambiguity regarding the ownership of outputs from fully autonomous 
agentic AI systems.
A case in point is DABUS, an AI system that generated novel inventions (a food container 
and a flashing beacon). Patent applications naming DABUS as the inventor triggered legal battles 
around the world. Thus far, patent offices and courts in major jurisdictions (US, UK, EU) have 
rejected AI inventorship, insisting that inventors must be natural persons. For instance, the US 
Federal Circuit in Thaler v. Vidal (2022) affirmed that under current statutes, only humans can be 
inventors.2 The European Patent Office and UK Patent Office reached similar conclusions.3 
However, notable outliers exist: South Africa granted a patent with DABUS as inventor (albeit via a 
formality with no substantive examination).4 In Australia, the Federal Court (2021) initially ruled 
that AI could be an inventor under its law,5 but this was later overturned on appeal by the Full 
Federal Co

In [110]:
# Show the sample question used in the checks above.
print(question)

According to U.S. Copyright Office policy in 2023, can works generated solely by AI be copyrighted


In [111]:
# Show the generated answer that was graded.
print(generation)

According to U.S. Copyright Office policy, works generated solely by AI have, historically, been tied to the notion of a human creator and are therefore not afforded the same copyright protections as human-created works. The involvement of AI in the creative process has led to significant ambiguity over ownership rights, with courts in major jurisdictions rejecting AI inventorship. However, the law on this matter remains unclear.


In [112]:
# Structured-output schema for the answer grader; GradeHallucination_answer
# reads `label` to decide between "useful" and "not useful".
class AnswerGrade(BaseModel):
    """
    Evaluates if the answer addresses the user’s question.
    """
    label: Literal["yes", "no"] = Field(
        description="Only return 'yes' or 'no'"
    )


# System preamble for the answer grader (does the answer address the question?).
preamble = """
You are a grader checking if an answer correctly responds to a user's question.

Respond:
- 'yes' if the answer  relates to or addresses the question, even if it's not perfect.
- 'no' if the answer is off-topic or unrelated to the question given.

Only reply with 'yes' or 'no'.
"""



# Grader model: Command R whose output is parsed into AnswerGrade.
llm = ChatCohere(model="command-r", temperature=0)
answer_llm = llm.with_structured_output(AnswerGrade, preamble=preamble)

# Few-shot conversation: one positive and one negative example, followed by
# the real question/generation pair to grade.
answer_messages = [
    ("system", "You are a grader. Respond only with 'yes' or 'no'."),
    ("human", "User question: Can AI-generated images be copyrighted?\nLLM generation: AI-generated images without human input cannot be copyrighted.\nAnswer:"),
    ("ai", "yes"),
    ("human", "User question: What is the capital of France?\nLLM generation: Apples are red and green.\nAnswer:"),
    ("ai", "no"),
    ("human", "User question: \n\n{question}\n\nLLM generation: \n\n{generation}"),
]
answer_prompt = ChatPromptTemplate.from_messages(answer_messages)

answer_chain = answer_prompt | answer_llm

# Smoke test on the question and generation from the earlier cells.
result = answer_chain.invoke({"question": question, "generation": generation})

In [113]:
# Print each banner followed by its value on the next line.
print("=== QUESTION ===", question, sep="\n")
print("=== GENERATION ===", generation, sep="\n")
print("=== RESULT ===", result.label, sep="\n")


=== QUESTION ===
According to U.S. Copyright Office policy in 2023, can works generated solely by AI be copyrighted
=== GENERATION ===
According to U.S. Copyright Office policy, works generated solely by AI have, historically, been tied to the notion of a human creator and are therefore not afforded the same copyright protections as human-created works. The involvement of AI in the creative process has led to significant ambiguity over ownership rights, with courts in major jurisdictions rejecting AI inventorship. However, the law on this matter remains unclear.
=== RESULT ===
yes


In [114]:
# Comparison run: send the same question/answer text to the plain chat model
# (`llm`, without the structured-output wrapper) and print the raw message.
print(llm.invoke("User question: " + question + "\nLLM generation: " + generation + "\nAnswer:"))


content="That's correct. The U.S. Copyright Office does not currently afford copyright protection to works generated solely by AI. The office maintains the position that copyright protection extends only to works created by human authors, with AI involvement seen as an ambiguity that courts in many jurisdictions have also rejected. This is a rapidly evolving area of law, but the current policy leans towards the human creator being the only recognized copyright owner." additional_kwargs={'id': 'a426eced-f7aa-4610-94d0-0a07db910038', 'finish_reason': 'COMPLETE', 'content': "That's correct. The U.S. Copyright Office does not currently afford copyright protection to works generated solely by AI. The office maintains the position that copyright protection extends only to works created by human authors, with AI involvement seen as an ambiguity that courts in many jurisdictions have also rejected. This is a rapidly evolving area of law, but the current policy leans towards the human creator b

In [115]:
# The Tavily key must come from the environment (or an interactive prompt).
# A literal key in the notebook leaks the credential to anyone who can read the
# file; any key that was previously committed here should be rotated.
import getpass

if not os.environ.get('TAVILY_API_KEY'):
    os.environ['TAVILY_API_KEY'] = getpass.getpass('TAVILY_API_KEY: ')


In [136]:

from langchain_community.tools.tavily_search import TavilySearchResults

# Tavily search tool with default settings, used by the web-search node.
# NOTE(review): presumably picks up TAVILY_API_KEY from the environment — confirm.
web_search_tool = TavilySearchResults()

In [137]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """Shared state passed between the nodes of the graph.

    Keys:
        question: the user's question.
        generation: the answer text produced by the LLM.
        documents: the context documents collected for the question.
    """

    question: str
    generation: str
    documents: List[str]

In [138]:
def DocRetrieve(state):
    """Graph node: fetch documents for the question from the retriever.

    Args:
        state: graph state; only "question" is read.

    Returns:
        Dict with the retrieved "documents" and the unchanged "question".
    """
    print("Retriever State")
    user_question = state["question"]
    return {"documents": retriever.invoke(user_question), "question": user_question}

In [139]:
def fallback_llm(state):
    """Graph node: answer the question with the fallback chain (no retrieval).

    Args:
        state: graph state; only "question" is read.

    Returns:
        Dict with the unchanged "question" and the chain's "generation".
    """
    print("llm fallback")
    user_question = state["question"]
    answer = llm_chain_fall_back.invoke({"question": user_question})
    return {"question": user_question, "generation": answer}

In [195]:
def AnswerGenerate(state):
    """Graph node: generate an answer from the question and its documents.

    Args:
        state: graph state; "question" and "documents" are read.

    Returns:
        Dict with the unchanged "documents" and "question" plus the new
        "generation" returned by gen_chain.
    """
    print("Answer Generator")

    question, context_docs = state["question"], state["documents"]

    # The documents are handed to the chain as-is, not flattened to text.
    answer = gen_chain.invoke({"documents": context_docs, "question": question})

    return {"documents": context_docs, "question": question, "generation": answer}


In [196]:
def DocumentGrader(state):
    """Graph node: keep only the documents the retrieval grader marks relevant.

    Args:
        state: graph state; "question" and "documents" are read. Each
            document must expose ``page_content``.

    Returns:
        Dict with the filtered "documents" (original order) and the "question".
    """
    print("Document Grader")
    question = state["question"]

    kept = []
    for candidate in state["documents"]:
        verdict = retrieval_grader.invoke(
            {"question": question, "document": candidate.page_content}
        )
        if verdict.relevance_label != "yes":
            print("Not relevant")
            continue
        print("Relevant Document")
        kept.append(candidate)

    return {"documents": kept, "question": question}

In [197]:

def Search_web(state):
    """Graph node: run a web search and wrap the results as one document.

    Args:
        state: graph state; only "question" is read.

    Returns:
        Dict with "documents" set to a one-element list holding a Document
        whose text is the newline-joined "content" of every search hit, and
        the unchanged "question".
    """
    # Imported locally because the notebook never imports Document elsewhere;
    # without it this node fails with NameError.
    from langchain_core.documents import Document

    print("web search")
    question = state["question"]

    hits = web_search_tool.invoke({"query": question})
    web_text = "\n".join(hit["content"] for hit in hits)

    # Return a list, not a bare Document: the other nodes iterate over
    # state["documents"] and read `.page_content` from each element.
    return {"documents": [Document(page_content=web_text)], "question": question}


In [198]:
# Edges: routing functions used by the graph's conditional edges

In [206]:
def QuestionRouter(state):
    """Conditional entry edge: choose the first node for a question.

    Args:
        state: graph state; only "question" is read.

    Returns:
        "web_search" or "vectorstore" according to the first tool the router
        model called, or "llm_fallback" when it called no tool or an
        unrecognised one.

    Raises:
        ValueError: if the response carries an empty "tool_calls" list.
    """
    print("Question Router")
    question = state["question"]

    decision = ques_chain.invoke({"question": question})

    # No tool call at all means the model answered directly: use the fallback.
    if "tool_calls" not in decision.additional_kwargs:
        print("Routing to LLM")
        return "llm_fallback"

    tool_calls = decision.additional_kwargs["tool_calls"]
    if len(tool_calls) == 0:
        # Raising a plain string is itself a TypeError in Python 3; raise a
        # real exception that carries the message.
        raise ValueError("Router could not decide source")

    store = tool_calls[0]["function"]["name"]
    if store == "search_web":
        print("WEB SEARCH")
        return "web_search"
    if store == "vector_store":
        print("RAG")
        return "vectorstore"

    print("LLM")
    return "llm_fallback"

In [207]:
def DecisionGenerate(state):
    """Conditional edge after grading: generate, or fall back to web search.

    Args:
        state: graph state; only "documents" is read.

    Returns:
        "generate" when "documents" is non-empty, otherwise "web_search".
    """
    print("Decision to Generate or websearch")

    if state["documents"]:
        print("Generate")
        return "generate"

    print("Web Search")
    return "web_search"


In [208]:
def GradeHallucination_answer(state):
    """Conditional edge after generation: check grounding, then relevance.

    Args:
        state: graph state providing "question", "documents" (objects with a
            ``page_content`` attribute) and "generation".

    Returns:
        "not supported" when the hallucination grader does not answer "yes";
        otherwise "useful" when the answer grader answers "yes", and
        "not useful" when it does not.
    """
    print("Hallucination Checker Module")
    question = state["question"]
    docs = state["documents"]
    gen = state["generation"]

    doc_str = "\n\n".join(doc.page_content for doc in docs)

    grounding = hallucination_grader.invoke({"documents": doc_str, "generation": gen})
    if grounding.binary_label != "yes":
        print("Not Grounded")
        return "not supported"

    print("Generation is Grounded")
    print("Answer vs question")
    # Grade the generation held in the state. The notebook-level `generation`
    # variable is a leftover from an earlier test cell and would make every
    # run grade that stale answer instead of the current one.
    relevance = answer_chain.invoke({"question": question, "generation": gen})
    if relevance.label == "yes":
        print("Answer Address Question")
        return "useful"

    print("Answer does not Address Question")
    return "not useful"


In [209]:
import pprint
from langgraph.graph import END, StateGraph, START

# Build the graph over the shared GraphState.
workflow = StateGraph(GraphState)

# Node definitions. "web_search" must be the Search_web node function; the
# lowercase `search_web` name is the pydantic routing schema, not a node.
workflow.add_node("web_search", Search_web)
workflow.add_node("retrieve", DocRetrieve)
workflow.add_node("grade_documents", DocumentGrader)
workflow.add_node("generate", AnswerGenerate)
workflow.add_node("llm_fallback", fallback_llm)

# Entry point: the router picks the first node.
workflow.add_conditional_edges(
    START,
    QuestionRouter,
    {
        "web_search": "web_search",
        "vectorstore": "retrieve",
        "llm_fallback": "llm_fallback",
    },
)

# Web results go straight to generation. Routing them through "retrieve"
# would overwrite state["documents"] with vector-store hits, discarding the
# search results, and could cycle web_search -> retrieve -> grade_documents ->
# web_search whenever no retrieved document is graded relevant.
workflow.add_edge("web_search", "generate")

workflow.add_edge("retrieve", "grade_documents")

# After grading: generate if any document survived, otherwise search the web.
workflow.add_conditional_edges(
    "grade_documents",
    DecisionGenerate,
    {
        "web_search": "web_search",
        "generate": "generate",
    },
)

# After generation: regenerate if ungrounded, search the web if off-topic,
# finish if the answer is useful.
workflow.add_conditional_edges(
    "generate",
    GradeHallucination_answer,
    {
        "not supported": "generate",
        "not useful": "web_search",
        "useful": END,
    },
)

workflow.add_edge("llm_fallback", END)

app = workflow.compile()


In [204]:
# Stream the graph for a document-related question, announcing each node as it
# finishes.
inputs = {"question": "Who owns the content made by agentic AI"}
for output in app.stream(inputs):
    for key in output:
        value = output[key]
        pprint.pprint(f"Node '{key}':")
    pprint.pprint("\n---\n")

# `value` still holds the state emitted by the last node; show its answer.
pprint.pprint(value["generation"])


Question Router
RAG
Retriever State
"Node 'retrieve':"
'\n---\n'
Document Grader
Relevant Document
Relevant Document
Relevant Document
Relevant Document
Decision to Generate or websearch
Generate
"Node 'grade_documents':"
'\n---\n'
Answer Generator
Hallucination Checker Module
Generation is Grounded
Answer vs question
Answer Address Question
"Node 'generate':"
'\n---\n'
('The ownership of content generated by agentic AI is a legal ambiguity. '
 'Current frameworks suggest the researcher or influencer who commissioned it '
 "might own the content, but this is a challenge that's reshaping law. AI "
 'service providers and AIs themselves might also claim ownership.')


In [211]:

# Stream the graph for a small-talk question, announcing each node as it
# finishes.
inputs = {"question": "Hello, how are you today?"}
for output in app.stream(inputs):
    for key in output:
        value = output[key]
        pprint.pprint(f"Node '{key}':")
    pprint.pprint("\n---\n")

# `value` still holds the state emitted by the last node; show its answer.
pprint.pprint(value["generation"])

Question Router
Routing to LLM
llm fallback
"Node 'llm_fallback':"
'\n---\n'
("Hello! I'm doing well, thank you for asking. How about you? I hope you're "
 'having a fantastic day!')
