In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def locate_and_split(filepath : str, chunk_size : int = 1000, chunk_overlap : int = 200):
    """Load a PDF and split its contents into overlapping text chunks.

    Args:
        filepath: Path of the PDF file to load.
        chunk_size: Target maximum chunk length handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The list of chunk documents returned by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    print("Chunking...")
    loader = PyPDFLoader(file_path=filepath)
    # Use load() rather than load_and_split(): the latter already runs its own
    # default text splitter, so the documents would be chunked twice with two
    # different size settings before reaching the splitter configured below.
    pages = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents=pages)

In [None]:
# NOTE: machine-specific absolute path; point this at your local copy of the paper.
PDF_PATH = r"D:\Project\RAG\docs\NIPS-2017-attention-is-all-you-need-Paper.pdf"

chunks = locate_and_split(PDF_PATH)
# Guarded lookup: a PDF that yields no chunks, or whose metadata has no title
# entry, shows nothing here instead of raising IndexError / KeyError.
chunks[0].metadata.get('title') if chunks else None

Chunking...


{'producer': 'PyPDF2',
 'creator': 'PyPDF',
 'creationdate': '',
 'subject': 'Neural Information Processing Systems http://nips.cc/',
 'publisher': 'Curran Associates, Inc.',
 'language': 'en-US',
 'created': '2017',
 'eventtype': 'Poster',
 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On English-to-

In [4]:
from langchain_core.messages import BaseMessage
from langgraph.graph import StateGraph, START, END
from typing import TypedDict, Annotated, List
from operator import add
from langchain_core.documents import Document


from HybridSearchRAG.model import GenModel
from HybridSearchRAG.retriever import get_hybrid_retriever
from HybridSearchRAG.prompts import GRADE_DOCS, FINAL_ANSWER

# Shared model instance; grade_docs and generation both call llm.invoke(...).
# NOTE(review): the recorded output of this cell is an ImportError for `GenModel`
# from HybridSearchRAG.model — confirm the name that module actually exports.
llm = GenModel()
# Shared retriever instance; retriever_node calls retriever.invoke(question).
# NOTE(review): presumably a hybrid (keyword + vector) retriever, judging by the
# factory name — confirm against HybridSearchRAG.retriever.
retriever = get_hybrid_retriever()

class AgentState(TypedDict):
    """State dictionary passed between the nodes of the graph built in this cell."""
    # Not read or written by any node defined in this cell.
    messages : List[BaseMessage]
    # The user's question; read by retriever_node, grade_docs and generation.
    question : str
    # Written by retriever_node with whatever retriever.invoke(question) returns.
    documents : List[Document]
    # "YES" or "NO", written by grade_docs and read by should_continue.
    relevance : str
    # Output of llm.invoke(...) stored by the generation node.
    result : str

def retriever_node(state : AgentState) -> AgentState:
    """Fetch documents for the question held in the state.

    Passes ``state['question']`` to the module-level ``retriever`` and returns
    a partial state update carrying the retrieved documents.
    """
    print("--- RETRIEVING DOCUMENTS ---")
    hits = retriever.invoke(state['question'])
    hit_count = len(hits)
    print(f"--- RETRIEVED {hit_count} DOCUMENTS ---")
    return {"documents" : hits}

def grade_docs(state : AgentState) -> AgentState:
    """Ask the LLM whether the retrieved documents are relevant to the question.

    Reads ``question`` and ``documents`` from the state and returns a partial
    state update: ``{"relevance": "YES"}`` or ``{"relevance": "NO"}``.
    """
    print("--- GRADING DOCUMENTS ---")
    question = state["question"]
    docs = state["documents"]

    # Nothing was retrieved, so there is nothing to grade: skip the LLM call.
    if not docs:
        print("--- GRADE: DOCUMENTS ARE NOT RELEVANT ---")
        return {"relevance": "NO"}

    # Grade on page text only, formatted the same way as in the generation
    # node. Passing the Document objects themselves would put their reprs
    # (including all per-chunk metadata) into the prompt.
    formatted_docs = "\n\n".join(doc.page_content for doc in docs)
    prompt = GRADE_DOCS.format_prompt(question=question, documents=formatted_docs)

    result = llm.invoke(prompt)
    # NOTE(review): substring match — any reply containing "yes" counts as
    # relevant; assumes GRADE_DOCS asks for a plain yes/no answer. Confirm.
    if "yes" in result.lower():
        print("--- GRADE: DOCUMENTS ARE RELEVANT ---")
        return {"relevance": "YES"}

    print("--- GRADE: DOCUMENTS ARE NOT RELEVANT ---")
    return {"relevance": "NO"}
    
def generation(state : AgentState) -> AgentState:
    """Produce the final answer from the question and the retrieved documents.

    Joins the page contents of ``state['documents']`` with blank lines, fills
    the ``FINAL_ANSWER`` prompt with that context and the question, and returns
    the LLM output as a partial state update under ``"result"``.
    """
    print("--- GENERATING ANSWER ---")

    user_question = state["question"]
    page_texts = [document.page_content for document in state['documents']]
    context_block = "\n\n".join(page_texts)

    answer = llm.invoke(
        FINAL_ANSWER.format_prompt(context=context_block, question=user_question)
    )
    print("--- ANSWER GENERATED ---")

    return {"result" : answer}

def should_continue(state : AgentState):
    """Pick the routing label that follows document grading.

    Returns ``"CONTINUE"`` when the state's ``relevance`` value contains
    "yes" (case-insensitive), otherwise ``"END"``.
    """
    verdict = state["relevance"].lower().strip()
    return "CONTINUE" if "yes" in verdict else "END"

# Build the workflow graph over the AgentState schema.
graph = StateGraph(AgentState)

# Register the three processing steps under their node names.
graph.add_node("RETRIEVER NODE", retriever_node)
graph.add_node("GRADE NODE", grade_docs)
graph.add_node("GENERATION NODE", generation)

# Entry path: retrieve first, then grade what was retrieved.
graph.add_edge(START, "RETRIEVER NODE")
graph.add_edge("RETRIEVER NODE", "GRADE NODE")

# Branch on the label returned by should_continue: "CONTINUE" proceeds to
# answer generation, "END" finishes the run without generating an answer.
graph.add_conditional_edges(
    "GRADE NODE",
    should_continue,
    {
        "CONTINUE" : "GENERATION NODE",
        "END" : END
    }
)

# Generation is the final step.
graph.add_edge("GENERATION NODE", END)

# Compile the graph; the bare `app` expression makes it the cell's displayed output.
app = graph.compile()
app


ImportError: cannot import name 'GenModel' from 'HybridSearchRAG.model' (d:\Project\RAG\HybridSearchRAG\model.py)