In [60]:
from langgraph.graph import START, END, StateGraph
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from CONFIG import GROQ_MODEL, OPENAI_EMBEDDED_MODEL
from langchain_community.vectorstores import FAISS
from typing import TypedDict, List
from langchain_core.prompts import ChatPromptTemplate

In [48]:
# Load key/value pairs from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI / Groq API keys come from — confirm.
load_dotenv()

True

In [49]:
# Model clients shared by the rest of the notebook.
# Both model names are imported from CONFIG.

# Embedding model: passed to FAISS when the vector index is built.
embedded_model = OpenAIEmbeddings(model=OPENAI_EMBEDDED_MODEL)
# Chat model: piped after the prompt inside `generate`.
llm = ChatGroq(model=GROQ_MODEL)

In [50]:
# Load the source PDF and report how many entries the loader returned.

# Raw string literal: the Windows path contains backslash sequences (\A, \C, \T)
# that are not valid escapes in a normal string and raise a SyntaxWarning on
# recent Python versions. The resulting path value is unchanged.
# TODO(review): this is an absolute, machine-specific path; consider a
# configurable data directory so the notebook runs elsewhere.
pdf = PyPDFLoader(file_path=r'A:\AI_Projects_Practice\CRAG\The_Evolution_of_AI_in_Dubai.pdf')
pdf_loaded = pdf.load()
print(f"PDF has {len(pdf_loaded)} pages")

PDF has 8 pages


In [51]:
# Split the loaded PDF documents into overlapping chunks
# (chunk_size=900, chunk_overlap=100) and report how many were produced.

text_splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=100)
chunks = text_splitter.split_documents(pdf_loaded)
print(f"Total chunks are {len(chunks)}")

Total chunks are 24


In [56]:
# Build the FAISS vector store: every chunk is embedded with `embedded_model`
# and indexed so it can be searched by similarity later.

vectore_storage = FAISS.from_documents(chunks, embedded_model)

In [57]:
# Retriever over the FAISS store: similarity search returning the top 2 chunks.
# The keyword names must be spelled exactly `search_type` / `search_kwargs`;
# misspelled names are not picked up as search settings, so the intended
# k=2 would not be applied.

retriever = vectore_storage.as_retriever(search_type='similarity', search_kwargs={'k': 2})

In [58]:
class state(TypedDict):
    """State dictionary read and updated by the node functions in this notebook."""
    # The user's question; read by `retrieved_str` and `generate`.
    question: str
    # The model's reply; written by `generate`.
    answer: str
    # Retrieved context passed to the prompt.
    # NOTE(review): `generate` reads `.page_content` from each element, so these
    # look like document objects rather than plain strings — confirm and
    # tighten the annotation.
    doc: List[str]

In [59]:
def retrieved_str(state: state) -> dict:
    """Retrieve the chunks most relevant to the question.

    Args:
        state: Current state; only ``state['question']`` is read.

    Returns:
        A partial state update ``{'doc': <retriever results>}``.
    """
    # Store the results under 'doc' (the key `generate` reads) instead of
    # overwriting 'question' with the retrieved documents.
    return {'doc': retriever.invoke(state['question'])}

In [None]:
# Prompt used by `generate`: the system message restricts the model to the
# supplied context, and the human message carries the `question` and
# `context` template variables.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer only from the context. If not in context, say you don't know."),
        ("human", "Question: {question}\n\nContext:\n{context}"),
    ]
)

def generate(state):
    """Answer the question using the retrieved documents as context.

    Joins the ``page_content`` of every entry in ``state["doc"]`` with blank
    lines, sends it together with ``state["question"]`` through
    ``prompt | llm``, and returns ``{"answer": <reply content>}``.
    """
    passages = [document.page_content for document in state["doc"]]
    chain = prompt | llm
    reply = chain.invoke(
        {"question": state["question"], "context": "\n\n".join(passages)}
    )
    return {"answer": reply.content}