In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from Models.groq import llm
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def loaded_docs(path_name):
    """Load a PDF and return the documents produced by the loader.

    Args:
        path_name: Path of the PDF file, passed unchanged to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader(path_name).load()`` returns.
    """
    return PyPDFLoader(path_name).load()

In [3]:
# PDFs to index, one per course unit.
# NOTE(review): these are absolute, machine-specific Windows paths; the notebook
# only runs where these exact files exist.
path, path2, path3 = (
    r"C:\Users\aswin\Projects\GenworxTraining\Self_Learn\AI\LangChain\Unit 1 SPM.pdf",
    r"C:\Users\aswin\Projects\GenworxTraining\Self_Learn\AI\LangChain\Unit 2 SPM.pdf",
    r"C:\Users\aswin\Projects\GenworxTraining\Self_Learn\AI\LangChain\SPM unit-3 .pdf",
)

# Load the three PDFs in order; each name keeps its own unit's result.
document, document2, document3 = [
    loaded_docs(pdf_path) for pdf_path in (path, path2, path3)
]

In [4]:
def get_chunks(splitter,document):
    """Split already-loaded documents with the supplied splitter.

    Args:
        splitter: Any object exposing a ``split_documents`` method.
        document: The loaded documents; forwarded to the splitter unchanged.

    Returns:
        The value returned by ``splitter.split_documents(document)``.
    """
    pieces = splitter.split_documents(document)
    return pieces

In [5]:
# One splitter shared by all three PDFs (chunk_size=1000, chunk_overlap=200).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Split each unit separately, in order, so the per-unit chunk lists stay available.
chunks, chunks2, chunks3 = [
    get_chunks(splitter, pages) for pages in (document, document2, document3)
]

In [6]:
# Embedding model used for every chunk and, later, for the query.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# All three units are indexed into a single collection.
all_chunks = chunks + chunks2 + chunks3

# Deterministic ids make this cell safe to re-run. Without explicit ids the
# store generates fresh ones on each call, so every re-run would append another
# copy of all chunks to the persisted ./chroma_db collection. The running
# position keeps ids unique even when two chunks share a source and page.
# NOTE(review): a ./chroma_db populated by earlier runs (random ids) still
# holds those duplicates and needs to be cleared once.
chunk_ids = [
    f"{chunk.metadata.get('source', 'unknown')}::p{chunk.metadata.get('page', 'na')}::c{position}"
    for position, chunk in enumerate(all_chunks)
]

vectordb = Chroma.from_documents(
    documents=all_chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db",
    collection_metadata={"hnsw:space":"cosine"}
)

In [7]:
# Alternative questions tried earlier, kept for reference:
# query="where the case study of Application of Project Management Methodologies? and want the page number in which this topic is present"
# query="Agile is like building a puzzle"
# query=" Waterfall in Construction Projects"
# query="Compare waterfall and agile technique in software projects"
# query="Forward Pass & Backward Pass techniques"
# query="what i can infer from the Dynamic System Development Method?"

# Active question sent to the retriever and the model.
query = (
    "summarize Principles of Agile model "
    "and also compare PERT Chart and Gantt Chart"
)

In [8]:
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate
from langchain_core.runnables import RunnablePassthrough
# MMR retriever over the vector store: 50 candidates are fetched (fetch_k) and
# 20 are returned (k), with lambda_mult=0.5. The metadata filter keeps only
# chunks whose "page" value is >= 0.
retriever = vectordb.as_retriever(
    search_type="mmr",
    search_kwargs=dict(
        k=20,
        fetch_k=50,
        lambda_mult=0.5,
        filter={"page": {"$gte": 0}},
    ),
)

# Role/instruction text for the model. It was previously defined but never
# sent; it is now the system message of the chat prompt below.
SYSTEM_PROMPT="""
You are currently summarizing the contents from a unversity_notes for SOFTWARE PROJECT MANAGEMENT
for this you are given the top chunks related to that quesiton and the actual question
"""

# Chat prompt with two messages: the system instructions above, then a human
# message carrying the retrieved context and the question. The input variables
# are still "context" and "question", so callers invoke it with the same dict.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYSTEM_PROMPT),
        (
            "human",
            """Answer using the context below.
    Context:
    {context}

    Question:
    {question}
    """,
        ),
    ]
)
# Fetch the chunks the retriever selects for the active query.
docs = retriever.invoke(query)

def get_context(docs):
    """Render retrieved documents into a single context string for the prompt.

    Each document becomes a numbered section (starting at 1) listing its
    source, page, section and content; sections are separated by a blank line.

    Args:
        docs: Iterable of objects with a ``metadata`` mapping and a
            ``page_content`` string.

    Returns:
        The joined sections, or an empty string when ``docs`` is empty.
    """
    sections = []
    for idx, doc in enumerate(docs, 1):
        meta = doc.metadata
        # All three metadata lookups fall back to "N/A". Previously only
        # "section" did, so a document without "source" or "page" raised
        # KeyError.
        # NOTE(review): PyPDFLoader's "page" value appears to be 0-based;
        # confirm whether a 1-based page number should be shown instead.
        lines = [
            f"[Document {idx}]",
            f"    Source: {meta.get('source', 'N/A')}",
            f"    Page: {meta.get('page', 'N/A')}",
            f"    Section: {meta.get('section', 'N/A')}",
            "    Content:",
            f"    {doc.page_content}",
        ]
        # strip() also trims trailing whitespace of the content, as before.
        sections.append("\n".join(lines).strip())
    return "\n\n".join(sections)


# Turn the retrieved chunks into the context string the prompt expects.
context = get_context(docs)

# Pipe the prompt into the model, then run the chain once for the active query.
rag_chain = prompt | llm
result = rag_chain.invoke(
    {
        "question": query,
        "context": context,
    }
)