In [178]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from Models.groq import llm
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.agents import create_agent


In [179]:
def loaded_docs(path_name):
    """Load the PDF at ``path_name`` with ``PyPDFLoader`` and return what ``load()`` yields."""
    return PyPDFLoader(path_name).load()

In [180]:
# Locations of the three SPM unit PDFs.
path = r"C:\Users\aswin\Projects\GenworxTraining\Self_Learn\AI\LangChain\Unit 1 SPM.pdf"
path2 = r"C:\Users\aswin\Projects\GenworxTraining\Self_Learn\AI\LangChain\Unit 2 SPM.pdf"
path3 = r"C:\Users\aswin\Projects\GenworxTraining\Self_Learn\AI\LangChain\SPM unit-3 .pdf"

# One loader call per file, in the same order as the paths above.
document, document2, document3 = (
    loaded_docs(pdf_path) for pdf_path in (path, path2, path3)
)

In [181]:
def get_chunks(splitter,document):
    """Split ``document`` by delegating to ``splitter.split_documents`` and return the result."""
    pieces = splitter.split_documents(document)
    return pieces

In [182]:
# A single splitter (chunk_size=1000, chunk_overlap=200) is shared by all three documents.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# Split each loaded document with the same helper, keeping the original order.
chunks, chunks2, chunks3 = (
    get_chunks(splitter, loaded) for loaded in (document, document2, document3)
)

In [183]:
# Embedding model used for both indexing and querying.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Every chunk from the three PDFs, in a fixed order.
all_chunks = chunks + chunks2 + chunks3

# Deterministic ids built from source, page and position in `all_chunks`.
# When no ids are passed, a fresh random id is generated for every chunk, so
# each re-run of this cell appends another full copy of the corpus to the
# persisted ./chroma_db collection and retrieval starts returning duplicates.
# With stable ids a re-run overwrites the same entries instead.
# NOTE(review): relies on langchain_chroma writing with upsert semantics -
# confirm for the installed version. Copies stored under random ids by earlier
# runs are not removed; delete ./chroma_db once to start from a clean index.
chunk_ids = [
    f"{chunk.metadata.get('source')}|{chunk.metadata.get('page')}|{position}"
    for position, chunk in enumerate(all_chunks)
]

vectordb = Chroma.from_documents(
    documents=all_chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db",
    collection_metadata={"hnsw:space":"cosine"}
)

In [184]:
# Alternative queries tried earlier (kept for reference; uncomment one to use it):
#   query="where the case study of Application of Project Management Methodologies? and want the page number in which this topic is present"
#   query="Agile is like building a puzzle"
#   query=" Waterfall in Construction Projects"
#   query="Compare waterfall and agile technique in software projects"
#   query="Forward Pass & Backward Pass techniques"
#   query="what i can infer from the Dynamic System Development Method?"

# Active query.
query = "summarize Principles of Agile model and also compare PERT Chart and Gantt Chart"

In [None]:
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate
from langchain_core.runnables import RunnablePassthrough
# Search settings handed to the MMR retriever: k=20 results out of fetch_k=50
# candidates, lambda_mult=0.5, filtered to chunks whose "page" metadata is >= 0.
mmr_search_kwargs = {
    "k": 20,
    "fetch_k": 50,
    "lambda_mult": 0.5,
    "filter": {"page": {"$gte": 0}},
}
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs=mmr_search_kwargs)

# System message describing the task to the model (typos in the wording fixed).
SYSTEM_PROMPT="""
You are currently summarizing the contents of university notes for SOFTWARE PROJECT MANAGEMENT.
For this you are given the top chunks related to the question and the actual question.
"""

# Human message template; {context} and {question} are filled in when the chain is invoked.
HUMAN_TEMPLATE = """Answer using the context below.
    Context:
    {context}

    Question:
    {question}
    """

# SYSTEM_PROMPT was previously defined but never reached the model because the
# prompt was built from the human template alone. Building a chat prompt sends
# it as the system message while still taking the same two input variables,
# so `prompt | llm` and the {"context", "question"} invoke payload keep working.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYSTEM_PROMPT),
        ("human", HUMAN_TEMPLATE),
    ]
)
# Run the retriever on the active query; the returned documents feed the context below.
docs=retriever.invoke(query)

def get_context(docs):
    """Render documents as a single context string.

    Each document becomes a block numbered from 1 that lists its ``source``
    and ``page`` metadata, its ``section`` metadata (``"N/A"`` when the key is
    absent) and its page content. Each block has surrounding whitespace
    stripped, and blocks are separated by one blank line.
    """
    rendered = []
    for number, entry in enumerate(docs, start=1):
        # The literal's own indentation is part of the output, so it is kept as-is.
        block = f"""
    [Document {number}]
    Source: {entry.metadata["source"]}
    Page: {entry.metadata["page"]}
    Section: {entry.metadata.get("section", "N/A")}
    Content:
    {entry.page_content}
    """
        rendered.append(block.strip())
    return "\n\n".join(rendered)

def de_duplicate(docs):
    """Keep only the first document for each (source, page) pair.

    Args:
        docs: iterable of objects exposing a ``metadata`` mapping with
            ``"source"`` and ``"page"`` keys.

    Returns:
        A new list with the first document seen for every distinct
        (source, page) pair, in the original order.

    Raises:
        KeyError: if a document's metadata lacks ``"source"`` or ``"page"``.
    """
    unique_docs = []
    seen = set()
    for doc in docs:
        key = (doc.metadata["source"], doc.metadata["page"])
        if key in seen:
            continue
        # Store the same (source, page) tuple that is tested above. Storing
        # only the page number meant the tuple was never found in `seen`, so
        # no document was ever filtered out.
        seen.add(key)
        unique_docs.append(doc)
    return unique_docs

# Pass the retrieved documents through de_duplicate, then render them as context text.
unique_docs = de_duplicate(docs)
context = get_context(unique_docs)

# Pipe the prompt into the model and run it with the rendered context and the query.
rag_chain = prompt | llm
chain_inputs = {"question": query, "context": context}
result = rag_chain.invoke(chain_inputs)

In [None]:
# Display the content blocks of the chain's response.
result.content_blocks

[{'type': 'text',
  'text': "**Principles of Agile Model:**\n\nThe Agile model is a flexible and iterative approach to software development that emphasizes customer collaboration, continuous improvement, and rapid delivery of working software. The 12 Agile principles are:\n\n1. **Customer satisfaction**: Our highest priority is to satisfy the customer through early and continuous delivery of valuable software.\n2. **Welcome changing requirements**: Welcome changing requirements, even late in development. Agile processes harness change for the customer's competitive advantage.\n3. **Deliver working software**: Deliver working software frequently, from a couple of weeks to a couple of months, with a preference to the shorter timescale.\n4. **Business people and developers work together**: Business people and developers work together daily throughout the project.\n5. **Face-to-face conversation**: Face-to-face conversation is the most efficient and effective method of conveying informatio

In [194]:
# Display the context string that was passed to the chain.
context

"[Document 1]\n    Source: C:\\Users\\aswin\\Projects\\GenworxTraining\\Self_Learn\\AI\\LangChain\\SPM unit-3 .pdf\n    Page: 31\n    Section: N/A\n    Content:\n    It involves tracking how each task is going. Regularly check if tasks are on schedule, \naddress any delays, and adjust plans if needed. This ensures the project stays on track and \nany issues are resolved quickly. \nPERT Chart Vs Gantt Chart \nHere is a comparison of PERT and Gantt charts in a detailed way: \nFeature PERT Chart Gantt Chart \nPurpose Visualize project tasks and \ndependencies Schedule tasks over time\n\n[Document 2]\n    Source: C:\\Users\\aswin\\Projects\\GenworxTraining\\Self_Learn\\AI\\LangChain\\SPM unit-3 .pdf\n    Page: 30\n    Section: N/A\n    Content:\n    estimation of effort, selection of suitable process model for software development and \ndecomposition of tasks into subtasks. \n \nWhat does a PERT Chart Contain? \nHere are the main components of a PERT chart: \n\uf0b7 Nodes: it represents th