In [1]:
import os 
import sys 
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv,find_dotenv

# Load variables from the nearest .env file (if any) into os.environ.
load_dotenv(find_dotenv())

# Fail fast with an actionable message when the key is missing. The previous
# `os.environ[...] = os.getenv(...)` write-back raised an opaque TypeError in
# that case and was a no-op otherwise, because os.getenv reads os.environ.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to a .env file or export it in the "
        "environment before running this notebook."
    )

In [None]:
# Chat model used as the final (generation) step of the chain.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    groq_api_key=groq_api_key,
)

In [4]:
# Loader for the paper to analyse. The path is relative, so the PDF must sit in
# the notebook's working directory.
loader = PyPDFLoader("Human_Segmentation.pdf")

In [5]:
# Read the PDF. `doc` is the sequence of loaded documents that the splitting
# cell slices and chunks.
# NOTE(review): presumably one Document per PDF page - confirm against the
# PyPDFLoader documentation.
doc = loader.load()

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunking parameters, named so they can be tuned in one place.
CHUNK_SIZE = 1000     # maximum chunk size passed to the splitter
CHUNK_OVERLAP = 200   # overlap between consecutive chunks
# Only the first MAX_DOCS loaded documents are indexed; anything beyond that is
# silently ignored. Raise this (or drop the slice) to index a longer paper.
# NOTE(review): presumably this is a page cap, if the loader yields one
# Document per page - confirm.
MAX_DOCS = 20

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
final_documents = text_splitter.split_documents(doc[:MAX_DOCS])

In [7]:
# Embedding model used to vectorise the chunks: the sentence-transformers
# "all-MiniLM-L6-v2" checkpoint, loaded through HuggingFaceEmbeddings.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [8]:
from langchain_community.vectorstores import FAISS

In [9]:
# Guard against an empty chunk list (e.g. a PDF with no extractable text):
# an index cannot be built from zero documents, so raise a readable error here
# instead of letting the failure surface from inside the vector store.
if not final_documents:
    raise ValueError(
        "No text chunks to index: the PDF produced no extractable text. "
        "Check the file path and whether the PDF needs OCR."
    )

# Embed every chunk and build the in-memory FAISS index.
vectorstore = FAISS.from_documents(final_documents, embeddings)

In [10]:
# The template exposes exactly two placeholders, {context} and {question}.
# They MUST match the keys of the mapping fed into the prompt by the chain
# (retriever output -> context, RunnablePassthrough -> question).
RESEARCH_ASSISTANT_TEMPLATE = """
You are an advanced Research Paper Assistant designed to help students and researchers understand academic papers thoroughly. Your tasks include:

1. **Summarization**
   - Provide a concise overview of the paper’s key ideas, contributions, and main results.
   - Use clear, accessible language while preserving technical accuracy.
   - Highlight the research question, context, novelty, and significance.

2. **Extraction**
   - Identify and list important figures, tables, and methodology sections.
   - For each, provide a short descriptive caption or explanation.

3. **Question Answering (RAG-based)**
   - When given a user question, search the indexed chunks of the paper.
   - Answer directly using retrieved context, citing relevant sections.
   - If context is insufficient, state limitations clearly.

4. **Citation Notes**
   - Generate citation-ready summaries of the paper’s contributions.
   - Highlight datasets, methods, metrics, and limitations in brief annotated notes.

---

### Style & Constraints

- Be precise, structured, and academic in tone.
- Use bullet points or numbered lists for clarity.
- Keep answers concise but informative (2–5 sentences per point).
- Always ground answers in the paper’s text; avoid speculation.
- When summarizing, emphasize novelty, methodology, and results.

---

### Example Output Structure

- **Summary:** [2–3 paragraphs]
- **Key Contributions:** [bullet list]
- **Main Results:** [bullet list]
- **Figures & Tables:** [list with captions]
- **Methods:** [short excerpt or description]
- **Citation Notes:** [annotated bullets]
---

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(RESEARCH_ASSISTANT_TEMPLATE)

In [11]:
from langchain_core.runnables import RunnablePassthrough


In [12]:
def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing `page_content`.

    Returns:
        The documents' `page_content` values separated by blank lines.
    """
    return "\n\n".join(d.page_content for d in docs)


# Return the 4 most similar chunks for each query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# The incoming question string is fanned out to both keys: it is sent to the
# retriever (whose hits are flattened to plain text by format_docs, so the
# prompt does not receive the raw list of Document objects) and passed through
# unchanged as {question}. The chain still ends at the chat model, so callers
# keep reading the reply from `response.content`.
chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

In [13]:
# Query to run against the indexed paper.
question = "Explain the summary of the paper in detail."

In [14]:
# Use the `question` variable from the previous cell instead of repeating the
# literal, so that editing the question actually changes what is asked.
response = chain.invoke(question)


In [15]:
# Print the text of the model's reply, taken from the response's `content`.
print(response.content)

**Summary:**
The paper discusses the comparison of various methods for human segmentation, a crucial task in computer vision. The authors compiled the computed means into comparative tables to benchmark the performance of different encoders and optimizers. They also created graphical representations, including bar plots, to visualize the trends in validation loss, training loss, validation IoU, and training IoU for all encoders and optimizers. The paper aims to provide a comprehensive analysis of the strengths and weaknesses of various approaches to human segmentation.

**Key Contributions:**
* The paper provides a thorough comparison of different encoders and optimizers for human segmentation.
* It presents a detailed analysis of the performance of various methods using graphical representations and comparative tables.
* The authors highlight the importance of benchmarking and evaluating the performance of different approaches to human segmentation.

**Main Results:**
* The paper pres