# ***Research Paper Assistant***

In [None]:
!pip install langchain langchain-core langchain-community langchain-google-genai langchain-huggingface langchain-text-splitters sentence-transformers pypdf faiss-cpu



In [None]:
from google.colab import files
# Ask for a file through Colab's `files` helper; whatever it returns is kept in `uploaded`.
uploaded = files.upload()

Saving Journal Paper (1).pdf to Journal Paper (1).pdf


In [None]:
import os
from getpass import getpass

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Secrets are taken from the environment when already set and requested interactively
# otherwise, so no key is ever stored in the notebook and an existing value is never
# overwritten with an empty string.
if not os.environ.get("GOOGLE_API_KEY"):
    _google_key = getpass("Google API key: ").strip()
    if not _google_key:
        # Fail here with a clear message instead of later inside the first model call.
        raise ValueError("GOOGLE_API_KEY is required to call the Gemini model.")
    os.environ["GOOGLE_API_KEY"] = _google_key

# A blank reply leaves the variable unset rather than storing an empty token.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    _hf_token = getpass("Hugging Face token (press Enter to skip): ").strip()
    if _hf_token:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Chat model that writes the answers: gemini-2.5-flash with temperature 0 and
# max_tokens set to 1024.
# NOTE(review): confirm that 1024 tokens is enough for the long, multi-section answers
# this notebook asks for.
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0, max_tokens=1024)
# Sentence-embedding model used for both indexing and querying (CPU, unit-normalised vectors).
#
# intfloat/e5-large-v2 expects every input to carry a role prefix: "passage: " for the
# texts being indexed and "query: " for search questions. Without them retrieval quality
# degrades, so the prefixes are supplied through sentence-transformers' `prompt` encode
# argument: `encode_kwargs` applies to documents, `query_encode_kwargs` to queries.
embedding = HuggingFaceEmbeddings(
    model_name="intfloat/e5-large-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True, "prompt": "passage: "},
    query_encode_kwargs={"normalize_embeddings": True, "prompt": "query: "},
)

# Load the PDF with PyPDFLoader; the resulting documents are kept in `docs`
# and split into chunks further down.
loader = PyPDFLoader("Journal Paper (1).pdf")
docs = loader.load()

def format_docs(docs):
    """Render retrieved chunks as one context string, each tagged with its page.

    Args:
        docs: Iterable of document-like objects exposing ``metadata`` (a mapping)
            and ``page_content`` (text).

    Returns:
        The chunks joined by blank lines, each formatted as ``(Page <n>): <text>``.
        ``<n>`` is 1-based when the ``page`` metadata is an integer, the raw value
        when it is anything else, and ``N/A`` when the key is absent. An empty
        input gives an empty string.
    """
    def _page_label(doc):
        page = doc.metadata.get("page", "N/A")
        # The stored page index is zero-based (recorded answers cite "Page 0"), so
        # shift it by one to match the page numbers a reader sees in the PDF.
        if isinstance(page, int) and not isinstance(page, bool):
            return page + 1
        return page

    return "\n\n".join(
        f"(Page {_page_label(doc)}): {doc.page_content}" for doc in docs
    )

# Final stage of the answer chain (see RAG below).
parser = StrOutputParser()

# Cut the loaded documents into overlapping chunks (chunk_size=1200, chunk_overlap=350);
# these chunks are what gets embedded and indexed.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=350, chunk_size=1200)
chunk = splitter.split_documents(docs)

# Build a FAISS vector store from the chunks using `embedding`.
vectorStore = FAISS.from_documents(chunk, embedding)

# Retriever over that store, configured for "mmr" search with
# k=8, fetch_k=20 and lambda_mult=0.5.
retriever = vectorStore.as_retriever(
    search_kwargs={"k": 8, "fetch_k": 20, "lambda_mult": 0.5},
    search_type="mmr",
)

# Prompt for the answer chain. It takes two variables, `context` (the formatted retrieved
# chunks) and `question` (the user's input), and instructs the model to answer only from
# the context or reply with a fixed "insufficient information" sentence.
template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a research-focused AI assistant.

Your task is to answer the user's question **strictly using the provided context** extracted from an academic paper.

Instructions:
- Use only the information present in the context.
- If the answer is not present in the context, say:
  "The provided document does not contain sufficient information to answer this question."
- Use a clear, academic, and concise tone.
- When relevant, reference sections such as Abstract, Methodology, Results, or Conclusion.
- If equations, algorithms, or experimental results are mentioned in the context, include them in your explanation.
- Do not add external knowledge or assumptions.

Context:
{context}

Question:
{question}

Answer:
""",
)

def RAG():
    """Run an interactive question-answering loop over the indexed paper.

    Each turn reads a question from standard input, retrieves matching chunks
    with ``retriever``, formats them with ``format_docs`` and sends them together
    with the question through ``template | model | parser``. The answer is
    printed after ``Bot : ``.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored) or when input is closed or interrupted at the prompt.
    Blank input is ignored and the prompt is shown again.

    Returns:
        None.
    """
    # The chain does not depend on the question, so build it once instead of per turn.
    chain = template | model | parser

    while True:
        try:
            question = input("You : ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt ends the session without a traceback.
            print()
            return

        if question.lower() == "exit":
            return
        if not question:
            # Nothing to search for; skip the retrieval and model calls.
            continue

        ansDoc = retriever.invoke(question)
        context = format_docs(ansDoc)
        result = chain.invoke({"context": context, "question": question})
        print()
        print(f"Bot : {result}")
        print("\n\n")

***Sample Questions***

*1. What are the contents of this paper?*

*2. Can you explain all the contents in detail?*

In [None]:
RAG()

You : What are contents present in this paper?

Bot : This paper presents a real-time facial emotion recognition (FER) system integrated into a chat application to enhance emotional expressiveness during digital communication (Page 0, Page 4 - V. CONCLUSION AND FUTURE SCOPE).

The contents of the paper include:
*   **Problem Statement and Motivation:** It addresses the limitation of traditional text-based communication lacking emotional indicators, which leads to misunderstandings, emphasizing the need for emotionally intelligent interaction in human-computer interaction (HCI) (Page 0).
*   **System Overview:** The suggested system offers a real-time facial emotion recognition function as an add-on module (Page 0).
*   **Methodology:**
    *   **Dataset:** The Cohn-Kanade Plus (CK+) dataset [3] was used for training and assessment, featuring high-resolution photos with emotion labels (happiness, surprise, anger, contempt, disgust, sadness, and fear) and Action Units (Page 2 - B. Descri