In [None]:
!pip3 install -qU langgraph langchain_google-genai langchain_community
!pip3 install --upgrade --quiet pypdf pandas==2.2.2 streamlit python-dotenv
!pip3 install --quiet --upgrade langchain
!pip3 install -qU langchain-google-genai
!pip3 install --upgrade --quiet  langchain-huggingface sentence_transformers
!pip3 install -qU chromadb


In [None]:
import getpass
import os

# Ask for the key interactively so it is never written into the notebook.
key = getpass.getpass("Enter your API key: ")

# GOOGLE_API_KEY is the environment variable the Google GenAI integration
# looks up when no key is passed explicitly.
os.environ["GOOGLE_API_KEY"] = key
# Original variable name, kept so any code that still reads it keeps working.
os.environ["Gemini-2.0-flash"] = key

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain_huggingface.embeddings import HuggingFaceEmbeddings


import tempfile
import streamlit as st
import pandas as pd
from dotenv import load_dotenv


In [None]:
# Gemini chat model used to answer questions; authenticated with the key
# collected earlier, with temperature and top_p both set to 0.4.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=key,
    temperature=0.4,
    top_p=0.4,
)

In [None]:
# Load the source PDF into LangChain documents.
loader = PyPDFLoader("/content/IJAMR2304261.pdf")
pages = loader.load()

# Split the loaded documents into overlapping chunks (size 1000, overlap 200),
# splitting on paragraph breaks, line breaks and spaces.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " "],
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = text_splitter.split_documents(pages)


In [None]:
def _load_embedding_model(model_name="sentence-transformers/all-mpnet-base-v2"):
    """Create the HuggingFace embedding model used throughout the notebook.

    Args:
        model_name: Name of the sentence-transformers model to load.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)


# `embedding_function` is the embeddings *instance* that later cells pass to
# Chroma. The factory has its own name so the instance no longer overwrites
# the function that created it.
embedding_function = _load_embedding_model()

# Embed every chunk's text; embed_documents takes a list of strings.
texts = [doc.page_content for doc in chunks]
query_result = embedding_function.embed_documents(texts)
# To get a single vector for the whole document instead, join the texts and
# embed the result as one string:
# all_text = " ".join(texts)
# query_result = embedding_function.embed_query(all_text)


In [None]:
# How embedding works: measure the embedding distance between two short texts.
from langchain.evaluation import load_evaluator

# Pass the embeddings *instance* created earlier (`embedding_function`); the
# name `embeddings` only exists inside the factory function, not at cell scope.
evaluator = load_evaluator(evaluator="embedding_distance",
                           embeddings=embedding_function)

# evaluate_strings takes keyword arguments; the embedding-distance evaluator
# compares `prediction` against `reference`. The two sentences are
# illustrative placeholders -- replace them with the texts to compare.
evaluator.evaluate_strings(
    prediction="The article proposes a new method.",
    reference="A new approach is introduced in the paper.",
)

In [None]:
import uuid
def create_vectorstore(chunks,embedding_function,vectorstore_path):
  """Build a Chroma vector store from de-duplicated document chunks.

  Args:
    chunks: Iterable of documents exposing a ``page_content`` string.
    embedding_function: Embeddings object handed to Chroma as ``embedding``.
    vectorstore_path: Directory passed to Chroma as ``persist_directory``.

  Returns:
    The vector store returned by ``Chroma.from_documents``.
  """
  # Derive a deterministic id from each chunk's text so identical text always
  # maps to the same id; keep only the first chunk seen for each id.
  # (dict preserves insertion order, so the original chunk order is kept.)
  unique_chunks={}
  for chunk in chunks:
    chunk_id=str(uuid.uuid5(uuid.NAMESPACE_DNS,chunk.page_content))
    unique_chunks.setdefault(chunk_id,chunk)

  # Index the de-duplicated chunks (not the raw input) under their ids, in the
  # directory the caller asked for.
  vectorstore=Chroma.from_documents(documents=list(unique_chunks.values()),
                                    embedding=embedding_function,
                                    ids=list(unique_chunks.keys()),
                                    persist_directory=vectorstore_path)

  # Call persist() only when the store object provides it, so the function
  # also works with store classes that have no such method.
  persist=getattr(vectorstore,"persist",None)
  if callable(persist):
    persist()

  return vectorstore

In [None]:
# Build the vector store from the PDF chunks with the shared embedding model,
# requesting "vectorstore_chroma" as the storage path.
vectorstore = create_vectorstore(
    chunks=chunks,
    embedding_function=embedding_function,
    vectorstore_path="vectorstore_chroma",
)

In [None]:
# Re-open the store from disk. The directory name matches the
# `vectorstore_path` argument given to create_vectorstore
# ("vectorstore_chroma"); it was previously misspelled "vectorstore_choma".
vectorstore=Chroma(persist_directory="vectorstore_chroma",
                   embedding_function=embedding_function)


In [None]:
# Expose the vector store as a similarity-search retriever and try it on a
# sample question; the matching documents land in `relevant_chunks`.
retriever = vectorstore.as_retriever(search_type="similarity")
relevant_chunks = retriever.invoke(
    "What is the title of the article? "
)

In [None]:
# Alternative chat-style (system + human message) prompt, left commented out;
# the plain-text template below is the one the notebook actually uses.
# template=ChatPromptTemplate.from_messages(
#     [
#         ('system','You are an assistant for question-answering tasks.Use the following pieces of retrieved context to answer the question.if you donot know the answer,just say that you donot know,dont try to make up an answer.'),
#         ("human","Answer the question precisely based on the above context : {question}")
#     ]

# )

# Prompt text for retrieval-augmented answering. {context} and {question} are
# placeholders that are filled in when the template is formatted in a later cell.
Prompt_Template="""
You are an assistant for question-answering tasks.Use the following pieces of retrieved context to answer the question.if you donot know the answer,just say that you donot know,dont try to make up an answer.

{context}

---
Answer the question precisely based on the above context : {question}
"""



In [None]:
# Join the text of the retrieved chunks into one context string, with a
# "--" divider between chunks.
context_text = "\n\n--\n\n".join(
    doc.page_content for doc in relevant_chunks
)

# Fill the template's {context} and {question} placeholders.
prompt_template = ChatPromptTemplate.from_template(template=Prompt_Template)
prompt = prompt_template.format(
    question="What is the title of the article?",
    context=context_text,
)

# Display the final prompt for inspection.
prompt

In [None]:
# Send the formatted prompt to the chat model; the reply is the cell's output.
response = llm.invoke(prompt)
response