# Install Libraries

In [None]:
pip install langchain google-generativeai huggingface-hub faiss-cpu chromadb


In [None]:
import os
from dotenv import load_dotenv

# Load environment variables from the local .env file.
load_dotenv()

google_api_key = os.getenv("GOOGLE_API_KEY")
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Report only whether the key was found. Printing the secret itself would store
# it in the notebook's saved output, where it can leak when the file is shared.
print("Google API Key:", "loaded" if google_api_key else "NOT FOUND")


In [None]:
!pip install -U langchain-community

In [None]:
!pip install google-generativeai


In [None]:
# ✅ Required Imports
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# FAISS and HuggingFaceEmbeddings are taken from langchain_community, the same
# package PyMuPDFLoader comes from; the langchain.vectorstores and
# langchain.embeddings paths are deprecated re-exports.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
import os

# ✅ Load API key from .env
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")

# Stop here with a clear message instead of failing later inside the LLM call.
if not google_api_key:
    raise ValueError("❌ GOOGLE_API_KEY not found in .env file")

# ✅ Step 1: Load PDF (path is relative to the notebook's working directory)
loader = PyMuPDFLoader("NIPS-2017-attention-is-all-you-need-Paper.pdf")
raw_docs = loader.load()

# ✅ Step 2: Split documents into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = splitter.split_documents(raw_docs)

# ✅ Step 3: Generate embeddings and index the chunks in FAISS
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding_model)
retriever = vectorstore.as_retriever()

# ✅ Step 4: Set up Gemini model
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=google_api_key,
    convert_system_message_to_human=True
)

# ✅ Step 5: Create RetrievalQA chain
# return_source_documents=True makes the retrieved chunks available under
# result["source_documents"] so they can be printed below.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# ✅ Step 6: Run a query
# The input is passed as an explicit {"query": ...} mapping, the same form the
# other query cells in this notebook use.
query = "What is the summary of section 2?"
result = qa_chain.invoke({"query": query})

# ✅ Print result
print("\n📘 Answer:", result["result"])
print("\n📄 Source(s):")
for doc in result["source_documents"]:
    # Show the page number (when the loader recorded one) and a 200-character preview.
    print("-", doc.metadata.get("page", "No page info"), ":", doc.page_content[:200], "...")


In [None]:
# LangChain LLM wrapper for Gemini
from langchain_google_genai import ChatGoogleGenerativeAI

# Direct Gemini API Testing

In [None]:
import os
import google.generativeai as genai
from dotenv import load_dotenv

# Load the API key from the .env file
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail early with a clear message rather than with an opaque error from the
# API call further down.
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY not found in .env file")

genai.configure(api_key=GOOGLE_API_KEY)

# Initialize Gemini model
model = genai.GenerativeModel("gemini-2.0-flash")

# Run a prompt directly against the Gemini SDK (no LangChain involved)
response = model.generate_content("Summarize the benefits of AI in agriculture.")

print("Response:\n", response.text)


# Install these Libraries

In [None]:
!pip install \
  langchain==0.1.17 \
  langchain-core==0.1.52 \
  langchain-community==0.0.38 \
  langchain-google-genai==0.0.6 \
  google-generativeai==0.3.2 \
  faiss-cpu==1.7.4 \
  unstructured==0.12.6 \
  pypdf==4.2.0 \
  sentence-transformers==2.2.2



In [None]:
# ✅ Imports
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
import os

In [None]:
# ✅ Pull variables from the .env file into the environment, then read the key
load_dotenv()
google_api_key = os.environ.get("GOOGLE_API_KEY")

# ✅ Abort when the key is missing or empty
if google_api_key in (None, ""):
    raise ValueError("GOOGLE_API_KEY not found in .env file!")

# ✅ Read the paper with PyMuPDFLoader.
# The PDF must sit next to this notebook; otherwise pass its full path.
loader = PyMuPDFLoader(
    "NIPS-2017-attention-is-all-you-need-Paper.pdf"
)
documents = loader.load()

In [None]:
!pip uninstall -y numpy transformers
!pip install numpy==1.24.4
!pip install transformers --upgrade


In [None]:
!pip install sentence-transformers


In [None]:
# ✅ Imports
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# FAISS and HuggingFaceEmbeddings are taken from langchain_community, the same
# package PyMuPDFLoader comes from; the langchain.vectorstores and
# langchain.embeddings paths are deprecated re-exports.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import os

# ✅ Load your API key from .env file
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")

# ✅ Check for API key before proceeding
if not google_api_key:
    raise ValueError("GOOGLE_API_KEY not found in .env file!")

# ✅ Load PDF document using a compatible loader
loader = PyMuPDFLoader("NIPS-2017-attention-is-all-you-need-Paper.pdf")  # Make sure this file is in the same folder or give full path
documents = loader.load()

# ✅ Split the text into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = splitter.split_documents(documents)

# ✅ Generate embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ✅ Store in FAISS and expose the index as a retriever
db = FAISS.from_documents(docs, embedding_model)
retriever = db.as_retriever()

# ✅ Setup Gemini LLM
llm = GoogleGenerativeAI(model="models/gemini-2.0-flash", google_api_key=google_api_key)

# ✅ Build the RetrievalQA chain
# return_source_documents=True keeps the retrieved chunks in the output under
# "source_documents", which the following query cell reads.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# ✅ Run a test query
query = "What is the summary of section 2?"
result = qa_chain.invoke({"query": query})
print("Answer:", result["result"])


In [None]:
query = "What is the main topic of the document?"
# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated, and the earlier cells already use .invoke().
response = qa_chain.invoke({"query": query})

# Access result and sources separately
print("Answer:", response["result"])

# Optional: print the full text of every retrieved source chunk, numbered from 1
for i, doc in enumerate(response["source_documents"], 1):
    print(f"\nSource {i}:\n{doc.page_content}")


In [None]:
# 6. Ask Questions
query = "What is the main topic of the document?"
# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated, and the earlier cells already use .invoke().
result = qa_chain.invoke({"query": query})
print("Answer:", result["result"])