In [14]:
from dotenv import load_dotenv
import os
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings  # ✅ both from here now
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from pypdf import PdfReader



In [15]:

# 2. Load environment variables
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

# ✅ Test if API key is loaded
print("API Key Loaded:", "Yes" if api_key else "No")

API Key Loaded: Yes


In [17]:
# --- Test Gemini LLM ---
# Use a valid Gemini model (gemini-2.0-flash is safe & widely supported)
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)

In [18]:

response = llm.invoke("Hello, who are you?")
print("LLM Test Response:", response.content)

LLM Test Response: I am a large language model, trained by Google.


In [24]:
# --- Test PDF Q&A Pipeline ---
pdf_reader = PdfReader("cnn.pdf")   # make sure sample.pdf is in your folder
text = ""
for page in pdf_reader.pages:
    page_text = page.extract_text()
    if page_text:
        text += page_text

print("Total characters extracted from PDF:", len(text))

Total characters extracted from PDF: 53130


In [25]:
# Split into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text(text)
print("Total chunks created:", len(chunks))


Total chunks created: 68


In [28]:
# Create embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
vectorstore = FAISS.from_texts(chunks, embeddings)

In [29]:
# Ask a question
retriever = vectorstore.as_retriever()
docs = retriever.get_relevant_documents("What is this PDF about?")
print("PDF Retrieval Result:", docs[0].page_content[:200])  # preview first 200 chars

  docs = retriever.get_relevant_documents("What is this PDF about?")


PDF Retrieval Result: [4] Y . LeCun, B. Boser, J.S. Denker, D. Henderson, R.E. Howard, W . Hubbard, et al., Backpropagation
applied to handwritten zip code recognition, Neural Computation 1 (4) (1989) 541–551, https://
doi
