In [None]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_google_vertexai import ChatVertexAI
from langchain.embeddings import VertexAIEmbeddings   # Embeddings
from langchain.vectorstores import FAISS
pip install langchain-google-genai


In [2]:
import os
from dotenv import load_dotenv

# Load .env file
load_dotenv()

# Fetch the key
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")

# Print it
print("Perplexity API Key:", PERPLEXITY_API_KEY)


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Load environment variables
from dotenv import load_dotenv
import os
load_dotenv()

# Initialize Gemini LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0.7,
)

# Simple test
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("user", "{input}")
])

chain = prompt | llm

result = chain.invoke({"input": "Write a haiku about the moon."})
print(result.content)


### (1) Load data

In [20]:
import os
os.environ["USER_AGENT"] = "my-app/1.0"


In [11]:
from langchain_community.document_loaders import WebBaseLoader

urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
]

loader = WebBaseLoader(
    urls,
    header_template={"User-Agent": "Mozilla/5.0"}
)

docs = loader.load()
print(len(docs))
print(docs[0].page_content[:500])

2
 
  Wall Street rises as Tesla soars on AI optimism             

  

      



   

  EnglishHindiGujaratiSpecialsSearch Quotes, News, Mutual Fund NAVs  Trending Stocks    Brightcom Group INE425B01027, BCG, 532368 NSDL INE301O01023, , 544467 Suzlon Energy INE040H01021, SUZLON, 532667 Ola Electric INE0LXG01040, OLAELEC, 544225 TCS INE467B01029, TCS, 532540  QuotesMutual FundsCommoditiesFutures & OptionsCurrencyNewsCryptocurrencyForumNoticesVideosGlossaryAll  Hello, Login Hello, LoginLog-inor Sig


### (2) Split data to create chunks

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

split_docs = text_splitter.split_documents(docs)

print(len(split_docs))
print(split_docs[0].page_content[:500])

39
Wall Street rises as Tesla soars on AI optimism


In [24]:
len(docs)

2

In [25]:
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html', 'title': 'Wall Street rises as Tesla soars on AI optimism', 'description': 'Tesla (TSLA.O) rallied 10% after Morgan Stanley upgraded the electric car maker to ', 'language': 'en'}, page_content=' \n  Wall Street rises as Tesla soars on AI optimism             \n\n  \n\n      \n\n\n\n   \n\n  EnglishHindiGujaratiSpecialsSearch Quotes, News, Mutual Fund NAVs  Trending Stocks    Brightcom Group INE425B01027, BCG, 532368 NSDL INE301O01023, , 544467 Suzlon Energy INE040H01021, SUZLON, 532667 Ola Electric INE0LXG01040, OLAELEC, 544225 TCS INE467B01029, TCS, 532540  QuotesMutual FundsCommoditiesFutures & OptionsCurrencyNewsCryptocurrencyForumNoticesVideosGlossaryAll  Hello, Login Hello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy AlertsMy MessagesPrice AlertsMy Profile My PROMy PortfolioMy WatchlistMy AlertsMy MessagesPrice A

### (3) Create embeddings for these chunks and save them to FAISS index

In [1]:
import faiss
print("FAISS version:", faiss.__version__)


FAISS version: 1.12.0


In [14]:
import os
os.environ["GOOGLE_API_KEY"] = "GOOGLE_API_KEY"


In [15]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


In [25]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key="GOOGLE_API_KEY"
)


In [26]:
from dotenv import load_dotenv, dotenv_values
import os

# Force override
load_dotenv(dotenv_path=r"C:\Users\lohit\langchain\2_news_research_tool_project\.env", override=True)

print("dotenv_values:", dotenv_values(r"C:\Users\lohit\langchain\2_news_research_tool_project\.env"))
print("os.getenv:", os.getenv("GOOGLE_API_KEY"))


dotenv_values: OrderedDict({'GOOGLE_API_KEY': 'AIzaSyABDeDUW_jOMw4Uur3k-g9_Fy-RjLPlbuI'})
os.getenv: AIzaSyABDeDUW_jOMw4Uur3k-g9_Fy-RjLPlbuI


In [27]:
import os
print("Current env:", os.environ.get("GOOGLE_API_KEY"))


Current env: AIzaSyABDeDUW_jOMw4Uur3k-g9_Fy-RjLPlbuI


In [29]:
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Create embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Build FAISS index
vectorindex_gemini = FAISS.from_documents(split_docs, embeddings)

# ðŸ”¹ Save FAISS Index properly
faiss_index_path = "vector_index"
vectorindex_gemini.save_local(faiss_index_path)
print("âœ… FAISS index saved successfully!")

# ðŸ”¹ Later: Load FAISS Index
loaded_index = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)
print("âœ… FAISS index loaded successfully!")


âœ… FAISS index saved successfully!
âœ… FAISS index loaded successfully!


In [31]:
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain

# âœ… Use Gemini embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# âœ… Create FAISS index from split documents
vectorindex_gemini = FAISS.from_documents(split_docs, embeddings)

# âœ… Save FAISS index locally (no pickle)
faiss_index_path = "vector_index"
vectorindex_gemini.save_local(faiss_index_path)
print("âœ… FAISS index saved successfully!")

# âœ… Load FAISS index
vectorIndex = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)
print("âœ… FAISS index loaded successfully!")

# âœ… Initialize Gemini LLM
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)

# âœ… Create Retrieval QA Chain
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorIndex.as_retriever()
)

print("âœ… Retrieval QA Chain ready!")


âœ… FAISS index saved successfully!
âœ… FAISS index loaded successfully!
âœ… Retrieval QA Chain ready!


### (4) Retrieve similar embeddings for a given question and call LLM to retrieve final answer

In [32]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorIndex.as_retriever())
chain



In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro")
response = llm.invoke("What's the price of the Tiago iCNG?")

print("Response:", response.content)


[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain > 10:llm:OpenAI] [2.88s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\nSOURCES: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "total_tokens": 2093,
      "prompt_tokens": 1976,
      "completion_tokens": 117
    },
    "model_name": "text-davinci-003"
  },
  "run": null
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain] [2.88s] Exiting Chain run wi

{'answer': ' The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\n',
 'sources': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}

In [37]:
from google.ai import generativelanguage as genai
from google.api_core.client_options import ClientOptions
import os
from dotenv import load_dotenv

load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

client_options = ClientOptions(api_key=GOOGLE_API_KEY)
client = genai.ModelsClient(client_options=client_options)

models = client.list_models()
for m in models:
    print(m.name, m.supported_methods)


AttributeError: module 'google.ai.generativelanguage' has no attribute 'ModelsClient'