In [2]:
import os
import streamlit as st 
import pickle
import time 
import langchain 
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    HarmBlockThreshold,
    HarmCategory,
    GoogleGenerativeAIEmbeddings
)

from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from dotenv import load_dotenv

# Merge variables from a local .env file into the process environment, then
# read the Google API key from it (None when the variable is not set).
load_dotenv()
api_key = os.environ.get("GOOGLE_API_KEY")

In [9]:
 # Gemini chat model used by the retrieval QA chain further down.
 # The key comes from `api_key`, read from the environment in an earlier cell.
 llm = ChatGoogleGenerativeAI(
     model="gemini-pro",
     temperature=0.9,
     google_api_key=api_key,
     # Only the "dangerous content" category is overridden here.
     safety_settings={
         HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
     },
     # Use the declared field name. `max_tokens` is only an alias in newer
     # langchain-google-genai releases and may be dropped as an unknown kwarg
     # in older ones, leaving the response length uncapped.
     max_output_tokens=500,
 )

In [13]:
# Article pages that make up the corpus for this notebook.
article_urls = [
    "https://www.moneycontrol.com/news/india/severe-weather-alert-heavy-rains-to-hit-eight-states-until-august-3-delhi-on-yellow-alert-12780337.html",
    "https://www.moneycontrol.com/automobile/mitsubishi-motors-joins-honda-nissan-alliance-amidst-major-auto-industry-shift-report-article-12780198.html",
]
loader = UnstructuredURLLoader(urls=article_urls)

In [14]:
# Run the URL loader; the trailing expression displays how many documents
# came back.
data = loader.load()
len(data)

2

In [16]:
# Break the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) and show how many were produced.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)
len(docs)

17

In [33]:
# Preview only the first couple of chunks; echoing the full list floods the
# notebook output with scraped page text.
docs[:2]

[Document(metadata={'source': 'https://www.moneycontrol.com/news/india/severe-weather-alert-heavy-rains-to-hit-eight-states-until-august-3-delhi-on-yellow-alert-12780337.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\nco-presented by\n\nassociated by\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsIndia\n\nTrending Topics\n\nNiti Aayog Meeting LIVEShashi TharoorPNB ScamMamata BanerjeeRahul GandhiPM ModiKanwar YatraMumbai WeatherNavi Mumbai Building Collapse\n\nSevere Weather Alert: Heavy rains to hi

In [115]:

from langchain_community.vectorstores import FAISS

# Embed every chunk in `docs` and build an in-memory FAISS index over them.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    task_type="retrieval_document",
)
db = FAISS.from_documents(docs, embeddings)

# Persist the index with the vector store's own serializer. The previous
# version pickled an undefined name (`vectors`), which raises NameError on a
# fresh kernel. Reload later with FAISS.load_local("faiss_index", embeddings, ...).
db.save_local("faiss_index")


In [117]:
# Expose the FAISS store `db` through the retriever interface; it is handed
# to the QA chain in the next cell.
retriever = db.as_retriever()

In [118]:
# Question-answering chain that pairs the Gemini LLM with the FAISS retriever.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)

In [119]:
# Inspect the assembled chain; a bare trailing expression lets the notebook
# render it as the cell's output instead of printing to stdout.
chain



In [126]:
# Question that will be posed to the retrieval QA chain.
query = "what does elonmusk daughter do?"

In [127]:
from langchain.globals import set_debug

# Enable LangChain's debug tracing through the supported setter rather than
# assigning the deprecated module attribute `langchain.debug`.
set_debug(True)

# `invoke` replaces the deprecated direct call `chain(...)`.
# `return_only_outputs=True` requests only the chain's output keys in the
# result, without the input question.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what does elonmusk daughter do?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Elon Musk's transgender daughter disowns him, says he is 'desperate for attention, validation'\n\nMark Zuckerberg reveals story behind his gold necklace, says it has a prayer for his daughters\n\nAn economist outsmarted the lottery and won 14 jackpots. He explains the math\n\nAdvisory Alert:\n\nForum Facebook Twitter Instagram Linkedin RSS\n\nPortfolio\n\nMarkets\n\nWatchlist\n\nLive TV Show\n\nCurrencies\n\nFREE Credit Score₹100 Cash Reward\n\nCommodities\n\nFixed Income\n\nPersonal Finance

{'answer': "The provided context does not include information about what Elon Musk's daughter does. Therefore, I cannot answer this question.\n",
 'sources': ''}