In [1]:
pip install -U langchain-huggingface

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [5]:
import google.generativeai as genai

In [10]:
from typing import Optional, List
from langchain_core.language_models.llms import LLM 

In [12]:
class GeminiLLM(LLM):
    """Minimal LangChain ``LLM`` adapter around a Google Gemini text model.

    Each call builds a ``genai.GenerativeModel`` for ``self.model``, sends the
    prompt through ``generate_content`` and returns the response text.

    NOTE(review): no ``genai.configure(...)`` call is visible in this notebook;
    presumably the API key is supplied through the environment -- confirm.
    """

    # Name of the Gemini model passed to genai.GenerativeModel on every call.
    model: str = "gemini-1.5-flash"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager=None,
        **kwargs,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text sent to the model as-is.
            stop: Optional stop sequences. The returned text is cut just before
                the earliest occurrence of any of them (the sequence itself is
                not included). Empty strings are ignored.
            run_manager: Callback manager supplied by LangChain; accepted so the
                signature matches the base-class contract, currently unused.
            **kwargs: Extra generation arguments forwarded by LangChain;
                accepted for signature compatibility and currently ignored.

        Returns:
            The generated text, truncated at the first stop sequence if any.
        """
        client = genai.GenerativeModel(self.model)
        response = client.generate_content(prompt)
        # NOTE(review): `.text` may raise when the API returns no text candidate
        # (e.g. a blocked prompt) -- confirm against the SDK before relying on it.
        text = response.text

        if stop:
            # Enforce stop sequences client-side: keep everything before the
            # earliest match so the caller never sees text past a stop marker.
            hits = [pos for pos in (text.find(seq) for seq in stop if seq) if pos != -1]
            if hits:
                text = text[: min(hits)]
        return text

    @property
    def _identifying_params(self) -> dict:
        """Parameters that distinguish this LLM instance (used by LangChain for caching/tracing)."""
        return {"model": self.model}

    @property
    def _llm_type(self) -> str:
        """Short identifier LangChain uses to label this LLM implementation."""
        return "google_gemini_llm"


# Instantiate our custom Gemini LLM with its default model ("gemini-1.5-flash").
# `llm` is the model object later handed to RetrievalQAWithSourcesChain.from_llm.
llm = GeminiLLM()

In [14]:
from langchain.document_loaders import UnstructuredURLLoader

# Download the two Moneycontrol articles and load them as LangChain documents
# (the recorded run produced 2 documents, one per URL).
article_urls = [
    "https://www.moneycontrol.com/news/business/lilavati-medical-trust-versus-hdfc-bank-what-the-dispute-is-all-about-13258639.html",
    "https://www.moneycontrol.com/news/business/stocks/hdfc-bank-shares-trade-higher-touches-days-high-of-rs-1994-90-alpha-article-13241800.html",
]
loader_url = UnstructuredURLLoader(urls=article_urls)
data = loader_url.load()
len(data)

2

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded pages into chunks of at most 1000 characters, with 200
# characters of overlap between neighbouring chunks.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = splitter.split_documents(data)
len(chunks)

21

In [18]:
# Peek at the first chunk as a sanity check. In the recorded run it consists
# mostly of site navigation text (menus, login links) rather than article body.
chunks[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/lilavati-medical-trust-versus-hdfc-bank-what-the-dispute-is-all-about-13258639.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_BUSINESS_AS/MC_ENG_ROS_NWS_BUS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nSubscription Products\n\nprofile\n\nAmbareesh Baliga\n\nprofile\n\nCK Nara

In [20]:
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings  # ✅ Updated import

# Sentence-transformers model used to turn each chunk into a dense vector.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Embed every chunk and build the in-memory FAISS index over the vectors.
vectorstore = FAISS.from_documents(chunks, embeddings)

In [22]:
# Persist the FAISS vector store to disk so later cells can reload it
# instead of re-embedding the documents.
file_path = "vector_index.pkl"
with open(file_path, mode="wb") as index_file:
    pickle.dump(vectorstore, index_file, protocol=pickle.DEFAULT_PROTOCOL)

In [24]:
# Reload the persisted vector store from disk.
# Fail fast when the file is missing: silently skipping the load would leave
# `vectorIndex` undefined (or stale from an earlier kernel run) and surface
# later as a confusing error when the chain is built.
if not os.path.exists(file_path):
    raise FileNotFoundError(
        f"Vector index file {file_path!r} not found; run the cell that saves it first."
    )

# SECURITY: pickle.load can execute arbitrary code. Only load index files that
# this notebook itself produced; never unpickle files from an untrusted source.
with open(file_path, "rb") as f:
    vectorIndex = pickle.load(f)

In [30]:
from langchain.chains import RetrievalQAWithSourcesChain

# Expose the reloaded vector store as a retriever, then wire it together with
# the Gemini LLM into a question-answering chain that also reports its sources.
retriever = vectorIndex.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
chain



In [34]:
query = "What allegations has the Lilavati Medical Trust made?"

# Turn on LangChain's global debug tracing so every intermediate chain step
# (retrieval, map and reduce prompts) is printed below the cell.
langchain.debug = True

# Use `invoke` rather than calling the chain object directly: Chain.__call__
# is deprecated. `return_only_outputs=True` keeps just the answer and sources
# in the returned dict, as before.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What allegations has the Lilavati Medical Trust made?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "What allegations has the Lilavati Medical Trust made?\n\nThe Trust has accused the bank’s CEO, Jagdishan, and eight others of fraud and misappropriation of the Trust’s funds.\n\nThe Trust has accused Jagdishan of accepting a bribe of Rs 2.05 crore to help a group consisting of one Chetan Mehta and other erstwhile trustees to retain illegal control over the Trust. The Trust claimed that the payment was documented in a handwritten cash diary, which had been seized.\n\nFur

{'answer': "The Lilavati Medical Trust accused HDFC Bank's CEO, Jagdishan, and eight others of fraud and misappropriation of the Trust's funds.  Specifically, they allege Jagdishan accepted a bribe of Rs 2.05 crore to help a group retain illegal control over the Trust, a payment documented in a seized handwritten cash diary.  They further allege Jagdishan facilitated an illegal deposit of Rs 25 crore into an HDFC Bank account without proper authorization.  Finally, they claim Rs 1.5 crore was disbursed to hospital staff under the guise of CSR donations, but was actually a bribe to suppress information about the alleged wrongdoing.\n\n",
 'sources': 'https://www.moneycontrol.com/news/business/lilavati-medical-trust-versus-hdfc-bank-what-the-dispute-is-all-about-13258639.html'}