In [2]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [6]:
import openai

# Point the module-level OpenAI client at the local LM Studio server.
# openai>=1.0 reads `base_url`, while releases before 1.0 read `api_base`.
# Only the attribute matching the installed version is consulted, so setting
# both keeps this cell effective on either.
LM_STUDIO_BASE_URL = "http://localhost:1234/v1"
openai.base_url = LM_STUDIO_BASE_URL
openai.api_base = LM_STUDIO_BASE_URL
openai.api_key = "not-needed"  # LM Studio doesn’t require authentication


In [12]:
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import UnstructuredURLLoader

# Chat model served by LM Studio through its OpenAI-compatible endpoint.
llm = ChatOpenAI(
    model="openai/gpt-oss-20b",     # model identifier as loaded in LM Studio
    temperature=0.9,
    max_tokens=500,
    openai_api_base="http://localhost:1234/v1",  # LM Studio server
    openai_api_key="not-needed",  # dummy key since LM Studio doesn’t need one
)

# Fetch the two Moneycontrol articles that the rest of the notebook works on.
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/opinion/be-ready-before-gst-2-0-key-challenges-indian-businesses-must-navigate-13475776.html",
    "https://www.moneycontrol.com/news/business/markets/idbi-bank-divestment-due-diligence-by-qualified-bidders-nearly-complete-says-dipam-secy-share-soars-8-percent-13476011.html"
])

data = loaders.load()
print("Loaded docs:", len(data))

# Summarize the SECOND article (index 1, the IDBI Bank divestment story).
# NOTE(review): the loader may return fewer documents than URLs if a fetch
# fails, so check the count up front instead of failing with a bare
# "list index out of range".
ARTICLE_INDEX = 1
if len(data) <= ARTICLE_INDEX:
    raise IndexError(
        f"Expected at least {ARTICLE_INDEX + 1} loaded documents but got "
        f"{len(data)}; check that every URL was fetched successfully."
    )

response = llm.invoke(
    "Summarize this financial article:\n\n" + data[ARTICLE_INDEX].page_content
)
print("\n📊 Summary:\n", response.content)

Loaded docs: 2

📊 Summary:
 **Key Takeaway:**  
The Indian government and LIC have advanced the sale of their 60.72 % stake in IDBI Bank, with qualified bidders nearly finished with due‑diligence. The divestment is expected to wrap up by the end of FY26, after earlier delays, and is part of a broader ₹47 000 crore disinvestment target (₹20 000 crore already sold). The process includes a technical document ready for the sale and a forthcoming RFP stage. IDBI Bank shares rose 8 % on the news, reflecting investor confidence in the progressing divestment.


In [11]:
# Chunking parameters: pieces of size 1000 that share an overlap of 200 with
# their neighbours.
splitter_config = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_config)

# Split every loaded article into chunks; the trailing expression displays
# how many chunks were produced.
docs = text_splitter.split_documents(data)
len(docs)


22

In [15]:
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Pick the best available backend: NVIDIA CUDA first, then Apple-silicon MPS,
# and finally plain CPU. Checking only MPS would leave CUDA machines on CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Sentence-transformer model used to embed each chunk, placed on `device`.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": device},
)

# Embed every chunk in `docs` and build the FAISS vector store from them.
vectorindex_local = FAISS.from_documents(docs, embeddings)

In [16]:
import pickle

# Serialize the in-memory vector store to disk so later cells can reload it
# instead of re-embedding the documents.
file_path = "vector_index_local.pkl"

with open(file_path, "wb") as index_file:
    pickle.dump(vectorindex_local, index_file)

print(f"✅ Vector index saved at {file_path}")

✅ Vector index saved at vector_index_local.pkl


In [17]:
# Restore the vector store from the pickle written by the save cell.
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file,
# so only load a pickle that this notebook (or another trusted source) wrote.
with open("vector_index_local.pkl", "rb") as index_file:
    vectorindex_local = pickle.load(index_file)

print("✅ Vector index loaded successfully")

✅ Vector index loaded successfully


In [19]:
# Expose the vector store as a retriever, then combine it with the local LLM
# into a question-answering chain.
retriever = vectorindex_local.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

print("✅ RetrievalQAWithSourcesChain created successfully")

✅ RetrievalQAWithSourcesChain created successfully


In [21]:
query = "Explain the taxpayers who are under the 12 percent bracket?"

# Uncomment to trace every intermediate chain / LLM call while debugging:
# langchain.debug = True

# Run the retrieval QA chain on the question. `invoke` replaces the deprecated
# direct call `chain(...)` and matches how `llm.invoke` is used earlier in the
# notebook. With return_only_outputs=True the input question is not echoed
# back; the displayed result is a dict containing the generated 'answer'.
chain.invoke({"question": query}, return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "Explain the taxpayers who are under the 12 percent bracket?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Revamping promotional schemes\n\nThe rate rationalisation could mark a major shift for promotional schemes across industries. Manufacturers who are currently taxed at 5% GST rate often hold back from offering promotional schemes because of the risk of mixed supply since most of the goods attract a higher tax rate (12% or 18%). With more products expected to shift into lower slabs after rationalisation, these businesses could be in a stronger position to revamp the

{'answer': '**Answer**\n\nTaxpayers who fall under the **12\u202f% bracket** refer to those whose income (or consumption) places them in a lower‑rate tax category—typically individuals or businesses with relatively modest earnings or spending levels that are taxed at 12\u202f%.  \nIn the context of the article, these taxpayers have historically been willing to absorb an additional 6\u202f% burden by paying a higher **GST rate of 18\u202f%** on certain products. This willingness was based on the assumption that the differential between the base tax and the added surcharge would remain at 6\u202f%.  \n\nHowever, with the new GST‑2.0 changes, the differential is expected to widen from **6\u202f% to 13\u202f%**. Consequently, these taxpayers may face a considerably disadvantageous situation because:\n\n1. **Higher Effective Tax Burden:** The increase in the surcharge means they will pay a larger proportion of their income or consumption as tax.\n2. **Reduced Purchasing Power:** For consume