In [1]:
import os

# Identify our HTTP requests *before* importing the web loader.
# langchain_community looks up USER_AGENT when the loader module is imported
# and falls back to a generic placeholder (with a warning) when it is unset.
# setdefault() keeps any value the user has already exported.
os.environ.setdefault("USER_AGENT", "stock-rag-notebook/0.1")

from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Load Data
# Pages fetched to build the corpus (one Document is produced per URL).
urls = [
    "https://finance.yahoo.com/quote/AAPL",
    "https://www.investing.com/equities/apple-computer-inc",
    "https://www.moneycontrol.com/india/stockpricequote/technology/infosys/IT"
]

loader = WebBaseLoader(urls)
docs = loader.load()
print(f"Number of documents loaded: {len(docs)}")

# 2. Split Data
# 1000-character chunks with a 200-character overlap, so text that straddles
# a chunk boundary still appears whole in at least one chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
doc_chunks = text_splitter.split_documents(docs)
print(f"Number of document chunks: {len(doc_chunks)}")

USER_AGENT environment variable not set, consider setting it to identify your requests.


Number of documents loaded: 3
Number of document chunks: 169


In [3]:
import os
from getpass import getpass
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma # UPDATED IMPORT

# Hugging Face credentials: prompt interactively only when the variable is
# absent, so the token never has to be typed into the notebook itself.
if os.environ.get("HUGGINGFACE_API_TOKEN") is None:
    os.environ["HUGGINGFACE_API_TOKEN"] = getpass("Enter Hugging Face Token: ")

# Embedding model used to vectorise the document chunks.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Build the Chroma collection from the chunks; the embedding object itself is
# handed to the store rather than pre-computed vectors.
vectorstore = Chroma.from_documents(
    doc_chunks,
    embedding=embeddings,
    collection_name="stock-data",
)

print("Vector Store Created Successfully")

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Vector Store Created Successfully


In [None]:
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEndpoint # Recommended for newer LC

# Define the Repo ID (Mistral is better for agents than Flan-T5)
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

# Resolve the API token.
# The setup cell stores it under HUGGINGFACE_API_TOKEN, whereas this cell used
# to index HUGGINGFACEHUB_API_TOKEN directly and died with a KeyError on a
# fresh kernel. Accept either name, and fail with an actionable message if
# neither is present.
hf_token = (
    os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    or os.environ.get("HUGGINGFACE_API_TOKEN")
)
if not hf_token:
    raise RuntimeError(
        "No Hugging Face token found. Set HUGGINGFACEHUB_API_TOKEN "
        "(or HUGGINGFACE_API_TOKEN) before running this cell."
    )

# Initialize LLM
# `max_new_tokens` is the endpoint's length limit; `max_length` is not a
# HuggingFaceEndpoint field and would be pushed into model_kwargs instead.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_new_tokens=128,
    temperature=0.5,
    huggingfacehub_api_token=hf_token
)

# Create Chain
# "stuff" concatenates the retrieved chunks into a single prompt; k=3 limits
# retrieval to the three most similar chunks.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
)

# Run Query
query = "What is the current stock price or trend of Apple Inc.?"
result = qa_chain.invoke({"query": query})
print(result['result'])