In [47]:
!pip install langchain langchain-text-splitters langchain-community bs4
!pip install -U "langchain[huggingface]"
!pip install -qU langchain-huggingface
!pip install -qU langchain-chroma
!pip install -U "langchain-core"
!pip install -U langchain langchain-community
!pip install -qU langchain-classic langchain-community rank_bm25
!pip install -qU langchain-experimental flashrank



In [48]:
import requests
import re
from langchain_core.documents import Document
from langchain.agents import create_agent
from langchain.tools import tool
from langchain_experimental.text_splitter import SemanticChunker
import os
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

In [49]:
import getpass
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = "YOUR_API_KEY"

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = "YOUR_API_KEY"
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    task = "text-generation",
    temperature=0.7,
    max_new_tokens=1024
)
model = ChatHuggingFace(llm=llm)

In [50]:
from langchain_huggingface import HuggingFaceEmbeddings
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") #768

from langchain_core.vectorstores import InMemoryVectorStore
vector_store = InMemoryVectorStore(embedder)

In [55]:
query1 = input("Ask your query")
query2 = input("Please enter the ticker of the company")

Ask your queryWhy did Apple stock drop?
Please enter the ticker of the companyAAPL


In [57]:
def get_alpha_vantage_news(ticker, api_key):
    url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={ticker}&apikey={api_key}'
    try:
        response = requests.get(url)
        data = response.json()

        if "Information" in data:
            print(f"API Note: {data['Information']}")
            return []

        news_items = data.get("feed", [])
        if not news_items:
            print(f"No news found for: {ticker}")
            return []

        return news_items
    except Exception as e:
        print(f"Connection Error: {e}")
        return []

def prepare_docs_for_db(news_feed, ticker):
    if not isinstance(news_feed, list):
        return []

    docs = []
    for item in news_feed:
        doc = Document(
            page_content=item.get('summary', ''),
            metadata={
                "ticker": ticker.upper(),
                "date": item.get('time_published', ''),
                "source": item.get('source', ''),
                "sentiment": item.get('overall_sentiment_label', '')
            }
        )
        docs.append(doc)
    return docs

api_key = 'YOUR_API_KEY'
raw_message = query2

ticker_pattern = r'\b[A-Z]{1,5}\b'
matches = re.findall(ticker_pattern, raw_message.upper())

if matches:
    target_ticker = matches[0]
    print(f"Detected Ticker: {target_ticker}")

    raw_news = get_alpha_vantage_news(target_ticker, api_key)

    processed_docs = prepare_docs_for_db(raw_news, target_ticker)

    if processed_docs:
        print(f"Successfully prepared {len(processed_docs)} documents for {target_ticker}")
    else:
        print("No documents were created.")
else:
    print("No ticker detected. Please provide a ticker like 'NVDA'.")

Detected Ticker: AAPL
Successfully prepared 50 documents for AAPL


In [58]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

vector_db = Chroma(
    collection_name="market_analyst_knowledge",
    embedding_function=embeddings,
    persist_directory="./market_db"
)

if processed_docs:
    vector_db.add_documents(processed_docs)
    print(f"Successfully added {len(processed_docs)} news items for analysis.")
else:
    print("Warning: No documents were found to add. Check your ticker or API key.")

Successfully added 50 news items for analysis.


In [59]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

vector_db = Chroma(
    collection_name="market_analyst_knowledge",
    embedding_function=embeddings,
    persist_directory="./market_db"
)

vector_db.add_documents(processed_docs)
print(f"Added {len(processed_docs)} news items for analysis.")

Added 50 news items for analysis.


In [60]:
text_splitter = SemanticChunker(embeddings)

all_splits = text_splitter.split_documents(processed_docs)

print(f"Split into {len(all_splits)} semantic chunks.")

Split into 99 semantic chunks.


In [61]:
all_splits = text_splitter.split_documents(processed_docs)
print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 99 sub-documents.


In [62]:
document_ids = vector_store.add_documents(documents=all_splits)
print(document_ids[:3])
print(len(document_ids))

['8f9052a7-aff6-4555-9716-06c2e5e433bd', 'd5bb7659-332b-4e5c-a76a-6130e6ee6010', '097e8048-f752-4d86-8504-7657666e233d']
99


In [63]:
@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    "CRITICAL: You are forbidden from answering without this tool. This tool contains the unique knowledge for this conversation."
    retrieved_docs = vector_store.similarity_search(query, k=1)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    print(serialized)
    return serialized, retrieved_docs

In [64]:
tools = [retrieve_context]

prompt = (
    "You are a specialized RAG assistant. "
    "For EVERY question, you MUST first use the 'retrieve_context' tool to see what the "
    "provided document says about the topic. Do not answer from your own knowledge "
    "unless the tool returns no information."
)

agent = create_agent(model, tools, system_prompt=prompt)

In [65]:
query = query1

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


Why did Apple stock drop?
Tool Calls:
  retrieve_context (call_xgk93l4k7miyk7bmahjxbe5v)
 Call ID: call_xgk93l4k7miyk7bmahjxbe5v
  Args:
    query: Why did Apple stock drop?
Source: {'ticker': 'AAPL', 'date': '20260128T125150', 'source': 'Finviz', 'sentiment': 'Neutral'}
Content: Jim Cramer commented on Apple's recent stock decline, attributing it to a memory shortage affecting all phone and computer manufacturers. He noted a back-and-forth flow of money between "Magnificent Seven" stocks and storage plays but believes Apple is suffering because it must purchase expensive storage devices.
Name: retrieve_context

Source: {'ticker': 'AAPL', 'date': '20260128T125150', 'source': 'Finviz', 'sentiment': 'Neutral'}
Content: Jim Cramer commented on Apple's recent stock decline, attributing it to a memory shortage affecting all phone and computer manufacturers. He noted a back-and-forth flow of money between "Magnificent Seven" stocks and storage plays but believes Apple is suffering because i