<a href="https://colab.research.google.com/github/Kedar154/AI-Driven-Stock-Market-Intelligence/blob/main/RAG_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import userdata
GAPi = userdata.get('groqAPi')

# Setting up Libraries

In [2]:
# Install necessary libraries
!pip install -q yfinance langchain langchain-community langchain-huggingface chromadb duckduckgo-search langchain_groq
# We use 'langchain-huggingface' to avoid the deprecation warning you saw earlier.

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m83.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m75.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m74.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import json
import yfinance as yf
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

# --- SETUP THE BRAIN (EMBEDDINGS) ---
# This downloads the small but powerful model to your Colab session
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
# Block 2: Groq LLM Configuration
from google.colab import userdata
from langchain_groq import ChatGroq

# 1. Securely grab the key
try:
    GROQ_API_KEY = userdata.get('groqAPi')
except:
    GROQ_API_KEY = "PASTE_YOUR_GROQ_KEY_HERE" # Failsafe

# 2. Initialize ChatGroq
# Llama-3.3-70b-versatile is excellent for financial reasoning
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    groq_api_key=GROQ_API_KEY,
    temperature=0 # High precision for financial data
)

# 3. Quick Connection Test
try:
    response = llm.invoke("Confirm connection. What is your model name?")
    print(f"✅ SUCCESS: Groq is active!")
    print(f"🤖 Agent Brain: {response.content}")
except Exception as e:
    print(f"❌ CONNECTION FAILED: {e}")

✅ SUCCESS: Groq is active!
🤖 Agent Brain: Connection confirmed. My model name is Llama. Llama stands for "Large Language Model Meta AI."


# Extracting ticker and date

In [5]:
import datetime

def extract(user_query):
    """
    Uses the LLM to identify the Stock Ticker and the relevant Timeframe.
    """
    # Get today's date for relative time context (e.g., 'last week')
    today = datetime.date.today().strftime("%B %d, %Y")

    extraction_prompt = f"""
    Today's Date: {today}
    User Query: "{user_query}"

    Extract the following from the query and return ONLY a JSON object:
    1. "ticker": The official stock symbol (e.g., AAPL, MSFT). If a company name is used, convert it to a ticker.
    2. "time_period": A short string describing the time mentioned (e.g., "1mo", "5d", "1y", or a specific month).
    in the timeperiod you may also recieve festival as inputs you must find how much back in time the festival ocurred with respect to the given time and date
    Example Output: {{"ticker": "TSLA", "time_period": "1mo"}}

    Return ONLY JSON.

    """

    try:
        response = llm.invoke(extraction_prompt).content
        # We clean the output in case the LLM adds markdown backticks
        json_str = response.replace("```json", "").replace("```", "").strip()
        data = json.loads(json_str)
        return data.get("ticker", "NONE"), data.get("time_period", "5d") #setting backup dates and tickers
    except Exception as e:
        print(f"Extraction Error: {e}")
        return "NONE", "5d"



In [6]:
# --- TESTING ---
ticker, period = extract("hi")
print(f"Extracted Ticker: {ticker}")
print(f"Extracted Period: {period}")

Extracted Ticker: None
Extracted Period: None


# TOOLs for AGENT

In [7]:
# Block 3: Tool Definitions
def ask_price(ticker):
    """
    Fetches the last 5 days of price history using yfinance.
    This provides the 'Numerical Context' for the agent.
    """
    try:
        stock = yf.Ticker(ticker)
        # We fetch 5 days of history to see recent trends
        hist = stock.history(period="5d")

        if hist.empty:
            return f"No price data found for ticker: {ticker}. Please check the symbol."

        # We convert to string for the LLM to read
        return hist.to_string()
    except Exception as e:
        return f"Error fetching price for {ticker}: {str(e)}"

def ask_news(ticker):
    """
    Fetches 5 snippets from DuckDuckGo search.
    This provides the 'Reasoning Context' for the agent.
    """
    try:
        wrapper = DuckDuckGoSearchAPIWrapper(max_results=5)
        search = DuckDuckGoSearchRun(api_wrapper=wrapper)

        # We craft a specific query to find 'why' the stock moved
        query = f"recent financial news and analyst reports for {ticker} stock movement"
        results = search.invoke(query)

        return results
    except Exception as e:
        return f"Error searching news for {ticker}: {str(e)}"


# Rag Memory

In [8]:
def to_DB(raw_news_text):
  """ take text and perform chunking """
  chunker = RecursiveCharacterTextSplitter(
      chunk_size=600,
      chunk_overlap=100
  )
  docs = chunker.create_documents([raw_news_text])

  # create a VDB of embeddings of these docs
  VDB = Chroma.from_documents(
      documents=docs,
      embedding=embeddings,
      collection_name = "news"
  )
  return VDB

In [9]:
def get_context(VDB, user_query):
  # search DB for top 3 relavant snippets related to user query
  rank = VDB.similarity_search(user_query, k=3)
  # AUGMENTING prompt
  context = "\n---\n".join([doc.page_content for doc in rank])
  return context


# 1992

In [11]:
def HM1992(user_query):
    # 1. Extraction: Get Ticker and Date
    ticker, period = extract(user_query)

    # 2. DECISION LOGIC: Is this a stock query or a general chat?
    greetings = ["hi", "hello", "who are you", "hey", "help"]
    is_greeting = any(word == user_query.lower().strip() for word in greetings)
    if ticker == "NONE" or is_greeting:
        # GENERAL CHAT PROMPT (Harshad Mehta Persona)
        general_prompt = f"""
        ROLE: You are Harshad Mehta, the 'Big Bull' of the BSE.
        PERSONALITY: Confident, ambitious, slightly cocky, and charismatic. Use a mix of financial
        swagger and street-smart wisdom.

        USER QUERY: {user_query}

        TASK:
        1. Explain who you are: The man who taught India to dream big, the master of the markets.
        2. Explain how to use you: Tell them you have access to real-time price data and the
           latest market news (RAG power).
        3. Give a starter example: Tell them to ask you "Why is Reliance moving?" or "Analyze
           Nvidia for me."

        Keep it sharp and legendary.
        """
        return llm.invoke(general_prompt).content

    # 3. FINANCIAL QUERY LOGIC (The RAG Pipeline)
    price_data = ask_price(ticker)
    web_news = ask_news(ticker)
    db = to_DB(web_news)
    context = get_context(db, user_query)

    # 4. THE HARSHAD MEHTA FINANCIAL PROMPT
    harshad_financial_prompt = f"""
    ROLE: You are Harshad Mehta, the legendary stockbroker.
    STYLE: Use bold language. You aren't just an analyst; you see the "loopholes" and the "opportunities"
    others miss. Use metaphors about 'Risk' and 'Success'. also keep the introduction short, to about 50 words

    USER QUERY: {user_query}
    TICKER: {ticker}

    MARKET DATA: {price_data}
    INNER CIRCLE NEWS: {context}

    RESPONSE STRUCTURE:
    - **The Big Bull Headline**: A flashy, bold headline.
    - **The Technical Game**: Analyze the 5-day price movement. Use terms like 'bull run' or 'short-covering'.
    - **Market Sentiment**: What is the street saying? Extract this from the news context.
    - **The Bottom Line (Conclusion)**: Tell the user the ultimate move.

    CLOSING TAGLINE: Always end with a famous Harshad-style quote, choose randomly from these two  "Risk hai toh Ishq hai!"
    or "Success kya hai? Failure ke baad ka chapter."
    """

    return llm.invoke(harshad_financial_prompt).content

In [14]:
def run_analyst_agent(user_query):
    # 1. Extraction (Same as before)
    ticker, period = extract(user_query)

    # Greeting Check
    greetings = ["hi", "hello", "who are you", "hey", "help"]
    if ticker == "NONE" or any(word == user_query.lower().strip() for word in greetings):
        general_prompt = f"Identity: Harshad Mehta. Explain who you are and how to use you. Mention you need tickers like Reliance or ^NSEI."
        return llm.invoke(general_prompt).content

    # 2. Data Fetching
    search_ticker = "^NSEI" if ticker == "NIFTY" else ticker
    price_data = ask_price(search_ticker)
    raw_news = ask_news(ticker)

    # 3. RAG & Verification Step
    db = to_DB(raw_news)
    raw_context = get_context(db, user_query)

    # --- NEW: SOURCE VERIFICATION ---
    verification_prompt = f"""
    You are a Fact Checker.
    Ticker: {ticker}
    News Snippets: {raw_context}
    user_request : {user_query}

    TASK: Remove any news snippets that are NOT the related to the {ticker} and query: {user_query}.
    you have to check whether the news snippet is actually the cause behind the action asked by user.
    If a snippet is about a different company (like Nvidia mentioned during a Nifty query), delete it.
    Return ONLY the verified, relevant text. If nothing is relevant, return 'NO RELEVANT NEWS'.
    """
    verified_context = llm.invoke(verification_prompt).content
    # --------------------------------

    # 4. Final Analysis with Verified Context
    harshad_financial_prompt = f"""
    ROLE: Harshad Mehta.
    USER QUERY: {user_query}
    TICKER: {ticker}

    MARKET DATA: {price_data}
    VERIFIED NEWS: {verified_context}

    INSTRUCTIONS:
    - If VERIFIED NEWS is 'NO RELEVANT NEWS', say: "The street is quiet on this one, Lala. No direct news, so we look at the charts."
    - Otherwise, use the verified news to explain the move.

    STRUCTURE:
    - **The Big Bull Headline**
    - **The Technical Game**
    - **Market Sentiment**
    - **The Bottom Line**

    CLOSING: "Risk hai toh Ishq hai!"
    """

    return llm.invoke(harshad_financial_prompt).content

In [16]:
user_input = "tell me why tesla is trying to go up but trump is pulling it down"
print(run_analyst_agent(user_input))

**The Big Bull Headline**
The street is quiet on this one, Lala. No direct news, so we look at the charts. It seems like the market is trying to decipher the next move for TSLA, but without any concrete news, we're left to analyze the technicals.

**The Technical Game**
Looking at the charts, we can see that TSLA has been experiencing a bit of volatility, with the stock price fluctuating between $417 and $447 over the past few days. The volume has been significant, with over 57 million shares traded on the 14th and over 67 million shares traded on the 21st. This suggests that there's a lot of interest in the stock, but the lack of clear direction is causing the price to oscillate.

**Market Sentiment**
Market sentiment seems to be mixed, with some investors bullish on TSLA's future prospects and others bearish due to various factors, including potential regulatory hurdles and competition in the EV space. The fact that the stock is trying to go up suggests that there are still many beli