In [None]:
!pip install langchain langchain-text-splitters langchain-community bs4



In [None]:
!pip install -U "langchain[google-genai]"



In [None]:
import os
import json
import requests
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

from langchain.chat_models import init_chat_model
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

In [None]:
os.environ["GOOGLE_API_KEY"] = "AIzaSyBroXxsS3h2SfqggCXs0bQ-fXlRn49OqME"
model = init_chat_model("google_genai:gemini-2.5-flash-lite")

In [None]:
SYSTEM_PROMPT = """
You are an analytical finance chatbot.

Rules:
- You only answer stock-market related questions.
- You do NOT give personal financial advice.
- You only use provided price data and news.
- If the question is unrelated to stocks, respond with:
  "I can only help with stock-market related questions."

Tasks:
- Extract stock ticker
- Identify time interval
- Identify intent (price, returns, volatility, news, reason for movement)
Return structured JSON when asked.
"""


In [None]:
def call_llm(messages):
    return model.invoke(messages).content


In [None]:
user_query = input("User: ")


User: Tell me returns of dmart stock fron 1/12/2025 to 31/12/2025


In [None]:
extract_prompt = f"""
Extract the stock ticker and time interval from the query.

Return JSON only in this format:
{{
  "company": "",
  "ticker": "",
  "start_date": "YYYY-MM-DD or null",
  "end_date": "YYYY-MM-DD or null",
  "intent": ""
}}

Query:
{user_query}
"""

response = call_llm([
    SystemMessage(content=SYSTEM_PROMPT),
    HumanMessage(content=extract_prompt)
])

cleaned_response = response.strip().replace('```json\n', '').replace('\n```', '')
parsed = json.loads(cleaned_response)
ticker = parsed["ticker"]
start_date = parsed["start_date"]
end_date = parsed["end_date"]
intent = parsed["intent"]

In [238]:
def fetch_price_data(ticker):
    df = yf.download(
        ticker,
        period="2y",
        interval="1d",
        auto_adjust=False
    )

    df.reset_index(inplace=True)
    df["daily_return"] = (df["Close"] - df["Open"]) / df["Open"]
    df = df.dropna()

    return df

In [None]:
price_df = fetch_price_data(ticker)


[*********************100%***********************]  1 of 1 completed


In [None]:
def compute_news_window(start_date, end_date):
    start = datetime.fromisoformat(start_date) - timedelta(days=5)
    end = datetime.fromisoformat(end_date) + timedelta(days=2)
    return start.date(), end.date()


In [None]:
def fetch_gnews(company, start_date, end_date, api_key):
    url = "https://gnews.io/api/v4/search"
    params = {
        "q": f"{company} stock",
        "lang": "en",
        "from": start_date.isoformat(),
        "to": end_date.isoformat(),
        "max": 10,
        "token": api_key
    }

    r = requests.get(url, params=params)
    r.raise_for_status()
    return r.json()["articles"]


In [None]:
news_start, news_end = compute_news_window(start_date, end_date)

articles = fetch_gnews(
    parsed["company"],
    news_start,
    news_end,
    api_key=os.environ["GNEWS_API_KEY"]
)


In [None]:
texts = [
    f"{a['title']}. {a.get('description', '')}"
    for a in articles
]

vectorstore = FAISS.from_texts(texts, embedding=embeddings)


In [None]:
relevant_news = vectorstore.similarity_search(user_query, k=5)


In [None]:
price_summary = {
    "start_price": float(price_df.iloc[0]["Close"]),
    "end_price": float(price_df.iloc[-1]["Close"]),
    "mean_return": float(price_df["daily_return"].mean()),
    "volatility": float(price_df["daily_return"].std())
}

news_context = "\n".join([doc.page_content for doc in relevant_news])


  "start_price": float(price_df.iloc[0]["Close"]),
  "end_price": float(price_df.iloc[-1]["Close"]),


In [None]:
final_prompt = f"""
Stock: {ticker}
Time period: {start_date} to {end_date}

Price summary:
{price_summary}

Relevant news:
{news_context}

Answer the user's question:
"{user_query}"

Be concise, analytical, and factual.
"""


In [None]:
final_answer = call_llm([
    SystemMessage(content=SYSTEM_PROMPT),
    HumanMessage(content=final_prompt)
])

print(final_answer)


```json
{
  "stock_ticker": "DMART.NS",
  "time_interval": "2025-12-01 to 2025-12-31",
  "intent": "returns",
  "answer": "The mean return for DMART.NS from 2025-12-01 to 2025-12-31 was -0.001577, representing a slight decrease in value over the period."
}
```
