# News-Driven Stock Volatility Predictor

In [12]:
# Symbols carry the ".NS" suffix; the download cell below passes them to yfinance.
tickers = [
    "TCS.NS",
    "INFY.NS",
    "RELIANCE.NS",
    "HDFCBANK.NS",
    "ICICIBANK.NS",
    "SBIN.NS",
    "TATAMOTORS.NS",
    "WIPRO.NS",
]


In [13]:
!pip check

lamini 3.4.6 has requirement numpy<2.0.0, but you have numpy 2.3.1.
langchain-core 0.3.62 has requirement packaging<25,>=23.2, but you have packaging 25.0.
nba-api 1.6.1 has requirement numpy<2.0.0,>=1.22.2, but you have numpy 2.3.1.
streamlit 1.45.1 has requirement packaging<25,>=20, but you have packaging 25.0.


In [14]:
!pip install alpha_vantage pandas numpy




In [17]:
import yfinance as yf
import pandas as pd
import numpy as np
import time, random, requests
from datetime import datetime

# --- 1. NSE Tickers and Date Range ---
# Symbols to download, one per line for easy editing.
tickers = [
    "TCS.NS",
    "INFY.NS",
    "RELIANCE.NS",
    "HDFCBANK.NS",
    "ICICIBANK.NS",
    "SBIN.NS",
    "TATAMOTORS.NS",
    "WIPRO.NS",
]

# History window handed to yfinance as `start` / `end`.
start_date = "2023-01-01"
end_date = "2024-7-3"

# Retry / throttling knobs.
MAX_RETRIES = 4        # attempts per ticker inside fetch_ticker
CALL_PAUSE_SEC = 15    # pause between consecutive tickers in the download loop

# Accumulates one DataFrame per successfully downloaded ticker.
frames = list()

# --- 2. Robust Fetch Function with Retry ---
def fetch_ticker(ticker: str) -> pd.DataFrame | None:
    """Download daily OHLCV history for one ticker, retrying on failure.

    Reads the module-level ``start_date``, ``end_date`` and ``MAX_RETRIES``.
    Request errors and empty results are retried with exponential back-off
    plus up to one second of random jitter; any other exception propagates.

    Args:
        ticker: Symbol passed to ``yf.Ticker`` (e.g. ``"TCS.NS"``).

    Returns:
        A DataFrame with the index reset to a column, the columns
        ``Open``, ``High``, ``Low``, ``Close``, ``Volume`` and an added
        ``Ticker`` column, or ``None`` if every attempt failed.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            print(f"⏬ {ticker}: Attempt {attempt}/{MAX_RETRIES}")
            df = yf.Ticker(ticker).history(
                start=start_date,
                end=end_date,
                interval="1d",
                auto_adjust=False,
                actions=False
            )
            # An empty frame is treated as a failure so it goes through the
            # same retry path as a request error.
            if df.empty:
                raise ValueError("Empty DataFrame")
            df = df[["Open", "High", "Low", "Close", "Volume"]].reset_index()
            df["Ticker"] = ticker
            return df
        except (requests.exceptions.RequestException, ValueError) as e:
            if attempt == MAX_RETRIES:
                # No attempts left: report the error but skip the back-off
                # sleep, which would only delay the failure message.
                print(f"   ↳ Error: {e}")
                break
            wait = (2 ** attempt) + random.uniform(0, 1)
            print(f"   ↳ Error: {e} | Sleeping {wait:.1f}s")
            time.sleep(wait)
    print(f"❌ {ticker}: All retries failed.")
    return None
# --- 3. Loop Through Tickers with Delay ---
for idx, tk in enumerate(tickers, start=1):
    df = fetch_ticker(tk)
    if df is not None:
        frames.append(df)
    # Pause between tickers, but not after the last one.
    if idx < len(tickers):
        time.sleep(CALL_PAUSE_SEC)

# --- 4. Combine, Label & Save ---
if len(frames) == 0:
    raise RuntimeError("❌ All downloads failed.")

# Stack the per-ticker frames, order them by ticker and date, then attach the
# next-day label: Target is 1 when the following row's close for the same
# ticker is strictly higher, else 0. Each ticker's final row has no
# following close and is dropped.
prices = (
    pd.concat(frames, ignore_index=True)
    .sort_values(["Ticker", "Date"])
    .assign(
        Next_Close=lambda d: d.groupby("Ticker")["Close"].shift(-1),
        Target=lambda d: np.where(d["Next_Close"] > d["Close"], 1, 0),
    )
    .dropna(subset=["Next_Close"])
    .reset_index(drop=True)
)

# Save CSV
prices.to_csv("nse_labeled_prices.csv", index=False)
print(f"✅ Saved {len(prices)} rows to nse_labeled_prices.csv")


⏬ TCS.NS: Attempt 1/4
⏬ INFY.NS: Attempt 1/4
⏬ RELIANCE.NS: Attempt 1/4
⏬ HDFCBANK.NS: Attempt 1/4
⏬ ICICIBANK.NS: Attempt 1/4
⏬ SBIN.NS: Attempt 1/4
⏬ TATAMOTORS.NS: Attempt 1/4
⏬ WIPRO.NS: Attempt 1/4
✅ Saved 2928 rows to nse_labeled_prices.csv
