In [1]:
!pip install kafka-python gspread oauth2client pandas yfinance




In [2]:
!pip install yfinance




In [None]:
import gspread
from kafka import KafkaProducer
import yfinance as yf
import json
import time
from oauth2client.service_account import ServiceAccountCredentials

# 🔹 Google Sheets Authentication
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("stock-market-data-project-446c6101e293.json", scope)

# 🔹 Retry connecting to Google Sheets
for attempt in range(5):
    try:
        client = gspread.authorize(creds)
        spreadsheet = client.open("Stock Market Data")
        sheet = spreadsheet.worksheet("Stock Selection")
        print("✅ Successfully connected to Google Sheets!")
        break
    except Exception as e:
        print(f"❌ Failed to connect to Google Sheets (Attempt {attempt + 1}/5): {e}")
        time.sleep(5)  # Wait and retry
else:
    raise Exception("🚨 Could not connect to Google Sheets after multiple attempts.")

# 🔹 Kafka Configuration
KAFKA_SERVER = "192.168.125.254:9092"  # Update with your Kafka server IP
TOPIC = "stock_market_realtime"

producer = KafkaProducer(
    bootstrap_servers=KAFKA_SERVER,
    value_serializer=lambda v: json.dumps(v).encode("utf-8")
)

while True:
    try:
        # 🔹 Read selected stock from Google Sheets (B2)
        selected_stock = sheet.acell("B2").value.strip().upper()  # Remove spaces & convert to uppercase

        # 🔹 Check if stock exists before adding `.NS`
        stock_check = yf.Ticker(selected_stock).info
        if "symbol" not in stock_check:
            print(f"⚠️ Invalid stock symbol '{selected_stock}'. Skipping...")
            time.sleep(60)
            continue

        print(f"📈 Fetching data for {selected_stock}...")

        stock_data = yf.Ticker(selected_stock).history(period="1d", interval="1m")

        if stock_data.empty:
            print(f"⚠️ No stock data found for {selected_stock}. Skipping...")
            time.sleep(60)
            continue  # Skip this iteration

        latest = stock_data.iloc[-1]
        data = {
            "timestamp": str(latest.name),
            "stock": selected_stock,
            "price": latest["Close"],
            "open": latest["Open"],
            "high": latest["High"],
            "low": latest["Low"],
            "previous_close": stock_check.get("previousClose", "N/A"),
            "volume": latest["Volume"]
        }

        # 🔹 Send Data to Kafka
        producer.send(TOPIC, data)
        print(f"📤 Sent data: {data}")

    except Exception as e:
        print(f"❌ Error: {e}")

    time.sleep(20)  # Fetch data every minute


✅ Successfully connected to Google Sheets!
📈 Fetching data for META...
📤 Sent data: {'timestamp': '2025-03-20 11:23:00-04:00', 'stock': 'META', 'price': np.float64(607.8499755859375), 'open': np.float64(607.8499755859375), 'high': np.float64(607.8499755859375), 'low': np.float64(607.8499755859375), 'previous_close': 584.06, 'volume': np.float64(0.0)}
📈 Fetching data for META...
📤 Sent data: {'timestamp': '2025-03-20 11:23:00-04:00', 'stock': 'META', 'price': np.float64(607.8699951171875), 'open': np.float64(607.8699951171875), 'high': np.float64(607.8699951171875), 'low': np.float64(607.8699951171875), 'previous_close': 584.06, 'volume': np.float64(0.0)}
📈 Fetching data for TSLA...
📤 Sent data: {'timestamp': '2025-03-20 11:23:00-04:00', 'stock': 'TSLA', 'price': np.float64(235.80999755859375), 'open': np.float64(235.80999755859375), 'high': np.float64(235.80999755859375), 'low': np.float64(235.80999755859375), 'previous_close': 235.86, 'volume': np.float64(0.0)}
📈 Fetching data for TSL

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/HDFC?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=HDFC&crumb=G8Rj8r.iIxv


❌ Error: 'NoneType' object has no attribute 'update'
📈 Fetching data for NVDA...
📤 Sent data: {'timestamp': '2025-03-20 11:40:00-04:00', 'stock': 'NVDA', 'price': np.float64(119.4000015258789), 'open': np.float64(119.4000015258789), 'high': np.float64(119.4000015258789), 'low': np.float64(119.4000015258789), 'previous_close': 117.52, 'volume': np.float64(0.0)}
📈 Fetching data for NVDA...
📤 Sent data: {'timestamp': '2025-03-20 11:41:00-04:00', 'stock': 'NVDA', 'price': np.float64(119.42500305175781), 'open': np.float64(119.42500305175781), 'high': np.float64(119.42500305175781), 'low': np.float64(119.42500305175781), 'previous_close': 117.52, 'volume': np.float64(0.0)}
📈 Fetching data for NVDA...
📤 Sent data: {'timestamp': '2025-03-20 11:41:00-04:00', 'stock': 'NVDA', 'price': np.float64(119.36009979248047), 'open': np.float64(119.36009979248047), 'high': np.float64(119.36009979248047), 'low': np.float64(119.36009979248047), 'previous_close': 117.52, 'volume': np.float64(0.0)}
📈 Fetchin