In [2]:
import os
from getpass import getpass

# SECURITY: never store a real API key in the notebook source -- it ends up in
# version control, shared copies and exported files. The key is taken from the
# environment when already present; otherwise it is requested interactively
# (getpass does not echo the value, so it is not saved in the cell output).
if not os.environ.get("OPENWEATHER_API_KEY", "").strip():
    os.environ["OPENWEATHER_API_KEY"] = getpass("OpenWeather API key: ").strip()

In [4]:
# ============================
# Colab Setup & Imports
# ============================
# If you're on Google Colab, install osmium (optional)
!pip -q install osmium

from google.colab import drive
import os
import time
import math
import re
import random
import numpy as np
import pandas as pd
import datetime
import requests

# Optional import (not required for TomTom)
import osmium as osm  # noqa: F401

# If you haven’t already in a previous cell, set your key like this:
# import os
# os.environ["OPENWEATHER_API_KEY"] = "<YOUR_OPENWEATHER_API_KEY>"

# ============================
# 0. FILE PATHS
# ============================
# Attach Google Drive so the input CSVs are reachable from this runtime
drive.mount('/content/drive')

# Locations of the two input files inside Drive
demand_path = "/content/drive/MyDrive/synthetic_demand_data.csv"
fleet_path  = "/content/drive/MyDrive/synthetic_fleet_with_coords (1).csv"

# Load both tables (demand first, then fleet)
demand_df, fleet_df = (
    pd.read_csv(csv_path) for csv_path in (demand_path, fleet_path)
)

# ============================
# 1. LOAD DEMAND DATA FROM CSV
#    Expected cols: demand_id, time, zone, lat, lon, demand_size_kg
# ============================

# Fail fast when the CSV lacks a column that later sections read
required_demand_cols = ["time", "zone", "lat", "lon", "demand_size_kg"]
missing_demand = [c for c in required_demand_cols if c not in demand_df.columns]
if missing_demand:
    raise ValueError(f"Demand CSV is missing required columns: {missing_demand}")

# Parse time column (unparseable values become NaT)
demand_df["time"] = pd.to_datetime(demand_df["time"], errors="coerce")

# Ensure numeric types for lat/lon/size (unparseable values become NaN)
for c in ["lat", "lon", "demand_size_kg"]:
    demand_df[c] = pd.to_numeric(demand_df[c], errors="coerce")

# errors="coerce" hides bad cells by turning them into NaN. A NaN coordinate
# cannot be used reliably as part of a (lat, lon) dictionary key (NaN never
# compares equal to itself), and a NaN size would propagate into the state
# matrix, so such rows are reported and removed here instead of failing later.
invalid_demand_rows = demand_df[["lat", "lon", "demand_size_kg"]].isna().any(axis=1)
if invalid_demand_rows.any():
    print(
        f"Warning: dropping {int(invalid_demand_rows.sum())} demand row(s) with "
        "missing or non-numeric lat/lon/demand_size_kg."
    )
    demand_df = demand_df.loc[~invalid_demand_rows].reset_index(drop=True)

# Timestamps are not needed for the lookups below, so NaT rows are kept but reported
unparsed_time_count = int(demand_df["time"].isna().sum())
if unparsed_time_count:
    print(f"Warning: {unparsed_time_count} demand row(s) have an unparseable time (kept as NaT).")

print("Demand sample:\n", demand_df.head())

# ============================
# 2. LOAD FLEET DATA FROM CSV
#    Normalize to: vehicle_id, capacity, lat, lon, availability
# ============================

# Columns the raw fleet CSV has to provide (names as they appear before renaming)
required_fleet_cols_before = ["vehicle_id", "capacity_kg", "latitude", "longitude"]
missing_fleet = list(
    filter(lambda col: col not in fleet_df.columns, required_fleet_cols_before)
)
if len(missing_fleet) > 0:
    raise ValueError(f"Fleet CSV is missing required columns: {missing_fleet}")

# Map the CSV's column names onto the short names used in the rest of the notebook
fleet_df = fleet_df.rename(
    columns=dict(capacity_kg="capacity", latitude="lat", longitude="lon")
)

# When the CSV carries no availability column, every vehicle gets availability 1
if not ("availability" in fleet_df.columns):
    fleet_df["availability"] = 1

# Coerce the numeric fields; values that cannot be parsed become NaN
for c in ("capacity", "lat", "lon", "availability"):
    fleet_df[c] = pd.to_numeric(fleet_df[c], errors="coerce")

print("\nFleet sample (normalized):\n", fleet_df.head())

# ============================
# 3. LIVE TRAFFIC FROM TOMTOM FLOW
# ============================

# SECURITY: the key comes from the environment only. No real key is embedded
# as a fallback, so a missing variable triggers the RuntimeError below instead
# of silently using a credential that is stored in the notebook source.
TOMTOM_API_KEY = os.environ.get("TOMTOM_API_KEY", "").strip()
if not TOMTOM_API_KEY or TOMTOM_API_KEY.startswith("<"):
    raise RuntimeError(
        "TomTom API key not set. Set os.environ['TOMTOM_API_KEY'] = 'YOUR_TOMTOM_KEY'."
    )

# Endpoint queried by get_tomtom_speed_kmh (one request per coordinate)
TOMTOM_FLOW_URL = "https://api.tomtom.com/traffic/services/4/flowSegmentData/absolute/10/json"

# Polling settings passed to the refresh_live_* calls further down:
# seconds to wait between cycles, and number of cycles per call
POLL_INTERVAL_SEC = 60
POLL_CYCLES = 5

def get_tomtom_speed_kmh(lat, lon, retries=2, pause_sec=0.5, default_speed_kmh=30.0):
    """
    Fetch the traffic speed TomTom reports for the road segment at (lat, lon).

    Args:
        lat, lon: Coordinates sent as the ``point`` query parameter.
        retries: Extra attempts after the first one for transient failures
            (network errors, HTTP 429/500/502/503/504, undecodable body).
        pause_sec: Base back-off; the wait before retry k is pause_sec * (k + 1).
        default_speed_kmh: Value returned when no usable speed can be obtained.

    Returns:
        float: ``currentSpeed`` when it is a positive number, otherwise
        ``freeFlowSpeed`` when that is a positive number, otherwise
        ``default_speed_kmh``. The function never raises for HTTP/network
        problems; it falls back to the default instead.
        NOTE(review): the request does not set a unit, so the value is in
        whatever unit the API uses by default -- presumably km/h; confirm.
    """
    params = {"point": f"{lat},{lon}", "key": TOMTOM_API_KEY}
    retryable_statuses = (429, 500, 502, 503, 504)
    fallback = float(default_speed_kmh)

    for attempt in range(max(0, int(retries)) + 1):
        if attempt:
            # Linear back-off before every retry (not before the first attempt)
            time.sleep(pause_sec * (attempt + 1))
        try:
            resp = requests.get(TOMTOM_FLOW_URL, params=params, timeout=15)
            if resp.status_code in retryable_statuses:
                continue
            if resp.status_code >= 400:
                # Other client/server errors (e.g. bad key, bad request) will
                # not succeed on a retry, so give up immediately.
                return fallback
            data = resp.json()
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decoding failures on requests versions
            # where the decode error is not a RequestException subclass.
            continue

        fsd = data.get("flowSegmentData") if isinstance(data, dict) else None
        if not isinstance(fsd, dict):
            return fallback
        for field in ("currentSpeed", "freeFlowSpeed"):
            value = fsd.get(field)
            if isinstance(value, (int, float)) and not isinstance(value, bool) and value > 0:
                return float(value)
        return fallback

    # All attempts were used up by transient failures
    return fallback

def batch_get_tomtom_speeds(coords, pause_sec=0.1):
    """
    Look up a TomTom speed for every coordinate pair, querying each distinct
    location only once.

    Coordinates are rounded to 5 decimals to decide which points share a
    request. The returned dict is keyed by the un-rounded
    ``(float(lat), float(lon))`` pairs and holds the value that
    ``get_tomtom_speed_kmh`` returned for the matching rounded point.
    ``pause_sec`` is slept after every request.
    """
    snapped = [(round(float(lat), 5), round(float(lon), 5)) for lat, lon in coords]

    # One request per distinct rounded point, in order of first appearance
    speed_by_snapped = {}
    for point in snapped:
        if point in speed_by_snapped:
            continue
        speed_by_snapped[point] = get_tomtom_speed_kmh(point[0], point[1])
        time.sleep(pause_sec)

    # Fan the results back out to the original coordinates
    return {
        (float(lat), float(lon)): speed_by_snapped[point]
        for (lat, lon), point in zip(coords, snapped)
    }

def refresh_live_traffic(demand_df: pd.DataFrame, poll_cycles: int = 1, interval_sec: int = 60):
    """
    Poll traffic speeds for every demand location and store them on ``demand_df``.

    Args:
        demand_df: Frame with ``lat`` and ``lon`` columns. It is modified in
            place: the ``traffic_speed_kmh`` column is (re)written each cycle.
        poll_cycles: Number of polling rounds; each round overwrites the
            previous values, so the last round's values remain.
        interval_sec: Seconds slept between rounds (not after the last one).

    Returns:
        None.
    """
    # Extract the coordinates once; this replaces two row-by-row iterrows()
    # passes per cycle and guarantees the lookup keys match the request keys.
    lats = demand_df["lat"].astype(float).tolist()
    lons = demand_df["lon"].astype(float).tolist()
    coords = list(zip(lats, lons))

    for cycle in range(1, poll_cycles + 1):
        speed_by_coord = batch_get_tomtom_speeds(coords, pause_sec=0.1)
        demand_df["traffic_speed_kmh"] = [speed_by_coord[coord] for coord in coords]

        print(f"\n[{datetime.datetime.now()}] TomTom speeds refreshed (cycle {cycle}/{poll_cycles}).")
        # Preview only: printing the whole frame every cycle floods the cell output
        print(demand_df[["lat", "lon", "traffic_speed_kmh"]].head())

        if cycle < poll_cycles:
            time.sleep(interval_sec)

# ============================
# 4. LIVE WEATHER FROM OPENWEATHER
# ============================

# The key is looked up by its environment-variable name; the value itself
# never appears in this cell.
OPENWEATHER_API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")

# Reject a missing, blank or placeholder ("<...>") key before any request is made
if OPENWEATHER_API_KEY.startswith("<") or not OPENWEATHER_API_KEY.strip():
    raise RuntimeError(
        "OpenWeather API key not set. Set os.environ['OPENWEATHER_API_KEY'] = 'YOUR_OPENWEATHER_API_KEY'."
    )

# Sanity check that shows only the final four characters of the key
print(f"OpenWeather key loaded (last 4): ****{OPENWEATHER_API_KEY[-4:]}")

# Endpoint queried by get_openweather
OPENWEATHER_URL = "https://api.openweathermap.org/data/2.5/weather"

def get_openweather(lat, lon, retries=2, pause_sec=0.5):
    """
    Query OpenWeather current weather for (lat, lon).

    Args:
        lat, lon: Coordinates; converted with float() for the request.
        retries: Extra attempts after the first one for transient failures
            (network errors, HTTP 429/500/502/503/504, undecodable body).
        pause_sec: Base back-off; the wait before retry k is pause_sec * (k + 1).

    Returns:
        dict with keys
          - description: lower-cased text of the first ``weather`` entry ("" if absent)
          - temp_c: ``main.temp`` as float, or NaN (the request asks for units=metric)
          - humidity: ``main.humidity`` as int, or NaN
          - wind_speed_ms: ``wind.speed`` as float, or NaN
          - precip_mm_hr: ``rain["1h"]`` if numeric, else ``snow["1h"]`` if numeric, else 0.0
        When no usable response is obtained, the same keys are returned with
        an empty description, NaN readings and 0.0 precipitation. HTTP and
        network problems never raise.
    """
    def _is_number(value):
        # bool is a subclass of int; a JSON true/false is not a measurement
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def _section(payload, name):
        # Sub-object of the response, or {} when missing / not an object
        value = payload.get(name)
        return value if isinstance(value, dict) else {}

    params = {
        "lat": float(lat),
        "lon": float(lon),
        "appid": OPENWEATHER_API_KEY,
        "units": "metric",
    }
    retryable_statuses = (429, 500, 502, 503, 504)
    no_data = {
        "description": "",
        "temp_c": np.nan,
        "humidity": np.nan,
        "wind_speed_ms": np.nan,
        "precip_mm_hr": 0.0,
    }

    for attempt in range(max(0, int(retries)) + 1):
        if attempt:
            # Linear back-off before every retry (not before the first attempt)
            time.sleep(pause_sec * (attempt + 1))
        try:
            resp = requests.get(OPENWEATHER_URL, params=params, timeout=15)
            if resp.status_code in retryable_statuses:
                continue
            if resp.status_code >= 400:
                # e.g. invalid key or bad request: retrying cannot help
                return no_data
            data = resp.json()
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decoding failures on requests versions
            # where the decode error is not a RequestException subclass.
            continue

        if not isinstance(data, dict):
            return no_data

        weather_list = data.get("weather")
        first_entry = weather_list[0] if isinstance(weather_list, list) and weather_list else {}
        description = (first_entry.get("description") if isinstance(first_entry, dict) else "") or ""

        main = _section(data, "main")
        wind = _section(data, "wind")
        rain = _section(data, "rain")
        snow = _section(data, "snow")

        temp_c = float(main["temp"]) if _is_number(main.get("temp")) else np.nan
        humidity = int(main["humidity"]) if _is_number(main.get("humidity")) else np.nan
        wind_ms = float(wind["speed"]) if _is_number(wind.get("speed")) else np.nan

        # Rain takes precedence over snow; absent or NaN amounts count as 0.0
        precip_1h = 0.0
        for amounts in (rain, snow):
            if _is_number(amounts.get("1h")):
                precip_1h = float(amounts["1h"])
                break
        if np.isnan(precip_1h):
            precip_1h = 0.0

        return {
            "description": str(description).lower(),
            "temp_c": temp_c,
            "humidity": humidity,
            "wind_speed_ms": wind_ms,
            "precip_mm_hr": precip_1h,
        }

    # All attempts were used up by transient failures
    return no_data

def batch_get_openweather(coords, pause_sec=0.1):
    """
    Fetch weather for every coordinate pair while calling ``get_openweather``
    only once per distinct location (coordinates rounded to 5 decimals).

    Returns a dict keyed by the un-rounded ``(float(lat), float(lon))`` pairs;
    pairs that round to the same point share the same weather dict object.
    ``pause_sec`` is slept after every request.
    """
    snapped_points = []
    for lat, lon in coords:
        snapped_points.append((round(float(lat), 5), round(float(lon), 5)))

    # Query each distinct rounded point once, in order of first appearance
    fetched = {}
    for point in snapped_points:
        if point not in fetched:
            point_lat, point_lon = point
            fetched[point] = get_openweather(point_lat, point_lon)
            time.sleep(pause_sec)

    # Map every original coordinate onto the reading of its rounded point
    by_coord = {}
    for (lat, lon), point in zip(coords, snapped_points):
        by_coord[(float(lat), float(lon))] = fetched[point]
    return by_coord

def compute_weather_factor(desc: str, precip_mm_hr: float) -> float:
    """
    Map a weather description and precipitation amount to a severity factor.

    The factor is the largest value among all rules that match:
      - 1.3  description mentions thunderstorm, snow or sleet
      - 1.25 precipitation amount is greater than 0
      - 1.2  description mentions rain or drizzle
      - 1.1  description mentions mist, fog, haze or smoke
      - 1.0  nothing matches (also for empty/None description and
             missing, zero or NaN precipitation)

    Taking the maximum means a measured precipitation amount can no longer
    lower the factor of a thunderstorm/snow/sleet report from 1.3 to 1.25.
    """
    text = (desc or "").lower()

    keyword_rules = (
        (("thunderstorm", "snow", "sleet"), 1.3),
        (("rain", "drizzle"), 1.2),
        (("mist", "fog", "haze", "smoke"), 1.1),
    )

    factor = 1.0
    # NaN fails the "> 0" comparison and None/0 are falsy, so none of them match
    if precip_mm_hr and precip_mm_hr > 0:
        factor = max(factor, 1.25)
    for keywords, rule_factor in keyword_rules:
        if any(keyword in text for keyword in keywords):
            factor = max(factor, rule_factor)
    return factor

def refresh_live_weather(demand_df: pd.DataFrame, poll_cycles: int = 1, interval_sec: int = 60):
    """
    Poll weather for every demand location and write the readings onto
    ``demand_df`` in place.

    Each cycle (re)writes the columns weather_description, temp_c, humidity,
    wind_speed_ms, precip_mm_hr and weather_factor, prints a preview of the
    first rows, and sleeps ``interval_sec`` seconds unless it was the last
    cycle. Returns None.
    """
    def _row_keys():
        # (lat, lon) lookup key for every row, in row order
        return [(float(row.lat), float(row.lon)) for _, row in demand_df.iterrows()]

    # Output column -> key in the dict returned for each coordinate
    reading_columns = (
        ("weather_description", "description"),
        ("temp_c", "temp_c"),
        ("humidity", "humidity"),
        ("wind_speed_ms", "wind_speed_ms"),
        ("precip_mm_hr", "precip_mm_hr"),
    )
    preview_columns = ["lat", "lon", "weather_description", "temp_c", "humidity", "wind_speed_ms", "precip_mm_hr", "weather_factor"]

    coords = _row_keys()
    cycle = 0
    while cycle < poll_cycles:
        cycle += 1
        ow_by_coord = batch_get_openweather(coords, pause_sec=0.1)

        # One reading per row, then one column per field
        readings = [ow_by_coord[key] for key in _row_keys()]
        columns = {
            column: [reading[field] for reading in readings]
            for column, field in reading_columns
        }
        columns["weather_factor"] = [
            compute_weather_factor(reading["description"], reading["precip_mm_hr"])
            for reading in readings
        ]
        for column, values in columns.items():
            demand_df[column] = values

        print(f"\n[{datetime.datetime.now()}] OpenWeather refreshed (cycle {cycle}/{poll_cycles}).")
        print(demand_df[preview_columns].head())

        if cycle < poll_cycles:
            time.sleep(interval_sec)

# --- First fetches happen here ---
# Both calls modify demand_df in place: the first writes traffic_speed_kmh, the
# second writes weather_description, temp_c, humidity, wind_speed_ms,
# precip_mm_hr and weather_factor. Each call runs POLL_CYCLES cycles and sleeps
# POLL_INTERVAL_SEC seconds between cycles, rewriting its columns every cycle,
# so the values of the final cycle are the ones left on the frame. The weather
# call starts only after the traffic call has finished all of its cycles.
refresh_live_traffic(demand_df, poll_cycles=POLL_CYCLES, interval_sec=POLL_INTERVAL_SEC)
refresh_live_weather(demand_df, poll_cycles=POLL_CYCLES, interval_sec=POLL_INTERVAL_SEC)

# ============================
# 5. STATE VECTOR CREATION
# ============================

EARTH_RADIUS_KM = 6371.0088  # mean Earth radius used by the haversine formula


def _haversine_km(lat1, lon1, lat2, lon2):
    """Great-circle distance in km between points given in decimal degrees (broadcasts like numpy)."""
    lat1, lon1, lat2, lon2 = (
        np.radians(np.asarray(value, dtype=float)) for value in (lat1, lon1, lat2, lon2)
    )
    half_chord_sq = (
        np.sin((lat2 - lat1) / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2
    )
    # clip guards against rounding pushing the value marginally outside [0, 1]
    return 2.0 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(np.clip(half_chord_sq, 0.0, 1.0)))


def _numeric_column(df, name, default):
    """Column `name` of df as a float array; `default` where missing/NaN or when the column is absent."""
    if name not in df.columns:
        return np.full(len(df), float(default))
    return pd.to_numeric(df[name], errors="coerce").fillna(default).to_numpy(dtype=float)


def build_state_matrix(fleet_df, demand_df):
    """
    Build one state vector per (vehicle, demand) pair.

    Rows are ordered vehicle-major (all demands for the first vehicle, then
    all demands for the second, ...). Columns are:
        [capacity, availability, demand_size_kg, adjusted_speed, eta, distance]

    - distance: haversine distance in km between vehicle and demand. The
      previous Euclidean norm on degrees was divided by a km/min speed, which
      mixed units and made the ETA meaningless.
    - adjusted_speed: traffic_speed_kmh / weather_factor. The factor is >= 1
      for worse weather, so it must reduce the speed; multiplying by it made
      bad weather shorten the ETA.
    - eta: distance / (adjusted_speed / 60 + 1e-6), i.e. minutes; a zero or
      missing speed therefore yields a very large ETA instead of a division
      by zero.

    Missing traffic_speed_kmh counts as 0.0 and missing/non-positive
    weather_factor as 1.0. Returns a float array of shape
    (len(fleet_df) * len(demand_df), 6).
    """
    n_vehicles, n_demands = len(fleet_df), len(demand_df)

    v_lat = fleet_df["lat"].to_numpy(dtype=float)
    v_lon = fleet_df["lon"].to_numpy(dtype=float)
    v_cap = fleet_df["capacity"].to_numpy(dtype=float)
    v_avail = fleet_df["availability"].to_numpy(dtype=float)

    d_lat = demand_df["lat"].to_numpy(dtype=float)
    d_lon = demand_df["lon"].to_numpy(dtype=float)
    d_size = demand_df["demand_size_kg"].to_numpy(dtype=float)

    base_speed = _numeric_column(demand_df, "traffic_speed_kmh", 0.0)
    weather_factor = _numeric_column(demand_df, "weather_factor", 1.0)
    weather_factor = np.where(weather_factor > 0, weather_factor, 1.0)

    adjusted_speed = base_speed / weather_factor
    speed_km_per_min = np.where(adjusted_speed > 0, adjusted_speed / 60.0, 0.0)

    # (n_vehicles, n_demands) grids via broadcasting
    distance_km = _haversine_km(v_lat[:, None], v_lon[:, None], d_lat[None, :], d_lon[None, :])
    eta_min = distance_km / (speed_km_per_min[None, :] + 1e-6)

    # repeat -> vehicle value held for each of its demands; tile -> demand values cycled per vehicle
    return np.column_stack([
        np.repeat(v_cap, n_demands),
        np.repeat(v_avail, n_demands),
        np.tile(d_size, n_vehicles),
        np.tile(adjusted_speed, n_vehicles),
        eta_min.ravel(),
        distance_km.ravel(),
    ])


state_matrix = build_state_matrix(fleet_df, demand_df)
# Kept for code that still expects the list-of-lists form
state_vectors = state_matrix.tolist()

print("\nState Matrix Shape:", state_matrix.shape)
if state_matrix.size > 0:
    print("Sample State Vector (first row):\n", state_matrix[0])



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Demand sample:
                  time          zone      lat     lon  demand_size_kg
0 2025-08-28 00:41:00  Yeshwanthpur  13.0186  77.556             149
1 2025-08-28 00:18:00  Yeshwanthpur  13.0186  77.556              90
2 2025-08-28 00:35:00  Yeshwanthpur  13.0186  77.556             146
3 2025-08-28 00:36:00  Yeshwanthpur  13.0186  77.556              79
4 2025-08-28 00:48:00  Yeshwanthpur  13.0186  77.556             125

Fleet sample (normalized):
   vehicle_id pickup_location  capacity availability_time_ist      lat  \
0       V001    Yeshwanthpur       500   2025-08-28 09:23:00  13.0285   
1       V002      Whitefield       500   2025-08-28 17:05:00  12.9698   
2       V003     Koramangala      1500   2025-08-28 17:58:00  12.9279   
3       V004     Koramangala      1000   2025-08-28 15:18:00  12.9279   
4       V005    Marathahalli      1000   2025-0