In [1]:
import requests
import pandas as pd

# 1. Setup
HEADERS = {"User-Agent": "MyWeatherApp/1.0"}
LATITUDE = "39.7456"
LONGITUDE = "-97.0892"

# 2. Get Grid Data URL (First API Call)
points_url = f"https://api.weather.gov/points/{LATITUDE},{LONGITUDE}"
try:
    points_response = requests.get(points_url, headers=HEADERS)
    points_response.raise_for_status()
    point_data = points_response.json()
    forecast_url = point_data["properties"]["forecast"]
except requests.exceptions.RequestException as e:
    print(f"Error fetching grid data: {e}")
    exit()

# 3. Fetch Forecast Data (Second API Call)
try:
    forecast_response = requests.get(forecast_url, headers=HEADERS)
    forecast_response.raise_for_status()
    forecast_data = forecast_response.json()
except requests.exceptions.RequestException as e:
    print(f"Error fetching forecast data: {e}")
    exit()

# 4. Extract the List of Forecast Periods
# The actual forecast is a list of dictionaries under 'properties' -> 'periods'
forecast_periods = forecast_data["properties"]["periods"]

# 5. Convert to Pandas DataFrame
# Pandas can directly read a list of dictionaries into a DataFrame
df = pd.DataFrame(forecast_periods)

# 6. Clean and Display the DataFrame (Optional but Recommended)
# Select only the columns you want to display for a cleaner view
df_cleaned = df[
    ["name", "temperature", "temperatureUnit", "windSpeed", "detailedForecast"]
]

# Better-formatted column for temperature
df_cleaned["Temperature"] = (
    df_cleaned["temperature"].astype(str) + " " + df_cleaned["temperatureUnit"]
)
df_cleaned = df_cleaned.drop(columns=["temperature", "temperatureUnit"])

# Rename columns for clarity
df_cleaned.columns = [
    "Time Period",
    "Wind Speed",
    "Detailed Forecast",
    "Temperature",
]

print(df_cleaned)

        Time Period    Wind Speed  \
0           Tonight         5 mph   
1            Friday   5 to 10 mph   
2      Friday Night         5 mph   
3          Saturday   5 to 15 mph   
4    Saturday Night         5 mph   
5            Sunday         5 mph   
6      Sunday Night   5 to 10 mph   
7            Monday  10 to 15 mph   
8      Monday Night   5 to 10 mph   
9           Tuesday   5 to 10 mph   
10    Tuesday Night         5 mph   
11        Wednesday   5 to 10 mph   
12  Wednesday Night   5 to 10 mph   
13         Thursday        10 mph   

                                    Detailed Forecast Temperature  
0   Patchy fog after 2am. Mostly clear, with a low...        43 F  
1   Patchy fog before 9am. Sunny. High near 75, wi...        75 F  
2   Partly cloudy, with a low around 47. Southwest...        47 F  
3   Mostly sunny, with a high near 70. Northwest w...        70 F  
4   Mostly clear, with a low around 38. Northeast ...        38 F  
5   Sunny, with a high near 62. East

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["Temperature"] = (


In [3]:
df_cleaned

Unnamed: 0,Time Period,Wind Speed,Detailed Forecast,Temperature
0,This Afternoon,15 mph,"Sunny, with a high near 63. Northwest wind aro...",63 F
1,Tonight,0 to 10 mph,"Clear, with a low around 39. Northwest wind 0 ...",39 F
2,Wednesday,0 to 5 mph,"Sunny, with a high near 67. Northwest wind 0 t...",67 F
3,Wednesday Night,0 to 5 mph,"Mostly clear, with a low around 38. North wind...",38 F
4,Thursday,5 to 10 mph,"Sunny, with a high near 65. East wind 5 to 10 ...",65 F
5,Thursday Night,5 to 10 mph,A chance of rain showers after 1am. Mostly clo...,47 F
6,Friday,10 mph,"A chance of rain showers before 10am, then a c...",55 F
7,Friday Night,5 to 10 mph,"A chance of showers and thunderstorms. Cloudy,...",49 F
8,Saturday,5 to 10 mph,A slight chance of rain showers before 1pm. Mo...,60 F
9,Saturday Night,5 mph,"Mostly cloudy, with a low around 50.",50 F


In [23]:
import requests
import pandas as pd

# Define a list of locations (latitude, longitude, and a friendly name)
LOCATIONS = [
    {"name": "Washington DC", "lat": "38.8894", "lon": "-77.0352"},
    {"name": "San Francisco", "lat": "37.7749", "lon": "-122.4194"},
    {"name": "Miami", "lat": "25.7617", "lon": "-80.1918"},
]

HEADERS = {"User-Agent": "MyMultiRegionApp/1.0"}
ALL_FORECASTS = []


def get_forecast_for_point(lat, lon):
    """Fetches the 7-day forecast URL from the NWS API for a given lat/lon."""
    points_url = f"https://api.weather.gov/points/{lat},{lon}"
    response = requests.get(points_url, headers=HEADERS)
    response.raise_for_status()
    return response.json()["properties"]["forecast"]


def fetch_and_process_forecast(location_info):
    """Fetches the final forecast and processes it into a list of records."""
    name, lat, lon = (
        location_info["name"],
        location_info["lat"],
        location_info["lon"],
    )
    print(f"Fetching data for {name}...")

    try:
        # 1. Get the final forecast URL
        forecast_url = get_forecast_for_point(lat, lon)

        # 2. Fetch the actual forecast data
        forecast_response = requests.get(forecast_url, headers=HEADERS)
        forecast_response.raise_for_status()

        forecast_periods = forecast_response.json()["properties"]["periods"]

        # 3. Add the location name to each period and append to the master list
        for period in forecast_periods:
            period["location_name"] = name  # <-- Add the location identifier
            ALL_FORECASTS.append(period)

    except requests.exceptions.RequestException as e:
        print(f"Skipping {name} due to error: {e}")


# Iterate through all locations
for loc in LOCATIONS:
    fetch_and_process_forecast(loc)

# Create the final DataFrame
if ALL_FORECASTS:
    df = pd.DataFrame(ALL_FORECASTS)

    # Optional: Clean up and select columns
    df_final = df[
        [
            "location_name",
            "name",
            "temperature",
            "temperatureUnit",
            "detailedForecast",
        ]
    ]
    df_final.columns = ["Location", "Time Period", "Temp", "Unit", "Forecast"]

    print("\n--- Consolidated Forecast Data ---")
    print(df_final)

Fetching data for Washington DC...
Fetching data for San Francisco...
Fetching data for Miami...

--- Consolidated Forecast Data ---
         Location      Time Period  Temp Unit  \
0   Washington DC   This Afternoon    58    F   
1   Washington DC          Tonight    39    F   
2   Washington DC         Thursday    58    F   
3   Washington DC   Thursday Night    34    F   
4   Washington DC           Friday    57    F   
5   Washington DC     Friday Night    35    F   
6   Washington DC         Saturday    59    F   
7   Washington DC   Saturday Night    50    F   
8   Washington DC           Sunday    67    F   
9   Washington DC     Sunday Night    40    F   
10  Washington DC           Monday    55    F   
11  Washington DC     Monday Night    36    F   
12  Washington DC          Tuesday    53    F   
13  Washington DC    Tuesday Night    40    F   
14  San Francisco        Overnight    57    F   
15  San Francisco        Wednesday    69    F   
16  San Francisco  Wednesday Night

In [5]:
df_final

Unnamed: 0,Location,Time Period,Temp,Unit,Forecast
0,Washington DC,This Afternoon,73,F,"Mostly sunny, with a high near 73. South wind ..."
1,Washington DC,Tonight,50,F,Rain showers likely between 7pm and 2am. Mostl...
2,Washington DC,Wednesday,64,F,"Sunny, with a high near 64. West wind 5 to 14 ..."
3,Washington DC,Wednesday Night,44,F,"Mostly clear, with a low around 44. West wind ..."
4,Washington DC,Thursday,63,F,"Sunny, with a high near 63. West wind 5 to 14 ..."
5,Washington DC,Thursday Night,42,F,"Mostly clear, with a low around 42."
6,Washington DC,Friday,61,F,"Sunny, with a high near 61."
7,Washington DC,Friday Night,41,F,"Mostly clear, with a low around 41."
8,Washington DC,Saturday,61,F,"Mostly sunny, with a high near 61."
9,Washington DC,Saturday Night,44,F,"Mostly cloudy, with a low around 44."


In [1]:
import requests

# 1. Define your location and a User-Agent
# NOTE: The NWS API requires a User-Agent header that identifies your application/email.
LATITUDE = 38.8951  # Example: Washington D.C.
LONGITUDE = -77.0369
USER_AGENT = "MyWeatherApp/1.0 (contact@example.com)"

HEADERS = {"User-Agent": USER_AGENT, "Accept": "application/geo+json"}


def get_station_id(lat, lon):
    """Step 1: Get the closest weather observation station ID for the coordinates."""
    points_url = f"https://api.weather.gov/points/{lat},{lon}/stations"
    print(f"1. Querying for stations at: {points_url}")

    try:
        response = requests.get(points_url, headers=HEADERS, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        # The first feature in the list is usually the closest station
        station_id_url = data["features"][0]["id"]
        station_id = station_id_url.split("/")[-1]
        print(f"-> Closest Station ID found: {station_id}")
        return station_id

    except requests.exceptions.RequestException as e:
        print(f"Error finding station ID: {e}")
        return None


def get_current_observation(station_id):
    """Step 2 & 3: Get and parse the latest observation for the given station ID."""
    if not station_id:
        return

    observation_url = (
        f"https://api.weather.gov/stations/{station_id}/observations/latest"
    )
    print(f"\n2. Querying for latest observation at: {observation_url}")

    try:
        response = requests.get(observation_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        data = response.json()

        properties = data["properties"]

        # Extracting key observation metrics
        temperature_c = properties["temperature"]["value"]
        wind_speed_mps = properties["windSpeed"]["value"]
        wind_direction_deg = properties["windDirection"]["value"]
        text_description = properties["textDescription"]

        # NOTE on Units: NWS API returns data in metric (mostly Celsius and m/s).
        # You would need to convert these values for imperial units.

        print("\n--- Current Observation ---")
        print(f"Station: {station_id}")
        print(f"Description: {text_description}")
        print(f"Temperature: {temperature_c:.2f}°C")

        if wind_speed_mps is not None:
            print(f"Wind Speed: {wind_speed_mps:.2f} m/s")
        else:
            print("Wind Speed: N/A")

        if wind_direction_deg is not None:
            print(f"Wind Direction: {wind_direction_deg}°")
        else:
            print("Wind Direction: N/A")

        print("---------------------------")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching observation data: {e}")
    except KeyError as e:
        print(
            f"Error parsing data (missing key): {e}. Station data may be incomplete."
        )


# --- Execution ---
station = get_station_id(LATITUDE, LONGITUDE)
print(station)
get_current_observation(station)

1. Querying for stations at: https://api.weather.gov/points/38.8951,-77.0369/stations
-> Closest Station ID found: KDCA
KDCA

2. Querying for latest observation at: https://api.weather.gov/stations/KDCA/observations/latest

--- Current Observation ---
Station: KDCA
Description: Clear
Temperature: 16.00°C
Wind Speed: N/A
Wind Direction: N/A
---------------------------


In [29]:
import requests

TOKEN = "FhaIFQelGOSdLrnxCfCTvRMHAKeErqLC"
STATION_ID = "GHCND:USW00093721"  # Example station ID format for NOAA CDO
START_DATE = "2023-01-01"
END_DATE = "2023-01-31"

headers = {"token": TOKEN}

params = {
    "datasetid": "GHCND",  # daily summaries dataset
    "stationid": STATION_ID,
    "startdate": START_DATE,
    "enddate": END_DATE,
    "limit": 1000,
    "units": "metric",
}

url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"

response = requests.get(url, headers=headers, params=params)
data = response.json()

for result in data["results"]:
    print(result)

{'date': '2023-01-01T00:00:00', 'datatype': 'ADPT', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 67}
{'date': '2023-01-01T00:00:00', 'datatype': 'ASLP', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 10149}
{'date': '2023-01-01T00:00:00', 'datatype': 'ASTP', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 10085}
{'date': '2023-01-01T00:00:00', 'datatype': 'AWBT', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 89}
{'date': '2023-01-01T00:00:00', 'datatype': 'AWND', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 2.4}
{'date': '2023-01-01T00:00:00', 'datatype': 'PRCP', 'station': 'GHCND:USW00093721', 'attributes': ',,W,2400', 'value': 0.0}
{'date': '2023-01-01T00:00:00', 'datatype': 'RHAV', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 76}
{'date': '2023-01-01T00:00:00', 'datatype': 'RHMN', 'station': 'GHCND:USW00093721', 'attributes': ',,W,', 'value': 41}
{'date': '2023-01-01T00:00:00', 'dat

In [6]:
observation_url = f"https://api.weather.gov/stations/PCLC1/observations/latest"
print(f"\n2. Querying for latest observation at: {observation_url}")

response = requests.get(observation_url, headers=HEADERS, timeout=10)
response.raise_for_status()
data = response.json()


2. Querying for latest observation at: https://api.weather.gov/stations/PCLC1/observations/latest


In [4]:
data

{'@context': ['https://geojson.org/geojson-ld/geojson-context.jsonld',
  {'@version': '1.1',
   'wx': 'https://api.weather.gov/ontology#',
   's': 'https://schema.org/',
   'geo': 'http://www.opengis.net/ont/geosparql#',
   'unit': 'http://codes.wmo.int/common/unit/',
   '@vocab': 'https://api.weather.gov/ontology#',
   'geometry': {'@id': 's:GeoCoordinates', '@type': 'geo:wktLiteral'},
   'city': 's:addressLocality',
   'state': 's:addressRegion',
   'distance': {'@id': 's:Distance', '@type': 's:QuantitativeValue'},
   'bearing': {'@type': 's:QuantitativeValue'},
   'value': {'@id': 's:value'},
   'unitCode': {'@id': 's:unitCode', '@type': '@id'},
   'forecastOffice': {'@type': '@id'},
   'forecastGridData': {'@type': '@id'},
   'publicZone': {'@type': '@id'},
   'county': {'@type': '@id'}}],
 'id': 'https://api.weather.gov/stations/F4266/observations/2025-11-14T02:05:00+00:00',
 'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-118.78, 36.28]},
 'properties': {'@id':

In [21]:
import requests
import json
import regex as re
from tqdm import tqdm


def get_noaa_obs_station(lat, lon):
    """Get the NOAA observation station for a given latitude and longitude."""
    url = f"https://api.weather.gov/points/{lat},{lon}/stations"
    response = requests.get(url)
    response.raise_for_status()
    return response.json()


path = "../ingestion/counties/state_coordinates.txt"
with open(path, "r") as f:
    state_to_coords = json.load(f)
states_listed = []
weather_stations = {}
station_ids = []
for state, coords in tqdm(state_to_coords.items()):
    if state not in states_listed:
        states_listed.append(state)
        weather_stations[state] = []
        print(state)
    for coord in tqdm(coords):
        lon, lat = coord
        response = get_noaa_obs_station(lat, lon)
        api_link = str(response["features"][0]["id"])
        station_id = api_link.rstrip("/").rsplit("/", 1)[-1]
        if station_id not in station_ids:
            station_ids.append(station_id)
            weather_stations[state].append(station_id)
print(weather_stations)

  0%|          | 0/15 [00:00<?, ?it/s]

AR


100%|██████████| 75/75 [00:06<00:00, 11.61it/s]
  7%|▋         | 1/15 [00:06<01:30,  6.46s/it]

IA


100%|██████████| 99/99 [00:09<00:00, 10.93it/s]
 13%|█▎        | 2/15 [00:15<01:43,  7.99s/it]

IL


100%|██████████| 102/102 [04:05<00:00,  2.41s/it]
 20%|██        | 3/15 [04:21<23:18, 116.56s/it]

IN


100%|██████████| 92/92 [05:27<00:00,  3.56s/it]
 27%|██▋       | 4/15 [09:48<36:36, 199.72s/it]

KS


 99%|█████████▉| 104/105 [06:53<00:03,  3.98s/it]
 27%|██▋       | 4/15 [16:42<45:55, 250.52s/it]


KeyboardInterrupt: 

In [22]:
data["properties"]

{'@id': 'https://api.weather.gov/stations/KSGT/observations/2025-11-12T21:50:00+00:00',
 '@type': 'wx:ObservationStation',
 'elevation': {'unitCode': 'wmoUnit:m', 'value': 68},
 'station': 'https://api.weather.gov/stations/KSGT',
 'stationId': 'KSGT',
 'stationName': 'Stuttgart, Stuttgart Municipal Airport',
 'timestamp': '2025-11-12T21:50:00+00:00',
 'rawMessage': '',
 'textDescription': 'Clear',
 'icon': 'https://api.weather.gov/icons/land/day/skc?size=medium',
 'presentWeather': [],
 'temperature': {'unitCode': 'wmoUnit:degC',
  'value': 21,
  'qualityControl': 'V'},
 'dewpoint': {'unitCode': 'wmoUnit:degC', 'value': 9, 'qualityControl': 'V'},
 'windDirection': {'unitCode': 'wmoUnit:degree_(angle)',
  'value': 230,
  'qualityControl': 'V'},
 'windSpeed': {'unitCode': 'wmoUnit:km_h-1',
  'value': 7.416,
  'qualityControl': 'V'},
 'windGust': {'unitCode': 'wmoUnit:km_h-1',
  'value': None,
  'qualityControl': 'Z'},
 'barometricPressure': {'unitCode': 'wmoUnit:Pa',
  'value': 102133.48

In [26]:
props = data.get("properties") or {}
geom = data.get("geometry") or {}
coords = geom.get("coordinates")

lon = (
    coords[0] if isinstance(coords, (list, tuple)) and len(coords) > 0 else None
)
lat = (
    coords[1] if isinstance(coords, (list, tuple)) and len(coords) > 1 else None
)

test_dict_json = {
    "timestamp": props.get("timestamp"),
    "stationId": props.get("stationId"),
    "stationName": props.get("stationName"),
    "lat": lat,
    "lon": lon,
    "elevation": (props.get("elevation") or {}).get("value"),
    "temp_unit": (props.get("temperature") or {}).get("unitCode"),
    "temp": (props.get("temperature") or {}).get("value"),
    "pressure_pa": (props.get("barometricPressure") or {}).get("value"),
    "humidity": (props.get("relativeHumidity") or {}).get("value"),
    "wind_unit": (props.get("windSpeed") or {}).get("unitCode"),
    "wind_speed": (props.get("windSpeed") or {}).get("value"),
    "precip_unit": (props.get("precipitationLast3Hours") or {}).get("unitCode"),
    "precip_3hr": (props.get("precipitationLast3Hours") or {}).get("value"),
    "heat_unit": (props.get("heatIndex") or {}).get("unitCode"),
    "heat_index": (props.get("heatIndex") or {}).get("value"),
    "max_temp_24_unit": (props.get("maxTemperatureLast24Hours") or {}).get(
        "unitCode"
    ),
    "max_temp_24": (props.get("maxTemperatureLast24Hours") or {}).get("value"),
    "min_temp_24_unit": (props.get("minTemperatureLast24Hours") or {}).get(
        "unitCode"
    ),
    "min_temp_24": (props.get("minTemperatureLast24Hours") or {}).get("value"),
}

In [27]:
test_dict_json

{'timestamp': '2025-11-12T18:50:00+00:00',
 'stationId': 'KSGT',
 'stationName': 'Stuttgart, Stuttgart Municipal Airport',
 'lat': 34.6,
 'lon': -91.5699999,
 'elevation': 68,
 'temp_unit': 'wmoUnit:degC',
 'temp': 19,
 'pressure_pa': 102133.48,
 'humidity': 59.762815682936,
 'wind_unit': 'wmoUnit:km_h-1',
 'wind_speed': None,
 'precip_unit': 'wmoUnit:mm',
 'precip_3hr': None,
 'heat_unit': 'wmoUnit:degC',
 'heat_index': None,
 'max_temp_24_unit': 'wmoUnit:degC',
 'max_temp_24': None,
 'min_temp_24_unit': 'wmoUnit:degC',
 'min_temp_24': None}

In [43]:
import json

json.dump(test_dict_json)

TypeError: dump() missing 1 required positional argument: 'fp'