In [21]:
import polars as pl
import requests as req
from datetime import datetime
import os
from dotenv import load_dotenv
from io import StringIO
import time


In [22]:
# Load variables from a local .env file into the process environment.
# This must run before any cell that reads NOAA_TOKEN via os.getenv().
load_dotenv()

True

In [23]:
# Read the NOAA API token from the environment
NOAA_TOKEN = os.getenv("NOAA_TOKEN")

# Request parameters for the single-page sample query
dataset, location = 'GHCND', 'FIPS:19'
start, end = '2024-06-01', '2024-06-30'
datatypes = 'TMAX,TMIN,PRCP'
units = 'standard'
limit = 100

# Assemble the query string from ordered (name, value) pairs
api_endpoint = "https://www.ncei.noaa.gov/cdo-web/api/v2/data"
query_pairs = [
    ('datasetid', dataset),
    ('locationid', location),
    ('startdate', start),
    ('enddate', end),
    ('datatypeid', datatypes),
    ('units', units),
    ('limit', limit),
]
url = api_endpoint + "?" + "&".join(f"{name}={value}" for name, value in query_pairs)

# The token travels in a request header, not in the URL
headers = dict(token=NOAA_TOKEN)
print(f"Full URL: {url}")


Full URL: https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&locationid=FIPS:19&startdate=2024-06-01&enddate=2024-06-30&datatypeid=TMAX,TMIN,PRCP&units=standard&limit=100


In [24]:
# Request a single page of results from the NOAA CDO API
response = req.get(url, headers=headers, timeout=120)

# Stop the notebook on any non-200 reply. Merely printing the status would
# leave `data` undefined (or stale from an earlier run), so the next cell
# would fail with a NameError or silently work on old data.
if response.status_code != 200:
    print("\nError : ", response.status_code)
    # The response body usually carries the API's explanation of the failure
    print(response.text[:500])
    raise RuntimeError(f"NOAA API request failed with status {response.status_code}")

# Only parse JSON if status is 200
data = response.json()

In [25]:
# Turn the API's result records into a Polars frame and inspect it
records = data['results']
df_raw = pl.DataFrame(records)
print(df_raw)

# List which measurement types are present in this page
unique_datatypes = df_raw['datatype'].unique()
print("Unique datatypes:", unique_datatypes)

shape: (100, 5)
┌─────────────────────┬──────────┬───────────────────┬────────────┬───────┐
│ date                ┆ datatype ┆ station           ┆ attributes ┆ value │
│ ---                 ┆ ---      ┆ ---               ┆ ---        ┆ ---   │
│ str                 ┆ str      ┆ str               ┆ str        ┆ f64   │
╞═════════════════════╪══════════╪═══════════════════╪════════════╪═══════╡
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAAD0002 ┆ ,,N,0800   ┆ 0.0   │
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAAL0003 ┆ T,,N,0700  ┆ 0.0   │
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAAL0005 ┆ ,,N,0700   ┆ 0.0   │
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAAL0006 ┆ ,,N,0700   ┆ 0.01  │
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAAL0007 ┆ ,,N,0700   ┆ 0.0   │
│ …                   ┆ …        ┆ …                 ┆ …          ┆ …     │
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAEM0003 ┆ ,,N,0700   ┆ 1.0   │
│ 2024-06-01T00:00:00 ┆ PRCP     ┆ GHCND:US1IAFM0001 ┆ ,,N,0800   ┆ 0.07

In [None]:
def fetch_all_iowa_weather(start_date, end_date):
    """
    Fetch all Iowa weather data with pagination.

    Requests GHCND records (TMAX, TMIN, PRCP; standard units) for location
    FIPS:19 from the NOAA CDO v2 ``data`` endpoint, one page of up to 1000
    records at a time. Paging stops on an empty page, a short page, or once
    the total count reported in the response metadata has been collected.

    Args:
        start_date: Value sent as ``startdate`` (e.g. '2024-06-01').
        end_date: Value sent as ``enddate`` (e.g. '2024-06-30').

    Returns:
        A Polars DataFrame built from every record collected. If the token
        is missing or a request fails, the problem is printed and whatever
        was collected so far (possibly nothing) is returned, so an empty
        frame means the fetch did not succeed.
    """
    all_results = []

    token = os.getenv('NOAA_TOKEN')
    if not token:
        # requests omits headers whose value is None, so without this check
        # the call would go out unauthenticated and be rejected by the API.
        print("Error: NOAA_TOKEN is not set. Run load_dotenv() or export the variable first.")
        return pl.DataFrame(all_results)

    offset = 1
    limit = 1000  # Max allowed by NOAA

    base_url = "https://www.ncei.noaa.gov/cdo-web/api/v2/data"
    headers = {'token': token}  # identical for every page, so built once

    print(f"Fetching Iowa weather: {start_date} to {end_date}")

    while True:
        # Build URL with current offset
        url = f"{base_url}?datasetid=GHCND&locationid=FIPS:19&startdate={start_date}&enddate={end_date}&datatypeid=TMAX,TMIN,PRCP&units=standard&limit={limit}&offset={offset}"

        print(f"Fetching offset {offset}...", end=' ')

        try:
            response = req.get(url, headers=headers, timeout=300)
        except req.RequestException as e:
            # Network-level failure (connection, timeout, ...)
            print(f"\nError: {e}")
            break

        if response.status_code != 200:
            print("\nError : ", response.status_code)
            # Show the API's own explanation; the status code alone does not
            # distinguish a bad token from a bad parameter.
            print(response.text[:500])
            break

        try:
            data = response.json()
        except ValueError as e:
            # Body was not valid JSON
            print(f"\nError: {e}")
            break

        results = data.get('results')
        if not results:
            print("No more results")
            break

        batch_size = len(results)
        all_results.extend(results)

        print(f"got {batch_size} records (total: {len(all_results)})")

        # A short page means this was the last one
        if batch_size < limit:
            print("Reached end of data")
            break

        # Cross-check against the total count reported in the metadata, if any
        total_count = data.get('metadata', {}).get('resultset', {}).get('count')
        if total_count is not None and len(all_results) >= total_count:
            print(f"Fetched all {total_count} records")
            break

        offset += limit

        # NOAA rate limit: 5 requests per second.
        # A 0.3 s pause keeps us to roughly 3 requests per second at most.
        time.sleep(0.3)

    print(f"\nTotal records fetched: {len(all_results)}")

    # Convert to Polars DataFrame
    df = pl.DataFrame(all_results)

    return df


# Pull the complete June 2024 dataset through the paginated helper
df_raw = fetch_all_iowa_weather(start_date='2024-06-01', end_date='2024-06-30')

# Show the combined result
print("\n=== Final Dataset ===")
print(df_raw)

Fetching Iowa weather: 2024-06-01 to 2024-06-30
Fetching offset 1... 
Error :  400

Total records fetched: 0

=== Final Dataset ===
shape: (0, 0)
┌┐
╞╡
└┘
