In [1]:
import requests
import pandas as pd
import os
import boto3
import json
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

# Exploring the NOAA API

In [3]:
# NOAA Climate Data Online (CDO) v2 settings. The token comes from the
# environment so it never has to be written into the notebook.
BASE_URL = "https://www.ncei.noaa.gov/cdo-web/api/v2/"
NOAA_API_TOKEN = os.environ.get("NOAA_API_TOKEN")
HEADERS = dict(token=NOAA_API_TOKEN)

In [4]:

# Dataset id of the "Daily Summaries" dataset
GHCND_DATASET_ID = "GHCND"
# Location id used for New York
NYC_LOCATION_ID = "CITY:US360019"


In [5]:
def explore_noaa_endpoint(endpoint, params=None, timeout=30):
    """Request a NOAA CDO endpoint and pretty-print the JSON response.

    Args:
        endpoint: Path appended to ``BASE_URL`` (e.g. ``"datatypes"``).
        params: Optional dict of query-string parameters passed to
            ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook cell on a stalled connection.

    Returns:
        The decoded JSON payload, or ``None`` if the request failed or the
        body could not be decoded as JSON. Failures are printed, not raised.
    """
    print(f"Requesting: {BASE_URL}{endpoint} with params: {params}")

    # Stage 1: transport / HTTP-status errors.
    try:
        response = requests.get(
            f"{BASE_URL}{endpoint}",
            headers=HEADERS,
            params=params,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        if getattr(e, "response", None) is not None:
            print(f"Response content: {e.response.text}")
        return None

    print(f"Response Successfull! URL Used: {response.url}")

    # Stage 2: decoding. Handled in its own try-block because recent versions
    # of requests raise a JSONDecodeError that is *also* a RequestException,
    # so a shared try-block would route it to the handler above and the
    # offending body would never be shown. ValueError is the common base of
    # every JSON decode error response.json() can raise.
    try:
        data = response.json()
    except ValueError:
        print("Failed to decode JSON.")
        print(f"Response content: {response.text}")
        return None

    print(json.dumps(data, indent=4))
    return data

# Exploring Data Categories

In [6]:
# List the data categories available within the selected dataset.
params_datacategories = dict(datasetid=GHCND_DATASET_ID, limit=50)
datacategories_data = explore_noaa_endpoint(
    "datacategories",
    params=params_datacategories,
)

Requesting: https://www.ncei.noaa.gov/cdo-web/api/v2/datacategories with params: {'datasetid': 'GHCND', 'limit': 50}
Response Successfull! URL Used: https://www.ncei.noaa.gov/cdo-web/api/v2/datacategories?datasetid=GHCND&limit=50
{
    "metadata": {
        "resultset": {
            "offset": 1,
            "count": 9,
            "limit": 50
        }
    },
    "results": [
        {
            "name": "Evaporation",
            "id": "EVAP"
        },
        {
            "name": "Land",
            "id": "LAND"
        },
        {
            "name": "Precipitation",
            "id": "PRCP"
        },
        {
            "name": "Sky cover & clouds",
            "id": "SKY"
        },
        {
            "name": "Sunshine",
            "id": "SUN"
        },
        {
            "name": "Air Temperature",
            "id": "TEMP"
        },
        {
            "name": "Water",
            "id": "WATER"
        },
        {
            "name": "Wind",
            "id": "

## Categories of Interest
- PRCP (Precipitation) - Rain Intensity
- TEMP (Air Temperature)
- WXTYPE (Weather Type)

# Exploring Data Types

In [7]:
# Lift pandas' row cap so displayed DataFrames are not truncated.
pd.set_option("display.max_rows", None)

# List the data types available within the selected dataset.
params_datatypes = dict(datasetid=GHCND_DATASET_ID, limit=500)
datatypes_data = explore_noaa_endpoint(
    "datatypes",
    params=params_datatypes,
)

Requesting: https://www.ncei.noaa.gov/cdo-web/api/v2/datatypes with params: {'datasetid': 'GHCND', 'limit': 500}
Response Successfull! URL Used: https://www.ncei.noaa.gov/cdo-web/api/v2/datatypes?datasetid=GHCND&limit=500
{
    "metadata": {
        "resultset": {
            "offset": 1,
            "count": 136,
            "limit": 500
        }
    },
    "results": [
        {
            "mindate": "1994-03-19",
            "maxdate": "1996-05-28",
            "name": "Average cloudiness midnight to midnight from 30-second ceilometer data",
            "datacoverage": 1,
            "id": "ACMC"
        },
        {
            "mindate": "1965-01-01",
            "maxdate": "2005-12-31",
            "name": "Average cloudiness midnight to midnight from manual observations",
            "datacoverage": 1,
            "id": "ACMH"
        },
        {
            "mindate": "1994-02-01",
            "maxdate": "1996-05-28",
            "name": "Average cloudiness sunrise to sunset

In [8]:
# Turn the "results" list of the datatypes response into a DataFrame.
# Note: explore_noaa_endpoint returns None on failure, in which case this
# subscript raises TypeError.
datatypes_df = pd.json_normalize(datatypes_data["results"])

In [9]:
# Display the data types table (bare last expression -> rich display).
datatypes_df

Unnamed: 0,mindate,maxdate,name,datacoverage,id
0,1994-03-19,1996-05-28,Average cloudiness midnight to midnight from 3...,1.0,ACMC
1,1965-01-01,2005-12-31,Average cloudiness midnight to midnight from m...,1.0,ACMH
2,1994-02-01,1996-05-28,Average cloudiness sunrise to sunset from 30-s...,1.0,ACSC
3,1965-01-01,2005-12-31,Average cloudiness sunrise to sunset from manu...,1.0,ACSH
4,1982-01-01,2025-06-01,Average wind speed,1.0,AWND
5,1948-08-02,2012-07-23,Number of days included in the multiday evapor...,1.0,DAEV
6,1832-05-11,2025-06-02,Number of days included in the multiday precip...,1.0,DAPR
7,1877-01-02,2024-05-25,Number of days included in the multiday snow f...,1.0,DASF
8,1863-05-04,2025-05-23,Number of days included in the multiday minimu...,1.0,DATN
9,1863-05-04,2025-05-22,Number of days included in the multiday maximu...,1.0,DATX


## Types of Interest


### PRCP (Precipitation):
- "name": "Precipitation"
- "maxdate": "2025-06-01" (Current)
- This is the standard daily total precipitation. 
### SNOW (Snowfall) & SNWD (Snow depth):
- Good for context, but the primary KPI is about rain intensity; snow would be a different kind of adverse weather.
### WT\*\* codes (Weather Type, e.g., WT01 - Fog, WT03 - Thunder, WT05 - Hail, WT13 - Mist, WT14 - Drizzle, WT16 - Rain, WT17 - Freezing Rain, WT18 - Snow/Ice Pellets):
- These are flags indicating whether a certain weather phenomenon occurred on that day.
- "maxdate" for these is generally current (2021-xxxx to 2025-xxxx).
- WT16 (Rain): This is highly relevant. If WT16 is flagged for a day, it rained.
- WT14 (Drizzle): Also relevant for light rain.
- WT13 (Mist): Could be relevant.
- WT17 (Freezing Rain): A specific type of hazardous precipitation.

The presence of these flags on a given day, combined with the PRCP value, can help infer the nature and occurrence of precipitation. For example, a day with a high PRCP value and WT16 flagged clearly had significant rain.

# Exploring Stations


In [10]:
# List the stations for the New York location within the selected dataset.
# The location id comes from NYC_LOCATION_ID so it is defined in one place
# only, rather than being repeated here as a string literal.
params_stations = {
    "datasetid": GHCND_DATASET_ID,
    "locationid": NYC_LOCATION_ID,
    "limit": 1000,
}
stations_data = explore_noaa_endpoint("stations", params=params_stations)

Requesting: https://www.ncei.noaa.gov/cdo-web/api/v2/stations with params: {'datasetid': 'GHCND', 'locationid': 'CITY:US360019', 'limit': 1000}
Response Successfull! URL Used: https://www.ncei.noaa.gov/cdo-web/api/v2/stations?datasetid=GHCND&locationid=CITY%3AUS360019&limit=1000
{
    "metadata": {
        "resultset": {
            "offset": 1,
            "count": 330,
            "limit": 1000
        }
    },
    "results": [
        {
            "elevation": 36.6,
            "mindate": "2015-07-30",
            "maxdate": "2017-11-19",
            "latitude": 41.0641,
            "name": "STAMFORD 2.6 SSW, CT US",
            "datacoverage": 0.1469,
            "id": "GHCND:US1CTFR0022",
            "elevationUnit": "METERS",
            "longitude": -73.577
        },
        {
            "elevation": 6.4,
            "mindate": "2016-05-01",
            "maxdate": "2022-04-12",
            "latitude": 41.0377883911133,
            "name": "STAMFORD 4.2 S, CT US",
            

In [11]:
# Turn the "results" list of the stations response into a DataFrame.
# Note: explore_noaa_endpoint returns None on failure, in which case this
# subscript raises TypeError.
stations_df = pd.json_normalize(stations_data["results"])

In [12]:
# Display the stations table (bare last expression -> rich display).
stations_df

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
0,36.6,2015-07-30,2017-11-19,41.0641,"STAMFORD 2.6 SSW, CT US",0.1469,GHCND:US1CTFR0022,METERS,-73.577
1,6.4,2016-05-01,2022-04-12,41.037788,"STAMFORD 4.2 S, CT US",0.879,GHCND:US1CTFR0039,METERS,-73.568176
2,20.1,2008-01-25,2009-11-05,40.921298,"BERGENFIELD 0.3 SW, NJ US",0.2826,GHCND:US1NJBG0001,METERS,-74.001983
3,16.8,2008-02-09,2017-11-23,40.902694,"SADDLE BROOK TWP 0.6 E, NJ US",0.9779,GHCND:US1NJBG0002,METERS,-74.083358
4,21.6,2008-01-29,2025-05-31,40.91467,"TENAFLY 1.3 W, NJ US",0.8584,GHCND:US1NJBG0003,METERS,-73.9775
5,15.8,2008-02-21,2011-07-27,40.983041,"WESTWOOD 0.8 ESE, NJ US",0.1979,GHCND:US1NJBG0005,METERS,-74.015858
6,112.2,2008-03-02,2015-11-26,41.058611,"RAMSEY 0.6 E, NJ US",0.3305,GHCND:US1NJBG0006,METERS,-74.134068
7,12.8,2008-03-18,2012-07-22,40.907116,"SADDLE BROOK TWP 0.3 NNE, NJ US",0.8665,GHCND:US1NJBG0008,METERS,-74.093406
8,9.4,2008-03-26,2023-09-09,40.99145,"RIVER VALE TWP 1.5 S, NJ US",0.1881,GHCND:US1NJBG0010,METERS,-74.012348
9,8.2,2008-03-25,2009-02-11,40.794355,"NORTH ARLINGTON 0.7 NE, NJ US",0.8765,GHCND:US1NJBG0011,METERS,-74.119043


## Key NYC Stations:

### Index 326: GHCND:USW00094728 | NY CITY CENTRAL PARK, NY US
- datacoverage: 1
- mindate: "1869-01-01", maxdate: "2025-06-01"

### Index 321: GHCND:USW00014732 | LAGUARDIA AIRPORT, NY US
- datacoverage: 1
- mindate: "1939-10-07", maxdate: "2025-06-02" (Excellent date range)

### Index 329: GHCND:USW00094789 | JFK INTERNATIONAL AIRPORT, NY US
- datacoverage: 0.9995
- mindate: "1948-07-17", maxdate: "2025-06-02" (Excellent date range)



# DATA Endpoint

In [41]:
# Query for the "data" endpoint: precipitation, temperature and rain/drizzle
# weather-type observations at the Central Park station, 2024-01-01 through
# 2024-01-05.
# NOTE(review): "TEMP" appears in the datacategories response as a data
# *category* id; confirm it is also accepted as a datatypeid.
SELECTED_STATION_ID = "GHCND:USW00094728"

params_data = dict(
    datasetid=GHCND_DATASET_ID,
    stationid=SELECTED_STATION_ID,
    # A list value is sent as one repeated query parameter per entry.
    datatypeid=["PRCP", "TEMP", "TAVG", "TMAX", "TMIN", "WT16", "WT14"],
    startdate="2024-01-01",
    enddate="2024-01-05",
    units="standard",
    limit=1000,
)

In [42]:
# Fetch the observations described by params_data; the helper prints the
# JSON and returns it (or None if the request failed).
data = explore_noaa_endpoint("data", params=params_data)

Requesting: https://www.ncei.noaa.gov/cdo-web/api/v2/data with params: {'datasetid': 'GHCND', 'stationid': 'GHCND:USW00094728', 'datatypeid': ['PRCP', 'TEMP', 'TAVG', 'TMAX', 'TMIN', 'WT16', 'WT14'], 'startdate': '2024-01-01', 'enddate': '2024-01-05', 'units': 'standard', 'limit': 1000}
Response Successfull! URL Used: https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&stationid=GHCND%3AUSW00094728&datatypeid=PRCP&datatypeid=TEMP&datatypeid=TAVG&datatypeid=TMAX&datatypeid=TMIN&datatypeid=WT16&datatypeid=WT14&startdate=2024-01-01&enddate=2024-01-05&units=standard&limit=1000
{
    "metadata": {
        "resultset": {
            "offset": 1,
            "count": 15,
            "limit": 1000
        }
    },
    "results": [
        {
            "date": "2024-01-01T00:00:00",
            "datatype": "PRCP",
            "station": "GHCND:USW00094728",
            "attributes": ",,W,2400",
            "value": 0.03
        },
        {
            "date": "2024-01-01T00:00:00",
