In [5]:
import os
import json
import pandas as pd
from dotenv import load_dotenv
# Load environment variables from the .env file one directory above the notebook.
# Later cells read NOAA_Token from the environment (presumably defined in that file).
# The call is the cell's last expression, so its boolean result is displayed.
dotenv_local_path = '../.env'
load_dotenv(dotenv_local_path, verbose=True)

True

# Explore NOAA API Using requests

In [6]:
import requests

In [11]:
# Get list of all available datasets
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/datasets"
# Index the environment directly: a missing token then fails right here with a
# KeyError instead of a None header value being silently dropped by requests.
headers = {"token": os.environ['NOAA_Token']}
# The timeout keeps the kernel from hanging forever if the API stops responding.
response = requests.get(url, headers=headers, timeout=30)
# Stop on HTTP errors so an error payload is never saved as if it were data.
response.raise_for_status()
data = response.json()

file_path = './data/datasets_all.json'

# Make sure the output directory exists (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

"""
The results contain the Daily Summaries dataset:
{
"datacoverage": 1,
"id": "GHCND",
"maxdate": "2020-11-22",
"mindate": "1763-01-01",
"name": "Daily Summaries",
"uid": "gov.noaa.ncdc:C00861"
}

Daily Summaries seems interesting, so let's focus on it for our analysis.
"""

'\nThe results contain the Daily Summaries dataset:\n{\n"datacoverage": 1,\n"id": "GHCND",\n"maxdate": "2020-11-22",\n"mindate": "1763-01-01",\n"name": "Daily Summaries",\n"uid": "gov.noaa.ncdc:C00861"\n}\n\nDaily Summaries seems interesting, so let\'s focus on it for our analysis.\n'

In [12]:
# Get a sample of the available locations (only the first 10 results are requested)
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/locations"
# Only `limit` is sent. To fetch every location, raise the limit and page with
# `offset`, since a single request returns a bounded number of results.
params = {"limit": 10}
# Index the environment directly: a missing token then fails right here with a
# KeyError instead of a None header value being silently dropped by requests.
headers = {"token": os.environ['NOAA_Token']}
# The timeout keeps the kernel from hanging forever if the API stops responding.
response = requests.get(url, headers=headers, params=params, timeout=30)
# Stop on HTTP errors so an error payload is never saved as if it were data.
response.raise_for_status()
data = response.json()

file_path = './data/locations_all.json'

# Make sure the output directory exists (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

"""
The results contain the following structure:
{
"datacoverage": 0.9958,
"id": "CITY:AE000001",
"maxdate": "2020-11-18",
"mindate": "1983-01-01",
"name": "Abu Dhabi, AE"
}

The id key will likely be important for accessing data for a particular city.
"""

'\nThe results contain the following structure:\n{\n"datacoverage": 0.9958,\n"id": "CITY:AE000001",\n"maxdate": "2020-11-18",\n"mindate": "1983-01-01",\n"name": "Abu Dhabi, AE"\n}\n\nThe id key will likely be important for accessing data for a particular city.\n'

In [14]:
# Get the US locations in location category "ST" (the results include states such as California)
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/locations"
params = {"locationcategoryid": "ST", "limit": 52}
# Index the environment directly: a missing token then fails right here with a
# KeyError instead of a None header value being silently dropped by requests.
headers = {"token": os.environ['NOAA_Token']}
# The timeout keeps the kernel from hanging forever if the API stops responding.
response = requests.get(url, headers=headers, params=params, timeout=30)
# Stop on HTTP errors so an error payload is never saved as if it were data.
response.raise_for_status()
data = response.json()

file_path = './data/locations_us.json'

# Make sure the output directory exists (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

"""
The results contain the CA location:
{
"datacoverage": 1,
"id": "FIPS:06",
"maxdate": "2020-11-22",
"mindate": "1850-10-01",
"name": "California"
}
"""

'\nThe results contain the CA location:\n{\n"datacoverage": 1,\n"id": "FIPS:06",\n"maxdate": "2020-11-22",\n"mindate": "1850-10-01",\n"name": "California"\n}\n'

In [22]:
# Get list of all available stations for CA location
"""
Note the use of an offset. This is required because each request can handle a maximum of 1000 results.
The metadata key in the response tells us how many results there are:
"metadata": {
    "resultset": {
    "count": 4346,
    "limit": 1000,
    "offset": 1
    }
}

With 4346 results that means 5 requests (and 5 files). The loop below reads the count from the
response metadata instead of hard-coding 5, so it keeps working if the number of stations changes.

Also note that the request contains parameters for sorting the results. We used descending just as an example.
"""

page_limit = 1000  # maximum number of results a single request can return (see note above)
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
# Index the environment directly: a missing token then fails right here with a
# KeyError instead of a None header value being silently dropped by requests.
headers = {"token": os.environ['NOAA_Token']}

# Make sure the output directory exists (e.g. on a fresh checkout).
os.makedirs('./data', exist_ok=True)

offset_counter = 1   # the first record is at offset 1 (see the metadata example above)
file_counter = 0
total_count = None   # unknown until the first response has been read

# Always issue the first request, then continue while there are records left to fetch.
while total_count is None or offset_counter <= total_count:
    params = {
        "locationid": "FIPS:06",
        "sortfield": "name",
        "sortorder": "desc",
        "limit": page_limit,
        "offset": offset_counter,
    }
    # The timeout keeps the kernel from hanging forever if the API stops responding.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    # Stop on HTTP errors so an error payload is never saved as if it were a page of data.
    response.raise_for_status()
    data = response.json()

    # Total number of matching stations as reported by the API; a response
    # without metadata is treated as "nothing more to fetch".
    total_count = data.get("metadata", {}).get("resultset", {}).get("count", 0)

    file_path = f'./data/stations_ca_{file_counter}.json'

    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4, sort_keys=True)

    offset_counter += page_limit
    file_counter += 1

"""
The results contain the OAKLAND NORTH CALIFORNIA location:
{
"datacoverage": 0.9704,
"elevation": 427.6,
"elevationUnit": "METERS",
"id": "GHCND:USR0000COKN",
"latitude": 37.865,
"longitude": -122.2208,
"maxdate": "2020-11-22",
"mindate": "1992-06-08",
"name": "OAKLAND NORTH CALIFORNIA, CA US"
}

We will use Oakland because it contains a relatively long dataset and is still active today.
"""

'\nThe results contain the OAKLAND NORTH CALIFORNIA location:\n{\n"datacoverage": 0.9704,\n"elevation": 427.6,\n"elevationUnit": "METERS",\n"id": "GHCND:USR0000COKN",\n"latitude": 37.865,\n"longitude": -122.2208,\n"maxdate": "2020-11-22",\n"mindate": "1992-06-08",\n"name": "OAKLAND NORTH CALIFORNIA, CA US"\n}\n\nWe will use Oakland because it contains a relatively long dataset and is still active today.\n'

In [25]:
# Get list of all available stations for CA location that support the Daily Summaries dataset
# and are active today

"""
Since there are only 255 results, we don't need a loop:
"metadata": {
"resultset": {
"count": 255,
"limit": 1000,
"offset": 1
    }
}
"""

url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
# NOTE(review): startdate is later than enddate on purpose, it seems. For the stations
# endpoint the CDO docs describe startdate as "stations with data after this date" and
# enddate as "stations with data before this date", which together select stations that
# are still reporting and also have records reaching back to 2000 -- confirm against the docs.
params = {
    "locationid": "FIPS:06",
    "sortfield": "name",
    "sortorder": "desc",
    "datasetid": "GHCND",
    "startdate": "2020-11-22",
    "enddate": "2000-01-01",
    "limit": 1000,
}
# Index the environment directly: a missing token then fails right here with a
# KeyError instead of a None header value being silently dropped by requests.
headers = {"token": os.environ['NOAA_Token']}
# The timeout keeps the kernel from hanging forever if the API stops responding.
response = requests.get(url, headers=headers, params=params, timeout=30)
# Stop on HTTP errors so an error payload is never saved as if it were data.
response.raise_for_status()
data = response.json()

# Guard the "no loop needed" assumption: if more stations match than fit in one
# request, the saved file would be silently truncated.
result_count = data.get("metadata", {}).get("resultset", {}).get("count", 0)
assert result_count <= params["limit"], (
    f"{result_count} stations match but only {params['limit']} fit in one request; paging is required"
)

file_path = './data/stations_ca_ghcnd.json'

# Make sure the output directory exists (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

"""
The results contain the OAKLAND NORTH CALIFORNIA station:
{
"datacoverage": 0.9704,
"elevation": 427.6,
"elevationUnit": "METERS",
"id": "GHCND:USR0000COKN",
"latitude": 37.865,
"longitude": -122.2208,
"maxdate": "2020-11-22",
"mindate": "1992-06-08",
"name": "OAKLAND NORTH CALIFORNIA, CA US"
}
"""

'\nThe results contain the OAKLAND NORTH CALIFORNIA station:\n{\n"datacoverage": 0.9704,\n"elevation": 427.6,\n"elevationUnit": "METERS",\n"id": "GHCND:USR0000COKN",\n"latitude": 37.865,\n"longitude": -122.2208,\n"maxdate": "2020-11-22",\n"mindate": "1992-06-08",\n"name": "OAKLAND NORTH CALIFORNIA, CA US"\n}\n'