Source: 
* Obtain County FIPS: https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html 
* Get your API key: https://www.ncdc.noaa.gov/cdo-web/webservices/v2
* Choose your dataset: https://www.ncdc.noaa.gov/cdo-web/webservices/v2#datasets
* Choose your data categories: https://www.ncdc.noaa.gov/cdo-web/webservices/v2#dataCategories

In [114]:
# Standard library
import os
import time

# Read county shapefile
import geopandas as gpd
import pandas as pd

# Fetch Climate Data Online
import requests

In [115]:
# 2023 county boundary shapefile published by the Census Bureau (1:500k, zipped)
zip_url = 'https://www2.census.gov/geo/tiger/GENZ2023/shp/cb_2023_us_county_500k.zip'

# GeoPandas can open the zipped shapefile straight from the URL.
# 'FIPS' is the state code followed by the county code; it is the key used
# later to request data county by county.
county_data = gpd.read_file(zip_url).assign(
    FIPS=lambda counties: counties['STATEFP'] + counties['COUNTYFP']
)

# Preview the first 5 rows
county_data.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOIDFQ,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry,FIPS
0,1,3,161527,0500000US01003,1003,Baldwin,Baldwin County,AL,Alabama,6,4117725048,1132887203,"POLYGON ((-88.02858 30.22676, -88.02399 30.230...",1003
1,1,69,161560,0500000US01069,1069,Houston,Houston County,AL,Alabama,6,1501742235,4795415,"POLYGON ((-85.71209 31.19727, -85.70934 31.198...",1069
2,1,5,161528,0500000US01005,1005,Barbour,Barbour County,AL,Alabama,6,2292160151,50523213,"POLYGON ((-85.74803 31.61918, -85.74544 31.618...",1005
3,1,119,161585,0500000US01119,1119,Sumter,Sumter County,AL,Alabama,6,2340898915,24634880,"POLYGON ((-88.41492 32.36452, -88.41471 32.366...",1119
4,5,91,69166,0500000US05091,5091,Miller,Miller County,AR,Arkansas,6,1616257232,36848741,"POLYGON ((-94.04343 33.55158, -94.04332 33.552...",5091


In [125]:
# Set up the API key and base URL.
# The token is read from the NOAA_CDO_TOKEN environment variable so that it never
# has to be saved inside the notebook; the placeholder below is only used when
# that variable is not set.
api_key = os.environ.get('NOAA_CDO_TOKEN', 'Your-API-KEY')  # Replace the placeholder with your own token
# Same host as the dataset / data-category requests in this notebook
base_url = 'https://www.ncei.noaa.gov/cdo-web/api/v2/data'

In [116]:
"""
This code shows all available datasets in Climate Data Online.
"""

URL = 'https://www.ncei.noaa.gov/cdo-web/api/v2/datasets'
# Make the API request; the timeout stops the cell from hanging on a dead connection
headers = {'token': api_key}
response = requests.get(URL, headers=headers, timeout=30)

# Check the response status and keep the parsed payload
if response.status_code == 200:
    data = response.json()
else:
    print('Failed to retrieve data:', response.status_code)
    # Reset `data` so the table below cannot raise NameError on a fresh kernel
    # or silently display results left over from an earlier cell.
    data = {}

# Empty table when the request failed or the payload has no 'results'
pd.DataFrame(data.get('results', []))

Unnamed: 0,uid,mindate,maxdate,name,datacoverage,id
0,gov.noaa.ncdc:C00861,1750-02-01,2024-05-07,Daily Summaries,1.0,GHCND
1,gov.noaa.ncdc:C00946,1750-02-01,2024-05-01,Global Summary of the Month,1.0,GSOM
2,gov.noaa.ncdc:C00947,1763-01-01,2024-01-01,Global Summary of the Year,1.0,GSOY
3,gov.noaa.ncdc:C00345,1991-06-05,2024-05-08,Weather Radar (Level II),0.95,NEXRAD2
4,gov.noaa.ncdc:C00708,1994-05-20,2024-05-06,Weather Radar (Level III),0.95,NEXRAD3
5,gov.noaa.ncdc:C00821,2010-01-01,2010-01-01,Normals Annual/Seasonal,1.0,NORMAL_ANN
6,gov.noaa.ncdc:C00823,2010-01-01,2010-12-31,Normals Daily,1.0,NORMAL_DLY
7,gov.noaa.ncdc:C00824,2010-01-01,2010-12-31,Normals Hourly,1.0,NORMAL_HLY
8,gov.noaa.ncdc:C00822,2010-01-01,2010-12-01,Normals Monthly,1.0,NORMAL_MLY
9,gov.noaa.ncdc:C00505,1970-05-12,2014-01-01,Precipitation 15 Minute,0.25,PRECIP_15


In [121]:
"""
This code shows all available data categories in Climate Data Online.
"""

URL = 'https://www.ncei.noaa.gov/cdo-web/api/v2/datacategories'

# Make the API request.
# 'limit' is set to the API maximum instead of a hard-coded count, so the
# listing is not silently truncated if categories are added later.
headers = {'token': api_key}
response = requests.get(URL, headers=headers, params={'limit': 1000}, timeout=30)

# Check the response status and keep the parsed payload
if response.status_code == 200:
    data = response.json()
else:
    print('Failed to retrieve data:', response.status_code)
    # Reset `data` so the table below cannot raise NameError on a fresh kernel
    # or silently display results left over from an earlier cell.
    data = {}

# Empty table when the request failed or the payload has no 'results'
pd.DataFrame(data.get('results', []))

Unnamed: 0,name,id
0,Annual Agricultural,ANNAGR
1,Annual Degree Days,ANNDD
2,Annual Precipitation,ANNPRCP
3,Annual Temperature,ANNTEMP
4,Autumn Agricultural,AUAGR
5,Autumn Degree Days,AUDD
6,Autumn Precipitation,AUPRCP
7,Autumn Temperature,AUTEMP
8,Computed,COMP
9,Computed Agricultural,COMPAGR


In [128]:
# Set parameters for the API request
params = {
    'datasetid': 'GSOM',  # Example, Global Summary of the Month
    'locationid': 'FIPS:49017', # FIPS code for the specified county
    #'datatypeid': 'TMAX', # We can further specify what data type in the data category we want to use, TMAX  is max temperature.
    'datacategoryid': 'TEMP',
    'startdate': '2023-01-01',
    'enddate': '2023-01-31',
    'limit': 1000,
    'units': 'standard'
}

# Make the API request; the timeout stops the cell from hanging on a dead connection
headers = {'token': api_key}
response = requests.get(base_url, headers=headers, params=params, timeout=30)

# Start from an empty frame so that a failed request or an empty payload can
# never leave `df` undefined or pointing at results from an earlier run.
df = pd.DataFrame()

# Check the response status and process the data
if response.status_code == 200:
    data = response.json()
    # The records of interest are the list stored under the 'results' key
    if 'results' in data:
        df = pd.DataFrame(data['results'])
    else:
        print("No results in data")
else:
    print("Failed to retrieve data:", response.status_code)


In [129]:
# Display the DataFrame built from the single-county request above
df

Unnamed: 0,date,datatype,station,attributes,value
0,2023-01-01T00:00:00,DP01,GHCND:US1UTGF0001,",N",10.00
1,2023-01-01T00:00:00,DP10,GHCND:US1UTGF0001,",N",7.00
2,2023-01-01T00:00:00,DP1X,GHCND:US1UTGF0001,",N",0.00
3,2023-01-01T00:00:00,DYXP,GHCND:US1UTGF0001,"+,,N",20230115.00
4,2023-01-01T00:00:00,EMXP,GHCND:US1UTGF0001,",,N,15,+",0.73
...,...,...,...,...,...
371,2023-01-01T00:00:00,TMIN,GHCND:USW00023159,",,,W",2.20
372,2023-01-01T00:00:00,WDF2,GHCND:USW00023159,",W",260.00
373,2023-01-01T00:00:00,WDF5,GHCND:USW00023159,",W",260.00
374,2023-01-01T00:00:00,WSF2,GHCND:USW00023159,",W",36.90


In [125]:
"""
We conduct a loop to get the dataframe for each county by looping different FIPS codes. 
The combined dataframes store all stations info we specified.
"""
# Pause between requests so the loop stays under NOAA's documented rate limit
# (five requests per second per token).
REQUEST_PAUSE_SECONDS = 0.25

# Loop-invariant pieces of every request
headers = {'token': api_key}
base_params = {
    'datasetid': 'GSOM',
    'datacategoryid': 'TEMP',
    'startdate': '2023-01-01',
    'enddate': '2023-01-31',
    'limit': 1000,
    'units': 'standard'}

dataframes = {} # a dictionary to store all dataframes, keyed by county FIPS
for fips in county_data['FIPS']:
    time.sleep(REQUEST_PAUSE_SECONDS)

    # Only the location changes from one county to the next
    params = {**base_params, 'locationid': f'FIPS:{fips}'}

    # Make the API request. A network error on one county is reported and
    # skipped instead of aborting the whole loop.
    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data of County {fips}:", exc)
        continue

    # Check the response status and process the data
    if response.status_code == 200:
        data = response.json()

        if 'results' in data:
            df = pd.DataFrame(data['results'])
            dataframes[fips] = df
            print(f"Fetch data of County {fips}")

        else:
            print(f"No results in County {fips}")
    else:
        print(f"Failed to retrieve data of County {fips}:", response.status_code)


# Concatenate all DataFrames from the dictionary into a single DataFrame.
# pd.concat raises ValueError when given nothing to concatenate, so fall back
# to an empty frame when no county returned data.
if dataframes:
    combined_df = pd.concat(dataframes.values(), ignore_index=True)
else:
    combined_df = pd.DataFrame()

# Show the combined DataFrame
combined_df.head()