In [27]:
import json
from typing import List
from urllib.request import urlopen

import pandas as pd
import us

In [45]:
def get_stations(states: List = None,
                 network_types: List = ["ASOS"]) -> List:
    """Returns list of station ids based on input states.

    For every (state, network type) pair a network name ``{state}_{type}``
    is built, its GeoJSON metadata is downloaded from
    mesonet.agron.iastate.edu, and the ``sid`` property of each entry in
    ``features`` is collected.

    Args:
        states (List, optional): State identifiers used as the network
            prefix, e.g. ``["MD"]``. Defaults to None, which uses the keys of
            ``us.states.mapping('abbr', 'name')``.
        network_types (List, optional): Network suffixes. Defaults to
            ["ASOS"]. The default list is only read, never modified.

    Returns:
        List: station ids that can be used for querying climate data, in
        request order (states outermost, then network types, then the order
        of ``features`` in each response). Empty if ``states`` or
        ``network_types`` is empty.

    Raises:
        Nothing is caught here: any error from ``urlopen`` (e.g. an unknown
        network) or from ``json.load`` propagates to the caller.
    """
    if states is None:
        states = list(us.states.mapping('abbr', 'name').keys())

    stations = []

    for state in states:
        for network_type in network_types:
            network = f'{state}_{network_type}'
            url = f"https://mesonet.agron.iastate.edu/geojson/network/{network}.geojson"
            # Close the HTTP response as soon as it is parsed so that a
            # many-state query does not accumulate open sockets.
            with urlopen(url) as response:
                metadata = json.load(response)
            stations.extend(site["properties"]["sid"]
                            for site in metadata["features"])

    return stations

24

In [53]:
def get_station_data(stations: List,
                    start_year: int,
                    start_month: int,
                    start_day: int,
                    end_year: int,
                    end_month: int,
                    end_day: int,
                    base_url: str = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?data=all&tz=Etc/UTC&format=comma&latlon=yes&") -> pd.DataFrame:
    """Pulls climate data for list of given stations. Size of returned df might be enormous if query is too large.

    One request is issued per station; the CSV responses are read with
    ``pd.read_csv(..., skiprows=5)`` and stacked into a single frame.

    Args:
        stations (List): Stations returned from get_stations() or manually from IEM
        start_year (int): start year of data to pull
        start_month (int): start month of data to pull
        start_day (int): start day of data to pull
        end_year (int): end year of data to pull
        end_month (int): end month of data to pull
        end_day (int): end day of data to pull
        base_url (str, optional): Request URL up to and including the trailing
            ``&``; the date range and station parameters are appended to it.
            Defaults to "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?data=all&tz=Etc/UTC&format=comma&latlon=yes&".

    Returns:
        pd.DataFrame: ['station', 'valid', 'lon', 'lat', 'tmpf', 'dwpf', 'relh', 'drct',
       'sknt', 'p01i', 'alti', 'mslp', 'vsby', 'gust', 'skyc1', 'skyc2',
       'skyc3', 'skyc4', 'skyl1', 'skyl2', 'skyl3', 'skyl4', 'wxcodes',
       'ice_accretion_1hr', 'ice_accretion_3hr', 'ice_accretion_6hr',
       'peak_wind_gust', 'peak_wind_drct', 'peak_wind_time', 'feel', 'metar']
       Each station's rows keep their own index (no re-numbering). An empty
       DataFrame is returned when ``stations`` is empty.
    """
    # Build the shared part of the query once. The original code referenced an
    # undefined name ``service`` here, which only worked with leftover kernel
    # state.
    service = (
        f"{base_url}"
        f"year1={start_year}&month1={start_month}&day1={start_day}&"
        f"year2={end_year}&month2={end_month}&day2={end_day}&"
    )

    # Collect the per-station frames and concatenate once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    frames = [
        pd.read_csv(f"{service}&station={station}", skiprows=5)
        for station in stations
    ]

    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()

    return pd.concat(frames)


In [57]:
# Look up the station ids of Maryland's ASOS network.
stations = get_stations(states=["MD"])

# Download the observations for those stations, requesting
# 2020-09-01 through 2020-09-03.
maryland_data = get_station_data(
    stations,
    start_year=2020,
    start_month=9,
    start_day=1,
    end_year=2020,
    end_month=9,
    end_day=3,
)

# Preview the first rows of the combined frame.
maryland_data.head()

Index(['station', 'valid', 'lon', 'lat', 'tmpf', 'dwpf', 'relh', 'drct',
       'sknt', 'p01i', 'alti', 'mslp', 'vsby', 'gust', 'skyc1', 'skyc2',
       'skyc3', 'skyc4', 'skyl1', 'skyl2', 'skyl3', 'skyl4', 'wxcodes',
       'ice_accretion_1hr', 'ice_accretion_3hr', 'ice_accretion_6hr',
       'peak_wind_gust', 'peak_wind_drct', 'peak_wind_time', 'feel', 'metar'],
      dtype='object')