Goal: Retrieve the species checklist for a region using the eBird API.


In [1]:
from __future__ import annotations

In [2]:
from birding.ebird_api import get_ebird_api_key

ebird_api_key = get_ebird_api_key()

In [3]:
locations_of_interest = [
    "Punta Quepos, Puntarenas Province, Costa Rica",
    "Estero Damas, Puntarenas Province, Costa Rica",
    "Parque Nacional Manuel Antonio, Costa Rica",
    "Parque Nacional Carara, Costa Rica",
    "Tarcoles River, Puntarenas Province, Costa Rica",
    "Monteverde, Puntarenas Province, Costa Rica",
    "Reserva Bosque Nuboso Santa Elena, Tronadora, Guanacaste, Costa Rica",
    "Monteverde Cloud Forest Reserve, Monteverde, Puntarenas Province, Costa Rica",
    "Reserva Curi Cancha, Monteverde, Costa Rica",
    "La Fortuna, Alajuela Province, Costa Rica",
    "Parque Nacional Volcán Arenal, Costa Rica",
    "Arenal Volcano, Alajuela Province, San Carlos, Costa Rica",
    "Bogarin Trail, Alajuela Province, La Fortuna, Costa Rica",
    "Mistico Park Hanging Bridges, La Fortuna, Alajuela Province, Costa Rica",
    "Parque Nacional Volcán Poás, Costa Rica",
]

In [4]:
from birding.geocoding import retrieve_geocode
from birding.sqlite_cache import init_db

init_db()
for location in locations_of_interest:
    print(f"\n\nRetrieving geocode for location '{location}'...")
    data = retrieve_geocode(query=location)
    if data is None:
        print(f"Could not find geocode for location '{location}'.")
    else:
        print(data)



Retrieving geocode for location 'Punta Quepos, Puntarenas Province, Costa Rica'...
Geocode for query 'Punta Quepos, Puntarenas Province, Costa Rica' was already cached.
{'place_id': 287637760, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'node', 'osm_id': 5207350899, 'lat': '9.3988718', 'lon': '-84.1719522', 'class': 'natural', 'type': 'peak', 'place_rank': 18, 'importance': 0.16004359731441214, 'addresstype': 'peak', 'name': 'Punta Quepos', 'display_name': 'Punta Quepos, Quepos, Puntarenas, 60601, Costa Rica', 'boundingbox': ['9.3988218', '9.3989218', '-84.1720022', '-84.1719022']}


Retrieving geocode for location 'Estero Damas, Puntarenas Province, Costa Rica'...
Geocode for query 'Estero Damas, Puntarenas Province, Costa Rica' was already cached.
{'place_id': 286614497, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'way', 'osm_id': 410673410, 'lat': '9.4616350', 'lon': '-84.217255

In [5]:
from birding.primitives import Coordinate

raw_location_data = {loc: retrieve_geocode(query=loc) for loc in locations_of_interest}
location_data = {loc: data for loc, data in raw_location_data.items() if data is not None}
assert len(raw_location_data) == len(location_data), "A location's geocode data was None!"

location_coords = {loc: Coordinate.from_geocode_data(data) for loc, data in location_data.items()}
location_coords

Geocode for query 'Punta Quepos, Puntarenas Province, Costa Rica' was already cached.
Geocode for query 'Estero Damas, Puntarenas Province, Costa Rica' was already cached.
Geocode for query 'Parque Nacional Manuel Antonio, Costa Rica' was already cached.
Geocode for query 'Parque Nacional Carara, Costa Rica' was already cached.
Geocode for query 'Tarcoles River, Puntarenas Province, Costa Rica' was already cached.
Geocode for query 'Monteverde, Puntarenas Province, Costa Rica' was already cached.
Geocode for query 'Reserva Bosque Nuboso Santa Elena, Tronadora, Guanacaste, Costa Rica' was already cached.
Geocode for query 'Monteverde Cloud Forest Reserve, Monteverde, Puntarenas Province, Costa Rica' was already cached.
Geocode for query 'Reserva Curi Cancha, Monteverde, Costa Rica' was already cached.
Geocode for query 'La Fortuna, Alajuela Province, Costa Rica' was already cached.
Geocode for query 'Parque Nacional Volcán Arenal, Costa Rica' was already cached.
Geocode for query 'Arena

{'Punta Quepos, Puntarenas Province, Costa Rica': Coordinate(latitude=9.3988718, longitude=-84.1719522),
 'Estero Damas, Puntarenas Province, Costa Rica': Coordinate(latitude=9.461635, longitude=-84.2172554),
 'Parque Nacional Manuel Antonio, Costa Rica': Coordinate(latitude=9.0605046, longitude=-84.2449917),
 'Parque Nacional Carara, Costa Rica': Coordinate(latitude=9.7900999, longitude=-84.5709133),
 'Tarcoles River, Puntarenas Province, Costa Rica': Coordinate(latitude=9.8096317, longitude=-84.5909112),
 'Monteverde, Puntarenas Province, Costa Rica': Coordinate(latitude=10.2911515, longitude=-84.8136439),
 'Reserva Bosque Nuboso Santa Elena, Tronadora, Guanacaste, Costa Rica': Coordinate(latitude=10.3433135, longitude=-84.7911521),
 'Monteverde Cloud Forest Reserve, Monteverde, Puntarenas Province, Costa Rica': Coordinate(latitude=10.3036512, longitude=-84.7877117),
 'Reserva Curi Cancha, Monteverde, Costa Rica': Coordinate(latitude=10.3064418, longitude=-84.8076789),
 'La Fortuna, 

In [6]:
from birding.ebird_api import retrieve_nearby_hotspots

for loc, coord in location_coords.items():
    print(f"\n\nRetrieving nearby hotspots for location '{loc}'...")
    hotspots = retrieve_nearby_hotspots(ebird_api_key, coord=coord)
    print(f"Found {len(hotspots)} nearby hotspots.")

    if len(hotspots) < 5:
        print("Location had less than 5 nearby hotspots!")
        more_hotspots = retrieve_nearby_hotspots(ebird_api_key, coord=coord, distance_km=50)
        print(f"Found {len(more_hotspots)} hotspots within 50 km.")



Retrieving nearby hotspots for location 'Punta Quepos, Puntarenas Province, Costa Rica'...
Nearby hotspots for (9.3989, -84.1720) were already cached.
Found 51 nearby hotspots.


Retrieving nearby hotspots for location 'Estero Damas, Puntarenas Province, Costa Rica'...
Nearby hotspots for (9.4616, -84.2173) were already cached.
Found 52 nearby hotspots.


Retrieving nearby hotspots for location 'Parque Nacional Manuel Antonio, Costa Rica'...
Nearby hotspots for (9.0605, -84.2450) were already cached.
Found 2 nearby hotspots.
Location had less than 5 nearby hotspots!
Nearby hotspots for (9.0605, -84.2450) were already cached.
Found 52 hotspots within 50 km.


Retrieving nearby hotspots for location 'Parque Nacional Carara, Costa Rica'...
Nearby hotspots for (9.7901, -84.5709) were already cached.
Found 76 nearby hotspots.


Retrieving nearby hotspots for location 'Tarcoles River, Puntarenas Province, Costa Rica'...
Nearby hotspots for (9.8096, -84.5909) were already cached.
Found 68 n

In [7]:
spots_per_loc = {
    loc: retrieve_nearby_hotspots(ebird_api_key, coord) for loc, coord in location_coords.items()
}

use_50_km = ["Parque Nacional Manuel Antonio, Costa Rica"]
for loc in use_50_km:
    spots_per_loc[loc] = retrieve_nearby_hotspots(ebird_api_key, location_coords[loc], 50)

spots_per_loc

Nearby hotspots for (9.3989, -84.1720) were already cached.
Nearby hotspots for (9.4616, -84.2173) were already cached.
Nearby hotspots for (9.0605, -84.2450) were already cached.
Nearby hotspots for (9.7901, -84.5709) were already cached.
Nearby hotspots for (9.8096, -84.5909) were already cached.
Nearby hotspots for (10.2912, -84.8136) were already cached.
Nearby hotspots for (10.3433, -84.7912) were already cached.
Nearby hotspots for (10.3037, -84.7877) were already cached.
Nearby hotspots for (10.3064, -84.8077) were already cached.
Nearby hotspots for (10.4717, -84.6444) were already cached.
Nearby hotspots for (10.4288, -84.7032) were already cached.
Nearby hotspots for (10.4621, -84.7034) were already cached.
Nearby hotspots for (10.4703, -84.6508) were already cached.
Nearby hotspots for (10.4880, -84.7538) were already cached.
Nearby hotspots for (10.2175, -84.2292) were already cached.
Nearby hotspots for (9.0605, -84.2450) were already cached.


{'Punta Quepos, Puntarenas Province, Costa Rica': [{'locId': 'L2102226',
   'locName': 'Arenas del Mar Resort (reserve, hotel grounds and drive way)',
   'countryCode': 'CR',
   'subnational1Code': 'CR-P',
   'lat': 9.3983113,
   'lng': -84.1629338,
   'latestObsDt': '2025-10-12 15:09',
   'numSpeciesAllTime': 191},
  {'locId': 'L3300769',
   'locName': 'Barrio Lourdes (town)',
   'countryCode': 'CR',
   'subnational1Code': 'CR-P',
   'lat': 9.4505872,
   'lng': -84.1271296,
   'latestObsDt': '2025-12-15 06:10',
   'numSpeciesAllTime': 101},
  {'locId': 'L2873762',
   'locName': 'Biesanz beach',
   'countryCode': 'CR',
   'subnational1Code': 'CR-P',
   'lat': 9.401709,
   'lng': -84.1675043,
   'latestObsDt': '2025-12-09 15:40',
   'numSpeciesAllTime': 211},
  {'locId': 'L3308537',
   'locName': 'Buenavista Villas/Tulemar',
   'countryCode': 'CR',
   'subnational1Code': 'CR-P',
   'lat': 9.4078746,
   'lng': -84.159738,
   'latestObsDt': '2025-12-04 06:48',
   'numSpeciesAllTime': 241}

In [8]:
all_hotspot_keys = set()
for hotspots in spots_per_loc.values():
    for spot_data in hotspots:
        all_hotspot_keys.update(spot_data.keys())

all_hotspot_keys

{'countryCode',
 'lat',
 'latestObsDt',
 'lng',
 'locId',
 'locName',
 'numSpeciesAllTime',
 'subnational1Code'}

In [9]:
from birding.primitives import EBirdHotspot

hotspots_per_location = {
    loc: {EBirdHotspot.from_json(hotspot_data) for hotspot_data in hotspots}
    for loc, hotspots in spots_per_loc.items()
}

In [10]:
hotspots_per_location

{'Punta Quepos, Puntarenas Province, Costa Rica': {EBirdHotspot(location=EBirdLocation(id='L10206898', name='Hotel Makanda by the Sea (restricted access)', coord=Coordinate(latitude=9.4029964, longitude=-84.1626817), country_code='CR', subnat1_code='CR-P', subnat1_name=None, subnat2_code=None, subnat2_name=None), all_time_species=100),
  EBirdHotspot(location=EBirdLocation(id='L10478874', name='Parque Nahomi', coord=Coordinate(latitude=9.4224968, longitude=-84.1693497), country_code='CR', subnat1_code='CR-P', subnat1_name=None, subnat2_code=None, subnat2_name=None), all_time_species=133),
  EBirdHotspot(location=EBirdLocation(id='L11431955', name='Portasol living Birder House', coord=Coordinate(latitude=9.3731784, longitude=-83.9622937), country_code='CR', subnat1_code='CR-P', subnat1_name=None, subnat2_code=None, subnat2_name=None), all_time_species=247),
  EBirdHotspot(location=EBirdLocation(id='L1351111', name='Hotel Costa Verde', coord=Coordinate(latitude=9.3986344, longitude=-84.1

In [11]:
from geopy.distance import geodesic

# Find the nearest hotspot to each location
nearest_hotspots: dict[str, EBirdHotspot] = {}
for loc, hotspots in hotspots_per_location.items():
    loc_coord = location_coords[loc]
    nearest = min(hotspots, key=lambda hs: geodesic(loc_coord, hs.location.coord).mi)
    nearest_hotspots[loc] = nearest

nearest_hotspots

{'Punta Quepos, Puntarenas Province, Costa Rica': EBirdHotspot(location=EBirdLocation(id='L1848099', name='Parador Resort and Spa', coord=Coordinate(latitude=9.397801, longitude=-84.169388), country_code='CR', subnat1_code='CR-P', subnat1_name=None, subnat2_code=None, subnat2_name=None), all_time_species=222),
 'Estero Damas, Puntarenas Province, Costa Rica': EBirdHotspot(location=EBirdLocation(id='L509908', name='Manglares Damas', coord=Coordinate(latitude=9.4619308, longitude=-84.2156982), country_code='CR', subnat1_code='CR-P', subnat1_name=None, subnat2_code=None, subnat2_name=None), all_time_species=247),
 'Parque Nacional Manuel Antonio, Costa Rica': EBirdHotspot(location=EBirdLocation(id='L5626588', name='Seamount Furuno (4-1)', coord=Coordinate(latitude=8.8833213, longitude=-84.1753055), country_code='CR', subnat1_code='CR-P', subnat1_name=None, subnat2_code=None, subnat2_name=None), all_time_species=19),
 'Parque Nacional Carara, Costa Rica': EBirdHotspot(location=EBirdLocatio

In [15]:
# For now, only retrieve species lists for the nearest hotspot to each location of interest
from birding.ebird_api import retrieve_species_list

for hotspot in nearest_hotspots.values():
    print(f"\n\nRetrieving bird species list for hotspot '{hotspot.location.name}'...")

    species_list = retrieve_species_list(ebird_api_key, area_code=hotspot.location.id)
    print(f"'{hotspot.location.name}' has {len(species_list)} nearby species.")



Retrieving bird species list for hotspot 'Parador Resort and Spa'...
Bird species list for 'L1848099' was already cached.
'Parador Resort and Spa' has 223 nearby species.


Retrieving bird species list for hotspot 'Manglares Damas'...
Bird species list for 'L509908' was already cached.
'Manglares Damas' has 247 nearby species.


Retrieving bird species list for hotspot 'Seamount Furuno (4-1)'...
Bird species list for 'L5626588' was already cached.
'Seamount Furuno (4-1)' has 19 nearby species.


Retrieving bird species list for hotspot 'PN Carara (Localidad General)--historical please use other hotspots'...
Bird species list for 'L447761' was already cached.
'PN Carara (Localidad General)--historical please use other hotspots' has 430 nearby species.


Retrieving bird species list for hotspot 'Ponds north of Rio Tarcoles Bridge'...
Bird species list for 'L673538' was already cached.
'Ponds north of Rio Tarcoles Bridge' has 236 nearby species.


Retrieving bird species list for hotspo

In [None]:
# Retrieve species lists for all subnational and national areas represented in the current data

all_species_lists: dict[str, list[str]] = {}
invalid_area_codes = {"CR-P", "CR-A", "CR-G", "CR-H"}

for loc, hotspots in hotspots_per_location.items():
    print(f"Retrieving species lists for subnational regions around '{loc}'...")
    for hotspot in hotspots:
        hs_loc = hotspot.location
        for possible_area_code in {hs_loc.subnat1_code, hs_loc.subnat2_code, hs_loc.country_code}:
            if possible_area_code is None or possible_area_code in invalid_area_codes:
                continue

            new_list = retrieve_species_list(ebird_api_key, possible_area_code)
            all_species_lists[possible_area_code] = new_list

species_union = set()
for species_list in all_species_lists.values():
    species_union.update(species_list)

print(f"Current species lists include {len(species_union)} total distinct species.")

Retrieving species lists for subnational regions around 'Punta Quepos, Puntarenas Province, Costa Rica'...
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR-SJ' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cached.
Bird species list for 'CR' was already cache

In [22]:
# Finally, retrieve species lists for all hotspots near our locations of interest
invalid_area_codes = {"CR-P", "CR-A", "CR-G", "CR-H"}

for loc, hotspots in hotspots_per_location.items():
    print(f"Retrieving species lists for hotspots near '{loc}'...")
    for hotspot in hotspots:
        hotspot_species = retrieve_species_list(ebird_api_key, hotspot.location.id)
        all_species_lists[hotspot.location.id] = hotspot_species

species_union = set()
for species_list in all_species_lists.values():
    species_union.update(species_list)

print(f"Current species lists include {len(species_union)} total distinct species.")

Retrieving species lists for hotspots near 'Punta Quepos, Puntarenas Province, Costa Rica'...
Bird species list for 'L3058980' was already cached.
Bird species list for 'L3308553' was already cached.
Bird species list for 'L3527795' was already cached.
Bird species list for 'L2102226' was already cached.
Bird species list for 'L3942621' was already cached.
Bird species list for 'L1848099' was already cached.
Bird species list for 'L4829224' was already cached.
Bird species list for 'L7928552' was already cached.
Bird species list for 'L17897693' was already cached.
Bird species list for 'L2132992' was already cached.
Bird species list for 'L5218074' was already cached.
Bird species list for 'L16052695' was already cached.
Bird species list for 'L8237030' was already cached.
Bird species list for 'L3871374' was already cached.
Bird species list for 'L3881190' was already cached.
Bird species list for 'L3300769' was already cached.
Bird species list for 'L2480432' was already cached.
Bir

In [1]:
import pooch

RESOLVE_ECOREGIONS_2017_URL = "https://storage.googleapis.com/teow2016/Ecoregions2017.zip"

fetcher = pooch.create(
    path=pooch.os_cache("birding_datasets"),
    base_url="",
    registry={"Ecoregions2017.zip": None},
    urls={"Ecoregions2017.zip": RESOLVE_ECOREGIONS_2017_URL},
)

zip_path = fetcher.fetch("Ecoregions2017.zip", downloader=pooch.HTTPDownloader(progressbar=True))
zip_path

'/home/benned/.cache/birding_datasets/Ecoregions2017.zip'

In [None]:
import geopandas as gpd

ecoregions = gpd.read_file(f"zip://{zip_path}!Ecoregions2017.shp")
ecoregions = ecoregions.to_crs("EPSG:4326")  # Use (long, lat) in degrees

expected_fields = ["BIOME_NAME", "BIOME_NUM", "ECO_ID", "ECO_NAME", "LICENSE", "REALM", "geometry"]
missing = [c for c in expected_fields if c not in ecoregions.columns]
if missing:
    raise KeyError(
        f"Missing expected columns in the RESOLVE dataset: {missing}. "
        f"Available: {sorted(ecoregions.columns)}"
    )

In [None]:
@dataclass(frozen=True)
class SurroundingSpecies:
    """A collection of species information in the regions containing a location."""

    location: EBirdLocation
    location_species: list[str]
    subnational2_species: list[str] | None
    subnational1_species: list[str] | None
    country_species: list[str]

    def __str__(self) -> str:
        """Return a human-readable string representation of the species collection."""
        species_str = f"Location ({self.location.name}) Species: {len(self.location_species)}\n"

        if self.subnational2_species is not None:
            subnat2_name = (
                self.location.subnational2_name
                if self.location.subnational2_name is not None
                else self.location.subnational2_code
            )
            n_subnat2_species = len(self.subnational2_species)
            species_str += f"Subnational Region 2 ({subnat2_name}) Species: {n_subnat2_species}\n"

        if self.subnational1_species is not None:
            subnat1_name = (
                self.location.subnational1_name
                if self.location.subnational1_name is not None
                else self.location.subnational1_code
            )
            n_subnat1_species = len(self.subnational1_species)
            species_str += f"Subnational Region 1 ({subnat1_name}) Species: {n_subnat1_species}\n"

        country_code = self.location.country_code
        species_str += f"Country ({country_code}) Species: {len(self.country_species)}"

        return species_str

In [33]:
species = SurroundingSpecies.from_location(nearest_hotspot.location)
species

SurroundingSpecies(location=EBirdLocation(id='L24184797', name='Kennedy Plaza, Providence', latitude=41.825473, longitude=-71.412127, country_code='US', subnational1_code='US-RI', subnational1_name=None, subnational2_code='US-RI-007', subnational2_name=None), location_species=['cangoo', 'mutswa', 'mallar3', 'ambduc', 'wiltur', 'rocpig', 'moudov', 'chiswi', 'ribgul', 'amhgul1', 'doccor', 'coohaw', 'rebwoo', 'dowwoo', 'perfal', 'easpho', 'blujay', 'amecro', 'fiscro', 'bkcchi', 'tuftit', 'gockin', 'whbnut', 'carwre', 'eursta', 'grycat', 'normoc', 'amerob', 'cedwax', 'houspa', 'houfin', 'amegfi', 'chispa', 'fiespa', 'sonspa', 'bnhcow', 'comgra', 'norcar'], subnational2_species=['snogoo', 'rosgoo', 'gragoo', 'x00776', 'gwfgoo', 'pifgoo', 'brant', 'bargoo', 'cacgoo1', 'cangoo', 'x00758', 'x00759', 'mutswa', 'truswa', 'tunswa', 'musduc', 'wooduc', 'buwtea', 'cintea', 'norsho', 'gadwal', 'eurwig', 'amewig', 'x00724', 'mallar3', 'ambduc', 'x00004', 'norpin', 'x00628', 'gnwtea', 'canvas', 'redhe

In [34]:
print(species)

Location (Kennedy Plaza, Providence) Species: 38
Subnational Region 2 (US-RI-007) Species: 328
Subnational Region 1 (US-RI) Species: 472
Country (US) Species: 1775
