In [None]:
# Use !pip install ____ to install missing modules as needed

# Install pygbif for the GBIF download
!pip install pygbif

# Import the below libraries:
# To create reproducible file paths
from pathlib import Path

# To login to GBIF
from getpass import getpass

# For the downloads
import csv
import os
import requests
import tempfile
import time
import zipfile

# To work with different types of data
from glob import glob # To combine data arrays
from shapely.geometry import Point # To work with point geometries
import earthpy # To work with "canned" data
import geopandas as gpd # To make GeoDataFrames/work with vector data
import pandas as pd # To work with dataframes
import pygbif.occurrences as occ
import pygbif.species as species
import numpy as np # To work with arrays
import rasterio # To read raster data
import rioxarray as rxr # To work with raster data
import xarray as xr # To work with data arrays
 
# For interactive plotting
import matplotlib.pyplot as plt
import hvplot.pandas 
import hvplot.xarray
import holoviews as hv



In [11]:
# Create the main project directory to store data in
pm_light_pollution_dir = Path("polyphemus-moth-light-pollution-dir")
pm_light_pollution_dir.mkdir(parents=True, exist_ok=True)

In [12]:
# Login into GBIF
# Securely request and store GBIF username, password, and email address

reset_credentials = True

if (not ('GBIF_USER'  in os.environ)) or reset:
    os.environ['GBIF_USER'] = input('GBIF username:')

if (not ('GBIF_PWD'  in os.environ)) or reset:
    os.environ['GBIF_PWD'] = getpass('GBIF password:')
    
if (not ('GBIF_EMAIL'  in os.environ)) or reset:
    os.environ['GBIF_EMAIL'] = input('GBIF email:')

In [13]:
# Conduct a search query for the polyphemus moth
species_info = species.name_lookup('Antheraea polyphemus', rank='SPECIES')

# Obtain the first result
first_result = species_info['results'][0]

# List the species key
species_key = 1866570

# Print the results
first_result['species'], species_key

('Antheraea polyphemus', 1866570)

In [14]:
# Only download the data once
gbif_polyphemus_pattern = os.path.join(str(pm_light_pollution_dir), "**", "occurrence.txt")
if not glob(gbif_polyphemus_pattern):
    # Only submit one request
    if not 'GBIF_DOWNLOAD_KEY' in os.environ:
        # Submit query to GBIF
        gbif_query = occ.download([
        f"speciesKey = {species_key}",
        "hasCoordinate = True"
    ],
    format="DWCA"
)
    # Take the first result
    os.environ['GBIF_DOWNLOAD_KEY'] = gbif_query[0]

    # Wait for the download to build
    dld_key = os.environ['GBIF_DOWNLOAD_KEY']
    wait = occ.download_meta(dld_key)['status']
    while not wait=='SUCCEEDED':
        wait = occ.download_meta(dld_key)['status']
        time.sleep(5)

    # Download the GBIF data
    dld_info = occ.download_get(
    os.environ['GBIF_DOWNLOAD_KEY'], 
    path=pm_light_pollution_dir)
    dld_path = dld_info['path']

    # Unzip the GBIF data
    with zipfile.ZipFile(dld_path) as dld_zip:
        dld_zip.extractall(path=pm_light_pollution_dir)
                
    # Clean up the GBIF .zip file
    os.remove(dld_path)

    # Establish the path
    gbif_polyphemus_path = glob(gbif_polyphemus_pattern, recursive=True)[0]

INFO:Your download key is 0046453-251025141854904
INFO:Download file size: 14665977 bytes
INFO:On disk at polyphemus-moth-light-pollution-dir/0046453-251025141854904.zip


In [15]:
# Load the GBIF data
gbif_polyphemus_df = pd.read_csv(
    gbif_polyphemus_path,
    delimiter='\t',
        index_col='gbifID',
        usecols=['gbifID', 'eventDate','decimalLatitude', 'decimalLongitude']
)
gbif_polyphemus_df

Unnamed: 0_level_0,eventDate,decimalLatitude,decimalLongitude
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
923926047,2014-07-01T19:26:19,30.076778,-95.495178
923925493,2014-07-01T04:53:34,43.852963,-72.588778
923925071,2014-06-30T13:58:01,43.599196,-72.517297
923924387,2014-06-28T22:35:38,43.852963,-72.588778
923923819,2014-06-26T22:01:11,35.544423,-82.370604
...,...,...,...
1024184645,2014-07-02,44.285028,-72.573667
1024184438,2014-07-04T15:03:36,43.562679,-72.496800
1024182752,2014-06-11T12:38:08,46.137070,-89.891541
1024180480,2014-04-01T19:47:39,30.237556,-97.836428


In [16]:
# Extract the month
gbif_polyphemus_df['eventDate'] = pd.to_datetime(gbif_polyphemus_df['eventDate'], errors='coerce')
gbif_polyphemus_df['month'] = gbif_polyphemus_df['eventDate'].dt.month

gbif_polyphemus_df.head()

Unnamed: 0_level_0,eventDate,decimalLatitude,decimalLongitude,month
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
923926047,2014-07-01 19:26:19,30.076778,-95.495178,7.0
923925493,2014-07-01 04:53:34,43.852963,-72.588778,7.0
923925071,2014-06-30 13:58:01,43.599196,-72.517297,6.0
923924387,2014-06-28 22:35:38,43.852963,-72.588778,6.0
923923819,2014-06-26 22:01:11,35.544423,-82.370604,6.0


In [17]:
# Convert the GBIF data to a GeoDataFrame
# Input the longitude and latitude column names
gbif_gdf = (
    gpd.GeoDataFrame(
        gbif_polyphemus_df, 
        geometry=gpd.points_from_xy(
            gbif_polyphemus_df.decimalLongitude, 
            gbif_polyphemus_df.decimalLatitude), 
        crs="EPSG:4326")
    # Specify the desired columns
    [['month', 'geometry', 'decimalLatitude', 'decimalLongitude']]
)
gbif_gdf

Unnamed: 0_level_0,month,geometry,decimalLatitude,decimalLongitude
gbifID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
923926047,7.0,POINT (-95.49518 30.07678),30.076778,-95.495178
923925493,7.0,POINT (-72.58878 43.85296),43.852963,-72.588778
923925071,6.0,POINT (-72.5173 43.5992),43.599196,-72.517297
923924387,6.0,POINT (-72.58878 43.85296),43.852963,-72.588778
923923819,6.0,POINT (-82.3706 35.54442),35.544423,-82.370604
...,...,...,...,...
1024184645,,POINT (-72.57367 44.28503),44.285028,-72.573667
1024184438,7.0,POINT (-72.4968 43.56268),43.562679,-72.496800
1024182752,6.0,POINT (-89.89154 46.13707),46.137070,-89.891541
1024180480,4.0,POINT (-97.83643 30.23756),30.237556,-97.836428
