In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Species Distribution Modeling on Earth Engine

<table align="left">
 <td>
   <a href=https://colab.research.google.com/github/KMarkert/ee-workshop-esa2023/blob/main/notebooks/04_species_distribution_modeling_ee.ipynb>
       <img src=https://cloud.google.com/ml-engine/images/colab-logo-32px.png alt="Colab logo">
    Run in Colab
   </a>
 </td>
 <td>
   <a href=https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/KMarkert/ee-workshop-esa2023/main/notebooks/04_species_distribution_modeling_ee.ipynb>
       <img src=https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32 alt="Vertex AI logo">
     Open in Vertex AI Workbench
   </a>
 </td>
</table>
<br/><br/><br/>

**Purpose:** The purpose of this lab is to demonstrate an ecological workflow for species distribution modeling using Earth Engine. It uses concepts from previous labs, such as filtering, compositing, and image processing, to create the environmental predictors for a species distribution model. Furthermore, you will be introduced to exports for storing intermediate data and results, as well as a classification workflow using a familiar ecological model, MaxEnt. At the completion of the lab, you will be able to walk through an end-to-end species distribution workflow using Earth Engine.

In [None]:
# If you are running this notebook in Colab, run this cell to install geemap.
# geemap provides the interactive maps used with Earth Engine in this notebook.

import os
import sys

# True when the google.colab module is already loaded in this kernel.
IS_COLAB = "google.colab" in sys.modules
# If on Vertex AI Workbench, then don't execute the install: either the
# env_version metadata file or the DL_ANACONDA_HOME variable marks that case.
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if IS_COLAB:
        # IPython shell escape; -q keeps pip's output quiet.
        !pip install geemap -q

In [None]:
from IPython.display import JSON
import math

import ee
import geemap
from geemap import colormaps as cmaps
import google

In [None]:
if not IS_COLAB:
    # Outside Colab: reuse stored Earth Engine credentials when the credentials
    # file exists, otherwise run the interactive authentication flow first.
    print("Authenticating using Notebook auth...")
    if os.path.exists(ee.oauth.get_credentials_path()):
        print('\N{check mark} '
              'Previously created authentication credentials were found.')
    else:
        ee.Authenticate()
    # No project is collected on this path; the export cell asks for it if needed.
    PROJECT = None
    ee.Initialize()
else:
    print('Authenticating using Colab auth...')
    # Authenticate to populate Application Default Credentials in the Colab VM.
    google.colab.auth.authenticate_user()
    # Create credentials needed for accessing Earth Engine.
    credentials, auth_project_id = google.auth.default()
    PROJECT = input('Enter your Google Cloud Project ID: ')
    # Initialize Earth Engine against the user-supplied project.
    ee.Initialize(credentials, project=PROJECT)

print('\N{check mark} Successfully initialized!')

## Tracking data

A key component to species distribution modeling is understanding where the species have been so that we can understand the environmental factors that may influence where a particular species lives and migrates to.

[Movebank](https://www.movebank.org/cms/movebank-main) is a great repository for accessing tracking data. For this example we will use GPS tracking data of bobcats and coyotes in northern Washington from [Prugh et al., 2023](https://doi.org/10.1126/science.adf2472). This data has been downloaded from Movebank, formatted for use with Earth Engine, and ingested as an Earth Engine asset.

In [None]:
# Access the bobcat/coyote GPS tracking data from an Earth Engine table asset.
tracking_data = ee.FeatureCollection('projects/ee-kmarkert-demo/assets/GPS_tracking_bobcats_coyotes_WA')

In [None]:
# Add a 'millis' property to every feature, computed from its
# 'system:time_start' value via ee.Date(...).millis(). The monthly sampling
# function filters the tracks on this property.
tracking_data = (
    tracking_data
    .map(lambda x: ee.Feature(x).set('millis', ee.Date(ee.Feature(x).get('system:time_start')).millis()))
)

In [None]:
# Split the tracking data into one collection per species by matching the
# "individual-taxon-canonical-name" property against the scientific name.
coyotes = tracking_data.filter(
  ee.Filter.eq("individual-taxon-canonical-name", "Canis latrans")
)
bobcat = tracking_data.filter(
  ee.Filter.eq("individual-taxon-canonical-name", "Lynx rufus")
)

In [None]:
# Display the tracking locations of both species on an interactive map.
Map = geemap.Map()

# One layer per species: coyotes in red, bobcats in blue.
Map.addLayer(coyotes, {"color":"red"}, "Coyotes")
Map.addLayer(bobcat, {"color":"blue"}, "Bobcat")

# Center the view on the full tracking dataset at zoom level 10.
Map.centerObject(tracking_data, 10)

# Last expression of the cell, so the map is rendered as the cell output.
Map

In [None]:
#@title Choose your species

# Common name of the species to model. Later cells compare it (lower-cased)
# against "bobcat" / "coyote" and embed it in asset, task and layer names.
species = "Bobcat" #@param ["Bobcat", "Coyote"]


In [None]:
# Get the correct distribution data based on the species common name of interest.
# The name is matched case-insensitively and surrounding whitespace is ignored.
species_key = species.strip().lower()

# randomColumn() adds a 'random' property and limit(5000, 'random') keeps at
# most 5000 features ordered by it, i.e. a random subset of the tracks.
if species_key == "bobcat":
    species_locations = bobcat.randomColumn().limit(5000, 'random')
elif species_key == "coyote":
    species_locations = coyotes.randomColumn().limit(5000, 'random')
else:
    # Report the rejected value so a mistyped selection is easy to spot.
    raise ValueError(
        f"value for `species` not recognized (got {species!r}), "
        "options are 'Bobcat' or 'Coyote'"
    )

In [None]:
# Build the list of distinct months (formatted as 'YYYY-MM-01' strings) in which
# any tracking point was recorded, so imagery can be composited for valid dates.
dates = (
    tracking_data
    .aggregate_array('system:time_start')
    .map(lambda x: ee.Date(x).format('YYYY-MM-01'))
    .distinct()
)

# Smallest / largest entry of the month list.
# NOTE(review): the list holds date strings, so this relies on
# ee.Reducer.min()/max() ordering strings as intended — confirm.
start_date = dates.reduce(ee.Reducer.min())
end_date = dates.reduce(ee.Reducer.max())


In [None]:
# helper function to convert qa bit image to flag
def extract_bits(image, start, end=None, new_name=None):
    """Extract a bit field from the first band of a QA image.

    Bits ``start`` through ``end`` (both inclusive) are isolated with a bitwise
    AND and shifted right so the lowest selected bit lands on bit 0. A single
    bit therefore yields 0/1 and an n-bit field yields 0 .. 2**n - 1.

    args:
        image (ee.Image): qa image to extract bits from; only its first band is used
        start (int): starting (lowest) bit of the flag
        end (int | None, optional): ending (highest) bit of the flag, inclusive. if None then only the start bit is used. default = None
        new_name (str | None, optional): output name of resulting band, if None name will be {start}Bits. default = None

    returns:
        ee.Image: single-band image holding the extracted bit field

    raises:
        ValueError: if ``end`` is smaller than ``start``
    """
    last = start if end is None else end
    if last < start:
        raise ValueError(
            f"`end` ({end}) must be greater than or equal to `start` ({start})"
        )

    newname = new_name if new_name is not None else f"{start}Bits"

    # Bit mask with a 1 at every position in [start, last]. The upper bound is
    # inclusive: a caller asking for bits 2-3 must get both bits, not only bit 2.
    pattern = sum(1 << bit for bit in range(start, last + 1))

    # Keep only the requested bits of the first band (renamed), then shift them
    # down so the result is the field's value rather than a positioned bit.
    return image.select([0], [newname]).bitwiseAnd(pattern).rightShift(start)

def preprocess_viirs(image):
    """Mask low-quality VIIRS VNP09GA pixels and append an NDVI band.

    args:
        image (ee.Image): image providing the QF1, QF2, SensorZenith, I1 and I2 bands

    returns:
        ee.Image: bands matching '(M|I).*' plus 'NDVI', masked by the combined QA mask
    """
    qf1 = image.select("QF1")
    qf2 = image.select("QF2")

    # Keep pixels whose extracted QF1 cloud field is below 1.
    is_clear = extract_bits(qf1, 2, end=3, new_name="cloud_qa").lt(1)
    # Keep pixels whose QF2 bit 3 (shadow flag) is not set.
    no_shadow = extract_bits(qf2, 3, new_name="shadow_qa").Not()
    # NOTE(review): the snow flag (QF2 bit 5) is built but never combined into
    # the mask below — confirm whether snow pixels are meant to be kept.
    no_snow = extract_bits(qf2, 5, new_name="snow_qa").Not()
    # Keep pixels with |SensorZenith| < 6000 (raw band units; presumably
    # hundredths of a degree — confirm against the dataset description).
    low_zenith = image.select("SensorZenith").abs().lt(6000)

    qa_mask = is_clear.And(no_shadow).And(low_zenith)

    # Normalized difference of I2 and I1, stored as 'NDVI'.
    ndvi_band = image.normalizedDifference(['I2', 'I1']).rename('NDVI')
    reflectance = image.select('(M|I).*')

    return reflectance.addBands(ndvi_band).updateMask(qa_mask)



In [None]:
# Load the VIIRS VNP09GA surface reflectance collection, restricted to the
# period from the first tracking month to the fixed end date "2023-08-01",
# and apply the QA-masking / NDVI preprocessing function to every image.
viirs = (
    ee.ImageCollection("NOAA/VIIRS/001/VNP09GA")
    .filterDate(start_date, "2023-08-01")
    .map(preprocess_viirs)
)


In [None]:
# Static predictor layers: the NASADEM image (its 'elevation' band is used
# when building the sample and prediction images) and the WorldClim V1 BIO image.
dem = ee.Image("NASA/NASADEM_HGT/001")
bioclim = ee.Image("WORLDCLIM/V1/BIO")

In [None]:
# Region from which absence points are drawn: the bounding box of the presence
# locations buffered by 1e6, minus a buffer of 500 around the presence locations
# themselves (distances presumably in meters — confirm). The other numeric
# arguments (1e4, 1e2, 1e3) are presumably maxError tolerances for the geometry
# operations — confirm against the Earth Engine API.
absence_area = species_locations.geometry(1e4).bounds(1e4).buffer(1e6).difference(right = species_locations.geometry(1e2).buffer(500), maxError = 1e3)

In [None]:
def sample_locations(date):
    """Build presence/absence training samples for the month starting at a date.

    args:
        date (ee.String): string date in YYYY-MM-dd format

    returns:
        ee.FeatureCollection: presence (1) and absence (0) points for the
            one-month window, with the sampled predictor values attached
    """
    month_start = ee.Date(date)
    month_end = month_start.advance(1, "month")

    # Presence: tracked locations whose 'millis' falls inside the month window.
    in_month = ee.Filter.rangeContains(
        'millis', month_start.millis(), month_end.millis()
    )
    presence = species_locations.filter(in_month).map(
        lambda feat: ee.Feature(feat).set('presence', 1)
    )

    # Absence: as many random points as there are presence points, drawn from
    # absence_area and seeded with the month start so each month differs.
    absence = ee.FeatureCollection.randomPoints(
        absence_area,
        points=presence.size(),
        seed=month_start.millis(),
        maxError=1e3,
    ).map(lambda feat: ee.Feature(feat).set('presence', 0))

    # Predictor stack: monthly mean NDVI plus elevation and the bioclim bands.
    predictors = (
        viirs.filterDate(month_start, month_end).select("NDVI").mean()
        .addBands(dem.select('elevation'))
        .addBands(bioclim)
    )

    # Sample the stack at every presence and absence point, keeping geometries.
    return predictors.sampleRegions(
        collection=presence.merge(absence),
        scale=1000,
        tileScale=16,
        geometries=True,
    )

In [None]:
# Run the sampler for every month in `dates`, convert the resulting list of
# collections to a FeatureCollection, and flatten it into a single collection.
samples = ee.FeatureCollection(dates.map(sample_locations)).flatten()

## Export the intermediate results

Earth Engine limits how long you can wait for a single interactive computation to finish: 5 minutes. For tasks that take longer to process, you can submit an export task that will run asynchronously. Here you will create an export task for the sampling so that it can run longer and you can use the results later.

In [None]:
# Set to True to submit the sample export as a batch task.
EXPORT_SAMPLES = False

if EXPORT_SAMPLES:
    # The non-Colab authentication path leaves PROJECT as None, so ask for the
    # Cloud project here before building the destination asset id.
    if PROJECT is None:
        PROJECT = input('Enter your Google Cloud Project ID: ')

    sample_asset = f"projects/{PROJECT}/assets/{species}_classifier_samples"

    # Create the table-to-asset export task and submit it.
    task = ee.batch.Export.table.toAsset(
        collection=samples,
        description=f'{species}_sample_export',
        assetId=sample_asset,
    )
    task.start()

This can take some time depending on complexity of the computation.

## Train the distribution model

To speed things up for demonstration purposes, we will load in a pre-exported sample. If you would like to use the samples you just exported at a later time, simply change the asset name to what was exported.

In [None]:
# Load the pre-exported training samples for the selected species.
sample_fc = ee.FeatureCollection(f"projects/ee-kmarkert-demo/assets/{species}_classifier_samples")
# To use your own exported samples instead, use the asset id from the export cell:
# sample_fc = ee.FeatureCollection(sample_asset)

In [None]:
# Get an image to use for prediction: mean NDVI for 2019-04-01 to 2019-05-01
# plus the elevation and bioclim bands, i.e. the same band layout as the image
# sampled for training. resample() is called with its default arguments, once
# on the NDVI composite and once on the full stack.
img =  (
    viirs.filterDate("2019-04-01","2019-05-01").select("NDVI").mean().resample()
    .addBands(dem.select('elevation'))
    .addBands(bioclim).resample()
)

# get the band names from the prediction image; they are used as the model inputs
band_names = img.bandNames()

In [None]:
# Train a MaxEnt classifier in PROBABILITY output mode on the sample collection,
# with 'presence' as the label property and the prediction image's band names
# as the input properties.
maxent_classifier = (
    ee.Classifier.amnhMaxent()
    .setOutputMode('PROBABILITY')
    .train(sample_fc,'presence', band_names)
)

# Description of the trained classifier; fetched to the client in the next cell.
classifier_explained = maxent_classifier.explain()

In [None]:
# Fetch the classifier description to the client with getInfo() and render it
# as a JSON output whose root is labeled 'Contributions'.
JSON(classifier_explained.getInfo(), root='Contributions')

## Apply model

In [None]:
# Minimum and maximum of every predictor across the training samples. The
# minMax reducer is repeated once per band and applied to the band_names
# columns; the result's 'min' and 'max' entries are read in the next cell
# (presumably lists ordered like band_names — confirm).
prediction_bounds = sample_fc.reduceColumns(
    ee.Reducer.minMax().repeat(
        band_names.length()
    ), 
    band_names
)

In [None]:
# convert the min and max for each variable to a constant image
min_img = ee.Image.constant(prediction_bounds.get('min'))
max_img = ee.Image.constant(prediction_bounds.get('max'))

# Widen the training envelope by 10% of each bound's magnitude. Scaling both
# bounds by 1.1 would raise a positive minimum (and lower a negative maximum),
# which narrows the envelope instead of widening it.
min_img = min_img.subtract(min_img.abs().multiply(0.1))
max_img = max_img.add(max_img.abs().multiply(0.1))

# threshold each variable and reduce to one band: the min reducer leaves 1 only
# where every band passes its comparison
min_mask = img.gte(min_img).reduce(ee.Reducer.min())
max_mask = img.lte(max_img).reduce(ee.Reducer.min())

# combine the min/max mask into one image: a pixel is inside the training
# envelope only when it satisfies BOTH the lower and the upper bounds
mask = min_mask.And(max_mask)

In [None]:
# Apply the MaxEnt model to the prediction image and mask the output with the
# combined min/max bounds mask derived from the training samples.
distribution_output = img.classify(maxent_classifier).updateMask(mask)

In [None]:
# Display the predicted distribution together with the species locations.
Map = geemap.Map()

# 'probability' band of the classified image, drawn with the magma palette.
Map.addLayer(distribution_output, {'bands':'probability',"palette":cmaps.get_palette("magma")}, f'{species} Distribution')

# Diagnostic mask layers, added with the fourth argument set to False
# (presumably the layer's initial visibility — confirm).
Map.addLayer(min_mask,{}, "min mask",False)
Map.addLayer(max_mask,{}, 'Max mask',False)
Map.addLayer(mask,{}, 'combined mask',False)


# Tracking locations of the selected species, for visual comparison.
Map.addLayer(species_locations, {"color":"red"}, f"{species} tracks")

# Center the view on the species locations at zoom level 8.
Map.centerObject(species_locations, 8)

# Last expression of the cell, so the map is rendered as the cell output.
Map