In [1]:
import pandas
import ee  # Google Earth Engine API
import folium
import functools
import numpy as np
import time

In [2]:
# Initialize the Earth Engine client library for this session
ee.Initialize()

## Load sample data
To start out, you will need a CSV file with one row for each sample. It must have columns named `Latitude`, `Longitude`, and `MatchName` (the sample identifier). We will use the coordinates to get more data about each sample.


In [3]:
csv_path = 'Final_metadata_05312019.csv'

# Load only the identifier and coordinate columns of each sample
samples = pandas.read_csv(csv_path, usecols=['MatchName', 'Longitude', 'Latitude'])

# Show the dimensions of the table and preview its first rows
print("Data shape:", samples.shape)
print(samples.head())

# Build a FeatureCollection with one Feature per sample: the sample point buffered
# by 100 meters, carrying the sample's identifier and coordinates as properties
sample_areas = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(row.Longitude, row.Latitude).buffer(100))
    .set('name', row.MatchName)
    .set('longitude', row.Longitude)
    .set('latitude', row.Latitude)
    for row in samples.itertuples()
])

Data shape: (278, 3)
  MatchName   Longitude   Latitude
0   K0024A2 -118.567312  34.083974
1   K0024B2 -118.570642  34.084767
2   K0024C1 -118.551882  34.055978
3   K0026A1 -117.229718  32.853963
4   K0026B1 -117.232750  32.849619


## Define dataset classes
GIS datasets can be in either vector or raster format. In Google Earth Engine, raster data is represented by an `Image` and vector data by a `FeatureCollection`. You can find datasets by searching in the [Earth Engine data catalog](https://developers.google.com/earth-engine/datasets). It's also possible to instantiate an Earth Engine object from data that you have locally if it's not available through the API.

To conceptualize the data it will be helpful to wrap these Earth Engine objects in our own classes. This way we can store information we need to use the dataset alongside it: the band or property to use and how to process and display the data.

### Raster datasets

In [5]:
class RasterDataset:
    """
    An Earth Engine raster dataset bundled with the settings needed to sample and map it.

    After construction, `self.data` is a single-band ee.Image whose only band is named
    `self.name` (also available as `self.band`).
    """

    # Pixel size, in meters, used for every region reduction
    REDUCE_SCALE = 100
    # Upper bound on the number of pixels a single reduction may read
    MAX_PIXELS = 1e9
    # Radius, in meters, of the kernel used to search for nearby data
    SEARCH_RADIUS = 1000

    def __init__(self, image, band, name=None, categorical=False, map_params=None, preprocess=None):
        """
        Represent a Google Earth Engine raster dataset and the information we need to use it.

        image: The ee.Image or ee.ImageCollection object representing the desired dataset.
            This is the snippet of code provided on the dataset's page in the Earth Engine catalog.
            e.g.: ee.Image('path') or ee.ImageCollection('path')
        band: The identifier of the desired image band e.g. 'bio01'
        name: The column name to display for the data gathered from this dataset
        categorical: Set to True if the data is not continuous. If True, takes the mode of pixels
            in the sample area rather than the median.
        map_params: Settings for how to display the data e.g. palette, min, max.
            Defaults to an empty dict.
            See param options: https://developers.google.com/earth-engine/api_docs#ee.data.getmapid
        preprocess: A function to apply to the dataset before doing any calculations
        """

        # self.name exists to be a common property between RasterDatasets and VectorDatasets
        self.name = name or band  # If a different name is not set, use the band name
        self.categorical = categorical
        # Create a fresh dict per instance; a `{}` default in the signature would be one
        # object shared by every dataset constructed without map_params
        self.map_params = {} if map_params is None else map_params
        self.preprocess = preprocess

        # Select just the one band of interest from the image and rename it to self.name
        if isinstance(image, ee.ImageCollection):
            # If it's an image collection, mosaic it into one image
            self.data = image.mosaic().select([band], [self.name])
        else:
            self.data = image.select([band], [self.name])

        # The band was renamed above, so from here on it is addressed by self.name
        self.band = self.name

        if self.preprocess:
            self.data = self.preprocess(self.data)

    def get_sample_area_data(self, sample_area: ee.Feature) -> ee.Feature:
        '''
        Add a new property to the input feature storing the value of self.data in the feature area

        param sample_area: ee.Feature with the geometry to sample over
        returns: input feature with a new property in the form
            {self.band: mode (categorical) or median (continuous) of self.data
                        over the feature geometry}
        '''

        # Get the average distance from each point in the sample area to the data image
        distance = self.distance(sample_area.geometry())

        geometry = ee.Algorithms.If(
            distance,  # If the distance is not None,
            ee.Algorithms.If(
                distance.eq(0),  # If the distance is 0, the data covers the sample area
                sample_area.geometry(),
                sample_area.geometry().buffer(distance),  # Otherwise, we need to buffer it
            ),
            # If the distance is None, the sample area is farther away from the data than the
            # range being searched by the distance kernel.
            # Continue with the sample area geometry, the reducer will return None.
            sample_area.geometry()
        )

        # self.categorical is an ordinary Python value known on the client, so pick the
        # reducer here rather than sending both reductions to the server inside an
        # ee.Algorithms.If
        if self.categorical:
            result = self.mode(geometry)  # Most common category in the sample area
        else:
            result = self.median(geometry)  # Continuous data: median over the sample area

        return sample_area.set(self.band, result)

    def distance(self, geometry: ee.Geometry) -> ee.Number:
        '''
        Return the average distance from each point in the input geometry to the self.data image

        - This should be 0 in most cases, meaning the data covers the sample area.
        - If the sample area is within the search radius from the self.data image
            (SEARCH_RADIUS, 1000m), the distance will be returned.
        - If the sample area is outside the search radius, None is returned.

        This is intended to account for edges of datasets not lining up exactly with the
        coastline, leaving some coastal sample sites just outside the self.data image.

        param geometry: ee.Geometry of the sample area of interest
        returns: ee.Number (0 or distance) or None
        '''
        # Make an image representing the distance from any given point to the self.data image
        # This will be 0 everywhere that the self.data image covers
        distance_image = self.data.distance(
            ee.Kernel.euclidean(self.SEARCH_RADIUS, 'meters'),
            False  # Do not exclude masked pixels
        ).select([self.band], ['distance'])  # Rename the band to 'distance'

        # Get the average distance from each point in the sample area to the nearest pixel of data
        distance = ee.Number(distance_image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geometry,
            scale=self.REDUCE_SCALE,
            maxPixels=self.MAX_PIXELS
        ).get('distance'))

        return distance

    def _reduce(self, reducer, geometry):
        '''
        Reduce self.data over the geometry with the given reducer.

        param reducer: ee.Reducer to apply to the pixels within the geometry
        param geometry: ee.Geometry (or a computed object resolving to one) to reduce over
        returns: the reduced value of the self.band band as an Earth Engine computed object
        '''
        return self.data.reduceRegion(
            reducer=reducer,
            geometry=geometry,
            scale=self.REDUCE_SCALE,
            maxPixels=self.MAX_PIXELS
        ).get(self.band)

    def mode(self, geometry: ee.Geometry) -> ee.ComputedObject:
        '''
        Reduce the dataset over the sample area to get the most common value in that area.

        param geometry: ee.Geometry of the sample area of interest
        returns: mode of pixels within the geometry, as an Earth Engine computed object
        '''
        return self._reduce(ee.Reducer.mode(), geometry)

    def median(self, geometry: ee.Geometry) -> ee.ComputedObject:
        '''
        Reduce the dataset over the sample area to get the median value in that area

        param geometry: ee.Geometry of the sample area of interest
        returns: median of pixels within the geometry, as an Earth Engine computed object
        '''
        return self._reduce(ee.Reducer.median(), geometry)
        


### Vector datasets


In [6]:
class VectorDataset:
    """
    An Earth Engine vector dataset bundled with the settings needed to sample and map it.
    """

    def __init__(self, feature_collection, property: str, name=None, map_property=None, map_params=None, postprocess=None):
        """
        Represent a Google Earth Engine vector dataset and the information we need to use it.

        feature_collection: The ee.FeatureCollection object representing the dataset.
            This is the 'Earth Engine snippet' displayed on the dataset's page in the Earth
            Engine catalog. e.g.: ee.FeatureCollection('EPA/Ecoregions/2013/L3')
        property: The key of the desired feature property
        name: The column name to display for the data gathered from this dataset.
        map_property: The key of the property to use for mapping, if different.
            This is useful if the property you want to export is non-numeric.
            For instance, in the EPA Level III Ecoregions dataset, you might want to return
            the values of 'us_l3name', which is non-numeric, but generate the map using
            'us_l3code', which can be cast to an int.
        map_params: Settings for how to display the data e.g. palette, min, max.
            Defaults to an empty dict.
        postprocess: A function to apply to each value after it is calculated
        """
        self.data = feature_collection
        self.property = property
        self.name = name or property
        self.map_property = map_property or property
        # Create a fresh dict per instance; a `{}` default in the signature would be one
        # object shared by every dataset constructed without map_params
        self.map_params = {} if map_params is None else map_params
        self.postprocess = postprocess

    # Note: This function is an argument to map(). Arguments to map() cannot print anything
    # or call getInfo(). Doing so results in an EEException: ValueNode empty
    # source: https://gis.stackexchange.com/questions/345598/mapping-simple-function-to-print-date-and-time-stamp-on-google-earth-engine-pyth
    def get_sample_area_data(self, sample_area: ee.Feature) -> ee.Feature:
        """
        Return the sample area with a new property holding this dataset's value for it.

        param sample_area: ee.Feature with the geometry to sample over
        returns: input feature with a new property {self.name: value}, where the value is
            self.property of the dataset feature chosen as follows:
            - exactly one dataset feature overlaps the sample area: that feature
            - no dataset feature overlaps: the nearest one (see get_nearest_feature)
            - several overlap: the one with the largest overlap (see get_predominant_feature)
            If a postprocess function was given, it is applied to the value before storing.
        """
        # Get a FeatureCollection storing the overlaps between the sample area and the dataset
        overlaps = self.data.filterBounds(sample_area.geometry()).map(
            lambda feature: feature.intersection(sample_area.geometry())
        )
        overlap_count = overlaps.size()

        value = ee.Algorithms.If(
            # If there is exactly 1 overlapping dataset feature, use its value
            overlap_count.eq(1),
            overlaps.first().get(self.property),

            ee.Algorithms.If(
                # If there are 0 overlapping dataset features, use the value of the closest one
                overlap_count.eq(0),
                self.get_nearest_feature(sample_area).get(self.property),

                # Otherwise, there must be >1 features overlapping the sample area
                # Use the value of the one with the largest overlap
                self.get_predominant_feature(overlaps).get(self.property)
            )
        )

        # Apply any postprocessing function to the new value.
        # The value is selected before being attached to the feature because the raw output
        # of ee.Algorithms.If is an untyped computed object without Feature methods such
        # as .get(), so it could not be read back from an If-selected feature.
        if self.postprocess:
            value = self.postprocess(value)

        return sample_area.set(self.name, value)

    def get_nearest_feature(self, sample_area: ee.Feature) -> ee.Feature:
        """
        To be used when the sample area doesn't overlap the dataset at all.

        Return the dataset feature that is nearest to the sample area, searching
        within 10000 meters of it.

        param sample_area: ee.Feature representing the sample area of interest
        returns: the closest dataset feature as an ee.Feature
        """

        # Define a filter to get all dataset features within 10000 meters of the sample area
        spatial_filter = ee.Filter.withinDistance(
            distance=10000,
            leftField='.geo',
            rightField='.geo',
            maxError=10
        )
        # Define a join that will return only the 'best' (nearest) match
        save_best_join = ee.Join.saveBest(
            matchKey='closestFeature',
            measureKey='distance'
        )
        # Apply the join, using the distance filter to define match quality
        # Get the only feature in the resulting FeatureCollection
        joined = ee.Feature(save_best_join.apply(
            ee.FeatureCollection(sample_area),
            self.data,
            spatial_filter
        ).first())

        # Return the closest dataset feature
        return ee.Feature(joined.get('closestFeature'))

    def get_predominant_feature(self, overlaps: ee.FeatureCollection) -> ee.Feature:
        """
        To be used when the sample area overlaps more than one dataset feature.

        param overlaps: ee.FeatureCollection of the intersections between the dataset
            features and the sample area
        returns: the overlap with the largest area as an ee.Feature
        """
        # Add 'area' as a property to each feature. This is the area in square meters
        # of the intersection of the dataset feature and the sample area.
        overlaps = overlaps.map(
            lambda feature: feature.set({'area': feature.geometry().area()}))

        # Find the maximum area among all the overlaps
        max_area = overlaps.aggregate_max('area')

        # Return the overlap with the largest area
        return ee.Feature(overlaps.filter(ee.Filter.gte('area', max_area)).first())
    

### Instantiate your datasets
Instantiate your chosen Earth Engine datasets in the list below.

In [7]:
def filterCalifornia(collection: ee.ImageCollection) -> ee.ImageCollection:
    """
    Keep only the images of the collection that intersect a box around California.

    param collection: ee.ImageCollection to filter
    returns: the collection filtered to the bounding polygon
    """
    # (longitude, latitude) corners of a rectangle just big enough to contain California
    corners = [(-125, 32.5), (-125, 42), (-114, 42), (-114, 32.5)]
    bounding_box = ee.Geometry.Polygon(
        [ee.Geometry.Point(lon, lat) for lon, lat in corners]
    )
    return collection.filterBounds(bounding_box)

def reproject(image: ee.Image, scale: int) -> ee.Image:
    '''
    Change the resolution of an image while keeping its map projection.

    param image: Image to reproject
    param scale: Desired pixel width in meters
    returns: the image resampled with bilinear interpolation at the requested scale
    '''
    source_crs = image.projection().crs()  # Same map projection as the input
    interpolated = image.resample('bilinear')  # Bilinear interpolation method
    return interpolated.reproject(crs=source_crs, scale=scale)

# Define the list of datasets from which to retrieve data.
# Sources that feed several datasets are constructed once and reused, and the repeated
# definitions are generated from small tables instead of being written out one by one.
# The resulting list has the same 49 datasets in the same order as before.

def _divide_by(divisor):
    """Return a preprocess function that divides every pixel of an image by `divisor`."""
    return lambda img: img.divide(ee.Image(divisor))


# --------------------- BIOCLIM DATA 1KM RESOLUTION ----------------------------
worldclim_bio = ee.Image('WORLDCLIM/V1/BIO')

# (band, divisor applied before sampling); None means the band is used as stored
bioclim_divisors = [
    ('bio02', 10), ('bio03', None), ('bio04', 100),
    ('bio05', 10), ('bio06', 10), ('bio07', 10), ('bio08', 10),
    ('bio09', 10), ('bio10', 10), ('bio11', 10),
    ('bio12', None), ('bio13', None), ('bio14', None), ('bio15', None),
    ('bio16', None), ('bio17', None), ('bio18', None), ('bio19', None),
]

bioclim_datasets = [
    # NOTE(review): bio01 is the only band resampled to 100 m and given map_params;
    # kept as-is, confirm whether the other bands should match.
    RasterDataset(
        image=reproject(worldclim_bio, 100),
        band='bio01',
        map_params={'min': 12, 'max': 18},
        preprocess=_divide_by(10)
    )
] + [
    RasterDataset(
        image=worldclim_bio,
        band=band,
        preprocess=_divide_by(divisor) if divisor else None
    )
    for band, divisor in bioclim_divisors
]

# ----------------------- USGS NATIONAL ELEVATION 10M RESOLUTION -----------------
usgs_elevation = ee.Image('USGS/NED')

# -------------------- USGS NATIONAL LAND COVER DATABASE (2016 RELEASE) ----------
nlcd_2016 = ee.ImageCollection('USGS/NLCD').filterMetadata(
    'system:index',
    'equals',
    'NLCD2016'
)

# ------------------- SENTINEL 2 TOP-OF-ATMOSPHERE REFLECTANCE -------------------
sentinel2 = filterCalifornia(
    ee.ImageCollection("COPERNICUS/S2").filterDate('2018-01-01', '2018-01-15').map(
        lambda img: img.divide(ee.Image(10000))
    )
)
sentinel2_bands = [
    'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12'
]
sentinel2_datasets = [
    RasterDataset(image=sentinel2, band=band) for band in sentinel2_bands
]

datasets = bioclim_datasets + [
# ------------------------ EPA LEVEL III ECOREGIONS ------------------------
    VectorDataset(
        feature_collection=ee.FeatureCollection('EPA/Ecoregions/2013/L3'),
        property='us_l3name',
        map_property='us_l3code'
    ),
# ------------------------ USDA SOIL TAXONOMY -----------------------------
    RasterDataset(
        image=ee.Image('OpenLandMap/SOL/SOL_GRTGROUP_USDA-SOILTAX_C/v01'),
        band='grtgroup',
        # Pixel values are soil class codes, so take the mode rather than the median
        categorical=True
    ),
# ----------------------- USGS NATIONAL ELEVATION 10M RESOLUTION -----------------
    RasterDataset(
        image=usgs_elevation,
        band='elevation'
    ),
# ----------------------- SLOPE CALCULATED FROM ELEVATION -------------------------
    RasterDataset(
        image=usgs_elevation,
        band='elevation',
        name='slope',
        preprocess=lambda img: ee.Terrain.slope(img)
    ),
# ----------------------- ASPECT CALCULATED FROM ELEVATION -------------------------
    RasterDataset(
        image=usgs_elevation,
        band='elevation',
        name='aspect',
        preprocess=lambda img: ee.Terrain.aspect(img)
    ),
# ---------------------- USDA SOIL PH IN H20 AT SURFACE LEVEL ----------------------
    RasterDataset(
        image=ee.Image('OpenLandMap/SOL/SOL_PH-H2O_USDA-4C1A2A_M/v02'),
        band='b0',
        name='soil pH in H20',
        preprocess=_divide_by(10)
    ),
# ---------------------- USDA SOIL ORGANIC CARBON CONTENT AT SURFACE LEVEL --------
    RasterDataset(
        image=ee.Image('OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02'),
        band='b0',
        name='soil organic carbon',
        preprocess=_divide_by(5)
    ),
# ---------------------- USDA SOIL SAND CONTENT AT SURFACE LEVEL ------------------
    RasterDataset(
        image=ee.Image('OpenLandMap/SOL/SOL_SAND-WFRACTION_USDA-3A1A1A_M/v02'),
        band='b0',
        name='soil sand'
    ),
# ---------------------- USDA SOIL CLAY CONTENT AT SURFACE LEVEL ------------------
    RasterDataset(
        image=ee.Image('OpenLandMap/SOL/SOL_CLAY-WFRACTION_USDA-3A1A1A_M/v02'),
        band='b0',
        name='soil clay'
    ),
# ---------------------- USDA SOIL BULK DENSITY (FINE EARTH) AT SURFACE LEVEL ----
    RasterDataset(
        image=ee.Image('OpenLandMap/SOL/SOL_BULKDENS-FINEEARTH_USDA-4A1H_M/v02'),
        band='b0',
        name='soil bulk density',
        preprocess=_divide_by(10)
    ),
# -------------------- LANDSAT 8 NDVI --------------------------------------------
    RasterDataset(
        image=ee.ImageCollection('LANDSAT/LC08/C01/T1_32DAY_NDVI').filterDate(
            '2020-01-01', '2020-03-01'
        ),
        band='NDVI'
    ),
# -------------------- LANDSAT 8 EVI --------------------------------------------
    RasterDataset(
        image=ee.ImageCollection('LANDSAT/LC08/C01/T1_8DAY_EVI').filterDate(
            '2020-01-01', '2020-03-01'
        ),
        band='EVI'
    ),
# -------------------- LANDSAT 8 NBRT --------------------------------------------
    RasterDataset(
        image=ee.ImageCollection('LANDSAT/LC08/C01/T1_8DAY_NBRT').filterDate(
            '2020-01-01', '2020-03-01'
        ),
        band='NBRT'
    ),
# -------------------- LANDSAT 8 ANNUAL GREENEST PIXEL----------------------------
    RasterDataset(
        image=ee.ImageCollection('LANDSAT/LC08/C01/T1_ANNUAL_GREENEST_TOA'),
        band='greenness'
    ),
# -------------------- USGS NATIONAL LAND COVER DATABASE LANDCOVER ---------------
    RasterDataset(
        image=nlcd_2016,
        band='landcover',
        # Pixel values are land cover class codes, so take the mode rather than the median
        categorical=True
    ),
# -------------------- USGS NATIONAL LAND COVER DATABASE PERCENT IMPERVIOUS ------
    RasterDataset(
        image=nlcd_2016,
        band='impervious'
    ),
# -------------------- USGS NATIONAL LAND COVER DATABASE TREE COVER --------------
    RasterDataset(
        image=nlcd_2016,
        band='percent_tree_cover'
    ),
# ------------------- SENTINEL 2 TOP-OF-ATMOSPHERE REFLECTANCE -------------------
] + sentinel2_datasets


## Map the data
Next we define a mapping class and some methods to help visualize the data. This will help with understanding what we're doing, and also make it easy to visually verify that the results make sense. The JavaScript version of the Earth Engine API provides a Map class that makes this easy, but the Python API doesn't have that feature. I'm implementing it using Folium, a Python library for creating Leaflet JavaScript maps, as recommended [here](https://developers.google.com/earth-engine/python_install-colab.html#interactive_map).

In [8]:
# These two methods are going to be added as custom folium map methods. 
# This will make it easy to display vector and raster Earth Engine objects to a map.
def add_raster_layer(self, ee_image_object, vis_params, name):
    """
    Display an EE image (raster) on a folium map.

    self: the folium map to draw on (this function is attached to folium.Map as a method)
    ee_image_object: object to wrap in ee.Image and render
    vis_params: visualization settings passed to getMapId
    name: label of the layer
    """
    # Ask Earth Engine for a tile URL template for the image drawn with vis_params
    tile_url = ee.Image(ee_image_object).getMapId(vis_params)['tile_fetcher'].url_format

    layer = folium.raster_layers.TileLayer(
        tiles=tile_url,
        attr="Map Data © Google Earth Engine",
        name=name,
        overlay=True,
        control=True
    )
    layer.add_to(self)

def add_vector_layer(self, coords, name):
    """
    Display an EE geometry (vector) on a folium map as a red polygon.

    self: the folium map to draw on (this function is attached to folium.Map as a method)
    coords: sequence of coordinate arrays in (lng, lat) order
    name: label used for the layer and its tooltip
    """
    # np.flip without an axis reverses every axis of each element, which turns
    # (lng, lat) pairs into the (lat, lng) order used for the polygon locations
    flipped = [np.flip(ring) for ring in coords]
    locations = np.array(flipped)

    polygon = folium.vector_layers.Polygon(
        locations=locations,
        name=name,
        color='red',
        overlay=True,
        control=True,
        tooltip=name
    )
    polygon.add_to(self)

Next we define the Map class, which makes use of the above methods to draw multiple datasets onto a folium map and display it.

In [9]:
class Map:
    """
    A folium map with helpers for drawing Earth Engine datasets and sample areas.
    """

    def __init__(self):
        """Initialize a custom folium map"""
        # Add EE drawing methods to folium.
        folium.Map.add_raster_layer = add_raster_layer
        folium.Map.add_vector_layer = add_vector_layer
        self.map = folium.Map(location=[35, -119], zoom_start=4, height=500)

    def add_polygon(self, coords: np.ndarray, name: str):
        """
        Draw one polygon on the map.

        coords: coordinates of the polygon in (lng, lat) order
        name: label for the polygon
        """
        self.map.add_vector_layer(coords, name)

    def add(self, dataset):
        """
        Add a dataset as a layer of the interactive map.

        dataset: a VectorDataset (rasterized first) or any object exposing an ee.Image
            as `.data`, plus `.map_params` and `.name`.

        Note: pixels with a value of 0 or less are masked out of the displayed layer.
        """
        # If the dataset is in vector format, we want to convert it to raster in order to
        # display on the map more easily
        if isinstance(dataset, VectorDataset):
            image = self._rasterize(dataset)
        # If it's in raster format, we can use it as-is
        else:
            image = dataset.data

        self.map.add_raster_layer(
            image.updateMask(image.gt(0)),
            dataset.map_params,
            dataset.name)

    @staticmethod
    def _rasterize(dataset) -> ee.Image:
        """
        Convert a VectorDataset into an image whose pixel values come from its map_property.

        The dataset itself is left untouched; any helper property is added to a derived
        collection only.
        """
        map_property = dataset.map_property
        features = dataset.data

        # Earth Engine objects are client-side proxies, so the type of a property value
        # can't be learned with isinstance() on the proxy; fetch one value to inspect it.
        sample_value = features.first().get(map_property).getInfo()
        is_numeric = (isinstance(sample_value, (int, float))
                      and not isinstance(sample_value, bool))

        # Non-numeric values can't be converted into raster format
        # We will have to arbitrarily assign numeric values to represent them
        if not is_numeric:
            # Get an array of all existing values of the property
            values = ee.List(features.aggregate_array(map_property))
            # Use the position of each value in the array as an arbitrary numeric value.
            # Positions are shifted to start at 1 because add() masks out pixels <= 0,
            # which would otherwise hide the first category.
            # e.g. if your values were ['forest', 'desert', 'water'],
            # the new band would have values [1, 2, 3] respectively.
            features = features.map(
                lambda feature: feature.set(
                    'as_number',
                    ee.Number(values.indexOf(feature.get(map_property))).add(1))
            )
            # Do mapping based on the new numeric property
            map_property = 'as_number'

        # Convert to a raster image, turning values of the given property into pixel values
        return features.reduceToImage(
            properties=[map_property], reducer=ee.Reducer.first())

    @staticmethod
    def get_coords(fc: ee.FeatureCollection) -> np.ndarray:
        """
        Return an array of coordinate lists representing the geometries
        of the features in the FeatureCollection.

        Usable both as Map.get_coords(fc) and on an instance.
        """
        coords = np.array(fc.iterate(
            lambda item, l: ee.List(l).add(item.geometry().coordinates()),
            ee.List([])).getInfo())
        return coords

    @staticmethod
    def get_names(fc: ee.FeatureCollection) -> np.ndarray:
        """
        Return an array of names extracted from the 'name' property of each
        feature in the FeatureCollection.

        Usable both as Map.get_names(fc) and on an instance.
        """
        names = np.array(fc.iterate(
            lambda item, l: ee.List(l).add(item.get('name')),
            ee.List([])).getInfo())
        return names

    def display(self):
        """Add a layer control panel to the map and render it in the notebook."""
        self.map.add_child(folium.LayerControl())  # Add a layer control panel to the map
        # `display` here is the notebook's global display function, not this method
        display(self.map)   # Display the map


## Get data for every sample area from every dataset

In [10]:
# Create the map that will show all the data together and outline every sample area on it
# NOTE: the name `map` shadows Python's built-in map() from this cell onward
map = Map()
area_outlines = Map.get_coords(sample_areas)
area_labels = Map.get_names(sample_areas)
for polygon, name in zip(area_outlines, area_labels):
    map.add_polygon(polygon, name=name)
print(len(datasets))
# Work through the datasets one at a time
for dataset in datasets:
    print(dataset.name)
    # Extend every sample area with a property holding this dataset's value for it
    sample_areas = sample_areas.map(dataset.get_sample_area_data)
    # Show the dataset itself as a map layer
    map.add(dataset)

map.display()  # Display the map

# The dataset names become the column headers of the exported table
dataset_names = [ds.name for ds in datasets]

# Export the sample area data to your Google Drive as a CSV
# Note: It may take a few minutes to show up.
export_columns = ['name', 'latitude', 'longitude'] + dataset_names  # Leave out the geometry
task = ee.batch.Export.table.toDrive(
    collection=sample_areas,
    description='eedata',  # The file will show up in your Drive with this name
    selectors=export_columns
)
task.start()

49
bio01
bio02
bio03
bio04


KeyboardInterrupt: 