# Deep Learning pipeline mockup

[Graphical representation of the pipeline](https://www.draw.io/#G1U6XDddvcjas2vglyKeVz0ouFOElzOMCH)

## Setup software libraries

In [3]:
# Import and initialize the Earth Engine library.
import ee
ee.Initialize()
ee.__version__

'0.1.202'

In [4]:
# Folium setup.
import folium
print(folium.__version__)

0.8.3


In [5]:
# Skydipper library.
import Skydipper
print(Skydipper.__version__)

0.1.7


In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import functools
import json
import uuid
import os
from pprint import pprint
import env
import time

In [38]:
import sqlalchemy
from sqlalchemy import Column, Integer, Text, DateTime
from sqlalchemy.dialects.postgresql import JSON

In [55]:
# Connection URL of the `geomodels` PostgreSQL database. It can be overridden
# through the GEOMODELS_DB_URL environment variable so that credentials do not
# have to be edited into (and shared with) the notebook; the fallback is the
# local development database.
db_url = os.environ.get('GEOMODELS_DB_URL',
                        'postgresql://postgres:postgres@0.0.0.0:5432/geomodels')
engine = sqlalchemy.create_engine(db_url)
# Reflect the existing schema into `metadata`.
metadata = sqlalchemy.MetaData(bind=engine)
metadata.reflect(engine)
# Display the first row of the `dataset` table.
pd.read_sql_query("SELECT * FROM dataset", engine).values[0]

array([0, 'Sentinel-2-Top-of-Atmosphere-Reflectance',
       'Sentinel 2 Top-of-Atmosphere Reflectance',
       "['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8A', 'B8', 'B11', 'B12', 'ndvi', 'ndwi']",
       "['B4', 'B3', 'B2']", 'gee'], dtype=object)

In [50]:
import csv
from io import StringIO
from sqlalchemy import create_engine
def psql_insert_copy(table, conn, keys, data_iter):
    """
    Insertion callable for `DataFrame.to_sql(method=...)` that loads the rows
    with a single `COPY ... FROM STDIN WITH CSV` statement.

    Parameters
    ----------
    table : object exposing `.schema` and `.name` of the target table
    conn : object whose `.connection` attribute is a DBAPI connection
    keys : iterable of column names
    data_iter : iterable of rows to write
    """
    raw_connection = conn.connection
    with raw_connection.cursor() as cursor:
        # Serialise all rows as CSV in memory and rewind for the COPY.
        csv_buffer = StringIO()
        csv.writer(csv_buffer).writerows(data_iter)
        csv_buffer.seek(0)

        quoted_columns = ', '.join(['"{}"'.format(key) for key in keys])
        # Qualify the table with its schema only when one is set.
        target = '{}.{}'.format(table.schema, table.name) if table.schema else table.name
        copy_statement = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
            target, quoted_columns)
        cursor.copy_expert(sql=copy_statement, file=csv_buffer)
# Engine for the `geomodels` PostgreSQL database. The URL can be overridden
# through the GEOMODELS_DB_URL environment variable so credentials need not be
# hard-coded in the notebook; the fallback is the local development database.
engine = create_engine(os.environ.get(
    'GEOMODELS_DB_URL',
    'postgresql://postgres:postgres@0.0.0.0:5432/geomodels'))

In [52]:
# Write the `datasets` DataFrame to the `dataset` table using the COPY-based
# insertion method `psql_insert_copy`.
# NOTE(review): `datasets` is first assigned in a later cell (the
# `create_db_table` calls in the Database section), so this cell fails on a
# fresh top-to-bottom run.
# NOTE(review): no `if_exists` is passed — confirm the intended behaviour when
# the `dataset` table already exists.
datasets.to_sql('dataset', engine, method=psql_insert_copy)

***
## Database

We will create a database to store all the attributes generated throughout the pipeline.

In [84]:
def create_db_table(table_path, columns, dtypes):
    """
    Load a CSV-backed table, creating an empty one the first time.

    If `table_path` exists it is read with its first column as index.
    Otherwise an empty DataFrame with the given `columns` (typed with the
    matching entries of `dtypes`) is built, written to `table_path` and
    returned.
    """
    if os.path.exists(table_path):
        return pd.read_csv(table_path, index_col=0)

    # Going through a dict keeps a single dtype per column name.
    column_types = dict(zip(columns, dtypes))
    record_dtype = np.dtype(list(column_types.items()))

    df = pd.DataFrame(np.empty(0, dtype=record_dtype))
    df.to_csv(table_path)
    return df

In [85]:
# Folder holding the CSV tables; exist_ok makes the cell safe to re-run
# without a separate existence check.
os.makedirs('Database', exist_ok=True)

# One CSV-backed table per entity of the pipeline. Each call loads the table
# if its file already exists and creates an empty one otherwise.
datasets = create_db_table('Database/dataset.csv', 
                          columns = ['slug', 'name', 'bands', 'rgb_bands', 'provider'], 
                          dtypes = [str, str, list, list, str]
                         )

images = create_db_table('Database/image.csv', 
                          columns = ['dataset_id', 'bands_selections', 'scale', 'init_date',
                                     'end_date', 'composite_method', 'bands_min_max'], 
                          dtypes = [int, list, float, str, str, str, str]
                         )

models = create_db_table('Database/model.csv', 
                          columns = ['model_name', 'model_type', 'model_description', 'output_image_id'], 
                          dtypes = [str, str, str, int]
                        )
                         
versions = create_db_table('Database/model_versions.csv', 
                           columns = ['model_id', 'model_architecture', 'input_image_id', 'output_image_id', 'geostore_id', 'sample_size', 
                                      'training_params', 'version', 'data_status', 'training_status', 'eeified', 'deployed'], 
                           dtypes = [int, str, int, int, str, float, str, str, str, str, str, str]   
                          )

***
### Skydipper datasets for Deep Learning

In [86]:
# Slugs of the datasets used in this notebook. The same strings are the keys
# of the lookup dictionaries in `ee_collection_specifics.py`.
slugs_list = ["Sentinel-2-Top-of-Atmosphere-Reflectance",
              "Landsat-7-Surface-Reflectance",
              "Landsat-8-Surface-Reflectance",
              "USDA-NASS-Cropland-Data-Layers",
              "USGS-National-Land-Cover-Database"]

In [87]:
# Build a Skydipper collection from a search string made of all slugs joined
# by spaces, restricted to object type 'dataset' and app 'skydipper'.
# NOTE(review): presumably `limit=10` caps the number of results — confirm
# against the Skydipper documentation.
c = Skydipper.Collection(search=' '.join(slugs_list), object_type=['dataset'], app=['skydipper'], limit=10)
c

### Earth Engine ImageCollection attributes

We define the attributes needed for each Earth Engine ImageCollection used throughout the notebook.

We include them in the `ee_collection_specifics.py` file:

In [1]:
%%writefile ee_collection_specifics.py

"""
Information on Earth Engine collections stored here (e.g. bands, collection ids, etc.)
"""

import ee

def ee_collections(collection):
    """
    Earth Engine image collection names

    Returns the Earth Engine asset id registered for the dataset slug
    `collection`. Unknown slugs raise KeyError.
    """
    asset_ids = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance':
            'COPERNICUS/S2',
        'Landsat-7-Surface-Reflectance':
            'LANDSAT/LE07/C01/T1_SR',
        'Landsat-8-Surface-Reflectance':
            'LANDSAT/LC08/C01/T1_SR',
        'USDA-NASS-Cropland-Data-Layers':
            'USDA/NASS/CDL',
        'USGS-National-Land-Cover-Database':
            'USGS/NLCD',
        'Skydipper-Water-Quality':
            'projects/vizzuality/skydipper-water-quality/LWQ-100m',
    }
    return asset_ids[collection]

def ee_bands(collection):
    """
    Earth Engine band names

    Returns a new list with the band names used for the dataset slug
    `collection`. Unknown slugs raise KeyError.
    """
    # Pieces shared by the three optical collections.
    b1_to_b7 = ['B1','B2','B3','B4','B5','B6','B7']
    index_bands = ['ndvi','ndwi']

    band_names = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': b1_to_b7 + ['B8A','B8','B11','B12'] + index_bands,
        'Landsat-7-Surface-Reflectance': b1_to_b7 + index_bands,
        'Landsat-8-Surface-Reflectance': b1_to_b7 + ['B10','B11'] + index_bands,
        'USDA-NASS-Cropland-Data-Layers': ['landcover', 'cropland', 'land', 'water', 'urban'],
        'USGS-National-Land-Cover-Database': ['impervious'],
        'Skydipper-Water-Quality': ['turbidity_blended_mean'],
    }
    return band_names[collection]

def ee_bands_rgb(collection):
    """
    Earth Engine rgb band names

    Returns the list of bands used to display the dataset slug `collection`:
    three bands for the optical collections, a single band for the others.
    Unknown slugs raise KeyError.
    """
    display_bands = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': ['B4','B3','B2'],
        'Landsat-7-Surface-Reflectance':            ['B3','B2','B1'],
        'Landsat-8-Surface-Reflectance':            ['B4', 'B3', 'B2'],
        'USDA-NASS-Cropland-Data-Layers':           ['landcover'],
        'USGS-National-Land-Cover-Database':        ['impervious'],
        'Skydipper-Water-Quality':                  ['turbidity_blended_mean'],
    }
    return display_bands[collection]

def ee_bands_normThreshold(collection):
    """
    Normalization threshold percentage

    Returns a dictionary mapping each band of the dataset slug `collection`
    to its threshold percentage. Unknown slugs raise KeyError.
    """
    def same_threshold(bands, percentage):
        # (band, percentage) pairs for a group of bands sharing one threshold
        return [(band, percentage) for band in bands]

    thresholds = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': dict(
            same_threshold(['B1','B2','B3','B4'], 75)
            + same_threshold(['B5','B6','B7','B8A','B8'], 80)
            + same_threshold(['B11','B12'], 100)),
        'Landsat-7-Surface-Reflectance': dict(
            same_threshold(['B1','B2','B3'], 95)
            + same_threshold(['B4','B5','B6','B7'], 100)),
        'Landsat-8-Surface-Reflectance': dict(
            same_threshold(['B1'], 90)
            + same_threshold(['B2','B3','B4'], 95)
            + same_threshold(['B5','B6','B7','B10','B11'], 100)),
        'USDA-NASS-Cropland-Data-Layers': dict(
            same_threshold(['landcover', 'cropland', 'land', 'water', 'urban'], 100)),
        'USGS-National-Land-Cover-Database': dict(same_threshold(['impervious'], 100)),
        'Skydipper-Water-Quality': dict(same_threshold(['turbidity_blended_mean'], 100)),
    }
    return thresholds[collection]

def normalize(collection):
    """
    Whether the composite of the dataset slug `collection` is normalized.

    True for the three optical reflectance collections, False for the others.
    Unknown slugs raise KeyError.
    """
    needs_normalization = dict.fromkeys([
        'Sentinel-2-Top-of-Atmosphere-Reflectance',
        'Landsat-7-Surface-Reflectance',
        'Landsat-8-Surface-Reflectance',
    ], True)
    needs_normalization.update(dict.fromkeys([
        'USDA-NASS-Cropland-Data-Layers',
        'USGS-National-Land-Cover-Database',
        'Skydipper-Water-Quality',
    ], False))

    return needs_normalization[collection]

def vizz_params_rgb(collection):
    """
    Visualization parameters

    Returns a single visualization dictionary (`min`, `max`, `bands` and, for
    the Landsat collections, `gamma`) for the dataset slug `collection`.
    Unknown slugs raise KeyError.
    """
    rgb_params = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': {
            'min': 0, 'max': 3000, 'bands': ['B4','B3','B2']},
        'Landsat-7-Surface-Reflectance': {
            'min': 0, 'max': 3000, 'gamma': 1.4, 'bands': ['B3','B2','B1']},
        'Landsat-8-Surface-Reflectance': {
            'min': 0, 'max': 3000, 'gamma': 1.4, 'bands': ['B4','B3','B2']},
        'USDA-NASS-Cropland-Data-Layers': {
            'min': 0, 'max': 3, 'bands': ['landcover']},
        'USGS-National-Land-Cover-Database': {
            'min': 0, 'max': 1, 'bands': ['impervious']},
        'Skydipper-Water-Quality': {
            'min': 0, 'max': 1, 'bands': ['turbidity_blended_mean']},
    }
    return rgb_params[collection]

def vizz_params(collection):
    """
    Visualization parameters

    Returns, for the dataset slug `collection`, a list with one visualization
    dictionary per map layer: a multi-band or class layer first (where there
    is one), then one layer per remaining band. Unknown slugs raise KeyError.
    """
    def layer(bands, vmax=1, gamma=None):
        # Key order is min, max, [gamma], bands.
        spec = {'min': 0, 'max': vmax}
        if gamma is not None:
            spec['gamma'] = gamma
        spec['bands'] = bands
        return spec

    def single_band_layers(band_names, gamma=None):
        return [layer([band], gamma=gamma) for band in band_names]

    layers_by_collection = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': (
            [layer(['B4','B3','B2'])]
            + single_band_layers(['B1','B5','B6','B7','B8A','B8','B11','B12'])
            + single_band_layers(['ndvi','ndwi'], gamma=1.4)),
        'Landsat-7-Surface-Reflectance': (
            [layer(['B3','B2','B1'], gamma=1.4)]
            + single_band_layers(['B4','B5','B7','B6','ndvi','ndwi'], gamma=1.4)),
        'Landsat-8-Surface-Reflectance': (
            [layer(['B4','B3','B2'], gamma=1.4)]
            + single_band_layers(['B1','B5','B6','B7','B10','B11','ndvi','ndwi'], gamma=1.4)),
        'USDA-NASS-Cropland-Data-Layers': (
            [layer(['landcover'], vmax=3)]
            + single_band_layers(['cropland','land','water','urban'])),
        'USGS-National-Land-Cover-Database': single_band_layers(['impervious']),
        'Skydipper-Water-Quality': single_band_layers(['turbidity_blended_mean']),
    }
    return layers_by_collection[collection]

## ------------------------- Filter datasets ------------------------- ##
## Landsat 7 Cloud Free Composite
def CloudMaskL7sr(image):
    """
    Mask bad pixels of an image using its `pixel_qa` band.

    A pixel is bad when the cloud bit (5) and the cloud confidence bit (7)
    are both set, or when the cloud shadow bit (3) is set. Pixels that are
    not valid in every band are masked as well.
    """
    pixel_qa = image.select('pixel_qa')

    cloudy = pixel_qa.bitwiseAnd(1 << 5).And(pixel_qa.bitwiseAnd(1 << 7))
    shadowed = pixel_qa.bitwiseAnd(1 << 3)
    bad_pixels = cloudy.Or(shadowed)

    # Minimum of the per-band masks: removes edge pixels missing in some band
    valid_in_all_bands = image.mask().reduce(ee.Reducer.min())

    return image.updateMask(bad_pixels.Not()).updateMask(valid_in_all_bands)

def CloudFreeCompositeL7(startDate, stopDate):
    """
    Median composite of the cloud-masked 'LANDSAT/LE07/C01/T1_SR' images
    between `startDate` and `stopDate`, with two normalized-difference bands
    appended: `ndvi` (B4, B3) and `ndwi` (B4, B2).
    """
    masked_images = (ee.ImageCollection('LANDSAT/LE07/C01/T1_SR')
                     .filterDate(startDate, stopDate)
                     .map(CloudMaskL7sr))
    composite = masked_images.median()

    # (new band name, bands fed to normalizedDifference)
    index_definitions = (('ndvi', ['B4','B3']), ('ndwi', ['B4','B2']))
    for index_name, band_pair in index_definitions:
        index_image = composite.normalizedDifference(band_pair).rename(index_name)
        composite = ee.Image.cat([composite, index_image])

    return composite

## Landsat 8 Cloud Free Composite
def CloudMaskL8sr(image):
    """
    Mask bad pixels of an image using its `pixel_qa` band.

    A pixel is kept only when all of the following hold:
    - the cloud shadow bit (3) and the cloud bit (5) of `pixel_qa` are unset;
    - it is valid (unmasked) in every band;
    - every optical band (B1-B7) is strictly between 0 and 10000.

    The bit masks are plain integers, like in the other cloud-mask functions
    of this module, instead of server-side `ee.Number` computations.
    """
    opticalBands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    cloudShadowBitMask = 1 << 3
    cloudsBitMask = 1 << 5

    qa = image.select('pixel_qa')
    mask1 = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(
        qa.bitwiseAnd(cloudsBitMask).eq(0))
    # Minimum over the per-band masks: valid only where every band is valid
    mask2 = image.mask().reduce('min')
    # Minimum over the optical bands: every band must be inside (0, 10000)
    mask3 = image.select(opticalBands).gt(0).And(
        image.select(opticalBands).lt(10000)).reduce('min')
    mask = mask1.And(mask2).And(mask3)

    return image.updateMask(mask)

def CloudFreeCompositeL8(startDate, stopDate):
    """
    Median composite of the cloud-masked 'LANDSAT/LC08/C01/T1_SR' images
    between `startDate` and `stopDate`, with two normalized-difference bands
    appended: `ndvi` (B5, B4) and `ndwi` (B5, B3).
    """
    masked_images = (ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
                     .filterDate(startDate, stopDate)
                     .map(CloudMaskL8sr))
    composite = masked_images.median()

    # (new band name, bands fed to normalizedDifference)
    index_definitions = (('ndvi', ['B5','B4']), ('ndwi', ['B5','B3']))
    for index_name, band_pair in index_definitions:
        index_image = composite.normalizedDifference(band_pair).rename(index_name)
        composite = ee.Image.cat([composite, index_image])

    return composite

## Sentinel 2 Cloud Free Composite
def CloudMaskS2(image):
    """
    European Space Agency (ESA) clouds from 'QA60', i.e. Quality Assessment band at 60m
    parsed by Nick Clinton

    Keeps only the pixels whose cloud (bit 10) and cirrus (bit 11) flags are
    both zero in the 'QA60' band.
    """
    quality = image.select('QA60')

    cloud_bit = 1 << 10
    cirrus_bit = 1 << 11

    no_cloud = quality.bitwiseAnd(cloud_bit).eq(0)
    no_cirrus = quality.bitwiseAnd(cirrus_bit).eq(0)

    # Clear conditions: neither flag is set
    return image.updateMask(no_cloud.And(no_cirrus))

def CloudFreeCompositeS2(startDate, stopDate):
    """
    Median composite of the 'COPERNICUS/S2' images between `startDate` and
    `stopDate` whose 'CLOUDY_PIXEL_PERCENTAGE' is below 20, cloud-masked with
    `CloudMaskS2`, with two normalized-difference bands appended:
    `ndvi` (B8, B4) and `ndwi` (B8, B3).
    """
    low_cloud_images = (ee.ImageCollection('COPERNICUS/S2')
                        .filterDate(startDate, stopDate)
                        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)))
    composite = low_cloud_images.map(CloudMaskS2).median()

    # (new band name, bands fed to normalizedDifference)
    index_definitions = (('ndvi', ['B8','B4']), ('ndwi', ['B8','B3']))
    for index_name, band_pair in index_definitions:
        index_image = composite.normalizedDifference(band_pair).rename(index_name)
        composite = ee.Image.cat([composite, index_image])

    return composite

## Cropland Data Layers
def CroplandData(startDate, stopDate):
    """
    Reclassified land-cover image from the first 'USDA/NASS/CDL' image
    between `startDate` and `stopDate`.

    The `cropland` band is remapped into a `landcover` band: 1 for the land
    codes, 2 for the water codes, 3 for the urban codes and 0 for every other
    code. One extra band per class value is then appended, in the order
    `cropland` (0), `land` (1), `water` (2), `urban` (3).
    """
    first_image = ee.Image(
        ee.ImageCollection('USDA/NASS/CDL').filterDate(startDate, stopDate).first())

    # Original class codes grouped by target class; position + 1 is the new value
    code_groups = [
        ['65', '131', '141', '142', '143', '152', '176', '87', '190', '195'],  # land
        ['83', '92', '111'],                                                  # water
        ['82', '121', '122', '123', '124'],                                   # urban
    ]

    # Builds "(crop == a || crop == b ...) * 1 + (...) * 2 + (...) * 3"
    terms = []
    for class_value, codes in enumerate(code_groups, start=1):
        any_of_codes = ' || '.join(['crop == ' + code for code in codes])
        terms.append('(' + any_of_codes + ') * ' + str(class_value))
    expression = ' + '.join(terms)

    landcover = first_image.expression(
        expression, {'crop': first_image.select(['cropland'])}).rename('landcover')

    # Split image into 1 band per class
    image = landcover
    for class_value, band_name in enumerate(['cropland', 'land', 'water', 'urban']):
        image = ee.Image.cat([image, landcover.eq(class_value).rename(band_name)])

    return image

## National Land Cover Database
def ImperviousData(startDate, stopDate):
    """
    `impervious` band of the first 'USGS/NLCD' image between `startDate` and
    `stopDate`, divided by 100 and cast to float.
    """
    in_period = ee.ImageCollection('USGS/NLCD').filterDate(startDate, stopDate)
    impervious = ee.Image(in_period.first()).select('impervious')

    # Normalize to 1
    return impervious.divide(100).float()

def WaterQuality(startDate, stopDate):
    """
    `turbidity_blended_mean` band of the first image of
    'projects/vizzuality/skydipper-water-quality/LWQ-100m' between
    `startDate` and `stopDate`.
    """
    in_period = (ee.ImageCollection('projects/vizzuality/skydipper-water-quality/LWQ-100m')
                 .filterDate(startDate, stopDate))

    # Keep only the turbidity band of the first image
    return ee.Image(in_period.first()).select('turbidity_blended_mean')

## ------------------------------------------------------------------- ##

def Composite(collection):
    """
    Returns the function that builds the image for the dataset slug
    `collection`. The returned function takes `(startDate, stopDate)`.
    Unknown slugs raise KeyError.
    """
    builders = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': CloudFreeCompositeS2,
        'Landsat-7-Surface-Reflectance':            CloudFreeCompositeL7,
        'Landsat-8-Surface-Reflectance':            CloudFreeCompositeL8,
        'USDA-NASS-Cropland-Data-Layers':           CroplandData,
        'USGS-National-Land-Cover-Database':        ImperviousData,
        'Skydipper-Water-Quality':                  WaterQuality,
    }
    return builders[collection]

Overwriting ee_collection_specifics.py


In [28]:
import ee_collection_specifics

### Populate `dataset` table

In [88]:
# Add one row per slug to the `dataset` table, skipping slugs already present.
for collection in slugs_list:

    ds = Skydipper.Dataset(id_hash=collection)
    name = ds.attributes.get('name')
    provider = ds.attributes.get('provider')

    # Wrapped in an outer list so that each band list fills a single cell of
    # the one-row DataFrame built below.
    bands = [ee_collection_specifics.ee_bands(collection)]
    rgb_bands = [ee_collection_specifics.ee_bands_rgb(collection)]


    dictionary = dict(zip(list(datasets.keys()), [collection, name, bands, rgb_bands, provider]))
    
    if not (datasets['slug'] == collection).any():
        # pd.concat replaces DataFrame.append, which newer pandas no longer provides
        datasets = pd.concat([datasets, pd.DataFrame(dictionary)], ignore_index = True)
        datasets.to_csv('Database/dataset.csv')
    
datasets

Unnamed: 0,slug,name,bands,rgb_bands,provider
0,Sentinel-2-Top-of-Atmosphere-Reflectance,Sentinel 2 Top-of-Atmosphere Reflectance,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8...","['B4', 'B3', 'B2']",gee
1,Landsat-7-Surface-Reflectance,Landsat 7 Surface Reflectance,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'nd...","['B3', 'B2', 'B1']",gee
2,Landsat-8-Surface-Reflectance,Landsat 8 Surface Reflectance,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B1...","['B4', 'B3', 'B2']",gee
3,USDA-NASS-Cropland-Data-Layers,USDA NASS Cropland Data Layers,"['landcover', 'cropland', 'land', 'water', 'ur...",['landcover'],gee
4,USGS-National-Land-Cover-Database,USGS National Land Cover Database,['impervious'],['impervious'],gee


***
## Data pre-processing

We normalize the composite images to have values from 0 to 1.

**Variables**

In [672]:
# Dataset slugs of the input and output images (keys of the lookup
# dictionaries in `ee_collection_specifics.py`).
input_dataset = 'Landsat-8-Surface-Reflectance'
output_dataset = 'USDA-NASS-Cropland-Data-Layers'
# Date range passed to the composite functions.
init_date = '2016-01-01'
end_date = '2016-12-31'
scale = 30 #scale in meters
# Order matters: input first, output second.
collections = [input_dataset, output_dataset]

**Normalize images function**

In [30]:
def min_max_values(image, collection, scale):
    """
    Compute the per-band minimum and maximum values used to normalize `image`.

    Parameters
    ----------
    image : Earth Engine image to reduce
    collection : dataset slug, used to look up the per-band threshold
        percentages in `ee_collection_specifics.ee_bands_normThreshold`
    scale : scale passed to `reduceRegion`

    Returns
    -------
    dict
        Result of the min/max reduction, keyed `<band>_min` / `<band>_max`.
        For every band whose threshold is not 100, `<band>_max` is replaced
        by the mean of the last histogram bucket whose cumulative percentage
        is still below the threshold, to avoid outliers.
    """
    normThreshold = ee_collection_specifics.ee_bands_normThreshold(collection)

    # Grid of rectangles covering [-180, 180] x [-90, 90]; with num = 2 this
    # is a single rectangle.
    num = 2
    lon = np.linspace(-180, 180, num)
    lat = np.linspace(-90, 90, num)

    features = [
        ee.Feature(ee.Geometry.Rectangle(lon[i], lat[j], lon[i+1], lat[j+1]))
        for i in range(len(lon)-1)
        for j in range(len(lat)-1)
    ]

    def reduce_region(reducer):
        # Both reductions share every argument except the reducer.
        regReducer = {
            'geometry': ee.FeatureCollection(features),
            'reducer': reducer,
            'maxPixels': 1e10,
            'bestEffort': True,
            'scale': scale
        }
        return image.reduceRegion(**regReducer).getInfo()

    values = reduce_region(ee.Reducer.minMax())

    # Avoid outliers by taking into account only the normThreshold% of the data points.
    hist = reduce_region(ee.Reducer.histogram())

    for band, threshold in normThreshold.items():
        if threshold == 100:
            continue

        band_hist = hist.get(band)
        if not band_hist:
            # No histogram for this band: keep the plain maximum instead of
            # failing on a missing entry.
            continue

        count = np.array(band_hist.get('histogram'))
        x = np.array(band_hist.get('bucketMeans'))

        cumulative_per = np.cumsum(count/count.sum()*100)
        below = np.where(cumulative_per < threshold)[0]

        # If even the first bucket reaches the threshold there is no bucket
        # below it; fall back to the first bucket instead of raising IndexError.
        values[band+'_max'] = x[below[-1]] if below.size else x[0]

    return values

def normalize_ee_images(image, collection, values):
    """
    Rescale every band of `image` listed for `collection` to [0, 1].

    Each band is clamped to [`values[<band>_min]`, `values[<band>_max]`],
    shifted by the minimum and divided by the (max - min) range. The rescaled
    bands are returned as one image, in the order given by
    `ee_collection_specifics.ee_bands(collection)`.
    """
    def rescale(band):
        lower = values[band+'_min']
        upper = values[band+'_max']
        return image.select(band).clamp(lower, upper).subtract(lower).divide(upper - lower)

    # Normalize [0, 1] ee images
    for position, band in enumerate(ee_collection_specifics.ee_bands(collection)):
        rescaled = rescale(band)
        # The first band starts the image, the following ones are added to it
        image_new = rescaled if position == 0 else image_new.addBands(rescaled)

    return image_new

### Populate `image` table

In [674]:
# Read dataset and image tables
datasets = pd.read_csv('Database/dataset.csv', index_col=0)
images = pd.read_csv('Database/image.csv', index_col=0)

for collection in collections:
    dataset_id = datasets[datasets['slug'] == collection].index[0]

    # Column-by-column match: a row is the same image only when every one of
    # these columns equals its own expected value (an `isin` against the
    # pooled list of values would also accept values found in the wrong column).
    already_registered = ((images['dataset_id'] == dataset_id) &
                          (images['scale'] == scale) &
                          (images['init_date'] == init_date) &
                          (images['end_date'] == end_date)).any()

    if not already_registered:
        # Create composite
        image = ee_collection_specifics.Composite(collection)(init_date, end_date)

        bands = ee_collection_specifics.ee_bands(collection)
        image = image.select(bands)

        if ee_collection_specifics.normalize(collection):
            # Get min/max values for each band
            values = min_max_values(image, collection, scale)
        else:
            values = {}

        # Append values to table. Columns are named explicitly: pairing the
        # seven columns declared when the table is created with a positional
        # list of eight values left `bands_min_max` empty and dropped the JSON.
        dictionary = {
            'dataset_id': [dataset_id],
            'bands_selections': [''],
            'scale': [scale],
            'init_date': [init_date],
            'end_date': [end_date],
            'composite_method': [''],
            'bands_min_max': [json.dumps(values)],
        }
        # pd.concat replaces DataFrame.append, which newer pandas no longer provides
        images = pd.concat([images, pd.DataFrame(dictionary)], ignore_index = True)

    # Save table
    images.to_csv('Database/image.csv')

### Display composites

In [676]:
# Define the URL format used for Earth Engine generated map tiles.
# `{mapid}` and `{token}` are filled by `.format(**mapid)` below; the doubled
# braces leave literal `{z}/{x}/{y}` placeholders in the resulting URL.
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'
Map = folium.Map(location=[38.012, -121.2747], zoom_start=11)

# Read dataset and image tables
datasets = pd.read_csv('Database/dataset.csv', index_col=0)
images = pd.read_csv('Database/image.csv', index_col=0)

for collection in collections:

    dataset_id = datasets[datasets['slug'] == collection].index[0]
    
    # Rows of the image table for this dataset, scale and date range
    df = images[(images['dataset_id'] == dataset_id) & 
                (images['scale'] == scale) & 
                (images['init_date'] == init_date) & 
                (images['end_date'] == end_date)
               ].copy()
    
    # Min/max values stored as JSON in the first matching row
    # NOTE(review): raises if no row matches or if the cell is not a JSON string
    values = json.loads(df['bands_min_max'].iloc[0])
    
    # Create composite
    image = ee_collection_specifics.Composite(collection)(init_date, end_date)
    
    # Normalize images
    # Skipped when `values` is empty
    if bool(values): 
        image = normalize_ee_images(image, collection, values)
    
        
    # One tile layer per visualization dictionary of the collection
    for params in ee_collection_specifics.vizz_params(collection):
        # NOTE(review): assumes `getMapId` returns a mapping with `mapid` and `token` keys — confirm for the installed ee version
        mapid = image.getMapId(params)
        folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name=str(params['bands']),
      ).add_to(Map)

Map.add_child(folium.LayerControl())
Map

### Select input/output bands

**Variables**

In [677]:
# Bands selected from the input and output images
input_bands = ['B1','B2','B3','B4','B5','B6','B7','ndvi','ndwi']
output_bands = ['cropland', 'land', 'water', 'urban']

# Same order as `collections`: input first, output second
bands = [input_bands, output_bands]

**Populate `image` table**

In [678]:
# Read dataset and image tables
datasets = pd.read_csv('Database/dataset.csv', index_col=0)
images = pd.read_csv('Database/image.csv', index_col=0)
# As strings, missing band selections become 'nan'
images = images.astype({"bands_selections": str})

for n, collection in enumerate(collections):

    dataset_id = datasets[datasets['slug'] == collection].index[0]
    selection = str(bands[n])

    # Rows of the image table for this dataset, scale and date range
    df = images[(images['dataset_id'] == dataset_id) &
                (images['scale'] == scale) &
                (images['init_date'] == init_date) &
                (images['end_date'] == end_date)
               ].copy()

    # Take rows where bands_selections column is NaN
    df1 = df[df['bands_selections'] == 'nan'].copy()

    if not df1.empty:
        # Fill the first row that has no band selection yet
        index = df1.index[0]
        images.at[index, 'bands_selections'] = selection
    elif not (df['bands_selections'] == selection).any():
        # No matching row holds this selection yet (checked on the
        # `bands_selections` column itself rather than with an `isin` against
        # a pooled list of values): duplicate the first row with it.
        if df.empty:
            raise IndexError(
                'no image row for dataset_id={}, scale={}, init_date={}, end_date={}'.format(
                    dataset_id, scale, init_date, end_date))
        df2 = df.iloc[0:1].copy()
        df2.at[df2.index[0], 'bands_selections'] = selection
        # pd.concat replaces DataFrame.append, which newer pandas no longer provides
        images = pd.concat([images, df2], ignore_index = True)

# Save table
images.to_csv('Database/image.csv')

***
## Create TFRecords for training

**geoStore**

In [680]:
def polygons_to_geoStoreMultiPoligon(Polygons):
    """Merge GeoJSON FeatureCollections into a geostore-style payload.

    Each FeatureCollection in ``Polygons`` is collapsed into a single
    ``MultiPolygon`` feature whose coordinates are the coordinates of its
    polygons, in order. The first collection is named "training" and the
    second "validation"; passing more than two raises ``IndexError``.

    Returns a dict of the form
    ``{"geojson": {"type": "FeatureCollection", "features": [...]}}``.
    """
    split_names = ("training", "validation")

    merged_features = []
    for position, collection in enumerate(Polygons):
        # Gather the coordinate arrays of every polygon in this collection
        coordinates = [
            feature.get('geometry').get('coordinates')
            for feature in collection.get('features')
        ]
        geometry = {"type": "MultiPolygon", "coordinates": coordinates}
        merged_features.append(
            {
                "type": "Feature",
                "properties": {"name": split_names[position]},
                "geometry": geometry,
            }
        )

    feature_collection = {"type": "FeatureCollection", "features": merged_features}
    return {"geojson": feature_collection}

In [681]:
# Hard-coded GeoJSON FeatureCollections with the training (trainPolygons) and
# validation (evalPolygons) polygons used when the second collection is the
# USGS National Land Cover Database; they are merged into `geostore`.
# NOTE(review): `geostore` is only assigned when collections[1] equals one of the
# slugs tested in this cell; for any other value it is left undefined here.
if collections[1] == 'USGS-National-Land-Cover-Database':
    trainPolygons = {"type":"FeatureCollection","features":[{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-123.22265625000001,45.213003555993964],[-122.03613281249999,45.213003555993964],[-122.03613281249999,46.164614496897094],[-123.22265625000001,46.164614496897094],[-123.22265625000001,45.213003555993964]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-122.1240234375,38.16911413556086],[-120.76171875,38.16911413556086],[-120.76171875,39.13006024213511],[-122.1240234375,39.13006024213511],[-122.1240234375,38.16911413556086]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-119.70703125,34.77771580360469],[-118.3447265625,34.77771580360469],[-118.3447265625,35.92464453144099],[-119.70703125,35.92464453144099],[-119.70703125,34.77771580360469]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-115.97167968750001,35.496456056584165],[-114.521484375,35.496456056584165],[-114.521484375,36.73888412439431],[-115.97167968750001,36.73888412439431],[-115.97167968750001,35.496456056584165]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-118.21289062499999,33.797408767572485],[-116.23535156249999,33.797408767572485],[-116.23535156249999,34.379712580462204],[-118.21289062499999,34.379712580462204],[-118.21289062499999,33.797408767572485]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-112.6318359375,33.02708758002874],[-111.4013671875,33.02708758002874],[-111.4013671875,34.016241889667015],[-112.6318359375,34.016241889667015],[-112.6318359375,33.02708758002874]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-105.6005859375,39.40224434029275],[-104.5458984375,39.40224434029275],[-104.5458984375,40.44694705960048],[-105.6005859375,40.44694705960048],[-105.6005859375,39.40224434029275]]]}},{"type":"Feature","properties":{},"geometry"
:{"type":"Polygon","coordinates":[[[-112.67578124999999,40.27952566881291],[-111.4453125,40.27952566881291],[-111.4453125,41.21172151054787],[-112.67578124999999,41.21172151054787],[-112.67578124999999,40.27952566881291]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-97.734375,32.21280106801518],[-95.9326171875,32.21280106801518],[-95.9326171875,33.32134852669881],[-97.734375,33.32134852669881],[-97.734375,32.21280106801518]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-99.36035156249999,29.036960648558267],[-97.822265625,29.036960648558267],[-97.822265625,30.031055426540206],[-99.36035156249999,30.031055426540206],[-99.36035156249999,29.036960648558267]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-95.185546875,38.61687046392973],[-93.9990234375,38.61687046392973],[-93.9990234375,39.639537564366684],[-95.185546875,39.639537564366684],[-95.185546875,38.61687046392973]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-91.2744140625,38.30718056188316],[-89.6484375,38.30718056188316],[-89.6484375,39.16414104768742],[-91.2744140625,39.16414104768742],[-91.2744140625,38.30718056188316]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-88.330078125,41.343824581185686],[-86.8798828125,41.343824581185686],[-86.8798828125,42.391008609205045],[-88.330078125,42.391008609205045],[-88.330078125,41.343824581185686]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-93.91113281249999,44.49650533109348],[-92.5048828125,44.49650533109348],[-92.5048828125,45.583289756006316],[-93.91113281249999,45.583289756006316],[-93.91113281249999,44.49650533109348]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-81.38671875,34.813803317113155],[-80.2880859375,34.813803317113155],[-80.2880859375,35.782170703266075],[-81.38671875,35.7821707032660
75],[-81.38671875,34.813803317113155]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-85.0341796875,33.17434155100208],[-83.7158203125,33.17434155100208],[-83.7158203125,34.27083595165],[-85.0341796875,34.27083595165],[-85.0341796875,33.17434155100208]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-87.2314453125,35.60371874069731],[-86.17675781249999,35.60371874069731],[-86.17675781249999,36.63316209558658],[-87.2314453125,36.63316209558658],[-87.2314453125,35.60371874069731]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-87.14355468749999,32.91648534731439],[-86.2646484375,32.91648534731439],[-86.2646484375,33.97980872872457],[-87.14355468749999,33.97980872872457],[-87.14355468749999,32.91648534731439]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-81.9140625,27.566721430409707],[-81.03515625,27.566721430409707],[-81.03515625,28.844673680771795],[-81.9140625,28.844673680771795],[-81.9140625,27.566721430409707]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-84.7705078125,38.92522904714054],[-83.75976562499999,38.92522904714054],[-83.75976562499999,40.17887331434696],[-84.7705078125,40.17887331434696],[-84.7705078125,38.92522904714054]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-80.947265625,40.27952566881291],[-79.98046875,40.27952566881291],[-79.98046875,41.178653972331674],[-80.947265625,41.178653972331674],[-80.947265625,40.27952566881291]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-75.2783203125,40.613952441166596],[-73.8720703125,40.613952441166596],[-73.8720703125,41.21172151054787],[-75.2783203125,41.21172151054787],[-75.2783203125,40.613952441166596]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-78.0908203125,38.44498466889473],[-76.728515625,38.44498466
889473],[-76.728515625,39.33429742980725],[-78.0908203125,39.33429742980725],[-78.0908203125,38.44498466889473]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-112.6318359375,46.164614496897094],[-111.4453125,46.164614496897094],[-111.4453125,46.86019101567027],[-112.6318359375,46.86019101567027],[-112.6318359375,46.164614496897094]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-117.1142578125,43.229195113965005],[-115.57617187499999,43.229195113965005],[-115.57617187499999,44.08758502824516],[-117.1142578125,44.08758502824516],[-117.1142578125,43.229195113965005]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-96.328125,35.746512259918504],[-95.2734375,35.746512259918504],[-95.2734375,36.4566360115962],[-96.328125,36.4566360115962],[-96.328125,35.746512259918504]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-98.173828125,35.02999636902566],[-96.9873046875,35.02999636902566],[-96.9873046875,35.817813158696616],[-98.173828125,35.817813158696616],[-98.173828125,35.02999636902566]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-92.6806640625,34.379712580462204],[-91.7578125,34.379712580462204],[-91.7578125,35.10193405724606],[-92.6806640625,35.10193405724606],[-92.6806640625,34.379712580462204]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-90.7470703125,34.63320791137959],[-89.3408203125,34.63320791137959],[-89.3408203125,35.71083783530009],[-90.7470703125,35.71083783530009],[-90.7470703125,34.63320791137959]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-107.314453125,34.74161249883172],[-106.12792968749999,34.74161249883172],[-106.12792968749999,35.60371874069731],[-107.314453125,35.60371874069731],[-107.314453125,34.74161249883172]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordin
ates":[[[-94.3505859375,41.1455697310095],[-92.94433593749999,41.1455697310095],[-92.94433593749999,42.19596877629178],[-94.3505859375,42.19596877629178],[-94.3505859375,41.1455697310095]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-85.869140625,40.68063802521456],[-84.5947265625,40.68063802521456],[-84.5947265625,41.64007838467894],[-85.869140625,41.64007838467894],[-85.869140625,40.68063802521456]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-87.099609375,39.30029918615029],[-85.6494140625,39.30029918615029],[-85.6494140625,40.245991504199026],[-87.099609375,40.245991504199026],[-87.099609375,39.30029918615029]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-117.7734375,47.30903424774781],[-116.103515625,47.30903424774781],[-116.103515625,48.1367666796927],[-117.7734375,48.1367666796927],[-117.7734375,47.30903424774781]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-97.91015624999999,37.3002752813443],[-96.8115234375,37.3002752813443],[-96.8115234375,38.09998264736481],[-97.91015624999999,38.09998264736481],[-97.91015624999999,37.3002752813443]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-94.06494140625,32.25926542645933],[-93.4716796875,32.25926542645933],[-93.4716796875,32.7872745269555],[-94.06494140625,32.7872745269555],[-94.06494140625,32.25926542645933]]]}}]}  
    evalPolygons = {"type":"FeatureCollection","features":[{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-95.888671875,29.38217507514529],[-95.06469726562499,29.38217507514529],[-95.06469726562499,30.12612436422458],[-95.888671875,30.12612436422458],[-95.888671875,29.38217507514529]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-83.84765625,42.374778361114195],[-82.94677734375,42.374778361114195],[-82.94677734375,42.78733853171998],[-83.84765625,42.78733853171998],[-83.84765625,42.374778361114195]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-96.88568115234375,40.69521661351714],[-95.77606201171875,40.69521661351714],[-95.77606201171875,41.393294288784865],[-96.88568115234375,41.393294288784865],[-96.88568115234375,40.69521661351714]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-105.05126953124999,38.57393751557591],[-104.490966796875,38.57393751557591],[-104.490966796875,39.0831721934762],[-105.05126953124999,39.0831721934762],[-105.05126953124999,38.57393751557591]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-122.62390136718749,46.95776134668866],[-121.84936523437499,46.95776134668866],[-121.84936523437499,48.04136507445029],[-122.62390136718749,48.04136507445029],[-122.62390136718749,46.95776134668866]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-120.157470703125,36.465471886798134],[-119.24560546875001,36.465471886798134],[-119.24560546875001,37.03763967977139],[-120.157470703125,37.03763967977139],[-120.157470703125,36.465471886798134]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-120.02563476562501,39.33854604847979],[-119.55871582031251,39.33854604847979],[-119.55871582031251,39.7240885773337],[-120.02563476562501,39.7240885773337],[-120.02563476562501,39.33854604847979]]]}},{"type":"Feature","p
roperties":{},"geometry":{"type":"Polygon","coordinates":[[[-86.30859375,37.61423141542417],[-84.9462890625,37.61423141542417],[-84.9462890625,38.65119833229951],[-86.30859375,38.65119833229951],[-86.30859375,37.61423141542417]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-78.31054687499999,36.914764288955936],[-76.86035156249999,36.914764288955936],[-76.86035156249999,38.03078569382294],[-78.31054687499999,38.03078569382294],[-78.31054687499999,36.914764288955936]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-102.87597656249999,31.541089879585808],[-101.4697265625,31.541089879585808],[-101.4697265625,32.24997445586331],[-102.87597656249999,32.24997445586331],[-102.87597656249999,31.541089879585808]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-83.5400390625,39.50404070558415],[-82.177734375,39.50404070558415],[-82.177734375,40.54720023441049],[-83.5400390625,40.54720023441049],[-83.5400390625,39.50404070558415]]]}}]}
    
    geostore = polygons_to_geoStoreMultiPoligon([trainPolygons, evalPolygons])
    
# Hard-coded training (trainPolygons) and validation (evalPolygons) polygons used
# when the second collection is the USDA NASS Cropland Data Layers.
# NOTE(review): the training list repeats some rectangles (e.g. the one starting at
# -120.03662109374999, 37.45741810262938) and contains one polygon whose vertices
# are all the same point (-124.25537109375, 30.86451022625836) — confirm whether
# these entries are intentional.
if collections[1] == 'USDA-NASS-Cropland-Data-Layers':
    trainPolygons = {"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-122.882080078125, 40.50126945841645], [-122.1240234375, 40.50126945841645], [-122.1240234375, 41.008920735004885], [-122.882080078125, 41.008920735004885], [-122.882080078125, 40.50126945841645]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-122.2283935546875, 39.00637903337455], [-121.607666015625, 39.00637903337455], [-121.607666015625, 39.46588451142044], [-122.2283935546875, 39.46588451142044], [-122.2283935546875, 39.00637903337455]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.355224609375, 38.77978137804918], [-119.608154296875, 38.77978137804918], [-119.608154296875, 39.342794408952365], [-120.355224609375, 39.342794408952365], [-120.355224609375, 38.77978137804918]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-121.90979003906249, 37.70555348721583], [-120.9814453125, 37.70555348721583], [-120.9814453125, 38.39764411353178], [-121.90979003906249, 38.39764411353178], [-121.90979003906249, 37.70555348721583]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.03662109374999, 37.45741810262938], [-119.1851806640625, 37.45741810262938], [-119.1851806640625, 38.08268954483802], [-120.03662109374999, 38.08268954483802], [-120.03662109374999, 37.45741810262938]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.03662109374999, 37.45741810262938], [-119.1851806640625, 37.45741810262938], [-119.1851806640625, 38.08268954483802], [-120.03662109374999, 38.08268954483802], [-120.03662109374999, 37.45741810262938]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.03662109374999, 37.45741810262938], [-119.1851806640625, 37.45741810262938], 
[-119.1851806640625, 38.08268954483802], [-120.03662109374999, 38.08268954483802], [-120.03662109374999, 37.45741810262938]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-112.554931640625, 33.0178760185549], [-111.588134765625, 33.0178760185549], [-111.588134765625, 33.78827853625996], [-112.554931640625, 33.78827853625996], [-112.554931640625, 33.0178760185549]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-112.87353515625, 40.51379915504413], [-111.829833984375, 40.51379915504413], [-111.829833984375, 41.28606238749825], [-112.87353515625, 41.28606238749825], [-112.87353515625, 40.51379915504413]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-108.19335937499999, 39.095962936305476], [-107.1826171875, 39.095962936305476], [-107.1826171875, 39.85915479295669], [-108.19335937499999, 39.85915479295669], [-108.19335937499999, 39.095962936305476]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-124.25537109375, 30.86451022625836], [-124.25537109375, 30.86451022625836], [-124.25537109375, 30.86451022625836], [-124.25537109375, 30.86451022625836]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-106.875, 37.142803443716836], [-105.49072265625, 37.142803443716836], [-105.49072265625, 38.18638677411551], [-106.875, 38.18638677411551], [-106.875, 37.142803443716836]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-117.31201171875001, 43.27720532212024], [-116.01562499999999, 43.27720532212024], [-116.01562499999999, 44.134913443750726], [-117.31201171875001, 44.134913443750726], [-117.31201171875001, 43.27720532212024]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-115.7080078125, 44.69989765840318], [-114.7412109375, 44.69989765840318], [-114.7412109375, 
45.36758436884978], [-115.7080078125, 45.36758436884978], [-115.7080078125, 44.69989765840318]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.65185546875, 47.517200697839414], [-119.33349609375, 47.517200697839414], [-119.33349609375, 48.32703913063476], [-120.65185546875, 48.32703913063476], [-120.65185546875, 47.517200697839414]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-119.83886718750001, 45.69083283645816], [-118.38867187500001, 45.69083283645816], [-118.38867187500001, 46.694667307773116], [-119.83886718750001, 46.694667307773116], [-119.83886718750001, 45.69083283645816]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-107.09472656249999, 47.45780853075031], [-105.84228515625, 47.45780853075031], [-105.84228515625, 48.31242790407178], [-107.09472656249999, 48.31242790407178], [-107.09472656249999, 47.45780853075031]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-101.57958984375, 46.93526088057719], [-100.107421875, 46.93526088057719], [-100.107421875, 47.945786463687185], [-101.57958984375, 47.945786463687185], [-101.57958984375, 46.93526088057719]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-101.162109375, 44.32384807250689], [-99.7119140625, 44.32384807250689], [-99.7119140625, 45.22848059584359], [-101.162109375, 45.22848059584359], [-101.162109375, 44.32384807250689]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-100.5908203125, 41.261291493919884], [-99.25048828124999, 41.261291493919884], [-99.25048828124999, 42.114523952464246], [-100.5908203125, 42.114523952464246], [-100.5908203125, 41.261291493919884]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.9541015625, 37.142803443716836], [-96.65771484375, 
37.142803443716836], [-96.65771484375, 38.13455657705411], [-97.9541015625, 38.13455657705411], [-97.9541015625, 37.142803443716836]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-112.78564453124999, 32.91648534731439], [-111.357421875, 32.91648534731439], [-111.357421875, 33.925129700072], [-112.78564453124999, 33.925129700072], [-112.78564453124999, 32.91648534731439]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-106.435546875, 35.15584570226544], [-105.22705078125, 35.15584570226544], [-105.22705078125, 36.13787471840729], [-106.435546875, 36.13787471840729], [-106.435546875, 35.15584570226544]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.3828125, 32.45415593941475], [-96.2841796875, 32.45415593941475], [-96.2841796875, 33.22949814144951], [-97.3828125, 33.22949814144951], [-97.3828125, 32.45415593941475]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.97607421875, 35.04798673426734], [-97.00927734375, 35.04798673426734], [-97.00927734375, 35.764343479667176], [-97.97607421875, 35.764343479667176], [-97.97607421875, 35.04798673426734]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.97607421875, 35.04798673426734], [-97.00927734375, 35.04798673426734], [-97.00927734375, 35.764343479667176], [-97.97607421875, 35.764343479667176], [-97.97607421875, 35.04798673426734]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-95.4052734375, 47.62097541515849], [-94.24072265625, 47.62097541515849], [-94.24072265625, 48.28319289548349], [-95.4052734375, 48.28319289548349], [-95.4052734375, 47.62097541515849]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-94.19677734375, 41.27780646738183], [-93.09814453125, 41.27780646738183], [-93.09814453125, 
42.13082130188811], [-94.19677734375, 42.13082130188811], [-94.19677734375, 41.27780646738183]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-93.71337890625, 37.75334401310656], [-92.6806640625, 37.75334401310656], [-92.6806640625, 38.51378825951165], [-93.71337890625, 38.51378825951165], [-93.71337890625, 37.75334401310656]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.63720703125, 34.615126683462194], [-89.47265625, 34.615126683462194], [-89.47265625, 35.69299463209881], [-90.63720703125, 35.69299463209881], [-90.63720703125, 34.615126683462194]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-93.05419921875, 30.44867367928756], [-91.77978515625, 30.44867367928756], [-91.77978515625, 31.57853542647338], [-93.05419921875, 31.57853542647338], [-93.05419921875, 30.44867367928756]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.02197265625, 44.276671273775186], [-88.59374999999999, 44.276671273775186], [-88.59374999999999, 44.98034238084973], [-90.02197265625, 44.98034238084973], [-90.02197265625, 44.276671273775186]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.63720703125, 38.41055825094609], [-89.49462890625, 38.41055825094609], [-89.49462890625, 39.18117526158749], [-90.63720703125, 39.18117526158749], [-90.63720703125, 38.41055825094609]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-87.56103515625, 35.62158189955968], [-86.28662109375, 35.62158189955968], [-86.28662109375, 36.4566360115962], [-87.56103515625, 36.4566360115962], [-87.56103515625, 35.62158189955968]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.63720703125, 31.93351676190369], [-89.49462890625, 31.93351676190369], [-89.49462890625, 32.731840896865684], 
[-90.63720703125, 32.731840896865684], [-90.63720703125, 31.93351676190369]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-69.54345703125, 44.68427737181225], [-68.5107421875, 44.68427737181225], [-68.5107421875, 45.336701909968134], [-69.54345703125, 45.336701909968134], [-69.54345703125, 44.68427737181225]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-73.212890625, 41.49212083968776], [-72.35595703125, 41.49212083968776], [-72.35595703125, 42.032974332441405], [-73.212890625, 42.032974332441405], [-73.212890625, 41.49212083968776]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-77.93701171875, 38.70265930723801], [-76.97021484375, 38.70265930723801], [-76.97021484375, 39.26628442213066], [-77.93701171875, 39.26628442213066], [-77.93701171875, 38.70265930723801]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-79.25537109375, 35.44277092585766], [-78.15673828125, 35.44277092585766], [-78.15673828125, 36.13787471840729], [-79.25537109375, 36.13787471840729], [-79.25537109375, 35.44277092585766]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-81.4306640625, 33.55970664841198], [-80.44189453125, 33.55970664841198], [-80.44189453125, 34.288991865037524], [-81.4306640625, 34.288991865037524], [-81.4306640625, 33.55970664841198]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-84.90234375, 33.394759218577995], [-83.91357421875, 33.394759218577995], [-83.91357421875, 34.19817309627726], [-84.90234375, 34.19817309627726], [-84.90234375, 33.394759218577995]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-82.28759765625, 28.246327971048842], [-81.2548828125, 28.246327971048842], [-81.2548828125, 29.209713225868185], [-82.28759765625, 29.209713225868185], 
[-82.28759765625, 28.246327971048842]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-109.88525390624999, 42.65012181368022], [-108.56689453125, 42.65012181368022], [-108.56689453125, 43.50075243569041], [-109.88525390624999, 43.50075243569041], [-109.88525390624999, 42.65012181368022]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-117.61962890624999, 39.04478604850143], [-116.65283203124999, 39.04478604850143], [-116.65283203124999, 39.740986355883564], [-117.61962890624999, 39.740986355883564], [-117.61962890624999, 39.04478604850143]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-102.67822265625, 31.42866311735861], [-101.71142578125, 31.42866311735861], [-101.71142578125, 32.26855544621476], [-102.67822265625, 32.26855544621476], [-102.67822265625, 31.42866311735861]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-119.47631835937499, 36.03133177633187], [-118.58642578124999, 36.03133177633187], [-118.58642578124999, 36.55377524336089], [-119.47631835937499, 36.55377524336089], [-119.47631835937499, 36.03133177633187]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-116.224365234375, 33.091541548655215], [-115.56518554687499, 33.091541548655215], [-115.56518554687499, 33.568861182555565], [-116.224365234375, 33.568861182555565], [-116.224365234375, 33.091541548655215]]]}}]}
    evalPolygons = {"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-122.13208008, 41.25126946], [-121.37402344, 41.25126946], [-121.37402344, 41.75892074], [-122.13208008, 41.75892074], [-122.13208008, 41.25126946]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-121.15979004, 38.45555349], [-120.23144531, 38.45555349], [-120.23144531, 39.14764411], [-121.15979004, 39.14764411], [-121.15979004, 38.45555349]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-111.80493164, 33.76787602], [-110.83813477, 33.76787602], [-110.83813477, 34.53827854], [-111.80493164, 34.53827854], [-111.80493164, 33.76787602]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-106.125, 37.89280344], [-104.74072266, 37.89280344], [-104.74072266, 38.93638677], [-106.125, 38.93638677], [-106.125, 37.89280344]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-119.08886719, 46.44083284], [-117.63867188, 46.44083284], [-117.63867188, 47.44466731], [-119.08886719, 47.44466731], [-119.08886719, 46.44083284]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-99.84082031, 42.01129149], [-98.50048828, 42.01129149], [-98.50048828, 42.86452395], [-99.84082031, 42.86452395], [-99.84082031, 42.01129149]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-96.6328125, 33.20415594], [-95.53417969, 33.20415594], [-95.53417969, 33.97949814], [-96.6328125, 33.97949814], [-96.6328125, 33.20415594]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-93.44677734, 42.02780647], [-92.34814453, 42.02780647], [-92.34814453, 42.8808213], [-93.44677734, 42.8808213], [-93.44677734, 42.02780647]]]}}, {"type": "Feature", "properties": {}, 
"geometry": {"type": "Polygon", "coordinates": [[[-89.27197266, 45.02667127], [-87.84375, 45.02667127], [-87.84375, 45.73034238], [-89.27197266, 45.73034238], [-89.27197266, 45.02667127]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-68.79345703, 45.43427737], [-67.76074219, 45.43427737], [-67.76074219, 46.08670191], [-68.79345703, 46.08670191], [-68.79345703, 45.43427737]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-80.68066406, 34.30970665], [-79.69189453, 34.30970665], [-79.69189453, 35.03899187], [-80.68066406, 35.03899187], [-80.68066406, 34.30970665]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-116.86962891, 39.79478605], [-115.90283203, 39.79478605], [-115.90283203, 40.49098636], [-116.86962891, 40.49098636], [-116.86962891, 39.79478605]]]}}]}
    
    geostore = polygons_to_geoStoreMultiPoligon([trainPolygons, evalPolygons])

In [682]:
# geostore features are ordered [training, validation]; each one is a MultiPolygon
# whose coordinates list has one entry per source polygon.
nTrain = len(geostore.get('geojson').get('features')[0].get('geometry').get('coordinates'))
nEval = len(geostore.get('geojson').get('features')[1].get('geometry').get('coordinates'))
print('Number of training polygons:',  nTrain)
# The second count belongs to the validation feature, not to the training one
print('Number of validation polygons:',  nEval)

Number of training polygons: 47
Number of training polygons: 12


In [683]:
# Build a Skydipper Geometry from the geostore payload created above.
# NOTE(review): presumably this goes through the Skydipper API to register or
# look up the geometry — confirm against the Skydipper documentation.
multipolygon = Skydipper.Geometry(attributes=geostore)
# Bare expression: show the object's notebook representation
multipolygon

In [684]:
# Show the geometry using the Geometry object's own map() helper
# (presumably an interactive map — confirm against the Skydipper documentation).
multipolygon.map()

**Display Polygons**

In [685]:
# URL template for Earth Engine map tiles; {mapid} and {token} are filled in
# from the dict returned by getMapId().
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'
Map = folium.Map(location=[38., -100.], zoom_start=5)

# Add one overlay per visualisation preset of the output dataset.
# NOTE(review): `image` is not assigned in this cell; it is whatever an earlier
# cell left behind — confirm it is the image of `output_dataset`.
for params in ee_collection_specifics.vizz_params(output_dataset):
    mapid = image.getMapId(params)
    dataset_layer = folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name=str(params['bands']),
    )
    dataset_layer.add_to(Map)

# Convert the GeoJSONs to feature collections
geojson_features = geostore.get('geojson').get('features')
trainFeatures = ee.FeatureCollection([geojson_features[0]])
evalFeatures = ee.FeatureCollection([geojson_features[1]])

# Paint training polygons with 1 and validation polygons with 2 on a zero image,
# then use the image as its own mask.
polyImage = ee.Image(0).byte()
polyImage = polyImage.paint(trainFeatures, 1)
polyImage = polyImage.paint(evalFeatures, 2)
polyImage = polyImage.updateMask(polyImage)

# Value 1 is drawn red and value 2 blue
mapid = polyImage.getMapId({'min': 1, 'max': 2, 'palette': ['red', 'blue']})
polygon_layer = folium.TileLayer(
    tiles=EE_TILES.format(**mapid),
    attr='Google Earth Engine',
    overlay=True,
    name='training polygons',
)
polygon_layer.add_to(Map)

Map.add_child(folium.LayerControl())
Map

### An array of images

We have to stack the 2D images (the input and output images of the neural network) to create a single image from which samples can be taken. We then convert the stacked image into an array image in which each pixel stores a 256x256 patch of neighbouring pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [neighborhoodToArray()](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points.

In [686]:
def image_into_array(collections, bands, init_date, end_date, scale, kernel_size=256):
    """Stack the selected bands of every collection and convert them to an array image.

    Args:
        collections: dataset slugs, looked up in `Database/dataset.csv`.
        bands: one list of band names per collection (same order as `collections`).
        init_date: start date used both for the table lookup and for the composite.
        end_date: end date used both for the table lookup and for the composite.
        scale: value matched against the `scale` column of `Database/image.csv`.
        kernel_size: side length, in pixels, of the square neighbourhood stored
            in every pixel of the returned array image (defaults to 256).

    Returns:
        The result of `neighborhoodToArray` applied to the float band stack.

    Raises:
        ValueError: if `collections` is empty.
        IndexError: if a collection or its image row is not found in the tables.
    """
    if not collections:
        raise ValueError('`collections` must contain at least one dataset slug.')

    # Read dataset and image tables
    datasets = pd.read_csv('Database/dataset.csv', index_col=0)
    images = pd.read_csv('Database/image.csv', index_col=0)

    # Collect one band-selected image per collection. The previous version only
    # concatenated the first and the current image, so it failed for a single
    # collection and dropped the middle ones when given three or more.
    selected_images = []
    for n, collection in enumerate(collections):

        dataset_id = datasets[datasets['slug'] == collection].index[0]

        df = images[(images['dataset_id'] == dataset_id) &
                    (images['bands_selections'] == str(bands[n])) &
                    (images['scale'] == scale) &
                    (images['init_date'] == init_date) &
                    (images['end_date'] == end_date)
                   ]

        # Per-band min/max values stored as JSON; an empty mapping means "do not normalise".
        values = json.loads(df['bands_min_max'].iloc[0])

        # Create composite
        composite = ee_collection_specifics.Composite(collection)(init_date, end_date)

        # Diagnostic output kept from the original implementation.
        print(composite.getInfo())

        # Normalize images
        if bool(values):
            composite = normalize_ee_images(composite, collection, values)

        selected_images.append(composite.select(bands[n]))

    featureStack = ee.Image.cat(selected_images).float()

    # kernel_size x kernel_size kernel of ones (named so the builtin `list` is not shadowed).
    kernel_row = ee.List.repeat(1, kernel_size)
    kernel_rows = ee.List.repeat(kernel_row, kernel_size)
    kernel = ee.Kernel.fixed(kernel_size, kernel_size, kernel_rows)

    arrays = featureStack.neighborhoodToArray(kernel)

    return arrays

### Export TFRecords

The mapped data look reasonable, so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export.

In [687]:
def get_image_ids(collections, bands, scale, init_date, end_date):
    """Look up the `image` table index of each collection's image.

    For every dataset slug in `collections`, the dataset index is read from
    `Database/dataset.csv`, and the first row of `Database/image.csv` whose
    dataset, band selection (compared as `str(bands[n])`), scale and date range
    all match is selected.

    Returns:
        A list with one image-table index per collection, in input order.

    Raises:
        IndexError: if a slug or a matching image row does not exist.
    """
    dataset_table = pd.read_csv('Database/dataset.csv', index_col=0)
    image_table = pd.read_csv('Database/image.csv', index_col=0)

    def find_image_id(position, slug):
        # First dataset row carrying this slug.
        dataset_id = dataset_table.loc[dataset_table['slug'] == slug].index[0]
        is_match = (
            (image_table['dataset_id'] == dataset_id)
            & (image_table['bands_selections'] == str(bands[position]))
            & (image_table['scale'] == scale)
            & (image_table['init_date'] == init_date)
            & (image_table['end_date'] == end_date)
        )
        return image_table.loc[is_match].index[0]

    return [find_image_id(position, slug) for position, slug in enumerate(collections)]

In [688]:
def GeoJSONs_to_FeatureCollections(geostore):
    """Turn every GeoJSON feature of the geostore into an ee.FeatureCollection.

    Each entry of a feature's `geometry.coordinates` becomes one
    `ee.Geometry.Polygon` wrapped in an `ee.Feature`.

    Returns:
        A list with one `ee.FeatureCollection` per GeoJSON feature, in order.
    """
    return [
        ee.FeatureCollection([
            ee.Feature(ee.Geometry.Polygon(polygon_coordinates))
            for polygon_coordinates in geojson_feature.get('geometry').get('coordinates')
        ])
        for geojson_feature in geostore.get('geojson').get('features')
    ]

In [689]:
def export_TFRecords(arrays, scale, bands, image_ids, geostore_id, sample_size, feature_collections, feature_lists):
    """Export training/evaluation patches as TFRecords, with one export task per geometry.

    Args:
        arrays: array image to sample (see `image_into_array`).
        scale: sampling scale passed to `arrays.sample`.
        bands: `[input_bands, output_bands]`; their concatenation is exported.
        image_ids: `[input_image_id, output_image_id]`, used in the folder name.
        geostore_id: identifier of the geostore, used in the folder name.
        sample_size: total number of pixels sampled in each polygon.
        feature_collections: one ee.FeatureCollection per base name
            (training first, evaluation second).
        feature_lists: the same collections converted to ee.List, indexable per geometry.

    Returns:
        `(task, file_paths)`: the last started export task and the
        `bucket/folder/description` path of every export.

    Raises:
        ValueError: if there is no geometry to export (previously an
            UnboundLocalError on `task`).
    """
    # These numbers determined experimentally.
    # At least one shard, otherwise a sample_size below 20 would export empty samples.
    nShards = max(1, int(sample_size / 20)) # Number of shards in each polygon.

    base_names = ['training_patches', 'eval_patches']
    bucket = 'geo-ai'
    folder = 'Data/'+str(image_ids[0])+'_'+ str(image_ids[1])+'/'+str(geostore_id)+'/'+str(sample_size)

    file_paths = []
    task = None
    for i, feature in enumerate(feature_collections):
        for g in range(feature.size().getInfo()):
            # Sample the geometry in several small shards and merge them into one collection.
            geomSample = ee.FeatureCollection([])
            for j in range(nShards):
                sample = arrays.sample(
                    region = ee.Feature(feature_lists[i].get(g)).geometry(),
                    scale = scale,
                    numPixels = sample_size / nShards, # Size of the shard.
                    seed = j,
                    tileScale = 8
                )
                geomSample = geomSample.merge(sample)

            desc = base_names[i] + '_g' + str(g)

            file_paths.append(bucket+ '/' + folder + '/' + desc)

            task = ee.batch.Export.table.toCloudStorage(
                collection = geomSample,
                description = desc,
                bucket = bucket,
                fileNamePrefix = folder + '/' + desc,
                fileFormat = 'TFRecord',
                selectors = bands[0] + bands[1]
            )
            task.start()

    if task is None:
        raise ValueError('No geometries to export: `feature_collections` is empty or contains no features.')

    return task, file_paths

**Variables**

In [690]:
# Identifiers that key the exported data: image table ids, geostore id and per-polygon sample size.
image_ids = get_image_ids(collections, bands, scale, init_date, end_date)
geostore_id = multipolygon.id
sample_size = 1000 # Total sample size in each polygon.

In [691]:
# Convert the GeoJSON to feature collections
feature_collections = GeoJSONs_to_FeatureCollections(geostore)

# Convert the feature collections to lists for iteration.
feature_lists = list(map(lambda x: x.toList(x.size()), feature_collections))

# Export all the training/evaluation data (in many pieces), with one task per geometry.
versions = pd.read_csv('Database/model_versions.csv', index_col=0)
versions.replace(np.nan, '', regex=True, inplace = True)
# NOTE(review): `isin` accepts any of the listed values in any of the columns,
# so this match is looser than a per-column comparison.
df = versions[['input_image_id', 'output_image_id', 'geostore_id', 'sample_size']].isin([image_ids[0], image_ids[1], geostore_id, sample_size]).copy()

# Export unless a matching row exists AND all matching rows are COMPLETED.
# The previous `not any and not all-completed` test could never be true:
# with no matching row, `.all()` on the empty selection evaluates to True.
matched_rows = df.all(axis=1)
data_ready = matched_rows.any() and (versions[matched_rows]['data_status'] == 'COMPLETED').all()
if not data_ready:
    task, file_paths= export_TFRecords(arrays, scale, bands, image_ids, geostore_id, sample_size, feature_collections, feature_lists)

### Populate `model_versions` tables

In [692]:
if (versions.empty) or not df.all(axis=1).any():
    # Blank row with the data-identifying columns filled in by name, so the row
    # does not depend on the column order of the CSV.
    new_row = {column: [''] for column in versions.columns}
    new_row.update({
        'input_image_id': [image_ids[0]],
        'output_image_id': [image_ids[1]],
        'geostore_id': [geostore_id],
        'sample_size': [sample_size],
    })
    # pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0.
    versions = pd.concat(
        [versions, pd.DataFrame(new_row, columns=versions.columns)],
        ignore_index=True, sort=False)

# Save table
versions.to_csv('Database/model_versions.csv')

**Save status**

In [694]:
import time

def check_status_data(task, file_paths):
    """Return the state of the first `len(file_paths)` entries of `task.list()`.

    Each entry is converted with `str()` and the text between the first "(" and
    the first ")" is taken as its state.
    NOTE(review): this presumes the most recent tasks in the list are the
    exports that were just started - confirm.
    """
    states = []
    for listed_task in task.list()[:len(file_paths)]:
        text = str(listed_task)
        states.append(text[text.find("(")+1:text.find(")")])

    return states


In [695]:
# States after which an Earth Engine export task no longer changes.
TERMINAL_EXPORT_STATES = {'COMPLETED', 'FAILED', 'CANCELLED'}

# Locate the row for this data set by column value. The mask computed before a
# row was appended no longer lines up with `versions`.
row_mask = (
    (versions['input_image_id'] == image_ids[0])
    & (versions['output_image_id'] == image_ids[1])
    & (versions['geostore_id'] == geostore_id)
    & (versions['sample_size'] == sample_size)
)
pending_rows = versions[row_mask & (versions['data_status'] != 'COMPLETED')]

if not pending_rows.empty:
    index = pending_rows.index[-1]
    while True:
        status_list = check_status_data(task, file_paths)

        #Save temporal status in table
        versions.at[index, 'data_status'] = json.dumps(dict(zip(file_paths, status_list)))
        versions.to_csv('Database/model_versions.csv')

        # Stop on failure or cancellation too; waiting only for COMPLETED would loop forever.
        if all(state in TERMINAL_EXPORT_STATES for state in status_list):
            break

        time.sleep(60)

    if status_list == ['COMPLETED'] * len(file_paths):
        #Save final status in table
        versions.at[index, 'data_status'] = "COMPLETED"
        versions.to_csv('Database/model_versions.csv')
    else:
        # The per-file states stay in the table for inspection.
        raise RuntimeError('TFRecord export did not complete: ' + json.dumps(dict(zip(file_paths, status_list))))

***
## Training the model in AI Platform
### Training code package setup

It's necessary to create a Python package to hold the training code.  Here we're going to get started with that by creating a folder for the package and adding an empty `__init__.py` file.

In [696]:
ROOT_PATH = 'AI_Platform/cnn_trainer'
PACKAGE_FOLDER = '/trainer'

# Recreate the package folder from scratch. `-f` keeps the first run (folder
# not there yet) quiet and `-p` also creates missing parent folders.
!rm -rf {ROOT_PATH}
!mkdir -p {ROOT_PATH+PACKAGE_FOLDER}
!touch {ROOT_PATH+PACKAGE_FOLDER}/__init__.py
!ls -l {ROOT_PATH+PACKAGE_FOLDER}

total 0
-rw-r--r--  1 ikersanchez  staff  0 Jan 24 13:11 __init__.py


**Variables**

In [697]:
# Cloud project settings used when submitting the job to AI Platform.
bucket = 'geo-ai'
project_id = env.project_id
region = "us-central1"

# Location of the trainer package and its entry-point module.
trainer_package_path = 'AI_Platform/cnn_trainer/trainer/'
main_trainer_module = 'trainer.task'

# Model selection.
model_type = 'segmentation'
model_architecture = 'deepvel'

# Folder of the exported TFRecords (same layout as the export folder).
data_dir = 'gs://{}/Data/{}_{}/{}/{}'.format(
    bucket, image_ids[0], image_ids[1], geostore_id, sample_size)

# Training parameters
training_params = {
    # Data location and layout
    "bucket": bucket,
    "base_names": ['training_patches', 'eval_patches'],
    "data_dir": data_dir,
    "in_bands": bands[0],
    "out_bands": bands[1],
    "kernel_size": 256,
    # `sample_size` samples are exported for every polygon
    "train_size": sample_size*nTrain,
    "eval_size": sample_size*nEval,
    # Model
    "model_type": model_type,
    "model_architecture": model_architecture,
    "output_activation": '',
    # Optimisation
    "batch_size": 16,
    "epochs": 1,
    "shuffle_size": 2000,
    "learning_rate": 1e-3,
    "loss": "mse",
    "metrics": ['RootMeanSquaredError']
}

**Populate `model` table**

In [698]:
models = pd.read_csv('Database/model.csv', index_col=0)
models.replace(np.nan, '', regex=True, inplace = True)

# Same per-column test that is used afterwards to look up `model_id`, so a row
# is added exactly when that lookup would otherwise find nothing.
model_exists = ((models['model_type'] == model_type) & (models['output_image_id'] == image_ids[1])).any()
if not model_exists:
    new_model = {column: [''] for column in models.columns}
    new_model.update({'model_type': [model_type], 'output_image_id': [image_ids[1]]})
    # pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0.
    models = pd.concat(
        [models, pd.DataFrame(new_model, columns=models.columns)],
        ignore_index=True, sort=False)

# Save table
models.to_csv('Database/model.csv')

In [699]:
# Index of the first model row with this model type and output image.
model_id = models[(models['model_type'] == model_type) & (models['output_image_id'] == image_ids[1])].index[0]

**Populate `model_versions` table**

In [702]:
def removekey(dictionary, key):
    """Remove `key` from `dictionary` in place and return the same dictionary.

    A missing key is ignored instead of raising KeyError, so dictionaries that
    never had the key can be passed through unchanged.
    """
    dictionary.pop(key, None)
    return dictionary

In [703]:
versions = pd.read_csv('Database/model_versions.csv', index_col=0, dtype={'version': str})
versions.replace(np.nan, '', regex=True, inplace = True)
df = versions.copy()


def _stored_params_without_job_dir(raw_params):
    """Decode a stored `training_params` cell without its `job_dir`; None for blank cells."""
    if not raw_params:
        # Rows created before training have no parameters yet.
        return None
    decoded = json.loads(raw_params)
    decoded.pop('job_dir', None)
    return decoded


# `job_dir` is derived from the version, so it is ignored on both sides of the comparison.
requested_params = {key: value for key, value in training_params.items() if key != 'job_dir'}
same_params = df['training_params'].apply(
    lambda raw_params: _stored_params_without_job_dir(raw_params) == requested_params)

# Check if the version already exists
if same_params.any():
    # Get version id
    version_id = same_params[same_params].index[0]

    # Check status
    status = df.at[version_id, 'training_status']
    print('Version already exists with training status equal to:', status)

    # Reuse the stored version and parameters (including `job_dir`) so the
    # following cells refer to the existing version in every branch.
    training_version = df.at[version_id, 'version']
    training_params = json.loads(df.at[version_id, 'training_params'])

    if status == 'SUCCEEDED':
        print('The training job successfully completed.')
    elif status == 'FAILED':
        print('This version got a error while training.')
        print('Change training parameters and try again.')
    elif status == 'CANCELLED':
        print('The training job was cancelled.')
        print('Start training again.')

        # Update job name
        job_name = 'job_v' + str(int(time.time()))

        # Add job directory
        training_params['job_dir'] = 'gs://' + bucket + '/Models/' + str(model_id) + '/' + training_version + '/'

        # Save training version and clear status
        versions.at[version_id, 'training_params'] =  json.dumps(training_params)
        versions.at[version_id, 'training_status'] = ''

# Create new version
else:
    print('Create new version')
    # New training version and job name
    training_version = str(int(time.time()))
    job_name = 'job_v' + training_version

    # Add job directory
    training_params['job_dir'] = 'gs://' + bucket + '/Models/' + str(model_id) + '/' + training_version + '/'

    # Rows with exported data for these inputs that have not been trained yet.
    # Compared column by column so the selected row is the untrained one; the
    # previous lookup ignored `version` and could overwrite a trained version.
    untrained = (
        (versions['input_image_id'] == image_ids[0])
        & (versions['output_image_id'] == image_ids[1])
        & (versions['geostore_id'] == geostore_id)
        & (versions['sample_size'] == sample_size)
        & (versions['version'] == '')
        & (versions['data_status'] == 'COMPLETED')
    )

    # Check if untrained version already exists
    if untrained.any():
        version_id = versions[untrained].index[0]
    else:
        new_row = {column: [''] for column in versions.columns}
        new_row.update({
            'input_image_id': [image_ids[0]],
            'output_image_id': [image_ids[1]],
            'geostore_id': [geostore_id],
            'sample_size': [sample_size],
            'data_status': ['COMPLETED'],
        })
        # pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0.
        versions = pd.concat(
            [versions, pd.DataFrame(new_row, columns=versions.columns)],
            ignore_index=True, sort=False)
        version_id = versions.index[-1]

    versions.at[version_id, 'model_id'] = model_id
    versions.at[version_id, 'model_architecture'] = model_architecture
    versions.at[version_id, 'training_params'] = json.dumps(training_params)
    versions.at[version_id, 'version'] = training_version

versions.to_csv('Database/model_versions.csv')

Creating new version


**Save training parameters**

These training parameters need to be stored in a place where other code can access them.  There are a variety of ways of accomplishing that, but here we'll save them into a JSON file called `training_params.json` inside the trainer package.

In [705]:
# The parameters are serialised to a JSON string first and that string is then
# dumped as JSON, so the file holds a JSON-encoded string (read back with
# json.loads(json.load(...))).
serialized_params = json.dumps(training_params)
with open(ROOT_PATH+PACKAGE_FOLDER+'/training_params.json', 'w', encoding='utf-8') as f:
    json.dump(serialized_params, f, ensure_ascii=False, indent=4)

In [706]:
# The file holds a JSON-encoded string, hence the two decoding steps.
with open(ROOT_PATH+PACKAGE_FOLDER+'/training_params.json') as json_file:
    encoded_params = json.load(json_file)
config = json.loads(encoded_params)
config

{'bucket': 'geo-ai',
 'base_names': ['training_patches', 'eval_patches'],
 'data_dir': 'gs://geo-ai/Data/0_1/f75559fb87f5c22deb56eb2a73aa4e12/1000',
 'in_bands': ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'ndvi', 'ndwi'],
 'out_bands': ['cropland', 'land', 'water', 'urban'],
 'kernel_size': 256,
 'train_size': 47000,
 'eval_size': 12000,
 'model_type': 'segmentation',
 'model_architecture': 'deepvel',
 'output_activation': '',
 'batch_size': 16,
 'epochs': 1,
 'shuffle_size': 2000,
 'learning_rate': 0.001,
 'loss': 'mse',
 'metrics': ['RootMeanSquaredError'],
 'job_dir': 'gs://geo-ai/Models/0/1579867953/'}

In [707]:
# Generate config.py from the current `training_params` instead of pasting a
# snapshot, so the values (notably `job_dir`) always match the version that
# was just registered.
config_path = ROOT_PATH + PACKAGE_FOLDER + '/config.py'
with open(config_path, 'w', encoding='utf-8') as config_file:
    # repr() of a dict of strings, numbers and lists is a valid Python literal.
    config_file.write('config = ' + repr(training_params) + '\n')
print('Writing ' + config_path)

Writing AI_Platform/cnn_trainer/trainer/config.py


**Training/evaluation data**

The following is code to load training/evaluation data.  Write this into `util.py`.

In [708]:
%%writefile {ROOT_PATH+PACKAGE_FOLDER}/util.py
"""Utilities to download and preprocess the data."""

import tensorflow as tf
import json

from . import config

#with open('training_params.json') as json_file:
#    config = json.loads(json.load(json_file))

def parse_function(proto):
    """Parse one serialized example into an (inputs, outputs) pair.

    Every band listed in `in_bands` and `out_bands` of the config is read as a
    float32 patch of `kernel_size` x `kernel_size` pixels.

    Args:
      proto: a serialized Example.
    Returns:
      A tuple `(inputs, outputs)` of tensors in HWC layout: the first
      `len(in_bands)` channels and the remaining channels of the band stack.
    """
    in_bands = config.config.get('in_bands')
    band_names = in_bands + config.config.get('out_bands')

    # Size and shape of the patches expected by the model.
    patch_side = config.config.get('kernel_size')
    features_dict = {
        band: tf.io.FixedLenFeature(shape=[patch_side, patch_side], dtype=tf.float32)
        for band in band_names
    }

    # Load one example
    parsed = tf.io.parse_single_example(proto, features_dict)

    # Stack the bands (channels first), then move the band axis last (HWC).
    band_stack = tf.stack([parsed.get(band) for band in band_names], axis=0)
    band_stack = tf.transpose(band_stack, [1, 2, 0])

    n_inputs = len(in_bands)
    return band_stack[:,:,:n_inputs], band_stack[:,:,n_inputs:]

def get_dataset(glob):
    """Build a parsed dataset from the TFRecord files matching a pattern.

    Args:
      glob: file pattern of the GZIP-compressed TFRecord files to read.
    Returns:
      A tf.data.Dataset of (inputs, outputs) pairs produced by `parse_function`.
    """
    matching_files = tf.compat.v1.io.gfile.glob(glob)

    records = tf.data.TFRecordDataset(matching_files, compression_type='GZIP')
    return records.map(parse_function, num_parallel_calls=5)


def get_training_dataset():
    """Get the preprocessed training dataset.

    Reads the files under `data_dir` whose name starts with the first base
    name, then shuffles, batches and repeats them as configured.

    Returns:
      A tf.data.Dataset of training data.
    """
    settings = config.config
    pattern = settings.get('data_dir') + '/' + settings.get('base_names')[0] + '*'
    shuffled = get_dataset(pattern).shuffle(settings.get('shuffle_size'))
    return shuffled.batch(settings.get('batch_size')).repeat()

def get_evaluation_dataset():
    """Get the preprocessed evaluation dataset.

    Reads the files under `data_dir` whose name starts with the second base
    name and serves them one example per batch, repeated indefinitely.
    NOTE(review): task.py computes validation_steps as eval_size / batch_size,
    which with batches of 1 covers only part of the evaluation set - confirm.

    Returns:
      A tf.data.Dataset of evaluation data.
    """
    settings = config.config
    pattern = settings.get('data_dir') + '/' + settings.get('base_names')[1] + '*'
    return get_dataset(pattern).batch(1).repeat()

Writing AI_Platform/cnn_trainer/trainer/util.py


Verify that `util.py` is functioning as intended.

In [709]:
from AI_Platform.cnn_trainer.trainer import util

training_dataset = util.get_training_dataset()
training_dataset

<DatasetV1Adapter shapes: ((?, 256, 256, 9), (?, 256, 256, 1)), types: (tf.float32, tf.float32)>

**Model**

We copy the selected model architecture (chosen above through `model_type` and `model_architecture`) into the package as `model.py`.

In [710]:
!cp ../models/{model_type}/{model_architecture+'.py'} {ROOT_PATH+PACKAGE_FOLDER}/model.py

Verify that `model.py` is functioning as intended.

In [643]:
# Import the module under an alias: binding the Keras model to the name of the
# module itself made this cell fail when it was run a second time.
from AI_Platform.cnn_trainer.trainer import model as model_module

model = model_module.create_keras_model(inputShape = (None, None, len(training_params.get('in_bands'))), nClasses = len(training_params.get('out_bands')))
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

Model: "deepvel"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image (InputLayer)              [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv2d_42 (Conv2D)              (None, None, None, 6 5248        image[0][0]                      
__________________________________________________________________________________________________
conv2d_43 (Conv2D)              (None, None, None, 6 36928       conv2d_42[0][0]                  
__________________________________________________________________________________________________
batch_normalization_39 (BatchNo (None, None, None, 6 256         conv2d_43[0][0]                  
____________________________________________________________________________________________

**Training task**

The following will create `task.py`, which will get the training and evaluation data, train the model and save it when it's done in a Cloud Storage bucket.

In [711]:
%%writefile {ROOT_PATH+PACKAGE_FOLDER}/task.py
"""Trains a Keras model"""

import os
import json

import tensorflow as tf

from . import config
from . import util
from . import model

#with open('training_params.json') as json_file:
#    config = json.loads(json.load(json_file))
          
def train_and_evaluate():
    """Train the Keras model on the exported patches and save it.

    Builds the model defined in model.py, compiles it with SGD and the
    configured loss and metrics, fits it on the datasets from util.py and
    saves it under `<job_dir>/model`; TensorBoard logs go to `<job_dir>/logs`.
    """
    settings = config.config

    # Create the Keras Model; the output activation is only passed when one is configured.
    model_kwargs = {
        'inputShape': (None, None, len(settings.get('in_bands'))),
        'nClasses': len(settings.get('out_bands')),
    }
    if settings.get('output_activation'):
        model_kwargs['output_activation'] = settings.get('output_activation')
    keras_model = model.create_keras_model(**model_kwargs)

    # Compile Keras model
    optimizer = tf.keras.optimizers.SGD(lr=settings.get('learning_rate'))
    keras_model.compile(loss=settings.get('loss'), optimizer=optimizer, metrics=settings.get('metrics'))

    # Pass a tfrecord
    training_dataset = util.get_training_dataset()
    evaluation_dataset = util.get_evaluation_dataset()

    # Setup TensorBoard callback.
    job_dir = settings.get('job_dir')
    tensorboard_cb = tf.keras.callbacks.TensorBoard(os.path.join(job_dir, 'logs'))

    # Train model
    batch_size = settings.get('batch_size')
    # NOTE(review): util.get_evaluation_dataset() batches one example at a
    # time, so eval_size / batch_size steps visit only part of the evaluation
    # set - confirm this is intended.
    keras_model.fit(
        x=training_dataset,
        steps_per_epoch=int(settings.get('train_size') / batch_size),
        epochs=settings.get('epochs'),
        validation_data=evaluation_dataset,
        validation_steps=int(settings.get('eval_size') / batch_size),
        verbose=1,
        callbacks=[tensorboard_cb])

    tf.contrib.saved_model.save_keras_model(keras_model, os.path.join(job_dir, 'model'))

if __name__ == '__main__':
    tf.logging.set_verbosity('INFO')
    train_and_evaluate()

Writing AI_Platform/cnn_trainer/trainer/task.py


**Using GPUs**

AI Platform lets you run any TensorFlow training application on a GPU-enabled machine. Learn more about [using GPUs for training models in the cloud](https://cloud.google.com/ml-engine/docs/tensorflow/using-gpus#submit-job).
We define a `config.yaml` file that describes the GPU options we want. 

In [712]:
%%writefile {ROOT_PATH}/config.yaml

trainingInput:
    scaleTier: CUSTOM
    # A single NVIDIA Tesla V100 GPU
    masterType: large_model_v100

Writing AI_Platform/cnn_trainer/config.yaml


### Submit the package to AI Platform for training

**Set up your GCP project**

Enter your project ID in the cell below. Then run the cell to make sure the Cloud SDK uses the right project for all the commands in this notebook.

In [375]:
! gcloud config set project $project_id

Updated property [core/project].


Updates are available for some Cloud SDK components.  To install them,
please run:
  $ gcloud components update



To take a quick anonymous survey, run:
  $ gcloud survey



**Authenticate your GCP account**

Enter the path to your service account key as the
`GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell.

In [376]:
# No quotes: %env stores the text verbatim, so quotes would become part of the path.
%env GOOGLE_APPLICATION_CREDENTIALS=privatekey.json

env: GOOGLE_APPLICATION_CREDENTIALS='privatekey.json'


**Submit a training job to AI Platform**

In [713]:
!gcloud ai-platform jobs submit training {job_name} \
    --job-dir {training_params.get('job_dir')} \
    --package-path {trainer_package_path} \
    --module-name {main_trainer_module} \
    --region {region} \
    --config {ROOT_PATH}/config.yaml \
    --runtime-version 1.14 \
    --python-version 3.5 

Job [job_v1579867953] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe job_v1579867953

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs job_v1579867953
jobId: job_v1579867953
state: QUEUED


**Save training status**

In [647]:
def check_status_training(job_name, project_id):
    """Return the `state:` value reported by `gcloud ai-platform jobs describe`.

    Raises:
        RuntimeError: if the command output contains no `state:` line, with the
            captured output in the message.
    """
    desc = !gcloud ai-platform jobs describe {job_name} --project {project_id}
    state_lines = desc.grep('state:')
    if not state_lines:
        raise RuntimeError('Could not read the state of job ' + str(job_name) + ':\n' + '\n'.join(desc))
    return state_lines[0].split(':')[1].strip()

In [714]:
versions = pd.read_csv('Database/model_versions.csv', index_col=0, dtype={'version': str})

# Poll once per minute and persist every observed state. The loop exits right
# after a terminal state is saved, without the extra 60 s wait.
while True:
    status = check_status_training(job_name, project_id)

    #Save status in table
    versions.at[version_id, 'training_status'] = status
    versions.to_csv('Database/model_versions.csv')

    if status in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
        break

    time.sleep(60)

***
## Prepare the model for making predictions in Earth Engine

Before we can use the model in Earth Engine, it needs to be hosted by AI Platform.  But before we can host the model on AI Platform we need to *EEify* (a new word!) it.  The EEification process merely appends some extra operations to the input and outputs of the model in order to accommodate the interchange format between pixels from Earth Engine (float32) and inputs to AI Platform (base64).  (See [this doc](https://cloud.google.com/ml-engine/docs/online-predict#binary_data_in_prediction_input) for details.)  

**`earthengine model prepare`**

The EEification process is handled for you using the Earth Engine command `earthengine model prepare`.  To use that command, we need to specify the input and output model directories and the name of the input and output nodes in the TensorFlow computation graph.  We can do all that programmatically:

In [716]:
from tensorflow.python.tools import saved_model_utils

model_path = training_params.get('job_dir') + 'model/'

# Read the default serving signature of the saved model.
meta_graph_def = saved_model_utils.get_meta_graph_def(model_path, 'serve')
serving_signature = meta_graph_def.signature_def['serving_default']
inputs = serving_signature.inputs
outputs = serving_signature.outputs

# Just get the first thing(s) from the serving signature def.  i.e. this
# model only has a single input and a single output.
input_name = next((tensor_info.name for _, tensor_info in inputs.items()), None)
output_name = next((tensor_info.name for _, tensor_info in outputs.items()), None)

# Make a dictionary that maps Earth Engine outputs and inputs to
# AI Platform inputs and outputs, respectively (single-quoted for the shell).
input_dict = "'{}'".format(json.dumps({input_name: "array"}))
output_dict = "'{}'".format(json.dumps({output_name: "prediction"}))

# Put the EEified model next to the trained model directory.
EEified_path = training_params.get('job_dir') + 'eeified/'

# You need to set the project before using the model prepare command.
!earthengine set_project {project_id}
!earthengine model prepare --source_dir {model_path} --dest_dir {EEified_path} --input {input_dict} --output {output_dict}

# Populate models table
versions.at[version_id, 'eeified'] = True
versions.to_csv('Database/model_versions.csv')

Running command using Cloud API.  Set --no-use_cloud_api to go back to using the API

Successfully saved project id
Running command using Cloud API.  Set --no-use_cloud_api to go back to using the API

Success: model at 'gs://geo-ai/Models/0/1579867953/eeified/' is ready to be hosted in AI Platform.


**Deploy the model to AI Platform**

Before it's possible to get predictions from the trained and EEified model, it needs to be deployed on AI Platform.  The first step is to create the model.  The second step is to create a version.  See [this guide](https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models) for details.  Note that models and versions can be monitored from the [AI Platform models page](http://console.cloud.google.com/ai-platform/models) of the Cloud Console. 

To ensure that the model is ready for predictions without having to warm up nodes, you can use a configuration yaml file to set the scaling type of this version to autoScaling and set a minimum number of nodes for the version. This will ensure there are always nodes on stand-by; however, you will be charged as long as they are running. For this example, we'll set the minNodes to 10. That means that at a minimum, 10 nodes are always up and running and waiting for predictions. The number of nodes will also scale up automatically if needed.

In [717]:
%%writefile config.yaml
autoScaling:
    minNodes: 10

Overwriting config.yaml


**Variables**

In [718]:
region = "us-central1"
version_name = 'v' + training_version

# Reuse the stored model name; when it is blank, derive one from the model type
# and the output image id and persist it in the model table.
model_name = models.iloc[model_id]['model_name']
if not model_name:
    model_name = models.iloc[model_id]['model_type']+'_'+str(versions.iloc[version_id]['output_image_id'])
    models.at[model_id,'model_name'] = model_name

    models.to_csv('Database/model.csv')

In [719]:
print('Creating version: ' + version_name)

# Create the model, then a version that serves the EEified SavedModel.
# NOTE(review): the autoscaling `config.yaml` written above is not passed to
# `versions create` (there is no --config flag), so minNodes presumably has no
# effect - confirm whether it should be added.
!gcloud ai-platform models create {model_name} 
!gcloud ai-platform versions create {version_name} \
  --model {model_name} \
  --origin {EEified_path} \
  --runtime-version=1.14 \
  --framework "TENSORFLOW" \
  --python-version=3.5

# Populate models table
# NOTE(review): the flag is set without checking that the commands above succeeded.
versions.at[version_id, 'deployed'] = True
versions.to_csv('Database/model_versions.csv')

Creating version: v1579867953
[1;31mERROR:[0m (gcloud.ai-platform.models.create) Resource in project [skydipper-196010] is the subject of a conflict: Field: model.name Error: A model with the same name already exists.
- '@type': type.googleapis.com/google.rpc.BadRequest
  fieldViolations:
  - description: A model with the same name already exists.
    field: model.name
Creating version (this might take a few minutes)......done.                    


***
## Predict in Earth Engine

**Read tables**

In [7]:
# Reload the four bookkeeping tables from disk.
datasets = pd.read_csv('Database/dataset.csv', index_col=0)
images = pd.read_csv('Database/image.csv', index_col=0)
models = pd.read_csv('Database/model.csv', index_col=0)
# Nullable integer dtypes let `model_id` and `version` be read as integers even when some rows are blank.
versions = pd.read_csv('Database/model_versions.csv', index_col=0, dtype = {'model_id': pd.Int64Dtype(), 'version': pd.Int64Dtype()})

**Select pre-trained models**

In [8]:
print(list(models['model_name']))

['segmentation_0_1', 'regression_2']


In [9]:
model_name = 'segmentation_0_1'
print(model_name)

segmentation_0_1


**Select versions**

In [10]:
# Index of the first model row with the selected name.
model_id = models[models['model_name'] == model_name].index[0]
# NOTE(review): `.iloc` is positional; this presumes the table index is 0..n-1 - confirm.
model_type = models.iloc[model_id]['model_type']

In [11]:
# Versions stored for the selected model, as plain ints.
stored_versions = versions[versions['model_id'] == model_id]['version']
version_names = [int(stored_version) for stored_version in stored_versions]
print(version_names)

[1579684545, 1579867953]


In [12]:
# Take the second listed version and find its row in the versions table.
version = version_names[1]
matching_versions = versions[versions['version'] == version]
version_id = matching_versions.index[0]
version_name = 'v{}'.format(version)
print(version_name)

v1579867953


**Variables**

In [13]:
# Row of the selected version, its stored training parameters and its input/output image ids.
version_rows = versions[versions['version'] == version]
version_id = version_rows.index[0]
training_params = json.loads(version_rows['training_params'][version_id])
id_columns = ['input_image_id', 'output_image_id']
image_ids = list(versions.iloc[version_id][id_columns])

In [14]:
# Dataset slugs of the input and output images.
dataset_ids = list(images.iloc[image_ids]['dataset_id'])
collections = list(datasets.iloc[dataset_ids]['slug'])
bands = [training_params.get(band_key) for band_key in ('in_bands', 'out_bands')]

# Scale and date range come from the input image row.
input_image_row = images.iloc[image_ids[0]]
scale, init_date, end_date = list(input_image_row[['scale', 'init_date', 'end_date']])
scale = float(scale)
project_id = env.project_id

In [15]:
# Summary of the settings recovered from the tables.
for label, value in (
    ('Datasets: ', collections),
    ('Bands: ', bands),
    ('scale: ', scale),
    ('init_date: ', init_date),
    ('end_date: ', end_date),
):
    print(label, value)

Datasets:  ['Landsat-8-Surface-Reflectance', 'USDA-NASS-Cropland-Data-Layers']
Bands:  [['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'ndvi', 'ndwi'], ['cropland', 'land', 'water', 'urban']]
scale:  30.0
init_date:  2016-01-01
end_date:  2016-12-31


Select new date range

In [26]:
# Date range (start, end) of the imagery on which predictions will be made.
init_date_new, end_date_new = '2018-01-01', '2018-12-31'

**Create a Polygon object from Geojson**

In [16]:
# Area of interest as a GeoJSON FeatureCollection holding a single rectangular
# Polygon. The ring is closed: its first and last vertices coincide.
atts = {
    'geojson': {
        'type': 'FeatureCollection',
        'features': [
            {
                'type': 'Feature',
                'properties': {},
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[
                        [-3.9990234375, 40.17887331434696],
                        [-3.3343505859375, 40.17887331434696],
                        [-3.3343505859375, 40.57849862511043],
                        [-3.9990234375, 40.57849862511043],
                        [-3.9990234375, 40.17887331434696],
                    ]],
                },
            },
        ],
    },
}

# Wrap the GeoJSON in a Skydipper Geometry object and display it.
geometry = Skydipper.Geometry(attributes=atts)
geometry

In [17]:
# Display the geometry via Skydipper's `map()` helper
# (presumably an interactive map of the polygon; verify against the Skydipper docs).
geometry.map()

**`ee.Model.fromAiPlatformPredictor`**

There is now a trained model, prepared for serving to Earth Engine, hosted and versioned on AI Platform.  
We can now connect Earth Engine directly to the trained model for inference.  You do that with the `ee.Model.fromAiPlatformPredictor` command.
For this command to work, we need to know a lot about the model.  To connect to the model, you need to know the name and version.

**Inputs**

You need to be able to recreate the imagery on which it was trained in order to perform inference.  Specifically, you need to create an array-valued input from the scaled data and use that for input.  (Recall that the new input node is named `array`, which is convenient because the array image has one band, named `array` by default.)  The inputs will be provided as 144x144 patches (`inputTileSize`), at 30-meter resolution (`proj`), but 8 pixels will be thrown out (`inputOverlapSize`) to minimize boundary effects.

In [31]:
# `version_id` and `input_image_id` are index labels (ids), so both tables are
# addressed with `.loc`; the positional `.iloc` would only return the same rows
# when the index happens to be exactly 0..n-1.
input_image_id = versions.loc[version_id, 'input_image_id']

# `bands_min_max` is stored as a JSON string; it is passed to the
# normalization step below when it is non-empty.
values = json.loads(images.loc[input_image_id, 'bands_min_max'])

# Create composite
image = ee_collection_specifics.Composite(collections[0])(init_date_new, end_date_new)

# Normalize images (skipped when no min/max values are stored)
if values:
    image = normalize_ee_images(image, collections[0], values)

Select bands and convert them into float

In [32]:
# Keep only the model's input bands ...
image = image.select(bands[0])
# ... and cast them to float.
image = image.float()
image.getInfo()

{'type': 'Image',
 'bands': [{'id': 'B1',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]},
  {'id': 'B2',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]},
  {'id': 'B3',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]},
  {'id': 'B4',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]},
  {'id': 'B5',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]},
  {'id': 'B6',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]},
  {'id': 'B7',
   'data_type': {'type': 'PixelType', 'precisi

**Outputs**

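The model's output also has to be described to Earth Engine: the `outputBands` argument declares the name, pixel type and number of dimensions of the band returned by the model.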

In [33]:
# Connect Earth Engine to the model version hosted on AI Platform.
model = ee.Model.fromAiPlatformPredictor(
    projectName=project_id,
    modelName=model_name,
    version=version_name,
    inputTileSize=[144, 144],
    inputOverlapSize=[8, 8],
    proj=ee.Projection('EPSG:4326').atScale(scale),
    fixInputProj=True,
    outputBands={
        'prediction': {'type': ee.PixelType.float(), 'dimensions': 1},
    },
)

# Run the model on the array-valued input image ...
predictions = model.predictImage(image.toArray())
# ... and flatten the array output into one band per output band name.
predictions = predictions.arrayFlatten([bands[1]])
predictions.getInfo()

{'type': 'Image',
 'bands': [{'id': 'cropland',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [0.00026949458523585647,
    0.0,
    0.0,
    0.0,
    0.00026949458523585647,
    0.0]},
  {'id': 'land',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [0.00026949458523585647,
    0.0,
    0.0,
    0.0,
    0.00026949458523585647,
    0.0]},
  {'id': 'water',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [0.00026949458523585647,
    0.0,
    0.0,
    0.0,
    0.00026949458523585647,
    0.0]},
  {'id': 'urban',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [0.00026949458523585647,
    0.0,
    0.0,
    0.0,
    0.00026949458523585647,
    0.0]}]}

Clip the prediction area with the polygon

In [34]:
# Build an Earth Engine polygon from the coordinates of the first GeoJSON
# feature and restrict the predictions to it.
polygon = ee.Geometry.Polygon(
    geometry.attributes
    .get('geojson')
    .get('features')[0]
    .get('geometry')
    .get('coordinates')
)
predictions = predictions.clip(polygon)

# Polygon centroid with its coordinate order reversed; used further down as
# the map centre.
centroid = polygon.centroid().getInfo().get('coordinates')[::-1]

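Segment the image: for segmentation models, assign each pixel the category whose predicted band has the highest value.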

In [35]:
if model_type == 'segmentation':
    # Always start from the per-class bands only. Without this, re-running the
    # cell would fold the previously added 'max' and 'categories' bands into
    # the reduction and add duplicate bands to `predictions`.
    class_scores = predictions.select(bands[1])

    # Per-pixel maximum across the class bands; the expression below refers to
    # the resulting band as 'max'.
    maxValues = class_scores.reduce(ee.Reducer.max())
    scores_with_max = class_scores.addBands(maxValues)

    # Nested ternary: the 1-based position of the first class band that equals
    # the maximum, or 0 if none does.
    expression = "".join(
        f"(b('{band_name}') == b('max')) ? {category} : "
        for category, band_name in enumerate(bands[1], start=1)
    ) + "0"

    segmentation = scores_with_max.expression(expression)

    # Mask the result with itself, rename its 'constant' band to 'categories'
    # and attach it to the class bands.
    predictions = scores_with_max.addBands(
        segmentation.mask(segmentation).select(['constant'], ['categories'])
    )

**Display**

Use folium to visualize the input imagery and the predictions.

In [36]:
# Define the URL format used for Earth Engine generated map tiles.
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'


def add_ee_layer(folium_map, ee_image, vis_params, name):
    """Add an Earth Engine image to a folium map as a toggleable tile overlay.

    Args:
        folium_map: the `folium.Map` the layer is added to.
        ee_image: the Earth Engine image to visualise.
        vis_params: visualisation parameters passed to `ee_image.getMapId`.
        name: layer name shown in the folium layer control.

    Returns:
        The dict returned by `getMapId`, whose entries are used to fill in
        the `EE_TILES` URL template.
    """
    mapid = ee_image.getMapId(vis_params)
    folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name=name,
    ).add_to(folium_map)
    return mapid


Map = folium.Map(location=centroid, zoom_start=11)

# Input imagery.
mapid = add_ee_layer(
    Map, image, {'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 1}, 'median composite'
)

# One layer per predicted band.
for band in bands[1]:
    mapid = add_ee_layer(Map, predictions, {'bands': [band], 'min': 0, 'max': 1}, band)

# Categorical layer, only available for segmentation models.
if model_type == 'segmentation':
    mapid = add_ee_layer(
        Map,
        predictions,
        {'bands': ['categories'], 'min': 1, 'max': len(bands[1])},
        'categories',
    )

Map.add_child(folium.LayerControl())
Map