# Deep Learning pipeline mockup

[Graphical representation of the pipeline](https://www.draw.io/#G1U6XDddvcjas2vglyKeVz0ouFOElzOMCH)

## Setup software libraries

In [None]:
# Import and initialize the Earth Engine library.
import ee
ee.Initialize()
ee.__version__

In [None]:
# Folium setup.
import folium
print(folium.__version__)

In [None]:
# Skydipper library.
import Skydipper
print(Skydipper.__version__)

In [None]:
# Other libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import functools
import json
import uuid
import os
import tarfile
from pprint import pprint
import env
import time
import sqlalchemy
from sqlalchemy import Column, Integer, BigInteger, Float, Text, String, Boolean, DateTime
from sqlalchemy.dialects.postgresql import JSON
from shapely.geometry import shape
from google.cloud import storage
from google.cloud.storage import blob
from googleapiclient import discovery
from googleapiclient import errors
from oauth2client.client import GoogleCredentials

***
## Database

We will create a database to store all the attributes generated throughout the pipeline.

In [None]:
def create_db_table(table_path, columns, dtypes):
    """
    Load a CSV-backed table, creating an empty one first if the file is missing.

    Parameters
    ----------
    table_path : str
        Path of the CSV file (';' separated, single-quote quoted).
    columns : list of str
        Column names used when the table has to be created.
    dtypes : list of type
        Column types, paired positionally with `columns`.

    Returns
    -------
    pandas.DataFrame
        The table with only the data columns. The `id` values written to the
        file are the frame index, in both the "created" and "re-read" cases.
    """
    if os.path.exists(table_path):
        # The file stores the index under the 'id' label; restore it as the
        # index so the returned columns match those of a freshly created table.
        return pd.read_csv(table_path, sep=';', quotechar='\'', index_col='id')

    # Empty structured array -> empty frame carrying the requested columns.
    schema = np.dtype(list(dict(zip(columns, dtypes)).items()))
    df = pd.DataFrame(np.empty(0, dtype=schema))

    df.to_csv(table_path, sep=';', quotechar='\'', index=True, index_label='id')

    return df

In [None]:
# Folder holding the CSV copy of every table.
if not os.path.exists('Database'):
    os.makedirs('Database')

# Each schema maps a column name to the Python type used to build the empty table.
_dataset_schema = {
    'slug': str,
    'name': str,
    'bands': list,
    'rgb_bands': list,
    'provider': str,
}
datasets = create_db_table('Database/dataset.csv',
                           columns=list(_dataset_schema.keys()),
                           dtypes=list(_dataset_schema.values()))

_image_schema = {
    'dataset_id': int,
    'bands_selections': list,
    'scale': float,
    'init_date': str,
    'end_date': str,
    'composite_method': str,
    'bands_min_max': str,
    'norm_type': str,
}
images = create_db_table('Database/image.csv',
                         columns=list(_image_schema.keys()),
                         dtypes=list(_image_schema.values()))

_model_schema = {
    'model_name': str,
    'model_type': str,
    'model_output': str,
    'model_description': str,
    'output_image_id': int,
}
models = create_db_table('Database/model.csv',
                         columns=list(_model_schema.keys()),
                         dtypes=list(_model_schema.values()))

_version_schema = {
    'model_id': int,
    'model_architecture': str,
    'input_image_id': int,
    'output_image_id': int,
    'geostore_id': str,
    'kernel_size': int,
    'sample_size': int,
    'training_params': str,
    'version': int,
    'data_status': str,
    'training_status': str,
    'eeified': bool,
    'deployed': bool,
}
versions = create_db_table('Database/model_versions.csv',
                           columns=list(_version_schema.keys()),
                           dtypes=list(_version_schema.values()))

### Connecting Pandas to a Database with SQLAlchemy ([tutorial](https://hackersandslackers.com/connecting-pandas-to-a-sql-database-with-sqlalchemy/))

#### Create an engine

An `engine` is an object used to connect to databases using the information in our URI.

In [None]:
# Connection URI (dialect://user:password@host:port/database).
# Set the GEOMODELS_DB_URI environment variable to keep real credentials out of
# the notebook; the local development database is used when it is not set.
engine = sqlalchemy.create_engine(
    os.environ.get('GEOMODELS_DB_URI',
                   'postgresql://postgres:postgres@0.0.0.0:5432/geomodels')
)

#### Create SQL tables from DataFrames

In [None]:
def df_to_db(df, table_name):
    """
    Write a DataFrame to the SQL table of the same name, replacing the table.

    The frame index is stored as the `id` column and rows are inserted in
    chunks of 500 through the module-level `engine`, into the `public` schema.

    Parameters
    ----------
    df : pandas.DataFrame
        Table contents.
    table_name : str
        One of "dataset", "image", "model" or "model_versions". For any other
        name a message is printed and nothing is written.
    """
    # Explicit SQL type per column. pandas ignores keys that do not match a
    # column exactly, so every key must be spelled exactly like the column.
    if table_name == "dataset":
        column_types = {"slug": Text,
                        "name": Text,
                        "bands": Text,
                        "rgb_bands": Text,
                        "provider": Text}
    elif table_name == "image":
        column_types = {"dataset_id": Integer,
                        "bands_selections": Text,
                        "scale": Float,
                        "init_date": Text,
                        "end_date": Text,
                        "composite_method": Text,
                        "bands_min_max": JSON,
                        "norm_type": Text}
    elif table_name == "model":
        column_types = {"model_name": Text,
                        "model_type": Text,
                        "model_output": Text,
                        "model_description": Text,
                        "output_image_id": Integer}
    elif table_name == "model_versions":
        column_types = {"model_id": Integer,
                        "model_architecture": Text,
                        "input_image_id": Integer,
                        "output_image_id": Integer,
                        "geostore_id": Text,
                        "kernel_size": BigInteger,
                        "sample_size": BigInteger,
                        "training_params": JSON,
                        "version": BigInteger,
                        "data_status": Text,
                        "training_status": Text,
                        "eeified": Boolean,
                        "deployed": Boolean}
    else:
        print("Incorrect table name!")
        return

    df.to_sql(table_name,
              engine,
              if_exists='replace',
              schema='public',
              index=True,
              index_label='id',
              chunksize=500,
              dtype=column_types)

**Read DataFrames**

In [None]:
# For every table that is still missing from the database, reload its CSV copy.
# 'id' is read back as the index: df_to_db() writes the index under the 'id'
# label, so keeping 'id' as a regular column as well would duplicate that name.
if not engine.dialect.has_table(engine, "dataset"):
    datasets = pd.read_csv('Database/dataset.csv', sep=';', quotechar='\'', index_col='id')
if not engine.dialect.has_table(engine, "image"):
    images = pd.read_csv('Database/image.csv', sep=';', quotechar='\'', index_col='id')
if not engine.dialect.has_table(engine, "model"):
    models = pd.read_csv('Database/model.csv', sep=';', quotechar='\'', index_col='id')
if not engine.dialect.has_table(engine, "model_versions"):
    versions = pd.read_csv('Database/model_versions.csv', sep=';', quotechar='\'', index_col='id')

**Save SQL tables**

In [None]:
# Create in the database every table that does not exist there yet.
for _table_name, _frame in (("dataset", datasets),
                            ("image", images),
                            ("model", models),
                            ("model_versions", versions)):
    if not engine.dialect.has_table(engine, _table_name):
        df_to_db(_frame, _table_name)

#### Read DataFrames from query

In [None]:
def df_from_query(table_name):
    """
    Read a whole table from the database into a DataFrame.

    Parameters
    ----------
    table_name : str
        One of "dataset", "image", "model" or "model_versions".

    Returns
    -------
    pandas.DataFrame or None
        The table without its `id` column. None, after printing a message,
        when the name is unknown or the table cannot be read.
    """
    queries = {
        "dataset": "SELECT * FROM dataset",
        "image": "SELECT * FROM image",
        "model": "SELECT * FROM model",
        "model_versions": "SELECT * FROM model_versions",
    }

    query = queries.get(table_name)
    if query is None:
        # Report a wrong name as such rather than as a missing table.
        print("Incorrect table name!")
        return None

    try:
        return pd.read_sql(query, con=engine).drop(columns='id')
    except Exception as error:
        # Best effort: keep the notebook running, but show the real cause.
        print("Could not read table '{}' from database: {}".format(table_name, error))
        return None

In [None]:
# Reload the `dataset` table through df_from_query() and preview its first rows.
datasets = df_from_query('dataset')
datasets.head()

In [None]:
# Reload the `image` table through df_from_query() and preview its first rows.
images = df_from_query('image')
images.head()

In [None]:
# Reload the `model` table through df_from_query() and preview its first rows.
models = df_from_query('model')
models.head()

In [None]:
# Reload the `model_versions` table through df_from_query() and preview its first rows.
versions = df_from_query('model_versions')
versions.head()

**Save DataFrames**

In [None]:
def df_to_csv(df, table_name):
    """
    Save a DataFrame to the CSV file that backs the given table.

    The file is ';' separated, single-quote quoted, and stores the frame index
    under the `id` label.

    Parameters
    ----------
    df : pandas.DataFrame
        Table contents.
    table_name : str
        One of "dataset", "image", "model" or "model_versions". For any other
        name a message is printed and nothing is written.
    """
    table_paths = {
        "dataset": 'Database/dataset.csv',
        "image": 'Database/image.csv',
        "model": 'Database/model.csv',
        "model_versions": 'Database/model_versions.csv',
    }

    table_path = table_paths.get(table_name)
    if table_path is None:
        print("Incorrect table name!")
        return

    # Write errors are left to propagate so they are not mistaken for a bad name.
    df.to_csv(table_path, sep=';', quotechar='\'', index=True, index_label='id')

In [None]:
# Refresh the CSV copy of every table from the frames currently in memory.
for _table_name, _frame in (('dataset', datasets),
                            ('image', images),
                            ('model', models),
                            ('model_versions', versions)):
    df_to_csv(_frame, _table_name)

***
### Skydipper datasets for Deep Learning

In [None]:
# Dataset slugs used in this notebook. The same strings are the lookup keys of
# every dictionary in ee_collection_specifics.py.
slugs_list = ["Sentinel-2-Top-of-Atmosphere-Reflectance",
              "Landsat-7-Surface-Reflectance",
              "Landsat-8-Surface-Reflectance",
              "USDA-NASS-Cropland-Data-Layers",
              "USGS-National-Land-Cover-Database",
              "Lake-Water-Quality-100m"]

In [None]:
# Search Skydipper for the slugs above (joined into one space-separated search
# string), restricted to datasets of the 'skydipper' app, at most 10 results.
c = Skydipper.Collection(search=' '.join(slugs_list), object_type=['dataset'], app=['skydipper'], limit=10)
c

### Earth Engine ImageCollection attributes

We define the attributes needed for each Earth Engine ImageCollection used throughout the notebook.

We include them in the `ee_collection_specifics.py` file:

In [None]:
%%writefile ee_collection_specifics.py

"""
Information on Earth Engine collections stored here (e.g. bands, collection ids, etc.)
"""

import ee

def ee_collections(collection):
    """
    Return the Earth Engine collection id for a dataset slug.

    Raises KeyError when the slug is not one of the known datasets.
    """
    collection_ids = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance':
            'COPERNICUS/S2',
        'Landsat-7-Surface-Reflectance':
            'LANDSAT/LE07/C01/T1_SR',
        'Landsat-8-Surface-Reflectance':
            'LANDSAT/LC08/C01/T1_SR',
        'USDA-NASS-Cropland-Data-Layers':
            'USDA/NASS/CDL',
        'USGS-National-Land-Cover-Database':
            'USGS/NLCD',
        'Lake-Water-Quality-100m':
            'projects/vizzuality/skydipper-water-quality/LWQ-100m',
    }

    return collection_ids[collection]

def ee_bands(collection):
    """
    Return the list of band names available for a dataset slug.

    Raises KeyError when the slug is not one of the known datasets.
    """
    # Index bands appended to every reflectance dataset.
    index_bands = ['ndvi', 'ndwi']

    bands_by_dataset = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance':
            ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8A', 'B8', 'B11', 'B12'] + index_bands,
        'Landsat-7-Surface-Reflectance':
            ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'] + index_bands,
        'Landsat-8-Surface-Reflectance':
            ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11'] + index_bands,
        'USDA-NASS-Cropland-Data-Layers':
            ['landcover', 'cropland', 'land', 'water', 'urban'],
        'USGS-National-Land-Cover-Database':
            ['impervious'],
        'Lake-Water-Quality-100m':
            ['turbidity_blended_mean'],
    }

    return bands_by_dataset[collection]

def ee_bands_rgb(collection):
    """
    Return the band names used to display a dataset slug.

    Three bands for the reflectance datasets, a single band for the others.
    Raises KeyError when the slug is not one of the known datasets.
    """
    display_bands = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': ['B4', 'B3', 'B2'],
        'Landsat-7-Surface-Reflectance': ['B3', 'B2', 'B1'],
        'Landsat-8-Surface-Reflectance': ['B4', 'B3', 'B2'],
        'USDA-NASS-Cropland-Data-Layers': ['landcover'],
        'USGS-National-Land-Cover-Database': ['impervious'],
        'Lake-Water-Quality-100m': ['turbidity_blended_mean'],
    }

    return display_bands[collection]

def ee_bands_normThreshold(collection):
    """
    Return the normalization threshold percentage of each band of a dataset slug.

    The result maps band name -> percentage. Raises KeyError when the slug is
    not one of the known datasets.
    """
    def thresholds(*groups):
        # Each group is (band names, percentage); bands keep the order given.
        per_band = {}
        for band_names, percentage in groups:
            per_band.update(dict.fromkeys(band_names, percentage))
        return per_band

    thresholds_by_dataset = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': thresholds(
            (['B1', 'B2', 'B3', 'B4'], 75),
            (['B5', 'B6', 'B7', 'B8A', 'B8'], 80),
            (['B11', 'B12'], 100)),
        'Landsat-7-Surface-Reflectance': thresholds(
            (['B1', 'B2', 'B3'], 95),
            (['B4', 'B5', 'B6', 'B7'], 100)),
        'Landsat-8-Surface-Reflectance': thresholds(
            (['B1'], 90),
            (['B2', 'B3', 'B4'], 95),
            (['B5', 'B6', 'B7', 'B10', 'B11'], 100)),
        'USDA-NASS-Cropland-Data-Layers': thresholds(
            (['landcover', 'cropland', 'land', 'water', 'urban'], 100)),
        'USGS-National-Land-Cover-Database': thresholds(
            (['impervious'], 100)),
        'Lake-Water-Quality-100m': thresholds(
            (['turbidity_blended_mean'], 100)),
    }

    return thresholds_by_dataset[collection]

def normalize(collection):
    """
    Return the normalization flag of a dataset slug.

    True for the three reflectance datasets, False for the other datasets.
    Raises KeyError when the slug is not one of the known datasets.
    """
    flagged_true = [
        'Sentinel-2-Top-of-Atmosphere-Reflectance',
        'Landsat-7-Surface-Reflectance',
        'Landsat-8-Surface-Reflectance',
    ]
    flagged_false = [
        'USDA-NASS-Cropland-Data-Layers',
        'USGS-National-Land-Cover-Database',
        'Lake-Water-Quality-100m',
    ]

    flags = dict.fromkeys(flagged_true, True)
    flags.update(dict.fromkeys(flagged_false, False))

    return flags[collection]

def vizz_params_rgb(collection):
    """
    Return the visualization parameters of the display layer of a dataset slug.

    The result is a dict with 'min', 'max' and 'bands' (plus 'gamma' for the
    Landsat datasets). Raises KeyError when the slug is not a known dataset.
    """
    display_params = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance':
            dict(min=0, max=3000, bands=['B4', 'B3', 'B2']),
        'Landsat-7-Surface-Reflectance':
            dict(min=0, max=3000, gamma=1.4, bands=['B3', 'B2', 'B1']),
        'Landsat-8-Surface-Reflectance':
            dict(min=0, max=3000, gamma=1.4, bands=['B4', 'B3', 'B2']),
        'USDA-NASS-Cropland-Data-Layers':
            dict(min=0, max=3, bands=['landcover']),
        'USGS-National-Land-Cover-Database':
            dict(min=0, max=1, bands=['impervious']),
        'Lake-Water-Quality-100m':
            dict(min=0, max=1, bands=['turbidity_blended_mean']),
    }

    return display_params[collection]

def vizz_params(collection):
    """
    Return the list of per-layer visualization parameters of a dataset slug.

    The first entry describes the display layer; the remaining entries describe
    one band each. Every entry is a dict with 'min', 'max' and 'bands', plus
    'gamma' where one applies. Raises KeyError when the slug is not a known
    dataset.
    """
    def layer(bands, vmax=1, gamma=None):
        # Keys are inserted in the order: min, max, [gamma], bands.
        params = {'min': 0, 'max': vmax}
        if gamma is not None:
            params['gamma'] = gamma
        params['bands'] = bands
        return params

    sentinel2 = [layer(['B4', 'B3', 'B2'])]
    sentinel2 += [layer([band]) for band in ['B1', 'B5', 'B6', 'B7', 'B8A', 'B8', 'B11', 'B12']]
    sentinel2 += [layer([band], gamma=1.4) for band in ['ndvi', 'ndwi']]

    landsat7 = [layer(['B3', 'B2', 'B1'], gamma=1.4)]
    landsat7 += [layer([band], gamma=1.4) for band in ['B4', 'B5', 'B7', 'B6', 'ndvi', 'ndwi']]

    landsat8 = [layer(['B4', 'B3', 'B2'], gamma=1.4)]
    landsat8 += [layer([band], gamma=1.4)
                 for band in ['B1', 'B5', 'B6', 'B7', 'B10', 'B11', 'ndvi', 'ndwi']]

    cropland = [layer(['landcover'], vmax=3)]
    cropland += [layer([band]) for band in ['cropland', 'land', 'water', 'urban']]

    layers_by_dataset = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': sentinel2,
        'Landsat-7-Surface-Reflectance': landsat7,
        'Landsat-8-Surface-Reflectance': landsat8,
        'USDA-NASS-Cropland-Data-Layers': cropland,
        'USGS-National-Land-Cover-Database': [layer(['impervious'])],
        'Lake-Water-Quality-100m': [layer(['turbidity_blended_mean'])],
    }

    return layers_by_dataset[collection]

## ------------------------- Filter datasets ------------------------- ##
## Landsat 7 Cloud Free Composite
def CloudMaskL7sr(image):
    """
    Mask cloud, cloud-shadow and edge pixels of a Landsat 7 SR image.

    A pixel is treated as bad when the cloud bit (5) and the cloud confidence
    bit (7) of `pixel_qa` are both set, or when the cloud shadow bit (3) is set.
    """
    pixel_qa = image.select('pixel_qa')

    cloud_bit = pixel_qa.bitwiseAnd(1 << 5)
    cloud_confidence_bit = pixel_qa.bitwiseAnd(1 << 7)
    shadow_bit = pixel_qa.bitwiseAnd(1 << 3)
    bad_pixels = cloud_bit.And(cloud_confidence_bit).Or(shadow_bit)

    # Edge pixels: keep only pixels that are valid in every band.
    valid_in_all_bands = image.mask().reduce(ee.Reducer.min())

    return image.updateMask(bad_pixels.Not()).updateMask(valid_in_all_bands)

def CloudFreeCompositeL7(startDate, stopDate):
    """
    Build a median composite of cloud-masked Landsat 7 SR images.

    Images of 'LANDSAT/LE07/C01/T1_SR' between `startDate` and `stopDate` are
    masked with CloudMaskL7sr and reduced with the median. Two normalized
    difference bands are then appended: 'ndvi' from (B4, B3) and 'ndwi' from
    (B4, B2).
    """
    masked_images = (ee.ImageCollection('LANDSAT/LE07/C01/T1_SR')
                     .filterDate(startDate, stopDate)
                     .map(CloudMaskL7sr))

    composite = masked_images.median()

    # NOTE(review): 'ndwi' is computed as (B4 - B2) / (B4 + B2); confirm the
    # band order is the intended sign convention.
    index_names = ['ndvi', 'ndwi']
    index_band_pairs = [['B4', 'B3'], ['B4', 'B2']]
    for index_name, band_pair in zip(index_names, index_band_pairs):
        index_band = composite.normalizedDifference(band_pair).rename(index_name)
        composite = ee.Image.cat([composite, index_band])

    return composite

## Landsat 8 Cloud Free Composite
def CloudMaskL8sr(image):
    """
    Mask cloud, cloud-shadow, edge and out-of-range pixels of a Landsat 8 SR image.

    A pixel is kept only when all of the following hold:
    - bits 3 (cloud shadow) and 5 (clouds) of `pixel_qa` are both zero,
    - the pixel is unmasked in every band,
    - every optical band (B1-B7) has a value strictly between 0 and 10000.
    """
    opticalBands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    cloudShadowBitMask = ee.Number(2).pow(3).int()
    cloudsBitMask = ee.Number(2).pow(5).int()
    qa = image.select('pixel_qa')
    mask1 = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(
            qa.bitwiseAnd(cloudsBitMask).eq(0))
    mask2 = image.mask().reduce('min')
    optical = image.select(opticalBands)
    mask3 = optical.gt(0).And(optical.lt(10000)).reduce('min')
    mask = mask1.And(mask2).And(mask3)

    return image.updateMask(mask)

def CloudFreeCompositeL8(startDate, stopDate):
    """
    Build a median composite of cloud-masked Landsat 8 SR images.

    Images of 'LANDSAT/LC08/C01/T1_SR' between `startDate` and `stopDate` are
    masked with CloudMaskL8sr and reduced with the median. Two normalized
    difference bands are then appended: 'ndvi' from (B5, B4) and 'ndwi' from
    (B5, B3).
    """
    masked_images = (ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
                     .filterDate(startDate, stopDate)
                     .map(CloudMaskL8sr))

    composite = masked_images.median()

    # NOTE(review): 'ndwi' is computed as (B5 - B3) / (B5 + B3); confirm the
    # band order is the intended sign convention.
    index_names = ['ndvi', 'ndwi']
    index_band_pairs = [['B5', 'B4'], ['B5', 'B3']]
    for index_name, band_pair in zip(index_names, index_band_pairs):
        index_band = composite.normalizedDifference(band_pair).rename(index_name)
        composite = ee.Image.cat([composite, index_band])

    return composite

## Sentinel 2 Cloud Free Composite
def CloudMaskS2(image):
    """
    Mask cloudy pixels of a Sentinel-2 image using its 'QA60' band.

    European Space Agency (ESA) clouds from 'QA60', i.e. Quality Assessment band at 60m
    parsed by Nick Clinton

    A pixel is kept only when both the cloud bit (10) and the cirrus bit (11)
    are zero.
    """
    qa = image.select('QA60')

    # Bits 10 and 11 are clouds and cirrus, respectively.
    cloudBitMask = 1 << 10
    cirrusBitMask = 1 << 11

    # Both flags set to zero indicates clear conditions.
    mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(
            qa.bitwiseAnd(cirrusBitMask).eq(0))

    return image.updateMask(mask)

def CloudFreeCompositeS2(startDate, stopDate):
    """
    Build a median composite of cloud-masked Sentinel-2 images.

    Images of 'COPERNICUS/S2' between `startDate` and `stopDate` whose
    'CLOUDY_PIXEL_PERCENTAGE' is below 20 are masked with CloudMaskS2 and
    reduced with the median. Two normalized difference bands are then
    appended: 'ndvi' from (B8, B4) and 'ndwi' from (B8, B3).
    """
    low_cloud = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)
    masked_images = (ee.ImageCollection('COPERNICUS/S2')
                     .filterDate(startDate, stopDate)
                     .filter(low_cloud)
                     .map(CloudMaskS2))

    composite = masked_images.median()

    # NOTE(review): 'ndwi' is computed as (B8 - B3) / (B8 + B3); confirm the
    # band order is the intended sign convention.
    index_names = ['ndvi', 'ndwi']
    index_band_pairs = [['B8', 'B4'], ['B8', 'B3']]
    for index_name, band_pair in zip(index_names, index_band_pairs):
        index_band = composite.normalizedDifference(band_pair).rename(index_name)
        composite = ee.Image.cat([composite, index_band])

    return composite

## Cropland Data Layers
def CroplandData(startDate, stopDate):
    """
    Reclassify the first USDA/NASS/CDL image of a period into four classes.

    The 'cropland' band is remapped to a 'landcover' band whose value is 1 for
    the `land` codes, 2 for the `water` codes, 3 for the `urban` codes and 0
    for every other code. One extra band per class value ('cropland', 'land',
    'water', 'urban' for 0..3) is appended, each being `landcover == value`.
    """
    period_images = ee.ImageCollection('USDA/NASS/CDL').filterDate(startDate, stopDate)
    image = ee.Image(period_images.first())

    # CDL class codes grouped into the three non-zero target classes.
    land = ['65', '131', '141', '142', '143', '152', '176', '87', '190', '195']
    water = ['83', '92', '111']
    urban = ['82', '121', '122', '123', '124']

    # Expression of the form "(crop == a || crop == b) * 1 + (...) * 2 + (...) * 3".
    terms = []
    for class_value, codes in enumerate([land, water, urban], start=1):
        condition = ' || '.join('crop == ' + code for code in codes)
        terms.append('(' + condition + ') * ' + str(class_value))
    expression = ' + '.join(terms)

    image = image.expression(expression, {'crop': image.select(['cropland'])})
    image = image.rename('landcover')

    # Split image into 1 band per class
    landcover = image
    for class_value, class_name in enumerate(['cropland', 'land', 'water', 'urban']):
        class_band = landcover.eq(class_value).rename(class_name)
        image = ee.Image.cat([image, class_band])

    return image

## National Land Cover Database
def ImperviousData(startDate, stopDate):
    """
    Return the 'impervious' band of the first USGS/NLCD image of a period.

    The band is divided by 100 and cast to float.
    """
    period_images = ee.ImageCollection('USGS/NLCD').filterDate(startDate, stopDate)

    first_image = ee.Image(period_images.first())
    impervious = first_image.select('impervious')

    # Normalize to 1
    return impervious.divide(100).float()

def WaterQuality(startDate, stopDate):
    """
    Return the 'turbidity_blended_mean' band of the first Lake Water Quality
    (LWQ-100m) image between `startDate` and `stopDate`.
    """
    period_images = ee.ImageCollection(
        'projects/vizzuality/skydipper-water-quality/LWQ-100m'
    ).filterDate(startDate, stopDate)

    first_image = ee.Image(period_images.first())

    # Keep only the turbidity band
    return first_image.select('turbidity_blended_mean')

## ------------------------------------------------------------------- ##

def Composite(collection):
    """
    Return the function that builds the image for a dataset slug.

    The returned function is called as `f(startDate, stopDate)`. Raises
    KeyError when the slug is not one of the known datasets.
    """
    builders = {
        'Sentinel-2-Top-of-Atmosphere-Reflectance': CloudFreeCompositeS2,
        'Landsat-7-Surface-Reflectance': CloudFreeCompositeL7,
        'Landsat-8-Surface-Reflectance': CloudFreeCompositeL8,
        'USDA-NASS-Cropland-Data-Layers': CroplandData,
        'USGS-National-Land-Cover-Database': ImperviousData,
        'Lake-Water-Quality-100m': WaterQuality,
    }

    return builders[collection]

In [None]:
import ee_collection_specifics

### Populate `dataset` table

In [None]:
# Read table
datasets = df_from_query('dataset')

# Collect one record per slug that is not stored yet. The Skydipper API is only
# queried for missing slugs, and the table is saved once at the end.
known_slugs = set(datasets['slug'])
new_rows = []
for collection in slugs_list:
    if collection in known_slugs:
        continue
    known_slugs.add(collection)

    ds = Skydipper.Dataset(id_hash=collection)
    new_rows.append({
        'slug': collection,
        'name': ds.attributes.get('name'),
        # Band lists are stored as their string representation.
        'bands': str(ee_collection_specifics.ee_bands(collection)),
        'rgb_bands': str(ee_collection_specifics.ee_bands_rgb(collection)),
        'provider': ds.attributes.get('provider'),
    })

if new_rows:
    # Rows are keyed by column name, so they do not depend on the column order
    # returned by the database.
    datasets = pd.concat([datasets, pd.DataFrame(new_rows)], ignore_index=True)

    # Save table
    df_to_csv(datasets, "dataset")
    df_to_db(datasets, "dataset")

datasets

***
## Composite images
**Variables**

In [None]:
# Slugs of the two datasets to composite (keys of ee_collection_specifics).
input_dataset = 'Sentinel-2-Top-of-Atmosphere-Reflectance'
output_dataset = 'Lake-Water-Quality-100m'
# Date range passed to the composite functions as (startDate, stopDate).
init_date = '2019-01-21'
end_date = '2019-01-31'
# Order matters: index 0 is the input dataset, index 1 the output dataset.
collections = [input_dataset, output_dataset]

**Display composite**

In [None]:
# Legacy URL format of Earth Engine map tiles. It is only used when getMapId()
# does not return a tile fetcher (older earthengine-api releases).
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'

composites = []
Map = folium.Map(location=[39.31, 0.302])
for n, collection in enumerate(collections):
    composites.append(ee_collection_specifics.Composite(collection)(init_date, end_date))

    mapid = composites[n].getMapId(ee_collection_specifics.vizz_params_rgb(collection))
    # Prefer the tile URL supplied by the API itself when it is available.
    tile_fetcher = mapid.get('tile_fetcher')
    if tile_fetcher is not None:
        tiles_url = tile_fetcher.url_format
    else:
        tiles_url = EE_TILES.format(**mapid)

    folium.TileLayer(
        tiles=tiles_url,
        attr='Google Earth Engine',
        overlay=True,
        name=str(ee_collection_specifics.ee_bands_rgb(collection))).add_to(Map)

Map.add_child(folium.LayerControl())
Map

***
## Geostore

We select the areas from which we will export the training data.

**Variables**

In [None]:
def polygons_to_geoStoreMultiPoligon(Polygons):
    """Merge per-split polygon collections into one geostore payload.

    Parameters
    ----------
    Polygons : iterable of (dict or None)
        GeoJSON FeatureCollections given positionally as
        (training, validation, test). A falsy entry (``None`` or ``{}``)
        means that split has no areas and is skipped.

    Returns
    -------
    dict
        ``{"geojson": {"type": "FeatureCollection", "features": [...]}}``
        holding one ``MultiPolygon`` Feature per non-empty split. The split
        name is stored in ``properties["name"]`` and the coordinates are the
        list of the ``geometry.coordinates`` of every input feature.

    Raises
    ------
    ValueError
        If a non-empty collection is found beyond the third position, since
        there is no split name for it.
    """
    split_names = ["training", "validation", "test"]
    features = []
    # Enumerate BEFORE dropping empty entries: the split name depends on the
    # position in the caller's list, so [train, None, test] must still label
    # the last collection "test" rather than "validation".
    for position, polygons in enumerate(Polygons):
        if not polygons:
            continue
        if position >= len(split_names):
            raise ValueError(
                "Expected at most %d polygon collections %s, found one at position %d"
                % (len(split_names), tuple(split_names), position)
            )

        multipoligon = [
            polygon.get('geometry').get('coordinates')
            for polygon in polygons.get('features')
        ]

        features.append({
            "type": "Feature",
            "properties": {"name": split_names[position]},
            "geometry": {
                "type": "MultiPolygon",
                "coordinates": multipoligon
            }
        })

    MultiPoligon = {
        "geojson": {
            "type": "FeatureCollection",
            "features": features
        }
    }

    return MultiPoligon

In [None]:
if collections[1] == 'USGS-National-Land-Cover-Database':
    trainPolygons = {"type":"FeatureCollection","features":[{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-123.22265625000001,45.213003555993964],[-122.03613281249999,45.213003555993964],[-122.03613281249999,46.164614496897094],[-123.22265625000001,46.164614496897094],[-123.22265625000001,45.213003555993964]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-122.1240234375,38.16911413556086],[-120.76171875,38.16911413556086],[-120.76171875,39.13006024213511],[-122.1240234375,39.13006024213511],[-122.1240234375,38.16911413556086]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-119.70703125,34.77771580360469],[-118.3447265625,34.77771580360469],[-118.3447265625,35.92464453144099],[-119.70703125,35.92464453144099],[-119.70703125,34.77771580360469]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-115.97167968750001,35.496456056584165],[-114.521484375,35.496456056584165],[-114.521484375,36.73888412439431],[-115.97167968750001,36.73888412439431],[-115.97167968750001,35.496456056584165]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-118.21289062499999,33.797408767572485],[-116.23535156249999,33.797408767572485],[-116.23535156249999,34.379712580462204],[-118.21289062499999,34.379712580462204],[-118.21289062499999,33.797408767572485]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-112.6318359375,33.02708758002874],[-111.4013671875,33.02708758002874],[-111.4013671875,34.016241889667015],[-112.6318359375,34.016241889667015],[-112.6318359375,33.02708758002874]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-105.6005859375,39.40224434029275],[-104.5458984375,39.40224434029275],[-104.5458984375,40.44694705960048],[-105.6005859375,40.44694705960048],[-105.6005859375,39.40224434029275]]]}},{"type":"Feature","properties":{},"geometry"
:{"type":"Polygon","coordinates":[[[-112.67578124999999,40.27952566881291],[-111.4453125,40.27952566881291],[-111.4453125,41.21172151054787],[-112.67578124999999,41.21172151054787],[-112.67578124999999,40.27952566881291]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-97.734375,32.21280106801518],[-95.9326171875,32.21280106801518],[-95.9326171875,33.32134852669881],[-97.734375,33.32134852669881],[-97.734375,32.21280106801518]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-99.36035156249999,29.036960648558267],[-97.822265625,29.036960648558267],[-97.822265625,30.031055426540206],[-99.36035156249999,30.031055426540206],[-99.36035156249999,29.036960648558267]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-95.185546875,38.61687046392973],[-93.9990234375,38.61687046392973],[-93.9990234375,39.639537564366684],[-95.185546875,39.639537564366684],[-95.185546875,38.61687046392973]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-91.2744140625,38.30718056188316],[-89.6484375,38.30718056188316],[-89.6484375,39.16414104768742],[-91.2744140625,39.16414104768742],[-91.2744140625,38.30718056188316]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-88.330078125,41.343824581185686],[-86.8798828125,41.343824581185686],[-86.8798828125,42.391008609205045],[-88.330078125,42.391008609205045],[-88.330078125,41.343824581185686]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-93.91113281249999,44.49650533109348],[-92.5048828125,44.49650533109348],[-92.5048828125,45.583289756006316],[-93.91113281249999,45.583289756006316],[-93.91113281249999,44.49650533109348]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-81.38671875,34.813803317113155],[-80.2880859375,34.813803317113155],[-80.2880859375,35.782170703266075],[-81.38671875,35.7821707032660
75],[-81.38671875,34.813803317113155]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-85.0341796875,33.17434155100208],[-83.7158203125,33.17434155100208],[-83.7158203125,34.27083595165],[-85.0341796875,34.27083595165],[-85.0341796875,33.17434155100208]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-87.2314453125,35.60371874069731],[-86.17675781249999,35.60371874069731],[-86.17675781249999,36.63316209558658],[-87.2314453125,36.63316209558658],[-87.2314453125,35.60371874069731]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-87.14355468749999,32.91648534731439],[-86.2646484375,32.91648534731439],[-86.2646484375,33.97980872872457],[-87.14355468749999,33.97980872872457],[-87.14355468749999,32.91648534731439]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-81.9140625,27.566721430409707],[-81.03515625,27.566721430409707],[-81.03515625,28.844673680771795],[-81.9140625,28.844673680771795],[-81.9140625,27.566721430409707]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-84.7705078125,38.92522904714054],[-83.75976562499999,38.92522904714054],[-83.75976562499999,40.17887331434696],[-84.7705078125,40.17887331434696],[-84.7705078125,38.92522904714054]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-80.947265625,40.27952566881291],[-79.98046875,40.27952566881291],[-79.98046875,41.178653972331674],[-80.947265625,41.178653972331674],[-80.947265625,40.27952566881291]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-75.2783203125,40.613952441166596],[-73.8720703125,40.613952441166596],[-73.8720703125,41.21172151054787],[-75.2783203125,41.21172151054787],[-75.2783203125,40.613952441166596]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-78.0908203125,38.44498466889473],[-76.728515625,38.44498466
889473],[-76.728515625,39.33429742980725],[-78.0908203125,39.33429742980725],[-78.0908203125,38.44498466889473]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-112.6318359375,46.164614496897094],[-111.4453125,46.164614496897094],[-111.4453125,46.86019101567027],[-112.6318359375,46.86019101567027],[-112.6318359375,46.164614496897094]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-117.1142578125,43.229195113965005],[-115.57617187499999,43.229195113965005],[-115.57617187499999,44.08758502824516],[-117.1142578125,44.08758502824516],[-117.1142578125,43.229195113965005]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-96.328125,35.746512259918504],[-95.2734375,35.746512259918504],[-95.2734375,36.4566360115962],[-96.328125,36.4566360115962],[-96.328125,35.746512259918504]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-98.173828125,35.02999636902566],[-96.9873046875,35.02999636902566],[-96.9873046875,35.817813158696616],[-98.173828125,35.817813158696616],[-98.173828125,35.02999636902566]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-92.6806640625,34.379712580462204],[-91.7578125,34.379712580462204],[-91.7578125,35.10193405724606],[-92.6806640625,35.10193405724606],[-92.6806640625,34.379712580462204]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-90.7470703125,34.63320791137959],[-89.3408203125,34.63320791137959],[-89.3408203125,35.71083783530009],[-90.7470703125,35.71083783530009],[-90.7470703125,34.63320791137959]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-107.314453125,34.74161249883172],[-106.12792968749999,34.74161249883172],[-106.12792968749999,35.60371874069731],[-107.314453125,35.60371874069731],[-107.314453125,34.74161249883172]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordin
ates":[[[-94.3505859375,41.1455697310095],[-92.94433593749999,41.1455697310095],[-92.94433593749999,42.19596877629178],[-94.3505859375,42.19596877629178],[-94.3505859375,41.1455697310095]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-85.869140625,40.68063802521456],[-84.5947265625,40.68063802521456],[-84.5947265625,41.64007838467894],[-85.869140625,41.64007838467894],[-85.869140625,40.68063802521456]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-87.099609375,39.30029918615029],[-85.6494140625,39.30029918615029],[-85.6494140625,40.245991504199026],[-87.099609375,40.245991504199026],[-87.099609375,39.30029918615029]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-117.7734375,47.30903424774781],[-116.103515625,47.30903424774781],[-116.103515625,48.1367666796927],[-117.7734375,48.1367666796927],[-117.7734375,47.30903424774781]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-97.91015624999999,37.3002752813443],[-96.8115234375,37.3002752813443],[-96.8115234375,38.09998264736481],[-97.91015624999999,38.09998264736481],[-97.91015624999999,37.3002752813443]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-94.06494140625,32.25926542645933],[-93.4716796875,32.25926542645933],[-93.4716796875,32.7872745269555],[-94.06494140625,32.7872745269555],[-94.06494140625,32.25926542645933]]]}}]}  
    validPolygons = {"type":"FeatureCollection","features":[{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-95.888671875,29.38217507514529],[-95.06469726562499,29.38217507514529],[-95.06469726562499,30.12612436422458],[-95.888671875,30.12612436422458],[-95.888671875,29.38217507514529]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-83.84765625,42.374778361114195],[-82.94677734375,42.374778361114195],[-82.94677734375,42.78733853171998],[-83.84765625,42.78733853171998],[-83.84765625,42.374778361114195]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-96.88568115234375,40.69521661351714],[-95.77606201171875,40.69521661351714],[-95.77606201171875,41.393294288784865],[-96.88568115234375,41.393294288784865],[-96.88568115234375,40.69521661351714]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-105.05126953124999,38.57393751557591],[-104.490966796875,38.57393751557591],[-104.490966796875,39.0831721934762],[-105.05126953124999,39.0831721934762],[-105.05126953124999,38.57393751557591]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-122.62390136718749,46.95776134668866],[-121.84936523437499,46.95776134668866],[-121.84936523437499,48.04136507445029],[-122.62390136718749,48.04136507445029],[-122.62390136718749,46.95776134668866]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-120.157470703125,36.465471886798134],[-119.24560546875001,36.465471886798134],[-119.24560546875001,37.03763967977139],[-120.157470703125,37.03763967977139],[-120.157470703125,36.465471886798134]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-120.02563476562501,39.33854604847979],[-119.55871582031251,39.33854604847979],[-119.55871582031251,39.7240885773337],[-120.02563476562501,39.7240885773337],[-120.02563476562501,39.33854604847979]]]}},{"type":"Feature","
properties":{},"geometry":{"type":"Polygon","coordinates":[[[-86.30859375,37.61423141542417],[-84.9462890625,37.61423141542417],[-84.9462890625,38.65119833229951],[-86.30859375,38.65119833229951],[-86.30859375,37.61423141542417]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-78.31054687499999,36.914764288955936],[-76.86035156249999,36.914764288955936],[-76.86035156249999,38.03078569382294],[-78.31054687499999,38.03078569382294],[-78.31054687499999,36.914764288955936]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-102.87597656249999,31.541089879585808],[-101.4697265625,31.541089879585808],[-101.4697265625,32.24997445586331],[-102.87597656249999,32.24997445586331],[-102.87597656249999,31.541089879585808]]]}},{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-83.5400390625,39.50404070558415],[-82.177734375,39.50404070558415],[-82.177734375,40.54720023441049],[-83.5400390625,40.54720023441049],[-83.5400390625,39.50404070558415]]]}}]}
    testPolygons = None
    
    geostore = polygons_to_geoStoreMultiPoligon([trainPolygons, validPolygons, testPolygons])
    
if collections[1] == 'USDA-NASS-Cropland-Data-Layers':
    trainPolygons = {"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-122.882080078125, 40.50126945841645], [-122.1240234375, 40.50126945841645], [-122.1240234375, 41.008920735004885], [-122.882080078125, 41.008920735004885], [-122.882080078125, 40.50126945841645]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-122.2283935546875, 39.00637903337455], [-121.607666015625, 39.00637903337455], [-121.607666015625, 39.46588451142044], [-122.2283935546875, 39.46588451142044], [-122.2283935546875, 39.00637903337455]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.355224609375, 38.77978137804918], [-119.608154296875, 38.77978137804918], [-119.608154296875, 39.342794408952365], [-120.355224609375, 39.342794408952365], [-120.355224609375, 38.77978137804918]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-121.90979003906249, 37.70555348721583], [-120.9814453125, 37.70555348721583], [-120.9814453125, 38.39764411353178], [-121.90979003906249, 38.39764411353178], [-121.90979003906249, 37.70555348721583]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.03662109374999, 37.45741810262938], [-119.1851806640625, 37.45741810262938], [-119.1851806640625, 38.08268954483802], [-120.03662109374999, 38.08268954483802], [-120.03662109374999, 37.45741810262938]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.03662109374999, 37.45741810262938], [-119.1851806640625, 37.45741810262938], [-119.1851806640625, 38.08268954483802], [-120.03662109374999, 38.08268954483802], [-120.03662109374999, 37.45741810262938]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.03662109374999, 37.45741810262938], [-119.1851806640625, 37.45741810262938], 
[-119.1851806640625, 38.08268954483802], [-120.03662109374999, 38.08268954483802], [-120.03662109374999, 37.45741810262938]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-112.554931640625, 33.0178760185549], [-111.588134765625, 33.0178760185549], [-111.588134765625, 33.78827853625996], [-112.554931640625, 33.78827853625996], [-112.554931640625, 33.0178760185549]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-112.87353515625, 40.51379915504413], [-111.829833984375, 40.51379915504413], [-111.829833984375, 41.28606238749825], [-112.87353515625, 41.28606238749825], [-112.87353515625, 40.51379915504413]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-108.19335937499999, 39.095962936305476], [-107.1826171875, 39.095962936305476], [-107.1826171875, 39.85915479295669], [-108.19335937499999, 39.85915479295669], [-108.19335937499999, 39.095962936305476]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-124.25537109375, 30.86451022625836], [-124.25537109375, 30.86451022625836], [-124.25537109375, 30.86451022625836], [-124.25537109375, 30.86451022625836]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-106.875, 37.142803443716836], [-105.49072265625, 37.142803443716836], [-105.49072265625, 38.18638677411551], [-106.875, 38.18638677411551], [-106.875, 37.142803443716836]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-117.31201171875001, 43.27720532212024], [-116.01562499999999, 43.27720532212024], [-116.01562499999999, 44.134913443750726], [-117.31201171875001, 44.134913443750726], [-117.31201171875001, 43.27720532212024]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-115.7080078125, 44.69989765840318], [-114.7412109375, 44.69989765840318], [-114.7412109375, 
45.36758436884978], [-115.7080078125, 45.36758436884978], [-115.7080078125, 44.69989765840318]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-120.65185546875, 47.517200697839414], [-119.33349609375, 47.517200697839414], [-119.33349609375, 48.32703913063476], [-120.65185546875, 48.32703913063476], [-120.65185546875, 47.517200697839414]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-119.83886718750001, 45.69083283645816], [-118.38867187500001, 45.69083283645816], [-118.38867187500001, 46.694667307773116], [-119.83886718750001, 46.694667307773116], [-119.83886718750001, 45.69083283645816]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-107.09472656249999, 47.45780853075031], [-105.84228515625, 47.45780853075031], [-105.84228515625, 48.31242790407178], [-107.09472656249999, 48.31242790407178], [-107.09472656249999, 47.45780853075031]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-101.57958984375, 46.93526088057719], [-100.107421875, 46.93526088057719], [-100.107421875, 47.945786463687185], [-101.57958984375, 47.945786463687185], [-101.57958984375, 46.93526088057719]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-101.162109375, 44.32384807250689], [-99.7119140625, 44.32384807250689], [-99.7119140625, 45.22848059584359], [-101.162109375, 45.22848059584359], [-101.162109375, 44.32384807250689]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-100.5908203125, 41.261291493919884], [-99.25048828124999, 41.261291493919884], [-99.25048828124999, 42.114523952464246], [-100.5908203125, 42.114523952464246], [-100.5908203125, 41.261291493919884]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.9541015625, 37.142803443716836], [-96.65771484375, 
37.142803443716836], [-96.65771484375, 38.13455657705411], [-97.9541015625, 38.13455657705411], [-97.9541015625, 37.142803443716836]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-112.78564453124999, 32.91648534731439], [-111.357421875, 32.91648534731439], [-111.357421875, 33.925129700072], [-112.78564453124999, 33.925129700072], [-112.78564453124999, 32.91648534731439]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-106.435546875, 35.15584570226544], [-105.22705078125, 35.15584570226544], [-105.22705078125, 36.13787471840729], [-106.435546875, 36.13787471840729], [-106.435546875, 35.15584570226544]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.3828125, 32.45415593941475], [-96.2841796875, 32.45415593941475], [-96.2841796875, 33.22949814144951], [-97.3828125, 33.22949814144951], [-97.3828125, 32.45415593941475]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.97607421875, 35.04798673426734], [-97.00927734375, 35.04798673426734], [-97.00927734375, 35.764343479667176], [-97.97607421875, 35.764343479667176], [-97.97607421875, 35.04798673426734]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.97607421875, 35.04798673426734], [-97.00927734375, 35.04798673426734], [-97.00927734375, 35.764343479667176], [-97.97607421875, 35.764343479667176], [-97.97607421875, 35.04798673426734]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-95.4052734375, 47.62097541515849], [-94.24072265625, 47.62097541515849], [-94.24072265625, 48.28319289548349], [-95.4052734375, 48.28319289548349], [-95.4052734375, 47.62097541515849]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-94.19677734375, 41.27780646738183], [-93.09814453125, 41.27780646738183], [-93.09814453125, 
42.13082130188811], [-94.19677734375, 42.13082130188811], [-94.19677734375, 41.27780646738183]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-93.71337890625, 37.75334401310656], [-92.6806640625, 37.75334401310656], [-92.6806640625, 38.51378825951165], [-93.71337890625, 38.51378825951165], [-93.71337890625, 37.75334401310656]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.63720703125, 34.615126683462194], [-89.47265625, 34.615126683462194], [-89.47265625, 35.69299463209881], [-90.63720703125, 35.69299463209881], [-90.63720703125, 34.615126683462194]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-93.05419921875, 30.44867367928756], [-91.77978515625, 30.44867367928756], [-91.77978515625, 31.57853542647338], [-93.05419921875, 31.57853542647338], [-93.05419921875, 30.44867367928756]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.02197265625, 44.276671273775186], [-88.59374999999999, 44.276671273775186], [-88.59374999999999, 44.98034238084973], [-90.02197265625, 44.98034238084973], [-90.02197265625, 44.276671273775186]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.63720703125, 38.41055825094609], [-89.49462890625, 38.41055825094609], [-89.49462890625, 39.18117526158749], [-90.63720703125, 39.18117526158749], [-90.63720703125, 38.41055825094609]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-87.56103515625, 35.62158189955968], [-86.28662109375, 35.62158189955968], [-86.28662109375, 36.4566360115962], [-87.56103515625, 36.4566360115962], [-87.56103515625, 35.62158189955968]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-90.63720703125, 31.93351676190369], [-89.49462890625, 31.93351676190369], [-89.49462890625, 32.731840896865684], 
[-90.63720703125, 32.731840896865684], [-90.63720703125, 31.93351676190369]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-69.54345703125, 44.68427737181225], [-68.5107421875, 44.68427737181225], [-68.5107421875, 45.336701909968134], [-69.54345703125, 45.336701909968134], [-69.54345703125, 44.68427737181225]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-73.212890625, 41.49212083968776], [-72.35595703125, 41.49212083968776], [-72.35595703125, 42.032974332441405], [-73.212890625, 42.032974332441405], [-73.212890625, 41.49212083968776]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-77.93701171875, 38.70265930723801], [-76.97021484375, 38.70265930723801], [-76.97021484375, 39.26628442213066], [-77.93701171875, 39.26628442213066], [-77.93701171875, 38.70265930723801]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-79.25537109375, 35.44277092585766], [-78.15673828125, 35.44277092585766], [-78.15673828125, 36.13787471840729], [-79.25537109375, 36.13787471840729], [-79.25537109375, 35.44277092585766]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-81.4306640625, 33.55970664841198], [-80.44189453125, 33.55970664841198], [-80.44189453125, 34.288991865037524], [-81.4306640625, 34.288991865037524], [-81.4306640625, 33.55970664841198]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-84.90234375, 33.394759218577995], [-83.91357421875, 33.394759218577995], [-83.91357421875, 34.19817309627726], [-84.90234375, 34.19817309627726], [-84.90234375, 33.394759218577995]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-82.28759765625, 28.246327971048842], [-81.2548828125, 28.246327971048842], [-81.2548828125, 29.209713225868185], [-82.28759765625, 29.209713225868185], 
[-82.28759765625, 28.246327971048842]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-109.88525390624999, 42.65012181368022], [-108.56689453125, 42.65012181368022], [-108.56689453125, 43.50075243569041], [-109.88525390624999, 43.50075243569041], [-109.88525390624999, 42.65012181368022]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-117.61962890624999, 39.04478604850143], [-116.65283203124999, 39.04478604850143], [-116.65283203124999, 39.740986355883564], [-117.61962890624999, 39.740986355883564], [-117.61962890624999, 39.04478604850143]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-102.67822265625, 31.42866311735861], [-101.71142578125, 31.42866311735861], [-101.71142578125, 32.26855544621476], [-102.67822265625, 32.26855544621476], [-102.67822265625, 31.42866311735861]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-119.47631835937499, 36.03133177633187], [-118.58642578124999, 36.03133177633187], [-118.58642578124999, 36.55377524336089], [-119.47631835937499, 36.55377524336089], [-119.47631835937499, 36.03133177633187]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-116.224365234375, 33.091541548655215], [-115.56518554687499, 33.091541548655215], [-115.56518554687499, 33.568861182555565], [-116.224365234375, 33.568861182555565], [-116.224365234375, 33.091541548655215]]]}}]}
    validPolygons = {"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-122.13208008, 41.25126946], [-121.37402344, 41.25126946], [-121.37402344, 41.75892074], [-122.13208008, 41.75892074], [-122.13208008, 41.25126946]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-121.15979004, 38.45555349], [-120.23144531, 38.45555349], [-120.23144531, 39.14764411], [-121.15979004, 39.14764411], [-121.15979004, 38.45555349]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-111.80493164, 33.76787602], [-110.83813477, 33.76787602], [-110.83813477, 34.53827854], [-111.80493164, 34.53827854], [-111.80493164, 33.76787602]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-106.125, 37.89280344], [-104.74072266, 37.89280344], [-104.74072266, 38.93638677], [-106.125, 38.93638677], [-106.125, 37.89280344]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-119.08886719, 46.44083284], [-117.63867188, 46.44083284], [-117.63867188, 47.44466731], [-119.08886719, 47.44466731], [-119.08886719, 46.44083284]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-99.84082031, 42.01129149], [-98.50048828, 42.01129149], [-98.50048828, 42.86452395], [-99.84082031, 42.86452395], [-99.84082031, 42.01129149]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-96.6328125, 33.20415594], [-95.53417969, 33.20415594], [-95.53417969, 33.97949814], [-96.6328125, 33.97949814], [-96.6328125, 33.20415594]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-93.44677734, 42.02780647], [-92.34814453, 42.02780647], [-92.34814453, 42.8808213], [-93.44677734, 42.8808213], [-93.44677734, 42.02780647]]]}}, {"type": "Feature", "properties": {}, 
"geometry": {"type": "Polygon", "coordinates": [[[-89.27197266, 45.02667127], [-87.84375, 45.02667127], [-87.84375, 45.73034238], [-89.27197266, 45.73034238], [-89.27197266, 45.02667127]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-68.79345703, 45.43427737], [-67.76074219, 45.43427737], [-67.76074219, 46.08670191], [-68.79345703, 46.08670191], [-68.79345703, 45.43427737]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-80.68066406, 34.30970665], [-79.69189453, 34.30970665], [-79.69189453, 35.03899187], [-80.68066406, 35.03899187], [-80.68066406, 34.30970665]]]}}, {"type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-116.86962891, 39.79478605], [-115.90283203, 39.79478605], [-115.90283203, 40.49098636], [-116.86962891, 40.49098636], [-116.86962891, 39.79478605]]]}}]}
    testPolygons = None
    
    geostore = polygons_to_geoStoreMultiPoligon([trainPolygons, validPolygons, testPolygons])
    
if collections[1] == 'Lake-Water-Quality-100m':
    # Each training area is an axis-aligned rectangle, listed here by its
    # extents (x_min, y_min, x_max, y_max) in GeoJSON axis order.
    train_extents = [
        (-0.406494140625, 38.64476310916202, 0.27740478515625, 39.74521015328692),
        (-1.70013427734375, 35.15135442846945, -0.703125, 35.94688293218141),
    ]
    # One Polygon Feature per rectangle; the ring starts at the
    # (x_min, y_min) corner and is closed by repeating that first vertex.
    trainPolygons = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {},
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [[
                        [x_min, y_min],
                        [x_max, y_min],
                        [x_max, y_max],
                        [x_min, y_max],
                        [x_min, y_min],
                    ]],
                },
            }
            for x_min, y_min, x_max, y_max in train_extents
        ],
    }
    # No validation or test areas are defined for this collection.
    validPolygons = None
    testPolygons = None

    geostore = polygons_to_geoStoreMultiPoligon([trainPolygons, validPolygons, testPolygons])

In [None]:
# Count how many polygons each named multipolygon of the geostore contains.
geostore_features = geostore.get('geojson').get('features')
nFeatures = len(geostore_features)

nPolygons = {}
for n, feature in enumerate(geostore_features):
    # The feature name is the key; the number of polygons is the length of
    # the coordinates list of its geometry.
    multipoly_type = feature.get('properties').get('name')
    nPolygons[multipoly_type] = len(feature.get('geometry').get('coordinates'))

for multipoly_type, polygon_count in nPolygons.items():
    print(f'Number of {multipoly_type} polygons:', polygon_count)

In [None]:
# Wrap the geostore payload built above in a Skydipper Geometry object.
# NOTE(review): presumably this registers the geometry with the Skydipper API
# and assigns it an id - confirm against the Skydipper documentation.
multipolygon = Skydipper.Geometry(attributes=geostore)
# Bare expression so the notebook displays the object.
multipolygon

In [None]:
# Call the Geometry object's own map() helper; as the last expression of the
# cell, its return value is what the notebook displays.
# NOTE(review): presumably an interactive map of the geostore - confirm.
multipolygon.map()

**Display Polygons**

In [None]:
def get_geojson_string(geom):
    """Serialize a GeoJSON geometry as a single-feature FeatureCollection string.

    When ``geom['coordinates']`` is a flat ring (a list of positions instead
    of a list of rings) it is wrapped in one extra list so that it has polygon
    nesting. The wrapping is applied to a shallow copy, so the dict passed in
    by the caller is never modified.

    Args:
        geom: dict with GeoJSON geometry keys (``type``, ``coordinates``).

    Returns:
        str: JSON text of a FeatureCollection with one Feature whose
        ``properties`` are empty and whose geometry is ``geom``.
    """
    coords = geom.get('coordinates', None)
    first = coords[0] if coords else None
    # Only a sequence of positions ([[x, y], ...]) needs wrapping. A bare
    # position ([x, y]) has a scalar first element and is passed through.
    if isinstance(first, (list, tuple)) and not any(isinstance(i, list) for i in first):
        geom = dict(geom, coordinates=[coords])
    feat_col = {"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": geom}]}
    return json.dumps(feat_col)

In [None]:
# Define the URL format used for Earth Engine generated map tiles.
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'

features = multipolygon.attributes['geojson']['features']
if len(features) == 0:
    # Without geometries there is nothing to centre the map on; fail with a clear message
    # instead of indexing into None further down.
    raise ValueError('The geostore has no features to display.')
shapely_geometry = [shape(feature['geometry']) for feature in features]

# The coordinate pairs are reversed because folium takes (lat, lon) while GeoJSON stores (lon, lat).
centroid = list(shapely_geometry[0].centroid.coords)[0][::-1]

bbox = multipolygon.attributes['bbox']
bounds = [bbox[2:][::-1], bbox[:2][::-1]]

Map = folium.Map(location=centroid, zoom_start=6)
Map.fit_bounds(bounds)

# NOTE(review): `output_dataset` and `composites` must already exist in the kernel when this cell
# runs; confirm they are defined by an earlier cell so that Restart & Run All works.
for params in ee_collection_specifics.vizz_params(output_dataset):
    mapid = composites[1].getMapId(params)
    folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name=str(params['bands']),
    ).add_to(Map)

nFeatures = len(features)
colors = ['#64D1B8', 'red', 'blue']


def style_function(_feature):
    """Outline-only style shared by every polygon drawn on the map."""
    return {
        'fillOpacity': 0.0,
        'weight': 4,
        'color': colors[0]
    }


for feature in features:
    folium.GeoJson(data=get_geojson_string(feature['geometry']), style_function=style_function).add_to(Map)

Map.add_child(folium.LayerControl())
Map

***
## Data pre-processing

We normalize the composite images to have values from 0 to 1.

**Variables**

In [None]:
# Dataset slugs for the network input and output; they are matched against the `slug` column of the dataset table.
input_dataset = 'Sentinel-2-Top-of-Atmosphere-Reflectance'
output_dataset = 'Lake-Water-Quality-100m'
# Date range handed to the composite builders.
init_date = '2019-01-21'
end_date = '2019-01-31'
scale = 100 # scale in meters
norm_type = 'geostore' # options ['global', 'geostore', 'custom']
# Order matters: index 0 is the input dataset, index 1 the output dataset.
collections = [input_dataset, output_dataset]
geostore_id = multipolygon.id

**Normalize images function**

In [None]:
def min_max_values(image, collection, scale, norm_type='global', geostore_id=None, values=None):
    """Return the per-band min/max values used to normalize an Earth Engine image.

    Args:
        image: ee.Image whose bands are reduced.
        collection: dataset slug handed to ``ee_collection_specifics.ee_bands_normThreshold``.
        scale: scale passed to the region reducers.
        norm_type: one of
            'global'   - reduce over a single rectangle spanning lon [-180, 180], lat [-90, 90];
            'geostore' - reduce over the features of the geostore ``geostore_id``;
            'custom'   - skip the reduction and return ``values`` as given.
        geostore_id: geostore identifier, required when ``norm_type == 'geostore'``.
        values: dictionary returned unchanged for 'custom' (an empty dict when omitted).

    Returns:
        dict: for 'global'/'geostore', the result of the ``minMax`` reduction, where
        ``<band>_max`` is replaced by the last histogram bucket mean whose cumulative
        percentage is below the band's normalization threshold (only for bands whose
        threshold is not 100).

    Raises:
        ValueError: if ``norm_type`` is not a supported option, or if 'geostore' is
            requested without a ``geostore_id``.
    """
    if norm_type not in ('global', 'geostore', 'custom'):
        raise ValueError("norm_type must be one of 'global', 'geostore' or 'custom', got {!r}".format(norm_type))

    if norm_type == 'custom':
        # A fresh dict per call: a shared mutable default would leak state between calls.
        return {} if values is None else values

    if norm_type == 'geostore' and geostore_id is None:
        raise ValueError('Geostore_id is needed')

    normThreshold = ee_collection_specifics.ee_bands_normThreshold(collection)

    if norm_type == 'global':
        # With num = 2 the grid collapses to one rectangle covering the whole globe.
        num = 2
        lon = np.linspace(-180, 180, num)
        lat = np.linspace(-90, 90, num)
        features = [
            ee.Feature(ee.Geometry.Rectangle(lon[i], lat[j], lon[i+1], lat[j+1]))
            for i in range(len(lon)-1)
            for j in range(len(lat)-1)
        ]
    else:
        geostore = Skydipper.Geometry(id_hash=geostore_id)
        features = [ee.Feature(feature) for feature in geostore.attributes['geojson']['features']]

    # Both reductions share everything except the reducer itself.
    reducer_args = {
        'geometry': ee.FeatureCollection(features),
        'maxPixels': 1e10,
        'bestEffort': True,
        'scale': scale,
        'tileScale': 10
    }

    values = image.reduceRegion(reducer=ee.Reducer.minMax(), **reducer_args).getInfo()

    # Avoid outliers by taking into account only the normThreshold% of the data points.
    hist = image.reduceRegion(reducer=ee.Reducer.histogram(), **reducer_args).getInfo()

    for band, threshold in normThreshold.items():
        if threshold != 100:
            count = np.array(hist.get(band).get('histogram'))
            x = np.array(hist.get(band).get('bucketMeans'))

            cumulative_per = np.cumsum(count/count.sum()*100)

            below = x[cumulative_per < threshold]
            # Keep the plain maximum when no bucket falls below the threshold.
            if below.size:
                values[band+'_max'] = float(below[-1])

    return values

def normalize_ee_images(image, collection, values):
    """Rescale every band of ``image`` to the [0, 1] range.

    Each band listed by ``ee_collection_specifics.ee_bands(collection)`` is
    clamped to ``values['<band>_min']`` / ``values['<band>_max']``, shifted by
    the minimum and divided by the (max - min) span. The rescaled bands are
    returned as one image, in the same order as the band list.
    """
    band_names = ee_collection_specifics.ee_bands(collection)

    for position, name in enumerate(band_names):
        lower = values[name+'_min']
        upper = values[name+'_max']
        rescaled = image.select(name).clamp(lower, upper).subtract(lower).divide(upper - lower)

        # The first band seeds the output image; the rest are appended to it.
        image_new = rescaled if position == 0 else image_new.addBands(rescaled)

    return image_new

### Populate `image` table

In [None]:
# Read dataset and image tables
datasets = df_from_query('dataset')
images = df_from_query('image')

for collection in collections:
    dataset_id = datasets[datasets['slug'] == collection].index[0]

    # A row counts as existing only when every key column matches its own value.
    # (`isin` over a mixed list would also accept a value found in a different column.)
    # `norm_type` is part of the key because later cells filter on it.
    already_registered = (
        (images['dataset_id'] == dataset_id) &
        (images['scale'] == scale) &
        (images['init_date'] == init_date) &
        (images['end_date'] == end_date) &
        (images['norm_type'] == norm_type)
    ).any()

    # Populate image table
    if not already_registered:
        # Create composite
        image = ee_collection_specifics.Composite(collection)(init_date, end_date)

        bands = ee_collection_specifics.ee_bands(collection)
        image = image.select(bands)

        if ee_collection_specifics.normalize(collection):
            # Get min/max values for each band
            values = min_max_values(image, collection, scale, norm_type=norm_type, geostore_id=geostore_id)
        else:
            values = {}

        # Append values to table (the value order follows the column order of the image table)
        dictionary = dict(zip(list(images.keys()), [[dataset_id], [''], [scale], [init_date], [end_date], [json.dumps(values)], [norm_type]]))
        # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.
        images = pd.concat([images, pd.DataFrame(dictionary)], ignore_index=True)

    # Save image table
    df_to_csv(images, "image")
    df_to_db(images, "image")

### Display composites

In [None]:
# Define the URL format used for Earth Engine generated map tiles.
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'

# `centroid` and `bounds` come from the "Display Polygons" cell above.
Map = folium.Map(location=centroid, zoom_start=6)
Map.fit_bounds(bounds)

# Read dataset and image tables
datasets = df_from_query('dataset')
images = df_from_query('image')

for collection in collections:

    dataset_id = datasets[datasets['slug'] == collection].index[0]
    
    # Rows of the image table that describe this composite.
    df = images[(images['dataset_id'] == dataset_id) & 
                (images['scale'] == scale) & 
                (images['init_date'] == init_date) & 
                (images['end_date'] == end_date) & 
                (images['norm_type'] == norm_type)
               ].copy()
    
    # Min/max values stored as JSON text; `.iloc[0]` raises IndexError when no row matched.
    values = json.loads(df['bands_min_max'].iloc[0])
    
    # Create composite
    image = ee_collection_specifics.Composite(collection)(init_date, end_date)
    
    # Normalize images (skipped when the stored dictionary is empty)
    if bool(values): 
        image = normalize_ee_images(image, collection, values)
    
        
    # One tile layer per visualization parameter set of the collection.
    for params in ee_collection_specifics.vizz_params(collection):
        mapid = image.getMapId(params)
        folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name=str(params['bands']),
      ).add_to(Map)

Map.add_child(folium.LayerControl())
Map

### Select input/output bands

**Variables**

In [None]:
# Band names selected from the input and output datasets.
input_bands = ['B2','B3','B4','B5','ndvi','ndwi']
output_bands = ['turbidity_blended_mean']
# Same ordering as `collections`: index 0 holds the input bands, index 1 the output bands.
bands = [input_bands, output_bands]

**Populate `image` table**

In [None]:
# Read dataset and image tables
datasets = df_from_query('dataset')
images = df_from_query('image')

for n, collection in enumerate(collections):

    dataset_id = datasets[datasets['slug'] == collection].index[0]
    selection = str(bands[n])

    # Rows of the image table that describe this composite.
    df = images[(images['dataset_id'] == dataset_id) &
                (images['scale'] == scale) &
                (images['init_date'] == init_date) &
                (images['end_date'] == end_date) &
                (images['norm_type'] == norm_type)
               ].copy()

    if df.empty:
        raise LookupError('No image row found for ' + str(collection) + '; populate the image table first.')

    # Take rows where bands_selections column is empty
    df1 = df[df['bands_selections'] == ''].copy()

    if not df1.empty:
        # Fill the first row that has no selection yet.
        index = df1.index[0]
        images.at[index, 'bands_selections'] = selection
    elif not (df['bands_selections'] == selection).any():
        # No row carries this selection: clone the first matching row with the new selection.
        df2 = df.iloc[0:1].copy()
        df2.at[df2.index[0], 'bands_selections'] = selection
        # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.
        images = pd.concat([images, df2], ignore_index=True)

# Save image table
df_to_csv(images, "image")
df_to_db(images, "image")

***
## Create TFRecords for training

In [None]:
def get_image_ids(collections, bands, scale, init_date, end_date, norm_type):
    """Look up the `image` table index of each collection's composite.

    For every collection (paired by position with ``bands``) the first row of
    the image table matching the dataset id, band selection, scale, date
    range and normalization type is located.

    Returns:
        list: one index label per collection, in the order of ``collections``.
    """
    # Read dataset and image tables
    datasets = df_from_query('dataset')
    images = df_from_query('image')

    def first_matching_index(position, slug):
        dataset_id = datasets[datasets['slug'] == slug].index[0]
        is_match = (
            (images['dataset_id'] == dataset_id) &
            (images['bands_selections'] == str(bands[position])) &
            (images['scale'] == scale) &
            (images['init_date'] == init_date) &
            (images['end_date'] == end_date) &
            (images['norm_type'] == norm_type)
        )
        return images[is_match].copy().index[0]

    return [first_matching_index(position, slug) for position, slug in enumerate(collections)]

In [None]:
def GeoJSONs_to_FeatureCollections(geostore):
    """Turn every geostore feature into an ``ee.FeatureCollection`` of polygons.

    Each entry of a feature's ``coordinates`` list becomes one
    ``ee.Feature`` wrapping an ``ee.Geometry.Polygon``.

    Returns:
        list: one ``ee.FeatureCollection`` per geostore feature, in order.
    """
    return [
        ee.FeatureCollection([
            ee.Feature(ee.Geometry.Polygon(polygon_coordinates))
            for polygon_coordinates in geojson_feature.get('geometry').get('coordinates')
        ])
        for geojson_feature in geostore.get('geojson').get('features')
    ]

In [None]:
def export_TFRecords(collections, bands, init_date, end_date, scale, norm_type, image_ids, geostore_id, 
                     sample_size, kernel_size, geostore, feature_collections, feature_lists):
    """Start Earth Engine tasks that export training/evaluation samples as TFRecords.

    The input and output composites are normalized with the min/max values
    stored in the `image` table (when there are any), stacked into a single
    image and sampled. With ``kernel_size == 1`` pixels are sampled from the
    first feature collection and split roughly 70/30 into training and
    evaluation sets. With ``kernel_size > 1`` square patches are sampled
    polygon by polygon, with one export task per polygon.

    Args:
        collections: [input_slug, output_slug] dataset slugs.
        bands: [input_bands, output_bands] lists of band names.
        init_date, end_date: date range passed to the composite builder.
        scale: sampling scale.
        norm_type: normalization type used to select the `image` rows.
        image_ids: [input_image_id, output_image_id]; only used in the output folder name.
        geostore_id: geostore identifier; only used in the output folder name.
        sample_size: number of samples requested (per polygon when sampling patches).
        kernel_size: patch edge length in pixels; 1 means single pixels.
        geostore: geostore dictionary, read for the polygon counts of the
            'training' and 'validation' features (patch mode only).
        feature_collections: list of ee.FeatureCollection objects to sample from.
        feature_lists: the same collections converted to ee.List objects.

    Returns:
        tuple: ``(task, file_paths, train_size, eval_size, base_names)`` where
        ``task`` is the last task that was started and ``file_paths`` holds one
        '<bucket>/<folder>/<name>' prefix per started task.

    Raises:
        ValueError: if ``kernel_size`` is smaller than 1 (no export mode applies).
    """
    if kernel_size < 1:
        raise ValueError('kernel_size must be >= 1, got ' + str(kernel_size))

    ## Stack the 2D images (input and output images of the Neural Network) 
    ## to create a single image from which samples can be taken
    
    bucket = 'geo-ai'
    folder = 'Data/'+str(image_ids[0])+'_'+ str(image_ids[1])+'/'+str(geostore_id)+'/'+str(kernel_size)+'/'+str(sample_size)
    
    # These numbers determined experimentally.
    # NOTE(review): a sample_size below 20 gives nShards == 0 and a division by zero in patch mode.
    nShards  = int(sample_size/20) # Number of shards in each polygon.
        
    # Read dataset and image tables
    datasets = df_from_query('dataset')
    images = df_from_query('image')
    
    for n, collection in enumerate(collections):
        
        dataset_id = datasets[datasets['slug'] == collection].index[0]
        
        df = images[(images['dataset_id'] == dataset_id) & 
                (images['bands_selections'] == str(bands[n])) & 
                (images['scale'] == scale) & 
                (images['init_date'] == init_date) & 
                (images['end_date'] == end_date) &
                (images['norm_type'] == norm_type)
               ].copy()
        
        values = json.loads(df['bands_min_max'].iloc[0])
    
        # Create composite
        composite = ee_collection_specifics.Composite(collection)(init_date, end_date)
    
        # Normalize images
        if bool(values): 
            composite = normalize_ee_images(composite, collection, values)
        
        if n == 0:
            image_stack = composite.select(bands[n])
        else:
            image_stack = ee.Image.cat([image_stack,composite.select(bands[n])]).float()
            
    if kernel_size == 1:
        # Sample pixels
        vector = image_stack.sample(region = feature_collections[0], scale = scale,\
                                    numPixels=sample_size, tileScale=4, seed=999)

        # Add random column
        vector = vector.randomColumn(seed=999)

        # Partition the sample approximately 70-30.
        train_dataset = vector.filter(ee.Filter.lt('random', 0.7))
        eval_dataset = vector.filter(ee.Filter.gte('random', 0.7))
        
        # Training and validation size
        train_size = train_dataset.size().getInfo()
        eval_size = eval_dataset.size().getInfo()
        
        # Export all the training/evaluation data.   
        file_paths = []
        base_names = ['training_pixels', 'eval_pixels']
        for k, dataset in enumerate([train_dataset, eval_dataset]):
            
            file_paths.append(bucket+ '/' + folder + '/' + base_names[k])
            
            # Create the tasks.
            task = ee.batch.Export.table.toCloudStorage(
              collection = dataset,
              description = 'Export '+base_names[k],
              fileNamePrefix = folder + '/' + base_names[k],
              bucket = bucket,
              fileFormat = 'TFRecord',
              selectors = bands[0] + bands[1])
            
            task.start()
            
    if kernel_size > 1:
        # Convert the image into an array image in which each pixel stores (kernel_size x kernel_size) patches of pixels for each band.
        # (Local names chosen so that the builtin `list` is not shadowed.)
        kernel_row = ee.List.repeat(1, kernel_size)
        kernel_weights = ee.List.repeat(kernel_row, kernel_size)
        kernel = ee.Kernel.fixed(kernel_size, kernel_size, kernel_weights)
        
        arrays = image_stack.neighborhoodToArray(kernel)
        
        # Training and validation size
        nFeatures = len(geostore.get('geojson').get('features'))
        nPolygons = {}
        for f in range(nFeatures):
            multipoly_type = geostore.get('geojson').get('features')[f].get('properties').get('name')
            nPolygons[multipoly_type] = len(geostore.get('geojson').get('features')[f].get('geometry').get('coordinates'))
            
        train_size = nPolygons['training']*sample_size
        eval_size = nPolygons['validation']*sample_size
    
        # Export all the training/evaluation data (in many pieces), with one task per geometry.      
        file_paths = []
        # NOTE(review): only two base names exist, so a third feature collection would raise IndexError below.
        base_names = ['training_patches', 'eval_patches']
        for i, feature in enumerate(feature_collections):
            for g in range(feature.size().getInfo()):
                geomSample = ee.FeatureCollection([])
                for j in range(nShards):
                    sample = arrays.sample(
                        region = ee.Feature(feature_lists[i].get(g)).geometry(), 
                        scale = scale, 
                        numPixels = sample_size / nShards, # Size of the shard.
                        seed = j,
                        tileScale = 8
                    )
                    geomSample = geomSample.merge(sample)
                    
                desc = base_names[i] + '_g' + str(g)
                
                file_paths.append(bucket+ '/' + folder + '/' + desc)
                
                task = ee.batch.Export.table.toCloudStorage(
                    collection = geomSample,
                    description = desc, 
                    bucket = bucket, 
                    fileNamePrefix = folder + '/' + desc,
                    fileFormat = 'TFRecord',
                    selectors = bands[0] + bands[1]
                )
                task.start()
                
    return task, file_paths, train_size, eval_size, base_names

**Variables**

In [None]:
# `get_image_ids` requires `norm_type` as its sixth argument; omitting it raises TypeError.
image_ids = get_image_ids(collections, bands, scale, init_date, end_date, norm_type)
geostore_id = multipolygon.id
kernel_size = 1 #256
sample_size = 20000 #1000 # Total sample size in each polygon.

In [None]:
# Convert the GeoJSON to feature collections
feature_collections = GeoJSONs_to_FeatureCollections(geostore)

# Convert the feature collections to lists for iteration.
feature_lists = list(map(lambda x: x.toList(x.size()), feature_collections))

# Export all the training/evaluation data.
versions = df_from_query('model_versions')

# Compare every key column with its own value. `isin` over the mixed list would also accept,
# for example, an image id that happens to equal the kernel size.
version_key = {
    'input_image_id': image_ids[0],
    'output_image_id': image_ids[1],
    'geostore_id': geostore_id,
    'kernel_size': kernel_size,
    'sample_size': sample_size,
}
df = pd.DataFrame({column: versions[column] == value for column, value in version_key.items()})
matching_versions = versions[df.all(axis=1)]

# Export when this configuration is new or when an earlier export did not complete.
# NOTE(review): when the data is already COMPLETED nothing is exported, so `base_names`,
# `train_size` and `eval_size` (read by the training-parameters cell) are not set by this cell.
if matching_versions.empty or not (matching_versions['data_status'] == 'COMPLETED').all():
    task, file_paths, train_size, eval_size, base_names = export_TFRecords(collections, bands, init_date, end_date, scale, norm_type, image_ids, geostore_id,
                                                                           sample_size, kernel_size, geostore, feature_collections, feature_lists)
    

### Populate `model_versions` table

In [None]:
# Read the model_versions table and flag, per key column, the rows that match the current run.
versions = df_from_query('model_versions')

# Each column is compared with its own value; `isin` over the mixed list would also accept
# a value that belongs to a different column.
version_key = {
    'input_image_id': image_ids[0],
    'output_image_id': image_ids[1],
    'geostore_id': geostore_id,
    'kernel_size': kernel_size,
    'sample_size': sample_size,
}
df = pd.DataFrame({column: versions[column] == value for column, value in version_key.items()})

In [None]:
# Register the current run in model_versions unless a matching row already exists.
# `df` is the boolean match mask built in the previous cell.
versions = df_from_query('model_versions')

if (versions.empty) or not df.all(axis=1).any():
    # The value order follows the column order of the model_versions table.
    dictionary = dict(zip(list(versions.keys()), [[-9999], [''], [image_ids[0]], [image_ids[1]], [geostore_id], [kernel_size], [sample_size], [json.dumps({})], [-9999], [''], [''], [False], [False]]))
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.
    versions = pd.concat([versions, pd.DataFrame(dictionary)], ignore_index=True, sort=False)
    
# Save version table
df_to_csv(versions, "model_versions")
df_to_db(versions, "model_versions") 

**Save status**

In [None]:
import time

def check_status_data(task, file_paths):
    """Return the state of the first ``len(file_paths)`` entries of ``task.list()``.

    Each entry is converted to text and the state is read as the substring
    between the first "(" and the first ")" of that text.
    """
    recent_tasks = task.list()[:len(file_paths)]

    states = []
    for entry in recent_tasks:
        text = str(entry)
        states.append(text[text.find("(")+1:text.find(")")])

    return states


In [None]:
# Read model versions table.
versions = df_from_query('model_versions')

# Compare every key column with its own value (see the export cell for the rationale).
version_key = {
    'input_image_id': image_ids[0],
    'output_image_id': image_ids[1],
    'geostore_id': geostore_id,
    'kernel_size': kernel_size,
    'sample_size': sample_size,
}
df = pd.DataFrame({column: versions[column] == value for column, value in version_key.items()})
matching_versions = versions[df.all(axis=1)]

if not (matching_versions['data_status'] == 'COMPLETED').all():
    # Update the row that belongs to this configuration, not simply the last row of the table.
    index = matching_versions.index[-1]
    n_tasks = len(file_paths)

    while True:
        status_list = check_status_data(task, file_paths)
        if status_list == ['COMPLETED'] * n_tasks:
            break

        #Save temporal status in table
        versions.at[index, 'data_status'] = json.dumps(dict(zip(file_paths, status_list)))
        df_to_csv(versions, "model_versions")
        df_to_db(versions, "model_versions")

        # FAILED and CANCELLED are terminal states: waiting longer would loop forever.
        if any(state in ('FAILED', 'CANCELLED') for state in status_list):
            raise RuntimeError('Export did not complete: ' + json.dumps(dict(zip(file_paths, status_list))))

        time.sleep(60)

    #Save final status in table
    versions.at[index, 'data_status'] = "COMPLETED"

    df_to_csv(versions, "model_versions")
    df_to_db(versions, "model_versions")

***
## Training the model in AI Platform
### Training code package setup

It's necessary to create a Python package to hold the training code.  Here we're going to get started with that by creating a folder for the package and adding an empty `__init__.py` file.

In [None]:
ROOT_PATH = 'AI_Platform/cnn_trainer'
PACKAGE_FOLDER = '/trainer'

!rm -r {ROOT_PATH}
!mkdir {ROOT_PATH}
!mkdir {ROOT_PATH+PACKAGE_FOLDER}
!touch {ROOT_PATH+PACKAGE_FOLDER}/__init__.py
!ls -l {ROOT_PATH+PACKAGE_FOLDER}

**Setuptools file named `setup.py`**

In [None]:
%%writefile {ROOT_PATH}/setup.py

# Packaging script for the `trainer` package.
from setuptools import find_packages
from setuptools import setup

# Dependencies installed together with the trainer package.
# NOTE(review): the trainer modules import `tensorflow` and use `tf.keras`; confirm that the
# standalone `keras` pin is still needed.
REQUIRED_PACKAGES = ['keras==2.2.4']

setup(
    name='trainer',
    version='0.1',
    install_requires=REQUIRED_PACKAGES,
    packages=find_packages(),
    include_package_data=True,
    description='My training application package.'
)

**Training/evaluation data**

The following is code to load training/evaluation data.  Write this into `util.py`.

In [None]:
%%writefile {ROOT_PATH+PACKAGE_FOLDER}/util.py
"""Utilities to download and preprocess the data."""

import tensorflow as tf
import json
from google.cloud import storage
from google.cloud.storage import blob

class Util():
    """Builds ``tf.data`` datasets from TFRecord files, driven by a JSON config stored in GCS."""

    def __init__(self, path):
        """Download and parse the JSON parameters stored at ``path`` in the 'geo-ai' bucket."""
        self.path = path

        self.client = storage.Client(project='skydipper-196010')
        self.bucket = self.client.get_bucket('geo-ai')
        self.blob = self.bucket.blob(self.path)
        raw_config = self.blob.download_as_string(client=self.client)
        self.config = json.loads(raw_config.decode('utf-8'))

    def parse_function(self, proto):
        """Parse one serialized example into an (inputs, outputs) pair.

        Args:
          proto: a serialized Example.
        Returns:
          A tuple of two tensors in HWC layout: the first holds the
          ``in_bands`` channels, the second the ``out_bands`` channels.
        """
        in_bands = self.config.get('in_bands')

        # Every band of the record, inputs first
        features = in_bands + self.config.get('out_bands')

        # Size and shape of the patches stored in the record.
        kernel_size = self.config.get('kernel_size')
        kernel_shape = [kernel_size, kernel_size]
        features_dict = {
            name: tf.io.FixedLenFeature(shape=kernel_shape, dtype=tf.float32)
            for name in features
        }

        # Load one example
        parsed_features = tf.io.parse_single_example(proto, features_dict)

        # Stack the bands along a leading axis, then move that axis last (HWC)
        stacked = tf.stack([parsed_features.get(name) for name in features], axis=0)
        stacked = tf.transpose(stacked, [1, 2, 0])

        n_inputs = len(in_bands)
        return stacked[:, :, :n_inputs], stacked[:, :, n_inputs:]

    def get_dataset(self, glob):
        """Build a parsed dataset from the GZIP TFRecord files matching ``glob``.

        Returns:
          A tf.data.Dataset of (inputs, outputs) pairs.
        """
        matching_files = tf.compat.v1.io.gfile.glob(glob)

        records = tf.data.TFRecordDataset(matching_files, compression_type='GZIP')
        return records.map(self.parse_function, num_parallel_calls=5)

    def _pattern_for(self, position):
        """File pattern '<data_dir>/<base_names[position]>*' taken from the config."""
        return self.config.get('data_dir') + '/' + self.config.get('base_names')[position] + '*'

    def get_training_dataset(self):
        """Get the preprocessed training dataset (shuffled, batched, repeated).

        Returns:
          A tf.data.Dataset of training data.
        """
        dataset = self.get_dataset(self._pattern_for(0))
        dataset = dataset.shuffle(self.config.get('shuffle_size'))
        dataset = dataset.batch(self.config.get('batch_size'))
        return dataset.repeat()

    def get_evaluation_dataset(self):
        """Get the preprocessed evaluation dataset (batches of one, repeated).

        Returns:
          A tf.data.Dataset of evaluation data.
        """
        dataset = self.get_dataset(self._pattern_for(1))
        return dataset.batch(1).repeat()

**Model**

In [None]:
# Create the `models` sub-package inside the trainer package and copy the CNN and MLP
# model folders from ../models into it.
!mkdir {ROOT_PATH+PACKAGE_FOLDER+'/models'}
!touch {ROOT_PATH+PACKAGE_FOLDER+'/models'}/__init__.py
!cp -r ../models/CNN {ROOT_PATH+PACKAGE_FOLDER+'/models'}/CNN
!cp -r ../models/MLP {ROOT_PATH+PACKAGE_FOLDER+'/models'}/MLP

In [None]:
%%writefile {ROOT_PATH+PACKAGE_FOLDER}/model.py

from google.cloud import storage
from google.cloud.storage import blob
import json

from .models.CNN.regression import deepvel as CNNregDeepVel, segnet as  CNNregSegNet, unet as CNNregUNet
from .models.CNN.segmentation import deepvel as CNNsegDeepVel, segnet as  CNNsegSegNet, unet as CNNsegUNet
from .models.MLP.regression import sequential1 as MLPregSequential1

def select_model(path):
    """Return the model factory selected by the training parameters stored in GCS.

    The JSON file at ``path`` (inside the 'geo-ai' bucket) provides
    ``model_type``, ``model_output`` and ``model_architecture``, which index
    the table of available ``create_keras_model`` functions.

    Args:
        path: blob path of the training-parameters JSON file.

    Returns:
        The ``create_keras_model`` function of the selected architecture.

    Raises:
        ValueError: if the requested combination is not in the table.
    """
    # Read training parameters from GCS
    client = storage.Client(project='skydipper-196010')
    bucket = client.get_bucket('geo-ai')
    blob = bucket.blob(path)
    config = json.loads(blob.download_as_string(client=client).decode('utf-8'))
    
    # Model's dictionary
    models = {'CNN':
              {
                  'regression': 
                  {
                      'deepvel': CNNregDeepVel.create_keras_model,
                      'segnet': CNNregSegNet.create_keras_model,
                      'unet': CNNregUNet.create_keras_model,
                  },
                  'segmentation': 
                  {
                      'deepvel': CNNsegDeepVel.create_keras_model,
                      'segnet': CNNsegSegNet.create_keras_model,
                      'unet': CNNsegUNet.create_keras_model,
                  }
              }, 
              'MLP': 
              {
                  'regression': 
                  {
                      'sequential1': MLPregSequential1.create_keras_model,
                  }
              }
             }
    
    model_type = config.get('model_type')
    model_output = config.get('model_output')
    model_architecture = config.get('model_architecture')

    # Direct indexing makes an unsupported selection fail here with a readable message,
    # instead of an AttributeError on None or a None factory that only fails when called.
    try:
        return models[model_type][model_output][model_architecture]
    except KeyError:
        raise ValueError(
            'Unsupported model selection: model_type={!r}, model_output={!r}, model_architecture={!r}'.format(
                model_type, model_output, model_architecture))

**Training task**

The following cell creates `task.py`, which loads the training and evaluation data, trains the model and, once training has finished, saves it to a Cloud Storage bucket.

In [None]:
%%writefile {ROOT_PATH+PACKAGE_FOLDER}/task.py
"""Trains a Keras model"""

import os
import json
import argparse

import tensorflow as tf
from google.cloud import storage
from google.cloud.storage import blob

from .util import Util
from . import model

def get_args():
    """Parse the command-line options understood by the trainer.

    Returns:
      The parsed options (``params_file`` and ``verbosity``); options that
      are not recognised are ignored.
    """
    verbosity_levels = ['DEBUG', 'ERROR', 'FATAL', 'INFO', 'WARN']
    option_specs = [
        ('--params-file', dict(
            type=str,
            required=True,
            help='GCS location where we have saved the training_params.json file')),
        ('--verbosity', dict(
            choices=verbosity_levels,
            default='INFO')),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    known_args, _unknown = parser.parse_known_args()
    return known_args

def train_and_evaluate(args):
    """Trains and evaluates the Keras model.

    Builds the model chosen by model.py, trains it on the datasets prepared
    by util.py and saves it in TensorFlow SavedModel format under
    ``<job_dir>/model``, where ``job_dir`` is read from the parameters file.
    TensorBoard logs are written to ``<job_dir>/logs``.
    """
    params_path = args.params_file

    # Read training parameters from GCS
    client = storage.Client(project='skydipper-196010')
    bucket = client.get_bucket('geo-ai')
    params_blob = bucket.blob(params_path)
    config = json.loads(params_blob.download_as_string(client=client).decode('utf-8'))

    # Create the Keras Model
    selected_model = model.select_model(params_path)

    model_kwargs = {
        'inputShape': (None, None, len(config.get('in_bands'))),
        'nClasses': len(config.get('out_bands')),
    }
    # The activation is only forwarded when the config sets a non-empty value.
    if config.get('output_activation'):
        model_kwargs['output_activation'] = config.get('output_activation')
    keras_model = selected_model(**model_kwargs)

    # Compile Keras model
    optimizer = tf.keras.optimizers.Adam(lr=config.get('learning_rate'))
    keras_model.compile(loss=config.get('loss'), optimizer=optimizer, metrics=config.get('metrics'))

    # Pass a tfrecord
    util = Util(path = params_path)
    training_dataset = util.get_training_dataset()
    evaluation_dataset = util.get_evaluation_dataset()

    job_dir = config.get('job_dir')
    batch_size = config.get('batch_size')

    # Setup TensorBoard callback.
    tensorboard_cb = tf.keras.callbacks.TensorBoard(os.path.join(job_dir, 'logs'))

    # Train model
    # NOTE(review): the evaluation dataset is batched one example at a time in util.py, while
    # validation_steps divides eval_size by batch_size -- confirm this is intended.
    fit_options = dict(
        x=training_dataset,
        steps_per_epoch=int(config.get('train_size') / batch_size),
        epochs=config.get('epochs'),
        validation_data=evaluation_dataset,
        validation_steps=int(config.get('eval_size') / batch_size),
        verbose=1,
        callbacks=[tensorboard_cb])
    keras_model.fit(**fit_options)

    tf.contrib.saved_model.save_keras_model(keras_model, os.path.join(job_dir, 'model'))

if __name__ == '__main__':
    args = get_args()
    # Honour the --verbosity option (default 'INFO') instead of a hard-coded level.
    tf.logging.set_verbosity(args.verbosity)
    train_and_evaluate(args)

**Create a .tar.gz distribution package**

In [None]:
def make_tarfile(output_filename, source_dir):
    """Write ``source_dir`` into a gzip-compressed tar archive.

    The archive has a single top-level entry named after the last component
    of ``source_dir``. A trailing path separator is ignored when that name is
    derived, so 'pkg' and 'pkg/' produce the same archive layout.

    Args:
        output_filename: path of the ``.tar.gz`` file to create.
        source_dir: directory (or file) to archive.
    """
    # normpath strips a trailing separator; without it basename('pkg/') is ''.
    archive_root = os.path.basename(os.path.normpath(source_dir))
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=archive_root)

In [None]:
# Archive the trainer package folder into a .tar.gz file next to it.
output_filename = './AI_Platform/trainer-0.1.tar.gz'
source_dir = './AI_Platform/cnn_trainer/'

make_tarfile(output_filename, source_dir)

In [None]:
# Upload the trainer package to Cloud Storage.
# `from_service_account_json` is a constructor, so no default client (which would need
# default credentials) has to be created first.
client = storage.Client.from_service_account_json(env.privatekey_path)
# The bucket name is spelled out because `training_params` is only defined in a later cell.
bucket = client.get_bucket('geo-ai')
blob = bucket.blob('Train/trainer-0.1.tar.gz')

blob.upload_from_filename(
    filename = output_filename,
    content_type = 'application/gzip',
    client=client
)
print(blob.public_url)

### Set up the training parameters

**Variables**

In [None]:
# Variables for training in AI platform
bucket = 'geo-ai'
project_id = env.project_id
region = "us-central1"

# Python module inside the trainer package that contains the training entry point.
main_trainer_module = 'trainer.task'

# These three values select the model factory in the trainer's model.py.
model_type = 'MLP'
model_output = 'regression'
model_architecture = 'sequential1'

# Training parameters
# `base_names`, `train_size` and `eval_size` are the values returned by `export_TFRecords`;
# `data_dir` repeats the folder layout used by that function.
training_params = {
    "bucket": bucket,
    "base_names": base_names,
    "data_dir": 'gs://' + bucket + '/Data/' + str(image_ids[0])+'_'+ str(image_ids[1])+'/'+str(geostore_id)+'/'+str(kernel_size)+'/'+str(sample_size),
    "in_bands": bands[0],
    "out_bands": bands[1],
    "kernel_size": int(kernel_size),
    "train_size": train_size,
    "eval_size": eval_size,
    "model_type": model_type,
    "model_output": model_output,
    "model_architecture": model_architecture,
    "output_activation": '',
    "batch_size": 16,
    "epochs": 100,
    "shuffle_size": 2000,
    "learning_rate": 1e-2,
    "loss": "mse",
    "metrics": ['mse']
}

**Populate `model` table**

In [None]:
# Read model table
models = df_from_query('model')

# Compare every key column with its own value; `isin` over the mixed list would also accept
# a value that belongs to a different column.
model_key = {
    'model_type': model_type,
    'model_output': model_output,
    'output_image_id': image_ids[1],
}
df = pd.DataFrame({column: models[column] == value for column, value in model_key.items()})
if not df.all(axis=1).any():
    # The value order follows the column order of the model table.
    dictionary = dict(zip(list(models.keys()), [[''], [model_type], [model_output], [''], [image_ids[1]]]))
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.
    models = pd.concat([models, pd.DataFrame(dictionary)], ignore_index=True, sort=False)

# Save model table
df_to_csv(models, "model")
df_to_db(models, "model")

In [None]:
# Index label of the first model row matching the type, output and output image of this run.
model_id = models[(models['model_type'] == model_type) & (models['model_output'] == model_output) & (models['output_image_id'] == image_ids[1])].index[0]

**Populate `model_versions` table**

In [None]:
def removekey(dictionary, key):
    """Delete `key` from `dictionary` in place (if present) and return the same dictionary."""
    dictionary.pop(key, None)
    return dictionary

In [None]:
# Read model table
versions = df_from_query('model_versions')
df = versions.copy()
df['training_params'] = df['training_params'].apply(lambda x : removekey(json.loads(x),'job_dir'))

# Check if the version already exists
if (df['training_params'] == training_params).any():
    # Get version id
    version_id = df[df['training_params'].apply(lambda x : removekey(x,'job_dir')) == training_params].index[0]
    
    # Check status
    status = df.iloc[version_id]['training_status']
    print('Version already exists with training status equal to:', status)
    
    if status == 'SUCCEEDED':
        print('The training job successfully completed.')
    if (status == 'CANCELLED') or (status == 'FAILED'):
        print(f'The training job was {status}.')
        if status == 'CANCELLED':  
            print('Start training again.')
        if status == 'FAILED': 
            print('Change training parameters and try again.')
        # Get training version
        training_version = df.iloc[version_id]['version']
        
        # Update job name
        job_name = 'job_v' + str(int(time.time()))
            
        # Add job directory
        training_params = json.loads(df.iloc[version_id]['training_params'])
        training_params['job_dir'] = 'gs://' + bucket + '/Models/' + str(model_id) + '/' +  str(training_version) + '/'
        
        # Save training version and clear status
        versions.at[version_id, 'training_params'] =  json.dumps(training_params)
        versions.at[version_id, 'training_status'] = ''
        
        # Remove job_dir
        !gsutil rm -r {training_params['job_dir']}
        
# Create new version  
else:
    print('Create new version')
    # New training version and job name
    training_version = str(int(time.time()))
    job_name = 'job_v' + training_version
    
    # Add job directory
    training_params['job_dir'] = 'gs://' + bucket + '/Models/' + str(model_id) + '/' +  str(training_version) + '/'
    
    df = versions[['input_image_id', 'output_image_id', 'geostore_id', 'kernel_size', 'sample_size', 'training_params', 'data_status']].isin(
        [image_ids[0], image_ids[1], geostore_id, kernel_size, sample_size, json.dumps(training_params), 'COMPLETED']).copy()
    
    # Check if untrained version already exists
    if (df.all(axis=1).any()):
        version_id = df[df.all(axis=1)].index[0]
        #versions[(versions['input_image_id'] == image_ids[0]) & (versions['output_image_id'] == image_ids[1]) & 
                              #(versions['geostore_id'] == geostore_id)  & (versions['kernel_size'] == kernel_size) & 
                              #(versions['sample_size'] == sample_size)].index[0]
    
        versions.at[version_id, 'model_id'] = model_id
        versions.at[version_id, 'model_architecture'] = model_architecture
        versions.at[version_id, 'training_params'] = json.dumps(training_params)
        versions.at[version_id, 'version'] = training_version
        
    else:
        dictionary = dict(zip(list(versions.keys()), [[''], [''], [image_ids[0]], [image_ids[1]], [geostore_id], [kernel_size], [sample_size], [''], [''], ['COMPLETED'], [''], [''], ['']]))
        versions = versions.append(pd.DataFrame(dictionary), ignore_index = True, sort=False)
        version_id = versions.index[-1]
        
        versions.at[version_id, 'model_id'] = int(model_id)
        versions.at[version_id, 'model_architecture'] = model_architecture
        versions.at[version_id, 'training_params'] = json.dumps(training_params)
        versions.at[version_id, 'version'] = int(training_version)

        
# Save version table
versions = versions.astype({'model_id': 'int64', 
                            'version': 'int64', 
                            'eeified': bool, 
                            'deployed': bool})

df_to_csv(versions, "model_versions")
df_to_db(versions, "model_versions")

In [None]:
# Display the `model_versions` table as saved by the previous cell.
versions

**Save training parameters**

These training parameters need to be stored in a place where other code can access them.

In [None]:
# Object path (inside the bucket) of the JSON file holding the training parameters.
params_path = f'Models/{model_id}/{training_version}/training_params.json'

In [None]:
# Upload the training parameters, serialized as JSON, to `params_path` in the bucket.
#client = storage.Client(project=env.project_id)
client = storage.Client.from_service_account_json(env.privatekey_path)
# From here on `bucket` is a Bucket object, no longer the bucket name.
bucket = client.get_bucket(training_params['bucket'])
blob = bucket.blob(params_path)

blob.upload_from_string(
    data=json.dumps(training_params),
    content_type='application/json',
    client=client
)

### Submit a training job to AI Platform for training

**Set up your GCP project**

Enter your project ID in the cell below. Then run the cell to make sure the Cloud SDK uses the right project for all the commands in this notebook.

In [None]:
# Make the Cloud SDK use `project_id` for the `gcloud` commands run in this notebook.
! gcloud config set project $project_id

**Authenticate your GCP account**

Enter the path to your service account key as the
`GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell.

In [None]:
# Set GOOGLE_APPLICATION_CREDENTIALS to the service-account key path for this kernel.
%env GOOGLE_APPLICATION_CREDENTIALS {env.privatekey_path}

**Submit a training job to AI Platform**

In [None]:
# Request body for the training job: machine configuration, trainer package and arguments.
training_inputs = dict(
    scaleTier='CUSTOM',
    masterType='large_model_v100',  # A single NVIDIA Tesla V100 GPU
    packageUris=[f"gs://{training_params['bucket']}/Train/trainer-0.1.tar.gz"],
    pythonModule=main_trainer_module,
    args=['--params-file', params_path],
    region=region,
    jobDir=training_params['job_dir'],
    runtimeVersion='1.14',
    pythonVersion='3.5',
)

job_spec = dict(jobId=job_name, trainingInput=training_inputs)

In [None]:
print(f'Creating training job: {job_name}')

# Project ID in the format the APIs need
project = f'projects/{env.project_id}'

# Python representation of the AI Platform Training services
credentials = GoogleCredentials.from_stream(env.privatekey_path)
ml = discovery.build('ml', 'v1', credentials=credentials)

# Request calling projects.jobs.create
request = ml.projects().jobs().create(parent=project, body=job_spec)

# Make the call; an HTTP error is reported and the notebook keeps going.
try:
    response = request.execute()
except errors.HttpError as err:
    print('There was an error creating the training job. Check the details:')
    print(err._get_reason())
else:
    print(response)

**Save training status**

In [None]:
def check_status_training(job_name, project_id):
    desc = !gcloud ai-platform jobs describe {job_name} --project {project_id}
    return desc.grep('state:')[0].split(':')[1].strip()   

In [None]:
# Read version table
versions = df_from_query('model_versions')

# Poll the job once per minute until it reaches a terminal state.
# Every observed status is saved, and the loop stops right after saving a terminal one
# instead of sleeping another minute and saving it a second time.
while True:
    status = check_status_training(job_name, env.project_id)
    
    # Save current status in table
    versions.at[version_id, 'training_status'] = status
    df_to_csv(versions, "model_versions")
    df_to_db(versions, "model_versions")
    
    if status in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
        break
    
    time.sleep(60)

***
## Prepare the model for making predictions in Earth Engine

Before we can use the model in Earth Engine, it needs to be hosted by AI Platform.  But before we can host the model on AI Platform we need to *EEify* (a new word!) it.  The EEification process merely appends some extra operations to the inputs and outputs of the model in order to accommodate the interchange format between pixels from Earth Engine (float32) and inputs to AI Platform (base64).  (See [this doc](https://cloud.google.com/ml-engine/docs/online-predict#binary_data_in_prediction_input) for details.)  

**`earthengine model prepare`**

The EEification process is handled for you using the Earth Engine command `earthengine model prepare`.  To use that command, we need to specify the input and output model directories and the name of the input and output nodes in the TensorFlow computation graph.  We can do all that programmatically:

In [None]:
from tensorflow.python.tools import saved_model_utils

# Read version table
versions = df_from_query('model_versions')

model_path = training_params.get('job_dir') + 'model/'

meta_graph_def = saved_model_utils.get_meta_graph_def(model_path, 'serve')
inputs = meta_graph_def.signature_def['serving_default'].inputs
outputs = meta_graph_def.signature_def['serving_default'].outputs

# Just get the first thing(s) from the serving signature def.  i.e. this
# model only has a single input and a single output.
input_name = None
for k,v in inputs.items():
    input_name = v.name
    break

output_name = None
for k,v in outputs.items():
    output_name = v.name
    break

# Make a dictionary that maps Earth Engine outputs and inputs to 
# AI Platform inputs and outputs, respectively.
import json
input_dict = "'" + json.dumps({input_name: "array"}) + "'"
output_dict = "'" + json.dumps({output_name: "prediction"}) + "'"

# Put the EEified model next to the trained model directory.
EEified_path = training_params.get('job_dir') + 'eeified/' 

# You need to set the project before using the model prepare command.
!earthengine set_project {project_id}
!earthengine model prepare --source_dir {model_path} --dest_dir {EEified_path} --input {input_dict} --output {output_dict}

# Populate model_versions table
versions.at[version_id, 'eeified'] = True
df_to_csv(versions, "model_versions")
df_to_db(versions, "model_versions")

### Deploy the model to AI Platform

Before it's possible to get predictions from the trained and EEified model, it needs to be deployed on AI Platform.  The first step is to create the model.  The second step is to create a version.  See [this guide](https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models) for details.  Note that models and versions can be monitored from the [AI Platform models page](http://console.cloud.google.com/ai-platform/models) of the Cloud Console. 

To ensure that the model is ready for predictions without having to warm up nodes, you can use a configuration yaml file to set the scaling type of this version to autoScaling, and, set a minimum number of nodes for the version. This will ensure there are always nodes on stand-by, however, you will be charged as long as they are running. For this example, we'll set the minNodes to 10. That means that at a minimum, 10 nodes are always up and running and waiting for predictions. The number of nodes will also scale up automatically if needed.

**Variables**

In [None]:
region = "us-central1"
# `training_version` is a string for a new version but comes from the table (integer)
# when an existing version is reused, so convert it before concatenating.
version_name = 'v' + str(training_version)

# Name and description of the current model, read from the `model` table
models = df_from_query('model')
model_name = models.iloc[model_id]['model_name']
model_description = models.iloc[model_id]['model_description']

**Create model**

In [None]:
print('Creating model: ' + model_name)

# Full project ID in the format the API needs
project = f'projects/{env.project_id}'

# Representation of the Cloud ML API
credentials = GoogleCredentials.from_stream(env.privatekey_path)
ml = discovery.build('ml', 'v1', credentials=credentials)

# Fields of the request body
request_dict = dict(name=model_name, description=model_description)

# Request calling projects.models.create
request = ml.projects().models().create(parent=project, body=request_dict)

# Make the call; an HTTP error is reported and the notebook keeps going.
try:
    response = request.execute()
except errors.HttpError as err:
    print('There was an error creating the model. Check the details:')
    print(err._get_reason())
else:
    print(response)

**Create version**

In [None]:
# Build a representation of the Cloud ML API.
credentials = GoogleCredentials.from_stream(env.privatekey_path)
ml = discovery.build('ml', 'v1', credentials=credentials)

# Create a dictionary with the fields from the request body.
# The version serves the EEified model and keeps at least 10 nodes running.
request_dict = {
    'name': version_name,
    'deploymentUri': EEified_path,
    'runtimeVersion': '1.14',
    'pythonVersion': '3.5',
    'framework': 'TENSORFLOW',
    'autoScaling': {
        "minNodes": 10
    }
}

# Create a request to call projects.models.versions.create.
request = ml.projects().models().versions().create(
    parent=f'projects/{env.project_id}/models/{model_name}',
    body=request_dict
)

# Make the call.
try:
    response = request.execute()
    print(response)
except errors.HttpError as err:
    # Something went wrong, print out some information.
    # (The message names the version: this request creates a version, not a model.)
    print('There was an error creating the version. Check the details:')
    print(err._get_reason())

**Save deployment status**

In [None]:
def check_status_deployment(model_name, version_name):
    desc = !gcloud ai-platform versions describe {version_name} --model={model_name}
    return desc.grep('state:')[0].split(':')[1].strip() 

In [None]:
# Read version table
versions = df_from_query('model_versions')

# Poll the version once per minute until it is READY.
# A FAILED version also ends the loop: it will never become READY, so waiting for
# READY alone would never terminate.
while True:
    status = check_status_deployment(model_name, version_name)
    
    # Save current deployment flag in table
    versions.at[version_id, 'deployed'] = (status == 'READY')
    df_to_csv(versions, "model_versions")
    df_to_db(versions, "model_versions")
    
    if status in ['READY', 'FAILED']:
        break
    
    time.sleep(60)

if status == 'FAILED':
    raise RuntimeError(
        'Deployment of version {} of model {} failed.'.format(version_name, model_name))

***
## Predict in Earth Engine

**Read tables**

In [None]:
# Reload the four tables used by the prediction section.
datasets = df_from_query('dataset')
images = df_from_query('image')
models = df_from_query('model')
versions = df_from_query('model_versions')

In [None]:
# Display the `dataset` table.
datasets

In [None]:
# Display the `image` table.
images

In [None]:
# Display the `model` table.
models

In [None]:
# Display the `model_versions` table.
versions

**Select pre-trained models**

In [None]:
# Names of the models available in the `model` table
print(models['model_name'].tolist())

In [None]:
# Model used for prediction; it must be one of the names printed by the previous cell.
model_name = 'MLP_regression_4'
print(model_name)

**Select versions**

In [None]:
# Row of the selected model in the `model` table
model_id = models[models['model_name'] == model_name].index[0]
model_type = models.iloc[model_id]['model_type']
# The segmentation and display cells below branch on `model_output`; take it from the
# selected model instead of relying on the value left over from the training section.
model_output = models.iloc[model_id]['model_output']

In [None]:
# Versions (as integers) registered for the selected model
version_names = [int(v) for v in versions[versions['model_id'] == model_id]['version']]
print(version_names)

In [None]:
# Pick one of the versions printed above (here the third entry of the list), then
# derive its row in `model_versions` and the version name ('v' + version).
version = version_names[2]
version_id = versions[versions['version'] == version].index[0]
version_name = 'v'+ str(version)
print(version_name)

**Variables**

In [None]:
# Row of the selected version
version_id = versions[versions['version'] == version].index[0]
# Training parameters stored (as JSON) for that version
training_params = json.loads(versions.at[version_id, 'training_params'])
# [input image id, output image id] of that version
image_ids = list(versions.iloc[version_id][['input_image_id', 'output_image_id']])

In [None]:
# Dataset slugs of the input and output images, looked up through their `dataset_id`.
collections = list(datasets.iloc[list(images.iloc[image_ids]['dataset_id'])]['slug'])
# [input bands, output bands] as stored in the training parameters.
bands = [training_params.get('in_bands'), training_params.get('out_bands')]
# Scale and date range are read from the row of the input image.
scale, init_date, end_date = list(images.iloc[image_ids[0]][['scale', 'init_date', 'end_date']])
scale = float(scale)
project_id = env.project_id

In [None]:
# Summary of the values recovered for the selected version
for label, value in [
    ('Datasets: ', collections),
    ('Bands: ', bands),
    ('scale: ', scale),
    ('init_date: ', init_date),
    ('end_date: ', end_date),
]:
    print(label, value)

Select new date range

In [None]:
# Date range of the composite used for prediction.
init_date_new = '2019-01-21'
end_date_new = '2019-01-31'

**Create a Polygon object from Geojson**

In [None]:
# GeoJSON FeatureCollection with a single rectangular polygon: the prediction area.
atts={'geojson': {'type': 'FeatureCollection',
  'features': [{'type': 'Feature',
    'properties': {},
    'geometry': {'type': 'Polygon',
     'coordinates': [[[0.5548095703125, 40.496048060627885],
                      [0.9558105468749999,40.496048060627885],
                      [0.9558105468749999,40.83667117059108],
                      [0.5548095703125,40.83667117059108],
                      [0.5548095703125,40.496048060627885]]]}}]}}

# Wrap the GeoJSON in a Skydipper geometry object.
geometry = Skydipper.Geometry(attributes=atts)
geometry

In [None]:
# Show the geometry with its `map()` method.
geometry.map()

**`ee.Model.fromAiPlatformPredictor`**

There is now a trained model, prepared for serving to Earth Engine, hosted and versioned on AI Platform.  
We can now connect Earth Engine directly to the trained model for inference.  You do that with the `ee.Model.fromAiPlatformPredictor` command.
For this command to work, we need to know a lot about the model.  To connect to the model, you need to know the name and version.

**Inputs**

You need to be able to recreate the imagery on which it was trained in order to perform inference.  Specifically, you need to create an array-valued input from the scaled data and use that for input.  (Recall that the new input node is named `array`, which is convenient because the array image has one band, named `array` by default.)  The inputs will be provided as 144x144 patches (`inputTileSize`), at 30-meter resolution (`proj`), but 8 pixels will be thrown out (`inputOverlapSize`) to minimize boundary effects.

In [None]:
# Input image the selected version was trained on
input_image_id = versions.iloc[version_id]['input_image_id']

# Create composite
image = ee_collection_specifics.Composite(collections[0])(init_date_new, end_date_new)

# Normalize images, only when min/max values are stored for the input image
values = json.loads(images.iloc[input_image_id]['bands_min_max'])
if values:
    image = normalize_ee_images(image, collections[0], values)

Select bands and convert them into float

In [None]:
# Keep only the input bands of the model and cast them to float.
image = image.select(bands[0]).float()
image.getInfo()

**Outputs**

The output (which you also need to know).

In [None]:
# Tile and overlap sizes depend on the kernel size the version was trained with.
kernel_size = int(versions['kernel_size'].iloc[version_id])
if kernel_size == 1:
    input_tile_size, input_overlap_size = [1, 1], [0, 0]
elif kernel_size > 1:
    input_tile_size, input_overlap_size = [144, 144], [8, 8]

# Load the trained model and use it for prediction.
model = ee.Model.fromAiPlatformPredictor(
    projectName=project_id,
    modelName=model_name,
    version=version_name,
    inputTileSize=input_tile_size,
    inputOverlapSize=input_overlap_size,
    proj=ee.Projection('EPSG:4326').atScale(scale),
    fixInputProj=True,
    outputBands={
        'prediction': {'type': ee.PixelType.float(), 'dimensions': 1},
    },
)

# Predict on the array image and flatten the result into one band per output band.
predictions = model.predictImage(image.toArray()).arrayFlatten([bands[1]])
predictions.getInfo()

Clip the prediction area with the polygon

In [None]:
# Clip the prediction area with the polygon
# (built from the coordinates of the first feature of the GeoJSON above).
polygon = ee.Geometry.Polygon(geometry.attributes.get('geojson').get('features')[0].get('geometry').get('coordinates'))
predictions = predictions.clip(polygon)

# Get centroid; the coordinate pair is reversed before it is used as the folium map location.
centroid = polygon.centroid().getInfo().get('coordinates')[::-1]

Segment the image:

In [None]:
if model_output == 'segmentation':
    # Per-pixel maximum over the prediction bands, added as an extra band
    maxValues = predictions.reduce(ee.Reducer.max())
    predictions = predictions.addBands(maxValues)

    # Chained conditional: the 1-based position of the band equal to the maximum, else 0
    expression = "".join(
        f"(b('{band}') == b('max')) ? {n} : "
        for n, band in enumerate(bands[1], start=1)
    ) + "0"

    # Evaluate it, mask with its own values and add it as the `categories` band
    segmentation = predictions.expression(expression)
    categories = segmentation.mask(segmentation).select(['constant'], ['categories'])
    predictions = predictions.addBands(categories)

**Display**

Use folium to visualize the input imagery and the predictions.

In [None]:
# Define the URL format used for Earth Engine generated map tiles.
EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'

mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 1})
Map = folium.Map(location=centroid, zoom_start=10)
folium.TileLayer(
    tiles=EE_TILES.format(**mapid),
    attr='Google Earth Engine',
    overlay=True,
    name='median composite',
  ).add_to(Map)

for band in bands[1]:
    mapid = predictions.getMapId({'bands': [band], 'min': 0, 'max': 1})
    
    folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name=band,
      ).add_to(Map)


if model_output == 'segmentation':
    mapid = predictions.getMapId({'bands': ['categories'], 'min': 1, 'max': len(bands[1])})
    
    folium.TileLayer(
        tiles=EE_TILES.format(**mapid),
        attr='Google Earth Engine',
        overlay=True,
        name='categories',
      ).add_to(Map)
    
Map.add_child(folium.LayerControl())
Map