# **Deep Learning with SkyDL**
**Setup software libraries**

In [1]:
from SkyDL import skydl
import env

## **Training of Deep Learning models**
**Create `Trainer` object**

In [2]:
# The private-key path is read from the local `env` module imported above, so it is
# not written into the notebook itself.
SkyTrainer = skydl.Trainer(privatekey_path = env.privatekey_path)

**Get token**

In [3]:
# The password is typed at the interactive "Skydipper login password" prompt, so it is
# never stored in the notebook.
# NOTE(review): the account e-mail is hard-coded here and again in the Predictor
# section — consider reading it from `env`, as is done for the private-key path.
SkyTrainer.get_token(email='iker.sanchez@vizzuality.com')

Skydipper login password: ·········


## Database
**Retrieve table names of the database**

In [4]:
SkyTrainer.table_names

['image', 'model', 'dataset', 'model_versions']

**Retrieve table from database**

In [5]:
SkyTrainer.datasets

Unnamed: 0,slug,name,bands,rgb_bands,provider
0,Sentinel-2-Top-of-Atmosphere-Reflectance,Sentinel 2 Top-of-Atmosphere Reflectance,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8...","['B4', 'B3', 'B2']",gee
1,Landsat-7-Surface-Reflectance,Landsat 7 Surface Reflectance,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'nd...","['B3', 'B2', 'B1']",gee
2,Landsat-8-Surface-Reflectance,Landsat 8 Surface Reflectance,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B1...","['B4', 'B3', 'B2']",gee
3,USDA-NASS-Cropland-Data-Layers,USDA NASS Cropland Data Layers,"['landcover', 'cropland', 'land', 'water', 'ur...",['landcover'],gee
4,USGS-National-Land-Cover-Database,USGS National Land Cover Database,['impervious'],['impervious'],gee
5,Lake-Water-Quality-100m,Lake Water Quality 100m,['turbidity_blended_mean'],['turbidity_blended_mean'],gee


## Skydipper datasets for Deep Learning

In [None]:
#SkyTrainer.datasets_api 

## Image composites

In [6]:
# Composite both datasets over the same date window.
SkyTrainer.composite(
    slugs=[
        'Sentinel-2-Top-of-Atmosphere-Reflectance',
        'Lake-Water-Quality-100m',
    ],
    init_date='2019-01-21',
    end_date='2019-01-31',
    zoom=6,
)

## Creation of Geostore
We select the areas from which we will export the training, validation and testing data.

In [7]:
def _bbox_feature(west, south, east, north):
    """Return a GeoJSON Feature whose geometry is the rectangle with these bounds."""
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],  # the ring repeats its first vertex to close the polygon
    ]
    return {
        "type": "Feature",
        "properties": {},
        "geometry": {"type": "Polygon", "coordinates": [ring]},
    }


# Training areas, one (west, south, east, north) tuple per rectangle.
_TRAIN_BOUNDS = [
    (-0.406494140625, 38.64476310916202, 0.28, 39.74521015328692),
    (-1.70013427734375, 35.15135442846945, -0.703125, 35.94688293218141),
]

train_atts = {
    "type": "FeatureCollection",
    "features": [_bbox_feature(*bounds) for bounds in _TRAIN_BOUNDS],
}

# No separate validation or test areas are supplied.
valid_atts = None
test_atts = None

In [8]:
# The attributes list carries the training, validation and test collections in that
# order. The last two are None in this notebook, and the call reports only the
# training polygons.
SkyTrainer.create_geostore_from_geojson(attributes=[train_atts, valid_atts, test_atts])

Number of training polygons: 2


Check geostore object

In [9]:
SkyTrainer.geostore

{'geojson': {'type': 'FeatureCollection',
  'features': [{'type': 'Feature',
    'properties': {'name': 'training'},
    'geometry': {'type': 'MultiPolygon',
     'coordinates': [[[[-0.406494140625, 38.64476310916202],
        [0.28, 38.64476310916202],
        [0.28, 39.74521015328692],
        [-0.406494140625, 39.74521015328692],
        [-0.406494140625, 38.64476310916202]]],
      [[[-1.70013427734375, 35.15135442846945],
        [-0.703125, 35.15135442846945],
        [-0.703125, 35.94688293218141],
        [-1.70013427734375, 35.94688293218141],
        [-1.70013427734375, 35.15135442846945]]]]}}]}}

In [10]:
SkyTrainer.nPolygons

{'training': 2}

Check geostore object on a server and display it on map

In [None]:
#SkyTrainer.multipolygon

In [None]:
#SkyTrainer.multipolygon.map()

## Data pre-processing
We normalize the composite images to have values from 0 to 1.

In [11]:
# scale=100 and norm_type='geostore' are the values that later appear in the `scale`
# and `norm_type` columns of SkyTrainer.images for these images.
# NOTE(review): presumably `scale` is the pixel resolution in metres and 'geostore'
# means the min/max statistics are taken over the geostore area rather than
# globally — confirm against the SkyDL implementation.
SkyTrainer.normalize_images(scale=100, norm_type='geostore')

### Select input/output bands

In [12]:
# Six input bands feed the model; the single output band is the regression target.
SkyTrainer.select_bands(
    input_bands=['B2', 'B3', 'B4', 'B5', 'ndvi', 'ndwi'],
    output_bands=['turbidity_blended_mean'],
)

In [13]:
SkyTrainer.images

Unnamed: 0,dataset_id,bands_selections,scale,init_date,end_date,bands_min_max,norm_type,geostore_id
0,0,"['B2', 'B3', 'B4', 'B5', 'ndvi', 'ndwi']",100.0,2019-01-21,2019-01-31,"{""B11_max"": 10857.5, ""B11_min"": 7.0, ""B12_max""...",geostore,cc1d5d3dee2c8a6e9a4d10f74f7747a9
1,5,['turbidity_blended_mean'],100.0,2019-01-21,2019-01-31,{},geostore,cc1d5d3dee2c8a6e9a4d10f74f7747a9
2,0,"['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8...",30.0,2016-01-01,2016-12-31,"{""B11_max"": 9660.0, ""B11_min"": 1.0, ""B12_max"":...",global,
3,3,"['cropland', 'land', 'water', 'urban']",30.0,2016-01-01,2016-12-31,{},global,


In [14]:
SkyTrainer.versions

Unnamed: 0,model_id,model_architecture,input_image_id,output_image_id,geostore_id,kernel_size,sample_size,training_params,version,data_status,training_status,eeified,deployed
0,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582126498,COMPLETED,SUCCEEDED,True,False
1,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582128515,COMPLETED,SUCCEEDED,True,False
2,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582131333,COMPLETED,SUCCEEDED,True,True
3,1,segnet,2,3,f75559fb87f5c22deb56eb2a73aa4e12,256,1000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582197697,COMPLETED,SUCCEEDED,True,False
4,1,segnet,2,3,f75559fb87f5c22deb56eb2a73aa4e12,256,1000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582214196,COMPLETED,SUCCEEDED,True,True


## Create TFRecords for training

In [15]:
# These values match the `sample_size` and `kernel_size` columns recorded for the
# water_quality (sequential1) rows of the versions table.
# NOTE(review): kernel_size=1 presumably means single-pixel samples, which fits the
# "training_pixels" base names seen in training_params — confirm.
SkyTrainer.export_TFRecords(sample_size = 20000, kernel_size = 1)

## Training the model in AI Platform

In [24]:
# Submit the training job; the job status is printed until it finishes.
SkyTrainer.train_model_ai_platform(
    model_type='MLP',
    model_output='regression',
    model_architecture='sequential1',
    model_name='water_quality',
    batch_size=2,
    epochs=25,
)

Model already exists with name: water_quality.
And description .
Create new version
Creating training job: job_v1583404103
{'jobId': 'job_v1583404103', 'trainingInput': {'scaleTier': 'CUSTOM', 'masterType': 'large_model_v100', 'packageUris': ['gs://geo-ai/Train/trainer-0.2.tar.gz'], 'pythonModule': 'trainer.task', 'args': ['--params-file', 'Models/0/1583404103/training_params.json'], 'region': 'us-central1', 'runtimeVersion': '1.15', 'jobDir': 'gs://geo-ai/Models/0/1583404103/', 'pythonVersion': '3.7'}, 'createTime': '2020-03-05T10:28:26Z', 'state': 'QUEUED', 'trainingOutput': {}, 'etag': 'i5dsk9VCkJI='}
Current training status: PREPARING
Current training status: RUNNING
Current training status: RUNNING
Current training status: RUNNING
Current training status: SUCCEEDED
TensorBoard.dev url


In [26]:
# NOTE(review): the saved output of this cell is a single training_params JSON string,
# not the versions table, so the cell was probably edited after it was last run.
# Re-run it (Restart & Run All) to refresh the output.
SkyTrainer.versions

'{"bucket": "geo-ai", "base_names": ["training_pixels", "validation_pixels", "test_pixels"], "data_dir": "gs://geo-ai/Data/0_1/cc1d5d3dee2c8a6e9a4d10f74f7747a9/1/20000", "in_bands": ["B2", "B3", "B4", "B5", "ndvi", "ndwi"], "out_bands": ["turbidity_blended_mean"], "kernel_size": 1, "training_size": 6993, "validation_size": 2312, "test_size": 2373, "model_type": "MLP", "model_output": "regression", "model_architecture": "sequential1", "output_activation": "", "batch_size": 2, "epochs": 25, "shuffle_size": 2000, "learning_rate": 0.001, "loss": "mse", "metrics": ["mse"], "job_dir": "gs://geo-ai/Models/0/1583404103/", "tb_url": "https://tensorboard.dev/experiment/08RVP1GQQ2a6VGvf3yLIGg/"}'

In [32]:
SkyTrainer.versions

Unnamed: 0,model_id,model_architecture,input_image_id,output_image_id,geostore_id,kernel_size,sample_size,training_params,version,data_status,training_status,eeified,deployed
0,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582126498,COMPLETED,SUCCEEDED,True,False
1,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582128515,COMPLETED,SUCCEEDED,True,False
2,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582131333,COMPLETED,SUCCEEDED,True,True
3,1,segnet,2,3,f75559fb87f5c22deb56eb2a73aa4e12,256,1000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582197697,COMPLETED,SUCCEEDED,True,False
4,1,segnet,2,3,f75559fb87f5c22deb56eb2a73aa4e12,256,1000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582214196,COMPLETED,SUCCEEDED,True,True
5,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1583404103,COMPLETED,SUCCEEDED,False,False


## Deploy the model to AI Platform

Before it's possible to get predictions from the trained model, it needs to be deployed on AI Platform.  The first step is to create the model.  The second step is to create a version.  See [this guide](https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models) for details.  Note that models and versions can be monitored from the [AI Platform models page](http://console.cloud.google.com/ai-platform/models) of the Cloud Console. 

In [None]:
# NOTE(review): called without arguments, so it presumably deploys the version trained
# above — confirm. This cell has no execution count or output in the saved notebook.
SkyTrainer.deploy_model_ai_platform()

In [None]:
SkyTrainer.versions

***
## **Validation of Deep Learning models**

**Create `Validator` object**

In [34]:
SkyValidator = skydl.Validator()

In [35]:
SkyValidator.versions

Unnamed: 0,model_id,model_architecture,input_image_id,output_image_id,geostore_id,kernel_size,sample_size,training_params,version,data_status,training_status,eeified,deployed
0,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582126498,COMPLETED,SUCCEEDED,True,False
1,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582128515,COMPLETED,SUCCEEDED,True,False
2,0,sequential1,0,1,cc1d5d3dee2c8a6e9a4d10f74f7747a9,1,20000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582131333,COMPLETED,SUCCEEDED,True,True
3,1,segnet,2,3,f75559fb87f5c22deb56eb2a73aa4e12,256,1000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582197697,COMPLETED,SUCCEEDED,True,False
4,1,segnet,2,3,f75559fb87f5c22deb56eb2a73aa4e12,256,1000,"{""bucket"": ""geo-ai"", ""base_names"": [""training_...",1582214196,COMPLETED,SUCCEEDED,True,True


### Select model

In [36]:
SkyValidator.models

Unnamed: 0,model_name,model_type,model_output,model_description,output_image_id
0,water_quality,MLP,regression,,1
1,land_use_4,CNN,segmentation,This model segmentates the image into 4 differ...,3


In [38]:
versions = SkyValidator.select_model(model_name='water_quality')

The water_quality model has the following versions: [1582126498, 1582128515, 1582131333]


### Select version

In [39]:
# versions[2] is 1582131333, the last entry of the list printed by select_model above.
# NOTE(review): the positional index is tied to that list and will point at a
# different version if the list changes.
SkyValidator.select_version(version=versions[2])

Selected version name: v1582131333
Datasets:  ['Sentinel-2-Top-of-Atmosphere-Reflectance', 'Lake-Water-Quality-100m']
Bands:  [['B2', 'B3', 'B4', 'B5', 'ndvi', 'ndwi'], ['turbidity_blended_mean']]
scale:  100.0
init_date:  2019-01-21
end_date:  2019-01-31


### Inspect the training process
We use [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) to inspect the training process. TensorBoard is a tool for providing the measurements and visualizations needed during the machine learning workflow. It enables tracking experiment metrics like loss and accuracy, visualizing the model graph, projecting embeddings to a lower dimensional space, and much more.

A brief overview of the dashboards shown (tabs in bottom navigation bar):

- The **Scalars** dashboard shows how the loss and metrics change with every epoch. You can use it to also track training speed, learning rate, and other scalar values.
- The **Graphs** dashboard helps you visualize your model. In this case, the Keras graph of layers is shown which can help you ensure it is built correctly.
- The **Distributions** and **Histograms** dashboards show the distribution of a Tensor over time. This can be useful to visualize weights and biases and verify that they are changing in an expected way.

In [40]:
SkyValidator.inspect_training_process()

### Evaluate the model on test data

In [None]:
results = SkyValidator.evaluate_model()

***
## **Prediction of Deep Learning models**
**Create `Predictor` object**

In [41]:
SkyPredictor = skydl.Predictor()

**Get token**

In [42]:
SkyPredictor.get_token(email='iker.sanchez@vizzuality.com')

Skydipper login password: ·········


### Select model

In [43]:
SkyPredictor.models

Unnamed: 0,model_name,model_type,model_output,model_description,output_image_id
0,water_quality,MLP,regression,,1
1,land_use_4,CNN,segmentation,This model segmentates the image into 4 differ...,3


In [44]:
versions = SkyPredictor.select_model(model_name='water_quality')

The water_quality model has the following versions: [1582126498, 1582128515, 1582131333]


### Select version

In [45]:
# versions[2] is 1582131333, the last entry of the list printed by select_model above.
# NOTE(review): the positional index is tied to that list and will point at a
# different version if the list changes.
SkyPredictor.select_version(version=versions[2])

Selected version name: v1582131333
Datasets:  ['Sentinel-2-Top-of-Atmosphere-Reflectance', 'Lake-Water-Quality-100m']
Bands:  [['B2', 'B3', 'B4', 'B5', 'ndvi', 'ndwi'], ['turbidity_blended_mean']]
scale:  100.0
init_date:  2019-01-21
end_date:  2019-01-31


## Creation of Geostore

We select the areas in which we will perform the prediction.

In [46]:
# Two candidate prediction areas, kept under separate names so that neither is
# silently overwritten; `atts` below picks the one that is actually used.

# Longitude 0.55..0.96, latitude 40.50..40.84.
# NOTE(review): appears to cover the Ebro delta — confirm.
atts_northeast = {'geojson': {'type': 'FeatureCollection',
  'features': [{'type': 'Feature',
    'properties': {},
    'geometry': {'type': 'Polygon',
     'coordinates': [[[0.5548095703125, 40.496048060627885],
                      [0.9558105468749999, 40.496048060627885],
                      [0.9558105468749999, 40.83667117059108],
                      [0.5548095703125, 40.83667117059108],
                      [0.5548095703125, 40.496048060627885]]]}}]}}

# Longitude -6.57..-6.21, latitude 36.53..36.82.
# NOTE(review): appears to cover the Bay of Cadiz — confirm.
atts_southwest = {'geojson': {'type': 'FeatureCollection',
  'features': [{'type': 'Feature',
    'properties': {},
    'geometry': {'type': 'Polygon',
     'coordinates': [[[-6.56982421875, 36.52950186333475],
                      [-6.208648681640625, 36.52950186333475],
                      [-6.208648681640625, 36.81917959744268],
                      [-6.56982421875, 36.81917959744268],
                      [-6.56982421875, 36.52950186333475]]]}}]}}

# Area used for the prediction; switch to atts_northeast to predict over the other one.
atts = atts_southwest

In [47]:
# Here the Predictor is given a single dict wrapped in a 'geojson' key, whereas the
# Trainer call earlier in the notebook passes a list of bare FeatureCollections.
SkyPredictor.create_geostore_from_geojson(atts)

## Predicting in AI Platform

In [48]:
# The prediction window (2019-04-11 .. 2019-04-21) differs from the init_date/end_date
# reported for the selected version (2019-01-21 .. 2019-01-31).
# NOTE(review): min/max are presumably the display range of the predicted band — confirm.
SkyPredictor.predict_ai_platform(init_date='2019-04-11', end_date='2019-04-21', min=0, max=3)