<a href="https://colab.research.google.com/github/Max-FM/seagrass/blob/master/notebooks/predict_seagrass_lizard_island.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predicting Lizard Island Seagrass using Banc d'Arguin trained machine learning model

Note: If you are evaluating multiple machine learning models in a single session you'll need to restart the runtime between each run.

## Preparing prediction data

### Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the Sentinel 2 image and the trained
# model archives used below are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

### Install `seagrass` package

In [None]:
%%capture

!pip install "seagrass[modulos] @ git+https://github.com/Max-FM/seagrass.git@feature/unit_tests"

### Open Sentinel 2 and bathymetry GeoTIFFs

In [None]:
from seagrass.raster import open_sentinel2_image

In [None]:
# Path of the Lizard Island Sentinel 2 GeoTIFF on the mounted Drive.
s2_filepath = '/content/drive/MyDrive/Bathymetry/LizardIsland_s2cldmdn.tif'
# NOTE(review): later cells use `s2.values` and `s2.shape[1]`/`s2.shape[2]` as the
# image grid, so this is presumably an xarray DataArray shaped (band, y, x) -- confirm.
s2 = open_sentinel2_image(s2_filepath)

### Plot Sentinel 2 image

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from rasterio.plot import show

In [None]:
def normalize(array):
    """Linearly rescale ``array`` so its minimum maps to 0.0 and its maximum to 1.0.

    Parameters
    ----------
    array : object supporting ``.min()``, ``.max()`` and arithmetic
        For example a ``numpy.ndarray``.

    Returns
    -------
    An array of the same shape with values in the range 0.0 - 1.0. A constant
    input (max == min) yields all zeros instead of the NaNs a 0/0 division
    would produce.
    """
    array_min, array_max = array.min(), array.max()
    value_range = array_max - array_min
    # A flat input has zero range; dividing by 1 keeps the result at 0.0
    # rather than producing NaNs and a divide-by-zero warning.
    if value_range == 0:
        value_range = 1
    return (array - array_min) / value_range

def make_composite(band_1, band_2, band_3):
    """Stack three raster bands depth-wise and rescale the result to 0.0 - 1.0.

    The bands become the last axis of the stacked array, in the order given.
    Normalisation is applied to the stacked array as a whole, i.e. with one
    shared minimum and maximum across all three bands.
    """
    stacked = np.dstack([band_1, band_2, band_3])
    return normalize(stacked)

def make_s2_rgb(s2_raster):
    """Build a normalised RGB composite from the first three raster bands.

    Band index 2 is used as red, index 1 as green and index 0 as blue.
    NOTE(review): the variable names call these the "deglint" bands --
    presumably sun-glint corrected; confirm against the raster's band order.
    """
    blue_deglint, green_deglint, red_deglint = (s2_raster[i] for i in range(3))
    return make_composite(red_deglint, green_deglint, blue_deglint)

In [None]:
# Build the RGB composite once; it is reused further down for the land mask
# and for the prediction overlay.
rgb = make_s2_rgb(s2)

plt.figure(figsize=(15,15))
plt.imshow(rgb)


# Last expression of the cell: echo the composite's array shape.
rgb.shape

### Create prediction features

In [None]:
from seagrass.prepare import return_features
from seagrass.utils import save_prediction_features

In [None]:
%%time

# Alternative feature set using bands 3-9, kept for reference:
# prediction_features = return_features(s2.values, bands=[3,4,5,6,7,8,9])
# Active feature set: bands 3, 4, 5 and 9 (saved below under the labels
# 'b', 'g', 'r', 'nir').
prediction_features = return_features(s2.values, bands=[3,4,5,9])

In [None]:
# Show the shape of the feature set first, then the features themselves.
display(prediction_features.shape, prediction_features)

### Save to Modulos compatible tar file

In [None]:
from os.path import exists

In [None]:
# One label per band passed to return_features above (4 bands -> 4 labels).
# NOTE(review): presumably these must also match the column names the model
# was trained with -- confirm.
# Labels for the alternative 7-band feature set:
# cols = ['b', 'g', 'r', 're1', 're2', 're3', 'nir']
cols = ['b', 'g', 'r', 'nir']


prediction_tar_filepath ='/content/seagrass_prediction.tar'

# The tar is only written when no file exists at that path yet.
# NOTE(review): an existing tar is reused as-is, so delete it after changing
# `bands`/`cols`, otherwise stale features will be fed to the model.
if not exists(prediction_tar_filepath):
    save_prediction_features(prediction_tar_filepath, prediction_features, column_labels=cols)

## Generate predictions with Modulos Auto ML

### Copy over ML model

In [None]:
# Shared-drive folder that holds the trained model archives.
models_dir = '/content/drive/Shareddrives/1_Satellite_Derived_Bathymetry & coastal veg/Banc dArguin bathymetry & seagrass/seagrass_ml_models'

# --- Alternative image / timestamp / model combinations, kept for reference ---
# image_id = '000012'
# timestamp = '23-07-2021'

# model_id = '702ffbf9fce54ed0a6975a3bf4aef201'
# model_id = '5634a3781d7a4534a52d8c751f13ae71'
# model_id = 'b371174dfd2d45638de13376c3d05b9e'

# --- Active selection: image 000011 ---
image_id = '000011'
# timestamp = '28-07-2021'

# model_id = '01a19ee91fe945ec97b53bd58e02b909'

timestamp = '2021-08-05'
# model_id = '44b6b458bfeb47afa7a2ca203ef077aa'
# model_id = 'e81e16008f6e4fdcae387a365dd6e2bc'
# model_id = '6742bb4cb1754aee887acb2ee1c13a5c'
model_id = '4b8063a950134e52bdd991bb0226dd18'

# Archive layout: <models_dir>/<image_id>_<timestamp>/<model_id>.tar
tar_filepath = f'{models_dir}/{image_id}_{timestamp}/{model_id}.tar'

# Echo the resolved path so a wrong id/timestamp combination is easy to spot.
tar_filepath

In [None]:
!tar -xf "{tar_filepath}" -C "/content"

### Change directory to model folder

In [None]:
%cd "/content/{model_id}"

### Display solution info

In [None]:
from IPython.display import HTML

# Render README.html from the current working directory, i.e. the model
# folder entered by the preceding `%cd` cell. (Plain string: the previous
# f-string prefix had no placeholders.)
HTML(filename='README.html')

### Making predictions using trained Modulos ML model

In [None]:
import batch_client as bc 
import os
import shutil

### Set path variables

* **path_to_tar:** Path to the tar file. The tar file has to contain the same data and has to be packed in the same way as the dataset that was uploaded to Modulos.
* **path_to_tmp:** Path to temporary data folder.

In [None]:
# Same location the "Save to Modulos compatible tar file" step writes to.
path_to_tar = "/content/seagrass_prediction.tar"
# Temporary data folder, placed under the batch client's default output directory.
path_to_tmp = os.path.join(bc.DEFAULT_OUTPUT_DIR,"tmp_data_dir")

### Run the batch client 

Remove temporary files from previous run:

In [None]:
# Delete the temporary data folder left behind by an earlier run, if there is one.
if os.path.exists(path_to_tmp):
    shutil.rmtree(path_to_tmp)

The batch client takes a `.tar` file as input and outputs predictions in the same format as the training labels. It does so by performing the following steps:
* Creating a temporary directory to save and store intermediate calculations.
* Converting the `.tar` data set into an internal format (HDF5 file). Saved in `path_to_hdf5_data`
* Running the feature extractor on the data.
* Running the model to get predictions.
* Saving the predictions in the same format the training labels had when training on the platform.

In [None]:
%%time

# Run the batch client on the prediction tar. The resulting predictions are
# read back later from `output_batch_client/predictions/predictions.csv`
# inside the model folder.
bc.main(
    dataset_path=path_to_tar, 
    # NOTE(review): an empty string presumably selects the client's default
    # output directory -- confirm against the batch client's documentation.
    output_dir_user="", 
    verbose=True,
    # NOTE(review): presumably leaves the temporary data folder on disk, which
    # is why the clean-up cell above removes it before each run -- confirm.
    keep_tmp=True
)

### Look at the predictions

In [None]:
from modulos_utils.solution_utils import jupyter_utils as ju

# Build a displayer rooted at the batch client's directory and render what it
# returns as HTML in the notebook.
displayer = ju.JupyterDisplayer.construct(base_dir=bc.FILE_DIR)
HTML(displayer.show())

### Convert predictions to raster image

In [None]:
import pandas as pd

# Predictions written by the batch client for the currently selected model.
df = pd.read_csv(f'/content/{model_id}/output_batch_client/predictions/predictions.csv')

# Take an explicit copy: the array is edited in place further down (land
# masking), and `.values` can hand back a read-only view of the DataFrame's
# data when pandas Copy-on-Write is active. The flat column is then reshaped
# onto the Sentinel 2 image grid (s2.shape[1] x s2.shape[2]).
predictions = df['predictions'].to_numpy(copy=True).reshape(s2.shape[1], s2.shape[2])

In [None]:
def ndwi(s2):
    """Normalised Difference Water Index (NDWI).

    Computed as (green - nir) / (green + nir), taking band index 4 of ``s2``
    as green and band index 9 as near-infrared.
    """
    green, nir = s2[4], s2[9]
    difference = green - nir
    total = green + nir
    return difference / total

def land_mask(s2, threshold=-0.1):
    """Create a land-pixel mask by thresholding the NDWI.

    A pixel is flagged as land (True) when its NDWI is below ``threshold``.

    Parameters
    ----------
    s2 : band-indexed raster whose NDWI result exposes ``.values``
        NOTE(review): presumably an xarray DataArray -- confirm.
    threshold : float, default -0.1
        NDWI cut-off. The default reproduces the previously hard-coded value,
        so existing ``land_mask(s2)`` calls behave exactly as before.

    Returns
    -------
    Boolean array, True for pixels classified as land.
    """
    return ndwi(s2).values < threshold

In [None]:
# Compute the land mask once and reuse it for both arrays instead of
# recalculating the NDWI for every assignment.
is_land = land_mask(s2)
# Zero out land pixels in place; in the RGB composite they become black.
predictions[is_land] = 0
rgb[is_land] = 0

In [None]:
def make_seagrass_map(predictions):
    """Turn a 2-D prediction array into a 3-channel overlay of the same height/width.

    The predictions fill the first two channels (red and green when shown
    with ``imshow``) and the third channel is all zeros, so non-zero
    predictions appear yellow.

    The zero channel is sized from ``predictions`` itself rather than from the
    global ``s2`` raster, so the function only depends on its argument.
    """
    zeros = np.zeros(predictions.shape[:2])
    return np.dstack([predictions, predictions, zeros])

In [None]:
# Three figures: the land-masked RGB composite, the prediction raster, and the
# predictions overlaid on the composite.
plt.figure(figsize=(15,15))
plt.imshow(rgb)
plt.figure(figsize=(15,15))
plt.imshow(predictions)
plt.figure(figsize=(15,15))
# `rgb` is already scaled to 0-1, so adding the overlay can push channels
# above 1. Clip explicitly rather than relying on imshow's implicit clipping
# of float RGB data (which also emits a warning). The trailing semicolon
# suppresses the AxesImage repr.
plt.imshow(np.clip(rgb + make_seagrass_map(predictions), 0, 1));


In [None]:
import rasterio

In [None]:
def save_predictions_tiff(filepath, predictions, sentinel2_crs, sentinel2_transform):
    """Write a 2-D prediction array to ``filepath`` as a single-band GeoTIFF.

    Height, width and dtype are taken from ``predictions``; the CRS and
    transform are passed through unchanged to the output raster.
    """
    n_rows, n_cols = predictions.shape[0], predictions.shape[1]
    profile = {
        'driver': 'GTiff',
        'height': n_rows,
        'width': n_cols,
        'count': 1,
        'dtype': predictions.dtype,
        'crs': sentinel2_crs,
        'transform': sentinel2_transform,
    }
    with rasterio.open(filepath, 'w', **profile) as dst:
        # The array is written to band index 1, the only band (count=1).
        dst.write(predictions, 1)

In [None]:
# Output folder on the shared drive, named after the image id and timestamp
# of the model selected above.
prediction_tif_dir = f'/content/drive/Shareddrives/1_Satellite_Derived_Bathymetry & coastal veg/Lizard Island bathymetry & seagrass/seagrass_predictions/banc_d_arguin_train/{image_id}_{timestamp}'
# Alternative: write to /content instead.
# prediction_tif_dir = '/content'

In [None]:
import os

# Create the output folder together with any missing parent folders; this is a
# no-op when it already exists. Plain `mkdir` fails on re-runs and whenever a
# parent folder is missing.
os.makedirs(prediction_tif_dir, exist_ok=True)

In [None]:
# One GeoTIFF per model: the model id is embedded in the file name.
filepath = f'{prediction_tif_dir}/lizard_island_seagrass_prediction_{model_id}.tif'

# Echo the resolved output path.
filepath

In [None]:
# The export is currently disabled; uncomment the line below to write the
# prediction raster to `filepath` using the Sentinel 2 image's CRS and transform.
# save_predictions_tiff(filepath, predictions, s2.rio.crs, s2.rio.transform())