# Masking PlanetScope by UDM pixel quality and mosaicking by date

**Author:** Robbi Bishop-Taylor, Geoscience Australia

---


This notebook takes raw PlanetScope observations and masks each one using its accompanying Unusable Data Mask (UDM) to remove clouds and nodata pixels. The masked layers are then mosaicked to produce a single combined dataset for each day.

In [1]:
from os import environ
import os
import sys
from tqdm import tqdm
import zipfile
import json
import requests
import time
from requests.auth import HTTPBasicAuth
import pandas as pd
import glob
from osgeo import gdal
from matplotlib import pyplot as plt
from skimage.morphology import dilation
from skimage.morphology import square
import pickle
import xarray as xr

# Area of interest: a GeoJSON polygon given as one closed ring of
# [x, y] coordinate pairs (the last pair repeats the first)
geo_json_geometry = {
    "type": "Polygon",
    "coordinates": [
        [
            [150.03865242004395, -29.479654449250273],
            [150.0849151611328, -29.479654449250273],
            [150.0849151611328, -29.430926458062313],
            [150.03865242004395, -29.430926458062313],
            [150.03865242004395, -29.479654449250273],
        ],
    ],
}


## Mask PlanetScope by UDM
For each surface reflectance observation, we take the UDM layer and mask out bad values (e.g. clouds, nodata pixels).

In [3]:
# Make sure the output folder exists; otherwise the GTiff driver cannot
# create the masked files
masked_dir = 'data/planetscope/masked'
if not os.path.isdir(masked_dir):
    os.makedirs(masked_dir)

for in_data in glob.glob('data/planetscope/*/*_AnalyticMS_SR_clip.tif'):

    # For each SR_clip file, get name of corresponding UDM pixel quality layer
    # by substituting string. Only the file name is rewritten, so a '_SR'
    # substring in a parent directory name is left untouched
    in_dir, in_name = os.path.split(in_data)
    in_udm = os.path.join(in_dir, in_name.replace('_SR', '_DN_udm'))

    # Create name for output masked file
    out_masked = 'data/planetscope/masked/{}'.format(in_name.replace('_SR', '_masked'))

    # Skip observations that have already been masked
    if os.path.isfile(out_masked):
        continue

    # Import raw data layer and UDM pixel quality layer
    print(in_data)
    data_ds = gdal.Open(in_data)
    udm_ds = gdal.Open(in_udm)

    # gdal.Open returns None (rather than raising) for a missing or unreadable
    # file unless GDAL exceptions are enabled; skip this observation so that one
    # bad scene does not abort the whole run
    if data_ds is None or udm_ds is None:
        print('Skipping {}: could not open image or UDM layer'.format(in_data))
        data_ds = None
        udm_ds = None
        continue

    geo_transform = data_ds.GetGeoTransform()
    projection = data_ds.GetProjection()
    udm_array = udm_ds.GetRasterBand(1).ReadAsArray()

    # Dilate areas of cloud by one cell as recommended by Planet manual; any
    # non-zero UDM value is treated as unusable
    udm_dilated = dilation(udm_array > 0, square(3))

    # Set up driver for writing output
    driver = gdal.GetDriverByName('GTiff')
    dtype = gdal.GDT_UInt16

    # Create raster of given size and projection
    rows, cols = udm_array.shape
    dataset = driver.Create(out_masked, cols, rows, 4, dtype)
    dataset.SetGeoTransform(geo_transform)
    dataset.SetProjection(projection)

    # Iterate through each band and write to file
    for i in [1, 2, 3, 4]:

        # Read in data
        data_array = data_ds.GetRasterBand(i).ReadAsArray()

        # Set cells with UDM values to 0
        data_array[udm_dilated] = 0

        # Write data to array and set nodata values
        band = dataset.GetRasterBand(i)
        band.WriteArray(data_array)
        band.SetNoDataValue(0)

    # Close files (dropping the references flushes the output to disk)
    band = None
    dataset = None
    data_ds = None
    udm_ds = None

## Mosaic masked PlanetScope for each date
There can be multiple PlanetScope observations for each date due to the small 25 x 25 km tiles used by Planet. Here we mosaic all observations for a single date into one dataset per day.

In [4]:
import subprocess

# Extract date from file paths (the first eight characters of each file name)
tile_paths = glob.glob('data/planetscope/masked/*_AnalyticMS_masked_clip.tif')
date_keys = [os.path.basename(i)[0:8] for i in tile_paths]

# Get min and max coords from geojson to ensure consistent mosaic extent.
# The extent is the same for every date, so compute it once up front
x_coords = [i[0] for i in geo_json_geometry['coordinates'][0]]
y_coords = [i[1] for i in geo_json_geometry['coordinates'][0]]
extent = '{} {} {} {}'.format(min(x_coords), min(y_coords), max(x_coords), max(y_coords))

# Make sure the output folder exists so gdalwarp can write into it
mosaic_dir = 'data/planetscope/mosaics'
if not os.path.isdir(mosaic_dir):
    os.makedirs(mosaic_dir)

# Create mosaic for each date
for unique_date in sorted(set(date_keys)):

    # Set parameters for mosaicing
    in_data = 'data/planetscope/masked/*{}*_AnalyticMS_masked_clip.tif'.format(unique_date)
    out_data = 'data/planetscope/mosaics/{}_planetscope.tif'.format(unique_date)

    # Skip dates that have already been mosaiced
    if os.path.isfile(out_data):
        continue

    # Expand the wildcard in Python (sorted, as a shell would) and pass the
    # arguments as a list so that no shell is involved
    in_files = sorted(glob.glob(in_data))
    command = (['gdalwarp', '-t_srs', 'EPSG:32756', '-tr', '3.0', '-3.0', '-te'] +
               extent.split() +
               ['-te_srs', 'EPSG:4326', '-tap', '-overwrite'] +
               in_files + [out_data])

    try:

        # Mosaic individual datasets into one using gdalwarp. A non-zero exit
        # status raises CalledProcessError, so failures are actually detected
        print(subprocess.check_output(command, stderr=subprocess.STDOUT,
                                      universal_newlines=True))

    except (subprocess.CalledProcessError, OSError) as error:

        # Show whatever gdalwarp reported (OSError, e.g. gdalwarp not installed,
        # carries no captured output)
        print(getattr(error, 'output', None) or error)
        print('Failed to mosaic files for {}'.format(unique_date))

        # Remove any partial output so that this date is retried on the next run
        if os.path.isfile(out_data):
            os.remove(out_data)