# Preprocessing of RapidEye: cloud masking with a reference image

In [1]:
import os
import re
import datetime
from glob import glob
from glob import iglob
import xml.etree.ElementTree as ET
from matplotlib.pyplot import imshow
import numpy as np
from scipy import ndimage
import shutil

import rasterio

In [2]:
# Pixel values written into the output mask raster by cloud_shadow_mask_array.
# NOTE(review): the names suggest the Fmask class-code convention - confirm.
FMASK_CLOUD = 4           # pixel flagged as cloud
FMASK_CLOUD_SHADOW = 2    # pixel flagged as projected cloud shadow
FMASK_OUTSIDE = 255       # pixel outside the image footprint (no data)

In [3]:
# XML namespace URIs; script_cloud_shadow maps the prefixes used in its
# ElementTree search paths (ns, ns2, ns3, ns4) to these values when it reads
# the image metadata file.
ns = 'http://www.opengis.net/gml'
ns2 = 'http://schemas.rapideye.de/products/productMetadataGeocorrected'
ns3 = 'http://earth.esa.int/eop'
ns4 = 'http://earth.esa.int/opt'

# Auxiliary functions

## Difference image

In [4]:
def create_reference_array(bands, rows, columns, images_references_paths):
    '''
    Build a reference image as the pixelwise median of a set of images.

    Parameters
    ----------
    bands, rows, columns : int
        Shape every reference raster is expected to have; a raster with a
        different shape makes the assignment into the stack fail.
    images_references_paths : sequence of str
        Paths of the rasters (opened with rasterio) that form the reference.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (bands, rows, columns) holding, for every
        band and pixel, the median over all reference images. With an empty
        list of paths the median is taken over an empty stack and the
        result is NaN everywhere.
    '''
    number_of_images = len(images_references_paths)
    # The whole stack is held in memory as float64.
    my_array = np.empty((number_of_images, bands, rows, columns))
    for index, image_path in enumerate(images_references_paths):
        with rasterio.open(image_path) as src:
            # Assigning into the float64 stack performs the cast; the former
            # ``astype(np.float)`` used an alias that NumPy 1.24 removed and
            # created an extra full-size temporary copy.
            my_array[index] = src.read()
    medians = np.empty((bands, rows, columns))
    # One band at a time keeps the temporary memory used by the median low.
    for band in range(bands):
        medians[band] = np.median(my_array[:, band, :, :], axis=0)
    return medians

In [5]:
def calculate_difference_from_reference(image_array, images_references_paths, bands, rows, columns):
    '''
    Subtract the median reference image from an image.

    The reference is built by create_reference_array from the rasters in
    ``images_references_paths`` with shape (bands, rows, columns). Positive
    values in the result mean the image is brighter than the reference,
    negative values mean it is darker.
    '''
    reference_array = create_reference_array(bands, rows, columns, images_references_paths)
    difference_array = image_array - reference_array
    return difference_array

## Clouds and shadows

In [6]:
def filter_median(input_image_raster, filter_size):
    '''
    Smooth a raster with a median filter.

    ``filter_size`` is handed to scipy.ndimage.median_filter as its ``size``
    argument; the filtered array is returned.
    '''
    filtered_raster = ndimage.median_filter(input_image_raster, size=filter_size)
    return filtered_raster

In [7]:
def morph_dilation(input_image_raster, filter_size):
    '''
    Grey-scale morphological dilation of a raster.

    A 2D raster is dilated with a filter_size x filter_size window and a 2D
    result is returned. A 3D raster is dilated plane by plane along its
    first axis (window of 1 x filter_size x filter_size) and a 3D result is
    returned. Any other dimensionality raises an Exception.
    '''
    was_two_dimensional = input_image_raster.ndim == 2
    if was_two_dimensional:
        # Promote to a one-plane stack so both cases share the same call.
        input_image_raster = np.expand_dims(input_image_raster, axis=0)
    if input_image_raster.ndim != 3:
        raise Exception("Input array has to be 3D")
    dilated = ndimage.grey_dilation(input_image_raster, size=(1, filter_size, filter_size))
    # Hand back the same dimensionality the caller passed in.
    return dilated[0] if was_two_dimensional else dilated

In [34]:
def cloud_mask_array(image_difference_array, threshold=30000, filter_size=13, morphing_size=0):
    '''
    Build a cloud mask from a difference-to-reference array.

    All bands (first axis) are summed into a single layer; pixels whose sum
    is above ``threshold`` become 1 and the rest 0. We assume that clouds
    are brighter than the reference. The binary layer is then smoothed with
    a median filter of ``filter_size`` and, when ``morphing_size`` is not
    zero, grown with a morphological dilation of that size.

    Returns an integer array with the shape of one band.
    '''
    summed_difference = np.sum(image_difference_array, axis=0)
    # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24
    # removed; both denote the same dtype.
    clouds = filter_median((summed_difference > threshold).astype(int), filter_size)
    if morphing_size:
        clouds = morph_dilation(clouds, morphing_size)
    return clouds

In [35]:
def shadow_mask_array(image_difference_array, threshold=-5500, filter_size=13, morphing_size=0):
    '''
    Build a shadow mask from a difference-to-reference array.

    Only the bands from index 3 onward are summed into a single layer
    (presumably red edge and near infrared for 5-band RapidEye images -
    confirm); pixels whose sum is below ``threshold`` become 1 and the rest
    0. We assume that shadows are darker than the reference. The binary
    layer is then smoothed with a median filter of ``filter_size`` and, when
    ``morphing_size`` is not zero, grown with a morphological dilation of
    that size.

    Returns an integer array with the shape of one band.
    '''
    summed_difference = np.sum(image_difference_array[3:, :, :], axis=0)
    # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24
    # removed; both denote the same dtype.
    shadows = filter_median((summed_difference < threshold).astype(int), filter_size)
    if morphing_size:
        shadows = morph_dilation(shadows, morphing_size)
    return shadows

In [36]:
def calculate_cloud_shadow(clouds,
                           solar_zenith, solar_azimuth, resolution, spacecraft_view_angle=None):
    '''
    Project a cloud mask onto the ground for a range of cloud heights.

    Every pixel equal to 1 in ``clouds`` is displaced once per cloud height
    (500 to 15100 in steps of 100) and the union of all displaced positions
    is returned. The intersection with a difference-based shadow mask that
    earlier versions applied is disabled, so the raw projection is returned.

    Parameters
    ----------
    clouds : 2D numpy.ndarray
        Cloud mask; pixels equal to 1 are projected.
    solar_zenith : float
        Angle in degrees, used as ``tan(90 - solar_zenith)``.
        NOTE(review): script_cloud_shadow passes the metadata element
        ``illuminationElevationAngle`` here, so despite its name this
        parameter is treated as an elevation angle.
    solar_azimuth : float
        Illumination azimuth in degrees.
    resolution : float
        Pixel size, in the same unit as the cloud heights.
    spacecraft_view_angle : float or None
        When given, its sign selects the side the shadow is projected to;
        when None, ``solar_azimuth < 180`` selects it instead.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``clouds`` with 1 at projected positions.
    '''
    cloud_indices = np.where(clouds == 1)
    cloud_rows, cloud_cols = cloud_indices[0], cloud_indices[1]
    cloud_heights = np.arange(500, 15200, 100) #(1000, 3100, 100)
    cloud_mask_shape = clouds.shape
    clouds_projection = np.zeros(cloud_mask_shape)

    # ``is not None`` so that a view angle of exactly 0.0 is still honoured
    # instead of silently falling back to the azimuth rule.
    if spacecraft_view_angle is not None:
        print('using spacecraft view angle')
        subtract_offset = spacecraft_view_angle > 0
    else:
        print('using solar azimuth angle')
        subtract_offset = solar_azimuth < 180
    direction = -1 if subtract_offset else 1

    # Loop-invariant terms, computed once.
    shadow_length_factor = np.tan(np.deg2rad(90 - solar_zenith))
    azimuth_radians = np.deg2rad(360 - solar_azimuth)
    sin_azimuth = np.sin(azimuth_radians)
    cos_azimuth = np.cos(azimuth_radians)
    last_row = cloud_mask_shape[0] - 1
    last_col = cloud_mask_shape[1] - 1

    for cloud_height in cloud_heights:
        distance = cloud_height / resolution * shadow_length_factor
        x_difference = distance * sin_azimuth
        y_difference = distance * cos_azimuth
        # NOTE(review): ``distance`` was already divided by ``resolution``
        # and the offsets are divided by it again; kept as in the original
        # so that results do not change - confirm this is intended.
        rows = cloud_rows + direction * (y_difference / resolution)
        cols = cloud_cols + direction * (x_difference / resolution)

        # Builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
        # NOTE(review): positions that fall outside the image are clamped
        # onto the border rather than discarded (original behaviour).
        rows = np.clip(rows.astype(int), 0, last_row)
        cols = np.clip(cols.astype(int), 0, last_col)
        clouds_projection[rows, cols] = 1
    return clouds_projection

In [37]:
def outside_mask_array(image_array, no_data_value=0, outside_value=FMASK_OUTSIDE):
    '''
    Create a mask of the pixels that lie outside the image.

    A pixel is considered outside when it equals ``no_data_value`` in every
    band (first axis of ``image_array``). Earlier versions ignored
    ``no_data_value`` and tested whether the sum of the bands was zero; for
    non-negative data and the default value of 0 both tests agree.

    Returns a float array with the shape of one band, holding
    ``outside_value`` at outside pixels and 0 elsewhere.
    '''
    outside_pixels = np.all(image_array == no_data_value, axis=0)
    mask_array = np.zeros((image_array.shape[1], image_array.shape[2]))
    mask_array[outside_pixels] = outside_value
    return mask_array

In [38]:
def cloud_shadow_mask_array(image_array, image_difference_array,
                            solar_zenith, solar_azimuth,
                            resolution, cloud_threshold, shadow_threshold,
                            spacecraft_view_angle=None):
    '''
    Combine the cloud, projected-shadow and outside masks into one raster.

    Clouds are detected on ``image_difference_array`` with
    ``cloud_threshold`` and projected to obtain the shadow candidates.
    ``shadow_threshold`` is accepted but currently unused, because the
    difference-based shadow mask is disabled. When classes overlap the
    priority is outside > cloud > cloud shadow.
    '''
    cloud_pixels = cloud_mask_array(image_difference_array, threshold=cloud_threshold)
    projected_shadow_pixels = calculate_cloud_shadow(cloud_pixels,
                                                     solar_zenith, solar_azimuth, resolution,
                                                     spacecraft_view_angle)

    image_mask_array = outside_mask_array(image_array, outside_value=FMASK_OUTSIDE)

    # Applied in increasing priority: each later class overwrites the earlier.
    class_layers = (
        (projected_shadow_pixels == 1, FMASK_CLOUD_SHADOW),
        (cloud_pixels == 1, FMASK_CLOUD),
        (np.sum(image_array, axis=0) == 0, FMASK_OUTSIDE),
    )
    for selected_pixels, class_value in class_layers:
        np.putmask(image_mask_array, selected_pixels, class_value)
    return image_mask_array

In [39]:
def script_cloud_shadow(path, path_result, cloud_threshold, shadow_threshold, angle):
    '''
    Create the cloud/shadow mask of the single RapidEye L3A image in ``path``.

    Steps:
      1. Read illumination angles, image shape and tile id from the unique
         ``*_metadata.xml`` file in ``path``.
      2. Copy the unique image ``.tif`` of ``path`` into ``path_result``.
      3. Build a median reference from every other L3A image of the same
         tile under ``/LUSTRE/MADMEX/eodata/rapideye/<tile_id>`` and
         subtract it from the image.
      4. Write the mask to ``path_result`` as
         ``<image name>_clouds_with_reference_<angle>.tif``.

    Parameters
    ----------
    path : str
        Directory holding exactly one image tif and one metadata xml.
    path_result : str
        Output directory; created when missing.
    cloud_threshold, shadow_threshold : number
        Forwarded to cloud_shadow_mask_array.
    angle : str
        'space_craft' reads ``spaceCraftViewAngle`` from the metadata to
        choose the shadow direction; any other value leaves it to the solar
        azimuth. The value is also part of the output file name.

    Raises
    ------
    ValueError
        When the xml or tif file is not unique, when a metadata element is
        missing, or when the tile has no other image to build the reference.
    '''
    namespaces = {'ns': ns, 'ns2': ns2, 'ns3': ns3, 'ns4': ns4}
    acquisition = 'ns:using/ns3:EarthObservationEquipment/ns3:acquisitionParameters/ns2:Acquisition/'
    product_information = 'ns:resultOf/ns2:EarthObservationResult/ns3:product/ns2:ProductInformation/'

    xml_file_list = glob(os.path.join(path, '*.xml'))
    pattern = re.compile(r'.*[0-9]{4}-[0-9]{2}-[0-9]{2}.*_RE(1|2|3|4|5)_3A.*_metadata\.xml')
    xml_file_list = [x for x in xml_file_list if pattern.search(x)]
    if len(xml_file_list) != 1:
        raise ValueError('Could not identify a unique xml metadata file')
    xml_file = xml_file_list[0]
    print('metadata file')
    print(xml_file)
    root = ET.parse(xml_file).getroot()

    def metadata_text(element_path):
        # Fail with the offending path instead of an AttributeError on None.
        element = root.find(element_path, namespaces=namespaces)
        if element is None:
            raise ValueError('Metadata element not found: ' + element_path)
        return element.text

    # NOTE: the variable is called solar_zenith but holds the illumination
    # *elevation* angle; calculate_cloud_shadow uses it as (90 - value).
    solar_zenith = float(metadata_text(acquisition + 'ns4:illuminationElevationAngle'))
    solar_azimuth = float(metadata_text(acquisition + 'ns4:illuminationAzimuthAngle'))
    if angle == 'space_craft':
        # spaceCraftViewAngle is looked up in the RapidEye namespace (ns2),
        # as before, where the prefix ns4 was remapped to ns2 for this query.
        spacecraft_view_angle = float(metadata_text(acquisition + 'ns2:spaceCraftViewAngle'))
    else:
        spacecraft_view_angle = None
    rows = int(metadata_text(product_information + 'ns2:numRows'))
    columns = int(metadata_text(product_information + 'ns2:numColumns'))
    bands = int(metadata_text(product_information + 'ns2:numBands'))
    tile_id = metadata_text('ns:metaDataProperty/ns2:EarthObservationMetaData/ns2:tileId')
    print('metadata')
    print((solar_zenith, solar_azimuth, spacecraft_view_angle, rows, columns, bands, tile_id))

    tif_file_list = glob(os.path.join(path, '*.tif'))
    pattern = re.compile(r'.*[0-9]{4}-[0-9]{2}-[0-9]{2}.*_RE(1|2|3|4|5)_3A.*[0-9]{6}\.tif$')
    tif_file_list = [x for x in tif_file_list if pattern.search(x)]
    if len(tif_file_list) != 1:
        raise ValueError('Could not identify a unique tif file')
    tif_file = tif_file_list[0]
    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(path_result, exist_ok=True)
    print('tif_file')
    print(tif_file)
    # The original image is copied next to the mask for visual review.
    shutil.copy(tif_file, path_result)
    with rasterio.open(tif_file) as src:
        data = src.read()
        meta_src = src.meta.copy()

    # First coefficient of the affine transform, used as the pixel size.
    resolution = list(meta_src['transform'])[0]
    path_tile_id = '/LUSTRE/MADMEX/eodata/rapideye/' + tile_id
    list_tif_files_tile_id = glob(os.path.join(path_tile_id, '*/*/l3a/*.tif'))
    # Every other image of the tile contributes to the reference.
    list_tif_files_tile_id = [x for x in list_tif_files_tile_id if pattern.search(x) and x != tif_file]
    if not list_tif_files_tile_id:
        # Without references the median is NaN everywhere and the mask would
        # silently report "no clouds".
        raise ValueError('Could not find reference images for tile ' + tile_id)

    print('Beginning cloud masking of rapideye image')
    image_difference_array = calculate_difference_from_reference(data, list_tif_files_tile_id, bands, rows, columns)
    cloud_shadow_array = cloud_shadow_mask_array(data, image_difference_array,
                                                 solar_zenith, solar_azimuth, resolution,
                                                 cloud_threshold, shadow_threshold, spacecraft_view_angle)
    meta = meta_src
    # NOTE(review): the mask only holds a handful of class values, so float64
    # is larger than needed; kept to leave the output format unchanged.
    meta.update(compress='lzw',
                count=1,
                dtype=rasterio.float64)
    filename_result = os.path.splitext(os.path.basename(tif_file))[0] + '_clouds_with_reference_' + angle + '.tif'
    # os.path.join works whether or not path_result ends with a separator.
    filename_path_result = os.path.join(path_result, filename_result)

    print(filename_path_result)

    with rasterio.open(filename_path_result, "w", **meta) as dst:
        dst.write(cloud_shadow_array, 1)

# Definitions of variables and call to script

## Select a tile id and set it in the next variable

In [40]:
# Tile identifier; it selects the input directory and names the results directory.
tile_id = '1448025'

## Select 'space_craft' or 'azimuth_angle' for the next variable

In [41]:
# Criterion script_cloud_shadow uses to choose the shadow projection direction:
# 'space_craft' reads spaceCraftViewAngle from the metadata; any other value
# leaves the choice to the solar azimuth. The value is also appended to the
# name of the output file.
#angle = 'space_craft'
angle = 'azimuth_angle'

In [42]:
# Input directory of the selected tile (kept with its trailing slash).
path = '/LUSTRE/MADMEX/eodata/rapideye/' + tile_id + '/'
# Results go to ~/results/rapideye_preprocessing/<MM-DD-YYYY>/<tile_id>/
path_result_tile_id = os.path.expanduser(
    os.path.join(
        "~/results/rapideye_preprocessing",
        '/'.join([datetime.date.today().strftime("%m-%d-%Y"), tile_id, '']),
    )
)

In [43]:
# Show the resolved input and results directories before processing.
print(path,path_result_tile_id)

/LUSTRE/MADMEX/eodata/rapideye/1448025/ /home/madmex_user/results/rapideye_preprocessing/11-25-2019/1448025/


In [44]:
# Create the results directory of the tile; exist_ok makes the cell safe to
# re-run and avoids the race between checking for and creating the directory.
os.makedirs(path_result_tile_id, exist_ok=True)

In [20]:
# Thresholds shared by every image of the tile (loop-invariant).
cloud_threshold = 20000
shadow_threshold = -3000
for source_path in iglob(os.path.join(path, '*/*/l3a/')):
    # source_path is <path>/<year>/<date>/l3a/; the date is taken relative to
    # ``path`` so it no longer depends on how deep the data root is nested.
    image_date = os.path.relpath(source_path, path).split(os.sep)[1] + '/'
    path_result = path_result_tile_id + image_date
    print('source_path')
    print(source_path)
    script_cloud_shadow(source_path, path_result, cloud_threshold, shadow_threshold, angle)
    print('-------')


source_path
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/
metadata file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/1448025_2016-07-08_RE4_3A_718223_metadata.xml
metadata
(76.94772, 74.04642, None, 5000, 5000, 5, '1448025')
tif_file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/1448025_2016-07-08_RE4_3A_718223.tif
Beginning cloud masking of rapideye image
using solar azimuth angle
/home/madmex_user/results/rapideye_preprocessing/11-13-2019/1448025/2016-07-08/1448025_2016-07-08_RE4_3A_718223_clouds_with_reference_azimuth_angle.tif
-------
source_path
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/
metadata file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/1448025_2016-03-14_RE2_3A_366622_metadata.xml
metadata
(65.83974, 151.3344, None, 5000, 5000, 5, '1448025')
tif_file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/1448025_2016-03-14_RE2_3A_366622.tif
Beginning cloud masking of rapideye image
using solar az

In [33]:
# Thresholds shared by every image of the tile (loop-invariant).
cloud_threshold = 20000
shadow_threshold = -3000
for source_path in iglob(os.path.join(path, '*/*/l3a/')):
    # source_path is <path>/<year>/<date>/l3a/; the date is taken relative to
    # ``path`` so it no longer depends on how deep the data root is nested.
    image_date = os.path.relpath(source_path, path).split(os.sep)[1] + '/'
    path_result = path_result_tile_id + image_date
    print('source_path')
    print(source_path)
    print('path_result')
    print(path_result)
    script_cloud_shadow(source_path, path_result, cloud_threshold, shadow_threshold, angle)
    print('-------')


source_path
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/
path_result
/home/madmex_user/results/rapideye_preprocessing/11-22-2019/1448025/2016-07-08/
metadata file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/1448025_2016-07-08_RE4_3A_718223_metadata.xml
metadata
(76.94772, 74.04642, -9.88361, 5000, 5000, 5, '1448025')
tif_file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/1448025_2016-07-08_RE4_3A_718223.tif
Beginning cloud masking of rapideye image
using spacecraft view angle
/home/madmex_user/results/rapideye_preprocessing/11-22-2019/1448025/2016-07-08/1448025_2016-07-08_RE4_3A_718223_clouds_with_reference_space_craft.tif
-------
source_path
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/
path_result
/home/madmex_user/results/rapideye_preprocessing/11-22-2019/1448025/2016-03-14/
metadata file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/1448025_2016-03-14_RE2_3A_366622_metadata.xml
metadata
(65.83974, 151.3344, -3.07

In [45]:
# Thresholds shared by every image of the tile (loop-invariant).
cloud_threshold = 20000
shadow_threshold = -3000
for source_path in iglob(os.path.join(path, '*/*/l3a/')):
    # source_path is <path>/<year>/<date>/l3a/; the date is taken relative to
    # ``path`` so it no longer depends on how deep the data root is nested.
    image_date = os.path.relpath(source_path, path).split(os.sep)[1] + '/'
    path_result = path_result_tile_id + image_date
    print('source_path')
    print(source_path)
    print('path_result')
    print(path_result)
    script_cloud_shadow(source_path, path_result, cloud_threshold, shadow_threshold, angle)
    print('-------')


source_path
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/
path_result
/home/madmex_user/results/rapideye_preprocessing/11-25-2019/1448025/2016-07-08/
metadata file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/1448025_2016-07-08_RE4_3A_718223_metadata.xml
metadata
(76.94772, 74.04642, None, 5000, 5000, 5, '1448025')
tif_file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-07-08/l3a/1448025_2016-07-08_RE4_3A_718223.tif
Beginning cloud masking of rapideye image
using solar azimuth angle
/home/madmex_user/results/rapideye_preprocessing/11-25-2019/1448025/2016-07-08/1448025_2016-07-08_RE4_3A_718223_clouds_with_reference_azimuth_angle.tif
-------
source_path
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/
path_result
/home/madmex_user/results/rapideye_preprocessing/11-25-2019/1448025/2016-03-14/
metadata file
/LUSTRE/MADMEX/eodata/rapideye/1448025/2016/2016-03-14/l3a/1448025_2016-03-14_RE2_3A_366622_metadata.xml
metadata
(65.83974, 151.3344, None, 500

# For QGIS:

In bash:

```
tile_id=1448025

ls /LUSTRE/MADMEX/eodata/rapideye/$tile_id/201*/*/l3a/*.tif|grep -v '[e-m].tif$'|xargs -I {} open -a /Applications/QGIS.app/ {}

ls /LUSTRE/MADMEX/processes/madmex_processing_results/RE_toa/*$tile_id*clouds.tif|xargs -I {} open -a /Applications/QGIS.app/ {}
```

# Note:

After reviewing the masks in QGIS, delete the copies of the original tifs that the script placed in the results directory under `/home/madmex_user/results/rapideye_preprocessing`.