### Import Modules

In [1]:
import geopandas as gpd
import json
import pandas as pd
import numpy as np
import os
import ee
import datetime
import sys

### Declare Functions

In [2]:
#Build a Sentinel-2 collection
#this is initialised for each day/period of analysis
#Rob has altered this to NOT filter by clouds (we want that statistic in our outputs)
def get_s2_sr_cld_col(aoi, start_date, end_date, s2_sr_collection='COPERNICUS/S2_SR_HARMONIZED'):
    """Build a Sentinel-2 surface reflectance collection joined to s2cloudless.

    The collection is filtered by bounds and date only. It is deliberately NOT
    filtered by CLOUDY_PIXEL_PERCENTAGE, because cloud cover is one of the
    statistics reported downstream.

    Args:
        aoi: Earth Engine geometry passed to ``filterBounds``.
        start_date: Start of the period, passed to ``filterDate``.
        end_date: End of the period, passed to ``filterDate``.
        s2_sr_collection: Asset ID of the surface reflectance collection.
            Defaults to the harmonized collection; the non-harmonized
            'COPERNICUS/S2_SR' asset carries a +1000 DN offset on scenes with
            processing baseline 04.00 or later, which skews normalized
            differences and reflectance thresholds for recent dates.

    Returns:
        ee.ImageCollection of SR images, each with the matching
        S2_CLOUD_PROBABILITY image stored in its 's2cloudless' property.
    """
    # Import and filter S2 SR.
    s2_sr_col = (ee.ImageCollection(s2_sr_collection)
        .filterBounds(aoi)
        .filterDate(start_date, end_date))

    # Import and filter s2cloudless.
    s2_cloudless_col = (ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')
        .filterBounds(aoi)
        .filterDate(start_date, end_date))

    # Join the filtered s2cloudless collection to the SR collection by the 'system:index' property.
    same_index = ee.Filter.equals(leftField='system:index', rightField='system:index')
    joined = ee.Join.saveFirst('s2cloudless').apply(
        primary=s2_sr_col,
        secondary=s2_cloudless_col,
        condition=same_index,
    )
    return ee.ImageCollection(joined)

def add_required_bands(img):
    """Append the 'ndvi', 'mndwi' and 'ndmi' index bands to an image.

    Each index is ``img.normalizedDifference`` of the listed band pair,
    renamed to the index name. The original bands are kept.
    """
    #Output band name -> the two source bands fed to normalizedDifference
    index_band_pairs = {
        'ndvi': ['B8', 'B4'],
        'mndwi': ['B3', 'B12'],
        'ndmi': ['B8', 'B11'],
    }

    derived_bands = [
        img.normalizedDifference(band_pair).rename(index_name)
        for index_name, band_pair in index_band_pairs.items()
    ]

    #Stack the derived bands into one image and attach it to the input
    return img.addBands(ee.Image(derived_bands))

#Define cloud mask component functions
#Cloud Components
#Pretty much unchanged from examples on web
def add_cloud_bands(img):
    """Attach the s2cloudless 'probability' band and a binary 'clouds' band.

    Expects the image to carry its s2cloudless image in the 's2cloudless'
    property. A pixel is flagged as cloud when its probability is greater
    than the module-level CLD_PRB_THRESH.
    """
    # Pull the joined s2cloudless image and keep only its probability band.
    cloudless_img = ee.Image(img.get('s2cloudless'))
    probability_band = cloudless_img.select('probability')

    # Threshold the probability to get the cloud flag.
    cloud_flag = probability_band.gt(CLD_PRB_THRESH).rename('clouds')

    # Append both bands to the input image.
    extra_bands = ee.Image([probability_band, cloud_flag])
    return img.addBands(extra_bands)

#Cloud Shadow Components
#Pretty much unchanged from examples on web
def add_shadow_bands(img):
    """Attach 'dark_pixels', 'cloud_transform' and 'shadows' bands.

    Requires the 'clouds' band (see add_cloud_bands) plus the 'SCL' and 'B8'
    bands and the MEAN_SOLAR_AZIMUTH_ANGLE property. Uses the module-level
    NIR_DRK_THRESH and CLD_PRJ_DIST settings.
    """
    SR_BAND_SCALE = 1e4
    nir_cutoff = NIR_DRK_THRESH*SR_BAND_SCALE

    # Dark NIR pixels that are not SCL class 6 (water) are potential shadow.
    not_water = img.select('SCL').neq(6)
    dark_nir = img.select('B8').lt(nir_cutoff)
    dark_pixels = dark_nir.multiply(not_water).rename('dark_pixels')

    # Direction in which to project shadows from clouds (assumes UTM projection).
    solar_azimuth = ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    shadow_azimuth = ee.Number(90).subtract(solar_azimuth)

    # Project the cloud mask along that direction, out to CLD_PRJ_DIST*10.
    native_projection = img.select(0).projection()
    projected_clouds = img.select('clouds').directionalDistanceTransform(shadow_azimuth, CLD_PRJ_DIST*10)
    cld_proj = (projected_clouds
        .reproject(crs=native_projection, scale=100)
        .select('distance')
        .mask()
        .rename('cloud_transform'))

    # Shadows are where the cloud projection overlaps the dark pixels.
    shadows = cld_proj.multiply(dark_pixels).rename('shadows')

    # Append all three component bands to the input image.
    shadow_components = ee.Image([dark_pixels, cld_proj, shadows])
    return img.addBands(shadow_components)

#Final cloud shadow mask
#Rob has altered the application of a threshold
#Rob has also used this function to add in our required bands
def add_cld_shdw_mask(img):
    """Add index, cloud, shadow and combined cloud/shadow mask bands.

    Runs add_required_bands, add_cloud_bands and add_shadow_bands in turn,
    then appends three mask bands:
      'cloudandshadow'         - 1 where 'clouds' + 'shadows' > 0
      'cloudandshadowfiltbuff' - the mask after focalMin(2) then
                                 focalMax(BUFFER*2/20), at 20 m scale
      'cloudandshadowbuff'     - the mask after focalMax(BUFFERSMALL*2/20)
                                 only, at 20 m scale
    """
    # Build up the component bands step by step.
    with_indices = add_required_bands(img)
    with_clouds = add_cloud_bands(with_indices)
    img_cloud_shadow = add_shadow_bands(with_clouds)

    # Combine cloud and shadow mask, set cloud and shadow as value 1, else 0.
    cloud_band = img_cloud_shadow.select('clouds')
    shadow_band = img_cloud_shadow.select('shadows')
    is_cld_shdw = cloud_band.add(shadow_band).gt(0).rename('cloudandshadow')

    # Both buffered variants are evaluated in the first band's projection.
    # 20 m scale is for speed, and assumes clouds don't require 10 m precision.
    base_projection = img.select([0]).projection()

    # Variant 1: remove small patches (focalMin), then dilate by BUFFER.
    is_cld_shdw_FB = (is_cld_shdw
        .focalMin(2)
        .focalMax(BUFFER*2/20)
        .reproject(crs=base_projection, scale=20)
        .rename('cloudandshadowfiltbuff'))

    # Variant 2: dilate by BUFFERSMALL only, keeping small patches.
    is_cld_shdw_Buff = (is_cld_shdw
        .focalMax(BUFFERSMALL*2/20)
        .reproject(crs=base_projection, scale=20)
        .rename('cloudandshadowbuff'))

    # Add the final cloud-shadow masks to the image.
    mask_bands = ee.Image([is_cld_shdw, is_cld_shdw_FB, is_cld_shdw_Buff])
    return img_cloud_shadow.addBands(mask_bands)

#This is the analysis applied to each 'day', assuming the collection has already been built (with our bands added)
def calc_stats_from_layers(col, AOI, json_storages, utmZoneInfo, mndwiMaskVal, ndviMaskVal):
    """Compute per-feature zonal statistics for one period's image collection.

    The collection is mosaicked, the index and cloud layers are clipped to the
    AOI and reprojected to ``utmZoneInfo`` at 10 m, and each layer is reduced
    over every feature with ``reduceRegions``.

    Args:
        col: ee.ImageCollection whose images already carry the 'ndvi',
            'mndwi', 'ndmi' and 'cloudandshadowbuff' bands.
        AOI: Earth Engine geometry used to clip each layer.
        json_storages: GeoJSON feature collection (as a Python dict) used to
            build the ee.FeatureCollection of zones.
        utmZoneInfo: CRS string (e.g. "EPSG:28355") for reprojection and
            reduction.
        mndwiMaskVal: MNDWI threshold; pixels >= this count towards the
            MNDWI area.
        ndviMaskVal: NDVI threshold; pixels >= this count towards the NDVI
            area.

    Returns:
        dict mapping 'allAreas', 'ndviAreas', 'mndwiAreas', 'ndviAvg',
        'mndwiAvg', 'ndmiAvg' and 'cloudAreas' to the list of GeoJSON
        features returned by the corresponding reduction. Area entries carry
        the reducer output under 'sum', average entries under 'mean'. Callers
        look these keys up by name, so they must stay in sync.
    """
    mndwiMaskVal = float(mndwiMaskVal)
    ndviMaskVal = float(ndviMaskVal)

    # Mosaic the image collection.
    img = col.mosaic()

    def _project(layer):
        #Clip to the AOI and force evaluation in the analysis CRS at 10 m
        return layer.clip(AOI).reproject(crs=utmZoneInfo, scale=10)

    ndvi = img.select('ndvi')
    mndwi = img.select('mndwi')
    ndmi = img.select('ndmi')

    #0/1 threshold layers (values of the index are not retained)
    ndviMaskLayer = ndvi.gte(ndviMaskVal).rename('ndviMaskLayer')
    mndwiMaskLayer = mndwi.gte(mndwiMaskVal).rename('mndwiMaskLayer')

    #selfMask masks out the zero pixels, leaving only cloud/shadow pixels
    cloudandshadowbuff = img.select('cloudandshadowbuff').selfMask()

    #Perform reprojections
    ndviProj = _project(ndvi)
    mndwiProj = _project(mndwi)
    ndmiProj = _project(ndmi)
    ndviMaskedProj = _project(ndviMaskLayer)
    mndwiMaskedProj = _project(mndwiMaskLayer)
    cloudandshadowbuffProj = _project(cloudandshadowbuff)

    #Multiplying a 0/1 layer by pixel area gives each flagged cell its area
    pixelArea = ee.Image.pixelArea()

    #Need one complete raster for the total area calc: gte(-50.0) is expected
    #to be 1 on every unmasked MNDWI pixel, since the index is a normalized difference
    allStatsLayer = mndwiProj.gte(-50.0).multiply(pixelArea).rename('allPixels')

    ndviStatsLayer = ndviMaskedProj.multiply(pixelArea).rename('ndviStatsLayer')
    ndviAvgLayer = ndviProj.multiply(1).rename('ndviAvgLayer')

    mndwiStatsLayer = mndwiMaskedProj.multiply(pixelArea).rename('mndwiStatsLayer')
    mndwiAvgLayer = mndwiProj.multiply(1).rename('mndwiAvgLayer')

    #Was previously renamed 'mndwiAvgLayer' by copy-paste
    ndmiAvgLayer = ndmiProj.multiply(1).rename('ndmiAvgLayer')

    cloudStatsLayer = cloudandshadowbuffProj.multiply(pixelArea).rename('cloudStatsLayer')

    theCollection = ee.FeatureCollection(json_storages)

    def _zonal(layer, reducer):
        #getInfo() transfers the server-side result to the client; the returned
        #features include the full geometry of every zone as well as the stats
        reduced = layer.reduceRegions(
            reducer=reducer,
            crs=utmZoneInfo,
            scale=10,
            collection=theCollection,
        )
        return reduced.getInfo()['features']

    allStats = _zonal(allStatsLayer, ee.Reducer.sum())
    mndwiStats = _zonal(mndwiStatsLayer, ee.Reducer.sum())
    mndwiAvg = _zonal(mndwiAvgLayer, ee.Reducer.mean())
    ndviStats = _zonal(ndviStatsLayer, ee.Reducer.sum())
    ndviAvg = _zonal(ndviAvgLayer, ee.Reducer.mean())
    ndmiAvg = _zonal(ndmiAvgLayer, ee.Reducer.mean())
    cloudStats = _zonal(cloudStatsLayer, ee.Reducer.sum())

    #Send back a dictionary of JSON objects, these string identifiers need to match the script that calls this function
    return {'allAreas':allStats,'ndviAreas':ndviStats,'mndwiAreas':mndwiStats,'ndviAvg':ndviAvg,'mndwiAvg':mndwiAvg,'ndmiAvg':ndmiAvg,'cloudAreas':cloudStats}

#Function to pull out our metrics from GEE JSON results
def retrieveStatsFromJSON(theResultsDict, theIDField, theMetric, theDataPropertyName):
    """Pull one metric per feature out of a GEE reduceRegions result.

    Example of a single input feature:
        {'type': 'Feature', 'geometry': {...}, 'id': '0',
         'properties': {'OBJECTID': 12769, ..., 'sum': 438920.0014711266}}

    Args:
        theResultsDict: Sequence of feature dicts (despite the name, a list),
            each with a "properties" mapping.
        theIDField: Property key holding the feature identifier.
        theMetric: Property key holding the reducer output, e.g. "sum" or
            "mean".
        theDataPropertyName: Column name to give the metric in the output.

    Returns:
        pandas.DataFrame with columns [theIDField, theDataPropertyName], one
        row per feature in input order. Features without ``theMetric`` in
        their properties get the sentinel value -9999.

    Raises:
        KeyError: If a feature has no "properties" entry or its properties
            lack ``theIDField``.
    """
    #Build all rows first and create the frame once; enlarging a DataFrame
    #row by row with .loc re-allocates on every insert
    rows = []
    for feature in theResultsDict:
        properties = feature["properties"]
        rows.append([properties[theIDField], properties.get(theMetric, -9999)])

    return pd.DataFrame(rows, columns=[theIDField, theDataPropertyName])

### Setup proxy server

In [7]:
#Proxy credentials; these are embedded in the proxy URL built in the next cell
#NOTE(review): avoid saving a real password in the notebook - consider prompting for it (e.g. getpass) instead
yourUserName = "dechastelj" #"ellisr"
yourPassword = ""

In [8]:
#Route HTTP and HTTPS traffic through the authenticating proxy.
#The credentials are percent-encoded so that characters such as '@', ':' or '/'
#in the user name or password cannot break the structure of the proxy URL.
from urllib.parse import quote

proxyUrl = "http://" + quote(yourUserName, safe="") + ":" + quote(yourPassword, safe="") + "@web-prdproxy-usr.dmz:80"
os.environ['http_proxy'] = proxyUrl
os.environ['https_proxy'] = proxyUrl

### Initialize GEE

In [11]:
#Interactive authentication (prompts for a verification code and saves a token).
#Not needed on every run: once a token has been saved, ee.Initialize() alone can be enough
ee.Authenticate()

Enter verification code: <redacted>

Successfully saved authorization token.


In [12]:
#Initialise the Earth Engine client library; required before any other ee call in this notebook
ee.Initialize()

### Declare variables

In [19]:
#Your shapefiles; the analysis cell reprojects them to WGS84 (EPSG:4326) for transfer to GEE

#The shapefiles of features
featurePath1 = r"C:\Users\dechastelj\Jupyter\BR_storages_WGS84.shp"
featurePath2 = r"C:\Users\dechastelj\Jupyter\BR_cropping_final_WGS84.shp"
featurePath3 = r"C:\Users\dechastelj\Jupyter\BR_sample_parcels_WGS84.shp"
featurePathList = [featurePath1] #Only the storages shapefile is processed

#The field holding the unique feature identifier
uniqueid_field = "UNIQUEID"

#The most appropriate UTM zone for spatial analysis (uncomment exactly one)
#utmZoneInfo = "EPSG:28356"#z56
utmZoneInfo = "EPSG:28355"#z55
#utmZoneInfo = "EPSG:28354"#z54

#Output CSV to hold a hard copy of the results
#NOTE(review): the file name says "sample_parcels" while featurePathList holds only the storages shapefile - confirm
outputCSV = r"C:\Users\dechastelj\Jupyter\output\BR_sample_parcels_output.csv"

#Start of analysis period (inclusive), YYYY-MM-DD
#sDate = "2019-01-01"
sDate = "2023-02-21"

#End of analysis period (inclusive, the day count adds one), YYYY-MM-DD
eDate = "2023-02-22"

#Index threshold values for the area statistics (greater than or equal to)
mdwiThresh = 0.0
ndviThresh = 0.5

#Cloud / shadow masking parameters
CLOUD_FILTER = 60 #Not used by the active code shown in this notebook (the cloud-percentage filter is disabled)
CLD_PRB_THRESH = 50 #s2cloudless probability above which a pixel is flagged as cloud
NIR_DRK_THRESH = 0.15 #Multiplied by 1e4 and compared against B8 to find dark pixels
CLD_PRJ_DIST = 1 #Multiplied by 10 to give the shadow projection distance
BUFFER = 50 #Dilation applied to the filtered cloud/shadow mask (used as BUFFER*2/20)
BUFFERSMALL = 5 #Dilation applied to the unfiltered cloud/shadow mask (used as BUFFERSMALL*2/20)

### Count total days for analysis

In [14]:
#Parse both ends of the analysis window (YYYY-MM-DD strings) into datetimes
overallStartDate, overallEndDate = (
    datetime.datetime.strptime(bound, "%Y-%m-%d") for bound in (sDate, eDate)
)
delta = overallEndDate - overallStartDate

#Both the start and the end day are processed, hence the extra day
theDayCount = delta.days + 1

print("Total days:", theDayCount)

Total days: 8


### Create feature geometry

In [15]:
#feature_shapes = gpd.read_file(featurePath)
#feature_shapesWGS84 = feature_shapes.to_crs(4326)
#json_features = json.loads(feature_shapesWGS84.to_json())

#This will give bounds in expected GEE format ### use feature geometry instead to avoid empty days
#geeFeatureGeometry = ee.Geometry(ee.FeatureCollection(json_features).geometry())
#AOI = geeFeatureGeometry.bounds()

### Check the ID Field

In [16]:
# list the unique quality code values
#print(feature_shapes[uniqueid_field].head(10))

### Create string variables

In [20]:
#Analysis
#String constants used as column names in the results table
dateString = "date"
totAreaString = "total_area"
mdwiAreaString = "mdwi_area"
ndwiAreaString = "ndwi_area" #Not referenced elsewhere in the visible notebook
mdwiAvgString = "mdwi_avg"
ndviAreaString = "ndvi_area"
ndviAvgString = "ndvi_avg"
ndmiAvgString = "ndmi_avg"
cloudAreaString = "cloud_area"

### Start analysis

In [21]:
#Main analysis: for every shapefile and every day of the analysis window, build
#the Sentinel-2 collection, compute zonal statistics per feature and collect
#them into one results table that is written to outputCSV at the end.

#Column order of the results table / CSV
resultColumns = [uniqueid_field, dateString, totAreaString, mdwiAreaString,
                 mdwiAvgString, ndviAreaString, ndviAvgString, ndmiAvgString, cloudAreaString]

#(key in the stats dictionary, reducer output property, output column name)
#for every metric merged onto the total-area frame, in merge order
metricSpecs = [
    ("mndwiAreas", "sum", mdwiAreaString),
    ("mndwiAvg", "mean", mdwiAvgString),
    ("ndviAreas", "sum", ndviAreaString),
    ("ndviAvg", "mean", ndviAvgString),
    ("ndmiAvg", "mean", ndmiAvgString),
    ("cloudAreas", "sum", cloudAreaString),
]

#Per-day result frames. They are concatenated once after the loops: concatenating
#onto a growing frame every day re-copies all earlier rows and, starting from an
#empty frame, triggers pandas' empty-frame concat deprecation warning
dailyFrames = []

# Get current time
beginNow = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S")
print("Starting at " + beginNow)
print("")

# for each feature
for featurePath in featurePathList:

    # Print feature name
    print("Starting features:", featurePath)

    # Get feature geometry, reprojected to WGS84 for transfer to GEE
    feature_shapes = gpd.read_file(featurePath)
    feature_shapesWGS84 = feature_shapes.to_crs(4326)
    json_features = json.loads(feature_shapesWGS84.to_json())

    #Use the feature geometry itself (not its bounds) to avoid empty days
    geeFeatureGeometry = ee.Geometry(ee.FeatureCollection(json_features).geometry())

    # For each day
    for i in range(theDayCount):

        # Get the one-day date range [theStart, theEnd)
        theStartDate = overallStartDate + datetime.timedelta(days=i)
        theStart = datetime.datetime.strftime(theStartDate, "%Y-%m-%d")
        theEndDate = overallStartDate + datetime.timedelta(days=i+1)
        theEnd = datetime.datetime.strftime(theEndDate, "%Y-%m-%d")

        # Print the current time
        hereNow = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S")
        print("Processing period: " + str(theStart) + " at " + hereNow)

        # Create an image collection
        theImgColl = get_s2_sr_cld_col(geeFeatureGeometry, theStart, theEnd)
        numElements = theImgColl.size().getInfo()

        # Nothing to do when no images are found for that day
        if numElements <= 0:
            print("No images found")
            print("")
            continue

        # Print the number of images found
        print("Images found: " + str(numElements))

        # Add cloud shadow mask
        theImgCollExpanded = theImgColl.map(add_cld_shdw_mask)

        # Calculate stats from layers
        statsDict = calc_stats_from_layers(theImgCollExpanded, geeFeatureGeometry, json_features, utmZoneInfo,
                                           mdwiThresh, ndviThresh)

        # Start from the total-area stats and add the date column
        dfCombo = retrieveStatsFromJSON(statsDict["allAreas"], uniqueid_field, "sum", totAreaString)
        dfCombo[dateString] = theStart

        # Merge every other metric onto it by feature ID
        for statsKey, reducerProperty, columnName in metricSpecs:
            dfMetric = retrieveStatsFromJSON(statsDict[statsKey], uniqueid_field, reducerProperty, columnName)
            dfCombo = pd.merge(dfCombo, dfMetric, how="left", on=uniqueid_field)

        # Keep the day's stats for the results table
        if len(dfCombo) > 0:
            dailyFrames.append(dfCombo)

        # Print the finish time for the day
        hereNow = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S")
        print("Done processing day: " + str(theStart) + " at " + hereNow)
        print("")

# Assemble the results table in the declared column order
if dailyFrames:
    tsStats = pd.concat(dailyFrames).reindex(columns=resultColumns)
else:
    tsStats = pd.DataFrame(columns=resultColumns)

# Save total results to CSV
tsStats.to_csv(outputCSV, index=False)

# Report start and finish times
rightNow = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S")
print("Entire period processed, started at " + beginNow + " and finished at " + rightNow)

Starting at 11/05/2023, 10:56:38

Starting features: C:\Users\dechastelj\Jupyter\BR_storages_WGS84.shp
Processing period: 2023-02-21 at 11/05/2023, 10:56:38
Images found: 3
Done processing day: 2023-02-21 at 11/05/2023, 10:57:12

Processing period: 2023-02-22 at 11/05/2023, 10:57:12
No images found

Processing period: 2023-02-23 at 11/05/2023, 10:57:13
No images found

Processing period: 2023-02-24 at 11/05/2023, 10:57:15
No images found

Processing period: 2023-02-25 at 11/05/2023, 10:57:17
No images found

Processing period: 2023-02-26 at 11/05/2023, 10:57:20
Images found: 3
Done processing day: 2023-02-26 at 11/05/2023, 10:57:54

Processing period: 2023-02-27 at 11/05/2023, 10:57:54
No images found

Processing period: 2023-02-28 at 11/05/2023, 10:57:56
No images found

Entire period processed, started at 11/05/2023, 10:56:38 and finished at 11/05/2023, 10:57:58
