In [None]:
#imports, global consts, inits
import ee
import geemap

#Note: If you haven't set a default project for earthengine to use via the CLI, you'll need to pass the project name to ee.Initialize().
#See https://developers.google.com/earth-engine/guides/auth
ee.Initialize()
#NOTE(review): this name shadows Python's builtin map() for the rest of the notebook session.
map = geemap.Map()

#Shapefiles for Sudan administration borders, via OCHA HDX. This dataset is used for state/admin division clipping (offline, in QGIS) of the
#cropland masks below. Mostly used when testing with geemap.
sudanStateBorders = ee.FeatureCollection("projects/seamproj01/assets/SudanStateBorders")

#A small block in the Gezira state east of the Blue Nile, a subset of geziraCropland.
testArea =  ee.FeatureCollection("projects/seamproj01/assets/test_area_v2")

#From the Copernicus Moderate Dynamic Land Cover dataset. Extracted cropland pixels, then clipped to Khartoum state.
#Polygons less than 0.3km2 in area are removed. Holes smaller than 0.5km2 are filled.
#Subdivided using level 2 OCHA admin subdivisions (with Karrari and Um Bada merged into one).
khartoumCropland = ee.FeatureCollection("projects/seamproj01/assets/khartoum_cropmask_v4_1")

#Also based on Copernicus MDLC, clipped first with the GlobCover dataset (via FAO LCLU), then
#separated based on position relative to the Blue Nile (east or west).
geziraCropland = ee.FeatureCollection("projects/seamproj01/assets/gezira_cropmask_v4")

#50 samples covering testArea, with a string attribute NAME matching testArea's NAME attribute,
#TODO implement this: and an int attribute ISFALLOW with 1 for fallow, 0 for cultivated land. ISFALLOW is manually labeled and used as calibration data.
testAreaSamples = ee.FeatureCollection("projects/seamproj01/assets/test_samples")
                                                                                    

In [None]:
#This function is especially useful for Sentinel data. To try and improve performance a bit, we limit the collection to only the target months, using
#ee.Filter.calendarRange(start, end, field). The problem is that targetMonths may not be in ascending order (consider inter-annual seasons), so we can't
#just take the first and last entries. We can't take min/max either, else it would defeat the purpose (min/max of [11, 12, 1, 2] would result in all
#twelve months).
def FilterCollectionForPeriod(  col : ee.ImageCollection,
                                startDate : str,
                                endDate : str,
                                targetMonths : list) -> ee.ImageCollection:
    """Restrict col to [startDate, endDate) and to the calendar months in targetMonths.

    targetMonths is split into runs of consecutive month numbers (e.g. [11, 12, 1, 2] becomes
    [11, 12] and [1, 2]); each run becomes one ee.Filter.calendarRange and the runs are OR-ed together.
    A gap in the list (e.g. [7, 9]) starts a new run, so months that were not requested are not let through.

    Args:
        col: the image collection to filter.
        startDate: start date string passed to filterDate.
        endDate: end date string passed to filterDate.
        targetMonths: month numbers (1-12) making up the season, in chronological order.

    Returns:
        The filtered image collection.

    Raises:
        ValueError: if targetMonths is empty.
    """
    if not targetMonths:
        raise ValueError("targetMonths must contain at least one month number")

    #Group the months into [first, last] runs of consecutive month numbers.
    periods = []
    runStart = runEnd = targetMonths[0]
    for month in targetMonths[1:]:
        if month == runEnd + 1:
            runEnd = month
        else:
            periods.append([runStart, runEnd])
            runStart = runEnd = month
    periods.append([runStart, runEnd])

    #One calendarRange filter per run, OR-ed together.
    monthsFilter = ee.Filter.calendarRange(periods[0][0], periods[0][1], "month")
    for firstMonth, lastMonth in periods[1:]:
        monthsFilter = ee.Filter.Or(monthsFilter, ee.Filter.calendarRange(firstMonth, lastMonth, "month"))

    return col.filterDate(startDate, endDate).filter(monthsFilter)

#Clipping images with high vertex-count polygons can take a LOT of time. So we clip the rasters to the bounding boxes of these polygons instead, and
#accept processing the extra pixels. From quick testing, this is still much faster than clipping to the exact geometry: computing the NDVI TS for the
#khartoum (v3) dataset (2019-2023 winter seasons) took only 2 hours with bounding-box clipping, whereas with exact clipping it had not finished after
#17 hours (it was cancelled at that point).
#Note that this doesn't affect the clipping optimisation in the spatial anomaly analysis component, nor the reduceRegion(s) used in it.
def ClipCollectionToCollection(imageCol : ee.ImageCollection, featureCol : ee.FeatureCollection):
    """Clip every image of imageCol to the bounding boxes of featureCol's features, keeping each image's properties."""
    def ToBoundingBox(feature):
        return ee.Feature(feature.geometry().bounds())

    boundingBoxes = featureCol.map(ToBoundingBox).geometry()

    def ClipKeepingProperties(image):
        clipped = image.clip(boundingBoxes)
        return clipped.copyProperties(image, image.propertyNames())

    return imageCol.map(ClipKeepingProperties)

def ProcessMODISCollection( col : ee.ImageCollection,
                            roi : ee.FeatureCollection,
                            startDate : str,
                            endDate : str,
                            targetMonths : list) -> ee.ImageCollection:
    """Turn a MODIS surface-reflectance collection into a quality-masked NDVI collection over roi.

    Steps: filter by date/months and by roi bounds, clip to the roi features' bounding boxes, mask
    poor-quality pixels using the "State" band, then compute NDVI from bands sur_refl_b02 and sur_refl_b01.
    Each output image has a single band "NDVI" and carries over the source image's properties.
    """

    #Mappable helpers are scoped inside this function because similarly named ones, with a different implementation, exist for Sentinel as well.
    def MaskPoorQualityPixels(img : ee.Image) -> ee.Image:
        state = img.select("State")
        #TODO add snow/ice masking
        isCloudFree = state.bitwiseAnd(3).eq(0) #pixel is clear from cloud (bits 0 and 1)
        hasNoShadow = state.bitwiseAnd(4).eq(0) #not cloud shadow (bit 2)
        hasNoCirrus = state.bitwiseAnd(768).eq(0) #no cirrus (bits 8 and 9)
        return img.updateMask(isCloudFree.And(hasNoShadow).And(hasNoCirrus))

    def ToNDVI(image : ee.Image):
        ndvi = image.normalizedDifference(["sur_refl_b02", "sur_refl_b01"]).rename("NDVI")
        return ndvi.copyProperties(image, image.propertyNames())

    inPeriod = FilterCollectionForPeriod(col, startDate, endDate, targetMonths)
    overRoi = inPeriod.filterBounds(roi)
    #Bounding-box clipping is used instead of an exact image.clip(roi) per image.
    clipped = ClipCollectionToCollection(overRoi, roi)
    return clipped.map(MaskPoorQualityPixels).map(ToNDVI)


def ProcessSentinelCollection(  col : ee.ImageCollection,
                                roi : ee.FeatureCollection,
                                startDate : str,
                                endDate : str,
                                targetMonths : list) -> ee.ImageCollection:
    """Turn a Sentinel-2 surface-reflectance collection into a quality-masked NDVI collection over roi.

    Steps: filter by date/months and by roi bounds, keep only bands B4, B8 and SCL, clip to the roi
    features' bounding boxes, mask unwanted scene classes using the "SCL" band, then compute NDVI
    from B8 and B4. Each output image has a single band "NDVI" and carries over the source image's properties.
    """

    def MaskPoortQualityPixels(img : ee.Image) -> ee.Image:
        qaBand = img.select("SCL")
        #Unlike MODIS, the quality band used here, "SCL", is not a bit field: each pixel holds a single class code.
        #https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/scene-classification/
        unwantedClasses = [
            3,  #cloud shadows
            6,  #water
            8,  #medium probability clouds
            9,  #high probability clouds
            10, #thin cirrus
            11, #snow/ice
        ]

        #A pixel holds exactly one class code, so the per-class tests must be OR-ed together
        #(AND-ing them can never be true, which would leave every pixel unmasked).
        isUnwanted = qaBand.eq(unwantedClasses[0])
        for classCode in unwantedClasses[1:]:
            isUnwanted = isUnwanted.Or(qaBand.eq(classCode))

        #flip so it masks out the classes above
        return img.updateMask(isUnwanted.Not())

    #filter then process the output collection
    output = FilterCollectionForPeriod(col, startDate, endDate, targetMonths)
    output = output.filterBounds(roi)
    output = output.select("B4", "B8", "SCL") #an attempt to make this run faster... TODO experiment to see if this actually has an effect
    #Bounding-box clipping is used instead of an exact image.clip(roi) per image.
    output = ClipCollectionToCollection(output, roi)
    output = output.map(MaskPoortQualityPixels)
    output = output.map(lambda image : image.normalizedDifference(["B8", "B4"]).rename("NDVI").copyProperties(image, image.propertyNames()))
    return output


In [None]:
##roi is a feature collection containing the target polygons for the region of analysis
##projectName is a string with which the name of the output raster will be prefixed

roi = testArea
projectName = "TestProj" 

# roi = geziraCropland
# projectName = "Gezira" 

# roi = khartoumCropland
# projectName = "Khartoum"

#This is the name of the property (or "attribute", in GIS-speak) that contains the identifier for the subdivisions of the roi used as climate divisions for spatial anomaly
#analysis, and also used in splitting the rasters of the output.
subdivisionPropertyName = "NAME"

#spatial resolution of the output file(s), in meters
targetOutputScale = 10

#This is the z-score value below which a pixel is considered fallow. Made a variable here for calibration purposes.
#temporalZScoreThresholdMax = -3 #Original value used by Wallace et al.
#temporalZScoreThresholdRange = -3 #Original value used by Wallace et al.
temporalZScoreThresholdMax = -1.25
temporalZScoreThresholdRange = -1.0

#For spatial anomaly analysis, the median of a climate division for a given season is multiplied by this value.
#spatialMedianMultiplierMax = 0.8 #Original value used by Wallace et al.
#spatialMedianMultiplierRange = 0.8 #presumably the original value used by Wallace et al. (this line was a duplicate of the one above) - TODO confirm
spatialMedianMultiplierMax = 1.0
spatialMedianMultiplierRange = 1.0

#time-series limits. All inclusive.
yearStart = 2019
monthStart = 1
yearEnd = 2024
monthEnd = 2

# yearStart = 2021
# monthStart = 7
# yearEnd = 2022
# monthEnd = 2

#targetMonths are the months comprising the season for analysis.
#WARNING! MUST BE 4 Values! Otherwise, the Temporal Anomaly Analysis component must be adjusted
#WARNING! ORDER OF MONTHS MUST BE CHRONOLOGICAL! If the season is inter-annual, start with the months in the first year, then the second year
#e.g. if the season starts in November, the list would be [11, 12, 1, 2]
targetMonths = [7, 8, 9, 10] #"Summer" season in Sudan (technically Autumn). This covers the growth periods of crops such as sorghum.
#targetMonths = [11, 12, 1, 2] #"Winter" season in Sudan. This covers growth periods of crops such as wheat

#dateStart is the first day of monthStart; dateEnd is the first day of the month after monthEnd (rolling over to January of the next year after December).
dateStart = f"{yearStart}-{monthStart}-1"
dateEnd = f"{yearEnd}-{monthEnd + 1}-1" if monthEnd < 12 else  f"{yearEnd + 1}-1-1"

#Process and set ndviTS to the (processed) dataset you are using.
modis = ee.ImageCollection("MODIS/061/MOD09Q1")
modis = ProcessMODISCollection(modis, roi, dateStart, dateEnd, targetMonths)
ndviTS = modis

# sentinel2SR = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
# sentinel2SR = ProcessSentinelCollection(sentinel2SR, roi, dateStart, dateEnd, targetMonths)
# #TODO the Sentinel-2 L2A data used here miss some dates prior to dec 2018 for Gezira region. This is specific to GEE's version of 2A. Actual 2A on Copernicus Dataspace do cover this period.
# #Note that GEE's L1C (the TOA version) does cover this period as well. So consider implementing a TOA-to-SR algo to augment the 2A data where it's lacking.
# #Also note: feeding this algorithm months (within range dateStart to dateEnd) without rasters will throw exceptions.
# ndviTS = sentinel2SR

#Note: ndviTS is only used in the computation of the max/range timeseries; it is not consulted by some of the reduceRegions and exports below, which require the scale of the image as an argument.
#TODO adjust the reduceRegions and Exports below to dynamically use the scale based on that of the selected dataset

#Auto Calibration settings
#if autoCalibrateParams is true, the model will be calibrated using calibrationDataset.
#calibrationAttribName is the name of the attribute column that contains an integer marking whether land is fallow or not (0 = not fallow, 1 = fallow)
#calibrationYear is the year which the values in ISFALLOW represent
autoCalibrateParams : bool = True
calibrationDataset = testAreaSamples
calibrationAttribName = "ISFALLOW"
calibrationYear = 2019

# alpha = 1.0 #the base "step length" of coordinate descent (or ascent)
# #note: calibration is considered complete once any othe two conditions above is reached.
# minIterations = 2 #minimum iterations to process before testing whether the break the calibration.
# maxIterations = 100 #max number of optimisation iterations before caliberation is stopped.
# maxInnerIterations = 10 #Put simply, the max number of subdivs of alpha to be tested for each coordiante in each [outer] iteration.
# minChangeThreshold = 0.01 #minimum percentage of change in the objective/cost function (the error) between caliberation iterations before stopping the process


In [None]:
# #Checking that enough data are available for sentinel-2 L2A for the selected roi/dates.
# #note: this check does not use the processed data. It tests the original dataset after filtering for roi and date, to speed the process up, as ProcessSentinelCollection() can
# #take a significant amount of time, depending on roi.
# periodStart = 0
# periodEnd = 0
# periods = []
# for i in range (1, len(targetMonths)):
#     if (targetMonths[i] > targetMonths[i - 1]):
#         periodEnd = i
#     else:
#         periods.append([targetMonths[periodStart], targetMonths[periodEnd]])
#         periodStart = periodEnd = i

# periods.append([targetMonths[periodStart], targetMonths[periodEnd]])

# monthsFilter = ee.Filter.calendarRange(periods[0][0], periods[0][1], "month")
# for i in range (1, len(periods)):
#     monthsFilter = ee.Filter.Or(monthsFilter, ee.Filter.calendarRange(periods[i][0], periods[i][1], "month"))
    
# for year in range(yearStart, yearEnd + 1):
#     for month in targetMonths:
#         subColStart = f"{year}-{month}-1"
#         subColEnd = f"{year}-{month + 1}-1" if month < 12 else f"{year + 1}-1-1"
#         ndviRastersForMonth = ee.ImageCollection("COPERNICUS/S2_HARMONIZED").filterBounds(roi.geometry()).filterDate(subColStart, subColEnd).size().getInfo()

#         print (f"{year}-{month} : available scenes = {ndviRastersForMonth}")

In [None]:
#In this block, we generate the timeseries (monthly max NDVI and monthly NDVI range) and the statistics for the "pure crop" signal.
#Outputs of this block are two ImageCollections: "timeSeries" and "pureCropNDVI"
#Note: "year" here is used to refer to the year of the start of the season, not the calendar year of the month. This is important for inter-annual seasons (e.g. Nov through
#Feb). 
    #timeSeries contains one image for each season.
        #Each Image has a number of bands equal to twice the number of targetMonths (2x4 = 8).
        #Each Image has a property "year" for the season's year.
        #Each band is named "month_x_max" or "month_x_range", where x is the month's number; "max" and "range" denote whether it encodes the maximum monthly NDVI or the monthly NDVI range.
    #pureCropNDVI contains one image for each month in targetMonths (total = 4)
        #each image has 4 bands: "month_x_max_mean" or "month_x_max_stdDev" (and similarly for range).
        #each image has a property "month" with the month's number

timeSeries = ee.List([])

#Season-year of the last season fully covered by the configured time range (updated as the loop advances).
lastCompleteSeason = yearStart

for year in range(yearStart, yearEnd + 1):
    
    yearTS = ee.List([])
    isCompleteSeason = True
    
    #lastMonth/calendarYear track the roll-over into the next calendar year for inter-annual seasons.
    lastMonth = 0
    calendarYear = year
    lastCompleteSeason = year

    for month in targetMonths:
        #A month number smaller than the previous one means the season crossed into the next calendar year.
        if (month < lastMonth):
            calendarYear += 1
        lastMonth = month
        
        #Compare (year, month) pairs as YYYYMM integers; stop as soon as a month falls after the configured end.
        if ((calendarYear * 100) + month > (yearEnd * 100) + monthEnd):
            isCompleteSeason = False
            break
        
        
        #Date window for this month: from its first day up to the first day of the following month.
        subPeriodStart = f"{calendarYear}-{month}-1"
        subPeriodEnd = f"{calendarYear}-{month + 1}-1" if month < 12 else  f"{calendarYear + 1}-1-1"
        subCol = ndviTS.filterDate(subPeriodStart, subPeriodEnd)
        
        #Per-pixel min and max NDVI over the month (selected below as bands "NDVI_min" and "NDVI_max").
        minMaxNDVI = subCol.reduce(ee.Reducer.minMax())

        monthMax = minMaxNDVI.select("NDVI_max").rename("max")
        monthRange = minMaxNDVI.select("NDVI_max").subtract(minMaxNDVI.select("NDVI_min")).rename("range")
        #The system:index set here is what toBands() below is expected to use as the band-name prefix ("month_x").
        yearTS = yearTS.add(ee.Image([monthMax, monthRange]).set({"system:index" : f"month_{month}"}))
    
    #An incomplete season is dropped, and so are all later ones.
    if(not isCompleteSeason):
        lastCompleteSeason += -1
        break
    
    #Flatten the season's monthly images into one multi-band image tagged with the season's year.
    yearTS = ee.ImageCollection(yearTS).toBands().set({"year" : year, "system:index" : ee.String("year_").cat(str(year))})
    timeSeries = timeSeries.add(yearTS)

timeSeries = ee.ImageCollection(timeSeries)

##uncomment this part to export the ndviTS. Not very recommended for Sentinel 2 data for large areas (produces rasters between 500~900MB for Gezira dataset)...
##to reduce output size, the data is scaled by 100 the rounded off, and stored as an unsigned, 8bit int which range 0 to 255, but because ndvi ts ranges from 0 to 1 (zero because we masked out water), the
##resulting raster would range from 0 to 100. Also note the reduced precision this introduces (shouldn't matter much anyway)
##TODO adjust code so bands would have name of year.
##TODO seperate output images for this the same as you do fallowTS bellow.
##TODO export max and range timeseries seperately
# task = ee.batch.Export.image.toDrive(
#     #image = timeSeries.select([f"month_{targetMonths[0]}_max", f"month_{targetMonths[1]}_max", f"month_{targetMonths[2]}_max", f"month_{targetMonths[3]}_max"]).toBands().clip(roi.geometry()).multiply(ee.Image(ee.Number(100))).toUint8(),
#     #description = f"{projectName}_maxNDVI_TS_{yearStart}-{yearEnd}_season_{targetMonths[0]}-{targetMonths[1]}-{targetMonths[2]}-{targetMonths[3]}",
#     image = timeSeries.select([f"month_{targetMonths[0]}_range", f"month_{targetMonths[1]}_range", f"month_{targetMonths[2]}_range", f"month_{targetMonths[3]}_range"]).toBands().clip(roi.geometry()).multiply(ee.Image(ee.Number(100))).toUint8(),
#     description = f"{projectName}_rangeNDVI_TS_{yearStart}-{yearEnd}_season_{targetMonths[0]}-{targetMonths[1]}-{targetMonths[2]}-{targetMonths[3]}",
#     #folder='ee_export',
#     region = roi.geometry(),
#     scale = targetOutputScale,
#     crs = 'EPSG:4326',
#     maxPixels = 500000000,
#     fileFormat = 'GeoTIFF',
#     formatOptions = {
#         'noData': -9999
# })
# task.start()


#"Pure crop" signal: for each target month, the per-pixel mean and standard deviation (over all seasons) of the
#max/range values that are at or above that pixel's median over all seasons.
pureCropNDVI = ee.List([])

#The same combined reducer is used for every month.
meanStdReducer = ee.Reducer.mean().combine(ee.Reducer.stdDev(), sharedInputs = True)

for month in targetMonths:
    prefix = f"month_{month}_"
    monthBands = [prefix + "max", prefix + "range"]
    thisMonthTS = timeSeries.select(monthBands)

    #Per-pixel median of this month's max and range across seasons.
    monthMedian = thisMonthTS.reduce(ee.Reducer.median()).rename(monthBands).set({"month" : month})

    def KeepAtOrAboveMedian(img):
        return img.updateMask(img.gte(monthMedian))

    monthPureCropNDVI = thisMonthTS.map(KeepAtOrAboveMedian).reduce(meanStdReducer).set("month", month)

    pureCropNDVI = pureCropNDVI.add(monthPureCropNDVI)

pureCropNDVI = ee.ImageCollection(pureCropNDVI)

In [None]:
#This block computes the temporal anomalies (z-scores) for each season. The output is an ImageCollection called temporalAnomalies, containing one Image per season.
    #Each Image has a number of bands equal to twice the number of targetMonths (2x4 = 8).
    #Each Image has a property "year" for the season's year.
    #Each band is named "month_x_max" or "month_x_range", respectively for the z-scores of max NDVI and NDVI range, for each target month.

def ComputeTemporalAnomalies(image : ee.Image) -> ee.Image:
    """Return the per-month z-scores of a season image against the pure-crop statistics.

    For every target month, the season's max and range bands are standardised as
    (value - mean) / stdDev using the matching pureCropNDVI image. The season's "year" property is carried over.
    """
    def ZScore(bandName, stats, outputName):
        centred = image.select(bandName).subtract(stats.select(bandName + "_mean"))
        return centred.divide(stats.select(bandName + "_stdDev")).rename(outputName)

    monthlyAnomalies = ee.List([])

    for month in targetMonths:
        prefix = f"month_{month}_"
        monthStats = pureCropNDVI.filter(ee.Filter.eq("month", month)).first()

        monthImage = ee.Image([
            ZScore(prefix + "max", monthStats, "max"),
            ZScore(prefix + "range", monthStats, "range"),
        ])
        monthlyAnomalies = monthlyAnomalies.add(monthImage.set({"system:index" : f"month_{month}"}))

    return ee.ImageCollection(monthlyAnomalies).toBands().set({"year" : image.get("year")})
    


#Map the z-score computation over every season image in timeSeries.
temporalAnomalies = timeSeries.map(ComputeTemporalAnomalies)

##uncomment this part to export the anomaly series.
##similarily to ndvi timeseries export, the data is scaled by 100 the rounded off, and stored as a 16bit int with range -32,768 to 32,767
##since z score shouldn't go far from zero (negative or positive single digits, typically), this range should be more than adequte.
##Note: gee refuses to set image index (its name) inside map(). So the output image will be prefixed with integers starting from 0, instead of year number.
##TODO seperate output images for this the same as you do fallowTS bellow.
##TODO export max and range series seperately
# task = ee.batch.Export.image.toDrive(
#     image = temporalAnomalies.toBands().clip(roi.geometry()).multiply(ee.Image(ee.Number(100))).toInt16(),
#     description = f"{projectName}_temporalAnomalies_{yearStart}-{yearEnd}_season_{targetMonths[0]}-{targetMonths[1]}-{targetMonths[2]}-{targetMonths[3]}",
#     #folder='ee_export',
#     region = roi.geometry(),
#     scale = targetOutputScale, #WARNING! The zscore data isn't very compressible, even when reduced to int16. Using large scales (less than 30m) with large areas (gezira example) results in rasters 11GB in size
#     crs = 'EPSG:4326',
#     maxPixels = 500000000,
#     fileFormat = 'GeoTIFF',
#     formatOptions = {
#         'noData': -9999
# })
# task.start()


In [None]:
#Temporal analysis component.

#mapable function, mapped over temporalAnomalies collection
def TemporalAnomalyAnalysis(image : ee.Image) -> ee.Image: #to be mapped over temporalAnomalies collection
    """Answer the two temporal-anomaly questions for one season of z-scores.

    A pixel is flagged when its z-score stays below the threshold for three consecutive target months.
    Every window of three consecutive entries of targetMonths is tested and the windows are OR-ed, so for
    four months this is (m0 & m1 & m2) | (m1 & m2 & m3). The thresholds are read from the module-level
    temporalZScoreThresholdMax / temporalZScoreThresholdRange at call time.

    Args:
        image: a season image with bands "month_x_max" and "month_x_range" for every x in targetMonths.

    Returns:
        An image with bands "max" and "range" and the input's "year" property.

    Raises:
        ValueError: if targetMonths has fewer than three entries.
    """
    windowLength = 3
    if len(targetMonths) < windowLength:
        raise ValueError(f"targetMonths must contain at least {windowLength} months, got {len(targetMonths)}")

    def BelowThresholdInAnyWindow(suffix, threshold):
        belowThreshold = [image.select(f"month_{month}_{suffix}").lt(threshold) for month in targetMonths]

        anyWindow = None
        for start in range(len(belowThreshold) - windowLength + 1):
            #All months of this window must be below the threshold...
            window = belowThreshold[start]
            for monthFlag in belowThreshold[start + 1 : start + windowLength]:
                window = window.And(monthFlag)
            #...and any single window is enough.
            anyWindow = window if anyWindow is None else anyWindow.Or(window)

        return anyWindow.rename(suffix)

    isFallow_1 = BelowThresholdInAnyWindow("max", temporalZScoreThresholdMax)
    isFallow_2 = BelowThresholdInAnyWindow("range", temporalZScoreThresholdRange)

    return ee.Image([isFallow_1, isFallow_2]).set({"year" : image.get("year")})

#isFallow_TA: one image per season holding the answers to the two temporal-anomaly questions (bands "max" and "range").
isFallow_TA = temporalAnomalies.map(TemporalAnomalyAnalysis)

In [None]:
#Spatial analysis component.
#Each feature of roi is one zone (climate division) for the spatial analysis.
zones = roi.toList(roi.size())
zonesSize = zones.size().getInfo()

print(f"Processing spatial anomalies for {zonesSize} zone(s)")

#Cache one clipped, constant-zero raster per zone to avoid clipping inside the loops below.
#The rationale is that SpatialStatistics() would otherwise clip n * m * k times, where n = number of target months, m = number of zones, and k = number
#of years, whereas clipping here happens only m times; the per-zone clipping is then replaced with an addition.
baseClippedRasters = ee.List([])
for i in range(zonesSize):
    zoneGeometry = ee.Feature(zones.get(i)).geometry()
    zeroRaster = ee.Image(0.0).clip(zoneGeometry).toFloat()
    baseClippedRasters = baseClippedRasters.add(zeroRaster)

#mappable function, mapped over the timeSeries collection to generate the input for spatial anomaly analysis
def SpatialStatistics(image : ee.Image) -> ee.Image:
    """Compute the per-season inputs of the spatial anomaly analysis.

    Returns an image with four bands and the input's "year" property:
        maxOfMax / maxOfRange: per-pixel maximum over the target months of the monthly max / range bands.
        medianOfMax / medianOfRange: for every zone, the zone-wide median of the monthly max / range band,
            painted over the zone; then the per-pixel maximum of those rasters over the target months.
    """
    def ZoneMedianRaster(band, bandName, zoneIndex):
        #Median of the band over one zone, turned into a constant raster covering that zone.
        zoneGeometry = ee.Feature(zones.get(zoneIndex)).geometry()
        zoneMedian = band.reduceRegion(
            reducer = ee.Reducer.median(),
            geometry = zoneGeometry,
            tileScale = 4,
            scale = 250, #TODO this was increased in the original, single zone implementation. Test setting it back.
            maxPixels = 5000000, #TODO same as above (reduced from default 10mil)
            bestEffort = True,
            crs = 'EPSG:4326',
        ).getNumber(bandName)

        #Adding the median to the cached zero raster replaces a clip() call per zone/month/season.
        return ee.Image(baseClippedRasters.get(zoneIndex)).add(ee.Image(zoneMedian)).toFloat()

    def MosaicZoneMedians(band, bandName):
        zoneRasters = ee.List([])
        for zoneIndex in range(zonesSize):
            zoneRasters = zoneRasters.add(ZoneMedianRaster(band, bandName, zoneIndex))
        return ee.ImageCollection(zoneRasters).mosaic()

    monthlyMax = ee.List([])
    monthlyRange = ee.List([])
    monthlyMedianOfMax = ee.List([])
    monthlyMedianOfRange = ee.List([])

    for month in targetMonths:
        bandPrefix = f"month_{month}"
        monthMaxBand = image.select(bandPrefix + "_max").rename("max")
        monthRangeBand = image.select(bandPrefix + "_range").rename("range")

        monthlyMax = monthlyMax.add(monthMaxBand)
        monthlyRange = monthlyRange.add(monthRangeBand)

        monthlyMedianOfMax = monthlyMedianOfMax.add(MosaicZoneMedians(monthMaxBand, "max"))
        monthlyMedianOfRange = monthlyMedianOfRange.add(MosaicZoneMedians(monthRangeBand, "range"))

    maxReducer = ee.Reducer.max()
    maxOfMax = ee.ImageCollection(monthlyMax).reduce(maxReducer).rename("maxOfMax")
    maxOfRange = ee.ImageCollection(monthlyRange).reduce(maxReducer).rename("maxOfRange")

    spatialMedianMax = ee.ImageCollection(monthlyMedianOfMax).reduce(ee.Reducer.max(), parallelScale = 4).rename("medianOfMax")
    spatialMedianRange = ee.ImageCollection(monthlyMedianOfRange).reduce(ee.Reducer.max(), parallelScale = 4).rename("medianOfRange")

    return ee.Image([maxOfMax, maxOfRange, spatialMedianMax, spatialMedianRange]).set({"year" : image.get("year")})

#mappable function, mapped over the spatial statistics to generate the isFallow rasters based on spatial anomalies
def SpatialAnomalyAnalysis(image : ee.Image) -> ee.Image:
    """Answer the two spatial-anomaly questions for one season of spatial statistics.

    A pixel is flagged in band "max" when its maxOfMax is below medianOfMax times spatialMedianMultiplierMax,
    and in band "range" when its maxOfRange is below medianOfRange times spatialMedianMultiplierRange.
    The multipliers are read from the module-level settings at call time. The "year" property is carried over.
    """
    maxLimit = image.select("medianOfMax").multiply(ee.Number(spatialMedianMultiplierMax))
    rangeLimit = image.select("medianOfRange").multiply(ee.Number(spatialMedianMultiplierRange))

    isFallow_3 = image.select("maxOfMax").lt(maxLimit).rename("max")
    isFallow_4 = image.select("maxOfRange").lt(rangeLimit).rename("range")

    flags = ee.Image([isFallow_3, isFallow_4])
    return flags.set({"year" : image.get("year")})


#spatialStats: per-season statistics (bands maxOfMax, maxOfRange, medianOfMax, medianOfRange).
#isFallow_SA: one image per season holding the answers to the two spatial-anomaly questions (bands "max" and "range").
spatialStats = timeSeries.map(SpatialStatistics)
isFallow_SA = spatialStats.map(SpatialAnomalyAnalysis)

In [None]:
#Final analysis.
#For a pixel to qualify as fallow, it has to be classified as such in at least two of either the temporal or spatial checks.

def ComputeFallowTS(isFallow_TA, isFallow_SA):
    """Combine the temporal and spatial checks into one fallow/not-fallow band per season.

    For every season fully covered by the configured time range, the two temporal flags and the two
    spatial flags are stacked and summed; a pixel is fallow when at least two of the four flags are set.

    Args:
        isFallow_TA: collection of per-season temporal flags (bands "max", "range", property "year").
        isFallow_SA: collection of per-season spatial flags (bands "max", "range", property "year").

    Returns:
        A single image with one band per season; each band is named after the season's year before
        toBands() adds its own prefix.
    """
    def IsCompleteSeason(seasonYear):
        #A season is complete only if ALL of its months fall on or before (yearEnd, monthEnd). Checking the
        #first month alone would let through partially covered seasons, which have no image in the inputs.
        calendarYear = seasonYear
        previousMonth = 0
        for month in targetMonths:
            if month < previousMonth: #the season rolled over into the next calendar year
                calendarYear += 1
            previousMonth = month
            if (calendarYear * 100) + month > (yearEnd * 100) + monthEnd:
                return False
        return True

    isFallowComponents = ee.List([])
    for year in range (yearStart, yearEnd + 1):
        if not IsCompleteSeason(year):
            break

        yearComponents = ee.List([]).add(isFallow_TA.filter(ee.Filter.eq("year", year)).first().set({"system:index" : f"{year}_TA"}))
        yearComponents = yearComponents.add(isFallow_SA.filter(ee.Filter.eq("year", year)).first().set({"system:index" : f"{year}_SA"}))

        #Stack the four flags of this season into one image; the year is stored as a string to serve as the band name below.
        yearComponents = ee.ImageCollection(yearComponents).toBands().set({"year" : str(year)})
        isFallowComponents = isFallowComponents.add(yearComponents)

    isFallowComponents = ee.ImageCollection(isFallowComponents)

    return isFallowComponents.map(lambda img : img.reduce(ee.Reducer.sum()).gte(ee.Number(2)).rename(ee.String(img.get("year")))).toBands()


#fallowTS: a single image with one fallow/not-fallow band per processed season.
fallowTS = ComputeFallowTS(isFallow_TA, isFallow_SA)

In [None]:
#Cache temporalAnomalies and spatialStats (which are expensive to compute) as assets, to be reused during optimisation.
#Both tasks are only created here; uncomment the matching task.start() line to actually run an export.

pathPrefix = "projects/seamproj01/assets/"

task = ee.batch.Export.image.toAsset(
    image = temporalAnomalies.toBands().clip(roi.geometry()),
    description = "temporalAnomalies",
    assetId = pathPrefix + "tempAn",
    scale = 10,
    maxPixels = 3e8,
)
#task.start()

task = ee.batch.Export.image.toAsset(
    image = spatialStats.toBands().clip(roi.geometry()),
    description = "spatialStats",
    assetId = pathPrefix + "spStat",
    scale = 10,
    maxPixels = 3e8,
)

# task.start()

In [None]:
#Recreate temporalAnomalies and spatialStats from the cached rasters
#Reminder: for temporalAnomalies:
    #Each Image has a number of bands equal to twice the number of targetMonths (2x4 = 8).
    #Each Image has a property "year" for the season's year.
    #Each band is named "month_x_max" or "month_x_range", respectively for the z-scores of max NDVI and NDVI range, for each target month.
#reminder for spatialStats:
    #each image has property "year"
    #each image has four bands: maxOfMax, maxOfRange, medianOfMax, and medianOfRange

#TODO skip this block if not autocalibrating

tempAnImg = ee.Image(pathPrefix + "tempAn")
spStatImg = ee.Image(pathPrefix + "spStat")

#To make life a little bit easier, clip the rasters above to a small area around the sampling points, since we don't need the whole region for calibration,
#which is what we are doing if we are in this block.
#The points come from the configured calibrationDataset, so the clip region follows whichever dataset is set for calibration.

clipper = calibrationDataset.map(lambda point : point.buffer(250)) #ideally, we would clip only the pixel, but since we don't know where it is exactly (and rasters vary in res)...
#buffer on a point returns a circle; replace each circle with its bounding rectangle. Not necessary, but rectangles are simpler geometries to clip with.
#The rectangles are derived from the buffered points (not from roi), and wrapped in ee.Feature because a function mapped over a FeatureCollection must return a Feature.
clipper = clipper.map(lambda circle : ee.Feature(circle.geometry().bounds()))

tempAnImg = tempAnImg.clip(clipper.geometry())
spStatImg = spStatImg.clip(clipper.geometry())


#Band names expected in each rebuilt image.
spStatsNames = ["maxOfMax", "maxOfRange", "medianOfMax", "medianOfRange"]
#toBands() below prefixes an integer to every band name, so the intended names are generated here for renaming.
taBandNames = [f"month_{m}_{kind}" for m in targetMonths for kind in ("max", "range")]

#Years actually present in the cached raster, read from characters 5..8 of its band names. Workaround for an issue introduced by inter-annual seasons.
yearsList = [int(i[5:9]) for i in tempAnImg.bandNames().getInfo()]

temporalAnomalies = ee.List([])
spatialStats = ee.List([])

for year in range(yearStart, yearEnd + 1):
    if year not in yearsList:  #workaround for an issue introduced by inter-annual seasons.
        continue

    yearPrefix = f"year_{year}_"

    #temporal anomalies are stored by month: one max band and one range band per target month
    yearImgTA = ee.List([tempAnImg.select(yearPrefix + name).rename([name]) for name in taBandNames])
    yearImgTA = ee.ImageCollection(yearImgTA).toBands().set({"year" : year}).rename(taBandNames)

    #spatialStats are stored for an entire year, not by month.
    yearStatBands = [spStatImg.select(yearPrefix + statName) for statName in spStatsNames]
    yearImgSS = ee.ImageCollection(yearStatBands).toBands().rename(spStatsNames).set({"year" : year})

    temporalAnomalies = temporalAnomalies.add(yearImgTA)
    spatialStats = spatialStats.add(yearImgSS)

temporalAnomalies = ee.ImageCollection(temporalAnomalies)
spatialStats = ee.ImageCollection(spatialStats)

# #test
print ("for tempAn")
print (temporalAnomalies.first().bandNames().getInfo())
print (temporalAnomalies.aggregate_array("year").getInfo())
# print ("for spStat")
# print (spatialStats.first().bandNames().getInfo())
# print (spatialStats.aggregate_array("year").getInfo())

In [None]:
#WIP
#Autocalibration of the four anomaly parameters using differential evolution [TODO: document the method]

#TODO Check that autoCalibrate is set to True, if not, skip this block.

from random import seed as Seed, random as Random, randrange as RandIntRange

#Stores items 0..3 of `parameters` into the module-level variables temporalZScoreThresholdMax,
#temporalZScoreThresholdRange, spatialMedianMultiplierMax and spatialMedianMultiplierRange (in that order).
#Should usually be called before RecomputeFallowTS(), else the latter will use old parameters.
def UpdateParams(parameters):
    targetNames = (
        "temporalZScoreThresholdMax",
        "temporalZScoreThresholdRange",
        "spatialMedianMultiplierMax",
        "spatialMedianMultiplierRange",
    )

    #Written one at a time, in index order, straight into the module namespace.
    moduleVars = globals()
    for index, name in enumerate(targetNames):
        moduleVars[name] = parameters[index]


#Rebuilds the fallow time series: maps TemporalAnomalyAnalysis over temporalAnomalies and
#SpatialAnomalyAnalysis over spatialStats, then hands both results to ComputeFallowTS.
#Returns whatever ComputeFallowTS returns.
def RecomputeFallowTS():
    # return #test
    temporalResult = temporalAnomalies.map(TemporalAnomalyAnalysis)
    spatialResult = spatialStats.map(SpatialAnomalyAnalysis)
    return ComputeFallowTS(temporalResult, spatialResult)

#Samples the calibrationYear band of fallowTS at every feature of testAreaSamples.
#Returns testAreaSamples with an extra "estimate" property holding the sampled value.
#assumes fallowTS is up to date
def SampleIsFallowAtValidationPoints(fallowTS):
    #Output of fallowTS generation prepends an int from 0 onwards then an underscore (TODO fix).
    #The prefix is matched with ".*" rather than a fixed two characters so that two-digit
    #indices ("10_", "11_", ...) are selected as well.
    isFallowRaster = fallowTS.select(f".*{calibrationYear}")
    bandName = isFallowRaster.bandNames().get(0)

    #mappable function
    def Sample(point):
        #First feature produced by sampling the raster over the point's geometry at scale 10.
        #NOTE(review): presumably every point falls on an unmasked pixel, otherwise first() has nothing to return - confirm.
        value = ee.Number(isFallowRaster.sample(region = point.geometry(), scale = 10).first().get(bandName))
        return point.set({"estimate" : value})

    return testAreaSamples.map(Sample)

#Scores fallowTS against the validation samples and returns the confusion-matrix accuracy
#as a client-side value (blocking getInfo() call).
#assumes fallowTS is up to date
def ObjectiveFunction(fallowTS):

    # return round(Random(),2) #test
    sampledPoints = SampleIsFallowAtValidationPoints(fallowTS)

    #mappable function, to help with confusion matrix generation.
    #compositeScore = 2 * estimate + reference attribute, so (assuming both are 0/1 - TODO confirm)
    #each of the values 0..3 identifies one estimate/reference combination.
    def AddCompositeColumn(point):
        doubledEstimate = ee.Number(2).multiply(point.get("estimate"))
        composite = ee.Number(doubledEstimate.add(point.get(calibrationAttribName)))
        return point.set({"compositeScore" : composite})

    #Count how many samples fall under each composite value.
    compositeValues = sampledPoints.map(AddCompositeColumn).aggregate_array("compositeScore")
    adjustedSamples = ee.Dictionary(compositeValues.reduce(ee.Reducer.frequencyHistogram()))

    #2x2 table; combinations missing from the histogram count as 0.
    firstRow = [adjustedSamples.get("3.0", 0.0), adjustedSamples.get("1.0", 0.0)]
    secondRow = [adjustedSamples.get("2.0", 0.0), adjustedSamples.get("0.0", 0.0)]
    confArray = ee.Array([firstRow, secondRow], ee.PixelType.float())
    confMatrix = ee.ConfusionMatrix(confArray.long())

    return confMatrix.accuracy().getInfo() #TODO should we use Kappa?

#Decides whether the optimisation loop should end.
#   currentIteration: index of the iteration about to run.
#   currentScore, lastScore: the two scores whose relative difference is tested.
#Reads the module-level minIterations, maxIterations and minChangeThreshold.
#Returns False while currentIteration < minIterations; True once maxIterations is reached or the
#relative change |lastScore - currentScore| / |lastScore| is <= minChangeThreshold; False otherwise.
def StopOptimisation(currentIteration, currentScore, lastScore) -> bool:
    if currentIteration < minIterations:
        return False

    if currentIteration >= maxIterations:
        print("Stopping after reaching max iterations")
        return True

    if lastScore == 0:
        #Relative change is undefined against a zero reference: moving away from zero counts as
        #an (unbounded) change, staying at zero counts as no change at all.
        if currentScore != 0:
            return False
        errorMetricChange = 0.0
    else:
        errorMetricChange = abs((lastScore - currentScore) / lastScore)

    if errorMetricChange <= minChangeThreshold:
        print(f"Stopping for stagnating improvements. Change percentage = {errorMetricChange}")
        return True

    return False

#Draws one random value per entry of `ranges`: first bound + Random() * (second bound - first bound).
#Bounds may be given in either order. Returns the values in the same order as `ranges`.
def RandomParams(ranges: list[list]) -> list:
    return [bounds[0] + Random() * (bounds[1] - bounds[0]) for bounds in ranges]

#Picks `count` distinct agents at random, never the one at index excludeID.
#   agents: population to pick from.
#   excludeID: index that must not be picked.
#   count: number of agents to return.
#Returns the picked agents in the order they were drawn.
#Raises ValueError when fewer than `count` agents are selectable (the rejection loop below could
#otherwise never finish).
def RandomAgents(agents: list[list], excludeID: int, count = 3) -> list[list]:
    #excludeID only removes a candidate when it is an actual index into agents.
    selectable = len(agents) - (1 if excludeID in range(len(agents)) else 0)
    if count > selectable:
        raise ValueError(f"Cannot pick {count} distinct agents: only {selectable} selectable")

    picksIDs = []

    #Rejection sampling: redraw until enough distinct, non-excluded indices are collected.
    while len(picksIDs) < count:
        i = RandIntRange(0, len(agents))
        if i not in picksIDs and i != excludeID:
            picksIDs.append(i)

    return [agents[i] for i in picksIDs]

#Restricts `value` to the interval spanned by range[0] and range[1]; the two bounds may be
#given in either order.
def Clamp(value: float, range: list[float, float]) -> float:
    lowerBound = min(range[0], range[1])
    upperBound = max(range[0], range[1])
    return max(min(value, upperBound), lowerBound)

#Differential evolution search over the four anomaly parameters.
#No score is evaluated for the pre-calibration parameters: nothing below reads it, and every
#ObjectiveFunction() call ends in a blocking getInfo() round trip.

popSize = 20 #Population size. Minimum = 4 Recommended estimate is 10 * parameters. in our case, 10 * 4 = 40 #TODO move to input block, should be 10*paramCount
crossOverProb = 0.9 #Cross Over Probability. Should be between 0.0 and 1.0. Recommended default = 0.9 #TODO move to input block
diffWeight = 0.8 #Differential Weight. Between 0.0 and 2.0. Recommended default = 0.8 #TODO move to input block
paramRanges = [[-0.5, -5.0], [-0.5, -5.0], [0.1, 2.0], [0.1, 2.0]] #Allowable range for each parameter.
                                                                    #order = temporalZScoreThresholdMax, temporalZScoreThresholdRange, spatialMedianMultiplierMax,
                                                                    #and spatialMedianMultiplierRange #TODO move to input block
diffEvSeed = 485138463513684685 #seed for the randomizer. Ensures reproducibility. Setting to None makes each run different than preceding one #TODO move to input block
minIterations = 10 #Optimisation shall proceed no less than this number #TODO move to input block
maxIterations = 30 #Optimisation shall break at this number regardless of results #TODO move to input block
minChangeThreshold = 0.01 #minimum (percentage) change between iterations before stopping optimisation #TODO move to input block

Seed(diffEvSeed) #TODO move this to the start of ***THIS*** block, after the related import (and after moving the variables above to their blocks)

#init population: popSize distinct random parameter vectors, each scored once.
agents = []
scores = [] #same index as agents
for i in range (0, popSize):
    randAgent = RandomParams(paramRanges)
    while randAgent in agents: #Should be a practical impossibility to happen, but still...
        randAgent = RandomParams(paramRanges)
    agents.append(randAgent)
    UpdateParams(randAgent)
    scores.append(ObjectiveFunction(RecomputeFallowTS()))


print (f"Agents = {len(agents)}") #test
for score, agent in zip(scores, agents):
    print(f"{score} -- {agent}") #test

#bestScore = [score, parameter vector]. Start from the best member of the initial population so
#that a good initial agent is not lost when no trial vector manages to beat it.
bestScore = [0.0, []]
for score, agent in zip(scores, agents):
    if score > bestScore[0]:
        bestScore = [score, list(agent)]

currentScore = minChangeThreshold * 1.1 #placeholder until the first iteration stores a real score
iteration = 0

#NOTE(review): StopOptimisation's parameters are (currentIteration, currentScore, lastScore), while
#this call passes the best score as of the start of the previous iteration as currentScore and the
#newest best as lastScore - confirm the order is intended.
while not StopOptimisation(iteration, currentScore, bestScore[0]):
    print (f"------------------------------\nIteration {iteration} -- params: {bestScore[1]} -- score current: {currentScore} last: {bestScore[0]}")
    currentScore = bestScore[0]

    for i in range (0, popSize):
        #Three distinct agents, none of them agent i, used to build the mutant vector.
        assistVectors = RandomAgents(agents, i, 3)

        testedAgent = list(agents[i])
        rIndex = RandIntRange(0, len(paramRanges)) #this parameter is always taken from the mutant
        weights = RandomParams([[0.0, 1.0]] * len(paramRanges))
        for j in range (0, len(paramRanges)):
            if weights[j] < crossOverProb or j == rIndex:
                mutant = assistVectors[0][j] + diffWeight * (assistVectors[1][j] - assistVectors[2][j])
                testedAgent[j] = Clamp(mutant, paramRanges[j])

        UpdateParams(testedAgent)
        testedScore = ObjectiveFunction(RecomputeFallowTS())

        #The trial vector replaces agent i only when it scores strictly higher.
        if testedScore > scores[i]:
            print (f"\t\tAgent {i} improved from {scores[i]} to {testedScore} - Now: {testedAgent}")
            agents[i] = testedAgent
            scores[i] = testedScore
        else:
            print (f"\t\tAgent {i} no improvement ({scores[i]})")

        if testedScore > bestScore[0]:
            bestScore = [testedScore, testedAgent]

    print(f"Best scorer: {bestScore[1]} -- {bestScore[0]}")
    iteration += 1


print (f"=================\n Finished after {iteration} iterations")
print (f"{bestScore[0]} -- {bestScore[1]}")

#Leave the module-level parameters at the best vector found instead of at whichever trial vector
#was evaluated last. fallowTS itself is not regenerated here.
if bestScore[1]:
    UpdateParams(bestScore[1])

In [None]:
#Result export

noDataValue = -9999

#Season part of the file name: every entry of targetMonths joined with "-", however many there are.
seasonLabel = "-".join(str(month) for month in targetMonths)

#split raster into multiple ones based on zone, then export
#TODO make the splitting optional
for i in range (0, zonesSize):
    targetZone = ee.Feature(zones.get(i))
    zoneGeometry = targetZone.geometry()
    zoneName = targetZone.get(subdivisionPropertyName).getInfo() #client-side value, needed for the file name
    exportName = f"{projectName}_{zoneName}_fallow_ts_{yearStart}-{yearEnd}_season_{seasonLabel}"
    print (f"{i} - exporting file: {exportName}")

    #One Drive export task per zone, clipped to and bounded by that zone's geometry.
    task = ee.batch.Export.image.toDrive(
        image = fallowTS.clip(zoneGeometry),
        description = exportName,
        region = zoneGeometry,
        scale = targetOutputScale,
        crs = 'EPSG:4326',
        maxPixels = 300000000,
        fileFormat = 'GeoTIFF',
        formatOptions = {'noData': noDataValue})

    task.start()


## This part exports the four fallow components. Mostly used for testing and checking the internal workings of the algorithm.
# isFallowComponentsCollapsed = isFallowComponents.toBands()
# for i in range (0, zonesSize):
#     targetZone = ee.Feature(zones.get(i))
#     exportName = f"{projectName}_{targetZone.get(subdivisionPropertyName).getInfo()}_fallow_components_{yearStart}-{yearEnd}_season_{targetMonths[0]}-{targetMonths[1]}-{targetMonths[2]}-{targetMonths[3]}"

#     task = ee.batch.Export.image.toDrive(
#     image = isFallowComponentsCollapsed.clip(targetZone.geometry()),
#     description = exportName,
#     #folder = 'ee_export',
#     region = targetZone.geometry(),
#     scale = targetOutputScale,
#     crs = 'EPSG:4326',
#     fileFormat = 'GeoTIFF',
#     formatOptions = {'noData': noDataValue})

#     task.start()