# Summary

This file has been assembled from various versions of the GEE_pull_functions files that were used to pull surface reflectance from Landsat Collection 1 over rivers, lakes, WQP matchups, etc. It has been modified to run with Landsat Collection 2, based on changes in the Collection 2 data such as: 

* Bit mask information (i.e. bit numbers) 
* Scaling parameters for optical bands 
* Band names 
* Thresholds (inputs, rather) for the DSWE function


In [1]:
import geemap
import time
import os
import time
import ee
import os
import numpy as np
import pandas

In [2]:
ee.Authenticate()


Successfully saved authorization token.


In [3]:
ee.Initialize()

# Image Masking 

Masks are binary images used to remove unwanted pixels from the final layer. 

Below are the updated bit numbers within the 'QA_PIXEL' band for LS Collection 2 data. Note that this is one of the main differences between collection 1 and 2. 

*    Bit 0: Fill
*    Bit 1: Dilated Cloud
*    Bit 2: Cirrus (high confidence)
*    Bit 3: Cloud
*    Bit 4: Cloud Shadow
*    Bit 5: Snow
*    Bit 6: Clear
*        0: Cloud or Dilated Cloud bits are set
*        1: Cloud and Dilated Cloud bits are not set
*    Bit 7: Water
*    Bits 8-9: Cloud Confidence
*        0: None
*        1: Low
*        2: Medium
*        3: High
*    Bits 10-11: Cloud Shadow Confidence
*        0: None
*        1: Low
*        2: Medium
*        3: High
*    Bits 12-13: Snow/Ice Confidence
*        0: None
*        1: Low
*        2: Medium
*        3: High
*    Bits 14-15: Cirrus Confidence
*        0: None
*        1: Low
*        2: Medium
*        3: High

# Unpack

The following functions 'unpack' the information stored within the QA band. 

In [4]:

def Unpack(bitBand, startingBit, bitWidth):
    """Extract a run of bits from a bit-packed band.

    The band is shifted right by ``startingBit`` and then AND-ed with a mask
    made of ``bitWidth`` low-order ones (``2**bitWidth - 1``).  For reference,
    the Water bit (bit 7) has a starting bit of 7 and a bit width of 1.

    Args:
        bitBand: Bit-packed band; it is wrapped in ``ee.Image``.
        startingBit: Index of the lowest bit of the field to extract.
        bitWidth: Number of bits in the field.

    Returns:
        The image holding the value of the requested bit field.
    """
    lowBitsMask = ee.Number(2).pow(ee.Number(bitWidth)).subtract(ee.Number(1)).int()
    shifted = ee.Image(bitBand).rightShift(startingBit)
    return shifted.bitwiseAnd(lowBitsMask)


def UnpackAll(bitBand, bitInfo):
    """Unpack several bit fields at once into a multi-band image.

    Args:
        bitBand: Bit-packed band handed to ``Unpack``.
        bitInfo: Mapping of output band name -> ``[startingBit, bitWidth]``.

    Returns:
        ``ee.Image.cat`` of one unpacked band per key, each renamed to its key,
        in the iteration order of ``bitInfo``.
    """
    layers = []
    for name in bitInfo:
        firstBit, width = bitInfo[name][0], bitInfo[name][1]
        layers.append(Unpack(bitBand, firstBit, width).rename([name]))
    return ee.Image.cat(layers)



# Function of Mask (FMask)

Fmask is used for cloud, cloudshadow, snow/ice, and water masking 

In [5]:
def AddFmask(image):
    """Return ``image`` with an extra ``fmask`` band built from ``pixel_qa``.

    The band starts as the Water flag (0 or 1) and is then overwritten, in
    this order, wherever a flag is set: SnowIce -> 4, CloudShadow -> 3,
    Cloud -> 2.  Later overwrites win, so Cloud takes precedence over
    CloudShadow, which takes precedence over SnowIce and Water.
    """
    # Band name -> [starting bit, bit width] inside the QA band.
    qaBits = {
        'Cloud': [3, 1],
        'CloudShadow': [4, 1],
        'SnowIce': [5, 1],
        'Water': [7, 1],
    }
    flags = UnpackAll(image.select(['pixel_qa']), qaBits)
    cloud = flags.select(['Cloud'])

    fmask = flags.select(['Water']).rename(['fmask'])
    fmask = fmask.where(flags.select(['SnowIce']), ee.Image(4))
    fmask = fmask.where(flags.select(['CloudShadow']), ee.Image(3))
    fmask = fmask.where(cloud, ee.Image(2))
    # Mask the fmask so that it has the same footprint as the quality band.
    fmask = fmask.mask(cloud.gte(0))

    return image.addBands(fmask)

# Dynamic Surface Water Extent (DSWE)

These functions are for calculating DSWE. The thresholds for various tests (t1-t5), haven't necessarily changed. What's changed are the inputs. Previously, for LS Collection 1 the inputs for these functions were unscaled surface reflectance. Now, the inputs are scaled surface reflectance. 


In [6]:

def Mndwi(image):
    """Modified Normalized Difference Water Index.

    Evaluates ``(Green - Swir1) / (Green + Swir1)`` on the renamed bands of
    ``image`` and returns the resulting image.
    """
    bandMap = {
        'GREEN': image.select(['Green']),
        'SWIR1': image.select(['Swir1']),
    }
    return image.expression('(GREEN - SWIR1) / (GREEN + SWIR1)', bandMap)

def Mbsrv(image):
    """Multi-band Spectral Relationship Visible: Green + Red, band ``mbsrv``."""
    green = image.select(['Green'])
    red = image.select(['Red'])
    return green.add(red).rename('mbsrv')

def Mbsrn(image):
    """Multi-band Spectral Relationship Near infrared: Nir + Swir1, band ``mbsrn``."""
    nir = image.select(['Nir'])
    swir1 = image.select(['Swir1'])
    return nir.add(swir1).rename('mbsrn')

def Ndvi(image):
    """Normalized Difference Vegetation Index.

    Evaluates ``(Nir - Red) / (Nir + Red)`` on the renamed bands of ``image``
    and returns the resulting image.
    """
    bandMap = {
        'RED': image.select(['Red']),
        'NIR': image.select(['Nir']),
    }
    return image.expression('(NIR - RED) / (NIR + RED)', bandMap)

def Awesh(image):
    """Automated Water Extent Shadow.

    Evaluates ``Blue + 2.5 * Green - 1.5 * mbsrn - 0.25 * Swir2``, where
    ``mbsrn`` is the band produced by ``Mbsrn(image)``.
    """
    terms = {
        'Blue': image.select(['Blue']),
        'Green': image.select(['Green']),
        'mbsrn': Mbsrn(image).select(['mbsrn']),
        'Swir2': image.select(['Swir2']),
    }
    formula = 'Blue + 2.5 * Green + (-1.5) * mbsrn + (-0.25) * Swir2'
    return image.expression(formula, terms)

def Dswe(i):
    """Classify each pixel of ``i`` with the five-test DSWE decision table.

    Five boolean tests are computed and packed into one decimal code
    (test 1 in the ones place, test 2 in the tens place, ... test 5 in the
    ten-thousands place).  The code is then looked up in fixed lists to give
    the output class:

        0 - no water          (``noWater``)
        1 - ``hWater`` codes
        2 - ``mWater`` codes
        3 - ``pWetland`` code
        4 - ``lWater`` codes

    Thresholds are taken from the LS Collection 2 DSWE Data Format Control Book:
    (https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/LSDS-2042_LandsatC2_L3_DSWE_DFCB-v2.pdf)
    Inputs are meant to be scaled reflectance values; the Collection 1
    (unscaled) equivalents are the same thresholds multiplied by 10000.

    Returns:
        Single-band image named ``dswe``.
    """
    mndwi = Mndwi(i)
    mbsrv = Mbsrv(i)
    mbsrn = Mbsrn(i)
    awesh = Awesh(i)
    ndvi = Ndvi(i)
    blue = i.select(['Blue'])
    nir = i.select(['Nir'])
    swir1 = i.select(['Swir1'])
    swir2 = i.select(['Swir2'])

    # Test 1: MNDWI greater than the wetness index threshold.
    wetnessTest = mndwi.gt(0.124)
    # Test 2: MBSRV greater than MBSRN.
    visibleTest = mbsrv.gt(mbsrn)
    # Test 3: AWESH greater than 0.
    aweshTest = awesh.gt(0)
    # Test 4: partial surface water 1 thresholds
    # (swir1 900 / nir 1500 for unscaled LS Collection 1 data).
    partialTest1 = (mndwi.gt(-0.44)
                    .And(swir1.lt(0.09))
                    .And(nir.lt(0.15))
                    .And(ndvi.lt(0.7)))
    # Test 5: partial surface water 2 thresholds
    # (blue 1000 / swir1 3000 / swir2 1000 / nir 2500 for unscaled LS Collection 1 data).
    partialTest2 = (mndwi.gt(-0.5)
                    .And(blue.lt(0.1))
                    .And(swir1.lt(0.3))
                    .And(swir2.lt(0.1))
                    .And(nir.lt(0.25)))

    # Pack the five 0/1 results into one decimal code, one digit per test.
    code = wetnessTest
    for weight, test in ((10, visibleTest), (100, aweshTest),
                         (1000, partialTest1), (10000, partialTest2)):
        code = code.add(test.multiply(weight))

    def matchesAny(values):
        # OR together ``code.eq(v)`` for every listed value, left to right.
        hit = code.eq(values[0])
        for value in values[1:]:
            hit = hit.Or(code.eq(value))
        return hit

    noWater = matchesAny((0, 1, 10, 100, 1000))
    hWater = matchesAny((1111, 10111, 11011, 11101, 11110, 11111))
    mWater = matchesAny((111, 1011, 1101, 1110, 10011,
                         10101, 10110, 11001, 11010, 11100))
    pWetland = matchesAny((11000,))
    lWater = matchesAny((11, 101, 110, 1001, 1010,
                         1100, 10000, 10001, 10010, 10100))

    # Each pixel matches exactly one list, so the weighted sum is its class id.
    classified = (noWater.multiply(0)
                  .add(hWater.multiply(1))
                  .add(mWater.multiply(2))
                  .add(pWetland.multiply(3))
                  .add(lWater.multiply(4)))

    return classified.rename('dswe')

# Hillshades and hillshadows

These functions calculate hillshade and hillshadow based on the azimuth and zenith angles stored within a Landsat image. 

The name of the azimuth angle property was changed from 'SOLAR_AZIMUTH' to 'SUN_AZIMUTH'. There is also no 'ZENITH' field; instead there is a 'SUN_ELEVATION' field, and 'SUN_ZENITH' can be computed as 90 - 'SUN_ELEVATION'. 

In [7]:
def CalcHillShades(image, geo):
    """Compute a hillshade band for the area around ``geo``.

    Args:
        image: Landsat image; its ``SUN_AZIMUTH`` and ``SUN_ELEVATION``
            properties supply the illumination angles.
        geo: Geometry; the DEM is clipped to this geometry buffered by 300.

    Returns:
        The ``ee.Terrain.hillshade`` result renamed to ``hillShade``.
    """
    sunAzimuth = ee.Number(image.get('SUN_AZIMUTH'))
    sunElevation = ee.Number(image.get('SUN_ELEVATION'))

    # Potential choice: area buffered that the hillshade is calculated within.
    # NOTE(review): CalcHillShadows buffers by 3000 rather than 300 - confirm
    # the difference is intentional.
    dem = ee.Image("users/eeProject/MERIT").clip(geo.buffer(300))

    shaded = ee.Terrain.hillshade(dem, sunAzimuth, sunElevation)
    return shaded.rename(['hillShade'])
  
def CalcHillShadows(image, geo):
    """Compute a hill-shadow band for the area around ``geo``.

    Args:
        image: Landsat image; ``SUN_AZIMUTH`` is used directly and the zenith
            angle is derived as ``90 - SUN_ELEVATION``.
        geo: Geometry; the DEM is clipped to this geometry buffered by 3000.

    Returns:
        The ``ee.Terrain.hillShadow`` result (neighborhood size 30) renamed
        to ``hillShadow``.
    """
    sunAzimuth = ee.Number(image.get('SUN_AZIMUTH'))
    sunZenith = ee.Number(90).subtract(image.get('SUN_ELEVATION'))

    # Potential choice: area buffered that the hillshadow is calculated within.
    dem = ee.Image("users/eeProject/MERIT").clip(geo.buffer(3000))

    shadow = ee.Terrain.hillShadow(dem, sunAzimuth, sunZenith, 30)
    return shadow.rename(['hillShadow'])



# Need to add comments here describing function

In [8]:
def ExtractChannel(image, centerline, bound, maxDistance):
    """Keep only the part of ``image`` connected to ``centerline``.

    Seed pixels are those painted from ``centerline`` that are also set in
    ``image``.  A cumulative-cost surface is grown from the seeds using
    ``image.Not()`` as the cost, so pixels reachable at zero cost form the
    channel mask.  Presumably ``image`` is a 0/1 water mask - TODO confirm
    against the caller.

    Args:
        image: Image to filter.
        centerline: Features painted (value 1) to seed the search.
        bound: Geometry the channel mask is clipped to.
        maxDistance: Maximum distance passed to ``cumulativeCost``.

    Returns:
        ``image`` masked by the channel mask, with masked pixels set to 0.
    """
    seeds = ee.Image().toByte().paint(centerline, 1).And(image)
    travelCost = image.Not().cumulativeCost(seeds, maxDistance, False)
    channelMask = travelCost.eq(0).unmask(0).clip(bound).rename(['channelMask'])
    return image.mask(channelMask).unmask(0)

# Need to add comments which explain why this is necessary

In [9]:
def removeGeo(i):
    """Return ``i`` with its geometry set to ``None``."""
    withoutGeometry = i.setGeometry(None)
    return withoutGeometry

def _negative_flag(image, band):
    """Return a one-band image flagging where ``band`` of ``image`` is < 0.

    The result of ``lt(0)`` is renamed to ``negative_<band>``.  Shared by all
    ``add_negative_*`` functions below, which differ only in the band name.
    """
    return image.select(band).lt(0).rename('negative_' + band)


# NOTE: despite the ``add_`` prefix, these functions do not add a band to the
# input image; each returns only the flag band and leaves it to the caller to
# attach it with ``addBands``.

def add_negative_Aerosol(image):
    """Return the ``negative_Aerosol`` band: ``Aerosol`` values below zero."""
    return _negative_flag(image, 'Aerosol')


def add_negative_Red(image):
    """Return the ``negative_Red`` band: ``Red`` values below zero."""
    return _negative_flag(image, 'Red')


def add_negative_Blue(image):
    """Return the ``negative_Blue`` band: ``Blue`` values below zero."""
    return _negative_flag(image, 'Blue')


def add_negative_Green(image):
    """Return the ``negative_Green`` band: ``Green`` values below zero."""
    return _negative_flag(image, 'Green')


def add_negative_Nir(image):
    """Return the ``negative_Nir`` band: ``Nir`` values below zero."""
    return _negative_flag(image, 'Nir')


def add_negative_Swir1(image):
    """Return the ``negative_Swir1`` band: ``Swir1`` values below zero."""
    return _negative_flag(image, 'Swir1')


def add_negative_Swir2(image):
    """Return the ``negative_Swir2`` band: ``Swir2`` values below zero."""
    return _negative_flag(image, 'Swir2')

# Surface reflectance pull function

* The pull function first defines variables that will be used to mask each image (i.e. clouds, hillshadows, dswe). 

* Then a `pixOut` variable is defined as the image with all of the masks applied, plus the additional variables to be exported within our table (i.e. the DSWE value, the cloud value, etc.)

* The extract channel function is then called to apply our pull function over the channel mask

* Then the combined reducer calculates medians and standard deviations for our band values, counts negative pixel values, pixels where dswe == 1, and hill-shadowed pixels, gets mean hillshadow and cloud values, and collects the first non-null value for various image metadata properties

* Then we reduce regions with the combined reducer

In [10]:
def pull(image):
    """Summarise one Landsat image over every feature in ``reach_polygon``.

    Reads the module-level ``reach_polygon`` collection, which must be set
    before this function is mapped over an image collection.

    Pixels are kept only where ``dswe == 1``, the fmask-derived ``clouds``
    band is 0, and ``hillShadow == 1``.  Per feature, the reducer yields:

    * median of the reflectance, temperature, ``pixel_qa`` and ``dswe`` bands;
    * standard deviation of the ``sd_*`` copies of the reflectance bands;
    * ``pCount_*`` counts of negative-valued pixels per band, of valid water
      pixels (``dswe1``) and of pixels where ``hillShadow == 0`` (``shadow``);
    * mean of ``hillShadow`` and ``clouds``;
    * first non-null value of the constant metadata bands.

    Returns:
        The ``reduceRegions`` output (scale 30) with geometries removed.
    """
    opticalBands = ['Aerosol', 'Blue', 'Green', 'Red', 'Nir', 'Swir1', 'Swir2']

    fmask = AddFmask(image).select('fmask')
    clouds = fmask.gte(2).rename('clouds')
    dswe = Dswe(image).select('dswe')
    hillShadow = CalcHillShadows(image, reach_polygon.geometry()).select('hillShadow')

    # The three conditions a pixel must meet to contribute to the statistics.
    isWater = dswe.eq(1)
    isCloudFree = clouds.eq(0)
    notShadowed = hillShadow.eq(1)

    # Negative-reflectance flags, self-masked so that only flagged pixels are
    # counted, then restricted to the valid-pixel conditions.
    flagBuilders = (add_negative_Aerosol, add_negative_Red, add_negative_Blue,
                    add_negative_Green, add_negative_Nir, add_negative_Swir1,
                    add_negative_Swir2)
    negativeFlags = [
        build(image).selfMask()
        .updateMask(isWater).updateMask(isCloudFree).updateMask(notShadowed)
        for build in flagBuilders
    ]

    # Copy of Blue used purely to count the valid water pixels.
    validWater = (image.select(['Blue'], ['dswe1'])
                  .updateMask(isCloudFree).updateMask(isWater).updateMask(notShadowed))
    # Cloud-free water pixels where hillShadow is 0.
    shadowed = (hillShadow.eq(0).rename('shadow').selfMask()
                .updateMask(isCloudFree).updateMask(isWater))

    # Image-level metadata carried along as constant bands.
    cloudCover = image.metadata('CLOUD_COVER')
    sunElevation = image.metadata('SUN_ELEVATION')
    sunAzimuth = image.metadata('SUN_AZIMUTH')
    acquired = image.constant(ee.Number(image.get("system:time_start"))).rename("date")
    quality = image.constant(ee.Number(image.get("IMAGE_QUALITY"))).rename("IMAGE_QUALITY")

    # Original bands + dswe + 'sd_' duplicates get the valid-pixel mask; the
    # bands appended afterwards keep their own masks.
    stacked = image.addBands(dswe)
    for name in opticalBands:
        stacked = stacked.addBands(image.select([name], ['sd_' + name]))
    stacked = stacked.updateMask(isWater).updateMask(isCloudFree).updateMask(notShadowed)
    unmaskedExtras = negativeFlags + [validWater, shadowed, hillShadow, clouds,
                                      cloudCover, sunElevation, sunAzimuth,
                                      acquired, quality]
    for extra in unmaskedExtras:
        stacked = stacked.addBands(extra)

    medianBands = opticalBands + ['Surface_temp_kelvin', 'pixel_qa', 'dswe']
    sdBands = ['sd_' + name for name in opticalBands]
    countBands = ['negative_' + name for name in opticalBands] + ['dswe1', 'shadow']
    meanBands = ['hillShadow', 'clouds']
    firstBands = ['CLOUD_COVER', 'SUN_ELEVATION', 'SUN_AZIMUTH', 'date', 'IMAGE_QUALITY']

    medians = ee.Reducer.median().unweighted().forEachBand(stacked.select(medianBands))
    deviations = ee.Reducer.stdDev().unweighted().forEachBand(stacked.select(sdBands))
    counts = ee.Reducer.count().unweighted().forEachBand(stacked.select(countBands))
    means = ee.Reducer.mean().unweighted().forEachBand(stacked.select(meanBands))
    firsts = ee.Reducer.firstNonNull().unweighted().forEachBand(stacked.select(firstBands))

    combinedReducer = (medians
                       .combine(deviations, '', False)
                       .combine(counts, 'pCount_', False)
                       .combine(means, '', False)
                       .combine(firsts))

    # Collect the per-feature statistics, then drop geometries from the output.
    perReach = stacked.reduceRegions(reach_polygon, combinedReducer, 30)
    return perReach.map(removeGeo)

Function for limiting number of tasks in Task Manager

In [11]:
def maximum_no_of_tasks(MaxNActive, waitingPeriod):
    """Block until fewer than ``MaxNActive`` Earth Engine tasks are active.

    A task counts as active when the text ``RUNNING`` or ``READY`` appears in
    its string representation.  The function always sleeps 10 seconds before
    the first check, then re-checks every ``waitingPeriod`` seconds for as
    long as the active count is at or above ``MaxNActive``.

    Args:
        MaxNActive: Number of active tasks at which to start waiting.
        waitingPeriod: Seconds to sleep between checks.

    Returns:
        An empty tuple.
    """
    def countActive():
        # Fetch the current task list and count RUNNING/READY entries.
        active = 0
        for task in ee.batch.Task.list():
            label = str(task)
            if 'RUNNING' in label or 'READY' in label:
                active += 1
        return active

    time.sleep(10)
    while countActive() >= MaxNActive:
        time.sleep(waitingPeriod)
    return ()

Merging landsat collections and scaling band values

In [12]:
#LS Collection 2 has a different scaling parameter that needs to be applied to the optical bands. Also, thermal bands need scale factors and these are different for LS8 and LS5/LS7

def clipImage(image):
    """Clip ``image`` to the geometry of the module-level ``reach_polygon``."""
    footprint = reach_polygon.geometry()
    return image.clip(footprint)

def scale_ls5_ls7(image):
    """Apply the scale factors to a Landsat 5/7 image.

    Bands matching ``SR_B.`` become ``value * 0.0000275 - 0.2`` and the
    thermal band ``ST_B6`` becomes ``value * 0.00341802 + 149.0``.  The scaled
    bands overwrite the originals; all other bands are left untouched.
    """
    thermal = image.select('ST_B6').multiply(0.00341802).add(149.0)
    optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)

    withOptical = image.addBands(optical, overwrite = True)
    return withOptical.addBands(thermal, overwrite = True)

def scale_ls8_ls9(image):
    """Apply the scale factors to a Landsat 8/9 image.

    Bands matching ``SR_B.`` become ``value * 0.0000275 - 0.2`` and the
    thermal band ``ST_B10`` becomes ``value * 0.00341802 + 149.0``.  The
    scaled bands overwrite the originals; all other bands are left untouched.
    """
    thermal = image.select('ST_B10').multiply(0.00341802).add(149.0)
    optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)

    withOptical = image.addBands(optical, overwrite = True)
    return withOptical.addBands(thermal, overwrite = True)


def rename_image_quality(i):
    """Copy the ``IMAGE_QUALITY_OLI`` property of ``i`` to ``IMAGE_QUALITY``.

    The original property is kept; the copy gives these images the same
    property name that ``pull`` reads.
    """
    oliQuality = i.get("IMAGE_QUALITY_OLI")
    return i.set({"IMAGE_QUALITY": oliQuality})

# Aerosol doesn't exist for LS 5 and LS 7, so those collections get a dummy
# band (constant fill value of -99, named 'Null_CS') in its place.
dummyAerosol = ee.Image(-99).rename('Null_CS')

def add_dummy_Aerosol(i):
    """Return ``i`` with the constant ``Null_CS`` placeholder band appended."""
    withPlaceholder = i.addBands(dummyAerosol)
    return withPlaceholder

# Collection 2 source collections.  LS8/LS9 get an IMAGE_QUALITY property
# copied from IMAGE_QUALITY_OLI; LS5/LS7 get the placeholder aerosol band.
l9 = ee.ImageCollection('LANDSAT/LC09/C02/T1_L2').map(rename_image_quality)
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").map(rename_image_quality)
l7 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2').map(add_dummy_Aerosol)
l5 = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2').map(add_dummy_Aerosol)

# Common band names used by every function in this file, followed by the
# per-sensor source bands listed in the same order.
bands = ['Aerosol','Blue', 'Green', 'Red', 'Nir', 'Swir1', 'Swir2', 'Surface_temp_kelvin', 'pixel_qa']
ls9_bands = ['SR_B1', 'SR_B2','SR_B3', 'SR_B4', 'SR_B5','SR_B6','SR_B7', 'ST_B10', 'QA_PIXEL']
ls8_bands = ['SR_B1','SR_B2','SR_B3', 'SR_B4', 'SR_B5','SR_B6','SR_B7', 'ST_B10','QA_PIXEL']
ls7_bands = ['Null_CS','SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'ST_B6', 'QA_PIXEL']
ls5_bands = ['Null_CS','SR_B1','SR_B2','SR_B3', 'SR_B4', 'SR_B5','SR_B7', 'ST_B6','QA_PIXEL']

# Scale first (the scale functions address the original band names), then
# select and rename to the common names.
ls9 = l9.map(scale_ls8_ls9).select(ls9_bands, bands)
ls8 = l8.map(scale_ls8_ls9).select(ls8_bands, bands)
ls7 = l7.map(scale_ls5_ls7).select(ls7_bands, bands)
ls5 = l5.map(scale_ls5_ls7).select(ls5_bands, bands)

# Filtering for cloud cover less than 50 reduces processing time
# (don't know if this should be higher or lower).
ls_collection2 = (
    ls9.merge(ls8).merge(ls7).merge(ls5)
    .filter(ee.Filter.lt('CLOUD_COVER', 50))
    .filterDate('1984-03-16', '2022-11-01')
)


Preparing to loop over site identifiers

In [20]:

# Reach polygons that the reflectance statistics are computed over.
nhd_polygons = ee.FeatureCollection("projects/ee-samsillen0/assets/CONUS_COMID_reach_poly_collapse")
# load nhd centerlines
nhd_centerlines = ee.FeatureCollection("projects/ee-samsillen0/assets/nhd_grwl_full")

# WRS-2 tiles, restricted to features whose MODE property equals 'D'.
wrs = (
    ee.FeatureCollection("projects/ee-samsillen0/assets/wrs2_asc_desc")
    .filterMetadata('MODE', 'equals', 'D')
)

# this pr csv has the tendency to be finicky, due to the leading zeros in the pr column
pr_shp = ee.FeatureCollection("projects/ee-samsillen0/assets/allwqplagostiles")


In [21]:
lakesort = pr_shp.sort('PR')

# Tile identifiers ('PR' values) pulled client-side, in sorted order.
lakeID = lakesort.aggregate_array('PR').getInfo()

# make a folder in your google drive manually to output data
# Raw string: the backslashes are Windows path separators. In a normal string
# literal "\M" and "\L" are invalid escape sequences (a warning today, an
# error in future Python versions).
dlDir = r"G:\My Drive\LC02_Polygon_Pull_COMID"
filesDown = [name.replace(".csv", "") for name in os.listdir(dlDir)]

# Skip every tile that already has an exported file in dlDir.
alreadyDone = set(filesDown)
lakeID = [pr for pr in lakeID if pr not in alreadyDone]

print(len(lakeID))

for pr in lakeID:

    print(pr)

    tile = wrs.filterMetadata('PR', 'equals', pr)

    # `reach_polygon` has to remain a module-level name: pull() and
    # clipImage() read it as a global.
    reach_polygon = nhd_polygons.filterBounds(tile.geometry())

    reach_centerline = nhd_centerlines.filterBounds(tile.geometry())

    stack = ls_collection2.filterBounds(tile.geometry().centroid())

    out = stack.map(pull).flatten()
    # Drop rows where the 'Blue' property is null.
    out = out.filter(ee.Filter.notNull(['Blue']))

    # CHANGE folder output name to folder in your google drive to store output data
    dataOut = ee.batch.Export.table.toDrive(
        collection=out,
        description=str(pr),
        folder='LC02_Polygon_Pull_COMID',
        fileFormat='csv',
        selectors=['Aerosol', 'sd_Aerosol', 'pCount_negative_Aerosol', 'Blue','sd_Blue','pCount_negative_Blue','Green','sd_Green', 'pCount_negative_Green', 'Red','sd_Red', 'pCount_negative_Red','Nir', 'sd_Nir','pCount_negative_Nir','Swir1','sd_Swir1','pCount_negative_Swir1','Swir2','sd_Swir2','pCount_negative_Swir2','Surface_temp_kelvin','pixel_qa','clouds','dswe','hillShadow','pCount_dswe1','pCount_shadow','system:index', 'IMAGE_QUALITY','date','CLOUD_COVER','SUN_ELEVATION','SUN_AZIMUTH','COMID'],
    )
    # Check how many existing tasks are active and wait while there are 15 or more.
    maximum_no_of_tasks(15, 60)
    # Send next task.
    dataOut.start()

# Make sure all Earth engine tasks are completed prior to moving on.
maximum_no_of_tasks(1, 300)
print('done')


494
010028
010029
011027
011028
011029
011030
011031
012027
012028
012029
012030
012031
012032
013028
013029
013030
013031
013032
013033
013035
013036
014029
014030
014031
014032
014033
014034
014035
014036
015029
015030
015031
015032
015033
015034
015035
015036
015037
015040
015041
015042
015043
016029
016030
016031
016032
016033
016034
016035
016036
016037
016038
016039
016040
016041
016042
016043
017030
017031
017032
017033
017034
017035
017036
017037
017038
017039
017040
017041
018030
018031
018032
018033
018034
018035
018036
018037
018038
018039
018040
019029
019030
019031
019032
019033
019034
019035
019036
019037
019038
019039
020028
020029
020030
020031
020032
020033
020034
020035
020036
020037
020038
020039
021028
021029
021030
021031
021032
021033
021034
021035
021036
021037
021038
021039
021040
022027
022028
022029
022030
022031
022032
022033
022034
022035
022036
022037
022038
022039
022040
023027
023028
023029
023030
023031
023032
023033
023034
023035
023036
023037
023038
02