Estimation of detection lag and confirmation lag relative to the start date of change in the reference data.

#### Import sample and stratification information

In [1]:
# Import strata and sample unit information
import sys 
import math
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import statistics 
np.set_printoptions(precision=2, suppress=True, floatmode='fixed')

source = "/gpfs/glad3/HLSDIST/Validation/2024_10kmblock/analysis/"  # Replace with the desired path
os.chdir(source)

referenceSource = source+"tables/reference_data/referenceTimeSeriesInterpolated16_16_goodFirst.csv"
#referenceSource = source+"tables/reference_data/referenceTimeSeriesInterpolated16_16_first.csv"
mapsourceHLS = "mapLabels2024"
mapsourceS1 = source+"generate_dist_s1_table/dist_s1_label_tables"

missingS1data = [545539,1105166,533999,540915,541754,588553,938249,114851,1195220,722463,1137281,1106625,518955,649887,1107445,1458943]
baddata = [488254]
HLSsmoke = [1402267,1416380]#smoke haze
S1allnodata = [1436925,1164088,209941,64176,596625]
excludelist = missingS1data+baddata+S1allnodata#+HLSsmoke

ANNname = "2024"
sampleDict = {}
sampleFull = {}
blockstrataDict = {}
substrataDict = {}
countSelected = {}
with open(source+"tables/reference_data/selectedpointsLL.csv","r") as sample:
  lines = sample.readlines()[1:]
  for l in lines:
    (ID,Block,subID,blockStratum,substratum,zone,x,y,centxUTM,centyUTM,Long,Lat,MGRS) = l.strip().split(",")
    sampleDict[ID] = [Long,Lat,zone,centxUTM,centyUTM,MGRS]
    sampleFull[ID] = l.strip()
    blockstrataDict[Block] = blockStratum
    substrataDict[ID] = int(substratum)
    if Block in countSelected:
      countSelected[Block] += 1
    else:
      countSelected[Block] = 1

allIDs = sampleDict.keys()

#Strata area
#scounts = allblocks['stratum'].value_counts()
scounts = pd.read_csv(source+"tables/reference_data/blockStrataCounts.csv").set_index('name')
scounts['area'] = scounts.multiply(100)
print(scounts)
allStrata = list(scounts.index)
totalBlockCount = scounts['blockCount'].sum()
print('totalBlockCount',totalBlockCount)
print(allStrata)

               blockCount       area
name                                
waternew             1272     127200
treelosswet          9696     969600
builtnewalert      120496   12049600
fire                 3188     318800
treelossTF          63161    6316100
cropnew             75912    7591200
wetshort             9477     947700
oldcrop_short       65470    6547000
gen                215057   21505700
other              385127   38512700
none              1085543  108554300
totalBlockCount 2034399
['waternew', 'treelosswet', 'builtnewalert', 'fire', 'treelossTF', 'cropnew', 'wetshort', 'oldcrop_short', 'gen', 'other', 'none']


In [2]:
selectedBlocks = pd.read_csv(source+"tables/reference_data/blockstrata_subareas.csv")
selectedBlocks = selectedBlocks.set_index('block')
print(selectedBlocks.head())
allBlocks = list(selectedBlocks.index)

def getBlocksStratum(stratum):
  return list(selectedBlocks[selectedBlocks['stratum']==stratum].index)

def getBlockPixelCount(block):
  return selectedBlocks.loc[int(block)][['sub1','sub2','sub3','sub4']].sum()


        MGRS      stratum  sub1   sub2  sub3    sub4       left   top  \
block                                                                   
30961  33NUF   treelossTF  2263     67  2493  105873   13.80825  4.95   
34405  50NPL  treelosswet  1614   6024  2755  100467  118.01200  5.40   
35975  37NEG   treelossTF  1369    231  2864  106424   39.09600  5.67   
40284  47NRG  treelosswet   762   2115  7183  100894  101.90300  6.30   
41318  36NUN     waternew   873  10119  6819   92899   31.40350  6.48   

          right  bottom  
block                    
30961   13.8985    4.86  
34405  118.1025    5.31  
35975   39.1865    5.58  
40284  101.9935    6.21  
41318   31.4940    6.39  


#### General functions to get DIST-S1, DIST-HLS, and reference labels and dates

In [4]:
#get number of days between and two dates; used to convert dates to 1-366 day of year 
def dayDiff(start,end):
  startdate = datetime.datetime.strptime(start,"%Y%m%d")
  enddate = datetime.datetime.strptime(end,"%Y%m%d")
  days = enddate-startdate
  return (days.days+1)

In [5]:
#DIST-S1 generate dictionary of daily STATUS values per ID (note switched path to block instead of MGRS tile)
def getDISTS1status_vI(block):#,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]
    allNoData = True
    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,366)]
        #print(ID,end=',')
        try:
          with open(mapsourceS1+'/'+block+'/'+ID+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (temp,SensingTime,STATUS)= line.strip().split(',')
                    day = dayDiff("20240101",datetime.datetime.strftime(datetime.datetime.strptime(SensingTime,"%Y-%m-%d"),"%Y%m%d"))

                    #if not (skipNodata and VEGANOM!='NA'):
                    mapalert[ID][day] = int(STATUS)
                    if int(STATUS) != 255:
                        allNoData = False
                except:
                #    print(traceback)
                    print(ID,line)
        except:
            with open("missingS1.txt","a") as OUT:
                OUT.write(ID+"\n")
    if allNoData:
        print("NO DATA FOR DIST-S1 BLOCK ",block)
    return mapalert

In [14]:
def getDISTS1Date_vI(block,adjusted,yeslabels=[1,2,3,4,5,6,7,8]):
    mapdate = {}
    IDlist = [ID for ID in allIDs if block in ID]
    allNoData = True
    for ID in IDlist:
        mapdate[ID] = [-1 for i in range(0,366)]
        try:
          with open(mapsourceS1+'/'+block+'/'+ID+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            date = 0
            for line in maplist:
                try:
                    (temp,SensingTime,STATUS)= line.strip().split(',')
                    day = dayDiff("20240101",datetime.datetime.strftime(datetime.datetime.strptime(SensingTime,"%Y-%m-%d"),"%Y%m%d"))
                    
                    if int(STATUS) in yeslabels :
                        if date == 0:
                            date = datetime.datetime.strftime(datetime.datetime.strptime(SensingTime,"%Y-%m-%d"),"%Y%m%d")
                        if adjusted:
                            i=1
                            while((day-i)>=1 and mapdate[ID][day-i]==-1):
                               i+=1
                            if day-1>=1:
                                if mapdate[ID][day-i] == 0:
                                    mapdate[ID][day] = (day-i/2)
                                else:
                                    mapdate[ID][day] = mapdate[ID][day-i]
                            else:
                                mapdate[ID][day-i] == dayDiff("20240101",date)/2
                        else:
                            mapdate[ID][day] = dayDiff("20240101",date)
                    elif STATUS == '255' and mapdate[ID][day] == -1:
                        mapdate[ID][day] = -1
                    elif STATUS == '0':
                        mapdate[ID][day] = 0
                        date = 0
                except:
                    print(ID,line)
        except:
            print(mapsourceS1+'/'+block+'/'+ID+'.csv failed')
            
    return mapdate

In [12]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefALERTDaily(filename,high=["VLmaj"],low=["VLmin"],nochange=["OCmin","OCmaj","VGmin","VGmaj","noChange"],IDlist=allIDs,only2024=False):
  refalert = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    refalert[ID] = [0 for i in range(0,366)]
    if ID in IDlist:
      daily = fields[5:]
      #refalert[ID] = [0 for i in range(0,366)]
      try:
        for day in range(0,366):
          found = False
          for l in high:
            if l == daily[day]:
              refalert[ID][day] = 3
          for l in low:
            if l == daily[day]:
              refalert[ID][day] = 2
          for l in nochange:
            if l == daily[day]:
              refalert[ID][day] = 1
          #if conversiononly and (refconv[ID] != "natural" and (refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0'))):#(refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0')):#(refconv[ID] == "no" or natural[ID] == '0'):
          #  refalert[ID][day] = 0
          #if only2024 and refprevyear[ID] == "TRUE":
          #  refalert[ID][day] = 0
      except:
        print(ID,day,daily)
  return refalert

In [13]:
####deltalist is not stratified.
def getRefALERTDateDaily(filename,yeslabels,nolabels,adjusted):
  refdate = {}
  #deltalist = [ [] for s in range(Nstrata+1)] 
  deltalist = { i:[] for i in allIDs}
  #deltalist = []
  with open(filename,'r') as reffile:
    lines = reffile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(',')
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    daily = fields[5:]
    refdate[ID] = [-1 for i in range(0,367)]
    for day in range(0,366):
      found = False
      for l in yeslabels:
        if l == daily[day]:
          found = True
        #if l in daily[day] and int(ID) in refVLmaj:
        #  found = True
      if found:
        if day==0:
            refdate[ID][day] = 1
        elif refdate[ID][day-1]>0:
            refdate[ID][day] = refdate[ID][day-1]
        elif refdate[ID][day-1] == 0:
            refdate[ID][day] = day+1
        else:
            i=1
            while((day-i)>=0 and refdate[ID][day-i]==-1):#potentially could have loss nodat loss...
                i+=1
            if refdate[ID][day-i]>0:
                refdate[ID][day] = refdate[ID][day-i]
            else:
                #deltalist[strata[ID]].append((i/2))
                deltalist[ID].append((i/2))
                #deltalist.append((i/2))
            if adjusted:
                refdate[ID][day] = (day-i/2)+1
            else:
                refdate[ID][day] = day+1
      else:
        for l in nolabels:
          if l == daily[day]:
            found = True
        if found:
          refdate[ID][day] = 0
        else:
          refdate[ID][day] = -1
  return (refdate,deltalist)

In [22]:
#DIST-ALERT-HLS
def getDISTALERTStatus_vI(block,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]

    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,367)]
        #print(ID,end=',')
        with open(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (granuleID,SensingTime,ProductionTime,VEGDISTSTATUS,VEGANOM,VEGIND,VEGHIST,VEGANOMMAX,VEGDISTCONF,VEGDISTDATE,VEGDISTCOUNT,VEGDISTDUR,VEGLASTDATE,GENDISTSTATUS,GENANOM,GENANOMMAX,GENDISTCONF,GENDISTDATE,GENDISTCOUNT,GENDISTDUR,GENLASTDATE)= line.strip().split(',')
                    day = dayDiff("20240101",SensingTime[0:8])

                    if not (skipNodata and VEGANOM!='NA'):
                        if (int(VEGDISTSTATUS)==7 or int(VEGDISTSTATUS)==8) and VEGDISTDATE[0:4]=='2023': 
                            mapalert[ID][day] = 0
                        else:
                            mapalert[ID][day] = int(VEGDISTSTATUS)
                except:
                #    print(traceback)
                    print(ID,day,line)

    return mapalert


In [23]:
def getDISTALERTDate_vI(block,adjusted,yeslabels=[1,2,3,4,5,6,7,8],skipNodata=False):
    mapdate = {}
    IDlist = [ID for ID in allIDs if block in ID]
    allNoData = True
    for ID in IDlist:
        mapdate[ID] = [-1 for i in range(0,367)]
        try:
          with open(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            date = 0
            for line in maplist:
                try:
                    (granuleID,SensingTime,ProductionTime,VEGDISTSTATUS,VEGANOM,VEGIND,VEGHIST,VEGANOMMAX,VEGDISTCONF,VEGDISTDATE,VEGDISTCOUNT,VEGDISTDUR,VEGLASTDATE,GENDISTSTATUS,GENANOM,GENANOMMAX,GENDISTCONF,GENDISTDATE,GENDISTCOUNT,GENDISTDUR,GENLASTDATE)= line.strip().split(',')
                    day = dayDiff("20240101",SensingTime[0:8])
            
                    #if not (skipNodata and int(VEGANOM)==255):
                    
                    if int(VEGDISTSTATUS) in yeslabels :
                        if date == 0:
                            date = SensingTime[0:8]
                        if adjusted:
                            i=1
                            while((day-i)>=1 and mapdate[ID][day-i]==-1):
                               i+=1
                            if day-1>=1:
                                if mapdate[ID][day-i] == 0:
                                    mapdate[ID][day] = (day-i/2)
                                else:
                                    mapdate[ID][day] = mapdate[ID][day-i]#dayDiff("20211001",VEGDISTDATE)
                            else:
                                mapdate[ID][day-i] == dayDiff("20240101",date)/2
                        else:
                            mapdate[ID][day] = dayDiff("20240101",date)
                            #print("day",dayDiff("20230101",VEGDISTDATE))
                    elif VEGDISTSTATUS == '255':
                        mapdate[ID][day] = -1
                        #print("status")
                    elif VEGDISTSTATUS == '0':
                        mapdate[ID][day] = 0
                        #print("NA")
                        date = 0
                except:
                    print("Date",ID,day,line)
        except:
            print(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv failed')
            
    return mapdate

#### Latency calculation functions

In [15]:
def getMean(deltalist):
  denom = 0
  deltaSum = 0
  variance = 0
  validIDcount = 0
  strataDelta = {s:[] for s in allStrata}
  for block in allBlocks:
    IDlist = [ID for ID in allIDs if str(block) in ID]
    substrata = [[] for i in [0,1,2,3,4]]
    for ID in IDlist:
      scount = len(deltalist[ID])
      if scount>0:
        validIDcount+=1
      for delt in deltalist[ID]:
        substrata[substrataDict[ID]].append(delt) #assign to substrata list
    blockSum = 0
    blockCountSum = 0
    N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']]
    for s in [1,2,3,4]: #estimate the average lag for block
      scount = len(substrata[s])
      if scount>0:
        smean = sum(substrata[s])/scount
        blockSum += smean*N.iloc[s-1]
        blockCountSum +=N.iloc[s-1]
    if blockCountSum>0:
      blockMean = blockSum/blockCountSum
      strataDelta[blockstrataDict[str(block)]].append(blockMean)
        
  for s in allStrata:
    Nh = scounts.loc[s]['blockCount']
    scount = len(strataDelta[s])
    if scount>0:
      smean = sum(strataDelta[s])/scount
      if scount > 1:
        ssum = 0
        for i in strataDelta[s]:
            ssum += (i-smean)**2
        sv = (1/(scount-1))*ssum
        variance += (Nh/totalBlockCount)**2 * ((Nh-scount)/(Nh-1)) * sv/scount
      
      denom += (scount * (Nh))
      deltaSum += smean * (scount * (Nh))
  if denom>0:        
    sampleMean = deltaSum/denom
    SE = math.sqrt(variance)
  else:
    sampleMean="NA"
    SE="NA"
  #print(validIDcount,"sample pixels with comparable dates of change in the ref and map")
  return (sampleMean,SE,validIDcount)#,deltaDate)

In [20]:
def estimateDetectionLag(ref,refdate,mincount,mapvalues,excludelist,lookback,forward=30,sysDIST="DIST-S1"):
    deltaList = { i:[] for i in allIDs}

    deltaDate = {}
    #confusion matrix
    for block in allBlocks:
      if not int(block) in excludelist:
        block = str(block)
        if sysDIST == "DIST-S1":
            map = getDISTS1status_vI(block)
            mapdate = getDISTS1Date_vI(block,False,mapvalues)
        elif sysDIST == "DIST-HLS":
            map = getDISTALERTStatus_vI(block,True)
            mapdate = getDISTALERTDate_vI(block,False,mapvalues,True)
            #mapdate = getDISTALERTDate_vI(block,False,[1,2,3,4,5,6,7,8],True)
        IDlist = [ID for ID in allIDs if str(block) in ID]
        refnocount = 0
        for ID in IDlist:
          if ID in ref and ID in map:
            refno=0
            ptotal = 0
            atotal = 0
            deltaTotal = 0
            deltaDate[ID] = [-1 for i in range(0,366)]
            for d in range(0,365):
                #if max(ref[ID][0:(d+1)])>0 and map[ID][d] != 255 or mapdate[ID][d] != -1:#or map[ID][d] != 0 
                    
                    start = (d>lookback)*(d-lookback)
                    if mapdate[ID][d]>0 and map[ID][d] in mapvalues:
                        start = (mapdate[ID][d]>lookback)*(mapdate[ID][d]-lookback)
                    end = d+mincount+forward
                    end2 = d+forward
                    if d+mincount+forward>365:
                        end = 365
                    if d+forward>365:
                        end2 = 365

                    if map[ID][d] in mapvalues:#active alert
                        if (ref[ID][start:end].count(2) + ref[ID][start:end].count(3)) > mincount:#if ref also has alert
                            if start < d-lookback:
                                i=0    
                                while(refdate[ID][start+i]<=0 and start+i<end2):#go back to first ref date with date
                                    i+=1
                                if mapdate[ID][d]-refdate[ID][start+i] > -30:
                                    deltaDate[ID][d] = mapdate[ID][d]-refdate[ID][start+i]
                                #print(ID,d,start,end2, end,mapdate[ID][d],refdate[ID][start+i],start+i)
                            else:
                                i=0    
                                while(refdate[ID][d-i]<=0 and d-i>0):#go back to first ref date with date
                                    i+=1
                                if mapdate[ID][d]-refdate[ID][d-i] > -30:
                                    deltaDate[ID][d] = mapdate[ID][d]-refdate[ID][d-i]
                                if refdate[ID][d-i]<=0:
                                    i=0    
                                    while(refdate[ID][d+i]<=0 and d+i<end2):#go forward to first ref date with date
                                        i+=1 
                                    if mapdate[ID][d]-refdate[ID][d+i] > -30:
                                        deltaDate[ID][d] = mapdate[ID][d]-refdate[ID][d+i]

                            deltaTotal +=deltaDate[ID][d] #sum total delta
                            atotal += 1 #count total dates of alert
                            ptotal += 1 #count total dates with data
                        elif ref[ID][start:(d+1)].count(1) > 0: #if ref has no alert
                            ptotal += 1 #count total dates with data
                            #print(ID,d,"map", map[ID][d],"mapdate",mapdate[ID][d],"no ref change from",start,"to",end2)
                            refno=1
                    elif map[ID][d] == 0:
                        if (ref[ID][start:(d+mincount)].count(2) + ref[ID][start:(d+mincount)].count(3)) > mincount or ref[ID][start:(d+1)].count(1) > 0: # ref has data
                            ptotal += 1 #count total dates with data
            refnocount+=refno
            if atotal>0: # if alert dates
                deltaList[ID].append(deltaTotal/atotal) # append average delta to strata list
    (sampleMean,SE,validcount) = getMean(deltaList)
    return (sampleMean,SE,validcount)


#### Latency

In [24]:

duration = 10 #how long the change must persist in the reference
lookback = 30 #how far to look back in the reference for change
forward = 30 #how far to look forward in the reference for change
noLabels = ["noChange"]

for refAdj in [True]:#,False]:
  for refyes in [["VLsub","VLmin","VLmaj","VGsub","VGmin","VGmaj","OCsub","OCmin","OCmaj"],["VLmin","VLmaj","VGmin","VGmaj","OCmin","OCmaj"]]:#["VLmin","VLmaj"],["VLsub","VLmin","VLmaj"]
    print("Compared against reference classes:",refyes)
    ref = getRefALERTDaily("referenceTimeSeries_goodFirst.csv",refyes,noLabels,IDlist=allIDs)
    (refdate,refuncList) = getRefALERTDateDaily("referenceTimeSeries_goodFirst.csv",refyes,noLabels,refAdj)
    #print(refuncList)
    (refunc,ruSE,valid) = getMean(refuncList)
    print(round(refunc,2),"+/-",round(ruSE,2),"day mean uncertainty in the reference data.\n")
    #printDictionary(refdate,"refdate_"+name+"adj"+str(refAdj)+".csv")
    for sysDIST in ["DIST-S1","DIST-HLS"]:#"DIST-S1",
      #(lag,SE) = estimateDetectionLag(ref,refdate,duration,[4,5,6],excludelist,lookback,forward,sysDIST)
      #print(sysDIST,"Mapped high loss events", round(lag,2),"+/-",round(SE,2),"day mean lag")
      (lag,SE,validIDcount) = estimateDetectionLag(ref,refdate,duration,[1,2,3,4,5,6],excludelist,lookback,forward,sysDIST)
      print(sysDIST,"Initial detection (low and high magnitude)", round(lag,2),"+/-",round(SE,2),"day mean lag.\n",validIDcount,"pixels with comparable dates of change in the ref and map\n")
      #(lag,SE) = estimateDetectionLag(ref,refdate,duration,[6],excludelist,lookback,forward,sysDIST)
      #print(sysDIST,"Mapped confirmed high loss events", round(lag,2),"+/-",round(SE,2),"day mean lag")
      (lag,SE,validIDcount) = estimateDetectionLag(ref,refdate,duration,[3,6],excludelist,lookback,forward,sysDIST)
      print(sysDIST,"Confirmed status (low and high magnitude)", round(lag,2),"+/-",round(SE,2),"day mean lag.\n",validIDcount,"pixels with comparable dates of change in the ref and map\n")

Compared against reference classes: ['VLsub', 'VLmin', 'VLmaj', 'VGsub', 'VGmin', 'VGmaj', 'OCsub', 'OCmin', 'OCmaj']
7.75 +/- 0.54 day mean uncertainty in the reference data.

DIST-S1 Initial detection (low and high magnitude) 32.26 +/- 2.24 day mean lag.
 843 pixels with comparable dates of change in the ref and map

DIST-S1 Confirmed status (low and high magnitude) 52.21 +/- 4.41 day mean lag.
 346 pixels with comparable dates of change in the ref and map

DIST-HLS Initial detection (low and high magnitude) 16.44 +/- 0.99 day mean lag.
 917 pixels with comparable dates of change in the ref and map

DIST-HLS Confirmed status (low and high magnitude) 30.64 +/- 1.53 day mean lag.
 797 pixels with comparable dates of change in the ref and map

Compared against reference classes: ['VLmin', 'VLmaj', 'VGmin', 'VGmaj', 'OCmin', 'OCmaj']
11.32 +/- 1.04 day mean uncertainty in the reference data.

DIST-S1 Initial detection (low and high magnitude) 37.32 +/- 3.21 day mean lag.
 641 pixels with