# DIST-S1 Performance Assessment

Accuracy estimation for equal probability sampling, from Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change" https://doi.org/10.1016/j.rse.2025.114714, Appendix A.1.1

Sample is a stratified sample of 10x10km blocks stratified by land cover change between 2023 and 2024. A ratio estimator is used. As pointed out in the Global Land Cover Map Validation Guidelines (https://doi.org/10.5067/doc/ceoswgcv/lpv/lc.001), the second stage variance typically contributes a negligible amount compared to the first stage variance and can be excluded.

#### Import sample and stratification information

In [None]:
# Import strata and sample unit information
import sys 
import math
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import statistics 
np.set_printoptions(precision=2, suppress=True, floatmode='fixed')

source = "/gpfs/glad3/HLSDIST/Validation/2024_10kmblock/analysis/"  # Replace with the desired path
os.chdir(source)

referenceSource = source+"tables/reference_data/referenceTimeSeriesInterpolated16_16_goodFirst.csv"
#referenceSource = source+"tables/reference_data/referenceTimeSeriesInterpolated16_16_first.csv"
mapsourceHLS = "mapLabels2024"
mapsourceS1 = source+"generate_dist_s1_table/dist_s1_label_tables"

missingS1data = [545539,1105166,533999,540915,541754,588553,938249,114851,1195220,722463,1137281,1106625,518955,649887,1107445,1458943]
baddata = [488254]
HLSsmoke = [1402267,1416380]#smoke haze
S1allnodata = [1436925,1164088,209941,64176,596625]
excludelist = missingS1data+baddata+S1allnodata+HLSsmoke

ANNname = "2024"
sampleDict = {}
sampleFull = {}
blockstrataDict = {}
substrataDict = {}
countSelected = {}
with open(source+"tables/reference_data/selectedpointsLL.csv","r") as sample:
  lines = sample.readlines()[1:]
  for l in lines:
    (ID,Block,subID,blockStratum,substratum,zone,x,y,centxUTM,centyUTM,Long,Lat,MGRS) = l.strip().split(",")
    sampleDict[ID] = [Long,Lat,zone,centxUTM,centyUTM,MGRS]
    sampleFull[ID] = l.strip()
    blockstrataDict[Block] = blockStratum
    substrataDict[ID] = int(substratum)
    if Block in countSelected:
      countSelected[Block] += 1
    else:
      countSelected[Block] = 1

allIDs = sampleDict.keys()

#Strata area
#scounts = allblocks['stratum'].value_counts()
scounts = pd.read_csv(source+"tables/reference_data/blockStrataCounts.csv").set_index('name')
scounts['area'] = scounts.multiply(100)
print(scounts)
allStrata = list(scounts.index)
totalBlockCount = scounts['blockCount'].sum()
print('totalBlockCount',totalBlockCount)
print(allStrata)

               blockCount       area
name                                
waternew             1272     127200
treelosswet          9696     969600
builtnewalert      120496   12049600
fire                 3188     318800
treelossTF          63161    6316100
cropnew             75912    7591200
wetshort             9477     947700
oldcrop_short       65470    6547000
gen                215057   21505700
other              385127   38512700
none              1085543  108554300
totalBlockCount 2034399
['waternew', 'treelosswet', 'builtnewalert', 'fire', 'treelossTF', 'cropnew', 'wetshort', 'oldcrop_short', 'gen', 'other', 'none']


In [2]:
selectedBlocks = pd.read_csv(source+"tables/reference_data/blockstrata_subareas.csv")
selectedBlocks = selectedBlocks.set_index('block')
print(selectedBlocks.head())
allBlocks = list(selectedBlocks.index)

def getBlocksStratum(stratum):
  return list(selectedBlocks[selectedBlocks['stratum']==stratum].index)

def getBlockPixelCount(block):
  return selectedBlocks.loc[int(block)][['sub1','sub2','sub3','sub4']].sum()


        MGRS      stratum  sub1   sub2  sub3    sub4       left   top  \
block                                                                   
30961  33NUF   treelossTF  2263     67  2493  105873   13.80825  4.95   
34405  50NPL  treelosswet  1614   6024  2755  100467  118.01200  5.40   
35975  37NEG   treelossTF  1369    231  2864  106424   39.09600  5.67   
40284  47NRG  treelosswet   762   2115  7183  100894  101.90300  6.30   
41318  36NUN     waternew   873  10119  6819   92899   31.40350  6.48   

          right  bottom  
block                    
30961   13.8985    4.86  
34405  118.1025    5.31  
35975   39.1865    5.58  
40284  101.9935    6.21  
41318   31.4940    6.39  


#### General functions

In [3]:
#get number of days between and two dates; used to convert dates to 1-366 day of year 
def dayDiff(start,end):
  startdate = datetime.datetime.strptime(start,"%Y%m%d")
  enddate = datetime.datetime.strptime(end,"%Y%m%d")
  days = enddate-startdate
  return (days.days+1)

In [4]:
#DIST-S1 generate dictionary of daily STATUS values per ID (note switched path to block instead of MGRS tile)
def getDISTS1status_vI(block):#,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if block in ID]
    allNoData = True
    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,366)]
        #print(ID,end=',')
        try:
          with open(mapsourceS1+'/'+block+'/'+ID+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (temp,SensingTime,STATUS)= line.strip().split(',')
                    day = dayDiff("20240101",datetime.datetime.strftime(datetime.datetime.strptime(SensingTime,"%Y-%m-%d"),"%Y%m%d"))

                    #if not (skipNodata and VEGANOM!='NA'):
                    mapalert[ID][day] = int(STATUS)
                    if int(STATUS) != 255:
                        allNoData = False
                except:
                #    print(traceback)
                    print(ID,line)
        except:
            with open("missingS1.txt","a") as OUT:
                OUT.write(ID+"\n")
    if allNoData:
        print("NO DATA FOR DIST-S1 BLOCK ",block)
    return mapalert

In [5]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefALERTDaily(filename,high=["VLmaj"],low=["VLmin"],nochange=["OCmin","OCmaj","VGmin","VGmaj","noChange"],IDlist=allIDs,conversiononly=False,only2024=False):
  #if conversiononly or only2024:
  #  with open("reference_conversion.csv","r") as reffile:
  #    reflist = reffile.readlines()[1:]
  #  refconv = {}
  #  refprevyear = {}
  #  natural = {}
  #  for line in reflist:
  #    fields = line.strip().split(",")
  #    (ID,changetype,conversion,naturalproportion,prevyear,overallLabel)=fields[0:6]
  #    refconv[ID]=conversion
  #    refprevyear[ID]=prevyear
  #    natural[ID] = naturalproportion
  refalert = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    refalert[ID] = [0 for i in range(0,366)]
    if ID in IDlist:
      daily = fields[5:]
      #refalert[ID] = [0 for i in range(0,366)]
      try:
        for day in range(0,366):
          found = False
          for l in high:
            if l == daily[day]:
              refalert[ID][day] = 3
          for l in low:
            if l == daily[day]:
              refalert[ID][day] = 2
          for l in nochange:
            if l == daily[day]:
              refalert[ID][day] = 1
          #if conversiononly and (refconv[ID] != "natural" and (refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0'))):#(refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0')):#(refconv[ID] == "no" or natural[ID] == '0'):
          #  refalert[ID][day] = 0
          #if only2024 and refprevyear[ID] == "TRUE":
          #  refalert[ID][day] = 0
      except:
        print(ID,day,daily)
  return refalert

In [6]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefchangetype(filename,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin"],nochange=["noChange"]):
  refchangetype = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    if any(item in high for item in fields) or any(item in low for item in fields):
      refchangetype[ID] = changetype
    else:
      refchangetype[ID] = "noChange"

  return refchangetype

In [7]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefConversion(filename):
  refconversion = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,t,changetype,conversion,prevyear,overallLabel) = fields[0:6]
    refconversion[ID] = conversion
  return refconversion

In [34]:
#build confusion matrix for a block of no, low, and high change
def getMatrixBlock(block,mapin,maplow,maphigh,nodata=[255],refType="VL",convOnly=False,only24 =False,mincount=10,Ndays=30,system="DIST-S1",printMatrix=False):
  strataList=[1,2,3,4]
  strataDict=substrataDict
  mapout = {}
  n = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in strataList}
  ntotal = {s:0 for s in strataList}
  refconversion = getRefConversion("reference_goodFirst.csv")
  if refType == "VL":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj"],low=["VLmin"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj"],low=["VLmin"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"])
  if refType == "VLsub":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj"],low=["VLmin","VLsub"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj"],low=["VLmin","VLsub"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"])
  elif refType =="VG":
    ref = getRefALERTDaily(referenceSource,high=["VGmaj"],low=["VGmin"],nochange=["noChange","VLmin","VLmaj","OCmin","OCmaj","VLsub","OCsub"],conversiononly=convOnly,only2024=only24)  
    refchangetype = getRefchangetype(referenceSource,high=["VGmaj"],low=["VGmin"],nochange=["noChange","VLmin","VLmaj","OCmin","OCmaj","VLsub","OCsub"])
  elif refType =="OC":
    ref = getRefALERTDaily(referenceSource,high=["OCmaj"],low=["OCmin"],nochange=["noChange","VLmin","VLmaj","VGmin","VGmaj","VLsub","VGsub"],conversiononly=convOnly,only2024=only24)  
    refchangetype = getRefchangetype(referenceSource,high=["OCmaj"],low=["OCmin"],nochange=["noChange","VLmin","VLmaj","VGmin","VGmaj","VLsub","VGsub"])
  elif refType =="ALL":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin"],nochange=["noChange"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin"],nochange=["noChange"])
  elif refType =="ALLsub":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin","VLsub","VGsub","OCsub"],nochange=["noChange"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin","VLsub","VGsub","OCsub"],nochange=["noChange"])

  IDlist = [ID for ID in list(ref.keys()) if block in ID]

  #confusion matrix
  for ID in IDlist:
    stratum = strataDict[ID]
    #if stratum in selectedStrata:
    try:
        p = [[0,0,0],[0,0,0],[0,0,0]]
        ptotal = 0
        mapout[ID] = [0 for x in range(0,366)]
        for d in range(0,366):
          if mapin[ID][d] in [255] or mapin[ID][d] in nodata:
              mapout[ID][d] = 0
          elif mapin[ID][d] in [0]:
              mapout[ID][d] = 1
          elif mapin[ID][d] in maplow:
              mapout[ID][d] = 2
          elif mapin[ID][d] in maphigh:
              mapout[ID][d] = 3
          else:############added to exclude fron matrix but include in proportion
              mapout[ID][d] = 4
          #if not int(ID) in excludelist:
          if max(ref[ID][0:(d+1)])>0 and mapout[ID][d] != 0:
                start = (d>Ndays)*(d-Ndays)

                #current anomoly/no anomaly; compare against lookback window
                if mapin[ID][d] < 7:
                    if ref[ID][start:(d+mincount)].count(2)+ref[ID][start:(d+mincount)].count(3) > mincount:
                      if ref[ID][start:(d+mincount)].count(3) > 0:
                        refVal=3
                      else:
                        refVal=2
                    elif ref[ID][start:(d+1)].count(1) > 0:
                        refVal=1
                    else:
                        refVal=0
                        
                #finished anomaly; compare year to date
                elif mapin[ID][d] >= 7 and mapin[ID][d]!=255:
                    start = 0
                    if ref[ID][start:(d+mincount)].count(2)+ref[ID][start:(d+mincount)].count(3) > mincount:
                      if ref[ID][start:(d+mincount)].count(3) > 0:
                        refVal=3
                      else:
                        refVal=2
                    elif ref[ID][start:(d+1)].count(1) > 0:
                        refVal=1
                    else:
                        refVal=0
                #nodata
                else:
                  refVal=0
                mapVal = mapout[ID][d]
                if mapVal==4 and refVal>0:
                  ptotal += 1
                elif refVal>0 and mapVal>0:
                    p[refVal-1][mapVal-1] += 1
                    ptotal += 1
        if ptotal>0:
          ntotal[stratum] += 1
          for r in [0,1,2]:
            for m in [0,1,2]:
              n[stratum][r][m] += (p[r][m]/ptotal)

        if ptotal>0 and printMatrix:
          if maphigh == [5,6,8]:
             ver = "provconf"
          elif maphigh == [6,8]:
             ver = "conf"
          elif maphigh == [4,5,6,8]:
            ver = "firstprovconf"
          with open("results/pointmatrix_"+refType+"_"+ver+"_"+system+"_lookback"+str(Ndays)+".csv","a") as OUT:
            OUT.write(','.join([ID,refchangetype[ID],refconversion[ID],str(sampleDict[ID][1]),str(sampleDict[ID][0]),str(sampleDict[ID][5]),blockstrataDict[block],str(substrataDict[ID])]))
            for r in [0,1,2]:
              for m in [0,1,2]:
                OUT.write(','+str(p[r][m]))
            userslow='NA';producerslow='NA';usershigh='NA';producershigh='NA';usersall='NA';producersall='NA'
            if (p[0][1]+p[1][1]+p[2][1])>0:
              userslow = (p[1][1]+p[2][1])/(p[0][1]+p[1][1]+p[2][1])
            if (p[1][0]+p[1][1]+p[1][0]) >0:
              producerslow = (p[1][1]+p[1][2])/(p[1][0]+p[1][1]+p[1][2])
            if (p[0][2]+p[1][2]+p[2][2])>0:
              usershigh = (p[1][2]+p[2][2])/(p[0][2]+p[1][2]+p[2][2])
            if (p[2][0]+p[2][1]+p[2][0])>0:
              producershigh = (p[2][1]+p[2][2])/(p[2][0]+p[2][1]+p[2][2])	
            if (p[0][1]+p[1][1]+p[2][1]+p[0][2]+p[1][2]+p[2][2]) >0: 
              usersall = (p[1][1]+p[2][1]+p[1][2]+p[2][2])/(p[0][1]+p[1][1]+p[2][1]+p[0][2]+p[1][2]+p[2][2])
            if (p[1][0]+p[1][1]+p[1][0] + p[2][0]+p[2][1]+p[2][0])>0:
              producersall = (p[1][1]+p[1][2]+p[2][1]+p[2][2])/(p[1][0]+p[1][1]+p[1][2] + p[2][0]+p[2][1]+p[2][2])	
            overallhigh	= (p[0][0]+p[0][1]+p[1][0]+p[1][1]+p[2][2])/ptotal
            overallall	= (p[0][0]+p[1][1]+p[1][2]+p[2][1]+p[2][2])/ptotal
            OUT.write(','+','.join([str(userslow),str(producerslow),str(usershigh),str(producershigh),str(usersall),str(producersall),str(overallhigh),str(overallall)]))
            OUT.write("\n")
 
    except:
        print(ID,"missing",stratum,d,p,ptotal,ntotal[stratum])
        print(userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall)
        #print(mapin[ID])
        #print(ref[ID])
     
  return (n,ntotal)

In [9]:
#convert matrix from three classes (no, low, high) to two classes (no, yes) for different accuracy metrics
def convMat(n,selectedStrata=[1,2,3,4]):
  nlowuser = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nlowprod = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nhiuser = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nhiprod = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nalluser = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nallprod = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nlowoverall = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nhioverall = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nalloverall = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  NO = 0
  LOW = 1
  HI = 2
  for s in selectedStrata:
    stringS = str(s)

    #[stratum][ref][map]
    nlowprod[s][2][2] = n[s][LOW][LOW] + n[s][LOW][HI]
    nlowprod[s][2][1] = n[s][LOW][NO]
    nlowuser[s][2][2] = n[s][LOW][LOW] + n[s][HI][LOW]
    nlowuser[s][1][2] = n[s][NO][LOW]

    nhiprod[s][2][2] = n[s][HI][HI] + n[s][HI][LOW]
    nhiprod[s][2][1] = n[s][HI][NO]
    nhiuser[s][2][2] = n[s][HI][HI] + n[s][LOW][HI]
    nhiuser[s][1][2] = n[s][NO][HI]

    nallprod[s][2][2] = n[s][HI][HI] + n[s][HI][LOW] + n[s][LOW][HI] + n[s][LOW][LOW]
    nallprod[s][2][1] = n[s][HI][NO] + n[s][LOW][NO]
    nalluser[s][2][2] = n[s][HI][HI] + n[s][LOW][HI] + n[s][HI][LOW] + n[s][LOW][LOW]
    nalluser[s][1][2] = n[s][NO][HI] + n[s][NO][LOW]

    nhioverall[s][2][2] = n[s][HI][HI]
    nhioverall[s][1][1] = n[s][NO][NO] + n[s][LOW][NO] + n[s][NO][LOW] + n[s][LOW][LOW]
    nhioverall[s][1][2] = n[s][NO][HI] + n[s][LOW][HI]
    nhioverall[s][2][1] = n[s][HI][NO] + n[s][HI][LOW]

    nalloverall[s][2][2] = n[s][HI][HI] + n[s][HI][LOW] + n[s][LOW][HI] + n[s][LOW][LOW]
    nalloverall[s][1][1] = n[s][NO][NO]
    nalloverall[s][1][2] = n[s][NO][HI] + n[s][NO][LOW]
    nalloverall[s][2][1] = n[s][HI][NO] + n[s][LOW][NO]
  return (nlowuser, nlowprod, nhiuser, nhiprod,nalluser, nallprod,nhioverall,nalloverall)

In [10]:
#compute users accuracy for a block and return stat and SE
def usersAccuracyBlock(n, ntotal, block):
  N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']] ## create tables of substrata areas

  if sum(ntotal.values())<10:
    return [None,None,None,None]
  #Accuracy
  y = 0
  usersx = 0
  for s in [1,2,3,4]:
    if ntotal[s]>0:
      y += (n[s][2][2]/ntotal[s])*N.iloc[s-1]
      usersx += ((n[s][1][2]+n[s][2][2])/ntotal[s])*N.iloc[s-1]
  if usersx > 0:
    users = (y/usersx)
    usersxout = usersx
  else:
    users = "NA"
    usersSE = "NA"
    usersxout = 0
  yout = y

  UAsub1 = 0
  UAsub2 = 0
  if users != "NA":
    for s in [1,2,3,4]:
      if (n[s][1][2]+n[s][2][2]) > 0 and ntotal[s]>1:
        yhmean = n[s][2][2]/ntotal[s]
        yhsampvar = ((n[s][2][2])*((1-yhmean)**2) + (n[s][1][1] + n[s][1][2] + n[s][2][1])*((0-yhmean)**2))/(ntotal[s]-1)
        xhmean = (n[s][1][2]+n[s][2][2])/ntotal[s]
        xhsampvar = ((n[s][1][2]+n[s][2][2])*((1-xhmean)**2) + (n[s][1][1] + n[s][2][1])*((0-xhmean)**2))/(ntotal[s]-1)
        xyhsampvar = (n[s][1][1] * (0-yhmean) * (0-xhmean) + n[s][1][2] * (0-yhmean) * (1-xhmean) + n[s][2][1] * (0-yhmean) * (0-xhmean) + n[s][2][2] * (1-yhmean) * (1-xhmean))/(ntotal[s] - 1)
        UAsub1 += N.iloc[s-1]*xhmean
        UAsub2 += N.iloc[s-1]**2 * (1 - ntotal[s]/N.iloc[s-1]) * (yhsampvar + (users**2)*xhsampvar - 2*users*xyhsampvar)/ntotal[s]
  
  if users != "NA":
    if UAsub1>0 and UAsub2>0:
        usersSE = math.sqrt(1/(UAsub1**2) * UAsub2)
    else:
        usersSE = None
    users = users
  else:
    users = None
    usersSE = None
  return [users,usersSE,yout,usersxout]

In [11]:
#compute producers accuracy for a block and return stat and SE
def producersAccuracyBlock(n, ntotal, block):
  N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']] ## create tables of substrata areas

  if sum(ntotal.values())<10:
    return [None,None,None,None]
  #Accuracy
  y = 0
  producersx = 0
  for s in [1,2,3,4]:
    if ntotal[s]>0:
      y += (n[s][2][2]/ntotal[s])*N.iloc[s-1]
      producersx += ((n[s][2][1]+n[s][2][2])/ntotal[s])*N.iloc[s-1]

  if producersx > 0:
    producers = (y/producersx)
    prodxout = producersx
  else:
    producers = "NA"
    producersSE = "NA"
    prodxout = 0
  yout = y

  PAsub1 = 0
  PAsub2 = 0
  for s in [1,2,3,4]:
    if producers != "NA":
        if (n[s][2][1]+n[s][2][2]) > 0 and ntotal[s]>1:
            yhmean = n[s][2][2]/ntotal[s]
            yhsampvar = ((n[s][2][2])*((1-yhmean)**2) + (n[s][1][1] + n[s][1][2] + n[s][2][1])*((0-yhmean)**2))/(ntotal[s]-1)
            xphmean = (n[s][2][1]+n[s][2][2])/ntotal[s]
            xphsampvar = ((n[s][2][1]+n[s][2][2])*((1-xphmean)**2) + (n[s][1][1] + n[s][1][2])*((0-xphmean)**2))/(ntotal[s]-1)
            xyphsampvar = (n[s][1][1] * (0-yhmean) * (0-xphmean) + n[s][1][2] * (0-yhmean) * (0-xphmean) + n[s][2][1] * (0-yhmean) * (1-xphmean) + n[s][2][2] * (1-yhmean) * (1-xphmean))/(ntotal[s] - 1)
            PAsub1 += N.iloc[s-1]*xphmean
            PAsub2 += N.iloc[s-1]**2 * (1 - ntotal[s]/N.iloc[s-1]) * (yhsampvar + (producers**2)*xphsampvar - 2*producers*xyphsampvar)/ntotal[s]
  
  if producers != "NA":
    if PAsub1 >0 and PAsub2>0:
      producersSE = math.sqrt(1/(PAsub1**2) * PAsub2)
    else:
      producersSE = None
    producers = producers
  else:
    producers = None
    producersSE = None

  return [producers,producersSE,yout,prodxout]

In [12]:
#compute users accuracy for a block and return stat and SE
def overallAccuracyBlock(n, ntotal, block):
  N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']] ## create tables of substrata areas

  if sum(ntotal.values())<10:
    return [None,None,None,None]
  #Accuracy
  y = 0
  overallx = 0
  for s in [1,2,3,4]:
    if ntotal[s]>0:
      y += ((n[s][2][2]+n[s][1][1])/ntotal[s])*N.iloc[s-1]
      overallx += ((n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2])/ntotal[s])*N.iloc[s-1]
  if overallx > 0:
    overall = (y/overallx)
    overallxout = overallx
  else:
    overall = "NA"
    overallSE = "NA"
    overallxout = 0
  yout = y

  OAsub1 = 0
  OAsub2 = 0
  if overall != "NA":
    for s in [1,2,3,4]:
      if (n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2]) > 0 and ntotal[s]>1:
        yhmean = (n[s][2][2]+n[s][1][1])/ntotal[s]
        yhsampvar = (((n[s][2][2]+n[s][1][1]))*((1-yhmean)**2) + (n[s][1][1] + n[s][1][2] + n[s][2][1])*((0-yhmean)**2))/(ntotal[s]-1)
        xhmean = (n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2])/ntotal[s]
        xhsampvar = ((n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2])*((1-xhmean)**2) + (n[s][1][1] + n[s][2][1])*((0-xhmean)**2))/(ntotal[s]-1)
        xyhsampvar = (n[s][1][1] * (0-yhmean) * (0-xhmean) + n[s][1][2] * (0-yhmean) * (1-xhmean) + n[s][2][1] * (0-yhmean) * (0-xhmean) + n[s][2][2] * (1-yhmean) * (1-xhmean))/(ntotal[s] - 1)
        OAsub1 += N.iloc[s-1]*xhmean
        OAsub2 += N.iloc[s-1]**2 * (1 - ntotal[s]/N.iloc[s-1]) * (yhsampvar + (overall**2)*xhsampvar - 2*overall*xyhsampvar)/ntotal[s]
  
  if overall != "NA":
    if OAsub1>0 and OAsub2>0:
        overallSE = math.sqrt(1/(OAsub1**2) * OAsub2)
    else:
        overallSE = None
    overall = overall
  else:
    overall = None
    overallSE = None
  return [overall,overallSE,yout,overallxout]

In [13]:
def getAccuracies(n,ntotal, name,block, measure="both"):
  (nlowuser, nlowprod, nhiuser, nhiprod,nalluser, nallprod) = convMat(n)
  loU="NA"
  loUSE="NA"
  loP="NA"
  loPSE="NA"
  hiU="NA"
  hiUSE="NA"
  hiP="NA"
  hiPSE="NA"
  aU="NA"
  aUSE="NA"
  aP="NA"
  aPSE="NA"
  if measure =="both" or measure == "users":
    (loU,loUSE,y,x) = usersAccuracyBlock(nlowuser, ntotal, block)
    (hiU,hiUSE,y,x) = usersAccuracyBlock(nhiuser, ntotal, block)
    (aU,aUSE,y,x) = usersAccuracyBlock(nalluser, ntotal, block)
  if measure =="both" or measure =="producers":
    (loP,loPSE,y,x) = producersAccuracyBlock(nlowprod, ntotal, block)
    (hiP,hiPSE,y,x) = producersAccuracyBlock(nhiprod, ntotal, block)
    (aP,aPSE,y,x) = producersAccuracyBlock(nallprod, ntotal, block)
  return [[name+"_low",loU,loUSE,loP,loPSE],[name+"_high",hiU,hiUSE,hiP,hiPSE],[name+"_all",aU,aUSE,aP,aPSE]]

## Accuracy 

#### Calculate accuracy single block

Accuracy for DIST-S1 for detecting vegetation loss for one block (Users SE Producers SE)

In [14]:
block = str(34405)
print("Accuracies for",block,"from stratum",blockstrataDict[block])
map=getDISTS1status_vI(block)
accuracies = []
(n,ntotal)=getMatrixBlock(block,map,[2],[5],nodata=[1,3,7,4,6,8],refType="VL")
accuracies = accuracies + getAccuracies(n,ntotal, "prov",block,measure="users")

(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],refType="VL")
accuracies = accuracies + getAccuracies(n,ntotal, "provconf",block)

(n,ntotal)=getMatrixBlock(block,map,[3,7],[6,8],nodata=[1,2,4,5],refType="VL")
accuracies = accuracies + getAccuracies(n,ntotal, "conf",block)

#(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],convOnly=True,only24=True)
#accuracies = accuracies + getAccuracies(n,ntotal, "conversion",block,measure="producers")

accuracies = pd.DataFrame(accuracies,columns=["name","users","usersSE","producers","producersSE"])
accuracies = accuracies[["users","usersSE","producers","producersSE"]].set_index(accuracies.name)
print(accuracies)

Accuracies for 34405 from stratum treelosswet


ValueError: too many values to unpack (expected 6)

Accuracy for DIST-S1 for detecting all change for one block (Users SE Producers SE)

In [None]:
block = str(34405)
map=getDISTS1status_vI(block)
accuracies = []
(n,ntotal)=getMatrixBlock(block,map,[2],[5],nodata=[1,3,7,4,6,8],refType="ALL")
accuracies = accuracies + getAccuracies(n,ntotal, "prov",block,measure="users")

(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],refType="ALL")
accuracies = accuracies + getAccuracies(n,ntotal, "provconf",block)

(n,ntotal)=getMatrixBlock(block,map,[3,7],[6,8],nodata=[1,2,4,5],refType="ALL")
accuracies = accuracies + getAccuracies(n,ntotal, "conf",block)

#(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],convOnly=True,only24=True)
#accuracies = accuracies + getAccuracies(n,ntotal, "conversion",block,measure="producers")

accuracies = pd.DataFrame(accuracies,columns=["name","users","usersSE","producers","producersSE"])
accuracies = accuracies[["users","usersSE","producers","producersSE"]].set_index(accuracies.name)
print(accuracies)

#### Functions to calculate accuracy for all blocks

In [14]:
def getBlockAccuracy(block,measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,system="DIST-S1"):
  """Calculates the accuracy for the given block
  Args:
    block: (str) block ID
    measure: "users" or "producers"
    changeintensity: "low", "high", or "all" (defines what intensity threshold is evaluated for statistic)
    maplowclasses: [int] ( the status classes that will be marked as low intensity, e.g. for confirmed only [3,7])
    maphighclasses: [int] ( the status classes that will be marked as high intensity, e.g. for confirmed only [6,8])
    nodataclasses: [int] ( the status classes that will be marked as no data/excluded, e.g. for confirmed only [1,2,4,5,255] in order for the first and provisional to neither be counted as right or wrong)
    refChangeType: "VL", "VG", "OC", "ALL" (sets what type of reference change the product is evaluated against)
  """
  #try:
  if system == "DIST-S1":
    map=getDISTS1status_vI(block)
  elif system == "DIST-HLS":
    map=getDISTALERTStatus_vI(block)
  elif system == "DIST-HLS-GEN":
    map=getDISTALERTStatus_vI_GEN(block)
  else:
    print("system must be DIST-S1 or DIST-HLS")
  #except:
  #  print(block,"missing map data")
  (n,ntotal)=getMatrixBlock(block,map,maplowclasses,maphighclasses,nodataclasses,refType=refChangeType,system=system)
  
  (nlowuser, nlowprod, nhiuser, nhiprod,nalluser, nallprod,nhioverall,nalloverall) = convMat(n)
  #print(block,ntotal)
  if measure == "users":
    if changeintensity == "low":
      (stat, SE, yout, xout) = usersAccuracyBlock(nlowuser, ntotal, block)  #returns (users,usersSE,yout,usersxout)
    elif changeintensity == "high":
      (stat, SE, yout, xout) = usersAccuracyBlock(nhiuser, ntotal, block)  #returns (users,usersSE,yout,usersxout)
    elif changeintensity == "all":
      (stat, SE, yout, xout) = usersAccuracyBlock(nalluser, ntotal, block)  #returns (users,usersSE,yout,usersxout)
  elif measure =="producers":
    if changeintensity == "low":
      (stat, SE, yout, xout) = producersAccuracyBlock(nlowprod, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
    elif changeintensity == "high":
      (stat, SE, yout, xout) = producersAccuracyBlock(nhiprod, ntotal, block)  #returns (producers,producersSE,yout,prodxoutE)
    elif changeintensity == "all":
      (stat, SE, yout, xout) = producersAccuracyBlock(nallprod, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
  elif measure =="overall":
    if changeintensity == "high":
      (stat, SE, yout, xout) = overallAccuracyBlock(nhioverall, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
    elif changeintensity == "all":
      (stat, SE, yout, xout) = overallAccuracyBlock(nalloverall, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
  if sum(ntotal.values())>=(countSelected[block]*0.8) or sum(ntotal.values())>=10:
    if maphighclasses == [5,6,8]:
       ver = "provconf"
    elif maphighclasses == [6,8]:
       ver = "conf"
    elif maphighclasses == [4,5,6,8]:
      ver = "firstprovconf"
    with open("results/blockaccuracy_"+refChangeType+"_"+changeintensity+"_"+ver+"_"+system+".csv","a") as OUT:
      OUT.write(','.join([block, blockstrataDict[block],str(measure),refChangeType,str(changeintensity),str(sum(ntotal.values())),str(stat)])+"\n")#,str(SE)
  #else:
  #  print(block,sum(ntotal.values()),"final out of",countSelected[block],blockstrataDict[block])
  return (stat, SE, yout, xout)

Per accuracy statistic: (1) estimate mean and standard error per block stratum, (2) estimate the global accuracies and standard errors.

In [15]:
def strataRatioAccuracy(stratum,measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,printdf=False,system="DIST-S1"):
  blocks = getBlocksStratum(stratum)
  statBlocks = {}
  SEBlocks = {}
  #nh = len(blocks)
  Nh = scounts.loc[stratum]['blockCount']
  y = {}
  x={}
  Xhat = 0
  for block in blocks:
    if not block in excludelist:#488254:
      block = str(block)
      try:
        (statBlocks[block],SEBlocks[block],y[block],x[block]) = getBlockAccuracy(str(block),measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,system=system)
      except Exception as e:
        print(block,"failed",end="; ")
        print(e)
  df = pd.DataFrame.from_dict(statBlocks,orient='index')
  df.columns = ['measure']
  df['yu']=y
  df['xu']=x
  nh = df['xu'].count()
  if nh > 1:
    measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
    yhat = (df['yu'].sum()/nh)
    xhat = (df['xu'].sum()/nh)
    if printdf:
      print(stratum)
      print(df)
    yvar = ((df['yu']**2).sum() - nh*(yhat**2))/(nh - 1)
    xvar = ((df['xu']**2).sum() - nh*(xhat**2))/(nh - 1)
    xyvar = ((df['yu']*df['xu']).sum() - nh*yhat*xhat)/(nh - 1)
    #variance = (1/(Nh*xhat)**2) * (Nh**2 *(1-nh/Nh) * (yvar + (measure**2) * xvar - 2*measure*xyvar) ) / nh
    subvariance = (Nh**2 *(1-nh/Nh) * (yvar + (measure**2) * xvar - 2*measure*xyvar) ) / nh
    SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100
    return(measure,SE,subvariance,nh,yhat,xhat)#,yvar,xvar,xyvar) #SE and variance does not take into account within block variance
  else:
    raise Exception("less than 2 blocks")

In [16]:
#print(strataRatioAccuracy("oldcrop_short","users",changeintensity="high",maplowclasses=[3,7],maphighclasses=[6,8],nodataclasses=[255,1,2,4,5],refChangeType="VL",printdf=True))
#print(strataRatioAccuracy("oldcrop_short","users",changeintensity="high",maplowclasses=[2,3,7],maphighclasses=[5,6,8],nodataclasses=[255,1,4],refChangeType="VL",printdf=True))
#print(strataRatioAccuracy("oldcrop_short","producers",changeintensity="high",maplowclasses=[2,3,7],maphighclasses=[5,6,8],nodataclasses=[255,1,4],refChangeType="VL",printdf=True))

In [38]:
def globalRatioAccuracy(measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType="ALL",system="DIST-S1"):
  statStrata = {}
  SEstrata = {}
  Vstrata = {}
  nh = {}
  Nh = {}
  strataList = allStrata
  successfulStrata = []
  failedStrata = []
  yhat = {}
  xhat = {}
  for s in strataList:
    #print(s)
    try:
      (statStrata[s],SEstrata[s],Vstrata[s],nh[s],yhat[s],xhat[s]) = strataRatioAccuracy(s,measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,system=system)
      Nh[s] = scounts.loc[s]['blockCount']
      successfulStrata.append(s)
    except:
      failedStrata.append(s)
  df = pd.DataFrame.from_dict(statStrata,orient='index')
  df.columns=['measure']
  df['SE'] = SEstrata
  df['SVar'] = Vstrata
  df['nh'] = nh
  df['Nh'] = Nh
  df['yhat'] = yhat
  df['xhat'] = xhat
  globalMeasure = (df['yhat']*df['Nh']).sum()/(df['xhat']*df['Nh']).sum()
  totalBlocksOfStrata = df['Nh'].sum()
  bigXhat = (df['xhat']*df['Nh']).sum()
  globalVar = df['SVar'].sum() /(bigXhat**2)
  globalSE = math.sqrt(globalVar)
  #print("\n","successful",successfulStrata)
  #print("failed",failedStrata)
  #print(df[['measure','SE','nh','Nh','yhat','xhat']])
  return(globalMeasure,globalSE,globalVar,df)

In [18]:
def checkPassingRequirement(file):
  with open(file,'r') as DAT:
    lines = DAT.readlines()
    countgte80 = 0
    countValid = 0
    for l in lines[1:]:
      (block,blockstratum,measureType,refChangeType,changeintensity,NpixelsEvaluated,stat) = l.strip().split(',')
      if measureType == "overall" and stat !='None':
        countValid+=1
        if float(stat)>=0.8:
          countgte80 +=1
    print(file,f"{(countgte80/countValid*100):.2f}")
    if countgte80/countValid >=0.8:
      print("PASSED requirement")

In [50]:
def writeAllAccuarcies(mapclasstype ="provconf",changeintensity="high",systemDIST="DIST-S1"):
  accdict = {}
  if mapclasstype=="provconf":
    maphigh = [5,6,8]
    maplow=[2,3,7]
    nodata=[255,1,4]
  elif mapclasstype=="conf":
    maphigh = [6,8]
    maplow=[3,7]
    nodata=[255,1,2,4,5]
  elif mapclasstype=="firstprovconf":
    maphigh = [4,5,6,8]
    maplow=[1,2,3,7]
    nodata=[255]
  for reftype in ["VLsub","ALLsub","VL","ALL"]:
    outfile = "results/blockaccuracy_"+reftype+"_"+changeintensity+"_"+mapclasstype+"_"+systemDIST+".csv"
    if os.path.exists(outfile):
      os.remove(outfile)
    with open(outfile,"a") as OUT:
      OUT.write("block,blockstratum,measureType,refChangeType,changeintensity,NpixelsEvaluated,stat\n")
    for measure in ["users","producers","overall"]:
      (stat,SE,var,df) = globalRatioAccuracy(measure,changeintensity=changeintensity,maplowclasses=maplow,maphighclasses=maphigh,nodataclasses=nodata,refChangeType=reftype,system=systemDIST) 
      print("Global",mapclasstype,reftype,measure,f"{(stat*100):.2f}",'±',f"{(SE*100):.2f}")
      accdict[mapclasstype+"_"+reftype]=[measure,stat,SE]
    checkPassingRequirement(outfile)
  return accdict    


#### Create point matrix files

In [None]:
#Print point matrix
def createpointmatrix(lookback):
  for sysDIST in ["DIST-S1"]:#,"DIST-HLS"]:
    for mapclasstype in ["firstprovconf","conf"]:#,"provconf"]:
      if mapclasstype=="provconf":
        maphigh = [5,6,8]
        maplow=[2,3,7]
        nodata=[255,1,4]
      elif mapclasstype=="conf":
        maphigh = [6,8]
        maplow=[3,7]
        nodata=[255,1,2,4,5]
      elif mapclasstype=="firstprovconf":
        maphigh = [4,5,6,8]
        maplow=[1,2,3,7]
        nodata=[255]
      for type in ["VLsub","ALLsub","VL","ALL"]:
        #if os.path.exists("results/pointmatrix_"+type+"_"+mapclasstype+"_"+sysDIST+".csv"):
        #  os.remove("results/pointmatrix_"+type+"_"+mapclasstype+"_"+sysDIST+".csv")
        with open("results/pointmatrix_"+type+"_"+mapclasstype+"_"+sysDIST+"_lookback"+str(lookback)+".csv",'w') as OUT:
          OUT.write("ID,changetype,conversion,latitude,longitude,MGRS,blockstratum,substratum,refno_mapno,refno_maplow,refno_maphigh,reflow_mapno,reflow_maplow,reflow_maphigh,refhigh_mapno,refhigh_maplow,refhigh_maphigh,userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall\n")
        for block in allBlocks:
          if not block in excludelist:
            block = str(block)
            try:
              if sysDIST == "DIST-S1":
                map=getDISTS1status_vI(block)
              elif sysDIST == "DIST-HLS":
                map=getDISTALERTStatus_vI(block)
              (n,ntotal)=getMatrixBlock(block,map,maplow=maplow, maphigh=maphigh,nodata=nodata,refType=type,system=sysDIST,printMatrix=True,Ndays=lookback)
            except Exception as e:
              print(sysDIST,block,type, "failed", e)

createpointmatrix(30)
#createpointmatrix(90)


In [58]:
from IPython.display import display, HTML
def findAvPerfomancePerChangeType(file):
  df = pd.read_csv(file)
  df.loc[list("Built-up growth" in s for s in df['changetype']),'changetype'] = "Built-up expansion"
  #print(df)
  df.loc[list("rop expansion" in s for s in df['changetype']),'changetype'] = "Agriculture expansion"
  df.loc[list("Crop cycle change" in s for s in df['changetype']),'changetype'] = "Crop cycle change"
  df.loc[list("Clear cut" in s for s in df['changetype']),'changetype'] = "Clear cut"
  df.loc[list("noChange" in s for s in df['changetype']),'changetype'] = "No change"
  df.loc[list("No Change" in s for s in df['changetype']),'changetype'] = "No change"
  df.loc[list("Natural greening" in s for s in df['changetype']),'changetype'] = "Natural greening"
  df.loc[list("ther" in s for s in df['changetype']),'changetype'] = "Other change"
  df.loc[list("hifting" in s for s in df['changetype']),'changetype'] = "Shifting cultivation"
  df.loc[list("elective logging" in s for s in df['changetype']),'changetype'] = "Selective logging"
  df.loc[list("Fire" in row['changetype'] and row['conversion'] != "natural" for index,row in df[['changetype','conversion']].iterrows()),'changetype'] = "Fire (non-conversion)"

  ByChange= df.groupby(by = ['changetype'])
  means = pd.concat([ByChange.changetype.count().rename('count'),ByChange.usershigh.count(),ByChange.usershigh.mean(),ByChange.usersall.count(),ByChange.usersall.mean(),ByChange.producershigh.count(),ByChange.producershigh.mean(),ByChange.producersall.count(),ByChange.producersall.mean(),ByChange.overallhigh.mean(),ByChange.overallall.mean()],axis=1)
  display(HTML(means.to_html()))

print("Users for change classes means timing is different between dections and S1 is detecting\nchange when we don't see it in the reference" \
"\nThese tables are more helpful for evaluating omission in the various categories")
findAvPerfomancePerChangeType("results/pointmatrix_VLsub_firstprovconf_DIST-S1_lookback30.csv")
findAvPerfomancePerChangeType("results/pointmatrix_ALLsub_conf_DIST-S1_lookback30.csv")
findAvPerfomancePerChangeType("results/pointmatrix_ALLsub_firstprovconf_DIST-S1_lookback30.csv")


Users for change classes means timing is different between dections and S1 is detecting
change when we don't see it in the reference
These tables are more helpful for evaluating omission in the various categories


Unnamed: 0_level_0,count,usershigh,usershigh,usersall,usersall,producershigh,producershigh,producersall,producersall,overallhigh,overallall
changetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Agriculture expansion,5,3,0.333333,4,0.375,0,,5,0.038715,0.959036,0.526364
Built-up expansion,32,6,0.833333,27,0.704068,15,0.213064,32,0.169732,0.804479,0.44244
Clear cut,85,54,0.891068,83,0.847895,44,0.703815,77,0.603217,0.703777,0.68405
Crop cycle change,204,76,0.571334,192,0.415697,85,0.46873,190,0.421823,0.806421,0.67236
Drought,1,0,,1,0.5,0,,1,0.5,1.0,0.652174
Fire,71,19,0.958509,66,0.735493,37,0.362668,61,0.357452,0.688066,0.555857
Fire (non-conversion),205,64,0.864983,184,0.652365,55,0.450213,193,0.390114,0.853332,0.694248
Lost water,2,0,,1,0.769231,0,,2,0.5,1.0,0.727116
Mining,2,1,0.714286,2,0.815789,0,,2,0.616667,0.681818,0.457576
Natural browning,49,5,0.466133,38,0.401711,2,0.015625,47,0.181987,0.94878,0.680647


Unnamed: 0_level_0,count,usershigh,usershigh,usersall,usersall,producershigh,producershigh,producersall,producersall,overallhigh,overallall
changetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Agriculture expansion,6,1,1.0,1,1.0,2,0.0,5,0.0,0.957792,0.574328
Built-up expansion,33,4,0.976852,4,0.976852,16,0.0525,32,0.059023,0.787148,0.38453
Clear cut,86,51,0.923359,56,0.9282,41,0.570538,76,0.466336,0.716662,0.654373
Crop cycle change,229,52,0.696583,86,0.694779,104,0.257214,217,0.232525,0.802429,0.681399
Drought,1,0,,0,,0,,1,0.0,1.0,0.733333
Fire,71,17,0.984403,29,0.873332,37,0.212162,61,0.224636,0.705604,0.526307
Fire (non-conversion),207,45,0.889131,67,0.888018,52,0.177834,181,0.162618,0.8628,0.687272
Lost water,7,3,0.809524,5,0.8,2,0.26087,4,0.3575,0.862696,0.835282
Mining,2,1,1.0,1,1.0,0,,1,0.0,0.625,0.5
Natural browning,49,3,0.410778,6,0.493481,2,0.0,47,0.066001,0.945946,0.709026


Unnamed: 0_level_0,count,usershigh,usershigh,usersall,usersall,producershigh,producershigh,producersall,producersall,overallhigh,overallall
changetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Agriculture expansion,6,4,0.5,5,0.531294,3,0.5,6,0.23021,0.911647,0.514942
Built-up expansion,33,7,0.987685,28,0.849314,16,0.202143,32,0.182284,0.780437,0.401198
Clear cut,86,56,0.894958,84,0.866373,44,0.703815,79,0.611418,0.694254,0.683104
Crop cycle change,229,83,0.612361,216,0.490158,107,0.441587,219,0.39546,0.791334,0.647408
Drought,1,0,,1,0.5,0,,1,0.5,1.0,0.652174
Fire,71,19,0.958509,66,0.735493,37,0.362668,61,0.357452,0.688066,0.55304
Fire (non-conversion),207,65,0.871456,186,0.661728,58,0.456236,195,0.375946,0.851932,0.682006
Lost water,7,4,0.482143,6,0.624512,3,0.792308,5,0.6593,0.837316,0.792085
Mining,2,1,0.714286,2,0.815789,0,,2,0.616667,0.681818,0.457576
Natural browning,49,5,0.553633,38,0.408636,2,0.015625,47,0.185595,0.94878,0.68322


#### Calculate accuracies for all blocks

Accuracies including first, provisional, and confirmed map classes for high magnitude change

In [40]:
writeAllAccuarcies(mapclasstype ="firstprovconf",changeintensity="high")


Global firstprovconf VL users 39.87 ± 9.24
Global firstprovconf VL producers 61.15 ± 5.16
Global firstprovconf VL overall 96.18 ± 1.06
results/blockaccuracy_VL_high_firstprovconf_DIST-S1.csv 88.52
PASSED requirement
Global firstprovconf ALL users 46.88 ± 8.68
Global firstprovconf ALL producers 58.34 ± 3.62
Global firstprovconf ALL overall 96.06 ± 1.07
results/blockaccuracy_ALL_high_firstprovconf_DIST-S1.csv 88.52
PASSED requirement
Global firstprovconf VLsub users 44.88 ± 8.67
Global firstprovconf VLsub producers 60.91 ± 5.09
Global firstprovconf VLsub overall 96.21 ± 1.05
results/blockaccuracy_VLsub_high_firstprovconf_DIST-S1.csv 88.52
PASSED requirement
Global firstprovconf ALLsub users 55.07 ± 7.27
Global firstprovconf ALLsub producers 58.82 ± 3.77
Global firstprovconf ALLsub overall 96.07 ± 1.07
results/blockaccuracy_ALLsub_high_firstprovconf_DIST-S1.csv 88.52
PASSED requirement


{'firstprovconf_VL': ['overall', 0.9617608216363167, 0.010594938779904441],
 'firstprovconf_ALL': ['overall', 0.9605800320275919, 0.010664247671648952],
 'firstprovconf_VLsub': ['overall', 0.9621116474817334, 0.010548794359558139],
 'firstprovconf_ALLsub': ['overall', 0.9607212246299203, 0.010678295875419441]}

In [51]:
writeAllAccuarcies(mapclasstype ="firstprovconf",changeintensity="all")


Global firstprovconf VLsub users 29.96 ± 6.79
Global firstprovconf VLsub producers 53.12 ± 7.10
Global firstprovconf VLsub overall 89.11 ± 1.62
results/blockaccuracy_VLsub_all_firstprovconf_DIST-S1.csv 72.13
Global firstprovconf ALLsub users 40.04 ± 6.76
Global firstprovconf ALLsub producers 30.68 ± 3.34
Global firstprovconf ALLsub overall 82.98 ± 2.62
results/blockaccuracy_ALLsub_all_firstprovconf_DIST-S1.csv 67.21
Global firstprovconf VL users 21.74 ± 5.24
Global firstprovconf VL producers 68.35 ± 4.92
Global firstprovconf VL overall 90.28 ± 1.70
results/blockaccuracy_VL_all_firstprovconf_DIST-S1.csv 72.13
Global firstprovconf ALL users 25.95 ± 4.87
Global firstprovconf ALL producers 50.77 ± 3.95
Global firstprovconf ALL overall 89.34 ± 2.18
results/blockaccuracy_ALL_all_firstprovconf_DIST-S1.csv 72.13


{'firstprovconf_VLsub': ['overall', 0.8910609105042226, 0.01622019743965221],
 'firstprovconf_ALLsub': ['overall', 0.829815168849409, 0.02621021855364815],
 'firstprovconf_VL': ['overall', 0.9027559523812388, 0.017033417980154526],
 'firstprovconf_ALL': ['overall', 0.8934010583016179, 0.021782445966853893]}

Accuracies including provisional and confirmed map classes for high magnitude change

In [35]:
writeAllAccuarcies(mapclasstype ="provconf",changeintensity="high")

Global provconf VL users 39.50 ± 9.19
Global provconf VL producers 56.40 ± 5.81
Global provconf VL overall 96.03 ± 1.14
Global provconf ALL users 47.02 ± 8.70
Global provconf ALL producers 55.36 ± 4.14
Global provconf ALL overall 95.94 ± 1.14


Accuracies including only confirmed map classes for high magnitude change

In [41]:
writeAllAccuarcies(mapclasstype ="conf",changeintensity="high")

Global conf VL users 48.13 ± 11.40
Global conf VL producers 49.98 ± 6.00
Global conf VL overall 96.75 ± 0.89
results/blockaccuracy_VL_high_conf_DIST-S1.csv 88.52
PASSED requirement
Global conf ALL users 57.96 ± 10.83
Global conf ALL producers 49.39 ± 4.36
Global conf ALL overall 96.69 ± 0.89
results/blockaccuracy_ALL_high_conf_DIST-S1.csv 90.16
PASSED requirement
Global conf VLsub users 54.79 ± 10.55
Global conf VLsub producers 49.57 ± 5.89
Global conf VLsub overall 96.77 ± 0.87
results/blockaccuracy_VLsub_high_conf_DIST-S1.csv 90.16
PASSED requirement
Global conf ALLsub users 68.32 ± 8.47
Global conf ALLsub producers 49.76 ± 4.62
Global conf ALLsub overall 96.70 ± 0.88
results/blockaccuracy_ALLsub_high_conf_DIST-S1.csv 90.16
PASSED requirement


{'conf_VL': ['overall', 0.9674979788923069, 0.00887105861544836],
 'conf_ALL': ['overall', 0.9669413166258208, 0.00885336991929006],
 'conf_VLsub': ['overall', 0.9676852775778905, 0.008726162046519749],
 'conf_ALLsub': ['overall', 0.9669871051052812, 0.008793375646588136]}

### Accuracies for DIST-HLS

Accuracies including first, provisional, and confirmed map classes for high magnitude change

In [44]:
writeAllAccuarcies(mapclasstype ="firstprovconf",changeintensity="high",systemDIST="DIST-HLS")

Global firstprovconf VL users 85.77 ± 5.36
Global firstprovconf VL producers 93.02 ± 1.18
Global firstprovconf VL overall 99.01 ± 0.17
results/blockaccuracy_VL_high_firstprovconf_DIST-HLS.csv 96.72
PASSED requirement
Global firstprovconf ALL users 86.67 ± 5.37
Global firstprovconf ALL producers 67.43 ± 7.52
Global firstprovconf ALL overall 98.48 ± 0.33
results/blockaccuracy_ALL_high_firstprovconf_DIST-HLS.csv 96.72
PASSED requirement
Global firstprovconf VLsub users 88.90 ± 5.10
Global firstprovconf VLsub producers 92.77 ± 1.16
Global firstprovconf VLsub overall 99.01 ± 0.17
results/blockaccuracy_VLsub_high_firstprovconf_DIST-HLS.csv 96.72
PASSED requirement
Global firstprovconf ALLsub users 89.63 ± 5.12
Global firstprovconf ALLsub producers 67.51 ± 7.41
Global firstprovconf ALLsub overall 98.49 ± 0.32
results/blockaccuracy_ALLsub_high_firstprovconf_DIST-HLS.csv 96.72
PASSED requirement


{'firstprovconf_VL': ['overall', 0.99014786095874, 0.001651162276399355],
 'firstprovconf_ALL': ['overall', 0.9848475440589339, 0.003291360999518737],
 'firstprovconf_VLsub': ['overall', 0.9901115188285541, 0.0016630694534393127],
 'firstprovconf_ALLsub': ['overall', 0.9849494643429669, 0.003196104143367076]}

Accuracies including provisional and confirmed map classes for high magnitude change

In [60]:
writeAllAccuarcies(mapclasstype ="provconf",changeintensity="high",systemDIST="DIST-HLS")

Global provconf VL users 87.15 ± 5.07
Global provconf VL producers 92.42 ± 1.31
Global provconf VL overall 99.02 ± 0.17
Global provconf ALL users 87.82 ± 5.08
Global provconf ALL producers 66.22 ± 7.76
Global provconf ALL overall 98.48 ± 0.34


{'provconf_VL': ['overall', 0.9902443472867127, 0.0016812259481226138],
 'provconf_ALL': ['overall', 0.984839775043594, 0.0033653974304900147]}

Accuracies including only confirmed map classes for high magnitude change

In [45]:
writeAllAccuarcies(mapclasstype ="conf",changeintensity="high",systemDIST="DIST-HLS")

Global conf VL users 87.52 ± 4.84
Global conf VL producers 91.05 ± 1.59
Global conf VL overall 99.07 ± 0.16
results/blockaccuracy_VL_high_conf_DIST-HLS.csv 96.72
PASSED requirement
Global conf ALL users 88.44 ± 4.84
Global conf ALL producers 63.27 ± 8.32
Global conf ALL overall 98.52 ± 0.34
results/blockaccuracy_ALL_high_conf_DIST-HLS.csv 96.72
PASSED requirement
Global conf VLsub users 90.83 ± 4.53
Global conf VLsub producers 90.76 ± 1.57
Global conf VLsub overall 99.07 ± 0.17
results/blockaccuracy_VLsub_high_conf_DIST-HLS.csv 96.72
PASSED requirement
Global conf ALLsub users 91.55 ± 4.54
Global conf ALLsub producers 63.26 ± 8.23
Global conf ALLsub overall 98.53 ± 0.33
results/blockaccuracy_ALLsub_high_conf_DIST-HLS.csv 96.72
PASSED requirement


{'conf_VL': ['overall', 0.990683388434671, 0.0016473872034498591],
 'conf_ALL': ['overall', 0.9852005131201492, 0.0033563926118859626],
 'conf_VLsub': ['overall', 0.9906840294913186, 0.0016501293446747045],
 'conf_ALLsub': ['overall', 0.9853339883928569, 0.0032561793377765243]}

In [48]:
writeAllAccuarcies(mapclasstype ="conf",changeintensity="high",systemDIST="DIST-HLS-GEN")

Global conf VLsub users 66.53 ± 11.11
Global conf VLsub producers 53.47 ± 4.82
Global conf VLsub overall 98.95 ± 0.16
results/blockaccuracy_VLsub_high_conf_DIST-HLS-GEN.csv 96.72
PASSED requirement
Global conf ALLsub users 81.68 ± 9.66
Global conf ALLsub producers 47.87 ± 4.84
Global conf ALLsub overall 98.55 ± 0.31
results/blockaccuracy_ALLsub_high_conf_DIST-HLS-GEN.csv 96.72
PASSED requirement


{'conf_VLsub': ['overall', 0.989540008735026, 0.0015620979228955052],
 'conf_ALLsub': ['overall', 0.985523042176319, 0.003113584664006771]}

## DIST-ALERT-HLS functions

In [42]:
#DIST-ALERT-HLS
def getDISTALERTStatus_vI(block,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]

    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,367)]
        #print(ID,end=',')
        with open(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (granuleID,SensingTime,ProductionTime,VEGDISTSTATUS,VEGANOM,VEGIND,VEGHIST,VEGANOMMAX,VEGDISTCONF,VEGDISTDATE,VEGDISTCOUNT,VEGDISTDUR,VEGLASTDATE,GENDISTSTATUS,GENANOM,GENANOMMAX,GENDISTCONF,GENDISTDATE,GENDISTCOUNT,GENDISTDUR,GENLASTDATE)= line.strip().split(',')
                    day = dayDiff("20240101",SensingTime[0:8])

                    if not (skipNodata and VEGANOM!='NA'):
                        if (int(VEGDISTSTATUS)==7 or int(VEGDISTSTATUS)==8) and VEGDISTDATE[0:4]=='2023': 
                            mapalert[ID][day] = 0
                        else:
                            mapalert[ID][day] = int(VEGDISTSTATUS)
                except:
                #    print(traceback)
                    print(ID,day,line)

    return mapalert


In [43]:
def getDISTALERTStatus_vI_GEN(block,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]

    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,367)]
        #print(ID,end=',')
        with open(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (granuleID,SensingTime,ProductionTime,VEGDISTSTATUS,VEGANOM,VEGIND,VEGHIST,VEGANOMMAX,VEGDISTCONF,VEGDISTDATE,VEGDISTCOUNT,VEGDISTDUR,VEGLASTDATE,GENDISTSTATUS,GENANOM,GENANOMMAX,GENDISTCONF,GENDISTDATE,GENDISTCOUNT,GENDISTDUR,GENLASTDATE)= line.strip().split(',')
                    day = dayDiff("20240101",SensingTime[0:8])

                    if not (skipNodata and int(GENANOM)==255):
                        if (int(GENDISTSTATUS)==7 or int(GENDISTSTATUS)==8) and GENDISTDATE[0:4]=='2023': 
                            mapalert[ID][day] = 0
                        else:
                            mapalert[ID][day] = int(GENDISTSTATUS)
                except:
                #    print(traceback)
                    print(ID,line)

    return mapalert