# DIST-S1 Performance Assessment

Accuracy estimation for equal probability sampling, from Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change" https://doi.org/10.1016/j.rse.2025.114714, Appendix A.1.1

Sample is a stratified sample of 10x10km blocks stratified by land cover change between 2023 and 2024. A ratio estimator is used. As pointed out in the Global Land Cover Map Validation Guidelines (https://doi.org/10.5067/doc/ceoswgcv/lpv/lc.001), the second stage variance typically contributes a negligible amount compared to the first stage variance and can be excluded.

#### Import sample and stratification information

In [1]:
# Import strata and sample unit information
import sys 
import math
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import statistics 
np.set_printoptions(precision=2, suppress=True, floatmode='fixed')

source = "/gpfs/glad3/HLSDIST/Validation/2024_10kmblock/analysis/"  # Replace with the desired path
os.chdir(source)

referenceSource = source+"tables/reference_data/referenceTimeSeriesInterpolated16_16_goodFirst.csv"
#referenceSource = source+"tables/reference_data/referenceTimeSeriesInterpolated16_16_first.csv"
mapsourceHLS = "mapLabels2024"
mapsourceS1 = source+"generate_dist_s1_table/dist_s1_label_tables"

missingS1data = [545539,1105166,533999,540915,541754,588553,938249,114851,1195220,722463,1137281,1106625,518955,649887,1107445,1458943]
baddata = [488254]
HLSsmoke = [1402267,1416380]#smoke haze
S1allnodata = [1436925,1164088,209941,64176,596625]
excludelist = missingS1data+baddata+S1allnodata#+HLSsmoke

ANNname = "2024"
sampleDict = {}
sampleFull = {}
blockstrataDict = {}
substrataDict = {}
countSelected = {}
with open(source+"tables/reference_data/selectedpointsLL.csv","r") as sample:
  lines = sample.readlines()[1:]
  for l in lines:
    (ID,Block,subID,blockStratum,substratum,zone,x,y,centxUTM,centyUTM,Long,Lat,MGRS) = l.strip().split(",")
    sampleDict[ID] = [Long,Lat,zone,centxUTM,centyUTM,MGRS]
    sampleFull[ID] = l.strip()
    blockstrataDict[Block] = blockStratum
    substrataDict[ID] = int(substratum)
    if Block in countSelected:
      countSelected[Block] += 1
    else:
      countSelected[Block] = 1

allIDs = sampleDict.keys()

#Strata area
#scounts = allblocks['stratum'].value_counts()
scounts = pd.read_csv(source+"tables/reference_data/blockStrataCounts.csv").set_index('name')
scounts['area'] = scounts.multiply(100)
print(scounts)
allStrata = list(scounts.index)
totalBlockCount = scounts['blockCount'].sum()
print('totalBlockCount',totalBlockCount)
print(allStrata)

               blockCount       area
name                                
waternew             1272     127200
treelosswet          9696     969600
builtnewalert      120496   12049600
fire                 3188     318800
treelossTF          63161    6316100
cropnew             75912    7591200
wetshort             9477     947700
oldcrop_short       65470    6547000
gen                215057   21505700
other              385127   38512700
none              1085543  108554300
totalBlockCount 2034399
['waternew', 'treelosswet', 'builtnewalert', 'fire', 'treelossTF', 'cropnew', 'wetshort', 'oldcrop_short', 'gen', 'other', 'none']


In [2]:
selectedBlocks = pd.read_csv(source+"tables/reference_data/blockstrata_subareas.csv")
selectedBlocks = selectedBlocks.set_index('block')
print(selectedBlocks.head())
allBlocks = list(selectedBlocks.index)

def getBlocksStratum(stratum):
  return list(selectedBlocks[selectedBlocks['stratum']==stratum].index)

def getBlockPixelCount(block):
  return selectedBlocks.loc[int(block)][['sub1','sub2','sub3','sub4']].sum()


        MGRS      stratum  sub1   sub2  sub3    sub4       left   top  \
block                                                                   
30961  33NUF   treelossTF  2263     67  2493  105873   13.80825  4.95   
34405  50NPL  treelosswet  1614   6024  2755  100467  118.01200  5.40   
35975  37NEG   treelossTF  1369    231  2864  106424   39.09600  5.67   
40284  47NRG  treelosswet   762   2115  7183  100894  101.90300  6.30   
41318  36NUN     waternew   873  10119  6819   92899   31.40350  6.48   

          right  bottom  
block                    
30961   13.8985    4.86  
34405  118.1025    5.31  
35975   39.1865    5.58  
40284  101.9935    6.21  
41318   31.4940    6.39  


#### General functions

In [3]:
#get number of days between and two dates; used to convert dates to 1-366 day of year 
def dayDiff(start,end):
  startdate = datetime.datetime.strptime(start,"%Y%m%d")
  enddate = datetime.datetime.strptime(end,"%Y%m%d")
  days = enddate-startdate
  return (days.days+1)

In [4]:
#DIST-S1 generate dictionary of daily STATUS values per ID (note switched path to block instead of MGRS tile)
def getDISTS1status_vI(block):#,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]
    allNoData = True
    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,366)]
        #print(ID,end=',')
        try:
          with open(mapsourceS1+'/'+block+'/'+ID+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (temp,SensingTime,STATUS)= line.strip().split(',')
                    day = dayDiff("20240101",datetime.datetime.strftime(datetime.datetime.strptime(SensingTime,"%Y-%m-%d"),"%Y%m%d"))

                    #if not (skipNodata and VEGANOM!='NA'):
                    mapalert[ID][day] = int(STATUS)
                    if int(STATUS) != 255:
                        allNoData = False
                except:
                #    print(traceback)
                    print(ID,line)
        except:
            with open("missingS1.txt","a") as OUT:
                OUT.write(ID+"\n")
    if allNoData:
        print("NO DATA FOR DIST-S1 BLOCK ",block)
    return mapalert

In [5]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefALERTDaily(filename,high=["VLmaj"],low=["VLmin"],nochange=["OCmin","OCmaj","VGmin","VGmaj","noChange"],IDlist=allIDs,conversiononly=False,only2024=False):
  #if conversiononly or only2024:
  #  with open("reference_conversion.csv","r") as reffile:
  #    reflist = reffile.readlines()[1:]
  #  refconv = {}
  #  refprevyear = {}
  #  natural = {}
  #  for line in reflist:
  #    fields = line.strip().split(",")
  #    (ID,changetype,conversion,naturalproportion,prevyear,overallLabel)=fields[0:6]
  #    refconv[ID]=conversion
  #    refprevyear[ID]=prevyear
  #    natural[ID] = naturalproportion
  refalert = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    refalert[ID] = [0 for i in range(0,366)]
    if ID in IDlist:
      daily = fields[5:]
      #refalert[ID] = [0 for i in range(0,366)]
      try:
        for day in range(0,366):
          found = False
          for l in high:
            if l == daily[day]:
              refalert[ID][day] = 3
          for l in low:
            if l == daily[day]:
              refalert[ID][day] = 2
          for l in nochange:
            if l == daily[day]:
              refalert[ID][day] = 1
          #if conversiononly and (refconv[ID] != "natural" and (refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0'))):#(refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0')):#(refconv[ID] == "no" or natural[ID] == '0'):
          #  refalert[ID][day] = 0
          #if only2024 and refprevyear[ID] == "TRUE":
          #  refalert[ID][day] = 0
      except:
        print(ID,day,daily)
  return refalert

In [6]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefALERTDaily(filename,high=["VLmaj"],low=["VLmin"],nochange=["OCmin","OCmaj","VGmin","VGmaj","noChange"],IDlist=allIDs,conversiononly=False,only2024=False):
  #if conversiononly or only2024:
  #  with open("reference_conversion.csv","r") as reffile:
  #    reflist = reffile.readlines()[1:]
  #  refconv = {}
  #  refprevyear = {}
  #  natural = {}
  #  for line in reflist:
  #    fields = line.strip().split(",")
  #    (ID,changetype,conversion,naturalproportion,prevyear,overallLabel)=fields[0:6]
  #    refconv[ID]=conversion
  #    refprevyear[ID]=prevyear
  #    natural[ID] = naturalproportion
  refalert = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    refalert[ID] = [0 for i in range(0,366)]
    if ID in IDlist:
      daily = fields[5:]
      #refalert[ID] = [0 for i in range(0,366)]
      try:
        for day in range(0,366):
          found = False
          for l in high:
            if l == daily[day]:
              refalert[ID][day] = 3
          for l in low:
            if l == daily[day]:
              refalert[ID][day] = 2
          for l in nochange:
            if l == daily[day]:
              refalert[ID][day] = 1
          #if conversiononly and (refconv[ID] != "natural" and (refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0'))):#(refconv[ID] != "human" or (refconv[ID] == "human" and natural[ID] == '0')):#(refconv[ID] == "no" or natural[ID] == '0'):
          #  refalert[ID][day] = 0
          #if only2024 and refprevyear[ID] == "TRUE":
          #  refalert[ID][day] = 0
      except:
        print(ID,day,daily)
  return refalert

In [7]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefchangetype(filename,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin"],nochange=["noChange"]):
  refchangetype = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,overallLabel,Long,Lat,changetype) = fields[0:5]
    if any(item in high for item in fields) or any(item in low for item in fields):
      refchangetype[ID] = changetype
    else:
      refchangetype[ID] = "noChange"

  return refchangetype

In [8]:
#generate dictionary of ref no, low, high change and no data for each day of year (note conversion only and only 2024 parameters don't work)
def getRefConversion(filename):
  refconversion = {}
  with open(filename,"r") as mapfile:
    lines = mapfile.readlines()
    header = lines[0]
    reflist = lines[1:]
  for line in reflist:
    fields = line.strip().split(",")
    (ID,t,changetype,conversion,prevyear,overallLabel) = fields[0:6]
    refconversion[ID] = conversion
  return refconversion

In [9]:
#build confusion matrix for a block of no, low, and high change
def getMatrixBlock(block,mapin,maplow,maphigh,nodata=[255],refType="VL",convOnly=False,only24 =False,mincount=10,lookback=30,system="DIST-S1",printMatrix=False):
  strataList=[1,2,3,4]
  strataDict=substrataDict
  mapout = {}
  n = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in strataList}
  ntotal = {s:0 for s in strataList}
  refconversion = getRefConversion("reference_goodFirst.csv")
  if refType == "VL":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj"],low=["VLmin"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj"],low=["VLmin"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"])
  if refType == "VLsub":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj"],low=["VLmin","VLsub"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj"],low=["VLmin","VLsub"],nochange=["VGmin","VGmaj","OCmin","OCmaj","noChange","VGsub","OCsub"])
  elif refType =="VG":
    ref = getRefALERTDaily(referenceSource,high=["VGmaj"],low=["VGmin"],nochange=["noChange","VLmin","VLmaj","OCmin","OCmaj","VLsub","OCsub"],conversiononly=convOnly,only2024=only24)  
    refchangetype = getRefchangetype(referenceSource,high=["VGmaj"],low=["VGmin"],nochange=["noChange","VLmin","VLmaj","OCmin","OCmaj","VLsub","OCsub"])
  elif refType =="OC":
    ref = getRefALERTDaily(referenceSource,high=["OCmaj"],low=["OCmin"],nochange=["noChange","VLmin","VLmaj","VGmin","VGmaj","VLsub","VGsub"],conversiononly=convOnly,only2024=only24)  
    refchangetype = getRefchangetype(referenceSource,high=["OCmaj"],low=["OCmin"],nochange=["noChange","VLmin","VLmaj","VGmin","VGmaj","VLsub","VGsub"])
  elif refType =="ALL":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin"],nochange=["noChange"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin"],nochange=["noChange"])
  elif refType =="ALLsub":
    ref = getRefALERTDaily(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin","VLsub","VGsub","OCsub"],nochange=["noChange"],conversiononly=convOnly,only2024=only24)
    refchangetype = getRefchangetype(referenceSource,high=["VLmaj","VGmaj","OCmaj"],low=["VLmin","VGmin","OCmin","VLsub","VGsub","OCsub"],nochange=["noChange"])

  IDlist = [ID for ID in list(ref.keys()) if block in ID]

  #confusion matrix
  for ID in IDlist:
    stratum = strataDict[ID]
    #if stratum in selectedStrata:
    try:
        p = [[0,0,0],[0,0,0],[0,0,0]]
        ptotal = 0
        mapout[ID] = [0 for x in range(0,366)]
        for d in range(0,366):
          if mapin[ID][d] in [255] or mapin[ID][d] in nodata:
              mapout[ID][d] = 0
          elif mapin[ID][d] in [0]:
              mapout[ID][d] = 1
          elif mapin[ID][d] in maplow:
              mapout[ID][d] = 2
          elif mapin[ID][d] in maphigh:
              mapout[ID][d] = 3
          else:############added to exclude fron matrix but include in proportion
              mapout[ID][d] = 4
          #if not int(ID) in excludelist:
          if max(ref[ID][0:(d+1)])>0 and mapout[ID][d] != 0:
                start = (d>lookback)*(d-lookback)

                ##current anomoly compare against lookback window and forward window
                #if mapin[ID][d] < 7 and mapin[ID][d]>0:
                #    if ref[ID][start:(d+mincount+forward)].count(2)+ref[ID][start:(d+mincount+forward)].count(3) > mincount:
                #      if ref[ID][start:(d+mincount+forward)].count(3) > 0:
                #        refVal=3
                #      else:
                #        refVal=2
                #    elif ref[ID][start:(d+1)].count(1) > 0:
                #        refVal=1
                #    else:
                #        refVal=0
                #current no anomaly; compare against lookback window
                if mapin[ID][d] <7:#== 0:
                    if ref[ID][start:(d+mincount)].count(2)+ref[ID][start:(d+mincount)].count(3) > mincount:
                      if ref[ID][start:(d+mincount)].count(3) > 0:
                        refVal=3
                      else:
                        refVal=2
                    elif ref[ID][start:(d+1)].count(1) > 0:
                        refVal=1
                    else:
                        refVal=0
                        
                #finished anomaly; compare year to date
                elif mapin[ID][d] >= 7 and mapin[ID][d]!=255:
                    start = 0
                    if ref[ID][start:(d+mincount)].count(2)+ref[ID][start:(d+mincount)].count(3) > mincount:
                      if ref[ID][start:(d+mincount)].count(3) > 0:
                        refVal=3
                      else:
                        refVal=2
                    elif ref[ID][start:(d+1)].count(1) > 0:
                        refVal=1
                    else:
                        refVal=0
                #nodata
                else:
                  refVal=0
                mapVal = mapout[ID][d]
                if mapVal==4 and refVal>0:
                  ptotal += 1
                elif refVal>0 and mapVal>0:
                    p[refVal-1][mapVal-1] += 1
                    ptotal += 1
        if ptotal>0:
          ntotal[stratum] += 1
          for r in [0,1,2]:
            for m in [0,1,2]:
              n[stratum][r][m] += (p[r][m]/ptotal)

        if ptotal>0 and printMatrix:
          if maphigh == [5,6,8]:
             ver = "provconf"
          elif maphigh == [6,8]:
             ver = "conf"
          elif maphigh == [4,5,6,8]:
            ver = "firstprovconf"
          with open("results/pointmatrix_"+refType+"_"+ver+"_"+system+"_lookback"+str(lookback)+".csv","a") as OUT:
            OUT.write(','.join([ID,refchangetype[ID],refconversion[ID],str(sampleDict[ID][1]),str(sampleDict[ID][0]),str(sampleDict[ID][5]),blockstrataDict[block],str(substrataDict[ID])]))
            for r in [0,1,2]:
              for m in [0,1,2]:
                OUT.write(','+str(p[r][m]))
            userslow='NA';producerslow='NA';usershigh='NA';producershigh='NA';usersall='NA';producersall='NA'
            if (p[0][1]+p[1][1]+p[2][1])>0:
              userslow = (p[1][1]+p[2][1])/(p[0][1]+p[1][1]+p[2][1])
            if (p[1][0]+p[1][1]+p[1][0]) >0:
              producerslow = (p[1][1]+p[1][2])/(p[1][0]+p[1][1]+p[1][2])
            if (p[0][2]+p[1][2]+p[2][2])>0:
              usershigh = (p[1][2]+p[2][2])/(p[0][2]+p[1][2]+p[2][2])
            if (p[2][0]+p[2][1]+p[2][0])>0:
              producershigh = (p[2][1]+p[2][2])/(p[2][0]+p[2][1]+p[2][2])	
            if (p[0][1]+p[1][1]+p[2][1]+p[0][2]+p[1][2]+p[2][2]) >0: 
              usersall = (p[1][1]+p[2][1]+p[1][2]+p[2][2])/(p[0][1]+p[1][1]+p[2][1]+p[0][2]+p[1][2]+p[2][2])
            if (p[1][0]+p[1][1]+p[1][0] + p[2][0]+p[2][1]+p[2][0])>0:
              producersall = (p[1][1]+p[1][2]+p[2][1]+p[2][2])/(p[1][0]+p[1][1]+p[1][2] + p[2][0]+p[2][1]+p[2][2])	
            overallhigh	= (p[0][0]+p[0][1]+p[1][0]+p[1][1]+p[2][2])/ptotal
            overallall	= (p[0][0]+p[1][1]+p[1][2]+p[2][1]+p[2][2])/ptotal
            OUT.write(','+','.join([str(userslow),str(producerslow),str(usershigh),str(producershigh),str(usersall),str(producersall),str(overallhigh),str(overallall)]))
            OUT.write("\n")
 
    except:
        print(ID,"missing",stratum,d,p,ptotal,ntotal[stratum])
        print(userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall)
        #print(mapin[ID])
        #print(ref[ID])
     
  return (n,ntotal)

In [10]:
#convert matrix from three classes (no, low, high) to two classes (no, yes) for different accuracy metrics
def convMat(n,selectedStrata=[1,2,3,4]):
  nlowuser = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nlowprod = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nhiuser = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nhiprod = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nalluser = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nallprod = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nlowoverall = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nhioverall = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  nalloverall = {s:[[0,0,0],[0,0,0],[0,0,0]] for s in selectedStrata}
  NO = 0
  LOW = 1
  HI = 2
  for s in selectedStrata:
    stringS = str(s)

    #[stratum][ref][map]
    nlowprod[s][2][2] = n[s][LOW][LOW] + n[s][LOW][HI]
    nlowprod[s][2][1] = n[s][LOW][NO]
    nlowuser[s][2][2] = n[s][LOW][LOW] + n[s][HI][LOW]
    nlowuser[s][1][2] = n[s][NO][LOW]

    nhiprod[s][2][2] = n[s][HI][HI] + n[s][HI][LOW]
    nhiprod[s][2][1] = n[s][HI][NO]
    nhiuser[s][2][2] = n[s][HI][HI] + n[s][LOW][HI]
    nhiuser[s][1][2] = n[s][NO][HI]

    nallprod[s][2][2] = n[s][HI][HI] + n[s][HI][LOW] + n[s][LOW][HI] + n[s][LOW][LOW]
    nallprod[s][2][1] = n[s][HI][NO] + n[s][LOW][NO]
    nalluser[s][2][2] = n[s][HI][HI] + n[s][LOW][HI] + n[s][HI][LOW] + n[s][LOW][LOW]
    nalluser[s][1][2] = n[s][NO][HI] + n[s][NO][LOW]

    nhioverall[s][2][2] = n[s][HI][HI]
    nhioverall[s][1][1] = n[s][NO][NO] + n[s][LOW][NO] + n[s][NO][LOW] + n[s][LOW][LOW]
    nhioverall[s][1][2] = n[s][NO][HI] + n[s][LOW][HI]
    nhioverall[s][2][1] = n[s][HI][NO] + n[s][HI][LOW]

    nalloverall[s][2][2] = n[s][HI][HI] + n[s][HI][LOW] + n[s][LOW][HI] + n[s][LOW][LOW]
    nalloverall[s][1][1] = n[s][NO][NO]
    nalloverall[s][1][2] = n[s][NO][HI] + n[s][NO][LOW]
    nalloverall[s][2][1] = n[s][HI][NO] + n[s][LOW][NO]
  return (nlowuser, nlowprod, nhiuser, nhiprod,nalluser, nallprod,nhioverall,nalloverall)

In [11]:
#compute users accuracy for a block and return stat and SE
def usersAccuracyBlock(n, ntotal, block):
  N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']] ## create tables of substrata areas
  selectedSubStrata = [1,2,3,4]
  if sum(ntotal.values())<10:
    return [None,None,None,None]
  #Accuracy
  y = 0
  usersx = 0
  for s in selectedSubStrata:
    if ntotal[s]>0:
      y += (n[s][2][2]/ntotal[s])*N.iloc[s-1]
      usersx += ((n[s][1][2]+n[s][2][2])/ntotal[s])*N.iloc[s-1]
  if usersx > 0:
    users = (y/usersx)
    usersxout = usersx
  else:
    users = "NA"
    usersSE = "NA"
    usersxout = 0
  yout = y

  UAsub1 = 0
  UAsub2 = 0
  if users != "NA":
    for s in selectedSubStrata:
      if (n[s][1][2]+n[s][2][2]) > 0 and ntotal[s]>1:
        yhmean = n[s][2][2]/ntotal[s]
        yhsampvar = ((n[s][2][2])*((1-yhmean)**2) + (n[s][1][1] + n[s][1][2] + n[s][2][1])*((0-yhmean)**2))/(ntotal[s]-1)
        xhmean = (n[s][1][2]+n[s][2][2])/ntotal[s]
        xhsampvar = ((n[s][1][2]+n[s][2][2])*((1-xhmean)**2) + (n[s][1][1] + n[s][2][1])*((0-xhmean)**2))/(ntotal[s]-1)
        xyhsampvar = (n[s][1][1] * (0-yhmean) * (0-xhmean) + n[s][1][2] * (0-yhmean) * (1-xhmean) + n[s][2][1] * (0-yhmean) * (0-xhmean) + n[s][2][2] * (1-yhmean) * (1-xhmean))/(ntotal[s] - 1)
        UAsub1 += N.iloc[s-1]*xhmean
        UAsub2 += N.iloc[s-1]**2 * (1 - ntotal[s]/N.iloc[s-1]) * (yhsampvar + (users**2)*xhsampvar - 2*users*xyhsampvar)/ntotal[s]
  
  if users != "NA":
    if UAsub1>0 and UAsub2>0:
        usersSE = math.sqrt(1/(UAsub1**2) * UAsub2)
    else:
        usersSE = None
    users = users
  else:
    users = None
    usersSE = None
  return [users,usersSE,yout,usersxout]

In [12]:
#compute producers accuracy for a block and return stat and SE
def producersAccuracyBlock(n, ntotal, block):
  N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']] ## create tables of substrata areas
  selectedSubStrata = [1,2,3,4]

  if sum(ntotal.values())<10:
    return [None,None,None,None]
  #Accuracy
  y = 0
  producersx = 0
  for s in selectedSubStrata:
    if ntotal[s]>0:
      y += (n[s][2][2]/ntotal[s])*N.iloc[s-1]
      producersx += ((n[s][2][1]+n[s][2][2])/ntotal[s])*N.iloc[s-1]

  if producersx > 0:
    producers = (y/producersx)
    prodxout = producersx
  else:
    producers = "NA"
    producersSE = "NA"
    prodxout = 0
  yout = y

  PAsub1 = 0
  PAsub2 = 0
  for s in selectedSubStrata:
    if producers != "NA":
        if (n[s][2][1]+n[s][2][2]) > 0 and ntotal[s]>1:
            yhmean = n[s][2][2]/ntotal[s]
            yhsampvar = ((n[s][2][2])*((1-yhmean)**2) + (n[s][1][1] + n[s][1][2] + n[s][2][1])*((0-yhmean)**2))/(ntotal[s]-1)
            xphmean = (n[s][2][1]+n[s][2][2])/ntotal[s]
            xphsampvar = ((n[s][2][1]+n[s][2][2])*((1-xphmean)**2) + (n[s][1][1] + n[s][1][2])*((0-xphmean)**2))/(ntotal[s]-1)
            xyphsampvar = (n[s][1][1] * (0-yhmean) * (0-xphmean) + n[s][1][2] * (0-yhmean) * (0-xphmean) + n[s][2][1] * (0-yhmean) * (1-xphmean) + n[s][2][2] * (1-yhmean) * (1-xphmean))/(ntotal[s] - 1)
            PAsub1 += N.iloc[s-1]*xphmean
            PAsub2 += N.iloc[s-1]**2 * (1 - ntotal[s]/N.iloc[s-1]) * (yhsampvar + (producers**2)*xphsampvar - 2*producers*xyphsampvar)/ntotal[s]
  
  if producers != "NA":
    if PAsub1 >0 and PAsub2>0:
      producersSE = math.sqrt(1/(PAsub1**2) * PAsub2)
    else:
      producersSE = None
    producers = producers
  else:
    producers = None
    producersSE = None

  return [producers,producersSE,yout,prodxout]

In [13]:
#compute users accuracy for a block and return stat and SE
def overallAccuracyBlock(n, ntotal, block):
  N = selectedBlocks.loc[int(block),['sub1','sub2','sub3','sub4']] ## create tables of substrata areas

  if sum(ntotal.values())<10:
    return [None,None,None,None]
  #Accuracy
  y = 0
  overallx = 0
  for s in [1,2,3,4]:
    if ntotal[s]>0:
      y += ((n[s][2][2]+n[s][1][1])/ntotal[s])*N.iloc[s-1]
      overallx += ((n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2])/ntotal[s])*N.iloc[s-1]
  if overallx > 0:
    overall = (y/overallx)
    overallxout = overallx
  else:
    overall = "NA"
    overallSE = "NA"
    overallxout = 0
  yout = y

  OAsub1 = 0
  OAsub2 = 0
  if overall != "NA":
    for s in [1,2,3,4]:
      if (n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2]) > 0 and ntotal[s]>1:
        yhmean = (n[s][2][2]+n[s][1][1])/ntotal[s]
        yhsampvar = (((n[s][2][2]+n[s][1][1]))*((1-yhmean)**2) + (n[s][1][1] + n[s][1][2] + n[s][2][1])*((0-yhmean)**2))/(ntotal[s]-1)
        xhmean = (n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2])/ntotal[s]
        xhsampvar = ((n[s][1][1]+n[s][1][2]+n[s][2][1]+n[s][2][2])*((1-xhmean)**2) + (n[s][1][1] + n[s][2][1])*((0-xhmean)**2))/(ntotal[s]-1)
        xyhsampvar = (n[s][1][1] * (0-yhmean) * (0-xhmean) + n[s][1][2] * (0-yhmean) * (1-xhmean) + n[s][2][1] * (0-yhmean) * (0-xhmean) + n[s][2][2] * (1-yhmean) * (1-xhmean))/(ntotal[s] - 1)
        OAsub1 += N.iloc[s-1]*xhmean
        OAsub2 += N.iloc[s-1]**2 * (1 - ntotal[s]/N.iloc[s-1]) * (yhsampvar + (overall**2)*xhsampvar - 2*overall*xyhsampvar)/ntotal[s]
  
  if overall != "NA":
    if OAsub1>0 and OAsub2>0:
        overallSE = math.sqrt(1/(OAsub1**2) * OAsub2)
    else:
        overallSE = None
    overall = overall
  else:
    overall = None
    overallSE = None
  return [overall,overallSE,yout,overallxout]

In [14]:
def getAccuracies(n,ntotal, name,block, measure="both"):
  (nlowuser, nlowprod, nhiuser, nhiprod,nalluser, nallprod) = convMat(n)
  loU="NA"
  loUSE="NA"
  loP="NA"
  loPSE="NA"
  hiU="NA"
  hiUSE="NA"
  hiP="NA"
  hiPSE="NA"
  aU="NA"
  aUSE="NA"
  aP="NA"
  aPSE="NA"
  if measure =="both" or measure == "users":
    (loU,loUSE,y,x) = usersAccuracyBlock(nlowuser, ntotal, block)
    (hiU,hiUSE,y,x) = usersAccuracyBlock(nhiuser, ntotal, block)
    (aU,aUSE,y,x) = usersAccuracyBlock(nalluser, ntotal, block)
  if measure =="both" or measure =="producers":
    (loP,loPSE,y,x) = producersAccuracyBlock(nlowprod, ntotal, block)
    (hiP,hiPSE,y,x) = producersAccuracyBlock(nhiprod, ntotal, block)
    (aP,aPSE,y,x) = producersAccuracyBlock(nallprod, ntotal, block)
  return [[name+"_low",loU,loUSE,loP,loPSE],[name+"_high",hiU,hiUSE,hiP,hiPSE],[name+"_all",aU,aUSE,aP,aPSE]]

## Accuracy 

#### Calculate accuracy single block (not needed, but if interested)

Accuracy for DIST-S1 for detecting vegetation loss for one block (Users SE Producers SE)

In [14]:
block = str(34405)
print("Accuracies for",block,"from stratum",blockstrataDict[block])
map=getDISTS1status_vI(block)
accuracies = []
(n,ntotal)=getMatrixBlock(block,map,[2],[5],nodata=[1,3,7,4,6,8],refType="VL")
accuracies = accuracies + getAccuracies(n,ntotal, "prov",block,measure="users")

(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],refType="VL")
accuracies = accuracies + getAccuracies(n,ntotal, "provconf",block)

(n,ntotal)=getMatrixBlock(block,map,[3,7],[6,8],nodata=[1,2,4,5],refType="VL")
accuracies = accuracies + getAccuracies(n,ntotal, "conf",block)

#(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],convOnly=True,only24=True)
#accuracies = accuracies + getAccuracies(n,ntotal, "conversion",block,measure="producers")

accuracies = pd.DataFrame(accuracies,columns=["name","users","usersSE","producers","producersSE"])
accuracies = accuracies[["users","usersSE","producers","producersSE"]].set_index(accuracies.name)
print(accuracies)

Accuracies for 34405 from stratum treelosswet


ValueError: too many values to unpack (expected 6)

Accuracy for DIST-S1 for detecting all change for one block (Users SE Producers SE)

In [None]:
block = str(34405)
map=getDISTS1status_vI(block)
accuracies = []
(n,ntotal)=getMatrixBlock(block,map,[2],[5],nodata=[1,3,7,4,6,8],refType="ALL")
accuracies = accuracies + getAccuracies(n,ntotal, "prov",block,measure="users")

(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],refType="ALL")
accuracies = accuracies + getAccuracies(n,ntotal, "provconf",block)

(n,ntotal)=getMatrixBlock(block,map,[3,7],[6,8],nodata=[1,2,4,5],refType="ALL")
accuracies = accuracies + getAccuracies(n,ntotal, "conf",block)

#(n,ntotal)=getMatrixBlock(block,map,[2,3,7],[5,6,8],nodata=[1,4],convOnly=True,only24=True)
#accuracies = accuracies + getAccuracies(n,ntotal, "conversion",block,measure="producers")

accuracies = pd.DataFrame(accuracies,columns=["name","users","usersSE","producers","producersSE"])
accuracies = accuracies[["users","usersSE","producers","producersSE"]].set_index(accuracies.name)
print(accuracies)

#### Functions to calculate accuracy for all blocks

In [15]:
def getBlockAccuracy(block,measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,system="DIST-S1"):
  """Calculates the accuracy for the given block
  Args:
    block: (str) block ID
    measure: "users" or "producers"
    changeintensity: "low", "high", or "all" (defines what intensity threshold is evaluated for statistic)
    maplowclasses: [int] ( the status classes that will be marked as low intensity, e.g. for confirmed only [3,7])
    maphighclasses: [int] ( the status classes that will be marked as high intensity, e.g. for confirmed only [6,8])
    nodataclasses: [int] ( the status classes that will be marked as no data/excluded, e.g. for confirmed only [1,2,4,5,255] in order for the first and provisional to neither be counted as right or wrong)
    refChangeType: "VL", "VG", "OC", "ALL" (sets what type of reference change the product is evaluated against)
  """
  #try:
  if system == "DIST-S1":
    map=getDISTS1status_vI(block)
  elif system == "DIST-HLS":
    map=getDISTALERTStatus_vI(block)#,True)
  elif system == "DIST-HLS-GEN":
    map=getDISTALERTStatus_vI_GEN(block)
  else:
    print("system must be DIST-S1 or DIST-HLS")
  #except:
  #  print(block,"missing map data")
  (n,ntotal)=getMatrixBlock(block,map,maplowclasses,maphighclasses,nodataclasses,refType=refChangeType,system=system)
  
  (nlowuser, nlowprod, nhiuser, nhiprod,nalluser, nallprod,nhioverall,nalloverall) = convMat(n)
  #print(block,ntotal)
  if measure == "users":
    if changeintensity == "low":
      (stat, SE, yout, xout) = usersAccuracyBlock(nlowuser, ntotal, block)  #returns (users,usersSE,yout,usersxout)
    elif changeintensity == "high":
      (stat, SE, yout, xout) = usersAccuracyBlock(nhiuser, ntotal, block)  #returns (users,usersSE,yout,usersxout)
    elif changeintensity == "all":
      (stat, SE, yout, xout) = usersAccuracyBlock(nalluser, ntotal, block)  #returns (users,usersSE,yout,usersxout)
  elif measure =="producers":
    if changeintensity == "low":
      (stat, SE, yout, xout) = producersAccuracyBlock(nlowprod, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
    elif changeintensity == "high":
      (stat, SE, yout, xout) = producersAccuracyBlock(nhiprod, ntotal, block)  #returns (producers,producersSE,yout,prodxoutE)
    elif changeintensity == "all":
      (stat, SE, yout, xout) = producersAccuracyBlock(nallprod, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
  elif measure =="overall":
    if changeintensity == "high":
      (stat, SE, yout, xout) = overallAccuracyBlock(nhioverall, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
    elif changeintensity == "all":
      (stat, SE, yout, xout) = overallAccuracyBlock(nalloverall, ntotal, block)  #returns (producers,producersSE,yout,prodxout)
  if sum(ntotal.values())>=(countSelected[block]*0.8) or sum(ntotal.values())>=10:
    if maphighclasses == [5,6,8]:
       ver = "provconf"
    elif maphighclasses == [6,8]:
       ver = "conf"
    elif maphighclasses == [4,5,6,8]:
      ver = "firstprovconf"
    with open("results/blockaccuracy_"+refChangeType+"_"+changeintensity+"_"+ver+"_"+system+".csv","a") as OUT:
      OUT.write(','.join([block, blockstrataDict[block],str(measure),refChangeType,str(changeintensity),str(sum(ntotal.values())),str(stat)])+"\n")#,str(SE)
  #else:
  #  print(block,sum(ntotal.values()),"final out of",countSelected[block],blockstrataDict[block])
  return (stat, SE, yout, xout)

Per accuracy statistic: (1) estimate mean and standard error per block stratum, (2) estimate the global accuracies and standard errors.

In [16]:
def strataRatioAccuracy(stratum,measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,printdf=False,system="DIST-S1"):
  blocks = getBlocksStratum(stratum)
  statBlocks = {}
  SEBlocks = {}
  #nh = len(blocks)
  Nh = scounts.loc[stratum]['blockCount']
  y = {}
  x={}
  Xhat = 0
  for block in blocks:
    if not block in excludelist:#488254:
      block = str(block)
      try:
        (statBlocks[block],SEBlocks[block],y[block],x[block]) = getBlockAccuracy(str(block),measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,system=system)
      except Exception as e:
        print(block,"failed",end="; ")
        print(e)
  df = pd.DataFrame.from_dict(statBlocks,orient='index')
  df.columns = ['measure']
  df['yu']=y
  df['xu']=x
  nh = df['xu'].count()
  if nh > 1:
    measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
    yhat = (df['yu'].sum()/nh)
    xhat = (df['xu'].sum()/nh)
    if printdf:
      print(stratum)
      print(df)
    yvar = ((df['yu']**2).sum() - nh*(yhat**2))/(nh - 1)
    xvar = ((df['xu']**2).sum() - nh*(xhat**2))/(nh - 1)
    xyvar = ((df['yu']*df['xu']).sum() - nh*yhat*xhat)/(nh - 1)
    #variance = (1/(Nh*xhat)**2) * (Nh**2 *(1-nh/Nh) * (yvar + (measure**2) * xvar - 2*measure*xyvar) ) / nh
    subvariance = (Nh**2 *(1-nh/Nh) * (yvar + (measure**2) * xvar - 2*measure*xyvar) ) / nh
    SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100
    return(measure,SE,subvariance,nh,yhat,xhat)#,yvar,xvar,xyvar) #SE and variance does not take into account within block variance
  else:
    raise Exception("less than 2 blocks")

In [17]:
#print(strataRatioAccuracy("wetshort","users",changeintensity="high",maplowclasses=[3,7],maphighclasses=[6,8],nodataclasses=[255,1,2,4,5],refChangeType="VL",printdf=True))
#print(strataRatioAccuracy("wetshort","users",changeintensity="high",maplowclasses=[2,3,7],maphighclasses=[5,6,8],nodataclasses=[255,1,4],refChangeType="VL",printdf=True))
#print(strataRatioAccuracy("wetshort","producers",changeintensity="high",maplowclasses=[2,3,7],maphighclasses=[5,6,8],nodataclasses=[255,1,4],refChangeType="VL",printdf=True))

In [18]:
def globalRatioAccuracy(measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType="ALL",system="DIST-S1",printdf=False):
  statStrata = {}
  SEstrata = {}
  Vstrata = {}
  nh = {}
  Nh = {}
  strataList = allStrata
  successfulStrata = []
  failedStrata = []
  yhat = {}
  xhat = {}
  for s in strataList:
    #print(s)
    try:
      (statStrata[s],SEstrata[s],Vstrata[s],nh[s],yhat[s],xhat[s]) = strataRatioAccuracy(s,measure,changeintensity,maplowclasses,maphighclasses,nodataclasses,refChangeType,system=system)
      Nh[s] = scounts.loc[s]['blockCount']
      successfulStrata.append(s)
    except:
      failedStrata.append(s)
  df = pd.DataFrame.from_dict(statStrata,orient='index')
  df.columns=['measure']
  df['SE'] = SEstrata
  df['SVar'] = Vstrata
  df['nh'] = nh
  df['Nh'] = Nh
  df['yhat'] = yhat
  df['xhat'] = xhat
  globalMeasure = (df['yhat']*df['Nh']).sum()/(df['xhat']*df['Nh']).sum()
  totalBlocksOfStrata = df['Nh'].sum()
  bigXhat = (df['xhat']*df['Nh']).sum()
  globalVar = df['SVar'].sum() /(bigXhat**2)
  globalSE = math.sqrt(globalVar)
  #print("\n","successful",successfulStrata)
  #print("failed",failedStrata)
  
  if printdf:
    print(df[['measure','SE','nh','Nh','yhat','xhat']])
  return(globalMeasure,globalSE,globalVar,df)

In [19]:
def checkPassingRequirement(file):
  with open(file,'r') as DAT:
    lines = DAT.readlines()
    countgte80 = 0
    countValid = 0
    for l in lines[1:]:
      (block,blockstratum,measureType,refChangeType,changeintensity,NpixelsEvaluated,stat) = l.strip().split(',')
      if measureType == "overall" and stat !='None':
        countValid+=1
        if float(stat)>=0.8:
          countgte80 +=1
    print(file,f"{(countgte80/countValid*100):.2f}")
    if countgte80/countValid >=0.8:
      print("PASSED requirement")

In [20]:
def writeAllAccuarcies(mapclasstype ="provconf",changeintensity="high",systemDIST="DIST-S1",printdf=False):
  accdict = {}
  if mapclasstype=="provconf":
    maphigh = [5,6,8]
    maplow=[2,3,7]
    nodata=[255,1,4]
  elif mapclasstype=="conf":
    maphigh = [6,8]
    maplow=[3,7]
    nodata=[255,1,2,4,5]
  elif mapclasstype=="firstprovconf":
    maphigh = [4,5,6,8]
    maplow=[1,2,3,7]
    nodata=[255]
  for reftype in ["VLsub","ALLsub"]:#,"VL","ALL"]:
    outfile = "results/blockaccuracy_"+reftype+"_"+changeintensity+"_"+mapclasstype+"_"+systemDIST+".csv"
    if os.path.exists(outfile):
      os.remove(outfile)
    with open(outfile,"a") as OUT:
      OUT.write("block,blockstratum,measureType,refChangeType,changeintensity,NpixelsEvaluated,stat\n")
    for measure in ["users","producers","overall"]:
      (stat,SE,var,df) = globalRatioAccuracy(measure,changeintensity=changeintensity,maplowclasses=maplow,maphighclasses=maphigh,nodataclasses=nodata,refChangeType=reftype,system=systemDIST,printdf=printdf) 
      print("Global",mapclasstype,reftype,measure,f"{(stat*100):.2f}",'±',f"{(SE*100):.2f}")
      accdict[mapclasstype+"_"+reftype]=[measure,stat,SE]
    checkPassingRequirement(outfile)
  return accdict    


#### Create point matrix files

In [28]:
#Print point matrix
def createpointmatrix(lookback):
  for sysDIST in ["DIST-S1","DIST-HLS"]:
    for mapclasstype in ["firstprovconf","conf","provconf"]:
      if mapclasstype=="provconf":
        maphigh = [5,6,8]
        maplow=[2,3,7]
        nodata=[255,1,4]
      elif mapclasstype=="conf":
        maphigh = [6,8]
        maplow=[3,7]
        nodata=[255,1,2,4,5]
      elif mapclasstype=="firstprovconf":
        maphigh = [4,5,6,8]
        maplow=[1,2,3,7]
        nodata=[255]
      for type in ["VLsub","ALLsub"]:#,"ALLsub","VL","ALL"]:
        #if os.path.exists("results/pointmatrix_"+type+"_"+mapclasstype+"_"+sysDIST+".csv"):
        #  os.remove("results/pointmatrix_"+type+"_"+mapclasstype+"_"+sysDIST+".csv")
        with open("results/pointmatrix_"+type+"_"+mapclasstype+"_"+sysDIST+"_lookback"+str(lookback)+".csv",'w') as OUT:
          OUT.write("ID,changetype,conversion,latitude,longitude,MGRS,blockstratum,substratum,refno_mapno,refno_maplow,refno_maphigh,reflow_mapno,reflow_maplow,reflow_maphigh,refhigh_mapno,refhigh_maplow,refhigh_maphigh,userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall\n")
        for block in allBlocks:
          if not block in excludelist:
            block = str(block)
            try:
              if sysDIST == "DIST-S1":
                map=getDISTS1status_vI(block)
              elif sysDIST == "DIST-HLS":
                map=getDISTALERTStatus_vI(block)
              (n,ntotal)=getMatrixBlock(block,map,maplow=maplow, maphigh=maphigh,nodata=nodata,refType=type,system=sysDIST,printMatrix=True,lookback=lookback)
            except Exception as e:
              print(sysDIST,block,type, "failed", e)

createpointmatrix(30)
#createpointmatrix(90)


In [None]:
import subprocess
def addLCLU(file):
  with open(file,'r') as DAT:
    lines = DAT.readlines()[1:]
  with open(file+".csv",'w') as OUT:
    OUT.write("ID,changetype,conversion,latitude,longitude,MGRS,blockstratum,substratum,refno_mapno,refno_maplow,refno_maphigh,reflow_mapno,reflow_maplow,reflow_maphigh,refhigh_mapno,refhigh_maplow,refhigh_maphigh,userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall,lclu2023\n")
    for l in lines:
      (ID,changetype,conversion,latitude,longitude,MGRS,blockstratum,substratum,refno_mapno,refno_maplow,refno_maphigh,reflow_mapno,reflow_maplow,reflow_maphigh,refhigh_mapno,refhigh_maplow,refhigh_maphigh,userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall) = l.split(',')
      (block,sub) = ID.split('_')
      response = subprocess.run(["gdallocationinfo -wgs84 -valonly ../selected/" + block+'_'+MGRS+"_2023lclu.tif "+longitude+" "+latitude], capture_output=True,shell=True)
      lc23 = str(response.stdout.decode().strip()).split('\n')[0]
      lc23 = int(lc23)
      if lc23<=1:
        lcval="bareTF"
      elif lc23>=2 and lc23<=24 :
        lcval="shortTF"
      elif lc23>=25 and lc23<=49 :
        lcval="treeTF"
      elif lc23>=100 and lc23<=101 :
        lcval="barewet"
      elif lc23>=102 and lc23<=124 :
        lcval="shortwet"
      elif lc23>=125 and lc23<=149 :
        lcval="treewet"
      elif lc23>=200 and lc23<=208 :
        lcval="vwater"
      elif lc23==244 :
        lcval="cropland"
      elif lc23==250 :
        lcval="builtup"
      elif lc23==255 :
        lcval="nodata"
      else: 
        lcval="other" #ocean,snow,other
      OUT.write(','.join([ID,changetype,conversion,latitude,longitude,MGRS,lcval,blockstratum,substratum,refno_mapno,refno_maplow,refno_maphigh,reflow_mapno,reflow_maplow,reflow_maphigh,refhigh_mapno,refhigh_maplow,refhigh_maphigh,userslow,producerslow,usershigh,producershigh,usersall,producersall,overallhigh,overallall]))


addLCLU("results/pointmatrix_ALLsub_conf_DIST-S1_lookback30.csv")




In [29]:
from IPython.display import display, HTML
def findAvPerfomancePerChangeType(file):
  df = pd.read_csv(file)
  df.loc[list("Built-up growth" in s for s in df['changetype']),'changetype'] = "Built-up expansion"
  #print(df)
  df.loc[list("rop expansion" in s for s in df['changetype']),'changetype'] = "Agriculture expansion"
  df.loc[list("Crop cycle change" in s for s in df['changetype']),'changetype'] = "Crop cycle change"
  df.loc[list("Clear cut" in s for s in df['changetype']),'changetype'] = "Clear cut"
  df.loc[list("noChange" in s for s in df['changetype']),'changetype'] = "No change"
  df.loc[list("No Change" in s for s in df['changetype']),'changetype'] = "No change"
  df.loc[list("Natural greening" in s for s in df['changetype']),'changetype'] = "Natural greening"
  df.loc[list("ther" in s for s in df['changetype']),'changetype'] = "Other change"
  df.loc[list("hifting" in s for s in df['changetype']),'changetype'] = "Shifting cultivation"
  df.loc[list("elective logging" in s for s in df['changetype']),'changetype'] = "Selective logging"
  df.loc[list("Fire" in row['changetype'] and row['conversion'] != "natural" for index,row in df[['changetype','conversion']].iterrows()),'changetype'] = "Fire (non-conversion)"

  ByChange= df.groupby(by = ['changetype'])
  means = pd.concat([ByChange.usershigh.count(),ByChange.usershigh.mean(),ByChange.usersall.count(),ByChange.usersall.mean(),ByChange.producershigh.count(),ByChange.producershigh.mean(),ByChange.producersall.count(),ByChange.producersall.mean()],axis=1)
  means.columns = ['count of map high change','users high', 'count of all mapped change','users all','count of ref high','producers high', 'count of all ref','producers all']
  #means = pd.concat([ByChange.producershigh.count(),ByChange.producershigh.mean(),ByChange.producersall.count(),ByChange.producersall.mean()],axis=1)
  #means.columns = ['count of ref high','producers high', 'count of all ref','producers all']
  (t1,reftype,maptype,syst,lb) = file.split('_')
  nochange=df[df['changetype']=="No change"]
  nochange[nochange['usersall']==0].to_csv("results/nochange_"+reftype+"_"+maptype+".csv")
  display(HTML(means.to_html()))

print("Users for change classes means timing is different between dections and S1 is detecting\nchange when we don't see it in the reference" \
"\nThese tables are more helpful for evaluating omission in the various categories. Also, users for no change shows greater commissions.\n")
print("VEGETATION LOSS")
findAvPerfomancePerChangeType("results/pointmatrix_VLsub_firstprovconf_DIST-S1_lookback30.csv")
#findAvPerfomancePerChangeType("results/pointmatrix_VLsub_conf_DIST-S1_lookback30.csv")
#findAvPerfomancePerChangeType("results/pointmatrix_VLsub_firstprovconf_DIST-HLS_lookback30.csv")
print("ALL CHANGE, all map classes")
findAvPerfomancePerChangeType("results/pointmatrix_ALLsub_firstprovconf_DIST-S1_lookback30.csv")
print("ALL CHANGE, confirmed")
findAvPerfomancePerChangeType("results/pointmatrix_ALLsub_conf_DIST-S1_lookback30.csv")


Users for change classes means timing is different between dections and S1 is detecting
change when we don't see it in the reference
These tables are more helpful for evaluating omission in the various categories. Also, users for no change shows greater commissions.

VEGETATION LOSS


Unnamed: 0_level_0,count of map high change,users high,count of all mapped change,users all,count of ref high,producers high,count of all ref,producers all
changetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Agriculture expansion,3,0.333333,4,0.375,0,,5,0.038715
Built-up expansion,10,0.7,33,0.688405,16,0.193987,39,0.21216
Clear cut,55,0.882305,84,0.842672,44,0.703815,77,0.603217
Crop cycle change,81,0.550203,201,0.411745,88,0.456883,198,0.417111
Drought,0,,1,0.5,0,,1,0.5
Fire (non-conversion),105,0.849922,291,0.667178,94,0.416203,289,0.398458
Lost water,0,,1,0.769231,0,,2,0.5
Mining,1,0.714286,2,0.815789,0,,2,0.616667
Natural browning,6,0.555111,39,0.410304,2,0.015625,47,0.194399
Natural browning and greening,5,0.454054,15,0.394847,2,0.357143,17,0.227022


ALL CHANGE, all map classes


Unnamed: 0_level_0,count of map high change,users high,count of all mapped change,users all,count of ref high,producers high,count of all ref,producers all
changetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Agriculture expansion,4,0.5,5,0.531294,3,0.5,6,0.23021
Built-up expansion,16,0.759352,40,0.802592,20,0.270536,45,0.273288
Clear cut,57,0.886434,85,0.860993,44,0.703815,79,0.611418
Crop cycle change,92,0.576878,233,0.47576,113,0.437306,235,0.387235
Drought,0,,1,0.5,0,,1,0.5
Fire (non-conversion),106,0.854034,293,0.673221,97,0.420856,291,0.388616
Lost water,4,0.482143,6,0.624512,3,0.780556,5,0.6593
Mining,1,0.714286,2,0.815789,0,,2,0.616667
Natural browning,6,0.628028,39,0.417052,2,0.015625,47,0.198006
Natural browning and greening,5,0.556757,16,0.545606,2,0.333333,19,0.164325


ALL CHANGE, confirmed


Unnamed: 0_level_0,count of map high change,users high,count of all mapped change,users all,count of ref high,producers high,count of all ref,producers all
changetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Agriculture expansion,1,1.0,1,1.0,2,0.0,5,0.0
Built-up expansion,12,0.761208,13,0.828944,20,0.142078,45,0.157903
Clear cut,52,0.910127,57,0.916044,41,0.570538,76,0.466336
Crop cycle change,58,0.652809,94,0.671478,110,0.256723,233,0.229615
Drought,0,,0,,0,,1,0.0
Fire (non-conversion),83,0.86702,120,0.854446,91,0.197302,276,0.198266
Lost water,3,0.809524,5,0.8,2,0.26087,4,0.3575
Mining,1,1.0,1,1.0,0,,1,0.0
Natural browning,3,0.410778,6,0.493481,2,0.0,47,0.066001
Natural browning and greening,2,0.388889,3,0.485714,2,0.25,19,0.051783


#### Calculate accuracies for all blocks

Accuracies including first, provisional, and confirmed map classes for high magnitude change

In [20]:
writeAllAccuarcies(mapclasstype ="firstprovconf",changeintensity="high",printdf=True)


                measure        SE  nh      Nh         yhat          xhat
waternew       0.410434  0.250169   8    1272  3007.368154   7327.283619
treelosswet    0.746203  0.174242  10    9696  6653.484036   8916.448155
builtnewalert  0.484645  0.121261  11  120496  4376.849626   9031.048526
fire           0.331440  0.327758   5    3188  4229.849387  12762.030031
treelossTF     0.658640  0.083008   9   63161  1359.849157   2064.633183
cropnew        0.153934  0.105936   8   75912   394.084318   2560.092482
wetshort       0.647071  0.059425   3    9477  4755.107052   7348.660147
oldcrop_short  0.554187  0.299285   6   65470  5455.094828   9843.420777
gen            0.000111  0.000098   6  215057     1.026483   9269.846247
other          0.253902  0.045517   8  385127   321.348249   1265.636641
Global firstprovconf VLsub users 26.37 ± 4.99
                measure        SE  nh      Nh         yhat          xhat
waternew       0.863895  0.065428   8    1272   493.910889    571.725931
treel

{'firstprovconf_VLsub': ['overall', 0.9531335018007353, 0.010809002201581413],
 'firstprovconf_ALLsub': ['overall', 0.9514192248858216, 0.010759446132652967]}

In [21]:
writeAllAccuarcies(mapclasstype ="firstprovconf",changeintensity="all")


Global firstprovconf VLsub users 18.95 ± 4.09
Global firstprovconf VLsub producers 49.80 ± 6.90
Global firstprovconf VLsub overall 86.28 ± 1.83
results/blockaccuracy_VLsub_all_firstprovconf_DIST-S1.csv 67.57
Global firstprovconf ALLsub users 36.06 ± 5.83
Global firstprovconf ALLsub producers 36.51 ± 2.91
Global firstprovconf ALLsub overall 82.62 ± 2.28
results/blockaccuracy_ALLsub_all_firstprovconf_DIST-S1.csv 62.16


{'firstprovconf_VLsub': ['overall', 0.8628170218112726, 0.0182801666819058],
 'firstprovconf_ALLsub': ['overall', 0.8262181123140991, 0.022785256800554037]}

Accuracies including provisional and confirmed map classes for high magnitude change

In [22]:
writeAllAccuarcies(mapclasstype ="provconf",changeintensity="high")

Global provconf VLsub users 26.28 ± 5.02
Global provconf VLsub producers 57.50 ± 5.37
Global provconf VLsub overall 95.15 ± 1.17
results/blockaccuracy_VLsub_high_provconf_DIST-S1.csv 91.89
PASSED requirement
Global provconf ALLsub users 45.21 ± 5.33
Global provconf ALLsub producers 56.56 ± 3.60
Global provconf ALLsub overall 94.99 ± 1.16
results/blockaccuracy_ALLsub_high_provconf_DIST-S1.csv 91.89
PASSED requirement


{'provconf_VLsub': ['overall', 0.9514585026939054, 0.011664801983304088],
 'provconf_ALLsub': ['overall', 0.9499400784128197, 0.011587710723376693]}

Accuracies including only confirmed map classes for high magnitude change

In [23]:
writeAllAccuarcies(mapclasstype ="conf",changeintensity="high")

Global conf VLsub users 33.79 ± 6.37
Global conf VLsub producers 51.00 ± 5.55
Global conf VLsub overall 96.16 ± 0.99
results/blockaccuracy_VLsub_high_conf_DIST-S1.csv 91.89
PASSED requirement
Global conf ALLsub users 59.93 ± 6.76
Global conf ALLsub producers 51.08 ± 3.73
Global conf ALLsub overall 96.07 ± 1.00
results/blockaccuracy_ALLsub_high_conf_DIST-S1.csv 91.89
PASSED requirement


{'conf_VLsub': ['overall', 0.9616317231400994, 0.009924716634846831],
 'conf_ALLsub': ['overall', 0.9607000615480705, 0.009958704320175272]}

### Accuracies for DIST-HLS

Accuracies including first, provisional, and confirmed map classes for high magnitude change

In [24]:
writeAllAccuarcies(mapclasstype ="firstprovconf",changeintensity="high",systemDIST="DIST-HLS")

  measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
  SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100


Global firstprovconf VLsub users 89.33 ± 4.77
Global firstprovconf VLsub producers 92.54 ± 1.07
Global firstprovconf VLsub overall 99.24 ± 0.12
results/blockaccuracy_VLsub_high_firstprovconf_DIST-HLS.csv 97.33
PASSED requirement


  measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
  SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100


Global firstprovconf ALLsub users 90.09 ± 4.78
Global firstprovconf ALLsub producers 60.04 ± 5.71
Global firstprovconf ALLsub overall 98.61 ± 0.28
results/blockaccuracy_ALLsub_high_firstprovconf_DIST-HLS.csv 97.33
PASSED requirement


{'firstprovconf_VLsub': ['overall', 0.992417955940199, 0.0012173007875830484],
 'firstprovconf_ALLsub': ['overall', 0.9861212843975699, 0.002838654623027613]}

Accuracies including provisional and confirmed map classes for high magnitude change

In [25]:
writeAllAccuarcies(mapclasstype ="provconf",changeintensity="high",systemDIST="DIST-HLS")

  measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
  SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100


Global provconf VLsub users 90.52 ± 4.54
Global provconf VLsub producers 91.92 ± 1.19
Global provconf VLsub overall 99.26 ± 0.12
results/blockaccuracy_VLsub_high_provconf_DIST-HLS.csv 97.33
PASSED requirement


  measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
  SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100


Global provconf ALLsub users 91.27 ± 4.54
Global provconf ALLsub producers 58.86 ± 5.98
Global provconf ALLsub overall 98.62 ± 0.29
results/blockaccuracy_ALLsub_high_provconf_DIST-HLS.csv 97.33
PASSED requirement


{'provconf_VLsub': ['overall', 0.9926010107170249, 0.0012193922028050416],
 'provconf_ALLsub': ['overall', 0.9862042545415673, 0.0028752279872623176]}

Accuracies including only confirmed map classes for high magnitude change

In [26]:
writeAllAccuarcies(mapclasstype ="conf",changeintensity="high",systemDIST="DIST-HLS")

  measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
  SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100


Global conf VLsub users 91.46 ± 4.22
Global conf VLsub producers 90.64 ± 1.44
Global conf VLsub overall 99.29 ± 0.12
results/blockaccuracy_VLsub_high_conf_DIST-HLS.csv 97.33
PASSED requirement


  measure = df['yu'].sum()/df['xu'].sum() #* 100 # statistics.mean(df['measure'])
  SE = math.sqrt((1/(Nh*xhat))**2 * subvariance) #* 100


Global conf ALLsub users 92.21 ± 4.23
Global conf ALLsub producers 56.84 ± 6.34
Global conf ALLsub overall 98.64 ± 0.29
results/blockaccuracy_ALLsub_high_conf_DIST-HLS.csv 97.33
PASSED requirement


{'conf_VLsub': ['overall', 0.9928613922179844, 0.001200008671965374],
 'conf_ALLsub': ['overall', 0.9864082917716566, 0.0028809615543790392]}

In [27]:
writeAllAccuarcies(mapclasstype ="conf",changeintensity="high",systemDIST="DIST-HLS-GEN")

Global conf VLsub users 28.03 ± 4.19
Global conf VLsub producers 58.03 ± 4.63
Global conf VLsub overall 98.59 ± 0.40
results/blockaccuracy_VLsub_high_conf_DIST-HLS-GEN.csv 97.33
PASSED requirement
Global conf ALLsub users 76.79 ± 5.81
Global conf ALLsub producers 54.27 ± 3.99
Global conf ALLsub overall 98.06 ± 0.44
results/blockaccuracy_ALLsub_high_conf_DIST-HLS-GEN.csv 97.33
PASSED requirement


{'conf_VLsub': ['overall', 0.985911461650762, 0.004014076297033938],
 'conf_ALLsub': ['overall', 0.9806101616311481, 0.004391001903659478]}

## DIST-ALERT-HLS functions

In [21]:
#DIST-ALERT-HLS
def getDISTALERTStatus_vI(block,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]

    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,367)]
        #print(ID,end=',')
        with open(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (granuleID,SensingTime,ProductionTime,VEGDISTSTATUS,VEGANOM,VEGIND,VEGHIST,VEGANOMMAX,VEGDISTCONF,VEGDISTDATE,VEGDISTCOUNT,VEGDISTDUR,VEGLASTDATE,GENDISTSTATUS,GENANOM,GENANOMMAX,GENDISTCONF,GENDISTDATE,GENDISTCOUNT,GENDISTDUR,GENLASTDATE)= line.strip().split(',')
                    day = dayDiff("20240101",SensingTime[0:8])

                    if not (skipNodata and VEGANOM!='NA'):
                        if (int(VEGDISTSTATUS)==7 or int(VEGDISTSTATUS)==8) and VEGDISTDATE[0:4]=='2023': 
                            mapalert[ID][day] = 0
                        else:
                            mapalert[ID][day] = int(VEGDISTSTATUS)
                except:
                #    print(traceback)
                    print(ID,day,line)

    return mapalert


In [22]:
def getDISTALERTStatus_vI_GEN(block,skipNodata=False):
    #print(skipNodata)
    mapalert = {}
    IDlist = [ID for ID in allIDs if str(block) in ID]

    for ID in IDlist:
        mapalert[ID] = [255 for i in range(0,367)]
        #print(ID,end=',')
        with open(mapsourceHLS+'/'+ID+'_DIST-ALERT_'+ANNname+'.csv','r') as mapfile:
            lines = mapfile.readlines()
            header = lines[0]
            maplist = lines[1:]
            for line in maplist:
                try:
                    (granuleID,SensingTime,ProductionTime,VEGDISTSTATUS,VEGANOM,VEGIND,VEGHIST,VEGANOMMAX,VEGDISTCONF,VEGDISTDATE,VEGDISTCOUNT,VEGDISTDUR,VEGLASTDATE,GENDISTSTATUS,GENANOM,GENANOMMAX,GENDISTCONF,GENDISTDATE,GENDISTCOUNT,GENDISTDUR,GENLASTDATE)= line.strip().split(',')
                    day = dayDiff("20240101",SensingTime[0:8])

                    if not (skipNodata and int(GENANOM)==255):
                        if (int(GENDISTSTATUS)==7 or int(GENDISTSTATUS)==8) and GENDISTDATE[0:4]=='2023': 
                            mapalert[ID][day] = 0
                        else:
                            mapalert[ID][day] = int(GENDISTSTATUS)
                except:
                #    print(traceback)
                    print(ID,line)

    return mapalert