# In [None]:
#############################################################################################

##### 2-STAGE ESTIMATOR FOR LAO 3RD NFI CYCLE #####
##### DATA PROVIDED BY LAO Department of Forestry, Ministry of Agriculture and Forestry (DOF; https://nfms.maf.gov.la/)
##### DATA IS NOT PUBLICLY AVAILABLE

##### Code prepared by Neha Hunka, University of Maryland
##### Manuscript available at doi: 10.1088/1748-9326/ad0b60 (https://iopscience.iop.org/article/10.1088/1748-9326/ad0b60)
##### Supplementary documentation at https://content.cld.iop.org/journals/1748-9326/18/12/124042/revision2/erlad0b60supp1.pdf?Expires=1720847657&Signature=aV-X~Cs2M7LMHMaVrc8F16uaxib1w8vgRZWNBotBWJaxJ03dHp~I85GONti6QQ-7dgvgQXN4gusDhNj2AfNGX42FIp3l2WX0n~5kup0qcbe9jvNAp5Ce0n4uvvC~yHvUkp6UeuS6YuZiqPULthGILQwEz5w6hyo4ybY3vBxhMiqj-m5NfapyLaR9IgdGVQvOtdVIR5KSptzjZjuazHEHZRWMpRTj1DzqEUgUwNNpNrnV18~F8tjif88nFK8edK-bFyP-kamBfHItj0~9pztfVPmGikNMKwcM0bMYGTuQNd76IxCDUgzGfdYvrkq64o8wz3SDjiWJ1icmrAMA78yM6Q__&Key-Pair-Id=KL1D8TIY3N7T8

#############################################################################################
### LOAD PACKAGES
library(dplyr)

### READ NFI DATA
# Location of the (non-public) NFI tree-plot table. Kept in one named variable
# so the path only has to be changed in a single place.
NFI_CSV_PATH <- "C:/Users/nhunka/Desktop/UMD/Biomass_Harmonization/Data/LAOS_NFI/RCode_NFI3_TreePlots_Cstock_StrataArea_DesignStrata_UMD.csv"
DATA <- read.csv(NFI_CSV_PATH)
#####  FEW NOTES ABOUT THE DATAFRAME "DATA" READ FROM THE CSV
## Ftype: Forest type identified by the team in the field at the sub-plot (or tree-plot) location.
## Assigned_Ftype:  Forest type assigned to the plot based on the Ftype of the sub-plots.
## Design_SSU_Ftype : Pre-sampling forest-type stratum for each SSU based on the country's forest strata map (extracted using QGIS)
## Design_PSU_Ftype : Pre-sampling forest-type stratum for each PSU based on forest strata of the anchor plot (here, I chose "sub_plotA" as the anchor plot, "sub_plotB" if A was not available...and so on)(extracted using QGIS).
## Design_PSU_StrataArea_MAX : The forest stratum that has the largest area (i.e. in m2) in each PSU (extracted using QGIS).

# Fail early when a filter column is absent: `DATA$missing_col` is NULL, and
# subsetting rows with the resulting zero-length condition would silently
# discard every row instead of raising an error.
filter_cols <- c("lc0", "CstockAGB")
missing_cols <- setdiff(filter_cols, names(DATA))
if (length(missing_cols) > 0) {
  stop(
    "Columns missing from the NFI table: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

##### DATA FILTERS, IF NEEDED
# which() keeps only rows where the comparison is TRUE. A plain logical index
# would turn every NA comparison into an all-NA phantom row that is then
# carried through (and counted in) the rest of the analysis.
DATA <- DATA[which(DATA$lc0 != "nf"), ] # Only plots in forest strata are considered
DATA <- DATA[which(DATA$CstockAGB != 0), ] # Only plots with non-zero CstockAGB are considered

##### CORRECTING THE RESULTS OF THE ALLOMETRIC MODEL 
## NOTE, when the ln-ln allometric model is applied, a correction factor is needed when the plot-level AGBD is derived AFTER back-transformation
## Two correction factors are provided below, but the allometric models must be checked and refit correctly. This script does not perform model refitting.
## A separate script is available for this step, if required. 

# Multiplicative back-transformation correction factors, one per field-observed
# forest type ("Ftype") that is corrected here: EF and DD.
EF_correction_factor <- 1.045282
DD_correction_factor <- 1.035969

# Row positions are resolved with which() so that plots with a missing Ftype
# are simply left uncorrected. Indexing the assignment with a logical vector
# that contains NA raises "NAs are not allowed in subscripted assignments".
ef_rows <- which(DATA$Ftype == "EF")
dd_rows <- which(DATA$Ftype == "DD")
DATA$AGB_Mgha[ef_rows] <- DATA$AGB_Mgha[ef_rows] * EF_correction_factor
DATA$AGB_Mgha[dd_rows] <- DATA$AGB_Mgha[dd_rows] * DD_correction_factor

##### BEGIN ANALYSIS 
## It is important to note that for strata for which the stratification map has greater inaccuracies, 
## i.e. mismatches between field observations and map stratum classifications, the estimates of the 
## strata AGBD means will not correspond to a single forest type but rather to the mix of strata observed 
## by the field crews for plots assigned by the map to strata. For this reason, it is important to note 
## that the estimates obtained in this study differ from those used nationally by the DOF for each forest type, 
## for example, for REDD+ assessments.

# Unique IDs of all PSUs, in order of first appearance in DATA. Every per-PSU
# table built below is aligned to this order.
PSU_IDs <- unique(DATA$ID)

# Associate each PSU with a single stratum: the most frequent value of the
# "Design_PSU_Ftype" column among the PSU's rows.
PSU_sampled <- as.data.frame(
  DATA %>%
    group_by(ID) %>%
    summarize(Design_PSU_Ftype = names(which.max(table(Design_PSU_Ftype))))
)
UNQ_STRATA <- c("EF", "MDF", "DD", "CF", "MCB")
y_hij <- DATA$AGB_Mgha # value of AGB in each subplot estimated by Lao NFI

# Area (m2) of each stratum in each 3 km x 3 km PSU. The columns are selected
# by name (EF_Area_m2, MDF_Area_m2, DD_Area_m2, CF_Area_m2, MCB_Area_m2) so the
# script does not depend on the column order of the CSV; the original
# positions 29:33 are kept as a fallback if those names are not all present.
area_cols <- paste0(UNQ_STRATA, "_Area_m2")
if (all(area_cols %in% names(DATA))) {
  A_hij <- DATA[, area_cols]
} else {
  A_hij <- DATA[, 29:33]
}
A_hij["ID"] <- DATA$ID
A_hi <- aggregate(A_hij, list(A_hij$ID), FUN = mean, na.rm = TRUE)

# aggregate() and group_by() both return one row per PSU sorted by ID, whereas
# PSU_IDs follows the row order of DATA. Reorder both tables to PSU_IDs so that
# a later column-wise cbind() with a table built from PSU_IDs pairs each PSU
# with its own areas and stratum.
A_hi <- A_hi[match(PSU_IDs, A_hi$Group.1), , drop = FALSE]
rownames(A_hi) <- NULL
PSU_sampled <- PSU_sampled[match(PSU_IDs, PSU_sampled$ID), , drop = FALSE]
rownames(PSU_sampled) <- NULL

a <- 1256.64 # in units of m2, area of each 20-m radius plot

# M_hi is the total number of SSU-size units within the i_th PSU within the
# h_th stratum. The division also scales the two ID columns; "ID" is restored
# on the next line and the first column is a by-product labelled "Random".
M_hi <- A_hi / a
M_hi["ID"] <- A_hi$ID
colnames(M_hi) <- c("Random", "EF_M_hi", "MDF_M_hi", "DD_M_hi", "CF_M_hi", "MCB_M_hi", "ID")

#############################################################################################
# Now, we produce m_hi, i.e. the number of sampled SSUs that fall in each stratum within each PSU
# For this, we add one column per stratum to a new dataframe "DATA_PSU", holding the per-stratum SSU counts

# Build the per-PSU table: one row per PSU (in PSU_IDs order), one "<h>_m_hi"
# column per stratum holding the number of SSUs of that PSU whose
# Design_SSU_Ftype is h, plus their row total.
DATA_PSU <- data.frame(PSU_IDs) # Create a new PSU dataframe

# Cross-tabulate PSU x design stratum in one pass. table() ignores NA values,
# so SSUs with a missing Design_SSU_Ftype are not counted in any stratum;
# counting with nrow() on a logically-subset data frame would add one phantom
# row per NA to every stratum's count.
psu_index <- factor(match(DATA$ID, PSU_IDs), levels = seq_along(PSU_IDs))
ssu_stratum <- factor(DATA$Design_SSU_Ftype, levels = UNQ_STRATA)
ssu_counts <- table(psu_index, ssu_stratum)

for (h in UNQ_STRATA) { # for each stratum in the country
  DATA_PSU[paste0(h, "_m_hi")] <- as.integer(ssu_counts[, h])
}

# Total number of SSUs per PSU across the strata listed in UNQ_STRATA.
DATA_PSU["TOTAL_n_SSU"] <- rowSums(DATA_PSU[, paste0(UNQ_STRATA, "_m_hi"), drop = FALSE])
# Number of columns so far; the per-stratum AGBD sums are appended after it.
NCOL <- ncol(DATA_PSU)
### The dataset DATA_PSU contains values of "m_hi", which is the number of SSUs within the ith PSU within the hth stratum

### Now, we produce y_bar_hi, which is the mean PSU AGBD, for m_hi samples
### We do this in two steps. First, we calculate the sum of all AGBDs in the relevant SSUs and then divide by m_hi. 
# Append one "<stratum>_y_sum_hi" column per stratum to DATA_PSU: for every PSU,
# the sum of AGB_Mgha over its SSUs whose Design_SSU_Ftype equals that stratum.
# PSU/stratum combinations without any SSU get 0, and NA values are ignored.
ssu_agb <- DATA$AGB_Mgha
for (stratum in UNQ_STRATA) {
  ssu_in_stratum <- DATA$Design_SSU_Ftype == stratum
  DATA_PSU[paste0(stratum, "_y_sum_hi")] <- vapply(
    PSU_IDs,
    function(psu) {
      as.numeric(sum(ssu_agb[DATA$ID == psu & ssu_in_stratum], na.rm = TRUE))
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# M_hi = M_hi[DATA_PSU$TOTAL_n_SSU == 4,]
# DATA_PSU = DATA_PSU[DATA_PSU$TOTAL_n_SSU == 4,]

# Attach the per-PSU stratum sizes (M_hi) and the PSU design stratum to
# DATA_PSU, then derive the per-PSU, per-stratum mean AGBD (y_bar_hi).
m_cols <- paste0(UNQ_STRATA, "_m_hi")
y_sum_cols <- paste0(UNQ_STRATA, "_y_sum_hi")
M_cols <- paste0(UNQ_STRATA, "_M_hi")

# Join on the PSU ID rather than on row position: DATA_PSU follows the order of
# PSU_IDs, while M_hi and PSU_sampled come from grouped summaries that may be
# ordered differently, so a positional cbind() could pair the wrong PSUs.
M_rows <- match(DATA_PSU$PSU_IDs, M_hi$ID)
stratum_rows <- match(DATA_PSU$PSU_IDs, PSU_sampled$ID)
if (anyNA(M_rows) || anyNA(stratum_rows)) {
  stop("Some PSU IDs in DATA_PSU have no match in M_hi or PSU_sampled", call. = FALSE)
}
M_part <- M_hi[M_rows, M_cols, drop = FALSE]
rownames(M_part) <- NULL
stratum_part <- PSU_sampled[stratum_rows, , drop = FALSE]
rownames(stratum_part) <- NULL
DATA_PSU <- cbind(DATA_PSU, M_part, stratum_part)

# Zero counts, sums and areas mean "stratum not present / not sampled in this
# PSU"; mark them NA so they drop out of the ratios below. This is limited to
# the measurement columns so that a PSU whose ID is 0 keeps its ID.
for (value_col in c(m_cols, "TOTAL_n_SSU", y_sum_cols, M_cols)) {
  DATA_PSU[[value_col]][which(DATA_PSU[[value_col]] == 0)] <- NA
}

# Now, we estimate y_bar_hi = (sum of AGBD over the SSUs) / m_hi
y_bar_hi <- DATA_PSU[y_sum_cols] / DATA_PSU[m_cols]
colnames(y_bar_hi) <- paste0(UNQ_STRATA, "_y_bar_hi")
DATA_PSU <- cbind(DATA_PSU, y_bar_hi)

# Ratio estimator of the mean AGBD of each stratum h, over the PSUs assigned to
# h by Design_PSU_Ftype:
#   mu_h = sum_i(M_hi * y_bar_hi) / sum_i(M_hi)
# For each stratum this prints the number of PSUs assigned to it, then the mean.
for (h in UNQ_STRATA) { # for each stratum in the country
  # which() ignores PSUs with a missing design stratum instead of adding
  # all-NA rows to the subset (and to the printed PSU count).
  DATA_PSU_h <- DATA_PSU[which(DATA_PSU$Design_PSU_Ftype == h), ]
  print(nrow(DATA_PSU_h))

  y_bar_h <- DATA_PSU_h[[paste0(h, "_y_bar_hi")]]
  M_h <- DATA_PSU_h[[paste0(h, "_M_hi")]]

  # Use the same PSUs in the numerator and the denominator. Summing M_hi over
  # PSUs that have no y_bar_hi (no SSU sampled in stratum h) while dropping
  # them from the numerator treats their mean AGBD as 0 and biases mu_h low.
  usable <- !is.na(y_bar_h) & !is.na(M_h)
  mu_h <- sum(y_bar_h[usable] * M_h[usable]) / sum(M_h[usable])
  print(paste0("Mean of strata ", h, " ", as.character(mu_h)))
}