
# ╔═══════════════════════════════════════════════════╗
# ║      🌿  **GAP-FILLING USING REddyProc**                              ║
# ║      📘  **R-Based Script (Runtime type)**                            ║
# ╚════════════════════════════════════════════════════╝



# ============================================================
# 📦 **1. Importing Packages**
# ============================================================

In [11]:
install.packages("REddyProc")   # Install REddyProc package for flux data processing
library(REddyProc)              # Load REddyProc
library(lubridate)              # Handle date-time data
library(tidyverse)              # Data manipulation and visualization
library(dplyr)                  # Data wrangling
library(bigleaf)                # Canopy and flux calculations
library(data.table)             # Efficient data handling
library(ggplot2)                # Plotting and visualization


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



# ============================================================
# 🌿 **2. Data Directory and Loading**
# ============================================================

In [13]:
# Directory setting
setwd("/content")

# site parameters
siteLat <- 53.15174484
siteLon <- -7.967936516
siteTz <- 0 # UTC+2
siteName <- "All Saints Bog"


datafile = "AsiaFlux2025.txt"     # add the extension (e.g., .txt)
resdir <- "/content"
resfile = "Gap-filled-AsiaFlux2025"


# Load input file
#file <- file.path(datadir, datafile)
EddyData <- fLoadTXTIntoDataframe(datafile)

Loaded file AsiaFlux2025.txt with the following variables (units):

 *** Year(--) Hour(--) DoY(--) NEE(--) LE(--) H(--) Ustar(--) qc_co2_flux(--) qc_h2o_flux(--) Net(--) Tair(--) rH(--) Rg(--) Tsoil(--) VPD(--)

Number of '-9999' convertered to NA: 35481



# ============================================================
# ⚙️ **3. Data Processing**
# ============================================================


In [14]:
# add POSIXtime column if DateTime
EddyData <- fConvertTimeToPosix(EddyData, "YDH", Year="Year", Day="DoY", Hour="Hour")

# REddyProc does not accept negative radiation values, convert to zero
# (missing data values -9999 were already converted to NA by fLoadTXTIntoDataframe)
EddyData$Rg[EddyData$Rg<0] <- 0

# calculate VPD
#EddyData$VPD <- fCalcVPDfromRHandTair(EddyData$rH, EddyData$Tair)

# calculate mean of Tair and Tsoil as Tmean to get more meaningful binning variable or respiration driver than Tair
EddyData$Tmean <- rowMeans(EddyData[,c("Tair","Tsoil")], na.rm=TRUE)

# removal of outliers and obvious peaks in the data
#EddyData$NEE[abs(EddyData$NEE - mean(EddyData$NEE, na.rm=T)) > 3 * sd(EddyData$NEE, na.rm=T)] <- NA
#EddyData$LE[abs(EddyData$LE - mean(EddyData$LE, na.rm=T)) > 3 * sd(EddyData$LE, na.rm=T)] <- NA
#EddyData$H[abs(EddyData$H - mean(EddyData$H, na.rm=T)) > 3 * sd(EddyData$H, na.rm=T)] <- NA

# removal of data with high QC flag
#EddyData$NEE[EddyData$qc_co2_flux > 1.0] <- NA
#EddyData$LE[EddyData$qc_co2_flux > 1.0] <- NA

Converted time format 'YDH' to POSIX with column name 'DateTime'.



# ============================================================
# 🌿 **4 REddyProc Application**
# ============================================================








📂 **Processed and Clean EC Data**
      
      ---> ⚙️ REddyProc Object Initiation
      
                    ---> 🌬️ U* Star Filtering
      
                    ---> 🧩 Gap-Filling
      
                  ---> 🌿 Flux Partitioning
      
      ---> 📊 Outputs (NEE_f, GPP, Reco)


# 🔹 **4.1 Initialize REddyProc Object**

In [15]:
# initialize new REddyProc object with Hyytiälä data and location
EddyObj <- sEddyProc$new(ID=siteName, Data=EddyData, ColNames=c("NEE","Rg","Tair","Tsoil","Tmean","VPD","Ustar"),
                         LatDeg=siteLat, LongDeg=siteLon, TimeZoneHour=siteTz)

“sEddyProc.initialize:::fCheckHHTimeSeries::: Data not provided in full days (multiple of daily time step). One day only has 6 (half-)hours!”
“sEddyProc.initialize:::fCheckHHTimeSeries::: Time stamp of first data row is not at the end of the first half-hour: 21:00 instead of 00:30!”
“sEddyProc.initialize:::fCheckHHTimeSeries::: The last time stamp is not midnight: 0:00!”
New sEddyProc class for site 'All Saints Bog'



# 🔹 **4.2 uStar Threshold Estimation and NEE Gap-Filling Preparation**

In [16]:
# u* threshold

# REddyProc estimates seasonal u* thresholds,
# modify season boundaries to pool all data (May-Aug) into one season (default seasons are Dec-Feb, Mar-May, Jun-Aug, Sep-Nov)
seasons <- usCreateSeasonFactorMonth(EddyData$DateTime-15*60, startMonth = c(1,5,9))

# estimate u* threshold distribution...
uStarThAll <- EddyObj$sEstUstarThresholdDistribution(seasonFactor=seasons)

# ... or just mean values
uStarTh <- EddyObj$sEstUstarThold(seasonFactor=seasons)

# estimate u* threshold with Tmean instead of Tair
uStarTh_Tmean <- EddyObj$sEstUstarThold(seasonFactor=seasons, TempColName="Tmean")

# extract one value...
uStarThSingle <- uStarTh$uStar[uStarTh$aggregationMode=="single"]
uStarThSingle_Tmean <- uStarTh_Tmean$uStar[uStarTh$aggregationMode=="single"]

# ...or seasonal value(s) as data frame
uStarThSeasonal <- usGetSeasonalSeasonUStarMap(uStarTh)

# plot NEE vs uStar for the summer season (see season names by looking at levels(uStarTh$season))
# assigning file name is not possible so the second command overwrites the first plotss
season <- "2021001"
uStarThSeasonal
EddyObj

EddyObj$sPlotNEEVersusUStarForSeason(season=season, dir=resdir)
#EddyObj$sPlotNEEVersusUStarForSeason(season=season)
EddyObj$sPlotNEEVersusUStarForSeason(season=season, TempColName="Tmean", dir=resdir)
#EddyObj$sPlotNEEVersusUStarForSeason(season=season, TempColName="Tmean")
# If season filtering does not work, assign input data of the desired season:
# plotData <- cbind(EddyObj$sDATA, EddyObj$sTEMP, EddyObj$sUSTAR_DETAILS$bins[,c("uStarBin","tempBin")])
# plotData <- subset(plotData, season==season)
# EddyObj$sPlotNEEVersusUStarForSeason(season=season, dir=resdir, data=plotData)
# EddyObj$sPlotNEEVersusUStarForSeason(season=season, TempColName="Tmean", dir=resdir, data=plotData)



Estimated UStar distribution of:
        uStar         5%       50%       95%
1 0.09502202 0.07546469 0.1180055 0.1641311 
by using  200 bootstrap samples and controls:
                       taClasses                    UstarClasses 
                              7                              20 
                          swThr            minRecordsWithinTemp 
                             10                             100 
         minRecordsWithinSeason            minRecordsWithinYear 
                            160                            3000 
isUsingOneBigSeasonOnFewRecords 
                              1 

Estimated UStar threshold of:  0.095 by using controls:
                       taClasses                    UstarClasses 
                              7                              20 
                          swThr            minRecordsWithinTemp 
                             10                             100 
         minRecordsWithinSeason            minRecordsW

Unnamed: 0_level_0,season,uStar
Unnamed: 0_level_1,<fct>,<dbl>
3,2021001,0.09502202
4,2021005,0.0751374
5,2021009,0.07118518


Reference class object of class "sEddyProc"
Field "sID":
[1] "All Saints Bog"
Field "sDATA":
                sDateTime         NEE          Rg   Tair  Tsoil   Tmean
1     2021-03-04 20:45:00 -1.78883000 0.00000e+00 20.600 20.900 20.7500
2     2021-03-04 21:15:00 -1.07742614 0.00000e+00 20.560 20.900 20.7300
3     2021-03-04 21:45:00  0.30305593 0.00000e+00 20.531 20.893 20.7120
4     2021-03-04 22:15:00 -0.73187004 0.00000e+00 20.527 20.900 20.7135
5     2021-03-04 22:45:00          NA 0.00000e+00 20.501 20.900 20.7005
6     2021-03-04 23:15:00  0.24054813 0.00000e+00 20.474 20.900 20.6870
7     2021-03-04 23:45:00  0.51762084 0.00000e+00 20.445 20.807 20.6260
8     2021-03-05 00:15:00          NA          NA     NA     NA     NaN
9     2021-03-05 00:45:00          NA          NA     NA     NA     NaN
10    2021-03-05 01:15:00          NA          NA     NA     NA     NaN
11    2021-03-05 01:45:00          NA          NA     NA     NA     NaN
12    2021-03-05 02:15:00          NA      

Saved plot to: /content/All Saints Bog_2021_NEEvsUStar_2021001_none.pdf

Saved plot to: /content/All Saints Bog_2021_NEEvsUStar_2021001_none.pdf



# 🔹 **4.3 Gap-Filling CO₂ Fluxes Using uStar Thresholds**

In [17]:
# gapfilling of CO2 fluxes with single and seasonal u* thresholds

# run gapfilling with single u* value,
# override default suffix "uStar" that is added to the result column names to distinguish the results of different gapfilling runs
EddyObj$sMDSGapFillAfterUstar("NEE", uStarTh=uStarThSingle, uStarSuffix="single_ust")

# run gapfilling using mean of Tair and Tsoil as third condition variable
EddyObj$sMDSGapFillAfterUstar("NEE", V3="Tmean", uStarTh=uStarThSingle_Tmean, uStarSuffix="single_ust_Tmean")

# run gapfilling using Tsoil as third condition variable
# EddyObj$sMDSGapFillAfterUstar("NEE", V3="Tsoil", uStarTh=uStarThSingle, uStarSuffix="single_ust_Tsoil")

# run gapfilling with seasonally varying ustar
# note that determining threshold is uncertain when fluxes are small, often it's better to use annual or single threshold
EddyObj$sMDSGapFillAfterUstar("NEE", uStarTh=uStarThSeasonal, uStarSuffix="seasonal_ust")

# assign results into dataframe (you can repeat this in later phases,
# results accumulate in EddyObj$sTEMP if different scenarios are named with unique suffixes)
resGapfill <- EddyObj$sExportResults()

Ustar filtering (u * Th_1 = 0.09502202), marked 25% of the data as gap

Initialized variable 'NEE' with 6739 real gaps for gap filling of all 14502 values (to estimate uncertainties).

Full MDS algorithm for gap filling of 'NEE.Ustar_single_ust_fqc_0' with LUT(Rg, VPD, Tair) and MDC.

Look up table with window size of 7 days with Rg VPD Tair

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
11175

Look up table with window size of 14 days with Rg VPD Tair

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
65

Look up table with window size of 7 days with Rg

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
457

Mean diurnal course with window size of 0 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.


# 🔹 **4.4 Partitioning CO₂ Fluxes into GPP and Reco**

In [18]:
# CO2 flux partitioning with two driving temperatures of Reco and sMRFluxPartition method,
# sGLFluxPartition and sTKFluxPartition methods are also available.

# fill possible missing Rg, T and VPD before partitioning
EddyObj$sMDSGapFill("Rg", FillAll=FALSE)
EddyObj$sMDSGapFill("Tair", FillAll=FALSE)
EddyObj$sMDSGapFill("Tsoil", FillAll=FALSE)
EddyObj$sMDSGapFill("Tmean", FillAll=FALSE)
EddyObj$sMDSGapFill("VPD", FillAll=FALSE)

# run partitioning using default Reco temperature (Tair),
# indicate NEE suffix (if other than plain "_f") for REddyProc to find corresponding gapfilled NEE
EddyObj$sMRFluxPartition(suffix="single_ust")

# partitioning using the mean of filled Tair and Tsoil as Reco driver, reduce required temperature range to 3 degrees
EddyObj$sMRFluxPartition(suffix="single_ust_Tmean", TempVar="Tmean_f", parsE0Regression=list(TempRange=3))

# assign results into temporary variable (results in EddyObj will be overwritten each time if not given unique suffix)
resPart <- EddyObj$sExportResults()

# try partitioning using Tsoil as Reco driver, it probably fails as the range of variability in Tsoil is too small
# EddyObj$sMRFluxPartition(suffix="single_ust_Tsoil", TempVar="Tsoil_f", parsE0Regression=list(TempRange=2))

Initialized variable 'Rg' with 2804 real gaps for gap filling.

Restriced MDS algorithm for gap filling of 'Rg' with no meteo conditions and hence only MDC.

Mean diurnal course with window size of 0 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
1118

Mean diurnal course with window size of 1 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
996

Mean diurnal course with window size of 2 days: .

.
.
.
.
.
.
367

Mean diurnal course with window size of 7 days: .

.
.
.
323

Finished gap filling of 'Rg' in 0 seconds. Artificial gaps filled: 14502, real gaps filled: 2804, unfilled (long) gaps: 0.

Initialized variable 'Tair' with 2158 real gaps for gap filling.

Limited MDS algorithm for gap filling of 'Tair' with LUT(Rg only) and MDC.

Look up table with window size of 7 days with Rg

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
164

Mean diurnal course with window size of 0 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
769

Mean diurnal course with window size of 1 days: .

.
.

# ============================================================
# 💾 **5 Data Saving**
# ============================================================

In [19]:
# add gapfilled NEE to the final data frame
EddyDataFilled <- cbind(EddyData, resGapfill[,
                                             c("NEE_single_ust_f","NEE_single_ust_fqc","NEE_seasonal_ust_f","NEE_seasonal_ust_fqc",
                                               "NEE_single_ust_Tmean_f","NEE_single_ust_Tmean_fqc")])



# add component fluxes with air T as Reco driver
tmpData <- resPart[,c("Reco_single_ust","GPP_single_ust_f","GPP_single_ust_fqc")]
names(tmpData) <- c("Reco_Tair","GPP_Tair","GPP_qc_Tair")
EddyDataFilled <- cbind(EddyDataFilled, tmpData)

# add component fluxes with air & soil mean T as Reco driver
tmpData <- resPart[,c("Reco_single_ust_Tmean","GPP_single_ust_Tmean_f","GPP_single_ust_Tmean_fqc")]
names(tmpData) <- c("Reco_Tmean","GPP_Tmean","GPP_qc_Tmean")
EddyDataFilled <- cbind(EddyDataFilled, tmpData)

# save results into tab-separated text file using REddyProc's write function
fWriteDataframeToFile(EddyDataFilled, resfile, Dir=resdir)
#fWriteDataframeToFile(EddyDataFilled, resfile)

Number of NA convertered to '-9999': 37342

Wrote tab separated textfile: /content/Gap-filled-AsiaFlux2025



# ============================================================
# 📊 **5. Plotting Gap-Filled Fluxes and Diurnal Patterns**
# ============================================================

In [20]:
# plot stuff, note inconsistent case in dir/Dir argument

EddyObj$sPlotHHFluxes("NEE_single_ust_f", Dir=resdir)
EddyObj$sPlotDiurnalCycle("GPP_single_ust_f", Dir=resdir)
EddyObj$sPlotDiurnalCycle("GPP_single_ust_Tmean_f", Dir=resdir)

#EddyObj$sPlotHHFluxes("NEE_single_ust_f")
#EddyObj$sPlotDiurnalCycle("GPP_single_ust_f")
#EddyObj$sPlotDiurnalCycle("GPP_single_ust_Tmean_f")

Saved plot to: /content/All Saints Bog_2021_Flux_NEE_single_ust_f.pdf

“data length [14502] is not a sub-multiple or multiple of the number of rows [48]”
“data length [14502] is not a sub-multiple or multiple of the number of rows [48]”
“data length [14502] is not a sub-multiple or multiple of the number of rows [303]”
“.sPlotDiurnalCycleM::: No data available for month: January!”
“data length [14502] is not a sub-multiple or multiple of the number of rows [48]”
“data length [14502] is not a sub-multiple or multiple of the number of rows [48]”
“data length [14502] is not a sub-multiple or multiple of the number of rows [303]”
“.sPlotDiurnalCycleM::: No data available for month: February!”
“data length [14502] is not a sub-multiple or multiple of the number of rows [48]”
“data length [14502] is not a sub-multiple or multiple of the number of rows [48]”
“data length [14502] is not a sub-multiple or multiple of the number of rows [303]”
“data length [14502] is not a sub-multiple or multip