diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 1f4174e..5dec52b 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -15,7 +15,8 @@ jobs:
matrix:
version:
- '1'
- - '^1.7.0-0'
+ #- '^1.6' # currently covered by '1'
+ - '^1.7.0-0'
os:
- ubuntu-latest
arch:
diff --git a/Esat_abs.svg b/Esat_abs.svg
deleted file mode 100644
index 8a08a99..0000000
--- a/Esat_abs.svg
+++ /dev/null
@@ -1,132 +0,0 @@
-
-
diff --git a/Project.toml b/Project.toml
index 992f8c5..ca8804b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,35 +4,41 @@ authors = ["Thomas Wutzler and contributors"]
version = "0.2.1-DEV"
[deps]
-TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
-Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
-Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+AstroLib = "c7932e45-9af1-51e7-9da9-f004cd3a462b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
+Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b"
LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"
Pipe = "b98c9c47-44ae-5843-9183-064241ee97a0"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
-FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
-AstroLib = "c7932e45-9af1-51e7-9da9-f004cd3a462b"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
+TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
[compat]
-TimeZones = "1"
+AstroLib = "0.4.1"
+DataFrames = "1.2.2"
+FillArrays = "0.12.7"
+LabelledArrays = "1.6.5"
+Missings = "1.0.2"
+Optim = "1.4.1"
+PaddedViews = "0.5.10"
+Pipe = "1.3"
+RecursiveArrayTools = "2.20.0"
+StaticArrays = "1.2.13"
+StatsBase = "0.33.12"
Suppressor = "0.2"
-Optim = "1"
-LabelledArrays = "1"
-Missings = "1"
-StaticArrays = "1"
-julia = "1.6, 1.7"
-Pipe = "1"
-DataFrames = "1"
-RecursiveArrayTools = "2"
-FillArrays = "0.12"
-AstroLib = "0.4"
+TimeZones = "1.6.1"
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
test = ["Test"]
diff --git a/README.md b/README.md
index 23c0607..907046f 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,8 @@ file an [issue](https://github.com/bgctw/Bigleaf.jl/issues) if you need a specif
At the current state we ported
- Meteorological variables
+- Evapotranspiration
+- Potential radiation
- Unit conversions
-
-Upcoming:
-- Computation of potential radiation
+- Filtering data
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
deleted file mode 100644
index 19ab490..0000000
--- a/doc/Manifest.toml
+++ /dev/null
@@ -1,65 +0,0 @@
-# This file is machine-generated - editing it directly is not advised
-
-julia_version = "1.7.0-rc1"
-manifest_format = "2.0"
-
-[[deps.Base64]]
-uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-
-[[deps.Formatting]]
-deps = ["Printf"]
-git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8"
-uuid = "59287772-0a20-5a39-b81b-1366585eb4c0"
-version = "0.4.2"
-
-[[deps.InteractiveUtils]]
-deps = ["Markdown"]
-uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-
-[[deps.LaTeXStrings]]
-git-tree-sha1 = "c7f1c695e06c01b95a67f0cd1d34994f3e7db104"
-uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
-version = "1.2.1"
-
-[[deps.Latexify]]
-deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"]
-git-tree-sha1 = "669315d963863322302137c4591ffce3cb5b8e68"
-uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316"
-version = "0.15.8"
-
-[[deps.MacroTools]]
-deps = ["Markdown", "Random"]
-git-tree-sha1 = "5a5bc6bf062f0f95e62d0fe0a2d99699fed82dd9"
-uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
-version = "0.5.8"
-
-[[deps.Markdown]]
-deps = ["Base64"]
-uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
-
-[[deps.Printf]]
-deps = ["Unicode"]
-uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-
-[[deps.Random]]
-deps = ["Serialization"]
-uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[[deps.Requires]]
-deps = ["UUIDs"]
-git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
-uuid = "ae029012-a4dd-5104-9daa-d747884805df"
-version = "1.1.3"
-
-[[deps.SHA]]
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
-[[deps.Serialization]]
-uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-
-[[deps.UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[deps.Unicode]]
-uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
diff --git a/doc/Project.toml b/doc/Project.toml
deleted file mode 100644
index 28825c2..0000000
--- a/doc/Project.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[deps]
-Latexify = "23fbe1c1-3f47-55db-b15f-69d7ec21a316"
diff --git a/docs/make.jl b/docs/make.jl
index 33dcbda..f0e884e 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -26,9 +26,11 @@ makedocs(;
"Walkthrough" => "walkthrough.md",
hide("metorological_variables.md"),
hide("evapotranspiration.md"),
+ hide("surface_conductance.md"),
hide("global_radiation.md"),
hide("unit_conversions.md"),
hide("bigleaf_constants.md"),
+ hide("filter_data.md"),
"Index" => "autodocs.md",
],
)
diff --git a/docs/src/filter_data.md b/docs/src/filter_data.md
new file mode 100644
index 0000000..bcbeb4d
--- /dev/null
+++ b/docs/src/filter_data.md
@@ -0,0 +1,12 @@
+## Filtering data
+```@index
+Pages = ["filter_data.md",]
+```
+
+```@docs
+setinvalid_qualityflag!
+setinvalid_range!
+setinvalid_nongrowingseason!
+get_growingseason
+setinvalid_afterprecip!
+```
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index 2925b20..2949027 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -28,4 +28,9 @@ Pages = ["global_radiation.md",]
Pages = ["unit_conversions.md","bigleaf_constants.md",]
```
+## Filtering data
+```@index
+Pages = ["filter_data.md",]
+```
+
diff --git a/docs/src/surface_conductance.md b/docs/src/surface_conductance.md
new file mode 100644
index 0000000..ccfeb4c
--- /dev/null
+++ b/docs/src/surface_conductance.md
@@ -0,0 +1,10 @@
+## Surface conductance
+```@index
+Pages = ["surface_conductance.md",]
+```
+
+```@docs
+decoupling
+surface_conductance
+aerodynamic_conductance
+```
\ No newline at end of file
diff --git a/docs/src/walkthrough.md b/docs/src/walkthrough.md
index e7de8d9..d645224 100644
--- a/docs/src/walkthrough.md
+++ b/docs/src/walkthrough.md
@@ -17,7 +17,7 @@ The use of more detailed models is not within the scope of the `Bigleaf.jl` pack
In this tutorial, we will work with a dataset from the eddy covariance site Tharandt (DE-Tha), a spruce forest in Eastern Germany. The DataFrame `DE_Tha_Jun_2014` is downloaded from the `bigleaf`
[R package](https://bitbucket.org/juergenknauer/Bigleaf/) repository and contains half-hourly data of meteorological and flux measurements made in June 2014. For loading the RData into Julia, see the
-[source](https://github.com/bgctw/Bigleaf.jl/blob/main/docs/src/walkthrough.md?plain=1#L26) of this file. We give the data.frame a shorter name here.
+[source](https://github.com/bgctw/Bigleaf.jl/blob/main/docs/src/walkthrough.md?plain=1#L26) of this file. We give the data.frame a shorter name here and create a timestamp.
```@example doc
using Bigleaf
@@ -42,10 +42,11 @@ nothing
```
```@example doc
tha = DE_Tha_Jun_2014
-mdtable(select(describe(tha), :variable, :eltype, :min, :max), latex=false) # hide
+set_datetime_ydh!(tha)
+# mdtable(select(describe(tha), :variable, :eltype, :min, :max), latex=false) # hide
+nothing # hide
```
-
And the first six rows of tha:
```@example doc
mdtable(tha[1:6,:],latex=false) # hide
@@ -75,7 +76,7 @@ It is imperative that variables are provided in the right units, as the plausibi
the input units is not checked in most cases. The required units of the input arguments
can be found in the respective help file of the function. The good news is that units
do not change across functions. For example, pressure is always required in kPa,
-and temperature always in °c.
+and temperature always in °C.
## Function arguments
@@ -87,6 +88,11 @@ such that in many cases it is not necessary to provide them explicitly.
The column names in the DataFrame should correspond to the argument names
of the corresponding method that accespts each input individually.
+Methods are usually provided with two forms:
+- all required scalar inputs as positional arguments with outputing a NamedTuple
+- a mutating DataFrame with columns corresponding to required inputs
+ where the output columns are added or modified.
+
We can demonstrate the usage with a simple example:
```@example doc
@@ -94,10 +100,10 @@ We can demonstrate the usage with a simple example:
Tair, pressure, Rn, = 14.81, 97.71, 778.17
potential_ET(Tair, pressure, Rn, Val(:PriestleyTaylor))
# DataFrame
-potential_ET(tha, Val(:PriestleyTaylor))
+potential_ET!(copy(tha), Val(:PriestleyTaylor))
# DataFrame with a few columns overwritten by user values
-potential_ET(transform(tha, :Tair => x -> 25.0; renamecols=false), Val(:PriestleyTaylor))
-# varying one input only
+potential_ET!(transform(tha, :Tair => x -> 25.0; renamecols=false), Val(:PriestleyTaylor))
+# varying one input only: scalar form with dot-notation
Tair_vec = 10.0:1.0:20.0
DataFrame(potential_ET.(Tair_vec, pressure, Rn, Val(:PriestleyTaylor)))
nothing # hide
@@ -111,21 +117,139 @@ and $S$ is the sum of all storage fluxes of the ecosystem
(see e.g. Leuning et al. 2012 for an overview). For some sites, $G$ is not available,
and for most sites, only a few components of $S$ are measured.
-In `Bigleaf.jl` it is not a problem if $G$ and/or $S$ are missing (other than the results might be (slightly) biased), but special options exist for the treatment of missing $S$ and $G$ values.
+In `Bigleaf.jl` it is not a problem if $G$ and/or $S$ are missing (other than the results
+might be (slightly) biased), but special options exist for the treatment of missing
+$S$ and $G$ values.
Note that the default for G and S in the dataframe variant is missing (and assumed zero),
even if those columns are
present in the DataFrame. You need to explictly pass those columns with the optional
arguments: e.g. `potential_ET(df, Val(:PriestleyTaylor); G = df.G)`
-Note that in difference to the bigleaf R package missing entries in a provide
+Note that, in contrast to the bigleaf R package, missing entries in an input
vector are not relaced by zero by default.
You need to explitly use coalesce when specifying a ground heat flux
for which missings should be replaced by zero: `;G = coalesce(df.G, zero(df.G))`
-
# Function walkthrough #
+## Data filtering
+
+For most applications it is meaningful to filter your data. There are two main reasons
+why we want to filter our data before we start calculating ecosystem properties.
+The first one is to exclude datapoints that do not fulfill the requirements of the
+EC technique or that are of bad quality due to e.g. instrument failure or gap-filling
+with poor confidence. Note that the quality assessment of the EC data is not the purpose
+of the `Bigleaf.jl` package. This is done by other packages (e.g. `REddyProc`),
+which often provide quality control flags for the variables. These quality control
+flags are used here to filter out bad-quality datapoints.
+
+A second reason for filtering our data is that some derived properties are only
+meaningful if certain meteorological conditions are met. For instance, if we are
+interested in properties related to plant gas exchange, it makes most sense to focus on
+time periods when plants are photosynthetically active
+(i.e. in the growing season and at daytime).
+
+`Bigleaf.jl` provides methods that update (or create) the :valid column in
+a DataFrame. Records, i.e. rows, that contain non valid conditions are set to false.
+If the valid column was false before, it stays at false.
+
+### `setinvalid_qualityflag!`
+One can check quality flags. By default (argument `setvalmissing = true`) this also
+replaces the non-valid values in the data-columns by `missing`.
+```@example doc
+thaf = copy(tha) # keep the original tha DataFrame
+# if the :valid columns does not exist yet, it is created with all values true
+setinvalid_qualityflag!(thaf; vars = ["LE", "NEE"])
+sum(.!thaf.valid) # 7 records marked non-valid
+sum(ismissing.(thaf.NEE)) # 7 NEE values set to missing
+```
+In the function call above, `vars` lists the variables that should be filtered with
+respect to their quality. Optional parameter `qc_suffix="_qc"` denotes the extension
+of the variable name that identifies the column as a quality control indicator of a given
+variable. The variables `LE` and `LE_qc`, for example, denote the variable itself
+(latent heat flux), and the quality of the variable `LE`, respectively. The optional
+argument `good_quality_threshold = 1.0` specifies the values of the quality column
+below which the quality control is considered to be of acceptable quality
+(i.e. to not be filtered). For example with default value 1,
+all `LE` values whose `LE_qc` variable is larger than 1 are set to `missing`.
+The variable `missing_qc_as_bad` is required to decide what to do in
+case of missing values in the quality control variable. By default this is (conservatively)
+set to `true`, i.e. all entries where the qc variable is missing are set invalid.
+
+### `setinvalid_range!`
+
+We can filter for meteorological conditions to be in acceptable ranges.
+For each variable to check we supply the valid minimum and valid maximum as a two-tuple
+as the second component of a pair. If there is no limit towards small or
+large values, supply `-Inf` or `Inf` as the minimum or maximum respectively.
+```@example doc
+setinvalid_range!(thaf,
+ :PPFD => (200, Inf),
+ :ustar => (0.2, Inf),
+ :LE =>(0, Inf),
+ :VPD => (0.01, Inf)
+ )
+sum(.!thaf.valid) # many more records marked invalid
+minimum(skipmissing(thaf.PPFD)) >= 200 # values outside the range were set to missing
+sum(ismissing.(thaf.PPFD))
+```
+
+About half of the data were filtered because radiation was not high enough (night-time).
+Another quarter was filtered because they showed negative LE values.
+However, most of them occurred during the night:
+```@example doc
+sum(ismissing.(thaf.PPFD)) / nrow(thaf) # 0.48
+sum(.!ismissing.(thaf.PPFD) .&& ismissing.(thaf.LE)) / nrow(thaf) # 0.05
+```
+
+### `setinvalid_nongrowingseason!`
+
+A third method filters periods outside the growing season:
+```@example doc
+setinvalid_nongrowingseason!(thaf, 0.4)
+sum(.!thaf.valid) # tha dataset is all within growing season - no additional invalids
+```
+
+This function implements a simple growing season filter based on daily smoothed GPP time
+series.
+Arguments `tGPP` determines how high daily GPP has to be in relation to its peak value
+within the year. In this case, the value of 0.4 denotes that smoothed GPP has to be at
+least 40% of the 95th quantile.
+Argument `ws` controls the degree of smoothing in the timeseries
+and should be between 10-20 days.
+Its purpose is to minimize the high variation of GPP between days.
+Argument `min_int` is a parameter that avoids that data are switching from
+inside the growing season and out from one day to the next.
+It determines the minimum number of days that a season should have.
+The growing season filter is applicable to all sites with one or more growing seasons,
+but it is advisable that site-specific parameter settings are used.
+
+In this example, it does not really make sense to filter for growing season,
+since it uses only one month of data of which we know that vegetation is active at the site.
+The algorithm realizes that and does not mark any additional data as invalid.
+
+### `setinvalid_afterprecip!`
+
+As a last step we will filter for precipitation events.
+This is often meaningful for ecophysiological studies because data during and shortly
+after rainfall events do not contain much information on the physiological activity
+of the vegetation because they comprise significant fractions of evaporation from the
+soil and plant surfaces. The purpose of such a filter is mostly to minimize the fraction
+of soil and interception evaporation on the total water flux. This filter simply excludes
+periods following a precipitation event. A precipitation event, here, is defined as any time
+step with a recorded precipitation higher than `min_precip` (in mm per timestep).
+The function then filters all time periods following a precipitation event.
+The number of subsequent time periods excluded is controlled by the argument `precip_hours`.
+Here, we exclude rainfall events and the following 24 hours.
+The timestamps in the DataFrame must be sorted in increasing order.
+
+```@example doc
+setinvalid_afterprecip!(thaf; min_precip=0.02, hours_after=24)
+sum(.!thaf.valid) # some more invalids
+```
+
+
## Meteorological variables
The `Bigleaf.jl` package provides calculation routines for a number of meteorological variables, which are basic to the calculation of many other variables. A few examples on their usage are given below:
@@ -192,6 +316,26 @@ The following figure compares them at absole scale and as difference to the

+## Potential evapotranspiration
+
+For many hydrological applications, it is relevant to get an estimate on the potential
+evapotranspiration (PET). At the moment, the `Bigleaf.jl` contains two formulations
+for the estimate of PET: the Priestley-Taylor equation, and the Penman-Monteith equation:
+
+```@example doc
+potential_ET!(thaf, Val(:PriestleyTaylor); G = thaf.G, infoGS = false)
+# TODO need aerodynamic and surface conductance to compute Ga and Gs_mol before
+# potential_ET!(thaf, Val(:PenmanMonteith); G = thaf.G,
+# Gs_pot=quantile(skipmissing(thaf.Gs_mol),0.95))
+select(thaf[24:26,:], :datetime, :ET_pot, :LE_pot)
+```
+
+In the second calculation it is important to provide an estimate of aerodynamic
+conductance Ga and ``Gs_{pot}``, the potential surface conductance under optimal conditions.
+Here, we have approximated ``Gs_{pot}`` with the ``95^{\text{th}}`` percentile of all
+``G_s`` values of the site.
+
+
## Global radiation
Potential radiation for given time and latitude:
@@ -210,7 +354,7 @@ lat,long = 51.0, 13.6 # Dresden Germany
doy = 160
datetimes = DateTime(2021) .+Day(doy-1) .+ Hour.(hours) #.- Second(round(long*deg2second))
res3 = @pipe calc_sun_position_hor.(datetimes, lat, long) |> DataFrame(_)
-@df res3 scatter(datetimes, cols([:altitude,:azimuth]), legend = :topleft, xlab="Date and Time", ylab = "rad")
+@df res3 scatter(datetimes, cols([:altitude,:azimuth]), legend = :topleft, xlab="Date and Time", ylab = "rad", xrotation=6)
```
The hour-angle at noon represents the difference to
diff --git a/inst/fromR/filter_data.jl b/inst/fromR/filter_data.jl
index 00b5566..7497a1b 100755
--- a/inst/fromR/filter_data.jl
+++ b/inst/fromR/filter_data.jl
@@ -190,7 +190,7 @@ end
check_input(data,doy,year,GPP)
date = strptime(paste0(year,"-",doy),format="%Y-%j")
GPP_daily = aggregate(GPP,by=list(strftime(date)),mean,na_rm=TRUE)[,2]
- growing_season = filter_growing_season(GPP_daily,tGPP=tGPP,ws=ws,min_int=min_int)
+ growing_season = filter_growingseason(GPP_daily,tGPP=tGPP,ws=ws,min_int=min_int)
growseas_invalid = which(sapply(growing_season,rep,48) == 0)
end
@@ -315,7 +315,7 @@ end
#'
#' @importFrom stats quantile filter
#' @export
-function filter_growing_season(GPPd,tGPP,ws=15,min_int=5)
+function filter_growingseason(GPPd,tGPP,ws=15,min_int=5)
if(sum(is_na(GPPd)) < 0.5*length(GPPd))
diff --git a/inst/tha.jl b/inst/tha.jl
new file mode 100644
index 0000000..64f8771
--- /dev/null
+++ b/inst/tha.jl
@@ -0,0 +1,47 @@
+using DataFrames, Pipe, Missings
+using Dates, TimeZones
+using Statistics
+
+using Latexify
+using DataDeps, Suppressor
+using RData
+import CodecBzip2, CodecXz
+#@suppress_err # error in github-actions: GitHubActionsLogger has no field stream
+register(DataDep(
+ "DE_Tha_Jun_2014.rda",
+ "downloading exampple dataset DE_Tha_Jun_2014 from bitbucket.org/juergenknauer/bigleaf",
+ "https://bitbucket.org/juergenknauer/bigleaf/raw/0ebe11626b4409305951e8add9f6436703c82584/data/DE_Tha_Jun_2014.rda",
+ "395f02e1a1a2d175ac7499c200d9d48b1cb58ff4755dfd2d7fe96fd18258d73c"
+))
+#println(datadep"DE_Tha_Jun_2014.rda")
+ENV["DATADEPS_ALWAYS_ACCEPT"]="true" # avoid question to download
+DE_Tha_Jun_2014 = first(values(load(joinpath(datadep"DE_Tha_Jun_2014.rda/DE_Tha_Jun_2014.rda"))))
+nothing
+tha = DE_Tha_Jun_2014
+set_datetime_ydh!(tha)
+
+thaf = copy(tha)
+setinvalid_qualityflag!(thaf)
+setinvalid_range!(thaf,
+ :PPFD => (200, Inf),
+ :ustar => (0.2, Inf),
+ :LE =>(0, Inf),
+ :VPD => (0.01, Inf)
+ )
+setinvalid_nongrowingseason!(thaf, 0.4)
+setinvalid_afterprecip!(thaf; min_precip=0.02, hours_after=24)
+
+
+
+dfGPPd = @pipe tha |>
+ transform(_, :datetime => ByRow(yearmonthday) => :ymd, copycols = false) |>
+ groupby(_, :ymd) |>
+ combine(_, :datetime => (x -> Date(first(x))) => :date, :GPP => mean => :GPPd, )
+ #x -> x[!,2]
+
+show(dfGPPd.GPPd)
+
+
+GPPd = [11.288497760271033, 13.013025930772224, 12.851774960756302, 11.996453734696843, 11.635422044472458, 11.155685572574535, 10.774393322790273, 10.181774605065584, 11.257192575993637, 12.9423423493281, 12.352468963712454, 13.402045020057509, 9.53826415212825, 12.071680051895479, 13.692589149111882, 12.845505638824156, 12.378533909407755, 11.672167064607493, 10.401156075240579, 10.705716138705611, 10.207347450816693, 11.052016352768987, 13.54435911634937, 12.060648361220956, 7.758974596237143, 9.869706534050541, 12.998054057980577, 10.627359564105669, 8.685295419767499, 10.874667977293333]
+using Plots,StatsPlots
+@df dfGPPd plot(:date, [:GPPd], xlab = "Date", ylab="GPP")
diff --git a/inst/walkthrough_todo.jmd b/inst/walkthrough_todo.jmd
index 961846d..f7bfd5e 100644
--- a/inst/walkthrough_todo.jmd
+++ b/inst/walkthrough_todo.jmd
@@ -1,99 +1,141 @@
-
-
-
-## Function arguments
-
-Most functions of the `Bigleaf.jl` package require an input matrix or (more common) a DataFrame, from which the required variables are extracted. This is usually the first argument of a function. Most functions further provide default values for their arguments, such that in many cases it is not necessary to provide them explicitly.
-
-Another convenient feature of the `Bigleaf.jl` package is that it supports functions
-that accept a DataFrame with columnnames corresponding to required arguments.
-
-We can demonstrate the usage with a simple example:
-
-```julia
-potential_ET(tha,Tair="Tair",pressure="pressure",Rn="Rn",VPD="VPD",approach=Val(:PriestleyTaylor))
-potential_ET(tha)
-potential_ET(tha,Tair=tha$Tair)
-potential_ET(tha,Tair=25)
-potential_ET(Tair=25,pressure=100,Rn=200)
-```
-
-In the first line above, the input arguments are provided as the names of the DataFrame. In this case we do not need to provide them explicitly because the column names correspond to the default names of the function (i.e. the command can be written as in line 2). In the third example, we replace one variable name with a numeric vector. In the fourth row, we calculate PET for a constant temperature of 25°C, but take all other variables from the DataFrame. For some applications, in particular for exploratory or sensitivity analyses, application nr. 5 can be useful. In this case, we did not provide a DataFrame, but only numeric vectors of length one (or of any other length). This can be useful to see e.g. how sensitive the results of a given functions are with respect to one of the input variables. We could, for instance, investigate how potential ET as calculated with the Priestley-Taylor formulation changes when $R_n$ increases from 200 $\text{W m}^{-2}$ to 400 $\text{W m}^{-2}$ when all other variables are held constant:
-
-```julia
-potential_ET(Tair=25,pressure=100,Rn=200)
-potential_ET(Tair=25,pressure=100,Rn=400)
+# Setup
+
+```julia
+using Bigleaf
+using DataFrames
+
+using Latexify
+using DataDeps, Suppressor
+using RData
+import CodecBzip2, CodecXz
+#@suppress_err # error in github-actions: GitHubActionsLogger has no field stream
+register(DataDep(
+ "DE_Tha_Jun_2014.rda",
+ "downloading exampple dataset DE_Tha_Jun_2014 from bitbucket.org/juergenknauer/bigleaf",
+ "https://bitbucket.org/juergenknauer/bigleaf/raw/0ebe11626b4409305951e8add9f6436703c82584/data/DE_Tha_Jun_2014.rda",
+ "395f02e1a1a2d175ac7499c200d9d48b1cb58ff4755dfd2d7fe96fd18258d73c"
+))
+#println(datadep"DE_Tha_Jun_2014.rda")
+ENV["DATADEPS_ALWAYS_ACCEPT"]="true" # avoid question to download
+DE_Tha_Jun_2014 = first(values(load(joinpath(datadep"DE_Tha_Jun_2014.rda/DE_Tha_Jun_2014.rda"))))
+tha = DE_Tha_Jun_2014
+set_datetime_ydh!(tha)
+LAI = 7.6 # leaf area index
+zh = 26.5 # average vegetation height (m)
+zr = 42 # sensor height (m)
+Dl = 0.01 # leaf characteristic dimension (m)
```
-When using your own data, it is not mandatory to use exactly the same variable names as here, but working with `Bigleaf.jl` is easier if you do so because then the variable names do not have to be specified when calling a function.
-
-
-## Ground heat flux and storage fluxes
-
-Many functions require the available energy ($A$), which is defined as ($A = R_n - G - S$, all in $\text{W m}^{-2}$), where $R_n$ is the net radiation, $G$ is the ground heat flux, and $S$ is the sum of all storage fluxes of the ecosystem (see e.g. Leuning et al. 2012 for an overview). For some sites, $G$ is not available, and for most sites, only a few components of $S$ are measured. In `Bigleaf.jl` it is not a problem if $G$ and/or $S$ are missing (other than the results might be (slightly) biased), but special options exist for the treatment of missing $S$ and $G$ values. If the options `missing_G_as_NA = TRUE` or `missing_S_as_NA = TRUE`, then the output variable is not calculated for that time period. Otherwise missing $S$ and $G$ values are set to O automatically. Please note that the default is to ignore $S$ and $G$ values. If $G$ and/or $S$ are available, they always have to be added explicitly to the function call (by providing the column name of $G$/$S$ or a vector).
-
-
-
-
-\vspace{1cm}
-
-
-
-## Function walkthrough
-
-In the following, we explain how to use several of the package's key functions. Further information on the functions can be found on the respective function help pages and the references therein.
-
-
## Data filtering
-For most applications it is meaningful to filter your data. There are two main reasons why we want to filter our data before we start calculating ecosystem properties. The first one is to exclude datapoints that do not fulfill the requirements of the EC technique or that are of bad quality due to e.g. instrument failure or gap-filling with poor confidence. Note that the quality assessment of the EC data is not the purpose of the `Bigleaf.jl` package. This is done by other packages (e.g. `REddyProc`), which often provide quality control flags for the variables. These quality control flags are used here to filter out bad-quality datapoints.
-
-A second reason for filtering our data is that some derived properties are only meaningful if certain meteorological conditions are met. For instance, if we are interested in properties related to plant gas exchange, it makes most sense to focus on time periods when plants are photosynthetically active (i.e. in the growing season and at daytime).
-
-The Bigleaf package provides the function `filter_data` that filters the data according to the criteria described above. We start with an example where the DataFrame is filtered only with respect to data quality (`quality_control=TRUE`):
-
-```julia
-tha_filtered1 = filter_data(tha,quality_control=TRUE,vars_qc=c("LE","H","NEE","Tair","VPD","wind"),
- quality_ext="_qc",good_quality = c(0,1),missing_qc_as_bad=TRUE)
-```
-
-In the function call above, `vars_qc` lists the variables that should be filtered with respect to their quality. This is usually a vector of type character that contains the column names of the variables that are to be filtered. `quality_ext` denotes the extension of the variable name that identifies the column as a quality control indicator of a given variable. The variables "LE" and "LE_qc", for example, denote the variable itself (latent heat flux), and the quality of the variable "LE", respectively. The argument `good_quality` specifies the values that the quality control indicator has to take in order to be considered as acceptable quality (i.e. to not be filtered). For example, if `good_quality=c(0,1)`, then all "LE" values whose "LE_qc" variable is larger than 1 are set to `NA`. The variable `missing_qc_as_bad` is required to decide what to do in case of missing values in the quality control variable. By default this is (conservatively) set to `TRUE`, i.e. all entries where the qc variable is missing is filtered out. The function prints some information on the amount of data filtered out. In this case, only a few values did not fulfill the quality criteria.
-In the next example, we filter for meteorological conditions only, including growing season (`filter_growseas=TRUE`):
-
-```julia
-tha_filtered2 = filter_data(tha,quality_control=false,filter_growseas=TRUE,
- filter_vars=c("PPFD","ustar","LE","VPD"),
- filter_vals_min=c(200,0.2,0,0.01), filter_vals_max=c(NA,NA,NA,NA),
- NA_as_invalid = TRUE,
- # arguments for growing season filter:
- GPP="GPP",doy="doy",year="year",tGPP=0.4,ws=15,min_int=5)
-```
-
-The arguments `filter_vars`, `filter_vals_min`, and `filter_vals_max` control the variables to be filtered (corresponding to the column names of the DataFrame), the minimum and the maximum acceptable values, respectively. If there is no minimum or maximum, the respective entry can be set to `NA`. In this case we filter for time periods in which PPFD (photosynthetic photon flux density) has to be higher than 200 $\mu$mol m$^{-2}$ s$^{-1}$, but no maximum limit is considered.
-
-If `filter_growseas=TRUE`, the function implements a simple growing season filter based on daily smoothed GPP time series. The arguments `GPP`, `doy` and `year` are required at halfhourly/hourly time scale. GPP is aggregated to daily sums internally. The arguments `tGPP`, `ws`, and `min_int` determine how the growing season is filtered. `tGPP` determines how high daily GPP has to be in relation to its peak value within the year. In this case, the value of 0.4 denotes that smoothed GPP has to be at least 40% of the 95th quantile. `ws` controls the degree of smoothing in the timeseries (the purpose of which is to minimize the high variation of GPP between days), and should probably be between 10-20 days. `min_int` is a parameter that avoids that data are switching from inside the growing season and out from one day to the next, but determines the minimum number of days that the growing season should have. The growing season filter is applicable to all sites, with one more more growing seasons, but it's advisable that other parameter settings are used depending on the site.
-
-In this case, it does not really make sense to filter for growing season, since it's only one month of which we know that vegetation is active at the site. Luckily, the algorithm realizes that as well and does not filter out any data if `filter_growseas=TRUE` (same will happen at sites with a year-round growing season). In the function output we further see that almost half of the data were filtered because radiation was not high enough (night-time). Another 23.5% were filtered because they showed negative LE values. However, most of them occur during the night, and only 5.2% of them were not already filtered by the radiation filter (denoted as "additional data points" above).
-
-As a last step we will filter for precipitation events. This is often meaningful for ecophysiological studies because data during and shortly after rainfall events do not contain much information on the physiological activity of the vegetation (i.e. they comprise significant fractions of evaporation from the soil and plant surfaces). The purpose of such a filter is mostly to minimize the fraction of soil and interception evaporation on the total water flux. This filter simply excludes periods following a precipitation event. A precipitation event is here defined as any time step with a recorded precipitation higher than `tprecip` (in mm per timestep). The function then filters all time periods following a precipitation event. The number of subsequent time periods excluded is controlled by the argument `precip_hours`. Here, we exclude rainfall events and the following 24 hours.
-
-```julia
-tha_filtered3 = filter_data(tha,quality_control=false,filter_growseas=false,
- filter_precip=TRUE,precip="precip",tprecip=0.02,
- records_per_hour=2,precip_hours=24)
+For most applications it is meaningful to filter your data. There are two main reasons
+why we want to filter our data before we start calculating ecosystem properties.
+The first one is to exclude datapoints that do not fulfill the requirements of the
+EC technique or that are of bad quality due to e.g. instrument failure or gap-filling
+with poor confidence. Note that the quality assessment of the EC data is not the purpose
+of the `Bigleaf.jl` package. This is done by other packages (e.g. `REddyProc`),
+which often provide quality control flags for the variables. These quality control
+flags are used here to filter out bad-quality datapoints.
+
+A second reason for filtering our data is that some derived properties are only
+meaningful if certain meteorological conditions are met. For instance, if we are
+interested in properties related to plant gas exchange, it makes most sense to focus on
+time periods when plants are photosynthetically active
+(i.e. in the growing season and at daytime).
+
+`Bigleaf.jl` provides methods that update (or create) the :valid column in
+a DataFrame. For records, i.e. rows, that contain invalid conditions, :valid is set to false.
+If the valid column was false before, it stays at false.
+
+One can check quality flags. By default (argument `setvalmissing = true`) this also
+replaces the non-valid values in the data-columns by `missing`.
+```julia
+thaf = copy(tha) # keep the original tha DataFrame
+# if the :valid column does not exist yet, it is created with all values true
+setinvalid_qualityflag!(thaf; vars = ["LE", "NEE"])
+sum(.!thaf.valid) # 7 records marked non-valid
+sum(ismissing.(thaf.NEE)) # 7 NEE values set to missing
+```
+In the function call above, `vars` lists the variables that should be filtered with
+respect to their quality. Optional parameter `qc_suffix="_qc"` denotes the extension
+of the variable name that identifies the column as a quality control indicator of a given
+variable. The variables "LE" and "LE_qc", for example, denote the variable itself
+(latent heat flux), and the quality of the variable "LE", respectively. The optional
+argument `good_quality_threshold = 1.0` specifies the values of the quality column
+up to which the quality is considered acceptable
+(i.e. not filtered). For example, with the default value of 1,
+all "LE" values whose "LE_qc" variable is larger than 1 are set to `missing`.
+The variable `missing_qc_as_bad` is required to decide what to do in
+case of missing values in the quality control variable. By default this is (conservatively)
+set to `true`, i.e. all entries where the qc variable is missing are set invalid.
+
+We can filter for meteorological conditions to be in acceptable ranges.
+For each variable to check we supply the valid minimum and valid maximum as a two-tuple
+as the second component of a pair. If there is no limit towards small or
+large values, supply `-Inf` or `Inf` as the minimum or maximum respectively.
+```julia
+setinvalid_range!(thaf,
+ :PPFD => (200, Inf),
+ :ustar => (0.2, Inf),
+ :LE =>(0, Inf),
+ :VPD => (0.01, Inf)
+ )
+sum(.!thaf.valid) # many more records marked invalid
+minimum(skipmissing(thaf.PPFD)) >= 200 # values outside the range were set to missing
+sum(ismissing.(thaf.PPFD))
+```
+
+About half of the data were filtered because radiation was not high enough (night-time).
+Another quarter was filtered because they showed negative LE values.
+However, most of them occurred during the night:
+```julia
+sum(ismissing.(thaf.PPFD)) / nrow(thaf)
+sum(.!ismissing.(thaf.PPFD) .&& ismissing.(thaf.LE)) / nrow(thaf)
+```
+
+A third method filters periods outside the growing season:
+```julia
+setinvalid_nongrowingseason!(thaf, 0.4)
+sum(.!thaf.valid) # tha dataset is all within growing season - no additional invalids
+```
+
+This function implements a simple growing season filter based on daily smoothed GPP time
+series.
+Argument `tGPP` determines how high daily GPP has to be in relation to its peak value
+within the year. In this case, the value of 0.4 denotes that smoothed GPP has to be at
+least 40% of the 95th quantile.
+Argument `ws` controls the degree of smoothing in the timeseries
+and should be between 10-20 days.
+Its purpose is to minimize the high variation of GPP between days.
+Argument `min_int` is a parameter that prevents the classification from switching
+between inside and outside the growing season from one day to the next.
+It determines the minimum number of days that a season should have.
+The growing season filter is applicable to all sites with one or more growing seasons,
+but it is advisable that site-specific parameter settings are used.
+
+In this example, it does not really make sense to filter for growing season,
+since it uses only one month of data of which we know that vegetation is active at the site.
+The algorithm realizes that and does not mark any additional data as invalid.
+
+
+As a last step we will filter for precipitation events.
+This is often meaningful for ecophysiological studies because data during and shortly
+after rainfall events do not contain much information on the physiological activity
+of the vegetation because they comprise significant fractions of evaporation from the
+soil and plant surfaces. The purpose of such a filter is mostly to minimize the fraction
+of soil and interception evaporation on the total water flux. This filter simply excludes
+periods following a precipitation event. A precipitation event, here, is defined as any time
+step with a recorded precipitation higher than `min_precip` (in mm per timestep).
+The function then filters all time periods following a precipitation event.
+The number of subsequent time periods excluded is controlled by the argument `hours_after`.
+Here, we exclude rainfall events and the following 24 hours.
+The timestamps in the DataFrame must be sorted in increasing order.
+
+```julia
+setinvalid_afterprecip!(thaf; min_precip=0.02, hours_after=24)
+sum(.!thaf.valid) # some more invalids
```
-We can also do all the steps described above with a single function call, which is also the intention of the function:
-
-```julia
-tha_filtered = filter_data(tha,quality_control=TRUE,filter_growseas=TRUE,
- filter_precip=TRUE, filter_vars=c("PPFD","ustar","LE","VPD"),
- filter_vals_min=c(200,0.2,0,0.01),filter_vals_max=c(NA,NA,NA,NA),
- NA_as_invalid = TRUE,vars_qc=c("GPP","LE","H","NEE","Tair","VPD","wind"),
- quality_ext="_qc",good_quality = c(0,1),missing_qc_as_bad=TRUE,
- GPP="GPP",doy="doy",year="year",tGPP=0.4,ws=15,min_int=5,
- precip="precip",tprecip=0.02,records_per_hour=2,precip_hours=24)
-```
When looking at the function output we see that with these settings, we exclude in total 1013 data points (70.35% of the data). In total, 29.65% of all data remained. The output of the `filter_data` function is another DataFrame (tha_filtered), in which all filtered timesteps are set to NA. (Note that this is the default case. If we add `filtered_data_to_NA=TRUE`, the data are left untouched, but an additional column "valid" is added to the DataFrame that specifies whether the time points fulfill the criteria or not). In the following examples we will work mostly with the filtered DataFrame `tha_filtered`.
@@ -264,19 +306,21 @@ Here, the points denote the mean wind speed and the bars denote the standard dev
## Potential evapotranspiration
-For many hydrological applications, it is relevant to get an estimate on the potential evapotranspiration (PET). At the moment, the `Bigleaf.jl` package contains two formulations for the estimate of PET: the Priestley-Taylor equation, and the Penman-Monteith equation:
+For many hydrological applications, it is relevant to get an estimate on the potential
+evapotranspiration (PET). At the moment, the `Bigleaf.jl` contains two formulations
+for the estimate of PET: the Priestley-Taylor equation, and the Penman-Monteith equation:
```julia
-summary(potential_ET(tha_filtered,G="G",approach=Val(:PriestleyTaylor)))
-summary(potential_ET(tha_filtered,G="G",approach=Val(:PenmanMonteith)),
- Gs_pot=quantile(tha_filtered$Gs_mol,0.95,na_rm=TRUE))
+potential_ET!(thaf, Val(:PriestleyTaylor); G = thaf.G, infoGS = false)
+# TODO need surface conductance to compute Ga and Gs_mol before
+# potential_ET!(thaf, Val(:PenmanMonteith); G = thaf.G,
+# Gs_pot=quantile(skipmissing(thaf.Gs_mol),0.95))
```
-In the second calculation it is important to provide an estimate of `Gs_pot`, which corresponds to the potential surface conductance under optimal conditions. Here, we have approximated `Gs_pot` with the 95$^{\text{th}}$ percentile of all $G_s$ values of the site.
-
-
-
-
+In the second calculation it is important to provide an estimate of aerodynamic
+conductance Ga and ``Gs_{pot}``, the potential surface conductance under optimal conditions.
+Here, we have approximated ``Gs_{pot}`` with the ``95^{\text{th}}`` percentile of all
+``G_s`` values of the site.
## Energy balance closure (EBC)
diff --git a/src/Bigleaf.jl b/src/Bigleaf.jl
index 5cd056e..83c544d 100644
--- a/src/Bigleaf.jl
+++ b/src/Bigleaf.jl
@@ -9,9 +9,11 @@ using Pipe
using AstroLib
using Suppressor
using Missings
+using Statistics, StatsBase # mean, rle
+using PaddedViews, StaticArrays
+using Infiltrator
-
-export frac_hour
+export frac_hour, moving_average, get_nonoverlapping_periods, set_datetime_ydh!
export bigleaf_constants
export Esat_slope, Esat_from_Tair, Esat_from_Tair_deriv,
LE_to_ET, ET_to_LE, ms_to_mol, mol_to_ms, VPD_to_rH, rH_to_VPD,
@@ -24,7 +26,9 @@ export air_density, pressure_from_elevation, psychrometric_constant,
export calc_sun_position_MOD, calc_sun_position_hor
export potential_radiation, extraterrestrial_radiation, get_datetime_for_doy_hour
export potential_ET, potential_ET!, equilibrium_imposed_ET, equilibrium_imposed_ET!
-#export surface_conductance
+export setinvalid_range!, setinvalid_qualityflag!,
+ setinvalid_nongrowingseason!, get_growingseason, setinvalid_afterprecip!
+export decoupling, surface_conductance, aerodynamic_conductance
include("util.jl")
include("bigleaf_constants.jl")
@@ -33,5 +37,6 @@ include("meteorological_variables.jl")
include("sun_position.jl")
include("potential_radiation.jl")
include("evapotranspiration.jl")
+include("filter_data.jl")
end
diff --git a/src/evapotranspiration.jl b/src/evapotranspiration.jl
index e2a30f7..dedeb85 100755
--- a/src/evapotranspiration.jl
+++ b/src/evapotranspiration.jl
@@ -2,41 +2,40 @@
potential_ET(Tair, pressure, Rn, ::Val{:PriestleyTaylor}; ...)
potential_ET(Tair, pressure, Rn, G, S, ::Val{:PriestleyTaylor}; ...)
- potential_ET(df, approach; ...)
potential_ET!(df, approach; ...)
Potential evapotranspiration according to Priestley & Taylor 1972 or
the Penman-Monteith equation with a prescribed surface conductance.
# Arguments
-- Tair: Air temperature (degC)
-- pressure: Atmospheric pressure (kPa)
-- Rn: Net radiation (W m-2)
-- VPD: Vapor pressure deficit (kPa)
-- Ga: Aerodynamic conductance to heat/water vapor (m s-1)
-- approach: Approach used.
+- `Tair`: Air temperature (degC)
+- `pressure`: Atmospheric pressure (kPa)
+- `Rn`: Net radiation (W m-2)
+- `VPD`: Vapor pressure deficit (kPa)
+- `Ga`: Aerodynamic conductance to heat/water vapor (m s-1)
+- `df`: Data_frame or matrix containing all required variables; optional
+- `approach`: Approach used.
Either `Val(:PriestleyTaylor)` (default), or `Val(:PenmanMonteith)`.
-- df: Data_frame or matrix containing all required variables; optional
optional:
-- G: Ground heat flux (W m-2). Defaults to zero.
-- S: Sum of all storage fluxes (W m-2) . Defaults to zero.
+- `G`: Ground heat flux (W m-2). Defaults to zero.
+- `S`: Sum of all storage fluxes (W m-2) . Defaults to zero.
- `Esat_formula`: formula used in [`Esat_from_Tair`](@ref)
- `constants=`[`bigleaf_constants`](@ref)`()`: Dictionary with entries
- - cp - specific heat of air for constant pressure (J K-1 kg-1)
- - eps - ratio of the molecular weight of water vapor to dry air
- - Pa2kPa - conversion pascal (Pa) to kilopascal (kPa)
- - for :PenmanMonteith:
- - Rd - gas constant of dry air (J kg-1 K-1)
- - Rgas - universal gas constant (J mol-1 K-1)
- - Kelvin - conversion degree Celsius to Kelvin
+ - `cp` - specific heat of air for constant pressure (J K-1 kg-1)
+ - `eps` - ratio of the molecular weight of water vapor to dry air
+ - `Pa2kPa` - conversion pascal (Pa) to kilopascal (kPa)
+ - for `PenmanMonteith`:
+ - `Rd` - gas constant of dry air (J kg-1 K-1)
+ - `Rgas` - universal gas constant (J mol-1 K-1)
+ - `Kelvin` - conversion degree Celsius to Kelvin
additional optional arguments with data.frame variants
-- infoGS=ture: sete to false to avoid infor log-message if G or S is not
- specified
+- `infoGS = true`: Set to false to avoid info-log-message if G or S is not
+ specified.
additional optional for PriestleyTaylor:
-- alpha: Priestley-Taylor coefficient
+- `alpha = 1.26`: Priestley-Taylor coefficient
additional optional for PenmanMonteith:
-- Gs_pot: Potential/maximum surface conductance (mol m-2 s-1);
- defaults to 0.6 mol m-2 s-1;
+- `Gs_pot = 0.6`: Potential/maximum surface conductance (mol m-2 s-1);
+
# Details
Potential evapotranspiration is calculated according to Priestley & Taylor, 1972
@@ -67,26 +66,22 @@ care for missing values (see examples).
Both methods are provided with several forms:
- all required inputs as positional arguments
-- providing a DataFrame with columns corresponding to required inputs
- returning a DataFrame with only the result columns.
-- a mutating DataFrame version that returns the original DataFrame with
- result columns added or modified.
+- a mutating DataFrame version with columns corresponding to required inputs
+ where the output columns are added or modified.
# Value
NamedTuple or DataFrame with the following entries:
- `ET_pot`: Potential evapotranspiration (kg m-2 s-1)
- `LE_pot`: Potential latent heat flux (W m-2)
-For the mutating form, the original df with columns `ET_pot`, `LE_pot`
-updated or added.
# References
- Priestley, C_H_B., Taylor, R_J., 1972: On the assessment of surface heat flux
-and evaporation using large-scale parameters. Monthly Weather Review 100, 81-92.
+ and evaporation using large-scale parameters. Monthly Weather Review 100, 81-92.
- Allen, R_G., Pereira L_S., Raes D., Smith M., 1998: Crop evapotranspiration -
-Guidelines for computing crop water requirements - FAO Irrigation and drainage
-paper 56.
+ Guidelines for computing crop water requirements - FAO Irrigation and drainage
+ paper 56.
- Novick, K_A., et al. 2016: The increasing importance of atmospheric demand
-for ecosystem water and carbon fluxes. Nature Climate Change 6, 1023 - 1027.
+ for ecosystem water and carbon fluxes. Nature Climate Change 6, 1023 - 1027.
# See also
[`surface_conductance`](@ref)
@@ -123,11 +118,11 @@ df = DataFrame(Tair = 20.0:1.0:30.0,pressure = 100.0, Rn = 500.0, G = 105.0, VPD
allowmissing!(df, Cols(:G)); df.G[1] = missing
#
# need to provide G explicitly
-df_ET = potential_ET(df, Val(:PriestleyTaylor); G = df.G, infoGS = false)
+df_ET = potential_ET!(copy(df), Val(:PriestleyTaylor); G = df.G, infoGS = false)
ismissing(df_ET.ET_pot[1])
#
# use coalesce to replace missing values
-df_ET = potential_ET(df, Val(:PriestleyTaylor); G = coalesce.(df.G, zero(df.G)), infoGS = false)
+df_ET = potential_ET!(copy(df), Val(:PriestleyTaylor); G = coalesce.(df.G, zero(df.G)), infoGS = false)
!ismissing(df_ET.ET_pot[1])
# output
true
@@ -137,7 +132,7 @@ function potential_ET(Tair, pressure, Rn, approach::Val{:PriestleyTaylor};
G=zero(Tair),S=zero(Tair), kwargs...)
#
potential_ET(Tair, pressure, Rn, G, S, approach; kwargs...)
-end,
+end
function potential_ET(Tair, pressure, Rn, G, S, ::Val{:PriestleyTaylor};
alpha=1.26,
Esat_formula=Val(:Sonntag_1990),
@@ -148,12 +143,12 @@ function potential_ET(Tair, pressure, Rn, G, S, ::Val{:PriestleyTaylor};
LE_pot = (alpha * Delta * (Rn - G - S)) / (Delta + gamma)
ET_pot = LE_to_ET(LE_pot,Tair)
(ET_pot = ET_pot, LE_pot = LE_pot)
-end,
+end
function potential_ET(Tair, pressure, Rn, VPD, Ga, approach::Val{:PenmanMonteith};
G=zero(Tair),S=zero(Tair), kwargs...)
#
potential_ET(Tair, pressure, Rn, VPD, Ga, G, S, approach; kwargs...)
-end,
+end
function potential_ET(Tair, pressure, Rn, VPD, Ga, G, S, ::Val{:PenmanMonteith};
Gs_pot=0.6,
Esat_formula=Val(:Sonntag_1990),
@@ -167,27 +162,27 @@ function potential_ET(Tair, pressure, Rn, VPD, Ga, G, S, ::Val{:PenmanMonteith};
(Delta + gamma * (1 + Ga / Gs_pot))
ET_pot = LE_to_ET(LE_pot,Tair)
(ET_pot = ET_pot, LE_pot = LE_pot)
-end,
-function potential_ET(df, approach::Val{:PriestleyTaylor};
- G=missing,S=missing, infoGS=true, kwargs...)
- #
- dfGS = get_df_GS(df, G,S; infoGS)
- f(args...) = potential_ET(args..., approach; kwargs...)
- select(hcat(select(df,:Tair, :pressure, :Rn), dfGS; copycols = false),
- All() => ByRow(f) => AsTable
- )
-end,
-function potential_ET(df, approach::Val{:PenmanMonteith};
- G=missing,S=missing, infoGS=true, kwargs...)
- #
- dfGS = get_df_GS(df, G,S; infoGS)
- function f(args...)
- potential_ET(args..., approach; kwargs...)
- end
- select(hcat(select(df,:Tair, :pressure, :Rn, :VPD, :Ga), dfGS; copycols = false),
- All() => ByRow(f) => AsTable
- )
-end,
+end
+# function potential_ET(df, approach::Val{:PriestleyTaylor};
+# G=missing,S=missing, infoGS=true, kwargs...)
+# #
+# dfGS = get_df_GS(df, G,S; infoGS)
+# f(args...) = potential_ET(args..., approach; kwargs...)
+# select(hcat(select(df,:Tair, :pressure, :Rn), dfGS; copycols = false),
+# All() => ByRow(f) => AsTable
+# )
+# end
+# function potential_ET(df, approach::Val{:PenmanMonteith};
+# G=missing,S=missing, infoGS=true, kwargs...)
+# #
+# dfGS = get_df_GS(df, G,S; infoGS)
+# function f(args...)
+# potential_ET(args..., approach; kwargs...)
+# end
+# select(hcat(select(df,:Tair, :pressure, :Rn, :VPD, :Ga), dfGS; copycols = false),
+# All() => ByRow(f) => AsTable
+# )
+# end
function potential_ET!(df, approach::Val{:PriestleyTaylor};
G=missing,S=missing, infoGS=true, kwargs...)
dfGS = get_df_GS(df, G,S; infoGS)
@@ -199,7 +194,7 @@ function potential_ET!(df, approach::Val{:PriestleyTaylor};
[:Tair, :pressure, :Rn, :_tmp_G, :_tmp_S] => ByRow(f) => AsTable
)
select!(df, Not([:_tmp_G, :_tmp_S]))
-end,
+end
function potential_ET!(df, approach::Val{:PenmanMonteith};
G=missing,S=missing, infoGS=true, kwargs...)
dfGS = get_df_GS(df, G,S; infoGS)
@@ -250,25 +245,24 @@ end
"""
equilibrium_imposed_ET(Tair,pressure,VPD,Gs, Rn; ...)
- equilibrium_imposed_ET(df; ...)
equilibrium_imposed_ET!(df; ...)
Evapotranspiration (ET) split up into imposed ET and equilibrium ET.
# Arguments
-- Tair: Air temperature (deg C)
-- pressure: Atmospheric pressure (kPa)
-- VPD: Air vapor pressure deficit (kPa)
-- Gs: surface conductance to water vapor (m s-1)
-- Rn: Net radiation (W m-2)
+- `Tair`: Air temperature (deg C)
+- `pressure`: Atmospheric pressure (kPa)
+- `VPD`: Air vapor pressure deficit (kPa)
+- `Gs`: surface conductance to water vapor (m s-1)
+- `Rn`: Net radiation (W m-2)
optional
-- G: Ground heat flux (W m-2); optional
-- S: Sum of all storage fluxes (W m-2); optional
+- `G`: Ground heat flux (W m-2); optional
+- `S`: Sum of all storage fluxes (W m-2); optional
- `Esat_formula`: formula used in [`Esat_from_Tair`](@ref)
- `constants=`[`bigleaf_constants`](@ref)`()`: Dictionary with entries
- - cp - specific heat of air for constant pressure (J K-1 kg-1)
- - eps - ratio of the molecular weight of water vapor to dry (-)
- - Pa2kPa - conversion pascal (Pa) to kilopascal (kPa)
+ - `cp` - specific heat of air for constant pressure (J K-1 kg-1)
+ - `eps` - ratio of the molecular weight of water vapor to dry (-)
+ - `Pa2kPa` - conversion pascal (Pa) to kilopascal (kPa)
# Details
Total evapotranspiration can be written in the form (Jarvis & McNaughton 1986):
@@ -291,11 +285,7 @@ that would occur under fully coupled conditions (when Ga -> inf):
``ET_{imp} = (\\rho * cp * VPD * Gs * \\lambda) / \\gamma``
where ``\\rho`` is the air density (kg m-3).
-
-# Note
-Surface conductance (Gs) can be calculated with [`surface_conductance`](@ref)
-Aerodynamic conductance (Ga) can be calculated using [`aerodynamic_conductance`](@ref).
-
+
# Value
A `NamedTuple` or `DataFrame` with the following columns:
- `ET_eq`: Equilibrium ET (kg m-2 s-1)
@@ -320,8 +310,11 @@ true
"""
function equilibrium_imposed_ET(Tair,pressure,VPD,Gs, Rn;
G=zero(Tair),S=zero(Tair), kwargs...)
+# # Note
+# Surface conductance (Gs) can be calculated with [`surface_conductance`](@ref)
+# Aerodynamic conductance (Ga) can be calculated using [`aerodynamic_conductance`](@ref).
equilibrium_imposed_ET(Tair,pressure,VPD,Gs, Rn, G, S; kwargs...)
-end,
+end
function equilibrium_imposed_ET(Tair,pressure,VPD,Gs, Rn, G, S;
Esat_formula=Val(:Sonntag_1990),
constants=bigleaf_constants())
@@ -335,17 +328,17 @@ function equilibrium_imposed_ET(Tair,pressure,VPD,Gs, Rn, G, S;
ET_imp = LE_to_ET(LE_imp,Tair)
ET_eq = LE_to_ET(LE_eq,Tair)
(;ET_eq, ET_imp, LE_eq, LE_imp)
-end,
-function equilibrium_imposed_ET(df;
- G=missing,S=missing, infoGS=true, kwargs...)
- #
- dfGS = get_df_GS(df, G,S; infoGS)
- function f(args...)
- equilibrium_imposed_ET(args...; kwargs...)
- end
- dfb = hcat(select(df,:Tair, :pressure, :VPD, :Gs, :Rn), dfGS; copycols = false)
- select(dfb, All() => ByRow(f) => AsTable )
-end,
+end
+# function equilibrium_imposed_ET(df;
+# G=missing,S=missing, infoGS=true, kwargs...)
+# #
+# dfGS = get_df_GS(df, G,S; infoGS)
+# function f(args...)
+# equilibrium_imposed_ET(args...; kwargs...)
+# end
+# dfb = hcat(select(df,:Tair, :pressure, :VPD, :Gs, :Rn), dfGS; copycols = false)
+# select(dfb, All() => ByRow(f) => AsTable )
+# end
function equilibrium_imposed_ET!(df;
G=missing,S=missing, infoGS=true, kwargs...)
#
@@ -360,8 +353,6 @@ function equilibrium_imposed_ET!(df;
select!(df, Not([:_tmp_G, :_tmp_S]))
end
-
-
"""
TODO; implement decoupling.
diff --git a/src/filter_data.jl b/src/filter_data.jl
new file mode 100755
index 0000000..04c5a19
--- /dev/null
+++ b/src/filter_data.jl
@@ -0,0 +1,311 @@
+"""
+ setinvalid_qualityflag!(df;
+ vars=["LE","H","NEE","Tair","VPD","wind"],
+ qc_suffix="_qc",
+ good_quality_threshold = 1.0,
+ missing_qc_as_bad = true,
+ setvalmissing = true,
+ )
+
+Set records with quality flags indicating problems to false in :valid column.
+
+# Arguments
+- df: DataFrame with the value and quality-flag columns listed in `vars`
+optional
+- `vars=["LE","H","NEE","Tair","VPD","wind"]`: columns to check for quality
+- `qc_suffix="_qc"`: naming of the corresponding quality-flag column
+- `good_quality_threshold = 1.0`: threshold in quality flag up to which
+ data is considered good quality
+- `missing_qc_as_bad = true`: set to false to not mark records with missing
+ quality flag as invalid
+- `setvalmissing = true`: set to false to prevent replacing values in value column
+ corresponding to problematic quality flag to missing.
+
+# Value
+df with modified :valid and value columns.
+
+# Example
+```jldoctest; output = false
+using DataFrames
+df = DataFrame(
+ NEE = 1:3, NEE_qc = [1,1,2],
+ GPP = 10:10:30, GPP_qc = [1,missing,1])
+setinvalid_qualityflag!(df; vars = ["NEE", "GPP"])
+df.valid == [true, false, false]
+ismissing(df.GPP[2]) && ismissing(df.NEE[3])
+# output
+true
+```
+"""
+function setinvalid_qualityflag!(df; setvalmissing = true, kwargs...)
+ if setvalmissing
+ set_badquality_missing!(df; kwargs...)
+ end
+ setinvalid_qualityflag!(df, Val(false); kwargs...)
+end
+
+function set_badquality_missing!(df;
+ vars=["LE","H","NEE","Tair","VPD","wind"],
+ qc_suffix="_qc",
+ good_quality_threshold = 1.0,
+ missing_qc_as_bad = true
+ )
+ fqc(x,xqc) = if missing_qc_as_bad
+ ifelse(!ismissing(xqc) && xqc <= good_quality_threshold, x, missing)
+ else
+ ifelse(ismissing(xqc) || xqc <= good_quality_threshold, x, missing)
+ end
+ tmp = map(vars) do var
+ qcvar = var * qc_suffix
+ [var, qcvar] => ByRow(fqc) => var
+ end
+ transform!(df, tmp...)
+end
+
+function setinvalid_qualityflag!(df, setvalsmissing::Val{false};
+ vars=["LE","H","NEE","Tair","VPD","wind"],
+ qc_suffix="_qc",
+ good_quality_threshold = 1.0,
+ missing_qc_as_bad=true
+ )
+ vars_qc = vars .* qc_suffix
+ fvalid_var = missing_qc_as_bad ? valid_nonmissingvar_ : valid_missingvar_
+ function fsel(valid, x...)
+ nvar = length(x) ÷ 2
+ vs = x[1:nvar]
+ vs_qc = x[(nvar+1):end]
+ #valid = repeat(@MVector([true]), length(vs[1]))
+ for i = 1:nvar
+ # TODO change when 1.7 is out which support .&&: @. valid = valid && fvalid_var(vs[i], vs_qc[i], good_quality_threshold)
+ @. valid = valid & fvalid_var(vs[i], vs_qc[i], good_quality_threshold)
+ end
+ valid
+ end
+ if !hasproperty(df, :valid) df[!,:valid] .= true; end
+ select!(df, :, Cols(vcat("valid", vars, vars_qc)) => fsel => :valid)
+end
+
+function valid_nonmissingvar_(x, xqc, good_quality_threshold)
+ !ismissing(x) && !ismissing(xqc) && xqc <= good_quality_threshold
+end
+function valid_missingvar_(x, xqc, good_quality_threshold)
+ !ismissing(x) && (ismissing(xqc) || xqc <= good_quality_threshold)
+end
+
+"""
+ setinvalid_range!(df, var_ranges...; setvalmissing = true, ...)
+
+Set records with values outside specified ranges to false in :valid column.
+
+If there is no limit towards small or
+large values, supply `-Inf` or `Inf` as the minimum or maximum respectively.
+If there were false values in the :value column before, they are kept false.
+In addition, set values outside ranges to missing.
+
+# Arguments
+- `df`: DataFrame with the columns referenced in `var_ranges`
+- `var_ranges`: Pair `Varname_symbol => (min,max)`: closed valid interval for
+ respective column
+optional
+- setvalmissing: set to false to prevent replacing out-of-range values in the value columns with missing.
+
+# Value
+df with modified :valid and value columns.
+```jldoctest; output = false
+using DataFrames
+df = DataFrame(NEE = [missing, 2,3], GPP = 10:10:30)
+setinvalid_range!(df, :NEE => (-2.0,4.0), :GPP => (8.0,28.0))
+df.valid == [false, true, false]
+ismissing(df.GPP[3])
+# output
+true
+```
+"""
+function setinvalid_range!(df, var_ranges::Vararg{Pair,N}; setvalmissing = true, kwargs...) where N
+ if setvalmissing
+ set_badrange_missing!(df, var_ranges...; kwargs...)
+ end
+ setinvalid_range!(df, Val(false), var_ranges...; kwargs...)
+end
+
+function set_badrange_missing!(df, var_ranges::Vararg{Pair,N}) where N
+ tmp = map(var_ranges) do p
+ var, (min, max) = p
+ var => ByRow(x -> (ifelse(!ismissing(x) && min <= x <= max, x, missing))) => var
+ end
+ transform!(df, tmp...)
+end
+
+function setinvalid_range!(df, setvalsmissing::Val{false}, var_ranges::Vararg{Pair,N}) where N
+ function fval(valid, x...)
+ for (p,xj) in zip(var_ranges, x)
+ var, (min, max) = p
+ #TODO simplify once 1.7 with .&& is out: @. valid = valid && !ismissing(xi) && (min <= xi <= max)
+ for i in eachindex(xj)
+ valid[i] = valid[i] && !ismissing(xj[i]) && (min <= xj[i] <= max)
+ end
+ end
+ valid
+ end
+ vars = map(x -> x.first, var_ranges)
+ if !hasproperty(df, :valid) df[!,:valid] .= true; end
+ select!(df, :, SA[:valid, vars...] => fval => :valid)
+end
+
+"""
+ setinvalid_nongrowingseason!(df, tGPP; kwargs...)
+
+Set records outside the growing season to false in :valid column.
+
+# Arguments
+- `df`: DataFrame with columns `:GPP` and `:datetime`
+- `tGPP`: scalar threshold of daily GPP (see [`get_growingseason`](@ref))
+optional:
+- `update_GPPd`: set to true additionally update `:GPPd_smoothed` column to
+ results from [`get_growingseason`](@ref)
+- and others passed to [`get_growingseason`](@ref)
+
+# Value
+df with modified column :valid (and, if `update_GPPd_smoothed = true`, column `:GPPd_smoothed`),
+where all non-growing season records are set to false.
+"""
+function setinvalid_nongrowingseason!(df, tGPP; update_GPPd_smoothed = false, kwargs...)
+ if !hasproperty(df, :valid) df[!,:valid] .= true; end
+ # non-copying dataframe where we can add grouping column __day
+ dft = transform(df, :datetime => ByRow(Date) => :__day, copycols = false)
+ function mean_nonmissing(x)
+ mx = mean(skipmissing(x))
+ # if there is no non-missing record in a group, mean returns nan
+ ifelse(isnan(mx), missing, mx)
+ end
+ dfday = combine(groupby(dft, :__day), :GPP => mean_nonmissing => :GPPd, nrow)
+ transform!(dfday,
+ :GPPd => (x -> get_growingseason(x, tGPP; kwargs...)) => SA[:valid, :GPPd_smoothed])
+ # rep(dfd.valid, each = df.nrow)
+ #TODO simplify once 1.7 is out: df[!,:valid] .= df.valid .&& vcat(fill.(dfday.valid, dfday.nrow)...)
+ df[!,:valid] .= df.valid .& vcat(fill.(dfday.valid, dfday.nrow)...)
+ if update_GPPd_smoothed
+ df[!,:GPPd_smoothed] .= vcat(fill.(dfday.GPPd_smoothed, dfday.nrow)...)
+ end
+ df
+end
+
+"""
+ get_growingseason(GPPd, tGPP; ws=15, min_int=5, warngap=true)
+
+Filters annual time series for growing season based on smoothed daily GPP data.
+
+# Arguments
+- `GPPd`: daily GPP (any unit)
+- `tGPP`: GPP threshold (fraction of 95th percentile of the GPP time series).
+ Takes values between 0 and 1.
+optional
+- `ws`: window size used for GPP time series smoothing
+- `min_int`: minimum time interval in days for a given state of growing season
+- `warngap`: set to false to suppress warning on too few non-missing data
+
+# Details
+The basic idea behind the growing season filter is that vegetation is
+considered to be active when its carbon uptake (GPP) is above a specified
+threshold, which is defined relative to the peak GPP (95th percentile)
+observed in the year.
+The GPP-threshold is calculated as:
+
+``GPP_{threshold} = quantile(GPPd,0.95)*tGPP``
+
+GPPd time series are smoothed with a moving average to avoid fluctuations
+in the delineation of the growing season. The window size defaults to 15
+days, but depending on the ecosystem, other values can be appropriate.
+
+The argument `min_int` serves to avoid short fluctuations in the
+status growing season vs. no growing season by defining a minimum length
+of the status. If a time interval shorter than `min_int` is labeled
+as growing season or non-growing season, it is changed to the status of
+the neighboring values, i.e. its opposite.
+
+# Value
+A NamedTuple with entries
+- `is_growingseason`: a `BitVector` of the same length as the input GPPd in which `false`
+  indicates no growing season (dormant season) and `true` indicates growing season.
+- `GPPd_smoothed`: smoothed GPPd
+"""
+function get_growingseason(GPPd,tGPP;ws=15,min_int=5,warngap=true)
+ nday = length(GPPd)
+ if sum(.!ismissing.(GPPd)) < 0.5*nday
+ error("Expected number of available GPPd data " *
+ "to be at least half the total number of days ($nday). " *
+ "But was only ($(sum(.!ismissing.(GPPd)))).")
+ end
+ GPP_threshold = quantile(skipmissing(GPPd), 0.95)*tGPP
+ #@show GPP_threshold
+ # smooth GPP
+ #fromR: GPPd_smoothed = filter(GPPd,method="convolution",filter=rep(1/ws,ws))
+ GPPd_smoothed = moving_average(GPPd, ws)
+ # set values at the beginning and end of the time series to the mean of original values
+ wsd = floor(Int, ws/2)
+ GPPd_smoothed[1:wsd] .= mean(skipmissing(GPPd[1:(2*wsd)]))
+ GPPd_smoothed[(length(GPPd)-(wsd-1)):length(GPPd)] .=
+ mean(skipmissing(GPPd[(length(GPPd)-(2*wsd-1)):length(GPPd)]))
+  # check for occurrence of missing values and set to mean of the values surrounding them
+ imissing = findall(ismissing, GPPd_smoothed)
+ if length(imissing) > 0
+ warngap && length(imissing) > 10 &&
+ @warn "Attention, there is a gap in 'GPPd' of length n = $(length(imissing))"
+ #TODO check and correct for several gaps, see Impute package
+ replace_val = mean(skipmissing(GPPd_smoothed[max(1,imissing[1] - 4):min(
+ (imissing[length(imissing)] + 4),length(GPPd_smoothed))]))
+ GPPd_smoothed = coalesce.(GPPd_smoothed, replace_val)
+ end
+ # filter daily GPP
+ growseas = GPPd_smoothed .>= GPP_threshold
+ # change short intervals to the surrounding values to avoid 'wrong' fluctuations
+ # switch shortest interval successively and recompute interval lengths
+ intervals = rle(growseas)[2]
+ imin = argmin(intervals)
+ while intervals[imin] < min_int
+ end_int = cumsum(intervals)
+ start_int = vcat(0, end_int[1:end-1]) .+ 1
+ growseas[start_int[imin]:end_int[imin]] .= !growseas[start_int[imin]]
+ intervals = rle(growseas)[2]
+ imin = argmin(intervals)
+ end
+ (is_growingseason = growseas, GPPd_smoothed = GPPd_smoothed)
+end
+
+"""
+ setinvalid_afterprecip!(df; min_precip = 0.01, hours_after = 24.0)
+
+Set records after precipitation to false in `:valid` column.
+
+# Arguments
+- df: DataFrame with columns `:datetime` and `:precip` sorted by `:datetime`
+ in increasing order.
+optional:
+- `min_precip` (in mm per timestep): minimum precip to be considered effective
+ precipitation.
+- `hours_after`: time after the precipitation event to be considered invalid
+
+# Value
+`df` with modified column `:valid`.
+"""
+function setinvalid_afterprecip!(df; min_precip = 0.01, hours_after = 24.0)
+ dfo = @pipe copy(df) |>
+ select!(_, :datetime, :precip) |>
+ subset!(_, :precip => ByRow(>=(min_precip)); skipmissing = true) |>
+ select!(_,
+ :datetime => :p_start,
+ :datetime => (x -> x .+ frac_hour(Second, hours_after)) => :p_end)
+ dfno = get_nonoverlapping_periods(dfo)
+ #
+ if !hasproperty(df, :valid) df[!,:valid] .= true; end
+ #ip = 1
+  # since datetime is ordered we need to search for the next index only from the last position
+ i_start = i_end = 1
+ for ip in 1:nrow(dfno)
+ p_start, p_end = dfno.p_start[ip], dfno.p_end[ip]
+ i_start = i_end - 1 + findfirst(>=(p_start), df.datetime[i_end:end])
+ i_end = i_start - 1 + findlast(<=(p_end), df.datetime[i_start:end])
+ df.valid[i_start:i_end] .= false
+ end
+ df
+end
\ No newline at end of file
diff --git a/src/util.jl b/src/util.jl
index aece4e3..7ca46f9 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -17,8 +17,8 @@
# end
"""
- frac_hour(float::AbstractFloat)
- frac_hour(period::Type{<:Period}, float::AbstractFloat)
+ frac_hour(float::Number)
+ frac_hour(period::Type{<:Period}, float::Number)
Create a period in given type (defaults to `Nanosecond`) from
fractional hours.
@@ -30,12 +30,95 @@ frac_hour(1+1/60) == Hour(1) + Minute(1)
true
```
"""
-function frac_hour(period::Type{<:Period}, float::AbstractFloat)
+function frac_hour(period::Type{<:Period}, float::Number)
#adapted from https://stackoverflow.com/a/51448499
full_hour, Δ = divrem(float, 1)
partial = period(round(Dates.value(period(Hour(1))) * Δ))
Hour(full_hour) + partial
end
-frac_hour(float::AbstractFloat) = frac_hour(Nanosecond, float)
+frac_hour(float::Number) = frac_hour(Nanosecond, float)
+
+"""
+ moving_average(vs,n; nmin = n ÷ 2)
+
+Compute the moving average over a vector allowing for missings.
+
+# Arguments
+- vs: numeric vector to average over
+- n: window size: number of items to average
+- nmin: minimum number of non-missing records
+
+Values outside the edges are assumed missing.
+
+If the number of non-missing records within a window is smaller than nmin
+then the averaged value is assumed missing. This avoids average at edges of
+periods with many missings to be very sensitive to the edge values.
+"""
+function moving_average(vs,n; nmin = n ÷ 2)
+ kernel = -trunc(Int,(n-1)/2):ceil(Int,(n-1)/2)
+ #[mean(skipmissing(@view vs[i:(i+n-1)])) for i in 1:(length(vs)-(n-1))]
+ vsp = PaddedView(missing, vs, (-n:length(vs)+n,))
+ fagg = function(x)
+ sum(.!ismissing.(x)) < nmin && return(missing)
+ mean(skipmissing(x))
+ end
+ #ans = [fagg(@view vsp[i.+kernel]) for i in 100:101]
+ ans = [fagg(@view vsp[i.+kernel]) for i in 1:length(vs)]
+end
+
+"""
+ get_nonoverlapping_periods(dfo::DataFrame)
+
+Get non-overlapping periods.
+
+# Arguments
+- `dfo`: DataFrame with columns `p_start` and `p_end` sorted by increasing `p_start`
+
+# Value
+- DataFrame with columns `p_start` and `p_end` with `p_start[i] > p_end[i-1]`
+"""
+function get_nonoverlapping_periods(dfo::DataFrame)
+ nrow(dfo) == 0 && return(dfo)
+ dfno = DataFrame()
+ p_start, p_end = dfo.p_start[1], dfo.p_end[1]
+ for i in 2:(nrow(dfo)-1)
+ if dfo.p_start[i] <= p_end
+ # if current row is in previous intervals, just extend the previous interval
+ p_end = max(p_end, dfo.p_end[i])
+ else
+ # push the previous interval and take this row as interval
+ push!(dfno, (p_start = p_start, p_end = p_end))
+ p_start, p_end = dfo.p_start[i], dfo.p_end[i]
+ end
+ end
+ if dfo.p_start[end] <= p_end
+ # last row start within previous interval: push extended interval
+ push!(dfno, (p_start = p_start, p_end = max(p_end, dfo.p_end[end])))
+ else
+ # last row not in previous interval: push both interval and last row
+ push!(dfno, (p_start = p_start, p_end = p_end))
+ push!(dfno, (p_start = dfo.p_start[end], p_end = dfo.p_end[end]))
+ end
+ dfno
+end
+
+"""
+ set_datetime_ydh!(df, timezone = tz"UTC+0";
+ year = df.year, doy = df.doy, hour = df.hour)
+
+Update or add DateTime column :datetime according to year, day of year, and fractional hour.
+
+# Value
+`df` with updated or added column `:datetime` moved to the first position.
+"""
+function set_datetime_ydh!(df, timezone = tz"UTC+0";
+ year = df.year, doy = df.doy, hour = df.hour)
+ df[!,:datetime] = @. ZonedDateTime(
+ DateTime(year) + Day(doy-1) + frac_hour(Second, hour), timezone)
+ select!(df, :datetime, :)
+end
+
+
+
diff --git a/test/Project.toml b/test/Project.toml
index 6ea5027..749d32a 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -2,6 +2,10 @@
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Pipe = "b98c9c47-44ae-5843-9183-064241ee97a0"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
diff --git a/test/evapotranspiration.jl b/test/evapotranspiration.jl
index bb18b46..e3414f2 100644
--- a/test/evapotranspiration.jl
+++ b/test/evapotranspiration.jl
@@ -13,12 +13,12 @@ end
@testset "potential_ET dataframe" begin
df = DataFrame(
Tair = 20.0:1.0:30.0,pressure = 100.0, Rn = 500.0)
- df_ET = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") potential_ET(df, Val(:PriestleyTaylor); alpha=1.26)
- # non-mutating
- @test ncol(df) == 3
- @test nrow(df_ET) == nrow(df)
- @test ≈(last(df_ET).ET_pot, 0.0002035969; rtol = 1e-5)
- @test ≈(last(df_ET).LE_pot, 494.7202; rtol = 1e-5)
+ # df_ET = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") potential_ET(df, Val(:PriestleyTaylor); alpha=1.26)
+ # # non-mutating
+ # @test ncol(df) == 3
+ # @test nrow(df_ET) == nrow(df)
+ # @test ≈(last(df_ET).ET_pot, 0.0002035969; rtol = 1e-5)
+ # @test ≈(last(df_ET).LE_pot, 494.7202; rtol = 1e-5)
#
df2 = copy(df)
# df2.VPD .= 2.0 # fail in older versions
@@ -29,9 +29,9 @@ end
# [] => ByRow(() -> 2.0) => :VPD,
# [] => ByRow(() -> 0.1) => :Ga,
# )
- df_ET2 = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") potential_ET(df2, Val(:PenmanMonteith))
- @test ncol(df2) == 5
- @test nrow(df_ET2) == nrow(df2)
+ # df_ET2 = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") potential_ET(df2, Val(:PenmanMonteith))
+ # @test ncol(df2) == 5
+ # @test nrow(df_ET2) == nrow(df2)
#TODO surface_conductance(Tair=20,pressure=100,VPD=2,Ga=0.1,Rn=400,LE=LE_pot_PM)
#
# mutating
@@ -49,9 +49,8 @@ end
df = @pipe DataFrame(Tair = 20.0:1.0:30.0,pressure = 100.0, Rn = 500.0, G = 105.0) |>
allowmissing(_, Cols(:G))
df.G[1] = missing
- df_ET = @test_logs (:info,r"S is not provided") potential_ET(df, Val(:PriestleyTaylor); G = df.G)
- # non-mutating
- @test ncol(df) == 4
+ df_ET = @test_logs (:info,r"S is not provided") potential_ET!(copy(df), Val(:PriestleyTaylor); G = df.G)
+ @test ncol(df_ET) == ncol(df)+2 # two columns added: ET_pot, LE_pot
@test nrow(df_ET) == nrow(df)
@test last(df_ET).ET_pot < 0.0002035969 # smaller because less energy available
@test ismissing(first(df_ET).LE_pot)
@@ -59,21 +58,11 @@ end
df2 = copy(df)
df2[!, :VPD] .= 2.0
df2[!, :Ga] .= 0.1
- df_ET2 = @test_logs (:info,r"G is not provided") potential_ET(df2, Val(:PenmanMonteith), S = df.G)
- @test ncol(df2) == 6
+ df_ET2 = @test_logs (:info,r"G is not provided") potential_ET!(copy(df2), Val(:PenmanMonteith), S = df.G)
+ @test ncol(df_ET2) == ncol(df2)+2 # two columns added: ET_pot, LE_pot
@test nrow(df_ET2) == nrow(df2)
@test ismissing(first(df_ET).LE_pot)
#TODO surface_conductance(Tair=20,pressure=100,VPD=2,Ga=0.1,Rn=400,LE=LE_pot_PM)
- #
- # mutating
- dfm = copy(df)
- #@test_throws Exception
- @test_logs (:info,r"G is not provided") potential_ET!(dfm, Val(:PriestleyTaylor); S = dfm.G)
- #dfm = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") hcat(dfm, Bigleaf.fill_GS_missings(dfm, missing, missing, false, false); copycols = false)
- #potential_ET!(dfm, Val(:PriestleyTaylor); alpha=1.26)
- @test ncol(dfm) == ncol(df)+2 # two columns added: ET_pot, LE_pot
- dfm = @test_logs (:info,r"S is not provided") potential_ET!(copy(df2), Val(:PenmanMonteith); G = dfm.G)
- @test ncol(dfm) == ncol(df2)+2
end
@testset "equilibrium_imposed_ET scalars" begin
@@ -87,14 +76,15 @@ end
#
df = DataFrame(Tair = 20.0:1.0:22.0, pressure = 100.0, Rn = 50.0, VPD = 0.5, Gs = 0.01)
ncoldf0 = ncol(df)
- df_ET = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") equilibrium_imposed_ET(df)
- @test ncol(df) == ncoldf0
- @test nrow(df_ET) == nrow(df)
- @test ≈(first(df_ET).ET_eq, ET_eq)
- @test ≈(first(df_ET).ET_imp, ET_imp)
+ # df_ET = @test_logs (:info,r"G is not provided") (:info,r"S is not provided") equilibrium_imposed_ET(df)
+ # @test ncol(df) == ncoldf0
+ # @test nrow(df_ET) == nrow(df)
+ # @test ≈(first(df_ET).ET_eq, ET_eq)
+ # @test ≈(first(df_ET).ET_imp, ET_imp)
#
dfm = copy(df)
dfm_ET = equilibrium_imposed_ET!(dfm; infoGS = false)
+ @test dfm_ET === dfm
@test ncol(dfm) == ncoldf0 + 4
@test nrow(dfm) == nrow(df)
@test ≈(first(dfm_ET).ET_eq, ET_eq)
diff --git a/test/filter_data.jl b/test/filter_data.jl
new file mode 100644
index 0000000..c3e6dff
--- /dev/null
+++ b/test/filter_data.jl
@@ -0,0 +1,128 @@
+@testset "get_growingseason" begin
+ rng = StableRNG(815)
+ GPPd = @pipe sin.((1:365).*π./365) .+ 0.5 .* rand(rng,365) |> allowmissing(_)
+ GPPd[100:120] .= missing
+ #plot(1:365, GPPd)
+ growseas = @test_logs (:warn, r"gap in 'GPPd'") get_growingseason(GPPd, 0.5)
+ #plot(growseas)
+ @test rle(growseas.is_growingseason) == (Bool[0, 1, 0], [51, 258, 56])
+ growseas = get_growingseason(GPPd, 0.5; min_int = 56, warngap = false)
+ @test rle(growseas.is_growingseason) == (Bool[1, 0], [309, 56])
+end
+
+@testset "get_growingseason with few data" begin
+ rng = StableRNG(815)
+ GPPd = @pipe sin.((1:365).*π./365) .+ 0.5 .* rand(rng,365) |> allowmissing(_)
+ GPPd[1:200] .= missing
+ @test_throws ErrorException get_growingseason(GPPd, 0.5)
+end
+
+@testset "setinvalid_nongrowingseason!" begin
+ rng = StableRNG(815)
+ nday = 365
+ GPPd = @pipe sin.((1:nday).*π./365) .+ 0.5 .* rand(rng,365) |> allowmissing(_)
+ GPPd[100:120] .= missing
+ df = DataFrame(datetime = DateTime(2021) .+ Hour(1).+ Day.(0:nday-1), GPP = GPPd)
+ df2 = copy(df)
+ df2a = setinvalid_nongrowingseason!(df2, 0.5; warngap = false)
+ @test df2a === df2
+ @test propertynames(df2a) == union(propertynames(df), SA[:valid])
+ @test rle(df2a.valid) == (Bool[0, 1, 0], [51, 258, 56])
+ #
+ df2.valid[80:90] .= false
+ df2a = setinvalid_nongrowingseason!(df2, 0.5; warngap = false, update_GPPd_smoothed = true)
+ @test df2a === df2
+ @test propertynames(df2a) == union(propertynames(df), SA[:valid, :GPPd_smoothed])
+ @test rle(df2a.valid) == (Bool[0, 1, 0, 1, 0], [51, 28, 11, 219, 56])
+end
+
+@testset "setinvalid_qualityflag!" begin
+ df = DataFrame(
+ NEE = 1:3,
+ GPP = 10:10:30,
+ NEE_qc = [1,1,2],
+ GPP_qc = [1,missing,1],
+ )
+ df2 = copy(df)
+ df2a = setinvalid_qualityflag!(df2; vars = SA["NEE", "GPP"], setvalmissing = false)
+ @test df2a === df2
+ @test df2a.valid == [true, false, false]
+ @test df2a.NEE == 1:3 # not set to missing
+ # test with existing :valid column, where already false entries are kept
+ df2.valid[1] = false
+ df2a = setinvalid_qualityflag!(df2; vars = SA["NEE", "GPP"], setvalmissing = false)
+ @test df2a === df2
+ @test df2a.valid == [false, false, false]
+ @test df2a.NEE == 1:3 # not set to missing
+ #
+ # test with setting values to missing
+ df2 = copy(df)
+ df2a = setinvalid_qualityflag!(df2; vars = SA["NEE", "GPP"], setvalmissing = true)
+ @test df2a === df2
+ @test df2a.valid == [true, false, false]
+ @test isequal(df2a.NEE, [1,2,missing])
+ @test isequal(df2a.GPP, [10,missing,30])
+ df2 = copy(df)
+ df2a = setinvalid_qualityflag!(df2; vars = SA["NEE", "GPP"], setvalmissing = true, missing_qc_as_bad = false)
+ @test df2a === df2
+ @test df2a.valid == [true, true, false]
+ @test isequal(df2a.NEE, [1,2,missing])
+ @test isequal(df2a.GPP, [10,20,30])
+end
+
+@testset "setinvalid_range!" begin
+ df = DataFrame(
+ NEE = 1:3,
+ GPP = 10:10:30,
+ )
+ allowmissing!(df, :NEE)
+ var_ranges = [:NEE => (-2.0,4.0), :GPP => (8.0,28.0)]
+ df2 = copy(df)
+ df3 = copy(df2)
+ df2a = setinvalid_range!(df2, var_ranges...; setvalmissing = false)
+ # @btime setinvalid_range!($df2, $(var_ranges)...; setvalmissing = false)
+ @test df2a === df2
+ @test df2a.valid == [true, true, false]
+ df3a = setinvalid_range!(df3, var_ranges...)
+ @test df3a === df3
+ @test isequal(df3a.valid, df2a.valid)
+ @test ismissing(df3a.GPP[3])
+ @test all(.!ismissing.(df3a.GPP[1:2]))
+ #
+ df2.NEE[1] = missing
+ df3 = copy(df2)
+ df2b = setinvalid_range!(df2, var_ranges...; setvalmissing = false)
+ @test df2b === df2
+ @test df2b.valid == [false, true, false]
+ df3a = setinvalid_range!(df3, var_ranges...)
+ @test isequal(df3a.valid, df2a.valid)
+ @test ismissing(df3a.GPP[3])
+ @test all(.!ismissing.(df3a.GPP[1:2]))
+ @test all(.!ismissing.(df3a.NEE[2:3]))
+ #
+ df2.valid[2] = false
+ df3 = copy(df2)
+ df2b = setinvalid_range!(df2, var_ranges...; setvalmissing = false)
+ @test df2b === df2
+ @test df2b.valid == [false, false, false]
+ df3a = setinvalid_range!(df3, var_ranges...)
+ @test isequal(df3a.valid, df2a.valid)
+end
+
+@testset "setinvalid_afterprecip!" begin
+ min_precip = 0.01
+ hours_after = 1.0
+ dfo = DataFrame(datetime = DateTime(2021) .+ Hour.(1:13), precip = 0.0)
+ dfo.precip[3] = 0.001
+ dfo.precip[10:12] .= 2.0
+ dfo.precip[6:8] .= 2.0
+ allowmissing!(dfo, :precip); dfo.precip[2] = missing
+ #
+ df = copy(dfo)
+ df2 = setinvalid_afterprecip!(df; min_precip, hours_after)
+ @test df2 === df
+ @test all(.!df.valid[6:9])
+ @test all(.!df.valid[10:13])
+ @test all(df.valid[Not(vcat(6:9,10:13))])
+end
+
diff --git a/test/runtests.jl b/test/runtests.jl
index ee4f73c..61d9f07 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,8 @@
using Bigleaf
-using Test
+using Test, StableRNGs
using Pipe, DataFrames, Dates, TimeZones
+using StaticArrays
+using Statistics, StatsBase
@testset "Bigleaf" begin
@testset "util" begin
@@ -9,6 +11,9 @@ using Pipe, DataFrames, Dates, TimeZones
@testset "unit_conversions" begin
include("unit_conversions.jl")
end
+ @testset "filter_data" begin
+ include("filter_data.jl")
+ end
@testset "meteorological_variables" begin
include("meteorological_variables.jl")
end
diff --git a/test/util.jl b/test/util.jl
index a28d2b3..34b72ca 100644
--- a/test/util.jl
+++ b/test/util.jl
@@ -2,3 +2,46 @@
@test frac_hour(Minute, 1+1/60) == Hour(1) + Minute(1)
@test frac_hour(1+1/60) == Hour(1) + Minute(1)
end
+
+@testset "moving_average" begin
+ GPPd = @pipe sin.((1:365).*π./365) .+ 0.5 .* rand(365) |> allowmissing(_)
+ GPPd[100:120] .= missing
+ GPPd_smooth = moving_average(GPPd, 15)
+ #plot(1:365, GPPd)
+ #plot!(1:365, GPPd_smooth)
+ @test length(GPPd_smooth) == length(GPPd)
+ @test GPPd_smooth[1] == mean(GPPd[1:8])
+ @test GPPd_smooth[8] == mean(GPPd[1:15])
+ @test isfinite(GPPd_smooth[100])
+ @test ismissing(GPPd_smooth[101])
+end
+
+@testset "get_nonoverlapping_periods" begin
+ dfo = DataFrame(
+ p_start = DateTime(2021) .+ Hour.(SA[0,4,5,10]),
+ p_end = DateTime(2021) .+ Hour.(SA[2,5,6,11]),
+ )
+ dfno = get_nonoverlapping_periods(dfo)
+ @test dfno.p_start == DateTime(2021) .+ Hour.(SA[0,4,10])
+ @test dfno.p_end == DateTime(2021) .+ Hour.(SA[2,6,11])
+ #
+ # extend last row
+ dfo = DataFrame(
+ p_start = DateTime(2021) .+ Hour.(SA[0,4,5]),
+ p_end = DateTime(2021) .+ Hour.(SA[2,5,6]),
+ )
+ dfno = get_nonoverlapping_periods(dfo)
+ @test dfno.p_start == DateTime(2021) .+ Hour.(SA[0,4])
+ @test dfno.p_end == DateTime(2021) .+ Hour.(SA[2,6])
+end
+
+@testset "set_datetime_ydh!" begin
+ dfo = DataFrame(year = 2021, doy = repeat(1:3, inner = 48), hour = repeat(0.0:0.5:23.5, outer = 3))
+ df = copy(dfo)
+ df2 = set_datetime_ydh!(df)
+ @test df2 === df
+ @test df2.datetime[1] == ZonedDateTime(DateTime(2021), tz"UTC")
+ @test all(Dates.year.(df2.datetime) .== dfo.year)
+ @test all(Dates.dayofyear.(df2.datetime) .== dfo.doy)
+ @test all(Dates.hour.(df2.datetime) .+ Dates.minute.(df2.datetime)/60 .== dfo.hour)
+end