/
cfconventions.jl
188 lines (150 loc) · 5.73 KB
/
cfconventions.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
CFStdName(n::AbstractString) = CFStdName(Symbol(n))
macro CF_str(n)
CFStdName(n)
end
import Base.string
Base.string(n::CFStdName) = string(n.name)
"""
ncvar = CommonDataModel.ancillaryvariables(ncv::CFVariable,modifier)
Return the first ancillary variables from the NetCDF (or other format)
variable `ncv` with the
standard name modifier `modifier`. It can be used for example to access
related variable like status flags.
"""
function ancillaryvariables(ncv::CFVariable,modifier)
ds = dataset(ncv)
varname = name(ncv)
if !haskey(ncv.attrib,"ancillary_variables")
return nothing
end
ancillary_variables = split(ncv.attrib["ancillary_variables"])
for ancillary_variable in ancillary_variables
ncv_ancillary = ds[ancillary_variable]
if occursin(modifier,ncv_ancillary.attrib["standard_name"])
@debug ancillary_variable
return ncv_ancillary
end
end
# nothing found
return nothing
end
allowmissing(x::AbstractArray{T}) where {T} = convert(AbstractArray{Union{T, Missing}}, x)
"""
data = CommonDataModel.filter(ncv, indices...; accepted_status_flags = nothing)
Load and filter observations by replacing all variables without an acepted status
flag to `missing`. It is used the attribute `ancillary_variables` to identify
the status flag.
```
# da["data"] is 2D matrix
good_data = NCDatasets.filter(ds["data"],:,:, accepted_status_flags = ["good_data","probably_good_data"])
```
"""
function filter(ncv::AbstractVariable, indices...; accepted_status_flags = nothing)
#function filter_(ncv, indices...)
# accepted_status_flags = ("good_value", "probably_good_value")
data = allowmissing(ncv[indices...])
if (accepted_status_flags != nothing)
ncv_ancillary = ancillaryvariables(ncv,"status_flag");
if ncv_ancillary == nothing
error("no ancillary variable with the attribute status_flag related to $(name(ncv)) found")
end
flag_values = ncv_ancillary.attrib["flag_values"]
flag_meanings = ncv_ancillary.attrib["flag_meanings"]::String
if typeof(flag_meanings) <: AbstractString
flag_meanings = split(flag_meanings)
end
accepted_status_flag_values = zeros(eltype(flag_values),length(accepted_status_flags))
for i = eachindex(accepted_status_flags,accepted_status_flag_values)
tmp = findfirst(accepted_status_flags[i] .== flag_meanings)
if tmp == nothing
error("cannot recognise flag $(accepted_status_flags[i])")
end
accepted_status_flag_values[i] = flag_values[tmp]
end
#@debug accepted_status_flag_values
dataflag = ncv_ancillary.var[indices...];
for i in eachindex(data)
good = false;
for accepted_status_flag_value in accepted_status_flag_values
good = good || (dataflag[i] .== accepted_status_flag_value)
end
if !good
#@show i,dataflag[i]
data[i] = missing
end
end
end
return data
end
"""
cv = coord(v::Union{CFVariable,Variable},standard_name)
Find the coordinate of the variable `v` by the standard name `standard_name`
or some [standardized heuristics based on units](https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate). If the heuristics fail to detect the coordinate,
consider to modify the file to add the `standard_name` attribute.
All dimensions of the coordinate must also be dimensions of the variable `v`.
## Example
```julia
using NCDatasets
ds = NCDataset("file.nc")
ncv = ds["SST"]
lon = coord(ncv,"longitude")[:]
lat = coord(ncv,"latitude")[:]
v = ncv[:]
close(ds)
```
"""
function coord(v::AbstractVariable,standard_name)
matches = Dict(
"time" => [r".*since.*"],
# It is great to have choice!
# https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate
"longitude" => [r"degree east",r"degrees east",r"degrees_east",
r"degree_east", r"degree_E", r"degrees_E",
r"degreeE", r"degreesE"],
"latitude" => [r"degree north",r"degrees north",r"degrees_north",
r"degree_north", r"degree_N", r"degrees_N", r"degreeN",
r"degreesN"],
)
ds = dataset(v)
dims = Set(dimnames(v))
# find by standard name
for coord in varbyattrib(ds,standard_name = standard_name)
if Set(dimnames(coord)) ⊆ dims
return coord
end
end
# find by units
if haskey(matches,standard_name)
# prefer e.g. vectors over scalars
# this is necessary for ROMS model output
coordfound = nothing
coordndims = -1
for (_,coord) in ds
units = get(coord.attrib,"units","")
for re in matches[standard_name]
if match(re,units) != nothing
if Set(dimnames(coord)) ⊆ dims
if ndims(coord) > coordndims
coordfound = coord
coordndims = ndims(coord)
end
end
end
end
end
return coordfound
end
return nothing
end
"""
b = bounds(ncvar::NCDatasets.CFVariable)
Return the CFVariable corresponding to the `bounds` attribute of the variable `ncvar`.
The time units and calendar from the `ncvar` are used but not the
attributes controling the
packing of data `scale_factor`, `add_offset` and `_FillValue`.
"""
function bounds(ncvar::CFVariable)
ds = dataset(ncvar)
varname = ncvar.attrib["bounds"]
return ds[varname]
end