-
Notifications
You must be signed in to change notification settings - Fork 19
/
write.py
745 lines (583 loc) · 28.1 KB
/
write.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
from os.path import abspath

import numpy

from ..cfimplementation import implementation
from ..decorators import _manage_log_level_via_verbosity
from ..functions import _DEPRECATION_ERROR_FUNCTION_KWARGS, flat
from .netcdf import NetCDFWrite

# MPI support is currently disabled: the real flag import is commented
# out and the module-level flag is hard-coded to False, so the
# conditional import below never runs.
# from . import mpi_on
mpi_on = False
if mpi_on:
    from . import mpi_comm, mpi_rank, mpi_size

# Module-level netCDF writer, shared by every call to `write`.
netcdf = NetCDFWrite(implementation())
@_manage_log_level_via_verbosity
def write(
    fields,
    filename,
    fmt="NETCDF4",
    mode="w",
    overwrite=True,
    global_attributes=None,
    file_descriptors=None,
    external=None,
    Conventions=None,
    datatype=None,
    least_significant_digit=None,
    endian="native",
    compress=0,
    fletcher32=False,
    shuffle=True,
    reference_datetime=None,
    verbose=None,
    cfa_options=None,
    single=None,
    double=None,
    variable_attributes=None,
    string=True,
    warn_valid=True,
    group=True,
    coordinates=False,
    HDF_chunksizes=None,
    no_shuffle=None,
    unlimited=None,
):
    """Write field constructs to a netCDF file.

    **File format**

    See the *fmt* parameter for details on which output netCDF file
    formats are supported.

    **NetCDF variable and dimension names**

    These names are stored within constructs read from a dataset, or
    may be set manually. They are used when writing a field construct
    to the file. If a name has not been set then one will be
    constructed (usually based on the standard name if it exists). The
    names may be modified internally to prevent duplication in the
    file.

    Each construct, or construct component, that corresponds to a
    netCDF variable has the following methods to get, set and remove a
    netCDF variable name: `!nc_get_variable`, `!nc_set_variable` and
    `!nc_del_variable`.

    The domain axis construct has the following methods to get, set
    and remove a netCDF dimension name:
    `~cf.DomainAxis.nc_get_dimension`,
    `~cf.DomainAxis.nc_set_dimension` and
    `~cf.DomainAxis.nc_del_dimension`.

    **NetCDF attributes**

    Field construct properties may be written as netCDF global
    attributes and/or netCDF data variable attributes. See the
    *file_descriptors*, *global_attributes* and *variable_attributes*
    parameters for details.

    **External variables**

    Metadata constructs marked as external are omitted from the file
    and referred to via the netCDF "external_variables" global
    attribute. However, omitted constructs may be written to an
    external file (see the *external* parameter for details).

    **NetCDF unlimited dimensions**

    Domain axis constructs that correspond to NetCDF unlimited
    dimensions may be accessed with the
    `~cf.DomainAxis.nc_is_unlimited` and
    `~cf.DomainAxis.nc_set_unlimited` methods of a domain axis
    construct.

    **NetCDF hierarchical groups**

    Hierarchical groups in CF provide a mechanism to structure
    variables within netCDF4 datasets with well defined rules for
    resolving references to out-of-group netCDF variables and
    dimensions. The group structure defined by a field construct's
    netCDF interface will, by default, be recreated in the output
    dataset. See the *group* parameter for details.

    **NetCDF4 HDF chunk sizes**

    HDF5 chunksizes may be set on a construct's data. See the
    `~cf.Data.nc_hdf5_chunksizes`,
    `~cf.Data.nc_clear_hdf5_chunksizes` and
    `~cf.Data.nc_set_hdf5_chunksizes` methods of a `Data` instance.

    .. seealso:: `cf.read`

    :Parameters:

        fields: (arbitrarily nested sequence of) `Field` or `FieldList`
            The field constructs to write to the file.

        filename: `str`
            The output netCDF file name. Various type of expansion are
            applied to the file names.

            Relative paths are allowed, and standard tilde and shell
            parameter expansions are applied to the string.

            *Parameter example:*
              The file file.nc in the user's home directory could be
              described by any of the following: ``'$HOME/file.nc'``,
              ``'${HOME}/file.nc'``, ``'~/file.nc'``,
              ``'~/tmp/../file.nc'``.

        fmt: `str`, optional
            The format of the output file. One of:

            ==========================  ================================
            *fmt*                       Output file type
            ==========================  ================================
            ``'NETCDF4'``               NetCDF4 format file. This is the
                                        default.

            ``'NETCDF4_CLASSIC'``       NetCDF4 classic format file (see
                                        below)

            ``'NETCDF3_CLASSIC'``       NetCDF3 classic format file
                                        (limited to file sizes less
                                        than 2GB).

            ``'NETCDF3_64BIT_OFFSET'``  NetCDF3 64-bit offset format
                                        file

            ``'NETCDF3_64BIT'``         An alias for
                                        ``'NETCDF3_64BIT_OFFSET'``

            ``'NETCDF3_64BIT_DATA'``    NetCDF3 64-bit offset format
                                        file with extensions (see below)

            ``'CFA'`` or ``'CFA4'``     CFA-netCDF4 format file

            ``'CFA3'``                  CFA-netCDF3 classic format file
            ==========================  ================================

            By default the format is ``'NETCDF4'``.

            All formats support large files (i.e. those greater than
            2GB) except ``'NETCDF3_CLASSIC'``.

            ``'NETCDF3_64BIT_DATA'`` is a format that requires version
            4.4.0 or newer of the C library (use `cf.environment` to
            see which version of the netCDF-C library is in use). It
            extends the ``'NETCDF3_64BIT_OFFSET'`` binary format to
            allow for unsigned/64 bit integer data types and 64-bit
            dimension sizes.

            ``'NETCDF4_CLASSIC'`` files use the version 4 disk format
            (HDF5), but omits features not found in the version 3
            API. They can be read by HDF5 clients. They can also be
            read by netCDF3 clients only if they have been re-linked
            against the netCDF4 library.

            ``'NETCDF4'`` files use the version 4 disk format (HDF5)
            and use the new features of the version 4 API.

        mode: `str`, optional
            Specify the mode of write access for the output file. One of:

            ========  =================================================
            *mode*    Description
            ========  =================================================
            ``'w'``   Open a new file for writing to. If it exists and
                      *overwrite* is True then the file is deleted
                      prior to being recreated.

            ``'a'``   Open an existing file for appending new
                      information to. The new information will be
                      incorporated whilst the original contents of the
                      file will be preserved.

                      In practice this means that new fields will be
                      created, whilst the original fields will not be
                      edited at all. Coordinates on the fields, where
                      equal, will be shared as standard.

                      For append mode, note the following:

                      * Global attributes on the file
                        will remain the same as they were originally,
                        so will become inaccurate where appended fields
                        have incompatible attributes. To rectify this,
                        manually inspect and edit them as appropriate
                        after the append operation using methods such as
                        `nc_clear_global_attributes` and
                        `nc_set_global_attribute`.

                      * Fields with incompatible ``featureType`` to
                        the original file cannot be appended.

                      * At present fields with groups cannot be
                        appended, but this will be possible in a future
                        version. Groups can however be cleared, the
                        fields appended, and groups re-applied, via
                        methods such as `nc_clear_variable_groups` and
                        `nc_set_variable_groups`, to achieve the same
                        for now.

                      * At present domain ancillary constructs of
                        appended fields may not be handled correctly
                        and can appear as extra fields. Set them on the
                        resultant fields using `set_domain_ancillary`
                        and similar methods if required.

            ``'r+'``  Alias for ``'a'``.

            ========  =================================================

            By default the file is opened with write access mode
            ``'w'``.

        overwrite: `bool`, optional
            If False then raise an error if the output file
            pre-exists. By default a pre-existing output file is
            overwritten.

        Conventions: (sequence of) `str`, optional
            Specify conventions to be recorded by the netCDF global
            "Conventions" attribute. By default the current
            conventions are always included, but if an older CF
            conventions is defined then this is used instead.

            *Parameter example:*
              ``Conventions='UGRID-1.0'``

            *Parameter example:*
              ``Conventions=['UGRID-1.0']``

            *Parameter example:*
              ``Conventions=['CMIP-6.2', 'UGRID-1.0']``

            *Parameter example:*
              ``Conventions='CF-1.7'``

            *Parameter example:*
              ``Conventions=['CF-1.7', 'UGRID-1.0']``

            Note that if the "Conventions" property is set on a field
            construct then it is ignored.

        file_descriptors: `dict`, optional
            Create description of file contents netCDF global
            attributes from the specified attributes and their
            values.

            If any field construct has a property with the same name
            then it will be written as a netCDF data variable
            attribute, even if it has been specified by the
            *global_attributes* parameter, or has been flagged as
            global on any of the field constructs (see
            `cf.Field.nc_global_attributes` for details).

            Identification of the conventions being adhered to by the
            file are not specified as a file descriptor, but by the
            *Conventions* parameter instead.

            *Parameter example:*
              ``file_attributes={'title': 'my data'}``

            *Parameter example:*
              ``file_attributes={'history': 'created 2019-01-01', 'foo': 'bar'}``

        global_attributes: (sequence of) `str`, optional
            Create netCDF global attributes from the specified field
            construct properties, rather than netCDF data variable
            attributes.

            These attributes are in addition to the following field
            construct properties, which are created as netCDF global
            attributes by default:

            * the description of file contents properties (as defined
              by the CF conventions), and

            * properties flagged as global on any of the field
              constructs being written (see
              `cf.Field.nc_global_attributes` for details).

            Note that it is not possible to create a netCDF global
            attribute from a property that has different values for
            different field constructs being written. In this case
            the property will not be written as a netCDF global
            attribute, even if it has been specified by the
            *global_attributes* parameter or is one of the default
            properties, but will appear as an attribute on the netCDF
            data variable corresponding to each field construct that
            contains the property.

            Any global attributes that are also specified as file
            descriptors will not be written as netCDF global
            variables, but as netCDF data variable attributes
            instead.

            *Parameter example:*
              ``global_attributes='project'``

            *Parameter example:*
              ``global_attributes=['project']``

            *Parameter example:*
              ``global_attributes=['project', 'experiment']``

        variable_attributes: (sequence of) `str`, optional
            Create netCDF data variable attributes from the specified
            field construct properties.

            By default, all field construct properties that are not
            created as netCDF global properties are created as
            attributes on netCDF data variables. See the
            *global_attributes* parameter for details.

            Any field construct property named by the
            *variable_attributes* parameter will always be created as
            a netCDF data variable attribute.

            *Parameter example:*
              ``variable_attributes='project'``

            *Parameter example:*
              ``variable_attributes=['project']``

            *Parameter example:*
              ``variable_attributes=['project', 'doi']``

        external: `str`, optional
            Write metadata constructs that have data and are marked as
            external to the named external file. Ignored if there are
            no such constructs.

        cfa_options: `dict`, optional
            A dictionary giving parameters for configuring the output
            CFA-netCDF file:

            ==========  ===============================================
            Key         Value
            ==========  ===============================================
            ``'base'``  * If ``None`` (the default) then file names
                          within CFA-netCDF files are stored with
                          absolute paths.

                        * If set to an empty string then file names
                          within CFA-netCDF files are given relative to
                          the directory or URL base containing the
                          output CFA-netCDF file.

                        * If set to a string then file names within
                          CFA-netCDF files are given relative to the
                          directory or URL base described by the
                          value. For example: ``'../archive'``.
            ==========  ===============================================

            By default no parameters are specified.

        endian: `str`, optional
            The endian-ness of the output file. Valid values are
            ``'little'``, ``'big'`` or ``'native'``. By default the
            output is native endian. See the `netCDF4 package
            <http://unidata.github.io/netcdf4-python>`_ for more
            details.

            *Parameter example:*
              ``endian='big'``

        compress: `int`, optional
            Regulate the speed and efficiency of compression. Must be
            an integer between ``0`` and ``9``. ``0`` means no
            compression; ``1`` is the fastest, but has the lowest
            compression ratio; ``9`` is the slowest but best
            compression ratio. The default value is ``0``. An error is
            raised if compression is requested for a netCDF3 output
            file format. See the `netCDF4 package
            <http://unidata.github.io/netcdf4-python>`_ for more
            details.

            *Parameter example:*
              ``compress=4``

        least_significant_digit: `int`, optional
            Truncate the input field construct data arrays, but not
            the data arrays of metadata constructs. For a given
            positive integer, N the precision that is retained in the
            compressed data is 10 to the power -N. For example, a
            value of 2 will retain a precision of 0.01. In conjunction
            with the *compress* parameter this produces 'lossy', but
            significantly more efficient, compression. See the
            `netCDF4 package
            <http://unidata.github.io/netcdf4-python>`_ for more
            details.

            *Parameter example:*
              ``least_significant_digit=3``

        fletcher32: `bool`, optional
            If True then the Fletcher-32 HDF5 checksum algorithm is
            activated to detect compression errors. Ignored if
            *compress* is ``0``. See the `netCDF4 package
            <http://unidata.github.io/netcdf4-python>`_ for details.

        shuffle: `bool`, optional
            If True (the default) then the HDF5 shuffle filter (which
            de-interlaces a block of data before compression by
            reordering the bytes by storing the first byte of all of a
            variable's values in the chunk contiguously, followed by
            all the second bytes, and so on) is applied. By default
            the filter is applied because if the data array values are
            not all wildly different, using the filter can make the
            data more easily compressible. Ignored if the *compress*
            parameter is ``0`` (which is its default value). See the
            `netCDF4 package
            <http://unidata.github.io/netcdf4-python>`_ for more
            details.

            This parameter replaces the deprecated *no_shuffle*
            parameter.

        datatype: `dict`, optional
            Specify data type conversions to be applied prior to
            writing data to disk. This may be useful as a means of
            packing, or because the output format does not support a
            particular data type (for example, netCDF3 classic files
            do not support 64-bit integers). By default, input data
            types are preserved. Any data type conversion is only
            applied to the arrays on disk, and not to the input field
            constructs themselves.

            Data types conversions are defined by `numpy.dtype`
            objects in a dictionary whose keys are input data types
            with values of output data types.

            *Parameter example:*
              To convert 64-bit integers to 32-bit integers:
              ``datatype={numpy.dtype('int64'):
              numpy.dtype('int32')}``.

        single: `bool`, optional
            If True then write 64-bit floats as 32-bit floats and
            64-bit integers as 32-bit integers.

            If False then write 32-bit floats as 64-bit floats and
            32-bit integers as 64-bit integers.

            By default, input data types are preserved.

            .. note:: ``single=True`` is exactly equivalent to
                      ``double=False``, as well as
                      ``datatype={numpy.dtype(float):
                      numpy.dtype('float32'), numpy.dtype(int):
                      numpy.dtype('int32')}``.

                      ``single=False`` is exactly equivalent to
                      ``double=True``.

        double: `bool`, optional
            If True then write 32-bit floats as 64-bit floats and
            32-bit integers as 64-bit integers.

            If False then write 64-bit floats as 32-bit floats and
            64-bit integers as 32-bit integers.

            By default, input data types are preserved.

            .. note:: ``double=True`` is exactly equivalent to
                      ``single=False``, as well as
                      ``datatype={numpy.dtype('float32'):
                      numpy.dtype(float), numpy.dtype('int32'):
                      numpy.dtype(int)}``.

                      ``double=False`` is exactly equivalent to
                      ``single=True``.

        string: `bool`, optional
            By default string-valued construct data are written as
            netCDF arrays of type string if the output file format is
            ``'NETCDF4'``, or of type char with an extra dimension
            denoting the maximum string length for any other output
            file format (see the *fmt* parameter). If *string* is False
            then string-valued construct data are written as netCDF
            arrays of type char with an extra dimension denoting the
            maximum string length, regardless of the selected output
            file format.

        verbose: `int` or `str` or `None`, optional
            If an integer from ``-1`` to ``3``, or an equivalent string
            equal ignoring case to one of:

            * ``'DISABLE'`` (``0``)
            * ``'WARNING'`` (``1``)
            * ``'INFO'`` (``2``)
            * ``'DETAIL'`` (``3``)
            * ``'DEBUG'`` (``-1``)

            set for the duration of the method call only as the minimum
            cut-off for the verboseness level of displayed output (log)
            messages, regardless of the globally-configured `cf.log_level`.
            Note that increasing numerical value corresponds to increasing
            verbosity, with the exception of ``-1`` as a special case of
            maximal and extreme verbosity.

            Otherwise, if `None` (the default value), output messages will
            be shown according to the value of the `cf.log_level` setting.

            Overall, the higher a non-negative integer or equivalent string
            that is set (up to a maximum of ``3``/``'DETAIL'``) for
            increasing verbosity, the more description that is printed to
            convey how constructs map to output netCDF dimensions, variables
            and attributes.

        warn_valid: `bool`, optional
            If False then do not print a warning when writing
            "out-of-range" data, as indicated by the values, if
            present, of any of the ``valid_min``, ``valid_max`` or
            ``valid_range`` properties on field and metadata
            constructs that have data. By default a warning is printed
            if any such construct has any of these properties in
            combination with out-of-range data.

            The consequence of writing out-of-range data values is
            that, by default, these values will be masked when the
            file is subsequently read.

            *Parameter example:*
              If a construct has ``valid_max`` property with value
              ``100`` and data with maximum value ``999``, then the
              resulting warning may be suppressed by setting
              ``warn_valid=False``.

            .. versionadded:: 3.4.0

        group: `bool`, optional
            If False then create a "flat" netCDF file, i.e. one with
            only the root group, regardless of any group structure
            specified by the field constructs. By default any groups
            defined by the netCDF interface of the field constructs
            and its components will be created and populated.

            .. versionadded:: 3.6.0

        coordinates: `bool`, optional
            If True then include CF-netCDF coordinate variable names
            in the 'coordinates' attribute of output data
            variables. By default only auxiliary and scalar coordinate
            variables are included.

            .. versionadded:: (cfdm) 3.7.0

        HDF_chunksizes: deprecated at version 3.0.0
            HDF chunk sizes may be set for individual constructs prior
            to writing, instead. See `cf.Data.nc_set_hdf5_chunksizes`.

        no_shuffle: deprecated at version 3.0.0
            Use keyword *shuffle* instead.

        unlimited: deprecated at version 3.0.0
            Use method `DomainAxis.nc_set_unlimited` instead.

    :Returns:

        `None`

    **Examples:**

    There are further worked examples
    :ref:`in the tutorial <Writing-to-a-netCDF-dataset>`.

    >>> cf.write(f, 'file.nc')

    >>> cf.write(f, 'file.nc', fmt='NETCDF3_CLASSIC')

    >>> cf.write(f, 'file.nc', external='cell_measures.nc')

    >>> cf.write(f, 'file.nc', Conventions='CMIP-6.2')

    """
    # Reject the deprecated keywords with an informative error.
    if unlimited is not None:
        _DEPRECATION_ERROR_FUNCTION_KWARGS(
            "cf.write",
            {"unlimited": unlimited},
            "Use method 'DomainAxis.nc_set_unlimited' instead.",
        )  # pragma: no cover

    if no_shuffle is not None:
        _DEPRECATION_ERROR_FUNCTION_KWARGS(
            "cf.write",
            {"no_shuffle": no_shuffle},
            "Use keyword 'shuffle' instead.",
        )  # pragma: no cover

    if HDF_chunksizes is not None:
        _DEPRECATION_ERROR_FUNCTION_KWARGS(
            "cf.write",
            {"HDF_chunksizes": HDF_chunksizes},
            "HDF chunk sizes may be set for individual field constructs "
            "prior to writing, instead.",
        )  # pragma: no cover

    # Flatten the sequence of input fields
    fields = tuple(flat(fields))

    # NOTE(review): this local assignment shadows the module-level
    # `mpi_on` flag and makes the MPI block below unreachable. It is
    # kept as-is pending a decision on restoring parallel-write
    # support.
    mpi_on = False
    if mpi_on:
        path = abspath(filename)
        paths = mpi_comm.allgather(path)
        unique_paths = set(paths)
        n_unique_paths = len(unique_paths)
        if n_unique_paths == 1:
            write_only_on_pe0 = True
        elif n_unique_paths == mpi_size:
            write_only_on_pe0 = False
        else:
            raise RuntimeError(
                "write expects either one unique filename or as many as "
                "there are PEs"
            )

        if write_only_on_pe0 and not mpi_rank == 0:
            mpi_comm.Barrier()
            return
    # --- End: if

    if fields:
        # Resolve the mutually-exclusive double and single parameters
        # into a datatype conversion mapping.
        if datatype:
            if single is not None:
                raise ValueError("Can't set datatype and single")

            if double is not None:
                raise ValueError("Can't set datatype and double")
        # --- End: if

        if single is not None and double is not None:
            raise ValueError(
                "Can't set both the single and double parameters"
            )

        # single=False means double=True, and vice versa
        if single is not None and not single:
            double = True

        if double is not None and not double:
            single = True

        if single:
            datatype = {
                numpy.dtype(float): numpy.dtype("float32"),
                numpy.dtype(int): numpy.dtype("int32"),
            }

        if double:
            datatype = {
                numpy.dtype("float32"): numpy.dtype(float),
                numpy.dtype("int32"): numpy.dtype(int),
            }

        extra_write_vars = {
            "cfa": False,
            "cfa_options": {},
            "reference_datetime": reference_datetime,
        }

        # CFA options: map the CFA pseudo-formats onto their
        # underlying netCDF formats and record the CFA settings.
        if fmt in ("CFA", "CFA4"):
            extra_write_vars["cfa"] = True
            fmt = "NETCDF4"
            if cfa_options:
                extra_write_vars["cfa_options"] = cfa_options
        elif fmt == "CFA3":
            extra_write_vars["cfa"] = True
            fmt = "NETCDF3_CLASSIC"
            if cfa_options:
                extra_write_vars["cfa_options"] = cfa_options
        # --- End: if

        if extra_write_vars["cfa"]:
            # Ensure that "CFA" is recorded in the Conventions global
            # attribute, alongside any user-supplied conventions.
            if Conventions:
                if isinstance(Conventions, str):
                    Conventions = (Conventions,)

                Conventions = tuple(Conventions) + ("CFA",)
            else:
                Conventions = "CFA"
        # --- End: if

        netcdf.write(
            fields,
            filename,
            fmt=fmt,
            mode=mode,
            overwrite=overwrite,
            global_attributes=global_attributes,
            variable_attributes=variable_attributes,
            file_descriptors=file_descriptors,
            external=external,
            Conventions=Conventions,
            datatype=datatype,
            least_significant_digit=least_significant_digit,
            endian=endian,
            compress=compress,
            shuffle=shuffle,
            fletcher32=fletcher32,
            verbose=verbose,
            string=string,
            warn_valid=warn_valid,
            group=group,
            coordinates=coordinates,
            extra_write_vars=extra_write_vars,
        )
    # --- End: if

    # Unreachable while MPI support is disabled (see above)
    if mpi_on and write_only_on_pe0:
        mpi_comm.Barrier()