-
Notifications
You must be signed in to change notification settings - Fork 12
/
stagyyparsers.py
891 lines (770 loc) · 31.9 KB
/
stagyyparsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
"""Parsers of StagYY output files.
Note:
These functions are low level utilities. You should not use these unless
you know what you are doing. To access StagYY output data, use an instance
of :class:`~stagpy.stagyydata.StagyyData`.
"""
from functools import partial
from itertools import product, repeat
from operator import itemgetter
from xml.etree import ElementTree as xmlET
import re
import numpy as np
import pandas as pd
import h5py
from .error import ParsingError
from .phyvars import FIELD_FILES_H5, SFIELD_FILES_H5
def _tidy_names(names, nnames, extra_names=None):
"""Truncate or extend names so that its len is nnames.
The list is modified, this function returns nothing.
Args:
names (list): list of names.
nnames (int): desired number of names.
extra_names (list of str): list of names to be used to extend the list
if needed. If this list isn't provided, a range is used instead.
"""
if len(names) < nnames and extra_names is not None:
names.extend(extra_names)
names.extend(range(nnames - len(names)))
del names[nnames:]
def time_series(timefile, colnames):
    """Read temporal series text file.

    If :data:`colnames` is too long, it will be truncated. If it is too short,
    additional numeric column names from 0 to N-1 will be attributed to the N
    extra columns present in :data:`timefile`.

    Args:
        timefile (:class:`pathlib.Path`): path of the time.dat file.
        colnames (list of names): names of the variables expected in
            :data:`timefile` (may be modified).

    Returns:
        :class:`pandas.DataFrame`:
            Time series, with the variables in columns and the time steps in
            rows.  None if :data:`timefile` does not exist.
    """
    if not timefile.is_file():
        return None
    # read everything as str, then coerce to numeric so that malformed
    # entries become NaN instead of aborting the parsing
    # NOTE(review): delim_whitespace / error_bad_lines / warn_bad_lines are
    # legacy pandas options (removed in recent pandas) — this targets an
    # older pandas; confirm the pinned version
    data = pd.read_csv(timefile, delim_whitespace=True, dtype=str,
                       header=None, skiprows=1, index_col=0,
                       engine='c', memory_map=True,
                       error_bad_lines=False, warn_bad_lines=False)
    data = data.apply(pd.to_numeric, raw=True, errors='coerce')
    # detect useless lines produced when run is restarted: walking backwards
    # from the last row, any earlier row whose istep (the index) is >= the
    # current row's istep belongs to an overwritten run segment
    rows_to_del = []
    irow = len(data) - 1
    while irow > 0:
        iprev = irow - 1
        while iprev >= 0 and data.index[irow] <= data.index[iprev]:
            rows_to_del.append(iprev)
            iprev -= 1
        irow = iprev
    if rows_to_del:
        rows_to_keep = set(range(len(data))) - set(rows_to_del)
        data = data.take(list(rows_to_keep))
    ncols = data.shape[1]
    # make colnames match the actual number of columns found in the file
    _tidy_names(colnames, ncols)
    data.columns = colnames
    return data
def time_series_h5(timefile, colnames):
    """Read temporal series HDF5 file.

    If :data:`colnames` is too long, it will be truncated. If it is too short,
    additional column names will be deduced from the content of the file.

    Args:
        timefile (:class:`pathlib.Path`): path of the TimeSeries.h5 file.
        colnames (list of names): names of the variables expected in
            :data:`timefile` (may be modified).

    Returns:
        :class:`pandas.DataFrame`:
            Time series, with the variables in columns and the time steps in
            rows.  None if :data:`timefile` does not exist.
    """
    if not timefile.is_file():
        return None
    with h5py.File(timefile, 'r') as h5f:
        dset = h5f['tseries']
        _, ncols = dset.shape
        ncols -= 1  # first is istep
        # take extra names from the file past the ones already provided;
        # the +1 presumably skips the leading istep name — TODO confirm
        h5names = map(bytes.decode, h5f['names'][len(colnames) + 1:])
        _tidy_names(colnames, ncols, h5names)
        data = dset[()]
    # column 0 holds istep and becomes the (integer) index
    pdf = pd.DataFrame(data[:, 1:],
                       index=np.int_(data[:, 0]), columns=colnames)
    # remove duplicated lines in case of restart
    return pdf.loc[~pdf.index.duplicated(keep='last')]
def _extract_rsnap_isteps(rproffile):
    """Extract istep and compute list of rows to delete.

    Scans the rprof text file for '***step: ... ; time = ...' banner lines
    separating the radial profiles of successive time steps.

    Args:
        rproffile (:class:`pathlib.Path`): path of the rprof.dat file.

    Returns:
        tuple: (isteps, rows_to_del) where isteps is a list of
            (istep, time, number_of_data_lines) tuples and rows_to_del is
            a set of data-row indices belonging to overwritten run
            segments (produced when the run is restarted).
    """
    step_regex = re.compile(r'^\*+step:\s*(\d+) ; time =\s*(\S+)')
    isteps = []  # list of (istep, time, nz)
    rows_to_del = set()
    line = ' '
    with rproffile.open() as stream:
        # skip anything before the first banner line
        while line[0] != '*':
            line = stream.readline()
        match = step_regex.match(line)
        istep = int(match.group(1))
        time = float(match.group(2))
        nlines = 0  # data lines seen for the current step
        iline = 0  # data lines seen so far in the whole file
        for line in stream:
            if line[0] == '*':
                # a new banner closes the current step
                isteps.append((istep, time, nlines))
                match = step_regex.match(line)
                istep = int(match.group(1))
                time = float(match.group(2))
                nlines = 0
                # remove useless lines produced when run is restarted:
                # pop every recorded step with an istep >= the new one
                nrows_to_del = 0
                while isteps and istep <= isteps[-1][0]:
                    nrows_to_del += isteps.pop()[-1]
                rows_to_del = rows_to_del.union(
                    range(iline - nrows_to_del, iline))
            else:
                nlines += 1
                iline += 1
        # close the last step of the file
        isteps.append((istep, time, nlines))
    return isteps, rows_to_del
def rprof(rproffile, colnames):
    """Extract radial profiles data.

    If :data:`colnames` is too long, it will be truncated. If it is too short,
    additional numeric column names from 0 to N-1 will be attributed to the N
    extra columns present in :data:`rproffile`.

    Args:
        rproffile (:class:`pathlib.Path`): path of the rprof.dat file.
        colnames (list of names): names of the variables expected in
            :data:`rproffile` (may be modified).

    Returns:
        tuple of :class:`pandas.DataFrame`: (profs, times)
            :data:`profs` are the radial profiles, with the variables in
            columns and rows double-indexed with the time step and the radial
            index of numerical cells.
            :data:`times` is the dimensionless time indexed by time steps.
            (None, None) if :data:`rproffile` does not exist.
    """
    if not rproffile.is_file():
        return None, None
    # banner lines start with '*' and are skipped via comment='*';
    # NOTE(review): error_bad_lines/warn_bad_lines/delim_whitespace are
    # legacy pandas options removed in recent pandas — confirm pinned version
    data = pd.read_csv(rproffile, delim_whitespace=True, dtype=str,
                       header=None, comment='*', skiprows=1,
                       engine='c', memory_map=True,
                       error_bad_lines=False, warn_bad_lines=False)
    data = data.apply(pd.to_numeric, raw=True, errors='coerce')
    # banner lines carry istep/time and delimit restart-overwritten rows
    isteps, rows_to_del = _extract_rsnap_isteps(rproffile)
    if rows_to_del:
        rows_to_keep = set(range(len(data))) - rows_to_del
        data = data.take(list(rows_to_keep))
    # build the (istep, radial cell index) double index
    id_arr = [[], []]
    for istep, _, n_z in isteps:
        id_arr[0].extend(repeat(istep, n_z))
        id_arr[1].extend(range(n_z))
    data.index = id_arr
    ncols = data.shape[1]
    _tidy_names(colnames, ncols)
    data.columns = colnames
    df_times = pd.DataFrame(list(map(itemgetter(1), isteps)),
                            index=map(itemgetter(0), isteps))
    return data, df_times
def rprof_h5(rproffile, colnames):
    """Extract radial profiles data.

    If :data:`colnames` is too long, it will be truncated. If it is too short,
    additional column names will be deduced from the content of the file.

    Args:
        rproffile (:class:`pathlib.Path`): path of the rprof HDF5 file.
        colnames (list of names): names of the variables expected in
            :data:`rproffile` (may be modified).

    Returns:
        tuple of :class:`pandas.DataFrame`: (profs, times)
            :data:`profs` are the radial profiles, with the variables in
            columns and rows double-indexed with the time step and the radial
            index of numerical cells.
            :data:`times` is the dimensionless time indexed by time steps.
            (None, None) if :data:`rproffile` does not exist.
    """
    if not rproffile.is_file():
        return None, None
    isteps = []
    with h5py.File(rproffile, 'r') as h5f:
        # lexicographic sort — assumes dataset names are zero-padded so
        # that this matches chronological order; TODO confirm
        dnames = sorted(dname for dname in h5f.keys()
                        if dname.startswith('rprof_'))
        ncols = h5f['names'].shape[0]
        h5names = map(bytes.decode, h5f['names'][len(colnames):])
        _tidy_names(colnames, ncols, h5names)
        data = np.zeros((0, ncols))
        # NOTE(review): concatenating inside the loop is quadratic; could
        # collect the datasets and concatenate once
        for dname in dnames:
            dset = h5f[dname]
            data = np.concatenate((data, dset[()]))
            isteps.append((dset.attrs['istep'], dset.attrs['time'],
                           dset.shape[0]))
    # build the (istep, radial cell index) double index
    id_arr = [[], []]
    for istep, _, n_z in isteps:
        id_arr[0].extend(repeat(istep, n_z))
        id_arr[1].extend(range(n_z))
    df_data = pd.DataFrame(data, index=id_arr, columns=colnames)
    df_times = pd.DataFrame(list(map(itemgetter(1), isteps)),
                            index=map(itemgetter(0), isteps))
    return df_data, df_times
def _clean_names_refstate(names):
"""Uniformization of refstate profile names."""
to_clean = {
'Tref': 'T',
'rhoref': 'rho',
'tcond': 'Tcond',
}
return [to_clean.get(n, n) for n in names]
def refstate(reffile, ncols=8):
    """Extract reference state profiles.

    Args:
        reffile (:class:`pathlib.Path`): path of the refstate file.
        ncols (int): number of columns.

    Returns:
        tuple of list: (syst, adia)
            :data:`syst` is a list of list of :class:`pandas.DataFrame`
            containing the reference state profiles for each system and
            each phase in these systems.
            :data:`adia` is a list of :class:`pandas.DataFrame` containing
            the adiabatic reference state profiles for each system, the last
            item being the combined adiabat.
            (None, None) if :data:`reffile` does not exist.
    """
    if not reffile.is_file():
        return None, None
    # read everything as str and coerce to numeric afterwards so that
    # header/metadata lines simply turn into NaN
    # NOTE(review): error_bad_lines/warn_bad_lines/delim_whitespace are
    # legacy pandas options removed in recent pandas — confirm pinned version
    data = pd.read_csv(reffile, delim_whitespace=True, dtype=str,
                       header=None, names=range(ncols),
                       engine='c', memory_map=True,
                       error_bad_lines=False, warn_bad_lines=False)
    data = data.apply(pd.to_numeric, raw=True, errors='coerce')
    # drop lines corresponding to metadata
    data.dropna(subset=[0], inplace=True)
    # second pass on the text: recover the column names of each profile
    isystem = -1
    systems = [[]]  # per-system list of per-phase column names
    adiabats = []  # column names of per-system adiabats + combined one
    with reffile.open() as rsf:
        for line in rsf:
            line = line.lstrip()
            if line.startswith('SYSTEM'):
                isystem += 1
                if isystem > 0:
                    systems.append([])
            elif line.startswith('z'):
                # header line of a phase profile
                systems[isystem].append(_clean_names_refstate(line.split()))
            elif line.startswith('ADIABAT') or line.startswith('COMBINED'):
                line = line.partition(':')[-1]
                adiabats.append(_clean_names_refstate(line.split()))
    # all profiles have the same number of rows: deduce it from the total
    nprofs = sum(map(len, systems)) + len(adiabats)
    nzprof = len(data) // nprofs
    # slice the numeric data into consecutive nzprof-row profiles
    iprof = 0
    syst = []
    adia = []
    for isys, layers in enumerate(systems):
        syst.append([])
        for layer in layers:
            ibgn = iprof * nzprof
            iend = ibgn + nzprof
            syst[isys].append(
                pd.DataFrame(data.iloc[ibgn:iend, :len(layer)].values,
                             columns=layer))
            iprof += 1
        if len(layers) > 1:
            # multi-phase system: a dedicated adiabat profile follows
            ibgn = iprof * nzprof
            iend = ibgn + nzprof
            cols = adiabats.pop(0)
            adia.append(
                pd.DataFrame(data.iloc[ibgn:iend, :len(cols)].values,
                             columns=cols))
            iprof += 1
        else:
            # single-phase system: its only profile is also its adiabat
            adia.append(syst[isys][0])
    # the last profile is the combined adiabat
    ibgn = iprof * nzprof
    iend = ibgn + nzprof
    cols = adiabats.pop(0)
    adia.append(pd.DataFrame(data.iloc[ibgn:iend, :len(cols)].values,
                             columns=cols))
    return syst, adia
def _readbin(fid, fmt='i', nwords=1, file64=False, unpack=True):
"""Read n words of 4 or 8 bytes with fmt format.
fmt: 'i' or 'f' or 'b' (integer or float or bytes)
4 or 8 bytes: depends on header
Return an array of elements if more than one element.
Default: read 1 word formatted as an integer.
"""
if fmt in 'if':
fmt += '8' if file64 else '4'
elts = np.fromfile(fid, fmt, nwords)
if unpack and len(elts) == 1:
elts = elts[0]
return elts
def fields(fieldfile, only_header=False, only_istep=False):
    """Extract fields data.

    Args:
        fieldfile (:class:`pathlib.Path`): path of the binary field file.
        only_header (bool): when True (and :data:`only_istep` is False), only
            :data:`header` is returned.
        only_istep (bool): when True, only :data:`istep` is returned.

    Returns:
        depends on flags.: :obj:`int`: istep
            If :data:`only_istep` is True, this function returns the time step
            at which the binary file was written.
        :obj:`dict`: header
            Else, if :data:`only_header` is True, this function returns a dict
            containing the header informations of the binary file.
        :class:`numpy.array`: fields
            Else, this function returns the tuple :data:`(header, fields)`.
            :data:`fields` is an array of scalar fields indexed by variable,
            x-direction, y-direction, z-direction, block.
            None if :data:`fieldfile` does not exist.
    """
    # something to skip header?
    if not fieldfile.is_file():
        return None
    header = {}
    with fieldfile.open('rb') as fid:
        readbin = partial(_readbin, fid)
        # the magic number encodes both the file format version and the
        # nature of the field (see decoding below)
        magic = readbin()
        if magic > 8000:  # 64 bits
            magic -= 8000
            readbin()  # need to read 4 more bytes
            readbin = partial(readbin, file64=True)
        # check nb components
        nval = 1
        sfield = False
        if magic > 400:
            nval = 4
        elif magic > 300:
            nval = 3
        elif magic > 100:
            sfield = True
        # remaining two digits encode the format version
        magic %= 100
        # extra ghost point in horizontal direction
        header['xyp'] = int(magic >= 9 and nval == 4)
        # total number of values in relevant space basis
        # (e1, e2, e3) = (theta, phi, radius) in spherical geometry
        #              = (x, y, z)            in cartesian geometry
        header['nts'] = readbin(nwords=3)
        # number of blocks, 2 for yinyang or cubed sphere
        header['ntb'] = readbin() if magic >= 7 else 1
        # aspect ratio
        header['aspect'] = readbin('f', 2)
        # number of parallel subdomains
        header['ncs'] = readbin(nwords=3)  # (e1, e2, e3) space
        header['ncb'] = readbin() if magic >= 8 else 1  # blocks
        # r - coordinates
        # rgeom[0:self.nrtot+1, 0] are edge radial position
        # rgeom[0:self.nrtot, 1] are cell-center radial position
        if magic >= 2:
            header['rgeom'] = readbin('f', header['nts'][2] * 2 + 1)
        else:
            # old format: fall back to a uniform radial grid
            header['rgeom'] = np.array(range(0, header['nts'][2] * 2 + 1))\
                * 0.5 / header['nts'][2]
        header['rgeom'] = np.resize(header['rgeom'], (header['nts'][2] + 1, 2))
        header['rcmb'] = readbin('f') if magic >= 7 else None
        header['ti_step'] = readbin() if magic >= 3 else 0
        if only_istep:
            return header['ti_step']
        header['ti_ad'] = readbin('f') if magic >= 3 else 0
        header['erupta_total'] = readbin('f') if magic >= 5 else 0
        header['bot_temp'] = readbin('f') if magic >= 6 else 1
        if magic >= 4:
            header['e1_coord'] = readbin('f', header['nts'][0])
            header['e2_coord'] = readbin('f', header['nts'][1])
            header['e3_coord'] = readbin('f', header['nts'][2])
        else:
            # could construct them from other info
            raise ParsingError(fieldfile,
                               'magic >= 4 expected to get grid geometry')
        if only_header:
            return header
        # READ FIELDS
        # number of points in (e1, e2, e3) directions PER CPU
        npc = header['nts'] // header['ncs']
        # number of blocks per cpu
        nbk = header['ntb'] // header['ncb']
        # number of values per 'read' block
        npi = (npc[0] + header['xyp']) * (npc[1] + header['xyp']) * npc[2] * \
            nbk * nval
        header['scalefac'] = readbin('f') if nval > 1 else 1
        flds = np.zeros((nval,
                         header['nts'][0] + header['xyp'],
                         header['nts'][1] + header['xyp'],
                         header['nts'][2],
                         header['ntb']))
        # loop over parallel subdomains
        for icpu in product(range(header['ncb']),
                            range(header['ncs'][2]),
                            range(header['ncs'][1]),
                            range(header['ncs'][0])):
            # read the data for one CPU
            data_cpu = readbin('f', npi) * header['scalefac']
            # icpu is (icpu block, icpu z, icpu y, icpu x)
            # data from file is transposed to obtained a field
            # array indexed with (x, y, z, block), as in StagYY
            flds[:,
                 icpu[3] * npc[0]:(icpu[3] + 1) * npc[0] + header['xyp'],  # x
                 icpu[2] * npc[1]:(icpu[2] + 1) * npc[1] + header['xyp'],  # y
                 icpu[1] * npc[2]:(icpu[1] + 1) * npc[2],  # z
                 icpu[0] * nbk:(icpu[0] + 1) * nbk  # block
                 ] = np.transpose(data_cpu.reshape(
                     (nbk, npc[2], npc[1] + header['xyp'],
                      npc[0] + header['xyp'], nval)))
    if sfield:
        # for surface fields, variables are written along z direction
        flds = np.swapaxes(flds, 0, 3)
    return header, flds
def tracers(tracersfile):
    """Extract tracers data.

    Args:
        tracersfile (:class:`pathlib.Path`): path of the binary tracers file.

    Returns:
        dict of list of numpy.array:
            Tracers data organized by attribute and block.
            None if :data:`tracersfile` does not exist.
    """
    if not tracersfile.is_file():
        return None
    tra = {}
    with tracersfile.open('rb') as fid:
        readbin = partial(_readbin, fid)
        magic = readbin()
        if magic > 8000:  # 64 bits
            magic -= 8000
            readbin()  # skip 4 more bytes, as in fields()
            readbin = partial(readbin, file64=True)
        if magic < 100:
            raise ParsingError(tracersfile,
                               'magic > 100 expected to get tracervar info')
        # last two digits encode the number of blocks
        nblk = magic % 100
        readbin('f', 2)  # aspect ratio
        readbin()  # istep
        readbin('f')  # time
        ninfo = readbin()  # number of attributes per tracer
        ntra = readbin(nwords=nblk, unpack=False)  # tracers per block
        readbin('f')  # tracer ideal mass
        curv = readbin()  # non-zero for curvilinear geometry
        if curv:
            readbin('f')  # r_cmb
        infos = []  # list of info names
        for _ in range(ninfo):
            # attribute names are stored as 16-byte fixed-width records
            infos.append(b''.join(readbin('b', 16)).strip().decode())
            tra[infos[-1]] = []
        if magic > 200:
            ntrace_elt = readbin()
            if ntrace_elt > 0:
                readbin('f', ntrace_elt)  # outgassed
        for ntrab in ntra:  # blocks
            # attributes are interleaved per tracer: de-interleave by
            # striding through the flat block
            data = readbin('f', ntrab * ninfo)
            for idx, info in enumerate(infos):
                tra[info].append(data[idx::ninfo])
    return tra
def _read_group_h5(filename, groupname):
    """Read the whole content of a group of an hdf5 file.

    Args:
        filename (:class:`pathlib.Path`): path of hdf5 file.
        groupname (str): name of group to read.

    Returns:
        :class:`numpy.array`: raw content of the group; the caller is
            expected to reshape it.
    """
    with h5py.File(filename, 'r') as h5f:
        content = h5f[groupname][()]
    return content
def _make_3d(field, twod):
"""Add a dimension to field if necessary.
Args:
field (numpy.array): the field that need to be 3d.
twod (str): 'XZ', 'YZ' or None depending on what is relevant.
Returns:
numpy.array: reshaped field.
"""
shp = list(field.shape)
if twod and 'X' in twod:
shp.insert(1, 1)
elif twod:
shp.insert(0, 1)
return field.reshape(shp)
def _ncores(meshes, twod):
    """Compute number of nodes in each direction.

    Args:
        meshes (list of dict): per-core meshes of one block, ordered
            x-fastest, then y, then z.
        twod (str): 'XZ', 'YZ' or None depending on what is relevant.

    Returns:
        numpy.array: number of computing nodes along (x, y, z).
    """
    nnpb = len(meshes)  # number of nodes per block
    nns = [1, 1, 1]  # number of nodes in x, y, z directions
    if twod is None or 'X' in twod:
        # neighboring subdomains share their boundary: walk along x while
        # the first x-position of a core equals the last x-position of the
        # previous core
        while (nnpb > 1 and
               meshes[nns[0]]['X'][0, 0, 0] ==
               meshes[nns[0] - 1]['X'][-1, 0, 0]):
            nns[0] += 1
            nnpb -= 1
    # index of the first core of the icy-th row of cores along y
    cpu = lambda icy: icy * nns[0]
    if twod is None or 'Y' in twod:
        while (nnpb > 1 and
               meshes[cpu(nns[1])]['Y'][0, 0, 0] ==
               meshes[cpu(nns[1] - 1)]['Y'][0, -1, 0]):
            nns[1] += 1
            # a full row of nns[0] cores is accounted for at once
            nnpb -= nns[0]
    # index of the first core of the icz-th layer of cores along z
    cpu = lambda icz: icz * nns[0] * nns[1]
    while (nnpb > 1 and
           meshes[cpu(nns[2])]['Z'][0, 0, 0] ==
           meshes[cpu(nns[2] - 1)]['Z'][0, 0, -1]):
        nns[2] += 1
        # a full layer of nns[0]*nns[1] cores is accounted for at once
        nnpb -= nns[0] * nns[1]
    return np.array(nns)
def _conglomerate_meshes(meshin, header):
"""Conglomerate meshes from several cores into one."""
meshout = {}
npc = header['nts'] // header['ncs']
shp = [val + 1 if val != 1 else 1 for val in header['nts']]
x_p = int(shp[0] != 1)
y_p = int(shp[1] != 1)
for coord in meshin[0]:
meshout[coord] = np.zeros(shp)
for icore in range(np.prod(header['ncs'])):
ifs = [icore // np.prod(header['ncs'][:i]) % header['ncs'][i] * npc[i]
for i in range(3)]
for coord, mesh in meshin[icore].items():
meshout[coord][ifs[0]:ifs[0] + npc[0] + x_p,
ifs[1]:ifs[1] + npc[1] + y_p,
ifs[2]:ifs[2] + npc[2] + 1] = mesh
return meshout
def _read_coord_h5(files, shapes, header, twod):
    """Read all coord hdf5 files of a snapshot.

    Fills :data:`header` in place with grid information ('ncs', 'nts',
    coordinate arrays, meshes for spherical geometry, 'aspect', 'rcmb').

    Args:
        files (list of pathlib.Path): list of NodeCoordinates files of
            a snapshot.
        shapes (list of (int,int)): shape of mesh grids.
        header (dict): geometry info (modified in place).
        twod (str): 'XZ', 'YZ' or None depending on what is relevant.
    """
    meshes = []
    for h5file, shape in zip(files, shapes):
        meshes.append({})
        with h5py.File(h5file, 'r') as h5f:
            for coord, mesh in h5f.items():
                # for some reason, the array is transposed!
                meshes[-1][coord] = mesh[()].reshape(shape).T
                meshes[-1][coord] = _make_3d(meshes[-1][coord], twod)
    header['ncs'] = _ncores(meshes, twod)
    # node meshes have one extra point per direction, hence the -1
    header['nts'] = list((meshes[0]['X'].shape[i] - 1) * header['ncs'][i]
                         for i in range(3))
    header['nts'] = np.array([max(1, val) for val in header['nts']])
    # meshes could also be defined in legacy parser, so that these can be used
    # in geometry setup
    meshes = _conglomerate_meshes(meshes, header)
    if np.any(meshes['Z'][:, :, 0] != 0):
        # spherical: a flat bottom at Z==0 would mean cartesian
        header['x_mesh'] = np.copy(meshes['Y'])  # annulus geometry...
        header['y_mesh'] = np.copy(meshes['Z'])
        header['z_mesh'] = np.copy(meshes['X'])
        header['r_mesh'] = np.sqrt(header['x_mesh']**2 + header['y_mesh']**2 +
                                   header['z_mesh']**2)
        header['t_mesh'] = np.arccos(header['z_mesh'] / header['r_mesh'])
        header['p_mesh'] = np.roll(
            np.arctan2(header['y_mesh'], -header['x_mesh']) + np.pi, -1, 1)
        # (e1, e2, e3) = (theta, phi, r)
        header['e1_coord'] = header['t_mesh'][:, 0, 0]
        header['e2_coord'] = header['p_mesh'][0, :, 0]
        header['e3_coord'] = header['r_mesh'][0, 0, :]
    else:
        # cartesian: (e1, e2, e3) = (x, y, z)
        header['e1_coord'] = meshes['X'][:, 0, 0]
        header['e2_coord'] = meshes['Y'][0, :, 0]
        header['e3_coord'] = meshes['Z'][0, 0, :]
    # NOTE(review): both components of aspect are identical and mix e1/e2;
    # looks suspicious (e1[-1]-e1[0], e2[-1]-e2[0] expected?) — confirm
    header['aspect'] = (header['e1_coord'][-1] - header['e2_coord'][0],
                        header['e1_coord'][-1] - header['e2_coord'][0])
    header['rcmb'] = header['e3_coord'][0]
    if header['rcmb'] == 0:
        # cartesian geometry is flagged with a negative rcmb
        header['rcmb'] = -1
    else:
        # express the radial coordinate relative to the CMB
        header['e3_coord'] = header['e3_coord'] - header['rcmb']
    # drop the extra node so coordinates match the number of cells
    if twod is None or 'X' in twod:
        header['e1_coord'] = header['e1_coord'][:-1]
    if twod is None or 'Y' in twod:
        header['e2_coord'] = header['e2_coord'][:-1]
    header['e3_coord'] = header['e3_coord'][:-1]
def _get_dim(data_item):
"""Extract shape of data item."""
return tuple(map(int, data_item.get('Dimensions').split()))
def _get_field(xdmf_file, data_item):
    """Extract a field array from an xdmf data item.

    Args:
        xdmf_file (:class:`pathlib.Path`): path of the xdmf file; the hdf5
            file is resolved relative to its parent directory.
        data_item: xml element whose text reads '<h5file>:/<group>'.

    Returns:
        (int, numpy.array): the 0-based core index (presumably encoded in
            the second-to-last underscore-separated part of the group name
            as icore+1) and the field reshaped as declared in the item.
    """
    shape = _get_dim(data_item)
    h5file, group = data_item.text.strip().split(':/', 1)
    icore = int(group.split('_')[-2]) - 1
    field = _read_group_h5(xdmf_file.parent / h5file, group)
    return icore, field.reshape(shape)
def _maybe_get(elt, item, info, conversion=None):
"""Extract and convert info if item is present."""
maybe_item = elt.find(item)
if maybe_item is not None:
maybe_item = maybe_item.get(info)
if conversion is not None:
maybe_item = conversion(maybe_item)
return maybe_item
def read_geom_h5(xdmf_file, snapshot):
    """Extract geometry information from hdf5 files.

    Args:
        xdmf_file (:class:`pathlib.Path`): path of the xdmf file.
        snapshot (int): snapshot number.

    Returns:
        (dict, root): geometry information and root of xdmf document.
            The dict is None when :data:`snapshot` is None.
    """
    header = {}
    xdmf_root = xmlET.parse(str(xdmf_file)).getroot()
    if snapshot is None:
        return None, xdmf_root
    # Domain, Temporal Collection, Snapshot
    # should check that this is indeed the required snapshot
    elt_snap = xdmf_root[0][0][snapshot]
    header['ti_ad'] = float(elt_snap.find('Time').get('Value'))
    # magma-ocean related quantities, present only in some runs
    header['mo_lambda'] = _maybe_get(elt_snap, 'mo_lambda', 'Value', float)
    header['mo_thick_sol'] = _maybe_get(elt_snap, 'mo_thick_sol', 'Value',
                                        float)
    header['ntb'] = 1
    coord_h5 = []  # all the coordinate files
    coord_shape = []  # shape of meshes
    twod = None
    for elt_subdomain in elt_snap.findall('Grid'):
        if elt_subdomain.get('Name').startswith('meshYang'):
            # yinyang: a second block exists but shares the Yin geometry
            header['ntb'] = 2
            break  # iterate only through meshYin
        elt_geom = elt_subdomain.find('Geometry')
        if elt_geom.get('Type') == 'X_Y' and twod is None:
            # two coordinates only: deduce which plane ('XZ', 'YZ') from
            # the last character of each data item
            twod = ''
            for data_item in elt_geom.findall('DataItem'):
                coord = data_item.text.strip()[-1]
                if coord in 'XYZ':
                    twod += coord
        data_item = elt_geom.find('DataItem')
        coord_shape.append(_get_dim(data_item))
        coord_h5.append(
            xdmf_file.parent / data_item.text.strip().split(':/', 1)[0])
    # fills header in place with grid info read from the coordinate files
    _read_coord_h5(coord_h5, coord_shape, header, twod)
    return header, xdmf_root
def _to_spherical(flds, header):
"""Convert vector field to spherical."""
cth = np.cos(header['t_mesh'][:, :, :-1])
sth = np.sin(header['t_mesh'][:, :, :-1])
cph = np.cos(header['p_mesh'][:, :, :-1])
sph = np.sin(header['p_mesh'][:, :, :-1])
fout = np.copy(flds)
fout[0] = cth * cph * flds[0] + cth * sph * flds[1] - sth * flds[2]
fout[1] = sph * flds[0] - cph * flds[1] # need to take the opposite here
fout[2] = sth * cph * flds[0] + sth * sph * flds[1] + cth * flds[2]
return fout
def _flds_shape(fieldname, header):
    """Compute shape of flds variable.

    Also records the extra-point flags ('xp', 'yp', 'zp', 'xyp') in
    :data:`header`.

    Args:
        fieldname (str): name of the field.
        header (dict): geometry info; 'nts' and 'ntb' are read, extra-point
            entries are written.

    Returns:
        list: shape (ncomponents, e1, e2, e3, nblocks).
    """
    shape = list(header['nts'])
    shape.append(header['ntb'])
    if len(FIELD_FILES_H5.get(fieldname, [])) == 3:
        # three-component vector field: an extra point is carried in each
        # non-degenerate horizontal direction
        shape.insert(0, 3)
        header['xp'] = 0 if header['nts'][0] == 1 else 1
        header['yp'] = 0 if header['nts'][1] == 1 else 1
        shape[1] += header['xp']
        shape[2] += header['yp']
        header['zp'] = 1
        header['xyp'] = 1
    else:
        # scalar field: single component, no extra points
        shape.insert(0, 1)
        header['xp'] = 0
        header['yp'] = 0
        header['zp'] = 0
        header['xyp'] = 0
    return shape
def _post_read_flds(flds, header):
"""Process flds to handle sphericity."""
if flds.shape[0] >= 3 and header['rcmb'] > 0:
# spherical vector
header['p_mesh'] = np.roll(
np.arctan2(header['y_mesh'], header['x_mesh']), -1, 1)
for ibk in range(header['ntb']):
flds[..., ibk] = _to_spherical(flds[..., ibk], header)
header['p_mesh'] = np.roll(
np.arctan2(header['y_mesh'], -header['x_mesh']) + np.pi, -1, 1)
return flds
def read_field_h5(xdmf_file, fieldname, snapshot, header=None):
    """Extract field data from hdf5 files.

    Args:
        xdmf_file (:class:`pathlib.Path`): path of the xdmf file.
        fieldname (str): name of field to extract.
        snapshot (int): snapshot number.
        header (dict): geometry information.

    Returns:
        (dict, numpy.array): geometry information and field data. None
            is returned if data is unavailable.
    """
    if header is None:
        header, xdmf_root = read_geom_h5(xdmf_file, snapshot)
    else:
        xdmf_root = xmlET.parse(str(xdmf_file)).getroot()
    npc = header['nts'] // header['ncs']  # number of grid point per node
    flds = np.zeros(_flds_shape(fieldname, header))
    data_found = False
    for elt_subdomain in xdmf_root[0][0][snapshot].findall('Grid'):
        # second block (Yang) when the subdomain name says so
        ibk = int(elt_subdomain.get('Name').startswith('meshYang'))
        for data_attr in elt_subdomain.findall('Attribute'):
            if data_attr.get('Name') != fieldname:
                continue
            icore, fld = _get_field(xdmf_file, data_attr.find('DataItem'))
            # for some reason, the field is transposed
            fld = fld.T
            shp = fld.shape
            # reshape to (component, e1, e2, e3) depending on the plane
            if shp[-1] == 1 and header['nts'][0] == 1:  # YZ
                fld = fld.reshape((shp[0], 1, shp[1], shp[2]))
                if header['rcmb'] < 0:
                    # cartesian: reorder vector components
                    fld = fld[(2, 0, 1), ...]
            elif shp[-1] == 1:  # XZ
                fld = fld.reshape((shp[0], shp[1], 1, shp[2]))
                if header['rcmb'] < 0:
                    fld = fld[(1, 2, 0), ...]
            elif fieldname in SFIELD_FILES_H5:
                # surface field: single horizontal slice
                fld = fld.reshape((1, npc[0], npc[1], 1))
            elif header['nts'][1] == 1:  # cart XZ
                fld = fld.reshape((1, shp[0], 1, shp[1]))
            # position of this core's subdomain in the global array
            # NOTE(review): np.prod(ncs[:0]) is 1.0 (float), making ifs[0]
            # a float slice bound — rejected by modern numpy; confirm the
            # numpy version constraint
            ifs = [icore // np.prod(header['ncs'][:i]) % header['ncs'][i] *
                   npc[i] for i in range(3)]
            if fieldname in SFIELD_FILES_H5:
                ifs[2] = 0
                npc[2] = 1
            if header['zp']:  # remove top row
                fld = fld[:, :, :, :-1]
            flds[:,
                 ifs[0]:ifs[0] + npc[0] + header['xp'],
                 ifs[1]:ifs[1] + npc[1] + header['yp'],
                 ifs[2]:ifs[2] + npc[2],
                 ibk] = fld
            data_found = True
    flds = _post_read_flds(flds, header)
    if fieldname in SFIELD_FILES_H5:
        # remove z component
        flds = flds[..., 0, :]
    return (header, flds) if data_found else None
def read_tracers_h5(xdmf_file, infoname, snapshot, position):
    """Extract tracers data from hdf5 files.

    Args:
        xdmf_file (:class:`pathlib.Path`): path of the xdmf file.
        infoname (str): name of information to extract.
        snapshot (int): snapshot number.
        position (bool): whether to extract position of tracers.

    Returns:
        dict of list of numpy.array:
            Tracers data organized by attribute and block.
    """
    xdmf_root = xmlET.parse(str(xdmf_file)).getroot()
    tra = {}
    tra[infoname] = [{}, {}]  # two blocks, ordered by cores
    if position:
        for axis in 'xyz':
            tra[axis] = [{}, {}]
    for elt_subdomain in xdmf_root[0][0][snapshot].findall('Grid'):
        # second block (Yang) when the subdomain name says so
        ibk = int(elt_subdomain.get('Name').startswith('meshYang'))
        if position:
            for data_attr in elt_subdomain.findall('Geometry'):
                # the three geometry data items are the x, y, z positions
                for data_item, axis in zip(data_attr.findall('DataItem'),
                                           'xyz'):
                    icore, data = _get_field(xdmf_file, data_item)
                    tra[axis][ibk][icore] = data
        for data_attr in elt_subdomain.findall('Attribute'):
            if data_attr.get('Name') != infoname:
                continue
            icore, data = _get_field(xdmf_file, data_attr.find('DataItem'))
            tra[infoname][ibk][icore] = data
    for info in tra:
        tra[info] = [trab for trab in tra[info] if trab]  # remove empty blocks
        # stitch per-core arrays together in core order
        for iblk, trab in enumerate(tra[info]):
            tra[info][iblk] = np.concatenate([trab[icore]
                                              for icore in range(len(trab))])
    return tra
def read_time_h5(h5folder):
    """Iterate through (isnap, istep) recorded in h5folder/'time_botT.h5'.

    Args:
        h5folder (:class:`pathlib.Path`): directory of HDF5 output files.

    Yields:
        tuple of int: (isnap, istep).
    """
    with h5py.File(h5folder / 'time_botT.h5', 'r') as h5f:
        for dset_name, dset in h5f.items():
            # snapshot number is taken from the last 5 characters of the
            # dataset name; the time step is the third entry of the dataset
            isnap = int(dset_name[-5:])
            istep = int(dset[2])
            yield isnap, istep