# PyASDF IO API

- http://seismicdata.github.io/pyasdf/
- https://github.com/krischer/cig_all_hands_2016_obspy_and_python_tutorial/tree/master/ObsPy

- https://gajira.atlassian.net/browse/PV-282


## Create a new ASDF file

In [1]:
import pyasdf
import glob

In [2]:
# Name of the ASDF (HDF5) file used for this first demo.
ASDF_FILE = "new_ASDF_file.h5"

# No mode is passed, so the default mode="a" applies.
ds = pyasdf.ASDFDataSet(ASDF_FILE, compression="gzip-3")


In [3]:
# Print the dataset summary: format version, file name, size, event and station counts.
print(ds)

ASDF file [format version: 1.0.3]: 'new_ASDF_file.h5' (96.0 bytes)
	Contains 0 event(s)
	Contains waveform data from 0 station(s).


In [4]:
# Add some miniseed files

# myfiles = glob.glob("/Datasets/tmp_demult_mseed/*.mseed")
# for _i, filename in enumerate(myfiles):
#     print("Adding file %s:  %i of %i..."%(filename,_i +1, len(myfiles)))
#     ds.add_waveforms(filename, tag="raw_recording")

In [5]:
# Summary after the (currently disabled) waveform import step.
print(ds)

# Open question: should the dataset object be closed here?
# NOTE(review): presumably deleting the object releases the file -- confirm.
# del ds

ASDF file [format version: 1.0.3]: 'new_ASDF_file.h5' (96.0 bytes)
	Contains 0 event(s)
	Contains waveform data from 0 station(s).


## Adding Station Information

In [6]:
# Separate ASDF file that will only receive station metadata.
ASDF_FILE = "new_asdf_stationxml_files.h5"

# No mode is passed, so the default mode="a" applies.
ds = pyasdf.ASDFDataSet(ASDF_FILE, compression="gzip-3")

In [7]:

# >>> files = glob.glob("/path/to/stations/*.xml")
# >>> for _i, filename in enumerate(files):
# ...     print("Adding file %i of %i ..." % (_i + 1, len(files)))
# ...     ds.add_stationxml(filename)

In [8]:
#ds.add_stationxml("/Datasets/networks_fdsnstationxml/inventory.xml")
#ds.add_stationxml("/g/data/ha3/Passive/_AusArray/OA/ASDF_cleaned/OA_stations_2017-2018.xml")

In [9]:
# Add individual StationXML files to the dataset, one call per file.
# Alternative source locations (disabled):
# files = glob.glob("/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/OA*.xml")
# files = glob.glob("/Datasets/StationXML_with_time_corrections2/OA*.xml")
files = glob.glob("./OA.CF28_station_inv_modified_json.xml")
print(files)

total = len(files)
for position, filename in enumerate(files, start=1):
    # Progress line: file name followed by "k of N".
    print("Adding file %s %i of %i ..." % (filename, position, total))
    ds.add_stationxml(filename)

['./OA.CF28_station_inv_modified_json.xml']
Adding file ./OA.CF28_station_inv_modified_json.xml 1 of 1 ...


In [10]:
# Print the dataset summary again to see the effect of adding the StationXML file.
print(ds)

ASDF file [format version: 1.0.3]: 'new_asdf_stationxml_files.h5' (8.1 KB)
	Contains 0 event(s)
	Contains waveform data from 1 station(s).


### Now check what is inside the ASDF file

In [11]:
# List the station identifiers stored in the file.
station_ids = ds.waveforms.list()
print(station_ids)

['OA.CF28']


In [12]:
# Show the inventory stored for station OA.CF28.
# Alternative station (disabled):
# print(ds.waveforms.OAFZ_CE22.StationXML)
station_group = ds.waveforms.OA_CF28
print(station_group.StationXML)

Inventory created at 2019-02-02T18:42:45.000000Z
	Created by: ObsPy 1.0.2
		    https://www.obspy.org
	Sending institution: Geoscience Australia
	Contains:
		Networks (1):
			OA
		Stations (3):
			OA.CF28 (CF28) (3x)
		Channels (9):
			OA.CF28.0M.HHZ (3x), OA.CF28.0M.HHN (3x), OA.CF28.0M.HHE (3x)


In [13]:
# Export the OA.CF28 inventory held in the dataset to a StationXML file on disk.
inv = ds.waveforms.OA_CF28.StationXML
inv.write("test_OA_CF28_inventory.xml", format="STATIONXML")
# Unused extra arguments, kept for reference:
#           nsmap={'my_ns': 'http://test.org/xmlns/0.1',
#                  'somepage_ns': 'http://some-page.de/xmlns/1.0'})

In [14]:
# the extracted station xml file only have one xml node for station code="CE22", date ranges are merged by asdf

break

!cat test_OA_CF28_inventory.xml

SyntaxError: 'break' outside loop (<ipython-input-14-2e91fbc41d10>, line 6)

In [None]:
# Add the OAFZ.CE22 test StationXML file to the open dataset.
ds.add_stationxml("/Datasets/StationXML_with_time_corrections2/OAFZ.CE22.tmp4test.xml")

In [None]:
# Export the OAFZ.CE22 inventory held in the dataset to a StationXML file on disk.
inv = ds.waveforms.OAFZ_CE22.StationXML
inv.write("test_OAFZ_CE22_inventory.xml", format="STATIONXML")

In [None]:
# Display the exported OAFZ.CE22 StationXML file in the cell output.
!cat test_OAFZ_CE22_inventory.xml

In [None]:
# Compare with the original StationXML, which carries the GPS correction data.
# Open question: was the GPS data lost when the file went through ASDF?
!cat /Datasets/StationXML_with_time_corrections2/OAFZ.CE22.tmp4test.xml

In [None]:
# Break point: end of the station-information experiments above.
# (Kept as a comment; the bare words `break point` are not valid Python.)



## Extract data from an existing ASDF file

In [None]:
# Existing ASDF archive to inspect. Exactly one path is active; the others are
# alternatives that can be swapped in.
# ASDF_FILE = "/g/data/ha3/GASeisDataArchive/DevSpace/2020.h5"
# ASDF_FILE = "/g/data/ha3/Passive/STRIPED_DATA/TEMP/OA_AUSARRAY_Yr2_S1.h5"
ASDF_FILE = "/g/data/ha3/Passive/STRIPED_DATA/TEMP/OA_AUSARRAY1_rev1.h5"

# mode="r": the archive is only read in this section.
ds = pyasdf.ASDFDataSet(ASDF_FILE, mode="r")

In [None]:
# Print the summary of the archive that was just opened.
print(ds)

In [None]:
# Show the Python type of the dataset object.
print(type(ds))

In [None]:
# List the station identifiers stored in the archive (displayed as the cell result).
ds.waveforms.list()

In [None]:
# NOTE(review): presumably the StationXML files behind the OA stations of interest -- confirm.
# OA.BY22_station_inv_modified.xml  OA.CE28_station_inv_modified.xml
# OA.CE22_station_inv_modified.xml  OA.CF28_station_inv_modified.xml

# Print the accessor for station OA.BY22.
print(ds.waveforms.OA_BY22)

In [None]:
# Print the inventory stored for station OA.BY22.
print(ds.waveforms.OA_BY22.StationXML)

In [None]:
# Report the Python type returned by the StationXML accessor.
print(type(ds.waveforms.OA_BY22.StationXML))

In [None]:
# Export the OA.CF28 inventory from the archive to a StationXML file on disk.
inv = ds.waveforms.OA_CF28.StationXML
inv.write("OA_CF28_inventory.xml", format="STATIONXML")
# Unused extra arguments, kept for reference:
#           nsmap={'my_ns': 'http://test.org/xmlns/0.1',
#                  'somepage_ns': 'http://some-page.de/xmlns/1.0'})

In [None]:
# Requesting every "raw_recording" waveform of a station in one go can exceed
# pyasdf's per-item read limit. The error is caught and displayed, so the cell
# shows the failure without aborting the notebook. (An undefined name `Break`
# used to stop the cell with a NameError before this line was reached.)
try:
    print(ds.waveforms.OA_CE22.raw_recording)
except pyasdf.exceptions.ASDFValueError as err:
    print("ASDFValueError:", err)

# Previously observed message:
#ASDFValueError: All waveforms for station 'OA.CE22' and item 'raw_recording' would require '56639.45 MB of memory. The current limit is 4096.00 MB. Adjust by setting 'ASDFDataSet.single_item_read_limit_in_mb' or use a different method to read the waveform data.

In [None]:
# Walk over every station in the archive and print it.
# Warning: takes a long time!!!
for station in ds.waveforms:
    print(station)

In [None]:
# inv=ds.waveforms.AU_FITZ.StationXML
# inv.write('AU.FITZ_inventory.xml', format='STATIONXML')