# PyASDF IO API

- http://seismicdata.github.io/pyasdf/
- https://github.com/krischer/cig_all_hands_2016_obspy_and_python_tutorial/tree/master/ObsPy

- https://gajira.atlassian.net/browse/PV-282


## Create a new ASDF file

In [1]:
import pyasdf
import glob

In [2]:
ASDF_FILE="new_ASDF_file.h5"

ds = pyasdf.ASDFDataSet(ASDF_FILE, compression="gzip-3") # default mode="a"


In [3]:
print(ds)

ASDF file [format version: 1.0.3]: 'new_ASDF_file.h5' (96.0 bytes)
	Contains 0 event(s)
	Contains waveform data from 0 station(s).


In [4]:
# Add some miniseed files

# myfiles = glob.glob("/Datasets/tmp_demult_mseed/*.mseed")
# for _i, filename in enumerate(myfiles):
#     print("Adding file %s:  %i of %i..."%(filename,_i +1, len(myfiles)))
#     ds.add_waveforms(filename, tag="raw_recording")

In [5]:
print (ds)

# want close this object?
# del(ds)

ASDF file [format version: 1.0.3]: 'new_ASDF_file.h5' (96.0 bytes)
	Contains 0 event(s)
	Contains waveform data from 0 station(s).


## Adding Station Information

In [6]:
ASDF_FILE="new_asdf_stationxml_files.h5"

ds = pyasdf.ASDFDataSet(ASDF_FILE, compression="gzip-3") # default mode="a"

In [7]:

# >>> files = glob.glob("/path/to/stations/*.xml")
# >>> for _i, filename in enumerate(files):
# ...     print("Adding file %i of %i ..." % (_i + 1, len(files)))
# ...     ds.add_stationxml(filename)

In [8]:
#ds.add_stationxml("/Datasets/networks_fdsnstationxml/inventory.xml")
#ds.add_stationxml("/g/data/ha3/Passive/_AusArray/OA/ASDF_cleaned/OA_stations_2017-2018.xml")

In [9]:
# adding individual stations
# files = glob.glob("/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/OA*.xml")
files = glob.glob("/Datasets/StationXML_with_time_corrections2/OA*.xml")
print (files)
for _i, filename in enumerate(files):
    print("Adding file %s %i of %i ..." % (filename, _i + 1, len(files)))
    ds.add_stationxml(filename)

['/Datasets/StationXML_with_time_corrections2/OA.CE28_station_inv_modified.xml', '/Datasets/StationXML_with_time_corrections2/OA.BY22_station_inv_modified.xml', '/Datasets/StationXML_with_time_corrections2/OAFZ.CE22.tmp4test.xml', '/Datasets/StationXML_with_time_corrections2/OA.CE22_station_inv_modified.xml', '/Datasets/StationXML_with_time_corrections2/OA.CF28_station_inv_modified.xml']
Adding file /Datasets/StationXML_with_time_corrections2/OA.CE28_station_inv_modified.xml 1 of 5 ...
Adding file /Datasets/StationXML_with_time_corrections2/OA.BY22_station_inv_modified.xml 2 of 5 ...
Adding file /Datasets/StationXML_with_time_corrections2/OAFZ.CE22.tmp4test.xml 3 of 5 ...
Adding file /Datasets/StationXML_with_time_corrections2/OA.CE22_station_inv_modified.xml 4 of 5 ...
Adding file /Datasets/StationXML_with_time_corrections2/OA.CF28_station_inv_modified.xml 5 of 5 ...


In [10]:
print(ds)

ASDF file [format version: 1.0.3]: 'new_asdf_stationxml_files.h5' (36.8 KB)
	Contains 0 event(s)
	Contains waveform data from 5 station(s).


### Now check what are inside the ASDF

In [11]:
print(ds.waveforms.OA_CE22)

Contents of the data set for station OA.CE22:
    - Has a StationXML file
    - 0 Waveform Tag(s):
        


In [12]:
print(ds.waveforms.OA_CE22.StationXML)

Inventory created at 2019-02-02T18:42:45.000000Z
	Created by: ObsPy 1.0.2
		    https://www.obspy.org
	Sending institution: Geoscience Australia
	Contains:
		Networks (1):
			OA
		Stations (1):
			OA.CE22 (CE22)
		Channels (3):
			OA.CE22.0M.HHZ, OA.CE22.0M.HHN, OA.CE22.0M.HHE


In [13]:
inv=ds.waveforms.OA_CE22.StationXML
inv.write('test_OA_CE22_inventory.xml', format='STATIONXML')
#           nsmap={'my_ns': 'http://test.org/xmlns/0.1',
#                  'somepage_ns': 'http://some-page.de/xmlns/1.0'})

In [14]:
# the extracted station xml file only have one xml node for station code="CE22", date ranges are merged by asdf

!cat test_OA_CE22_inventory.xml

<?xml version='1.0' encoding='UTF-8'?>
<FDSNStationXML xmlns="http://www.fdsn.org/xml/station/1" schemaVersion="1.0">
  <Source>Geoscience Australia</Source>
  <Module>ObsPy 1.0.2</Module>
  <ModuleURI>https://www.obspy.org</ModuleURI>
  <Created>2019-02-02T18:42:45</Created>
  <Network code="OA" startDate="2017-09-11T00:00:36" endDate="2018-11-28T23:06:20">
    <SelectedNumberStations>1</SelectedNumberStations>
    <Station code="CE22" startDate="2017-11-04T03:16:35" endDate="2018-11-18T20:23:20">
      <Latitude unit="DEGREES">-18.49507</Latitude>
      <Longitude unit="DEGREES">139.002731</Longitude>
      <Elevation unit="METERS">62.7</Elevation>
      <Site>
        <Name>CE22</Name>
      </Site>
      <Vault>Transportable Array</Vault>
      <CreationDate>2017-11-04T03:16:35</CreationDate>
      <TerminationDate>2018-06-06T01:02:24</TerminationDate>
      <SelectedNumberChannels>3</SelectedNumberChannels>
      <Channel code="HHZ" locationCode="0M">
        <

In [15]:
ds.add_stationxml("/Datasets/StationXML_with_time_corrections2/OAFZ.CE22.tmp4test.xml")

In [16]:
inv=ds.waveforms.OAFZ_CE22.StationXML
inv.write('test_OAFZ_CE22_inventory.xml', format='STATIONXML')

In [17]:
!cat test_OAFZ_CE22_inventory.xml

<?xml version='1.0' encoding='UTF-8'?>
<FDSNStationXML xmlns="http://www.fdsn.org/xml/station/1" schemaVersion="1.0">
  <Source>Geoscience Australia</Source>
  <Module>ObsPy 1.0.2</Module>
  <ModuleURI>https://www.obspy.org</ModuleURI>
  <Created>2019-02-02T18:42:45</Created>
  <Network code="OAFZ" startDate="2017-09-11T00:00:36" endDate="2018-11-28T23:06:20">
    <SelectedNumberStations>1</SelectedNumberStations>
    <Station code="CE22" startDate="2017-11-04T03:16:35" endDate="2018-06-06T01:02:24">
      <Latitude unit="DEGREES">-18.49507</Latitude>
      <Longitude unit="DEGREES">139.002731</Longitude>
      <Elevation unit="METERS">62.7</Elevation>
      <Site>
        <Name>CE22</Name>
      </Site>
      <Vault>Transportable Array</Vault>
      <CreationDate>2017-11-04T03:16:35</CreationDate>
      <TerminationDate>2018-06-06T01:02:24</TerminationDate>
      <SelectedNumberChannels>3</SelectedNumberChannels>
      <Channel code="HHZ" locationCode="0M">
       

In [18]:
# compare with the original xml with GPS correction data. The GPS data was lost in ASDF?
!cat /Datasets/StationXML_with_time_corrections2/OAFZ.CE22.tmp4test.xml

<?xml version='1.0' encoding='UTF-8'?>
<FDSNStationXML xmlns:GeoscienceAustralia="https://github.com/GeoscienceAustralia/hiperseis/xmlns/1.0" xmlns="http://www.fdsn.org/xml/station/1" schemaVersion="1.0">
  <Source>Geoscience Australia</Source>
  <Module>ObsPy 1.0.2</Module>
  <ModuleURI>https://www.obspy.org</ModuleURI>
  <Created>2019-02-02T18:42:45.000000Z</Created>
  <Network code="OAFZ" endDate="2018-11-28T23:06:20.000000Z" startDate="2017-09-11T00:00:36.000000Z">
    <Station code="CE22" endDate="2018-06-06T01:02:24.000000Z" startDate="2017-11-04T03:16:35.000000Z">
      <GeoscienceAustralia:gpsclockcorrection>net,sta,date,clock_correction
OA,CE22,2018-04-15,-0.062078925693916966
OA,CE22,2018-04-16,0.031089249233698407
OA,CE22,2018-04-17,0.12425742416131377
OA,CE22,2018-04-18,0.21742559908892917
OA,CE22,2018-04-19,0.31059377401654453
OA,CE22,2018-04-20,0.4037619489441599
OA,CE22,2018-04-21,0.49693012387177526
OA,CE22,2018-04-22,0.5900982987993908
OA,CE22,2018-04-

In [19]:
break point



SyntaxError: invalid syntax (<ipython-input-19-102ec1852100>, line 1)

## Extract data from an exisitng ASDF file

In [None]:
ASDF_FILE="/g/data/ha3/GASeisDataArchive/DevSpace/2020.h5"
ASDF_FILE="/g/data/ha3/Passive/STRIPED_DATA/TEMP/OA_AUSARRAY1_rev1.h5"
#ASDF_FILE="/g/data/ha3/Passive/STRIPED_DATA/TEMP/OA_AUSARRAY_Yr2_S1.h5"  

ds = pyasdf.ASDFDataSet(ASDF_FILE, mode="r")

In [None]:
print(ds)

In [None]:
print(type(ds))

In [None]:
ds.waveforms.list()

In [None]:
# OA.BY22_station_inv_modified.xml  OA.CE28_station_inv_modified.xml
# OA.CE22_station_inv_modified.xml  OA.CF28_station_inv_modified.xml

print(ds.waveforms.OA_BY22)

In [None]:
print(ds.waveforms.OA_BY22.StationXML)

In [None]:
print (type(ds.waveforms.OA_BY22.StationXML) )

In [None]:
inv=ds.waveforms.OA_CF28.StationXML
inv.write('OA_CF28_inventory.xml', format='STATIONXML')
#           nsmap={'my_ns': 'http://test.org/xmlns/0.1',
#                  'somepage_ns': 'http://some-page.de/xmlns/1.0'})

In [None]:
Break

print(ds.waveforms.OA_CE22.raw_recording)

#ASDFValueError: All waveforms for station 'OA.CE22' and item 'raw_recording' would require '56639.45 MB of memory. The current limit is 4096.00 MB. Adjust by setting 'ASDFDataSet.single_item_read_limit_in_mb' or use a different method to read the waveform data.

In [None]:
for astation in ds.waveforms:  # takes a long time!!!
    print(astation)

In [None]:
# inv=ds.waveforms.AU_FITZ.StationXML
# inv.write('AU.FITZ_inventory.xml', format='STATIONXML')