# Use ObsPy to read and write station inventory files

- Read StationXML files with extra metadata in JSON format
- Modify existing metadata values
- Add new metadata items such as Equipment

## Fei Zhang

* CreationDate: 2020-07-14
* LastUpdated:  2020-09-10

# Ref:

* https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html
* https://docs.obspy.org/packages/autogen/obspy.core.inventory.util.Equipment.__init__.html


In [1]:
!pwd

/Softlab/Githubz/hiperseis/notebooks


In [2]:
import os
import sys
from obspy import read_inventory
from obspy.core import UTCDateTime

# Directory holding the input StationXML files. The first assignment is the NCI
# location; it is immediately overridden by the second (local) path, so only the
# second value is actually used. Comment out one line to switch environments.
dirpath = "/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML"  # if in NCI
dirpath="/Datasets/Station_Extra_Metadata/NewInventoryXML"  # the dir where input_station xml file located
xmlfile_name= "OA.CE28_station_metadata_JSON.xml"  # StationXML file to inspect

input_station_xml = os.path.join(dirpath, xmlfile_name)


# VS extracted from ASDF
#input_station_xml ="OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml"

# Parse the StationXML file into an ObsPy Inventory object
our_inv = read_inventory(input_station_xml,format='STATIONXML')


# Station nodes of the first network in the inventory; later cells index into this list
stn_meta = our_inv.networks[0].stations


In [3]:
print(type(stn_meta[0].extra))
print(stn_meta[0].extra)

<class 'obspy.core.util.attribdict.AttribDict'>
AttribDict({'GAMetadata': AttribDict({'namespace': 'https://github.com/GeoscienceAustralia/hiperseis', 'value': '{\n  "network": "OA",\n  "station": "CE28",\n  "GPS_CORRECTION": [\n    {\n      "date": "2017-12-01",\n      "clock_correction": 0.269773702\n    },\n    {\n      "date": "2017-12-02",\n      "clock_correction": 0.2650179231\n    },\n    {\n      "date": "2017-12-03",\n      "clock_correction": 0.2602621441\n    },\n    {\n      "date": "2017-12-04",\n      "clock_correction": 0.2555063652\n    },\n    {\n      "date": "2017-12-05",\n      "clock_correction": 0.2507505863\n    },\n    {\n      "date": "2017-12-06",\n      "clock_correction": 0.2459948074\n    },\n    {\n      "date": "2017-12-07",\n      "clock_correction": 0.2412390284\n    },\n    {\n      "date": "2017-12-08",\n      "clock_correction": 0.2364832495\n    },\n    {\n      "date": "2017-12-09",\n      "clock_correction": 0.2317274706\n    },\n    {\n      "da

In [4]:
# print(len(stn_meta))

# Start and end dates of the first station node (UTCDateTime values)
print("type", type(stn_meta[0].start_date))
print(stn_meta[0].start_date)
print(stn_meta[0].end_date)

# Equipment list of the first station node, followed by its creation and
# termination dates
print(stn_meta[0].equipments)
print(stn_meta[0].creation_date, stn_meta[0].termination_date)
#print(stn_meta[0].extra) 

type <class 'obspy.core.utcdatetime.UTCDateTime'>
2017-11-06T02:31:42.000000Z
2018-06-09T05:29:53.000000Z
[]
2017-11-06T02:31:42.000000Z 2018-06-09T05:29:53.000000Z


In [5]:
# extra_meta = our_inv.networks[0].stations[0].extra.gpsclockcorrection.value

#extra_meta = stn_meta[2].extra.metadata_ga.value
# The extra metadata lives under the custom 'GAMetadata' tag of the first station node;
# its value is a JSON-formatted string (decoded in a later cell)
extra_meta = stn_meta[0].extra.GAMetadata.value

print(type(extra_meta))

<class 'str'>


In [6]:
print(extra_meta)

{
  "network": "OA",
  "station": "CE28",
  "GPS_CORRECTION": [
    {
      "date": "2017-12-01",
      "clock_correction": 0.269773702
    },
    {
      "date": "2017-12-02",
      "clock_correction": 0.2650179231
    },
    {
      "date": "2017-12-03",
      "clock_correction": 0.2602621441
    },
    {
      "date": "2017-12-04",
      "clock_correction": 0.2555063652
    },
    {
      "date": "2017-12-05",
      "clock_correction": 0.2507505863
    },
    {
      "date": "2017-12-06",
      "clock_correction": 0.2459948074
    },
    {
      "date": "2017-12-07",
      "clock_correction": 0.2412390284
    },
    {
      "date": "2017-12-08",
      "clock_correction": 0.2364832495
    },
    {
      "date": "2017-12-09",
      "clock_correction": 0.2317274706
    },
    {
      "date": "2017-12-10",
      "clock_correction": 0.2269716916
    },
    {
      "date": "2017-12-11",
      "clock_correction": 0.2222159127
    },
    {
      "date": "2017-12-12",
      "clock_correction

In [7]:
import json

# Decode the JSON text held in the GAMetadata tag into a Python dict
mdata = json.loads(extra_meta)

# print (mdata)
print(mdata.keys())
# print(type(mdata['gps_clock_corrections']))

print(type(mdata['GPS_CORRECTION']))

# Each GPS_CORRECTION entry is a dict with a "date" (YYYY-MM-DD string) and a
# "clock_correction" number; the loop prints them labelled as seconds
for corr in mdata['GPS_CORRECTION']:
    print (corr["date"], corr["clock_correction"], "seconds" )

dict_keys(['network', 'station', 'GPS_CORRECTION', 'ORIENT_CORRECTION'])
<class 'list'>
2017-12-01 0.269773702 seconds
2017-12-02 0.2650179231 seconds
2017-12-03 0.2602621441 seconds
2017-12-04 0.2555063652 seconds
2017-12-05 0.2507505863 seconds
2017-12-06 0.2459948074 seconds
2017-12-07 0.2412390284 seconds
2017-12-08 0.2364832495 seconds
2017-12-09 0.2317274706 seconds
2017-12-10 0.2269716916 seconds
2017-12-11 0.2222159127 seconds
2017-12-12 0.2174601338 seconds
2017-12-13 0.2127043548 seconds
2017-12-14 0.2079485759 seconds
2017-12-15 0.203192797 seconds
2017-12-16 0.198437018 seconds
2017-12-17 0.1936812391 seconds
2017-12-18 0.1889254602 seconds
2017-12-19 0.1841696813 seconds
2017-12-20 0.1794139023 seconds
2017-12-21 0.1746581234 seconds
2017-12-22 0.1699023445 seconds
2017-12-23 0.1651465655 seconds
2017-12-24 0.1603907866 seconds
2017-12-25 0.1556350077 seconds
2017-12-26 0.1508792287 seconds
2017-12-27 0.1461234498 seconds
2017-12-28 0.1413676709 seconds
2017-12-29 0.136611

In [8]:

print (mdata["ORIENT_CORRECTION"])

{}


In [9]:
# https://pbpython.com/pandas-list-dict.html

import pandas as pd

# Build a table from the list of correction dicts: one row per entry,
# with the columns "date" and "clock_correction"
df_clock_correction = pd.DataFrame(mdata['GPS_CORRECTION'])
df_clock_correction.head()

Unnamed: 0,date,clock_correction
0,2017-12-01,0.269774
1,2017-12-02,0.265018
2,2017-12-03,0.260262
3,2017-12-04,0.255506
4,2017-12-05,0.250751


In [10]:
print (df_clock_correction.tail())

           date  clock_correction
151  2018-06-04        -10.547384
152  2018-06-05        -10.879123
153  2018-06-06        -11.214748
154  2018-06-07        -11.554780
155  2018-06-08        -11.899737


In [11]:
# get correction for a certain date YYYY-MM-DD
# (the "date" column holds plain strings, so this is an exact string match)
print(df_clock_correction.loc[df_clock_correction["date"] =="2018-06-06"])

           date  clock_correction
153  2018-06-06        -11.214748


In [12]:
type(df_clock_correction.query('date == "2018-06-06" '))

pandas.core.frame.DataFrame

In [13]:
df_clock_correction["date"] =="2018-06-06"

0      False
1      False
2      False
3      False
4      False
       ...  
151    False
152    False
153     True
154    False
155    False
Name: date, Length: 156, dtype: bool

In [14]:
df_clock_correction.query('date == "2018-06-06"').clock_correction

153   -11.214748
Name: clock_correction, dtype: float64

In [15]:
df_clock_correction.describe()


Unnamed: 0,clock_correction
count,156.0
mean,-0.877534
std,5.408053
min,-13.673948
25%,-2.766375
50%,0.024851
75%,0.199626
max,15.991937


In [16]:
df_clock_correction.dtypes

date                 object
clock_correction    float64
dtype: object

In [17]:
# What if there is more than one Station node, such as AU.HTT in testdata/network_AU_0.xml, and OA.CF28?
# Count the Station nodes in the first network of the loaded inventory.
stations =our_inv.networks[0].stations
print (len(stations))

3


In [18]:
# Write out the inventory

#GA_NameSpace = "https://github.com/GeoscienceAustralia/hiperseis"
# how to get the extra's key 'GAMetadata'  
# Iterating over the extra AttribDict yields its keys; take the first one
extram = list(stn_meta[0].extra)[0] 
print(extram)

# Namespace URI stored alongside the GAMetadata tag
GA_NameSpace = stn_meta[0].extra['GAMetadata']['namespace']

# nsmap associates the 'GeoscienceAustralia' prefix with that namespace URI so the
# custom tag is written into the output StationXML
our_inv.write("our_inv.xml",format="stationxml",  nsmap={'GeoscienceAustralia': GA_NameSpace})

GAMetadata


In [19]:
!ls -lt *.xml 

-rw-r--r-- 1 fzhang fzhang 23337 Sep 18 10:52 our_inv.xml
-rw-r--r-- 1 fzhang fzhang 23679 Sep 18 10:31 our_inv2.xml
-rwxr-xr-x 1 fzhang fzhang  7924 Jul 17 07:59 OA.CF28_station_inv_modified_json.xml
-rw-r--r-- 1 fzhang fzhang  6113 Jun 15 14:26 test_OA_CF28_inventory.xml
-rw-r--r-- 1 fzhang fzhang  6053 Jun 15 14:26 test_OA_CE28_inventory.xml
-rw-r--r-- 1 fzhang fzhang  6113 Jun 15 14:25 test_OA_BY22_inventory.xml
-rw-r--r-- 1 fzhang fzhang  4200 Jun 15 14:21 test_OAFZ_CE22_inventory.xml
-rw-r--r-- 1 fzhang fzhang  2309 Jun 15 14:20 test_OA_CE22_inventory.xml
-rw-r--r-- 1 fzhang fzhang 11879 Apr 10 09:28 modified_inventory_select.xml


In [20]:
# Build a shell command that compares the re-written inventory with the original input file
my_cmd = "diff our_inv.xml " + input_station_xml
print(my_cmd)

diff our_inv.xml /Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CE28_station_metadata_JSON.xml


In [21]:
# Run the diff through the shell; a result of 0 means diff reported no differences,
# i.e. the written file matches the input file. Any non-zero result means the
# files differ or the command failed.
os.system(my_cmd)  # =0 if success

0

In [22]:
# dir(our_inv)

type(our_inv)

obspy.core.inventory.inventory.Inventory

In [23]:
# try to analyze the extra metadata tags
print( type(stn_meta[0].extra))
print(dir(stn_meta[0].extra))
stn_meta[0].extra['GAMetadata']['namespace']

<class 'obspy.core.util.attribdict.AttribDict'>
['GAMetadata', '_MutableMapping__marker', '__abstractmethods__', '__class__', '__contains__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_cast_type', '_pretty_str', '_types', 'clear', 'copy', 'defaults', 'do_not_warn_on', 'get', 'items', 'keys', 'pop', 'popitem', 'readonly', 'setdefault', 'update', 'values', 'warn_on_non_default_key']


'https://github.com/GeoscienceAustralia/hiperseis'

In [24]:
list(stn_meta[0].extra)[0]

'GAMetadata'

# Modify the station XML file to include Equipments

In [25]:
# See also: https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html

# Existing metadata values can be changed. 
import obspy
# stn_meta[0].start_date ="1990-01-01"
# stn_meta[0].end_date ="2029-07-01"

# print(stn_meta[0].start_date)
# print(stn_meta[0].end_date)

In [26]:
# New metadata (eg, Equipment) can be added 
# See https://gajira.atlassian.net/browse/PV-311

# Describe the station's sensor and digitizer as ObsPy Equipment objects
my_sensor=obspy.core.inventory.util.Equipment(type="Sensor", description="Nanometrics Trillium Compact 120s",serial_number="004940")
my_digitizer = obspy.core.inventory.util.Equipment(type="Digitizer", description="Guralp Minimus",serial_number="MIN-A456")

# Assign the two entries as the equipment list of the first station node.
# This changes only the in-memory inventory until it is written out again.
stn_meta[0].equipments = [my_sensor, my_digitizer]
print(stn_meta[0].equipments)

[<obspy.core.inventory.util.Equipment object at 0x7f5bef962e50>, <obspy.core.inventory.util.Equipment object at 0x7f5bef962dd0>]


In [27]:
# Show the extra (GAMetadata) content of the first station node before writing
print(stn_meta[0].extra)


# Re-write out the inventory
# (now including the Equipment entries) to a second file, with the same namespace mapping

our_inv.write("our_inv2.xml",format="stationxml", nsmap={'GeoscienceAustralia': GA_NameSpace})

AttribDict({'GAMetadata': AttribDict({'namespace': 'https://github.com/GeoscienceAustralia/hiperseis', 'value': '{\n  "network": "OA",\n  "station": "CE28",\n  "GPS_CORRECTION": [\n    {\n      "date": "2017-12-01",\n      "clock_correction": 0.269773702\n    },\n    {\n      "date": "2017-12-02",\n      "clock_correction": 0.2650179231\n    },\n    {\n      "date": "2017-12-03",\n      "clock_correction": 0.2602621441\n    },\n    {\n      "date": "2017-12-04",\n      "clock_correction": 0.2555063652\n    },\n    {\n      "date": "2017-12-05",\n      "clock_correction": 0.2507505863\n    },\n    {\n      "date": "2017-12-06",\n      "clock_correction": 0.2459948074\n    },\n    {\n      "date": "2017-12-07",\n      "clock_correction": 0.2412390284\n    },\n    {\n      "date": "2017-12-08",\n      "clock_correction": 0.2364832495\n    },\n    {\n      "date": "2017-12-09",\n      "clock_correction": 0.2317274706\n    },\n    {\n      "date": "2017-12-10",\n      "clock_correction": 0.

In [28]:
# compare of the two inventory files

!diff our_inv.xml our_inv2.xml

646a647,656
>       <Equipment>
>         <Type>Sensor</Type>
>         <Description>Nanometrics Trillium Compact 120s</Description>
>         <SerialNumber>004940</SerialNumber>
>       </Equipment>
>       <Equipment>
>         <Type>Digitizer</Type>
>         <Description>Guralp Minimus</Description>
>         <SerialNumber>MIN-A456</SerialNumber>
>       </Equipment>


In [29]:
#STOP!!

# Dealing with multiple station-nodes in a network.station inventory file

See Jira: https://gajira.atlassian.net/browse/PV-130

 What if there is more than one Station node, such as AU.HTT in testdata/network_AU_0.xml,
 and input_station_xml = "OA.CF28_station_inv_modified_json.xml"?
 
Multiple Station nodes with the same station code represent the same station with different channel configurations over different time periods.
 

In [30]:

# create the modified xml file by running
# python  add_time_corrections.py  /g/data/ha3/Passive/SHARED_DATA/GPS_Clock/corrections/AU.HTT_clock_correction.csv ../../tests/testdata/network_AU_0.xml 

def inspect_stations(input_station_xml):
    """Read a StationXML file and return the station nodes of its first network.

    If the first network holds two or more Station nodes, the file path and a
    warning with the node count are printed.

    Parameters
    ----------
    input_station_xml : str
        Path of the StationXML file to read.

    Returns
    -------
    list
        The ``stations`` list of the first network in the parsed inventory.
    """
    inventory = read_inventory(input_station_xml, format='STATIONXML')

    # Only the first network is examined; any further networks are ignored.
    station_nodes = inventory.networks[0].stations
    node_count = len(station_nodes)

    if node_count > 1:
        print(input_station_xml)
        print("Warning: more than one Station Node = %s" % node_count)

    return station_nodes


In [31]:
#input_station_xml ="/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/AU.HTT_station_inv_modified.xml"
#inspect_stations(input_station_xml)

In [32]:

import os, glob

# Folder of StationXML files to scan. The first assignment (NCI location) is
# immediately overridden by the second, so the folder used is `dirpath`.
xmlfolder = "/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/"
xmlfolder = dirpath # "/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML"

# Inspect every .xml file in the folder; inspect_stations prints the file path
# and a warning for each file whose first network has two or more Station nodes.
for axmlfile in glob.glob(os.path.join(xmlfolder,"*.xml")):
    
    stations = inspect_stations(axmlfile)
    
#     if len(stations)>=2:
#         for astation in stations:
#             print("### ", astation.code, astation.extra)

/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CC22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CE22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BY20_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CI21_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BW22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BZ22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CF23_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BZ26_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CB25_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BX22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BZ24_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CI23_station_metadata_JSON.xml
/Dat

/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BX27_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CJ27_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CG23_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.CF22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BU26_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BS28_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BW27_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BU28_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BW28_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BX26_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BY22_station_metadata_JSON.xml
/Datasets/Station_Extra_Metadata/NewInventoryXML/OA.BX25_station_metadata_JSON.xml
/Dat

# Check the output StationXML file from ASDF 

- to ensure they are identical to the ingested station xml
- in particular, the extra station xml file can be recovered


In [None]:
# NOTE: skip this section unless the StationXML file extracted from ASDF is available.
# The next cell reads "OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml",
# which is not produced by this notebook (see pyasdf_tests.ipynb).
# (Previously this cell ran `!skip this section`, i.e. a non-existent shell command.)

In [None]:
# Check the output StationXML file from ASDF

# We have added the input station xml into an ASDF file, then extracted it again. See pyasdf_tests.ipynb
# https://github.com/SeismicData/pyasdf/issues/63

    
from obspy import read_inventory

# NOTE: this rebinds input_station_xml and our_inv, replacing the values set near the top of the notebook
input_station_xml ="OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml"
our_inv = read_inventory(input_station_xml,format='STATIONXML')
# print(our_inv.networks[0].stations[0].extra)

# First station node of the first network in the extracted inventory
stn_meta0 = our_inv.networks[0].stations[0]


# Print the extra metadata, the start/end dates and the equipment list of that node
print(type(stn_meta0.extra))
print(stn_meta0.extra)

print(stn_meta0.start_date)
print(stn_meta0.end_date)

print(stn_meta0.equipments)

# How to create a new StationXML file

https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html