# Use Obspy to read and write station inventory files

- Read station XML files with extra metadata in Json format 
- modify existing metadata values
- add new metadata items such as Equipments

## Fei Zhang

* CreationDate: 2020-07-14
* LastUpdated:  2020-09-10

# Ref:

* https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html
* https://docs.obspy.org/packages/autogen/obspy.core.inventory.util.Equipment.__init__.html


In [31]:
from obspy import read_inventory
from obspy.core import UTCDateTime

our_new_station_xml = "OA.CF28_station_inv_modified_json.xml"  
our_new_station_xml = "/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CE28_station_metadata_JSON.xml"

# VS extracted from ASDF
#our_new_station_xml ="OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml"

our_inv = read_inventory(our_new_station_xml,format='STATIONXML')


stn_meta = our_inv.networks[0].stations


In [32]:
print(type(stn_meta[0].extra))
print(stn_meta[0].extra)

<class 'obspy.core.util.attribdict.AttribDict'>
AttribDict({'GAMetadata': AttribDict({'namespace': 'https://github.com/GeoscienceAustralia/hiperseis', 'value': '{\n  "network": "OA",\n  "station": "CE28",\n  "GPS_CORRECTION": [\n    {\n      "date": "2017-12-01",\n      "clock_correction": 0.269773702\n    },\n    {\n      "date": "2017-12-02",\n      "clock_correction": 0.2650179231\n    },\n    {\n      "date": "2017-12-03",\n      "clock_correction": 0.2602621441\n    },\n    {\n      "date": "2017-12-04",\n      "clock_correction": 0.2555063652\n    },\n    {\n      "date": "2017-12-05",\n      "clock_correction": 0.2507505863\n    },\n    {\n      "date": "2017-12-06",\n      "clock_correction": 0.2459948074\n    },\n    {\n      "date": "2017-12-07",\n      "clock_correction": 0.2412390284\n    },\n    {\n      "date": "2017-12-08",\n      "clock_correction": 0.2364832495\n    },\n    {\n      "date": "2017-12-09",\n      "clock_correction": 0.2317274706\n    },\n    {\n      "da

In [51]:
# print(len(stn_meta))

print("type", type(stn_meta[0].start_date))
print(stn_meta[0].start_date)
print(stn_meta[0].end_date)

print(stn_meta[0].equipments)
print(stn_meta[0].creation_date, stn_meta[0].termination_date)
#print(stn_meta[0].extra) 

type <class 'obspy.core.utcdatetime.UTCDateTime'>
2017-11-06T02:31:42.000000Z
2018-06-09T05:29:53.000000Z
[<obspy.core.inventory.util.Equipment object at 0x7f4660eeca10>, <obspy.core.inventory.util.Equipment object at 0x7f4660eec9d0>]
2017-11-06T02:31:42.000000Z 2018-06-09T05:29:53.000000Z


In [36]:
# extra_meta = our_inv.networks[0].stations[0].extra.gpsclockcorrection.value
#extra_meta = stn_meta[2].extra.metadata_ga.value
extra_meta = stn_meta[2].extra.GAMetadata.value

print(type(extra_meta))

<class 'str'>


In [37]:
print(extra_meta)

{
  "network": "OA",
  "station": "CE28",
  "GPS_CORRECTION": [],
  "ORIENT_CORRECTION": {}
}


In [38]:
import json

mdata = json.loads(extra_meta)

print (mdata)
print(mdata.keys())
print(type(mdata['gps_clock_corrections']))

for corr in mdata['gps_clock_corrections']:
    print (corr["date"], corr["seconds"] )

{'network': 'OA', 'station': 'CE28', 'GPS_CORRECTION': [], 'ORIENT_CORRECTION': {}}
dict_keys(['network', 'station', 'GPS_CORRECTION', 'ORIENT_CORRECTION'])


KeyError: 'gps_clock_corrections'

In [7]:

print (mdata['gps_clock_corrections'])

[{'date': '2018-06-10', 'seconds': -0.17439437861636772}, {'date': '2018-06-11', 'seconds': -0.17770115938069328}, {'date': '2018-06-12', 'seconds': -0.18100794014501886}]


In [8]:
# https://pbpython.com/pandas-list-dict.html

import pandas as pd

df_clock_correction = pd.DataFrame(mdata['gps_clock_corrections'])
df_clock_correction.head()

Unnamed: 0,date,seconds
0,2018-06-10,-0.174394
1,2018-06-11,-0.177701
2,2018-06-12,-0.181008


In [9]:
print (df_clock_correction.tail())

         date   seconds
0  2018-06-10 -0.174394
1  2018-06-11 -0.177701
2  2018-06-12 -0.181008


In [11]:
# get correction for a certain date YYYY-MM-DD
print(df_clock_correction.loc[df_clock_correction["date"] =="2018-06-10"])

         date   seconds
0  2018-06-10 -0.174394


In [13]:
type(df_clock_correction.query('date == "2018-06-10" ').seconds)

pandas.core.series.Series

In [14]:
df_clock_correction["date"] =="2018-06-10"

0     True
1    False
2    False
Name: date, dtype: bool

In [15]:
df_clock_correction.query('date == "2018-06-10"').seconds

0   -0.174394
Name: seconds, dtype: float64

In [16]:
df_clock_correction.describe()


Unnamed: 0,seconds
count,3.0
mean,-0.177701
std,0.003307
min,-0.181008
25%,-0.179355
50%,-0.177701
75%,-0.176048
max,-0.174394


In [17]:
df_clock_correction.dtypes

date        object
seconds    float64
dtype: object

In [18]:
# What if there are more than one Station Node, such as AU.HTT in testdata/network_AU_0.xml, and OA.CF28 
stations =our_inv.networks[0].stations
print (len(stations))

3


In [19]:
# wrtite out the inventory

our_inv.write("our_inv.xml",format="stationxml")

In [None]:
# See also: https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html

# Existing metadata values can be changed. 
import obspy
# stn_meta[0].start_date ="1990-01-01"
# stn_meta[0].end_date ="2029-07-01"

# print(stn_meta[0].start_date)
# print(stn_meta[0].end_date)

In [39]:
# New metadata (eg, Equipment) can be added 
# See https://gajira.atlassian.net/browse/PV-311

my_sensor=obspy.core.inventory.util.Equipment(type="Sensor", description="Nanometrics Trillium Compact 120s",serial_number="004940")
my_digitizer = obspy.core.inventory.util.Equipment(type="Digitizer", description="Guralp Minimus",serial_number="MIN-A456")

stn_meta[0].equipments = [my_sensor, my_digitizer]
print(stn_meta[0].equipments)

[<obspy.core.inventory.util.Equipment object at 0x7f4660eeca10>, <obspy.core.inventory.util.Equipment object at 0x7f4660eec9d0>]


In [40]:
print(stn_meta[0].extra)
# Re-wrtite out the inventory

our_inv.write("our_inv2.xml",format="stationxml")

AttribDict({'GAMetadata': AttribDict({'namespace': 'https://github.com/GeoscienceAustralia/hiperseis', 'value': '{\n  "network": "OA",\n  "station": "CE28",\n  "GPS_CORRECTION": [\n    {\n      "date": "2017-12-01",\n      "clock_correction": 0.269773702\n    },\n    {\n      "date": "2017-12-02",\n      "clock_correction": 0.2650179231\n    },\n    {\n      "date": "2017-12-03",\n      "clock_correction": 0.2602621441\n    },\n    {\n      "date": "2017-12-04",\n      "clock_correction": 0.2555063652\n    },\n    {\n      "date": "2017-12-05",\n      "clock_correction": 0.2507505863\n    },\n    {\n      "date": "2017-12-06",\n      "clock_correction": 0.2459948074\n    },\n    {\n      "date": "2017-12-07",\n      "clock_correction": 0.2412390284\n    },\n    {\n      "date": "2017-12-08",\n      "clock_correction": 0.2364832495\n    },\n    {\n      "date": "2017-12-09",\n      "clock_correction": 0.2317274706\n    },\n    {\n      "date": "2017-12-10",\n      "clock_correction": 0.

In [41]:
# compare of the two inventory files

!diff our_inv.xml our_inv2.xml

8,17c8,147
<     <Station code="CF28" startDate="2017-11-06T02:02:50.000000Z" endDate="2018-06-09T06:06:39.000000Z">
<       <GeoscienceAustralia:metadata_ga xmlns:GeoscienceAustralia="https://github.com/GeoscienceAustralia/hiperseis/xmlns/1.0">
< {
<     "network_code":"OA",
<     "station_code":"CF28",
< 
<     "orient_correction": {
<         "start_dt": "2017-11-07T09:07:34.930000Z",
<         "end_dt":   "2018-08-23T03:52:29.528000Z",
<         "azimuth_correction": -5.0
---
>     <Station code="CE28" startDate="2017-11-06T02:31:42.000000Z" endDate="2018-06-09T05:29:53.000000Z">
>       <GeoscienceAustralia:GAMetadata xmlns:GeoscienceAustralia="https://github.com/GeoscienceAustralia/hiperseis">{
>   "network": "OA",
>   "station": "CE28",
>   "GPS_CORRECTION": [
>     {
>       "date": "2017-12-01",
>       "clock_correction": 0.269773702
>     },
>     {
>       "date": "2017-12-02",
>       "clock_correction": 0.2650179231
>     },
>     {
>       "date

# Dealing with multiple station-nodes in a network.station inventory file

See Jira: https://gajira.atlassian.net/browse/PV-130

 What if there are more than one Station Node, such as AU.HTT in testdata/network_AU_0.xml. 
 And our_new_station_xml ="OA.CF28_station_inv_modified_json.xml"
 
The multple station code represent the same station with different channel configuraiton over different time periods.
 

In [27]:

# create the modified xml file by running
# python  add_time_corrections.py  /g/data/ha3/Passive/SHARED_DATA/GPS_Clock/corrections/AU.HTT_clock_correction.csv ../../tests/testdata/network_AU_0.xml 

def inspect_stations( our_new_station_xml ):
    
    our_inv = read_inventory(our_new_station_xml,format='STATIONXML')

    #csv_str = our_inv.networks[0].stations[0].extra.gpsclockcorrection.value

    stations =our_inv.networks[0].stations
    
    if len(stations)>=2:
        print (our_new_station_xml)
        print ("Warning: more than one Station Node = %s" %len(stations))
    
    return stations


In [None]:
#our_new_station_xml ="/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/AU.HTT_station_inv_modified.xml"
#inspect_stations(our_new_station_xml)

In [30]:

import os, glob

xmlfolder = "/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/"
xmlfolder = "/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML"

for axmlfile in glob.glob(os.path.join(xmlfolder,"*.xml")):
    
    stations = inspect_stations(axmlfile)
    
#     if len(stations)>=2:
#         for astation in stations:
#             print("### ", astation.code, astation.extra)

/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CG21_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BY25_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CF28_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CD21_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CC25_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BV24_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BW27_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CH21_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML

/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BW23_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CJ27_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CF27_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BU24_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BX25_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BW28_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BZ23_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BV28_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML

/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BX22_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BU22_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BU28_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BX27_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BW25_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.CA28_station_metadata_JSON.xml
/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA.BS25_station_metadata_JSON.xml


# Check the output statoinXML file from ASDF

In [45]:
# Check the output statoinXML file from ASDF

# We have added the input station xml into an ASDF file, then extract it out. See pyasdf_tests.ipynb
# https://github.com/SeismicData/pyasdf/issues/63

    
from obspy import read_inventory

our_new_station_xml ="OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml"
our_inv = read_inventory(our_new_station_xml,format='STATIONXML')
# print(our_inv.networks[0].stations[0].extra)

stn_meta0 = our_inv.networks[0].stations[0]


print(type(stn_meta0.extra))
print(stn_meta0.extra)

print(stn_meta0.start_date)
print(stn_meta0.end_date)

print(stn_meta0.equipments)

FileNotFoundError: [Errno 2] No such file or directory: 'OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml'

# https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html

In [50]:
import obspy
from obspy.core.inventory import Inventory, Network, Station, Channel, Site
from obspy.clients.nrl import NRL


# We'll first create all the various objects. These strongly follow the
# hierarchy of StationXML files.
inv = Inventory(
    # We'll add networks later.
    networks=[],
    # The source should be the id whoever create the file.
    source="ObsPy-Tutorial")

net = Network(
    # This is the network code according to the SEED standard.
    code="XX",
    # A list of stations. We'll add one later.
    stations=[],
    description="A test stations.",
    # Start-and end dates are optional.
    start_date=obspy.UTCDateTime(2016, 1, 2))

sta = Station(
    # This is the station code according to the SEED standard.
    code="ABC",
    latitude=1.0,
    longitude=2.0,
    elevation=345.0,
    creation_date=obspy.UTCDateTime(2020, 1, 2),
    site=Site(name="First station"))

sta.start_date=obspy.UTCDateTime(2020, 1, 3)
sta.end_date=obspy.UTCDateTime(2020, 12, 15)
# results: <Station code="ABC" startDate="2020-01-03T00:00:00.000000Z" endDate="2020-12-15T00:00:00.000000Z">


cha = Channel(
    # This is the channel code according to the SEED standard.
    code="HHZ",
    # This is the location code according to the SEED standard.
    location_code="",
    # Note that these coordinates can differ from the station coordinates.
    latitude=1.0,
    longitude=2.0,
    elevation=345.0,
    depth=10.0,
    azimuth=0.0,
    dip=-90.0,
    sample_rate=200)

# By default this accesses the NRL online. Offline copies of the NRL can
# also be used instead
nrl = NRL()
# The contents of the NRL can be explored interactively in a Python prompt,
# see API documentation of NRL submodule:
# http://docs.obspy.org/packages/obspy.clients.nrl.html
# Here we assume that the end point of data logger and sensor are already
# known:
response = nrl.get_response( # doctest: +SKIP
    sensor_keys=['Streckeisen', 'STS-1', '360 seconds'],
    datalogger_keys=['REF TEK', 'RT 130 & 130-SMA', '1', '200'])


# Now tie it all together.
cha.response = response
sta.channels.append(cha)
net.stations.append(sta)
inv.networks.append(net)

# And finally write it to a StationXML file. We also force a validation against
# the StationXML schema to ensure it produces a valid StationXML file.
#
# Note that it is also possible to serialize to any of the other inventory
# output formats ObsPy supports.
inv.write("station.xml", format="stationxml", validate=True)