# Use ObsPy to read and write station inventory files

- Read station XML files with extra metadata in JSON format
- Modify existing metadata values
- Add new metadata items such as Equipment

## Fei Zhang

* CreationDate: 2020-07-14
* LastUpdated:  2020-09-10

# Ref:

* https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html
* https://docs.obspy.org/packages/autogen/obspy.core.inventory.util.Equipment.__init__.html


In [1]:
!pwd

/g/data/ha3/fxz547/Githubz/hiperseis/notebooks


In [2]:
import os
import sys
from obspy import read_inventory
from obspy.core import UTCDateTime

# Candidate input locations. Each later assignment overrides the earlier one,
# so only the LAST value assigned to a name is actually used.
dirpath = "/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML"  # if in NCI
dirpath="/Datasets/Station_Extra_Metadata/NewInventoryXML"  # the dir where input_station xml file located
# NOTE(review): this file name says "CE28" whereas the station selected later
# in the notebook is "CF28" -- confirm which is intended.
xmlfile_name= "OA.CE28_station_metadata_JSON.xml"  

# Path built from dirpath/xmlfile_name ...
input_station_xml = os.path.join(dirpath, xmlfile_name)
# ... which is immediately replaced by this hard-coded absolute path, so the
# dirpath and xmlfile_name values above have no effect on what is read.
input_station_xml ="/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML/OA_stations_2017-2018_new.xml"


# VS extracted from ASDF
#input_station_xml ="OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml"

# Parse the StationXML file into an ObsPy Inventory used by the cells below.
our_inv = read_inventory(input_station_xml,format='STATIONXML')


# the first network's stations
# stn_meta = our_inv.networks[0].stations


In [10]:
stn_meta=our_inv.select(network="OA",station="CF28")  # station code name "CF28"

In [18]:

selected_stations = stn_meta.networks[0].stations

print (stn_meta.networks[0].stations)

[<obspy.core.inventory.station.Station object at 0x7f4bbef57e90>, <obspy.core.inventory.station.Station object at 0x7f4bbef57a90>, <obspy.core.inventory.station.Station object at 0x7f4bbef57990>]


In [22]:
print(type(selected_stations[0].extra))


<class 'obspy.core.util.attribdict.AttribDict'>


In [23]:
# print(len(stn_meta))

print("type", type(selected_stations[0].start_date))
print(selected_stations[0].start_date)
print(selected_stations[0].end_date)

print(selected_stations[0].equipments)
print(selected_stations[0].creation_date, selected_stations[0].termination_date)
#print(stn_meta[0].extra) 

type <class 'obspy.core.utcdatetime.UTCDateTime'>
2017-11-06T02:02:50.000000Z
2018-06-09T06:06:39.000000Z
[<obspy.core.inventory.util.Equipment object at 0x7f4bbf329350>, <obspy.core.inventory.util.Equipment object at 0x7f4bbf3293d0>]
2017-11-06T02:02:50.000000Z 2018-06-09T06:06:39.000000Z


In [24]:
# extra_meta = our_inv.networks[0].stations[0].extra.gpsclockcorrection.value

#extra_meta = stn_meta[2].extra.metadata_ga.value
extra_meta = selected_stations[0].extra.GAMetadata.value

print(type(extra_meta))

<class 'str'>


In [25]:
print(extra_meta)

{
  "network": "OA",
  "station": "CF28",
  "GPS_CORRECTION": [
    {
      "date": "2018-01-04",
      "clock_correction": -1.3375814425
    },
    {
      "date": "2018-01-05",
      "clock_correction": -1.1104495647
    },
    {
      "date": "2018-01-06",
      "clock_correction": -0.9032476255
    },
    {
      "date": "2018-01-07",
      "clock_correction": -0.715187057
    },
    {
      "date": "2018-01-08",
      "clock_correction": -0.5454903733
    },
    {
      "date": "2018-01-09",
      "clock_correction": -0.393391171
    },
    {
      "date": "2018-01-10",
      "clock_correction": -0.258134129
    },
    {
      "date": "2018-01-11",
      "clock_correction": -0.1389750085
    },
    {
      "date": "2018-01-12",
      "clock_correction": -0.035180653
    },
    {
      "date": "2018-01-13",
      "clock_correction": 0.0539710115
    },
    {
      "date": "2018-01-14",
      "clock_correction": 0.1291909768
    },
    {
      "date": "2018-01-15",
      "clock_corr

In [26]:
import json

# Decode the JSON text held in the station's extra metadata.
mdata = json.loads(extra_meta)

# Top-level keys of the decoded metadata.
print(mdata.keys())

gps_corrections = mdata['GPS_CORRECTION']
print(type(gps_corrections))

# One output line per correction entry: date, correction value, literal label.
for entry in gps_corrections:
    print(entry["date"], entry["clock_correction"], "seconds")

dict_keys(['network', 'station', 'GPS_CORRECTION', 'ORIENT_CORRECTION'])
<class 'list'>
2018-01-04 -1.3375814425 seconds
2018-01-05 -1.1104495647 seconds
2018-01-06 -0.9032476255 seconds
2018-01-07 -0.715187057 seconds
2018-01-08 -0.5454903733 seconds
2018-01-09 -0.393391171 seconds
2018-01-10 -0.258134129 seconds
2018-01-11 -0.1389750085 seconds
2018-01-12 -0.035180653 seconds
2018-01-13 0.0539710115 seconds
2018-01-14 0.1291909768 seconds
2018-01-15 0.1911791523 seconds
2018-01-16 0.240624365 seconds
2018-01-17 0.2782043599 seconds
2018-01-18 0.3045857992 seconds
2018-01-19 0.320424263 seconds
2018-01-20 0.3263642491 seconds
2018-01-21 0.3230391729 seconds
2018-01-22 0.3110713674 seconds
2018-01-23 0.2910720833 seconds
2018-01-24 0.263641489 seconds
2018-01-25 0.2293686706 seconds
2018-01-26 0.1888316318 seconds
2018-01-27 0.142597294 seconds
2018-01-28 0.0912214961 seconds
2018-01-29 0.0352489949 seconds
2018-01-30 -0.0247865352 seconds
2018-01-31 -0.0883625023 seconds
2018-02-01 -0

In [27]:

print (mdata["ORIENT_CORRECTION"])

{'start_dt': '2017-11-07T09:07:34.930000Z', 'end_dt': '2018-08-23T03:52:29.528000Z', 'azimuth_correction': -5.0}


In [28]:
# https://pbpython.com/pandas-list-dict.html

import pandas as pd

df_clock_correction = pd.DataFrame(mdata['GPS_CORRECTION'])
df_clock_correction.head()

Unnamed: 0,date,clock_correction
0,2018-01-04,-1.337581
1,2018-01-05,-1.11045
2,2018-01-06,-0.903248
3,2018-01-07,-0.715187
4,2018-01-08,-0.54549


In [29]:
print (df_clock_correction.tail())

           date  clock_correction
151  2018-06-04         38.110867
152  2018-06-05         39.022514
153  2018-06-06         39.948762
154  2018-06-07         40.889832
155  2018-06-08         41.845951


In [30]:
# get correction for a certain date YYYY-MM-DD
print(df_clock_correction.loc[df_clock_correction["date"] =="2018-06-06"])

           date  clock_correction
153  2018-06-06         39.948762


In [None]:
type(df_clock_correction.query('date == "2018-06-06" '))

In [None]:
df_clock_correction["date"] =="2018-06-06"

In [None]:
df_clock_correction.query('date == "2018-06-06"').clock_correction

In [None]:
df_clock_correction.describe()


In [None]:
df_clock_correction.dtypes

In [None]:
# What if there is more than one Station node, as for AU.HTT in testdata/network_AU_0.xml, or for OA.CF28?
stations =our_inv.networks[0].stations
print (len(stations))

In [None]:
# Write out the inventory, keeping the custom-namespace extra metadata.

#GA_NameSpace = "https://github.com/GeoscienceAustralia/hiperseis"

# stn_meta is the Inventory returned by our_inv.select(...), so stn_meta[0]
# is a Network, not a Station. The station-level extra metadata is read from
# the selected Station objects instead.
# How to get the extra's key (expected to be 'GAMetadata'):
extram = list(selected_stations[0].extra)[0]
print(extram)

# Namespace the extra tag was read with; reused so the tag is written back
# under the same namespace.
GA_NameSpace = selected_stations[0].extra['GAMetadata']['namespace']

our_inv.write("our_inv.xml", format="stationxml", nsmap={'GeoscienceAustralia': GA_NameSpace})

In [None]:
!ls -lt *.xml 

In [None]:
my_cmd = "diff our_inv.xml " + input_station_xml
print(my_cmd)

In [None]:
os.system(my_cmd)  # =0 if success

In [None]:
# dir(our_inv)

type(our_inv)

In [None]:
# Try to analyze the extra metadata tags.
# stn_meta is an Inventory, so stn_meta[0] would be a Network; the extra
# metadata inspected here belongs to the selected Station.
print(type(selected_stations[0].extra))
print(dir(selected_stations[0].extra))
selected_stations[0].extra['GAMetadata']['namespace']

In [None]:
# First key of the selected Station's extra metadata (stn_meta[0] would be a Network).
list(selected_stations[0].extra)[0]

# Modify the station XML file to include Equipments

In [None]:
# See also: https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html

# Existing metadata values can be changed. 
import obspy
# stn_meta[0].start_date ="1990-01-01"
# stn_meta[0].end_date ="2029-07-01"

# print(stn_meta[0].start_date)
# print(stn_meta[0].end_date)

In [None]:
# New metadata (eg, Equipment) can be added
# See https://gajira.atlassian.net/browse/PV-311

my_sensor = obspy.core.inventory.util.Equipment(type="Sensor", description="Nanometrics Trillium Compact 120s", serial_number="004940")
my_digitizer = obspy.core.inventory.util.Equipment(type="Digitizer", description="Guralp Minimus", serial_number="MIN-A456")

# Equipment belongs to a Station. stn_meta is an Inventory, so stn_meta[0]
# would be a Network; assign to the selected Station instead.
# NOTE(review): Inventory.select() is documented as returning shallow copies,
# so rebinding .equipments here may not be reflected in our_inv -- confirm
# before relying on a later our_inv.write(...) to contain this change.
selected_stations[0].equipments = [my_sensor, my_digitizer]
print(selected_stations[0].equipments)

In [None]:
# Show the extra metadata of the selected Station
# (stn_meta[0] would be a Network, because stn_meta is an Inventory).
print(selected_stations[0].extra)


# Re-write out the inventory

# GA_NameSpace is the namespace string obtained in the earlier write-out cell.
our_inv.write("our_inv2.xml", format="stationxml", nsmap={'GeoscienceAustralia': GA_NameSpace})

In [None]:
# compare of the two inventory files

!diff our_inv.xml our_inv2.xml

In [None]:
#STOP!!

# Dealing with multiple station-nodes in a network.station inventory file

See Jira: https://gajira.atlassian.net/browse/PV-130

What if there is more than one Station node, as for AU.HTT in testdata/network_AU_0.xml,
or for input_station_xml = "OA.CF28_station_inv_modified_json.xml"?

The multiple Station nodes with the same station code represent the same station with different channel configurations over different time periods.
 

In [None]:

# create the modified xml file by running
# python  add_time_corrections.py  /g/data/ha3/Passive/SHARED_DATA/GPS_Clock/corrections/AU.HTT_clock_correction.csv ../../tests/testdata/network_AU_0.xml 

def inspect_stations(input_station_xml):
    """Read a StationXML file and report how many Station nodes its first network has.

    Prints the file path followed by a one-line summary, and returns the
    stations of the first network so the caller can inspect them further.

    Args:
        input_station_xml: path of the StationXML file to read.

    Returns:
        The list of Station objects of the first network in the file, or an
        empty list when the file contains no network at all.
    """
    inventory = read_inventory(input_station_xml, format='STATIONXML')
    print(input_station_xml)

    # Guard against an inventory without networks instead of failing with
    # IndexError on networks[0].
    if not inventory.networks:
        print("Warning: no Network node found")
        return []

    # Only the first network is examined.
    stations = inventory.networks[0].stations
    station_count = len(stations)

    if station_count >= 2:
        print("Warning: more than one Station Node = %s" % station_count, stations[0], stations[1])
    elif station_count == 1:
        print("Just one Station Node = %s" % station_count)
    else:
        # Previously an empty station list was reported as "Just one".
        print("Warning: no Station Node found")

    return stations


In [None]:
#input_station_xml ="/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/AU.HTT_station_inv_modified.xml"
#inspect_stations(input_station_xml)

In [None]:

import os, glob

# Candidate folders of StationXML files. Each assignment overrides the
# previous one, so only the last xmlfolder value is used by the loop below.
xmlfolder = "/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/StationXML_with_time_corrections2/"
xmlfolder = "/Datasets/Station_Extra_Metadata/NewInventoryXML" 
# dirpath # "/g/data/ha3/Passive/SHARED_DATA/Inventory/Station_Extra_Metadata/NewInventoryXML"
xmlfolder ="/Datasets/InventoryXml/OA_stations_2017-2018"

# Run inspect_stations on every *.xml file directly inside xmlfolder.
for axmlfile in glob.glob(os.path.join(xmlfolder,"*.xml")):
    
    stations = inspect_stations(axmlfile)
    
# Optional debug output (disabled): show each station's code and extra
# metadata for files that contain two or more Station nodes.
#     if len(stations)>=2:
#         for astation in stations:
#             print("### ", astation.code, astation.extra)

# Check the output StationXML file from ASDF

- to ensure it is identical to the ingested StationXML
- in particular, that the extra metadata in the station XML file can be recovered


In [None]:
!skip this section

In [None]:
# Check the output StationXML file from ASDF

# We have added the input station xml into an ASDF file, then extract it out. See pyasdf_tests.ipynb
# https://github.com/SeismicData/pyasdf/issues/63

    
from obspy import read_inventory

# NOTE: this rebinds input_station_xml and our_inv, replacing the values
# used by the earlier cells of the notebook.
input_station_xml ="OA.CF28_station_inv_modified_json_extracted_from_ASDF.xml"
our_inv = read_inventory(input_station_xml,format='STATIONXML')
# print(our_inv.networks[0].stations[0].extra)

# First Station node of the first network in the extracted file.
stn_meta0 = our_inv.networks[0].stations[0]


# Show the extra metadata (type and content) read back from the file.
print(type(stn_meta0.extra))
print(stn_meta0.extra)

# Show the station's start and end dates.
print(stn_meta0.start_date)
print(stn_meta0.end_date)

# Show the station's equipment list.
print(stn_meta0.equipments)