# Use Obspy to manage inventory XML files

## Fei Zhang
## created on 2020-09-18

- split a big xml file into multiple single-station xml files

- modify each station xml file with new GA-metadata

- concatenate the mutiple station xml files into one big xml file

In [1]:
import os
import obspy
import obspy
from obspy.core.inventory import Inventory, Network, Station, Channel, Site
from obspy.clients.nrl import NRL


In [2]:
print (obspy.__version__)

1.2.1


In [3]:
BIG_INVENTOTOR_FILE="/Datasets/InventoryXml/OA_stations_2017-2018.xml"

DIR4XML="/Datasets/InventoryXml/OA_stations_2017-2018"



In [4]:
if not os.path.exists(DIR4XML):
    os.mkdir(DIR4XML)

In [5]:
big_inv = obspy.read_inventory(BIG_INVENTOTOR_FILE) 


In [6]:
new_big_xml= BIG_INVENTOTOR_FILE.replace(".xml", "_new.xml")

print("New XML file = ", new_big_xml)


New XML file =  /Datasets/InventoryXml/OA_stations_2017-2018_new.xml


In [7]:
# Write the inventory into a new file to see if they are identical?

big_inv.write(new_big_xml, format="stationxml", validate=True)

# Inconsistency Problem:  Missing startDate for some stations grep "<Station" OA_stations2.xml 
#  $ grep "<Station" OA_stations_2017-2018_new.xml 
#     <Station code="BS24" startDate="2017-09-27T05:16:47.000000Z" endDate="2018-07-04T04:38:50.000000Z">
#     <Station code="BS25" endDate="2018-06-27T11:44:37.000000Z">
#     <Station code="BS26" startDate="2017-09-27T00:49:10.000000Z" endDate="2018-01-12T04:02:13.000000Z">
#     <Station code="BS27" startDate="2017-09-26T00:42:49.000000Z" endDate="2018-07-03T03:04:17.000000Z">
#     <Station code="BS28" startDate="2017-09-26T02:02:42.000000Z" endDate="2018-07-03T02:37:29.000000Z">
#     <Station code="BT23" startDate="2017-09-28T04:11:41.000000Z" endDate="2018-07-04T03:40:38.000000Z">
#     <Station code="BT24" startDate="2017-09-27T06:14:32.000000Z" endDate="2018-06-26T22:36:58.000000Z">
#     <Station code="BT25" startDate="2017-09-27T02:45:22.000000Z" endDate="2018-05-19T11:23:10.000000Z">
#     <Station code="BT26" startDate="2017-09-26T23:41:45.000000Z" endDate="2017-10-27T00:19:21.000000Z">


In [8]:
type(big_inv.networks)

list

In [9]:
for net in big_inv.networks:
    print(len(net.stations))
    print(net.stations[0].code)
    print(net.stations[-1].code)

328
BS24
CJ27


In [10]:
# Split the big inventory station XML file into multiple one-station.xml file

for a_net in big_inv.networks:
    for a_sta in a_net.stations:
        
        print(a_net.code, a_sta.code)  # This has 328 (OA, sta) pairs? not unique!!!
        
        a_inv = big_inv.select(network=a_net.code, station=a_sta.code)  #.copy()
        
        # modify station metadata
#         my_sensor=obspy.core.inventory.util.Equipment(type="Sensor", description="Nanometrics Trillium Compact 120s",serial_number="004940")
#         my_digitizer = obspy.core.inventory.util.Equipment(type="Digitizer", description="Guralp Minimus",serial_number="MIN-A456")
#         a_sta.equipments = [my_sensor, my_digitizer]
        
        sta_file_name = "%s_%s_station.xml"%(a_net.code, a_sta.code)
        
        outxml = os.path.join(DIR4XML, sta_file_name)
        a_inv.write(outxml,format="stationxml", validate=True) # nsmap={'GeoscienceAustralia': GA_NameSpace})
        # 119 OA_*_station.xml file written. Some re-written 3 times!!
# fzhang@zubuntu1804 /Datasets/InventoryXml/OA_stations_2017-2018 
#  $ ls *station.xml| wc
#     119     119    2380


    

OA BS24
OA BS25
OA BS26
OA BS27
OA BS28
OA BT23
OA BT24
OA BT25
OA BT26
OA BT27
OA BT28
OA BU22
OA BU23
OA BU24
OA BU25
OA BU26
OA BU27
OA BU28
OA BV21
OA BV22
OA BV23
OA BV24
OA BV26
OA BV27
OA BV28
OA BW20
OA BW21
OA BW22
OA BW23
OA BW24
OA BW25
OA BW26
OA BW27
OA BW28
OA BX20
OA BX21
OA BX22
OA BX23
OA BX24
OA BX25
OA BX26
OA BX27
OA BX28
OA BY20
OA BY21
OA BY22
OA BY23
OA BY24
OA BY25
OA BY26
OA BY27
OA BY28
OA BZ20
OA BZ21
OA BZ22
OA BZ23
OA BZ24
OA BZ25
OA BZ26
OA BZ27
OA BZ28
OA CA20
OA CA21
OA CA22
OA CA23
OA CA24
OA CA25
OA CA26
OA CA27
OA CA28
OA CB20
OA CB21
OA CB22
OA CB23
OA CB24
OA CB25
OA CB26
OA CB27
OA CB28
OA CC20
OA CC21
OA CC22
OA CC23
OA CC24
OA CC25
OA CC26
OA CC27
OA CC28
OA CD21
OA CD22
OA CD23
OA CD24
OA CD25
OA CD26
OA CD27
OA CD28
OA CE22
OA CE23
OA CE24
OA CE25
OA CE26
OA CE27
OA CE28
OA CF22
OA CF23
OA CF24
OA CF25
OA CF27
OA CF28
OA CG21
OA CG22
OA CG23
OA CH21
OA CH22
OA CH23
OA CI21
OA CI22
OA CI23
OA CJ27
OA BS24
OA BS25
OA BS27
OA BS28
OA BT23
OA BT24


In [11]:
# Construct a new inventory object of networks.
# This will use new obspy version and new attributes for Inventory
inv2 = Inventory(
    # We'll add networks later.
    networks=[],
    # The source should be the id whoever create the file.
    source="Geoscience Australia EFTF AusArray")

for a_net in big_inv.networks:
    # Re-write each network of the big inventory into the new object inv2 
    inv2.networks = []  
    inv2.networks.append(a_net)
    inv2.write(a_net.code+"_stations.xml", format="stationxml", validate=True)  # every Station got equipment
    
    print(len(a_net.stations))

328


# Modify the existing station XML files inclding new metadata

- add sensor digitizer
- add extra metadata: GPS correction
- add extra metadata: Orientation correction

In [12]:
# Construct a new inventory object of networks.
# This will use new obspy version and new attributes:
inv2 = Inventory(
    # We'll add networks later.
    networks=[],
    # The source should be the id whoever create the file.
    source="Geoscience Australia EFTF AusArray")


In [14]:
from extract_equipments_from_csv import  EquipmentExtractor
import json

my_equip_obj = EquipmentExtractor()

for a_net in big_inv.networks:
    
    print("The number of station-nodes in the network =", len(a_net.stations))
    
    for a_sta in a_net.stations:
        # print(a_net.code, a_sta.code)  # this contains non-unique pairs

        
        a_inv = big_inv.select(network=a_net.code,station=a_sta.code).copy()  # .copy appears to have no effect here
        
        # print (a_sta.code, " stations has %s channels"%len(a_sta))
        
        _sensors = my_equip_obj.get_sensors(a_net.code, a_sta.code)
        if len(_sensors)>0:
            sensor_desc =_sensors[0].get("Description")
            sensor_sernumb =_sensors[0].get("SerNumber")
        else:
            print("Warning: No sensors for (%s,%s)"%(a_net.code, a_sta.code))
            sensor_desc = "NA Sensor for (%s,%s)"%(a_net.code, a_sta.code)
            sensor_sernumb = "NA"
            
        _digitizers = my_equip_obj.get_digitizer(a_net.code, a_sta.code)
        if len(_digitizers)>0:
            dig_desc =_digitizers[0].get("Description")
            dig_sernumb = _digitizers[0].get("SerNumber")
        else:
            print("Warning: No digitizer for (%s,%s)"%(a_net.code, a_sta.code))
            dig_desc = "NA Digitizer for (%s,%s)"%(a_net.code, a_sta.code)
            dig_sernumb = "NA"
         
        
        # modify station metadata
        my_sensor=obspy.core.inventory.util.Equipment(type="Sensor", description=sensor_desc,serial_number=sensor_sernumb)
        
        #my_digitizer = obspy.core.inventory.util.Equipment(type="Digitizer", description="Guralp Minimus",serial_number="MIN-A456")
        my_digitizer = obspy.core.inventory.util.Equipment(type="Digitizer", description=dig_desc,serial_number=dig_sernumb)

        a_sta.equipments = [my_sensor, my_digitizer]
              
#         sta_file_name2 = "%s_%s_station2.xml"%(a_net.code, a_sta.code)  
#         # OA_CE28 was written 3-times!!!!!! due to multiple (OA,CE28)-station-nodes 
#         There will be 119 xml files written in this loop of 328 items. However, the final results missed 119 equipments!!
#         outxml2 = os.path.join(DIR4XML, sta_file_name2)

#         inv2.networks = a_inv.networks

#         inv2.write(outxml2,format="stationxml", validate=True) # nsmap={'GeoscienceAustralia': GA_NameSpace})

    # the big inventory in new object inv2 writing out -- appears OK 328 Sensor and Digitizer
    inv2.networks = []  
    inv2.networks.append(a_net)
    inv2.write(a_net.code+"_stations2.xml", format="stationxml", validate=True)  # every Station got equipment
    
    # The original write out again now:
    
    big_inv.write(a_net.code+"_stations_post.xml", format="stationxml", validate=True) # also has the Sensors etc
    

The number of station-nodes in the network = 328


# TODOs:  

## for each (net, sta):
* get the real Sensor and Ditizer from OA_sensors_digitizers.csv 

* get the GPS corrections

* get the orientation corrections

###

(hiperseis) fzhang@zubuntu1804 ~/Githubz/hiperseis/seismic/inventory/sandbox (develop) 
 $ grep "Guralp Minimus"  OA_sensors_digitizers.csv | wc
    262     524   10856
(hiperseis) fzhang@zubuntu1804 ~/Githubz/hiperseis/seismic/inventory/sandbox (develop) 
 $ grep "120s"  OA_sensors_digitizers.csv | wc
    218     872   12654
(hiperseis) fzhang@zubuntu1804 ~/Githubz/hiperseis/seismic/inventory/sandbox (develop) 
 $ wc OA_sensors_digitizers.csv
  481  1397 23683 OA_sensors_digitizers.csv
(hiperseis) fzhang@zubuntu1804 ~/Githubz/hiperseis/seismic/inventory/sandbox (develop) 
 $ grep BV26 OA_sensors_digitizers.csv
33850940,OABV26,,Guralp Minimus,MIN-4556
33850940,OABV26,,Nanometrics Trillium Compact 120s,004768


# How to merge the individual xml files into One?

## use new obspy version

In [None]:
stop here

# How to create a new station xml file

Adapted from https://docs.obspy.org/tutorial/code_snippets/stationxml_file_from_scratch.html


In [None]:
import obspy
from obspy.core.inventory import Inventory, Network, Station, Channel, Site
from obspy.clients.nrl import NRL


def create_new_inventory(out_file_name="new_inventory.xml"):

    # We'll first create all the various objects. These strongly follow the
    # hierarchy of StationXML files.
    inv = Inventory(
        # We'll add networks later.
        networks=[],
        # The source should be the id whoever create the file.
        source="Geoscience Australia EFTF Program AusArray")

    net = Network(
        # This is the network code according to the SEED standard.
        code="XX",
        # A list of stations. We'll add one later.
        stations=[],
        description="A test stations.",
        # Start-and end dates are optional.
        start_date=obspy.UTCDateTime(2016, 1, 2))

    sta = Station(
        # This is the station code according to the SEED standard.
        code="ABC",
        latitude=1.0,
        longitude=2.0,
        elevation=345.0,

        creation_date=obspy.UTCDateTime(2016, 1, 2), # <CreationDate>2016-01-02T00:00:00.000000Z</CreationDate>

        start_date = obspy.UTCDateTime(2016, 1, 3),
        end_date = obspy.UTCDateTime(2020, 1, 3),
        #<Station code="ABC" startDate="2016-01-03T00:00:00.000000Z" endDate="2020-01-03T00:00:00.000000Z">
        site=Site(name="First station"))

    cha = Channel(
        # This is the channel code according to the SEED standard.
        code="HHZ",
        # This is the location code according to the SEED standard.
        location_code="",
        # Note that these coordinates can differ from the station coordinates.
        latitude=1.0,
        longitude=2.0,
        elevation=345.0,
        depth=10.0,
        azimuth=0.0,
        dip=-90.0,
        sample_rate=200)

    # By default this accesses the NRL online. Offline copies of the NRL can
    # also be used instead
    nrl = NRL()
    # The contents of the NRL can be explored interactively in a Python prompt,
    # see API documentation of NRL submodule:
    # http://docs.obspy.org/packages/obspy.clients.nrl.html
    # Here we assume that the end point of data logger and sensor are already
    # known:
    response = nrl.get_response( # doctest: +SKIP
        sensor_keys=['Streckeisen', 'STS-1', '360 seconds'],
        datalogger_keys=['REF TEK', 'RT 130 & 130-SMA', '1', '200'])


    # Now tie it all together.
    cha.response = response
    sta.channels.append(cha)
    net.stations.append(sta)
    inv.networks.append(net)

    # And finally write it to a StationXML file. We also force a validation against
    # the StationXML schema to ensure it produces a valid StationXML file.
    #
    # Note that it is also possible to serialize to any of the other inventory
    # output formats ObsPy supports.
    inv.write(out_file_name, format="stationxml", validate=True)

In [None]:
create_new_inventory()

In [None]:
2.27*1.09

In [None]:
2800*0.25 + 15000*0.08 + 200 + 200