# Copy and reformat hydrophone data from Rob's hard drive
## Data will be stored in day-long MSEED files in digital counts to save space

In [1]:
# Import modules
from os.path import join, exists, basename
from os import makedirs
from obspy import read, read_inventory, Stream, Trace
from glob import glob
from numpy import amax
from matplotlib.pyplot import figure, subplots
from pandas import to_datetime
from sys import getsizeof

from utils_basic import ROOTDIR_HYDRO, HYDRO_STATIONS, HYDRO_LOCATIONS, NETWORK

In [2]:
# Inputs

## Network code taken from the project-wide settings
network = NETWORK

## Root directories: where the day files are read from and where the
## reformatted files are written to
indir_root = "/Volumes/Oman4/Hydrophones/DAYS"
outdir_root = ROOTDIR_HYDRO

## Write over existing files or not
write_over = True

In [4]:

## Process each MSEED file
## For every station, each input day file is read, the trace with the most
## samples is kept, and it is saved as
## <outdir_root>/<YYYY-MM-DD>/<network>.<station>.<location>.<channel>.mseed
stations = HYDRO_STATIONS
locations = HYDRO_LOCATIONS

for station in stations:
    indir_sta = join(indir_root, station)

    ## Sort the paths so that the files are processed in a reproducible order
    paths = sorted(glob(join(indir_sta, f"{station}*")))

    for path in paths:
        print(f"Processing {path}...")

        ## Get the SEED ID and time window from the file name, which is
        ## expected to have at least six dot-separated fields, e.g.,
        ## A00.7F.01.GDH.2019.121 (fields 2-5 are location, channel, year,
        ## and day of year)
        filename_in = basename(path)
        fields = filename_in.split(".")
        if len(fields) < 6:
            ## The glob pattern only constrains the prefix, so skip anything
            ## that does not follow the naming scheme instead of crashing
            print(f"File name {filename_in} does not have the expected format. Skipping...")
            continue

        location, channel, year, day_of_year = fields[2:6]
        filename_out = f"{network}.{station}.{location}.{channel}.mseed"

        ## Get the month and day from the day of year
        dateobj = to_datetime(f"{year}.{day_of_year}", format="%Y.%j")
        month = dateobj.strftime("%m")
        day = dateobj.strftime("%d")
        dirname_day = f"{year}-{month}-{day}"

        ## Build the output directory and path once; they are used by both
        ## the existence check and the final write
        outdir_day = join(outdir_root, dirname_day)
        outpath = join(outdir_day, filename_out)

        ## Check if the file already exists
        if not write_over and exists(outpath):
            print(f"File {outpath} already exists. Skipping...")
            continue

        ## Read the MSEED file
        print(f"Reading {path}...")
        stream_in = read(path)

        ## Nothing to save if the file contains no traces; skipping avoids
        ## writing a placeholder trace that carries no metadata
        if len(stream_in) == 0:
            print(f"No traces found in {path}. Skipping...")
            continue

        ## Select the trace with the most samples (the traces are NOT merged);
        ## in case of a tie, the first such trace in the stream is kept
        trace_out = max(stream_in, key=lambda trace: trace.stats.npts)

        ## Create the output directory for the day if needed
        if not exists(outdir_day):
            makedirs(outdir_day)
            print(f"Created directory {outdir_day}")

        trace_out.write(outpath, format="MSEED")
        print(f"Saved {outpath}")


Processing /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.121...
Reading /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.121...
Created directory /Volumes/OmanData/data/hydrophones/2019-05-01
Saved /Volumes/OmanData/data/hydrophones/2019-05-01/7F.A00.01.GDH.mseed
Processing /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.122...
Reading /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.122...
Created directory /Volumes/OmanData/data/hydrophones/2019-05-02
Saved /Volumes/OmanData/data/hydrophones/2019-05-02/7F.A00.01.GDH.mseed
Processing /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.123...
Reading /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.123...
Created directory /Volumes/OmanData/data/hydrophones/2019-05-03
Saved /Volumes/OmanData/data/hydrophones/2019-05-03/7F.A00.01.GDH.mseed
Processing /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.124...
Reading /Volumes/Oman4/Hydrophones/DAYS/A00/A00.7F.01.GDH.2019.124...
Created directory /V