# Copy and reformat geophone data from Rob's hard drive
## Data will be stored in day-long MSEED files in digital counts to save space

In [13]:
# Import modules
from os.path import join, exists, basename
from os import makedirs
from obspy import read, read_inventory, Stream, Trace
from glob import glob
from numpy import amax
from matplotlib.pyplot import figure, subplots
from pandas import to_datetime
from sys import getsizeof

from utils_basic import ROOTDIR_GEO, METAPATH_HYDRO, HYDRO_STATIONS, HYDRO_LOCATIONS, NETWORK, get_geophone_locs

In [14]:
# Inputs

## Root directory holding one sub-directory of day files per station
indir_root = "/Volumes/Oman4/PASSCAL/Main_deployment/DAYS"

## Root directory receiving one sub-directory of converted files per day
outdir_root = ROOTDIR_GEO

## Network code used as the first field of the output file names
network = NETWORK

## True: rewrite output files that already exist; False: skip them
write_over = True

In [15]:
# Read the station information
## Only the "name" column of this table is used by the conversion loop,
## which iterates over it to get the station names
## (presumably a pandas DataFrame -- TODO confirm against utils_basic)
stadf = get_geophone_locs()

In [16]:

## Convert every day file of every station
for station in stadf["name"]:
    print(f"Processing station {station}...")

    ## The day files of a station sit in a directory named after the station
    indir_sta = join(indir_root, station)
    paths = sorted(glob(join(indir_sta, f"{station}*")))

    for path in paths:
        print(f"Processing {path}...")

        ## Decode the input file name (e.g., A01.7F..GH1.2020.010):
        ## field 3 is the channel, field 4 the year, and field 5 the day of year
        filename_in = basename(path)
        fields = filename_in.split(".")
        channel, year, day_of_year = fields[3], fields[4], fields[5]
        filename_out = f"{network}.{station}..{channel}.mseed"

        ## Convert the day of year to a calendar date, which names the output directory
        dateobj = to_datetime(f"{year}.{day_of_year}", format="%Y.%j")
        month, day = dateobj.strftime("%m"), dateobj.strftime("%d")
        dirname_day = f"{year}-{month}-{day}"

        ## Where the converted file goes
        outdir_day = join(outdir_root, dirname_day)
        outpath = join(outdir_day, filename_out)

        ## Leave existing output untouched unless overwriting was requested
        if not write_over and exists(outpath):
            print(f"File {outpath} already exists. Skipping...")
            continue

        ## Load the day file
        print(f"Reading {path}...")
        stream = read(path)
        numtra = len(stream)
        print(f"Read {numtra} traces.")

        ## A file holding several traces is merged in place; gaps are filled with zeros
        if numtra > 1:
            print("Merging overlapping traces...")
            stream.merge(fill_value=0)
            print(f"Merged to {len(stream)} traces.")

        ## Create the day directory the first time it is needed
        if not exists(outdir_day):
            makedirs(outdir_day)
            print(f"Created directory {outdir_day}")

        ## Save the result as MSEED
        stream.write(outpath, format="MSEED")
        print(f"Saved {outpath}")
        print("\n")


Processing station A01...
Processing /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.010...
Reading /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.010...
Read 1 traces.
Saved /Volumes/OmanData/data/geophones_new/2020-01-10/7F.A01..GH1.mseed


Processing /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.011...
Reading /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.011...
Read 1 traces.
Saved /Volumes/OmanData/data/geophones_new/2020-01-11/7F.A01..GH1.mseed


Processing /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.012...
Reading /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.012...
Read 1 traces.
Saved /Volumes/OmanData/data/geophones_new/2020-01-12/7F.A01..GH1.mseed


Processing /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.013...
Reading /Volumes/Oman4/PASSCAL/Main_deployment/DAYS/A01/A01.7F..GH1.2020.013...
Read 1 traces.
Saved /Volumes/OmanData/data/geophones_n