# Pride Workflow
This notebook is used to develop and test the steps to generate PPP solutions efficiently

In [1]:
import os
from pathlib import Path
import pandas as pd

from es_sfgtools.pipeline import DataHandler
from es_sfgtools.utils.archive_pull import (
    list_survey_files
    )

## Step 1. Initial Setup


#### Browse available surveys from the community archive and select target
- Locate the survey of interest in https://gage-data.earthscope.org/archive/seafloor, and note the `network`, `station`, and `survey` names, which will be input in the cell below.
- In order to use this notebook to process new surveys, the data must first be submitted and made available from the community archive 

# Step 2. Inventory available data and its location

In [2]:
network='aleutian'

#SV2 test 
# site='SPT1'
# survey='2020_A_UOH1'

#SV3 test
site='SEM1'
survey='2022_A_1049'

#### USE THE FOLLOWING DEFAULTS UNLESS DESIRED####

# Set data directory
data_dir = Path(f"{os.getcwd()}/data")

data_handler = DataHandler(network=network, station=site, survey=survey, data_dir=data_dir)

Data Handler initialized, data will be stored in /Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049


In [3]:
%%time
# Generate a list of files available from remote archive
#TODO: implement options for raw vs intermediate vs processed 
remote_filepaths = list_survey_files(network=network, station=site, survey=survey, show_details=True)

Found under https://gage-data.earthscope.org/archive/seafloor/aleutian/SEM1/2022_A_1049/raw:
    132 novatel000 file(s)
    54 novatel770 file(s)
    9 dfop00 file(s)
    1 master file(s)
    1 lever_arms file(s)
    2 ctd file(s)
CPU times: user 14.6 ms, sys: 7.1 ms, total: 21.7 ms
Wall time: 1.71 s


In [4]:
# See what files exist locally
data_type_counts = data_handler.get_dtype_counts()
print(f"Local data directory contains the following:")
for item in data_type_counts.items():
    print(f"    {item[0]}: {item[1]}")

Local data directory contains the following:
    novatel770: 54
    rinex: 54


## Step 3. Pull data from remote archive

In [5]:
#Add found remote files to the local catalog.  Note this builds an inventory, 
#but does not do the downloading until a later step.
# TODO: Detail counts of files local vs only remote
data_handler.add_data_remote(remote_filepaths=remote_filepaths)


Total remote files found 66
Total files tracked in catalog 120
No new files found to add


In [6]:
# Download the files by type
#data_handler.download_data(file_type='novatel', show_details=False)
data_handler.download_data(file_type='novatel770', show_details=False)


No new files of type novatel770 to download


In [7]:
# generate 1 rinex file per raw file, had to hack the rinex2 names to add minutesecond 
# in case there are multiple files starting in the same hour
data_handler.process_rinex(override=True, show_details=False)

No unprocessed data found in catalog for types ['pin']
Processing 54 Parent Files to rinex Data
/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_031933_00204_NOV770.raw
/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_064617_00204_NOV770.raw
/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_101847_00204_NOV770.raw
/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_134658_00204_NOV770.raw
/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_172439_00204_NOV770.raw
/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_210146_00204_NOV770.raw
/Users/gottlieb/working/GIT/seafloor_geo

Processing novatel770 To rinex:   0%|          | 0/54 [00:00<?, ?it/s]

{'id': 216, 'network': 'aleutian', 'station': 'SEM1', 'survey': '2022_A_1049', 'remote_path': 'https://gage-data.earthscope.org/archive/seafloor/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_031933_00204_NOV770.raw', 'remote_type': 'http', 'local_path': '/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/raw/329653_001_20220708_031933_00204_NOV770.raw', 'type': 'novatel770', 'timestamp_data_start': datetime.datetime(2022, 7, 8, 3, 20, 7), 'timestamp_data_end': datetime.datetime(2022, 7, 8, 6, 46, 34), 'timestamp_created': datetime.datetime(2024, 9, 16, 12, 55, 16, 734065), 'parent_id': None}
{'network': 'aleutian', 'station': 'SEM1', 'survey': '2022_A_1049', 'local_path': '/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/intermediate/SEM1_20220708031933.rinex', 'type': 'rinex', 'timestamp_data_start': datetime.datetime(2022, 7, 8, 3, 20, 7), 'timestamp_data_end': datetime.datetime(2022, 7, 8

In [34]:
# combine into daily rinex files using TEQC
#teqc +obs + -tbin 1d IVB1 *.18O
import subprocess
cmd = [
   "teqc", "+obs", "+", "-tbin", "1d", site, "*0708*.rinex"
]
#result = subprocess.run(cmd, check=True, shell=True, capture_output=True, cwd=data_handler.inter_dir)
with subprocess.Popen(cmd, shell=True, cwd=data_handler.inter_dir, stdout=subprocess.PIPE) as p:
    while True:
        # Use read1() instead of read() or Popen.communicate() as both blocks until EOF
        # https://docs.python.org/3/library/io.html#io.BufferedIOBase.read1
        text = p.stdout.read1().decode("utf-8")
        print(text, end='', flush=True)

# def run_command(cmd):
#     process = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True, cwd=data_handler.inter_dir)
#     while True:
#         output = process.stdout.readline()
#         if output == '' and process.poll() is not None:
#             break
#         if output:
#             print(output.strip())
#     rc = process.poll()
#     return rc
# run_command(cmd)

KeyboardInterrupt: 

In [22]:
result.stderr.decode()

"teqc: ! Error ! cannot fopen argument file '*.rinex' in r(ead) mode ... exiting\n"

In [30]:
# run pride on daily rinex\
str(data_handler.inter_dir)

'/Users/gottlieb/working/GIT/seafloor_geodesy_notebooks/notebooks/data/aleutian/SEM1/2022_A_1049/intermediate'