Ocean-Data-Lab · John-Ragland · Jul 19, 2023 · Jun 27, 2023 · Jun 27, 2023 · Jul 7, 2023
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -23,12 +23,13 @@ First Steps
     demos
 
 
-Modules Documentation
-^^^^^^^^^^^^^^^^^^^^^
+API Documentation
+^^^^^^^^^^^^^^^^^
 .. toctree::
    :maxdepth: 2
 
    hydrophone
    ctd
    request
    tools
+   scripts
diff --git a/docs/source/install_instructions.rst b/docs/source/install_instructions.rst
@@ -12,7 +12,7 @@ OOIPY is available on PyPI.
 
 Install Instruction
 -------------------
-OOIPY is designed to run on Python 3.8.5. To install OOIPY, run the following command.
+OOIPY is designed to run on Python 3.7 or newer. To install OOIPY, run the following command.
 
 .. code-block :: bash
 

diff --git a/docs/source/scripts.rst b/docs/source/scripts.rst
@@ -0,0 +1,27 @@
+.. image:: ../../imgs/ooipy_banner2.png
+  :width: 700
+  :alt: OOIPY Logo
+  :align: left
+
+Scripts
+=======
+A collection of useful scripts is provided for downloading and processing
+data. The scripts are included in the package when it is installed from PyPI,
+but can also be downloaded and run individually from the `GitHub repo <https://github.com/Ocean-Data-Lab/ooipy/tree/master/ooipy/scripts/>`_.
+
+download_hydrophone_data
+************************
+location: ooipy/scripts/download_hydrophone_data.py
+
+This script downloads the hydrophone data segments that are listed in a CSV file.
+
+Here is an example csv file:
+
+| node,start_time,end_time,file_format,downsample_factor
+| LJ03A,2019-08-03T08:00:00,2019-08-03T08:01:00,mat,64
+| AXBA1,2019-08-03T12:01:00,2019-08-03T12:02:00,mat,1
+
+The script can be run with the following command:
+
+.. code-block :: bash
+
+  python download_hydrophone_data.py --csv <path_to_csv> --output_path <output_directory>
diff --git a/ooipy/hydrophone/basic.py b/ooipy/hydrophone/basic.py
@@ -17,12 +17,13 @@
 import matplotlib.dates as mdates
 import numpy as np
 import pandas as pd
+import xarray as xr
 from matplotlib import pyplot as plt
 from obspy import Trace
 from obspy.core import UTCDateTime
 from scipy import signal
 from scipy.interpolate import interp1d
-from scipy.io import wavfile
+from scipy.io import savemat, wavfile
 
 import ooipy
 
@@ -678,6 +679,64 @@ def get_asset_ID(self):
 
         return asset_ID
 
+    def save(self, file_format, filename, wav_kwargs=None):
+        """
+        Save the hydrophone data to disk in the specified format. Supported formats are:
+        - pkl - saves the HydrophoneData object as a pickle file
+        - nc - saves the samples as netCDF with the stats stored as attributes.
+          Time coordinates are not included
+        - mat - saves the samples and the stats as a .mat file
+        - wav - calls wav_write method to save HydrophoneData object as a .wav file
+
+        Parameters
+        ----------
+        file_format : str
+            format to save HydrophoneData object as. Supported formats are
+            ['pkl', 'nc', 'mat', 'wav']
+        filename : str
+            path of the file to write. The file extension should not be included,
+            it is appended according to file_format
+        wav_kwargs : dict, optional
+            dictionary of keyword arguments to pass to wav_write method. Only used
+            when file_format is 'wav'. Default is None (no extra arguments)
+
+        Returns
+        -------
+        None
+
+        Raises
+        ------
+        AttributeError
+            if the object has no data attribute
+        ValueError
+            if file_format is not one of the supported formats
+        """
+
+        if not hasattr(self, "data"):
+            raise AttributeError("HydrophoneData object does not contain any data")
+
+        # start and end time are stored as strings in the nc and mat outputs
+        time_format = "%Y-%m-%dT%H:%M:%S.%f"
+
+        if file_format == "pkl":
+            # save HydrophoneData object as pickle file
+            with open(filename + ".pkl", "wb") as f:
+                pickle.dump(self, f)
+        elif file_format == "nc":
+            # save HydrophoneData object as netCDF file
+            attrs = dict(self.stats)
+            attrs["starttime"] = self.stats.starttime.strftime(time_format)
+            attrs["endtime"] = self.stats.endtime.strftime(time_format)
+            # the mseed entry is stored as its string representation
+            # NOTE(review): presumably not every stats object carries an mseed
+            # entry, so it is only converted when present - confirm
+            if "mseed" in attrs:
+                attrs["mseed"] = str(attrs["mseed"])
+            hdata_x = xr.DataArray(self.data, dims=["time"], attrs=attrs)
+            hdata_x.to_netcdf(filename + ".nc")
+        elif file_format == "mat":
+            # save HydrophoneData object as .mat file, the stats location is
+            # used as the name of the top level variable
+            data_dict = dict(self.stats)
+            data_dict["data"] = self.data
+            data_dict["starttime"] = self.stats.starttime.strftime(time_format)
+            data_dict["endtime"] = self.stats.endtime.strftime(time_format)
+            savemat(filename + ".mat", {self.stats.location: data_dict})
+        elif file_format == "wav":
+            # save HydrophoneData object as .wav file
+            self.wav_write(filename + ".wav", **(wav_kwargs or {}))
+        else:
+            # ValueError is a subclass of Exception, so callers that catch
+            # Exception keep working
+            raise ValueError(
+                "Invalid file format. Supported formats are: ['pkl', 'nc', 'mat', 'wav']"
+            )
+
 
 def node_id(node):
     """

diff --git a/ooipy/scripts/download_hydrophone_data.py b/ooipy/scripts/download_hydrophone_data.py
@@ -0,0 +1,131 @@
+"""
+download_hydrophone_data.py
+John Ragland, June 2023
+
+download_hydrophone_data.py takes a csv file containing a list of
+sensors and time segments that you would like to download,
+and downloads them to your local machine. You can specify the file format
+that you want them to be saved in and the factor by which each segment is
+downsampled. Supported file formats at this time include
+['mat', 'pkl', 'nc', 'wav'].
+
+example csv file:
+-----------------
+node,start_time,end_time,file_format,downsample_factor
+LJ03A,2019-08-03T08:00:00,2019-08-03T08:01:00,mat,64
+AXBA1,2019-08-03T12:01:00,2019-08-03T12:02:00,mat,1
+
+- create a csv file with the above contents and save it in your working path
+
+script usage:
+-------------
+python download_hydrophone_data.py --csv path/to/csv --output_path path/to/output
+"""
+
+import argparse
+import sys
+
+import pandas as pd
+from tqdm import tqdm
+
+import ooipy
+
+# Maps every supported node to its hydrophone type. "LF" nodes are requested
+# with ooipy.get_acoustic_data_LF, all other nodes with ooipy.get_acoustic_data.
+hyd_type = {
+    node: kind
+    for kind, nodes in (
+        ("BB", ("LJ01D", "LJ01A", "PC01A", "PC03A", "LJ01C", "LJ03A")),
+        ("LF", ("AXBA1", "AXCC1", "AXEC2", "HYS14", "HYSB1")),
+    )
+    for node in nodes
+}
+
+# Command-line interface: --csv and --output_path. Neither is declared as
+# required to argparse, so their presence is checked by hand below.
+parser = argparse.ArgumentParser()
+parser.add_argument("--csv", help="file path to csv file")
+parser.add_argument("--output_path", help="file path to save files in")
+args = parser.parse_args()
+
+# Both options must be given
+if args.csv is None:
+    raise Exception("You must provide a path to the csv file, --csv <absolute file path>")
+if args.output_path is None:
+    raise Exception(
+        "You must provide a path to the output directory, --output_path <absolute file path>"
+    )
+
+# One row per segment to download
+df = pd.read_csv(args.csv)
+
+# Estimate the total download size from the summed segment durations (seconds)
+total_time = 0
+for _, row in df.iterrows():
+    span = pd.Timestamp(row.end_time) - pd.Timestamp(row.start_time)
+    # Timedelta.value is in nanoseconds
+    total_time += span.value / 1e9
+
+# 8 Bytes per sample
+# NOTE(review): 64e3 is presumably the sampling rate in Hz and is applied to
+# every node regardless of its type - confirm
+total_storage = total_time * 64e3 * 8
+
+
+def format_bytes(size):
+    """
+    Format a number of bytes as a human readable string.
+
+    The value is divided by 1024 until it is smaller than 1024 or the largest
+    unit ("TB") is reached, and is then printed with two decimals.
+
+    Parameters
+    ----------
+    size : int or float
+        number of bytes
+
+    Returns
+    -------
+    formatted_size : str
+        size followed by its unit, e.g. "1.50 KB"
+    """
+    unit_names = ("B", "KB", "MB", "GB", "TB")
+    step = 1024
+    last = len(unit_names) - 1
+
+    idx = 0
+    while idx < last and size >= step:
+        size = size / step
+        idx += 1
+
+    return f"{size:.2f} {unit_names[idx]}"
+
+
+# show the estimated size and let the user abort before anything is downloaded
+print(f"total uncompressed download size: ~{format_bytes(total_storage)}")
+proceed = input("Do you want to proceed? (y/n): ")
+
+if proceed.lower() != "y":
+    print("Exiting the script.")
+    sys.exit(0)
+
+# file formats accepted by HydrophoneData.save
+SUPPORTED_FORMATS = ("pkl", "nc", "mat", "wav")
+# largest factor handed to a single decimate call, larger factors are applied
+# in several stages
+# NOTE(review): presumably a limit of the decimation filter - confirm against
+# the decimate implementation
+MAX_STAGE_FACTOR = 16
+
+
+def _decimation_stages(factor):
+    """
+    Split a downsample factor into stages that are applied one after another.
+
+    Parameters
+    ----------
+    factor : int
+        total downsample factor requested for a segment
+
+    Returns
+    -------
+    stages : list of int or None
+        factors between 2 and MAX_STAGE_FACTOR whose product equals factor
+        (empty list for a factor of 1). None if factor is not a positive
+        integer or cannot be written as such a product (e.g. 17 or 34)
+    """
+    try:
+        remaining = int(factor)
+    except (TypeError, ValueError, OverflowError):
+        return None
+    if remaining != factor or remaining < 1:
+        return None
+
+    stages = []
+    while remaining > 1:
+        # largest divisor of the remaining factor that fits in a single stage
+        stage = next(
+            (q for q in range(min(MAX_STAGE_FACTOR, remaining), 1, -1) if remaining % q == 0),
+            None,
+        )
+        if stage is None:
+            return None
+        stages.append(stage)
+        remaining //= stage
+    return stages
+
+
+# download the data
+for k, item in tqdm(df.iterrows(), total=len(df)):
+    # validate the whole row first, so that nothing is downloaded for a row
+    # that can not be processed or saved afterwards
+    if item.node not in hyd_type:
+        print(f"node {item.node} invalid, skipping")
+        continue
+    if item.file_format not in SUPPORTED_FORMATS:
+        print(f"file format {item.file_format} invalid, skipping")
+        continue
+    stages = _decimation_stages(item.downsample_factor)
+    if stages is None:
+        print(f"downsample factor {item.downsample_factor} invalid, skipping")
+        continue
+
+    start_time_d = pd.Timestamp(item.start_time).to_pydatetime()
+    end_time_d = pd.Timestamp(item.end_time).to_pydatetime()
+
+    if hyd_type[item.node] == "LF":
+        hdata = ooipy.get_acoustic_data_LF(start_time_d, end_time_d, item.node)
+    else:
+        hdata = ooipy.get_acoustic_data(start_time_d, end_time_d, item.node)
+
+    if hdata is None:
+        print(f"no data found for {item.node} between {start_time_d} and {end_time_d}")
+        continue
+
+    # downsample, the product of all stages is exactly the requested factor.
+    # A factor of 1 has no stages, the data is saved as downloaded
+    hdata_ds = hdata
+    for stage in stages:
+        hdata_ds = hdata_ds.decimate(stage)
+
+    # save
+    op_path = args.output_path
+    hdat_loc = hdata_ds.stats.location
+    hdat_start_time = hdata_ds.stats.starttime.strftime("%Y%m%dT%H%M%S")
+    hdat_end_time = hdata_ds.stats.endtime.strftime("%Y%m%dT%H%M%S")
+    filename = f"{op_path}/{hdat_loc}_{hdat_start_time}_{hdat_end_time}"
+
+    print(filename)
+    # wav_kwargs is only used by save when the file format is wav
+    hdata_ds.save(filename=filename, file_format=item.file_format, wav_kwargs={"norm": True})
diff --git a/setup.cfg b/setup.cfg
@@ -29,6 +29,8 @@ install_requires =
     pandas
     numpy<1.22
     lxml
+    xarray
+    tqdm
 python_requires = >=3.7
 setup_requires =
     setuptools_scm