Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read Directly From Cloud Functionality #104

Closed
2 changes: 1 addition & 1 deletion echoregions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@

from .core import read_evl, read_evr

__all__ = ["read_evl", "read_evr"]
__all__ = ["read_evl", "read_evr", "read_cloud_evr", "read_cloud_evl"]
199 changes: 199 additions & 0 deletions echoregions/core.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import os
from typing import Union

import s3fs

from .lines.lines import Lines
from .regions2d.regions2d import Regions2D

Expand Down Expand Up @@ -40,3 +45,197 @@ def read_evl(filepath: str, nan_depth_value: float = None) -> Lines:
Object that contains EVL data and metadata with methods for saving to file.
"""
return Lines(input_file=str(filepath), nan_depth_value=nan_depth_value)


def read_cloud_evr(
    s3_path: str,
    s3_key: str,
    s3_secret: str,
    target_directory_path: str,
    min_depth: float = None,
    max_depth: float = None,
) -> Regions2D:
    """Read an EVR file from the cloud into a Regions2D object.

    Thin convenience wrapper around ``read_cloud`` with ``file_type="evr"``.

    Parameters
    ----------
    s3_path : str
        A valid path to an EVR file on the cloud.
    s3_key : str
        Valid S3 Bucket Key.
    s3_secret : str
        Valid S3 Bucket Secret.
    target_directory_path : str
        Valid relative directory to temporarily place the cloud file.
        Must be a non-existent directory.
    min_depth : float, default ``None``
        Depth value in meters to set -9999.99 depth edges to.
    max_depth : float, default ``None``
        Depth value in meters to set 9999.99 depth edges to.

    Returns
    -------
    Regions2D
        Object that contains the EVR data and metadata with methods for
        saving to file.
    """
    # Gather the arguments once, then delegate to the shared cloud reader.
    cloud_kwargs = {
        "file_type": "evr",
        "s3_path": s3_path,
        "s3_key": s3_key,
        "s3_secret": s3_secret,
        "target_directory_path": target_directory_path,
        "min_depth": min_depth,
        "max_depth": max_depth,
    }
    return read_cloud(**cloud_kwargs)


def read_cloud_evl(
    s3_path: str,
    s3_key: str,
    s3_secret: str,
    target_directory_path: str,
    nan_depth_value: float = None,
) -> "Lines":
    """Read an EVL file from the cloud into a Lines object.

    Thin convenience wrapper around ``read_cloud`` with ``file_type="evl"``.

    NOTE: the previous docstring and return annotation claimed this returned a
    ``Regions2D`` built from an EVR file; it actually delegates with
    ``file_type="evl"``, which yields a ``Lines`` object.

    Parameters
    ----------
    s3_path : str
        A valid path to an EVL file on the cloud.
    s3_key : str
        Valid S3 Bucket Key.
    s3_secret : str
        Valid S3 Bucket Secret.
    target_directory_path : str
        Valid relative directory to temporarily place the cloud file.
        Must be a non-existent directory.
    nan_depth_value : float, default ``None``
        Depth in meters to replace -10000.990000 ranges with.

    Returns
    -------
    Lines
        Object that contains the EVL data and metadata with methods for
        saving to file.
    """
    return read_cloud(
        file_type="evl",
        s3_path=s3_path,
        s3_key=s3_key,
        s3_secret=s3_secret,
        target_directory_path=target_directory_path,
        nan_depth_value=nan_depth_value,
    )


def read_cloud(
    file_type: str,
    s3_path: str,
    s3_key: str,
    s3_secret: str,
    target_directory_path: str = os.getcwd() + "/echoregions/tmp/",
    min_depth: float = None,
    max_depth: float = None,
    nan_depth_value: float = None,
) -> Union["Regions2D", "Lines"]:
    """Read an EVR or EVL file from the cloud into a Regions2D or Lines object.

    The file is downloaded into a temporary directory, parsed, and the
    directory is removed again (also on failure).

    Parameters
    ----------
    file_type : str
        Type of file to read. Must be "evr" or "evl".
    s3_path : str
        A valid path to an EVR or EVL file on the cloud.
    s3_key : str
        Valid S3 Bucket Key.
    s3_secret : str
        Valid S3 Bucket Secret.
    target_directory_path : str
        Valid relative directory to temporarily place the cloud file. Defaults
        to the /echoregions/tmp directory. Must be a non-existent directory.
    min_depth : float, default ``None``
        Depth value in meters to set -9999.99 depth edges to ("evr" only).
    max_depth : float, default ``None``
        Depth value in meters to set 9999.99 depth edges to ("evr" only).
    nan_depth_value : float, default ``None``
        Depth in meters to replace -10000.990000 ranges with ("evl" only).

    Returns
    -------
    Regions2D or Lines
        Object that contains the EVR or EVL data and metadata with methods
        for saving to file.

    Raises
    ------
    ValueError
        If ``file_type`` is invalid, if depth arguments inconsistent with
        ``file_type`` are given, or if ``target_directory_path`` exists.
    TypeError
        If ``s3_key`` or ``s3_secret`` is not a string.
    """
    # Check file type. Must be evr or evl.
    if file_type not in ["evr", "evl"]:
        raise ValueError(f"file_type is {file_type}. Must be evl or evr. ")

    # Ensure correct variables are being passed in.
    if file_type == "evl" and (min_depth is not None or max_depth is not None):
        raise ValueError(
            "file_type evl does not use min_depth or max_depth values. \
            Please clear input for mentioned variables."
        )
    elif file_type == "evr" and nan_depth_value is not None:
        raise ValueError(
            "file_type evr does not use nan_depth_values. \
            Please clear input for nan_depth_values."
        )

    # Validate credential types before any filesystem work.
    if not (isinstance(s3_key, str) and isinstance(s3_secret, str)):
        raise TypeError("Both s3_key and s3 secret must be of type str.")

    # Get access to the S3 bucket filesystem. Previously a failure here was
    # printed and swallowed, which led to a NameError on the undefined `fs`
    # below — let credential/connection errors propagate instead.
    fs = s3fs.S3FileSystem(
        key=s3_key,
        secret=s3_secret,
    )

    # Create directory if it does not exist. Else, raise a value error.
    if not os.path.exists(target_directory_path):
        os.makedirs(target_directory_path)
    else:
        raise ValueError(
            f"Directory {target_directory_path} already exists. Please \
            choose a path for a directory that does not current exist."
        )

    try:
        # Download file into the (empty) temporary directory.
        fs.download(s3_path, target_directory_path)

        # Should be the only file in the directory.
        file_name = os.listdir(target_directory_path)[0]
        # os.path.join avoids the double slash the old string concat produced
        # with the trailing-slash default path.
        target_path = os.path.join(target_directory_path, file_name)

        # Check if filetype is evr or evl and create object based off of filetype.
        if file_type == "evr":
            from echoregions import read_evr

            return_object = read_evr(
                filepath=target_path, min_depth=min_depth, max_depth=max_depth
            )
        else:
            from echoregions import read_evl

            return_object = read_evl(filepath=target_path, nan_depth_value=nan_depth_value)

        return return_object
    finally:
        # Always clean up the temporary file(s) and directory, whether the
        # download/parse succeeded or failed. Use os.rmdir rather than
        # os.removedirs, which would also delete empty parent directories.
        for leftover in os.listdir(target_directory_path):
            os.remove(os.path.join(target_directory_path, leftover))
        os.rmdir(target_directory_path)
6 changes: 3 additions & 3 deletions echoregions/lines/lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas import DataFrame, Timestamp
from xarray import DataArray

from ..utils.io import validate_path
from ..utils.io import validate_save_path
from .lines_parser import parse_line_file

ECHOVIEW_NAN_DEPTH_VALUE = -10000.99
Expand Down Expand Up @@ -67,7 +67,7 @@ def to_csv(self, save_path: bool = None) -> None:
path to save the CSV file to
"""
# Check if the save directory is safe
save_path = validate_path(
save_path = validate_save_path(
save_path=save_path, input_file=self.input_file, ext=".csv"
)
# Reorder columns and export to csv
Expand All @@ -88,7 +88,7 @@ def to_json(self, save_path: str = None, pretty: bool = True, **kwargs) -> None:
keyword arguments passed into `parse_file`
"""
# Check if the save directory is safe
save_path = validate_path(
save_path = validate_save_path(
save_path=save_path, input_file=self.input_file, ext=".json"
)
indent = 4 if pretty else None
Expand Down
4 changes: 2 additions & 2 deletions echoregions/lines/lines_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from ..utils.io import check_file
from ..utils.io import check_file_extension_existence
from ..utils.time import parse_time


Expand All @@ -19,7 +19,7 @@ def parse_line_file(input_file: str):
DataFrame with parsed data from input EVL file.
"""
# Check for validity of input_file
check_file(input_file, "EVL")
check_file_extension_existence(input_file, "EVL")
# Read file and read all lines
fid = open(input_file, encoding="utf-8-sig")
file_lines = fid.readlines()
Expand Down
4 changes: 2 additions & 2 deletions echoregions/regions2d/regions2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas import DataFrame, Series, Timestamp
from xarray import DataArray

from ..utils.io import validate_path
from ..utils.io import validate_save_path
from ..utils.time import parse_simrad_fname_time
from .regions2d_parser import parse_regions_file

Expand Down Expand Up @@ -57,7 +57,7 @@ def to_csv(self, save_path: bool = None) -> None:
path to save the CSV file to
"""
# Check if the save directory is safe
save_path = validate_path(
save_path = validate_save_path(
save_path=save_path, input_file=self.input_file, ext=".csv"
)
# Reorder columns and export to csv
Expand Down
4 changes: 2 additions & 2 deletions echoregions/regions2d/regions2d_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
from numpy import ndarray

from ..utils.io import check_file
from ..utils.io import check_file_extension_existence
from ..utils.time import parse_time


Expand All @@ -22,7 +22,7 @@ def parse_regions_file(input_file: str):
DataFrame with parsed data from input EVR file.
"""
# Check for validity of input_file.
check_file(input_file, "EVR")
check_file_extension_existence(input_file, "EVR")

# Read file.
fid = open(input_file, encoding="utf-8-sig")
Expand Down
8 changes: 7 additions & 1 deletion echoregions/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,10 @@
from .io import check_file, from_JSON
from .time import parse_simrad_fname_time, parse_time

__all__ = ["from_JSON", "check_file", "parse_simrad_fname_time", "parse_time"]
__all__ = [
"from_JSON",
"check_file_extension_existence",
"validate_save_path",
"parse_simrad_fname_time",
"parse_time",
]
22 changes: 7 additions & 15 deletions echoregions/utils/io.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Union
from typing import Dict


def from_JSON(j: str) -> Dict:
Expand All @@ -23,7 +24,7 @@ def from_JSON(j: str) -> Dict:
return data_dict


def validate_path(
def validate_save_path(
save_path: str = None, input_file: str = None, ext: str = ".json"
) -> str:
"""
Expand Down Expand Up @@ -85,18 +86,9 @@ def check_file(file: str, format: Union[List[str], str]) -> None:
File format.
"""
if file is not None:
if isinstance(format, List):
within = False
for str_value in format:
if file.upper().endswith(str_value):
within = True
if not within:
raise ValueError(f"Input file {file} is not a {format} file")
else:
if not file.upper().endswith(format):
raise ValueError(f"Input file {file} is not a {format} file")
if not re.search(rf".{format}$", file, flags=re.IGNORECASE):
raise ValueError(f"Input file {file} is not a {format} file")
if not os.path.isfile(file):
if not os.path.isdir(file):
raise ValueError(f"{file} does not exist as file or directory.")
raise ValueError(f"{file} does not exist as file.")
else:
raise TypeError("Input file must not be of type None")
raise TypeError("Input file must not be None")
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ pre-commit
pytest
xarray>=2023.2.0
pytest-cov
python-dotenv
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ shapely>=2.0.0
zarr
netcdf4
scipy
s3fs
Loading