Add Further Selection Functionality for Regions2D (#97)
* add further selection functionality

* add selection functionality for depth range
* add selection functionality for time range
* create test to check for both time and depth functionality
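
A minimal sketch of how the new time/depth selection could be exercised; the file path and values are illustrative only and mirror the test added further down in this commit:

import pandas as pd
import echoregions as er

# Hypothetical path; mirrors the x1.evr fixture used in the tests below.
r2d = er.read_evr("data/x1.evr")

# Keep only regions whose point times all fall inside a two-day window
# and whose depths all lie between 0 and 100.
subset = r2d.select_region(
    time_range=[
        pd.to_datetime("2017-06-24T16:31:36"),
        pd.to_datetime("2017-06-26T16:31:40"),
    ],
    depth_range=[0.0, 100.0],
)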

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove option for dataframe and series

* update select_region by removing the option for region_id to be a Series or DataFrame
* update test to test for "bad" list values
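
A sketch of how the "bad" list value case might be checked, assuming pytest and the same x1.evr fixture used elsewhere in the test suite:

import pytest
import echoregions as er

r2d = er.read_evr("data/x1.evr")  # hypothetical path for illustration

# Entries in a region_id list must each be a float, int, or str;
# anything else (e.g. a nested list) should raise a TypeError.
with pytest.raises(TypeError):
    r2d.select_region(region_id=[1, [2, 3]])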

* add timestamp

* add timestamp import

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* convert numpy num to python num

* convert numpy numeric values to basic python float values
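
For reference, a minimal sketch of the conversion pattern (``.item()`` on a NumPy scalar returns the equivalent built-in Python number):

import numpy as np

values = [np.int64(5), np.float64(7.5)]
# .item() turns each NumPy scalar into a plain Python int/float.
plain_values = [v.item() for v in values]  # -> [5, 7.5]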

* formatting changes

* make description changes for precision
* change multiple isinstance to single isinstance
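
The isinstance consolidation referred to above, sketched in isolation:

value = 3.0

# Before: several isinstance calls chained with `or`.
is_valid = isinstance(value, float) or isinstance(value, int) or isinstance(value, str)

# After: a single isinstance call with a tuple of types (same behaviour).
is_valid = isinstance(value, (float, int, str))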

* initialize region first; remove untouched

* make copy default to True
* initialize region with the copy value at the beginning
* remove untouched
* remove check for region being None
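
A simplified sketch of the restructuring these bullets describe (the real method also applies the region_id/time_range/depth_range filters before returning):

def select_region(self, region_id=None, copy=True):
    # Initialize the working DataFrame up front: a copy by default,
    # or the original object when copy=False; no separate "untouched"
    # frame or `region is None` check is needed.
    region = self.data.copy() if copy else self.data
    # ... region_id / time_range / depth_range filtering happens here ...
    return region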

* rework iterrows with .apply and lambda

* for selecting rows with "valid" time and depth values, exchange the iterrows implementation with a .apply and lambda implementation
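
A side-by-side sketch of that change with illustrative data (the real method filters on list-valued "time" and "depth" columns):

import pandas as pd

df = pd.DataFrame({"region_id": [1, 2], "time": [[1, 2], [3, 9]]})
lo, hi = 0, 5

# Old style: iterate row by row with iterrows and collect matching indices.
keep = [idx for idx, row in df.iterrows() if all(lo <= t <= hi for t in row["time"])]
old_result = df.loc[keep]

# New style: build a boolean mask in one pass with .apply and a lambda.
new_result = df[df["time"].apply(lambda arr: all(lo <= t <= hi for t in arr))]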

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
ctuguinay and pre-commit-ci[bot] authored Jul 3, 2023
1 parent 1c1510e commit 3a3251b
Showing 2 changed files with 146 additions and 29 deletions.
145 changes: 116 additions & 29 deletions echoregions/regions2d/regions2d.py
@@ -6,7 +6,7 @@
import numpy as np
import regionmask
import xarray as xr
from pandas import DataFrame, Series
from pandas import DataFrame, Series, Timestamp
from xarray import DataArray

from ..utils.io import validate_path
@@ -73,47 +73,134 @@ def to_json(self, save_path: str = None) -> None:
"""

def select_region(
self, region: Union[float, str, list, Series, DataFrame] = None, copy=False
self,
region_id: Union[float, int, str, List[Union[float, int, str]]] = None,
time_range: List[Timestamp] = None,
depth_range: List[Union[float, int]] = None,
copy=True,
) -> DataFrame:
"""Ensure that region is a DataFrame.
"""Selects a subset of this Region2D object's dataframe.
Parameters
----------
region : float, str, list, Series, DataFrame, ``None``
A region id provided as a number, string, list of these,
or a DataFrame/Series containing the region_id column name.
region_id : float, int, str, list, ``None``
A region id provided as a number, a string, or list of these.
time_range : list of 2 Pandas Timestamps
Datetime range for the subselected DataFrame. The 1st index value
must be later than the 0th index value.
depth_range : list of 2 floats
Depth range for the subselected DataFrame. The 1st index value
must be larger than the 0th index value.
copy : bool
Return a copy of the `data` DataFrame
Returns
-------
DataFrame
A DataFrame subselected from Regions2D.data.
There is a row for each region id provided by the region parameter.
There is a row for each region id provided by the ``region_id`` parameter,
and each row has time and depth within or on the boundaries passed
in by the ``time_range`` and ``depth_range`` values.
"""
if region is not None:
if isinstance(region, DataFrame):
region = list(region.region_id)
elif isinstance(region, Series):
region = [region.region_id]
elif (
isinstance(region, float)
or isinstance(region, int)
or isinstance(region, str)
):
region = [region]
elif not isinstance(region, list):
# Make copy of original dataframe; else, use original dataframe in selection.
if copy:
region = self.data.copy()
else:
region = self.data
if region_id is not None:
if isinstance(region_id, (float, int, str)):
region_id = [region_id]
elif not isinstance(region_id, list):
raise TypeError(
f"Invalid Region Type: {type(region)}. Must be \
of type float, str, list, Series, DataFrame, ``None``"
f"Invalid region_id type: {type(region_id)}. Must be \
of type float, int, str, list, ``None``."
)
# Select row by column id
region = self.data[self.data["region_id"].isin(region)]
else:
region = self.data
if copy:
return region.copy()
else:
return region
for value in region_id:
if not isinstance(value, (float, int, str)):
raise TypeError(
f"Invalid element in list region_id. Is of \
type: {type(value)}Must be \
of type float, int, str."
)
region = self.data[self.data["region_id"].isin(region_id)]
if time_range is not None:
if isinstance(time_range, List):
if len(time_range) == 2:
if isinstance(time_range[0], Timestamp) and isinstance(
time_range[1], Timestamp
):
if time_range[0] < time_range[1]:
# Select rows with time values that are all within time range
region = region[
region["time"].apply(
lambda time_array: all(
time_range[0] <= Timestamp(x) <= time_range[1]
for x in time_array
)
)
]
else:
raise ValueError(
f"1st index value must be later than 0th index \
value. Currently 0th index value is {time_range[0]} \
and 1st index value is {time_range[1]}"
)
else:
raise TypeError(
f"Invalid time_range value types: \
{type(time_range[0])} and {type(time_range[1])}. Must \
be both of type Timestamp."
)
else:
raise ValueError(
f"Invalid time_range size: {len(time_range)}. \
Must be of size 2."
)
else:
raise TypeError(
f"Invalid time_range type: {type(time_range)}. Must be \
of type List."
)
if depth_range is not None:
if isinstance(depth_range, List):
if len(depth_range) == 2:
if isinstance(depth_range[0], (float, int)) and isinstance(
depth_range[1], (float, int)
):
if depth_range[0] < depth_range[1]:
# Select rows with depth values that are all within depth range
region = region[
region["time"].apply(
lambda depth_array: all(
depth_range[0] <= float(x) <= depth_range[1]
for x in depth_array
)
)
]
else:
raise ValueError(
f"1st index value must be later than 0th index \
value. Currently 0th index value is {depth_range[0]} \
and 1st index value is {depth_range[1]}"
)
else:
raise TypeError(
f"Invalid depth_range value types: \
{type(depth_range[0])} and {type(depth_range[1])}. Must \
be both of type either float or int."
)
else:
raise ValueError(
f"Invalid depth_range size: {len(depth_range)}. \
Must be of size 2."
)
else:
raise TypeError(
f"Invalid depth_range type: {type(depth_range)}. Must be \
of type List."
)
return region

def close_region(
self, region: Union[float, str, List, Series, DataFrame] = None
30 changes: 30 additions & 0 deletions echoregions/tests/test_r2d.py
@@ -2,6 +2,7 @@
from datetime import timedelta

import numpy as np
import pandas as pd
import pytest
import xarray as xr
from xarray import DataArray, Dataset
@@ -150,6 +151,33 @@ def test_select_sonar_file():
assert raw == ["Summer2017-D20170625-T195927.nc"]


def test_select_region():
"""
tests select region functionality
"""
evr_path = data_dir + "x1.evr"
r2d = er.read_evr(evr_path)
region_id = 2
time_range = [
pd.to_datetime("2017-06-24T16:31:36.338500000"),
pd.to_datetime("2017-06-26T16:31:40.211500000"),
]
depth_range = [-10000.0, 10000.0]
df_1 = r2d.select_region(region_id=region_id)
df_2 = r2d.select_region(time_range=time_range)
df_3 = r2d.select_region(depth_range=depth_range)
for df_region_id in df_1["region_id"]:
assert df_region_id == region_id
for time_array in df_2["time"]:
for time in time_array:
assert time >= time_range[0]
assert time <= time_range[1]
for depth_array in df_3["depth"]:
for depth in depth_array:
assert depth >= depth_range[0]
assert depth <= depth_range[1]


@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region")
def test_mask_no_overlap():
"""
@@ -186,6 +214,8 @@ def test_mask_correct_labels():
r2d = er.read_evr(evr_path)
region_ids = r2d.data.region_id.values # Output is that of IntegerArray
region_ids = list(region_ids) # Convert to List
# Convert numpy numeric values to basic Python float values
region_ids = [region_id.item() for region_id in region_ids]
da_Sv = xr.open_dataset(os.path.join(data_dir, "x1_test.nc")).Sv
M = r2d.mask(da_Sv, region_ids, mask_labels=region_ids)
# it matches only the 11th region because x1_test.nc is a chunk around that region only
