Skip to content

Commit

Permalink
new functions index_to_row and rowsize_to_rowvector to easily fin…
Browse files Browse the repository at this point in the history
…d the rows of observations
  • Loading branch information
selipot committed Jul 24, 2024
1 parent 13abb80 commit 7875ab0
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
80 changes: 80 additions & 0 deletions clouddrift/ragged.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,86 @@ def unpack(
return [unpacked[i] for i in rows]


def rowsize_to_rowvector(
    rowsize: list[int] | np.ndarray | xr.DataArray,
) -> list:
    """Obtain a list of repeated row indices from a list of row sizes of a ragged array.

    Row ``i`` of size ``rowsize[i]`` contributes ``rowsize[i]`` copies of ``i``
    to the output, so the result has one entry per observation.

    Parameters
    ----------
    rowsize : list or np.ndarray or xr.DataArray
        A sequence of row sizes greater than zero.

    Returns
    -------
    list
        A list of repeated row indices.

    Raises
    ------
    ValueError
        If any row size is zero or negative.

    Examples
    --------
    To obtain the repeated row indices within a ragged array of three consecutive rows of sizes 2, 4, and 3:

    >>> rowsize_to_rowvector([2, 4, 3])
    [0, 0, 1, 1, 1, 1, 2, 2, 2]
    """
    # Normalize every accepted container (list, tuple, ndarray, DataArray, ...)
    # to a plain list of Python scalars once, up front. This avoids iterating
    # an array-like element by element during validation.
    sizes = np.asarray(rowsize).tolist()

    # Reject zero or negative row sizes.
    if any(size <= 0 for size in sizes):
        raise ValueError("The row sizes must be greater than zero.")

    # Append each row index as many times as the row has observations.
    rowvector_flattened: list[int] = []
    for row, size in enumerate(sizes):
        rowvector_flattened.extend([row] * size)

    return rowvector_flattened


def index_to_row(
    index: int | list[int],
    rowsize: list[int] | np.ndarray | xr.DataArray,
) -> list:
    """Obtain a list of row indices from a list of observation indices of a ragged array.

    Parameters
    ----------
    index : int or list
        An integer observation index or a list of observation indices of a
        ragged array. NumPy integer scalars are accepted as well as Python
        integers.
    rowsize : list or np.ndarray or xr.DataArray
        A sequence of row sizes of a ragged array.

    Returns
    -------
    list
        A list of row indices, one per requested observation index. A single
        integer ``index`` yields a one-element list.

    Raises
    ------
    ValueError
        If ``index`` is neither an integer nor a sequence of integers, or if
        ``rowsize_to_rowvector`` rejects ``rowsize``.
    IndexError
        If an observation index falls outside the total number of
        observations (ordinary list indexing is used for the lookup, so
        negative indices count back from the last observation).

    Examples
    --------
    To obtain the row index of observation 5 within a ragged array of three consecutive
    rows of sizes 2, 4, and 3:

    >>> index_to_row(5, [2, 4, 3])
    [1]

    To obtain the row indices of observations 0, 2, and 4 within a ragged array of three consecutive
    rows of sizes 2, 4, and 3:

    >>> index_to_row([0, 2, 4], [2, 4, 3])
    [0, 1, 1]
    """
    # NumPy integer scalars (e.g. np.int64) are not instances of int, so they
    # are checked for explicitly alongside Python integers.
    integer_types = (int, np.integer)

    # Wrap a single index so the rest of the function only deals with sequences.
    if isinstance(index, integer_types):
        index_list = [index]
    else:
        index_list = index

    # Every requested index must be an integer.
    if not all(isinstance(i, integer_types) for i in index_list):
        raise ValueError("The index must be an integer or a list of integers.")

    # One row index per observation; looking up an observation gives its row.
    rowvector_flattened = rowsize_to_rowvector(rowsize)

    return [rowvector_flattened[i] for i in index_list]


def _mask_var(
var: xr.DataArray | list[xr.DataArray],
criterion: tuple | list | np.ndarray | xr.DataArray | bool | float | int | Callable,
Expand Down
47 changes: 47 additions & 0 deletions tests/ragged_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
segment,
subset,
unpack,
rowsize_to_rowvector,
index_to_row,
)
from clouddrift.raggedarray import RaggedArray

Expand Down Expand Up @@ -807,3 +809,48 @@ def test_unpack_rows(self):
for a, b in zip(unpack(x, rowsize, np.int64(0)), unpack(x, rowsize)[:1])
)
)


class rowsize_to_rowvector_tests(unittest.TestCase):
    """Tests for rowsize_to_rowvector: expected output, empty input, invalid sizes, array-likes."""

    def test_rowsize_to_rowvector(self):
        rowsize = [2, 3, 4]
        rowvector = rowsize_to_rowvector(rowsize)
        # assertEqual reports both lists on failure, unlike assertTrue(np.all(...)).
        self.assertEqual(rowvector, [0, 0, 1, 1, 1, 2, 2, 2, 2])

    def test_rowsize_to_rowvector_empty(self):
        rowsize = []
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(rowvector, [])

    def test_rowsize_to_rowvector_zero(self):
        # A zero row size must be rejected.
        rowsize = [2, 3, 0, 4]
        with self.assertRaises(ValueError):
            rowsize_to_rowvector(rowsize)

    def test_rowsize_to_rowvector_negative(self):
        # A negative row size must be rejected.
        rowsize = [2, 3, -1, 4]
        with self.assertRaises(ValueError):
            rowsize_to_rowvector(rowsize)

    def test_rowsize_to_rowvector_array_like(self):
        expected = [0, 0, 1, 1, 1, 2, 2, 2, 2]

        rowsize = np.array([2, 3, 4])
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(rowvector, expected)

        rowsize = xr.DataArray(data=[2, 3, 4])
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(rowvector, expected)


class index_to_row_tests(unittest.TestCase):
    """Tests for index_to_row: list of indices, single integer index, array-like row sizes."""

    def test_index_to_row(self):
        rowsize = [2, 5, 3]
        index = list(range(10))
        row = index_to_row(index, rowsize)
        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])

    def test_index_to_row_scalar(self):
        # A single integer index is wrapped, so the result is a one-element list.
        row = index_to_row(5, [2, 4, 3])
        self.assertEqual(row, [1])

    def test_index_to_row_array_like(self):
        rowsize = xr.DataArray(data=[2, 5, 3])
        index = list(range(10))
        row = index_to_row(index, rowsize)
        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])

0 comments on commit 7875ab0

Please sign in to comment.