New functions index_to_row and rowsize_to_rowvector to easily find the rows of observations
Cloud-Drift · Jul 24, 2024 · 7875ab0 · 7875ab0
1 parent 13abb80
commit 7875ab0
Show file tree

Hide file tree

Showing 2 changed files with 127 additions and 0 deletions.
diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py
@@ -854,6 +854,86 @@ def unpack(
     return [unpacked[i] for i in rows]
 
 
def rowsize_to_rowvector(
    rowsize: list[int] | np.ndarray | xr.DataArray,
) -> list:
    """Obtain a list of repeated row indices from a list of row sizes of a ragged array.

    Row ``i`` contributes ``rowsize[i]`` copies of the index ``i`` to the output,
    so the result has one entry per observation of the ragged array.

    Parameters
    ----------
    rowsize : list or np.ndarray or xr.DataArray
        A sequence of row sizes greater than zero.

    Returns
    -------
    list
        A list of repeated row indices (plain Python integers). An empty
        ``rowsize`` yields an empty list.

    Raises
    ------
    ValueError
        If any row size is zero or negative.

    Examples
    --------
    To obtain the repeated row indices within a ragged array of three consecutive rows of sizes 2, 4, and 3:

    >>> rowsize_to_rowvector([2, 4, 3])
    [0, 0, 1, 1, 1, 1, 2, 2, 2]
    """
    # Convert once up front: np.asarray handles lists, arrays and DataArray
    # uniformly, so validation and expansion below are vectorized instead of
    # looping over elements in Python.
    sizes = np.asarray(rowsize)

    # An empty list becomes a float64 array, which np.repeat would reject as
    # a repeat count; there is nothing to expand anyway.
    if sizes.size == 0:
        return []

    # Test if there are any zero or negative row sizes.
    if np.any(sizes <= 0):
        raise ValueError("The row sizes must be greater than zero.")

    # Repeat each row index by its row size; tolist() converts the NumPy
    # integers back to built-in ints so the return type stays a plain list.
    return np.repeat(np.arange(sizes.size), sizes).tolist()
+
+
def index_to_row(
    index: int | list[int] | np.ndarray,
    rowsize: list[int] | np.ndarray | xr.DataArray,
) -> list:
    """Obtain a list of row indices from a list of observation indices of a ragged array.

    Parameters
    ----------
    index : int or list or np.ndarray
        An integer observation index or a sequence of observation indices of a
        ragged array. Built-in and NumPy integers are accepted. Indices follow
        Python list indexing, so negative values count from the last observation.
    rowsize : list or np.ndarray or xr.DataArray
        A sequence of row sizes of a ragged array.

    Returns
    -------
    list
        A list of row indices, one per requested observation index. A list is
        returned even when ``index`` is a single integer.

    Raises
    ------
    ValueError
        If ``index`` contains a non-integer value, or if ``rowsize`` is rejected
        by ``rowsize_to_rowvector``.
    IndexError
        If an observation index is out of range for the ragged array.

    Examples
    --------
    To obtain the row index of observation 5 within a ragged array of three consecutive
    rows of sizes 2, 4, and 3:

    >>> index_to_row(5, [2, 4, 3])
    [1]

    To obtain the row indices of observations 0, 2, and 4 within a ragged array of three consecutive
    rows of sizes 2, 4, and 3:

    >>> index_to_row([0, 2, 4], [2, 4, 3])
    [0, 1, 1]

    """
    # NumPy integer scalars are not instances of int, so they are listed
    # explicitly; otherwise indices taken from an array would be rejected.
    integer_types = (int, np.integer)

    # If index is a single integer, wrap it in a list.
    if isinstance(index, integer_types):
        index_list = [index]
    else:
        index_list = index

    # If index is not a sequence of integers, raise an error.
    if not all(isinstance(i, integer_types) for i in index_list):
        raise ValueError("The index must be an integer or a list of integers.")

    # One row index per observation; looking up an observation index in this
    # vector gives the row that observation belongs to.
    rowvector_flattened = rowsize_to_rowvector(rowsize)

    return [rowvector_flattened[i] for i in index_list]
+
+
 def _mask_var(
     var: xr.DataArray | list[xr.DataArray],
     criterion: tuple | list | np.ndarray | xr.DataArray | bool | float | int | Callable,

diff --git a/tests/ragged_tests.py b/tests/ragged_tests.py
@@ -17,6 +17,8 @@
     segment,
     subset,
     unpack,
+    rowsize_to_rowvector,
+    index_to_row,
 )
 from clouddrift.raggedarray import RaggedArray
 
@@ -807,3 +809,48 @@ def test_unpack_rows(self):
                 for a, b in zip(unpack(x, rowsize, np.int64(0)), unpack(x, rowsize)[:1])
             )
         )
+
+
class rowsize_to_rowvector_tests(unittest.TestCase):
    """Unit tests for ``rowsize_to_rowvector``."""

    # Results are compared with assertEqual on plain lists: comparing a list
    # against an ndarray inside np.all() broadcasts, so a wrong-length result
    # such as [0] could pass, and a failure would not show the differing values.

    def test_rowsize_to_rowvector(self):
        """Each row index is repeated as many times as its row size."""
        rowsize = [2, 3, 4]
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(rowvector, [0, 0, 1, 1, 1, 2, 2, 2, 2])

    def test_rowsize_to_rowvector_empty(self):
        """An empty sequence of row sizes gives an empty row vector."""
        rowsize = []
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(rowvector, [])

    def test_rowsize_to_rowvector_zero(self):
        """A zero row size is rejected."""
        rowsize = [2, 3, 0, 4]
        with self.assertRaises(ValueError):
            rowsize_to_rowvector(rowsize)

    def test_rowsize_to_rowvector_negative(self):
        """A negative row size is rejected."""
        rowsize = [2, 3, -1, 4]
        with self.assertRaises(ValueError):
            rowsize_to_rowvector(rowsize)

    def test_rowsize_to_rowvector_array_like(self):
        """NumPy arrays and xarray DataArrays give the same result as lists."""
        expected = [0, 0, 1, 1, 1, 2, 2, 2, 2]

        rowsize = np.array([2, 3, 4])
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(list(rowvector), expected)

        rowsize = xr.DataArray(data=[2, 3, 4])
        rowvector = rowsize_to_rowvector(rowsize)
        self.assertEqual(list(rowvector), expected)
+
+
class index_to_row_tests(unittest.TestCase):
    """Unit tests for ``index_to_row``."""

    def test_index_to_row(self):
        """Every observation index maps to the row that contains it."""
        rowsize = [2, 5, 3]
        index = list(range(10))
        row = index_to_row(index, rowsize)
        self.assertEqual(row, [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])

    def test_index_to_row_array_like(self):
        """Row sizes given as an xarray DataArray behave like a list."""
        rowsize = xr.DataArray(data=[2, 5, 3])
        index = list(range(10))
        row = index_to_row(index, rowsize)
        self.assertEqual(list(row), [0, 0, 1, 1, 1, 1, 1, 2, 2, 2])

    def test_index_to_row_scalar(self):
        """A single integer index still yields a one-element list."""
        self.assertEqual(index_to_row(5, [2, 5, 3]), [1])

    def test_index_to_row_non_integer(self):
        """Non-integer observation indices are rejected."""
        with self.assertRaises(ValueError):
            index_to_row([0, 1.5], [2, 5, 3])

    def test_index_to_row_out_of_range(self):
        """An index past the last observation raises IndexError."""
        with self.assertRaises(IndexError):
            index_to_row(10, [2, 5, 3])