Skip to content

Commit

Permalink
feat: Added Table.group_by to group a table by a given key (#343)
Browse files Browse the repository at this point in the history
Closes #160.

### Summary of Changes

Added a `group_by` method to the `Table` class. It takes a callable (for
example, a lambda) that computes the grouping key of each row and returns
a dictionary that maps every computed key to a new `Table` containing the
rows that belong to that group.

---------

Co-authored-by: Alexander Gréus <alexgreus51@gmail.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com>
  • Loading branch information
3 people committed Jun 2, 2023
1 parent 9a332b2 commit afb98be
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
28 changes: 27 additions & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import io
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, TypeVar

import matplotlib.pyplot as plt
import numpy as np
Expand Down Expand Up @@ -717,6 +717,32 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
result_table = self.from_rows(rows)
return result_table

# Type variable for the group key type produced by the `key_selector` passed to `group_by`.
_T = TypeVar("_T")

def group_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]:
    """
    Group the rows of this table by the key that `key_selector` computes for each row.

    This table is not modified.

    Parameters
    ----------
    key_selector : Callable[[Row], _T]
        A callable that is applied exactly once to every row and returns the key of the group the row belongs
        to. The returned keys are used as dictionary keys, so they must be hashable.

    Returns
    -------
    dictionary : dict[_T, Table]
        A dictionary that maps each computed key to a new table built from the rows that produced this key.
        Keys appear in the order in which they are first encountered. The dictionary is empty if this table
        has no rows.
    """
    rows_by_key: dict[Table._T, list[Row]] = {}
    for row in self.to_rows():
        # Evaluate the selector only once per row: it may be expensive, and calling it repeatedly would give
        # inconsistent groups if it is not deterministic.
        rows_by_key.setdefault(key_selector(row), []).append(row)

    # Build each group's table in one step instead of growing it row by row with `add_row`, which would
    # create a new intermediate table for every single row.
    return {key: Table.from_rows(rows) for key, rows in rows_by_key.items()}

def keep_only_columns(self, column_names: list[str]) -> Table:
"""
Return a table with only the given column(s).
Expand Down
35 changes: 35 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_group_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from collections.abc import Callable

import pytest
from safeds.data.tabular.containers import Table


@pytest.mark.parametrize(
    ("table", "selector", "expected"),
    [
        pytest.param(
            Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", "e"]}),
            lambda row: row["col1"],
            {
                1: Table({"col1": [1, 1], "col2": ["a", "b"]}),
                2: Table({"col1": [2, 2], "col2": ["c", "d"]}),
                3: Table({"col1": [3], "col2": ["e"]}),
            },
            id="select by row1",
        ),
        pytest.param(
            # "col2" mixes strings with an integer; grouping must still split the rows by "col1".
            Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", 2]}),
            lambda row: row["col1"],
            {
                1: Table({"col1": [1, 1], "col2": ["a", "b"]}),
                2: Table({"col1": [2, 2], "col2": ["c", "d"]}),
                3: Table({"col1": [3], "col2": [2]}),
            },
            id="different types in column",
        ),
        pytest.param(
            Table(),
            lambda row: row["col1"],
            {},
            id="empty table",
        ),
        pytest.param(
            Table({"col1": [], "col2": []}),
            lambda row: row["col1"],
            {},
            id="table with no rows",
        ),
    ],
)
def test_group_by(table: Table, selector: Callable, expected: dict) -> None:
    """Check that `Table.group_by` returns the expected mapping from group key to table."""
    grouped = table.group_by(selector)
    assert grouped == expected

0 comments on commit afb98be

Please sign in to comment.