Skip to content

Commit

Permalink
feat: swap name and data parameters of Column (#105)
Browse files Browse the repository at this point in the history
### Summary of Changes

The order of the parameters of the `__init__` method of `Column` is now:

1. `name` (was 2.)
2. `data` (was 1.)
3. `type_` (unchanged).

Having the `name` first is more readable, since the data can be quite
long, and it highlights that the `name` acts as a key and the `data` as
a value.

---------

Co-authored-by: lars-reimann <lars-reimann@users.noreply.github.com>
  • Loading branch information
lars-reimann and lars-reimann committed Mar 27, 2023
1 parent 20aaf5e commit c2f8da5
Show file tree
Hide file tree
Showing 27 changed files with 84 additions and 89 deletions.
13 changes: 5 additions & 8 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,19 @@ class Column:
Parameters
----------
data : Iterable
The data.
name : str
The name of the column.
data : Iterable
The data.
type_ : Optional[ColumnType]
The type of the column. If not specified, the type will be inferred from the data.
"""

def __init__(
self,
data: Iterable,
name: str,
type_: Optional[ColumnType] = None,
self, name: str, data: Iterable, type_: Optional[ColumnType] = None
) -> None:
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
self._name: str = name
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
self._type: ColumnType = (
type_
if type_ is not None
Expand Down Expand Up @@ -158,7 +155,7 @@ def rename(self, new_name: str) -> Column:
column : Column
A new column with the new name.
"""
return Column(self._data, new_name, self._type)
return Column(new_name, self._data, self._type)

def all(self, predicate: Callable[[Any], bool]) -> bool:
"""
Expand Down
4 changes: 2 additions & 2 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,10 @@ def get_column(self, column_name: str) -> Column:
"""
if self._schema.has_column(column_name):
output_column = Column(
column_name,
self._data.iloc[
:, [self._schema._get_column_index_by_name(column_name)]
].squeeze(),
column_name,
self._schema.get_type_of_column(column_name),
)
return output_column
Expand Down Expand Up @@ -952,7 +952,7 @@ def transform_column(
"""
if self.has_column(name):
items: list = [transformer(item) for item in self.to_rows()]
result: Column = Column(pd.Series(items), name)
result: Column = Column(name, pd.Series(items))
return self.replace_column(name, result)
raise UnknownColumnNameError([name])

Expand Down
8 changes: 4 additions & 4 deletions tests/safeds/data/tabular/containers/_column/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@


def test_from_columns() -> None:
column1 = Column(pd.Series([1, 4]), "A")
column2 = Column(pd.Series([2, 5]), "B")
column1 = Column("A", pd.Series([1, 4]))
column2 = Column("B", pd.Series([2, 5]))

assert column1._type == column2._type


def test_from_columns_negative() -> None:
column1 = Column(pd.Series([1, 4]), "A")
column2 = Column(pd.Series(["2", "5"]), "B")
column1 = Column("A", pd.Series([1, 4]))
column2 = Column("B", pd.Series(["2", "5"]))

assert column1._type != column2._type
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,30 @@


def test_column_property_all_positive() -> None:
column = Column(pd.Series([1, 1, 1]), "col1")
column = Column("col1", pd.Series([1, 1, 1]))
assert column.all(lambda value: value == 1)


def test_column_property_all_negative() -> None:
column = Column(pd.Series([1, 2, 1]), "col1")
column = Column("col1", pd.Series([1, 2, 1]))
assert not column.all(lambda value: value == 1)


def test_column_property_any_positive() -> None:
column = Column(pd.Series([1, 2, 1]), "col1")
column = Column("col1", pd.Series([1, 2, 1]))
assert column.any(lambda value: value == 1)


def test_column_property_any_negative() -> None:
column = Column(pd.Series([1, 2, 1]), "col1")
column = Column("col1", pd.Series([1, 2, 1]))
assert not column.any(lambda value: value == 3)


def test_column_property_none_positive() -> None:
column = Column(pd.Series([1, 2, 1]), "col1")
column = Column("col1", pd.Series([1, 2, 1]))
assert column.none(lambda value: value == 3)


def test_column_property_none_negative() -> None:
column = Column(pd.Series([1, 2, 1]), "col1")
column = Column("col1", pd.Series([1, 2, 1]))
assert not column.none(lambda value: value == 1)
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@


def test_correlation_with() -> None:
column1 = Column(pd.Series([1, 2, 3, 4]), "A")
column2 = Column(pd.Series([2, 3, 4, 5]), "B")
column1 = Column("A", pd.Series([1, 2, 3, 4]))
column2 = Column("B", pd.Series([2, 3, 4, 5]))
actual_corr = column1.correlation_with(column2)
expected_corr = column1._data.corr(column2._data)
assert actual_corr == expected_corr


def test_correlation_with_NonNumericColumnError() -> None:
column1 = Column(pd.Series([1, 2, 3, 4]), "A")
column2 = Column(pd.Series(["a", "b", "c", "d"]), "B")
column1 = Column("A", pd.Series([1, 2, 3, 4]))
column2 = Column("B", pd.Series(["a", "b", "c", "d"]))
with pytest.raises(NonNumericColumnError):
column1.correlation_with(column2)


def test_correlation_with_ColumnsLengthMismachtError() -> None:
column1 = Column(pd.Series([1, 2, 3, 4]), "A")
column2 = Column(pd.Series([2]), "B")
column1 = Column("A", pd.Series([1, 2, 3, 4]))
column2 = Column("B", pd.Series([2]))
with pytest.raises(ColumnLengthMismatchError):
column1.correlation_with(column2)
2 changes: 1 addition & 1 deletion tests/safeds/data/tabular/containers/_column/test_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@


def test_count_valid() -> None:
column = Column([1, 2, 3, 4, 5], "col1")
column = Column("col1", [1, 2, 3, 4, 5])
assert column.count() == 5
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def test_get_unique_values(
values: list[typing.Any], unique_values: list[typing.Any]
) -> None:
column: Column = Column(values, "")
column: Column = Column("", values)
extracted_unique_values: list[typing.Any] = column.get_unique_values()

assert extracted_unique_values == unique_values
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@


def test_get_value_valid() -> None:
column = Column(pd.Series([0, "1"]), "testColumn")
column = Column("testColumn", pd.Series([0, "1"]))
assert column.get_value(0) == 0
assert column.get_value(1) == "1"


def test_get_value_invalid() -> None:
column = Column(pd.Series([0, "1"]), "testColumn")
column = Column("testColumn", pd.Series([0, "1"]))
with pytest.raises(IndexOutOfBoundsError):
column.get_value(-1)

Expand Down
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_column/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@


def test_getitem_valid() -> None:
column = Column(pd.Series([0, "1"]), "testColumn")
column = Column("testColumn", pd.Series([0, "1"]))
assert column[0] == 0
assert column[1] == "1"


# noinspection PyStatementEffect
def test_getitem_invalid() -> None:
column = Column(pd.Series([0, "1"]), "testColumn")
column = Column("testColumn", pd.Series([0, "1"]))
with pytest.raises(IndexOutOfBoundsError):
column[-1]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
)
def test_has_missing_values(values: list, expected: bool) -> None:
if len(values) == 0:
column = Column(pd.Series(values, dtype=np.dtype("float64")), "A")
column = Column("A", pd.Series(values, dtype=np.dtype("float64")))
else:
column = Column(pd.Series(values), "A")
column = Column("A", pd.Series(values))
assert column.has_missing_values() == expected
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_column/test_idness.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
[(["A", "B"], 1), (["A", "A", "A", "B"], 0.5)],
)
def test_idness_valid(values: list[str], result: float) -> None:
column: Column = Column(pd.Series(values), "test_idness_valid")
column: Column = Column("test_idness_valid", pd.Series(values))
idness = column.idness()
assert idness == result


def test_idness_invalid() -> None:
column = Column(pd.Series([], dtype=int), "test_idness_invalid")
column = Column("test_idness_invalid", pd.Series([], dtype=int))
with pytest.raises(ColumnSizeError):
column.idness()
2 changes: 1 addition & 1 deletion tests/safeds/data/tabular/containers/_column/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@


def test_iter() -> None:
column = Column([0, "1"], "testColumn")
column = Column("testColumn", [0, "1"])
assert list(column) == [0, "1"]
2 changes: 1 addition & 1 deletion tests/safeds/data/tabular/containers/_column/test_len.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@


def test_count_valid() -> None:
column = Column([1, 2, 3, 4, 5], "col1")
column = Column("col1", [1, 2, 3, 4, 5])
assert len(column) == 5
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
[([1, 2, 3], 0), ([1, 2, 3, None], 1 / 4), ([None, None, None], 1)],
)
def test_missing_value_ratio(values: list, expected: float) -> None:
column = Column(pd.Series(values), "A")
column = Column("A", pd.Series(values))
result = column.missing_value_ratio()
assert result == expected


def test_missing_value_ratio_empty() -> None:
column = Column(pd.Series([], dtype=np.dtype("float64")), "A")
column = Column("A", pd.Series([], dtype=np.dtype("float64")))
with pytest.raises(ColumnSizeError):
column.missing_value_ratio()
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_column/test_rename.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@


def test_should_return_new_column_with_new_name() -> None:
column = Column([1, 2, 3], "A")
column = Column("A", [1, 2, 3])
new_column = column.rename("B")
assert new_column.name == "B"


def test_should_not_change_name_of_original_column() -> None:
column = Column([1, 2, 3], "A")
column = Column("A", [1, 2, 3])
column.rename("B")
assert column.name == "A"
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
],
)
def test_stability(values: list[typing.Any], expected: float) -> None:
column = Column(pd.Series(values), "A")
column = Column("A", pd.Series(values))
assert column.stability() == expected


def test_stability_error() -> None:
column = Column(
pd.Series([], dtype=np.dtype("float64")), "A"
"A", pd.Series([], dtype=np.dtype("float64"))
) # Fix warning against unknown type
with pytest.raises(ColumnSizeError):
column.stability()
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_column/test_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@


def test_sum_valid() -> None:
c1 = Column(pd.Series([1, 2]), "test")
c1 = Column("test", pd.Series([1, 2]))
assert c1.sum() == 3


def test_sum_invalid() -> None:
c1 = Column(pd.Series([1, "a"]), "test")
c1 = Column("test", pd.Series([1, "a"]))
with pytest.raises(NonNumericColumnError):
c1.sum()
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
@pytest.mark.parametrize(
"column, col_type",
[
(Column(["a", "b", "c"], "col3"), StringColumnType()),
(Column([0, -1, -2], "col3"), IntColumnType()),
(Column("col3", ["a", "b", "c"]), StringColumnType()),
(Column("col3", [0, -1, -2]), IntColumnType()),
],
)
def test_add_column_valid(column: Column, col_type: ColumnType) -> None:
Expand All @@ -26,10 +26,10 @@ def test_add_column_valid(column: Column, col_type: ColumnType) -> None:
def test_add_column_invalid_duplicate_column_name_error() -> None:
with raises(DuplicateColumnNameError):
table1 = Table(pd.DataFrame(data={"col1": [1, 2, 1], "col2": [1, 2, 4]}))
table1 = table1.add_column(Column(["a", "b", "c"], "col1"))
table1 = table1.add_column(Column("col1", ["a", "b", "c"]))


def test_add_column_invalid_column_size_error() -> None:
with raises(ColumnSizeError):
table1 = Table(pd.DataFrame(data={"col1": [1, 2, 1], "col2": [1, 2, 4]}))
table1 = table1.add_column(Column(["a", "b", "c", "d"], "col3"))
table1 = table1.add_column(Column("col3", ["a", "b", "c", "d"]))
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

def test_add_columns_valid() -> None:
table1 = Table(pd.DataFrame(data={"col1": [1, 2, 1], "col2": [1, 2, 4]}))
col3 = Column(pd.Series(data=[0, -1, -2]), "col3")
col4 = Column(pd.Series(data=["a", "b", "c"]), "col4")
col3 = Column("col3", pd.Series(data=[0, -1, -2]))
col4 = Column("col4", pd.Series(data=["a", "b", "c"]))
table1 = table1.add_columns([col3, col4])
assert table1.count_columns() == 4
assert table1.get_column("col3") == col3
Expand All @@ -19,8 +19,8 @@ def test_add_columns_valid() -> None:

def test_add_columns_table_valid() -> None:
table1 = Table(pd.DataFrame(data={"col1": [1, 2, 1], "col2": [1, 2, 4]}))
col3 = Column(pd.Series(data=[0, -1, -2]), "col3")
col4 = Column(pd.Series(data=["a", "b", "c"]), "col4")
col3 = Column("col3", pd.Series(data=[0, -1, -2]))
col4 = Column("col4", pd.Series(data=["a", "b", "c"]))
table2 = Table.from_columns([col3, col4])
table1 = table1.add_columns(table2)
assert table1.count_columns() == 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
def test_from_columns() -> None:
table_expected = Table.from_csv_file(resolve_resource_path("test_column_table.csv"))
columns_table: list[Column] = [
Column(pd.Series([1, 4]), "A"),
Column(pd.Series([2, 5]), "B"),
Column("A", pd.Series([1, 4])),
Column("B", pd.Series([2, 5])),
]
table_restored: Table = Table.from_columns(columns_table)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_replace_valid(column_name: str, path: str) -> None:
)
expected: Table = Table.from_csv_file(resolve_resource_path(path))

column = Column(pd.Series(["d", "e", "f"]), column_name)
column = Column(column_name, pd.Series(["d", "e", "f"]))

result = input_table.replace_column("C", column)

Expand All @@ -46,7 +46,7 @@ def test_replace_invalid(
input_table: Table = Table.from_csv_file(
resolve_resource_path("test_table_replace_column_input.csv")
)
column = Column(pd.Series(column_values), column_name)
column = Column(column_name, pd.Series(column_values))

with pytest.raises(error):
input_table.replace_column(old_column_name, column)
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ def test_sort_columns_valid(
query: Callable[[Column, Column], int], col1: int, col2: int, col3: int, col4: int
) -> None:
columns = [
Column(pd.Series(data=["A", "B", "C", "A", "D"]), "col1"),
Column(pd.Series(data=["Test1", "Test1", "Test3", "Test1", "Test4"]), "col2"),
Column(pd.Series(data=[1, 2, 3, 4, 5]), "col3"),
Column(pd.Series(data=[2, 3, 1, 4, 6]), "col4"),
Column("col1", pd.Series(data=["A", "B", "C", "A", "D"])),
Column("col2", pd.Series(data=["Test1", "Test1", "Test3", "Test1", "Test4"])),
Column("col3", pd.Series(data=[1, 2, 3, 4, 5])),
Column("col4", pd.Series(data=[2, 3, 1, 4, 6])),
]
table1 = Table(
pd.DataFrame(
Expand Down

0 comments on commit c2f8da5

Please sign in to comment.