Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 154 additions & 59 deletions src/safeds/data/tabular/containers/_row.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,26 @@
from __future__ import annotations

from hashlib import md5
from typing import TYPE_CHECKING, Any

import pandas as pd
from IPython.core.display_functions import DisplayHandle, display
from pandas.core.util.hashing import hash_pandas_object
import polars as pl

from safeds.data.tabular.exceptions import UnknownColumnNameError
from safeds.data.tabular.typing import ColumnType, Schema

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator
from collections.abc import Iterator


class Row:
"""
A row is a collection of values, where each value is associated with a column name.

Parameters
----------
data : Iterable
The data.
schema : Schema
The schema of the row.
To create a row manually, use the static method [from_dict][safeds.data.tabular.containers._row.Row.from_dict].

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
"""

# ------------------------------------------------------------------------------------------------------------------
Expand All @@ -44,60 +41,111 @@ def from_dict(data: dict[str, Any]) -> Row:
-------
row : Row
The generated row.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
"""
row_frame = pd.DataFrame([data.values()], columns=list(data.keys()))
# noinspection PyProtectedMember
return Row(data.values(), Schema._from_pandas_dataframe(row_frame))
return Row(pl.DataFrame(data))

# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, data: Iterable, schema: Schema | None = None):
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
self._data = self._data.reset_index(drop=True)
def __init__(self, data: pl.DataFrame, schema: Schema | None = None):
"""
Initialize a row from a `polars.DataFrame`.

**Do not use this method directly.** It is not part of the public interface and may change in the future
without a major version bump. Use the static method
[from_dict][safeds.data.tabular.containers._row.Row.from_dict] instead.

Parameters
----------
data : polars.DataFrame
The data.
schema : Schema | None
The schema. If None, the schema is inferred from the data.
"""
self._data: pl.DataFrame = data

self._schema: Schema
if schema is not None:
self._schema = schema
else:
column_names = [f"column_{i}" for i in range(len(self._data))]
dataframe = self._data.to_frame().T
dataframe.columns = column_names
# noinspection PyProtectedMember
self._schema = Schema._from_pandas_dataframe(dataframe)
self._schema = Schema._from_polars_dataframe(self._data)

def __eq__(self, other: Any) -> bool:
if not isinstance(other, Row):
return NotImplemented
if self is other:
return True
return self._schema == other._schema and self._data.equals(other._data)
return self._schema == other._schema and self._data.frame_equal(other._data)

def __getitem__(self, column_name: str) -> Any:
return self.get_value(column_name)
"""
Return the value of a specified column.

def __hash__(self) -> int:
data_hash_string = md5(hash_pandas_object(self._data, index=True).values).hexdigest()
column_names_frozenset = frozenset(self.get_column_names())
Parameters
----------
column_name : str
The column name.

Returns
-------
value : Any
The value of the column.

return hash((data_hash_string, column_names_frozenset))
Raises
------
UnknownColumnNameError
If the row does not contain the specified column.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row["a"]
1
"""
return self.get_value(column_name)

def __iter__(self) -> Iterator[Any]:
return iter(self.get_column_names())

def __len__(self) -> int:
return len(self._data)
"""
Return the number of columns in this row.

Returns
-------
count : int
The number of columns.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> len(row)
2
"""
return self._data.shape[1]

def __repr__(self) -> str:
tmp = self._data.to_frame().T
tmp.columns = self.get_column_names()
return tmp.__repr__()
return f"Row({str(self)})"

def __str__(self) -> str:
tmp = self._data.to_frame().T
tmp.columns = self.get_column_names()
return tmp.__str__()
match len(self):
case 0:
return "{}"
case 1:
return str(self.to_dict())
case _:
lines = (f" {name!r}: {value!r}" for name, value in self.to_dict().items())
joined = ",\n".join(lines)
return f"{{\n{joined}\n}}"

# ------------------------------------------------------------------------------------------------------------------
# Properties
Expand All @@ -112,6 +160,12 @@ def schema(self) -> Schema:
-------
schema : Schema
The schema.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> schema = row.schema
"""
return self._schema

Expand All @@ -130,50 +184,73 @@ def get_value(self, column_name: str) -> Any:

Returns
-------
value :
value : Any
The value of the column.

Raises
------
UnknownColumnNameError
If the row does not contain the specified column.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row.get_value("a")
1
"""
if not self._schema.has_column(column_name):
if not self.has_column(column_name):
raise UnknownColumnNameError([column_name])
# noinspection PyProtectedMember
return self._data[self._schema._get_column_index(column_name)]

return self._data[0, column_name]

def has_column(self, column_name: str) -> bool:
"""
Return whether the row contains a given column.

Alias for self.schema.hasColumn(column_name: str) -> bool.

Parameters
----------
column_name : str
The name of the column.

Returns
-------
contains : bool
has_column : bool
True, if row contains the column.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row.has_column("a")
True

>>> row.has_column("c")
False
"""
return self._schema.has_column(column_name)

def get_column_names(self) -> list[str]:
"""
Return a list of all column names saved in this schema.

Alias for self.schema.get_column_names() -> list[str].
Return a list of all column names in the row.

Returns
-------
column_names : list[str]
The column names.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row.get_column_names()
['a', 'b']
"""
return self._schema.get_column_names()

def get_type_of_column(self, column_name: str) -> ColumnType:
"""
Return the type of a specified column.

Alias for self.schema.get_type_of_column(column_name: str) -> ColumnType.
Return the type of the specified column.

Parameters
----------
Expand All @@ -187,8 +264,15 @@ def get_type_of_column(self, column_name: str) -> ColumnType:

Raises
------
ColumnNameError
If the specified target column name does not exist.
UnknownColumnNameError
If the row does not contain the specified column.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row.get_type_of_column("a")
Integer
"""
return self._schema.get_type_of_column(column_name)

Expand All @@ -204,8 +288,15 @@ def count(self) -> int:
-------
count : int
The number of columns.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row.count()
2
"""
return len(self._data)
return self._data.shape[1]

# ------------------------------------------------------------------------------------------------------------------
# Conversion
Expand All @@ -219,24 +310,28 @@ def to_dict(self) -> dict[str, Any]:
-------
data : dict[str, Any]
Dictionary representation of the row.

Examples
--------
>>> from safeds.data.tabular.containers import Row
>>> row = Row.from_dict({"a": 1, "b": 2})
>>> row.to_dict()
{'a': 1, 'b': 2}
"""
return {column_name: self.get_value(column_name) for column_name in self.get_column_names()}

# ------------------------------------------------------------------------------------------------------------------
# IPython integration
# ------------------------------------------------------------------------------------------------------------------

def _ipython_display_(self) -> DisplayHandle:
def _repr_html_(self) -> str:
"""
Return a display object for the column to be used in Jupyter Notebooks.
Return an HTML representation of the row.

Returns
-------
output : DisplayHandle
Output object.
output : str
The generated HTML.
"""
tmp = self._data.to_frame().T
tmp.columns = self.get_column_names()

with pd.option_context("display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]):
return display(tmp)
# noinspection PyProtectedMember
return self._data._repr_html_()
Loading