Skip to content

Commit

Permalink
feat: rename TableSchema to Schema (#133)
Browse files Browse the repository at this point in the history
### Summary of Changes

Rename `TableSchema` to `Schema` since a `Row` also has a schema.

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
lars-reimann and github-actions[bot] committed Mar 31, 2023
1 parent 1786a87 commit 1419d25
Show file tree
Hide file tree
Showing 23 changed files with 52 additions and 57 deletions.
12 changes: 5 additions & 7 deletions src/safeds/data/tabular/containers/_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas as pd
from IPython.core.display_functions import DisplayHandle, display
from safeds.data.tabular.typing import ColumnType, TableSchema
from safeds.data.tabular.typing import ColumnType, Schema
from safeds.exceptions import UnknownColumnNameError


Expand All @@ -15,13 +15,13 @@ class Row:
----------
data : typing.Iterable
The data.
schema : TableSchema
schema : Schema
The schema of the row.
"""

def __init__(self, data: typing.Iterable, schema: TableSchema):
def __init__(self, data: typing.Iterable, schema: Schema):
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
self.schema: TableSchema = schema
self.schema: Schema = schema
self._data = self._data.reset_index(drop=True)

def __getitem__(self, column_name: str) -> Any:
Expand Down Expand Up @@ -145,7 +145,5 @@ def _ipython_display_(self) -> DisplayHandle:
tmp = self._data.to_frame().T
tmp.columns = self.get_column_names()

with pd.option_context(
"display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]
):
with pd.option_context("display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]):
return display(tmp)
14 changes: 7 additions & 7 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import seaborn as sns
from IPython.core.display_functions import DisplayHandle, display
from pandas import DataFrame, Series
from safeds.data.tabular.typing import ColumnType, TableSchema
from safeds.data.tabular.typing import ColumnType, Schema
from safeds.exceptions import (
ColumnLengthMismatchError,
ColumnSizeError,
Expand Down Expand Up @@ -40,7 +40,7 @@ class Table:
----------
data : typing.Iterable
The data.
schema : Optional[TableSchema]
schema : Optional[Schema]
The schema of the table. If not specified, the schema will be inferred from the data.
Raises
Expand Down Expand Up @@ -174,7 +174,7 @@ def from_rows(rows: list[Row]) -> Table:
if len(rows) == 0:
raise MissingDataError("This function requires at least one row.")

schema_compare: TableSchema = rows[0].schema
schema_compare: Schema = rows[0].schema
row_array: list[Series] = []

for row in rows:
Expand All @@ -190,9 +190,9 @@ def from_rows(rows: list[Row]) -> Table:
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, data: Iterable, schema: Optional[TableSchema] = None):
def __init__(self, data: Iterable, schema: Optional[Schema] = None):
self._data: pd.Dataframe = data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
self._schema: TableSchema = TableSchema._from_dataframe(self._data) if schema is None else schema
self._schema: Schema = Schema._from_dataframe(self._data) if schema is None else schema

if self._data.empty:
self._data = pd.DataFrame(columns=self._schema.get_column_names())
Expand Down Expand Up @@ -227,13 +227,13 @@ def __str__(self) -> str:
# ------------------------------------------------------------------------------------------------------------------

@property
def schema(self) -> TableSchema:
def schema(self) -> Schema:
"""
Return the schema of the table.
Returns
-------
schema : TableSchema
schema : Schema
The schema.
"""
return self._schema
Expand Down
6 changes: 3 additions & 3 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from IPython.core.display_functions import DisplayHandle
from safeds.data.tabular.containers import Column, Table
from safeds.data.tabular.typing import TableSchema
from safeds.data.tabular.typing import Schema


class TaggedTable(Table):
Expand All @@ -17,7 +17,7 @@ class TaggedTable(Table):
Name of the target column.
feature_names : Optional[list[str]]
Names of the feature columns. If None, all columns except the target column are used.
schema : Optional[TableSchema]
schema : Optional[Schema]
The schema of the table. If not specified, the schema will be inferred from the data.
"""

Expand All @@ -26,7 +26,7 @@ def __init__(
data: Iterable,
target_name: str,
feature_names: Optional[list[str]] = None,
schema: Optional[TableSchema] = None,
schema: Optional[Schema] = None,
):
super().__init__(data, schema)

Expand Down
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/typing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from ._column_type import Anything, Boolean, ColumnType, Integer, RealNumber, String
from ._table_schema import TableSchema
from ._schema import Schema
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@


@dataclass
class TableSchema:
class Schema:
"""
Store column names and corresponding data types for a table.
Store column names and corresponding data types for a `Table` or `Row`.
Parameters
----------
Expand Down Expand Up @@ -80,7 +80,7 @@ def _get_column_index_by_name(self, column_name: str) -> int:
return list(self._schema.keys()).index(column_name)

@staticmethod
def _from_dataframe(dataframe: pd.DataFrame) -> TableSchema:
def _from_dataframe(dataframe: pd.DataFrame) -> Schema:
"""
Construct a Schema from a DataFrame. This function is not supposed to be exposed to the user.
Expand All @@ -91,7 +91,7 @@ def _from_dataframe(dataframe: pd.DataFrame) -> TableSchema:
Returns
-------
_from_dataframe: TableSchema
_from_dataframe: Schema
The constructed Schema.
"""
Expand All @@ -100,7 +100,7 @@ def _from_dataframe(dataframe: pd.DataFrame) -> TableSchema:
# noinspection PyProtectedMember
types = (ColumnType._from_numpy_dtype(dtype) for dtype in dataframe.dtypes)

return TableSchema(dict(zip(names, types)))
return Schema(dict(zip(names, types)))

def get_column_names(self) -> list[str]:
"""
Expand Down Expand Up @@ -132,7 +132,7 @@ def __repr__(self) -> str:
return self.__str__()

def __eq__(self, o: object) -> bool:
if not isinstance(o, TableSchema):
if not isinstance(o, Schema):
return NotImplemented
if self is o:
return True
Expand Down
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_row/test_count.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from safeds.data.tabular.containers import Row
from safeds.data.tabular.typing import Integer, String, TableSchema
from safeds.data.tabular.typing import Integer, Schema, String


def test_count() -> None:
row = Row(
[0, "1"],
TableSchema({"testColumn1": Integer(), "testColumn2": String()}),
Schema({"testColumn1": Integer(), "testColumn2": String()}),
)
assert row.count() == 2
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import pandas as pd
from safeds.data.tabular.containers import Row
from safeds.data.tabular.typing import RealNumber, TableSchema
from safeds.data.tabular.typing import RealNumber, Schema


def test_get_column_names() -> None:
row = Row(
pd.Series(data=[1, 2]),
TableSchema(
Schema(
{
"col1": RealNumber(),
"col2": RealNumber(),
Expand All @@ -17,5 +17,5 @@ def test_get_column_names() -> None:


def test_get_column_names_empty() -> None:
row = Row(pd.Series(data=[]), TableSchema({}))
row = Row(pd.Series(data=[]), Schema({}))
assert not row.get_column_names()
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_row/test_iter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from safeds.data.tabular.containers import Row
from safeds.data.tabular.typing import Integer, String, TableSchema
from safeds.data.tabular.typing import Integer, Schema, String


def test_iter() -> None:
row = Row(
[0, "1"],
TableSchema({"testColumn1": Integer(), "testColumn2": String()}),
Schema({"testColumn1": Integer(), "testColumn2": String()}),
)
assert list(row) == ["testColumn1", "testColumn2"]
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_row/test_len.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from safeds.data.tabular.containers import Row
from safeds.data.tabular.typing import Integer, String, TableSchema
from safeds.data.tabular.typing import Integer, Schema, String


def test_count() -> None:
row = Row(
[0, "1"],
TableSchema({"testColumn1": Integer(), "testColumn2": String()}),
Schema({"testColumn1": Integer(), "testColumn2": String()}),
)
assert len(row) == 2
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_row.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
from _pytest.python_api import raises
from safeds.data.tabular.containers import Row, Table
from safeds.data.tabular.typing import Integer, String, TableSchema
from safeds.data.tabular.typing import Integer, Schema, String
from safeds.exceptions import SchemaMismatchError


Expand All @@ -19,6 +19,6 @@ def test_add_row_invalid() -> None:
table1 = Table(pd.DataFrame(data={"col1": [1, 2, 1], "col2": [1, 2, 4]}))
row = Row(
pd.Series(data=[5, "Hallo"]),
TableSchema({"col1": Integer(), "col2": String()}),
Schema({"col1": Integer(), "col2": String()}),
)
table1 = table1.add_row(row)
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import TableSchema
from safeds.data.tabular.typing import Schema


def test_get_column_names() -> None:
Expand All @@ -9,5 +9,5 @@ def test_get_column_names() -> None:


def test_get_column_names_empty() -> None:
table = Table(pd.DataFrame(), TableSchema({}))
table = Table(pd.DataFrame(), Schema({}))
assert not table.get_column_names()
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import RealNumber, TableSchema
from safeds.data.tabular.typing import RealNumber, Schema


def test_remove_columns_with_missing_values_valid() -> None:
Expand All @@ -19,6 +19,6 @@ def test_remove_columns_with_missing_values_valid() -> None:


def test_remove_columns_with_missing_values_empty() -> None:
table = Table([], TableSchema({"col1": RealNumber()}))
table = Table([], Schema({"col1": RealNumber()}))
updated_table = table.remove_columns_with_missing_values()
assert updated_table.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import RealNumber, TableSchema
from safeds.data.tabular.typing import RealNumber, Schema


def test_remove_columns_with_non_numerical_values_valid() -> None:
Expand All @@ -19,6 +19,6 @@ def test_remove_columns_with_non_numerical_values_valid() -> None:


def test_remove_columns_with_non_numerical_values_empty() -> None:
table = Table([], TableSchema({"col1": RealNumber()}))
table = Table([], Schema({"col1": RealNumber()}))
updated_table = table.remove_columns_with_non_numerical_values()
assert updated_table.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import RealNumber, TableSchema
from safeds.data.tabular.typing import RealNumber, Schema


def test_remove_rows_with_missing_values_valid() -> None:
Expand All @@ -19,6 +19,6 @@ def test_remove_rows_with_missing_values_valid() -> None:


def test_remove_rows_with_missing_values_empty() -> None:
table = Table([], TableSchema({"col1": RealNumber()}))
table = Table([], Schema({"col1": RealNumber()}))
updated_table = table.remove_rows_with_missing_values()
assert updated_table.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import RealNumber, TableSchema
from safeds.data.tabular.typing import RealNumber, Schema


def test_remove_rows_with_outliers_no_outliers() -> None:
Expand Down Expand Up @@ -59,7 +59,7 @@ def test_remove_rows_with_outliers_with_outliers() -> None:


def test_remove_rows_with_outliers_no_rows() -> None:
table = Table([], TableSchema({"col1": RealNumber()}))
table = Table([], Schema({"col1": RealNumber()}))
result = table.remove_rows_with_outliers()
assert result.count_rows() == 0
assert result.count_columns() == 1
6 changes: 3 additions & 3 deletions tests/safeds/data/tabular/containers/_table/test_table.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import RealNumber, TableSchema
from safeds.data.tabular.typing import RealNumber, Schema


def test_create_empty_table() -> None:
table = Table([], TableSchema({"col1": RealNumber()}))
table = Table([], Schema({"col1": RealNumber()}))
col = table.get_column("col1")
assert col.count() == 0
assert isinstance(col.type, RealNumber)
Expand All @@ -12,4 +12,4 @@ def test_create_empty_table() -> None:

def test_create_empty_table_without_schema() -> None:
table = Table([])
assert table.schema == TableSchema({})
assert table.schema == Schema({})
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_to_rows.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import pandas as pd
from safeds.data.tabular.containers import Row, Table
from safeds.data.tabular.typing import Integer, String, TableSchema
from safeds.data.tabular.typing import Integer, Schema, String
from tests.helpers import resolve_resource_path


def test_to_rows() -> None:
table = Table.from_csv_file(resolve_resource_path("test_row_table.csv"))
expected_schema: TableSchema = TableSchema(
expected_schema: Schema = Schema(
{
"A": Integer(),
"B": Integer(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,4 @@

def test_get_column_index_by_name() -> None:
table = Table(pd.DataFrame(data={"col1": [1], "col2": [2]}))
assert (
table.schema._get_column_index_by_name("col1") == 0
and table.schema._get_column_index_by_name("col2") == 1
)
assert table.schema._get_column_index_by_name("col1") == 0 and table.schema._get_column_index_by_name("col2") == 1
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import Integer, RealNumber, TableSchema
from safeds.data.tabular.typing import Integer, RealNumber, Schema
from tests.helpers import resolve_resource_path


def test_table_equals_valid() -> None:
table = Table.from_json_file(resolve_resource_path("test_schema_table.json"))
schema_expected = TableSchema(
schema_expected = Schema(
{
"A": Integer(),
"B": Integer(),
Expand All @@ -17,7 +17,7 @@ def test_table_equals_valid() -> None:

def test_table_equals_invalid() -> None:
table = Table.from_json_file(resolve_resource_path("test_schema_table.json"))
schema_not_expected = TableSchema(
schema_not_expected = Schema(
{
"A": RealNumber(),
"C": Integer(),
Expand Down

0 comments on commit 1419d25

Please sign in to comment.