diff --git a/poetry.lock b/poetry.lock index 3333cfeee..41cbf0a91 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "anyio" @@ -2182,42 +2182,6 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] -[[package]] -name = "polars" -version = "0.17.5" -description = "Blazingly fast DataFrame library" -category = "main" -optional = false -python-versions = ">=3.7" -files = [ - {file = "polars-0.17.5-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:3ec088bb68c2b833f1172b85dc1222ae88732ce0ae7de34590dd387204a84b1b"}, - {file = "polars-0.17.5-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:1f5389d29d5e5e993a9a361801d54dccd0399cedb72632274b341e27957c631c"}, - {file = "polars-0.17.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4518d2a70bf69eaae04437a241f6d81f2d576e4491d8d4b45c95eacb53415616"}, - {file = "polars-0.17.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fbe7dc6be495d1805f8252bc6bcfb0372134595bea0ebf7c46db21bad86bf58"}, - {file = "polars-0.17.5-cp37-abi3-win_amd64.whl", hash = "sha256:7f112d6cefb37a32fc723195f0be1f62ec528b5f83905ad2e614bc78585a0313"}, - {file = "polars-0.17.5.tar.gz", hash = "sha256:7db2da068e983312238799ad8263e80544151304aac0bc2e6511f91cb56af54d"}, -] - -[package.dependencies] -pandas = {version = "*", optional = true, markers = "extra == \"pandas\""} -pyarrow = {version = ">=7.0.0", optional = true, markers = "extra == \"pyarrow\""} -typing_extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} -xlsx2csv = {version = ">=0.8.0", optional = true, markers = "extra == \"xlsx2csv\""} - -[package.extras] -all = ["polars[connectorx,deltalake,fsspec,matplotlib,numpy,pandas,pyarrow,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] -connectorx = ["connectorx"] -deltalake = ["deltalake (>=0.8.0)"] -fsspec = ["fsspec"] -matplotlib = ["matplotlib"] -numpy = ["numpy (>=1.16.0)"] -pandas = ["pandas", "pyarrow (>=7.0.0)"] -pyarrow = ["pyarrow (>=7.0.0)"] -sqlalchemy = ["pandas", "sqlalchemy"] -timezone = ["backports.zoneinfo", "tzdata"] -xlsx2csv = ["xlsx2csv (>=0.8.0)"] -xlsxwriter = ["xlsxwriter"] - [[package]] name = "prometheus-client" version = "0.16.0" @@ -2302,44 +2266,6 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "pyarrow" -version = "11.0.0" -description = "Python library for Apache Arrow" -category = "main" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pyarrow-11.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:40bb42afa1053c35c749befbe72f6429b7b5f45710e85059cdd534553ebcf4f2"}, - {file = "pyarrow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7c28b5f248e08dea3b3e0c828b91945f431f4202f1a9fe84d1012a761324e1ba"}, - {file = "pyarrow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a37bc81f6c9435da3c9c1e767324ac3064ffbe110c4e460660c43e144be4ed85"}, - {file = "pyarrow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad7c53def8dbbc810282ad308cc46a523ec81e653e60a91c609c2233ae407689"}, - {file = "pyarrow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:25aa11c443b934078bfd60ed63e4e2d42461682b5ac10f67275ea21e60e6042c"}, - {file = "pyarrow-11.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:e217d001e6389b20a6759392a5ec49d670757af80101ee6b5f2c8ff0172e02ca"}, - {file = "pyarrow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ad42bb24fc44c48f74f0d8c72a9af16ba9a01a2ccda5739a517aa860fa7e3d56"}, - {file = "pyarrow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d942c690ff24a08b07cb3df818f542a90e4d359381fbff71b8f2aea5bf58841"}, - {file = "pyarrow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f010ce497ca1b0f17a8243df3048055c0d18dcadbcc70895d5baf8921f753de5"}, - {file = "pyarrow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2f51dc7ca940fdf17893227edb46b6784d37522ce08d21afc56466898cb213b2"}, - {file = "pyarrow-11.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:1cbcfcbb0e74b4d94f0b7dde447b835a01bc1d16510edb8bb7d6224b9bf5bafc"}, - {file = "pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaee8f79d2a120bf3e032d6d64ad20b3af6f56241b0ffc38d201aebfee879d00"}, - {file = "pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:410624da0708c37e6a27eba321a72f29d277091c8f8d23f72c92bada4092eb5e"}, - {file = "pyarrow-11.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2d53ba72917fdb71e3584ffc23ee4fcc487218f8ff29dd6df3a34c5c48fe8c06"}, - {file = "pyarrow-11.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f12932e5a6feb5c58192209af1d2607d488cb1d404fbc038ac12ada60327fa34"}, - {file = "pyarrow-11.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:41a1451dd895c0b2964b83d91019e46f15b5564c7ecd5dcb812dadd3f05acc97"}, - {file = "pyarrow-11.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:becc2344be80e5dce4e1b80b7c650d2fc2061b9eb339045035a1baa34d5b8f1c"}, - {file = "pyarrow-11.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f40be0d7381112a398b93c45a7e69f60261e7b0269cc324e9f739ce272f4f70"}, - {file = "pyarrow-11.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:362a7c881b32dc6b0eccf83411a97acba2774c10edcec715ccaab5ebf3bb0835"}, - {file = "pyarrow-11.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:ccbf29a0dadfcdd97632b4f7cca20a966bb552853ba254e874c66934931b9841"}, - {file = "pyarrow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e99be85973592051e46412accea31828da324531a060bd4585046a74ba45854"}, - {file = "pyarrow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69309be84dcc36422574d19c7d3a30a7ea43804f12552356d1ab2a82a713c418"}, - {file = "pyarrow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da93340fbf6f4e2a62815064383605b7ffa3e9eeb320ec839995b1660d69f89b"}, - {file = "pyarrow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:caad867121f182d0d3e1a0d36f197df604655d0b466f1bc9bafa903aa95083e4"}, - {file = "pyarrow-11.0.0.tar.gz", hash = "sha256:5461c57dbdb211a632a48facb9b39bbeb8a7905ec95d768078525283caef5f6d"}, -] - -[package.dependencies] -numpy = ">=1.16.6" - [[package]] name = "pycparser" version = "2.21" @@ -3193,18 +3119,6 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] -[[package]] -name = "typing-extensions" -version = "4.5.0" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" -files = [ - {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, - {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, -] - [[package]] name = "tzdata" version = "2023.3" @@ -3355,19 +3269,7 @@ files = [ {file = "widgetsnbextension-4.0.5.tar.gz", hash = "sha256:003f716d930d385be3fd9de42dd9bf008e30053f73bddde235d14fbeaeff19af"}, ] -[[package]] -name = "xlsx2csv" -version = "0.8.1" -description = "xlsx to csv converter" -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "xlsx2csv-0.8.1-py3-none-any.whl", hash = "sha256:6c36c0295d64f231570479e514d6163ce135af3c431a1705b073230bedaef9f2"}, - {file = "xlsx2csv-0.8.1.tar.gz", hash = "sha256:7ecd6d2bc2426f2e432f4fdac12211e1976d3cbb65f9033e1eda65edda2045e3"}, -] - [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "0f7d82568234cd91553d17836ecc8fa422cfef33a58d49386c1dac4f79a6332f" +content-hash = "5d228313ef6bbd0a16d1b9259fa0cbb7e3ab3a79c0a740bbb61ba01b4b670852" diff --git a/pyproject.toml b/pyproject.toml index e7c65f1b3..41be911d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,6 @@ pandas = "^2.0.0" pillow = "^9.5.0" scikit-learn = "^1.2.0" seaborn = "^0.12.2" -polars = {extras = ["pandas", "pyarrow", "xlsx2csv"], version = "^0.17.5"} [tool.poetry.group.dev.dependencies] pytest = "^7.2.1" diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 71151e5bf..dfa85d6e3 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -3,7 +3,7 @@ from collections.abc import Mapping from typing import TYPE_CHECKING, Any -import polars as pl +import pandas as pd from safeds.data.tabular.exceptions import UnknownColumnNameError from safeds.data.tabular.typing import ColumnType, Schema @@ -54,13 +54,13 @@ def from_dict(data: dict[str, Any]) -> Row: return Row(data) @staticmethod - def _from_polars_dataframe(data: pl.DataFrame, schema: Schema | None = None) -> Row: + def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) -> Row: """ - Create a row from a `polars.DataFrame`. + Create a row from a `pandas.DataFrame`. Parameters ---------- - data : polars.DataFrame + data : pd.DataFrame The data. schema : Schema | None The schema. If None, the schema is inferred from the data. @@ -72,16 +72,18 @@ def _from_polars_dataframe(data: pl.DataFrame, schema: Schema | None = None) -> Examples -------- - >>> import polars as pl + >>> import pandas as pd >>> from safeds.data.tabular.containers import Row - >>> row = Row._from_polars_dataframe(pl.DataFrame({"a": [1], "b": [2]})) + >>> row = Row._from_pandas_dataframe(pd.DataFrame({"a": [1], "b": [2]})) """ + data = data.reset_index(drop=True) + result = object.__new__(Row) result._data = data if schema is None: # noinspection PyProtectedMember - result._schema = Schema._from_polars_dataframe(data) + result._schema = Schema._from_pandas_dataframe(data) else: result._schema = schema @@ -108,9 +110,11 @@ def __init__(self, data: Mapping[str, Any] | None = None): if data is None: data = {} - self._data: pl.DataFrame = pl.DataFrame(data) + data = {key: [value] for key, value in data.items()} + + self._data: pd.DataFrame = pd.DataFrame(data) # noinspection PyProtectedMember - self._schema: Schema = Schema._from_polars_dataframe(self._data) + self._schema: Schema = Schema._from_pandas_dataframe(self._data) def __contains__(self, obj: Any) -> bool: """ @@ -169,7 +173,7 @@ def __eq__(self, other: Any) -> bool: return NotImplemented if self is other: return True - return self._schema == other._schema and self._data.frame_equal(other._data) + return self._schema == other._schema and self._data.equals(other._data) def __getitem__(self, column_name: str) -> Any: """ @@ -233,7 +237,7 @@ def __len__(self) -> int: >>> len(row) 2 """ - return self._data.width + return self._data.shape[1] def __repr__(self) -> str: """ @@ -319,7 +323,7 @@ def n_columns(self) -> int: >>> row.n_columns 2 """ - return self._data.width + return self._data.shape[1] @property def schema(self) -> Schema: @@ -372,7 +376,7 @@ def get_value(self, column_name: str) -> Any: if not self.has_column(column_name): raise UnknownColumnNameError([column_name]) - return self._data[0, column_name] + return self._data.loc[0, column_name] def has_column(self, column_name: str) -> bool: """ diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 23767ac2f..02b863cb1 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -8,7 +8,6 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -import polars as pl import seaborn as sns from IPython.core.display_functions import DisplayHandle, display from pandas import DataFrame @@ -211,7 +210,7 @@ def from_rows(rows: list[Row]) -> Table: for row in rows: if schema_compare != row._schema: raise SchemaMismatchError - row_array.append(row._data.to_pandas()) + row_array.append(row._data) dataframe: DataFrame = pd.concat(row_array, ignore_index=True) dataframe.columns = schema_compare.column_names @@ -251,10 +250,7 @@ def __eq__(self, other: Any) -> bool: return True table1 = self.sort_columns() table2 = other.sort_columns() - return table1._data.equals(table2._data) and table1._schema == table2._schema - - def __hash__(self) -> int: - return hash(self._data) + return table1._schema == table2._schema and table1._data.equals(table2._data) def __repr__(self) -> str: tmp = self._data.copy(deep=True) @@ -416,7 +412,7 @@ def get_row(self, index: int) -> Row: if len(self._data.index) - 1 < index or index < 0: raise IndexOutOfBoundsError(index) - return Row._from_polars_dataframe(pl.DataFrame(self._data.iloc[[index]]), self._schema) + return Row._from_pandas_dataframe(self._data.iloc[[index]], self._schema) # ------------------------------------------------------------------------------------------------------------------ # Information @@ -549,9 +545,7 @@ def add_row(self, row: Row) -> Table: if self._schema != row.schema: raise SchemaMismatchError - row_frame = row._data.to_pandas() - - new_df = pd.concat([self._data, row_frame]).infer_objects() + new_df = pd.concat([self._data, row._data]).infer_objects() new_df.columns = self.column_names return Table(new_df) @@ -576,9 +570,7 @@ def add_rows(self, rows: list[Row] | Table) -> Table: if self._schema != row.schema: raise SchemaMismatchError - row_frames = [row._data.to_pandas() for row in rows] - for row_frame in row_frames: - row_frame.columns = self.column_names + row_frames = (row._data for row in rows) result = pd.concat([result, *row_frames]).infer_objects() result.columns = self.column_names @@ -1266,8 +1258,8 @@ def to_rows(self) -> list[Row]: List of rows. """ return [ - Row._from_polars_dataframe( - pl.DataFrame([list(series_row)], schema=self._schema.column_names), + Row._from_pandas_dataframe( + pd.DataFrame([list(series_row)], columns=self._schema.column_names), self._schema, ) for (_, series_row) in self._data.iterrows() diff --git a/src/safeds/data/tabular/typing/_column_type.py b/src/safeds/data/tabular/typing/_column_type.py index 670a16f51..82396f334 100644 --- a/src/safeds/data/tabular/typing/_column_type.py +++ b/src/safeds/data/tabular/typing/_column_type.py @@ -4,15 +4,6 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from polars import FLOAT_DTYPES as POLARS_FLOAT_DTYPES -from polars import INTEGER_DTYPES as POLARS_INTEGER_DTYPES -from polars import TEMPORAL_DTYPES as POLARS_TEMPORAL_DTYPES -from polars import Boolean as PolarsBoolean -from polars import Decimal as PolarsDecimal -from polars import Object as PolarsObject -from polars import PolarsDataType -from polars import Utf8 as PolarsUtf8 - if TYPE_CHECKING: import numpy as np @@ -52,38 +43,6 @@ def _from_numpy_data_type(data_type: np.dtype) -> ColumnType: message = f"Unsupported numpy data type '{data_type}'." raise NotImplementedError(message) - @staticmethod - def _from_polars_data_type(data_type: PolarsDataType) -> ColumnType: - """ - Return the column type for a given `polars` data type. - - Parameters - ---------- - data_type : PolarsDataType - The `polars` data type. - - Returns - ------- - column_type : ColumnType - The ColumnType. - - Raises - ------ - NotImplementedError - If the given data type is not supported. - """ - if data_type in POLARS_INTEGER_DTYPES: - return Integer() - if data_type is PolarsBoolean: - return Boolean() - if data_type in POLARS_FLOAT_DTYPES or data_type is PolarsDecimal: - return RealNumber() - if data_type is PolarsUtf8 or data_type is PolarsObject or data_type in POLARS_TEMPORAL_DTYPES: - return String() - - message = f"Unsupported polars data type '{data_type}'." - raise NotImplementedError(message) - @abstractmethod def is_nullable(self) -> bool: """ diff --git a/src/safeds/data/tabular/typing/_schema.py b/src/safeds/data/tabular/typing/_schema.py index 457b70ccb..fec9affa5 100644 --- a/src/safeds/data/tabular/typing/_schema.py +++ b/src/safeds/data/tabular/typing/_schema.py @@ -8,7 +8,6 @@ if TYPE_CHECKING: import pandas as pd - import polars as pl @dataclass @@ -45,27 +44,6 @@ def _from_pandas_dataframe(dataframe: pd.DataFrame) -> Schema: return Schema(dict(zip(names, types, strict=True))) - @staticmethod - def _from_polars_dataframe(dataframe: pl.DataFrame) -> Schema: - """ - Create a schema from a `polars.Dataframe`. - - Parameters - ---------- - dataframe : pl.DataFrame - The dataframe. - - Returns - ------- - schema : Schema - The schema. - """ - names = dataframe.columns - # noinspection PyProtectedMember - types = (ColumnType._from_polars_data_type(data_type) for data_type in dataframe.dtypes) - - return Schema(dict(zip(names, types, strict=True))) - def __init__(self, schema: dict[str, ColumnType]): self._schema = dict(schema) # Defensive copy diff --git a/tests/safeds/data/tabular/containers/_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/test_add_rows.py index 50481e4e8..a6f79b7fc 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_rows.py @@ -1,23 +1,23 @@ -import polars as pl +import pandas as pd from safeds.data.tabular.containers import Row, Table def test_add_rows_valid() -> None: table1 = Table.from_dict({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}) - row1 = Row._from_polars_dataframe( - pl.DataFrame( + row1 = Row._from_pandas_dataframe( + pd.DataFrame( { - "col1": "d", - "col2": 6, + "col1": ["d"], + "col2": [6], }, ), table1.schema, ) - row2 = Row._from_polars_dataframe( - pl.DataFrame( + row2 = Row._from_pandas_dataframe( + pd.DataFrame( { - "col1": "e", - "col2": 8, + "col1": ["e"], + "col2": [8], }, ), table1.schema, @@ -32,20 +32,20 @@ def test_add_rows_valid() -> None: def test_add_rows_table_valid() -> None: table1 = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}) - row1 = Row._from_polars_dataframe( - pl.DataFrame( + row1 = Row._from_pandas_dataframe( + pd.DataFrame( { - "col1": 5, - "col2": 6, + "col1": [5], + "col2": [6], }, ), table1.schema, ) - row2 = Row._from_polars_dataframe( - pl.DataFrame( + row2 = Row._from_pandas_dataframe( + pd.DataFrame( { - "col1": 7, - "col2": 8, + "col1": [7], + "col2": [8], }, ), table1.schema, diff --git a/tests/safeds/data/tabular/containers/_table/test_to_rows.py b/tests/safeds/data/tabular/containers/_table/test_to_rows.py index fcb9dcd4b..bfda8a6b4 100644 --- a/tests/safeds/data/tabular/containers/_table/test_to_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_to_rows.py @@ -1,4 +1,4 @@ -import polars as pl +import pandas as pd from safeds.data.tabular.containers import Row, Table from safeds.data.tabular.typing import Integer, Schema, String @@ -20,9 +20,9 @@ def test_to_rows() -> None: }, ) rows_expected = [ - Row._from_polars_dataframe(pl.DataFrame({"A": 1, "B": 4, "D": "d"}), expected_schema), - Row._from_polars_dataframe(pl.DataFrame({"A": 2, "B": 5, "D": "e"}), expected_schema), - Row._from_polars_dataframe(pl.DataFrame({"A": 3, "B": 6, "D": "f"}), expected_schema), + Row._from_pandas_dataframe(pd.DataFrame({"A": [1], "B": [4], "D": ["d"]}), expected_schema), + Row._from_pandas_dataframe(pd.DataFrame({"A": [2], "B": [5], "D": ["e"]}), expected_schema), + Row._from_pandas_dataframe(pd.DataFrame({"A": [3], "B": [6], "D": ["f"]}), expected_schema), ] rows_is = table.to_rows() diff --git a/tests/safeds/data/tabular/containers/test_row.py b/tests/safeds/data/tabular/containers/test_row.py index 546a4459a..4773b76b6 100644 --- a/tests/safeds/data/tabular/containers/test_row.py +++ b/tests/safeds/data/tabular/containers/test_row.py @@ -1,6 +1,6 @@ from typing import Any -import polars as pl +import pandas as pd import pytest from safeds.data.tabular.containers import Row, Table from safeds.data.tabular.exceptions import UnknownColumnNameError @@ -32,21 +32,21 @@ def test_should_create_row_from_dict(self, data: dict[str, Any], expected: Row) assert Row.from_dict(data) == expected -class TestFromPolarsDataFrame: +class TestFromPandasDataFrame: @pytest.mark.parametrize( ("row", "expected"), [ ( - Row._from_polars_dataframe(pl.DataFrame(), Schema({})), + Row._from_pandas_dataframe(pd.DataFrame(), Schema({})), Schema({}), ), ( - Row._from_polars_dataframe(pl.DataFrame({"col1": 0}), Schema({"col1": Integer()})), + Row._from_pandas_dataframe(pd.DataFrame({"col1": [0]}), Schema({"col1": Integer()})), Schema({"col1": Integer()}), ), ( - Row._from_polars_dataframe( - pl.DataFrame({"col1": 0, "col2": "a"}), + Row._from_pandas_dataframe( + pd.DataFrame({"col1": [0], "col2": ["a"]}), Schema({"col1": Integer(), "col2": String()}), ), Schema({"col1": Integer(), "col2": String()}), @@ -64,8 +64,8 @@ def test_should_use_the_schema_if_passed(self, row: Row, expected: Schema) -> No @pytest.mark.parametrize( ("row", "expected"), [ - (Row._from_polars_dataframe(pl.DataFrame()), Schema({})), - (Row._from_polars_dataframe(pl.DataFrame({"col1": 0})), Schema({"col1": Integer()})), + (Row._from_pandas_dataframe(pd.DataFrame()), Schema({})), + (Row._from_pandas_dataframe(pd.DataFrame({"col1": [0]})), Schema({"col1": Integer()})), ], ids=[ "empty", diff --git a/tests/safeds/data/tabular/typing/test_column_type.py b/tests/safeds/data/tabular/typing/test_column_type.py index a8c0a33c9..5ad58915b 100644 --- a/tests/safeds/data/tabular/typing/test_column_type.py +++ b/tests/safeds/data/tabular/typing/test_column_type.py @@ -1,14 +1,5 @@ import numpy as np import pytest -from polars import FLOAT_DTYPES as POLARS_FLOAT_DTYPES -from polars import INTEGER_DTYPES as POLARS_INTEGER_DTYPES -from polars import PolarsDataType -from polars.datatypes import TEMPORAL_DTYPES as POLARS_TEMPORAL_DTYPES -from polars.datatypes import Boolean as PolarsBoolean -from polars.datatypes import Decimal as PolarsDecimal -from polars.datatypes import Object as PolarsObject -from polars.datatypes import Unknown as PolarsUnknown -from polars.datatypes import Utf8 as PolarsUtf8 from safeds.data.tabular.typing import ( Anything, Boolean, @@ -59,36 +50,6 @@ def test_should_raise_if_data_type_is_not_supported(self) -> None: ColumnType._from_numpy_data_type(np.dtype(np.void)) -class TestFromPolarsDataType: - @pytest.mark.parametrize( - ("data_type", "expected"), - [ - # Boolean - (PolarsBoolean, Boolean()), - # Float - *((data_type, RealNumber()) for data_type in POLARS_FLOAT_DTYPES), - (PolarsDecimal, RealNumber()), - # Int - *((data_type, Integer()) for data_type in POLARS_INTEGER_DTYPES), - # String - (PolarsUtf8, String()), - (PolarsObject, String()), - *((data_type, String()) for data_type in POLARS_TEMPORAL_DTYPES), - ], - ids=repr, - ) - def test_should_create_column_type_from_polars_data_type( - self, - data_type: PolarsDataType, - expected: ColumnType, - ) -> None: - assert ColumnType._from_polars_data_type(data_type) == expected - - def test_should_raise_if_data_type_is_not_supported(self) -> None: - with pytest.raises(NotImplementedError): - ColumnType._from_polars_data_type(PolarsUnknown) - - class TestRepr: @pytest.mark.parametrize( ("column_type", "expected"), diff --git a/tests/safeds/data/tabular/typing/test_schema.py b/tests/safeds/data/tabular/typing/test_schema.py index e41e0b590..3d8da385b 100644 --- a/tests/safeds/data/tabular/typing/test_schema.py +++ b/tests/safeds/data/tabular/typing/test_schema.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING import pandas as pd -import polars as pl import pytest from safeds.data.tabular.exceptions import UnknownColumnNameError from safeds.data.tabular.typing import Boolean, ColumnType, Integer, RealNumber, Schema, String @@ -49,43 +48,6 @@ def test_should_create_schema_from_pandas_dataframe(self, dataframe: pd.DataFram assert Schema._from_pandas_dataframe(dataframe) == expected -class TestFromPolarsDataFrame: - @pytest.mark.parametrize( - ("dataframe", "expected"), - [ - ( - pl.DataFrame({"A": [True, False, True]}), - Schema({"A": Boolean()}), - ), - ( - pl.DataFrame({"A": [1, 2, 3]}), - Schema({"A": Integer()}), - ), - ( - pl.DataFrame({"A": [1.0, 2.0, 3.0]}), - Schema({"A": RealNumber()}), - ), - ( - pl.DataFrame({"A": ["a", "b", "c"]}), - Schema({"A": String()}), - ), - ( - pl.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]}), - Schema({"A": Integer(), "B": String()}), - ), - ], - ids=[ - "integer", - "real number", - "string", - "boolean", - "multiple columns", - ], - ) - def test_should_create_schema_from_polars_dataframe(self, dataframe: pl.DataFrame, expected: Schema) -> None: - assert Schema._from_polars_dataframe(dataframe) == expected - - class TestStr: @pytest.mark.parametrize( ("schema", "expected"),