-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: added Column.summarize_statistics() (#715)
Closes #701

### Summary of Changes

Added `summarize_statistics` to the `Column` class to quickly get an overview of relevant statistics. The column is converted into a `Table` with a single column, and the result of `Table.summarize_statistics()` is returned. This way, if someone adds a new feature to `Table.summarize_statistics()`, it also appears in `Column.summarize_statistics()`.

---------

Co-authored-by: Lars Reimann <mail@larsreimann.com>
- Loading branch information
1 parent
f2f4418
commit 71730a9
Showing
2 changed files
with
139 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
tests/safeds/data/tabular/containers/_column/test_summarize_statistics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
from statistics import stdev | ||
|
||
import pytest | ||
from safeds.data.tabular.containers import Column, Table | ||
|
||
|
||
# Row labels of the expected summary table, in the order the assertions rely on.
# Stated once so the three parametrized cases cannot drift apart.
_METRIC_NAMES = (
    "minimum",
    "maximum",
    "mean",
    "mode",
    "median",
    "variance",
    "standard deviation",
    "missing value count",
    "missing value ratio",
    "idness",
    "stability",
)


def _expected_summary(column_name: str, values: list[str]) -> Table:
    """Build the table that ``Column.summarize_statistics()`` is expected to return.

    Parameters
    ----------
    column_name:
        Name of the summarized column; used as the name of the value column.
    values:
        One stringified value per entry of ``_METRIC_NAMES``, in the same order.

    Returns
    -------
    Table
        A table with a "metric" column followed by a column named ``column_name``.
    """
    # A fresh list per table so no two expected tables share a mutable object.
    return Table({"metric": list(_METRIC_NAMES), column_name: values})


@pytest.mark.parametrize(
    ("column", "expected"),
    [
        pytest.param(
            Column("col1", [1, 2, 1]),
            _expected_summary(
                "col1",
                [
                    "1",  # minimum
                    "2",  # maximum
                    str(4.0 / 3),  # mean
                    "[1]",  # mode
                    "1.0",  # median
                    str(1.0 / 3),  # variance
                    str(stdev([1, 2, 1])),  # standard deviation
                    "0",  # missing value count
                    "0.0",  # missing value ratio
                    str(2.0 / 3),  # idness
                    str(2.0 / 3),  # stability
                ],
            ),
            id="Column of integers",
        ),
        pytest.param(
            Column("col1", ["a", "b", "c"]),
            _expected_summary(
                "col1",
                [
                    "-",  # minimum
                    "-",  # maximum
                    "-",  # mean
                    "['a', 'b', 'c']",  # mode
                    "-",  # median
                    "-",  # variance
                    "-",  # standard deviation
                    "0",  # missing value count
                    "0.0",  # missing value ratio
                    "1.0",  # idness
                    str(1.0 / 3),  # stability
                ],
            ),
            id="Column of characters",
        ),
        pytest.param(
            Column("col", [None, None]),
            _expected_summary(
                "col",
                ["-", "-", "-", "[]", "-", "-", "-", "2", "1.0", "0.0", "-"],
            ),
            id="Column of None",
        ),
    ],
)
def test_should_summarize_statistics(column: Column, expected: Table) -> None:
    """Check that ``Column.summarize_statistics()`` yields the expected summary table.

    The schema is compared first, then the full table contents.
    """
    # Compute the summary once and reuse it for both assertions.
    actual = column.summarize_statistics()
    assert actual.schema == expected.schema
    assert actual == expected