Skip to content

Commit

Permalink
feat: Added Table.plot_histograms to plot a histogram for each colu…
Browse files Browse the repository at this point in the history
…mn in the table (#252)

Closes #157.

### Summary of Changes

Added `Table.plot_histograms` to plot a histogram for each column in
the table
Changed xticks in `Column.plot_histogram` to match
`Table.plot_histograms` (the first and last xtick labels are outside of
the value range)

---------

Co-authored-by: sibre28 <86068340+sibre28@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
  • Loading branch information
3 people committed May 8, 2023
1 parent 01c3ad9 commit e27d410
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 6 deletions.
20 changes: 20 additions & 0 deletions docs/tutorials/data_visualization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,26 @@
}
}
},
{
"cell_type": "markdown",
"source": [
"## Histogram of all columns"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"titanic_numerical.plot_histograms()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
Expand Down
29 changes: 29 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,35 @@ def plot_scatterplot(self, x_column_name: str, y_column_name: str) -> Image:
buffer.seek(0)
return Image(buffer, format_=ImageFormat.PNG)

def plot_histograms(self) -> Image:
    """
    Plot a histogram for every column.

    The table is melted into long format and drawn as a grid with at most three plots per row. Each plot has its
    own x-axis and y-axis, and is titled with the name of the column it shows.

    Returns
    -------
    plot: Image
        The plot as an image.
    """
    col_wrap = min(self.number_of_columns, 3)

    data = pd.melt(self._data, value_vars=self.column_names)
    grid = sns.FacetGrid(data=data, col="variable", col_wrap=col_wrap, sharex=False, sharey=False)
    grid.map(sns.histplot, "value")
    grid.set_xlabels("")
    grid.set_ylabels("")
    grid.set_titles("{col_name}")
    for axes in grid.axes.flat:
        # Pin the tick positions first: set_xticklabels is meant to be used with fixed tick locations, and
        # matplotlib warns if the labels are replaced while the locator can still move the ticks.
        axes.set_xticks(axes.get_xticks())
        axes.set_xticklabels(axes.get_xticklabels(), rotation=45, horizontalalignment="right")
    grid.tight_layout()
    fig = grid.fig

    buffer = io.BytesIO()
    try:
        fig.savefig(buffer, format="png")
    finally:
        # Close exactly the figure created above, even if saving fails. A bare plt.close() only closes
        # whichever figure is "current", and pyplot keeps unclosed figures alive.
        plt.close(fig)
    buffer.seek(0)
    return Image(buffer, format_=ImageFormat.PNG)

# ------------------------------------------------------------------------------------------------------------------
# Conversion
# ------------------------------------------------------------------------------------------------------------------
Expand Down
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ def test_should_match_snapshot() -> None:
table.get_column("A").plot_boxplot()
current = table.get_column("A").plot_boxplot()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_boxplot.png"))
assert snapshot._image.tobytes() == current._image.tobytes()

# Inlining the expression into the assert causes pytest to hang if the assertion fails when run from PyCharm.
assertion = snapshot._image.tobytes() == current._image.tobytes()
assert assertion


def test_should_raise_if_column_contains_non_numerical_values() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ def test_should_match_snapshot_numeric() -> None:
table = Table({"A": [1, 2, 3]})
current = table.get_column("A").plot_histogram()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_histogram_numeric.png"))
assert snapshot._image.tobytes() == current._image.tobytes()

# Inlining the expression into the assert causes pytest to hang if the assertion fails when run from PyCharm.
assertion = snapshot._image.tobytes() == current._image.tobytes()
assert assertion


def test_should_match_snapshot_str() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@
def test_should_match_snapshot() -> None:
table = Table({"A": [1, 2, 3.5], "B": [0.2, 4, 77]})
current = table.plot_correlation_heatmap()
legacy = Image.from_png_file(resolve_resource_path("./image/snapshot_heatmap.png"))
assert legacy._image.tobytes() == current._image.tobytes()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_heatmap.png"))

# Inlining the expression into the assert causes pytest to hang if the assertion fails when run from PyCharm.
assertion = snapshot._image.tobytes() == current._image.tobytes()
assert assertion
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest
from safeds.data.image.containers import Image
from safeds.data.tabular.containers import Table

from tests.helpers import resolve_resource_path


@pytest.mark.parametrize(
    ("table", "path"),
    [
        (Table({"A": [1, 2, 3]}), "./image/snapshot_histograms/one_column.png"),
        (
            Table({"A": [1, 2, 3], "B": ["A", "A", "Bla"], "C": [True, True, False], "D": [1.0, 2.1, 4.5]}),
            "./image/snapshot_histograms/four_columns.png",
        ),
    ],
)
def test_should_match_snapshot(table: Table, path: str) -> None:
    """Check that the histograms plotted for `table` are identical to the stored snapshot image at `path`."""
    actual_bytes = table.plot_histograms()._image.tobytes()
    expected_bytes = Image.from_png_file(resolve_resource_path(path))._image.tobytes()

    # Keep the comparison out of the assert statement: inlining it causes pytest to hang if the assertion fails
    # when run from PyCharm.
    images_are_identical = expected_bytes == actual_bytes
    assert images_are_identical
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ def test_should_match_snapshot() -> None:
table = Table({"A": [1, 2, 3], "B": [2, 4, 7]})
current = table.plot_lineplot("A", "B")
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_lineplot.png"))
assert snapshot._image.tobytes() == current._image.tobytes()

# Inlining the expression into the assert causes pytest to hang if the assertion fails when run from PyCharm.
assertion = snapshot._image.tobytes() == current._image.tobytes()
assert assertion


@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ def test_should_match_snapshot() -> None:
table = Table({"A": [1, 2, 3], "B": [2, 4, 7]})
current = table.plot_scatterplot("A", "B")
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_scatterplot.png"))
assert snapshot._image.tobytes() == current._image.tobytes()

# Inlining the expression into the assert causes pytest to hang if the assertion fails when run from PyCharm.
assertion = snapshot._image.tobytes() == current._image.tobytes()
assert assertion


@pytest.mark.parametrize(
Expand Down

0 comments on commit e27d410

Please sign in to comment.