Skip to content

Commit

Permalink
feat: Added Table.plot_boxplots to plot a boxplot for each numerica…
Browse files Browse the repository at this point in the history
…l column in the table (#254)

Closes #156.

### Summary of Changes

- Added `Table.plot_boxplots` to plot a boxplot for each numerical column
  in the table.
- Changed `Column.plot_boxplot` to set the title instead of the xlabel, to
  match `Table.plot_boxplots`.

### Additional Context

Waiting for #239, which is needed to add tests for `Table.plot_boxplots`.

---------

Co-authored-by: sibre28 <86068340+sibre28@users.noreply.github.com>
Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
  • Loading branch information
5 people committed May 8, 2023
1 parent e27d410 commit 0203a0c
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 2 deletions.
20 changes: 20 additions & 0 deletions docs/tutorials/data_visualization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,26 @@
}
}
},
{
"cell_type": "markdown",
"source": [
"## Boxplot of all numerical columns"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"titanic_numerical.plot_boxplots()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
Expand Down
3 changes: 2 additions & 1 deletion src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,8 @@ def plot_boxplot(self) -> Image:

fig = plt.figure()
ax = sns.boxplot(data=self._data)
ax.set(xlabel=self.name)
ax.set(title=self.name)
ax.set_xticks([])
plt.tight_layout()

buffer = io.BytesIO()
Expand Down
42 changes: 42 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import functools
import io
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -1313,6 +1314,47 @@ def plot_scatterplot(self, x_column_name: str, y_column_name: str) -> Image:
buffer.seek(0)
return Image(buffer, format_=ImageFormat.PNG)

def plot_boxplots(self) -> Image:
    """
    Plot a boxplot for every numerical column.

    Columns with non-numerical values are removed first. Every remaining column gets its own subplot; the
    subplots are arranged in a grid that is at most three plots wide.

    Returns
    -------
    plot: Image
        The plot as an image.

    Raises
    ------
    NonNumericColumnError
        If the table contains only non-numerical columns.
    """
    numerical_table = self.remove_columns_with_non_numerical_values()
    if numerical_table.number_of_columns == 0:
        raise NonNumericColumnError("This table contains only non-numerical columns.")
    col_wrap = min(numerical_table.number_of_columns, 3)

    # Reshape to long format ("variable" = column name, "value" = cell value) so the grid can facet by column.
    data = pd.melt(numerical_table._data, value_vars=numerical_table.column_names)
    # Axes are not shared, so each column's boxplot is scaled independently.
    grid = sns.FacetGrid(data, col="variable", col_wrap=col_wrap, sharex=False, sharey=False)
    fig = grid.fig
    try:
        with warnings.catch_warnings():
            # Each facet holds a single category, so the missing `order` cannot reorder anything.
            warnings.filterwarnings(
                "ignore",
                message="Using the boxplot function without specifying `order` is likely to produce an incorrect plot.",
            )
            grid.map(sns.boxplot, "variable", "value")
        grid.set_xlabels("")
        grid.set_ylabels("")
        grid.set_titles("{col_name}")
        for axes in grid.axes.flat:
            # The facet title already names the column; the lone x tick would only repeat it.
            axes.set_xticks([])
        fig.tight_layout()

        buffer = io.BytesIO()
        fig.savefig(buffer, format="png")
    finally:
        # Close exactly this figure (not whatever happens to be "current"), even if rendering failed.
        # This also prevents the figure from being displayed directly.
        plt.close(fig)
    buffer.seek(0)
    return Image(buffer, format_=ImageFormat.PNG)

def plot_histograms(self) -> Image:
"""
Plot a histogram for every column.
Expand Down
Binary file modified tests/resources/image/snapshot_boxplot.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

def test_should_match_snapshot() -> None:
table = Table({"A": [1, 2, 3]})
table.get_column("A").plot_boxplot()
current = table.get_column("A").plot_boxplot()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_boxplot.png"))

Expand Down
37 changes: 37 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_plot_boxplots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import pytest
from safeds.data.image.containers import Image
from safeds.data.tabular.containers import Table
from safeds.exceptions import NonNumericColumnError

from tests.helpers import resolve_resource_path


@pytest.mark.parametrize(
    ("table", "path"),
    [
        (Table({"A": [1, 2, 3]}), "./image/snapshot_boxplots/one_column.png"),
        (
            Table({"A": [1, 2, 3], "B": ["A", "A", "Bla"], "C": [True, True, False], "D": [1.0, 2.1, 4.5]}),
            "./image/snapshot_boxplots/four_columns_some_non_numeric.png",
        ),
        (
            Table({"A": [1, 2, 3], "B": [1.0, 2.1, 4.5], "C": [1, 2, 3], "D": [1.0, 2.1, 4.5]}),
            "./image/snapshot_boxplots/four_columns_all_numeric.png",
        ),
    ],
    ids=["one column", "four columns (some non-numeric)", "four columns (all numeric)"],
)
def test_should_match_snapshot(table: Table, path: str) -> None:
    """Check that the rendered boxplots are byte-identical to the stored snapshot image."""
    current = table.plot_boxplots()
    # The snapshot must only be read here. Writing `current` to `path` first would make the test compare the
    # image with itself (so it could never fail) and would overwrite the reference file on every run.
    snapshot = Image.from_png_file(resolve_resource_path(path))

    # Inlining the expression into the assert causes pytest to hang if the assertion fails when run from PyCharm.
    assertion = snapshot._image.tobytes() == current._image.tobytes()
    assert assertion


def test_should_raise_if_column_contains_non_numerical_values() -> None:
    """Check that plotting boxplots for a table whose columns all hold strings raises NonNumericColumnError."""
    string_only_data = {"A": ["1", "2", "3.5"], "B": ["0.2", "4", "77"]}
    string_only_table = Table.from_dict(string_only_data)

    with pytest.raises(NonNumericColumnError):
        string_only_table.plot_boxplots()
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"./image/snapshot_histograms/four_columns.png",
),
],
ids=["one column", "four columns"],
)
def test_should_match_snapshot(table: Table, path: str) -> None:
current = table.plot_histograms()
Expand Down

0 comments on commit 0203a0c

Please sign in to comment.