diff --git a/docs/api_examples/histogram_plot.ipynb b/docs/api_examples/histogram_plot.ipynb new file mode 100644 index 0000000..df0e5d0 --- /dev/null +++ b/docs/api_examples/histogram_plot.ipynb @@ -0,0 +1,4318 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "640ba1d5", + "metadata": {}, + "source": [ + "# Histogram Plot\n", + "\n", + "![VueCore logo][vuecore_logo]\n", + "\n", + "[![Open In Colab][colab_badge]][colab_link]\n", + "\n", + "[VueCore][vuecore_repo] is a Python package for creating interactive and static visualizations of multi-omics data.\n", + "It is part of a broader ecosystem of tools—including [ACore][acore_repo] for data processing and [VueGen][vuegen_repo] for automated reporting—that together enable end-to-end workflows for omics analysis.\n", + "\n", + "This notebook demonstrates how to generate histogram plots using plotting functions from VueCore. We showcase basic and advanced plot configurations, highlighting key customization options such as grouping, color mapping, text annotations, and export to multiple file formats.\n", + "\n", + "## Notebook structure\n", + "\n", + "First, we will set up the work environment by installing the necessary packages and importing the required libraries. Next, we will create basic and advanced histogram plots.\n", + "\n", + "0. [Work environment setup](#0-work-environment-setup)\n", + "1. [Basic histogram plot](#1-basic-histogram-plot)\n", + "2. 
[Advanced histogram plot](#2-advanced-histogram-plot)\n", + "\n", + "## Credits and Contributors\n", + "\n", + "- This notebook was created by Sebastián Ayala-Ruano under the supervision of Henry Webel and Alberto Santos, head of the [Multiomics Network Analytics Group (MoNA)][Mona] at the [Novo Nordisk Foundation Center for Biosustainability (DTU Biosustain)][Biosustain].\n", + "- You can find more details about the project in this [GitHub repository][vuecore_repo].\n", + "\n", + "[colab_badge]: https://colab.research.google.com/assets/colab-badge.svg\n", + "[colab_link]: https://colab.research.google.com/github/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/histogram_plot.ipynb\n", + "[vuecore_logo]: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuecore/main/docs/images/logo/vuecore_logo.svg\n", + "[Mona]: https://multiomics-analytics-group.github.io/\n", + "[Biosustain]: https://www.biosustain.dtu.dk/\n", + "[vuecore_repo]: https://github.com/Multiomics-Analytics-Group/vuecore\n", + "[vuegen_repo]: https://github.com/Multiomics-Analytics-Group/vuegen\n", + "[acore_repo]: https://github.com/Multiomics-Analytics-Group/acore" + ] + }, + { + "cell_type": "markdown", + "id": "3b504dfb", + "metadata": {}, + "source": [ + "## 0. Work environment setup" + ] + }, + { + "cell_type": "markdown", + "id": "f0c056a7", + "metadata": {}, + "source": [ + "### 0.1. Installing libraries and creating global variables for platform and working directory\n", + "\n", + "To run this notebook locally, you should create a virtual environment with the required libraries. If you are running this notebook on Google Colab, everything should be set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36246ed6", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "# VueCore library\n", + "%pip install vuecore" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "963a9529", + "metadata": { + "tags": [ + "hide-cell" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "IN_COLAB = \"COLAB_GPU\" in os.environ" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ee2ffd40", + "metadata": { + "tags": [ + "hide-cell" + ] + }, + "outputs": [], + "source": [ + "# Create a directory for outputs\n", + "output_dir = \"./outputs\"\n", + "os.makedirs(output_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "31638f9a", + "metadata": {}, + "source": [ + "### 0.2. Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "06dbf6a2", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import pandas as pd\n", + "import numpy as np\n", + "from pathlib import Path\n", + "import plotly.io as pio\n", + "\n", + "from vuecore.plots.basic.histogram import create_histogram_plot\n", + "\n", + "# Set the Plotly renderer based on the environment\n", + "pio.renderers.default = \"notebook\"" + ] + }, + { + "cell_type": "markdown", + "id": "5cc60050", + "metadata": {}, + "source": [ + "### 0.3. Create sample data\n", + "We create a synthetic dataset simulating gene expression data across two experimental conditions to demonstrate how histograms can visualize data distribution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac2db647", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Gene_IDExpressionCondition
0Gene_010.993428Control
1Gene_19.723471Control
2Gene_211.295377Control
3Gene_313.046060Control
4Gene_49.531693Control
\n", + "
" + ], + "text/plain": [ + " Gene_ID Expression Condition\n", + "0 Gene_0 10.993428 Control\n", + "1 Gene_1 9.723471 Control\n", + "2 Gene_2 11.295377 Control\n", + "3 Gene_3 13.046060 Control\n", + "4 Gene_4 9.531693 Control" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set a random seed for reproducibility of the synthetic data \n", + "np.random.seed(42)\n", + "\n", + "# Define parameters for synthetic gene expression data\n", + "num_genes = 1000\n", + "gene_names = [f\"Gene_{i}\" for i in range(num_genes)]\n", + "\n", + "# Simulate expression data with a slight shift in the \"Treated\" group\n", + "expression_values = np.concatenate([\n", + " np.random.normal(loc=10, scale=2, size=num_genes // 2),\n", + " np.random.normal(loc=12, scale=2, size=num_genes // 2)\n", + "])\n", + "condition_values = np.concatenate([\n", + " [\"Control\"] * (num_genes // 2),\n", + " [\"Treated\"] * (num_genes // 2)\n", + "])\n", + "\n", + "# Create the DataFrame\n", + "gene_exp_df = pd.DataFrame({\n", + " \"Gene_ID\": gene_names,\n", + " \"Expression\": expression_values,\n", + " \"Condition\": condition_values\n", + "})\n", + "\n", + "gene_exp_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "ade445fe", + "metadata": {}, + "source": [ + "## 1. Basic Histogram Plot\n", + "A basic histogram plot can be created by simply providing the `x` and `y` columns from the DataFrame, along with style options like `title`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d0d34455", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[VueCore] Plot saved to outputs/histogram_plot_basic.png\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Define output file path for the PNG basic histogram\n", + "file_path_basic_hist_png = Path(output_dir) / \"histogram_plot_basic.png\"\n", + "\n", + "# Generate the basic histogram plot\n", + "histogram_plot_basic = create_histogram_plot(\n", + " data=gene_exp_df,\n", + " x=\"Expression\",\n", + " title=\"Distribution of Gene Expression Levels\",\n", + " file_path=file_path_basic_hist_png,\n", + ")\n", + "\n", + "histogram_plot_basic.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f5e16637", + "metadata": {}, + "source": [ + "## 2. Advanced Histogram Plot\n", + "Here is an example of an advanced histogram plot with more descriptive parameters, including `color grouping`, `overlay barmode`, `probability density normalization`, `hover tooltips`, and export to `HTML`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "358e45fe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[VueCore] Plot saved to outputs/histogram_plot_advanced.html\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Define the output file path for the advanced HTML histogram\n", + "file_path_adv_hist_html = Path(output_dir) / \"histogram_plot_advanced.html\"\n", + "\n", + "# Generate the advanced histogram plot\n", + "histogram_plot_adv = create_histogram_plot(\n", + " data=gene_exp_df,\n", + " x=\"Expression\",\n", + " color=\"Condition\",\n", + " barmode=\"overlay\",\n", + " histnorm=\"probability density\",\n", + " title=\"Gene Expression Distribution by Treatment Condition\",\n", + " subtitle=\"Histogram with probability density normalized\",\n", + " labels={\n", + " \"Expression\": \"Gene Expression\",\n", + " \"Condition\": \"Treatment Condition\"\n", + " },\n", + " hover_data=[\"Gene_ID\"],\n", + " opacity=0.75, \n", + " file_path=file_path_adv_hist_html,\n", + ")\n", + "\n", + "histogram_plot_adv.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "vuecore-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/api_examples/histogram_plot.py b/docs/api_examples/histogram_plot.py new file mode 100644 index 0000000..45de99a --- /dev/null +++ b/docs/api_examples/histogram_plot.py @@ -0,0 +1,162 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.2 +# kernelspec: +# display_name: vuecore-dev +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Histogram Plot +# +# ![VueCore logo][vuecore_logo] +# +# [![Open In Colab][colab_badge]][colab_link] +# +# [VueCore][vuecore_repo] is a Python package for creating interactive and static 
visualizations of multi-omics data. +# It is part of a broader ecosystem of tools—including [ACore][acore_repo] for data processing and [VueGen][vuegen_repo] for automated reporting—that together enable end-to-end workflows for omics analysis. +# +# This notebook demonstrates how to generate histogram plots using plotting functions from VueCore. We showcase basic and advanced plot configurations, highlighting key customization options such as grouping, color mapping, text annotations, and export to multiple file formats. +# +# ## Notebook structure +# +# First, we will set up the work environment by installing the necessary packages and importing the required libraries. Next, we will create basic and advanced histogram plots. +# +# 0. [Work environment setup](#0-work-environment-setup) +# 1. [Basic histogram plot](#1-basic-histogram-plot) +# 2. [Advanced histogram plot](#2-advanced-histogram-plot) +# +# ## Credits and Contributors +# +# - This notebook was created by Sebastián Ayala-Ruano under the supervision of Henry Webel and Alberto Santos, head of the [Multiomics Network Analytics Group (MoNA)][Mona] at the [Novo Nordisk Foundation Center for Biosustainability (DTU Biosustain)][Biosustain]. +# - You can find more details about the project in this [GitHub repository][vuecore_repo]. +# +# [colab_badge]: https://colab.research.google.com/assets/colab-badge.svg +# [colab_link]: https://colab.research.google.com/github/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/histogram_plot.ipynb +# [vuecore_logo]: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuecore/main/docs/images/logo/vuecore_logo.svg +# [Mona]: https://multiomics-analytics-group.github.io/ +# [Biosustain]: https://www.biosustain.dtu.dk/ +# [vuecore_repo]: https://github.com/Multiomics-Analytics-Group/vuecore +# [vuegen_repo]: https://github.com/Multiomics-Analytics-Group/vuegen +# [acore_repo]: https://github.com/Multiomics-Analytics-Group/acore + +# %% [markdown] +# ## 0. 
Work environment setup + +# %% [markdown] +# ### 0.1. Installing libraries and creating global variables for platform and working directory +# +# To run this notebook locally, you should create a virtual environment with the required libraries. If you are running this notebook on Google Colab, everything should be set. + +# %% tags=["hide-output"] +# VueCore library +# %pip install vuecore + +# %% tags=["hide-cell"] +import os + +IN_COLAB = "COLAB_GPU" in os.environ + +# %% tags=["hide-cell"] +# Create a directory for outputs +output_dir = "./outputs" +os.makedirs(output_dir, exist_ok=True) + +# %% [markdown] +# ### 0.2. Importing libraries + +# %% +# Imports +import pandas as pd +import numpy as np +from pathlib import Path +import plotly.io as pio + +from vuecore.plots.basic.histogram import create_histogram_plot + +# Set the Plotly renderer based on the environment +pio.renderers.default = "notebook" + +# %% [markdown] +# ### 0.3. Create sample data +# We create a synthetic dataset simulating gene expression data across two experimental conditions to demonstrate how histograms can visualize data distribution. + +# %% +# Set a random seed for reproducibility of the synthetic data +np.random.seed(42) + +# Define parameters for synthetic gene expression data +num_genes = 1000 +conditions = ["Control", "Treated"] +gene_names = [f"Gene_{i}" for i in range(num_genes)] + +# Simulate expression data with a slight shift in the "Treated" group +expression_values = np.concatenate( + [ + np.random.normal(loc=10, scale=2, size=num_genes // 2), + np.random.normal(loc=12, scale=2, size=num_genes // 2), + ] +) +condition_values = np.concatenate( + [["Control"] * (num_genes // 2), ["Treated"] * (num_genes // 2)] +) + +# Create the DataFrame +gene_exp_df = pd.DataFrame( + { + "Gene_ID": gene_names, + "Expression": expression_values, + "Condition": condition_values, + } +) + +gene_exp_df.head() + +# %% [markdown] +# ## 1. 
Basic Histogram Plot +# A basic histogram plot can be created by simply providing the `x` column from the DataFrame, along with style options like `title`. + +# %% +# Define output file path for the PNG basic histogram +file_path_basic_hist_png = Path(output_dir) / "histogram_plot_basic.png" + +# Generate the basic histogram plot +histogram_plot_basic = create_histogram_plot( + data=gene_exp_df, + x="Expression", + title="Distribution of Gene Expression Levels", + file_path=file_path_basic_hist_png, +) + +histogram_plot_basic.show() + +# %% [markdown] +# ## 2. Advanced Histogram Plot +# Here is an example of an advanced histogram plot with more descriptive parameters, including `color grouping`, `overlay barmode`, `probability density normalization`, `hover tooltips`, and export to `HTML`. + +# %% +# Define the output file path for the advanced HTML histogram +file_path_adv_hist_html = Path(output_dir) / "histogram_plot_advanced.html" + +# Generate the advanced histogram plot +histogram_plot_adv = create_histogram_plot( + data=gene_exp_df, + x="Expression", + color="Condition", + barmode="overlay", + histnorm="probability density", + title="Gene Expression Distribution by Treatment Condition", + subtitle="Histogram with probability density normalized", + labels={"Expression": "Gene Expression", "Condition": "Treatment Condition"}, + hover_data=["Gene_ID"], + opacity=0.75, + file_path=file_path_adv_hist_html, +) + +histogram_plot_adv.show() diff --git a/docs/index.md b/docs/index.md index b4967aa..ef7c76f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,6 +24,7 @@ api_examples/scatter_plot api_examples/line_plot api_examples/bar_plot api_examples/box_violin_plot +api_examples/histogram_plot ``` ```{toctree} diff --git a/src/vuecore/constants.py b/src/vuecore/constants.py index 0d80806..a09c186 100644 --- a/src/vuecore/constants.py +++ b/src/vuecore/constants.py @@ -15,6 +15,7 @@ class PlotType(StrEnum): BAR = auto() BOX = auto() VIOLIN = auto() + 
HISTOGRAM = auto() class EngineType(StrEnum): diff --git a/src/vuecore/engines/plotly/__init__.py b/src/vuecore/engines/plotly/__init__.py index 326c1bc..ef1b23b 100644 --- a/src/vuecore/engines/plotly/__init__.py +++ b/src/vuecore/engines/plotly/__init__.py @@ -6,6 +6,7 @@ from .bar import build as build_bar from .box import build as build_box from .violin import build as build_violin +from .histogram import build as build_histogram from .saver import save # Import build_utils to ensure it's available @@ -19,5 +20,8 @@ register_builder(plot_type=PlotType.BAR, engine=EngineType.PLOTLY, func=build_bar) register_builder(plot_type=PlotType.BOX, engine=EngineType.PLOTLY, func=build_box) register_builder(plot_type=PlotType.VIOLIN, engine=EngineType.PLOTLY, func=build_violin) +register_builder( + plot_type=PlotType.HISTOGRAM, engine=EngineType.PLOTLY, func=build_histogram +) register_saver(engine=EngineType.PLOTLY, func=save) diff --git a/src/vuecore/engines/plotly/histogram.py b/src/vuecore/engines/plotly/histogram.py new file mode 100644 index 0000000..8250ade --- /dev/null +++ b/src/vuecore/engines/plotly/histogram.py @@ -0,0 +1,58 @@ +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go + +from vuecore.schemas.basic.histogram import HistogramConfig +from .theming import apply_histogram_theme +from .plot_builder import build_plot + +# Define parameters handled by the theme script +THEMING_PARAMS = [ + "opacity", + "barmode", + "barnorm", + "histnorm", + "log_x", + "log_y", + "range_x", + "range_y", + "title", + "x_title", + "y_title", + "subtitle", + "template", + "width", + "height", +] + + +def build(data: pd.DataFrame, config: HistogramConfig) -> go.Figure: + """ + Creates a Plotly histogram figure from a DataFrame and a Pydantic configuration. + + This function acts as a bridge between the abstract plot definition and the + Plotly Express implementation. 
It translates the validated `HistogramConfig` + into the arguments for `plotly.express.histogram` and also forwards any + additional, unvalidated keyword arguments from Plotly. The resulting figure + is then customized with layout and theme settings using `plotly.graph_objects`. + (https://plotly.com/python-api-reference/generated/plotly.express.histogram.html). + + Parameters + ---------- + data : pd.DataFrame + The DataFrame containing the plot data. + config : HistogramConfig + The validated Pydantic model with all plot configurations. + + Returns + ------- + go.Figure + A `plotly.graph_objects.Figure` object representing the histogram. + """ + return build_plot( + data=data, + config=config, + px_function=px.histogram, + theming_function=apply_histogram_theme, + theming_params=THEMING_PARAMS, + ) diff --git a/src/vuecore/engines/plotly/theming.py b/src/vuecore/engines/plotly/theming.py index 619e227..dfba900 100644 --- a/src/vuecore/engines/plotly/theming.py +++ b/src/vuecore/engines/plotly/theming.py @@ -5,6 +5,7 @@ from vuecore.schemas.basic.bar import BarConfig from vuecore.schemas.basic.box import BoxConfig from vuecore.schemas.basic.violin import ViolinConfig +from vuecore.schemas.basic.histogram import HistogramConfig def _get_axis_title(config, axis: str) -> str: @@ -246,3 +247,36 @@ def apply_violin_theme(fig: go.Figure, config: ViolinConfig) -> go.Figure: fig = _apply_common_layout(fig, config) return fig + + +def apply_histogram_theme(fig: go.Figure, config: HistogramConfig) -> go.Figure: + """ + Applies a consistent layout and theme to a Plotly histogram plot. + + This function handles all styling and layout adjustments, such as titles, + dimensions, templates, and trace properties, separating these concerns + from the initial data mapping. + + Parameters + ---------- + fig : go.Figure + The Plotly figure object to be styled. + config : HistogramConfig + The configuration object containing all styling and layout info. 
+ + Returns + ------- + go.Figure + The styled Plotly figure object. + """ + # Apply trace-specific updates for histogram + fig.update_traces( + opacity=config.opacity, + orientation=config.orientation, + selector=dict(type="histogram"), + ) + + # Apply common layout + fig = _apply_common_layout(fig, config) + + return fig diff --git a/src/vuecore/plots/basic/__init__.py b/src/vuecore/plots/basic/__init__.py index b233782..1043771 100644 --- a/src/vuecore/plots/basic/__init__.py +++ b/src/vuecore/plots/basic/__init__.py @@ -1,12 +1,16 @@ # vuecore/plots/basic/__init__.py from .bar import create_bar_plot from .box import create_box_plot +from .histogram import create_histogram_plot from .line import create_line_plot from .scatter import create_scatter_plot +from .violin import create_violin_plot __all__ = [ "create_bar_plot", "create_box_plot", "create_line_plot", "create_scatter_plot", + "create_histogram_plot", + "create_violin_plot", ] diff --git a/src/vuecore/plots/basic/histogram.py b/src/vuecore/plots/basic/histogram.py new file mode 100644 index 0000000..a5327de --- /dev/null +++ b/src/vuecore/plots/basic/histogram.py @@ -0,0 +1,73 @@ +from typing import Any + +import pandas as pd + +from vuecore import EngineType, PlotType +from vuecore.schemas.basic.histogram import HistogramConfig +from vuecore.plots.plot_factory import create_plot +from vuecore.utils.docs_utils import document_pydant_params + + +@document_pydant_params(HistogramConfig) +def create_histogram_plot( + data: pd.DataFrame, + engine: EngineType = EngineType.PLOTLY, + file_path: str = None, + **kwargs, +) -> Any: + """ + Creates, styles, and optionally saves a histogram using the specified engine. + + This function serves as the main entry point for users to generate histograms. 
+ It validates the provided configuration against the `HistogramConfig` schema, + retrieves the appropriate plotting builder and saver functions based on the + selected engine, builds the plot, and optionally saves it to a file. + + Parameters + ---------- + data : pd.DataFrame + The DataFrame containing the data to be plotted. Each row represents + an observation, and columns correspond to variables. + engine : EngineType, optional + The plotting engine to use for rendering the plot. + Defaults to `EngineType.PLOTLY`. + file_path : str, optional + If provided, the path where the final plot will be saved. + The file format is automatically inferred from the file extension + (e.g., '.html', '.png', '.jpeg', '.svg'). Defaults to None, meaning + the plot will not be saved. + + Returns + ------- + Any + The final plot object returned by the selected engine. + For Plotly, this will typically be a `plotly.graph_objects.Figure`. + The exact type depends on the chosen engine. + + Raises + ------ + pydantic.ValidationError + If the provided keyword arguments do not conform to the `HistogramConfig` schema. + e.g., a required parameter is missing or a value has an incorrect type. + ValueError + Raised by the plotting engine (e.g., Plotly Express) if a + column specified in the configuration (e.g., 'x', 'y', 'color') is + not found in the provided DataFrame. 
+ + Examples + -------- + For detailed examples and usage, please refer to the documentation: + + * **Jupyter Notebook:** `docs/api_examples/histogram_plot.ipynb` - + https://vuecore.readthedocs.io/en/latest/api_examples/histogram_plot.html + * **Python Script:** `docs/api_examples/histogram_plot.py` - + https://github.com/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/histogram_plot.py + """ + return create_plot( + data=data, + config=HistogramConfig, + plot_type=PlotType.HISTOGRAM, + engine=engine, + file_path=file_path, + **kwargs, + ) diff --git a/src/vuecore/schemas/basic/histogram.py b/src/vuecore/schemas/basic/histogram.py new file mode 100644 index 0000000..7fc89e8 --- /dev/null +++ b/src/vuecore/schemas/basic/histogram.py @@ -0,0 +1,63 @@ +# vuecore/schemas/basic/histogram.py + +from typing import Dict, Optional +from pydantic import Field, ConfigDict +from vuecore.schemas.plotly_base import PlotlyBaseConfig + + +class HistogramConfig(PlotlyBaseConfig): + """ + Pydantic model for validating and managing histogram plot configurations, + which extends PlotlyBaseConfig. + + This model serves as a curated API for the most relevant parameters + for histogram plots, closely aligned with the `plotly.express.histogram` API + (https://plotly.com/python-api-reference/generated/plotly.express.histogram.html). + + This model includes the most relevant parameters for data mapping, styling, + and layout. It ensures that user-provided configurations are type-safe and + adhere to the expected structure. The plotting function handles parameters + defined here, and also accepts additional Plotly keyword arguments, + forwarding them to the appropriate `plotly.express.histogram` or + `plotly.graph_objects.Figure` call. 
+ """ + + # General Configuration + # Allow extra parameters to pass through to Plotly + model_config = ConfigDict(extra="allow") + + # Data Mapping + pattern_shape: Optional[str] = Field( + None, description="Column to assign pattern shapes to bars." + ) + pattern_shape_map: Optional[Dict[str, str]] = Field( + None, description="Map values to specific pattern shapes." + ) + + # Styling and Layout + marginal: Optional[str] = Field( + None, + description="Adds a marginal subplot ('rug', 'box', 'violin', 'histogram').", + ) + opacity: float = Field(0.8, description="Overall opacity of the bars.") + orientation: Optional[str] = Field( + None, + description="Orientation of the bars ('v' for vertical, 'h' for horizontal).", + ) + barmode: str = Field("relative", description="Mode for grouping bars.") + barnorm: Optional[str] = Field( + None, description="Normalization mode for stacked bars ('fraction', 'percent')." + ) + histnorm: Optional[str] = Field( + None, + description="Normalization mode for the histogram ('percent', 'probability', 'density', 'probability density').", + ) + histfunc: Optional[str] = Field( + "count", + description="Function used to aggregate values ('count', 'sum', 'avg', 'min', 'max').", + ) + cumulative: bool = Field( + False, description="If True, histogram values are cumulative." 
+ ) + nbins: Optional[int] = Field(None, description="Sets the number of bins.") + text_auto: bool = Field(False, description="If True, displays text labels on bars.") diff --git a/tests/test_histogramplot.py b/tests/test_histogramplot.py new file mode 100644 index 0000000..ea28512 --- /dev/null +++ b/tests/test_histogramplot.py @@ -0,0 +1,102 @@ +import pandas as pd +import numpy as np +import pytest +from pathlib import Path + +from vuecore.plots.basic.histogram import create_histogram_plot + + +@pytest.fixture +def sample_histogram_data() -> pd.DataFrame: + """ + Fixture for generating synthetic data for histogram plots, replicating + the code used in the docs/api_examples/histogram_plot.ipynb example. + """ + # Set a random seed for reproducibility of the synthetic data + np.random.seed(42) + + # Define parameters for synthetic gene expression data + num_genes = 1000 + gene_names = [f"Gene_{i}" for i in range(num_genes)] + + # Simulate expression data with a slight shift in the "Treated" group + expression_values = np.concatenate( + [ + np.random.normal(loc=10, scale=2, size=num_genes // 2), + np.random.normal(loc=12, scale=2, size=num_genes // 2), + ] + ) + condition_values = np.concatenate( + [["Control"] * (num_genes // 2), ["Treated"] * (num_genes // 2)] + ) + + # Create the DataFrame + gene_exp_df = pd.DataFrame( + { + "Gene_ID": gene_names, + "Expression": expression_values, + "Condition": condition_values, + } + ) + return gene_exp_df + + +@pytest.mark.parametrize("ext", ["png", "svg", "html", "json"]) +def test_basic_histogram_plot( + sample_histogram_data: pd.DataFrame, tmp_path: Path, ext: str +): + """ + Test basic histogram plot creation, ensuring the figure is returned, + and output files are generated correctly for various formats. 
+ """ + # Define the output path using tmp_path fixture for temporary files + output_path = tmp_path / f"basic_histogram_test.{ext}" + + # Create the basic histogram plot using the VueCore function + fig = create_histogram_plot( + data=sample_histogram_data, + x="Expression", + title="Distribution of Gene Expression Levels", + file_path=str(output_path), + ) + + # Assertions to verify plot creation and file output + assert fig is not None, "Figure object should not be None." + assert output_path.exists(), f"Output file should exist: {output_path}" + assert ( + output_path.stat().st_size > 0 + ), f"Output file should not be empty: {output_path}" + + +@pytest.mark.parametrize("ext", ["png", "svg", "html", "json"]) +def test_advanced_histogram_plot( + sample_histogram_data: pd.DataFrame, tmp_path: Path, ext: str +): + """ + Test advanced histogram plot creation with multiple parameters, + ensuring the figure is returned and output files are generated. + """ + # Define the output path for the advanced plot + output_path = tmp_path / f"advanced_histogram_test.{ext}" + + # Create the advanced histogram plot using the VueCore function + fig = create_histogram_plot( + data=sample_histogram_data, + x="Expression", + color="Condition", + barmode="overlay", + histnorm="probability density", + title="Gene Expression Distribution by Treatment Condition", + subtitle="Histogram with probability density normalized ", + labels={"Expression": "Gene Expression", "Condition": "Treatment Condition"}, + hover_data=["Gene_ID"], + opacity=0.75, + file_path=str(output_path), + ) + + # Assertions to verify plot creation and file output + assert fig is not None, "Figure object should not be None." + assert output_path.exists(), f"Output file should exist: {output_path}" + assert ( + output_path.stat().st_size > 0 + ), f"Output file should not be empty: {output_path}"