Multiomics-Analytics-Group · enryH · Jul 1, 2025 · Jun 19, 2025 · Jun 19, 2025 · Jun 19, 2025
diff --git a/.github/workflows/cdci.yml b/.github/workflows/cdci.yml
@@ -118,6 +118,14 @@ jobs:
             echo "Error: One or more protected files have been modified."
             exit 1
           fi
+      - name: check streamlit report files for chatbot API
+        run: |
+          vuegen -c docs/example_config_files/Chatbot_example_config.yaml -output_dir tests/report_examples/chat_bot
+          if git diff tests/report_examples | grep .; then  # any diff output means tracked example files changed
+            echo "Failed for report: streamlit (chatbot API example)"
+            echo "Error: One or more protected files have been modified."
+            exit 1
+          fi
       - name: check for changes in report files
         run: |
           # write streamlit report to test folder 

diff --git a/.gitignore b/.gitignore
@@ -116,7 +116,7 @@ cython_debug/
 # Temporary files
 logs/
 vuegen/logs/
-./streamlit_report/
+streamlit_report/
 !tests/report_examples
 quarto_report/
 output_docker/

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+  "editor.rulers": [88, 100, 120]
+}
diff --git a/docs/README.md b/docs/README.md
@@ -1,10 +1,10 @@
 # Docs creation
 
-In order to build the docs you need to 
+In order to build the docs you need to
 
-  1. Install sphinx and additional support packages
-  2. Build the package reference files
-  3. Run sphinx to create a local html version
+1. Install sphinx and additional support packages
+2. Build the package reference files
+3. Run sphinx to create a local html version
 
 The documentation is build using readthedocs automatically.
 
@@ -18,12 +18,13 @@ poetry install --with docs
 
 ## Build docs using Sphinx command line tools
 
-Command to be run from `path/to/docs`, i.e. from within the `docs` package folder: 
+Command to be run from `path/to/docs`, i.e. from within the `docs` package folder:
 
 Options:
-  - `--separate` to build separate pages for each (sub-)module
 
-```bash	
+- `--separate` to build separate pages for each (sub-)module
+
+```bash
 # pwd: docs
 # apidoc
 sphinx-apidoc --force --implicit-namespaces --module-first -o reference ../src/vuegen
@@ -38,4 +39,3 @@ The README is included in the `Overview` section of the docs. We created a [Pyth
 Relative links are used in the main README, which need to be resolved when building. It's
 possible to include the a `relative-docs` option if one uses `index.md` ([see docs](https://myst-parser.readthedocs.io/en/latest/faq/index.html#include-a-file-from-outside-the-docs-folder-like-readme-md)). This does not work
 with `href` links, only native markdown links.
-
diff --git a/docs/example_config_files/Chatbot_example_config.yaml b/docs/example_config_files/Chatbot_example_config.yaml
@@ -1,7 +1,7 @@
 report:
   title: Chatbot example
   description: >
-    A chatbot exaple.
+    A chatbot example.
 sections:
   - title: ChatBot test
     subsections:

diff --git a/docs/example_config_files/Earth_microbiome_vuegen_demo_notebook_config.yaml b/docs/example_config_files/Earth_microbiome_vuegen_demo_notebook_config.yaml
@@ -1,16 +1,16 @@
 report:
   title: Earth Microbiome Vuegen Demo Notebook
-  description: "The Earth Microbiome Project (EMP) is a systematic attempt to characterize\
-    \ global microbial taxonomic and functional diversity for the benefit of the planet\
-    \ and humankind. \n  It aimed to sample the Earth\u2019s microbial communities\
-    \ at an unprecedented scale in order to advance our understanding of the organizing\
-    \ biogeographic principles that govern microbial community structure. \n  The\
-    \ EMP dataset is generated from samples that individual researchers have compiled\
-    \ and contributed to the EMP. \n  The result is both a reference database giving\
-    \ global context to DNA sequence data and a framework for incorporating data from\
-    \ future studies, fostering increasingly complete characterization of Earth\u2019\
-    s microbial diversity.\n  \n  You can find more information about the Earth Microbiome\
-    \ Project at https://earthmicrobiome.org/ and in the [original article](https://www.nature.com/articles/nature24621).\n"
+  description: >
+    The Earth Microbiome Project (EMP) is a systematic attempt to characterize global
+    microbial taxonomic and functional diversity for the benefit of the planet and humankind.
+    It aimed to sample the Earth’s microbial communities at an unprecedented scale in order to
+    advance our understanding of the organizing biogeographic principles that govern microbial
+    community structure. The EMP dataset is generated from samples that individual researchers
+    have compiled and contributed to the EMP. The result is both a reference database giving
+    global context to DNA sequence data and a framework for incorporating data from future
+    studies, fostering increasingly complete characterization of Earth’s microbial diversity.
+    You can find more information about the Earth Microbiome Project at https://earthmicrobiome.org/
+    and in the original article at https://www.nature.com/articles/nature24621.
   graphical_abstract: https://raw.githubusercontent.com/ElDeveloper/cogs220/master/emp-logo.svg
   logo: https://raw.githubusercontent.com/ElDeveloper/cogs220/master/emp-logo.svg
 sections:

diff --git a/docs/vuegen_basic_case_study_configfile.md b/docs/vuegen_basic_case_study_configfile.md
@@ -1,13 +1,13 @@
 # Predefined Directory Case Study - Configuration File
 
-The [configuration file](https://github.com/Multiomics-Analytics-Group/vuegen/blob/main/docs/example_config_files/Basic_example_vuegen_demo_notebook_config.yaml) of the basic case study using a predefined directory is presented below: 
+The [configuration file](https://github.com/Multiomics-Analytics-Group/vuegen/blob/main/docs/example_config_files/Basic_example_vuegen_demo_notebook_config.yaml) of the basic case study using a predefined directory is presented below:
 
 ```yaml
 report:
   title: Basic Example Vuegen Demo Notebook
   description: A general description of the report.
-  graphical_abstract: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_logo.svg
-  logo: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_logo.svg
+  graphical_abstract: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_logo.png
+  logo: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_logo.png
 sections:
 - title: Plots
   description: This section contains example plots.

diff --git a/docs/vuegen_earth_microbiome_case_study_configfile.md b/docs/vuegen_earth_microbiome_case_study_configfile.md
@@ -5,17 +5,17 @@ The [configuration file](https://github.com/Multiomics-Analytics-Group/vuegen/bl
 ```yaml
 report:
   title: Earth Microbiome Vuegen Demo Notebook
-  description: "The Earth Microbiome Project (EMP) is a systematic attempt to characterize\
-    \ global microbial taxonomic and functional diversity for the benefit of the planet\
-    \ and humankind. \n  It aimed to sample the Earth\u2019s microbial communities\
-    \ at an unprecedented scale in order to advance our understanding of the organizing\
-    \ biogeographic principles that govern microbial community structure. \n  The\
-    \ EMP dataset is generated from samples that individual researchers have compiled\
-    \ and contributed to the EMP. \n  The result is both a reference database giving\
-    \ global context to DNA sequence data and a framework for incorporating data from\
-    \ future studies, fostering increasingly complete characterization of Earth\u2019\
-    s microbial diversity.\n  \n  You can find more information about the Earth Microbiome\
-    \ Project at https://earthmicrobiome.org/ and in the [original article](https://www.nature.com/articles/nature24621).\n"
+  description: >
+    The Earth Microbiome Project (EMP) is a systematic attempt to characterize global
+    microbial taxonomic and functional diversity for the benefit of the planet and humankind.
+    It aimed to sample the Earth’s microbial communities at an unprecedented scale in order to
+    advance our understanding of the organizing biogeographic principles that govern microbial
+    community structure. The EMP dataset is generated from samples that individual researchers
+    have compiled and contributed to the EMP. The result is both a reference database giving
+    global context to DNA sequence data and a framework for incorporating data from future
+    studies, fostering increasingly complete characterization of Earth’s microbial diversity.
+    You can find more information about the Earth Microbiome Project at https://earthmicrobiome.org/
+    and in the original article at https://www.nature.com/articles/nature24621.
   graphical_abstract: https://raw.githubusercontent.com/ElDeveloper/cogs220/master/emp-logo.svg
   logo: https://raw.githubusercontent.com/ElDeveloper/cogs220/master/emp-logo.svg
 sections:
@@ -128,8 +128,7 @@ sections:
       component_type: PLOT
       plot_type: STATIC
   - title: Shanon entropy analysis
-    description: This subsection contains the Shannon entropy analysis of the EMP
-    dataset.
+    description: This subsection contains the Shannon entropy analysis of the EMP dataset.
     components:
     - title: Specificity of sequences and higher taxonomic groups for environment
       file_path: https://raw.githubusercontent.com/biocore/emp/master/methods/images/figure4_entropy.png

diff --git a/pyproject.toml b/pyproject.toml
@@ -74,3 +74,20 @@ vuegen = "vuegen.__main__:main"
 
 [tool.isort]
 profile = "black"
+
+[tool.jupytext]
+formats = "ipynb,py:percent"
+
+[tool.ruff]
+# Allow lines to be as long as:
+line-length = 88
+
+[tool.ruff.lint]
+# https://docs.astral.sh/ruff/tutorial/#rule-selection
+# 1. Enable flake8-bugbear (`B`) rules
+# 2. Enable pycodestyle (`E`) errors and (`W`) warnings
+# 3. Pyflakes (`F`) errors
+extend-select = ["E", "W", 'F', 'B']
+
+[tool.black]
+line-length = 88
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,4 @@
+[flake8]
+exclude = docs
+max-line-length = 88
+aggressive = 2
diff --git a/src/vuegen/__init__.py b/src/vuegen/__init__.py
@@ -1 +1,6 @@
+"""VueGen automates the creation of reports from bioinformatics outputs,
+supporting formats like PDF, HTML, DOCX, ODT, PPTX, Reveal.js, Jupyter notebooks,
+and Streamlit web applications. Users simply provide a directory with output files
+and VueGen compiles them into a structured report."""
+
 __version__ = "1.0.0"
diff --git a/src/vuegen/__main__.py b/src/vuegen/__main__.py
@@ -1,3 +1,5 @@
+"""Command-line interface for VueGen report generation."""
+
 import sys
 from pathlib import Path
 

diff --git a/src/vuegen/config_manager.py b/src/vuegen/config_manager.py
@@ -1,3 +1,7 @@
+"""ConfigManager creates configuration files from folders and can create components
+for reports from YAML config files.
+"""
+
 import json
 import logging
 import os
@@ -10,7 +14,8 @@
 
 class ConfigManager:
     """
-    Class for handling metadata of reports from YAML config file and creating report objects.
+    Class for handling metadata of reports from YAML config file and creating report
+    objects.
     """
 
     def __init__(self, logger: Optional[logging.Logger] = None, max_depth: int = 2):
@@ -20,10 +25,11 @@ def __init__(self, logger: Optional[logging.Logger] = None, max_depth: int = 2):
         Parameters
         ----------
         logger : logging.Logger, optional
-            A logger instance for the class. If not provided, a default logger will be created.
+            A logger instance for the class.
+            If not provided, a default logger will be created.
         max_depth : int, optional
-            The maximum depth of the directory structure to consider when generating the report
-            config from a directory.
+            The maximum depth of the directory structure to consider when generating
+            the report config from a directory.
             The default is 2, which means it will include sections and subsections.
         """
         if logger is None:
@@ -53,7 +59,8 @@ def _create_title_fromdir(self, file_dirname: str) -> str:
 
     def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]:
         """
-        Infers a component config from a file, including component type, plot type, and additional fields.
+        Infers a component config from a file, including component type, plot type,
+        and additional fields.
 
         Parameters
         ----------
@@ -144,21 +151,27 @@ def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]:
                 else:
                     component_config["plot_type"] = r.PlotType.PLOTLY.value
             except Exception as e:
-                self.logger.warning(f"Could not parse JSON file {file_path}: {e}")
+                self.logger.warning(
+                    "Could not parse JSON file %s: %s", file_path, e, exc_info=True
+                )
                 component_config["plot_type"] = "unknown"
         elif file_ext == ".md":
             component_config["component_type"] = r.ComponentType.MARKDOWN.value
         else:
+            if not file_ext:
+                # hidden files starting with a dot
+                file_ext = file_path.name
             self.logger.error(
-                f"Unsupported file extension: {file_ext}. Skipping file: {file_path}"
+                "Unsupported file extension: %s. Skipping file: %s", file_ext, file_path
             )
             return None
 
         return component_config
 
     def _sort_paths_by_numprefix(self, paths: List[Path]) -> List[Path]:
         """
-        Sorts a list of Paths by numeric prefixes in their names, placing non-numeric items at the end.
+        Sorts a list of Paths by numeric prefixes in their names, placing non-numeric
+        items at the end.
 
         Parameters
         ----------
@@ -239,7 +252,8 @@ def _create_subsect_config_fromdir(
                     continue
                 # components are added to subsection
                 # ! Alternatively, one could add (sub-)sections to the subsection
-                # ? Then one could remove differentiation between sections and subsections
+                # ? Then one could remove differentiation between sections and
+                # ? subsections
                 nested_components = self._create_subsect_config_fromdir(file, level + 1)
                 components.extend(nested_components["components"])
 
@@ -298,7 +312,8 @@ def create_yamlconfig_fromdir(
         self, base_dir: str
     ) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]:
         """
-        Generates a YAML-compatible config file from a directory. It also returns the resolved folder path.
+        Generates a YAML-compatible config file from a directory. It also returns the
+        resolved folder path.
 
         Parameters
         ----------
@@ -361,7 +376,8 @@ def create_yamlconfig_fromdir(
 
     def initialize_report(self, config: dict) -> tuple[r.Report, dict]:
         """
-        Extracts report metadata from a YAML config file and returns a Report object and the raw metadata.
+        Extracts report metadata from a YAML config file and returns a Report object and
+        the raw metadata.
 
         Parameters
         ----------
@@ -371,7 +387,8 @@ def initialize_report(self, config: dict) -> tuple[r.Report, dict]:
         Returns
         -------
         report, config : tuple[Report, dict]
-            A tuple containing the Report object created from the YAML config file and the raw metadata dictionary.
+            A tuple containing the Report object created from the YAML config file and
+            the raw metadata dictionary.
 
         Raises
         ------
@@ -396,7 +413,9 @@ def initialize_report(self, config: dict) -> tuple[r.Report, dict]:
             report.sections.append(section)
 
         self.logger.info(
-            f"Report '{report.title}' initialized with {len(report.sections)} sections."
+            "Report '%s' initialized with %d sections.",
+            report.title,
+            len(report.sections),
         )
         return report, config
 
@@ -472,7 +491,8 @@ def _create_component(self, component_data: dict) -> r.Component:
         Returns
         -------
         Component
-            A Component object (Plot, DataFrame, or Markdown) populated with the provided metadata.
+            A Component object (Plot, DataFrame, or Markdown) populated with the
+            provided metadata.
         """
         # Determine the component type
         component_type = assert_enum_value(
@@ -620,8 +640,10 @@ def _create_apicall_component(self, component_data: dict) -> r.APICall:
             try:
                 parsed_body = json.loads(request_body)
             except json.JSONDecodeError as e:
-                self.logger.error(f"Failed to parse request_body JSON: {e}")
-                raise ValueError(f"Invalid JSON in request_body: {e}")
+                self.logger.error(
+                    "Failed to parse request_body JSON: %s", e, exc_info=True
+                )
+                raise ValueError("Invalid JSON in request_body.") from e
 
         return r.APICall(
             title=component_data["title"],

diff --git a/src/vuegen/constants.py b/src/vuegen/constants.py
@@ -0,0 +1,11 @@
+"""Constants for the Vuegen project."""
+
+GITHUB_ORG_URL = "https://github.com/Multiomics-Analytics-Group"
+ORG = "Multiomics Network Analytics Group (MoNA)"
+GITHUB_ORG_URL_BRACKETS = "{https://github.com/Multiomics-Analytics-Group}"
+REPO_URL = "https://github.com/Multiomics-Analytics-Group/vuegen"
+LOGO_URL = (
+    "https://raw.githubusercontent.com/Multiomics-Analytics-Group/"
+    "vuegen/main/docs/images/vuegen_logo.svg"
+)
+TIMEOUT: int = 60