
Commit

Merge pull request #2 from oboril/main
Ready for publication
RachelNicholls1 authored May 27, 2024
2 parents 8978038 + ae42385 commit 8376b4f
Showing 35 changed files with 604 additions and 339 deletions.
56 changes: 56 additions & 0 deletions .github/workflows/publish_example_data.yaml
@@ -0,0 +1,56 @@
# Deploy example data
name: Example Data

on:
  push:
    branches: ["main"]
  workflow_dispatch:

# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
concurrency:
  group: "example-data"
  cancel-in-progress: false

jobs:
  deploy-example-data:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # Prepare data
      - name: Prepare Data
        run: |
          cp -r example_data _temp
      # Configure Git, important for actions to commit
      - name: Configure Git
        run: |
          git config --global user.email "jan.oboril@gmail.com"
          git config --global user.name "Jan Oboril"
      # Check if branch exists and create or clean it
      - name: Prepare Branch
        run: |
          git checkout -b example-data
          git pull origin example-data --rebase || true
      # Add the compressed data file
      - name: Add Data and Commit
        run: |
          git rm -rf .
          mv _temp/* .
          ls -la
          rm -rf _temp
          git add README.md calibration/* reaction_ba_ome_nme2/*
          git commit -m "Added example data" || true
      # Push the branch to remote
      - name: Push Changes
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: example-data
4 changes: 3 additions & 1 deletion .gitignore
@@ -5,4 +5,6 @@ dist
.mypy_cache
.ipynb_checkpoints
*/.ipynb_checkpoints/*
.idea
.idea
venv
example_data/unnest_data.sh
2 changes: 1 addition & 1 deletion LICENSE.txt
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2023 Jan Oboril
Copyright (c) 2024 Jan Oboril

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
154 changes: 143 additions & 11 deletions README.md
@@ -1,17 +1,149 @@
# Frontend web app for MOCCA
# Web App for MOCCA2

### Installation
Get the latest MOCCA from `!!! TO DO !!!`.
This Web App is a GUI for automatic HPLC chromatogram processing using the [MOCCA2 Python package](https://github.com/oboril/mocca).

Install Python (3.11.4 has been tested) and the packages according to requirements.txt (`pip install -r requirements.txt`).
To automate your workflow and get accurate results, MOCCA2 features:

Make sure Python can find the MOCCA package, ideally by adding `[PARENT DIR]/!!! TO DO !!!/src` to `PYTHONPATH`.
* support for raw data files from Agilent, Shimadzu and Waters
* automatic baseline correction
* adaptive peak picking
* automatic purity checking and peak deconvolution
* compound tracking across chromatograms
* fully automatic processing of any number of chromatograms

Start the application by `python app.py` or using the `run.bat` file. The frontend can be then accessed from `http://localhost:8050/`.

### Compiling to executable
# Installation

Clone this repository.

```
git clone https://github.com/oboril/MOCCA-frontend
```

Install Python (versions 3.10, 3.11 or 3.12) and install the required packages.

```
python -m pip install -r requirements.txt
```

You can now run the app!

```
python app.py
```

The MOCCA front end should open in your web browser. If the window does not open automatically, go to [localhost:8050](http://localhost:8050/).

![Home page screenshot](tutorial_screenshots/home_page.png)

# Getting started

First, install and start the MOCCA2 Web App.

## Example data

This tutorial uses example chromatograms from a Knoevenagel condensation ([Haas et al., 2023](https://doi.org/10.1021/acscentsci.2c01042)). You can download this data using:

```
git clone -b example-data https://github.com/oboril/MOCCA-frontend
```

The data contains:
- calibration standards
- blank gradient
- benzaldehyde, 994.7 uM
- 4-anisaldehyde, 1316.2 uM
- N,N-dimethylaminobenzaldehyde, 967.6 uM
- reaction samples
- blank gradient
- 10 samples taken roughly every 18:12 (min:sec); the exact times are in the file names
- the reaction start was at 15:53:30 (first sample was taken after 18:08 min:sec)
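The sample times above follow from the reaction start time: a 15:53:30 start plus an 18:08 delay gives the first sample at 16:11:38. A quick sanity check in plain Python (the date itself is arbitrary):

```python
from datetime import datetime, timedelta

# reaction start at 15:53:30; the date is arbitrary, only the clock time matters
reaction_start = datetime(2024, 1, 1, 15, 53, 30)
first_sample_delay = timedelta(minutes=18, seconds=8)

first_sample_time = reaction_start + first_sample_delay
print(first_sample_time.strftime("%H:%M:%S"))  # 16:11:38
```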



## Uploading data

1. Go to the `Data` page.
2. Upload a blank chromatogram (just the gradient, without analytes) - this is optional but strongly recommended.
3. Upload the chromatograms
- either calibration standards
- or chromatograms with unknown sample composition for analysis
4. Fill in relevant columns in the table, such as:
- **Sample Name**: any name for the chromatogram
   - **Compound Name**: if this chromatogram is a reference with a known compound, please choose a name for the compound
- **Concentration**: the concentration of the pure reference compound (if applicable)
- **ISTD Concentration**: Concentration of ISTD (if present)
5. If you are using an internal standard, fill in `Name of ISTD` under the table

After uploading all data, don't forget to `Confirm Changes`!

![Upload Page Screenshot](tutorial_screenshots/upload_page.png)

## Processing the chromatograms

1. Go to the `Process` page.

Here you can adjust settings that MOCCA2 uses to process the chromatograms. The description of all settings is at the bottom of the processing page.

In most cases, the default settings will work just fine and you should change only:
- **Min and Max Retention Time**: peaks outside these times will be ignored
- **Min (Relative) Peak Height**: these parameters determine peak picking sensitivity
- **Min Peak Purity**: depending on signal-to-noise ratio in your chromatograms, you might need to decrease (high noise) or increase (small peaks overlapping with large ones) this value

2. Test the settings on some of your chromatograms using `Process Single Sample`
3. Once you are happy with the settings, click `Process All` to process the entire dataset
- Processing all chromatograms can take a few minutes. You can see the progress in the command line

You can also download/upload your favourite settings to reuse them.
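The on-disk format of a downloaded settings file is not documented here, so the sketch below is only illustrative: it uses the setting names mentioned above, but the real keys, units, and file format may differ.

```python
# Hypothetical sketch of the tunable processing settings named above;
# the actual settings file produced by the Web App may look different.
settings = {
    "min_retention_time": 0.5,    # minutes; peaks eluting earlier are ignored
    "max_retention_time": 10.0,   # minutes; peaks eluting later are ignored
    "min_peak_height": 2.0,       # absolute peak-picking threshold
    "min_rel_peak_height": 0.01,  # threshold relative to the largest peak
    "min_peak_purity": 0.95,      # lower for noisy data, raise for overlapping peaks
}

def in_window(retention_time: float) -> bool:
    """A peak is considered only if it elutes inside the retention window."""
    return settings["min_retention_time"] <= retention_time <= settings["max_retention_time"]
```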

![Process Page Screenshot](tutorial_screenshots/process_page.png)

## Inspecting the results

The processed chromatograms, peak areas and calibrated concentrations are in the `Results` page.

![Results Concentrations Page Screenshot](tutorial_screenshots/results_concentrations.png)

The `Chromatograms` tab provides an interactive way to visualize the chromatograms:
- click on the heatmap to see absorbances at different times and wavelengths
- click and drag on any of the graphs to zoom in
- double click to zoom back out

![Results Chromatogram Page Screenshot](tutorial_screenshots/results_chromatogram.png)

## Saving the campaign

At any time, you can go to the `Data` page and download the campaign (all chromatograms, processing settings, and processed results) as a `.mocca2` file using the `Download Campaign` button.

**Limitations**: _Dash_ limits the size of the file that can be downloaded to around 300 MB. It might not be possible to download a campaign if it contains too many chromatograms. The `.mocca2` file is compressed before downloading - this helps, but the download can take a few seconds.

You can then restore the campaign by uploading the `.mocca2` file using `Load Campaign`.

## Exporting data

Any tabular data can be copied from `MOCCA2` directly into another program, for example Excel.

![Data exported and plotted in excel](tutorial_screenshots/excel.png)

# Compiling to executable

If you wish, you can compile everything into a single executable file.

First, install the MOCCA front end and make sure you can start it using `python app.py`.

The app can be compiled using PyInstaller:

```
python -m pip install pyinstaller
pyinstaller app.spec
```

After the command finishes, the executable will be in the `dist` directory.

# Contributing

The process for contributing is outlined in [CONTRIBUTING.md](https://github.com/oboril/MOCCA-frontend/blob/main/CONTRIBUTING.md).

Run `pyinstaller app.spec`. The executable should be created in `dist/mocca.exe`.

# Code Standards
This section describes the directory structure, file structure, naming of variables, and overall standards for the code.
@@ -27,7 +159,7 @@ Each **page** folder contains the following:
* all callbacks must be in files `callbacks*.py`
* other functions (data processing, parsing) should be in separate files

### Code standards
### General standards
Imports are in the following order:
* `dataclasses` and `typing`
* imports from external packages
@@ -43,12 +175,12 @@ Global variables must be stored in flask cache - this is also necessary because

Cached files can be stored in the `_cache` folder. All information about the cached files must be in `cache.files`.

_Note that the current implementation is not suitable for having multiple clients - file cache needs to be changed later!_
_Note that the current implementation is not suitable for having multiple clients - file cache needs to be changed!_

### Running background jobs
The background callbacks provided by Dash don't work very well with flask-cache and are slow.

Background jobs are thus done using python `threading` and the `Interval` component.
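A minimal sketch of this pattern using only the standard library; in the app, a Dash `Interval` callback would periodically poll the shared status dict to update the UI instead of joining the thread (names below are illustrative):

```python
import threading

# shared status polled by the Interval callback
job_status = {"running": False, "progress": 0}

def process_all(n_samples: int) -> None:
    """Long-running job executed off the main thread."""
    job_status["running"] = True
    for i in range(n_samples):
        # ... process sample i here ...
        job_status["progress"] = i + 1
    job_status["running"] = False

# Start the job in a daemon thread so the server stays responsive.
thread = threading.Thread(target=process_all, args=(10,), daemon=True)
thread.start()
thread.join()  # only for this demo; the app polls job_status instead
```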

# Naming variables
### Naming variables
All IDs of html components must be `[page-name]-[component-type]-[anything else]`, for example `process-dropdown-input-file-type`.
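A tiny helper (hypothetical, not part of the codebase) makes the convention concrete:

```python
def component_id(page: str, component_type: str, rest: str) -> str:
    # Enforces the `[page-name]-[component-type]-[anything else]` ID convention
    return f"{page}-{component_type}-{rest}"

print(component_id("process", "dropdown", "input-file-type"))
# process-dropdown-input-file-type
```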
53 changes: 29 additions & 24 deletions app.py
@@ -2,31 +2,34 @@
This is the entry point for the Dash application
"""

import dash # type: ignore
import dash_bootstrap_components as dbc # type: ignore
import dash # type: ignore
import dash_bootstrap_components as dbc # type: ignore
import webbrowser

# This is for caching global variables
from flask_caching import Cache

# Do not print request logs
import logging
logging.getLogger('werkzeug').setLevel(logging.WARNING)

logging.getLogger("werkzeug").setLevel(logging.WARNING)

# bootstrap theme (https://bootswatch.com/cerulean/)
external_stylesheets = [dbc.themes.CERULEAN]
external_scripts = [{
    'src': 'https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js',
    'integrity': 'sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p',
    'crossorigin': 'anonymous'
}]
external_scripts = [
    {
        "src": "https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js",
        "integrity": "sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p",
        "crossorigin": "anonymous",
    }
]

# initialize the application
app = dash.Dash(
    __name__,
    external_stylesheets=external_stylesheets,
    external_scripts=external_scripts,
    title="Mocca"
    title="Mocca",
)
server = app.server

@@ -35,10 +38,12 @@

# Initialize cache - needed for global variables
flask_cache = Cache()
flask_cache.init_app(app.server, config={'CACHE_TYPE':'SimpleCache', "CACHE_DEFAULT_TIMEOUT":1e30})
flask_cache.init_app(
    app.server, config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_TIMEOUT": 1e30}
)

# define directory for caching files
CACHE_DIR = '_cache'
CACHE_DIR = "_cache"

# Pages must be imported after cache and campaign are initialized
import cache
@@ -51,14 +56,17 @@
import pages.process
import pages.results


# create callback for loading content for different URL paths
@app.callback(dash.dependencies.Output('page-content', 'children'),
              [dash.dependencies.Input('url', 'pathname')])
def display_page(pathname : str):
@app.callback(
    dash.dependencies.Output("page-content", "children"),
    [dash.dependencies.Input("url", "pathname")],
)
def display_page(pathname: str):
    """
    When URL changes, the content of `div#page-content` is updated accordingly
    """
    if pathname in ['', '/', '/home']:
    if pathname in ["", "/", "/home"]:
        return pages.home.get_layout()
    elif pathname == "/data":
        return pages.data.get_layout()
@@ -68,19 +76,16 @@ def display_page(pathname : str):
        return pages.results.get_layout()
    else:
        # TODO: add page not found page
        return None
    return None

@app.server.before_first_request
def initialize():

# start the server
if __name__ == "__main__":
    # initialize global variables and file caching
    cache.init()

    # load the base layout
    app.layout = pages.base_layout.get_layout()

# start the server
if __name__ == '__main__':
    # app.run(host='127.0.0.1', debug=True)

    #webbrowser.open_new("http://localhost:8050")
    app.run(host='127.0.0.1', debug=True)
    webbrowser.open("http://localhost:8050")
    app.run(host="127.0.0.1", debug=False, port=8050)