Merge pull request #249 from SCM-NV/rdkit

ENH: Make RDKit an optional dependency for python >= 3.7
SCM-NV · Jul 23, 2021 · 81d98df · 81d98df
2 parents a0a16ce + 7427ea0
commit 81d98df
Show file tree

Hide file tree

Showing 21 changed files with 295 additions and 86 deletions.
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -1,24 +1,43 @@
 name: build with conda
 
-on: [push]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
 
 jobs:
   build:
-
     runs-on: ${{ matrix.os }}
-
     strategy:
       fail-fast: false
       matrix:
-        os:  [macos-latest, ubuntu-latest]
-        version: [3.6, 3.7, 3.8, 3.9]
+        os: [macos-latest, ubuntu-latest]
+        # Quote the versions: a bare `3.10` would be parsed as the float `3.1`
+        version: ['3.6', '3.7', '3.8', '3.9']
+        # `special` is empty for the regular jobs; the `include` entries below
+        # add one extra ubuntu job per special configuration
+        special: ['']
+        include:
+          - os: ubuntu-latest
+            special: 'pre-release'
+            version: '3.9'
+          - os: ubuntu-latest
+            special: 'no RDKit'
+            version: '3.9'
 
     steps:
+    - name: Cancel Previous Runs
+      uses: styfle/cancel-workflow-action@0.9.0
+      with:
+        access_token: ${{ github.token }}
+
     - uses: actions/checkout@v2
 
-    - name: Optionally Install cp2k
-      if: matrix.os == 'ubuntu-latest'
-      run: sudo apt install cp2k
+    - name: Install cp2k
+      if: matrix.os == 'DISABLE'
+      run: sudo apt install cp2k --fix-missing
+
+    - name: Info CP2K
+      if: matrix.os == 'DISABLE'
+      run: cp2k.popt --version
 
     - name: Setup conda
       uses: s-weigand/setup-conda@v1
@@ -27,10 +46,23 @@ jobs:
         conda-channels: anaconda
 
     - name: Install dependencies
-      run: conda create -n test -c conda-forge python=${{ matrix.version }} h5py rdkit nbsphinx jupyter
+      if: matrix.special == ''
+      run: |
+        conda create -n test -c conda-forge python=${{ matrix.version }} h5py rdkit nbsphinx jupyter
+        source $CONDA/bin/activate test
+        pip install -e .[test]
+
+    - name: Install dependencies (pre-release)
+      if: matrix.special == 'pre-release'
+      run: |
+        conda create -n test -c conda-forge python=${{ matrix.version }} h5py rdkit nbsphinx jupyter
+        source $CONDA/bin/activate test
+        pip install --pre -e .[test] --upgrade --force-reinstall
 
-    - name: Install the package
+    - name: Install dependencies (no RDKit)
+      if: matrix.special == 'no RDKit'
       run: |
+        conda create -n test -c conda-forge python=${{ matrix.version }} h5py nbsphinx jupyter
         source $CONDA/bin/activate test
         pip install -e .[test]
 
@@ -41,11 +73,18 @@ jobs:
       run: conda list -n test
 
     - name: Test with pytest
+      if: matrix.special != 'no RDKit'
       run: |
         source $CONDA/bin/activate test
         pytest -m "not (slow or long)"
 
-    - uses: codecov/codecov-action@v1
+    - name: Test with pytest (no RDKit)
+      if: matrix.special == 'no RDKit'
+      run: |
+        source $CONDA/bin/activate test
+        pytest -m "not (slow or long)" test
+
+    - uses: codecov/codecov-action@v2
       with:
         file: ./coverage.xml
         name: codecov-umbrella
diff --git a/README.rst b/README.rst
@@ -17,9 +17,9 @@ See documentation_ for tutorials and documentation.
 Motivation
 ==========
 Research on modern computational quantum chemistry relies on a set of computational
-tools to carry out calculations. The complexity of the calculations usually requires 
-intercommunication between the aforementioned tools, such communication is usually done 
-through shell scripts that try to automate input/output actions like: launching 
+tools to carry out calculations. The complexity of the calculations usually requires
+intercommunication between the aforementioned tools, such communication is usually done
+through shell scripts that try to automate input/output actions like: launching
 the computations in a cluster, reading the resulting output and feeding the relevant
 numerical result to another program. Such scripts are difficult to maintain and extend,
 requiring a significant programming expertise to work with them. Being then desirable a
@@ -29,7 +29,7 @@ heterogeneous hardware platforms.
 This library tackles the construction and efficient execution of computational chemistry workflows.
 This allows computational chemists to use the emerging massively parallel compute environments in
 an easy manner and focus on interpretation of scientific data rather than on tedious job submission
-procedures and manual data processing. 
+procedures and manual data processing.
 
 Description
 ===========
@@ -52,53 +52,55 @@ Installation
 
 - Download miniconda for python3: miniconda_ (also you can install the complete anaconda_ version).
 
-- Install according to: installConda_. 
+- Install according to: installConda_.
 
 - Create a new virtual environment using the following commands:
 
-  - ``conda create -n qmflows`` 
+  - ``conda create -n qmflows``
 
 - Activate the new virtual environment
-  
+
   - ``source activate qmflows``
 
 To exit the virtual environment type  ``source deactivate``.
-    
-    
+
+
 .. _dependecies:
 
 Dependencies installation
 -------------------------
 
 - Type in your terminal:
 
-  ``conda activate qmflows``  
+  ``conda activate qmflows``
 
-Using the conda environment the following packages should be installed:    
+Using the conda environment the following packages should be installed:
 
 
 - install rdkit_ and h5py_ using conda:
 
   - ``conda install -y -q -c conda-forge rdkit h5py``
-
+
+  - Note that ``rdkit`` is optional for Python 3.7 and later.
+
 .. _installation:
 
 Package installation
 --------------------
 Finally install the package:
-    
+
 - Install **QMFlows** using pip:
   - ``pip install git+https://github.com/SCM-NV/qmflows@master#egg=qmflows``
 
-Now you are ready to use *qmflows*.  
+Now you are ready to use *qmflows*.
 
 
   **Notes:**
 
   - Once the libraries and the virtual environment are installed, you only need to type
     ``conda activate qmflows`` each time that you want to use the software.
 
-    
+
 .. _documentation: https://qmflows.readthedocs.io/en/latest/
 .. _miniconda: https://docs.conda.io/en/latest/miniconda.html
 .. _anaconda: https://www.anaconda.com/distribution/#download-section

diff --git a/src/qmflows/__init__.py b/src/qmflows/__init__.py
@@ -1,25 +1,21 @@
 """QMFlows API."""
 
 import sys
-from typing import TYPE_CHECKING
+import types
+import importlib as _importlib
+from typing import TYPE_CHECKING, Any
 
 from .__version__ import __version__
 
 from .logger import logger
 
 from .utils import InitRestart
 
-from .components import (
-    Angle, Dihedral, Distance,
-    find_first_job, select_max, select_min)
-
 from .packages import (
     adf, cp2k, cp2k_mm, dftb, orca, run, PackageWrapper)
 
 from . import templates
 from .settings import Settings
-from .examples import (example_H2O2_TS, example_freqs, example_generic_constraints,
-                       example_partial_geometry_opt)
 
 __all__ = [
     '__version__',
@@ -34,22 +30,53 @@
 
 if TYPE_CHECKING or sys.version_info < (3, 7):
     from .templates import freq, geometry, singlepoint, ts, md, cell_opt
+    from .components import (
+        Angle, Dihedral, Distance, find_first_job, select_max, select_min
+    )
+    from .examples import (
+        example_H2O2_TS, example_freqs, example_generic_constraints, example_partial_geometry_opt
+    )
+    from . import components, examples
 else:
     _TEMPLATES = frozenset(templates.__all__)
+    _REQUIRES_RDKIT = types.MappingProxyType({
+        "components": "qmflows.components",
+        "Angle": "qmflows.components",
+        "Dihedral": "qmflows.components",
+        "Distance": "qmflows.components",
+        "find_first_job": "qmflows.components",
+        "select_max": "qmflows.components",
+        "select_min": "qmflows.components",
+        "examples": "qmflows.examples",
+        "example_H2O2_TS": "qmflows.examples",
+        "example_freqs": "qmflows.examples",
+        "example_generic_constraints": "qmflows.examples",
+        "example_partial_geometry_opt": "qmflows.examples",
+    })
+
     _DIR_CACHE: "None | list[str]" = None
 
-    def __getattr__(name: str) -> Settings:
+    def __getattr__(name: str) -> Any:
         """Ensure that the qmflows templates are always copied before returning."""
         if name in _TEMPLATES:
             return getattr(templates, name).copy()
-        raise AttributeError(f"module {__name__} has no attribute {name}")
+
+        # Lazily load (and cache) the content of `qmflows.examples` and
+        # `qmflows.components` in order to avoid directly importing RDKit
+        module_name = _REQUIRES_RDKIT.get(name)
+        if module_name is not None:
+            module = _importlib.import_module(module_name)
+            # Cache the submodule under its short attribute name
+            # (e.g. `components`) rather than under the dotted module path,
+            # which is not a valid attribute name and would leak into `dir()`
+            globals()[module_name.rpartition(".")[2]] = module
+            # Requesting the submodule itself falls back to `module`;
+            # any other name is looked up as an attribute of the submodule
+            globals()[name] = ret = getattr(module, name, module)
+            return ret
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
     def __dir__() -> "list[str]":
         """Manually insert the qmflows templates into :func:`dir`."""
         global _DIR_CACHE
         if _DIR_CACHE is None:
-            _DIR_CACHE = sorted(list(globals()) + templates.__all__)
+            # Build the listing from a set union: names from `_REQUIRES_RDKIT`
+            # that were already lazily loaded are also present in `globals()`
+            # and would otherwise be listed twice
+            _DIR_CACHE = sorted({*globals(), *templates.__all__, *_REQUIRES_RDKIT})
         return _DIR_CACHE
 
 # Clean up the namespace
-del sys, TYPE_CHECKING
+del sys, types, TYPE_CHECKING, Any
diff --git a/src/qmflows/packages/package_wrapper.py b/src/qmflows/packages/package_wrapper.py
@@ -103,15 +103,14 @@
 from warnings import warn
 
 from scm import plams
-from rdkit import Chem
 from noodles import has_scheduled_methods, schedule
 
 from .packages import Package, Result, load_properties
 from .SCM import adf, dftb
 from .orca import orca
 from .cp2k_package import cp2k
 from ..settings import Settings
-from ..type_hints import _Settings
+from ..type_hints import _Settings, MolType
 from ..warnings_qmflows import Key_Warning
 
 plams.Job = plams.core.basejob.Job
@@ -216,7 +215,7 @@ def __reduce__(self: PT) -> Tuple[Type[PT], Tuple[Type[JT], str]]:
 
     @schedule(display="Running {self.pkg_name} {job_name}...", store=True, confirm=True)
     def __call__(self, settings: Settings,
-                 mol: Union[plams.Molecule, Chem.Mol],
+                 mol: MolType,
                  job_name: str = '', **kwargs: Any) -> RT:
         """If possible, call :meth:`__call__()` of the Package instance appropriate to :attr:`PackageWrapper.job_type`.
 

diff --git a/src/qmflows/packages/packages.py b/src/qmflows/packages/packages.py
@@ -19,15 +19,13 @@
 import numpy as np
 import pandas as pd
 import pkg_resources as pkg
-import scm.plams.interfaces.molecule.rdkit as molkit
 from more_itertools import collapse
 from noodles import has_scheduled_methods, schedule, serial
 from noodles.run.threading.sqlite3 import run_parallel
 from noodles.serial import AsDict, Registry
 from noodles.serial.numpy import SerNumpyScalar, arrays_to_hdf5
 from noodles.serial.path import SerPath
 from noodles.serial.reasonable import SerReasonableObject
-from rdkit import Chem
 from scm import plams
 
 from .serializer import SerMolecule, SerMol, SerSettings, SerNDFrame, SerReduce
@@ -38,6 +36,14 @@
 from ..settings import _Settings as _SettingsType
 from ..warnings_qmflows import QMFlows_Warning
 
+try:
+    from rdkit import Chem
+    from scm.plams import from_rdmol
+except ImportError:
+    Chem = None
+    def from_rdmol(mol: plams.Molecule) -> plams.Molecule:
+        return mol
+
 __all__ = ['Package', 'run', 'registry', 'Result', 'SerMolecule', 'SerSettings']
 
 
@@ -345,8 +351,7 @@ def __call__(self, settings: Settings,
             #  Check if plams finishes normally
             try:
                 # If molecule is an RDKIT molecule translate it to plams
-                plams_mol = molkit.from_rdmol(
-                    mol) if isinstance(mol, Chem.Mol) else mol
+                plams_mol = from_rdmol(mol)
 
                 if job_name != '':
                     kwargs['job_name'] = job_name
@@ -636,23 +641,26 @@ def call_default(wf: PromisedObject, n_processes: int, always_cache: bool) -> Re
         db_file=db_file, always_cache=always_cache, echo_log=False)
 
 
+_REGISTRY_TYPES = {
+    Package: SerReduce(Package),
+    Path: SerPath(),
+    plams.Molecule: SerMolecule(),
+    Result: AsDict(Result),
+    Settings: SerSettings(),
+    plams.KFFile: SerReasonableObject(plams.KFFile),
+    plams.KFReader: SerReasonableObject(plams.KFReader),
+    np.floating: SerNumpyScalar(),
+    np.integer: SerNumpyScalar(),
+    pd.DataFrame: SerNDFrame(pd.DataFrame),
+    pd.Series: SerNDFrame(pd.Series),
+}
+if Chem is not None:
+    _REGISTRY_TYPES[Chem.Mol] = SerMol()
+
 #: A :class:`Registry` instance to-be returned by :func:`registry`.
-REGISTRY: Registry = Registry(
+REGISTRY = Registry(
     parent=serial.base() + arrays_to_hdf5(),
-    types={
-        Package: SerReduce(Package),
-        Path: SerPath(),
-        plams.Molecule: SerMolecule(),
-        Chem.Mol: SerMol(),
-        Result: AsDict(Result),
-        Settings: SerSettings(),
-        plams.KFFile: SerReasonableObject(plams.KFFile),
-        plams.KFReader: SerReasonableObject(plams.KFReader),
-        np.floating: SerNumpyScalar(),
-        np.integer: SerNumpyScalar(),
-        pd.DataFrame: SerNDFrame(pd.DataFrame),
-        pd.Series: SerNDFrame(pd.Series),
-    }
+    types=_REGISTRY_TYPES,
 )