Merge pull request #298 from SCM-NV/orb

ENH: Add two new fields to CP2K MO named tuple
SCM-NV · May 18, 2022 · cd03c1c · cd03c1c
2 parents b9c2795 + 3660a3f
commit cd03c1c
Show file tree

Hide file tree

Showing 9 changed files with 88 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,13 @@
-# Version 0.12.1 (*unreleased*)
+# Version 0.12.1 (18/05/2022)
 
 ## New
- * *placeholder*.
+ * Add the MO index and occupation numbers to the CP2K orbital output.
+
+## Changed
+ * Explicitly raise when the line with the number of orbitals doesn't have any actual orbitals.
+
+## Fixed
+ * Fixed various bugs related to the parsing of unrestricted orbitals.
 
 
 # Version 0.12.0 (13/04/2022)

diff --git a/CITATION.cff b/CITATION.cff
@@ -1,8 +1,9 @@
 # YAML 1.2
 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/)
-cff-version: 1.0.3
+cff-version: 1.2.0
 message: If you use this software, please cite it as below.
 title: QMflows
+abstract: QMflows library tackles the construction and efficient execution of computational chemistry workflows.
 doi: 10.5281/zenodo.1045523
 authors:
 - given-names: Felipe
@@ -22,7 +23,7 @@ keywords:
  - materials-science
  - python
  - Workflows
-version: '0.12.0'
-date-released: 2022-04-13  # YYYY-MM-DD
+version: '0.12.1'
+date-released: 2022-05-18  # YYYY-MM-DD
 repository-code: https://github.com/SCM-NV/qmflows
 license: "LGPL-3.0"
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,3 @@
+exclude test/*
+include src/qmflows/py.typed
+include src/qmflows/data/dictionaries/*yaml
diff --git a/setup.cfg b/setup.cfg
@@ -1,7 +1,3 @@
-[metadata]
-description_file = README.rst
-license_file = LICENSE.md
-
 [aliases]
 # Define `python setup.py test`
 test = pytest

diff --git a/src/qmflows/_version.py b/src/qmflows/_version.py
@@ -1,3 +1,3 @@
 """The QMFlows version."""
 
-__version__ = "0.12.1.dev0"
+__version__ = "0.12.1"
diff --git a/src/qmflows/common.py b/src/qmflows/common.py
@@ -9,7 +9,7 @@
 else:
     ParserElement = 'pyparsing.ParserElement'
 
-__all__ = ['AtomBasisKey', 'AtomBasisData', 'AtomXYZ', 'CGF',
+__all__ = ['AtomBasisKey', 'AtomBasisData', 'AtomXYZ', 'CGF', 'CP2KInfoMO',
            'InfoMO', 'MO_metadata', 'ParseWarning', 'CP2KVersion']
 
 
@@ -61,16 +61,60 @@ class InfoMO(NamedTuple):
     #: Orbital eigenvectors.
     eigenvectors: np.ndarray
 
+
+class CP2KInfoMO(NamedTuple):
+    """Energies and coefficients of the CP2K molecular orbitals."""
+
+    #: Orbital eigenvalues.
+    eigenvalues: np.ndarray
+
+    #: Orbital eigenvectors.
+    eigenvectors: np.ndarray
+
+    #: MO indices.
+    orb_index: np.ndarray
+
+    #: Occupation numbers.
+    occupation: np.ndarray
+
+    def get_nocc_nvirt(self, threshold: "None | float" = None) -> "tuple[int, int]":
+        """Return the number of occupied and virtual orbitals within the MO range spanned \
+        by this instance.
+
+        Parameters
+        ----------
+        threshold : None | float
+            The occupation number threshold for determining what consitutes an occupied orbital.
+            If ``None`` assume that all occupied orbitals are defined by a non-zero occupation
+            number (*e.g.* canonical orbitals).
+
+        Returns
+        -------
+        tuple[int, int]
+            A 2-tuple with the number of occupied and virtual orbitals.
+
+        """
+        if threshold is None:
+            is_occ = self.occupation != 0
+        else:
+            is_occ = self.occupation >= threshold
+        nocc = is_occ.sum().item()
+        return nocc, len(self.occupation) - nocc
+
     def __array__(self, dtype: "None | np.dtype" = None) -> np.ndarray:
         """Convert this instance into a structured array."""
         struc_dtype = np.dtype([
             ("eigenvalues", "f8"),
             ("eigenvectors", "f8", (self.eigenvectors.shape[0],)),
+            ("orb_index", "i8"),
+            ("occupation", "f8"),
         ])
 
         ret = np.empty(self.eigenvalues.shape[0], dtype=struc_dtype)
         ret["eigenvalues"] = self.eigenvalues
         ret["eigenvectors"] = np.abs(self.eigenvectors.T)
+        ret["orb_index"] = self.orb_index
+        ret["occupation"] = self.occupation
         return ret if dtype is None else ret.astype(dtype)
 
 

diff --git a/src/qmflows/packages/_cp2k.py b/src/qmflows/packages/_cp2k.py
@@ -13,7 +13,7 @@
 from .._settings import Settings
 from ..warnings_qmflows import cp2k_warnings, Key_Warning
 from ..type_hints import Final, _Settings, Generic2Special
-from ..common import InfoMO
+from ..common import CP2KInfoMO
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
@@ -141,7 +141,7 @@ class CP2K_Result(Result):
     geometry: "None | plams.Molecule"
     enthalpy: "None | float"
     free_energy: "None | float"
-    orbitals: "None | InfoMO | tuple[InfoMO, InfoMO]"
+    orbitals: "None | CP2KInfoMO | tuple[CP2KInfoMO, CP2KInfoMO]"
     forces: "None | NDArray[f8]"
     coordinates: "None | NDArray[f8]"
     temperature: "None | NDArray[f8]"

diff --git a/src/qmflows/parsers/_cp2k_orbital_parser.py b/src/qmflows/parsers/_cp2k_orbital_parser.py
@@ -10,15 +10,15 @@
 
 import numpy as np
 
-from ..common import InfoMO, MO_metadata
+from ..common import CP2KInfoMO, MO_metadata
 
 __all__ = ["read_cp2k_coefficients"]
 
 
 def read_cp2k_coefficients(
     path_mos: "str | os.PathLike[str]",
     plams_dir: "None | str | os.PathLike[str]" = None,
-) -> "InfoMO | tuple[InfoMO, InfoMO]":
+) -> "CP2KInfoMO | tuple[CP2KInfoMO, CP2KInfoMO]":
     """Read the MO's from the CP2K output.
 
     First it reads the number of ``Orbitals`` and ``Orbital`` functions from the
@@ -39,7 +39,7 @@ def read_cp2k_coefficients(
 def read_log_file(
     path: "str | os.PathLike[str]",
     orbitals_info: MO_metadata,
-) -> "InfoMO | tuple[InfoMO, InfoMO]":
+) -> "CP2KInfoMO | tuple[CP2KInfoMO, CP2KInfoMO]":
     """
     Read the orbitals from the Log file.
 
@@ -58,9 +58,9 @@ def read_log_file(
 
     Returns
     -------
-    InfoMO or tuple[InfoMO, InfoMO]
+    CP2KInfoMO or tuple[CP2KInfoMO, CP2KInfoMO]
         Molecular orbitals and orbital energies.
-        Returns a tuple of two ``InfoMO`` objects if its an unrestricted calculation.
+        Returns a tuple of two ``CP2KInfoMO`` objects if its an unrestricted calculation.
 
     """
     n_orb_list = orbitals_info.nOrbitals
@@ -114,7 +114,7 @@ def read_coefficients(
     n_orb: int,
     n_orb_funcs: int,
     start: int = 0,
-) -> InfoMO:
+) -> CP2KInfoMO:
     """Read the coefficients from the plain text output.
 
     MO coefficients are stored in Column-major order.
@@ -146,6 +146,8 @@ def read_coefficients(
     """
     energies = np.empty(n_orb, dtype=np.float64)
     coefs = np.empty((n_orb_funcs, n_orb), dtype=np.float64)
+    orb_index = np.empty(n_orb, dtype=np.int64)
+    occupation = np.empty(n_orb, dtype=np.float64)
 
     j0 = 0
     j1 = 0
@@ -159,13 +161,15 @@ def read_coefficients(
                 break
 
             j1 += len(headers)
+            orb_index[j0:j1] = headers
             energies[j0:j1] = next(iterator).split()
-            coefs[..., j0:j1] = [i.split()[4:] for i in islice(iterator, 1, 1 + n_orb_funcs)]
+            occupation[j0:j1] = next(iterator).split()
+            coefs[..., j0:j1] = [i.split()[4:] for i in islice(iterator, 0, n_orb_funcs)]
             j0 += len(headers)
 
     # `n_orb` is an upper bound to the actual printed number of orbitals,
     # so some trimming might be required
-    return InfoMO(energies[:j0], coefs[:, :j0])
+    return CP2KInfoMO(energies[:j0], coefs[:, :j0], orb_index[:j0], occupation[:j0])
 
 
 _ORBITAL_PATTERN = re.compile((

diff --git a/test/test_common.py b/test/test_common.py
@@ -1,20 +1,27 @@
 import numpy as np
 from assertionlib import assertion
 
-from qmflows.common import InfoMO
+from qmflows.common import CP2KInfoMO
 
 
 class TestInfoMO:
     def test_array(self) -> None:
-        info_mo = InfoMO(
+        info_mo = CP2KInfoMO(
             eigenvalues=np.arange(3),
             eigenvectors=np.arange(12).reshape(4, 3),
+            orb_index=np.array([5, 6, 7]),
+            occupation=np.array([2.0, 2.0, 0.0]),
         )
         ref = np.array([
-            (0, [0, 3, 6, 9]),
-            (1, [1, 4, 7, 10]),
-            (2, [2, 5, 8, 11])
-        ], dtype=[("eigenvalues", "f8"), ("eigenvectors", "f8", (4,))])
+            (0, [0, 3, 6, 9], 5, 2),
+            (1, [1, 4, 7, 10], 6, 2),
+            (2, [2, 5, 8, 11], 7, 0),
+        ], dtype=[
+            ("eigenvalues", "f8"),
+            ("eigenvectors", "f8", (4,)),
+            ("orb_index", "i8"),
+            ("occupation", "f8"),
+        ])
 
         ar = np.array(info_mo)
         assertion.eq(ar.dtype, ref.dtype)