ENH: Add 4 new properties to CP2K_Result (#236)

The properties in question (all are returned as arrays):

* `volume`: the cell volume across the trajectory; `shape = (n_mol,)`
* `forces`: the atomic forces across the trajectory; `shape = (n_mol, n_atom, 3)`
* `coordinates`: the atomic coordinates across the trajectory; `shape = (n_mol, n_atom, 3)`
* `temperature`: the temperature across the trajectory; `shape = (n_mol,)`
SCM-NV · Dec 16, 2020 · b355751 · b355751
1 parent a084860
commit b355751
Show file tree

Hide file tree

Showing 3 changed files with 89 additions and 3 deletions.
diff --git a/src/qmflows/data/dictionaries/propertiesCP2K.yaml b/src/qmflows/data/dictionaries/propertiesCP2K.yaml
@@ -29,3 +29,25 @@ orbitals:
     file_pattern: '*MOLog'
     function: read_cp2k_coefficients
     parser: cp2KParser
+forces:  # read_cp2k_xyz reshapes its result to (n_mol, n_atom, 3)
+    file_pattern: '*-frc-1.xyz'
+    function: read_cp2k_xyz
+    parser: cp2KParser
+coordinates:  # same parser function as `forces`, different file pattern
+    file_pattern: '*-pos-1.xyz'
+    function: read_cp2k_xyz
+    parser: cp2KParser
+temperature:
+    file_pattern: '*-1.ener'
+    function: read_cp2k_table
+    parser: cp2KParser
+    kwargs:  # presumably forwarded as keyword arguments to read_cp2k_table -- confirm in the dispatcher
+        column: 3  # index of the whitespace-separated field taken from each row
+        start: 1  # row selection starts at line index 1, i.e. the first line is skipped
+volume:
+    file_pattern: '*-1.cell'
+    function: read_cp2k_table
+    parser: cp2KParser
+    kwargs:  # presumably forwarded as keyword arguments to read_cp2k_table -- confirm in the dispatcher
+        column: -1  # last whitespace-separated field of each row
+        start: 1  # row selection starts at line index 1, i.e. the first line is skipped
diff --git a/src/qmflows/data/dictionaries/propertiesCP2KMM.yaml b/src/qmflows/data/dictionaries/propertiesCP2KMM.yaml
@@ -25,3 +25,25 @@ free_energy:
     kwargs:
         quantity: G
     parser: cp2KParser
+forces:  # read_cp2k_xyz reshapes its result to (n_mol, n_atom, 3)
+    file_pattern: '*-frc-1.xyz'
+    function: read_cp2k_xyz
+    parser: cp2KParser
+coordinates:  # same parser function as `forces`, different file pattern
+    file_pattern: '*-pos-1.xyz'
+    function: read_cp2k_xyz
+    parser: cp2KParser
+temperature:
+    file_pattern: '*-1.ener'
+    function: read_cp2k_table
+    parser: cp2KParser
+    kwargs:  # presumably forwarded as keyword arguments to read_cp2k_table -- confirm in the dispatcher
+        column: 3  # index of the whitespace-separated field taken from each row
+        start: 1  # row selection starts at line index 1, i.e. the first line is skipped
+volume:
+    file_pattern: '*-1.cell'
+    function: read_cp2k_table
+    parser: cp2KParser
+    kwargs:  # presumably forwarded as keyword arguments to read_cp2k_table -- confirm in the dispatcher
+        column: -1  # last whitespace-separated field of each row
+        start: 1  # row selection starts at line index 1, i.e. the first line is skipped
diff --git a/src/qmflows/parsers/cp2KParser.py b/src/qmflows/parsers/cp2KParser.py
@@ -6,11 +6,11 @@
 import os
 import subprocess
 from io import TextIOBase
-from itertools import islice
+from itertools import islice, chain
 from pathlib import Path
 from typing import Any, Dict, FrozenSet, Generator, Iterable, List
 from typing import Optional as Optional_
-from typing import Sequence, Tuple, Type, TypeVar, Union, overload
+from typing import Sequence, Tuple, Type, TypeVar, Union, overload, Iterator
 
 import numpy as np
 from more_itertools import chunked
@@ -29,7 +29,7 @@
 from .xyzParser import manyXYZ, tuplesXYZ_to_plams
 
 __all__ = ['readCp2KBasis', 'read_cp2k_coefficients', 'get_cp2k_freq',
-           'read_cp2k_number_of_orbitals']
+           'read_cp2k_number_of_orbitals', 'read_cp2k_xyz', 'read_cp2k_table']
 
 
 # Starting logger
@@ -565,3 +565,45 @@ def is_string_in_file(string: str, path: PathLike) -> bool:
     with open(path, 'r') as handler:
         s_mmap = mmap.mmap(handler.fileno(), 0, access=mmap.ACCESS_READ)
         return s_mmap.find(string.encode()) != -1
+
+
+def read_cp2k_xyz(path: PathLike, dtype: Any = np.float64) -> np.ndarray:
+    """Parse the CP2K ``*.xyz`` trajectory at **path** into a 3D array.
+
+    The first line is read as the atom count; the first field of every atom line is dropped.
+    Returns an array of the given **dtype** with shape ``(n_mol, n_atom, 3)``.
+    """
+    with open(path, 'r') as f:
+        n_atom = int(next(f))  # the first line of the file holds the number of atoms
+        flat_iter = chain.from_iterable(_read_cp2k_xyz(f, n_atom))  # lazily flatten all frames
+        ret = np.fromiter(flat_iter, dtype=dtype)
+        ret.shape = -1, n_atom, 3  # (n_mol, n_atom, 3)
+    return ret
+
+
+def _read_cp2k_xyz(f: Iterable[str], n_atom: int) -> Generator[Iterator[str], None, None]:
+    """Yield, for each frame in **f**, a lazy iterator over its atom fields for :func:`read_cp2k_xyz`."""
+    stop = 1 + n_atom
+    # First frame: `read_cp2k_xyz` already consumed its atom-count line, so skip only
+    # one more header line and then take the next `n_atom` atom lines
+    yield chain.from_iterable(at.split()[1:] for at in islice(f, 1, stop))
+    for _ in f:  # every later frame: this pulls (and discards) the frame's first line
+        yield chain.from_iterable(at.split()[1:] for at in islice(f, 1, stop))
+
+
+def read_cp2k_table(
+    path: PathLike,
+    column: int,
+    start: Optional_[int] = None,
+    stop: Optional_[int] = None,
+    step: Optional_[int] = None,
+    dtype: Any = np.float64,
+) -> np.ndarray:
+    """Extract a 1D array from the specified **column** in **path**.
+
+    **start**, **stop** and **step** select the to-be parsed rows, as in :func:`itertools.islice`.
+    Each selected row is split on whitespace and **column** indexes the resulting fields.
+    """
+    with open(path, 'r') as f:
+        flat_iter = (i.split()[column] for i in islice(f, start, stop, step))
+        return np.fromiter(flat_iter, dtype=dtype)