Skip to content

Commit

Permalink
ENH: Add 4 new properties to CP2K_Result (#236)
Browse files Browse the repository at this point in the history
The properties in question (all are returned as arrays):
* `volume`: the cell-volume across the trajectory; `shape = (n_mol,)`
* `forces`: the atomic forces across the trajectory; `shape = (n_mol, n_atom, 3)`
* `coordinates`: the atomic coordinates across the trajectory; `shape = (n_mol, n_atom, 3)`
* `temperature`: the temperature across the trajectory; `shape = (n_mol,)`
  • Loading branch information
BvB93 committed Dec 16, 2020
1 parent a084860 commit b355751
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 3 deletions.
22 changes: 22 additions & 0 deletions src/qmflows/data/dictionaries/propertiesCP2K.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,25 @@ orbitals:
file_pattern: '*MOLog'
function: read_cp2k_coefficients
parser: cp2KParser
# Atomic forces across the trajectory, parsed from the `*-frc-1.xyz` file.
forces:
file_pattern: '*-frc-1.xyz'
function: read_cp2k_xyz
parser: cp2KParser
# Atomic coordinates across the trajectory, parsed from the `*-pos-1.xyz` file.
coordinates:
file_pattern: '*-pos-1.xyz'
function: read_cp2k_xyz
parser: cp2KParser
# Temperature across the trajectory, parsed from the `*-1.ener` file.
temperature:
file_pattern: '*-1.ener'
function: read_cp2k_table
parser: cp2KParser
# Presumably forwarded as keyword arguments to `read_cp2k_table`:
# take column 3 of every row, starting at row 1 (i.e. skipping the first line).
kwargs:
column: 3
start: 1
# Cell volume across the trajectory, parsed from the `*-1.cell` file.
volume:
file_pattern: '*-1.cell'
function: read_cp2k_table
parser: cp2KParser
# Presumably forwarded as keyword arguments to `read_cp2k_table`:
# take the last column of every row, starting at row 1 (i.e. skipping the first line).
kwargs:
column: -1
start: 1
22 changes: 22 additions & 0 deletions src/qmflows/data/dictionaries/propertiesCP2KMM.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,25 @@ free_energy:
kwargs:
quantity: G
parser: cp2KParser
# Atomic forces across the trajectory, parsed from the `*-frc-1.xyz` file.
forces:
file_pattern: '*-frc-1.xyz'
function: read_cp2k_xyz
parser: cp2KParser
# Atomic coordinates across the trajectory, parsed from the `*-pos-1.xyz` file.
coordinates:
file_pattern: '*-pos-1.xyz'
function: read_cp2k_xyz
parser: cp2KParser
# Temperature across the trajectory, parsed from the `*-1.ener` file.
temperature:
file_pattern: '*-1.ener'
function: read_cp2k_table
parser: cp2KParser
# Presumably forwarded as keyword arguments to `read_cp2k_table`:
# take column 3 of every row, starting at row 1 (i.e. skipping the first line).
kwargs:
column: 3
start: 1
# Cell volume across the trajectory, parsed from the `*-1.cell` file.
volume:
file_pattern: '*-1.cell'
function: read_cp2k_table
parser: cp2KParser
# Presumably forwarded as keyword arguments to `read_cp2k_table`:
# take the last column of every row, starting at row 1 (i.e. skipping the first line).
kwargs:
column: -1
start: 1
48 changes: 45 additions & 3 deletions src/qmflows/parsers/cp2KParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import os
import subprocess
from io import TextIOBase
from itertools import islice
from itertools import islice, chain
from pathlib import Path
from typing import Any, Dict, FrozenSet, Generator, Iterable, List
from typing import Optional as Optional_
from typing import Sequence, Tuple, Type, TypeVar, Union, overload
from typing import Sequence, Tuple, Type, TypeVar, Union, overload, Iterator

import numpy as np
from more_itertools import chunked
Expand All @@ -29,7 +29,7 @@
from .xyzParser import manyXYZ, tuplesXYZ_to_plams

__all__ = ['readCp2KBasis', 'read_cp2k_coefficients', 'get_cp2k_freq',
'read_cp2k_number_of_orbitals']
'read_cp2k_number_of_orbitals', 'read_cp2k_xyz', 'read_cp2k_table']


# Starting logger
Expand Down Expand Up @@ -565,3 +565,45 @@ def is_string_in_file(string: str, path: PathLike) -> bool:
with open(path, 'r') as handler:
s_mmap = mmap.mmap(handler.fileno(), 0, access=mmap.ACCESS_READ)
return s_mmap.find(string.encode()) != -1


def read_cp2k_xyz(path: PathLike, dtype: Any = np.float64) -> np.ndarray:
    """Extract a 3D array with per-atom Cartesian values from a CP2K ``*.xyz`` file.

    The same layout is used for both the atomic forces (``*-frc-1.xyz``) and
    the atomic coordinates (``*-pos-1.xyz``), so this function serves both.

    Parameters
    ----------
    path : path-like
        Path to the multi-frame ``*.xyz`` file. Its first line must contain
        the number of atoms.
    dtype : dtype-like
        The data type of the to-be returned array.

    Returns
    -------
    :class:`numpy.ndarray`, shape ``(n_mol, n_atom, 3)``
        One ``(n_atom, 3)`` slab per frame in **path**.

    Raises
    ------
    ValueError
        If **path** is empty, if its first line is not an integer or
        if the parsed values cannot be arranged into ``(n_mol, n_atom, 3)``.

    """
    with open(path, 'r') as f:
        try:
            n_atom = int(next(f))
        except StopIteration:
            # A bare StopIteration escaping a regular function can silently
            # terminate an enclosing iteration; report a proper error instead.
            raise ValueError("{!r} is an empty file".format(path)) from None

        # The helper continues reading from the already advanced file handle,
        # so the array has to be constructed before the file is closed.
        flat_iter = chain.from_iterable(_read_cp2k_xyz(f, n_atom))
        flat = np.fromiter(flat_iter, dtype=dtype)

    # `reshape` is preferred over assigning to `ndarray.shape` in-place
    return flat.reshape(-1, n_atom, 3)  # (n_mol, n_atom, 3)


def _read_cp2k_xyz(f: Iterable[str], n_atom: int) -> Generator[Iterator[str], None, None]:
"""Create a generator for :func:`read_cp2k_xyz`."""
stop = 1 + n_atom
# Account for the fact that `read_cp2k_xyz` already iterated through
# the first element
yield chain.from_iterable(at.split()[1:] for at in islice(f, 1, stop))
for _ in f:
yield chain.from_iterable(at.split()[1:] for at in islice(f, 1, stop))


def read_cp2k_table(
    path: PathLike,
    column: int,
    start: Optional_[int] = None,
    stop: Optional_[int] = None,
    step: Optional_[int] = None,
    dtype: Any = np.float64,
) -> np.ndarray:
    """Read a single **column** of the whitespace-delimited table in **path**.

    Parameters
    ----------
    path : path-like
        Path to the text file containing the table.
    column : int
        Index of the column to extract from every selected row;
        negative values count from the end of the row.
    start, stop, step : int, optional
        Slice-like selection of the rows (lines) that are parsed,
        passed as-is to :func:`itertools.islice`.
    dtype : dtype-like
        The data type of the to-be returned array.

    Returns
    -------
    :class:`numpy.ndarray`, shape ``(n_row,)``
        A 1D array with one element per selected row.

    """
    with open(path, 'r') as table:
        selected_rows = islice(table, start, stop, step)
        fields_per_row = map(str.split, selected_rows)
        # The array must be built while the file is still open,
        # as all of the iterators above are lazy.
        return np.fromiter((fields[column] for fields in fields_per_row), dtype=dtype)

0 comments on commit b355751

Please sign in to comment.