Skip to content

Commit

Permalink
Merge pull request #283 from SCM-NV/parser
Browse files Browse the repository at this point in the history
ENH: Print the basis set name and line number whenever failing to read a CP2K basis set file
  • Loading branch information
BvB93 committed Feb 4, 2022
2 parents 99befe1 + 66fa348 commit e168ba9
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 37 deletions.
98 changes: 62 additions & 36 deletions src/qmflows/parsers/_cp2k_basis_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Functions for reading CP2K basis set files."""

from typing import Iterator, Generator, Tuple, List
from typing import Iterator, Tuple, List, Tuple, Iterable
from itertools import islice

import numpy as np
Expand All @@ -13,54 +13,80 @@
_Basis2Tuple = Tuple[List[AtomBasisKey], List[AtomBasisData]]


def _basis_file_iter(f: Iterator[str]) -> Generator[str, None, None]:
"""Iterate through `f` and remove all empty and commented lines."""
for i in f:
i = i.strip().rstrip("\n")
if not i or i.startswith("#"):
continue
yield i
class _BasisFileIter:
"""Enumerate through the passed ``iterable`` and remove all empty and commented lines."""

__slots__ = ("__weakref__", "_enumerator", "_name", "_index")

@property
def index(self) -> int:
"""Get the index within the current iterator."""
return self._index

def __init__(self, iterable: Iterable[str], start: int = 0) -> None:
self._enumerator = enumerate(iterable, start=start)
self._name: "str | None" = getattr(iterable, "name", None)
self._index = start

def __iter__(self) -> "_BasisFileIter":
return self

def __next__(self) -> str:
i, item = next(self._enumerator)
item = item.strip().rstrip("\n")
if not item or item.startswith("#"):
return self.__next__()

def _read_basis(iterator: Iterator[str]) -> _Basis2Tuple:
self._index = i
return item

def __repr__(self) -> str:
return f"<{type(self).__name__} name={self._name!r} index={self._index!r}>"


def _read_basis(f: _BasisFileIter) -> _Basis2Tuple:
"""Helper function for parsing the opened basis set file."""
f = _basis_file_iter(iterator)
keys = []
values = []
for i in f:
# Read the atom type and basis set name(s)
atom, *basis_list = i.split()
atom = atom.capitalize()

# Identify the number of exponent sets
n_sets = int(next(f))
if n_sets != 1:
raise NotImplementedError(
"Basis sets with more than 1 set of exponents are not supported yet"
)

for _ in range(n_sets):
# Parse the basis format, its exponents and its coefficients
basis_fmt = [int(j) for j in next(f).split()]
n_exp = basis_fmt[3]
basis_data = np.array([j.split() for j in islice(f, 0, n_exp)], dtype=np.float64)
exp, coef = basis_data[:, 0], basis_data[:, 1:]

# Two things happen whenever an basis set alias is encountered (i.e. `is_alias > 0`):
# 1. The `alias` field is set for the keys
# 2. The `AtomBasisData` instance, used for the original value, is reused
for is_alias, basis in enumerate(basis_list):
if not is_alias:
basis_key = AtomBasisKey(atom, basis, basis_fmt)
basis_value = AtomBasisData(exp, coef)
keys.append(basis_key)
else:
keys.append(AtomBasisKey(atom, basis, basis_fmt, alias=basis_key))
values.append(basis_value)
try:
# Identify the number of exponent sets
n_sets = int(next(f))
if n_sets != 1:
raise NotImplementedError(
"Basis sets with more than 1 set of exponents are not supported yet"
)

for _ in range(n_sets):
# Parse the basis format, its exponents and its coefficients
basis_fmt = [int(j) for j in next(f).split()]
n_exp = basis_fmt[3]
basis_data = np.array([j.split() for j in islice(f, 0, n_exp)], dtype=np.float64)
exp, coef = basis_data[:, 0], basis_data[:, 1:]

# Two things happen whenever an basis set alias is encountered (i.e. `is_alias > 0`):
# 1. The `alias` field is set for the keys
# 2. The `AtomBasisData` instance, used for the original value, is reused
for is_alias, basis in enumerate(basis_list):
if not is_alias:
basis_key = AtomBasisKey(atom, basis, basis_fmt)
basis_value = AtomBasisData(exp, coef)
keys.append(basis_key)
else:
keys.append(AtomBasisKey(atom, basis, basis_fmt, alias=basis_key))
values.append(basis_value)
except Exception as ex:
raise ValueError(
f'Failed to parse the basis set "{atom} {basis_list[0]}" on line {f.index}'
) from ex
return keys, values


def readCp2KBasis(file: PathLike) -> _Basis2Tuple:
"""Read the Contracted Gauss function primitives format from a text file."""
with open(file, "r") as f:
return _read_basis(f)
return _read_basis(_BasisFileIter(f, start=1))
14 changes: 13 additions & 1 deletion test/test_cp2k_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_key(key_tup: AtomBasisKey) -> str:
"-".join(str(i) for i in key_tup.basisFormat),
)

def test_pass(self):
def test_pass(self) -> None:
basis_file = PATH / "BASIS_MOLOPT"
keys, values = readCp2KBasis(basis_file)

Expand All @@ -49,6 +49,18 @@ def test_pass(self):
if key_tup.alias is not None:
assertion.eq(self.get_key(key_tup.alias), alias, message=key)

PARAMS = {
"BASIS_INVALID_N_EXP": 11,
"BASIS_INVALID_FMT": 3,
"BASIS_NOTIMPLEMENTED": 2,
}

@pytest.mark.parametrize("filename,lineno", PARAMS.items(), ids=PARAMS)
def test_raise(self, filename: str, lineno: int) -> None:
pattern = r'Failed to parse the basis set ".+" on line {}'.format(lineno)
with pytest.raises(ValueError, match=pattern):
readCp2KBasis(PATH / filename)


@requires_cp2k
class TestGetCP2KVersion:
Expand Down
10 changes: 10 additions & 0 deletions test/test_files/BASIS_INVALID_FMT
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
U DZVP-MOLOPT-GTH-q14
1
6 0 4 7 3 3 2 2 1 6s 7s 8s 6p 7p 8p 6d 7d 5f 6f 5g
2.970404051267 -8.613286991729 -4.847218644580 -0.094448304725 2.089613520508 -1.388690863894 -0.339032038879 -0.517917661294 -0.108712988266 0.630053678940 -0.328083139683 0.193812038517
2.837409896206 9.359917338956 5.295013384216 0.080503840695 -2.330831226482 1.591366466533 -0.297639904371 0.575541916820 0.134339314945 0.077256524471 -0.136357555344 -0.096937116722
1.029287870466 -0.223393808655 -0.207730929575 0.170499084947 0.118073808224 -0.298526476737 -0.144235936067 -0.007051323571 -0.072924564853 0.798224117501 -0.044975575325 0.942045397086
0.447156512122 -1.138927348908 -1.085942892963 -0.128005768148 0.875430068610 -0.558080313712 0.020421937017 -0.328938941888 -0.022899936943 0.416776715909 0.084347995405 1.112468197028
0.224386023364 -0.475585548696 -0.147870903958 0.028006926300 0.257221846022 -0.186869197027 -0.068935950988 -0.436497768592 0.047774203203 0.269342903301 0.107901532632 0.049712666496
0.082082026090 -0.038412518166 1.206363318489 0.959404540206 -0.004940369038 1.337846156620 1.078056470699 -0.170128075474 1.023142281896 0.095618485831 0.591613653295 0.071150976147
0.048989649967 -0.161438730593 0.204614845616 0.064250249847 -0.040926507048 0.070410902066 0.151425801177 0.019870929957 0.031382006308 -0.029027116072 0.159595534158 -0.000586301554
20 changes: 20 additions & 0 deletions test/test_files/BASIS_INVALID_N_EXP
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
H SZV-MOLOPT-GTH SZV-MOLOPT-GTH-q1
1
2 0 0 8 1
11.478000339908 0.024916243200
3.700758562763 0.079825490000
1.446884268432 0.128862675300
0.716814589696 0.379448894600
0.247918564176 0.324552432600
0.066918004004 0.037148121400
0.021708243634 -0.001125195500
H DZVP-MOLOPT-GTH DZVP-MOLOPT-GTH-q1
1
2 0 1 7 2 1
11.478000339908 0.024916243200 -0.012512421400 0.024510918200
3.700758562763 0.079825490000 -0.056449071100 0.058140794100
1.446884268432 0.128862675300 0.011242684700 0.444709498500
0.716814589696 0.379448894600 -0.418587548300 0.646207973100
0.247918564176 0.324552432600 0.590363216700 0.803385018200
0.066918004004 0.037148121400 0.438703133000 0.892971208700
0.021708243634 -0.001125195500 -0.059693171300 0.120101316500
8 changes: 8 additions & 0 deletions test/test_files/BASIS_NOTIMPLEMENTED
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Li cFIT4
2
1 0 0 1 1
0.03262984 1.00000000
1 0 0 3 2
0.52140146 0.20867571 -0.97593987
1.87732160 0.83154821 0.14303820
7.13651609 0.51476395 0.16456434

0 comments on commit e168ba9

Please sign in to comment.