From 6f186f33268d1c9ae842e2becfd135c9df97ce05 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Wed, 8 Jan 2025 01:12:14 +0530 Subject: [PATCH 01/20] mark analysis.rdf.InterRDF and analysis.rdf.InterRDF_s as not parallizable --- package/CHANGELOG | 1 + package/MDAnalysis/analysis/rdf.py | 12 ++++++++++++ testsuite/MDAnalysisTests/analysis/test_rdf.py | 18 ++++++++++++++++++ .../MDAnalysisTests/analysis/test_rdf_s.py | 18 ++++++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 5d8328f3313..9fe6e8713c3 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -33,6 +33,7 @@ Enhancements * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824) * Added `precision` for XYZWriter (Issue #4775, PR #4771) + * explicitly mark `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` as not parallelizable (Issue #4675) Changes diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 02ac014d47c..9990a1e2b08 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -218,6 +218,12 @@ class InterRDF(AnalysisBase): :class:`~MDAnalysis.analysis.AnalysisBase`. """ + @classmethod + def get_supported_backends(cls): + return ('serial',) + + _analysis_algorithm_is_parallelizable = False + def __init__( self, g1, @@ -563,6 +569,12 @@ class InterRDF_s(AnalysisBase): The `universe` parameter is superflous. 
""" + @classmethod + def get_supported_backends(cls): + return ('serial',) + + _analysis_algorithm_is_parallelizable = False + def __init__( self, u, diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index d3507e56734..4a8fe46994e 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -152,3 +152,21 @@ def test_unknown_norm(sels): s1, s2 = sels with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf, False), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname.InterRDF._analysis_algorithm_is_parallelizable == is_parallelizable + +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf, ('serial',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.InterRDF.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 5c6e8b6031a..bdce3eed64f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -171,3 +171,21 @@ def test_rdf_attr_warning(rdf, attr): wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf, False), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable + +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf, ('serial',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.InterRDF_s.get_supported_backends() == backends From 
48197578994ec21607a5268e359ea10b3b41d644 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Wed, 8 Jan 2025 22:30:00 +0530 Subject: [PATCH 02/20] Adds parallization for analysis.rdf.InterRDF --- package/MDAnalysis/analysis/rdf.py | 28 ++++++-- .../MDAnalysisTests/analysis/conftest.py | 9 +++ .../MDAnalysisTests/analysis/test_rdf.py | 65 +++++++------------ 3 files changed, 56 insertions(+), 46 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 9990a1e2b08..e36d5a21a8f 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -80,7 +80,7 @@ import numpy as np from ..lib import distances -from .base import AnalysisBase +from .base import AnalysisBase, ResultsGroup class InterRDF(AnalysisBase): @@ -220,9 +220,9 @@ class InterRDF(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial',) + return ('serial', 'multiprocessing', 'dask',) - _analysis_algorithm_is_parallelizable = False + _analysis_algorithm_is_parallelizable = True def __init__( self, @@ -279,6 +279,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization + self.results.volume_cum = 0 self.volume_cum = 0 # Set the max range to filter the search radius self._maxrange = self.rdf_settings["range"][1] @@ -308,8 +309,15 @@ def _single_frame(self): self.results.count += count if self.norm == "rdf": + self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume + def _get_aggregator(self): + return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, + 'volume_cum': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_sum}) + def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -330,6 +338,7 @@ def _conclude(self): N -= xA * xB * nblocks # Average number density + self.volume_cum = self.results.volume_cum box_vol = self.volume_cum / self.n_frames norm *= N / 
box_vol @@ -571,9 +580,9 @@ class InterRDF_s(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial',) + return ('serial', 'multiprocessing', 'dask',) - _analysis_algorithm_is_parallelizable = False + _analysis_algorithm_is_parallelizable = True def __init__( self, @@ -626,6 +635,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization + self.results.volume_cum = 0 self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] @@ -643,8 +653,15 @@ def _single_frame(self): self.results.count[i][idx1, idx2, :] += count if self.norm == "rdf": + self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume + def _get_aggregator(self): + return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, + 'volume_cum': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_sum}) + def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -654,6 +671,7 @@ def _conclude(self): if self.norm == "rdf": # Average number density + self.volume_cum = self.results.volume_cum norm *= 1 / (self.volume_cum / self.n_frames) # Empty lists to restore indices, RDF diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 5848c503165..0268bc8c879 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -17,6 +17,7 @@ from MDAnalysis.analysis.nucleicacids import NucPairDist from MDAnalysis.analysis.contacts import Contacts from MDAnalysis.analysis.density import DensityAnalysis +from MDAnalysis.analysis.rdf import InterRDF, InterRDF_s from MDAnalysis.lib.util import is_installed @@ -176,3 +177,11 @@ def client_Contacts(request): @pytest.fixture(scope="module", params=params_for_cls(DensityAnalysis)) def client_DensityAnalysis(request): return request.param + +@pytest.fixture(scope="module", params=params_for_cls(InterRDF)) +def 
client_InterRDF(request): + return request.param + +@pytest.fixture(scope="module", params=params_for_cls(InterRDF_s)) +def client_InterRDF_s(request): + return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 4a8fe46994e..bce114fcea4 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -48,83 +48,83 @@ def sels(u): return s1, s2 -def test_nbins(u): +def test_nbins(u, client_InterRDF): s1 = u.atoms[:3] s2 = u.atoms[3:] - rdf = InterRDF(s1, s2, nbins=412).run() + rdf = InterRDF(s1, s2, nbins=412).run(**client_InterRDF) assert len(rdf.results.bins) == 412 -def test_range(u): +def test_range(u, client_InterRDF): s1 = u.atoms[:3] s2 = u.atoms[3:] rmin, rmax = 1.0, 13.0 - rdf = InterRDF(s1, s2, range=(rmin, rmax)).run() + rdf = InterRDF(s1, s2, range=(rmin, rmax)).run(**client_InterRDF) assert rdf.results.edges[0] == rmin assert rdf.results.edges[-1] == rmax -def test_count_sum(sels): +def test_count_sum(sels, client_InterRDF): # OW vs HW # should see 8 comparisons in count s1, s2 = sels - rdf = InterRDF(s1, s2).run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) assert rdf.results.count.sum() == 8 -def test_count(sels): +def test_count(sels, client_InterRDF): # should see two distances with 4 counts each s1, s2 = sels - rdf = InterRDF(s1, s2).run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) assert len(rdf.results.count[rdf.results.count == 4]) == 2 -def test_double_run(sels): +def test_double_run(sels, client_InterRDF): # running rdf twice should give the same result s1, s2 = sels - rdf = InterRDF(s1, s2).run() - rdf.run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) + rdf.run(**client_InterRDF) assert len(rdf.results.count[rdf.results.count == 4]) == 2 -def test_exclusion(sels): +def test_exclusion(sels, client_InterRDF): # should see two distances with 4 counts each s1, s2 = sels - rdf = InterRDF(s1, s2, 
exclusion_block=(1, 2)).run() + rdf = InterRDF(s1, s2, exclusion_block=(1, 2)).run(**client_InterRDF) assert rdf.results.count.sum() == 4 @pytest.mark.parametrize( "attr, count", [("residue", 8), ("segment", 0), ("chain", 8)] ) -def test_ignore_same_residues(sels, attr, count): +def test_ignore_same_residues(sels, attr, count, client_InterRDF): # should see two distances with 4 counts each s1, s2 = sels - rdf = InterRDF(s2, s2, exclude_same=attr).run() + rdf = InterRDF(s2, s2, exclude_same=attr).run(**client_InterRDF) assert rdf.rdf[0] == 0 assert rdf.results.count.sum() == count -def test_ignore_same_residues_fails(sels): +def test_ignore_same_residues_fails(sels, client_InterRDF): s1, s2 = sels with pytest.raises( ValueError, match="The exclude_same argument to InterRDF must be" ): - InterRDF(s2, s2, exclude_same="unsupported").run() + InterRDF(s2, s2, exclude_same="unsupported").run(**client_InterRDF) with pytest.raises( ValueError, match="The exclude_same argument to InterRDF cannot be used with", ): - InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run() + InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run(**client_InterRDF) @pytest.mark.parametrize("attr", ("rdf", "bins", "edges", "count")) -def test_rdf_attr_warning(sels, attr): +def test_rdf_attr_warning(sels, attr, client_InterRDF): s1, s2 = sels - rdf = InterRDF(s1, s2).run() + rdf = InterRDF(s1, s2).run(**client_InterRDF) wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] @@ -133,18 +133,18 @@ def test_rdf_attr_warning(sels, attr): @pytest.mark.parametrize( "norm, value", [("density", 1.956823), ("rdf", 244602.88385), ("none", 4)] ) -def test_norm(sels, norm, value): +def test_norm(sels, norm, value, client_InterRDF): s1, s2 = sels - rdf = InterRDF(s1, s2, norm=norm).run() + rdf = InterRDF(s1, s2, norm=norm).run(**client_InterRDF) 
assert_allclose(max(rdf.results.rdf), value) @pytest.mark.parametrize( "norm, norm_required", [("Density", "density"), (None, "none")] ) -def test_norm_values(sels, norm, norm_required): +def test_norm_values(sels, norm, norm_required, client_InterRDF): s1, s2 = sels - rdf = InterRDF(s1, s2, norm=norm).run() + rdf = InterRDF(s1, s2, norm=norm).run(**client_InterRDF) assert rdf.norm == norm_required @@ -153,20 +153,3 @@ def test_unknown_norm(sels): with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf, False), - ] -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname.InterRDF._analysis_algorithm_is_parallelizable == is_parallelizable - -@pytest.mark.parametrize( - "classname,backends", - [ - (mda.analysis.rdf, ('serial',)), - ] -) -def test_supported_backends(classname, backends): - assert classname.InterRDF.get_supported_backends() == backends From 6f10067fa6a14009c06fe9ca255d8d10576acf30 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Wed, 8 Jan 2025 23:02:25 +0530 Subject: [PATCH 03/20] Minor changes --- package/MDAnalysis/analysis/rdf.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index e36d5a21a8f..675c910289c 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -578,11 +578,6 @@ class InterRDF_s(AnalysisBase): The `universe` parameter is superflous. 
""" - @classmethod - def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) - - _analysis_algorithm_is_parallelizable = True def __init__( self, @@ -635,7 +630,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization - self.results.volume_cum = 0 self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] @@ -653,15 +647,8 @@ def _single_frame(self): self.results.count[i][idx1, idx2, :] += count if self.norm == "rdf": - self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume - def _get_aggregator(self): - return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, - 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_sum}) - def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -671,7 +658,6 @@ def _conclude(self): if self.norm == "rdf": # Average number density - self.volume_cum = self.results.volume_cum norm *= 1 / (self.volume_cum / self.n_frames) # Empty lists to restore indices, RDF From c584b0ba2136ca04ff7d2ca93eecab03658d629f Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 16:41:11 +0530 Subject: [PATCH 04/20] Adds custom aggregegator for InterRDF_s --- package/MDAnalysis/analysis/rdf.py | 98 ++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 675c910289c..7a04d43d432 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -577,6 +577,22 @@ class InterRDF_s(AnalysisBase): .. deprecated:: 2.3.0 The `universe` parameter is superflous. 
""" + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + + _analysis_algorithm_is_parallelizable = True + + + def _get_aggregator(self): + return ResultsGroup( + lookup={ + 'count': self._flattened_ndarray_sum, + 'volume_cum': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_sum, + } + ) def __init__( @@ -630,6 +646,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization + self.results.volume_cum = 0 self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] @@ -647,8 +664,88 @@ def _single_frame(self): self.results.count[i][idx1, idx2, :] += count if self.norm == "rdf": + self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume + @staticmethod + def arr_resize(arr): + if arr.ndim == 2: # If shape is (x, y) + return arr[np.newaxis, ...] # Add a new axis at the beginning + elif arr.ndim == 3 and arr.shape[0] == 1: # If shape is already (1, x, y) + return arr + else: + raise ValueError("Array has an invalid shape") + + # @staticmethod + # def custom_aggregate(combined_arr): + # arr1 = combined_arr[0][0] + # arr2 = combined_arr[1][0] + # arr3 = combined_arr[1][1][0] + # arr4 = combined_arr[1][1][1] + + # arr1 = InterRDF_s.arr_resize(arr1) + # arr2 = InterRDF_s.arr_resize(arr2) + # arr3 = InterRDF_s.arr_resize(arr3) + # arr4 = InterRDF_s.arr_resize(arr4) + + + # print(arr1.shape, arr2.shape, arr3.shape, arr4.shape) + + + + # arr01 = arr1 + arr2 + # arr02 = np.vstack((arr3, arr4)) + # print("New shape", arr01.shape, arr02.shape) + + # arr = [arr01, arr02] + # # arr should be [(1,2,75), (2,2,75)] + # return arr + + + # #TODO: check shapes without parallelization then emulate that in custom_aggregate + + # def _get_aggregator(self): + # return ResultsGroup(lookup={'count': self.custom_aggregate, + # 'volume_cum': ResultsGroup.ndarray_sum, + # 'bins': ResultsGroup.ndarray_sum, + # 'edges': ResultsGroup.ndarray_sum}) + + 
@staticmethod + def _flattened_ndarray_sum(arrs): + """Custom aggregator for nested count arrays + + Parameters + ---------- + arrs : list + List of arrays or nested lists of arrays to sum + + Returns + ------- + ndarray + Sum of all arrays after flattening nested structure + """ + # Handle nested list/array structures + def flatten(arr): + if isinstance(arr, (list, tuple)): + return [item for sublist in arr for item in flatten(sublist)] + return [arr] + + # Flatten and sum arrays + flat = flatten(arrs) + if not flat: + return None + + f1 = np.zeros_like(flat[0]) + f2 = np.zeros_like(flat[1]) + # print(flat, "SIZE:", len(flat)) + for i in range(len(flat)//2): + f1 += flat[2*i] + f2 += flat[2*i+1] + array1 = [f1, f2] + # print("ARRAY", array1) + return array1 + + def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: @@ -658,6 +755,7 @@ def _conclude(self): if self.norm == "rdf": # Average number density + self.volume_cum = self.results.volume_cum norm *= 1 / (self.volume_cum / self.n_frames) # Empty lists to restore indices, RDF From 2c919eabc2361a83f00358a32e9e045fe045cd00 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 21:26:24 +0530 Subject: [PATCH 05/20] Fixes aggregation of results.edges --- package/MDAnalysis/analysis/rdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 7a04d43d432..511e0495f19 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -590,7 +590,7 @@ def _get_aggregator(self): 'count': self._flattened_ndarray_sum, 'volume_cum': ResultsGroup.ndarray_sum, 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_sum, + 'edges': ResultsGroup.ndarray_mean, } ) From ea6d4ae284147c1db0878dca483a8f1719319c5d Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:20:12 +0530 Subject: [PATCH 06/20] Parallizes 
InterRDF_s --- package/MDAnalysis/analysis/rdf.py | 103 +++++------------- .../MDAnalysisTests/analysis/test_rdf_s.py | 52 +++------ 2 files changed, 47 insertions(+), 108 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 511e0495f19..4a131527385 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -316,7 +316,7 @@ def _get_aggregator(self): return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, 'volume_cum': ResultsGroup.ndarray_sum, 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_sum}) + 'edges': ResultsGroup.ndarray_mean}) def _conclude(self): norm = self.n_frames @@ -583,17 +583,42 @@ def get_supported_backends(cls): _analysis_algorithm_is_parallelizable = True - + @staticmethod + def func(arrs): + r"""Custom aggregator for nested arrays + + Parameters + ---------- + arrs : list + List of arrays or nested lists of arrays + + Returns + ------- + ndarray + Sums flattened arrays at alternate index + in the list and returns a list of two arrays + """ + def flatten(arr): + if isinstance(arr, (list, tuple)): + return [item for sublist in arr for item in flatten(sublist)] + return [arr] + + flat = flatten(arrs) + aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] + for i in range(len(flat)//2): + aggregated_arr[0] += flat[2*i] # 0, 2, 4, ... + aggregated_arr[1] += flat[2*i+1] # 1, 3, 5, ... 
+ return aggregated_arr + def _get_aggregator(self): return ResultsGroup( lookup={ - 'count': self._flattened_ndarray_sum, + 'count': self.func, 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_sum, + 'bins': ResultsGroup.ndarray_mean, 'edges': ResultsGroup.ndarray_mean, } ) - def __init__( self, @@ -676,74 +701,6 @@ def arr_resize(arr): else: raise ValueError("Array has an invalid shape") - # @staticmethod - # def custom_aggregate(combined_arr): - # arr1 = combined_arr[0][0] - # arr2 = combined_arr[1][0] - # arr3 = combined_arr[1][1][0] - # arr4 = combined_arr[1][1][1] - - # arr1 = InterRDF_s.arr_resize(arr1) - # arr2 = InterRDF_s.arr_resize(arr2) - # arr3 = InterRDF_s.arr_resize(arr3) - # arr4 = InterRDF_s.arr_resize(arr4) - - - # print(arr1.shape, arr2.shape, arr3.shape, arr4.shape) - - - - # arr01 = arr1 + arr2 - # arr02 = np.vstack((arr3, arr4)) - # print("New shape", arr01.shape, arr02.shape) - - # arr = [arr01, arr02] - # # arr should be [(1,2,75), (2,2,75)] - # return arr - - - # #TODO: check shapes without parallelization then emulate that in custom_aggregate - - # def _get_aggregator(self): - # return ResultsGroup(lookup={'count': self.custom_aggregate, - # 'volume_cum': ResultsGroup.ndarray_sum, - # 'bins': ResultsGroup.ndarray_sum, - # 'edges': ResultsGroup.ndarray_sum}) - - @staticmethod - def _flattened_ndarray_sum(arrs): - """Custom aggregator for nested count arrays - - Parameters - ---------- - arrs : list - List of arrays or nested lists of arrays to sum - - Returns - ------- - ndarray - Sum of all arrays after flattening nested structure - """ - # Handle nested list/array structures - def flatten(arr): - if isinstance(arr, (list, tuple)): - return [item for sublist in arr for item in flatten(sublist)] - return [arr] - - # Flatten and sum arrays - flat = flatten(arrs) - if not flat: - return None - - f1 = np.zeros_like(flat[0]) - f2 = np.zeros_like(flat[1]) - # print(flat, "SIZE:", len(flat)) - for i in range(len(flat)//2): - 
f1 += flat[2*i] - f2 += flat[2*i+1] - array1 = [f1, f2] - # print("ARRAY", array1) - return array1 def _conclude(self): diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index bdce3eed64f..e209ea21028 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -48,19 +48,19 @@ def sels(u): @pytest.fixture(scope="module") -def rdf(u, sels): - return InterRDF_s(u, sels).run() +def rdf(u, sels, client_InterRDF_s): + r = InterRDF_s(u, sels).run(**client_InterRDF_s) + return r -def test_nbins(u, sels): - rdf = InterRDF_s(u, sels, nbins=412).run() +def test_nbins(u, sels, client_InterRDF_s): + rdf = InterRDF_s(u, sels, nbins=412).run(**client_InterRDF_s) assert len(rdf.results.bins) == 412 - -def test_range(u, sels): +def test_range(u, sels, client_InterRDF_s): rmin, rmax = 1.0, 13.0 - rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run() + rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run(**client_InterRDF_s) assert rdf.results.edges[0] == rmin assert rdf.results.edges[-1] == rmax @@ -113,19 +113,19 @@ def test_cdf(rdf): (True, 0.021915460340071267), ], ) -def test_density(u, sels, density, value): +def test_density(u, sels, density, value, client_InterRDF_s): kwargs = {"density": density} if density is not None else {} - rdf = InterRDF_s(u, sels, **kwargs).run() + rdf = InterRDF_s(u, sels, **kwargs).run(**client_InterRDF_s) + print(rdf.results.rdf[0][0][0], "RDF") assert_almost_equal(max(rdf.results.rdf[0][0][0]), value) if not density: s1 = u.select_atoms("name ZND and resid 289") s2 = u.select_atoms( "name OD1 and resid 51 and sphzone 5.0 (resid 289)" ) - rdf_ref = InterRDF(s1, s2).run() + rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) - def test_overwrite_norm(u, sels): rdf = InterRDF_s(u, sels, norm="rdf", density=True) assert rdf.norm == "density" @@ -139,23 +139,23 @@ def 
test_overwrite_norm(u, sels): ("none", 0.6), ], ) -def test_norm(u, sels, norm, value): - rdf = InterRDF_s(u, sels, norm=norm).run() +def test_norm(u, sels, norm, value, client_InterRDF_s): + rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s) assert_allclose(max(rdf.results.rdf[0][0][0]), value) if norm == "rdf": s1 = u.select_atoms("name ZND and resid 289") s2 = u.select_atoms( "name OD1 and resid 51 and sphzone 5.0 (resid 289)" ) - rdf_ref = InterRDF(s1, s2).run() + rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) @pytest.mark.parametrize( "norm, norm_required", [("Density", "density"), (None, "none")] ) -def test_norm_values(u, sels, norm, norm_required): - rdf = InterRDF_s(u, sels, norm=norm).run() +def test_norm_values(u, sels, norm, norm_required, client_InterRDF_s): + rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s) assert rdf.norm == norm_required @@ -170,22 +170,4 @@ def test_rdf_attr_warning(rdf, attr): rdf.get_cdf() wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): - getattr(rdf, attr) is rdf.results[attr] - -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf, False), - ] -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname.InterRDF_s._analysis_algorithm_is_parallelizable == is_parallelizable - -@pytest.mark.parametrize( - "classname,backends", - [ - (mda.analysis.rdf, ('serial',)), - ] -) -def test_supported_backends(classname, backends): - assert classname.InterRDF_s.get_supported_backends() == backends + getattr(rdf, attr) is rdf.results[attr] \ No newline at end of file From a897ca9e6332366d741e11fd08956f17a101304a Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:40:09 +0530 Subject: [PATCH 07/20] Minor changes --- package/MDAnalysis/analysis/rdf.py | 61 
++++++++++++++++-------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 4a131527385..1eb9047ad52 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -220,7 +220,11 @@ class InterRDF(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) + return ( + "serial", + "multiprocessing", + "dask", + ) _analysis_algorithm_is_parallelizable = True @@ -313,10 +317,14 @@ def _single_frame(self): self.volume_cum += self._ts.volume def _get_aggregator(self): - return ResultsGroup(lookup={'count': ResultsGroup.ndarray_sum, - 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_sum, - 'edges': ResultsGroup.ndarray_mean}) + return ResultsGroup( + lookup={ + "count": ResultsGroup.ndarray_sum, + "volume_cum": ResultsGroup.ndarray_sum, + "bins": ResultsGroup.ndarray_sum, + "edges": ResultsGroup.ndarray_mean, + } + ) def _conclude(self): norm = self.n_frames @@ -577,49 +585,55 @@ class InterRDF_s(AnalysisBase): .. deprecated:: 2.3.0 The `universe` parameter is superflous. """ + @classmethod def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) + return ( + "serial", + "multiprocessing", + "dask", + ) _analysis_algorithm_is_parallelizable = True @staticmethod def func(arrs): r"""Custom aggregator for nested arrays - + Parameters ---------- arrs : list List of arrays or nested lists of arrays - + Returns ------- ndarray - Sums flattened arrays at alternate index + Sums flattened arrays at alternate index in the list and returns a list of two arrays """ + def flatten(arr): if isinstance(arr, (list, tuple)): return [item for sublist in arr for item in flatten(sublist)] return [arr] - + flat = flatten(arrs) aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] - for i in range(len(flat)//2): - aggregated_arr[0] += flat[2*i] # 0, 2, 4, ... 
- aggregated_arr[1] += flat[2*i+1] # 1, 3, 5, ... + for i in range(len(flat) // 2): + aggregated_arr[0] += flat[2 * i] # 0, 2, 4, ... + aggregated_arr[1] += flat[2 * i + 1] # 1, 3, 5, ... return aggregated_arr def _get_aggregator(self): return ResultsGroup( lookup={ - 'count': self.func, - 'volume_cum': ResultsGroup.ndarray_sum, - 'bins': ResultsGroup.ndarray_mean, - 'edges': ResultsGroup.ndarray_mean, + "count": self.func, + "volume_cum": ResultsGroup.ndarray_sum, + "bins": ResultsGroup.ndarray_mean, + "edges": ResultsGroup.ndarray_mean, } ) - + def __init__( self, u, @@ -692,17 +706,6 @@ def _single_frame(self): self.results.volume_cum += self._ts.volume self.volume_cum += self._ts.volume - @staticmethod - def arr_resize(arr): - if arr.ndim == 2: # If shape is (x, y) - return arr[np.newaxis, ...] # Add a new axis at the beginning - elif arr.ndim == 3 and arr.shape[0] == 1: # If shape is already (1, x, y) - return arr - else: - raise ValueError("Array has an invalid shape") - - - def _conclude(self): norm = self.n_frames if self.norm in ["rdf", "density"]: From eb91475e758e29e6b2079b217ad0e41841165199 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:46:01 +0530 Subject: [PATCH 08/20] Fixes linter --- package/MDAnalysis/core/selection.py | 28 +++++++++++++++---- .../MDAnalysisTests/analysis/conftest.py | 2 ++ .../MDAnalysisTests/analysis/test_rdf.py | 5 ++-- .../MDAnalysisTests/coordinates/test_xdr.py | 12 ++++---- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 6c2ea3c8172..d47f4d7ac1c 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -1106,25 +1106,41 @@ class WaterSelection(Selection): .. 
versionadded:: 2.9.0 """ - token = 'water' + + token = "water" # Recognized water resnames water_res = { - 'H2O', 'HOH', 'OH2', 'HHO', 'OHH', - 'T3P', 'T4P', 'T5P', 'SOL', 'WAT', - 'TIP', 'TIP2', 'TIP3', 'TIP4' + "H2O", + "HOH", + "OH2", + "HHO", + "OHH", + "T3P", + "T4P", + "T5P", + "SOL", + "WAT", + "TIP", + "TIP2", + "TIP3", + "TIP4", } def _apply(self, group): resnames = group.universe._topology.resnames nmidx = resnames.nmidx[group.resindices] - matches = [ix for (nm, ix) in resnames.namedict.items() - if nm in self.water_res] + matches = [ + ix + for (nm, ix) in resnames.namedict.items() + if nm in self.water_res + ] mask = np.isin(nmidx, matches) return group[mask] + class BackboneSelection(ProteinSelection): """A BackboneSelection contains all atoms with name 'N', 'CA', 'C', 'O'. diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 0268bc8c879..c306f05238b 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -178,10 +178,12 @@ def client_Contacts(request): def client_DensityAnalysis(request): return request.param + @pytest.fixture(scope="module", params=params_for_cls(InterRDF)) def client_InterRDF(request): return request.param + @pytest.fixture(scope="module", params=params_for_cls(InterRDF_s)) def client_InterRDF_s(request): return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index bce114fcea4..7af7fecce72 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -118,7 +118,9 @@ def test_ignore_same_residues_fails(sels, client_InterRDF): ValueError, match="The exclude_same argument to InterRDF cannot be used with", ): - InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run(**client_InterRDF) + InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run( + 
**client_InterRDF + ) @pytest.mark.parametrize("attr", ("rdf", "bins", "edges", "count")) @@ -152,4 +154,3 @@ def test_unknown_norm(sels): s1, s2 = sels with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") - diff --git a/testsuite/MDAnalysisTests/coordinates/test_xdr.py b/testsuite/MDAnalysisTests/coordinates/test_xdr.py index 2ca76580fcc..c2c7a0b2179 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_xdr.py +++ b/testsuite/MDAnalysisTests/coordinates/test_xdr.py @@ -984,10 +984,12 @@ def test_persistent_offsets_readonly(self, tmpdir, trajectory): shutil.copy(self.filename, str(tmpdir)) filename = str(tmpdir.join(os.path.basename(self.filename))) - print('filename', filename) + print("filename", filename) ref_offset = trajectory._xdr.offsets # Mock filelock acquire to raise an error - with patch.object(FileLock, "acquire", side_effect=PermissionError): # Simulate failure + with patch.object( + FileLock, "acquire", side_effect=PermissionError + ): # Simulate failure with pytest.warns(UserWarning, match="Cannot write lock"): reader = self._reader(filename) saved_offsets = reader._xdr.offsets @@ -1008,12 +1010,10 @@ def test_persistent_offsets_readonly(self, tmpdir, trajectory): @pytest.mark.skipif( sys.platform.startswith("win"), - reason="The lock file only exists when it's locked in windows" + reason="The lock file only exists when it's locked in windows", ) def test_offset_lock_created(self, traj): - assert os.path.exists( - XDR.offsets_filename(traj, ending="lock") - ) + assert os.path.exists(XDR.offsets_filename(traj, ending="lock")) class TestXTCReader_offsets(_GromacsReader_offsets): From 8686aaa4cd7249bed06737f4183fc7566d1b6721 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 22:53:01 +0530 Subject: [PATCH 09/20] Fixes linter --- testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index e209ea21028..fd6c8f7edc3 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -58,6 +58,7 @@ def test_nbins(u, sels, client_InterRDF_s): assert len(rdf.results.bins) == 412 + def test_range(u, sels, client_InterRDF_s): rmin, rmax = 1.0, 13.0 rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run(**client_InterRDF_s) @@ -126,6 +127,7 @@ def test_density(u, sels, density, value, client_InterRDF_s): rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s) assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0]) + def test_overwrite_norm(u, sels): rdf = InterRDF_s(u, sels, norm="rdf", density=True) assert rdf.norm == "density" @@ -170,4 +172,4 @@ def test_rdf_attr_warning(rdf, attr): rdf.get_cdf() wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): - getattr(rdf, attr) is rdf.results[attr] \ No newline at end of file + getattr(rdf, attr) is rdf.results[attr] From 76c24689edf40f0773f699a5b111d3894dde4fa7 Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Sat, 18 Jan 2025 23:26:03 +0530 Subject: [PATCH 10/20] Tests for parallization --- package/MDAnalysis/analysis/rdf.py | 10 +++++++++ .../MDAnalysisTests/analysis/test_rdf.py | 22 +++++++++++++++++++ .../MDAnalysisTests/analysis/test_rdf_s.py | 22 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 1eb9047ad52..a79971d151e 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -216,6 +216,11 @@ class InterRDF(AnalysisBase): Store results as attributes `bins`, `edges`, `rdf` and `count` of the `results` attribute of :class:`~MDAnalysis.analysis.AnalysisBase`. + + .. 
versionchanged:: 2.9.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ @classmethod @@ -584,6 +589,11 @@ class InterRDF_s(AnalysisBase): Instead of `density=True` use `norm='density'` .. deprecated:: 2.3.0 The `universe` parameter is superflous. + + .. versionchanged:: 2.9.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ @classmethod diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 7af7fecce72..ef31e4f4fb7 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -154,3 +154,25 @@ def test_unknown_norm(sels): s1, s2 = sels with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") + +# tests for parallelization + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf.InterRDF, True), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname._analysis_algorithm_is_parallelizable == is_parallelizable + + +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf.InterRDF, + ('serial', 'multiprocessing', 'dask',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index fd6c8f7edc3..f7bbc51c058 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -173,3 +173,25 @@ def test_rdf_attr_warning(rdf, attr): wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is 
rdf.results[attr] + +# tests for parallelization + +@pytest.mark.parametrize( + "classname,is_parallelizable", + [ + (mda.analysis.rdf.InterRDF_s, True), + ] +) +def test_class_is_parallelizable(classname, is_parallelizable): + assert classname._analysis_algorithm_is_parallelizable == is_parallelizable + + +@pytest.mark.parametrize( + "classname,backends", + [ + (mda.analysis.rdf.InterRDF_s, + ('serial', 'multiprocessing', 'dask',)), + ] +) +def test_supported_backends(classname, backends): + assert classname.get_supported_backends() == backends From 91773388e3425d3710d58de5940f2ca3e585c966 Mon Sep 17 00:00:00 2001 From: Tanish Yelgoe <143334319+tanishy7777@users.noreply.github.com> Date: Tue, 21 Jan 2025 02:05:23 +0530 Subject: [PATCH 11/20] Update CHANGELOG --- package/CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 0d3c1607075..7bc60f645f6 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -33,7 +33,7 @@ Enhancements * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824) * Added `precision` for XYZWriter (Issue #4775, PR #4771) - * explicitly mark `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF` as not parallelizable (Issue #4675) + * Parallelize `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF_s` (Issue #4675) Changes From 4971c503150275e32961216e6ec23469e68fb3c4 Mon Sep 17 00:00:00 2001 From: Tanish Yelgoe <143334319+tanishy7777@users.noreply.github.com> Date: Sat, 22 Feb 2025 17:31:20 +0530 Subject: [PATCH 12/20] Update package/MDAnalysis/analysis/rdf.py Co-authored-by: Egor Marin --- package/MDAnalysis/analysis/rdf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index a79971d151e..be2199f8ad9 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -289,7 +289,6
@@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.results.volume_cum = 0 - self.volume_cum = 0 # Set the max range to filter the search radius self._maxrange = self.rdf_settings["range"][1] From 14cb40e49df9a0248c4012a369997756775d3d1a Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Mon, 24 Mar 2025 02:32:35 +0530 Subject: [PATCH 13/20] refactor custom aggregator for rdf --- package/MDAnalysis/analysis/rdf.py | 63 ++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index a79971d151e..808983ddfa3 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -83,6 +83,39 @@ from .base import AnalysisBase, ResultsGroup +def flatten_sum_alternate(arrs): + r"""Custom aggregator for nested arrays + + This function takes a nested list or tuple of NumPy arrays, flattens it into a single list, + and aggregates the elements at alternating indices into two separate arrays. The first + array accumulates elements at even indices, while the second accumulates elements at odd indices. + + Parameters + ---------- + arrs : list + List of arrays or nested lists of arrays + + Returns + ------- + list of ndarray + A list containing two NumPy arrays: + - The first array is the sum of all elements at even indices in the sum of flattened arrays. + - The second array is the sum of all elements at odd indices in the sum of flattened arrays. + """ + + def flatten(arr): + if isinstance(arr, (list, tuple)): + return [item for sublist in arr for item in flatten(sublist)] + return [arr] + + flat = flatten(arrs) + aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] + for i in range(len(flat) // 2): + aggregated_arr[0] += flat[2 * i] # 0, 2, 4, ... + aggregated_arr[1] += flat[2 * i + 1] # 1, 3, 5, ... 
+ return aggregated_arr + + class InterRDF(AnalysisBase): r"""Radial distribution function @@ -606,38 +639,10 @@ def get_supported_backends(cls): _analysis_algorithm_is_parallelizable = True - @staticmethod - def func(arrs): - r"""Custom aggregator for nested arrays - - Parameters - ---------- - arrs : list - List of arrays or nested lists of arrays - - Returns - ------- - ndarray - Sums flattened arrays at alternate index - in the list and returns a list of two arrays - """ - - def flatten(arr): - if isinstance(arr, (list, tuple)): - return [item for sublist in arr for item in flatten(sublist)] - return [arr] - - flat = flatten(arrs) - aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])] - for i in range(len(flat) // 2): - aggregated_arr[0] += flat[2 * i] # 0, 2, 4, ... - aggregated_arr[1] += flat[2 * i + 1] # 1, 3, 5, ... - return aggregated_arr - def _get_aggregator(self): return ResultsGroup( lookup={ - "count": self.func, + "count": flatten_sum_alternate, "volume_cum": ResultsGroup.ndarray_sum, "bins": ResultsGroup.ndarray_mean, "edges": ResultsGroup.ndarray_mean, From e9b599c19ddaf79c48931422218179ddd2ee97eb Mon Sep 17 00:00:00 2001 From: tanishy7777 <23110328@iitgn.ac.in> Date: Mon, 24 Mar 2025 02:37:57 +0530 Subject: [PATCH 14/20] remove unnecessary variables --- package/MDAnalysis/analysis/rdf.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 808983ddfa3..1a101d551f5 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -322,7 +322,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.results.volume_cum = 0 - self.volume_cum = 0 # Set the max range to filter the search radius self._maxrange = self.rdf_settings["range"][1] @@ -352,7 +351,6 @@ def _single_frame(self): if self.norm == "rdf": self.results.volume_cum += self._ts.volume - self.volume_cum += self._ts.volume def
_get_aggregator(self): return ResultsGroup( @@ -701,7 +699,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.results.volume_cum = 0 - self.volume_cum = 0 self._maxrange = self.rdf_settings["range"][1] def _single_frame(self): @@ -719,7 +716,6 @@ def _single_frame(self): if self.norm == "rdf": self.results.volume_cum += self._ts.volume - self.volume_cum += self._ts.volume def _conclude(self): norm = self.n_frames From 5782b25f9943f360285ca7d4d15a6964f53ab1fb Mon Sep 17 00:00:00 2001 From: tanishy7777 Date: Tue, 1 Apr 2025 17:24:04 +0530 Subject: [PATCH 15/20] adds tests for nested_array_sum --- package/MDAnalysis/analysis/rdf.py | 4 +-- .../MDAnalysisTests/analysis/test_rdf_s.py | 26 +++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 1a101d551f5..0604669bf13 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -83,7 +83,7 @@ from .base import AnalysisBase, ResultsGroup -def flatten_sum_alternate(arrs): +def nested_array_sum(arrs): r"""Custom aggregator for nested arrays This function takes a nested list or tuple of NumPy arrays, flattens it into a single list, @@ -640,7 +640,7 @@ def get_supported_backends(cls): def _get_aggregator(self): return ResultsGroup( lookup={ - "count": flatten_sum_alternate, + "count": nested_array_sum, "volume_cum": ResultsGroup.ndarray_sum, "bins": ResultsGroup.ndarray_mean, "edges": ResultsGroup.ndarray_mean, diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index f7bbc51c058..20e37c04ded 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -21,11 +21,12 @@ # J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import pytest +import numpy as np from numpy.testing import assert_allclose, assert_almost_equal import MDAnalysis as mda -from MDAnalysis.analysis.rdf import InterRDF_s, InterRDF +from MDAnalysis.analysis.rdf import InterRDF_s, InterRDF, nested_array_sum from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT @@ -117,7 +118,6 @@ def test_cdf(rdf): def test_density(u, sels, density, value, client_InterRDF_s): kwargs = {"density": density} if density is not None else {} rdf = InterRDF_s(u, sels, **kwargs).run(**client_InterRDF_s) - print(rdf.results.rdf[0][0][0], "RDF") assert_almost_equal(max(rdf.results.rdf[0][0][0]), value) if not density: s1 = u.select_atoms("name ZND and resid 289") @@ -174,6 +174,28 @@ def test_rdf_attr_warning(rdf, attr): with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] +def test_nested_array_sum(): + arr_1 = np.random.rand(1, 2, 75) + arr_2 = np.random.rand(2, 2, 75) + arr_3 = np.random.rand(1, 2, 75) + arr_4 = np.random.rand(2, 2, 75) + arrs = [[arr_1, arr_2], + [arr_3, arr_4]] + + result = nested_array_sum(arrs) + + assert result[0].shape == arrs[0][0].shape + assert result[0].shape == arrs[1][0].shape + assert result[1].shape == arrs[0][1].shape + assert result[1].shape == arrs[1][1].shape + + assert np.array_equal(result[0], arr_1 + arr_3) + assert np.array_equal(result[1], arr_2 + arr_4) + + arrs = [[np.ones((2, 2)), np.ones((2, 2))], + [np.ones((2, 2)), np.ones((2, 2))]] + + # tests for parallelization @pytest.mark.parametrize( From 4c710cfd99d45f2420415fe57740f7a241c05b8b Mon Sep 17 00:00:00 2001 From: tanishy7777 Date: Tue, 1 Apr 2025 17:40:53 +0530 Subject: [PATCH 16/20] fixes formatting --- package/MDAnalysis/analysis/rdf.py | 14 ++++++++------ testsuite/MDAnalysisTests/analysis/test_rdf.py | 1 + testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git 
a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 0604669bf13..5020a0fe159 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -86,9 +86,10 @@ def nested_array_sum(arrs): r"""Custom aggregator for nested arrays - This function takes a nested list or tuple of NumPy arrays, flattens it into a single list, - and aggregates the elements at alternating indices into two separate arrays. The first - array accumulates elements at even indices, while the second accumulates elements at odd indices. + This function takes a nested list or tuple of NumPy arrays, flattens it + into a single list, and aggregates the elements at alternating indices + into two separate arrays. The first array accumulates elements at even + indices, while the second accumulates elements at odd indices. Parameters ---------- @@ -99,8 +100,10 @@ def nested_array_sum(arrs): ------- list of ndarray A list containing two NumPy arrays: - - The first array is the sum of all elements at even indices in the sum of flattened arrays. - - The second array is the sum of all elements at odd indices in the sum of flattened arrays. + - The first array is the sum of all elements at even indices + in the sum of flattened arrays. + - The second array is the sum of all elements at odd indices + in the sum of flattened arrays. """ def flatten(arr): @@ -620,7 +623,6 @@ class InterRDF_s(AnalysisBase): Instead of `density=True` use `norm='density'` .. deprecated:: 2.3.0 The `universe` parameter is superflous. - .. 
versionchanged:: 2.9.0 Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` backends; use the new method :meth:`get_supported_backends` to see all diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index ef31e4f4fb7..1738c23cb83 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -157,6 +157,7 @@ def test_unknown_norm(sels): # tests for parallelization + @pytest.mark.parametrize( "classname,is_parallelizable", [ diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 20e37c04ded..1203681f82f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -174,6 +174,7 @@ def test_rdf_attr_warning(rdf, attr): with pytest.warns(DeprecationWarning, match=wmsg): getattr(rdf, attr) is rdf.results[attr] + def test_nested_array_sum(): arr_1 = np.random.rand(1, 2, 75) arr_2 = np.random.rand(2, 2, 75) From 41a9fadb2ae889868a95876a4af59121332e326e Mon Sep 17 00:00:00 2001 From: tanishy7777 Date: Tue, 1 Apr 2025 17:44:41 +0530 Subject: [PATCH 17/20] fixes formatting --- .../MDAnalysisTests/analysis/test_rdf.py | 15 ++++++++---- .../MDAnalysisTests/analysis/test_rdf_s.py | 24 ++++++++++++------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 1738c23cb83..88e56b6364e 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -155,6 +155,7 @@ def test_unknown_norm(sels): with pytest.raises(ValueError, match="invalid norm"): InterRDF(s1, s2, sels, norm="foo") + # tests for parallelization @@ -162,7 +163,7 @@ def test_unknown_norm(sels): "classname,is_parallelizable", [ (mda.analysis.rdf.InterRDF, True), - ] + ], ) def 
test_class_is_parallelizable(classname, is_parallelizable): assert classname._analysis_algorithm_is_parallelizable == is_parallelizable @@ -171,9 +172,15 @@ def test_class_is_parallelizable(classname, is_parallelizable): @pytest.mark.parametrize( "classname,backends", [ - (mda.analysis.rdf.InterRDF, - ('serial', 'multiprocessing', 'dask',)), - ] + ( + mda.analysis.rdf.InterRDF, + ( + "serial", + "multiprocessing", + "dask", + ), + ), + ], ) def test_supported_backends(classname, backends): assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 1203681f82f..8d6ee027891 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -180,8 +180,7 @@ def test_nested_array_sum(): arr_2 = np.random.rand(2, 2, 75) arr_3 = np.random.rand(1, 2, 75) arr_4 = np.random.rand(2, 2, 75) - arrs = [[arr_1, arr_2], - [arr_3, arr_4]] + arrs = [[arr_1, arr_2], [arr_3, arr_4]] result = nested_array_sum(arrs) @@ -193,17 +192,20 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) - arrs = [[np.ones((2, 2)), np.ones((2, 2))], - [np.ones((2, 2)), np.ones((2, 2))]] + arrs = [ + [np.ones((2, 2)), np.ones((2, 2))], + [np.ones((2, 2)), np.ones((2, 2))], + ] # tests for parallelization + @pytest.mark.parametrize( "classname,is_parallelizable", [ (mda.analysis.rdf.InterRDF_s, True), - ] + ], ) def test_class_is_parallelizable(classname, is_parallelizable): assert classname._analysis_algorithm_is_parallelizable == is_parallelizable @@ -212,9 +214,15 @@ def test_class_is_parallelizable(classname, is_parallelizable): @pytest.mark.parametrize( "classname,backends", [ - (mda.analysis.rdf.InterRDF_s, - ('serial', 'multiprocessing', 'dask',)), - ] + ( + mda.analysis.rdf.InterRDF_s, + ( + "serial", + "multiprocessing", + "dask", + ), + ), + ], ) def 
test_supported_backends(classname, backends): assert classname.get_supported_backends() == backends From b3ba91d5f328e7ead2cb93d95f51c86190423d5e Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 8 Oct 2025 16:34:08 +0100 Subject: [PATCH 18/20] Remove unused code from test_nested_array_sum --- testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index e1f1149e233..2ddedd6c809 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -199,11 +199,6 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) - arrs = [ - [np.ones((2, 2)), np.ones((2, 2))], - [np.ones((2, 2)), np.ones((2, 2))], - ] - # tests for parallelization From 08b1795ac64bd46bb364de7197afdcd41c447f2a Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 8 Oct 2025 16:39:12 +0100 Subject: [PATCH 19/20] remove unnecessary tests from test_rdf and test_rdf_s --- .../MDAnalysisTests/analysis/test_rdf.py | 28 ------------------- .../MDAnalysisTests/analysis/test_rdf_s.py | 28 ------------------- 2 files changed, 56 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index 54b42bddbfb..a6a5eae5aa7 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -163,31 +163,3 @@ def test_norm(sels, backend): rdf = InterRDF(s1, s2, norm="none", backend=backend).run() assert_allclose(max(rdf.results.rdf), 4) - -# tests for parallelization - -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf.InterRDF, True), - ], -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname._analysis_algorithm_is_parallelizable == is_parallelizable - - 
-@pytest.mark.parametrize( - "classname,backends", - [ - ( - mda.analysis.rdf.InterRDF, - ( - "serial", - "multiprocessing", - "dask", - ), - ), - ], -) -def test_supported_backends(classname, backends): - assert classname.get_supported_backends() == backends diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 2ddedd6c809..7269757741f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -199,31 +199,3 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) - -# tests for parallelization - -@pytest.mark.parametrize( - "classname,is_parallelizable", - [ - (mda.analysis.rdf.InterRDF_s, True), - ], -) -def test_class_is_parallelizable(classname, is_parallelizable): - assert classname._analysis_algorithm_is_parallelizable == is_parallelizable - - -@pytest.mark.parametrize( - "classname,backends", - [ - ( - mda.analysis.rdf.InterRDF_s, - ( - "serial", - "multiprocessing", - "dask", - ), - ), - ], -) -def test_supported_backends(classname, backends): - assert classname.get_supported_backends() == backends From 79601e6469ba689f34e7fbbd45c840a70fa10a5f Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 8 Oct 2025 17:09:16 +0100 Subject: [PATCH 20/20] Make linters happy --- testsuite/MDAnalysisTests/analysis/test_rdf.py | 1 - testsuite/MDAnalysisTests/analysis/test_rdf_s.py | 1 - 2 files changed, 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py index a6a5eae5aa7..b438b73b7fe 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py @@ -162,4 +162,3 @@ def test_norm(sels, backend): s1, s2 = sels rdf = InterRDF(s1, s2, norm="none", backend=backend).run() assert_allclose(max(rdf.results.rdf), 4) - diff --git 
a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py index 7269757741f..5095c4d4326 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py +++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py @@ -198,4 +198,3 @@ def test_nested_array_sum(): assert np.array_equal(result[0], arr_1 + arr_3) assert np.array_equal(result[1], arr_2 + arr_4) -