Skip to content

Commit

Permalink
Use new function for saving results
Browse files Browse the repository at this point in the history
  • Loading branch information
PicoCentauri committed May 16, 2021
1 parent ab4c3c6 commit c5718a9
Showing 1 changed file with 168 additions and 11 deletions.
179 changes: 168 additions & 11 deletions src/mdacli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@
this functionality.
"""
import argparse
from collections import defaultdict
import importlib
import inspect
import json
import os
import re
import pickle
import sys
import warnings
from collections import defaultdict

import zipfile

import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis import __all__
from MDAnalysis.analysis.base import AnalysisBase
Expand Down Expand Up @@ -395,6 +399,24 @@ def create_CLI(cli_parser, interface_name, parameters):
help="step or time step for evaluation. (default: %(default)s)"
)

common_group.add_argument(
"-pre",
dest="output_prefix",
type=str,
default="",
help="Additional prefix for all output files. Files will be "
" automatically named by the used module (default: %(default)s)"
)

common_group.add_argument(
"-o",
dest="output_directory",
type=str,
default=".",
help="Directory in which the output files produced will be stored."
"(default: %(default)s)"
)

common_group.add_argument(
"-v",
dest="verbose",
Expand Down Expand Up @@ -469,6 +491,142 @@ def create_CLI(cli_parser, interface_name, parameters):
)
return

def stack_1d_arrays_list(list_1D, extra_list=None):
    """Stack 1D arrays of equal length vertically, grouped by their length.

    Arrays in `list_1D` that share the same length are stacked with
    `numpy.vstack` into one 2D array (one row per input array). The
    groups are returned in order of increasing length, and inside each
    group the rows keep the order they had in `list_1D`.

    Parameters
    ----------
    list_1D : list
        list of 1 dimensional numpy arrays
    extra_list : list
        additional list (same number of items as `list_1D`) whose items
        are regrouped and stacked exactly like the items of `list_1D`

    Returns
    -------
    out_lists : list
        list of stacked 2D numpy arrays organized by their length
    out_extra : list
        list of stacked 2D numpy arrays built from `extra_list` with the
        same grouping as `out_lists`. Only returned (as the second item
        of a tuple) if `extra_list` is not None.
    """
    # Group the *indices* by array length. Insertion order inside each
    # group is the input order, which keeps the row order deterministic
    # (a plain argsort on the lengths gives no such guarantee for ties).
    indices_by_length = {}
    for index, array in enumerate(list_1D):
        indices_by_length.setdefault(len(array), []).append(index)

    grouped_indices = [indices_by_length[length]
                       for length in sorted(indices_by_length)]

    # Concatenate arrays of the same length
    out_lists = [np.vstack([list_1D[i] for i in indices])
                 for indices in grouped_indices]

    if extra_list is None:
        return out_lists

    out_extra = [np.vstack([extra_list[i] for i in indices])
                 for indices in grouped_indices]
    return out_lists, out_extra


def save_results(fprefix, results):
    """Save the attributes of a results instance to disk.

    1D, 2D and 3D numpy arrays are saved to csv files. All 1D arrays
    of the same length are vertically stacked and written to one file.
    A 3D array is split along its shortest axis; the resulting 2D
    slices are written as csv files and bundled into a single zip
    archive ``{fprefix}_{key}.zip``. Higher dimensional arrays are
    skipped with a warning.

    Everything else is tried to be saved inside the json file
    ``{fprefix}.json``. Types which can not be saved into json are
    skipped with a warning.

    Parameters
    ----------
    fprefix : str
        prefix for all files saved
    results : `MDAnalysis.analysis.base.Results`
        A Results instance from which the stored data is taken.
    """
    list_1D = []
    list_1D_labels = []
    json_dict = {}

    for key, item in results.items():
        if isinstance(item, np.ndarray):
            # Remove extra dimensions
            item = np.squeeze(item)
            n_dims = item.ndim

            if n_dims == 1:
                list_1D.append(item)
                list_1D_labels.append(key)
            elif n_dims == 2:
                np.savetxt(fname=f"{fprefix}_{key}.csv",
                           X=item,
                           delimiter=',')
            elif n_dims == 3:
                _save_3d_array_as_zip(fprefix, key, item)
            else:
                warnings.warn("Saving numpy arrays with more than "
                              "three dimensions is currently not supported.")
        elif (item is None
              or isinstance(item, (bool, int, float, str,
                                   list, tuple, dict))):
            # This can be encoded in a json file
            json_dict[key] = item
        # NOTE(review): `Results` must be available at module level; it is
        # not among the imports visible in this part of the file - confirm.
        # The check sits after the builtin types above, which assumes that
        # `Results` does not subclass any of them - confirm.
        elif isinstance(item, Results):
            # Run `save_results` recursively if `item` is a results instance
            save_results(f"{fprefix}_{key}", item)
        else:
            warnings.warn(f"Saving {key} of type {type(item)} "
                          "is currently not supported.")

    # Stack 1D arrays and save them to csv
    if list_1D:
        out_lists, out_labels = stack_1d_arrays_list(list_1D, list_1D_labels)

        for out_list, out_label in zip(out_lists, out_labels):
            # Flatten with ravel: squeezing a single label would yield a
            # bare string, and joining that would split it into characters.
            labels = [str(label) for label in np.ravel(out_label)]

            # [3:] to align labels with entries
            np.savetxt(fname=f"{fprefix}_{'_'.join(labels)}.csv",
                       X=out_list.T,
                       header=''.join(f"{label:>25}"
                                      for label in labels)[3:])

    # Save everything which is left to a json file
    with open(f'{fprefix}.json', 'w') as f:
        json.dump(json_dict, f)


def _save_3d_array_as_zip(fprefix, key, item):
    """Write the 2D slices of a 3D array as csv files into one zip archive.

    The array is split along the axis with the fewest entries. The
    archive is written to ``{fprefix}_{key}.zip``; its members are named
    ``{key}_dim_{axis}_idx_{i}.csv``.
    """
    # Shortest axis of the *shape* (not the position of the smallest value)
    min_dim = int(np.argmin(item.shape))
    slices = np.split(item, item.shape[min_dim], axis=min_dim)

    # (path of the temporary csv on disk, member name inside the archive)
    csv_files = []
    try:
        for i, arr in enumerate(slices):
            member = f"{key}_dim_{min_dim}_idx_{i}.csv"
            path = f"{fprefix}_{member}"
            csv_files.append((path, member))
            np.savetxt(fname=path,
                       X=np.squeeze(arr, axis=min_dim),
                       delimiter=',')

        # Compress all csv files into a single zip archive
        with zipfile.ZipFile(f"{fprefix}_{key}.zip", 'w') as zip_file:
            for path, member in csv_files:
                zip_file.write(path,
                               arcname=member,
                               compress_type=zipfile.ZIP_DEFLATED)
    finally:
        # The loose csv files are only intermediates; remove them even
        # if writing or zipping failed half way.
        for path, _ in csv_files:
            if os.path.exists(path):
                os.remove(path)


def analyze_data(
# top and trajs need to be positional parameters in all CLIs
Expand Down Expand Up @@ -523,23 +681,21 @@ def analyze_data(
"".format(startframe, stopframe, step, u.trajectory.n_frames)) # noqa: E501

# Collect paramaters not necessary for initilizing ac object.
verbose = analysis_kwargs.pop("verbose")
analysis_kwargs.pop("func")
verbose = analysis_kwargs.pop("verbose")
output_directory = analysis_kwargs.pop("output_directory")
output_prefix = analysis_kwargs.pop("output_prefix")
output_prefix += "_" if len(output_prefix) > 0 else ""

ac = analysis_callable(**analysis_kwargs)
ac.run(start=startframe,
stop=stopframe,
step=step,
verbose=verbose)

try:
ac.save_results()
except AttributeError:
fname = analysis_callable.__name__ + ".pickle"
warnings.warn("No specific saving function."
"Pickling results into `{}`.".format(fname))
with open(fname, "wb") as f:
pickle.dump(ac, f)
save_results(os.path.join(output_directory,
f"{output_prefix}{type(ac).__name__}"),
ac.results)


def maincli(ap):
Expand Down Expand Up @@ -578,6 +734,7 @@ def setup_clients():
# adds each Analysis class/function as a CLI under 'cli_parser'
# to be writen
for interface_name, parameters in analysis_interfaces.items():
print(interface_name)
create_CLI(cli_parser, interface_name, parameters)

return ap
Expand Down

0 comments on commit c5718a9

Please sign in to comment.