Use new function for saving results

MDAnalysis · May 16, 2021 · c5718a9 · c5718a9
1 parent ab4c3c6
commit c5718a9
Showing 1 changed file with 168 additions and 11 deletions.
diff --git a/src/mdacli/cli.py b/src/mdacli/cli.py
@@ -13,14 +13,18 @@
 this functionality.
 """
 import argparse
+from collections import defaultdict
 import importlib
 import inspect
+import json
+import os
 import re
-import pickle
 import sys
 import warnings
-from collections import defaultdict
 
+import zipfile
+
+import numpy as np
 import MDAnalysis as mda
 from MDAnalysis.analysis import __all__
 from MDAnalysis.analysis.base import AnalysisBase
@@ -395,6 +399,24 @@ def create_CLI(cli_parser, interface_name, parameters):
         help="step or time step for evaluation. (default: %(default)s)"
         )
 
+    common_group.add_argument(
+        "-pre",
+        dest="output_prefix",
+        type=str,
+        default="",
+        help="Additional prefix for all output files. Files will be "
+             " automatically named by the used module (default: %(default)s)"
+        )
+
+    common_group.add_argument(
+        "-o",
+        dest="output_directory",
+        type=str,
+        default=".",
+        help="Directory in which the output files produced will be stored."
+             "(default: %(default)s)"
+        )
+
     common_group.add_argument(
         "-v",
         dest="verbose",
@@ -469,6 +491,142 @@ def create_CLI(cli_parser, interface_name, parameters):
                 )
     return
 
+def stack_1d_arrays_list(list_1D, extra_list=None):
+    """Group 1D arrays by length and stack every group into one 2D array.
+
+    Arrays of equal length are stacked vertically with `numpy.vstack`, so
+    each returned 2D array holds one input array per row. Groups are ordered
+    by increasing length and, inside a group, rows keep the order in which
+    the arrays appear in `list_1D`.
+
+    Parameters
+    ----------
+    list_1D : list
+        list of 1 dimensional numpy arrays
+    extra_list : list, optional
+        additional list with one entry per item of `list_1D` (for example
+        labels). It is reordered, grouped and stacked exactly like
+        `list_1D`.
+
+    Returns
+    -------
+    out_lists : list
+        list of stacked 2D numpy arrays, one per distinct length
+    out_extra : list
+        the entries of `extra_list` stacked group-wise in the same way.
+        Only returned if `extra_list` is not None.
+    """
+    # Length of every input array, in input order.
+    lengths = np.array([len(a) for a in list_1D], dtype=int)
+
+    # A stable sort is required: the default quicksort may shuffle arrays of
+    # equal length, which would make the row order within a group arbitrary.
+    sorted_idx = np.argsort(lengths, kind="stable")
+
+    # Boundaries of the runs of equal length inside the sorted sequence.
+    counts = np.unique(lengths, return_counts=True)[1]
+    bounds = np.hstack([[0], np.cumsum(counts)])
+
+    def _stack_groups(items):
+        """Reorder `items` by length and vstack each run of equal length."""
+        ordered = [items[i] for i in sorted_idx]
+        return [np.vstack(ordered[lo:hi])
+                for lo, hi in zip(bounds[:-1], bounds[1:])]
+
+    out_lists = _stack_groups(list_1D)
+
+    # Keep the original return shape: a bare list without `extra_list`,
+    # a pair of lists with it.
+    if extra_list is None:
+        return out_lists
+    return out_lists, _stack_groups(extra_list)
+
+
+def save_results(fprefix, results):
+    """Save the attributes of a results instance to disk.
+
+    Numpy arrays are squeezed and written to csv files: 1D arrays of equal
+    length share one file (one column per array), a 2D array gets its own
+    file and a 3D array is split along its shortest axis into 2D slices
+    which are bundled in one zip archive. Arrays with more dimensions are
+    skipped with a warning. Nested `Results` instances are saved
+    recursively with their key appended to the prefix.
+
+    Items of type bool, int, float, str, list, tuple, dict and None are
+    collected in one json file. Everything else is skipped with a warning.
+
+    Parameters
+    ----------
+    fprefix : str
+        prefix (may contain a directory) for all files saved
+    results : `MDAnalysis.analysis.base.Results`
+        A Results instance from which the stored data is taken.
+    """
+    list_1D = []
+    list_1D_labels = []
+    json_dict = {}
+
+    # NOTE(review): `Results` has to be available in the module namespace
+    # (presumably `MDAnalysis.analysis.base.Results`) - confirm the import.
+    for key, item in results.items():
+        if isinstance(item, Results):
+            save_results(f"{fprefix}_{key}", item)
+        elif isinstance(item, np.ndarray):
+            # Drop axes of length one; a single value stays a 1D array.
+            item = np.atleast_1d(np.squeeze(item))
+            if item.ndim == 1:
+                list_1D.append(item)
+                list_1D_labels.append(key)
+            elif item.ndim == 2:
+                np.savetxt(fname=f"{fprefix}_{key}.csv",
+                           X=item,
+                           delimiter=',')
+            elif item.ndim == 3:
+                # Index of the shortest axis, taken from the shape and not
+                # from the array values.
+                min_dim = int(np.argmin(item.shape))
+                slices = np.split(item, item.shape[min_dim], axis=min_dim)
+
+                # Write one csv per slice next to the archive, move it into
+                # the archive under its bare file name and delete it again.
+                with zipfile.ZipFile(f"{fprefix}_{key}.zip", 'w') as zipF:
+                    for i, arr in enumerate(slices):
+                        csv_name = f"{fprefix}_{key}_dim_{min_dim}_idx_{i}.csv"
+                        np.savetxt(fname=csv_name,
+                                   X=np.squeeze(arr, axis=min_dim),
+                                   delimiter=',')
+                        zipF.write(csv_name,
+                                   arcname=os.path.basename(csv_name),
+                                   compress_type=zipfile.ZIP_DEFLATED)
+                        os.remove(csv_name)
+            else:
+                warnings.warn("Saving numpy arrays with more than "
+                              "three dimensions is currently not supported.")
+        elif item is None or isinstance(
+                item, (bool, int, float, str, list, tuple, dict)):
+            # This can be encoded in a json file
+            json_dict[key] = item
+        else:
+            warnings.warn(f"Saving {key} of type {type(item)} "
+                          "is currently not supported.")
+
+    # Stack 1D arrays of equal length and save every stack to one csv
+    if len(list_1D) > 0:
+        out_lists, out_labels = stack_1d_arrays_list(list_1D, list_1D_labels)
+
+        for out_list, out_label in zip(out_lists, out_labels):
+            # ravel (not squeeze) keeps a list even for a single label
+            out_label = np.ravel(out_label).tolist()
+
+            # [3:] to align labels with entries
+            np.savetxt(fname=f"{fprefix}_{'_'.join(out_label)}.csv",
+                       X=out_list.T,
+                       header=''.join([f"{i:>25}" for i in out_label])[3:])
+
+    # Save everything which is left to a json file
+    with open(f'{fprefix}.json', 'w') as f:
+        json.dump(json_dict, f)
+
 
 def analyze_data(
         # top and trajs need to be positional parameters in all CLIs
@@ -523,23 +681,21 @@ def analyze_data(
                              "".format(startframe, stopframe, step, u.trajectory.n_frames))  # noqa: E501
 
     # Collect paramaters not necessary for initilizing ac object.
-    verbose = analysis_kwargs.pop("verbose")
     analysis_kwargs.pop("func")
+    verbose = analysis_kwargs.pop("verbose")
+    output_directory = analysis_kwargs.pop("output_directory")
+    output_prefix = analysis_kwargs.pop("output_prefix")
+    output_prefix += "_" if len(output_prefix) > 0 else ""
 
     ac = analysis_callable(**analysis_kwargs)
     ac.run(start=startframe,
            stop=stopframe,
            step=step,
            verbose=verbose)
 
-    try:
-        ac.save_results()
-    except AttributeError:
-        fname = analysis_callable.__name__ + ".pickle"
-        warnings.warn("No specific saving function."
-                      "Pickling results into `{}`.".format(fname))
-        with open(fname, "wb") as f:
-            pickle.dump(ac, f)
+    save_results(os.path.join(output_directory,
+                              f"{output_prefix}{type(ac).__name__}"),
+                 ac.results)
 
 
 def maincli(ap):
@@ -578,6 +734,7 @@ def setup_clients():
     # adds each Analysis class/function as a CLI under 'cli_parser'
     # to be writen
     for interface_name, parameters in analysis_interfaces.items():
+        print(interface_name)
         create_CLI(cli_parser, interface_name, parameters)
 
     return ap