diff --git a/CHANGELOG.md b/CHANGELOG.md index 856d076c..d692a0cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/) - Process Plugin support for List Data - Adds Exception handling to Processes - PyMask support for multiple masking files. + - PyFit will now filter out events if the Bin value is 0 ### Changed - Removed previous_event from Process Interface - Duplex Pipes are used over Simplex Pipes for Duplex Processes @@ -17,6 +18,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/) - Moved PyPWA.core.shared to PyPWA.libs - Split interface's plugins and internals to their own separate file based on the interfaces purpose. + - PyFit no longer assumes bins are named 'BinN' you must specify Bin names + in 'internal data'. ### Fixed - PyFit will now shutdown correctly when killed with Ctrl-C or other interrupt. diff --git a/PyPWA/progs/shell/__init__.py b/PyPWA/progs/shell/__init__.py index 00de0cbc..f68ac973 100644 --- a/PyPWA/progs/shell/__init__.py +++ b/PyPWA/progs/shell/__init__.py @@ -21,7 +21,7 @@ --------------------------------------- - fit - The package that contains PyFit - simulate - The package that contains PySimulate -- loaders - Module that loads the data and functions for both programs +- loaders - Package that loads the data and functions for both programs - pyshell_functions - Contains the example functions for PyFit and PySimulate - shell types - The static typing information for the expected user's functions. diff --git a/PyPWA/progs/shell/loaders.py b/PyPWA/progs/shell/loaders.py deleted file mode 100644 index 001713b1..00000000 --- a/PyPWA/progs/shell/loaders.py +++ /dev/null @@ -1,295 +0,0 @@ -# coding=utf-8 -# -# PyPWA, a scientific analysis toolkit. -# Copyright (C) 2016 JLab -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -""" -Shared logic between PyFit and PySimulate ------------------------------------------ - -- DataLoading - takes a data parsing object and use it to load data - for the two programs in a way that the data can be easily repacked into - processes. - -- FunctionLoader - used to load the setup and processing functions in a - predictable way. -""" - -import logging -import os -from typing import Optional as Opt -from typing import Dict, List, Union - -import numpy -from numpy import ndarray - -from PyPWA import AUTHOR, VERSION -from PyPWA.libs import plugin_loader -from PyPWA.libs.interfaces import data_loaders -from PyPWA.progs.shell import shell_types - -__credits__ = ["Mark Jones"] -__author__ = AUTHOR -__version__ = VERSION - - -class DataLoading(object): - - __LOGGER = logging.getLogger(__name__ + ".DataLoading") - - def __init__( - self, - parser, # type: data_loaders.ParserPlugin - file_data, # type: str - internal_data=None, # type: Opt[Dict[str, str]] - qfactor=None, # type: Opt[str] - monte_carlo=None # type: Opt[str] - ): - # type: (...) -> None - self._parser = parser - self._data_file = file_data - self._qfactor_file = qfactor - self._monte_carlo_file = monte_carlo - if internal_data: - self.__internal_names = internal_data - else: - self.__internal_names = dict() - self.__data = None # type: ndarray - self.__qfactor = None # type: ndarray - self.__monte_carlo = None # type: ndarray - self.__binned = None # type: ndarray - self.__event_errors = None # type: ndarray - self.__expected_values = None # type: ndarray - self.__load_data() - - def __load_data(self): - self.__parse_data_file() - self.__process_data() - self.__parse_qfactor_file() - self.__parse_monte_carlo_file() - - def __parse_data_file(self): - if self.__is_file(self._data_file): - self.__LOGGER.info("Loading data.") - self.__data = self._parser.parse(self._data_file) - else: - raise ValueError('"' + self._data_file + '"' + " is not a file!") - - def __process_data(self): - if "quality factor" in self.__internal_names: - self.__qfactor = self.__extract_data( - self.__internal_names["quality factor"] - ) - else: - self.__qfactor = self.__extract_data("qfactor") - - if "binned data" in self.__internal_names: - self.__binned = self.__extract_data( - self.__internal_names["binned data"] - ) - else: - self.__binned = self.__extract_data("BinN") - - if "event errors" in self.__internal_names: - self.__event_errors = self.__extract_data( - self.__internal_names["event errors"] - ) - else: - self.__event_errors = numpy.ones(len(self.__data)) - - if "expected values" in self.__internal_names: - self.__expected_values = self.__extract_data( - self.__internal_names["expected values"] - ) - else: - self.__expected_values = numpy.ones(len(self.__data)) - - def __extract_data(self, column): - # type: (str) -> ndarray - names = self.__get_type_names() - if column in names: - self.__LOGGER.info("Extracting '%s' from data." % column) - names.remove(column) - data = self.__data[column] - self.__data = self.__data[names] - else: - data = numpy.ones(len(self.__data)) - - return data - - def __get_type_names(self): - # type: () -> List[str] - try: - return list(self.__data.dtype.names) - except TypeError: - return [] - - def __parse_qfactor_file(self): - if self.__is_file(self._qfactor_file): - self.__LOGGER.info("Loading QFactor data.") - self.__qfactor = self._parser.parse(self._qfactor_file) - elif self.__qfactor is None: - self.__qfactor = numpy.ones(len(self.__data)) - - def __parse_monte_carlo_file(self): - if self.__is_file(self._monte_carlo_file): - self.__LOGGER.info("Loading Monte Carlo Data.") - self.__monte_carlo = self._parser.parse(self._monte_carlo_file) - else: - self.__monte_carlo = None - - @staticmethod - def __is_file(file_location): - # type: (str) -> bool - if isinstance(file_location, str) and os.path.isfile(file_location): - return True - else: - return False - - def write(self, file_location, data): - # type: (str, ndarray) -> None - self._parser.write(file_location, data) - - @property - def data(self): - # type: () -> ndarray - return self.__data - - @property - def qfactor(self): - # type: () -> ndarray - return self.__qfactor - - @property - def monte_carlo(self): - # type: () -> Union[ndarray, None] - return self.__monte_carlo - - @property - def binned(self): - # type: () -> ndarray - return self.__binned - - @property - def event_errors(self): - # type: () -> ndarray - return self.__event_errors - - @property - def expected_values(self): - # type: () -> ndarray - return self.__expected_values - - -class _ProcessFunctionLoader(object): - - __LOGGER = logging.getLogger(__name__ + "._ProcessingLoader") - - def __init__(self, loader, name): - # type: (plugin_loader.PluginLoader, str) -> None - self.__loader = loader - self.__process_name = name - self.__function = None # type: shell_types.users_processing - self.__try_to_load_processing_function() - - def __try_to_load_processing_function(self): - try: - self.__load_processing_function() - except Exception as error: - self.__handle_processing_error(error) - - def __load_processing_function(self): - self.__function = self.__loader.get_by_name(self.__process_name) - - def __handle_processing_error(self, error): - self.__LOGGER.critical("Failed to load %s!" % self.__process_name) - raise error - - @property - def process(self): - # type: () -> shell_types.users_processing - return self.__function - - -class _SetupFunctionLoader(object): - - __LOGGER = logging.getLogger(__name__ + "._SetupFunctionLoader") - - def __init__(self, loader, name): - # type: (plugin_loader.PluginLoader, str) -> None - self.__loader = loader - self.__function = None # type: shell_types.users_setup - self.__process_setup_name(name) - - def __process_setup_name(self, name): - # type: (Opt[str]) -> None - if isinstance(name, str): - self.__try_to_load_setup_function(name) - else: - self.__set_setup_to_empty() - - def __try_to_load_setup_function(self, name): - # type: (str) -> None - try: - self.__load_setup_function(name) - except Exception as error: - self.__handle_setup_error(name, error) - - def __load_setup_function(self, name): - # type: (str) -> None - self.__function = self.__loader.get_by_name(name) - - def __handle_setup_error(self, name, error): - # type: (str, Exception) -> None - self.__LOGGER.critical("%s failed to load!" % name) - self.__LOGGER.exception(error) - self.__set_setup_to_empty() - - def __set_setup_to_empty(self): - self.__LOGGER.info("No setup function found, settings to empty.") - self.__function = self.__empty_function - - @staticmethod - def __empty_function(): - # type: () -> None - pass - - @property - def setup(self): - # type: () -> shell_types.users_setup - return self.__function - - -class FunctionLoader(object): - - __LOGGER = logging.getLogger(__name__ + ".FunctionLoader") - - def __init__(self, location, process_name, setup_name=None): - # type: (str, str, Opt[str]) -> None - loader = plugin_loader.PluginLoader() - loader.add_plugin_location(location) - self.__process_loader = _ProcessFunctionLoader(loader, process_name) - self.__setup_loader = _SetupFunctionLoader(loader, setup_name) - - @property - def process(self): - # type: () -> shell_types.users_processing - return self.__process_loader.process - - @property - def setup(self): - # type: () -> shell_types.users_setup - return self.__setup_loader.setup diff --git a/PyPWA/progs/shell/loaders/__init__.py b/PyPWA/progs/shell/loaders/__init__.py new file mode 100644 index 00000000..155c7ddc --- /dev/null +++ b/PyPWA/progs/shell/loaders/__init__.py @@ -0,0 +1,32 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Data and Function Loading for the PyShell +----------------------------------------- +- data_loader - Loads in all data for PyShell +- function_loading - loads in the users functions. +""" + +from PyPWA import AUTHOR, VERSION +from PyPWA.progs.shell.loaders.data_loader.load import DataLoading +from PyPWA.progs.shell.loaders._function_loading import FunctionLoader + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION diff --git a/PyPWA/progs/shell/loaders/_function_loading.py b/PyPWA/progs/shell/loaders/_function_loading.py new file mode 100644 index 00000000..a217a800 --- /dev/null +++ b/PyPWA/progs/shell/loaders/_function_loading.py @@ -0,0 +1,138 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Loads functions for PyShell +--------------------------- +- _ProcessFunctionLoader - Loads the users processing function +- _SetupFunctionLoader - Loads the users setup function +- FunctionLoader - Calls the Process and Setup Function Loaders and exposes + their results. +""" + +import logging +from typing import Optional as Opt + +from PyPWA import AUTHOR, VERSION +from PyPWA.libs import plugin_loader +from PyPWA.progs.shell import shell_types + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION + + +class _ProcessFunctionLoader(object): + + __LOGGER = logging.getLogger(__name__ + "._ProcessingLoader") + + def __init__(self, loader, name): + # type: (plugin_loader.PluginLoader, str) -> None + self.__loader = loader + self.__process_name = name + self.__function = None # type: shell_types.users_processing + self.__try_to_load_processing_function() + + def __try_to_load_processing_function(self): + try: + self.__load_processing_function() + except Exception as error: + self.__handle_processing_error(error) + + def __load_processing_function(self): + self.__function = self.__loader.get_by_name(self.__process_name) + + def __handle_processing_error(self, error): + self.__LOGGER.critical("Failed to load %s!" % self.__process_name) + raise error + + @property + def process(self): + # type: () -> shell_types.users_processing + return self.__function + + +class _SetupFunctionLoader(object): + + __LOGGER = logging.getLogger(__name__ + "._SetupFunctionLoader") + + def __init__(self, loader, name): + # type: (plugin_loader.PluginLoader, str) -> None + self.__loader = loader + self.__function = None # type: shell_types.users_setup + self.__process_setup_name(name) + + def __process_setup_name(self, name): + # type: (Opt[str]) -> None + if isinstance(name, str): + self.__try_to_load_setup_function(name) + else: + self.__set_setup_to_empty() + + def __try_to_load_setup_function(self, name): + # type: (str) -> None + try: + self.__load_setup_function(name) + except Exception as error: + self.__handle_setup_error(name, error) + + def __load_setup_function(self, name): + # type: (str) -> None + self.__function = self.__loader.get_by_name(name) + + def __handle_setup_error(self, name, error): + # type: (str, Exception) -> None + self.__LOGGER.critical("%s failed to load!" % name) + self.__LOGGER.exception(error) + self.__set_setup_to_empty() + + def __set_setup_to_empty(self): + self.__LOGGER.info("No setup function found, settings to empty.") + self.__function = self.__empty_function + + @staticmethod + def __empty_function(): + # type: () -> None + pass + + @property + def setup(self): + # type: () -> shell_types.users_setup + return self.__function + + +class FunctionLoader(object): + + __LOGGER = logging.getLogger(__name__ + ".FunctionLoader") + + def __init__(self, location, process_name, setup_name=None): + # type: (str, str, Opt[str]) -> None + loader = plugin_loader.PluginLoader() + loader.add_plugin_location(location) + self.__process_loader = _ProcessFunctionLoader(loader, process_name) + self.__setup_loader = _SetupFunctionLoader(loader, setup_name) + + @property + def process(self): + # type: () -> shell_types.users_processing + return self.__process_loader.process + + @property + def setup(self): + # type: () -> shell_types.users_setup + return self.__setup_loader.setup diff --git a/PyPWA/progs/shell/loaders/data_loader/__init__.py b/PyPWA/progs/shell/loaders/data_loader/__init__.py new file mode 100644 index 00000000..3cc0f026 --- /dev/null +++ b/PyPWA/progs/shell/loaders/data_loader/__init__.py @@ -0,0 +1,37 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Data Loading for PyShell +------------------------ +This package loads and filters data for the PyShell. + +- _bin_filter - This filters all needed data types to remove events that are + associated with a Bin of zero +- _dataset_storage - A simple data structure object that stores all of the + loaded data for the loader +- _file_handling - This actually parses the files for the Data Loader +- _setup_dataset - This moves around the loaded data to its correct place. +- load - The main entry point for the data_loader. +""" + +from PyPWA import AUTHOR, VERSION + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION diff --git a/PyPWA/progs/shell/loaders/data_loader/_bin_filter.py b/PyPWA/progs/shell/loaders/data_loader/_bin_filter.py new file mode 100644 index 00000000..f741c183 --- /dev/null +++ b/PyPWA/progs/shell/loaders/data_loader/_bin_filter.py @@ -0,0 +1,110 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Logic for filtering out events by impossible bins +------------------------------------------------- + +- _FilterData - Does the actual filtering of the dataset. +- BinFilter - Quickly searches the dataset for bins of zeros and passes the + dataset to the Filter object if it finds any, otherwise just passes the + dataset along. +""" + +import warnings +import numpy + +from PyPWA import AUTHOR, VERSION +from PyPWA.progs.shell.loaders.data_loader import _dataset_storage + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION + + +class _FilterDataset(object): + + __DATASET_FILTER_NAMES = [ + 'data', 'binned', 'qfactor', 'expected_values', 'event_errors' + ] + + def __init__(self): + self.__initial_dataset = None # type: _dataset_storage.DataStorage + self.__zero_indexes = None # type: numpy.ndarray + self.__new_dataset = None # type: _dataset_storage.DataStorage + + def filter(self, dataset): + # type: (_dataset_storage.DataStorage) -> _dataset_storage.DataStorage + self.__initial_dataset = dataset + self.__get_zero_indexes() + self.__create_new_dataset() + return self.__new_dataset + + def __get_zero_indexes(self): + locations = numpy.where(self.__initial_dataset.binned == 0.) + self.__zero_indexes = numpy.ravel(locations) + + def __create_new_dataset(self): + self.__new_dataset = _dataset_storage.DataStorage() + self.__iterate_over_dataset_names() + self.__pass_extra_values() + + def __iterate_over_dataset_names(self): + for dataset_name in self.__DATASET_FILTER_NAMES: + self.__filter_dataset_name(dataset_name) + + def __filter_dataset_name(self, dataset_name): + # type: (str) -> None + extracted_array = getattr(self.__initial_dataset, dataset_name) + filtered_array = numpy.delete(extracted_array, self.__zero_indexes) + setattr(self.__new_dataset, dataset_name, filtered_array) + + def __pass_extra_values(self): + self.__new_dataset.single_array = self.__initial_dataset.single_array + self.__new_dataset.monte_carlo = self.__initial_dataset.monte_carlo + + +class BinFilter(object): + + def __init__(self): + self.__filter_utility = _FilterDataset() + + def __call__(self, dataset): + # type: (_dataset_storage.DataStorage) -> _dataset_storage.DataStorage + if self.__bins_have_zeros(dataset): + self.__warn_user_about_dataset(dataset) + return self.__filter_data_set(dataset) + else: + return dataset + + def __bins_have_zeros(self, dataset): + # type: (_dataset_storage.DataStorage) -> bool + return numpy.any(dataset.binned == 0.) + + @staticmethod + def __warn_user_about_dataset(dataset): + # type: (_dataset_storage.DataStorage) -> None + number_of_zero_bins = len(dataset.binned[dataset.binned == 0.]) + warnings.warn( + "Removing %d event(s) from dataset where bin value is zero." % + number_of_zero_bins + ) + + def __filter_data_set(self, dataset): + # type: (_dataset_storage.DataStorage) -> _dataset_storage.DataStorage + return self.__filter_utility.filter(dataset) diff --git a/PyPWA/progs/shell/loaders/data_loader/_dataset_storage.py b/PyPWA/progs/shell/loaders/data_loader/_dataset_storage.py new file mode 100644 index 00000000..e060a674 --- /dev/null +++ b/PyPWA/progs/shell/loaders/data_loader/_dataset_storage.py @@ -0,0 +1,38 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +A simple dataset to store the loaded data types. +""" + +from PyPWA import AUTHOR, VERSION + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION + + +class DataStorage(object): + + monte_carlo = None + single_array = None + data = None + qfactor = None + binned = None + event_errors = None + expected_values = None diff --git a/PyPWA/progs/shell/loaders/data_loader/_file_handling.py b/PyPWA/progs/shell/loaders/data_loader/_file_handling.py new file mode 100644 index 00000000..38ef06ba --- /dev/null +++ b/PyPWA/progs/shell/loaders/data_loader/_file_handling.py @@ -0,0 +1,106 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Loads files for the Data Loader +------------------------------- +- _FileLoader - Actually parses the files if a filename is provided. +- DataHandler - Takes all loaded data and exposes it through its properties. +""" + +import logging +from typing import Optional as Opt + +import numpy + +from PyPWA import AUTHOR, VERSION +from PyPWA.libs.interfaces import data_loaders + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION + + +class _FileLoader(object): + + __LOGGER = logging.getLogger(__name__ +"._FileLoader") + + def __init__(self, data_parser): + # type: (data_loaders.ParserPlugin) -> None + self.__data_parser = data_parser + + def load_file(self, file): + # type: (Opt[str]) -> Opt[numpy.ndarray] + if file: + return self.__try_to_load_file(file) + + def __try_to_load_file(self, file): + # type: (str) -> Opt[numpy.ndarray] + try: + return self.__data_parser.parse(file) + except Exception as error: + self.__LOGGER.exception(error) + + +class DataHandler(object): + + __LOGGER = logging.getLogger(__name__ + "._DataFileLoader") + + def __init__( + self, + data_parser, # type: data_loaders.ParserPlugin + data, # type: Opt[str] + monte_carlo, # type: Opt[str] + qfactor # type: Opt[str] + ): + # type: (...) -> None + self.__data_parser = data_parser + self.__file_loader = _FileLoader(data_parser) + self.__data = self.__file_loader.load_file(data) + self.__monte_carlo = self.__file_loader.load_file(monte_carlo) + self.__qfactor = self.__file_loader.load_file(qfactor) + + def write(self, file, array): + # type: (str, numpy.ndarray) -> None + self.__data_parser.write(file, array) + + @property + def data(self): + # type: () -> Opt[numpy.ndarray] + if self.__data_is_columned(): + return self.__data + + def __data_is_columned(self): + # type: () -> bool + return bool(self.__data.dtype.names) + + @property + def monte_carlo(self): + # type: () -> Opt[numpy.ndarray] + return self.__monte_carlo + + @property + def qfactor(self): + # type: () -> Opt[numpy.ndarray] + return self.__qfactor + + @property + def single_array(self): + # type: () -> Opt[numpy.ndarray] + if not self.__data_is_columned(): + return self.__data diff --git a/PyPWA/progs/shell/loaders/data_loader/_setup_dataset.py b/PyPWA/progs/shell/loaders/data_loader/_setup_dataset.py new file mode 100644 index 00000000..a80f07a4 --- /dev/null +++ b/PyPWA/progs/shell/loaders/data_loader/_setup_dataset.py @@ -0,0 +1,182 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Moves the data in the dataset to its correct positions. +------------------------------------------------------- +- _InternalDataExtractor - Takes the data and extracts columns from it, + all while remove those columns from the original data. +- _QFactorSetup - Sets up the QFactor data using either the source data + file or a separate data file. +- LoadData - Main entry point, extracts and loads all the data into the + dateset object and returns that object. +""" + +import logging +from typing import Dict +from typing import Optional as Opt + +import numpy + +from PyPWA import AUTHOR, VERSION +from PyPWA.libs.interfaces import data_loaders +from PyPWA.progs.shell.loaders.data_loader import _dataset_storage +from PyPWA.progs.shell.loaders.data_loader import _file_handling + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION + + +class _InternalDataExtractor(object): + + __LOGGER = logging.getLogger(__name__ + "._ColumnExtractor") + + def __init__(self, data_loader, internal_names): + # type: (_file_handling.DataHandler, Dict[str, str]) -> None + self.__source_array = data_loader.data + self.__internal_names = internal_names + if self.__array_exists(): + self.__column_names = list(data_loader.data.dtype.names) + else: + self.__column_names = list() + + def __array_exists(self): + # type: () -> bool + return isinstance(self.__source_array, numpy.ndarray) + + def extract(self, name): + # type: (str) -> numpy.ndarray + if name in self.__internal_names and self.__array_exists(): + return self.__process_column_name(self.__internal_names[name]) + else: + return self.__empty_array() + + def __process_column_name(self, column): + # type: (str) -> numpy.ndarray + if self.__column_in_array(column): + return self.__extract_column_from_array(column) + else: + return self.__empty_array() + + def __column_in_array(self, column): + # type: (str) -> bool + return column in self.__column_names + + def __extract_column_from_array(self, column): + # type: (str) -> numpy.ndarray + extracted_data = self.__extract_array_from_source_array(column) + self.__trim_extracted_column_from_source_array() + return extracted_data + + def __extract_array_from_source_array(self, column): + # type: (str) -> numpy.ndarray + self.__column_names.remove(column) + return self.__source_array[column] + + def __trim_extracted_column_from_source_array(self): + self.__source_array = self.__source_array[self.__column_names] + + def __empty_array(self): + # type: () -> numpy.ndarray + empty_array = numpy.ones(len(self.__source_array)) + return empty_array + + @property + def trimmed_array(self): + # type: () -> numpy.ndarray + return self.__source_array + + +class _QFactorSetup(object): + + def __init__(self, data_loader, extraction): + # type: (_file_handling.DataHandler, _InternalDataExtractor) -> None + self.__data_loader = data_loader + self.__extractor = extraction + + def load_data(self): + # type: () -> numpy.ndarray + if isinstance(self.__data_loader.qfactor, numpy.ndarray): + return self.__data_loader.qfactor + else: + return self.__extracted_data() + + def __extracted_data(self): + # type: () -> numpy.ndarray + return self.__extractor.extract("quality factor") + + +class LoadData(object): + + __LOGGER = logging.getLogger(__name__ + ".DataLoading") + + def __init__( + self, + parser, # type: data_loaders.ParserPlugin + data, # type: str + internal_data, # type: Dict[str, str] + qfactor=None, # type: Opt[str] + monte_carlo=None # type: Opt[str] + ): + # type: (...) -> None + self.__data_handler = _file_handling.DataHandler( + parser, data, monte_carlo, qfactor + ) + self.__extractor = _InternalDataExtractor( + self.__data_handler, internal_data + ) + self.__qfactor_setup = _QFactorSetup( + self.__data_handler, self.__extractor + ) + self.__storage = _dataset_storage.DataStorage() + + def load(self): + # type: () -> _dataset_storage.DataStorage + if isinstance(self.__data_handler.data, numpy.ndarray): + self.__process_columns() + self.__process_data() + return self.__storage + + def __process_columns(self): + self.__get_binned() + self.__get_qfactor() + self.__get_event_errors() + self.__get_expected_values() + + def __get_binned(self): + self.__storage.binned = self.__extractor.extract("binned data") + + def __get_qfactor(self): + self.__storage.qfactor = self.__qfactor_setup.load_data() + + def __get_event_errors(self): + self.__storage.event_errors = self.__extractor.extract("event errors") + + def __get_expected_values(self): + self.__storage.expected_values = self.__extractor.extract( + "expected values" + ) + + def __process_data(self): + self.__storage.data = self.__extractor.trimmed_array + self.__storage.monte_carlo = self.__data_handler.monte_carlo + self.__storage.single_array = self.__data_handler.single_array + + def write(self, file, array): + self.__data_handler.write(file, array) diff --git a/PyPWA/progs/shell/loaders/data_loader/load.py b/PyPWA/progs/shell/loaders/data_loader/load.py new file mode 100644 index 00000000..a2522be7 --- /dev/null +++ b/PyPWA/progs/shell/loaders/data_loader/load.py @@ -0,0 +1,108 @@ +# coding=utf-8 +# +# PyPWA, a scientific analysis toolkit. +# Copyright (C) 2016 JLab +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Data Loading, the main place to load data for the Shell +------------------------------------------------------- +Loads all the data from setup_dataset and filters it with _bin_filter, +then exposes that through it's properties. +""" + +import logging +from typing import Dict, Union +from typing import Optional as Opt + +import numpy + +from PyPWA import AUTHOR, VERSION +from PyPWA.libs.interfaces import data_loaders +from PyPWA.progs.shell.loaders.data_loader import _bin_filter +from PyPWA.progs.shell.loaders.data_loader import _setup_dataset +from PyPWA.progs.shell.loaders.data_loader import _dataset_storage + +__credits__ = ["Mark Jones"] +__author__ = AUTHOR +__version__ = VERSION + + +class DataLoading(object): + + __LOGGER = logging.getLogger(__name__ + ".DataLoading") + + def __init__( + self, + parser, # type: data_loaders.ParserPlugin + data, # type: str + internal_data=None, # type: Opt[Dict[str, str]] + qfactor=None, # type: Opt[str] + monte_carlo=None # type: Opt[str] + ): + # type: (...) -> None + if not internal_data: + internal_data = {} + + self.__storage = None # type: _dataset_storage.DataStorage + + self.__loader = _setup_dataset.LoadData( + parser, data, internal_data, qfactor, monte_carlo + ) + self.__filter = _bin_filter.BinFilter() + self.__load_data() + + def __load_data(self): + storage = self.__loader.load() + self.__storage = self.__filter(storage) + + def write(self, file_location, data): + # type: (str, numpy.ndarray) -> None + self.__loader.write(file_location, data) + + @property + def data(self): + # type: () -> numpy.ndarray + return self.__storage.data + + @property + def qfactor(self): + # type: () -> numpy.ndarray + return self.__storage.qfactor + + @property + def monte_carlo(self): + # type: () -> Union[numpy.ndarray, None] + return self.__storage.monte_carlo + + @property + def binned(self): + # type: () -> numpy.ndarray + return self.__storage.binned + + @property + def event_errors(self): + # type: () -> numpy.ndarray + return self.__storage.event_errors + + @property + def expected_values(self): + # type: () -> numpy.ndarray + return self.__storage.expected_values + + @property + def single_array(self): + # type: () -> numpy.ndarray + return self.__storage.single_array diff --git a/PyPWA/progs/shell/simulate/pysimulate.py b/PyPWA/progs/shell/simulate/pysimulate.py index e69b55e4..efcc5ce4 100644 --- a/PyPWA/progs/shell/simulate/pysimulate.py +++ b/PyPWA/progs/shell/simulate/pysimulate.py @@ -18,7 +18,7 @@ """ The Simulation program, this object simply routes the data around depending -on the type of program execution passed to it, the actual logic for the +on the type of program execution passed to it, the actual logic for the program exists in _libs.py """ @@ -110,7 +110,7 @@ def __intensity_program(self): self.__write_intensity_data() def __rejection_program(self): - self.__intensity_array = self.__data_loader.data + self.__intensity_array = self.__data_loader.single_array self.__setup_rejection_calc() self.__rejection_calc.rejection_method() self.__write_rejection_data() diff --git a/tests/data/shell/data/internal_names.csv b/tests/data/shell/data/internal_names.csv index 398a729c..a45b907a 100644 --- a/tests/data/shell/data/internal_names.csv +++ b/tests/data/shell/data/internal_names.csv @@ -1,6 +1,6 @@ x,y,qf,bn,exp,err 0.22103004237267732,0.19963131489364483,0.1931923516437426,0.9138648807444445,0.74691968932464348,0.034934761040754325 -0.48643269532046229,0.02354315487698766,0.76136937293783036,0.58808363992955881,0.92977055626776883,0.19704036050381257 +0.48643269532046229,0.02354315487698766,0.76136937293783036,0.,0.92977055626776883,0.19704036050381257 0.82899457747428951,0.72303121933782333,0.096044095978928601,0.10498435784555171,0.86245668108739393,0.32963709491544158 0.4717218456158726,0.75158736586676,0.13927900997841769,0.14072262275954006,0.41567564307425919,0.78418342753457071 0.32245112927429009,0.781271184992266,0.14433401005211699,0.70645993497548398,0.35783843756383882,0.6637430475944136 diff --git a/tests/progs/shell/test_loaders.py b/tests/progs/shell/loaders/test_data_loader.py similarity index 59% rename from tests/progs/shell/test_loaders.py rename to tests/progs/shell/loaders/test_data_loader.py index 2a0516b8..b2502d39 100644 --- a/tests/progs/shell/test_loaders.py +++ b/tests/progs/shell/loaders/test_data_loader.py @@ -6,12 +6,28 @@ from PyPWA.builtin_plugins.data import memory from PyPWA.progs.shell import loaders + +""" +Setup Data Locations +""" + +# Single File data DATA = os.path.join( - os.path.dirname(__file__), "../../data/shell/data/data.csv" + os.path.dirname(__file__), "../../../data/shell/data/data.csv" +) + +QFACTOR = os.path.join( + os.path.dirname(__file__), "../../../data/shell/data/qfactor.txt" +) + +MONTE_CARLO = os.path.join( + os.path.dirname(__file__), "../../../data/shell/data/monte_carlo.csv" ) +# Data with embedded internal names + INTERNAL_NAMES = os.path.join( - os.path.dirname(__file__), "../../data/shell/data/internal_names.csv" + os.path.dirname(__file__), "../../../data/shell/data/internal_names.csv" ) INTERNAL_NAMES_DICT = { @@ -21,21 +37,13 @@ "expected values": "exp" } -QFACTOR = os.path.join( - os.path.dirname(__file__), "../../data/shell/data/qfactor.txt" -) - -MONTE_CARLO = os.path.join( - os.path.dirname(__file__), "../../data/shell/data/monte_carlo.csv" -) - +# Parser PARSER = memory.Memory(False, True) -FUNCTIONS_FOR_TEST = os.path.join( - os.path.dirname(__file__), - "../../data/source_files/functions_without_math.py" -) +""" +Tests with QFactor file +""" @pytest.fixture def data_with_qfactor(): @@ -45,21 +53,20 @@ def data_with_qfactor(): return loader -@pytest.fixture -def data_without_qfactor(): - loader = loaders.DataLoading(PARSER, DATA, monte_carlo=MONTE_CARLO) - return loader - +def test_qfactor_sum_file(data_with_qfactor): + assert numpy.sum(data_with_qfactor.qfactor) == 10.794689011836818 -@pytest.fixture -def data_without_extra(): - loader = loaders.DataLoading(PARSER, MONTE_CARLO) - return loader +""" +Tests without QFactor file +""" @pytest.fixture -def data_with_internal_names(): - loader = loaders.DataLoading(PARSER, INTERNAL_NAMES, INTERNAL_NAMES_DICT) +def data_without_qfactor(): + loader = loaders.DataLoading( + PARSER, DATA, monte_carlo=MONTE_CARLO, + internal_data={'quality factor': 'qfactor'} + ) return loader @@ -67,8 +74,18 @@ def test_qfactor_sum_embedded(data_without_qfactor): assert numpy.sum(data_without_qfactor.qfactor) == 9.1540205293413841 -def test_qfactor_sum_file(data_with_qfactor): - assert numpy.sum(data_with_qfactor.qfactor) == 10.794689011836818 +def test_extras_not_in_data(data_without_qfactor): + assert ["qfactor, BinN"] not in data_without_qfactor.data.dtype.names + + +""" +Test with Simple file. +""" + +@pytest.fixture +def data_without_extra(): + loader = loaders.DataLoading(PARSER, MONTE_CARLO) + return loader def test_qfactor_sum_ones(data_without_extra): @@ -80,59 +97,50 @@ def test_monte_carlo_empty(data_without_extra): assert not data_without_extra.monte_carlo -def test_extras_not_in_data(data_without_qfactor): - assert ["qfactor, BinN"] not in data_without_qfactor.data.dtype.names - - def test_qfactor_size_is_correct(data_without_extra): multiplier = data_without_extra.qfactor * data_without_extra.data['x'] for index, value in enumerate(multiplier): assert value == data_without_extra.data['x'][index] +""" +Test with all internal names. +""" + +@pytest.fixture +def data_with_internal_names(): + with pytest.warns(UserWarning): + loader = loaders.DataLoading( + PARSER, INTERNAL_NAMES, INTERNAL_NAMES_DICT + ) + return loader + + def test_qfactor_sum_with_internal_names(data_with_internal_names): - assert numpy.sum(data_with_internal_names.qfactor) == 4.3227260998520247 + assert numpy.sum(data_with_internal_names.qfactor) == 3.5613567269141941 def test_binned_sum_with_internal_names(data_with_internal_names): - assert numpy.sum(data_with_internal_names.binned) == 4.7429487930395018 + assert numpy.sum(data_with_internal_names.binned) == 4.1548651531099434 def test_error_sum_with_internal_names(data_with_internal_names): numpy.testing.assert_approx_equal( numpy.sum(data_with_internal_names.event_errors), - 4.2857024214064667 + 4.088662060902655 ) def test_expected_sum_with_internal_names(data_with_internal_names): assert numpy.sum(data_with_internal_names.expected_values) == \ - 5.673049557244684 - - -@pytest.fixture -def function_without_math(): - loader = loaders.FunctionLoader( - FUNCTIONS_FOR_TEST, "processing", "setup" - ) - return loader - - -@pytest.fixture -def function_without_math_or_setup(): - loader = loaders.FunctionLoader( - FUNCTIONS_FOR_TEST, "processing", "A dirty lie" - ) - return loader - - -def test_function_and_setup_return_true(function_without_math): - processing = function_without_math.process - setup = function_without_math.setup - assert processing(1, 1) - assert setup() + 4.7432790009769166 -def test_function_without_setup_is_none(function_without_math_or_setup): - setup = function_without_math_or_setup.setup - assert isinstance(setup(), type(None)) +def test_length_of_all_data_match_with_internal(data_with_internal_names): + data_len = len(data_with_internal_names.data) + qfactor_len = len(data_with_internal_names.data) + binned_len = len(data_with_internal_names.data) + measurement_len = len(data_with_internal_names.data) + errors_len = len(data_with_internal_names.data) + data = [data_len, qfactor_len, binned_len, measurement_len, errors_len] + assert len(set(data)) == 1 diff --git a/tests/progs/shell/loaders/test_function_loader.py b/tests/progs/shell/loaders/test_function_loader.py new file mode 100644 index 00000000..fd4bae20 --- /dev/null +++ b/tests/progs/shell/loaders/test_function_loader.py @@ -0,0 +1,50 @@ +import os + +import pytest + +from PyPWA.progs.shell import loaders + +""" +Test Data +""" + +FUNCTIONS_FOR_TEST = os.path.join( + os.path.dirname(__file__), + "../../../data/source_files/functions_without_math.py" +) + + +""" +Test simple function +""" + +@pytest.fixture +def function_without_math(): + loader = loaders.FunctionLoader( + FUNCTIONS_FOR_TEST, "processing", "setup" + ) + return loader + + +def test_function_and_setup_return_true(function_without_math): + processing = function_without_math.process + setup = function_without_math.setup + assert processing(1, 1) + assert setup() + + +""" +Test with simpler function +""" + +@pytest.fixture +def function_without_math_or_setup(): + loader = loaders.FunctionLoader( + FUNCTIONS_FOR_TEST, "processing", "A dirty lie" + ) + return loader + + +def test_function_without_setup_is_none(function_without_math_or_setup): + setup = function_without_math_or_setup.setup + assert isinstance(setup(), type(None))