# core

> Defines central classes of findmycells: `ProcessingStrategy`, `ProcessingObject`, `DataLoader`, and `DataReader`

In [None]:
#| default_exp core

In [None]:
#| export

from abc import ABC, abstractmethod
from findmycells.database import Database
from findmycells.configs import DefaultConfigs, GUIConfigs
from typing import List, Dict, Tuple, Optional, Any
from types import ModuleType
from pathlib import Path, PosixPath
import inspect

In [None]:
#| hide
from nbdev.showdoc import *

# Handling data processing

The following two classes, `ProcessingObject` and `ProcessingStrategy`, provide the blueprints for all processing strategies and objects that are used throughout the *findmycells* package. As you can see in the corresponding processing step modules (i.e. "preprocess", "segment", or "quantify"), these abstract base classes provide the basic structure of the more specific objects and strategies in each of these modules (e.g. `QuantificationObject` and `QuantificationStrategy` within the "quantify" module inherit from `ProcessingObject` and `ProcessingStrategy`, respectively). While this makes these two classes highly relevant for any developer, regular users of *findmycells* won't be interacting with them, even if they want to use the API instead of the GUI.

In [None]:
#| export

class ProcessingObject(ABC):
    
    """
    Abstract base class (inherits from ABC) that defines the general structure of `ProcessingObjects` in findmycells.
    A `ProcessingObject` combines all information needed for the corresponding processing step, 
    i.e. what files are supposed to be processed & how. It also interfaces to the database of the
    project, such that it can automatically update the database with the latest progress.
    """

    @property
    @abstractmethod
    def processing_type(self
                       ) -> str: # string that defines the processing type (e.g. "preprocessing" or "quantification")
        """
        Abstract method that requires its subclasses to define the `processing_type`
        as a property of the class. Thus, this will be specified in each individual 
        processing module (e.g. the "preprocess" or "quantify" modules). It will be used
        in the database to keep track of the processing progress of the project.
        Has to be a string.
        """
        pass

    
    @property
    @abstractmethod
    def default_configs(self) -> DefaultConfigs:
        """
        Abstract method that requires its subclasses to define the `default_configs`
        as a property of the class. Thus, this will specify all configuration options
        that come with each subclass, while simultaneously also providing default values
        for each option and, moreover, defining what types of values are allowed for each
        option. Check out the implementation of `DefaultConfigs` in the configs module, or
        have a look at how this is implemented in one of the processing sub-modules, for 
        instance in the "specs.py" file in the preprocessing sub-module.
        """
        pass    

    
    @abstractmethod
    def _add_processing_specific_infos_to_updates(self, 
                                                 updates: Dict # A dictionary with updates that need to be passed to the database
                                                ) -> Dict: # A dictionary with all updates that need to be passed to the database
        """
        Abstract method that requires its subclasses to define what updates need to be
        passed to the database, in addition to those that are already covered by the corresponding
        ProcessingStrategies or the "self.update_database()" method. If there is no more 
        information to add, simply return the input 'updates' dictionary without any alterations.
        
        Returns a dictionary with all updates that need to be passed to the database.
        """
        return updates
    
    
    @abstractmethod
    def _processing_specific_preparations(self) -> None:
        """
        Abstract method that is called at the end of "self.prepare_for_processing()", i.e. 
        after `self.file_ids` and `self.database` have been set. Subclasses have to implement
        it, even if there is nothing to prepare.
        """
        pass
    
    
    def prepare_for_processing(self,
                               file_ids: List[str], # A list with the file_ids of all files that need to be processed
                               database: Database, # The database of the findmycells project
                              ) -> None:
        """
        Stores the `file_ids` and the `database` as attributes of the object and then
        triggers the subclass specific "self._processing_specific_preparations()".
        """
        self.file_ids = file_ids
        self.database = database
        self._processing_specific_preparations()
    
    
    def run_all_strategies(self,
                           strategies: List, # The ProcessingStrategies to run; each entry is called without arguments to create the strategy object
                           strategy_configs: List[Dict] # One configs dictionary per entry in `strategies`, in the same order
                          ) -> None:
        """
        Runs all ProcessingStrategies that are passed via `strategies`, one after another.
        For this, the corresponding ProcessingStrategy objects will be initialized and their ".run()"
        method will be called with the matching entry of `strategy_configs`. Afterwards, the
        ".update_tracking_histories()" method of the strategy is called and the ProcessingStrategy
        object is deleted to clear it from memory.
        
        Raises a ValueError if `strategies` and `strategy_configs` differ in length, since the 
        surplus entries would otherwise be skipped silently.
        """
        if len(strategies) != len(strategy_configs):
            raise ValueError(f'"strategies" and "strategy_configs" need to have the same length, but got '
                             f'{len(strategies)} strategies and {len(strategy_configs)} configs.')
        # Both calls hand back the object the next call continues with. It is tracked
        # under a local name instead of re-binding "self" inside the method.
        processing_object = self
        for strategy, configs in zip(strategies, strategy_configs):
            processing_strategy = strategy()
            processing_object = processing_strategy.run(processing_object = processing_object, strategy_configs = configs)
            processing_object = processing_strategy.update_tracking_histories(processing_object = processing_object, strategy_configs = configs)
            del processing_strategy


    def update_database(self, mark_as_completed: bool=True) -> None:
        """
        For each microscopy file that had to be processed (self.file_ids), the database
        will be updated with the respective processing progress information. Interfaces
        back to the abstract method "self._add_processing_specific_infos_to_updates()" that
        enables the corresponding subclasses to add more specific details before triggering
        the update method of the database. If `mark_as_completed` is False, the processing
        step is not marked as completed in the file histories.
        """
        for file_id in self.file_ids:
            if mark_as_completed:
                self.database.file_histories[file_id].mark_processing_step_as_completed(processing_step_id = self.processing_type)
            # A fresh dictionary per file, so that the updates of one file never end up in another one.
            updates = self._add_processing_specific_infos_to_updates(updates = {})
            self.database.update_file_infos(file_id = file_id, updates = updates)

Subclasses that inherit from `ProcessingObject` need to implement all of its abstract members, including the following two:

In [None]:
show_doc(ProcessingObject.processing_type)

---

[source](https://github.com/Defense-Circuits-Lab/findmycells/blob/main/findmycells/core.py#L27){target="_blank" style="float:right; font-size:smaller"}

### ProcessingObject.processing_type

>      ProcessingObject.processing_type ()

Abstract method that requires its subclasses to define the `processing_type`
as a property of the class. Thus, this will be specified in each individual 
processing module (e.g. the "preprocess" or "quantify" modules). It will be used
in the database to keep track of the processing progress of the project.
Has to be a string.

In [None]:
show_doc(ProcessingObject._add_processing_specific_infos_to_updates)

---

[source](https://github.com/Defense-Circuits-Lab/findmycells/blob/main/findmycells/core.py#L55){target="_blank" style="float:right; font-size:smaller"}

### ProcessingObject._add_processing_specific_infos_to_updates

>      ProcessingObject._add_processing_specific_infos_to_updates (updates:Dict)

Abstract method that requires its subclasses to define what updates need to be
passed to the database, in addition to those that are already covered by the corresponding
ProcessingStrategies or the "self.update_database()" method. If there is no more 
information to add, simply return the input 'updates' dictionary without any alterations.

Returns a dictionary with all updates that need to be passed to the database.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| updates | typing.Dict | A dictionary with updates that need to be passed to the database |
| **Returns** | **typing.Dict** | **A dictionary with all updates that need to be passed to the database** |

In addition, `ProcessingObject` defines two core functions that will be called on all its subclasses, which are:

In [None]:
show_doc(ProcessingObject.run_all_strategies)

---

[source](https://github.com/Defense-Circuits-Lab/findmycells/blob/main/findmycells/core.py#L83){target="_blank" style="float:right; font-size:smaller"}

### ProcessingObject.run_all_strategies

>      ProcessingObject.run_all_strategies (strategies:List,
>                                           strategy_configs:List[Dict])

Runs all ProcessingStrategies that are passed via `strategies`, each with its matching entry of `strategy_configs`.
For this, the corresponding ProcessingStrategy objects will be initialized and their ".run()"
method will be called, while passing "self" as "processing_object". Finally, it updates the
tracking histories and deletes the ProcessingStrategy object to clear it from memory.

In [None]:
show_doc(ProcessingObject.update_database)

---

[source](https://github.com/Defense-Circuits-Lab/findmycells/blob/main/findmycells/core.py#L97){target="_blank" style="float:right; font-size:smaller"}

### ProcessingObject.update_database

>      ProcessingObject.update_database (mark_as_completed:bool=True)

For each microscopy file that had to be processed (self.file_ids), the database
will be updated with the respective processing progress information. Interfaces
back to the abstract method "self._add_processing_specific_infos_to_updates()" that
enables the corresponding subclasses to add more specific details before triggering
the update method of the database.

In [None]:
#| export

class ProcessingStrategy(ABC):
    
    """
    Abstract base class that defines the general structure of `ProcessingStrategies` in findmycells.
    A `ProcessingStrategy` combines all functions that are required for one particular processing step, 
    e.g. `ConvertTo8Bit` is a `ProcessingStrategy` in the "preprocess" module and converts the corresponding
    images into 8-bit.
    """

    @property
    @abstractmethod
    def processing_type(self) -> str:
        """
        Abstract method that requires its subclasses to define the `processing_type`
        as a property of the class. Has to be any of these: 'preprocessing', 'segmentation', 
        'postprocessing', 'quantification', 'inspection'.
        """
        pass
    
    
    @property
    @abstractmethod
    def default_configs(self) -> DefaultConfigs:
        """
        Abstract method that requires its subclasses to define the `default_configs`
        as a property of the class. Thus, this will specify all configuration options
        that come with each subclass, while simultaneously also providing default values
        for each option and, moreover, defining what types of values are allowed for each
        option. Check out the implementation of `DefaultConfigs` in the configs module, or
        have a look at how this is implemented in one of the processing sub-modules, for 
        instance in the "specs.py" file in the preprocessing sub-module.
        """
        pass
    
    
    @property
    @abstractmethod
    def widget_names(self) -> Dict[str, str]:
        """
        Abstract property that is passed as `widget_names` to `GUIConfigs` when the
        widget of the strategy is created in "self.initialize_gui_configs_and_widget()".
        """
        pass
    
    
    @property
    @abstractmethod
    def descriptions(self) -> Dict[str, str]:
        """
        Abstract property that is passed as `descriptions` to `GUIConfigs` when the
        widget of the strategy is created in "self.initialize_gui_configs_and_widget()".
        """
        pass
    
    
    @property
    @abstractmethod
    def tooltips(self) -> Optional[Dict[str, str]]:
        """
        Abstract property that is passed as `tooltips` to `GUIConfigs` when the
        widget of the strategy is created in "self.initialize_gui_configs_and_widget()".
        May be None.
        """
        return None
    
    
    @property
    @abstractmethod
    def dropdown_option_value_for_gui(self) -> str:
        """
        Abstract property that requires its subclasses to define a string. Judging by its
        name, it is the value that represents the strategy in a dropdown of the GUI.
        """
        pass


    @abstractmethod
    def run(self, processing_object: ProcessingObject, strategy_configs: Dict) -> ProcessingObject:
        """
        Abstract method in which the subclasses process the `processing_object` according 
        to the `strategy_configs`. Has to return the processing object again.
        """
        return processing_object

    
    @abstractmethod
    def _add_strategy_specific_infos_to_updates(self, updates: Dict) -> Dict:
        """
        Abstract method in which the subclasses add all ProcessingStrategy specific information 
        to the `updates` dictionary, or simply return `updates` right away if there is no 
        information to add.
        """
        return updates


    @property
    def strategy_name(self) -> str:
        """
        The name of the class of the strategy.
        """
        return self.__class__.__name__ 

    
    def initialize_gui_configs_and_widget(self) -> None:
        """
        Creates the `GUIConfigs` of the strategy, lets them construct the widget, and
        stores both as `self.gui_configs` and `self.widget`, respectively.
        """
        gui_configs = GUIConfigs(widget_names = self.widget_names,
                                 descriptions = self.descriptions,
                                 tooltips = self.tooltips)
        gui_configs.construct_widget(strategy_description = self.__doc__,
                                     default_configs = self.default_configs)
        self.gui_configs = gui_configs
        self.widget = gui_configs.strategy_widget
    
    
    def export_current_gui_config_values(self) -> Dict:
        """
        Returns the current config values of `self.gui_configs`. Requires that
        "self.initialize_gui_configs_and_widget()" was called before.
        """
        return self.gui_configs.export_current_config_values()
    
    
    def update_tracking_histories(self, processing_object: ProcessingObject, strategy_configs: Dict) -> ProcessingObject:
        """
        Tracks for each file of the `processing_object` (processing_object.file_ids) in the 
        corresponding file history of the database that this strategy was run, together with 
        the configs that were used and all strategy specific additions.
        
        Returns the `processing_object`.
        """
        for file_id in processing_object.file_ids:
            # Subclasses may add their infos in place. Handing over a copy keeps the configs of
            # the caller untouched and gives every file history a dictionary of its own.
            strategy_configs_with_updates = self._add_strategy_specific_infos_to_updates(updates = dict(strategy_configs))
            tracking_history = processing_object.database.file_histories[file_id]
            tracking_history.track_processing_strat(processing_step_id = self.processing_type,
                                                    processing_strategy_name = self.strategy_name,
                                                    strategy_configs = strategy_configs_with_updates)
        return processing_object

# Handling data reading

Furthermore, the following two classes `DataLoader` and `DataReader` will be re-used throughout the *findmycells* package to load data into your *findmycells* project.

In [None]:
#| export

class DataReader(ABC):
    
    """
    Abstract base class that defines the general structure of DataReader subclasses.
    Subclasses have to provide the "readable_filetype_extensions" and "default_configs" 
    properties, as well as the "read()" and "assert_correct_output_format()" methods.
    """
    
    @property
    @abstractmethod
    def readable_filetype_extensions(self) -> List[str]:
        """
        The filetype extensions that the respective DataReader subclass is able to read.
        """
    
    
    @property
    @abstractmethod
    def default_configs(self) -> DefaultConfigs:
        """
        The `DefaultConfigs` of the respective DataReader subclass.
        """
    
    
    @abstractmethod
    def read(self, filepath: Path, reader_configs: Dict) -> Any:
        """
        Reads the data that is stored at `filepath`, applying the given `reader_configs`.
        What is returned depends on the DataReader subclass, e.g. a numpy array of a specific
        shape for MicroscopyImageReaders, or a shapely Polygon for ROIReaders.
        """
    
    
    @abstractmethod
    def assert_correct_output_format(self, output: Any) -> None:
        """
        Validates with an assert that the data was read correctly, i.e. that `output` 
        matches the format that is intended for the respective DataReader subclass.
        """

In [None]:
#| export

class DataLoader:
    
    """
    Finds the DataReader subclass that can handle a given filetype extension
    and uses DataReader subclasses to load data.
    """
    
    def determine_reader(self, file_extension: str, data_reader_module: ModuleType) -> DataReader:
        """
        Searches `data_reader_module` for a class whose name ends with "Reader" (except for 
        "DataReader" itself) and that lists `file_extension` in its "readable_filetype_extensions".
        Members that are not classes, or that are still abstract, are skipped, since they cannot
        be instantiated to check their extensions. If more than one reader matches, the last one
        in the order of `inspect.getmembers` is used.
        
        Returns the matching reader class (not an instance of it).
        Raises a NotImplementedError if no reader matches.
        """
        available_reader = None
        for name, member in inspect.getmembers(data_reader_module):
            if not name.endswith('Reader') or name == 'DataReader':
                continue
            if not inspect.isclass(member) or inspect.isabstract(member):
                continue
            if file_extension in member().readable_filetype_extensions:
                available_reader = member
        if available_reader is None:
            raise NotImplementedError(f'Unfortunately, there is no DataReader implemented in {data_reader_module} '
                                      f'which can handle your filetype ("{file_extension}").')
        return available_reader
    
    
    def load(self, data_reader_class: DataReader, filepath: PosixPath, reader_configs: Dict) -> Any:
        """
        Creates an instance of `data_reader_class` (the class itself has to be passed, e.g. as
        returned by "self.determine_reader()"), reads the data stored at `filepath` with it while
        applying the `reader_configs`, and lets the reader validate the format of what was read.
        
        Returns the data that was read.
        """
        data_reader = data_reader_class()
        data = data_reader.read(filepath = filepath, reader_configs = reader_configs)
        data_reader.assert_correct_output_format(output = data)
        return data

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()