In [None]:
import datetime
from pathlib import Path, PosixPath
from typing import Any, Dict, List, Optional, Union

import pandas as pd


In [None]:
def list_dir_no_hidden(path: Path, only_dirs: Optional[bool]=False, only_files: Optional[bool]=False) -> List:
    """
    List the direct children of 'path', leaving out every entry whose name starts with a dot.

    - only_dirs: if True, only directories are returned (takes precedence over 'only_files').
    - only_files: if True, only entries that are not directories are returned.

    The returned paths come in the order delivered by `Path.iterdir()`.
    """
    visible_entries = [entry for entry in path.iterdir() if not entry.name.startswith('.')]
    if only_dirs == True:
        return [entry for entry in visible_entries if entry.is_dir()]
    if only_files == True:
        return [entry for entry in visible_entries if not entry.is_dir()]
    return visible_entries

In [None]:
class ProjectConfigs:
    """
    Holds the root directory of a project and the configs of the individual processing steps.

    The configs of each processing step are stored as an attribute of this object,
    named after the respective 'processing_step_id'.
    """

    def __init__(self, root_dir: Path, project_configs_filepath: Optional[Path]=None) -> None:
        """
        - root_dir: pathlib.Path of an existing directory that serves as project root.
        - project_configs_filepath: optional pathlib.Path of previously saved configs,
          which is passed on to `load_from_disk`.
        """
        # isinstance (instead of comparing against PosixPath) also accepts WindowsPath objects
        assert isinstance(root_dir, Path), '"root_dir" must be pathlib.Path referring to an existing directory.'
        assert root_dir.is_dir(), '"root_dir" must be pathlib.Path referring to an existing directory.'
        self.root_dir = root_dir
        if isinstance(project_configs_filepath, Path):
            self.load_from_disk(project_configs_filepath = project_configs_filepath)


    def _load_available_processing_modules(self) -> None:
        """
        load all processing modules (and their specs submodules)
        that are available in the installed fmc version as attr to this object,
        to make it easier to access them and the default values.

        Not implemented yet.
        """
        pass


    def add_processing_step_configs(self, processing_step_id: str, configs: Dict[str, Any]) -> None:
        """
        Validates 'processing_step_id' and 'configs' and then stores the configs
        as attribute of this object, using 'processing_step_id' as attribute name.

        1. Checks whether 'processing_step_id' is valid - i.e. if it matches a module name of findmycells
            - seems the easiest approach that would also keep it flexible for extensions
            - alternatively, a module could be passed here, such that defaults can be loaded from there right away
              (which should also be possible from the str?)
        2. Set passed configs (confirm that it's a dictionary and has only strings as keys) as attribute to this object
            - loading of default values needed here? From "specs" submodule in each processing module?

        Already existing configs of the same processing step are replaced.
        """
        self._assert_valid_processing_step_id(processing_step_id)
        configs = self._assert_valid_configs_input_and_fill_with_defaults_where_needed(processing_step_id, configs)
        setattr(self, processing_step_id, configs)
        # maybe we could add a warning here, if there were already some configs existing and they have been replaced now


    def _assert_valid_processing_step_id(self, processing_step_id: str) -> None:
        """
        Asserts whether 'processing_step_id' is valid, i.e. whether it matches an
        existing processing module in findmycells that contains a "specs.py" file.

        Needs to be implemented once basic refactored structure of fmc was established & exported via nbdev
        """
        pass


    def _assert_valid_configs_input_and_fill_with_defaults_where_needed(self, processing_step_id: str, configs: Dict[str, Any]) -> Dict[str, Any]:
        """
        Asserts that 'configs' is a dictionary whose keys are all of type string
        and returns the validated configs.

        Still to be implemented, as soon as the specs of the processing modules are available:
        - asserting that all keys are defined by the respective specs
        - asserting that the value types match the valid value types
        - loading the defaults defined in the specs for all missing keys
        """
        assert isinstance(configs, dict), '"configs" must be a dictionary.'
        assert all(isinstance(key, str) for key in configs), 'All keys of "configs" must be strings.'
        # the configs have to be returned, as the caller stores the return value as attribute
        return configs


    def load_from_disk(self, project_configs_filepath: Path) -> None:
        """
        Reconstitute configs object from this file (which is probably a pickle dictionary)

        Not implemented yet - calling it currently has no effect.
        """


    def save_to_disk(self, filename: Optional[str]=None, out_dir: Optional[Path]=None) -> None:
        """
        Saves current object as pickle file to disk. If filename & out_dir are provided,
        and of correct types, will save it there, otherwise defaults are something like
        "findmycells_project_configs_" + date and time in project root dir.

        Not implemented yet - calling it currently has no effect.
        """

In [None]:
class ProjectDatabase:
    """
    Keeps track of all files of a project and of the project's directory structure.

    Upon initialization, the expected subdirectories are located (or created) in the
    root directory of the passed project configs. The names of the subdirectories are
    stored as '<target_name>_dir' attributes of this object.

    - file_infos: dictionary of equally long lists, i.e. one list per property and one
      list index per file. File ids are stored as zero-padded strings (e.g. '0003').
    - file_histories: dictionary with one FileHistory object per file, using the
      integer file id as key.
    """

    # misspelled ids are kept, to remain compatible with already existing projects
    _VALID_HEMISPHERE_IDS = ('ipsilateral', 'ipsi', 'Ipsilateral', 'Ipsi',
                             'contralateral', 'contra', 'Contralateral', 'Contra',
                             'any', 'Any', 'undefined', 'Undefined', 'undefiened', 'Undefiened',
                             'unidentified', 'Unidentified')


    def __init__(self, project_configs: ProjectConfigs) -> None:
        self.project_configs = project_configs
        self._initialize_project_in_root_dir()
        self._create_file_infos_as_attr()
        self._create_file_histories_as_attr()


    def _initialize_project_in_root_dir(self) -> None:
        """
        Locates or creates all subdirectories that are expected in the project root directory.
        """
        self._initialize_all_top_level_subdirectories()
        self._initialize_segmentation_tool_subdirectories()
        self._initialize_microscopy_images_subdirectory_tree()


    def _initialize_all_top_level_subdirectories(self) -> None:
        self._find_or_create_subdir(target_name = 'microscopy_images', keywords = ['microscopy', 'Microscopy'])
        self._find_or_create_subdir(target_name = 'rois_to_analyze', keywords = ['rois', 'ROIs', 'ROIS', 'Rois'])
        self._find_or_create_subdir(target_name = 'preprocessed_images', keywords = ['preprocessed', 'Preprocessed', 'pre-processed'])
        self._find_or_create_subdir(target_name = 'segmentation_tool', keywords = ['tool', 'Tool'])
        self._find_or_create_subdir(target_name = 'semantic_segmentations', keywords = ['semantic', 'Semantic'])
        self._find_or_create_subdir(target_name = 'instance_segmentations', keywords = ['instance', 'Instance'])
        self._find_or_create_subdir(target_name = 'postprocessed_images', keywords = ['postprocessed', 'Postprocessed', 'post-processed'])
        self._find_or_create_subdir(target_name = 'quantified_segmentations', keywords = ['quantified', 'Quantified', 'quantification', 'Quantification'])
        self._find_or_create_subdir(target_name = 'results', keywords = ['results', 'Results'])
        self._find_or_create_subdir(target_name = 'inspection', keywords = ['inspect', 'Inspect'])


    def _find_or_create_subdir(self, target_name: str, keywords: List[str], parent_dir: Optional[Path]=None) -> None:
        """
        Looks for a subdirectory of 'parent_dir' whose name contains any of the 'keywords'.
        If none is found, a new subdirectory called 'target_name' is created. The name
        (not the full path) of the subdirectory is stored as attribute '<target_name>_dir'.

        - parent_dir: defaults to the project root directory.
        """
        if parent_dir is None:
            parent_dir = self.project_configs.root_dir
        subdir_path = None
        # sorted, such that the selection is reproducible if several directories match
        for path in sorted(parent_dir.iterdir()):
            if path.is_dir() and any(key in path.name for key in keywords):
                subdir_path = path
                break
        if subdir_path is None:
            subdir_path = parent_dir.joinpath(target_name)
            subdir_path.mkdir()
        setattr(self, f'{target_name}_dir', subdir_path.name)


    def _initialize_segmentation_tool_subdirectories(self) -> None:
        segmentation_tool_dir_path = self.project_configs.root_dir.joinpath(self.segmentation_tool_dir)
        self._find_or_create_subdir(target_name = 'trained_models',
                                    keywords = ['models'],
                                    parent_dir = segmentation_tool_dir_path)
        self._find_or_create_subdir(target_name = 'segmentation_tool_temp_dir',
                                    keywords = ['tmp', 'temp'],
                                    parent_dir = segmentation_tool_dir_path)


    def _initialize_microscopy_images_subdirectory_tree(self) -> None:
        """
        Validates the subdirectory tree of the microscopy images directory if it contains
        any (non-hidden) subdirectory, or creates a representative tree otherwise.
        """
        microscopy_images_dir_path = self.project_configs.root_dir.joinpath(self.microscopy_images_dir)
        if len(list_dir_no_hidden(path = microscopy_images_dir_path, only_dirs = True)) > 0:
            self._assert_valid_microscopy_image_subdir_tree_structure()
        else:
            self._create_representative_microscopy_image_subdir_tree()


    def _assert_valid_microscopy_image_subdir_tree_structure(self) -> None:
        """
        Asserts that the microscopy images directory follows the structure:
        main group / subgroup / subject / hemisphere
        with at least one subdirectory on each level and a valid hemisphere id.
        """
        microscopy_images_dir_path = self.project_configs.root_dir.joinpath(self.microscopy_images_dir)
        for main_group_id_subdir_path in list_dir_no_hidden(path = microscopy_images_dir_path, only_dirs = True):
            tmp_subgroup_subdir_paths = list_dir_no_hidden(path = main_group_id_subdir_path, only_dirs = True)
            assert len(tmp_subgroup_subdir_paths) > 0, f'Invalid microscopy images subdir structure! Expected at least one subdirectory in {main_group_id_subdir_path}.'
            for subgroup_id_subdir_path in tmp_subgroup_subdir_paths:
                tmp_subject_subdir_paths = list_dir_no_hidden(path = subgroup_id_subdir_path, only_dirs = True)
                assert len(tmp_subject_subdir_paths) > 0, f'Invalid microscopy images subdir structure! Expected at least one subdirectory in {subgroup_id_subdir_path}.'
                for subject_id_subdir_path in tmp_subject_subdir_paths:
                    tmp_hemisphere_subdir_paths = list_dir_no_hidden(path = subject_id_subdir_path, only_dirs = True)
                    # has to check the hemisphere level (and not the subject level again)
                    assert len(tmp_hemisphere_subdir_paths) > 0, f'Invalid microscopy images subdir structure! Expected at least one subdirectory in {subject_id_subdir_path}.'
                    for hemisphere_id_subdir_path in tmp_hemisphere_subdir_paths:
                        valid_hemisphere_id = hemisphere_id_subdir_path.name in self._VALID_HEMISPHERE_IDS
                        assert valid_hemisphere_id == True, f'"{hemisphere_id_subdir_path.name}" ({hemisphere_id_subdir_path}) is not a valid hemisphere id!'


    def _create_representative_microscopy_image_subdir_tree(self) -> None:
        """
        Creates an exemplary subdirectory tree, to illustrate the expected structure.
        """
        for representative_main_group_id in ['wildtype', 'transgenic']:
            for representative_subgroup_id in ['week_1', 'week_4']:
                if representative_main_group_id == 'wildtype':
                    subject_ids = ['mouse_1', 'mouse_2', 'mouse_3']
                else:
                    subject_ids = ['mouse_4', 'mouse_5', 'mouse_6']
                for representative_subject_id in subject_ids:
                    for representative_hemisphere_id in ['contralateral', 'ipsilateral']:
                        self._make_subdir_tree(main_group_id = representative_main_group_id,
                                               subgroup_id = representative_subgroup_id,
                                               subject_id = representative_subject_id,
                                               hemisphere_id = representative_hemisphere_id)


    def _make_subdir_tree(self, main_group_id: str, subgroup_id: str, subject_id: str, hemisphere_id: str) -> None:
        microscopy_images_dir = self.project_configs.root_dir.joinpath(self.microscopy_images_dir)
        hemisphere_dir = microscopy_images_dir.joinpath(main_group_id, subgroup_id, subject_id, hemisphere_id)
        # creates all missing intermediate levels in a single call
        hemisphere_dir.mkdir(parents = True, exist_ok = True)


    def _create_file_infos_as_attr(self) -> None:
        self.file_infos = {'file_id': [],
                           'original_filename': [],
                           'main_group_id': [],
                           'subgroup_id': [],
                           'subject_id': [],
                           'hemisphere_id': [],
                           'microscopy_filepath': [],
                           'microscopy_filetype': [],
                           'rois_present': [],
                           'rois_filepath': [],
                           'rois_filetype': [],
                           'datetime_added': [],
                           'datetime_removed': []}


    def _create_file_histories_as_attr(self) -> None:
        self.file_histories = {}


    def compute_file_infos(self, skip_checking: bool=False) -> None:
        """
        Scans the microscopy images directory and adds all new files to the database.

        - skip_checking: if True, every detected file is added without checking
          whether it is already part of the database.
        """
        self._add_new_files_to_database(skip_checking = skip_checking)
        self._identify_removed_files() # ToDo: not implemented yet


    def _add_new_files_to_database(self, skip_checking: bool) -> None:
        microscopy_images_dir_path = self.project_configs.root_dir.joinpath(self.microscopy_images_dir)
        for main_group_id_subdir_path in list_dir_no_hidden(path = microscopy_images_dir_path, only_dirs = True):
            for subgroup_id_subdir_path in list_dir_no_hidden(path = main_group_id_subdir_path, only_dirs = True):
                for subject_id_subdir_path in list_dir_no_hidden(path = subgroup_id_subdir_path, only_dirs = True):
                    for hemisphere_id_subdir_path in list_dir_no_hidden(path = subject_id_subdir_path, only_dirs = True):
                        for filepath in list_dir_no_hidden(path = hemisphere_id_subdir_path, only_files = True):
                            if skip_checking == True or self._is_this_a_new_file(filepath = filepath):
                                file_id = self._get_next_available_file_id()
                                self._append_details_to_file_infos(file_id = file_id, filepath = filepath)
                                self._add_new_file_history_tracker(file_id = file_id, source_image_filepath = filepath)


    @staticmethod
    def _split_filename(filename: str) -> tuple:
        """
        Splits 'filename' at its first dot and returns (name, filetype), e.g.
        'image.ome.tif' -> ('image', '.ome.tif'). The filetype of a filename
        without any dot is an empty string.
        """
        name, dot, remainder = filename.partition('.')
        return name, dot + remainder


    def _is_this_a_new_file(self, filepath: Path) -> bool:
        """
        Returns True if no entry in file_infos matches the group ids & the filename of 'filepath'.

        Raises a ValueError if more than one matching entry is found.
        """
        hemisphere_subdir_path = filepath.parent
        subject_subdir_path = hemisphere_subdir_path.parent
        subgroup_subdir_path = subject_subdir_path.parent
        main_group_subdir_path = subgroup_subdir_path.parent
        original_filename, _ = self._split_filename(filepath.name)
        searched_entry = (main_group_subdir_path.name,
                          subgroup_subdir_path.name,
                          subject_subdir_path.name,
                          hemisphere_subdir_path.name,
                          original_filename)
        existing_entries = zip(self.file_infos['main_group_id'],
                               self.file_infos['subgroup_id'],
                               self.file_infos['subject_id'],
                               self.file_infos['hemisphere_id'],
                               self.file_infos['original_filename'])
        matching_entries = sum(1 for entry in existing_entries if entry == searched_entry)
        if matching_entries > 1:
            raise ValueError(f'Found multiple entries in file_infos for {filepath}.')
        return matching_entries == 0


    def _get_next_available_file_id(self) -> int:
        if len(self.file_infos['file_id']) > 0:
            file_id = max(int(file_id_str) for file_id_str in self.file_infos['file_id']) + 1
        else:
            file_id = 0
        return file_id


    def _append_details_to_file_infos(self, file_id: int, filepath: Path) -> None:
        """
        Appends all details of the file at 'filepath' to file_infos, including the details of
        the matching ROI file, which is expected in the same relative subdirectory of the
        rois_to_analyze directory and has to share the filename (without filetype).

        Raises a ValueError if more than a single ROI file matches. In this case,
        file_infos remains unchanged.
        """
        hemisphere_subdir_path = filepath.parent
        subject_subdir_path = hemisphere_subdir_path.parent
        subgroup_subdir_path = subject_subdir_path.parent
        main_group_subdir_path = subgroup_subdir_path.parent
        original_filename, microscopy_filetype = self._split_filename(filepath.name)
        corresponding_dir_in_rois_to_analyze_dir = self.project_configs.root_dir.joinpath(self.rois_to_analyze_dir,
                                                                                          main_group_subdir_path.name,
                                                                                          subgroup_subdir_path.name,
                                                                                          subject_subdir_path.name,
                                                                                          hemisphere_subdir_path.name)
        matching_roi_filepaths = []
        # the ROI directory tree is not guaranteed to mirror the microscopy images directory tree
        if corresponding_dir_in_rois_to_analyze_dir.is_dir():
            for roi_filepath in list_dir_no_hidden(path = corresponding_dir_in_rois_to_analyze_dir, only_files = True):
                if self._split_filename(roi_filepath.name)[0] == original_filename:
                    matching_roi_filepaths.append(roi_filepath)
        if len(matching_roi_filepaths) > 1:
            raise ValueError('It seems like you provided more than a single ROI file in '
                             f'{corresponding_dir_in_rois_to_analyze_dir} that matches the microscopy '
                             f'image filename: {original_filename}. If you want to quantify image features '
                             'within multiple ROIs per image, please use RoiSets created with ImageJ as '
                             'described here: [Documentation link not provided yet - please raise an issue on '
                             'https://github.com/Defense-Circuits-Lab/findmycells - thank you!')
        if len(matching_roi_filepaths) == 1:
            rois_present = True
            rois_filepath = matching_roi_filepaths[0]
            rois_filetype = self._split_filename(rois_filepath.name)[1]
        else:
            rois_present = False
            rois_filepath = 'not_available'
            rois_filetype = 'not_available'
        # everything is appended only after all checks passed, to keep all lists equally long
        self.file_infos['file_id'].append(str(file_id).zfill(4))
        self.file_infos['original_filename'].append(original_filename)
        self.file_infos['main_group_id'].append(main_group_subdir_path.name)
        self.file_infos['subgroup_id'].append(subgroup_subdir_path.name)
        self.file_infos['subject_id'].append(subject_subdir_path.name)
        self.file_infos['hemisphere_id'].append(hemisphere_subdir_path.name)
        self.file_infos['microscopy_filepath'].append(filepath)
        self.file_infos['microscopy_filetype'].append(microscopy_filetype)
        self.file_infos['rois_present'].append(rois_present)
        self.file_infos['rois_filepath'].append(rois_filepath)
        self.file_infos['rois_filetype'].append(rois_filetype)
        self.file_infos['datetime_added'].append(datetime.datetime.now())
        self.file_infos['datetime_removed'].append(None)
        # keys that were added later on (e.g. process trackers) need an entry for the new file, too
        expected_length = len(self.file_infos['file_id'])
        for list_of_values in self.file_infos.values():
            if len(list_of_values) == expected_length - 1:
                list_of_values.append(None)


    def _add_new_file_history_tracker(self, file_id: int, source_image_filepath: Path) -> None:
        self.file_histories[file_id] = FileHistory(file_id = file_id, source_image_filepath = source_image_filepath)


    def _identify_removed_files(self) -> None:
        pass


    def add_new_key_to_file_infos(self, key: str, values: Optional[List]=None, preferred_empty_value: Union[bool, str, None]=None) -> None:
        """
        Adds a new key to file_infos.

        - values: list with one value per file in file_infos. If it is not passed,
          a list filled with 'preferred_empty_value' is created instead.
        """
        assert key not in self.file_infos, f'The key you are trying to add (i.e. {key}) is already in file_infos.'
        expected_length = len(self.file_infos['file_id'])
        if values is None:
            values = [preferred_empty_value] * expected_length
        else:
            assert isinstance(values, list) and len(values) == expected_length, '"values" has to be either None or a list of values with the same length as file_ids.'
        self.file_infos[key] = values


    def get_file_infos(self, identifier: str) -> Dict:
        """
        supports use of either original_filename, file_id, or microscopy_filepath as input parameter identifier

        Returns a dictionary with the values of the matching file for all keys in file_infos
        whose lists are not empty. Raises a ValueError if 'identifier' matches no file.
        """
        if identifier in self.file_infos['file_id']:
            index = self.file_infos['file_id'].index(identifier)
        elif identifier in self.file_infos['original_filename']:
            index = self.file_infos['original_filename'].index(identifier)
        elif identifier in self.file_infos['microscopy_filepath']:
            index = self.file_infos['microscopy_filepath'].index(identifier)
        else:
            raise ValueError(f'{identifier} is not a valid input!')
        return {key: list_of_values[index] for key, list_of_values in self.file_infos.items() if len(list_of_values) > 0}


    # ToDo should be re-named to something like "change file info entries" or similar
    def update_file_infos(self, file_id: str, updates: Dict, preferred_empty_value: Union[bool, str, None]=None) -> None:
        """
        Sets the values in 'updates' for the file with the passed 'file_id'. Keys that are not
        yet in file_infos are added, with 'preferred_empty_value' as value for all other files.
        """
        index = self.file_infos['file_id'].index(file_id)
        for key, value in updates.items():
            if key not in self.file_infos:
                self.add_new_key_to_file_infos(key, preferred_empty_value = preferred_empty_value)
            self.file_infos[key][index] = value


    def get_file_ids_to_process(self, input_file_ids: Optional[List], process_tracker_key: str, overwrite: bool) -> List:
        """
        Returns a new list with those of the 'input_file_ids' (all file ids, if None is passed)
        that have to be processed: all of them if 'overwrite' is True, otherwise only those
        whose value of 'process_tracker_key' in file_infos is False or None.
        """
        if input_file_ids is None:
            input_file_ids = self.file_infos['file_id']
        if process_tracker_key not in self.file_infos:
            self.add_new_key_to_file_infos(process_tracker_key)
        if overwrite == True:
            return list(input_file_ids)
        output_file_ids = []
        for file_id in input_file_ids:
            index = self.file_infos['file_id'].index(file_id)
            process_tracker_status = self.file_infos[process_tracker_key][index]
            if process_tracker_status is None or process_tracker_status == False:
                output_file_ids.append(file_id)
        return output_file_ids

In [None]:
class FileHistory:
    """
    Tracks which processing steps were applied to a single file, and with which settings.

    - tracked_history: pandas DataFrame with one row per tracked processing step.
    - tracked_settings: dictionary with the strategy specific settings of each tracked
      processing step, using the row index of the step in 'tracked_history' as key.
    """

    def __init__(self, file_id: int, source_image_filepath: Path) -> None:
        self.file_id = file_id
        self.source_image_filepath = source_image_filepath
        self.datetime_added = datetime.datetime.now()
        self._initialize_tracked_history()
        self._initialize_tracked_settings()


    def _initialize_tracked_history(self) -> None:
        empty_history = {'processing_step_id': [],
                         'processing_strategy': [],
                         'step_finished_at': []}
        self.tracked_history = pd.DataFrame(data = empty_history)


    def _initialize_tracked_settings(self) -> None:
        self.tracked_settings = {}


    def track_processing_step(self, processing_step_id: str, processing_strategy_name: str, strategy_specific_settings: Dict) -> None:
        """
        Appends a new row with the passed ids and the current date & time to 'tracked_history'
        and stores 'strategy_specific_settings' in 'tracked_settings', using the index of the new row as key.
        """
        tracked_details = {'processing_step_id': [processing_step_id],
                           'processing_strategy': [processing_strategy_name],
                           'step_finished_at': [datetime.datetime.now()]}
        tracked_details_df = pd.DataFrame(data = tracked_details)
        if self.tracked_history.empty:
            # avoids concatenating with an empty DataFrame, which would degrade the column dtypes
            self.tracked_history = tracked_details_df
        else:
            self.tracked_history = pd.concat([self.tracked_history, tracked_details_df], ignore_index = True)
        self.tracked_settings[int(self.tracked_history.index[-1])] = strategy_specific_settings