diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml new file mode 100644 index 0000000..809ea2d --- /dev/null +++ b/.github/workflows/deploy_docs.yml @@ -0,0 +1,28 @@ +name: Publish Docs +on: + push: + branches: + - main # Запускати тільки при пуші в main + +permissions: + contents: write + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install mkdocs-material mkdocstrings[python] + + - name: Deploy to GitHub Pages + run: mkdocs gh-deploy --force diff --git a/README.MD b/README.MD index d8f011f..fc6cb3a 100644 --- a/README.MD +++ b/README.MD @@ -1,9 +1,6 @@ -# Automatic File Manager +# DataForge -A simple way to automate working with files. You can set a time delay for automatic execution of your command. For example: - - python fileManager.py move ./Downloads/ ./Videos -p .mp4 .MP4 .mov .MOV -r -s 60 -This command will move all files with .mp4 .MP4 .mov .MOV from Downloads to the Videos directory, check the Downloads directory again and do task one more time until there is no files that match patterns in Downloads, then FileManager will be waiting for 60 seconds and check Downloads again. +A simple way to automate working with datasets. You can set a time delay for automatic execution of your command. if you don’t want the command works in a cycle, just don't use "-r" argument. And it will be executed for one time. @@ -11,10 +8,16 @@ if you don’t want the command works in a cycle, just don't use "-r" argument. ## Available commands - **move** - move files from source directory to target directory - **slice** - slice video files to images from the source directory to the target directory. 
Also, you can set flag "--remove" or "-rm" for deleting a source video file after slicing + - **delete** - delete files that match patterns from source directory - **dedup** - find duplicates in source directory that matches a pattern. An image means a duplicate if it's hash has lower Hamming distance with comparing image hash than threshold value. The threshold value setups in percentage and must be in range [0, 100]. Pay attention to core_size parameter: the lower value makes details at photo less important, and the higher value makes details mach important while comparing information at images. It’s implemented only dHash comparing method for now. - **clean-annotations** - find annotation files in directory that doesn't have corresponding files +- **convert-annotations** - converts annotations from source format to destination format + +#### to see command syntax and arguments use: + python data_forge.py -h + ## How to use: clone git repository: @@ -36,11 +39,11 @@ read the --help command for learn more about available commands and arguments: for check available commands - python fileManager.py --help + python data_forge.py --help for check the command usage and available arguments - python fileManager.py {command} --help + python data_forge.py {command} --help ## What else? 
class ObjectAnnotation:
    """
    Holds the fields of one annotated object plus placeholders for derived geometry values.

    Raw fields (``imsize``, ``name``, ``pose``, ``truncated``, ``difficult``, ``bndbox``) are read from
    keyword arguments. Derived values (``width``, ``height``, ``x_center``, ``y_center``, ``area``,
    ``aspect_ratio``, ``relative_area``) start as ``None``; nothing in this class computes them yet.
    NOTE(review): the field names look like Pascal VOC ``<object>`` fields - confirm against the caller.
    """

    def __init__(self, log_level: str = LevelMapping.debug, log_path: Optional[Path] = None, **kwargs):
        """
        :param log_level: log level passed to ``LoggerConfigurator.setup``
        :type log_level: str
        :param log_path: directory for the ``<ClassName>.log`` file; when falsy, ``None`` is passed as log path
        :type log_path: Optional[Path]
        :param kwargs: optional raw fields: ``imsize``, ``name``, ``pose`` (default ``'Unspecified'``),
            ``truncated`` (default 0), ``difficult`` (default 0), ``bndbox`` (default ``{}``)
        """
        # The logger is created first: the property setters below log on invalid input,
        # so they must never run before it exists.
        self.logger = LoggerConfigurator.setup(
            name=self.__class__.__name__,
            log_level=log_level,
            log_path=Path(log_path) / f"{self.__class__.__name__}.log" if log_path else None
        )

        self.imsize: Optional[Tuple[int, int]] = kwargs.get("imsize")
        self.name: Optional[str] = kwargs.get("name")
        self.pose: str = kwargs.get("pose", 'Unspecified')
        self.truncated: int = kwargs.get("truncated", 0)
        self.difficult: int = kwargs.get("difficult", 0)
        self.bndbox: Dict[str, int] = kwargs.get("bndbox", {})

        # Derived values: ``None`` means "not calculated yet".
        self.width = None
        self.height: Optional[int] = None
        self.x_center: Optional[int] = None
        self.y_center: Optional[int] = None
        self.area = None
        self.aspect_ratio: Optional[int] = None
        self.relative_area: Optional[float] = None

    def _coerce_int(self, label: str, value) -> Optional[int]:
        """
        Convert *value* to ``int``; ``None`` is kept as the "not calculated" marker.

        :param label: capitalised field name used in the warning text
        :param value: ``None``, an int, or anything ``float()`` accepts
        :return: the converted value or ``None``
        :raises TypeError: if *value* has a type ``float()`` cannot handle
        :raises ValueError: if *value* is a string that is not a number
        """
        if value is None or isinstance(value, int):
            return value
        try:
            return int(float(value))
        except (TypeError, ValueError):
            self.logger.warning(f"{label} must be an integer, got {value}")
            raise

    @property
    def area(self) -> Optional[int]:
        """Object area, or ``None`` while it has not been set."""
        return self._area

    @area.setter
    def area(self, value: Optional[int]) -> None:
        self._area = self._coerce_int("Area", value)

    @property
    def width(self) -> Optional[int]:
        """Object width, or ``None`` while it has not been set."""
        return self._width

    @width.setter
    def width(self, value: Optional[int]) -> None:
        # Without a setter the ``self.width = None`` assignment in __init__ raises AttributeError.
        self._width = self._coerce_int("Width", value)
arguments""" src: str = "src" - dst: str = "dst" + dst: str = "--dst" + pattern: str = "--pattern" p: str = "-p" repeat: str = "--repeat" @@ -30,3 +31,4 @@ class Arguments: cache_name: str = "--cache_name" a_suffix: str = "--a_suffix" a_source: str = "--a_source" + destination_type: str = "--destination-type" diff --git a/const_utils/commands.py b/const_utils/commands.py index ba3b190..fd27910 100644 --- a/const_utils/commands.py +++ b/const_utils/commands.py @@ -7,4 +7,5 @@ class Commands: slice: str = "slice" delete: str = "delete" dedup: str = "dedup" - clean_annotations: str = "clean-annotations" \ No newline at end of file + clean_annotations: str = "clean-annotations" + convert_annotations: str = "convert-annotations" \ No newline at end of file diff --git a/const_utils/default_values.py b/const_utils/default_values.py index 3830970..5721138 100644 --- a/const_utils/default_values.py +++ b/const_utils/default_values.py @@ -40,6 +40,7 @@ class AppSettings(BaseSettings): cache_name: Optional[Path] = Field(default=None) a_suffix: Tuple[str, ...] 
@field_validator("n_jobs")
@classmethod
def ensure_n_jobs(cls, value: Union[int, str]) -> int:
    """
    Normalise the requested number of worker processes.

    Non-int input is converted with ``int(float(value))`` and then clamped like any other value,
    so a string such as ``"64"`` can no longer bypass the limits.

    :param value: requested number of workers, as an int or a numeric string
    :type value: Union[int, str]
    :return: a worker count in the range ``[1, max(1, cpu_count - 1)]``
    :rtype: int
    :raises ValueError: if a string value is not numeric
    :raises TypeError: if the value has a type ``float()`` cannot handle
    """
    if not isinstance(value, int):
        value = int(float(value))

    # Keep one core free for the main process, but never go below one worker:
    # on a single-core machine ``cpu_count() - 1`` would otherwise be 0.
    upper_bound = max(1, multiprocessing.cpu_count() - 1)
    return min(max(value, 1), upper_bound)
If None - that means annotations are in the same folder with" - " images") \ No newline at end of file + " images") + destination_type: str = "A type of destination annotation format" \ No newline at end of file diff --git a/fileManager.py b/data_forge.py similarity index 93% rename from fileManager.py rename to data_forge.py index 37ed90e..cd2dc25 100644 --- a/fileManager.py +++ b/data_forge.py @@ -5,7 +5,7 @@ from const_utils.parser_help import HelpStrings as hs from const_utils.commands import Commands from const_utils.arguments import Arguments as arg -# from const_utils.default_values import DefaultValues as defaults +from file_operations.convert_annotations import ConvertAnnotationsOperation from file_operations.deduplicate import DedupOperation from file_operations.delete import DeleteOperation from file_operations.move import MoveOperation @@ -13,7 +13,7 @@ from file_operations.clean_annotations import CleanAnnotationsOperation -class FileManager: +class DataForge: """Class corresponding to CLI and launch command""" def __init__(self): self.parser = argparse.ArgumentParser(description="FileManager") @@ -23,7 +23,8 @@ def __init__(self): Commands.slice: SliceOperation, Commands.delete: DeleteOperation, Commands.dedup: DedupOperation, - Commands.clean_annotations: CleanAnnotationsOperation + Commands.clean_annotations: CleanAnnotationsOperation, + Commands.convert_annotations: ConvertAnnotationsOperation } self.settings = AppSettings.load_config(Constants.config_file) self._setup_commands() @@ -65,5 +66,5 @@ def execute(self): if __name__ == "__main__": - app = FileManager() + app = DataForge() app.execute() \ No newline at end of file diff --git a/docs/api/base_hasher.md b/docs/api/base_hasher.md new file mode 100644 index 0000000..3cbbe0a --- /dev/null +++ b/docs/api/base_hasher.md @@ -0,0 +1 @@ +::: tools.comparer.img_comparer.hasher.base_hasher.BaseHasher diff --git a/docs/api/converter.md b/docs/api/converter.md new file mode 100644 index 
0000000..8466b75 --- /dev/null +++ b/docs/api/converter.md @@ -0,0 +1 @@ +::: tools.annotation_converter.converter.base.BaseConverter \ No newline at end of file diff --git a/docs/api/dhash.md b/docs/api/dhash.md new file mode 100644 index 0000000..a62da4e --- /dev/null +++ b/docs/api/dhash.md @@ -0,0 +1 @@ +::: tools.comparer.img_comparer.hasher.dhash.DHash diff --git a/docs/api/img_comparer.md b/docs/api/img_comparer.md new file mode 100644 index 0000000..451514b --- /dev/null +++ b/docs/api/img_comparer.md @@ -0,0 +1 @@ +::: tools.comparer.img_comparer.img_comparer.ImageComparer diff --git a/docs/api/voc_yolo_converter.md b/docs/api/voc_yolo_converter.md new file mode 100644 index 0000000..6e5094a --- /dev/null +++ b/docs/api/voc_yolo_converter.md @@ -0,0 +1 @@ +::: tools.annotation_converter.converter.voc_yolo_converter.VocYOLOConverter diff --git a/docs/cli/data_forge.md b/docs/cli/data_forge.md new file mode 100644 index 0000000..fbfbeff --- /dev/null +++ b/docs/cli/data_forge.md @@ -0,0 +1 @@ +::: data_forge.DataForge diff --git a/docs/cli/default_values.md b/docs/cli/default_values.md new file mode 100644 index 0000000..f9e8533 --- /dev/null +++ b/docs/cli/default_values.md @@ -0,0 +1 @@ +::: const_utils.default_values.AppSettings diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..fc6cb3a --- /dev/null +++ b/docs/index.md @@ -0,0 +1,58 @@ +# DataForge + +A simple way to automate working with datasets. You can set a time delay for automatic execution of your command. + +if you don’t want the command works in a cycle, just don't use "-r" argument. And it will be executed for one time. + + +## Available commands +- **move** - move files from source directory to target directory +- **slice** - slice video files to images from the source directory to the target directory. 
Also, you can set the flag "--remove" or "-rm" to delete a source video file after slicing + - **delete** - delete files that match patterns from the source directory +- **dedup** - find duplicates in the source directory that match a pattern. An image is considered a duplicate if its hash has a lower +Hamming distance to the compared image's hash than the threshold value. The threshold value is set as a percentage and must be in the range [0, 100]. Pay attention to the core_size parameter: a lower value makes details in the photo less important, and a higher value makes details more important when comparing images. Only the dHash comparison method is implemented for now. +- **clean-annotations** - find annotation files in a directory that don't have corresponding files +- **convert-annotations** - converts annotations from the source format to the destination format + +#### to see command syntax and arguments use: + python data_forge.py -h + +## How to use: +clone git repository: + + git clone https://github.com/SeregaCodit/AutoFileManager.git + +go to project directory: + + cd path_to_project + +create virtual environment and activate it: + + python -m venv .venv + +install requirements: + + pip install -r requirements.txt + +read the --help output to learn more about available commands and arguments: + +to check available commands + + python data_forge.py --help + +to check the command usage and available arguments + + python data_forge.py {command} --help + + +## What else? + +To use DataForge more comfortably with multiple tasks, you can create an .sh file or modify [strat_all_tasks.sh](https://github.com/SeregaCodit/AutoFileManager/blob/main/strat_all_tasks.sh) with a list of your commands. 
And run all of them just by one simple command: + + bash path_to_file/start_all_tasks.sh + +for stop executing of all commands use: + + pkill -f data_forge.py + diff --git a/docs/operations/clean_annotations.md b/docs/operations/clean_annotations.md new file mode 100644 index 0000000..6b98850 --- /dev/null +++ b/docs/operations/clean_annotations.md @@ -0,0 +1 @@ +::: tools.mixins.file_remover.FileRemoverMixin \ No newline at end of file diff --git a/docs/operations/convert_annotations.md b/docs/operations/convert_annotations.md new file mode 100644 index 0000000..07a96cd --- /dev/null +++ b/docs/operations/convert_annotations.md @@ -0,0 +1 @@ +::: file_operations.convert_annotations.ConvertAnnotationsOperation \ No newline at end of file diff --git a/docs/operations/deduplicate.md b/docs/operations/deduplicate.md new file mode 100644 index 0000000..02010be --- /dev/null +++ b/docs/operations/deduplicate.md @@ -0,0 +1 @@ +::: file_operations.deduplicate.DedupOperation \ No newline at end of file diff --git a/docs/operations/delete.md b/docs/operations/delete.md new file mode 100644 index 0000000..3a3d5af --- /dev/null +++ b/docs/operations/delete.md @@ -0,0 +1 @@ +::: file_operations.delete.DeleteOperation \ No newline at end of file diff --git a/docs/operations/file_operation.md b/docs/operations/file_operation.md new file mode 100644 index 0000000..1a5daf5 --- /dev/null +++ b/docs/operations/file_operation.md @@ -0,0 +1 @@ +::: file_operations.file_operation.FileOperation diff --git a/docs/operations/move.md b/docs/operations/move.md new file mode 100644 index 0000000..3bf7e8c --- /dev/null +++ b/docs/operations/move.md @@ -0,0 +1 @@ +::: file_operations.move.MoveOperation diff --git a/docs/operations/slice.md b/docs/operations/slice.md new file mode 100644 index 0000000..fa44fef --- /dev/null +++ b/docs/operations/slice.md @@ -0,0 +1 @@ +::: file_operations.slice.SliceOperation diff --git a/file_operations/clean_annotations.py 
class ConvertAnnotationsOperation(FileOperation):
    """
    File operation that converts annotation files from one format to another.

    The converter class is looked up by the pair (first source pattern, destination type).
    """

    def __init__(self, settings: AppSettings, **kwargs):
        """converts annotation formats from pattern to destination. You Can use only one value of pattern at the time

        :param settings: application settings passed on to ``FileOperation``
        :type settings: AppSettings
        :param kwargs: parsed command arguments; ``destination_type`` and ``n_jobs`` are read here
        :raises KeyError: if there is no converter for the (pattern, destination type) pair
        """
        super().__init__(settings, **kwargs)
        self.destination_type = kwargs.get('destination_type')
        self.converter_mapping = {
            (".xml", "yolo"): VocYOLOConverter
        }

        # Only the first pattern is used, see the note in the docstring.
        # presumably self.pattern is filled by FileOperation.__init__ - not visible here, verify
        conversion = (self.pattern[0], self.destination_type)
        try:
            converter_class = self.converter_mapping[conversion]
        except KeyError:
            supported = ", ".join(f"'{source}' -> '{target}'" for source, target in self.converter_mapping)
            message = (f"Unsupported conversion '{conversion[0]}' -> '{conversion[1]}'. "
                       f"Supported conversions: {supported}")
            self.logger.error(message)
            raise KeyError(message) from None
        self.converter = converter_class()

        # A value typed on the command line may arrive as a string; the process pool needs an int.
        n_jobs = kwargs.get('n_jobs', 1)
        if n_jobs is not None and not isinstance(n_jobs, int):
            n_jobs = int(float(n_jobs))
        self.n_jobs = n_jobs

    @staticmethod
    def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
        """
        Register the arguments of this command on the given parser.

        :param settings: application settings, supplies the default for the n_jobs argument
        :type settings: AppSettings
        :param parser: parser of the command
        :type parser: argparse.ArgumentParser
        """
        parser.add_argument(
            Arguments.dst,
            default=None,
            help=HelpStrings.dst
        )
        parser.add_argument(
            Arguments.destination_type,
            help=HelpStrings.destination_type
        )
        parser.add_argument(
            Arguments.n_jobs,
            type=int,  # without it a value given on the command line stays a string
            default=settings.n_jobs,
            help=HelpStrings.n_jobs
        )

    def do_task(self):
        """Convert the files selected for this run and write the results into the target directory."""
        self.converter.convert(self.files_for_task, self.target_directory, self.n_jobs)
@property
def target_directory(self):
    """Directory that receives the results of the operation."""
    return self._target_directory

@target_directory.setter
def target_directory(self, value: Union[Path, str, None]) -> None:
    """
    Store the target directory as a ``Path``.

    :param value: ``None`` falls back to the source directory, a ``Path`` is stored as is,
        a string is wrapped in ``Path``
    :type value: Union[Path, str, None]
    :raises TypeError: if value is of any other type
    """
    if value is None:
        self._target_directory = self.source_directory
    elif isinstance(value, Path):
        self._target_directory = value
    elif isinstance(value, str):
        self._target_directory = Path(value)
    else:
        message = f"Target directory '{value}' is not valid. Got type '{type(value)}'"
        # __init__ assigns target_directory ahead of its logging section, so the logger may not
        # exist yet; without this guard an AttributeError would hide the TypeError below.
        logger = getattr(self, "logger", None)
        if logger is not None:
            logger.error(message)
        raise TypeError(message)
+mkdocs>=1.6.0 +mkdocs-material>=9.5.0 +mkdocstrings[python]>=0.26.0 +mkdocs-autorefs>=1.2.0 +pydantic>=2.10.0 +pydantic-settings>=2.8.0 +python-dotenv>=1.0.0 +PyYAML>=6.0.0 +numpy>=1.24.0,<2.0.0 +opencv-python>=4.8.0 +requests>=2.31.0 +pytest>=8.0.0 +python-dateutil>=2.8.0 +watchdog>=4.0.0 \ No newline at end of file diff --git a/tools/annotation_converter/__init__.py b/tools/annotation_converter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/annotation_converter/converter/__init__.py b/tools/annotation_converter/converter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/annotation_converter/converter/base.py b/tools/annotation_converter/converter/base.py new file mode 100644 index 0000000..d5d9056 --- /dev/null +++ b/tools/annotation_converter/converter/base.py @@ -0,0 +1,46 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Optional, Tuple + +from logger.log_level_mapping import LevelMapping +from logger.logger import LoggerConfigurator +from tools.annotation_converter.reader.base import BaseReader +from tools.annotation_converter.reader.voc import XMLReader +from tools.annotation_converter.writer.yolo import YoloWriter + + +class BaseConverter(ABC): + """ + Base converter class. 
Based on the source and destination formats, selects reader and writer classes for + processing data + """ + def __init__(self, log_level: str = LevelMapping.debug, log_path: Optional[Path] = None): + + self._reader: Optional[BaseReader] = None + self._writer: Optional[YoloWriter] = None + self.reader_mapping = { + ".xml": XMLReader, + } + + self.writer_mapping = { + ".txt": YoloWriter, + } + + self.logger = LoggerConfigurator.setup( + name=self.__class__.__name__, + log_level=log_level, + log_path=Path(log_path) / f"{self.__class__.__name__}.log" if log_path else None + ) + + + @abstractmethod + def convert(self, file_paths: Tuple[Path], target_path: Path, n_jobs: int = 1) -> None: + pass + + @property + def reader(self) -> BaseReader: + return self._reader + + @reader.setter + def reader(self, reader: BaseReader) -> None: + self._reader = reader \ No newline at end of file diff --git a/tools/annotation_converter/converter/voc_yolo_converter.py b/tools/annotation_converter/converter/voc_yolo_converter.py new file mode 100644 index 0000000..785f713 --- /dev/null +++ b/tools/annotation_converter/converter/voc_yolo_converter.py @@ -0,0 +1,207 @@ +from concurrent.futures import ProcessPoolExecutor +from functools import partial +from pathlib import Path +from typing import List, Dict, Set, Tuple + +import numpy as np + +from tools.annotation_converter.converter.base import BaseConverter +from tools.annotation_converter.reader.base import BaseReader +from tools.annotation_converter.writer.base import BaseWriter + + +class VocYOLOConverter(BaseConverter): + TARGET_FORMAT = ".xml" + DESTINATION_FORMAT = ".txt" + CLASSES_FILE = "classes.txt" + def __init__(self, tolerance: int = 6): + """ + :param tolerance: an int value that determines to which decimal place to round a converted in YOLO + format coordinates. By default, it is 6 in YOLO format. 
+ :type tolerance: int + """ + super().__init__() + + self.tolerance = tolerance + self.reader = self.reader_mapping[self.TARGET_FORMAT]() + self.writer = self.writer_mapping[self.DESTINATION_FORMAT]() + self.objects: list = list() + self.class_mapping: Dict[str, int] = dict() + + @staticmethod + def _get_classes_worker(annotation_paths: Path, reader: BaseReader) -> Set[str]: + """ + :param annotation_paths: paths to annotation files + :type annotation_paths: Path + :param reader: reader object for parsing annotations + :type reader: BaseReader + :return: a set with all object classes found in annotations + """ + try: + data = reader.read(annotation_paths) + annotation = data.get("annotation", {}) + objects = annotation.get("object", list()) + if not isinstance(objects, list): + objects = [objects] + return {obj["name"] for obj in objects} + except Exception: + return set() + + @staticmethod + def _convert_worker( + file_path: Path, + destination_path: Path, + reader: BaseReader, + writer: BaseWriter, + class_mapping: Dict[str, int], + tolerance: int, + suffix: str + ) -> bool: + """ + pipline for parsing annotations, recalculating annotated objects data to YOLO format and savin it in + destination path + + :param file_path: path to annotation file + :type file_path: Path + :param destination_path: path to output annotation file + :type destination_path: Path + :param reader: reader object for parsing annotations + :type reader: BaseReader + :param writer: writer object for writing converted annotation files + :type writer: BaseWriter + :param class_mapping: mapping from class name to class id + :type class_mapping: Dict[str, int] + :param tolerance: an int value that determines to which decimal place to round a converted in YOLO + format coordinates. 
+ :type tolerance: int + :param suffix: suffix to add to filename + :type suffix: str + :return: True if a file was successfully converted, else returns False + + """ + data = reader.read(file_path) + + if data.get("annotation") is None: + return False + + annotation = data["annotation"] + + try: + img_width = int(annotation["size"]["width"]) + img_height = int(annotation["size"]["height"]) + + if img_width == 0 or img_height == 0: + raise ValueError(f"Image size is zero in annotation {file_path}!") + except (KeyError, ValueError, TypeError): + return False + + annotated_objects = annotation.get("object", list()) + + # reader using xmltodict that returns a dict if there is just one object, if more - returns a list + if not isinstance(annotated_objects, list): + annotated_objects = [annotated_objects] + + converted_objects: List[str] = list() + + for obj in annotated_objects: + try: + # saving objectnames for classes.txt + name = obj["name"] + + if name not in class_mapping: + continue + class_id = class_mapping[name] + + # calculate yolo format cords + bbox = obj["bndbox"] + xmin, ymin, xmax, ymax = ( + float(bbox["xmin"]), float(bbox["ymin"]), + float(bbox["xmax"]), float(bbox["ymax"]) + ) + + width = ((xmax - xmin) / img_width) + height = (ymax - ymin) / img_height + x_center = (xmin + xmax) / 2 / img_width + y_center = (ymin + ymax) / 2 / img_height + + x_center, y_center, width, height = map(lambda x: np.clip(x, 0, 1), + [x_center, y_center, width, height]) + + row = (f"{class_id} " + f"{x_center:.{tolerance}f} " + f"{y_center:.{tolerance}f} " + f"{width:.{tolerance}f} " + f"{height:.{tolerance}f}") + converted_objects.append(row) + + except (KeyError, ValueError, TypeError): + continue + + converted_path = destination_path / f"{file_path.stem}{suffix}" + writer.write(converted_objects, converted_path) + return True + + def convert(self, file_paths: Tuple[Path], target_path: Path, n_jobs: int = 1) -> None: + """ + discover classes of annotated objects and 
writes them in classes file. + Run multiprocessing conversion and writing pipline + + :param file_paths: list of annotation files + :type file_paths: Tuple[Path] + :param target_path: path to output annotation file directory + :type target_path: Path + :param n_jobs: number of workers + :type n_jobs: int + :return None + """ + count_to_convert = len(file_paths) + + if count_to_convert > 0: + target_path.mkdir(parents=True, exist_ok=True) + + self.logger.info(f"Start converting {count_to_convert} annotations with {n_jobs} workers...") + + classes_func = partial(self._get_classes_worker, reader=self.reader) + with ProcessPoolExecutor(max_workers=n_jobs) as executor: + classes = list(executor.map(classes_func, file_paths)) + + self.objects = sorted(set().union(*classes)) + class_mapping = {name: i for i, name in enumerate(self.objects)} + self.logger.info(f"Unified class mapping created: {len(self.objects)} classes") + + worker_func = partial( + self._convert_worker, + destination_path=target_path, + reader=self.reader, + writer=self.writer, + class_mapping=class_mapping, + tolerance=self.tolerance, + suffix=self.DESTINATION_FORMAT + ) + + self.logger.info(f"converting {count_to_convert} annotations with {n_jobs} workers...") + converted_count = 0 + with ProcessPoolExecutor(max_workers=n_jobs) as executor: + converted_results = executor.map(worker_func, file_paths) + converted_count = sum(converted_results) + + self.logger.info(f"Converted {converted_count}/{count_to_convert} annotations and saved in {target_path}") + + self.writer.write(self.objects, target_path / self.CLASSES_FILE) + self.logger.info(f"Saved {self.CLASSES_FILE} in {target_path}") + + + @property + def tolerance(self) -> int: + return self._tolerance + + @tolerance.setter + def tolerance(self, value: int): + if isinstance(value, int): + self._tolerance = value + else: + try: + self._tolerance = int(float(value)) + except TypeError as e: + self.logger.warning(f"Can`t convert {value} to int from type 
{type(value)})\n{e}") + raise TypeError(e) diff --git a/tools/annotation_converter/reader/__init__.py b/tools/annotation_converter/reader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/annotation_converter/reader/base.py b/tools/annotation_converter/reader/base.py new file mode 100644 index 0000000..7d73bba --- /dev/null +++ b/tools/annotation_converter/reader/base.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod +from pathlib import Path + + +class BaseReader(ABC): + def __init__(self): + pass + + @abstractmethod + def read(self, file_path: Path) -> dict: + pass \ No newline at end of file diff --git a/tools/annotation_converter/reader/voc.py b/tools/annotation_converter/reader/voc.py new file mode 100644 index 0000000..06476a1 --- /dev/null +++ b/tools/annotation_converter/reader/voc.py @@ -0,0 +1,10 @@ +from pathlib import Path + +from tools.annotation_converter.reader.base import BaseReader +import xmltodict + +class XMLReader(BaseReader): + """Parse .xml annotation file, stable works with XML files, that was created by labelImg """ + def read(self, file_path: Path) -> dict: + data = xmltodict.parse(file_path.read_text()) + return data \ No newline at end of file diff --git a/tools/annotation_converter/writer/__init__.py b/tools/annotation_converter/writer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/annotation_converter/writer/base.py b/tools/annotation_converter/writer/base.py new file mode 100644 index 0000000..4bbcbe0 --- /dev/null +++ b/tools/annotation_converter/writer/base.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import List, Tuple + + +class BaseWriter(ABC): + def __init__(self): + pass + + @abstractmethod + def write(self, data: List[str], file_path: Path) -> dict: + pass diff --git a/tools/annotation_converter/writer/yolo.py b/tools/annotation_converter/writer/yolo.py new file mode 100644 index 0000000..af80b39 --- /dev/null +++ 
class YoloWriter(BaseWriter):
    """Writer that stores YOLO-format data as plain text, one entry per line."""

    def write(self, data: List[str], file_path: Path) -> None:
        """
        Write the non-empty entries of ``data`` to ``file_path``.

        Missing parent directories are created first and an existing file
        is overwritten. Each kept entry is followed by a newline.

        :param data: entries to write; falsy ones (e.g. empty strings) are skipped
        :type data: List[str]
        :param file_path: path of the output file
        :type file_path: Path
        :return None
        """
        file_path.parent.mkdir(parents=True, exist_ok=True)

        non_empty = (entry for entry in data if entry)
        with file_path.open("w") as out:
            for entry in non_empty:
                out.write(f"{entry}\n")
# Ready-made argv lists, keyed by command, for launching DataForge by hand.
# Element 0 is the program name, element 1 the sub-command, the rest its arguments.
MAPPING = {
    Commands.slice: [
        "data_forge.py", "slice", "./media/",
        "--dst", "./media/imgs/",
        "-p", ".mp4", ".MP4",
        "-t", ".jpg",
        # "-r" is left out on purpose; add it to run the command in a cycle.
        "-s", "60",
        "-step", "1",
    ],
    Commands.delete: [
        "data_forge.py", "delete", "./media/imgs_new/",
        "-p", ".jpg",
    ],
    Commands.move: [
        "data_forge.py", "move", "./media/imgs/",
        "--dst", "./media/imgs_new/",
        "-p", ".jpg", ".png",
        "-r",
        "-s", "30",
    ],
    Commands.dedup: [
        "data_forge.py", "dedup",
        # Other source directories used during manual runs:
        #   "./media/imgs/"
        #   "/home/pivden/PycharmProjects/yoloTrainer/saved_imgs/"
        "/mnt/qnap/Staff/Naumenko/NotTheSkynet/img_dataset/",
        "-p", ".jpg", ".png",
        "--filetype", "image",
        "--threshold", "10",
        "--cache_name", "test1",
    ],
    Commands.convert_annotations: [
        "data_forge.py", "convert-annotations", "./media/annotated/",
        "--dst", "./media/yolo_anns/",
        "-p", ".xml",
        "--destination-type", "yolo",
    ],
}

if __name__ == "__main__":
    # The core-size option is attached to the dedup arguments only when the
    # file is run as a script; importing the module leaves MAPPING as declared.
    MAPPING[Commands.dedup].extend([Arguments.core_size, "16"])

    # Pretend the "move" command line was typed, then hand over to the app.
    sys.argv = MAPPING[Commands.move]
    forge = DataForge()
    forge.execute()