Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/workflows/deploy_docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Installs the project's Python dependencies plus MkDocs tooling and runs
# `mkdocs gh-deploy` on every push to main.
name: Publish Docs
on:
push:
branches:
- main # Run only on pushes to main

# NOTE(review): write access is presumably required so `mkdocs gh-deploy` can push the built site — confirm.
permissions:
contents: write

jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install mkdocs-material mkdocstrings[python]

- name: Deploy to GitHub Pages
run: mkdocs gh-deploy --force
19 changes: 11 additions & 8 deletions README.MD
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
# Automatic File Manager
# DataForge

A simple way to automate working with files. You can set a time delay for automatic execution of your command. For example:

python fileManager.py move ./Downloads/ ./Videos -p .mp4 .MP4 .mov .MOV -r -s 60
This command will move all files with .mp4 .MP4 .mov .MOV from Downloads to the Videos directory, check the Downloads directory again and do task one more time until there is no files that match patterns in Downloads, then FileManager will be waiting for 60 seconds and check Downloads again.
A simple way to automate working with datasets. You can set a time delay for automatic execution of your command.

If you don’t want the command to run in a loop, just omit the "-r" argument and it will be executed only once.


## Available commands
- **move** - move files from source directory to target directory
- **slice** - slice video files to images from the source directory to the target directory. Also, you can set flag "--remove" or "-rm" for deleting a source video file after slicing

- **delete** - delete files that match patterns from source directory
- **dedup** - find duplicates in the source directory among files that match a pattern. An image is considered a duplicate if the
Hamming distance between its hash and the hash of the compared image is lower than the threshold value. The threshold value is set as a percentage and must be in the range [0, 100]. Pay attention to the core_size parameter: a lower value makes details in the photo less important, and a higher value makes details more important when comparing images. Only the dHash comparison method is implemented for now.
- **clean-annotations** - find annotation files in a directory that don't have corresponding files
- **convert-annotations** - converts annotations from source format to destination format

#### to see command syntax and arguments use:
python data_forge.py <command> -h

## How to use:
clone git repository:

Expand All @@ -36,11 +39,11 @@ read the --help command for learn more about available commands and arguments:

for check available commands

python fileManager.py --help
python data_forge.py --help

for check the command usage and available arguments

python fileManager.py {command} --help
python data_forge.py {command} --help


## What else?
Expand All @@ -51,5 +54,5 @@ For more comfortable using FileManager with multiple tasks you can create an .sh

to stop the execution of all commands, use:

pkill -f fileManager.py
pkill -f data_forge.py

51 changes: 51 additions & 0 deletions const_utils/annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple, Dict, Optional

from logger.log_level_mapping import LevelMapping
from logger.logger import LoggerConfigurator


class ObjectAnnotation:
    """Holds the description of one annotated object plus derived geometry fields.

    The descriptive fields (``imsize``, ``name``, ``pose``, ``truncated``,
    ``difficult``, ``bndbox``) are read from keyword arguments. The geometry
    fields (``width``, ``height``, ``x_center``, ``y_center``, ``area``,
    ``aspect_ratio``, ``relative_area``) start as ``None`` and are expected to
    be filled in later.
    """

    def __init__(self, log_level: str = LevelMapping.debug, log_path: Optional[Path] = None, **kwargs):
        """Create an annotation from keyword arguments.

        Args:
            log_level: Logging level passed to ``LoggerConfigurator.setup``.
            log_path: Directory for the log file; when falsy, no log path is
                passed to the logger configurator.
            **kwargs: Optional ``imsize``, ``name``, ``pose``, ``truncated``,
                ``difficult`` and ``bndbox`` values.
        """
        # The logger must exist before any property-backed attribute is
        # assigned, because the ``area`` setter logs on invalid input.
        self.logger = LoggerConfigurator.setup(
            name=self.__class__.__name__,
            log_level=log_level,
            log_path=Path(log_path) / f"{self.__class__.__name__}.log" if log_path else None
        )

        self.imsize: Optional[Tuple[int, int]] = kwargs.get("imsize")
        self.name: Optional[str] = kwargs.get("name")
        self.pose: str = kwargs.get("pose", 'Unspecified')
        self.truncated: int = kwargs.get("truncated", 0)
        self.difficult: int = kwargs.get("difficult", 0)
        self.bndbox: Dict[str, int] = kwargs.get("bndbox", {})

        # Geometry is unknown at construction time; ``width`` and ``area``
        # go through their property setters.
        self.width: Optional[int] = None
        self.height: Optional[int] = None
        self.x_center: Optional[int] = None
        self.y_center: Optional[int] = None
        self.area: Optional[int] = None
        self.aspect_ratio: Optional[int] = None
        self.relative_area: Optional[float] = None

    @property
    def area(self) -> Optional[int]:
        """Object area, or ``None`` while it has not been set."""
        return self._area

    @area.setter
    def area(self, value: Optional[int]) -> None:
        """Store the area, coercing non-int values through ``int(float(value))``.

        ``None`` is accepted as the "not set yet" state.

        Raises:
            TypeError: If ``value`` is of a type that cannot be converted.
        """
        if value is None or isinstance(value, int):
            self._area = value
            return

        try:
            self._area = int(float(value))
        except TypeError as e:
            error_text = f"Area must be an integer, got {value}"
            self.logger.warning(error_text)
            raise TypeError(error_text) from e

    @property
    def width(self) -> Optional[int]:
        """Object width, or ``None`` while it has not been set."""
        return self._width

    @width.setter
    def width(self, value: Optional[int]) -> None:
        """Store the width as given (the constructor initialises it to ``None``)."""
        self._width = value


4 changes: 3 additions & 1 deletion const_utils/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
class Arguments:
"""Command arguments"""
src: str = "src"
dst: str = "dst"
dst: str = "--dst"

pattern: str = "--pattern"
p: str = "-p"
repeat: str = "--repeat"
Expand All @@ -30,3 +31,4 @@ class Arguments:
cache_name: str = "--cache_name"
a_suffix: str = "--a_suffix"
a_source: str = "--a_source"
destination_type: str = "--destination-type"
3 changes: 2 additions & 1 deletion const_utils/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ class Commands:
slice: str = "slice"
delete: str = "delete"
dedup: str = "dedup"
clean_annotations: str = "clean-annotations"
clean_annotations: str = "clean-annotations"
convert_annotations: str = "convert-annotations"
13 changes: 12 additions & 1 deletion const_utils/default_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class AppSettings(BaseSettings):
cache_name: Optional[Path] = Field(default=None)
a_suffix: Tuple[str, ...] = Field(default_factory=tuple)
a_source: Optional[Path] = Field(default=None)
destination_type: Optional[str] = Field(default=None)

@field_validator('core_size')
@classmethod
Expand All @@ -55,7 +56,17 @@ def ensure_path(cls, value: Union[str, Path]) -> Path:
return Path(value)
return value


@field_validator("n_jobs")
@classmethod
def ensure_n_jobs(cls, value: Union[int, str]) -> int:
    """Coerce ``n_jobs`` to an int and clamp it to a usable worker count.

    Non-int input (for example a string) is converted with
    ``int(float(value))``. The result is then limited to the range
    ``[1, cpu_count - 1]`` so one core stays free, and it never drops below
    one worker, even on a single-core machine.

    Args:
        value: Requested number of jobs.

    Returns:
        The clamped number of jobs, always at least 1.
    """
    if not isinstance(value, int):
        value = int(float(value))

    # On a single-core host ``cpu_count() - 1`` would be 0, so floor it at 1.
    upper_bound = max(1, multiprocessing.cpu_count() - 1)
    return min(max(value, 1), upper_bound)

@classmethod
def load_config(cls, config_path: Path = Constants.config_file) -> "AppSettings":
Expand Down
3 changes: 2 additions & 1 deletion const_utils/parser_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ class HelpStrings:
"with next signature: <cache_{path_hash}_d{folder_name}{hash_type}s{core_size}.pkl>")
a_suffix: str = "A suffix pattern for annotations"
a_source: str = ("A source directory to annotations. If None - that means annotations are in the same folder with"
" images")
" images")
destination_type: str = "A type of destination annotation format"
9 changes: 5 additions & 4 deletions fileManager.py → data_forge.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from const_utils.parser_help import HelpStrings as hs
from const_utils.commands import Commands
from const_utils.arguments import Arguments as arg
# from const_utils.default_values import DefaultValues as defaults
from file_operations.convert_annotations import ConvertAnnotationsOperation
from file_operations.deduplicate import DedupOperation
from file_operations.delete import DeleteOperation
from file_operations.move import MoveOperation
from file_operations.slice import SliceOperation
from file_operations.clean_annotations import CleanAnnotationsOperation


class FileManager:
class DataForge:
"""Class corresponding to CLI and launch command"""
def __init__(self):
self.parser = argparse.ArgumentParser(description="FileManager")
Expand All @@ -23,7 +23,8 @@ def __init__(self):
Commands.slice: SliceOperation,
Commands.delete: DeleteOperation,
Commands.dedup: DedupOperation,
Commands.clean_annotations: CleanAnnotationsOperation
Commands.clean_annotations: CleanAnnotationsOperation,
Commands.convert_annotations: ConvertAnnotationsOperation
}
self.settings = AppSettings.load_config(Constants.config_file)
self._setup_commands()
Expand Down Expand Up @@ -65,5 +66,5 @@ def execute(self):

if __name__ == "__main__":

app = FileManager()
app = DataForge()
app.execute()
1 change: 1 addition & 0 deletions docs/api/base_hasher.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: tools.comparer.img_comparer.hasher.base_hasher.BaseHasher
1 change: 1 addition & 0 deletions docs/api/converter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: tools.annotation_converter.converter.base.BaseConverter
1 change: 1 addition & 0 deletions docs/api/dhash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: tools.comparer.img_comparer.hasher.dhash.DHash
1 change: 1 addition & 0 deletions docs/api/img_comparer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: tools.comparer.img_comparer.img_comparer.ImageComparer
1 change: 1 addition & 0 deletions docs/api/voc_yolo_converter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: tools.annotation_converter.converter.voc_yolo_converter.VocYOLOConverter
1 change: 1 addition & 0 deletions docs/cli/data_forge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: data_forge.DataForge
1 change: 1 addition & 0 deletions docs/cli/default_values.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: const_utils.default_values.AppSettings
58 changes: 58 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# DataForge

A simple way to automate working with datasets. You can set a time delay for automatic execution of your command.

If you don’t want the command to run in a loop, just omit the "-r" argument and it will be executed only once.


## Available commands
- **move** - move files from source directory to target directory
- **slice** - slice video files to images from the source directory to the target directory. Also, you can set flag "--remove" or "-rm" for deleting a source video file after slicing

- **delete** - delete files that match patterns from source directory
- **dedup** - find duplicates in the source directory among files that match a pattern. An image is considered a duplicate if the
Hamming distance between its hash and the hash of the compared image is lower than the threshold value. The threshold value is set as a percentage and must be in the range [0, 100]. Pay attention to the core_size parameter: a lower value makes details in the photo less important, and a higher value makes details more important when comparing images. Only the dHash comparison method is implemented for now.
- **clean-annotations** - find annotation files in a directory that don't have corresponding files
- **convert-annotations** - converts annotations from source format to destination format

#### to see command syntax and arguments use:
python data_forge.py <command> -h

## How to use:
clone git repository:

git clone https://github.com/SeregaCodit/AutoFileManager.git

go to project directory:

cd path_to_project

create virtual environment and activate it:

python -m venv .venv

install requirements :

pip install -r requirements.txt

run the --help command to learn more about available commands and arguments:

for check available commands

python data_forge.py --help

for check the command usage and available arguments

python data_forge.py {command} --help


## What else?

To use DataForge more comfortably with multiple tasks, you can create an .sh file or modify [strat_all_tasks.sh](https://github.com/SeregaCodit/AutoFileManager/blob/main/strat_all_tasks.sh) with a list of your commands, and run all of them with one simple command:

bash path_to_file/start_all_tasks.sh

to stop the execution of all commands, use:

pkill -f data_forge.py

1 change: 1 addition & 0 deletions docs/operations/clean_annotations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: tools.mixins.file_remover.FileRemoverMixin
1 change: 1 addition & 0 deletions docs/operations/convert_annotations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: file_operations.convert_annotations.ConvertAnnotationsOperation
1 change: 1 addition & 0 deletions docs/operations/deduplicate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: file_operations.deduplicate.DedupOperation
1 change: 1 addition & 0 deletions docs/operations/delete.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: file_operations.delete.DeleteOperation
1 change: 1 addition & 0 deletions docs/operations/file_operation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: file_operations.file_operation.FileOperation
1 change: 1 addition & 0 deletions docs/operations/move.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: file_operations.move.MoveOperation
1 change: 1 addition & 0 deletions docs/operations/slice.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: file_operations.slice.SliceOperation
2 changes: 1 addition & 1 deletion file_operations/clean_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from const_utils.default_values import AppSettings
from const_utils.parser_help import HelpStrings
from file_operations.file_operation import FileOperation
from file_operations.file_remover import FileRemoverMixin
from tools.mixins.file_remover import FileRemoverMixin



Expand Down
46 changes: 46 additions & 0 deletions file_operations/convert_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import argparse
from abc import ABC
from pathlib import Path

from const_utils.arguments import Arguments
from const_utils.default_values import AppSettings
from const_utils.parser_help import HelpStrings
from file_operations.file_operation import FileOperation
from tools.annotation_converter.converter.voc_yolo_converter import VocYOLOConverter


class ConvertAnnotationsOperation(FileOperation):
    """File operation that converts annotation files to another format.

    The converter class is looked up by the pair
    ``(first pattern value, destination type)``. Only ``(".xml", "yolo")`` is
    registered here, mapped to ``VocYOLOConverter``.
    """

    def __init__(self, settings: AppSettings, **kwargs):
        """converts annotation formats from pattern to destination. You Can use only one value of pattern at the time

        Args:
            settings: Application settings forwarded to ``FileOperation``.
            **kwargs: Reads ``destination_type`` and ``n_jobs`` (default 1);
                everything is also forwarded to ``FileOperation``.

        Raises:
            KeyError: If no converter is registered for the requested
                (pattern, destination type) pair.
        """
        super().__init__(settings, **kwargs)
        self.destination_type = kwargs.get('destination_type')
        self.converter_mapping = {
            (".xml", "yolo"): VocYOLOConverter
        }

        # Only the first pattern value selects the converter.
        conversion = (self.pattern[0], self.destination_type)
        try:
            converter_cls = self.converter_mapping[conversion]
        except KeyError:
            # Same exception type as a plain dict lookup, but with a message
            # that tells the user what went wrong and what is supported.
            supported = ", ".join(f"{src} -> {dst}" for src, dst in self.converter_mapping)
            raise KeyError(
                f"Unsupported conversion {conversion[0]} -> {conversion[1]}; "
                f"supported conversions: {supported}"
            ) from None

        self.converter = converter_cls()
        self.n_jobs = kwargs.get('n_jobs', 1)

    @staticmethod
    def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
        """Register the command-specific CLI arguments on ``parser``.

        Adds the destination directory, the destination annotation type and
        the number of jobs (defaulting to ``settings.n_jobs``).
        """
        parser.add_argument(
            Arguments.dst,
            default=None,
            help=HelpStrings.dst
        )
        parser.add_argument(
            Arguments.destination_type,
            help=HelpStrings.destination_type
        )
        parser.add_argument(
            Arguments.n_jobs,
            default=settings.n_jobs,
            help=HelpStrings.n_jobs
        )

    def do_task(self):
        """Run the selected converter over the files collected for this task."""
        self.converter.convert(self.files_for_task, self.target_directory, self.n_jobs)



2 changes: 1 addition & 1 deletion file_operations/deduplicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from const_utils.default_values import AppSettings
from const_utils.parser_help import HelpStrings
from file_operations.file_operation import FileOperation
from file_operations.file_remover import FileRemoverMixin
from tools.mixins.file_remover import FileRemoverMixin
from tools.comparer.img_comparer.img_comparer import ImageComparer


Expand Down
2 changes: 1 addition & 1 deletion file_operations/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from const_utils.default_values import AppSettings
from file_operations.file_operation import FileOperation
from file_operations.file_remover import FileRemoverMixin
from tools.mixins.file_remover import FileRemoverMixin


class DeleteOperation(FileOperation, FileRemoverMixin):
Expand Down
Loading