diff --git a/namer/command.py b/namer/command.py index 31cce926..8b979e0c 100644 --- a/namer/command.py +++ b/namer/command.py @@ -17,7 +17,7 @@ from namer.configuration import NamerConfig from namer.configuration_utils import default_config -from namer.ffmpeg import ffprobe, FFProbeResults +from namer.ffmpeg import FFMpeg, FFProbeResults from namer.fileinfo import parse_file_name, FileInfo from namer.comparison_results import ComparisonResults, LookedUpFileInfo @@ -66,6 +66,9 @@ class Command: config: NamerConfig + def get_command_target(self): + return str(self.target_movie_file.absolute()) + def move_command_files(target: Optional[Command], new_target: Path) -> Optional[Command]: if not target: @@ -176,10 +179,10 @@ def selected_best_movie(movies: List[str], config: NamerConfig) -> Optional[Path # This could use a lot of work. if movies: selected = Path(movies[0]) - selected_values = extract_relevant_attributes(ffprobe(selected), config) + selected_values = extract_relevant_attributes(FFMpeg().ffprobe(selected), config) for current_movie_str in movies: current_movie = Path(current_movie_str) - current_values = extract_relevant_attributes(ffprobe(current_movie), config) + current_values = extract_relevant_attributes(FFMpeg().ffprobe(current_movie), config) if current_values[1] <= config.max_desired_resolutions or config.max_desired_resolutions == -1: if greater_than(current_values, selected_values): selected_values = current_values diff --git a/namer/configuration_utils.py b/namer/configuration_utils.py index 47dcf62c..7c67c121 100644 --- a/namer/configuration_utils.py +++ b/namer/configuration_utils.py @@ -21,7 +21,7 @@ from namer.configuration import NamerConfig from namer.database import abbreviations from namer.name_formatter import PartialFormatter -from namer.ffmpeg import ffmpeg_version +from namer.ffmpeg import FFMpeg def __verify_naming_config(config: NamerConfig, formatter: PartialFormatter) -> bool: @@ -79,7 +79,7 @@ def __verify_name_string(formatter: PartialFormatter, name: str, name_string: st def __verify_ffmpeg() -> bool: - versions = ffmpeg_version() + versions = FFMpeg().ffmpeg_version() for tool, version in versions.items(): if not version: logger.error(f'No {tool} found, please install {tool}') diff --git a/namer/ffmpeg.py b/namer/ffmpeg.py index ccdab063..914e189b 100644 --- a/namer/ffmpeg.py +++ b/namer/ffmpeg.py @@ -20,6 +20,9 @@ import ffmpeg from loguru import logger from PIL import Image +from pathvalidate import ValidationError + +from namer.videophashstash import StashVideoPerceptualHash @dataclass(init=False, repr=False, eq=True, order=False, unsafe_hash=True, frozen=False) @@ -107,119 +110,170 @@ def get_resolution(self) -> Optional[int]: return stream.height if stream.height else 0 -@logger.catch -def ffprobe(file: Path) -> Optional[FFProbeResults]: - """ - Get the typed results of probing a video stream with ffprobe. - """ - stat = file.stat() - return _ffprobe(file, stat.st_size, stat.st_mtime) - - -@lru_cache(maxsize=1024) -def _ffprobe(file: Path, file_size: int, file_update: float) -> Optional[FFProbeResults]: - """ - Get the typed results of probing a video stream with ffprobe. - """ - - logger.info(f'ffprobe file "{file}"') - ffprobe_out: Optional[Any] = None - try: - ffprobe_out = ffmpeg.probe(file) - except: - pass - - if not ffprobe_out: - return - - streams = [stream for stream in ffprobe_out['streams'] if stream['codec_type'] in ('video', 'audio')] - if not streams: - return - - output: List[FFProbeStream] = [] - for stream in streams: - ff_stream = FFProbeStream() - ff_stream.bit_rate = int(stream['bit_rate']) if 'bit_rate' in stream else -1 - ff_stream.codec_name = stream['codec_name'] - ff_stream.codec_type = stream['codec_type'] - ff_stream.index = int(stream['index']) - ff_stream.duration = float(stream['duration']) if 'duration' in stream else -1 - - ff_stream.height = int(stream['height']) if 'height' in stream else -1 - ff_stream.width = int(stream['width']) if 'width' in stream else -1 - ff_stream.tags_language = stream['tags']['language'] if 'tags' in stream and 'language' in stream['tags'] else None - - if 'disposition' in stream: - ff_stream.disposition_attached_pic = stream['disposition']['attached_pic'] == 1 - ff_stream.disposition_default = stream['disposition']['default'] == 1 - - if 'avg_frame_rate' in stream: - numer, denom = stream['avg_frame_rate'].split('/', 2) - numer, denom = int(numer), int(denom) - if numer != 0 and denom != 0: - ff_stream.avg_frame_rate = numer / denom - - output.append(ff_stream) - - probe_format = FFProbeFormat() - if 'format' in ffprobe_out: - probe_format.bit_rate = int(ffprobe_out['format']['bit_rate']) - probe_format.duration = float(ffprobe_out['format']['duration']) - probe_format.size = int(ffprobe_out['format']['size']) - probe_format.tags = ffprobe_out['format']['tags'] if 'tags' in ffprobe_out['format'] else {} - - return FFProbeResults(output, probe_format) - - -def get_audio_stream_for_lang(file: Path, language: str) -> int: - """ - given a mp4 input file and a desired language will return the stream position of that language in the mp4. - if the language is None, or the stream is not found, or the desired stream is the only default stream, None is returned. - See: https://iso639-3.sil.org/code_tables/639/data/ - - Returns -1 if stream can not be determined - """ - - stream_index = -1 - probe = ffprobe(file) - if probe: - stream = probe.get_audio_stream(language) - if stream: - stream_index = stream.index - 1 if not stream.disposition_default else -1 - - return stream_index - - -def update_audio_stream_if_needed(mp4_file: Path, language: Optional[str]) -> bool: - """ - Returns true if the file had to be edited to have a default audio stream equal to the desired language, - mostly a concern for apple players (Quicktime/Apple TV/etc.) - Copies, and potentially updates the default audio stream of a video file. - """ - - random = "".join(choices(population=string.ascii_uppercase + string.digits, k=10)) - temp_filename = f'{mp4_file.stem}_{random}' + mp4_file.suffix - work_file = mp4_file.parent / temp_filename +class FFMpeg: + __local_dir: Optional[Path] = None + __ffmpeg_cmd: str = 'ffmpeg' + __ffprobe_cmd: str = 'ffprobe' + + def __init__(self): + versions = self.__ffmpeg_version(None) + if not versions['ffmpeg'] or not versions['ffprobe']: + home_path: Path = Path(__file__).parent + phash_path: Path = home_path / 'tools' + if not phash_path.is_dir(): + phash_path.mkdir(exist_ok=True, parents=True) + self.__local_dir = phash_path + versions = self.__ffmpeg_version(phash_path) + if not versions['ffmpeg'] and not versions['ffprobe']: + StashVideoPerceptualHash().install_ffmpeg() + versions = self.__ffmpeg_version(phash_path) + if not versions['ffmpeg'] and not versions['ffprobe']: + raise ValidationError(f"could not find ffmpeg/ffprobe on path, or in tools dir: {self.__local_dir}") + self.__ffmpeg_cmd = str(phash_path / 'ffmpeg') + self.__ffprobe_cmd = str(phash_path / 'ffprobe') + + @logger.catch + def ffprobe(self, file: Path) -> Optional[FFProbeResults]: + """ + Get the typed results of probing a video stream with ffprobe. + """ + stat = file.stat() + return self._ffprobe(file, stat.st_size, stat.st_mtime) + + @lru_cache(maxsize=1024) + def _ffprobe(self, file: Path, file_size: int, file_update: float) -> Optional[FFProbeResults]: + """ + Get the typed results of probing a video stream with ffprobe. + """ + + logger.info(f'ffprobe file "{file}"') + ffprobe_out: Optional[Any] = None + try: + ffprobe_out = ffmpeg.probe(file, self.__ffprobe_cmd) + except: + pass - stream = get_audio_stream_for_lang(mp4_file, language) if language else None - if stream and stream >= 0: + if not ffprobe_out: + return + + streams = [stream for stream in ffprobe_out['streams'] if stream['codec_type'] in ('video', 'audio')] + if not streams: + return + + output: List[FFProbeStream] = [] + for stream in streams: + ff_stream = FFProbeStream() + ff_stream.bit_rate = int(stream['bit_rate']) if 'bit_rate' in stream else -1 + ff_stream.codec_name = stream['codec_name'] + ff_stream.codec_type = stream['codec_type'] + ff_stream.index = int(stream['index']) + ff_stream.duration = float(stream['duration']) if 'duration' in stream else -1 + + ff_stream.height = int(stream['height']) if 'height' in stream else -1 + ff_stream.width = int(stream['width']) if 'width' in stream else -1 + ff_stream.tags_language = stream['tags']['language'] if 'tags' in stream and 'language' in stream['tags'] else None + + if 'disposition' in stream: + ff_stream.disposition_attached_pic = stream['disposition']['attached_pic'] == 1 + ff_stream.disposition_default = stream['disposition']['default'] == 1 + + if 'avg_frame_rate' in stream: + numer, denom = stream['avg_frame_rate'].split('/', 2) + numer, denom = int(numer), int(denom) + if numer != 0 and denom != 0: + ff_stream.avg_frame_rate = numer / denom + + output.append(ff_stream) + + probe_format = FFProbeFormat() + if 'format' in ffprobe_out: + probe_format.bit_rate = int(ffprobe_out['format']['bit_rate']) + probe_format.duration = float(ffprobe_out['format']['duration']) + probe_format.size = int(ffprobe_out['format']['size']) + probe_format.tags = ffprobe_out['format']['tags'] if 'tags' in ffprobe_out['format'] else {} + + return FFProbeResults(output, probe_format) + + def get_audio_stream_for_lang(self, file: Path, language: str) -> int: + """ + given a mp4 input file and a desired language will return the stream position of that language in the mp4. + if the language is None, or the stream is not found, or the desired stream is the only default stream, None is returned. + See: https://iso639-3.sil.org/code_tables/639/data/ + + Returns -1 if stream can not be determined + """ + + stream_index = -1 + probe = self.ffprobe(file) + if probe: + stream = probe.get_audio_stream(language) + if stream: + stream_index = stream.index - 1 if not stream.disposition_default else -1 + + return stream_index + + def update_audio_stream_if_needed(self, mp4_file: Path, language: Optional[str]) -> bool: + """ + Returns true if the file had to be edited to have a default audio stream equal to the desired language, + mostly a concern for apple players (Quicktime/Apple TV/etc.) + Copies, and potentially updates the default audio stream of a video file. + """ + + random = "".join(choices(population=string.ascii_uppercase + string.digits, k=10)) + temp_filename = f'{mp4_file.stem}_{random}' + mp4_file.suffix + work_file = mp4_file.parent / temp_filename + + stream = self.get_audio_stream_for_lang(mp4_file, language) if language else None + if stream and stream >= 0: + process = ( + ffmpeg + .input(mp4_file) + .output(str(work_file), **{ + 'map': 0, # copy all stream + 'disposition:a': 'none', # mark all audio streams as not default + f'disposition:a:{stream}': 'default', # mark this audio stream as default + 'c': 'copy' # don't re-encode anything. + }) + .run_async(quiet=True, cmd=self.__ffmpeg_cmd) + ) + + stdout, stderr = process.communicate() + stdout, stderr = (stdout.decode('UTF-8') if isinstance(stdout, bytes) else stdout), (stderr.decode('UTF-8') if isinstance(stderr, bytes) else stderr) + success = process.returncode == 0 + if not success: + logger.warning("Could not update audio stream for {}", mp4_file) + if stderr: + logger.error(stderr) + else: + logger.warning("Return code: {}", process.returncode) + mp4_file.unlink() + shutil.move(work_file, mp4_file) + + return success + + return True + + def attempt_fix_corrupt(self, mp4_file: Path) -> bool: + """ + Attempt to fix corrupt mp4 files. + """ + random = "".join(choices(population=string.ascii_uppercase + string.digits, k=10)) + temp_filename = f'{mp4_file.stem}_{random}' + mp4_file.suffix + work_file = mp4_file.parent / temp_filename + + logger.info("Attempt to fix damaged mp4 file: {}", mp4_file) process = ( ffmpeg .input(mp4_file) - .output(str(work_file), **{ - 'map': 0, # copy all stream - 'disposition:a': 'none', # mark all audio streams as not default - f'disposition:a:{stream}': 'default', # mark this audio stream as default - 'c': 'copy' # don't re-encode anything. - }) - .run_async(quiet=True) + .output(str(work_file), c='copy') + .run_async(quiet=True, cmd=self.__ffmpeg_cmd) ) stdout, stderr = process.communicate() stdout, stderr = (stdout.decode('UTF-8') if isinstance(stdout, bytes) else stdout), (stderr.decode('UTF-8') if isinstance(stderr, bytes) else stderr) success = process.returncode == 0 if not success: - logger.warning("Could not update audio stream for {}", mp4_file) + logger.warning("Could not fix mp4 files {}", mp4_file) if stderr: logger.error(stderr) else: @@ -229,80 +283,50 @@ def update_audio_stream_if_needed(mp4_file: Path, language: Optional[str]) -> bo return success - return True - - -def attempt_fix_corrupt(mp4_file: Path) -> bool: - """ - Attempt to fix corrupt mp4 files. - """ - random = "".join(choices(population=string.ascii_uppercase + string.digits, k=10)) - temp_filename = f'{mp4_file.stem}_{random}' + mp4_file.suffix - work_file = mp4_file.parent / temp_filename - - logger.info("Attempt to fix damaged mp4 file: {}", mp4_file) - process = ( - ffmpeg - .input(mp4_file) - .output(str(work_file), c='copy') - .run_async(quiet=True) - ) - - stdout, stderr = process.communicate() - stdout, stderr = (stdout.decode('UTF-8') if isinstance(stdout, bytes) else stdout), (stderr.decode('UTF-8') if isinstance(stderr, bytes) else stderr) - success = process.returncode == 0 - if not success: - logger.warning("Could not fix mp4 files {}", mp4_file) - if stderr: - logger.error(stderr) - else: - logger.warning("Return code: {}", process.returncode) - mp4_file.unlink() - shutil.move(work_file, mp4_file) - - return success - - -def extract_screenshot(file: Path, time: float, screenshot_width: int = -1) -> Image.Image: - out, _ = ( - ffmpeg - .input(file, ss=time) - .filter('scale', screenshot_width, -2) - .output('pipe:', vframes=1, format='apng') - .run(quiet=True, capture_stdout=True) - ) - out = BytesIO(out) - image = Image.open(out) - - return image - - -def ffmpeg_version() -> Dict: - tools = ['ffmpeg', 'ffprobe'] - re_tools = '|'.join(tools) - reg = re.compile(fr'({re_tools}) version (?P[\d|.]*)') - - versions = {} - for tool in tools: - args = [ - tool, - '-version' - ] - - process = None - try: - process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, universal_newlines=True) - except: - pass + def extract_screenshot(self, file: Path, time: float, screenshot_width: int = -1) -> Image.Image: + out, _ = ( + ffmpeg + .input(file, ss=time) + .filter('scale', screenshot_width, -2) + .output('pipe:', vframes=1, format='apng') + .run(quiet=True, capture_stdout=True, cmd=self.__ffmpeg_cmd) + ) + out = BytesIO(out) + image = Image.open(out) + + return image + + def ffmpeg_version(self) -> Dict: + return self.__ffmpeg_version(self.__local_dir) + + def __ffmpeg_version(self, local_dir: Optional[Path]) -> Dict: + tools = ['ffmpeg', 'ffprobe'] + re_tools = '|'.join(tools) + reg = re.compile(fr'({re_tools}) version (?P[\d|.]*)') + + versions = {} + + for tool in tools: + executable = str(local_dir / tool) if local_dir else tool + args = [ + executable, + '-version' + ] + + process = None + try: + process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, universal_newlines=True) + except: + pass - matches = None - if process: - stdout, _ = process.communicate() + matches = None + if process: + stdout, _ = process.communicate() - if stdout: - line: str = stdout.split('\n', 1)[0] - matches = reg.search(line) + if stdout: + line: str = stdout.split('\n', 1)[0] + matches = reg.search(line) - versions[tool] = matches.groupdict().get('version') if matches else None + versions[tool] = matches.groupdict().get('version') if matches else None - return versions + return versions diff --git a/namer/mutagen.py b/namer/mutagen.py index 03ccddd9..471e9b18 100644 --- a/namer/mutagen.py +++ b/namer/mutagen.py @@ -9,7 +9,7 @@ from mutagen.mp4 import MP4, MP4Cover, MP4StreamInfoError from namer.configuration import NamerConfig -from namer.ffmpeg import FFProbeResults, attempt_fix_corrupt, update_audio_stream_if_needed +from namer.ffmpeg import FFProbeResults, FFMpeg from namer.comparison_results import LookedUpFileInfo @@ -49,7 +49,7 @@ def get_mp4_if_possible(mp4: Path) -> MP4: try: video = MP4(mp4) except MP4StreamInfoError: - attempt_fix_corrupt(mp4) + FFMpeg().attempt_fix_corrupt(mp4) video = MP4(mp4) return video @@ -76,7 +76,7 @@ def update_mp4_file(mp4: Path, looked_up: LookedUpFileInfo, poster: Optional[Pat """ logger.info("Updating audio and tags for: {}", mp4) - success = update_audio_stream_if_needed(mp4, config.language) + success = FFMpeg().update_audio_stream_if_needed(mp4, config.language) if not success: logger.info("Could not process audio or copy {}", mp4) logger.info("Updating atom tags on: {}", mp4) diff --git a/namer/namer.py b/namer/namer.py index 5203231f..95a4d612 100755 --- a/namer/namer.py +++ b/namer/namer.py @@ -19,13 +19,13 @@ from namer.configuration import NamerConfig from namer.configuration_utils import default_config, verify_configuration from namer.command import make_command, move_command_files, move_to_final_location, set_permissions, write_log_file -from namer.ffmpeg import FFProbeResults, ffprobe +from namer.ffmpeg import FFProbeResults, FFMpeg from namer.fileinfo import FileInfo from namer.metadataapi import get_complete_metadataapi_net_fileinfo, get_image, get_trailer, match from namer.moviexml import parse_movie_xml_file, write_nfo from namer.name_formatter import PartialFormatter from namer.mutagen import update_mp4_file -from namer.videophash import StashVideoPerceptualHash as VideoPerceptualHash +from namer.videophashstash import StashVideoPerceptualHash as VideoPerceptualHash DESCRIPTION = """ Namer, the porndb local file renamer. It can be a command line tool to rename mp4/mkv/avi/mov/flv files and to embed tags in mp4s, @@ -187,7 +187,7 @@ def process_file(command: Command) -> Optional[Command]: target_dir = command.target_directory if command.target_directory is not None else command.target_movie_file.parent set_permissions(target_dir, command.config) if new_metadata is not None: - ffprobe_results = ffprobe(command.target_movie_file) + ffprobe_results = FFMpeg().ffprobe(command.target_movie_file) if ffprobe_results: new_metadata.resolution = ffprobe_results.get_resolution() target = move_to_final_location(command, new_metadata) diff --git a/namer/videophash.py b/namer/videophash.py index 4d630731..465b11a7 100644 --- a/namer/videophash.py +++ b/namer/videophash.py @@ -1,15 +1,9 @@ import concurrent.futures -import subprocess -import platform -from dataclasses import dataclass from decimal import Decimal, ROUND_HALF_UP from functools import lru_cache -from json import JSONDecodeError from pathlib import Path -from types import SimpleNamespace -from typing import List, Literal, Optional, Union +from typing import List, Literal, Optional -import json import imagehash import numpy import oshash @@ -18,91 +12,8 @@ from loguru import logger from PIL import Image -from namer.ffmpeg import extract_screenshot, ffprobe - - -@dataclass(init=False, repr=False, eq=True, order=False, unsafe_hash=True, frozen=False) -class PerceptualHash: - duration: int - phash: imagehash.ImageHash - oshash: str - - -def return_perceptual_hash(duration: Union[float, int], phash: Union[str, imagehash.ImageHash], file_oshash: str) -> PerceptualHash: - output = PerceptualHash() - output.duration = int(duration) if isinstance(duration, float) else duration - output.phash = imagehash.hex_to_hash(phash) if isinstance(phash, str) else phash - output.oshash = file_oshash - - return output - - -class StashVideoPerceptualHash: - __home_path: Path = Path(__file__).parent - __phash_path: Path = __home_path / 'tools' - __phash_name: str = 'videohashes' - __supported_arch: dict = { - 'amd64': 'amd64', - 'x86_64': 'amd64', - 'arm64': 'arm64', - 'aarch64': 'arm64', - 'arm': 'arm', - } - __phash_suffixes: dict = { - 'windows': '.exe', - 'linux': '-linux', - 'darwin': '-macos', - } - - def __init__(self): - if not self.__phash_path.is_dir(): - self.__phash_path.mkdir(exist_ok=True, parents=True) - - system = platform.system().lower() - arch = platform.machine().lower() - if arch not in self.__supported_arch.keys(): - raise SystemError(f"Unsupported architecture error {arch}") - - self.__phash_name += '-' + self.__supported_arch[arch] + self.__phash_suffixes[system] - - def get_hashes(self, file: Path) -> Optional[PerceptualHash]: - stat = file.stat() - return self._get_stash_phash(file, stat.st_size, stat.st_mtime) - - @lru_cache(maxsize=1024) - def _get_stash_phash(self, file: Path, file_size: int, file_update: float) -> Optional[PerceptualHash]: - logger.info(f'Calculating phash for file "{file}"') - return self.__execute_stash_phash(file) - - def __execute_stash_phash(self, file: Path) -> Optional[PerceptualHash]: - output = None - if not self.__phash_path: - return output - - args = [ - str(self.__phash_path / self.__phash_name), - '-json', - '--video', str(file) - ] - with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as process: - stdout, stderr = process.communicate() - stdout, stderr = stdout.strip(), stderr.strip() - - success = process.returncode == 0 - if success: - data = None - try: - data = json.loads(stdout, object_hook=lambda d: SimpleNamespace(**d)) - except JSONDecodeError: - logger.error(stdout) - pass - - if data: - output = return_perceptual_hash(data.duration, data.phash, data.oshash) - else: - logger.error(stderr) - - return output +from namer.ffmpeg import FFMpeg +from namer.videophashstash import PerceptualHash, return_perceptual_hash class VideoPerceptualHash: @@ -113,7 +24,7 @@ class VideoPerceptualHash: def get_hashes(self, file: Path) -> Optional[PerceptualHash]: data = None - probe = ffprobe(file) + probe = FFMpeg().ffprobe(file) if not probe: return data @@ -175,10 +86,11 @@ def __generate_thumbnails(self, file: Path, duration: float) -> List[Image.Image return [] queue = [] + ffmpeg = FFMpeg() with concurrent.futures.ThreadPoolExecutor() as executor: for idx in range(chunk_count): time = offset + (idx * step_size) - future = executor.submit(extract_screenshot, file, time, self.__screenshot_width) + future = executor.submit(ffmpeg.extract_screenshot, file, time, self.__screenshot_width) queue.append(future) concurrent.futures.wait(queue) diff --git a/namer/videophashstash.py b/namer/videophashstash.py new file mode 100644 index 00000000..084eb06e --- /dev/null +++ b/namer/videophashstash.py @@ -0,0 +1,104 @@ +import subprocess +import platform +from dataclasses import dataclass +from functools import lru_cache +from json import JSONDecodeError +from pathlib import Path +from types import SimpleNamespace +from typing import Optional, Union + +import json +import imagehash +from loguru import logger + + +@dataclass(init=False, repr=False, eq=True, order=False, unsafe_hash=True, frozen=False) +class PerceptualHash: + duration: int + phash: imagehash.ImageHash + oshash: str + + +def return_perceptual_hash(duration: Union[float, int], phash: Optional[Union[str, imagehash.ImageHash]], file_oshash: str) -> PerceptualHash: + output = PerceptualHash() + output.duration = int(duration) if isinstance(duration, float) else duration + if phash: + output.phash = imagehash.hex_to_hash(phash) if isinstance(phash, str) else phash + output.oshash = file_oshash + + return output + + +class StashVideoPerceptualHash: + __home_path: Path = Path(__file__).parent + __phash_path: Path = __home_path / 'tools' + __phash_name: str = 'videohashes' + __supported_arch: dict = { + 'amd64': 'amd64', + 'x86_64': 'amd64', + 'arm64': 'arm64', + 'aarch64': 'arm64', + 'arm': 'arm', + } + __phash_suffixes: dict = { + 'windows': '.exe', + 'linux': '-linux', + 'darwin': '-macos', + } + + def __init__(self): + if not self.__phash_path.is_dir(): + self.__phash_path.mkdir(exist_ok=True, parents=True) + + system = platform.system().lower() + arch = platform.machine().lower() + if arch not in self.__supported_arch.keys(): + raise SystemError(f"Unsupported architecture error {arch}") + + self.__phash_name += '-' + self.__supported_arch[arch] + self.__phash_suffixes[system] + + def install_ffmpeg(self) -> None: + # videohasher installs ffmpeg next to itself by default, even if + # there's nothing to process. + self.__execute_stash_phash(None) + + def get_hashes(self, file: Path) -> Optional[PerceptualHash]: + stat = file.stat() + return self._get_stash_phash(file, stat.st_size, stat.st_mtime) + + @lru_cache(maxsize=1024) + def _get_stash_phash(self, file: Path, file_size: int, file_update: float) -> Optional[PerceptualHash]: + logger.info(f'Calculating phash for file "{file}"') + return self.__execute_stash_phash(file) + + def __execute_stash_phash(self, file: Optional[Path]) -> Optional[PerceptualHash]: + output = None + if not self.__phash_path: + return output + + args = [ + str(self.__phash_path / self.__phash_name), + '-json', + ] + if file: + args.append('--video') + args.append(str(file)) + with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as process: + stdout, stderr = process.communicate() + stdout, stderr = stdout.strip(), stderr.strip() + + success = process.returncode == 0 + if success: + data = None + try: + data = json.loads(stdout, object_hook=lambda d: SimpleNamespace(**d)) + except JSONDecodeError: + logger.error(stdout) + pass + + if data: + output = return_perceptual_hash(data.duration, data.phash, data.oshash) + elif file: + logger.error(stderr) + + return output diff --git a/namer/watchdog.py b/namer/watchdog.py index 7c2cb463..c4bc3d96 100644 --- a/namer/watchdog.py +++ b/namer/watchdog.py @@ -129,7 +129,9 @@ class MovieWatcher: """ def enque_work(self, command: Command): - if not self.__stopped: + queue_items = list(self.__command_queue.queue) + items = list(map(lambda x: x.get_command_target(), filter(lambda i: i is not None, queue_items))) + if not self.__stopped and command.get_command_target() not in items or command is None: self.__command_queue.put(command) else: raise RuntimeError("Command not added to work queue, server is stopping") @@ -138,10 +140,15 @@ def __processing_thread(self): while True: command = self.__command_queue.get() if command is None: + logger.info("Marking None task as done") + self.__command_queue.task_done() break handle(command) self.__command_queue.task_done() - self.__command_queue.task_done() + # Throw away any items after the None item is processed. + with self.__command_queue.mutex: + self.__command_queue.queue.clear() + logger.info("exit processing_thread") def __init__(self, namer_config: NamerConfig): self.__started = False @@ -254,10 +261,18 @@ def stop(self): if self.__webserver: logger.info("webserver stop") self.__webserver.stop() - logger.debug("command queue None") self.__command_queue.put(None) - logger.debug("command join") - self.__command_queue.join() + + # let the thread processing work items complete. + self.__worker_thread.join() + logger.debug("command queue None") + test = os.environ.get('PYTEST_CURRENT_TEST', "") + logger.debug(f"{test}: command join") + + items = list(map(lambda x: x.get_command_target() if x else None, self.__command_queue.queue)) + logger.info(f"waiting for items to process {items}") + # we already wait for the worker thread. + # self.__command_queue.join() logger.debug("command joined") def get_web_port(self) -> Optional[int]: diff --git a/test/namer_ffmpeg_test.py b/test/namer_ffmpeg_test.py index aacf2225..8d2cf34d 100644 --- a/test/namer_ffmpeg_test.py +++ b/test/namer_ffmpeg_test.py @@ -6,7 +6,7 @@ import unittest from pathlib import Path -from namer.ffmpeg import get_audio_stream_for_lang, update_audio_stream_if_needed, ffprobe, ffmpeg_version +from namer.ffmpeg import FFMpeg class UnitTestAsTheDefaultExecution(unittest.TestCase): @@ -22,7 +22,7 @@ def test_get_resolution(self): tempdir = Path(tmpdir) shutil.copytree(Path(__file__).resolve().parent, tempdir / "test") file = tempdir / "test" / "Site.22.01.01.painful.pun.XXX.720p.xpost.mp4" - results = ffprobe(file) + results = FFMpeg().ffprobe(file) self.assertIsNotNone(results) if results: res = results.get_resolution() @@ -36,9 +36,9 @@ def test_get_audio_stream(self): tempdir = Path(tmpdir) shutil.copytree(Path(__file__).resolve().parent, tempdir / "test") file = tempdir / "test" / "Site.22.01.01.painful.pun.XXX.720p.xpost.mp4" - stream_number = get_audio_stream_for_lang(file, "und") + stream_number = FFMpeg().get_audio_stream_for_lang(file, "und") self.assertEqual(stream_number, -1) - stream_number = get_audio_stream_for_lang(file, "eng") + stream_number = FFMpeg().get_audio_stream_for_lang(file, "eng") self.assertEqual(stream_number, -1) def test_ffprobe(self) -> None: @@ -49,7 +49,7 @@ def test_ffprobe(self) -> None: tempdir = Path(tmpdir) shutil.copytree(Path(__file__).resolve().parent, tempdir / "test") file = tempdir / "test" / "Site.22.01.01.painful.pun.XXX.720p.xpost_wrong.mp4" - results = ffprobe(file) + results = FFMpeg().ffprobe(file) self.assertIsNotNone(results) if results: self.assertTrue(results.get_all_streams()[0].is_video()) @@ -78,16 +78,16 @@ def test_update_audio_stream(self): tempdir = Path(tmpdir) shutil.copytree(Path(__file__).resolve().parent, tempdir / "test") file = tempdir / "test" / "Site.22.01.01.painful.pun.XXX.720p.xpost_wrong.mp4" - stream_number = get_audio_stream_for_lang(file, "und") + stream_number = FFMpeg().get_audio_stream_for_lang(file, "und") self.assertEqual(stream_number, -1) - stream_number = get_audio_stream_for_lang(file, "eng") + stream_number = FFMpeg().get_audio_stream_for_lang(file, "eng") self.assertEqual(stream_number, 1) - update_audio_stream_if_needed(file, "eng") - stream_number = get_audio_stream_for_lang(file, "eng") + FFMpeg().update_audio_stream_if_needed(file, "eng") + stream_number = FFMpeg().get_audio_stream_for_lang(file, "eng") self.assertEqual(stream_number, -1) def test_file_ffmpeg(self): - versions = ffmpeg_version() + versions = FFMpeg().ffmpeg_version() for tool, version in versions.items(): self.assertIsNotNone(version) diff --git a/test/namer_mutagen_test.py b/test/namer_mutagen_test.py index 19a89bb3..181d35cd 100644 --- a/test/namer_mutagen_test.py +++ b/test/namer_mutagen_test.py @@ -11,7 +11,7 @@ from namer.configuration import NamerConfig from namer.fileinfo import parse_file_name -from namer.ffmpeg import ffprobe +from namer.ffmpeg import FFMpeg from namer.metadataapi import match from namer.mutagen import resolution_to_hdv_setting, update_mp4_file from namer.comparison_results import LookedUpFileInfo @@ -47,7 +47,7 @@ def test_writing_metadata(self): shutil.copy(test_dir / "Site.22.01.01.painful.pun.XXX.720p.xpost.mp4", target_file) name_parts = parse_file_name(target_file.name, config) info = match(name_parts, config) - ffprobe_results = ffprobe(target_file) + ffprobe_results = FFMpeg().ffprobe(target_file) update_mp4_file(target_file, info.results[0].looked_up, poster, ffprobe_results, NamerConfig()) output = MP4(target_file) self.assertEqual(output.get("\xa9nam"), ["Peeping Tom"]) @@ -65,7 +65,7 @@ def test_writing_full_metadata(self): shutil.copy(test_dir / "poster.png", poster) name_parts = parse_file_name(target_file.name, config) info = match(name_parts, config) - ffprobe_results = ffprobe(target_file) + ffprobe_results = FFMpeg().ffprobe(target_file) update_mp4_file(target_file, info.results[0].looked_up, poster, ffprobe_results, NamerConfig()) validate_mp4_tags(self, target_file) @@ -87,7 +87,7 @@ def test_sha_sum_two_identical_transformations(self): shutil.copy(test_dir / "poster.png", poster) name_parts = parse_file_name(target_file.name, config) info = match(name_parts, config) - ffprobe_results = ffprobe(target_file) + ffprobe_results = FFMpeg().ffprobe(target_file) update_mp4_file(target_file, info.results[0].looked_up, poster, ffprobe_results, NamerConfig()) validate_mp4_tags(self, target_file) sha_1 = hashlib.sha256(target_file.read_bytes()).digest().hex() @@ -99,7 +99,7 @@ def test_sha_sum_two_identical_transformations(self): shutil.copy(test_dir / "poster.png", poster) name_parts = parse_file_name(target_file.name, config) info = match(name_parts, config) - ffprobe_results = ffprobe(target_file) + ffprobe_results = FFMpeg().ffprobe(target_file) update_mp4_file(target_file, info.results[0].looked_up, poster, ffprobe_results, NamerConfig()) validate_mp4_tags(self, target_file) sha_2 = hashlib.sha256(target_file.read_bytes()).digest().hex() @@ -118,7 +118,7 @@ def test_non_existent_poster(self): poster = None name_parts = parse_file_name(target_file.name, config) info = match(name_parts, config) - ffprobe_results = ffprobe(target_file) + ffprobe_results = FFMpeg().ffprobe(target_file) update_mp4_file(target_file, info.results[0].looked_up, poster, ffprobe_results, NamerConfig()) validate_mp4_tags(self, target_file) @@ -132,7 +132,7 @@ def test_non_existent_file(self): poster = None name_parts = parse_file_name(targetfile.name, config) info = match(name_parts, config) - ffprobe_results = ffprobe(targetfile) + ffprobe_results = FFMpeg().ffprobe(targetfile) update_mp4_file(targetfile, info.results[0].looked_up, poster, ffprobe_results, config) self.assertFalse(targetfile.exists()) @@ -148,7 +148,7 @@ def test_empty_infos(self): test_dir = Path(__file__).resolve().parent shutil.copy(test_dir / "Site.22.01.01.painful.pun.XXX.720p.xpost.mp4", target_file) info = LookedUpFileInfo() - ffprobe_results = ffprobe(target_file) + ffprobe_results = FFMpeg().ffprobe(target_file) update_mp4_file(target_file, info, None, ffprobe_results, NamerConfig()) self.assertTrue(target_file.exists()) mp4 = MP4(target_file) diff --git a/test/namer_videophash_test.py b/test/namer_videophash_test.py index bd6c407a..df30f0f9 100644 --- a/test/namer_videophash_test.py +++ b/test/namer_videophash_test.py @@ -9,7 +9,8 @@ import imagehash -from namer.videophash import StashVideoPerceptualHash, VideoPerceptualHash +from namer.videophashstash import StashVideoPerceptualHash +from namer.videophash import VideoPerceptualHash class UnitTestAsTheDefaultExecution(unittest.TestCase): diff --git a/test/namer_watchdog_test.py b/test/namer_watchdog_test.py index 0510c5a7..fe57dd1e 100644 --- a/test/namer_watchdog_test.py +++ b/test/namer_watchdog_test.py @@ -11,19 +11,19 @@ from mutagen.mp4 import MP4 -from namer.ffmpeg import ffprobe +from namer.ffmpeg import FFMpeg from namer.configuration import NamerConfig from namer.watchdog import create_watcher, done_copying, retry_failed, MovieWatcher from test.utils import Wait, new_ea, validate_mp4_tags, validate_permissions, environment, sample_config, ProcessingTarget -def wait_until_processed(watcher: MovieWatcher): +def wait_until_processed(watcher: MovieWatcher, durration: int = 60): """ Waits until all files have been moved out of watch/working dirs. """ config = watcher.getConfig() logging.info("waiting for files to be processes") - Wait().seconds(60).checking(1).until(lambda: len(list(config.watch_dir.iterdir())) > 0 or len(list(config.work_dir.iterdir())) > 0).isFalse() + Wait().seconds(durration).checking(1).until(lambda: len(list(config.watch_dir.iterdir())) > 0 or len(list(config.work_dir.iterdir())) > 0).isFalse() logging.info("past waiting for files") watcher.stop() logging.info("past stopping") @@ -99,12 +99,12 @@ def test_handler_collisions_success_choose_best(self): new_ea(config.watch_dir, use_dir=False, post_stem="2", mp4_file_name=better), new_ea(config.watch_dir, use_dir=False, post_stem="1", mp4_file_name=best) ] - wait_until_processed(watcher) + wait_until_processed(watcher, 120) self.assertFalse(targets[0].get_file().exists()) self.assertEqual(len(list(config.work_dir.iterdir())), 0) output_file = config.dest_dir / "EvilAngel - 2022-01-03 - Carmela Clutch Fabulous Anal 3-Way!" / "EvilAngel - 2022-01-03 - Carmela Clutch Fabulous Anal 3-Way!.mp4" self.assertEqual(MP4(output_file).get("\xa9nam"), ["Carmela Clutch: Fabulous Anal 3-Way!"]) - results = ffprobe(output_file) + results = FFMpeg().ffprobe(output_file) self.assertIsNotNone(results) if results: stream = results.get_default_video_stream() diff --git a/test/utils.py b/test/utils.py index f9da5cf1..1af307b4 100644 --- a/test/utils.py +++ b/test/utils.py @@ -284,7 +284,11 @@ def setup(self, target_dir: Path): self.file = target_dir / self.relative os.makedirs(self.file.parent, exist_ok=True) self.file.parent.chmod(0o700) - shutil.copy(test_mp4, self.file) + try: + shutil.copy(test_mp4, self.file) + except: + # shutil isn't thread safe, it copies, then sets the mode if linux/mac + pass try: test_mp4.chmod(0o600) except: